[x265] [PATCH] distortion: change data type of distortion for 10 bit to avoid overflow

Mon Oct 12 12:29:54 CEST 2015

# HG changeset patch
# User Divya Manivannan <divya at multicorewareinc.com>
# Date 1443607299 -19800
#      Wed Sep 30 15:31:39 2015 +0530
# Node ID 534d80f9272b43485cca8bb0c0a522ec8abeaa13
# Parent  b6156a08b1def3584647f26096866c1a0c11e54a
distortion: change data type of distortion for 10-bit builds to avoid overflow
            when adding the distortions of the Y, U, and V planes

diff -r b6156a08b1de -r 534d80f9272b source/common/common.h

--- a/source/common/common.h	Fri Oct 09 20:45:59 2015 +0530
+++ b/source/common/common.h	Wed Sep 30 15:31:39 2015 +0530
@@ -141,6 +141,12 @@
 typedef uint64_t sse_ret_t;
 #endif
 
+#if X265_DEPTH < 10
+typedef uint32_t dist_ret_t;
+#else
+typedef uint64_t dist_ret_t;
+#endif
+
 #ifndef NULL
 #define NULL 0
 #endif
diff -r b6156a08b1de -r 534d80f9272b source/encoder/rdcost.h
--- a/source/encoder/rdcost.h	Fri Oct 09 20:45:59 2015 +0530
+++ b/source/encoder/rdcost.h	Wed Sep 30 15:31:39 2015 +0530
@@ -89,9 +89,9 @@
         m_lambda = (uint64_t)floor(256.0 * lambda);
     }
 
-    inline uint64_t calcRdCost(sse_ret_t distortion, uint32_t bits) const
+    inline uint64_t calcRdCost(dist_ret_t distortion, uint32_t bits) const
     {
-#if X265_DEPTH <= 10
+#if X265_DEPTH < 10
         X265_CHECK(bits <= (UINT64_MAX - 128) / m_lambda2,
                    "calcRdCost wrap detected dist: %u, bits %u, lambda: " X265_LL "\n",
                    distortion, bits, m_lambda2);
@@ -116,9 +116,9 @@
     }
 
     /* return the RD cost of this prediction, including the effect of psy-rd */
-    inline uint64_t calcPsyRdCost(sse_ret_t distortion, uint32_t bits, uint32_t psycost) const
+    inline uint64_t calcPsyRdCost(dist_ret_t distortion, uint32_t bits, uint32_t psycost) const
     {
-#if X265_DEPTH <= 10
+#if X265_DEPTH < 10
         X265_CHECK((bits <= (UINT64_MAX / m_lambda2)) && (psycost <= UINT64_MAX / (m_lambda * m_psyRd)),
                    "calcPsyRdCost wrap detected dist: %u, bits: %u, lambda: " X265_LL ", lambda2: " X265_LL "\n",
                    distortion, bits, m_lambda, m_lambda2);
diff -r b6156a08b1de -r 534d80f9272b source/encoder/search.cpp
--- a/source/encoder/search.cpp	Fri Oct 09 20:45:59 2015 +0530
+++ b/source/encoder/search.cpp	Wed Sep 30 15:31:39 2015 +0530
@@ -762,7 +762,8 @@
     if (tuDepth < cu.m_tuDepth[absPartIdx])
     {
         uint32_t qNumParts = 1 << (log2TrSize - 1 - LOG2_UNIT_SIZE) * 2;
-        uint32_t outDist = 0, splitCbfU = 0, splitCbfV = 0;
+        uint32_t splitCbfU = 0, splitCbfV = 0;
+        sse_ret_t outDist = 0;
         for (uint32_t qIdx = 0, qPartIdx = absPartIdx; qIdx < 4; ++qIdx, qPartIdx += qNumParts)
         {
             outDist += codeIntraChromaQt(mode, cuGeom, tuDepth + 1, qPartIdx, psyEnergy);
@@ -884,7 +885,7 @@
     uint32_t log2TrSize = cuGeom.log2CUSize - tuDepth;
     const uint32_t log2TrSizeC = 2;
     uint32_t qtLayer = log2TrSize - 2;
-    uint32_t outDist = 0;
+    sse_ret_t outDist = 0;
 
     /* At the TU layers above this one, no RDO is performed, only distortion is being measured,
      * so the entropy coder is not very accurate. The best we can do is return it in the same
@@ -930,7 +931,7 @@
             predIntraChromaAng(chromaPredMode, pred, stride, log2TrSizeC);
 
             uint64_t bCost = MAX_INT64;
-            uint32_t bDist = 0;
+            sse_ret_t bDist = 0;
             uint32_t bCbf = 0;
             uint32_t bEnergy = 0;
             int      bTSkip = 0;
@@ -2550,7 +2551,7 @@
     uint32_t tqBypass = cu.m_tqBypass[0];
     if (!tqBypass)
     {
-        sse_ret_t cbf0Dist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
+        dist_ret_t cbf0Dist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
         cbf0Dist += m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1], predYuv->m_csize, predYuv->m_buf[1], predYuv->m_csize));
         cbf0Dist += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[2], predYuv->m_csize, predYuv->m_buf[2], predYuv->m_csize));
 
diff -r b6156a08b1de -r 534d80f9272b source/encoder/search.h
--- a/source/encoder/search.h	Fri Oct 09 20:45:59 2015 +0530
+++ b/source/encoder/search.h	Wed Sep 30 15:31:39 2015 +0530
@@ -106,17 +106,17 @@
     // temporal candidate.
     InterNeighbourMV interNeighbours[6];
 
-    uint64_t   rdCost;     // sum of partition (psy) RD costs          (sse(fenc, recon) + lambda2 * bits)
-    uint64_t   sa8dCost;   // sum of partition sa8d distortion costs   (sa8d(fenc, pred) + lambda * bits)
-    uint32_t   sa8dBits;   // signal bits used in sa8dCost calculation
-    uint32_t   psyEnergy;  // sum of partition psycho-visual energy difference
-    sse_ret_t  resEnergy;  // sum of partition residual energy after motion prediction
-    sse_ret_t  lumaDistortion;
-    sse_ret_t  chromaDistortion;
-    sse_ret_t  distortion; // sum of partition SSE distortion
-    uint32_t   totalBits;  // sum of partition bits (mv + coeff)
-    uint32_t   mvBits;     // Mv bits + Ref + block type (or intra mode)
-    uint32_t   coeffBits;  // Texture bits (DCT Coeffs)
+    uint64_t    rdCost;     // sum of partition (psy) RD costs          (sse(fenc, recon) + lambda2 * bits)
+    uint64_t    sa8dCost;   // sum of partition sa8d distortion costs   (sa8d(fenc, pred) + lambda * bits)
+    uint32_t    sa8dBits;   // signal bits used in sa8dCost calculation
+    uint32_t    psyEnergy;  // sum of partition psycho-visual energy difference
+    sse_ret_t   resEnergy;  // sum of partition residual energy after motion prediction
+    sse_ret_t   lumaDistortion;
+    sse_ret_t   chromaDistortion;
+    dist_ret_t  distortion; // sum of partition SSE distortion
+    uint32_t    totalBits;  // sum of partition bits (mv + coeff)
+    uint32_t    mvBits;     // Mv bits + Ref + block type (or intra mode)
+    uint32_t    coeffBits;  // Texture bits (DCT Coeffs)
 
     void initCosts()
     {