[x265] [PATCH 1 of 2 Update] fix SSE_PP intermedia result overflow in Main12, (fixes #180)

Min Chen chenm003 at 163.com
Thu Sep 17 00:58:23 CEST 2015


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1442442319 18000
# Node ID b585c3b6834dc3e98b78be5c750047a5e988926a
# Parent  bc09a5c2aa76a1b9928104a6cbd7cf0c5bc8b00b
fix SSE_PP intermedia result overflow in Main12, (fixes #180)
---
 source/encoder/rdcost.h   |    4 ++--
 source/encoder/search.cpp |   12 ++++++------
 source/encoder/search.h   |    2 +-
 3 files changed, 9 insertions(+), 9 deletions(-)

diff -r bc09a5c2aa76 -r b585c3b6834d source/encoder/rdcost.h
--- a/source/encoder/rdcost.h	Wed Sep 16 17:25:17 2015 -0500
+++ b/source/encoder/rdcost.h	Wed Sep 16 17:25:19 2015 -0500
@@ -125,11 +125,11 @@
         return sadCost + ((bits * m_lambda + 128) >> 8);
     }
 
-    inline uint32_t scaleChromaDist(uint32_t plane, uint32_t dist) const
+    inline sse_ret_t scaleChromaDist(uint32_t plane, sse_ret_t dist) const
     {
         X265_CHECK(dist <= (UINT64_MAX - 128) / m_chromaDistWeight[plane - 1],
                    "scaleChromaDist wrap detected dist: %u, lambda: %u\n", dist, m_chromaDistWeight[plane - 1]);
-        return (uint32_t)((dist * (uint64_t)m_chromaDistWeight[plane - 1] + 128) >> 8);
+        return (sse_ret_t)((dist * (uint64_t)m_chromaDistWeight[plane - 1] + 128) >> 8);
     }
 
     inline uint32_t getCost(uint32_t bits) const
diff -r bc09a5c2aa76 -r b585c3b6834d source/encoder/search.cpp
--- a/source/encoder/search.cpp	Wed Sep 16 17:25:17 2015 -0500
+++ b/source/encoder/search.cpp	Wed Sep 16 17:25:19 2015 -0500
@@ -531,7 +531,7 @@
             // no residual coded, recon = pred
             primitives.cu[sizeIdx].copy_pp(tmpRecon, tmpReconStride, pred, stride);
 
-        uint32_t tmpDist = primitives.cu[sizeIdx].sse_pp(tmpRecon, tmpReconStride, fenc, stride);
+        sse_ret_t tmpDist = primitives.cu[sizeIdx].sse_pp(tmpRecon, tmpReconStride, fenc, stride);
 
         cu.setTransformSkipSubParts(useTSkip, TEXT_LUMA, absPartIdx, fullDepth);
         cu.setCbfSubParts((!!numSig) << tuDepth, TEXT_LUMA, absPartIdx, fullDepth);
@@ -800,7 +800,7 @@
     uint32_t qtLayer = log2TrSize - 2;
     uint32_t stride = mode.fencYuv->m_csize;
     const uint32_t sizeIdxC = log2TrSizeC - 2;
-    uint32_t outDist = 0;
+    sse_ret_t outDist = 0;
 
     uint32_t curPartNum = cuGeom.numPartitions >> tuDepthC * 2;
     const SplitType splitType = (m_csp == X265_CSP_I422) ? VERTICAL_SPLIT : DONT_SPLIT;
@@ -960,7 +960,7 @@
                     primitives.cu[sizeIdxC].copy_pp(recon, reconStride, pred, stride);
                     cu.setCbfPartRange(0, ttype, absPartIdxC, tuIterator.absPartIdxStep);
                 }
-                uint32_t tmpDist = primitives.cu[sizeIdxC].sse_pp(recon, reconStride, fenc, stride);
+                sse_ret_t tmpDist = primitives.cu[sizeIdxC].sse_pp(recon, reconStride, fenc, stride);
                 tmpDist = m_rdCost.scaleChromaDist(chromaId, tmpDist);
 
                 cu.setTransformSkipPartRange(useTSkip, ttype, absPartIdxC, tuIterator.absPartIdxStep);
@@ -2549,7 +2549,7 @@
     uint32_t tqBypass = cu.m_tqBypass[0];
     if (!tqBypass)
     {
-        uint32_t cbf0Dist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
+        sse_ret_t cbf0Dist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
         cbf0Dist += m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1], predYuv->m_csize, predYuv->m_buf[1], predYuv->m_csize));
         cbf0Dist += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[2], predYuv->m_csize, predYuv->m_buf[2], predYuv->m_csize));
 
@@ -2620,8 +2620,8 @@
         reconYuv->copyFromYuv(*predYuv);
 
     // update with clipped distortion and cost (qp estimation loop uses unclipped values)
-    uint32_t bestLumaDist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
-    uint32_t bestChromaDist = m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1], fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize));
+    sse_ret_t bestLumaDist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
+    sse_ret_t bestChromaDist = m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1], fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize));
     bestChromaDist += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[2], fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize));
     if (m_rdCost.m_psyRd)
         interMode.psyEnergy = m_rdCost.psyCost(sizeIdx, fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
diff -r bc09a5c2aa76 -r b585c3b6834d source/encoder/search.h
--- a/source/encoder/search.h	Wed Sep 16 17:25:17 2015 -0500
+++ b/source/encoder/search.h	Wed Sep 16 17:25:19 2015 -0500
@@ -417,7 +417,7 @@
     {
         uint64_t rdcost;
         uint32_t bits;
-        uint32_t distortion;
+        sse_ret_t distortion;
         uint32_t energy;
         Cost() { rdcost = 0; bits = 0; distortion = 0; energy = 0; }
     };



More information about the x265-devel mailing list