[x265] [PATCH 1 of 2 Update] fix SSE_PP intermediate result overflow in Main12 (fixes #180)
Min Chen
chenm003 at 163.com
Thu Sep 17 00:58:23 CEST 2015
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1442442319 18000
# Node ID b585c3b6834dc3e98b78be5c750047a5e988926a
# Parent bc09a5c2aa76a1b9928104a6cbd7cf0c5bc8b00b
fix SSE_PP intermediate result overflow in Main12 (fixes #180)
---
source/encoder/rdcost.h | 4 ++--
source/encoder/search.cpp | 12 ++++++------
source/encoder/search.h | 2 +-
3 files changed, 9 insertions(+), 9 deletions(-)
diff -r bc09a5c2aa76 -r b585c3b6834d source/encoder/rdcost.h
--- a/source/encoder/rdcost.h Wed Sep 16 17:25:17 2015 -0500
+++ b/source/encoder/rdcost.h Wed Sep 16 17:25:19 2015 -0500
@@ -125,11 +125,11 @@
return sadCost + ((bits * m_lambda + 128) >> 8);
}
- inline uint32_t scaleChromaDist(uint32_t plane, uint32_t dist) const
+ inline sse_ret_t scaleChromaDist(uint32_t plane, sse_ret_t dist) const
{
X265_CHECK(dist <= (UINT64_MAX - 128) / m_chromaDistWeight[plane - 1],
"scaleChromaDist wrap detected dist: %u, lambda: %u\n", dist, m_chromaDistWeight[plane - 1]);
- return (uint32_t)((dist * (uint64_t)m_chromaDistWeight[plane - 1] + 128) >> 8);
+ return (sse_ret_t)((dist * (uint64_t)m_chromaDistWeight[plane - 1] + 128) >> 8);
}
inline uint32_t getCost(uint32_t bits) const
diff -r bc09a5c2aa76 -r b585c3b6834d source/encoder/search.cpp
--- a/source/encoder/search.cpp Wed Sep 16 17:25:17 2015 -0500
+++ b/source/encoder/search.cpp Wed Sep 16 17:25:19 2015 -0500
@@ -531,7 +531,7 @@
// no residual coded, recon = pred
primitives.cu[sizeIdx].copy_pp(tmpRecon, tmpReconStride, pred, stride);
- uint32_t tmpDist = primitives.cu[sizeIdx].sse_pp(tmpRecon, tmpReconStride, fenc, stride);
+ sse_ret_t tmpDist = primitives.cu[sizeIdx].sse_pp(tmpRecon, tmpReconStride, fenc, stride);
cu.setTransformSkipSubParts(useTSkip, TEXT_LUMA, absPartIdx, fullDepth);
cu.setCbfSubParts((!!numSig) << tuDepth, TEXT_LUMA, absPartIdx, fullDepth);
@@ -800,7 +800,7 @@
uint32_t qtLayer = log2TrSize - 2;
uint32_t stride = mode.fencYuv->m_csize;
const uint32_t sizeIdxC = log2TrSizeC - 2;
- uint32_t outDist = 0;
+ sse_ret_t outDist = 0;
uint32_t curPartNum = cuGeom.numPartitions >> tuDepthC * 2;
const SplitType splitType = (m_csp == X265_CSP_I422) ? VERTICAL_SPLIT : DONT_SPLIT;
@@ -960,7 +960,7 @@
primitives.cu[sizeIdxC].copy_pp(recon, reconStride, pred, stride);
cu.setCbfPartRange(0, ttype, absPartIdxC, tuIterator.absPartIdxStep);
}
- uint32_t tmpDist = primitives.cu[sizeIdxC].sse_pp(recon, reconStride, fenc, stride);
+ sse_ret_t tmpDist = primitives.cu[sizeIdxC].sse_pp(recon, reconStride, fenc, stride);
tmpDist = m_rdCost.scaleChromaDist(chromaId, tmpDist);
cu.setTransformSkipPartRange(useTSkip, ttype, absPartIdxC, tuIterator.absPartIdxStep);
@@ -2549,7 +2549,7 @@
uint32_t tqBypass = cu.m_tqBypass[0];
if (!tqBypass)
{
- uint32_t cbf0Dist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
+ sse_ret_t cbf0Dist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
cbf0Dist += m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1], predYuv->m_csize, predYuv->m_buf[1], predYuv->m_csize));
cbf0Dist += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[2], predYuv->m_csize, predYuv->m_buf[2], predYuv->m_csize));
@@ -2620,8 +2620,8 @@
reconYuv->copyFromYuv(*predYuv);
// update with clipped distortion and cost (qp estimation loop uses unclipped values)
- uint32_t bestLumaDist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
- uint32_t bestChromaDist = m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1], fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize));
+ sse_ret_t bestLumaDist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
+ sse_ret_t bestChromaDist = m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1], fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize));
bestChromaDist += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[2], fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize));
if (m_rdCost.m_psyRd)
interMode.psyEnergy = m_rdCost.psyCost(sizeIdx, fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
diff -r bc09a5c2aa76 -r b585c3b6834d source/encoder/search.h
--- a/source/encoder/search.h Wed Sep 16 17:25:17 2015 -0500
+++ b/source/encoder/search.h Wed Sep 16 17:25:19 2015 -0500
@@ -417,7 +417,7 @@
{
uint64_t rdcost;
uint32_t bits;
- uint32_t distortion;
+ sse_ret_t distortion;
uint32_t energy;
Cost() { rdcost = 0; bits = 0; distortion = 0; energy = 0; }
};
More information about the x265-devel
mailing list