[x265] [PATCH] rd level: remove unnecessary calculations in intra residual encoding

deepthidevaki at multicorewareinc.com deepthidevaki at multicorewareinc.com
Mon Dec 16 10:11:42 CET 2013


# HG changeset patch
# User Deepthi Devaki <deepthidevaki at multicorewareinc.com>
# Date 1387184637 -19800
# Node ID a8e34580f5354f281a0d8bbebbee75a3e62b6824
# Parent  238d7f272d1e5c49d90bb15f40b2d7e90be78ba1
rd level: remove unnecessary calculations in intra residual encoding

calcrecon calculates recon, reconQt and reconIPred in a single call, which is redundant in intra residual encoding. Use an add primitive followed by a block copy instead of calcrecon.

diff -r 238d7f272d1e -r a8e34580f535 source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp	Mon Dec 16 10:57:06 2013 +0530
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp	Mon Dec 16 14:33:57 2013 +0530
@@ -1026,7 +1026,6 @@
         uint32_t numCoeffPerInc = cu->getSlice()->getSPS()->getMaxCUWidth() * cu->getSlice()->getSPS()->getMaxCUHeight() >> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1);
         TCoeff*  coeff          = m_qtTempCoeffY[qtLayer] + numCoeffPerInc * absPartIdx;
 
-        int16_t* reconQt        = m_qtTempTComYuv[qtLayer].getLumaAddr(absPartIdx);
         assert(m_qtTempTComYuv[qtLayer].m_width == MAX_CU_SIZE);
 
         uint32_t zorder           = cu->getZorderIdxInCU() + absPartIdx;
@@ -1077,7 +1076,9 @@
 
         //Generate Recon
         assert(width <= 32);
-        primitives.calcrecon[size](pred, residual, recon, reconQt, reconIPred, stride, MAX_CU_SIZE, reconIPredStride);
+        int part = partitionFromSizes(width, height);
+        primitives.luma_add_ps[part](recon, stride, pred, residual, stride, stride);
+        primitives.blockcpy_pp(width, height, reconIPred, reconIPredStride, recon, stride);
     }
 
     if (bCheckSplit && !bCheckFull)
@@ -1652,7 +1653,6 @@
             Pel*     recon          = (chromaId > 0 ? reconYuv->getCrAddr(absPartIdx) : reconYuv->getCbAddr(absPartIdx));
             uint32_t numCoeffPerInc = (cu->getSlice()->getSPS()->getMaxCUWidth() * cu->getSlice()->getSPS()->getMaxCUHeight() >> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1)) >> 2;
             TCoeff*  coeff          = (chromaId > 0 ? m_qtTempCoeffCr[qtlayer] : m_qtTempCoeffCb[qtlayer]) + numCoeffPerInc * absPartIdx;
-            int16_t* reconQt        = (chromaId > 0 ? m_qtTempTComYuv[qtlayer].getCrAddr(absPartIdx) : m_qtTempTComYuv[qtlayer].getCbAddr(absPartIdx));
             assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);
 
             uint32_t zorder           = cu->getZorderIdxInCU() + absPartIdx;
@@ -1718,7 +1718,9 @@
             //===== reconstruction =====
             assert(((uint32_t)(size_t)residual & (width - 1)) == 0);
             assert(width <= 32);
-            primitives.calcrecon[size](pred, residual, recon, reconQt, reconIPred, stride, MAX_CU_SIZE / 2, reconIPredStride);
+            int part = partitionFromSizes(cu->getWidth(0) >> (trDepth), cu->getHeight(0) >> (trDepth));
+            primitives.chroma[m_cfg->param.internalCsp].add_ps[part](recon, stride, pred, residual, stride, stride);
+            primitives.chroma[m_cfg->param.internalCsp].copy_pp[part](reconIPred, reconIPredStride, recon, stride);
         }
 
         //===== copy transform coefficients =====


More information about the x265-devel mailing list