[x265-commits] [x265] TEncSearch: remove unnecessary pragma

Steve Borho <steve at borho.org>
Tue Jun 3 05:41:47 CEST 2014


details:   http://hg.videolan.org/x265/rev/bc38a1637220
branches:  
changeset: 6956:bc38a1637220
user:      Steve Borho <steve at borho.org>
date:      Mon Jun 02 21:00:52 2014 -0500
description:
TEncSearch: remove unnecessary pragma
Subject: [x265] refine cbf==0 path: remove clearing coeff and resi

details:   http://hg.videolan.org/x265/rev/b46dd1095ed8
branches:  
changeset: 6957:b46dd1095ed8
user:      Satoshi Nakagawa <nakagawa424 at oki.com>
date:      Mon Jun 02 11:44:59 2014 +0900
description:
refine cbf==0 path: remove clearing coeff and resi
Subject: [x265] primitives: move more aliasing to Setup_Alias_Primitives

details:   http://hg.videolan.org/x265/rev/31f93f0d024f
branches:  
changeset: 6958:31f93f0d024f
user:      Steve Borho <steve at borho.org>
date:      Mon Jun 02 22:22:10 2014 -0500
description:
primitives: move more aliasing to Setup_Alias_Primitives
Subject: [x265] TEncSearch: rename variable to avoid shadowing an earlier 'part'

details:   http://hg.videolan.org/x265/rev/92ef2e02f653
branches:  
changeset: 6959:92ef2e02f653
user:      Steve Borho <steve at borho.org>
date:      Mon Jun 02 22:27:36 2014 -0500
description:
TEncSearch: rename variable to avoid shadowing an earlier 'part'

diffstat:

 source/Lib/TLibEncoder/TEncEntropy.cpp |    1 -
 source/Lib/TLibEncoder/TEncEntropy.h   |    1 -
 source/Lib/TLibEncoder/TEncSbac.cpp    |    5 +-
 source/Lib/TLibEncoder/TEncSearch.cpp  |  438 +++++++++++++++-----------------
 source/common/primitives.cpp           |   47 +++
 source/common/primitives.h             |    4 +
 source/common/x86/asm-primitives.cpp   |   33 --
 7 files changed, 265 insertions(+), 264 deletions(-)

diffs (truncated from 1080 to 300 lines):

diff -r 5b6c9cda191b -r 92ef2e02f653 source/Lib/TLibEncoder/TEncEntropy.cpp
--- a/source/Lib/TLibEncoder/TEncEntropy.cpp	Mon Jun 02 14:21:04 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncEntropy.cpp	Mon Jun 02 22:27:36 2014 -0500
@@ -202,7 +202,6 @@ bool TEncEntropy::isNextTUSection(TComTU
 
 void TEncEntropy::initTUEntropySection(TComTURecurse *tuIterator, uint32_t splitMode, uint32_t absPartIdxStep, uint32_t m_absPartIdxTU)
 {
-    tuIterator->m_partOffset        = 0;
     tuIterator->m_section           = 0;
     tuIterator->m_absPartIdxTURelCU = m_absPartIdxTU;
     tuIterator->m_splitMode         = splitMode;
diff -r 5b6c9cda191b -r 92ef2e02f653 source/Lib/TLibEncoder/TEncEntropy.h
--- a/source/Lib/TLibEncoder/TEncEntropy.h	Mon Jun 02 14:21:04 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncEntropy.h	Mon Jun 02 22:27:36 2014 -0500
@@ -66,7 +66,6 @@ struct TComTURecurse
     uint32_t          m_splitMode;
     uint32_t          m_absPartIdxTURelCU;
     uint32_t          m_absPartIdxStep;
-    uint32_t          m_partOffset;
 };
 
 // ====================================================================================================================
diff -r 5b6c9cda191b -r 92ef2e02f653 source/Lib/TLibEncoder/TEncSbac.cpp
--- a/source/Lib/TLibEncoder/TEncSbac.cpp	Mon Jun 02 14:21:04 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncSbac.cpp	Mon Jun 02 22:27:36 2014 -0500
@@ -2120,8 +2120,9 @@ void TEncSbac::codeCoeffNxN(TComDataCU* 
     // compute number of significant coefficients
     uint32_t numSig = primitives.count_nonzero(coeff, trSize * trSize);
 
-    if (numSig == 0)
-        return;
+#if CHECKED_BUILD || _DEBUG
+    X265_CHECK(numSig > 0, "cbf check fail");
+#endif
 
     bool beValid;
     if (cu->getCUTransquantBypass(absPartIdx))
diff -r 5b6c9cda191b -r 92ef2e02f653 source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp	Mon Jun 02 14:21:04 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp	Mon Jun 02 22:27:36 2014 -0500
@@ -408,8 +408,8 @@ void TEncSearch::xIntraCodingLumaBlk(TCo
     coeff_t* coeff          = m_qtTempCoeff[0][qtLayer] + coeffOffsetY;
 
     int16_t* reconQt        = m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx);
-
     X265_CHECK(m_qtTempShortYuv[qtLayer].m_width == MAX_CU_SIZE, "width is not max CU size\n");
+    const uint32_t reconQtStride = MAX_CU_SIZE;
 
     uint32_t zorder           = cu->getZorderIdxInCU() + absPartIdx;
     pixel*   reconIPred       = cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);
@@ -443,25 +443,29 @@ void TEncSearch::xIntraCodingLumaBlk(TCo
     //--- set coded block flag ---
     cu->setCbfSubParts((absSum ? 1 : 0) << trDepth, TEXT_LUMA, absPartIdx, fullDepth);
 
-    //--- inverse transform ---
     if (absSum)
     {
+        //--- inverse transform ---
         int scalingListType = 0 + TEXT_LUMA;
-        X265_CHECK(scalingListType < 6, "scalingListType is too large %d\n", scalingListType);
+        X265_CHECK(scalingListType < 6, "scalingListType invalid %d\n", scalingListType);
         m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), cu->getLumaIntraDir(absPartIdx), residual, stride, coeff, tuSize, scalingListType, useTransformSkip, lastPos);
+        X265_CHECK(tuSize <= 32, "tuSize is too large %d\n", tuSize);
+        //===== reconstruction =====
+        primitives.calcrecon[sizeIdx](pred, residual, reconQt, reconIPred, stride, reconQtStride, reconIPredStride);
+        //===== update distortion =====
+        outDist += primitives.sse_sp[part](reconQt, reconQtStride, fenc, stride);
     }
     else
     {
-        int16_t* resiTmp = residual;
+#if CHECKED_BUILD || _DEBUG
         memset(coeff, 0, sizeof(coeff_t) * tuSize * tuSize);
-        primitives.blockfill_s[sizeIdx](resiTmp, stride, 0);
+#endif
+        //===== reconstruction =====
+        primitives.luma_copy_ps[part](reconQt,    reconQtStride,    pred, stride);
+        primitives.luma_copy_pp[part](reconIPred, reconIPredStride, pred, stride);
+        //===== update distortion =====
+        outDist += primitives.sse_pp[part](pred, stride, fenc, stride);
     }
-
-    X265_CHECK(tuSize <= 32, "tuSize is too large %d\n", tuSize);
-    //===== reconstruction =====
-    primitives.calcrecon[sizeIdx](pred, residual, reconQt, reconIPred, stride, MAX_CU_SIZE, reconIPredStride);
-    //===== update distortion =====
-    outDist += primitives.sse_sp[part](reconQt, MAX_CU_SIZE, fenc, stride);
 }
 
 void TEncSearch::xIntraCodingChromaBlk(TComDataCU* cu,
@@ -519,67 +523,67 @@ void TEncSearch::xIntraCodingChromaBlk(T
     primitives.calcresidual[sizeIdx](fenc, pred, residual, stride);
 
     //===== transform and quantization =====
+    //--- init rate estimation arrays for RDOQ ---
+    if (useTransformSkipChroma ? m_cfg->bEnableRDOQTS : m_cfg->bEnableRDOQ)
     {
-        //--- init rate estimation arrays for RDOQ ---
-        if (useTransformSkipChroma ? m_cfg->bEnableRDOQTS : m_cfg->bEnableRDOQ)
-        {
-            m_entropyCoder->estimateBit(m_trQuant->m_estBitsSbac, tuSize, ttype);
-        }
-        //--- transform and quantization ---
-        uint32_t absSum = 0;
-        int lastPos = -1;
-
-        int curChromaQpOffset;
-        if (ttype == TEXT_CHROMA_U)
-        {
-            curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();
-        }
-        else
-        {
-            curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
-        }
-        m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
-
-        m_trQuant->selectLambda(TEXT_CHROMA);
-
-        absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, tuSize, ttype, absPartIdx, &lastPos, useTransformSkipChroma);
-
-        //--- set coded block flag ---
-        cu->setCbfPartRange((((absSum > 0) ? 1 : 0) << origTrDepth), ttype, absPartIdx, absPartIdxStep);
-
+        m_entropyCoder->estimateBit(m_trQuant->m_estBitsSbac, tuSize, ttype);
+    }
+
+    //--- transform and quantization ---
+    uint32_t absSum = 0;
+    int lastPos = -1;
+
+    int curChromaQpOffset;
+    if (ttype == TEXT_CHROMA_U)
+    {
+        curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();
+    }
+    else
+    {
+        curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
+    }
+    m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
+    m_trQuant->selectLambda(TEXT_CHROMA);
+
+    absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, tuSize, ttype, absPartIdx, &lastPos, useTransformSkipChroma);
+
+    //--- set coded block flag ---
+    cu->setCbfPartRange((((absSum > 0) ? 1 : 0) << origTrDepth), ttype, absPartIdx, absPartIdxStep);
+
+    uint32_t dist;
+    if (absSum)
+    {
         //--- inverse transform ---
-        if (absSum)
-        {
-            int scalingListType = 0 + ttype;
-            X265_CHECK(scalingListType < 6, "scalingListType invalid %d\n", scalingListType);
-            m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, residual, stride, coeff, tuSize, scalingListType, useTransformSkipChroma, lastPos);
-        }
-        else
-        {
-            int16_t* resiTmp = residual;
-            memset(coeff, 0, sizeof(coeff_t) * tuSize * tuSize);
-            primitives.blockfill_s[sizeIdx](resiTmp, stride, 0);
-        }
+        int scalingListType = 0 + ttype;
+        X265_CHECK(scalingListType < 6, "scalingListType invalid %d\n", scalingListType);
+        m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, residual, stride, coeff, tuSize, scalingListType, useTransformSkipChroma, lastPos);
+        X265_CHECK(tuSize <= 32, "tuSize is too large %d\n", tuSize);
+        //===== reconstruction =====
+        primitives.calcrecon[sizeIdx](pred, residual, reconQt, reconIPred, stride, reconQtStride, reconIPredStride);
+        //===== update distortion =====
+        dist = primitives.sse_sp[part](reconQt, reconQtStride, fenc, stride);
     }
-
-    X265_CHECK(((intptr_t)residual & (tuSize - 1)) == 0, "residual alignment check failure\n");
-    X265_CHECK(tuSize <= 32, "tuSize invalud\n");
-    //===== reconstruction =====
-    primitives.calcrecon[sizeIdx](pred, residual, reconQt, reconIPred, stride, reconQtStride, reconIPredStride);
-    //===== update distortion =====
-    uint32_t dist = primitives.sse_sp[part](reconQt, reconQtStride, fenc, stride);
+    else
+    {
+#if CHECKED_BUILD || _DEBUG
+        memset(coeff, 0, sizeof(coeff_t) * tuSize * tuSize);
+#endif
+        //===== reconstruction =====
+        primitives.square_copy_ps[sizeIdx](reconQt,    reconQtStride,    pred, stride);
+        primitives.square_copy_pp[sizeIdx](reconIPred, reconIPredStride, pred, stride);
+        //===== update distortion =====
+        dist = primitives.sse_pp[part](pred, stride, fenc, stride);
+    }
+
+    X265_CHECK(ttype == TEXT_CHROMA_U || ttype == TEXT_CHROMA_V, "invalid ttype\n");
     if (ttype == TEXT_CHROMA_U)
     {
         outDist += m_rdCost->scaleChromaDistCb(dist);
     }
-    else if (ttype == TEXT_CHROMA_V)
+    else
     {
         outDist += m_rdCost->scaleChromaDistCr(dist);
     }
-    else
-    {
-        outDist += dist;
-    }
 }
 
 void TEncSearch::xRecurIntraCodingQT(TComDataCU* cu,
@@ -800,15 +804,15 @@ void TEncSearch::xRecurIntraCodingQT(TCo
         cu->setTransformSkipSubParts(bestModeId, TEXT_LUMA, absPartIdx, fullDepth);
 
         //--- set reconstruction for next intra prediction blocks ---
-        uint32_t width     = cu->getCUSize(0) >> trDepth;
-        uint32_t height    = cu->getCUSize(0) >> trDepth;
         uint32_t qtLayer   = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
         uint32_t zorder    = cu->getZorderIdxInCU() + absPartIdx;
         int16_t* src       = m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx);
         X265_CHECK(m_qtTempShortYuv[qtLayer].m_width == MAX_CU_SIZE, "width is not max CU size\n");
+        const uint32_t srcstride = MAX_CU_SIZE;
         pixel*   dst       = cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);
         uint32_t dststride = cu->getPic()->getPicYuvRec()->getStride();
-        primitives.blockcpy_ps(width, height, dst, dststride, src, MAX_CU_SIZE);
+        int sizeIdx = trSizeLog2 - 2;
+        primitives.square_copy_sp[sizeIdx](dst, dststride, src, srcstride);
     }
 
     outDistY += singleDistY;
@@ -882,25 +886,29 @@ void TEncSearch::residualTransformQuantI
         //--- set coded block flag ---
         cu->setCbfSubParts((absSum ? 1 : 0) << trDepth, TEXT_LUMA, absPartIdx, fullDepth);
 
-        //--- inverse transform ---
+        int part = partitionFromSize(tuSize);
+
         if (absSum)
         {
+            //--- inverse transform ---
             int scalingListType = 0 + TEXT_LUMA;
             X265_CHECK(scalingListType < 6, "scalingListType %d\n", scalingListType);
             m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), cu->getLumaIntraDir(absPartIdx), residual, stride, coeff, tuSize, scalingListType, useTransformSkip, lastPos);
+
+            // Generate Recon
+            primitives.luma_add_ps[part](recon, stride, pred, residual, stride, stride);
+            primitives.luma_copy_pp[part](reconIPred, reconIPredStride, recon, stride);
         }
         else
         {
-            int16_t* resiTmp = residual;
+#if CHECKED_BUILD || _DEBUG
             memset(coeff, 0, sizeof(coeff_t) * tuSize * tuSize);
-            primitives.blockfill_s[sizeIdx](resiTmp, stride, 0);
+#endif
+
+            // Generate Recon
+            primitives.luma_copy_pp[part](recon,      stride,           pred, stride);
+            primitives.luma_copy_pp[part](reconIPred, reconIPredStride, pred, stride);
         }
-
-        //Generate Recon
-        X265_CHECK(tuSize <= 32, "tuSize is too large\n");
-        int part = partitionFromSize(tuSize);
-        primitives.luma_add_ps[part](recon, stride, pred, residual, stride, stride);
-        primitives.blockcpy_pp(tuSize, tuSize, reconIPred, reconIPredStride, recon, stride);
     }
 
     if (bCheckSplit && !bCheckFull)
@@ -996,8 +1004,10 @@ void TEncSearch::xLoadIntraResultQT(TCom
     pixel*     reconIPred       = cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zOrder);
     uint32_t   reconIPredStride = cu->getPic()->getPicYuvRec()->getStride();
     int16_t*   reconQt          = m_qtTempShortYuv[qtlayer].getLumaAddr(absPartIdx);
-    primitives.blockcpy_ps(trSize, trSize, reconIPred, reconIPredStride, reconQt, MAX_CU_SIZE);
     X265_CHECK(m_qtTempShortYuv[qtlayer].m_width == MAX_CU_SIZE, "width is not max CU size\n");
+    const uint32_t reconQtStride = MAX_CU_SIZE;
+    int sizeIdx = trSizeLog2 - 2;
+    primitives.square_copy_sp[sizeIdx](reconIPred, reconIPredStride, reconQt, reconQtStride);
 }
 
 void TEncSearch::xStoreIntraResultChromaQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t chromaId, const bool splitIntoSubTUs)
@@ -1075,8 +1085,7 @@ void TEncSearch::xLoadIntraResultChromaQ
         }
 
         //===== copy transform coefficients =====
-        uint32_t trSizeC  = 1 << trSizeCLog2;
-        uint32_t numCoeffC = 1 << trSizeCLog2 * 2;
+        uint32_t numCoeffC = 1 << (trSizeCLog2 * 2);
         uint32_t coeffOffsetC = absPartIdx << (cu->getPic()->getLog2UnitSize() * 2 - (m_hChromaShift + m_vChromaShift));
 
         coeff_t* coeffDst = m_qtTempCoeff[chromaId][qtlayer] + coeffOffsetC;
@@ -1088,12 +1097,13 @@ void TEncSearch::xLoadIntraResultChromaQ
         m_qtTempTransformSkipYuv.copyPartToPartChroma(&m_qtTempShortYuv[qtlayer], absPartIdx, lumaSize, chromaId, splitIntoSubTUs);
 
         uint32_t zorder           = cu->getZorderIdxInCU() + absPartIdx;
-        uint32_t reconQtStride    = m_qtTempShortYuv[qtlayer].m_cwidth;
         uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getCStride();
 
         pixel* reconIPred = cu->getPic()->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), zorder);
         int16_t* reconQt  = m_qtTempShortYuv[qtlayer].getChromaAddr(chromaId, absPartIdx);
-        primitives.blockcpy_ps(trSizeC, trSizeC, reconIPred, reconIPredStride, reconQt, reconQtStride);
+        uint32_t reconQtStride    = m_qtTempShortYuv[qtlayer].m_cwidth;


More information about the x265-commits mailing list