[x265-commits] [x265] refine block size related

Sun May 25 02:07:39 CEST 2014

details:   http://hg.videolan.org/x265/rev/74f8aa42020f
branches:  
changeset: 6919:74f8aa42020f
user:      Satoshi Nakagawa <nakagawa424 at oki.com>
date:      Fri May 23 13:34:51 2014 +0900
description:
refine block size related
Subject: [x265] assert to optional runtime check

details:   http://hg.videolan.org/x265/rev/5e8cce428457
branches:  
changeset: 6920:5e8cce428457
user:      Steve Borho <steve at borho.org>
date:      Fri May 23 09:11:15 2014 -0500
description:
assert to optional runtime check

diffstat:

 source/Lib/TLibCommon/TComBitStream.cpp  |    2 +-
 source/Lib/TLibCommon/TComDataCU.cpp     |    2 +-
 source/Lib/TLibCommon/TComPrediction.cpp |   16 +-
 source/Lib/TLibCommon/TComSlice.h        |    2 +
 source/Lib/TLibCommon/TComYuv.cpp        |    6 +-
 source/Lib/TLibEncoder/TEncCu.cpp        |   27 +-
 source/Lib/TLibEncoder/TEncEntropy.cpp   |   22 +-
 source/Lib/TLibEncoder/TEncSbac.cpp      |    3 +-
 source/Lib/TLibEncoder/TEncSearch.cpp    |  583 ++++++++++++++----------------
 source/Lib/TLibEncoder/TEncSearch.h      |    4 +-
 source/common/pixel.cpp                  |   10 +-
 source/common/primitives.cpp             |    7 +-
 source/common/primitives.h               |   13 +-
 source/common/shortyuv.cpp               |   26 +-
 source/common/vec/blockcopy-sse3.cpp     |    8 +-
 source/encoder/compress.cpp              |   28 +-
 source/encoder/slicetype.cpp             |   12 +-
 17 files changed, 379 insertions(+), 392 deletions(-)

diffs (truncated from 2186 to 300 lines):

diff -r 91330e7dddd7 -r 5e8cce428457 source/Lib/TLibCommon/TComBitStream.cpp

--- a/source/Lib/TLibCommon/TComBitStream.cpp	Sat May 24 00:07:16 2014 +0900
+++ b/source/Lib/TLibCommon/TComBitStream.cpp	Fri May 23 09:11:15 2014 -0500
@@ -88,7 +88,7 @@ void TComOutputBitstream::write(uint32_t
     /* any modulo 8 remainder of num_total_bits cannot be written this time,
      * and will be held until next time. */
     uint32_t num_total_bits = numBits + m_num_held_bits;
-    uint32_t next_num_held_bits = num_total_bits % 8;
+    uint32_t next_num_held_bits = num_total_bits & 7;
 
     /* form a byte aligned word (write_bits), by concatenating any held bits
      * with the new bits, discarding the bits that will form the next_held_bits.
diff -r 91330e7dddd7 -r 5e8cce428457 source/Lib/TLibCommon/TComDataCU.cpp
--- a/source/Lib/TLibCommon/TComDataCU.cpp	Sat May 24 00:07:16 2014 +0900
+++ b/source/Lib/TLibCommon/TComDataCU.cpp	Fri May 23 09:11:15 2014 -0500
@@ -1337,7 +1337,7 @@ bool TComDataCU::isFirstAbsZorderIdxInDe
 {
     uint32_t curPartNum = m_pic->getNumPartInCU() >> (depth << 1);
 
-    return ((m_absIdxInLCU + absPartIdx) % curPartNum) == 0;
+    return ((m_absIdxInLCU + absPartIdx) & (curPartNum - 1)) == 0;
 }
 
 void TComDataCU::setPartSizeSubParts(PartSize mode, uint32_t absPartIdx, uint32_t depth)
diff -r 91330e7dddd7 -r 5e8cce428457 source/Lib/TLibCommon/TComPrediction.cpp
--- a/source/Lib/TLibCommon/TComPrediction.cpp	Sat May 24 00:07:16 2014 +0900
+++ b/source/Lib/TLibCommon/TComPrediction.cpp	Fri May 23 09:11:15 2014 -0500
@@ -117,15 +117,15 @@ bool TComPrediction::filteringIntraRefer
 {
     bool bFilter;
 
-    if (dirMode == DC_IDX)
+    if (dirMode == DC_IDX || tuSize <= 4)
     {
-        bFilter = false; // no smoothing for DC or LM chroma
+        bFilter = false; // no smoothing for DC
     }
     else
     {
         int diff = std::min<int>(abs((int)dirMode - HOR_IDX), abs((int)dirMode - VER_IDX));
-        uint32_t sizeIndex = g_convertToBit[tuSize];
-        bFilter = diff > intraFilterThreshold[sizeIndex];
+        uint32_t sizeIdx = g_convertToBit[tuSize];
+        bFilter = diff > intraFilterThreshold[sizeIdx];
     }
 
     return bFilter;
@@ -134,7 +134,7 @@ bool TComPrediction::filteringIntraRefer
 void TComPrediction::predIntraLumaAng(uint32_t dirMode, pixel* dst, intptr_t stride, int tuSize)
 {
     X265_CHECK(tuSize >= 4 && tuSize <= 64, "intra block size is out of range\n");
-    int log2BlkSize = g_convertToBit[tuSize];
+    int sizeIdx = g_convertToBit[tuSize];
     bool bUseFilteredPredictions = TComPrediction::filteringIntraReferenceSamples(dirMode, tuSize);
 
     pixel *refLft, *refAbv;
@@ -148,13 +148,13 @@ void TComPrediction::predIntraLumaAng(ui
     }
 
     bool bFilter = tuSize <= 16 && dirMode != PLANAR_IDX;
-    primitives.intra_pred[log2BlkSize][dirMode](dst, stride, refLft, refAbv, dirMode, bFilter);
+    primitives.intra_pred[sizeIdx][dirMode](dst, stride, refLft, refAbv, dirMode, bFilter);
 }
 
 // Angular chroma
 void TComPrediction::predIntraChromaAng(pixel* src, uint32_t dirMode, pixel* dst, intptr_t stride, int tuSize, int chFmt)
 {
-    int log2BlkSize = g_convertToBit[tuSize];
+    int sizeIdx = g_convertToBit[tuSize];
     uint32_t tuSize2 = tuSize << 1;
 
     // Create the prediction
@@ -222,7 +222,7 @@ void TComPrediction::predIntraChromaAng(
         }
     }
 
-    primitives.intra_pred[log2BlkSize][dirMode](dst, stride, refLft + tuSize - 1, refAbv + tuSize - 1, dirMode, 0);
+    primitives.intra_pred[sizeIdx][dirMode](dst, stride, refLft + tuSize - 1, refAbv + tuSize - 1, dirMode, 0);
 }
 
 /** Function for checking identical motion.
diff -r 91330e7dddd7 -r 5e8cce428457 source/Lib/TLibCommon/TComSlice.h
--- a/source/Lib/TLibCommon/TComSlice.h	Sat May 24 00:07:16 2014 +0900
+++ b/source/Lib/TLibCommon/TComSlice.h	Fri May 23 09:11:15 2014 -0500
@@ -906,6 +906,8 @@ public:
 
     void setLog2DiffMaxMinCodingBlockSize(int val) { m_log2DiffMaxMinCodingBlockSize = val; }
 
+    int  getLog2MaxCodingBlockSize() const { return m_log2MinCodingBlockSize + m_log2DiffMaxMinCodingBlockSize; }
+
     void setMaxCUSize(uint32_t u) { m_maxCUSize = u; }
 
     uint32_t getMaxCUSize() const  { return m_maxCUSize; }
diff -r 91330e7dddd7 -r 5e8cce428457 source/Lib/TLibCommon/TComYuv.cpp
--- a/source/Lib/TLibCommon/TComYuv.cpp	Sat May 24 00:07:16 2014 +0900
+++ b/source/Lib/TLibCommon/TComYuv.cpp	Fri May 23 09:11:15 2014 -0500
@@ -186,7 +186,7 @@ void TComYuv::copyPartToPartYuv(TComYuv*
 
 void TComYuv::copyPartToPartLuma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize)
 {
-    int part = partitionFromSizes(lumaSize, lumaSize);
+    int part = partitionFromSize(lumaSize);
 
     int16_t* dst = dstPicYuv->getLumaAddr(partIdx);
     uint32_t dststride = dstPicYuv->m_width;
@@ -196,7 +196,7 @@ void TComYuv::copyPartToPartLuma(ShortYu
 
 void TComYuv::copyPartToPartChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, uint32_t chromaId, const bool splitIntoSubTUs)
 {
-    int part = splitIntoSubTUs ? NUM_CHROMA_PARTITIONS422 : partitionFromSizes(lumaSize, lumaSize);
+    int part = splitIntoSubTUs ? NUM_CHROMA_PARTITIONS422 : partitionFromSize(lumaSize);
 
     if (chromaId == 1)
     {
@@ -235,7 +235,7 @@ void TComYuv::copyPartToPartChroma(Short
 
 void TComYuv::addClip(TComYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t partSize)
 {
-    int part = partitionFromSizes(partSize, partSize);
+    int part = partitionFromSize(partSize);
 
     addClipLuma(srcYuv0, srcYuv1, part);
     addClipChroma(srcYuv0, srcYuv1, part);
diff -r 91330e7dddd7 -r 5e8cce428457 source/Lib/TLibEncoder/TEncCu.cpp
--- a/source/Lib/TLibEncoder/TEncCu.cpp	Sat May 24 00:07:16 2014 +0900
+++ b/source/Lib/TLibEncoder/TEncCu.cpp	Fri May 23 09:11:15 2014 -0500
@@ -571,13 +571,14 @@ void TEncCu::xCompressIntraCU(TComDataCU
         m_origYuv[0]->copyPartToYuv(m_origYuv[depth], outBestCU->getZorderIdxInCU());
     }
 
+    uint32_t cuSize = outTempCU->getCUSize(0);
     TComSlice* slice = outTempCU->getSlice();
     if (!bInsidePicture)
     {
         uint32_t lpelx = outBestCU->getCUPelX();
         uint32_t tpely = outBestCU->getCUPelY();
-        uint32_t rpelx = lpelx + outBestCU->getCUSize(0);
-        uint32_t bpely = tpely + outBestCU->getCUSize(0);
+        uint32_t rpelx = lpelx + cuSize;
+        uint32_t bpely = tpely + cuSize;
         bInsidePicture = (rpelx <= slice->getSPS()->getPicWidthInLumaSamples() &&
                           bpely <= slice->getSPS()->getPicHeightInLumaSamples());
     }
@@ -592,7 +593,7 @@ void TEncCu::xCompressIntraCU(TComDataCU
 
         if (depth == g_maxCUDepth - g_addCUDepth)
         {
-            if (outTempCU->getCUSize(0) > (1 << slice->getSPS()->getQuadtreeTULog2MinSize()))
+            if (cuSize > (1 << slice->getSPS()->getQuadtreeTULog2MinSize()))
             {
                 xCheckRDCostIntra(outBestCU, outTempCU, SIZE_NxN);
             }
@@ -715,13 +716,14 @@ void TEncCu::xCompressCU(TComDataCU*& ou
     bool doNotBlockPu = true;
     bool earlyDetectionSkipMode = false;
 
+    uint32_t cuSize = outTempCU->getCUSize(0);
     TComSlice* slice = outTempCU->getSlice();
     if (!bInsidePicture)
     {
         uint32_t lpelx = outBestCU->getCUPelX();
         uint32_t tpely = outBestCU->getCUPelY();
-        uint32_t rpelx = lpelx + outBestCU->getCUSize(0);
-        uint32_t bpely = tpely + outBestCU->getCUSize(0);
+        uint32_t rpelx = lpelx + cuSize;
+        uint32_t bpely = tpely + cuSize;
         bInsidePicture = (rpelx <= slice->getSPS()->getPicWidthInLumaSamples() &&
                           bpely <= slice->getSPS()->getPicHeightInLumaSamples());
     }
@@ -765,7 +767,7 @@ void TEncCu::xCompressCU(TComDataCU*& ou
             if (slice->getSliceType() != I_SLICE)
             {
                 // 2Nx2N, NxN
-                if (!(outBestCU->getCUSize(0) == 8))
+                if (!(cuSize == 8))
                 {
                     if (depth == g_maxCUDepth - g_addCUDepth && doNotBlockPu)
                     {
@@ -899,7 +901,7 @@ void TEncCu::xCompressCU(TComDataCU*& ou
 
                 if (depth == g_maxCUDepth - g_addCUDepth)
                 {
-                    if (outTempCU->getCUSize(0) > (1 << slice->getSPS()->getQuadtreeTULog2MinSize()))
+                    if (cuSize > (1 << slice->getSPS()->getQuadtreeTULog2MinSize()))
                     {
                         xCheckRDCostIntraInInter(outBestCU, outTempCU, SIZE_NxN);
                         outTempCU->initEstData(depth);
@@ -908,10 +910,10 @@ void TEncCu::xCompressCU(TComDataCU*& ou
             }
             // test PCM
             if (slice->getSPS()->getUsePCM()
-                && outTempCU->getCUSize(0) <= (1 << slice->getSPS()->getPCMLog2MaxSize())
-                && outTempCU->getCUSize(0) >= (1 << slice->getSPS()->getPCMLog2MinSize()))
+                && cuSize <= (1 << slice->getSPS()->getPCMLog2MaxSize())
+                && cuSize >= (1 << slice->getSPS()->getPCMLog2MinSize()))
             {
-                uint32_t rawbits = (2 * X265_DEPTH + X265_DEPTH) * outBestCU->getCUSize(0) * outBestCU->getCUSize(0) / 2;
+                uint32_t rawbits = (2 * X265_DEPTH + X265_DEPTH) * cuSize * cuSize / 2;
                 uint32_t bestbits = outBestCU->m_totalBits;
                 if ((bestbits > rawbits) || (outBestCU->m_totalCost > m_rdCost->calcRdCost(0, rawbits)))
                 {
@@ -1045,6 +1047,7 @@ void TEncCu::finishCU(TComDataCU* cu, ui
     uint32_t posy = (externalAddress / pic->getFrameWidthInCU()) * g_maxCUSize + g_rasterToPelY[g_zscanToRaster[internalAddress]];
     uint32_t width = slice->getSPS()->getPicWidthInLumaSamples();
     uint32_t height = slice->getSPS()->getPicHeightInLumaSamples();
+    uint32_t cuSize = cu->getCUSize(absPartIdx);
 
     while (posx >= width || posy >= height)
     {
@@ -1070,8 +1073,8 @@ void TEncCu::finishCU(TComDataCU* cu, ui
     uint32_t uiGranularityWidth = g_maxCUSize;
     posx = cu->getCUPelX() + g_rasterToPelX[g_zscanToRaster[absPartIdx]];
     posy = cu->getCUPelY() + g_rasterToPelY[g_zscanToRaster[absPartIdx]];
-    bool granularityBoundary = ((posx + cu->getCUSize(absPartIdx)) % uiGranularityWidth == 0 || (posx + cu->getCUSize(absPartIdx) == width))
-        && ((posy + cu->getCUSize(absPartIdx)) % uiGranularityWidth == 0 || (posy + cu->getCUSize(absPartIdx) == height));
+    bool granularityBoundary = ((posx + cuSize) % uiGranularityWidth == 0 || (posx + cuSize == width))
+        && ((posy + cuSize) % uiGranularityWidth == 0 || (posy + cuSize == height));
 
     if (granularityBoundary)
     {
diff -r 91330e7dddd7 -r 5e8cce428457 source/Lib/TLibEncoder/TEncEntropy.cpp
--- a/source/Lib/TLibEncoder/TEncEntropy.cpp	Sat May 24 00:07:16 2014 +0900
+++ b/source/Lib/TLibEncoder/TEncEntropy.cpp	Fri May 23 09:11:15 2014 -0500
@@ -212,7 +212,7 @@ void TEncEntropy::initTUEntropySection(T
 void TEncEntropy::xEncodeTransform(TComDataCU* cu, uint32_t offsetLuma, uint32_t offsetChroma, uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t depth, uint32_t tuSize, uint32_t trIdx, bool& bCodeDQP)
 {
     const uint32_t subdiv = cu->getTransformIdx(absPartIdx) + cu->getDepth(absPartIdx) > depth;
-    const uint32_t log2TrafoSize = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUSize()] + 2 - depth;
+    const uint32_t log2TrafoSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - depth;
     uint32_t hChromaShift        = cu->getHorzChromaShift();
     uint32_t vChromaShift        = cu->getVertChromaShift();
     uint32_t cbfY = cu->getCbf(absPartIdx, TEXT_LUMA, trIdx);
@@ -227,12 +227,12 @@ void TEncEntropy::xEncodeTransform(TComD
     if ((log2TrafoSize == 2) && !(cu->getChromaFormat() == CHROMA_444))
     {
         uint32_t partNum = cu->getPic()->getNumPartInCU() >> ((depth - 1) << 1);
-        if ((absPartIdx % partNum) == 0)
+        if ((absPartIdx & (partNum - 1)) == 0)
         {
             m_bakAbsPartIdx   = absPartIdx;
             m_bakChromaOffset = offsetChroma;
         }
-        else if ((absPartIdx % partNum) == (partNum - 1))
+        else if ((absPartIdx & (partNum - 1)) == (partNum - 1))
         {
             cbfU = cu->getCbf(m_bakAbsPartIdx, TEXT_CHROMA_U, trIdx);
             cbfV = cu->getCbf(m_bakAbsPartIdx, TEXT_CHROMA_V, trIdx);
@@ -369,9 +369,9 @@ void TEncEntropy::xEncodeTransform(TComD
         if ((log2TrafoSize == 2) && !(chFmt == CHROMA_444))
         {
             uint32_t partNum = cu->getPic()->getNumPartInCU() >> ((depth - 1) << 1);
-            if ((absPartIdx % partNum) == (partNum - 1))
+            if ((absPartIdx & (partNum - 1)) == (partNum - 1))
             {
-                uint32_t trWidthC          = log2TrafoSize << 1;
+                uint32_t trSizeC           = 1 << log2TrafoSize;
                 const bool splitIntoSubTUs = (chFmt == CHROMA_422);
 
                 uint32_t curPartNum = cu->getPic()->getNumPartInCU() >> ((depth - 1) << 1);
@@ -384,10 +384,10 @@ void TEncEntropy::xEncodeTransform(TComD
                     do
                     {
                         uint32_t cbf = cu->getCbf(tuIterator.m_absPartIdxTURelCU, (TextType)chromaId, trIdx + splitIntoSubTUs);
-                        uint32_t subTUIndex = tuIterator.m_section * trWidthC * trWidthC;
+                        uint32_t subTUIndex = tuIterator.m_section * trSizeC * trSizeC;
                         if (cbf)
                         {
-                            m_entropyCoderIf->codeCoeffNxN(cu, (coeffChroma + m_bakChromaOffset + subTUIndex), tuIterator.m_absPartIdxTURelCU, trWidthC, (TextType)chromaId);
+                            m_entropyCoderIf->codeCoeffNxN(cu, (coeffChroma + m_bakChromaOffset + subTUIndex), tuIterator.m_absPartIdxTURelCU, trSizeC, (TextType)chromaId);
                         }
                     }
                     while (isNextTUSection(&tuIterator));
@@ -396,10 +396,8 @@ void TEncEntropy::xEncodeTransform(TComD
         }
         else
         {
-            uint32_t trWidthC  = tuSize >> hChromaShift;
-            uint32_t trHeightC = tuSize >> vChromaShift;
+            uint32_t trSizeC  = tuSize >> hChromaShift;
             const bool splitIntoSubTUs = (chFmt == CHROMA_422);
-            trHeightC = splitIntoSubTUs ? trHeightC >> 1 : trHeightC;
             uint32_t curPartNum = cu->getPic()->getNumPartInCU() >> (depth << 1);
             for (uint32_t chromaId = TEXT_CHROMA; chromaId < MAX_NUM_COMPONENT; chromaId++)
             {
@@ -409,10 +407,10 @@ void TEncEntropy::xEncodeTransform(TComD
                 do
                 {
                     uint32_t cbf = cu->getCbf(tuIterator.m_absPartIdxTURelCU, (TextType)chromaId, trIdx + splitIntoSubTUs);
-                    uint32_t subTUIndex = tuIterator.m_section * trWidthC * trHeightC;
+                    uint32_t subTUIndex = tuIterator.m_section * trSizeC * trSizeC;
                     if (cbf)
                     {
-                        m_entropyCoderIf->codeCoeffNxN(cu, (coeffChroma + offsetChroma + subTUIndex), tuIterator.m_absPartIdxTURelCU, trWidthC, (TextType)chromaId);
+                        m_entropyCoderIf->codeCoeffNxN(cu, (coeffChroma + offsetChroma + subTUIndex), tuIterator.m_absPartIdxTURelCU, trSizeC, (TextType)chromaId);
                     }
                 }
                 while (isNextTUSection(&tuIterator));
diff -r 91330e7dddd7 -r 5e8cce428457 source/Lib/TLibEncoder/TEncSbac.cpp
--- a/source/Lib/TLibEncoder/TEncSbac.cpp	Sat May 24 00:07:16 2014 +0900
+++ b/source/Lib/TLibEncoder/TEncSbac.cpp	Fri May 23 09:11:15 2014 -0500