[x265-commits] [x265] refine tuDepth related

Mon Dec 8 18:47:55 CET 2014

details:   http://hg.videolan.org/x265/rev/53f7efef5ebd
branches:  
changeset: 8953:53f7efef5ebd
user:      Satoshi Nakagawa <nakagawa424 at oki.com>
date:      Sat Dec 06 17:17:59 2014 +0900
description:
refine tuDepth related

diffstat:

 source/common/cudata.h      |    2 +-
 source/encoder/analysis.cpp |    8 +-
 source/encoder/entropy.cpp  |  297 +++++++-----------
 source/encoder/entropy.h    |   25 +-
 source/encoder/search.cpp   |  696 ++++++++++++++++++++-----------------------
 source/encoder/search.h     |   32 +-
 6 files changed, 469 insertions(+), 591 deletions(-)

diffs (truncated from 1989 to 300 lines):

diff -r 35d086074bb5 -r 53f7efef5ebd source/common/cudata.h

--- a/source/common/cudata.h	Fri Dec 05 10:59:33 2014 -0600
+++ b/source/common/cudata.h	Sat Dec 06 17:17:59 2014 +0900
@@ -192,7 +192,7 @@ public:
     void     setPUMv(int list, const MV& mv, int absPartIdx, int puIdx);
     void     setPURefIdx(int list, int8_t refIdx, int absPartIdx, int puIdx);
 
-    uint8_t  getCbf(uint32_t absPartIdx, TextType ttype, uint32_t trDepth) const { return (m_cbf[ttype][absPartIdx] >> trDepth) & 0x1; }
+    uint8_t  getCbf(uint32_t absPartIdx, TextType ttype, uint32_t tuDepth) const { return (m_cbf[ttype][absPartIdx] >> tuDepth) & 0x1; }
     uint8_t  getQtRootCbf(uint32_t absPartIdx) const                             { return m_cbf[0][absPartIdx] || m_cbf[1][absPartIdx] || m_cbf[2][absPartIdx]; }
     int8_t   getRefQP(uint32_t currAbsIdxInCTU) const;
     uint32_t getInterMergeCandidates(uint32_t absPartIdx, uint32_t puIdx, MVField (*mvFieldNeighbours)[2], uint8_t* interDirNeighbours) const;
diff -r 35d086074bb5 -r 53f7efef5ebd source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Fri Dec 05 10:59:33 2014 -0600
+++ b/source/encoder/analysis.cpp	Sat Dec 06 17:17:59 2014 +0900
@@ -943,8 +943,8 @@ void Analysis::compressInterCU_rd0_4(con
                         uint32_t tuDepthRange[2];
                         cu.getIntraTUQtDepthRange(tuDepthRange, 0);
 
-                        uint32_t initTrDepth = cu.m_partSize[0] != SIZE_2Nx2N;
-                        residualTransformQuantIntra(*md.bestMode, cuGeom, initTrDepth, 0, tuDepthRange);
+                        uint32_t initTuDepth = cu.m_partSize[0] != SIZE_2Nx2N;
+                        residualTransformQuantIntra(*md.bestMode, cuGeom, initTuDepth, 0, tuDepthRange);
                         getBestIntraModeChroma(*md.bestMode, cuGeom);
                         residualQTIntraChroma(*md.bestMode, cuGeom, 0, 0);
                         md.bestMode->reconYuv.copyFromPicYuv(*m_frame->m_reconPic, cu.m_cuAddr, cuGeom.encodeIdx); // TODO:
@@ -1682,8 +1682,8 @@ void Analysis::encodeResidue(const CUDat
         uint32_t tuDepthRange[2];
         cu.getIntraTUQtDepthRange(tuDepthRange, 0);
 
-        uint32_t initTrDepth = cu.m_partSize[0] != SIZE_2Nx2N;
-        residualTransformQuantIntra(*bestMode, cuGeom, initTrDepth, 0, tuDepthRange);
+        uint32_t initTuDepth = cu.m_partSize[0] != SIZE_2Nx2N;
+        residualTransformQuantIntra(*bestMode, cuGeom, initTuDepth, 0, tuDepthRange);
         getBestIntraModeChroma(*bestMode, cuGeom);
         residualQTIntraChroma(*bestMode, cuGeom, 0, 0);
     }
diff -r 35d086074bb5 -r 53f7efef5ebd source/encoder/entropy.cpp
--- a/source/encoder/entropy.cpp	Fri Dec 05 10:59:33 2014 -0600
+++ b/source/encoder/entropy.cpp	Sat Dec 06 17:17:59 2014 +0900
@@ -529,10 +529,10 @@ void Entropy::encodeCU(const CUData& ctu
 
     if (!cuUnsplitFlag)
     {
-        uint32_t qNumParts = (NUM_CU_PARTITIONS >> (depth << 1)) >> 2;
-        for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++, absPartIdx += qNumParts)
+        uint32_t qNumParts = cuGeom.numPartitions >> 2;
+        for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
         {
-            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);
+            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + qIdx);
             if (childGeom.flags & CUGeom::PRESENT)
                 encodeCU(ctu, childGeom, absPartIdx, depth + 1, bEncodeDQP);
         }
@@ -545,11 +545,10 @@ void Entropy::encodeCU(const CUData& ctu
 
     if (depth < ctu.m_cuDepth[absPartIdx] && depth < g_maxCUDepth)
     {
-        uint32_t qNumParts = (NUM_CU_PARTITIONS >> (depth << 1)) >> 2;
-
-        for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++, absPartIdx += qNumParts)
+        uint32_t qNumParts = cuGeom.numPartitions >> 2;
+        for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
         {
-            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);
+            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + qIdx);
             encodeCU(ctu, childGeom, absPartIdx, depth + 1, bEncodeDQP);
         }
         return;
@@ -582,7 +581,7 @@ void Entropy::encodeCU(const CUData& ctu
         ctu.getInterTUQtDepthRange(tuDepthRange, absPartIdx);
 
     // Encode Coefficients, allow codeCoeff() to modify bEncodeDQP
-    codeCoeff(ctu, absPartIdx, depth, bEncodeDQP, tuDepthRange);
+    codeCoeff(ctu, absPartIdx, bEncodeDQP, tuDepthRange);
 
     // --- write terminating bit ---
     finishCU(ctu, absPartIdx, depth);
@@ -619,41 +618,18 @@ void Entropy::finishCU(const CUData& ctu
     }
 }
 
-void Entropy::encodeTransform(const CUData& cu, CoeffCodeState& state, uint32_t offsetLuma, uint32_t offsetChroma, uint32_t absPartIdx,
-                              uint32_t absPartIdxStep, uint32_t depth, uint32_t log2TrSize, uint32_t trIdx, bool& bCodeDQP, uint32_t depthRange[2])
+void Entropy::encodeTransform(const CUData& cu, uint32_t absPartIdx, uint32_t tuDepth, uint32_t log2TrSize,
+                              bool& bCodeDQP, const uint32_t depthRange[2])
 {
-    const bool subdiv = cu.m_tuDepth[absPartIdx] + cu.m_cuDepth[absPartIdx] > (uint8_t)depth;
-    uint32_t hChromaShift = cu.m_hChromaShift;
-    uint32_t vChromaShift = cu.m_vChromaShift;
-    uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, trIdx);
-    uint32_t cbfU = cu.getCbf(absPartIdx, TEXT_CHROMA_U, trIdx);
-    uint32_t cbfV = cu.getCbf(absPartIdx, TEXT_CHROMA_V, trIdx);
-
-    if (!trIdx)
-        state.bakAbsPartIdxCU = absPartIdx;
-
-    if (log2TrSize == 2 && cu.m_chromaFormat != X265_CSP_I444)
-    {
-        uint32_t partNum = NUM_CU_PARTITIONS >> ((depth - 1) << 1);
-        if (!(absPartIdx & (partNum - 1)))
-        {
-            state.bakAbsPartIdx   = absPartIdx;
-            state.bakChromaOffset = offsetChroma;
-        }
-        else if ((absPartIdx & (partNum - 1)) == (partNum - 1))
-        {
-            cbfU = cu.getCbf(state.bakAbsPartIdx, TEXT_CHROMA_U, trIdx);
-            cbfV = cu.getCbf(state.bakAbsPartIdx, TEXT_CHROMA_V, trIdx);
-        }
-    }
+    const bool subdiv = cu.m_tuDepth[absPartIdx] > tuDepth;
 
     /* in each of these conditions, the subdiv flag is implied and not signaled,
      * so we have checks to make sure the implied value matches our intentions */
-    if (cu.isIntra(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && depth == cu.m_cuDepth[absPartIdx])
+    if (cu.isIntra(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && !tuDepth)
     {
         X265_CHECK(subdiv, "intra NxN requires TU depth below CU depth\n");
     }
-    else if (cu.isInter(absPartIdx) && (cu.m_partSize[absPartIdx] != SIZE_2Nx2N) && depth == cu.m_cuDepth[absPartIdx] &&
+    else if (cu.isInter(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && !tuDepth &&
              cu.m_slice->m_sps->quadtreeTUMaxDepthInter == 1)
     {
         X265_CHECK(subdiv, "inter TU must be smaller than CU when not 2Nx2N part size: log2TrSize %d, depthRange[0] %d\n", log2TrSize, depthRange[0]);
@@ -672,127 +648,111 @@ void Entropy::encodeTransform(const CUDa
         codeTransformSubdivFlag(subdiv, 5 - log2TrSize);
     }
 
-    const uint32_t trDepthCurr = depth - cu.m_cuDepth[absPartIdx];
-    const bool bFirstCbfOfCU = trDepthCurr == 0;
-
-    bool mCodeAll = true;
-    const uint32_t numPels = 1 << (log2TrSize * 2 - hChromaShift - vChromaShift);
-    if (numPels < (MIN_TU_SIZE * MIN_TU_SIZE))
-        mCodeAll = false;
-
-    if (bFirstCbfOfCU || mCodeAll)
+    uint32_t hChromaShift = cu.m_hChromaShift;
+    uint32_t vChromaShift = cu.m_vChromaShift;
+    bool bSmallChroma = (log2TrSize - hChromaShift < 2);
+    if (!tuDepth || !bSmallChroma)
     {
-        uint32_t tuSize = 1 << log2TrSize;
-        if (bFirstCbfOfCU || cu.getCbf(absPartIdx, TEXT_CHROMA_U, trDepthCurr - 1))
-            codeQtCbf(cu, absPartIdx, absPartIdxStep, (tuSize >> hChromaShift), (tuSize >> vChromaShift), TEXT_CHROMA_U, trDepthCurr, (subdiv == 0));
-        if (bFirstCbfOfCU || cu.getCbf(absPartIdx, TEXT_CHROMA_V, trDepthCurr - 1))
-            codeQtCbf(cu, absPartIdx, absPartIdxStep, (tuSize >> hChromaShift), (tuSize >> vChromaShift), TEXT_CHROMA_V, trDepthCurr, (subdiv == 0));
+        if (!tuDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_U, tuDepth - 1))
+            codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_U, tuDepth, !subdiv);
+        if (!tuDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_V, tuDepth - 1))
+            codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_V, tuDepth, !subdiv);
     }
     else
     {
-        X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_U, trDepthCurr) == cu.getCbf(absPartIdx, TEXT_CHROMA_U, trDepthCurr - 1), "chroma xform size match failure\n");
-        X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_V, trDepthCurr) == cu.getCbf(absPartIdx, TEXT_CHROMA_V, trDepthCurr - 1), "chroma xform size match failure\n");
+        X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_U, tuDepth) == cu.getCbf(absPartIdx, TEXT_CHROMA_U, tuDepth - 1), "chroma xform size match failure\n");
+        X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_V, tuDepth) == cu.getCbf(absPartIdx, TEXT_CHROMA_V, tuDepth - 1), "chroma xform size match failure\n");
     }
 
     if (subdiv)
     {
-        log2TrSize--;
-        uint32_t numCoeff  = 1 << (log2TrSize * 2);
-        uint32_t numCoeffC = (numCoeff >> (hChromaShift + vChromaShift));
-        trIdx++;
-        ++depth;
-        absPartIdxStep >>= 2;
-        const uint32_t partNum = NUM_CU_PARTITIONS >> (depth << 1);
+        --log2TrSize;
+        ++tuDepth;
 
-        encodeTransform(cu, state, offsetLuma, offsetChroma, absPartIdx, absPartIdxStep, depth, log2TrSize, trIdx, bCodeDQP, depthRange);
+        uint32_t qNumParts = 1 << (log2TrSize - LOG2_UNIT_SIZE) * 2;
 
-        absPartIdx += partNum;
-        offsetLuma += numCoeff;
-        offsetChroma += numCoeffC;
-        encodeTransform(cu, state, offsetLuma, offsetChroma, absPartIdx, absPartIdxStep, depth, log2TrSize, trIdx, bCodeDQP, depthRange);
+        encodeTransform(cu, absPartIdx + 0 * qNumParts, tuDepth, log2TrSize, bCodeDQP, depthRange);
+        encodeTransform(cu, absPartIdx + 1 * qNumParts, tuDepth, log2TrSize, bCodeDQP, depthRange);
+        encodeTransform(cu, absPartIdx + 2 * qNumParts, tuDepth, log2TrSize, bCodeDQP, depthRange);
+        encodeTransform(cu, absPartIdx + 3 * qNumParts, tuDepth, log2TrSize, bCodeDQP, depthRange);
+        return;
+    }
 
-        absPartIdx += partNum;
-        offsetLuma += numCoeff;
-        offsetChroma += numCoeffC;
-        encodeTransform(cu, state, offsetLuma, offsetChroma, absPartIdx, absPartIdxStep, depth, log2TrSize, trIdx, bCodeDQP, depthRange);
+    uint32_t absPartIdxC = bSmallChroma ? absPartIdx & 0xFC : absPartIdx;
 
-        absPartIdx += partNum;
-        offsetLuma += numCoeff;
-        offsetChroma += numCoeffC;
-        encodeTransform(cu, state, offsetLuma, offsetChroma, absPartIdx, absPartIdxStep, depth, log2TrSize, trIdx, bCodeDQP, depthRange);
+    if (cu.isInter(absPartIdxC) && !tuDepth && !cu.getCbf(absPartIdxC, TEXT_CHROMA_U, 0) && !cu.getCbf(absPartIdxC, TEXT_CHROMA_V, 0))
+    {
+        X265_CHECK(cu.getCbf(absPartIdxC, TEXT_LUMA, 0), "CBF should have been set\n");
+    }
+    else
+        codeQtCbfLuma(cu, absPartIdx, tuDepth);
+
+    uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, tuDepth);
+    uint32_t cbfU = cu.getCbf(absPartIdxC, TEXT_CHROMA_U, tuDepth);
+    uint32_t cbfV = cu.getCbf(absPartIdxC, TEXT_CHROMA_V, tuDepth);
+    if (!(cbfY || cbfU || cbfV))
+        return;
+
+    // dQP: only for CTU once
+    if (cu.m_slice->m_pps->bUseDQP && bCodeDQP)
+    {
+        uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
+        uint32_t absPartIdxLT = absPartIdx & (0xFF << (log2CUSize - LOG2_UNIT_SIZE) * 2);
+        codeDeltaQP(cu, absPartIdxLT);
+        bCodeDQP = false;
+    }
+
+    if (cbfY)
+    {
+        uint32_t coeffOffset = absPartIdx << (LOG2_UNIT_SIZE * 2);
+        codeCoeffNxN(cu, cu.m_trCoeff[0] + coeffOffset, absPartIdx, log2TrSize, TEXT_LUMA);
+        if (!(cbfU || cbfV))
+            return;
+    }
+
+    if (bSmallChroma)
+    {
+        if ((absPartIdx & 3) != 3)
+            return;
+
+        const uint32_t log2TrSizeC = 2;
+        const bool splitIntoSubTUs = (cu.m_chromaFormat == X265_CSP_I422);
+        const uint32_t curPartNum = 4;
+        uint32_t coeffOffsetC  = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
+        for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
+        {
+            TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdxC);
+            const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
+            do
+            {
+                if (cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, tuDepth + splitIntoSubTUs))
+                {
+                    uint32_t subTUOffset = tuIterator.section << (log2TrSizeC * 2);
+                    codeCoeffNxN(cu, coeffChroma + coeffOffsetC + subTUOffset, tuIterator.absPartIdxTURelCU, log2TrSizeC, (TextType)chromaId);
+                }
+            }
+            while (tuIterator.isNextSection());
+        }
     }
     else
     {
-        if (cu.isInter(absPartIdx) && depth == cu.m_cuDepth[absPartIdx] && !cu.getCbf(absPartIdx, TEXT_CHROMA_U, 0) && !cu.getCbf(absPartIdx, TEXT_CHROMA_V, 0))
+        uint32_t log2TrSizeC = log2TrSize - hChromaShift;
+        const bool splitIntoSubTUs = (cu.m_chromaFormat == X265_CSP_I422);
+        uint32_t curPartNum = 1 << (log2TrSize - LOG2_UNIT_SIZE) * 2;
+        uint32_t coeffOffsetC  = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
+        for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
         {
-            X265_CHECK(cu.getCbf(absPartIdx, TEXT_LUMA, 0), "CBF should have been set\n");
-        }
-        else
-            codeQtCbf(cu, absPartIdx, TEXT_LUMA, cu.m_tuDepth[absPartIdx]);
-
-        if (cbfY || cbfU || cbfV)
-        {
-            // dQP: only for CTU once
-            if (cu.m_slice->m_pps->bUseDQP)
+            TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdxC);
+            const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
+            do
             {
-                if (bCodeDQP)
+                if (cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, tuDepth + splitIntoSubTUs))
                 {
-                    codeDeltaQP(cu, state.bakAbsPartIdxCU);
-                    bCodeDQP = false;
+                    uint32_t subTUOffset = tuIterator.section << (log2TrSizeC * 2);
+                    codeCoeffNxN(cu, coeffChroma + coeffOffsetC + subTUOffset, tuIterator.absPartIdxTURelCU, log2TrSizeC, (TextType)chromaId);
                 }
             }
-        }
-        if (cbfY)
-            codeCoeffNxN(cu, cu.m_trCoeff[0] + offsetLuma, absPartIdx, log2TrSize, TEXT_LUMA);
-
-        int chFmt = cu.m_chromaFormat;
-        if (log2TrSize == 2 && chFmt != X265_CSP_I444)
-        {
-            uint32_t partNum = NUM_CU_PARTITIONS >> ((depth - 1) << 1);
-            if ((absPartIdx & (partNum - 1)) == (partNum - 1))
-            {
-                const uint32_t log2TrSizeC = 2;
-                const bool splitIntoSubTUs = (chFmt == X265_CSP_I422);
-
-                uint32_t curPartNum = NUM_CU_PARTITIONS >> ((depth - 1) << 1);
-
-                for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
-                {
-                    TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, state.bakAbsPartIdx);