[x265] split rate calculation functions into luma and chroma to simplify the luma path

Satoshi Nakagawa nakagawa424 at oki.com
Mon Jun 30 04:52:22 CEST 2014


# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1404096538 -32400
#      Mon Jun 30 11:48:58 2014 +0900
# Node ID 4c30d66afc78ed99385c04645d7b1303d80dea2c
# Parent  32aa6cc3cf4d108ac92f5d29258b2c38ca888d29
split rate calculation functions to luma and chroma to simplify luma path

diff -r 32aa6cc3cf4d -r 4c30d66afc78 source/Lib/TLibEncoder/TEncEntropy.cpp
--- a/source/Lib/TLibEncoder/TEncEntropy.cpp	Thu Jun 26 17:19:08 2014 -0700
+++ b/source/Lib/TLibEncoder/TEncEntropy.cpp	Mon Jun 30 11:48:58 2014 +0900
@@ -325,7 +325,7 @@
         }
         else
         {
-            m_entropyCoder->codeQtCbf(cu, absPartIdx, TEXT_LUMA, cu->getTransformIdx(absPartIdx), absPartIdxStep, tuSize, tuSize, (subdiv == 0));
+            m_entropyCoder->codeQtCbf(cu, absPartIdx, TEXT_LUMA, cu->getTransformIdx(absPartIdx));
         }
 
         if (cbfY || cbfU || cbfV)
@@ -342,7 +342,7 @@
         }
         if (cbfY)
         {
-            m_entropyCoder->codeCoeffNxN(cu, (cu->getCoeffY() + offsetLuma), absPartIdx, tuSize, TEXT_LUMA);
+            m_entropyCoder->codeCoeffNxN(cu, (cu->getCoeffY() + offsetLuma), absPartIdx, log2TrSize, TEXT_LUMA);
         }
 
         int chFmt = cu->getChromaFormat();
@@ -351,7 +351,7 @@
             uint32_t partNum = cu->getPic()->getNumPartInCU() >> ((depth - 1) << 1);
             if ((absPartIdx & (partNum - 1)) == (partNum - 1))
             {
-                uint32_t trSizeC           = 1 << log2TrSize;
+                const uint32_t log2TrSizeC = 2;
                 const bool splitIntoSubTUs = (chFmt == CHROMA_422);
 
                 uint32_t curPartNum = cu->getPic()->getNumPartInCU() >> ((depth - 1) << 1);
@@ -364,10 +364,10 @@
                     do
                     {
                         uint32_t cbf = cu->getCbf(tuIterator.m_absPartIdxTURelCU, (TextType)chromaId, trIdx + splitIntoSubTUs);
-                        uint32_t subTUIndex = tuIterator.m_section * trSizeC * trSizeC;
                         if (cbf)
                         {
-                            m_entropyCoder->codeCoeffNxN(cu, (coeffChroma + m_bakChromaOffset + subTUIndex), tuIterator.m_absPartIdxTURelCU, trSizeC, (TextType)chromaId);
+                            uint32_t subTUOffset = tuIterator.m_section << (log2TrSizeC * 2);
+                            m_entropyCoder->codeCoeffNxN(cu, (coeffChroma + m_bakChromaOffset + subTUOffset), tuIterator.m_absPartIdxTURelCU, log2TrSizeC, (TextType)chromaId);
                         }
                     }
                     while (isNextTUSection(&tuIterator));
@@ -376,7 +376,7 @@
         }
         else
         {
-            uint32_t trSizeC  = tuSize >> hChromaShift;
+            uint32_t log2TrSizeC = log2TrSize - hChromaShift;
             const bool splitIntoSubTUs = (chFmt == CHROMA_422);
             uint32_t curPartNum = cu->getPic()->getNumPartInCU() >> (depth << 1);
             for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
@@ -387,10 +387,10 @@
                 do
                 {
                     uint32_t cbf = cu->getCbf(tuIterator.m_absPartIdxTURelCU, (TextType)chromaId, trIdx + splitIntoSubTUs);
-                    uint32_t subTUIndex = tuIterator.m_section * trSizeC * trSizeC;
                     if (cbf)
                     {
-                        m_entropyCoder->codeCoeffNxN(cu, (coeffChroma + offsetChroma + subTUIndex), tuIterator.m_absPartIdxTURelCU, trSizeC, (TextType)chromaId);
+                        uint32_t subTUOffset = tuIterator.m_section << (log2TrSizeC * 2);
+                        m_entropyCoder->codeCoeffNxN(cu, (coeffChroma + offsetChroma + subTUOffset), tuIterator.m_absPartIdxTURelCU, log2TrSizeC, (TextType)chromaId);
                     }
                 }
                 while (isNextTUSection(&tuIterator));
@@ -540,11 +540,6 @@
     m_entropyCoder->codeQtRootCbf(cu, absPartIdx);
 }
 
-void TEncEntropy::encodeQtCbfZero(TComDataCU* cu, TextType ttype, uint32_t trDepth)
-{
-    m_entropyCoder->codeQtCbfZero(cu, ttype, trDepth);
-}
-
 void TEncEntropy::encodeQtRootCbfZero(TComDataCU* cu)
 {
     m_entropyCoder->codeQtRootCbfZero(cu);
@@ -593,11 +588,6 @@
     xEncodeTransform(cu, lumaOffset, chromaOffset, absPartIdx, absPartIdxStep, depth, cuSize, 0, bCodeDQP);
 }
 
-void TEncEntropy::encodeCoeffNxN(TComDataCU* cu, coeff_t* coeff, uint32_t absPartIdx, uint32_t trSize, TextType ttype)
-{
-    m_entropyCoder->codeCoeffNxN(cu, coeff, absPartIdx, trSize, ttype);
-}
-
 void TEncEntropy::estimateBit(estBitsSbacStruct* estBitsSBac, int trSize, TextType ttype)
 {
     ttype = ttype == TEXT_LUMA ? TEXT_LUMA : TEXT_CHROMA;
diff -r 32aa6cc3cf4d -r 4c30d66afc78 source/Lib/TLibEncoder/TEncEntropy.h
--- a/source/Lib/TLibEncoder/TEncEntropy.h	Thu Jun 26 17:19:08 2014 -0700
+++ b/source/Lib/TLibEncoder/TEncEntropy.h	Mon Jun 30 11:48:58 2014 +0900
@@ -114,14 +114,24 @@
 
     void encodeTransformSubdivFlag(uint32_t symbol, uint32_t ctx);
     void encodeQtCbf(TComDataCU* cu, uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t width, uint32_t height, TextType ttype, uint32_t trDepth, bool lowestLevel);
-    void encodeQtCbfZero(TComDataCU* cu, TextType ttype, uint32_t trDepth);
+    void encodeQtCbf(TComDataCU* cu, uint32_t absPartIdx, TextType ttype, uint32_t trDepth)
+    {
+        m_entropyCoder->codeQtCbf(cu, absPartIdx, ttype, trDepth);
+    }
+    void encodeQtCbfZero(TComDataCU* cu, TextType ttype, uint32_t trDepth)
+    {
+        m_entropyCoder->codeQtCbfZero(cu, ttype, trDepth);
+    }
+
     void encodeQtRootCbfZero(TComDataCU* cu);
     void encodeQtRootCbf(TComDataCU* cu, uint32_t absPartIdx);
     void encodeQP(TComDataCU* cu, uint32_t absPartIdx);
     void encodeScalingList(TComScalingList* scalingList);
     void encodeCoeff(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, uint32_t cuSize, bool& bCodeDQP);
-    void encodeCoeffNxN(TComDataCU* cu, coeff_t* pcCoeff, uint32_t absPartIdx, uint32_t trSize, TextType ttype);
-
+    void encodeCoeffNxN(TComDataCU* cu, coeff_t* coeff, uint32_t absPartIdx, uint32_t log2TrSize, TextType ttype)
+    {
+        m_entropyCoder->codeCoeffNxN(cu, coeff, absPartIdx, log2TrSize, ttype);
+    }
 
     void estimateBit(estBitsSbacStruct* estBitsSbac, int trSize, TextType ttype);
     void encodeSaoOffset(SaoLcuParam* saoLcuParam, uint32_t compIdx);
diff -r 32aa6cc3cf4d -r 4c30d66afc78 source/Lib/TLibEncoder/TEncSbac.cpp
--- a/source/Lib/TLibEncoder/TEncSbac.cpp	Thu Jun 26 17:19:08 2014 -0700
+++ b/source/Lib/TLibEncoder/TEncSbac.cpp	Mon Jun 30 11:48:58 2014 +0900
@@ -1873,6 +1873,25 @@
     }
 }
 
+void TEncSbac::codeQtCbf(TComDataCU* cu, uint32_t absPartIdx, TextType ttype, uint32_t trDepth)
+{
+    uint32_t ctx = cu->getCtxQtCbf(ttype, trDepth);
+    uint32_t cbf = cu->getCbf(absPartIdx, ttype, trDepth);
+    m_cabac->encodeBin(cbf, m_contextModels[OFF_QT_CBF_CTX + ctx]);
+
+    DTRACE_CABAC_VL(g_nSymbolCounter++)
+    DTRACE_CABAC_T("\tparseQtCbf()")
+    DTRACE_CABAC_T("\tsymbol=")
+    DTRACE_CABAC_V(cbf)
+    DTRACE_CABAC_T("\tctx=")
+    DTRACE_CABAC_V(ctx)
+    DTRACE_CABAC_T("\tetype=")
+    DTRACE_CABAC_V(ttype)
+    DTRACE_CABAC_T("\tuiAbsPartIdx=")
+    DTRACE_CABAC_V(absPartIdx)
+    DTRACE_CABAC_T("\n")
+}
+
 void TEncSbac::codeTransformSkipFlags(TComDataCU* cu, uint32_t absPartIdx, uint32_t trSize, TextType ttype)
 {
     if (cu->getCUTransquantBypass(absPartIdx))
@@ -1999,8 +2018,9 @@
     }
 }
 
-void TEncSbac::codeCoeffNxN(TComDataCU* cu, coeff_t* coeff, uint32_t absPartIdx, uint32_t trSize, TextType ttype)
+void TEncSbac::codeCoeffNxN(TComDataCU* cu, coeff_t* coeff, uint32_t absPartIdx, uint32_t log2TrSize, TextType ttype)
 {
+    uint32_t trSize = 1 << log2TrSize;
 #if ENC_DEC_TRACE
     DTRACE_CABAC_VL(g_nSymbolCounter++)
     DTRACE_CABAC_T("\tparseCoeffNxN()\teType=")
@@ -2028,8 +2048,6 @@
 
     X265_CHECK(trSize <= m_slice->getSPS()->getMaxTrSize(), "transform size out of range\n");
 
-    const uint32_t log2TrSize = g_convertToBit[trSize] + 2;
-
     // compute number of significant coefficients
     uint32_t numSig = primitives.count_nonzero(coeff, (1 << (log2TrSize << 1)));
 
diff -r 32aa6cc3cf4d -r 4c30d66afc78 source/Lib/TLibEncoder/TEncSbac.h
--- a/source/Lib/TLibEncoder/TEncSbac.h	Thu Jun 26 17:19:08 2014 -0700
+++ b/source/Lib/TLibEncoder/TEncSbac.h	Mon Jun 30 11:48:58 2014 +0900
@@ -114,6 +114,7 @@
     void codePredMode(TComDataCU* cu, uint32_t absPartIdx);
     void codeTransformSubdivFlag(uint32_t symbol, uint32_t ctx);
     void codeQtCbf(TComDataCU* cu, uint32_t absPartIdx, TextType ttype, uint32_t trDepth, uint32_t absPartIdxStep, uint32_t width, uint32_t height, bool lowestLevel);
+    void codeQtCbf(TComDataCU* cu, uint32_t absPartIdx, TextType ttype, uint32_t trDepth);
     void codeQtRootCbf(TComDataCU* cu, uint32_t absPartIdx);
     void codeQtCbfZero(TComDataCU* cu, TextType ttype, uint32_t trDepth);
     void codeQtRootCbfZero(TComDataCU* cu);
@@ -127,7 +128,7 @@
     void codeDeltaQP(TComDataCU* cu, uint32_t absPartIdx);
 
     void codeLastSignificantXY(uint32_t posx, uint32_t posy, uint32_t log2TrSize, TextType ttype, uint32_t scanIdx);
-    void codeCoeffNxN(TComDataCU* cu, coeff_t* coef, uint32_t absPartIdx, uint32_t trSize, TextType ttype);
+    void codeCoeffNxN(TComDataCU* cu, coeff_t* coef, uint32_t absPartIdx, uint32_t log2TrSize, TextType ttype);
     void codeTransformSkipFlags(TComDataCU* cu, uint32_t absPartIdx, uint32_t trSize, TextType ttype);
 
     // -------------------------------------------------------------------------------------------------------------------
diff -r 32aa6cc3cf4d -r 4c30d66afc78 source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp	Thu Jun 26 17:19:08 2014 -0700
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp	Mon Jun 30 11:48:58 2014 +0900
@@ -151,7 +151,7 @@
     m_rdCost->setCrDistortionWeight(lambdaOffset);
 }
 
-void TEncSearch::xEncSubdivCbfQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t width, uint32_t height, bool bLuma, bool bChroma)
+void TEncSearch::xEncSubdivCbfQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx)
 {
     uint32_t fullDepth  = cu->getDepth(0) + trDepth;
     uint32_t trMode     = cu->getTransformIdx(absPartIdx);
@@ -177,77 +177,116 @@
     else
     {
         X265_CHECK(log2TrSize > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx), "transform size too small\n");
-        if (bLuma)
-        {
-            m_entropyCoder->encodeTransformSubdivFlag(subdiv, 5 - log2TrSize);
-        }
+        m_entropyCoder->encodeTransformSubdivFlag(subdiv, 5 - log2TrSize);
     }
 
-    if (bChroma)
-    {
-        int      chFmt      = cu->getChromaFormat();
-        if ((log2TrSize > 2) && !(chFmt == CHROMA_444))
-        {
-            if (trDepth == 0 || cu->getCbf(absPartIdx, TEXT_CHROMA_U, trDepth - 1))
-                m_entropyCoder->encodeQtCbf(cu, absPartIdx, absPartIdxStep, (width >> m_hChromaShift), (height >> m_vChromaShift), TEXT_CHROMA_U, trDepth, (subdiv == 0));
-
-            if (trDepth == 0 || cu->getCbf(absPartIdx, TEXT_CHROMA_V, trDepth - 1))
-                m_entropyCoder->encodeQtCbf(cu, absPartIdx, absPartIdxStep, (width >> m_hChromaShift), (height >> m_vChromaShift), TEXT_CHROMA_V, trDepth, (subdiv == 0));
-        }
-    }
-
-    if (subdiv)
-    {
-        TComTURecurse tuIterator;
-        initSection(&tuIterator, QUAD_SPLIT, absPartIdxStep);
-        width  >>= 1;
-        height >>= 1;
-
-        uint32_t qtPartNum = cu->getPic()->getNumPartInCU() >> ((fullDepth + 1) << 1);
-        for (uint32_t part = 0; part < 4; part++)
-        {
-            xEncSubdivCbfQT(cu, trDepth + 1, absPartIdx + part * qtPartNum, tuIterator.m_absPartIdxStep, width, height, bLuma, bChroma);
-        }
-
-        return;
-    }
-
-    //===== Cbfs =====
-    if (bLuma)
-    {
-        m_entropyCoder->encodeQtCbf(cu, absPartIdx, absPartIdxStep, width, height, TEXT_LUMA, trMode, (subdiv == 0));
-    }
-}
-
-void TEncSearch::xEncCoeffQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TextType ttype)
-{
-    if (!cu->getCbf(absPartIdx, ttype, trDepth))
-        return;
-
-    uint32_t fullDepth  = cu->getDepth(0) + trDepth;
-    uint32_t trMode     = cu->getTransformIdx(absPartIdx);
-    uint32_t subdiv     = (trMode > trDepth ? 1 : 0);
-
     if (subdiv)
     {
         uint32_t qtPartNum = cu->getPic()->getNumPartInCU() >> ((fullDepth + 1) << 1);
         for (uint32_t part = 0; part < 4; part++)
         {
-            xEncCoeffQT(cu, trDepth + 1, absPartIdx + part * qtPartNum, ttype);
+            xEncSubdivCbfQTLuma(cu, trDepth + 1, absPartIdx + part * qtPartNum);
         }
 
         return;
     }
 
-    uint32_t origTrDepth = trDepth;
-
+    //===== Cbfs =====
+    m_entropyCoder->encodeQtCbf(cu, absPartIdx, TEXT_LUMA, trMode);
+}
+
+void TEncSearch::xEncSubdivCbfQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t width, uint32_t height)
+{
+    uint32_t fullDepth  = cu->getDepth(0) + trDepth;
+    uint32_t trMode     = cu->getTransformIdx(absPartIdx);
+    uint32_t subdiv     = (trMode > trDepth ? 1 : 0);
     uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
-    int chFmt           = cu->getChromaFormat();
-    if ((ttype != TEXT_LUMA) && (log2TrSize == 2) && !(chFmt == CHROMA_444))
+
+    int      chFmt      = cu->getChromaFormat();
+    if ((log2TrSize > 2) && !(chFmt == CHROMA_444))
+    {
+        if (trDepth == 0 || cu->getCbf(absPartIdx, TEXT_CHROMA_U, trDepth - 1))
+            m_entropyCoder->encodeQtCbf(cu, absPartIdx, absPartIdxStep, (width >> m_hChromaShift), (height >> m_vChromaShift), TEXT_CHROMA_U, trDepth, (subdiv == 0));
+
+        if (trDepth == 0 || cu->getCbf(absPartIdx, TEXT_CHROMA_V, trDepth - 1))
+            m_entropyCoder->encodeQtCbf(cu, absPartIdx, absPartIdxStep, (width >> m_hChromaShift), (height >> m_vChromaShift), TEXT_CHROMA_V, trDepth, (subdiv == 0));
+    }
+
+    if (subdiv)
+    {
+        absPartIdxStep >>= 2;
+        width  >>= 1;
+        height >>= 1;
+
+        uint32_t qtPartNum = cu->getPic()->getNumPartInCU() >> ((fullDepth + 1) << 1);
+        for (uint32_t part = 0; part < 4; part++)
+        {
+            xEncSubdivCbfQTChroma(cu, trDepth + 1, absPartIdx + part * qtPartNum, absPartIdxStep, width, height);
+        }
+    }
+}
+
+void TEncSearch::xEncCoeffQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx)
+{
+    const TextType ttype = TEXT_LUMA;
+
+    if (!cu->getCbf(absPartIdx, ttype, trDepth))
+        return;
+
+    uint32_t fullDepth  = cu->getDepth(0) + trDepth;
+    uint32_t trMode     = cu->getTransformIdx(absPartIdx);
+
+    if (trMode > trDepth)
+    {
+        uint32_t qtPartNum = cu->getPic()->getNumPartInCU() >> ((fullDepth + 1) << 1);
+        for (uint32_t part = 0; part < 4; part++)
+        {
+            xEncCoeffQTLuma(cu, trDepth + 1, absPartIdx + part * qtPartNum);
+        }
+
+        return;
+    }
+
+    uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
+    uint32_t qtLayer    = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
+    uint32_t log2UnitSize = cu->getPic()->getLog2UnitSize();
+    uint32_t coeffOffset = absPartIdx << (log2UnitSize * 2);
+    coeff_t* coeff = m_qtTempCoeff[ttype][qtLayer] + coeffOffset;
+    m_entropyCoder->encodeCoeffNxN(cu, coeff, absPartIdx, log2TrSize, ttype);
+}
+
+void TEncSearch::xEncCoeffQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TextType ttype)
+{
+    if (!cu->getCbf(absPartIdx, ttype, trDepth))
+        return;
+
+    uint32_t fullDepth  = cu->getDepth(0) + trDepth;
+    uint32_t trMode     = cu->getTransformIdx(absPartIdx);
+
+    if (trMode > trDepth)
+    {
+        uint32_t qtPartNum = cu->getPic()->getNumPartInCU() >> ((fullDepth + 1) << 1);
+        for (uint32_t part = 0; part < 4; part++)
+        {
+            xEncCoeffQTChroma(cu, trDepth + 1, absPartIdx + part * qtPartNum, ttype);
+        }
+
+        return;
+    }
+
+    uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
+    uint32_t qtLayer    = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
+    uint32_t log2UnitSize = cu->getPic()->getLog2UnitSize();
+
+    uint32_t trDepthC = trDepth;
+    uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
+    int chFmt = cu->getChromaFormat();
+    if ((log2TrSize == 2) && !(chFmt == CHROMA_444))
     {
         X265_CHECK(trDepth > 0, "transform size too small\n");
-        trDepth--;
-        uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trDepth) << 1);
+        trDepthC--;
+        log2TrSizeC++;
+        uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trDepthC) << 1);
         bool bFirstQ = ((absPartIdx & (qpdiv - 1)) == 0);
         if (!bFirstQ)
         {
@@ -255,132 +294,122 @@
         }
     }
 
-    //===== coefficients =====
-    uint32_t chroma     = (ttype != TEXT_LUMA ? 1 : 0);
-    int cspx = chroma ? m_hChromaShift : 0;
-    int cspy = chroma ? m_vChromaShift : 0;
-    uint32_t width = cu->getCUSize(0) >> (trDepth + cspx);
-    uint32_t height = cu->getCUSize(0) >> (trDepth + cspy);
-    uint32_t coeffOffset = absPartIdx << (cu->getPic()->getLog2UnitSize() * 2 - (cspx + cspy));
-    uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
-    coeff_t* coeff = m_qtTempCoeff[ttype][qtLayer] + coeffOffset;
-
-    if (width == height)
+    if (chFmt != CHROMA_422)
     {
-        m_entropyCoder->encodeCoeffNxN(cu, coeff, absPartIdx, width, ttype);
+        uint32_t shift = (chFmt == CHROMA_420) ? 2 : 0;
+        uint32_t coeffOffset = absPartIdx << (log2UnitSize * 2 - shift);
+        coeff_t* coeff = m_qtTempCoeff[ttype][qtLayer] + coeffOffset;
+        m_entropyCoder->encodeCoeffNxN(cu, coeff, absPartIdx, log2TrSizeC, ttype);
     }
     else
     {
-        uint32_t subTUSize = width * width;
-        uint32_t partIdxesPerSubTU  = cu->getPic()->getNumPartInCU() >> (((cu->getDepth(absPartIdx) + trDepth) << 1) + 1);
-
-        if (cu->getCbf(absPartIdx, ttype, origTrDepth + 1))
-            m_entropyCoder->encodeCoeffNxN(cu, coeff, absPartIdx, width, ttype);
-        if (cu->getCbf(absPartIdx + partIdxesPerSubTU, ttype, origTrDepth + 1))
-            m_entropyCoder->encodeCoeffNxN(cu, coeff + subTUSize, absPartIdx + partIdxesPerSubTU, width, ttype);
+        uint32_t coeffOffset = absPartIdx << (log2UnitSize * 2 - 1);
+        coeff_t* coeff = m_qtTempCoeff[ttype][qtLayer] + coeffOffset;
+        uint32_t subTUSize = 1 << (log2TrSizeC * 2);
+        uint32_t partIdxesPerSubTU  = cu->getPic()->getNumPartInCU() >> (((cu->getDepth(absPartIdx) + trDepthC) << 1) + 1);
+        if (cu->getCbf(absPartIdx, ttype, trDepth + 1))
+            m_entropyCoder->encodeCoeffNxN(cu, coeff, absPartIdx, log2TrSizeC, ttype);
+        if (cu->getCbf(absPartIdx + partIdxesPerSubTU, ttype, trDepth + 1))
+            m_entropyCoder->encodeCoeffNxN(cu, coeff + subTUSize, absPartIdx + partIdxesPerSubTU, log2TrSizeC, ttype);
     }
 }
 
-void TEncSearch::xEncIntraHeader(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, bool bLuma, bool bChroma)
+void TEncSearch::xEncIntraHeaderLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx)
 {
-    if (bLuma)
+    // CU header
+    if (absPartIdx == 0)
     {
-        // CU header
+        if (!cu->getSlice()->isIntra())
+        {
+            if (cu->getSlice()->getPPS()->getTransquantBypassEnableFlag())
+            {
+                m_entropyCoder->encodeCUTransquantBypassFlag(cu, 0);
+            }
+            m_entropyCoder->encodeSkipFlag(cu, 0);
+            m_entropyCoder->encodePredMode(cu, 0);
+        }
+
+        m_entropyCoder->encodePartSize(cu, 0, cu->getDepth(0));
+    }
+    // luma prediction mode
+    if (cu->getPartitionSize(0) == SIZE_2Nx2N)
+    {
         if (absPartIdx == 0)
         {
-            if (!cu->getSlice()->isIntra())
+            m_entropyCoder->encodeIntraDirModeLuma(cu, 0);
+        }
+    }
+    else
+    {
+        uint32_t qtNumParts = cu->getTotalNumPart() >> 2;
+        if (trDepth == 0)
+        {
+            X265_CHECK(absPartIdx == 0, "unexpected absPartIdx %d\n", absPartIdx);
+            for (uint32_t part = 0; part < 4; part++)
             {
-                if (cu->getSlice()->getPPS()->getTransquantBypassEnableFlag())
-                {
-                    m_entropyCoder->encodeCUTransquantBypassFlag(cu, 0);
-                }
-                m_entropyCoder->encodeSkipFlag(cu, 0);
-                m_entropyCoder->encodePredMode(cu, 0);
-            }
-
-            m_entropyCoder->encodePartSize(cu, 0, cu->getDepth(0));
-        }
-        // luma prediction mode
-        if (cu->getPartitionSize(0) == SIZE_2Nx2N)
-        {
-            if (absPartIdx == 0)
-            {
-                m_entropyCoder->encodeIntraDirModeLuma(cu, 0);
+                m_entropyCoder->encodeIntraDirModeLuma(cu, part * qtNumParts);
             }
         }
-        else
+        else if ((absPartIdx & (qtNumParts - 1)) == 0)
         {
-            uint32_t qtNumParts = cu->getTotalNumPart() >> 2;
-            if (trDepth == 0)
-            {
-                X265_CHECK(absPartIdx == 0, "unexpected absPartIdx %d\n", absPartIdx);
-                for (uint32_t part = 0; part < 4; part++)
-                {
-                    m_entropyCoder->encodeIntraDirModeLuma(cu, part * qtNumParts);
-                }
-            }
-            else if ((absPartIdx & (qtNumParts - 1)) == 0)
-            {
-                m_entropyCoder->encodeIntraDirModeLuma(cu, absPartIdx);
-            }
-        }
-    }
-    if (bChroma)
-    {
-        // chroma prediction mode
-        if ((cu->getPartitionSize(0) == SIZE_2Nx2N) || !(cu->getChromaFormat() == CHROMA_444))
-        {
-            if (absPartIdx == 0)
-            {
-                m_entropyCoder->encodeIntraDirModeChroma(cu, absPartIdx);
-            }
-        }
-        else
-        {
-            uint32_t qtNumParts = cu->getTotalNumPart() >> 2;
-            X265_CHECK(trDepth > 0, "unexpected trDepth %d\n", trDepth);
-            if ((absPartIdx & (qtNumParts - 1)) == 0)
-                m_entropyCoder->encodeIntraDirModeChroma(cu, absPartIdx);
+            m_entropyCoder->encodeIntraDirModeLuma(cu, absPartIdx);
         }
     }
 }
 
-uint32_t TEncSearch::xGetIntraBitsQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t absPartIdxStep, bool bLuma, bool bChroma)
+void TEncSearch::xEncIntraHeaderChroma(TComDataCU* cu, uint32_t absPartIdx)
+{
+    // chroma prediction mode
+    if ((cu->getPartitionSize(0) == SIZE_2Nx2N) || !(cu->getChromaFormat() == CHROMA_444))
+    {
+        if (absPartIdx == 0)
+        {
+            m_entropyCoder->encodeIntraDirModeChroma(cu, absPartIdx);
+        }
+    }
+    else
+    {
+        uint32_t qtNumParts = cu->getTotalNumPart() >> 2;
+        if ((absPartIdx & (qtNumParts - 1)) == 0)
+            m_entropyCoder->encodeIntraDirModeChroma(cu, absPartIdx);
+    }
+}
+
+uint32_t TEncSearch::xGetIntraBitsQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx)
 {
     m_entropyCoder->resetBits();
-    xEncIntraHeader(cu, trDepth, absPartIdx, bLuma, bChroma);
-    xEncSubdivCbfQT(cu, trDepth, absPartIdx, absPartIdxStep, cu->getCUSize(absPartIdx), cu->getCUSize(absPartIdx), bLuma, bChroma);
-
-    if (bLuma)
-    {
-        xEncCoeffQT(cu, trDepth, absPartIdx, TEXT_LUMA);
-    }
-    if (bChroma)
-    {
-        xEncCoeffQT(cu, trDepth, absPartIdx, TEXT_CHROMA_U);
-        xEncCoeffQT(cu, trDepth, absPartIdx, TEXT_CHROMA_V);
-    }
+    xEncIntraHeaderLuma(cu, trDepth, absPartIdx);
+    xEncSubdivCbfQTLuma(cu, trDepth, absPartIdx);
+    xEncCoeffQTLuma(cu, trDepth, absPartIdx);
     return m_entropyCoder->getNumberOfWrittenBits();
 }
 
-uint32_t TEncSearch::xGetIntraBitsQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t log2TrSize, coeff_t* coeff)
+uint32_t TEncSearch::xGetIntraBitsQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t absPartIdxStep)
 {
     m_entropyCoder->resetBits();
-    xEncIntraHeader(cu, trDepth, absPartIdx, true, false);
-    xEncSubdivCbfQT(cu, trDepth, absPartIdx, 0, cu->getCUSize(absPartIdx), cu->getCUSize(absPartIdx), true, false);
-
-    if (cu->getCbf(absPartIdx, TEXT_LUMA, trDepth))
-    {
-        m_entropyCoder->encodeCoeffNxN(cu, coeff, absPartIdx, 1 << log2TrSize, TEXT_LUMA);
-    }
-
+    xEncIntraHeaderChroma(cu, absPartIdx);
+    xEncSubdivCbfQTChroma(cu, trDepth, absPartIdx, absPartIdxStep, cu->getCUSize(absPartIdx), cu->getCUSize(absPartIdx));
+    xEncCoeffQTChroma(cu, trDepth, absPartIdx, TEXT_CHROMA_U);
+    xEncCoeffQTChroma(cu, trDepth, absPartIdx, TEXT_CHROMA_V);
     return m_entropyCoder->getNumberOfWrittenBits();
 }
 
-uint32_t TEncSearch::xGetIntraBitsQTChroma(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSizeC, uint32_t chromaId, coeff_t* coeff)
+uint32_t TEncSearch::xGetIntraBitsLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t log2TrSize, coeff_t* coeff)
 {
     m_entropyCoder->resetBits();
-    m_entropyCoder->encodeCoeffNxN(cu, coeff, absPartIdx, 1 << log2TrSizeC, (TextType)chromaId);
+    xEncIntraHeaderLuma(cu, trDepth, absPartIdx);
+    xEncSubdivCbfQTLuma(cu, trDepth, absPartIdx);
+
+    if (cu->getCbf(absPartIdx, TEXT_LUMA, trDepth))
+        m_entropyCoder->encodeCoeffNxN(cu, coeff, absPartIdx, log2TrSize, TEXT_LUMA);
+
+    return m_entropyCoder->getNumberOfWrittenBits();
+}
+
+uint32_t TEncSearch::xGetIntraBitsChroma(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSizeC, uint32_t chromaId, coeff_t* coeff)
+{
+    m_entropyCoder->resetBits();
+    m_entropyCoder->encodeCoeffNxN(cu, coeff, absPartIdx, log2TrSizeC, (TextType)chromaId);
     return m_entropyCoder->getNumberOfWrittenBits();
 }
 
@@ -688,7 +717,7 @@
                 }
                 else
                 {
-                    uint32_t singleBits = xGetIntraBitsQTLuma(cu, trDepth, absPartIdx, log2TrSize, coeff);
+                    uint32_t singleBits = xGetIntraBitsLuma(cu, trDepth, absPartIdx, log2TrSize, coeff);
                     if (m_rdCost->psyRdEnabled())
                         singleCostTmp = m_rdCost->calcPsyRdCost(singleDistYTmp, singleBits, singlePsyEnergyYTmp);
                     else
@@ -748,7 +777,7 @@
             }
             cu->setCbfSubParts(singleCbfY << trDepth, TEXT_LUMA, absPartIdx, fullDepth);
 
-            uint32_t singleBits = xGetIntraBitsQTLuma(cu, trDepth, absPartIdx, log2TrSize, coeffY);
+            uint32_t singleBits = xGetIntraBitsLuma(cu, trDepth, absPartIdx, log2TrSize, coeffY);
             if (m_param->rdPenalty && (log2TrSize == 5) && !isIntraSlice)
                 singleBits *= 4;
 
@@ -799,7 +828,7 @@
         m_rdGoOnSbacCoder->load(m_rdSbacCoders[fullDepth][CI_QT_TRAFO_ROOT]);
 
         //----- determine rate and r-d cost -----
-        uint32_t splitBits = xGetIntraBitsQT(cu, trDepth, absPartIdx, 0, true, false);
+        uint32_t splitBits = xGetIntraBitsQTLuma(cu, trDepth, absPartIdx);
         if (m_rdCost->psyRdEnabled())
             splitCost = m_rdCost->calcPsyRdCost(splitDistY, splitBits, splitPsyEnergyY);
         else
@@ -1174,7 +1203,7 @@
                         }
                         else
                         {
-                            uint32_t bitsTmp = singleCbfCTmp ? xGetIntraBitsQTChroma(cu, absPartIdxC, log2TrSizeC, chromaId, coeff) : 0;
+                            uint32_t bitsTmp = singleCbfCTmp ? xGetIntraBitsChroma(cu, absPartIdxC, log2TrSizeC, chromaId, coeff) : 0;
                             if (m_rdCost->psyRdEnabled())
                             {
                                 uint32_t zorder = cu->getZorderIdxInCU() + absPartIdxC;
@@ -1889,7 +1918,7 @@
                 m_rdGoOnSbacCoder->load(m_rdSbacCoders[depth][CI_CURR_BEST]);
             }
 
-            uint32_t bits = xGetIntraBitsQT(cu, initTrDepth, absPartIdxC, tuIterator.m_absPartIdxStep, false, true);
+            uint32_t bits = xGetIntraBitsQTChroma(cu, initTrDepth, absPartIdxC, tuIterator.m_absPartIdxStep);
             uint64_t cost = 0; 
             if (m_rdCost->psyRdEnabled())
                 cost = m_rdCost->calcPsyRdCost(dist, bits, cu->m_psyEnergy);
@@ -2736,7 +2765,7 @@
             do
             {
                 uint32_t absPartIdxC = tuIterator.m_absPartIdxTURelCU;
-                uint32_t subTUBufferOffset = trSizeC * trSizeC * tuIterator.m_section;
+                uint32_t subTUOffset = tuIterator.m_section << (log2TrSizeC * 2);
 
                 int16_t *curResiU = resiYuv->getCbAddr(absPartIdxC);
                 int16_t *curResiV = resiYuv->getCrAddr(absPartIdxC);
@@ -2747,13 +2776,13 @@
                 int curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();
                 m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
                 m_trQuant->selectLambda(TEXT_CHROMA_U);
-                absSumU = m_trQuant->transformNxN(cu, curResiU, strideResiC, coeffCurU + subTUBufferOffset,
+                absSumU = m_trQuant->transformNxN(cu, curResiU, strideResiC, coeffCurU + subTUOffset,
                                                   trSizeC, TEXT_CHROMA_U, absPartIdxC, &lastPosU, false, curuseRDOQ);
 
                 curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
                 m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
                 m_trQuant->selectLambda(TEXT_CHROMA_V);
-                absSumV = m_trQuant->transformNxN(cu, curResiV, strideResiC, coeffCurV + subTUBufferOffset,
+                absSumV = m_trQuant->transformNxN(cu, curResiV, strideResiC, coeffCurV + subTUOffset,
                                                   trSizeC, TEXT_CHROMA_V, absPartIdxC, &lastPosV, false, curuseRDOQ);
 
                 cu->setCbfPartRange(absSumU ? setCbf : 0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
@@ -2766,7 +2795,7 @@
 
                     int scalingListType = 3 + TEXT_CHROMA_U;
                     X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);
-                    m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiU, strideResiC, coeffCurU + subTUBufferOffset, trSizeC, scalingListType, false, lastPosU);
+                    m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiU, strideResiC, coeffCurU + subTUOffset, trSizeC, scalingListType, false, lastPosU);
                 }
                 else
                 {
@@ -2779,7 +2808,7 @@
 
                     int scalingListType = 3 + TEXT_CHROMA_V;
                     X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);
-                    m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiV, strideResiC, coeffCurV + subTUBufferOffset, trSizeC, scalingListType, false, lastPosV);
+                    m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiV, strideResiC, coeffCurV + subTUOffset, trSizeC, scalingListType, false, lastPosV);
                 }
                 else
                 {
@@ -2923,9 +2952,9 @@
         cu->setCbfSubParts(absSum[TEXT_LUMA][0] ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);
 
         m_entropyCoder->resetBits();
-        m_entropyCoder->encodeQtCbf(cu, absPartIdx, 0, trSize, trSize, TEXT_LUMA, trMode, true);
+        m_entropyCoder->encodeQtCbf(cu, absPartIdx, TEXT_LUMA, trMode);
         if (absSum[TEXT_LUMA][0])
-            m_entropyCoder->encodeCoeffNxN(cu, coeffCurY, absPartIdx,  trSize, TEXT_LUMA);
+            m_entropyCoder->encodeCoeffNxN(cu, coeffCurY, absPartIdx, log2TrSize, TEXT_LUMA);
         singleBitsComp[TEXT_LUMA][0] = m_entropyCoder->getNumberOfWrittenBits();
 
         uint32_t singleBitsPrev = singleBitsComp[TEXT_LUMA][0];
@@ -2938,7 +2967,7 @@
             do
             {
                 uint32_t absPartIdxC = tuIterator.m_absPartIdxTURelCU;
-                uint32_t subTUBufferOffset = trSizeC * trSizeC * tuIterator.m_section;
+                uint32_t subTUOffset = tuIterator.m_section << (log2TrSizeC * 2);
 
                 cu->setTransformSkipPartRange(0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
                 cu->setTransformSkipPartRange(0, TEXT_CHROMA_V, absPartIdxC, tuIterator.m_absPartIdxStep);
@@ -2951,26 +2980,26 @@
                 int curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();
                 m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
                 m_trQuant->selectLambda(TEXT_CHROMA_U);
-                absSum[TEXT_CHROMA_U][tuIterator.m_section] = m_trQuant->transformNxN(cu, resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth, coeffCurU + subTUBufferOffset,
+                absSum[TEXT_CHROMA_U][tuIterator.m_section] = m_trQuant->transformNxN(cu, resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth, coeffCurU + subTUOffset,
                                                                                       trSizeC, TEXT_CHROMA_U, absPartIdxC, &lastPos[TEXT_CHROMA_U][tuIterator.m_section], false, curuseRDOQ);
                 //Cr transform
                 curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
                 m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
                 m_trQuant->selectLambda(TEXT_CHROMA_V);
-                absSum[TEXT_CHROMA_V][tuIterator.m_section] = m_trQuant->transformNxN(cu, resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth, coeffCurV + subTUBufferOffset,
+                absSum[TEXT_CHROMA_V][tuIterator.m_section] = m_trQuant->transformNxN(cu, resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth, coeffCurV + subTUOffset,
                                                                                       trSizeC, TEXT_CHROMA_V, absPartIdxC, &lastPos[TEXT_CHROMA_V][tuIterator.m_section], false, curuseRDOQ);
 
                 cu->setCbfPartRange(absSum[TEXT_CHROMA_U][tuIterator.m_section] ? setCbf : 0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
                 cu->setCbfPartRange(absSum[TEXT_CHROMA_V][tuIterator.m_section] ? setCbf : 0, TEXT_CHROMA_V, absPartIdxC, tuIterator.m_absPartIdxStep);
 
-                m_entropyCoder->encodeQtCbf(cu, absPartIdxC, tuIterator.m_absPartIdxStep, trSizeC, trSizeC, TEXT_CHROMA_U, trMode, true);
+                m_entropyCoder->encodeQtCbf(cu, absPartIdxC, TEXT_CHROMA_U, trMode);
                 if (absSum[TEXT_CHROMA_U][tuIterator.m_section])
-                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurU + subTUBufferOffset, absPartIdxC, trSizeC, TEXT_CHROMA_U);
+                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurU + subTUOffset, absPartIdxC, log2TrSizeC, TEXT_CHROMA_U);
                 singleBitsComp[TEXT_CHROMA_U][tuIterator.m_section] = m_entropyCoder->getNumberOfWrittenBits() - singleBitsPrev;
 
-                m_entropyCoder->encodeQtCbf(cu, absPartIdxC, tuIterator.m_absPartIdxStep, trSizeC, trSizeC, TEXT_CHROMA_V, trMode, true);
+                m_entropyCoder->encodeQtCbf(cu, absPartIdxC, TEXT_CHROMA_V, trMode);
                 if (absSum[TEXT_CHROMA_V][tuIterator.m_section])
-                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurV + subTUBufferOffset, absPartIdxC, trSizeC, TEXT_CHROMA_V);
+                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurV + subTUOffset, absPartIdxC, log2TrSizeC, TEXT_CHROMA_V);
                 uint32_t newBits = m_entropyCoder->getNumberOfWrittenBits();
                 singleBitsComp[TEXT_CHROMA_V][tuIterator.m_section] = newBits - (singleBitsPrev + singleBitsComp[TEXT_CHROMA_U][tuIterator.m_section]);
 
@@ -3105,7 +3134,7 @@
             do
             {
                 uint32_t absPartIdxC = tuIterator.m_absPartIdxTURelCU;
-                uint32_t subTUBufferOffset = trSizeC * trSizeC * tuIterator.m_section;
+                uint32_t subTUOffset = tuIterator.m_section << (log2TrSizeC * 2);
 
                 int16_t *curResiU = m_qtTempShortYuv[qtLayer].getCbAddr(absPartIdxC);
                 int16_t *curResiV = m_qtTempShortYuv[qtLayer].getCrAddr(absPartIdxC);
@@ -3123,7 +3152,7 @@
 
                     int scalingListType = 3 + TEXT_CHROMA_U;
                     X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);
-                    m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiU, strideResiC, coeffCurU + subTUBufferOffset,
+                    m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiU, strideResiC, coeffCurU + subTUOffset,
                                                trSizeC, scalingListType, false, lastPos[TEXT_CHROMA_U][tuIterator.m_section]);
                     uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth,
                                                                  curResiU, strideResiC);
@@ -3166,7 +3195,7 @@
                         {
                             absSum[TEXT_CHROMA_U][tuIterator.m_section] = 0;
 #if CHECKED_BUILD || _DEBUG
-                            ::memset(coeffCurU + subTUBufferOffset, 0, sizeof(coeff_t) * numCoeffC);
+                            ::memset(coeffCurU + subTUOffset, 0, sizeof(coeff_t) * numCoeffC);
 #endif
                             if (checkTransformSkipUV)
                             {
@@ -3215,7 +3244,7 @@
 
                     int scalingListType = 3 + TEXT_CHROMA_V;
                     X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);
-                    m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiV, strideResiC, coeffCurV + subTUBufferOffset,
+                    m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiV, strideResiC, coeffCurV + subTUOffset,
                                                trSizeC, scalingListType, false, lastPos[TEXT_CHROMA_V][tuIterator.m_section]);
                     uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth,
                                                                  curResiV, strideResiC);
@@ -3259,7 +3288,7 @@
                         {
                             absSum[TEXT_CHROMA_V][tuIterator.m_section] = 0;
 #if CHECKED_BUILD || _DEBUG
-                            ::memset(coeffCurV + subTUBufferOffset, 0, sizeof(coeff_t) * numCoeffC);
+                            ::memset(coeffCurV + subTUOffset, 0, sizeof(coeff_t) * numCoeffC);
 #endif
                             if (checkTransformSkipUV)
                             {
@@ -3330,8 +3359,8 @@
             if (absSumTransformSkipY)
             {
                 m_entropyCoder->resetBits();
-                m_entropyCoder->encodeQtCbf(cu, absPartIdx, 0, trSize, trSize, TEXT_LUMA, trMode, true);
-                m_entropyCoder->encodeCoeffNxN(cu, tsCoeffY, absPartIdx, trSize, TEXT_LUMA);
+                m_entropyCoder->encodeQtCbf(cu, absPartIdx, TEXT_LUMA, trMode);
+                m_entropyCoder->encodeCoeffNxN(cu, tsCoeffY, absPartIdx, log2TrSize, TEXT_LUMA);
                 const uint32_t skipSingleBitsY = m_entropyCoder->getNumberOfWrittenBits();
 
                 m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
@@ -3396,7 +3425,7 @@
             do
             {
                 uint32_t absPartIdxC = tuIterator.m_absPartIdxTURelCU;
-                uint32_t subTUBufferOffset = trSizeC * trSizeC * tuIterator.m_section;
+                uint32_t subTUOffset = tuIterator.m_section << (log2TrSizeC * 2);
 
                 int16_t *curResiU = m_qtTempShortYuv[qtLayer].getCbAddr(absPartIdxC);
                 int16_t *curResiV = m_qtTempShortYuv[qtLayer].getCrAddr(absPartIdxC);
@@ -3433,8 +3462,8 @@
 
                 if (absSumTransformSkipU)
                 {
-                    m_entropyCoder->encodeQtCbf(cu, absPartIdxC, tuIterator.m_absPartIdxStep, trSizeC, trSizeC, TEXT_CHROMA_U, trMode, true);
-                    m_entropyCoder->encodeCoeffNxN(cu, tsCoeffU, absPartIdxC, trSizeC, TEXT_CHROMA_U);
+                    m_entropyCoder->encodeQtCbf(cu, absPartIdxC, TEXT_CHROMA_U, trMode);
+                    m_entropyCoder->encodeCoeffNxN(cu, tsCoeffU, absPartIdxC, log2TrSizeC, TEXT_CHROMA_U);
                     singleBitsComp[TEXT_CHROMA_U][tuIterator.m_section] = m_entropyCoder->getNumberOfWrittenBits();
 
                     curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();
@@ -3475,14 +3504,14 @@
                     singlePsyEnergyComp[TEXT_CHROMA_U][tuIterator.m_section] = nonZeroPsyEnergyU;
                     absSum[TEXT_CHROMA_U][tuIterator.m_section] = absSumTransformSkipU;
                     bestTransformMode[TEXT_CHROMA_U][tuIterator.m_section] = 1;
-                    memcpy(coeffCurU + subTUBufferOffset, tsCoeffU, sizeof(coeff_t) * numCoeffC);
+                    memcpy(coeffCurU + subTUOffset, tsCoeffU, sizeof(coeff_t) * numCoeffC);
                     primitives.square_copy_ss[sizeIdxC](curResiU, strideResiC, tsResiU, trSizeC);
                 }
 
                 if (absSumTransformSkipV)
                 {
-                    m_entropyCoder->encodeQtCbf(cu, absPartIdxC, tuIterator.m_absPartIdxStep, trSizeC, trSizeC, TEXT_CHROMA_V, trMode, true);
-                    m_entropyCoder->encodeCoeffNxN(cu, tsCoeffV, absPartIdxC, trSizeC, TEXT_CHROMA_V);
+                    m_entropyCoder->encodeQtCbf(cu, absPartIdxC, TEXT_CHROMA_V, trMode);
+                    m_entropyCoder->encodeCoeffNxN(cu, tsCoeffV, absPartIdxC, log2TrSizeC, TEXT_CHROMA_V);
                     singleBitsComp[TEXT_CHROMA_V][tuIterator.m_section] = m_entropyCoder->getNumberOfWrittenBits() - singleBitsComp[TEXT_CHROMA_U][tuIterator.m_section];
 
                     curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
@@ -3523,7 +3552,7 @@
                     singlePsyEnergyComp[TEXT_CHROMA_V][tuIterator.m_section] = nonZeroPsyEnergyV;
                     absSum[TEXT_CHROMA_V][tuIterator.m_section] = absSumTransformSkipV;
                     bestTransformMode[TEXT_CHROMA_V][tuIterator.m_section] = 1;
-                    memcpy(coeffCurV + subTUBufferOffset, tsCoeffV, sizeof(coeff_t) * numCoeffC);
+                    memcpy(coeffCurV + subTUOffset, tsCoeffV, sizeof(coeff_t) * numCoeffC);
                     primitives.square_copy_ss[sizeIdxC](curResiV, strideResiC, tsResiV, trSizeC);
                 }
 
@@ -3556,32 +3585,32 @@
             m_entropyCoder->encodeQtCbf(cu, absPartIdx, absPartIdxStep, trSizeC, trHeightC, TEXT_CHROMA_V, trMode, true);
         }
 
-        m_entropyCoder->encodeQtCbf(cu, absPartIdx, 0, trSize, trSize, TEXT_LUMA,     trMode, true);
+        m_entropyCoder->encodeQtCbf(cu, absPartIdx, TEXT_LUMA, trMode);
         if (absSum[TEXT_LUMA][0])
-            m_entropyCoder->encodeCoeffNxN(cu, coeffCurY, absPartIdx, trSize, TEXT_LUMA);
+            m_entropyCoder->encodeCoeffNxN(cu, coeffCurY, absPartIdx, log2TrSize, TEXT_LUMA);
 
         if (bCodeChroma)
         {
             if (!splitIntoSubTUs)
             {
                 if (absSum[TEXT_CHROMA_U][0])
-                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurU, absPartIdx, trSizeC, TEXT_CHROMA_U);
+                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurU, absPartIdx, log2TrSizeC, TEXT_CHROMA_U);
                 if (absSum[TEXT_CHROMA_V][0])
-                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurV, absPartIdx, trSizeC, TEXT_CHROMA_V);
+                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurV, absPartIdx, log2TrSizeC, TEXT_CHROMA_V);
             }
             else
             {
-                uint32_t subTUSize = trSizeC * trSizeC;
+                uint32_t subTUSize = 1 << (log2TrSizeC * 2);
                 uint32_t partIdxesPerSubTU = absPartIdxStep >> 1;
 
                 if (absSum[TEXT_CHROMA_U][0])
-                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurU, absPartIdx, trSizeC, TEXT_CHROMA_U);
+                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurU, absPartIdx, log2TrSizeC, TEXT_CHROMA_U);
                 if (absSum[TEXT_CHROMA_U][1])
-                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurU + subTUSize, absPartIdx + partIdxesPerSubTU, trSizeC, TEXT_CHROMA_U);
+                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurU + subTUSize, absPartIdx + partIdxesPerSubTU, log2TrSizeC, TEXT_CHROMA_U);
                 if (absSum[TEXT_CHROMA_V][0])
-                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurV, absPartIdx, trSizeC, TEXT_CHROMA_V);
+                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurV, absPartIdx, log2TrSizeC, TEXT_CHROMA_V);
                 if (absSum[TEXT_CHROMA_V][1])
-                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurV + subTUSize, absPartIdx + partIdxesPerSubTU, trSizeC, TEXT_CHROMA_V);
+                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurV + subTUSize, absPartIdx + partIdxesPerSubTU, log2TrSizeC, TEXT_CHROMA_V);
             }
         }
 
@@ -3769,7 +3798,6 @@
     X265_CHECK(cu->getPredictionMode(absPartIdx) != MODE_INTRA, "xEncodeResidualQT() with intra block\n");
 
     bool mCodeAll = true;
-    uint32_t trSize    = 1 << log2TrSize;
     uint32_t trWidthC  = 1 << log2TrSizeC;
     uint32_t trHeightC = splitIntoSubTUs ? (trWidthC << 1) : trWidthC;
 
@@ -3821,49 +3849,48 @@
 
         if (bSubdivAndCbf)
         {
-            m_entropyCoder->encodeQtCbf(cu, absPartIdx, 0, trSize, trSize, TEXT_LUMA, trMode, true);
+            m_entropyCoder->encodeQtCbf(cu, absPartIdx, TEXT_LUMA, trMode);
         }
         else
         {
             if (ttype == TEXT_LUMA && cu->getCbf(absPartIdx, TEXT_LUMA, trMode))
             {
-                m_entropyCoder->encodeCoeffNxN(cu, coeffCurY, absPartIdx, trSize, TEXT_LUMA);
+                m_entropyCoder->encodeCoeffNxN(cu, coeffCurY, absPartIdx, log2TrSize, TEXT_LUMA);
             }
             if (bCodeChroma)
             {
                 uint32_t coeffOffsetC = coeffOffsetY >> (m_hChromaShift + m_vChromaShift);
                 coeff_t *coeffCurU = m_qtTempCoeff[1][qtLayer] + coeffOffsetC;
                 coeff_t *coeffCurV = m_qtTempCoeff[2][qtLayer] + coeffOffsetC;
-                uint32_t trSizeC = 1 << log2TrSizeC;
 
                 if (!splitIntoSubTUs)
                 {
                     if (ttype == TEXT_CHROMA_U && cu->getCbf(absPartIdx, TEXT_CHROMA_U, trMode))
                     {
-                        m_entropyCoder->encodeCoeffNxN(cu, coeffCurU, absPartIdx, trSizeC, TEXT_CHROMA_U);
+                        m_entropyCoder->encodeCoeffNxN(cu, coeffCurU, absPartIdx, log2TrSizeC, TEXT_CHROMA_U);
                     }
                     if (ttype == TEXT_CHROMA_V && cu->getCbf(absPartIdx, TEXT_CHROMA_V, trMode))
                     {
-                        m_entropyCoder->encodeCoeffNxN(cu, coeffCurV, absPartIdx, trSizeC, TEXT_CHROMA_V);
+                        m_entropyCoder->encodeCoeffNxN(cu, coeffCurV, absPartIdx, log2TrSizeC, TEXT_CHROMA_V);
                     }
                 }
                 else
                 {
                     uint32_t partIdxesPerSubTU  = cu->getPic()->getNumPartInCU() >> (((cu->getDepth(absPartIdx) + trModeC) << 1) + 1);
-                    uint32_t subTUSize = trSizeC * trSizeC;
+                    uint32_t subTUSize = 1 << (log2TrSizeC * 2);
                     if (ttype == TEXT_CHROMA_U && cu->getCbf(absPartIdx, TEXT_CHROMA_U, trMode))
                     {
                         if (cu->getCbf(absPartIdx, ttype, trMode + 1))
-                            m_entropyCoder->encodeCoeffNxN(cu, coeffCurU, absPartIdx, trSizeC, TEXT_CHROMA_U);
+                            m_entropyCoder->encodeCoeffNxN(cu, coeffCurU, absPartIdx, log2TrSizeC, TEXT_CHROMA_U);
                         if (cu->getCbf(absPartIdx + partIdxesPerSubTU, ttype, trMode + 1))
-                            m_entropyCoder->encodeCoeffNxN(cu, coeffCurU + subTUSize, absPartIdx + partIdxesPerSubTU, trSizeC, TEXT_CHROMA_U);
+                            m_entropyCoder->encodeCoeffNxN(cu, coeffCurU + subTUSize, absPartIdx + partIdxesPerSubTU, log2TrSizeC, TEXT_CHROMA_U);
                     }
                     if (ttype == TEXT_CHROMA_V && cu->getCbf(absPartIdx, TEXT_CHROMA_V, trMode))
                     {
                         if (cu->getCbf(absPartIdx, ttype, trMode + 1))
-                            m_entropyCoder->encodeCoeffNxN(cu, coeffCurV, absPartIdx, trSizeC, TEXT_CHROMA_V);
+                            m_entropyCoder->encodeCoeffNxN(cu, coeffCurV, absPartIdx, log2TrSizeC, TEXT_CHROMA_V);
                         if (cu->getCbf(absPartIdx + partIdxesPerSubTU, ttype, trMode + 1))
-                            m_entropyCoder->encodeCoeffNxN(cu, coeffCurV + subTUSize, absPartIdx + partIdxesPerSubTU, trSizeC, TEXT_CHROMA_V);
+                            m_entropyCoder->encodeCoeffNxN(cu, coeffCurV + subTUSize, absPartIdx + partIdxesPerSubTU, log2TrSizeC, TEXT_CHROMA_V);
                     }
                 }
             }
diff -r 32aa6cc3cf4d -r 4c30d66afc78 source/Lib/TLibEncoder/TEncSearch.h
--- a/source/Lib/TLibEncoder/TEncSearch.h	Thu Jun 26 17:19:08 2014 -0700
+++ b/source/Lib/TLibEncoder/TEncSearch.h	Mon Jun 30 11:48:58 2014 +0900
@@ -193,13 +193,17 @@
     // Intra search
     // --------------------------------------------------------------------------------------------
 
-    void xEncSubdivCbfQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx,  uint32_t absPartIdxStep, uint32_t width, uint32_t height, bool bLuma, bool bChroma);
+    void xEncSubdivCbfQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx);
+    void xEncSubdivCbfQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx,  uint32_t absPartIdxStep, uint32_t width, uint32_t height);
 
-    void xEncCoeffQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TextType ttype);
-    void xEncIntraHeader(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, bool bLuma, bool bChroma);
-    uint32_t xGetIntraBitsQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t absPartIdxStep, bool bLuma, bool bChroma);
-    uint32_t xGetIntraBitsQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t log2TrSize, coeff_t* coeff);
-    uint32_t xGetIntraBitsQTChroma(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSizeC, uint32_t chromaId, coeff_t* coeff);
+    void xEncCoeffQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx);
+    void xEncCoeffQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TextType ttype);
+    void xEncIntraHeaderLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx);
+    void xEncIntraHeaderChroma(TComDataCU* cu, uint32_t absPartIdx);
+    uint32_t xGetIntraBitsQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx);
+    uint32_t xGetIntraBitsQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t absPartIdxStep);
+    uint32_t xGetIntraBitsLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t log2TrSize, coeff_t* coeff);
+    uint32_t xGetIntraBitsChroma(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSizeC, uint32_t chromaId, coeff_t* coeff);
     void xIntraCodingLumaBlk(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv,
                              int16_t* reconQt, uint32_t reconQtStride, coeff_t* coeff,
                              uint32_t& cbf, uint32_t& outDist);


More information about the x265-devel mailing list