[x265] [PATCH] [OUTPUT CHANGED for 422] made loops for chroma components in xEstimateResidualQT()

ashok at multicorewareinc.com ashok at multicorewareinc.com
Tue Oct 28 11:34:15 CET 2014


# HG changeset patch
# User Ashok Kumar Mishra<ashok at multicorewareinc.com>
# Date 1414482701 -19800
#      Tue Oct 28 13:21:41 2014 +0530
# Node ID 1a475a6ef230ec8bf4d82c91ae67d5a1619215e5
# Parent  3ccb20b6c0223e3e4ccf253faedd930c6bb98403
[OUTPUT CHANGED for 422] made loops for chroma components in xEstimateResidualQT()

The output change for 422 is valid. Initially the no. of bits(cbf and coeff.) were calculated
per block and per chroma component. Now the no. of bits are calculated per chroma component.

diff -r 3ccb20b6c022 -r 1a475a6ef230 source/encoder/search.cpp
--- a/source/encoder/search.cpp	Mon Oct 27 21:59:30 2014 -0500
+++ b/source/encoder/search.cpp	Tue Oct 28 13:21:41 2014 +0530
@@ -2516,16 +2516,13 @@
         int partSizeC = partitionFromLog2Size(log2TrSizeC);
         const uint32_t qtLayer = log2TrSize - 2;
         uint32_t coeffOffsetY = absPartIdx << (LOG2_UNIT_SIZE * 2);
-        uint32_t coeffOffsetC = coeffOffsetY >> (m_hChromaShift + m_vChromaShift);
         coeff_t* coeffCurY = m_rqt[qtLayer].coeffRQT[0] + coeffOffsetY;
-        coeff_t* coeffCurU = m_rqt[qtLayer].coeffRQT[1] + coeffOffsetC;
-        coeff_t* coeffCurV = m_rqt[qtLayer].coeffRQT[2] + coeffOffsetC;
-
-        cu.setTUDepthSubParts(depth - cu.m_cuDepth[0], absPartIdx, depth);
+
         bool checkTransformSkip   = m_slice->m_pps->bTransformSkipEnabled && !cu.m_tqBypass[0];
         bool checkTransformSkipY  = checkTransformSkip && log2TrSize  <= MAX_LOG2_TS_SIZE;
-        bool checkTransformSkipUV = checkTransformSkip && log2TrSizeC <= MAX_LOG2_TS_SIZE;
-
+        bool checkTransformSkipC = checkTransformSkip && log2TrSizeC <= MAX_LOG2_TS_SIZE;
+
+        cu.setTUDepthSubParts(depth - cu.m_cuDepth[0], absPartIdx, depth);
         cu.setTransformSkipSubParts(0, TEXT_LUMA, absPartIdx, depth);
 
         if (m_bEnableRDOQ)
@@ -2546,44 +2543,38 @@
 
         if (bCodeChroma)
         {
-            TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, absPartIdxStep, absPartIdx);
-
-            do
+            uint32_t coeffOffsetC = coeffOffsetY >> (m_hChromaShift + m_vChromaShift);
+            for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
             {
-                uint32_t absPartIdxC = tuIterator.absPartIdxTURelCU;
-                uint32_t subTUOffset = tuIterator.section << (log2TrSizeC * 2);
-
-                cu.setTransformSkipPartRange(0, TEXT_CHROMA_U, absPartIdxC, tuIterator.absPartIdxStep);
-                cu.setTransformSkipPartRange(0, TEXT_CHROMA_V, absPartIdxC, tuIterator.absPartIdxStep);
-
-                if (m_bEnableRDOQ)
-                    m_entropyCoder.estBit(m_entropyCoder.m_estBitsSbac, log2TrSizeC, false);
-
-                fenc = const_cast<pixel*>(fencYuv->getCbAddr(absPartIdxC));
-                resi = resiYuv.getCbAddr(absPartIdxC);
-                numSig[TEXT_CHROMA_U][tuIterator.section] = m_quant.transformNxN(cu, fenc, fencYuv->m_csize, resi, resiYuv.m_csize, coeffCurU + subTUOffset, log2TrSizeC, TEXT_CHROMA_U, absPartIdxC, false);
-                cbfFlag[TEXT_CHROMA_U][tuIterator.section] = !!numSig[TEXT_CHROMA_U][tuIterator.section];
-
-                fenc = const_cast<pixel*>(fencYuv->getCrAddr(absPartIdxC));
-                resi = resiYuv.getCrAddr(absPartIdxC);
-                numSig[TEXT_CHROMA_V][tuIterator.section] = m_quant.transformNxN(cu, fenc, fencYuv->m_csize, resi, resiYuv.m_csize, coeffCurV + subTUOffset, log2TrSizeC, TEXT_CHROMA_V, absPartIdxC, false);
-                cbfFlag[TEXT_CHROMA_V][tuIterator.section] = !!numSig[TEXT_CHROMA_V][tuIterator.section];
-
-                m_entropyCoder.codeQtCbf(cbfFlag[TEXT_CHROMA_U][tuIterator.section], TEXT_CHROMA_U, tuDepth);
-                if (cbfFlag[TEXT_CHROMA_U][tuIterator.section])
-                    m_entropyCoder.codeCoeffNxN(cu, coeffCurU + subTUOffset, absPartIdxC, log2TrSizeC, TEXT_CHROMA_U);
-                singleBitsComp[TEXT_CHROMA_U][tuIterator.section] = m_entropyCoder.getNumberOfWrittenBits() - singleBitsPrev;
-
-                m_entropyCoder.codeQtCbf(cbfFlag[TEXT_CHROMA_V][tuIterator.section], TEXT_CHROMA_V, tuDepth);
-                if (cbfFlag[TEXT_CHROMA_V][tuIterator.section])
-                    m_entropyCoder.codeCoeffNxN(cu, coeffCurV + subTUOffset, absPartIdxC, log2TrSizeC, TEXT_CHROMA_V);
-
-                uint32_t newBits = m_entropyCoder.getNumberOfWrittenBits();
-                singleBitsComp[TEXT_CHROMA_V][tuIterator.section] = newBits - (singleBitsPrev + singleBitsComp[TEXT_CHROMA_U][tuIterator.section]);
-
-                singleBitsPrev = newBits;
+                coeff_t* coeffCurC = m_rqt[qtLayer].coeffRQT[chromaId] + coeffOffsetC;
+                TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, absPartIdxStep, absPartIdx);
+
+                do
+                {
+                    uint32_t absPartIdxC = tuIterator.absPartIdxTURelCU;
+                    uint32_t subTUOffset = tuIterator.section << (log2TrSizeC * 2);
+
+                    cu.setTransformSkipPartRange(0, (TextType)chromaId, absPartIdxC, tuIterator.absPartIdxStep);
+
+                    if (m_bEnableRDOQ && (chromaId != TEXT_CHROMA_V))
+                        m_entropyCoder.estBit(m_entropyCoder.m_estBitsSbac, log2TrSizeC, false);
+
+                    fenc = const_cast<pixel*>(fencYuv->getChromaAddr(chromaId, absPartIdxC));
+                    resi = resiYuv.getChromaAddr(chromaId, absPartIdxC);
+                    numSig[chromaId][tuIterator.section] = m_quant.transformNxN(cu, fenc, fencYuv->m_csize, resi, resiYuv.m_csize, coeffCurC + subTUOffset, log2TrSizeC, (TextType)chromaId, absPartIdxC, false);
+                    cbfFlag[chromaId][tuIterator.section] = !!numSig[chromaId][tuIterator.section];
+
+                    m_entropyCoder.codeQtCbf(cbfFlag[chromaId][tuIterator.section], (TextType)chromaId, tuDepth);
+                    if (cbfFlag[chromaId][tuIterator.section])
+                        m_entropyCoder.codeCoeffNxN(cu, coeffCurC + subTUOffset, absPartIdxC, log2TrSizeC, (TextType)chromaId);
+
+                    uint32_t newBits = m_entropyCoder.getNumberOfWrittenBits();
+                    singleBitsComp[chromaId][tuIterator.section] = newBits - singleBitsPrev;
+
+                    singleBitsPrev = newBits;
+                }
+                while (tuIterator.isNextSection());
             }
-            while (tuIterator.isNextSection());
         }
 
         const uint32_t numCoeffY = 1 << (log2TrSize * 2);
@@ -2597,7 +2588,6 @@
 
         int16_t *curResiY    = m_rqt[qtLayer].resiQtYuv.getLumaAddr(absPartIdx);
         uint32_t strideResiY = m_rqt[qtLayer].resiQtYuv.m_size;
-        uint32_t strideResiC = m_rqt[qtLayer].resiQtYuv.m_csize;
 
         if (cbfFlag[TEXT_LUMA][0])
         {
@@ -2663,160 +2653,94 @@
             primitives.blockfill_s[partSize](curResiY, strideResiY, 0);
         cu.setCbfSubParts(cbfFlag[TEXT_LUMA][0] << tuDepth, TEXT_LUMA, absPartIdx, depth);
 
-        uint32_t distU = 0;
-        uint32_t distV = 0;
-        uint32_t psyEnergyU = 0;
-        uint32_t psyEnergyV = 0;
         if (bCodeChroma)
         {
-            TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, absPartIdxStep, absPartIdx);
+            uint32_t strideResiC = m_rqt[qtLayer].resiQtYuv.m_csize;
+            uint32_t coeffOffsetC = coeffOffsetY >> (m_hChromaShift + m_vChromaShift);
+            for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
+            {
+                uint32_t distC = 0, psyEnergyC = 0;
+                coeff_t* coeffCurC = m_rqt[qtLayer].coeffRQT[chromaId] + coeffOffsetC;
+                TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, absPartIdxStep, absPartIdx);
 
             do
             {
                 uint32_t absPartIdxC = tuIterator.absPartIdxTURelCU;
                 uint32_t subTUOffset = tuIterator.section << (log2TrSizeC * 2);
 
-                int16_t *curResiU = m_rqt[qtLayer].resiQtYuv.getCbAddr(absPartIdxC);
-                int16_t *curResiV = m_rqt[qtLayer].resiQtYuv.getCrAddr(absPartIdxC);
-
-                distU = m_rdCost.scaleChromaDistCb(primitives.ssd_s[log2TrSizeC - 2](resiYuv.getCbAddr(absPartIdxC), resiYuv.m_csize));
-
-                if (cbfFlag[TEXT_CHROMA_U][tuIterator.section])
+                int16_t *curResiC = m_rqt[qtLayer].resiQtYuv.getChromaAddr(chromaId, absPartIdxC);
+
+                distC = m_rdCost.scaleChromaDistCb(primitives.ssd_s[log2TrSizeC - 2](resiYuv.getChromaAddr(chromaId, absPartIdxC), resiYuv.m_csize));
+
+                if (cbfFlag[chromaId][tuIterator.section])
                 {
-                    m_quant.invtransformNxN(cu.m_tqBypass[absPartIdxC], curResiU, strideResiC, coeffCurU + subTUOffset,
-                                            log2TrSizeC, TEXT_CHROMA_U, false, false, numSig[TEXT_CHROMA_U][tuIterator.section]);
-                    uint32_t dist = primitives.sse_ss[partSizeC](resiYuv.getCbAddr(absPartIdxC), resiYuv.m_csize, curResiU, strideResiC);
-                    const uint32_t nonZeroDistU = m_rdCost.scaleChromaDistCb(dist);
-                    uint32_t nonZeroPsyEnergyU = 0;
+                    m_quant.invtransformNxN(cu.m_tqBypass[absPartIdxC], curResiC, strideResiC, coeffCurC + subTUOffset,
+                                            log2TrSizeC, (TextType)chromaId, false, false, numSig[chromaId][tuIterator.section]);
+                    uint32_t dist = primitives.sse_ss[partSizeC](resiYuv.getChromaAddr(chromaId, absPartIdxC), resiYuv.m_csize, curResiC, strideResiC);
+                    const uint32_t nonZeroDistC = m_rdCost.scaleChromaDistCb(dist);
+                    uint32_t nonZeroPsyEnergyC = 0;
                     if (m_rdCost.m_psyRd)
-                        nonZeroPsyEnergyU = m_rdCost.psyCost(partSizeC, resiYuv.getCbAddr(absPartIdxC), resiYuv.m_csize, curResiU, strideResiC);
+                        nonZeroPsyEnergyC = m_rdCost.psyCost(partSizeC, resiYuv.getChromaAddr(chromaId, absPartIdxC), resiYuv.m_csize, curResiC, strideResiC);
 
                     if (cu.m_tqBypass[0])
                     {
-                        distU = nonZeroDistU;
-                        psyEnergyU = nonZeroPsyEnergyU;
+                        distC = nonZeroDistC;
+                        psyEnergyC = nonZeroPsyEnergyC;
                     }
                     else
                     {
-                        uint64_t singleCostU = 0;
+                        uint64_t singleCostC = 0;
                         if (m_rdCost.m_psyRd)
-                            singleCostU = m_rdCost.calcPsyRdCost(nonZeroDistU, singleBitsComp[TEXT_CHROMA_U][tuIterator.section], nonZeroPsyEnergyU);
+                            singleCostC = m_rdCost.calcPsyRdCost(nonZeroDistC, singleBitsComp[chromaId][tuIterator.section], nonZeroPsyEnergyC);
                         else
-                            singleCostU = m_rdCost.calcRdCost(nonZeroDistU, singleBitsComp[TEXT_CHROMA_U][tuIterator.section]);
+                            singleCostC = m_rdCost.calcRdCost(nonZeroDistC, singleBitsComp[chromaId][tuIterator.section]);
                         m_entropyCoder.resetBits();
-                        m_entropyCoder.codeQtCbfZero(TEXT_CHROMA_U, tuDepth);
-                        const uint32_t nullBitsU = m_entropyCoder.getNumberOfWrittenBits();
-                        uint64_t nullCostU = 0;
+                        m_entropyCoder.codeQtCbfZero((TextType)chromaId, tuDepth);
+                        const uint32_t nullBitsC = m_entropyCoder.getNumberOfWrittenBits();
+                        uint64_t nullCostC = 0;
                         if (m_rdCost.m_psyRd)
-                            nullCostU = m_rdCost.calcPsyRdCost(distU, nullBitsU, psyEnergyU);
+                            nullCostC = m_rdCost.calcPsyRdCost(distC, nullBitsC, psyEnergyC);
                         else
-                            nullCostU = m_rdCost.calcRdCost(distU, nullBitsU);
-                        if (nullCostU < singleCostU)
+                            nullCostC = m_rdCost.calcRdCost(distC, nullBitsC);
+                        if (nullCostC < singleCostC)
                         {
-                            cbfFlag[TEXT_CHROMA_U][tuIterator.section] = 0;
+                            cbfFlag[chromaId][tuIterator.section] = 0;
 #if CHECKED_BUILD || _DEBUG
-                            memset(coeffCurU + subTUOffset, 0, sizeof(coeff_t) * numCoeffC);
+                                memset(coeffCurC + subTUOffset, 0, sizeof(coeff_t) * numCoeffC);
 #endif
-                            if (checkTransformSkipUV)
-                                minCost[TEXT_CHROMA_U][tuIterator.section] = nullCostU;
-                        }
-                        else
-                        {
-                            distU = nonZeroDistU;
-                            psyEnergyU = nonZeroPsyEnergyU;
-                            if (checkTransformSkipUV)
-                                minCost[TEXT_CHROMA_U][tuIterator.section] = singleCostU;
+                                if (checkTransformSkipC)
+                                    minCost[chromaId][tuIterator.section] = nullCostC;
+                            }
+                            else
+                            {
+                                distC = nonZeroDistC;
+                                psyEnergyC = nonZeroPsyEnergyC;
+                                if (checkTransformSkipC)
+                                    minCost[chromaId][tuIterator.section] = singleCostC;
+                            }
                         }
                     }
+                    else if (checkTransformSkipC)
+                    {
+                        m_entropyCoder.resetBits();
+                        m_entropyCoder.codeQtCbfZero((TextType)chromaId, trModeC);
+                        const uint32_t nullBitsC = m_entropyCoder.getNumberOfWrittenBits();
+                        if (m_rdCost.m_psyRd)
+                            minCost[chromaId][tuIterator.section] = m_rdCost.calcPsyRdCost(distC, nullBitsC, psyEnergyC);
+                        else
+                            minCost[chromaId][tuIterator.section] = m_rdCost.calcRdCost(distC, nullBitsC);
+                    }
+
+                    singleDistComp[chromaId][tuIterator.section] = distC;
+                    singlePsyEnergyComp[chromaId][tuIterator.section] = psyEnergyC;
+
+                    if (!cbfFlag[chromaId][tuIterator.section])
+                        primitives.blockfill_s[partSizeC](curResiC, strideResiC, 0);
+
+                    cu.setCbfPartRange(cbfFlag[chromaId][tuIterator.section] << tuDepth, (TextType)chromaId, absPartIdxC, tuIterator.absPartIdxStep);
                 }
-                else if (checkTransformSkipUV)
-                {
-                    m_entropyCoder.resetBits();
-                    m_entropyCoder.codeQtCbfZero(TEXT_CHROMA_U, trModeC);
-                    const uint32_t nullBitsU = m_entropyCoder.getNumberOfWrittenBits();
-                    if (m_rdCost.m_psyRd)
-                        minCost[TEXT_CHROMA_U][tuIterator.section] = m_rdCost.calcPsyRdCost(distU, nullBitsU, psyEnergyU);
-                    else
-                        minCost[TEXT_CHROMA_U][tuIterator.section] = m_rdCost.calcRdCost(distU, nullBitsU);
-                }
-
-                singleDistComp[TEXT_CHROMA_U][tuIterator.section] = distU;
-                singlePsyEnergyComp[TEXT_CHROMA_U][tuIterator.section] = psyEnergyU;
-
-                if (!cbfFlag[TEXT_CHROMA_U][tuIterator.section])
-                    primitives.blockfill_s[partSizeC](curResiU, strideResiC, 0);
-
-                distV = m_rdCost.scaleChromaDistCr(primitives.ssd_s[partSizeC](resiYuv.getCrAddr(absPartIdxC), resiYuv.m_csize));
-
-                if (cbfFlag[TEXT_CHROMA_V][tuIterator.section])
-                {
-                    m_quant.invtransformNxN(cu.m_tqBypass[absPartIdxC], curResiV, strideResiC, coeffCurV + subTUOffset,
-                                            log2TrSizeC, TEXT_CHROMA_V, false, false, numSig[TEXT_CHROMA_V][tuIterator.section]);
-                    uint32_t dist = primitives.sse_ss[partSizeC](resiYuv.getCrAddr(absPartIdxC), resiYuv.m_csize, curResiV, strideResiC);
-                    const uint32_t nonZeroDistV = m_rdCost.scaleChromaDistCr(dist);
-                    uint32_t nonZeroPsyEnergyV = 0;
-                    if (m_rdCost.m_psyRd)
-                        nonZeroPsyEnergyV = m_rdCost.psyCost(partSizeC, resiYuv.getCrAddr(absPartIdxC), resiYuv.m_csize, curResiV, strideResiC);
-
-                    if (cu.m_tqBypass[0])
-                    {
-                        distV = nonZeroDistV;
-                        psyEnergyV = nonZeroPsyEnergyV;
-                    }
-                    else
-                    {
-                        uint64_t singleCostV = 0;
-                        if (m_rdCost.m_psyRd)
-                            singleCostV = m_rdCost.calcPsyRdCost(nonZeroDistV, singleBitsComp[TEXT_CHROMA_V][tuIterator.section], nonZeroPsyEnergyV);
-                        else
-                            singleCostV = m_rdCost.calcRdCost(nonZeroDistV, singleBitsComp[TEXT_CHROMA_V][tuIterator.section]);
-                        m_entropyCoder.resetBits();
-                        m_entropyCoder.codeQtCbfZero(TEXT_CHROMA_V, tuDepth);
-                        const uint32_t nullBitsV = m_entropyCoder.getNumberOfWrittenBits();
-                        uint64_t nullCostV = 0;
-                        if (m_rdCost.m_psyRd)
-                            nullCostV = m_rdCost.calcPsyRdCost(distV, nullBitsV, psyEnergyV);
-                        else
-                            nullCostV = m_rdCost.calcRdCost(distV, nullBitsV);
-                        if (nullCostV < singleCostV)
-                        {
-                            cbfFlag[TEXT_CHROMA_V][tuIterator.section] = 0;
-#if CHECKED_BUILD || _DEBUG
-                            memset(coeffCurV + subTUOffset, 0, sizeof(coeff_t) * numCoeffC);
-#endif
-                            if (checkTransformSkipUV)
-                                minCost[TEXT_CHROMA_V][tuIterator.section] = nullCostV;
-                        }
-                        else
-                        {
-                            distV = nonZeroDistV;
-                            psyEnergyV = nonZeroPsyEnergyV;
-                            if (checkTransformSkipUV)
-                                minCost[TEXT_CHROMA_V][tuIterator.section] = singleCostV;
-                        }
-                    }
-                }
-                else if (checkTransformSkipUV)
-                {
-                    m_entropyCoder.resetBits();
-                    m_entropyCoder.codeQtCbfZero(TEXT_CHROMA_V, trModeC);
-                    const uint32_t nullBitsV = m_entropyCoder.getNumberOfWrittenBits();
-                    if (m_rdCost.m_psyRd)
-                        minCost[TEXT_CHROMA_V][tuIterator.section] = m_rdCost.calcPsyRdCost(distV, nullBitsV, psyEnergyV);
-                    else
-                        minCost[TEXT_CHROMA_V][tuIterator.section] = m_rdCost.calcRdCost(distV, nullBitsV);
-                }
-
-                singleDistComp[TEXT_CHROMA_V][tuIterator.section] = distV;
-                singlePsyEnergyComp[TEXT_CHROMA_V][tuIterator.section] = psyEnergyV;
-
-                if (!cbfFlag[TEXT_CHROMA_V][tuIterator.section])
-                    primitives.blockfill_s[partSizeC](curResiV, strideResiC, 0);
-
-                cu.setCbfPartRange(cbfFlag[TEXT_CHROMA_U][tuIterator.section] << tuDepth, TEXT_CHROMA_U, absPartIdxC, tuIterator.absPartIdxStep);
-                cu.setCbfPartRange(cbfFlag[TEXT_CHROMA_V][tuIterator.section] << tuDepth, TEXT_CHROMA_V, absPartIdxC, tuIterator.absPartIdxStep);
+                while (tuIterator.isNextSection());
             }
-            while (tuIterator.isNextSection());
         }
 
         if (checkTransformSkipY)
@@ -2874,113 +2798,77 @@
             cu.setCbfSubParts(cbfFlag[TEXT_LUMA][0] << tuDepth, TEXT_LUMA, absPartIdx, depth);
         }
 
-        if (bCodeChroma && checkTransformSkipUV)
+        if (bCodeChroma && checkTransformSkipC)
         {
-            uint32_t nonZeroDistU = 0, nonZeroDistV = 0;
-            uint32_t nonZeroPsyEnergyU = 0, nonZeroPsyEnergyV = 0;
-            uint64_t singleCostU = MAX_INT64;
-            uint64_t singleCostV = MAX_INT64;
+            uint32_t nonZeroDistC = 0, nonZeroPsyEnergyC = 0;
+            uint64_t singleCostC = MAX_INT64;
+            uint32_t strideResiC = m_rqt[qtLayer].resiQtYuv.m_csize;
+            uint32_t coeffOffsetC = coeffOffsetY >> (m_hChromaShift + m_vChromaShift);
 
             m_entropyCoder.load(m_rqt[depth].rqtRoot);
 
-            TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, absPartIdxStep, absPartIdx);
-
-            do
+            for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
             {
-                uint32_t absPartIdxC = tuIterator.absPartIdxTURelCU;
-                uint32_t subTUOffset = tuIterator.section << (log2TrSizeC * 2);
-
-                int16_t *curResiU = m_rqt[qtLayer].resiQtYuv.getCbAddr(absPartIdxC);
-                int16_t *curResiV = m_rqt[qtLayer].resiQtYuv.getCrAddr(absPartIdxC);
-
-                ALIGN_VAR_32(coeff_t, tsCoeffU[MAX_TS_SIZE * MAX_TS_SIZE]);
-                ALIGN_VAR_32(int16_t, tsResiU[MAX_TS_SIZE * MAX_TS_SIZE]);
-                ALIGN_VAR_32(coeff_t, tsCoeffV[MAX_TS_SIZE * MAX_TS_SIZE]);
-                ALIGN_VAR_32(int16_t, tsResiV[MAX_TS_SIZE * MAX_TS_SIZE]);
-
-                cu.setTransformSkipPartRange(1, TEXT_CHROMA_U, absPartIdxC, tuIterator.absPartIdxStep);
-                cu.setTransformSkipPartRange(1, TEXT_CHROMA_V, absPartIdxC, tuIterator.absPartIdxStep);
-
-                if (m_bEnableRDOQ)
-                    m_entropyCoder.estBit(m_entropyCoder.m_estBitsSbac, log2TrSizeC, false);
-
-                fenc = const_cast<pixel*>(fencYuv->getCbAddr(absPartIdxC));
-                resi = resiYuv.getCbAddr(absPartIdxC);
-                uint32_t numSigTSkipU = m_quant.transformNxN(cu, fenc, fencYuv->m_csize, resi, resiYuv.m_csize, tsCoeffU, log2TrSizeC, TEXT_CHROMA_U, absPartIdxC, true);
-
-                fenc = const_cast<pixel*>(fencYuv->getCrAddr(absPartIdxC));
-                resi = resiYuv.getCrAddr(absPartIdxC);
-                uint32_t numSigTSkipV = m_quant.transformNxN(cu, fenc, fencYuv->m_csize, resi, resiYuv.m_csize, tsCoeffV, log2TrSizeC, TEXT_CHROMA_V, absPartIdxC, true);
-
-                m_entropyCoder.resetBits();
-                singleBitsComp[TEXT_CHROMA_U][tuIterator.section] = 0;
-
-                if (numSigTSkipU)
+                coeff_t* coeffCurC = m_rqt[qtLayer].coeffRQT[chromaId] + coeffOffsetC;
+                TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, absPartIdxStep, absPartIdx);
+
+                do
                 {
-                    m_entropyCoder.codeQtCbf(!!numSigTSkipU, TEXT_CHROMA_U, tuDepth);
-                    m_entropyCoder.codeCoeffNxN(cu, tsCoeffU, absPartIdxC, log2TrSizeC, TEXT_CHROMA_U);
-                    singleBitsComp[TEXT_CHROMA_U][tuIterator.section] = m_entropyCoder.getNumberOfWrittenBits();
-
-                    m_quant.invtransformNxN(cu.m_tqBypass[absPartIdxC], tsResiU, trSizeC, tsCoeffU,
-                                            log2TrSizeC, TEXT_CHROMA_U, false, true, numSigTSkipU);
-                    uint32_t dist = primitives.sse_ss[partSizeC](resiYuv.getCbAddr(absPartIdxC), resiYuv.m_csize, tsResiU, trSizeC);
-                    nonZeroDistU = m_rdCost.scaleChromaDistCb(dist);
-                    if (m_rdCost.m_psyRd)
+                    uint32_t absPartIdxC = tuIterator.absPartIdxTURelCU;
+                    uint32_t subTUOffset = tuIterator.section << (log2TrSizeC * 2);
+
+                    int16_t *curResiC = m_rqt[qtLayer].resiQtYuv.getChromaAddr(chromaId, absPartIdxC);
+
+                    ALIGN_VAR_32(coeff_t, tsCoeffC[MAX_TS_SIZE * MAX_TS_SIZE]);
+                    ALIGN_VAR_32(int16_t, tsResiC[MAX_TS_SIZE * MAX_TS_SIZE]);
+
+                    cu.setTransformSkipPartRange(1, (TextType)chromaId, absPartIdxC, tuIterator.absPartIdxStep);
+
+                    if (m_bEnableRDOQ && (chromaId != TEXT_CHROMA_V))
+                        m_entropyCoder.estBit(m_entropyCoder.m_estBitsSbac, log2TrSizeC, false);
+
+                    fenc = const_cast<pixel*>(fencYuv->getChromaAddr(chromaId, absPartIdxC));
+                    resi = resiYuv.getChromaAddr(chromaId, absPartIdxC);
+                    uint32_t numSigTSkipC = m_quant.transformNxN(cu, fenc, fencYuv->m_csize, resi, resiYuv.m_csize, tsCoeffC, log2TrSizeC, (TextType)chromaId, absPartIdxC, true);
+
+                    m_entropyCoder.resetBits();
+                    singleBitsComp[chromaId][tuIterator.section] = 0;
+
+                    if (numSigTSkipC)
                     {
-                        nonZeroPsyEnergyU = m_rdCost.psyCost(partSizeC, resiYuv.getCbAddr(absPartIdxC), resiYuv.m_csize, tsResiU, trSizeC);
-                        singleCostU = m_rdCost.calcPsyRdCost(nonZeroDistU, singleBitsComp[TEXT_CHROMA_U][tuIterator.section], nonZeroPsyEnergyU);
+                        m_entropyCoder.codeQtCbf(!!numSigTSkipC, (TextType)chromaId, tuDepth);
+                        m_entropyCoder.codeCoeffNxN(cu, tsCoeffC, absPartIdxC, log2TrSizeC, (TextType)chromaId);
+                        singleBitsComp[chromaId][tuIterator.section] = m_entropyCoder.getNumberOfWrittenBits();
+
+                        m_quant.invtransformNxN(cu.m_tqBypass[absPartIdxC], tsResiC, trSizeC, tsCoeffC,
+                                                log2TrSizeC, (TextType)chromaId, false, true, numSigTSkipC);
+                        uint32_t dist = primitives.sse_ss[partSizeC](resiYuv.getChromaAddr(chromaId, absPartIdxC), resiYuv.m_csize, tsResiC, trSizeC);
+                        nonZeroDistC = m_rdCost.scaleChromaDistCb(dist);
+                        if (m_rdCost.m_psyRd)
+                        {
+                            nonZeroPsyEnergyC = m_rdCost.psyCost(partSizeC, resiYuv.getChromaAddr(chromaId, absPartIdxC), resiYuv.m_csize, tsResiC, trSizeC);
+                            singleCostC = m_rdCost.calcPsyRdCost(nonZeroDistC, singleBitsComp[chromaId][tuIterator.section], nonZeroPsyEnergyC);
+                        }
+                        else
+                            singleCostC = m_rdCost.calcRdCost(nonZeroDistC, singleBitsComp[chromaId][tuIterator.section]);
                     }
+
+                    if (!numSigTSkipC || minCost[chromaId][tuIterator.section] < singleCostC)
+                        cu.setTransformSkipPartRange(0, (TextType)chromaId, absPartIdxC, tuIterator.absPartIdxStep);
                     else
-                        singleCostU = m_rdCost.calcRdCost(nonZeroDistU, singleBitsComp[TEXT_CHROMA_U][tuIterator.section]);
+                    {
+                        singleDistComp[chromaId][tuIterator.section] = nonZeroDistC;
+                        singlePsyEnergyComp[chromaId][tuIterator.section] = nonZeroPsyEnergyC;
+                        cbfFlag[chromaId][tuIterator.section] = !!numSigTSkipC;
+                        bestTransformMode[chromaId][tuIterator.section] = 1;
+                        memcpy(coeffCurC + subTUOffset, tsCoeffC, sizeof(coeff_t) * numCoeffC);
+                        primitives.square_copy_ss[partSizeC](curResiC, strideResiC, tsResiC, trSizeC);
+                    }
+
+                    cu.setCbfPartRange(cbfFlag[chromaId][tuIterator.section] << tuDepth, (TextType)chromaId, absPartIdxC, tuIterator.absPartIdxStep);
                 }
-
-                if (!numSigTSkipU || minCost[TEXT_CHROMA_U][tuIterator.section] < singleCostU)
-                    cu.setTransformSkipPartRange(0, TEXT_CHROMA_U, absPartIdxC, tuIterator.absPartIdxStep);
-                else
-                {
-                    singleDistComp[TEXT_CHROMA_U][tuIterator.section] = nonZeroDistU;
-                    singlePsyEnergyComp[TEXT_CHROMA_U][tuIterator.section] = nonZeroPsyEnergyU;
-                    cbfFlag[TEXT_CHROMA_U][tuIterator.section] = !!numSigTSkipU;
-                    bestTransformMode[TEXT_CHROMA_U][tuIterator.section] = 1;
-                    memcpy(coeffCurU + subTUOffset, tsCoeffU, sizeof(coeff_t) * numCoeffC);
-                    primitives.square_copy_ss[partSizeC](curResiU, strideResiC, tsResiU, trSizeC);
-                }
-
-                if (numSigTSkipV)
-                {
-                    m_entropyCoder.codeQtCbf(!!numSigTSkipV, TEXT_CHROMA_V, tuDepth);
-                    m_entropyCoder.codeCoeffNxN(cu, tsCoeffV, absPartIdxC, log2TrSizeC, TEXT_CHROMA_V);
-                    singleBitsComp[TEXT_CHROMA_V][tuIterator.section] = m_entropyCoder.getNumberOfWrittenBits() - singleBitsComp[TEXT_CHROMA_U][tuIterator.section];
-
-                    m_quant.invtransformNxN(cu.m_tqBypass[absPartIdxC], tsResiV, trSizeC, tsCoeffV,
-                                            log2TrSizeC, TEXT_CHROMA_V, false, true, numSigTSkipV);
-                    uint32_t dist = primitives.sse_ss[partSizeC](resiYuv.getCrAddr(absPartIdxC), resiYuv.m_csize, tsResiV, trSizeC);
-                    nonZeroDistV = m_rdCost.scaleChromaDistCr(dist);
-                    if (m_rdCost.m_psyRd)
-                    {
-                        nonZeroPsyEnergyV = m_rdCost.psyCost(partSizeC, resiYuv.getCrAddr(absPartIdxC), resiYuv.m_csize, tsResiV, trSizeC);
-                        singleCostV = m_rdCost.calcPsyRdCost(nonZeroDistV, singleBitsComp[TEXT_CHROMA_V][tuIterator.section], nonZeroPsyEnergyV);
-                    }
-                    else
-                        singleCostV = m_rdCost.calcRdCost(nonZeroDistV, singleBitsComp[TEXT_CHROMA_V][tuIterator.section]);
-                }
-
-                if (!numSigTSkipV || minCost[TEXT_CHROMA_V][tuIterator.section] < singleCostV)
-                    cu.setTransformSkipPartRange(0, TEXT_CHROMA_V, absPartIdxC, tuIterator.absPartIdxStep);
-                else
-                {
-                    singleDistComp[TEXT_CHROMA_V][tuIterator.section] = nonZeroDistV;
-                    singlePsyEnergyComp[TEXT_CHROMA_V][tuIterator.section] = nonZeroPsyEnergyV;
-                    cbfFlag[TEXT_CHROMA_V][tuIterator.section] = !!numSigTSkipV;
-                    bestTransformMode[TEXT_CHROMA_V][tuIterator.section] = 1;
-                    memcpy(coeffCurV + subTUOffset, tsCoeffV, sizeof(coeff_t) * numCoeffC);
-                    primitives.square_copy_ss[partSizeC](curResiV, strideResiC, tsResiV, trSizeC);
-                }
-
-                cu.setCbfPartRange(cbfFlag[TEXT_CHROMA_U][tuIterator.section] << tuDepth, TEXT_CHROMA_U, absPartIdxC, tuIterator.absPartIdxStep);
-                cu.setCbfPartRange(cbfFlag[TEXT_CHROMA_V][tuIterator.section] << tuDepth, TEXT_CHROMA_V, absPartIdxC, tuIterator.absPartIdxStep);
+                while (tuIterator.isNextSection());
             }
-            while (tuIterator.isNextSection());
         }
 
         m_entropyCoder.load(m_rqt[depth].rqtRoot);
@@ -2994,14 +2882,14 @@
         {
             for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
             {
-                if (splitIntoSubTUs)
+                if (!splitIntoSubTUs)
+                    m_entropyCoder.codeQtCbf(cbfFlag[chromaId][0], (TextType)chromaId, tuDepth);
+                else
                 {
                     offsetSubTUCBFs(cu, (TextType)chromaId, tuDepth, absPartIdx);
                     for (uint32_t subTU = 0; subTU < 2; subTU++)
                         m_entropyCoder.codeQtCbf(cbfFlag[chromaId][subTU], (TextType)chromaId, tuDepth);
                 }
-                else
-                    m_entropyCoder.codeQtCbf(cbfFlag[chromaId][0], (TextType)chromaId, tuDepth);
             }
         }
 
@@ -3011,26 +2899,26 @@
 
         if (bCodeChroma)
         {
-            if (!splitIntoSubTUs)
+            uint32_t subTUSize = 1 << (log2TrSizeC * 2);
+            uint32_t partIdxesPerSubTU = absPartIdxStep >> 1;
+            uint32_t coeffOffsetC = coeffOffsetY >> (m_hChromaShift + m_vChromaShift);
+
+            for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
             {
-                if (cbfFlag[TEXT_CHROMA_U][0])
-                    m_entropyCoder.codeCoeffNxN(cu, coeffCurU, absPartIdx, log2TrSizeC, TEXT_CHROMA_U);
-                if (cbfFlag[TEXT_CHROMA_V][0])
-                    m_entropyCoder.codeCoeffNxN(cu, coeffCurV, absPartIdx, log2TrSizeC, TEXT_CHROMA_V);
-            }
-            else
-            {
-                uint32_t subTUSize = 1 << (log2TrSizeC * 2);
-                uint32_t partIdxesPerSubTU = absPartIdxStep >> 1;
-
-                if (cbfFlag[TEXT_CHROMA_U][0])
-                    m_entropyCoder.codeCoeffNxN(cu, coeffCurU, absPartIdx, log2TrSizeC, TEXT_CHROMA_U);
-                if (cbfFlag[TEXT_CHROMA_U][1])
-                    m_entropyCoder.codeCoeffNxN(cu, coeffCurU + subTUSize, absPartIdx + partIdxesPerSubTU, log2TrSizeC, TEXT_CHROMA_U);
-                if (cbfFlag[TEXT_CHROMA_V][0])
-                    m_entropyCoder.codeCoeffNxN(cu, coeffCurV, absPartIdx, log2TrSizeC, TEXT_CHROMA_V);
-                if (cbfFlag[TEXT_CHROMA_V][1])
-                    m_entropyCoder.codeCoeffNxN(cu, coeffCurV + subTUSize, absPartIdx + partIdxesPerSubTU, log2TrSizeC, TEXT_CHROMA_V);
+                coeff_t* coeffCurC = m_rqt[qtLayer].coeffRQT[chromaId] + coeffOffsetC;
+                if (!splitIntoSubTUs)
+                {
+                    if (cbfFlag[chromaId][0])
+                        m_entropyCoder.codeCoeffNxN(cu, coeffCurC, absPartIdx, log2TrSizeC, (TextType)chromaId);
+                }
+                else
+                {
+                    for (uint32_t subTU = 0; subTU < 2; subTU++)
+                    {
+                        if (cbfFlag[chromaId][subTU])
+                            m_entropyCoder.codeCoeffNxN(cu, coeffCurC + subTU * subTUSize, absPartIdx + subTU * partIdxesPerSubTU, log2TrSizeC, (TextType)chromaId);
+                    }
+                }
             }
         }
 


More information about the x265-devel mailing list