[x265] [PATCH] search: factor the null-cost calculation in estimateResidualQT() out into a helper function

ashok at multicorewareinc.com ashok at multicorewareinc.com
Wed Nov 5 16:05:51 CET 2014


# HG changeset patch
# User Ashok Kumar Mishra<ashok at multicorewareinc.com>
# Date 1415184822 -19800
#      Wed Nov 05 16:23:42 2014 +0530
# Node ID 18344f74ded0e192bc7177a217e9112c9de31983
# Parent  2a8f3d5820a6ebe0937ce73fa81154c263df2ae9
search: factor the null-cost calculation in estimateResidualQT() out into a helper function

diff -r 2a8f3d5820a6 -r 18344f74ded0 source/encoder/search.cpp
--- a/source/encoder/search.cpp	Tue Nov 04 09:46:14 2014 +0530
+++ b/source/encoder/search.cpp	Wed Nov 05 16:23:42 2014 +0530
@@ -2714,6 +2714,17 @@
     }
 }
 
+uint64_t Search::deriveNullCost(uint32_t &dist, uint32_t &psyEnergy, uint32_t tuDepth, TextType compId) // returns the RD cost of coding a zero CBF for compId at tuDepth (the "null" cost)
+{
+    m_entropyCoder.resetBits(); // restart bit counting so only the flag coded below is measured
+    m_entropyCoder.codeQtCbfZero(compId, tuDepth); // code the zero-CBF flag for this component/depth
+    const uint32_t nullBits = m_entropyCoder.getNumberOfWrittenBits(); // bits written since resetBits()
+    if (m_rdCost.m_psyRd) // psy-RD enabled: cost also takes psyEnergy into account
+        return m_rdCost.calcPsyRdCost(dist, nullBits, psyEnergy);
+    else // otherwise cost is derived from distortion and bits only
+        return m_rdCost.calcRdCost(dist, nullBits);
+}
+
 void Search::estimateResidualQT(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, ShortYuv& resiYuv, Cost& outCosts, uint32_t depthRange[2])
 {
     CUData& cu = mode.cu;
@@ -2828,9 +2839,6 @@
             }
         }
 
-        const uint32_t numCoeffY = 1 << (log2TrSize * 2);
-        const uint32_t numCoeffC = 1 << (log2TrSizeC * 2);
-
         X265_CHECK(log2TrSize <= 5, "log2TrSize is too large\n");
         uint32_t distY = primitives.ssd_s[partSize](resiYuv.getLumaAddr(absPartIdx), resiYuv.m_size);
         uint32_t psyEnergyY = 0;
@@ -2861,19 +2869,15 @@
                     singleCostY = m_rdCost.calcPsyRdCost(nonZeroDistY, singleBitsComp[TEXT_LUMA][0], nonZeroPsyEnergyY);
                 else
                     singleCostY = m_rdCost.calcRdCost(nonZeroDistY, singleBitsComp[TEXT_LUMA][0]);
-                m_entropyCoder.resetBits();
-                m_entropyCoder.codeQtCbfZero(TEXT_LUMA, tuDepth);
-                const uint32_t nullBitsY = m_entropyCoder.getNumberOfWrittenBits();
-                uint64_t nullCostY = 0;
-                if (m_rdCost.m_psyRd)
-                    nullCostY = m_rdCost.calcPsyRdCost(distY, nullBitsY, psyEnergyY);
-                else
-                    nullCostY = m_rdCost.calcRdCost(distY, nullBitsY);
+
+                uint64_t nullCostY = deriveNullCost(distY, psyEnergyY, tuDepth, TEXT_LUMA);
                 if (nullCostY < singleCostY)
                 {
                     cbfFlag[TEXT_LUMA][0] = 0;
 #if CHECKED_BUILD || _DEBUG
+                    uint32_t numCoeffY = 1 << (log2TrSize << 1);
                     memset(coeffCurY, 0, sizeof(coeff_t) * numCoeffY);
+                    primitives.blockfill_s[partSize](curResiY, strideResiY, 0);
 #endif
                     if (checkTransformSkipY)
                         minCost[TEXT_LUMA][0] = nullCostY;
@@ -2887,21 +2891,16 @@
                 }
             }
         }
-        else if (checkTransformSkipY)
+        else
         {
-            m_entropyCoder.resetBits();
-            m_entropyCoder.codeQtCbfZero(TEXT_LUMA, tuDepth);
-            const uint32_t nullBitsY = m_entropyCoder.getNumberOfWrittenBits();
-            if (m_rdCost.m_psyRd)
-                minCost[TEXT_LUMA][0] = m_rdCost.calcPsyRdCost(distY, nullBitsY, psyEnergyY);
-            else
-                minCost[TEXT_LUMA][0] = m_rdCost.calcRdCost(distY, nullBitsY);
+            if (checkTransformSkipY)
+                minCost[TEXT_LUMA][0] = deriveNullCost(distY, psyEnergyY, tuDepth, TEXT_LUMA);
+            primitives.blockfill_s[partSize](curResiY, strideResiY, 0);
         }
 
         singleDistComp[TEXT_LUMA][0] = distY;
         singlePsyEnergyComp[TEXT_LUMA][0] = psyEnergyY;
-        if (!cbfFlag[TEXT_LUMA][0])
-            primitives.blockfill_s[partSize](curResiY, strideResiY, 0);
+
         cu.setCbfSubParts(cbfFlag[TEXT_LUMA][0] << tuDepth, TEXT_LUMA, absPartIdx, depth);
 
         if (bCodeChroma)
@@ -2945,19 +2944,16 @@
                                 singleCostC = m_rdCost.calcPsyRdCost(nonZeroDistC, singleBitsComp[chromaId][tuIterator.section], nonZeroPsyEnergyC);
                             else
                                 singleCostC = m_rdCost.calcRdCost(nonZeroDistC, singleBitsComp[chromaId][tuIterator.section]);
-                            m_entropyCoder.resetBits();
-                            m_entropyCoder.codeQtCbfZero((TextType)chromaId, tuDepth);
-                            const uint32_t nullBitsC = m_entropyCoder.getNumberOfWrittenBits();
-                            uint64_t nullCostC = 0;
-                            if (m_rdCost.m_psyRd)
-                                nullCostC = m_rdCost.calcPsyRdCost(distC, nullBitsC, psyEnergyC);
-                            else
-                                nullCostC = m_rdCost.calcRdCost(distC, nullBitsC);
+
+                            uint64_t nullCostC = deriveNullCost(distC, psyEnergyC, tuDepth, (TextType)chromaId);
+
                             if (nullCostC < singleCostC)
                             {
                                 cbfFlag[chromaId][tuIterator.section] = 0;
 #if CHECKED_BUILD || _DEBUG
+                                uint32_t numCoeffC = 1 << (log2TrSizeC << 1);
                                 memset(coeffCurC + subTUOffset, 0, sizeof(coeff_t) * numCoeffC);
+                                primitives.blockfill_s[partSizeC](curResiC, strideResiC, 0);
 #endif
                                 if (checkTransformSkipC)
                                     minCost[chromaId][tuIterator.section] = nullCostC;
@@ -2971,23 +2967,16 @@
                             }
                         }
                     }
-                    else if (checkTransformSkipC)
+                    else
                     {
-                        m_entropyCoder.resetBits();
-                        m_entropyCoder.codeQtCbfZero((TextType)chromaId, tuDepthC);
-                        const uint32_t nullBitsC = m_entropyCoder.getNumberOfWrittenBits();
-                        if (m_rdCost.m_psyRd)
-                            minCost[chromaId][tuIterator.section] = m_rdCost.calcPsyRdCost(distC, nullBitsC, psyEnergyC);
-                        else
-                            minCost[chromaId][tuIterator.section] = m_rdCost.calcRdCost(distC, nullBitsC);
+                        if (checkTransformSkipC)
+                            minCost[chromaId][tuIterator.section] = deriveNullCost(distC, psyEnergyC, tuDepthC, (TextType)chromaId);
+                        primitives.blockfill_s[partSizeC](curResiC, strideResiC, 0);
                     }
 
                     singleDistComp[chromaId][tuIterator.section] = distC;
                     singlePsyEnergyComp[chromaId][tuIterator.section] = psyEnergyC;
 
-                    if (!cbfFlag[chromaId][tuIterator.section])
-                        primitives.blockfill_s[partSizeC](curResiC, strideResiC, 0);
-
                     cu.setCbfPartRange(cbfFlag[chromaId][tuIterator.section] << tuDepth, (TextType)chromaId, absPartIdxC, tuIterator.absPartIdxStep);
                 }
                 while (tuIterator.isNextSection());
@@ -3042,6 +3031,7 @@
                 singlePsyEnergyComp[TEXT_LUMA][0] = nonZeroPsyEnergyY;
                 cbfFlag[TEXT_LUMA][0] = !!numSigTSkipY;
                 bestTransformMode[TEXT_LUMA][0] = 1;
+                uint32_t numCoeffY = 1 << (log2TrSize << 1);
                 memcpy(coeffCurY, tsCoeffY, sizeof(coeff_t) * numCoeffY);
                 primitives.square_copy_ss[partSize](curResiY, strideResiY, tsResiY, trSize);
             }
@@ -3112,6 +3102,7 @@
                         singlePsyEnergyComp[chromaId][tuIterator.section] = nonZeroPsyEnergyC;
                         cbfFlag[chromaId][tuIterator.section] = !!numSigTSkipC;
                         bestTransformMode[chromaId][tuIterator.section] = 1;
+                        uint32_t numCoeffC = 1 << (log2TrSizeC << 1);
                         memcpy(coeffCurC + subTUOffset, tsCoeffC, sizeof(coeff_t) * numCoeffC);
                         primitives.square_copy_ss[partSizeC](curResiC, strideResiC, tsResiC, trSizeC);
                     }
diff -r 2a8f3d5820a6 -r 18344f74ded0 source/encoder/search.h
--- a/source/encoder/search.h	Tue Nov 04 09:46:14 2014 +0530
+++ b/source/encoder/search.h	Wed Nov 05 16:23:42 2014 +0530
@@ -217,6 +217,7 @@
         Cost() { rdcost = 0; bits = 0; distortion = 0; energy = 0; }
     };
 
+    uint64_t deriveNullCost(uint32_t &dist, uint32_t &psyEnergy, uint32_t tuDepth, TextType compId); // RD cost of coding a zero CBF for compId at tuDepth
     void     estimateResidualQT(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, ShortYuv& resiYuv, Cost& costs, uint32_t depthRange[2]);
 
     // estimate bit cost of residual QT


More information about the x265-devel mailing list