[x265] [PATCH 5 of 6] split fast RD path in codeCoeffNxN()

Min Chen chenm003 at 163.com
Tue Jun 9 20:06:05 CEST 2015


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1433872875 25200
# Node ID 134670771e0c1dd0800c3e9db0a1f9f69c467e36
# Parent  04c9567aa2bb7b549cd6a3514a1ef29d64724638
split fast RD path in codeCoeffNxN()
---
 source/encoder/entropy.cpp |  131 +++++++++++++++++++++++++++++---------------
 1 files changed, 86 insertions(+), 45 deletions(-)

diff -r 04c9567aa2bb -r 134670771e0c source/encoder/entropy.cpp
--- a/source/encoder/entropy.cpp	Tue Jun 09 11:01:13 2015 -0700
+++ b/source/encoder/entropy.cpp	Tue Jun 09 11:01:15 2015 -0700
@@ -1690,66 +1690,106 @@
             CTZ(firstNZPosInCG, subCoeffFlag);
 
             bool signHidden = (lastNZPosInCG - firstNZPosInCG >= SBH_THRESHOLD);
-            uint32_t ctxSet = (subSet > 0 && bIsLuma) ? 2 : 0;
+            uint32_t ctxSet = ((subSet > 0) & bIsLuma) ? 2 : 0;
 
-            if (c1 == 0)
-                ctxSet++;
+            ctxSet += (c1 == 0);
 
             c1 = 1;
-            uint8_t *baseCtxMod = bIsLuma ? &m_contextState[OFF_ONE_FLAG_CTX + 4 * ctxSet] : &m_contextState[OFF_ONE_FLAG_CTX + NUM_ONE_FLAG_CTX_LUMA + 4 * ctxSet];
+            uint8_t *baseCtxMod = &m_contextState[(bIsLuma ? 0 : NUM_ONE_FLAG_CTX_LUMA) + OFF_ONE_FLAG_CTX + 4 * ctxSet];
 
             uint32_t numC1Flag = X265_MIN(numNonZero, C1FLAG_NUMBER);
-            int firstC2Flag = -1;
+            X265_CHECK(numC1Flag > 0, "numC1Flag check failure\n");
 
-            X265_CHECK(numC1Flag > 0, "numC1Flag check failure\n");
-            idx = 0;
-            do
+            uint32_t firstC2Flag = 2;
+            uint32_t c1Next = 0xFFFFFFFE;
+            if (!m_bitIf)
             {
-                uint32_t symbol1 = absCoeff[idx] > 1;
-                uint32_t symbol2 = absCoeff[idx] > 2;
-                encodeBin(symbol1, baseCtxMod[c1]);
+                uint32_t sum = 0;
+                // Fast RD path
+                idx = 0;
+                do
+                {
+                    uint32_t symbol1 = absCoeff[idx] > 1;
+                    uint32_t symbol2 = absCoeff[idx] > 2;
+                    //encodeBin(symbol1, baseCtxMod[c1]);
+                    {
+                        const uint32_t mstate = baseCtxMod[c1];
+                        baseCtxMod[c1] = sbacNext(mstate, symbol1);
+                        sum += sbacGetEntropyBits(mstate, symbol1);
+                    }
 
-                // TODO: VC can't work fine on below style, but ICL can generate branch free code
-#ifdef __INTEL_COMPILER
-                if (symbol)
-                    c1 = 0;
+                    if (symbol1)
+                        c1Next = 0;
+                    if (symbol1 + firstC2Flag == 3)
+                        firstC2Flag = symbol2;
 
-                if ((firstC2Flag < 0) & symbol)
-                    firstC2Flag = (int)symbol2;
-#else
-                if (symbol1)
+                    c1 = (c1Next & 3);
+                    c1Next >>= 2;
+                    X265_CHECK(c1 <= 3, "c1 check failure\n");
+                    idx++;
+                }
+                while(idx < numC1Flag);
+
+                if (!c1)
                 {
-                    c1 = 0;
-                    if (firstC2Flag < 0)
-                        firstC2Flag = (int)symbol2;
+                    X265_CHECK((firstC2Flag <= 1), "firstC2FlagIdx check failure\n");
+
+                    baseCtxMod = &m_contextState[(bIsLuma ? 0 : NUM_ABS_FLAG_CTX_LUMA) + OFF_ABS_FLAG_CTX + ctxSet];
+
+                    //encodeBin(firstC2Flag, baseCtxMod[0]);
+                    {
+                        const uint32_t mstate = baseCtxMod[0];
+                        baseCtxMod[0] = sbacNext(mstate, firstC2Flag);
+                        sum += sbacGetEntropyBits(mstate, firstC2Flag);
+                    }
                 }
-#endif
-                c1 += ((c1 >> 1) ^ c1) & 1;
-                X265_CHECK(c1 <= 3, "c1 check failure\n");
-                idx++;
-            }
-            while(idx < numC1Flag);
+                m_fracBits += (sum & 0xFFFFFF);
 
-            if (!c1)
-            {
-                baseCtxMod = bIsLuma ? &m_contextState[OFF_ABS_FLAG_CTX + ctxSet] : &m_contextState[OFF_ABS_FLAG_CTX + NUM_ABS_FLAG_CTX_LUMA + ctxSet];
+                const int hiddenShift = (bHideFirstSign & signHidden) ? 1 : 0;
+                //encodeBinsEP((coeffSigns >> hiddenShift), numNonZero - hiddenShift);
+                m_fracBits += (numNonZero - hiddenShift) << 15;
 
-                X265_CHECK((firstC2Flag != -1), "firstC2FlagIdx check failure\n");
-                encodeBin(firstC2Flag, baseCtxMod[0]);
-            }
-
-            const int hiddenShift = (bHideFirstSign && signHidden) ? 1 : 0;
-            encodeBinsEP((coeffSigns >> hiddenShift), numNonZero - hiddenShift);
-
-            if (!c1 || numNonZero > C1FLAG_NUMBER)
-            {
-                if (!m_bitIf)
+                if (!c1 || numNonZero > C1FLAG_NUMBER)
                 {
-                    // Fast RD path
                     uint32_t sum = primitives.costCoeffRemain(absCoeff, numNonZero);
                     m_fracBits += ((uint64_t)sum << 15);
                 }
-                else
+            }
+            // Standard path
+            else
+            {
+                idx = 0;
+                do
+                {
+                    uint32_t symbol1 = absCoeff[idx] > 1;
+                    uint32_t symbol2 = absCoeff[idx] > 2;
+                    encodeBin(symbol1, baseCtxMod[c1]);
+
+                    if (symbol1)
+                        c1Next = 0;
+
+                    if (symbol1 + firstC2Flag == 3)
+                        firstC2Flag = symbol2;
+
+                    c1 = (c1Next & 3);
+                    c1Next >>= 2;
+                    X265_CHECK(c1 <= 3, "c1 check failure\n");
+                    idx++;
+                }
+                while(idx < numC1Flag);
+
+                if (!c1)
+                {
+                    baseCtxMod = &m_contextState[(bIsLuma ? 0 : NUM_ABS_FLAG_CTX_LUMA) + OFF_ABS_FLAG_CTX + ctxSet];
+
+                    X265_CHECK((firstC2Flag <= 1), "firstC2FlagIdx check failure\n");
+                    encodeBin(firstC2Flag, baseCtxMod[0]);
+                }
+
+                const int hiddenShift = (bHideFirstSign && signHidden) ? 1 : 0;
+                encodeBinsEP((coeffSigns >> hiddenShift), numNonZero - hiddenShift);
+
+                if (!c1 || numNonZero > C1FLAG_NUMBER)
                 {
                     // Standard path
                     uint32_t goRiceParam = 0;
@@ -1776,8 +1816,9 @@
                     }
                     while(idx < numNonZero);
                 }
-            }
-        }
+            } // end of !bitIf
+        } // end of (numNonZero > 0)
+
         // Initialize value for next loop
         numNonZero = 0;
         scanPosSigOff = (1 << MLS_CG_SIZE) - 1;



More information about the x265-devel mailing list