<div dir="ltr">OK, so this patch removes the code introduced by the previous one; I will fold the two together so that the tests do not break. <br></div><div class="gmail_extra"><br><div class="gmail_quote">On Tue, Jun 9, 2015 at 11:36 PM, Min Chen <span dir="ltr">&lt;<a href="mailto:chenm003@163.com" target="_blank">chenm003@163.com</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Min Chen &lt;<a href="mailto:chenm003@163.com">chenm003@163.com</a>&gt;<br>
# Date 1433872875 25200<br>
# Node ID 134670771e0c1dd0800c3e9db0a1f9f69c467e36<br>
# Parent  04c9567aa2bb7b549cd6a3514a1ef29d64724638<br>
split fast RD path in codeCoeffNxN()<br>
---<br>
 source/encoder/entropy.cpp |  131 +++++++++++++++++++++++++++++---------------<br>
 1 files changed, 86 insertions(+), 45 deletions(-)<br>
<br>
diff -r 04c9567aa2bb -r 134670771e0c source/encoder/entropy.cpp<br>
--- a/source/encoder/entropy.cpp        Tue Jun 09 11:01:13 2015 -0700<br>
+++ b/source/encoder/entropy.cpp        Tue Jun 09 11:01:15 2015 -0700<br>
@@ -1690,66 +1690,106 @@<br>
             CTZ(firstNZPosInCG, subCoeffFlag);<br>
<br>
             bool signHidden = (lastNZPosInCG - firstNZPosInCG >= SBH_THRESHOLD);<br>
-            uint32_t ctxSet = (subSet > 0 && bIsLuma) ? 2 : 0;<br>
+            uint32_t ctxSet = ((subSet > 0) & bIsLuma) ? 2 : 0;<br>
<br>
-            if (c1 == 0)<br>
-                ctxSet++;<br>
+            ctxSet += (c1 == 0);<br>
<br>
             c1 = 1;<br>
-            uint8_t *baseCtxMod = bIsLuma ? &m_contextState[OFF_ONE_FLAG_CTX + 4 * ctxSet] : &m_contextState[OFF_ONE_FLAG_CTX + NUM_ONE_FLAG_CTX_LUMA + 4 * ctxSet];<br>
+            uint8_t *baseCtxMod = &m_contextState[(bIsLuma ? 0 : NUM_ONE_FLAG_CTX_LUMA) + OFF_ONE_FLAG_CTX + 4 * ctxSet];<br>
<br>
             uint32_t numC1Flag = X265_MIN(numNonZero, C1FLAG_NUMBER);<br>
-            int firstC2Flag = -1;<br>
+            X265_CHECK(numC1Flag > 0, "numC1Flag check failure\n");<br>
<br>
-            X265_CHECK(numC1Flag > 0, "numC1Flag check failure\n");<br>
-            idx = 0;<br>
-            do<br>
+            uint32_t firstC2Flag = 2;<br>
+            uint32_t c1Next = 0xFFFFFFFE;<br>
+            if (!m_bitIf)<br>
             {<br>
-                uint32_t symbol1 = absCoeff[idx] > 1;<br>
-                uint32_t symbol2 = absCoeff[idx] > 2;<br>
-                encodeBin(symbol1, baseCtxMod[c1]);<br>
+                uint32_t sum = 0;<br>
+                // Fast RD path<br>
+                idx = 0;<br>
+                do<br>
+                {<br>
+                    uint32_t symbol1 = absCoeff[idx] > 1;<br>
+                    uint32_t symbol2 = absCoeff[idx] > 2;<br>
+                    //encodeBin(symbol1, baseCtxMod[c1]);<br>
+                    {<br>
+                        const uint32_t mstate = baseCtxMod[c1];<br>
+                        baseCtxMod[c1] = sbacNext(mstate, symbol1);<br>
+                        sum += sbacGetEntropyBits(mstate, symbol1);<br>
+                    }<br>
<br>
-                // TODO: VC can't work fine on below style, but ICL can generate branch free code<br>
-#ifdef __INTEL_COMPILER<br>
-                if (symbol)<br>
-                    c1 = 0;<br>
+                    if (symbol1)<br>
+                        c1Next = 0;<br>
+                    if (symbol1 + firstC2Flag == 3)<br>
+                        firstC2Flag = symbol2;<br>
<br>
-                if ((firstC2Flag < 0) & symbol)<br>
-                    firstC2Flag = (int)symbol2;<br>
-#else<br>
-                if (symbol1)<br>
+                    c1 = (c1Next & 3);<br>
+                    c1Next >>= 2;<br>
+                    X265_CHECK(c1 <= 3, "c1 check failure\n");<br>
+                    idx++;<br>
+                }<br>
+                while(idx < numC1Flag);<br>
+<br>
+                if (!c1)<br>
                 {<br>
-                    c1 = 0;<br>
-                    if (firstC2Flag < 0)<br>
-                        firstC2Flag = (int)symbol2;<br>
+                    X265_CHECK((firstC2Flag <= 1), "firstC2FlagIdx check failure\n");<br>
+<br>
+                    baseCtxMod = &m_contextState[(bIsLuma ? 0 : NUM_ABS_FLAG_CTX_LUMA) + OFF_ABS_FLAG_CTX + ctxSet];<br>
+<br>
+                    //encodeBin(firstC2Flag, baseCtxMod[0]);<br>
+                    {<br>
+                        const uint32_t mstate = baseCtxMod[0];<br>
+                        baseCtxMod[0] = sbacNext(mstate, firstC2Flag);<br>
+                        sum += sbacGetEntropyBits(mstate, firstC2Flag);<br>
+                    }<br>
                 }<br>
-#endif<br>
-                c1 += ((c1 >> 1) ^ c1) & 1;<br>
-                X265_CHECK(c1 <= 3, "c1 check failure\n");<br>
-                idx++;<br>
-            }<br>
-            while(idx < numC1Flag);<br>
+                m_fracBits += (sum & 0xFFFFFF);<br>
<br>
-            if (!c1)<br>
-            {<br>
-                baseCtxMod = bIsLuma ? &m_contextState[OFF_ABS_FLAG_CTX + ctxSet] : &m_contextState[OFF_ABS_FLAG_CTX + NUM_ABS_FLAG_CTX_LUMA + ctxSet];<br>
+                const int hiddenShift = (bHideFirstSign & signHidden) ? 1 : 0;<br>
+                //encodeBinsEP((coeffSigns >> hiddenShift), numNonZero - hiddenShift);<br>
+                m_fracBits += (numNonZero - hiddenShift) << 15;<br>
<br>
-                X265_CHECK((firstC2Flag != -1), "firstC2FlagIdx check failure\n");<br>
-                encodeBin(firstC2Flag, baseCtxMod[0]);<br>
-            }<br>
-<br>
-            const int hiddenShift = (bHideFirstSign && signHidden) ? 1 : 0;<br>
-            encodeBinsEP((coeffSigns >> hiddenShift), numNonZero - hiddenShift);<br>
-<br>
-            if (!c1 || numNonZero > C1FLAG_NUMBER)<br>
-            {<br>
-                if (!m_bitIf)<br>
+                if (!c1 || numNonZero > C1FLAG_NUMBER)<br>
                 {<br>
-                    // Fast RD path<br>
                     uint32_t sum = primitives.costCoeffRemain(absCoeff, numNonZero);<br>
                     m_fracBits += ((uint64_t)sum << 15);<br>
                 }<br>
-                else<br>
+            }<br>
+            // Standard path<br>
+            else<br>
+            {<br>
+                idx = 0;<br>
+                do<br>
+                {<br>
+                    uint32_t symbol1 = absCoeff[idx] > 1;<br>
+                    uint32_t symbol2 = absCoeff[idx] > 2;<br>
+                    encodeBin(symbol1, baseCtxMod[c1]);<br>
+<br>
+                    if (symbol1)<br>
+                        c1Next = 0;<br>
+<br>
+                    if (symbol1 + firstC2Flag == 3)<br>
+                        firstC2Flag = symbol2;<br>
+<br>
+                    c1 = (c1Next & 3);<br>
+                    c1Next >>= 2;<br>
+                    X265_CHECK(c1 <= 3, "c1 check failure\n");<br>
+                    idx++;<br>
+                }<br>
+                while(idx < numC1Flag);<br>
+<br>
+                if (!c1)<br>
+                {<br>
+                    baseCtxMod = &m_contextState[(bIsLuma ? 0 : NUM_ABS_FLAG_CTX_LUMA) + OFF_ABS_FLAG_CTX + ctxSet];<br>
+<br>
+                    X265_CHECK((firstC2Flag <= 1), "firstC2FlagIdx check failure\n");<br>
+                    encodeBin(firstC2Flag, baseCtxMod[0]);<br>
+                }<br>
+<br>
+                const int hiddenShift = (bHideFirstSign && signHidden) ? 1 : 0;<br>
+                encodeBinsEP((coeffSigns >> hiddenShift), numNonZero - hiddenShift);<br>
+<br>
+                if (!c1 || numNonZero > C1FLAG_NUMBER)<br>
                 {<br>
                     // Standard path<br>
                     uint32_t goRiceParam = 0;<br>
@@ -1776,8 +1816,9 @@<br>
                     }<br>
                     while(idx < numNonZero);<br>
                 }<br>
-            }<br>
-        }<br>
+            } // end of !bitIf<br>
+        } // end of (numNonZero > 0)<br>
+<br>
         // Initialize value for next loop<br>
         numNonZero = 0;<br>
         scanPosSigOff = (1 << MLS_CG_SIZE) - 1;<br>
<br>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</blockquote></div><br></div>