[x265] [PATCH 5 of 6] split fast RD path in codeCoeffNxN()

Deepthi Nandakumar deepthi at multicorewareinc.com
Wed Jun 10 08:52:35 CEST 2015


OK, so this patch removes the code introduced by the previous one; I will
fold the two together so as not to break the tests.

On Tue, Jun 9, 2015 at 11:36 PM, Min Chen <chenm003 at 163.com> wrote:

> # HG changeset patch
> # User Min Chen <chenm003 at 163.com>
> # Date 1433872875 25200
> # Node ID 134670771e0c1dd0800c3e9db0a1f9f69c467e36
> # Parent  04c9567aa2bb7b549cd6a3514a1ef29d64724638
> split fast RD path in codeCoeffNxN()
> ---
>  source/encoder/entropy.cpp |  131
> +++++++++++++++++++++++++++++---------------
>  1 files changed, 86 insertions(+), 45 deletions(-)
>
> diff -r 04c9567aa2bb -r 134670771e0c source/encoder/entropy.cpp
> --- a/source/encoder/entropy.cpp        Tue Jun 09 11:01:13 2015 -0700
> +++ b/source/encoder/entropy.cpp        Tue Jun 09 11:01:15 2015 -0700
> @@ -1690,66 +1690,106 @@
>              CTZ(firstNZPosInCG, subCoeffFlag);
>
>              bool signHidden = (lastNZPosInCG - firstNZPosInCG >=
> SBH_THRESHOLD);
> -            uint32_t ctxSet = (subSet > 0 && bIsLuma) ? 2 : 0;
> +            uint32_t ctxSet = ((subSet > 0) & bIsLuma) ? 2 : 0;
>
> -            if (c1 == 0)
> -                ctxSet++;
> +            ctxSet += (c1 == 0);
>
>              c1 = 1;
> -            uint8_t *baseCtxMod = bIsLuma ?
> &m_contextState[OFF_ONE_FLAG_CTX + 4 * ctxSet] :
> &m_contextState[OFF_ONE_FLAG_CTX + NUM_ONE_FLAG_CTX_LUMA + 4 * ctxSet];
> +            uint8_t *baseCtxMod = &m_contextState[(bIsLuma ? 0 :
> NUM_ONE_FLAG_CTX_LUMA) + OFF_ONE_FLAG_CTX + 4 * ctxSet];
>
>              uint32_t numC1Flag = X265_MIN(numNonZero, C1FLAG_NUMBER);
> -            int firstC2Flag = -1;
> +            X265_CHECK(numC1Flag > 0, "numC1Flag check failure\n");
>
> -            X265_CHECK(numC1Flag > 0, "numC1Flag check failure\n");
> -            idx = 0;
> -            do
> +            uint32_t firstC2Flag = 2;
> +            uint32_t c1Next = 0xFFFFFFFE;
> +            if (!m_bitIf)
>              {
> -                uint32_t symbol1 = absCoeff[idx] > 1;
> -                uint32_t symbol2 = absCoeff[idx] > 2;
> -                encodeBin(symbol1, baseCtxMod[c1]);
> +                uint32_t sum = 0;
> +                // Fast RD path
> +                idx = 0;
> +                do
> +                {
> +                    uint32_t symbol1 = absCoeff[idx] > 1;
> +                    uint32_t symbol2 = absCoeff[idx] > 2;
> +                    //encodeBin(symbol1, baseCtxMod[c1]);
> +                    {
> +                        const uint32_t mstate = baseCtxMod[c1];
> +                        baseCtxMod[c1] = sbacNext(mstate, symbol1);
> +                        sum += sbacGetEntropyBits(mstate, symbol1);
> +                    }
>
> -                // TODO: VC can't work fine on below style, but ICL can
> generate branch free code
> -#ifdef __INTEL_COMPILER
> -                if (symbol)
> -                    c1 = 0;
> +                    if (symbol1)
> +                        c1Next = 0;
> +                    if (symbol1 + firstC2Flag == 3)
> +                        firstC2Flag = symbol2;
>
> -                if ((firstC2Flag < 0) & symbol)
> -                    firstC2Flag = (int)symbol2;
> -#else
> -                if (symbol1)
> +                    c1 = (c1Next & 3);
> +                    c1Next >>= 2;
> +                    X265_CHECK(c1 <= 3, "c1 check failure\n");
> +                    idx++;
> +                }
> +                while(idx < numC1Flag);
> +
> +                if (!c1)
>                  {
> -                    c1 = 0;
> -                    if (firstC2Flag < 0)
> -                        firstC2Flag = (int)symbol2;
> +                    X265_CHECK((firstC2Flag <= 1), "firstC2FlagIdx check
> failure\n");
> +
> +                    baseCtxMod = &m_contextState[(bIsLuma ? 0 :
> NUM_ABS_FLAG_CTX_LUMA) + OFF_ABS_FLAG_CTX + ctxSet];
> +
> +                    //encodeBin(firstC2Flag, baseCtxMod[0]);
> +                    {
> +                        const uint32_t mstate = baseCtxMod[0];
> +                        baseCtxMod[0] = sbacNext(mstate, firstC2Flag);
> +                        sum += sbacGetEntropyBits(mstate, firstC2Flag);
> +                    }
>                  }
> -#endif
> -                c1 += ((c1 >> 1) ^ c1) & 1;
> -                X265_CHECK(c1 <= 3, "c1 check failure\n");
> -                idx++;
> -            }
> -            while(idx < numC1Flag);
> +                m_fracBits += (sum & 0xFFFFFF);
>
> -            if (!c1)
> -            {
> -                baseCtxMod = bIsLuma ? &m_contextState[OFF_ABS_FLAG_CTX +
> ctxSet] : &m_contextState[OFF_ABS_FLAG_CTX + NUM_ABS_FLAG_CTX_LUMA +
> ctxSet];
> +                const int hiddenShift = (bHideFirstSign & signHidden) ? 1
> : 0;
> +                //encodeBinsEP((coeffSigns >> hiddenShift), numNonZero -
> hiddenShift);
> +                m_fracBits += (numNonZero - hiddenShift) << 15;
>
> -                X265_CHECK((firstC2Flag != -1), "firstC2FlagIdx check
> failure\n");
> -                encodeBin(firstC2Flag, baseCtxMod[0]);
> -            }
> -
> -            const int hiddenShift = (bHideFirstSign && signHidden) ? 1 :
> 0;
> -            encodeBinsEP((coeffSigns >> hiddenShift), numNonZero -
> hiddenShift);
> -
> -            if (!c1 || numNonZero > C1FLAG_NUMBER)
> -            {
> -                if (!m_bitIf)
> +                if (!c1 || numNonZero > C1FLAG_NUMBER)
>                  {
> -                    // Fast RD path
>                      uint32_t sum = primitives.costCoeffRemain(absCoeff,
> numNonZero);
>                      m_fracBits += ((uint64_t)sum << 15);
>                  }
> -                else
> +            }
> +            // Standard path
> +            else
> +            {
> +                idx = 0;
> +                do
> +                {
> +                    uint32_t symbol1 = absCoeff[idx] > 1;
> +                    uint32_t symbol2 = absCoeff[idx] > 2;
> +                    encodeBin(symbol1, baseCtxMod[c1]);
> +
> +                    if (symbol1)
> +                        c1Next = 0;
> +
> +                    if (symbol1 + firstC2Flag == 3)
> +                        firstC2Flag = symbol2;
> +
> +                    c1 = (c1Next & 3);
> +                    c1Next >>= 2;
> +                    X265_CHECK(c1 <= 3, "c1 check failure\n");
> +                    idx++;
> +                }
> +                while(idx < numC1Flag);
> +
> +                if (!c1)
> +                {
> +                    baseCtxMod = &m_contextState[(bIsLuma ? 0 :
> NUM_ABS_FLAG_CTX_LUMA) + OFF_ABS_FLAG_CTX + ctxSet];
> +
> +                    X265_CHECK((firstC2Flag <= 1), "firstC2FlagIdx check
> failure\n");
> +                    encodeBin(firstC2Flag, baseCtxMod[0]);
> +                }
> +
> +                const int hiddenShift = (bHideFirstSign && signHidden) ?
> 1 : 0;
> +                encodeBinsEP((coeffSigns >> hiddenShift), numNonZero -
> hiddenShift);
> +
> +                if (!c1 || numNonZero > C1FLAG_NUMBER)
>                  {
>                      // Standard path
>                      uint32_t goRiceParam = 0;
> @@ -1776,8 +1816,9 @@
>                      }
>                      while(idx < numNonZero);
>                  }
> -            }
> -        }
> +            } // end of !bitIf
> +        } // end of (numNonZero > 0)
> +
>          // Initialize value for next loop
>          numNonZero = 0;
>          scanPosSigOff = (1 << MLS_CG_SIZE) - 1;
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150610/a57967df/attachment-0001.html>


More information about the x265-devel mailing list