[x265] [PATCH 5 of 6] split fast RD path in codeCoeffNxN()
Deepthi Nandakumar
deepthi at multicorewareinc.com
Wed Jun 10 08:52:35 CEST 2015
OK, so this patch removes the code introduced by the previous one. I will
fold the two patches together so that the tests do not break.
On Tue, Jun 9, 2015 at 11:36 PM, Min Chen <chenm003 at 163.com> wrote:
> # HG changeset patch
> # User Min Chen <chenm003 at 163.com>
> # Date 1433872875 25200
> # Node ID 134670771e0c1dd0800c3e9db0a1f9f69c467e36
> # Parent 04c9567aa2bb7b549cd6a3514a1ef29d64724638
> split fast RD path in codeCoeffNxN()
> ---
> source/encoder/entropy.cpp | 131
> +++++++++++++++++++++++++++++---------------
> 1 files changed, 86 insertions(+), 45 deletions(-)
>
> diff -r 04c9567aa2bb -r 134670771e0c source/encoder/entropy.cpp
> --- a/source/encoder/entropy.cpp Tue Jun 09 11:01:13 2015 -0700
> +++ b/source/encoder/entropy.cpp Tue Jun 09 11:01:15 2015 -0700
> @@ -1690,66 +1690,106 @@
> CTZ(firstNZPosInCG, subCoeffFlag);
>
> bool signHidden = (lastNZPosInCG - firstNZPosInCG >=
> SBH_THRESHOLD);
> - uint32_t ctxSet = (subSet > 0 && bIsLuma) ? 2 : 0;
> + uint32_t ctxSet = ((subSet > 0) & bIsLuma) ? 2 : 0;
>
> - if (c1 == 0)
> - ctxSet++;
> + ctxSet += (c1 == 0);
>
> c1 = 1;
> - uint8_t *baseCtxMod = bIsLuma ?
> &m_contextState[OFF_ONE_FLAG_CTX + 4 * ctxSet] :
> &m_contextState[OFF_ONE_FLAG_CTX + NUM_ONE_FLAG_CTX_LUMA + 4 * ctxSet];
> + uint8_t *baseCtxMod = &m_contextState[(bIsLuma ? 0 :
> NUM_ONE_FLAG_CTX_LUMA) + OFF_ONE_FLAG_CTX + 4 * ctxSet];
>
> uint32_t numC1Flag = X265_MIN(numNonZero, C1FLAG_NUMBER);
> - int firstC2Flag = -1;
> + X265_CHECK(numC1Flag > 0, "numC1Flag check failure\n");
>
> - X265_CHECK(numC1Flag > 0, "numC1Flag check failure\n");
> - idx = 0;
> - do
> + uint32_t firstC2Flag = 2;
> + uint32_t c1Next = 0xFFFFFFFE;
> + if (!m_bitIf)
> {
> - uint32_t symbol1 = absCoeff[idx] > 1;
> - uint32_t symbol2 = absCoeff[idx] > 2;
> - encodeBin(symbol1, baseCtxMod[c1]);
> + uint32_t sum = 0;
> + // Fast RD path
> + idx = 0;
> + do
> + {
> + uint32_t symbol1 = absCoeff[idx] > 1;
> + uint32_t symbol2 = absCoeff[idx] > 2;
> + //encodeBin(symbol1, baseCtxMod[c1]);
> + {
> + const uint32_t mstate = baseCtxMod[c1];
> + baseCtxMod[c1] = sbacNext(mstate, symbol1);
> + sum += sbacGetEntropyBits(mstate, symbol1);
> + }
>
> - // TODO: VC can't work fine on below style, but ICL can
> generate branch free code
> -#ifdef __INTEL_COMPILER
> - if (symbol)
> - c1 = 0;
> + if (symbol1)
> + c1Next = 0;
> + if (symbol1 + firstC2Flag == 3)
> + firstC2Flag = symbol2;
>
> - if ((firstC2Flag < 0) & symbol)
> - firstC2Flag = (int)symbol2;
> -#else
> - if (symbol1)
> + c1 = (c1Next & 3);
> + c1Next >>= 2;
> + X265_CHECK(c1 <= 3, "c1 check failure\n");
> + idx++;
> + }
> + while(idx < numC1Flag);
> +
> + if (!c1)
> {
> - c1 = 0;
> - if (firstC2Flag < 0)
> - firstC2Flag = (int)symbol2;
> + X265_CHECK((firstC2Flag <= 1), "firstC2FlagIdx check
> failure\n");
> +
> + baseCtxMod = &m_contextState[(bIsLuma ? 0 :
> NUM_ABS_FLAG_CTX_LUMA) + OFF_ABS_FLAG_CTX + ctxSet];
> +
> + //encodeBin(firstC2Flag, baseCtxMod[0]);
> + {
> + const uint32_t mstate = baseCtxMod[0];
> + baseCtxMod[0] = sbacNext(mstate, firstC2Flag);
> + sum += sbacGetEntropyBits(mstate, firstC2Flag);
> + }
> }
> -#endif
> - c1 += ((c1 >> 1) ^ c1) & 1;
> - X265_CHECK(c1 <= 3, "c1 check failure\n");
> - idx++;
> - }
> - while(idx < numC1Flag);
> + m_fracBits += (sum & 0xFFFFFF);
>
> - if (!c1)
> - {
> - baseCtxMod = bIsLuma ? &m_contextState[OFF_ABS_FLAG_CTX +
> ctxSet] : &m_contextState[OFF_ABS_FLAG_CTX + NUM_ABS_FLAG_CTX_LUMA +
> ctxSet];
> + const int hiddenShift = (bHideFirstSign & signHidden) ? 1
> : 0;
> + //encodeBinsEP((coeffSigns >> hiddenShift), numNonZero -
> hiddenShift);
> + m_fracBits += (numNonZero - hiddenShift) << 15;
>
> - X265_CHECK((firstC2Flag != -1), "firstC2FlagIdx check
> failure\n");
> - encodeBin(firstC2Flag, baseCtxMod[0]);
> - }
> -
> - const int hiddenShift = (bHideFirstSign && signHidden) ? 1 :
> 0;
> - encodeBinsEP((coeffSigns >> hiddenShift), numNonZero -
> hiddenShift);
> -
> - if (!c1 || numNonZero > C1FLAG_NUMBER)
> - {
> - if (!m_bitIf)
> + if (!c1 || numNonZero > C1FLAG_NUMBER)
> {
> - // Fast RD path
> uint32_t sum = primitives.costCoeffRemain(absCoeff,
> numNonZero);
> m_fracBits += ((uint64_t)sum << 15);
> }
> - else
> + }
> + // Standard path
> + else
> + {
> + idx = 0;
> + do
> + {
> + uint32_t symbol1 = absCoeff[idx] > 1;
> + uint32_t symbol2 = absCoeff[idx] > 2;
> + encodeBin(symbol1, baseCtxMod[c1]);
> +
> + if (symbol1)
> + c1Next = 0;
> +
> + if (symbol1 + firstC2Flag == 3)
> + firstC2Flag = symbol2;
> +
> + c1 = (c1Next & 3);
> + c1Next >>= 2;
> + X265_CHECK(c1 <= 3, "c1 check failure\n");
> + idx++;
> + }
> + while(idx < numC1Flag);
> +
> + if (!c1)
> + {
> + baseCtxMod = &m_contextState[(bIsLuma ? 0 :
> NUM_ABS_FLAG_CTX_LUMA) + OFF_ABS_FLAG_CTX + ctxSet];
> +
> + X265_CHECK((firstC2Flag <= 1), "firstC2FlagIdx check
> failure\n");
> + encodeBin(firstC2Flag, baseCtxMod[0]);
> + }
> +
> + const int hiddenShift = (bHideFirstSign && signHidden) ?
> 1 : 0;
> + encodeBinsEP((coeffSigns >> hiddenShift), numNonZero -
> hiddenShift);
> +
> + if (!c1 || numNonZero > C1FLAG_NUMBER)
> {
> // Standard path
> uint32_t goRiceParam = 0;
> @@ -1776,8 +1816,9 @@
> }
> while(idx < numNonZero);
> }
> - }
> - }
> + } // end of !bitIf
> + } // end of (numNonZero > 0)
> +
> // Initialize value for next loop
> numNonZero = 0;
> scanPosSigOff = (1 << MLS_CG_SIZE) - 1;
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150610/a57967df/attachment-0001.html>
More information about the x265-devel
mailing list