[x265] [PATCH 5 of 6] split fast RD path in codeCoeffNxN()
Min Chen
chenm003 at 163.com
Tue Jun 9 20:06:05 CEST 2015
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1433872875 25200
# Node ID 134670771e0c1dd0800c3e9db0a1f9f69c467e36
# Parent 04c9567aa2bb7b549cd6a3514a1ef29d64724638
split fast RD path in codeCoeffNxN()
---
source/encoder/entropy.cpp | 131 +++++++++++++++++++++++++++++---------------
1 files changed, 86 insertions(+), 45 deletions(-)
diff -r 04c9567aa2bb -r 134670771e0c source/encoder/entropy.cpp
--- a/source/encoder/entropy.cpp Tue Jun 09 11:01:13 2015 -0700
+++ b/source/encoder/entropy.cpp Tue Jun 09 11:01:15 2015 -0700
@@ -1690,66 +1690,106 @@
CTZ(firstNZPosInCG, subCoeffFlag);
bool signHidden = (lastNZPosInCG - firstNZPosInCG >= SBH_THRESHOLD);
- uint32_t ctxSet = (subSet > 0 && bIsLuma) ? 2 : 0;
+ uint32_t ctxSet = ((subSet > 0) & bIsLuma) ? 2 : 0;
- if (c1 == 0)
- ctxSet++;
+ ctxSet += (c1 == 0);
c1 = 1;
- uint8_t *baseCtxMod = bIsLuma ? &m_contextState[OFF_ONE_FLAG_CTX + 4 * ctxSet] : &m_contextState[OFF_ONE_FLAG_CTX + NUM_ONE_FLAG_CTX_LUMA + 4 * ctxSet];
+ uint8_t *baseCtxMod = &m_contextState[(bIsLuma ? 0 : NUM_ONE_FLAG_CTX_LUMA) + OFF_ONE_FLAG_CTX + 4 * ctxSet];
uint32_t numC1Flag = X265_MIN(numNonZero, C1FLAG_NUMBER);
- int firstC2Flag = -1;
+ X265_CHECK(numC1Flag > 0, "numC1Flag check failure\n");
- X265_CHECK(numC1Flag > 0, "numC1Flag check failure\n");
- idx = 0;
- do
+ uint32_t firstC2Flag = 2;
+ uint32_t c1Next = 0xFFFFFFFE;
+ if (!m_bitIf)
{
- uint32_t symbol1 = absCoeff[idx] > 1;
- uint32_t symbol2 = absCoeff[idx] > 2;
- encodeBin(symbol1, baseCtxMod[c1]);
+ uint32_t sum = 0;
+ // Fast RD path
+ idx = 0;
+ do
+ {
+ uint32_t symbol1 = absCoeff[idx] > 1;
+ uint32_t symbol2 = absCoeff[idx] > 2;
+ //encodeBin(symbol1, baseCtxMod[c1]);
+ {
+ const uint32_t mstate = baseCtxMod[c1];
+ baseCtxMod[c1] = sbacNext(mstate, symbol1);
+ sum += sbacGetEntropyBits(mstate, symbol1);
+ }
- // TODO: VC can't work fine on below style, but ICL can generate branch free code
-#ifdef __INTEL_COMPILER
- if (symbol)
- c1 = 0;
+ if (symbol1)
+ c1Next = 0;
+ if (symbol1 + firstC2Flag == 3)
+ firstC2Flag = symbol2;
- if ((firstC2Flag < 0) & symbol)
- firstC2Flag = (int)symbol2;
-#else
- if (symbol1)
+ c1 = (c1Next & 3);
+ c1Next >>= 2;
+ X265_CHECK(c1 <= 3, "c1 check failure\n");
+ idx++;
+ }
+ while(idx < numC1Flag);
+
+ if (!c1)
{
- c1 = 0;
- if (firstC2Flag < 0)
- firstC2Flag = (int)symbol2;
+ X265_CHECK((firstC2Flag <= 1), "firstC2FlagIdx check failure\n");
+
+ baseCtxMod = &m_contextState[(bIsLuma ? 0 : NUM_ABS_FLAG_CTX_LUMA) + OFF_ABS_FLAG_CTX + ctxSet];
+
+ //encodeBin(firstC2Flag, baseCtxMod[0]);
+ {
+ const uint32_t mstate = baseCtxMod[0];
+ baseCtxMod[0] = sbacNext(mstate, firstC2Flag);
+ sum += sbacGetEntropyBits(mstate, firstC2Flag);
+ }
}
-#endif
- c1 += ((c1 >> 1) ^ c1) & 1;
- X265_CHECK(c1 <= 3, "c1 check failure\n");
- idx++;
- }
- while(idx < numC1Flag);
+ m_fracBits += (sum & 0xFFFFFF);
- if (!c1)
- {
- baseCtxMod = bIsLuma ? &m_contextState[OFF_ABS_FLAG_CTX + ctxSet] : &m_contextState[OFF_ABS_FLAG_CTX + NUM_ABS_FLAG_CTX_LUMA + ctxSet];
+ const int hiddenShift = (bHideFirstSign & signHidden) ? 1 : 0;
+ //encodeBinsEP((coeffSigns >> hiddenShift), numNonZero - hiddenShift);
+ m_fracBits += (numNonZero - hiddenShift) << 15;
- X265_CHECK((firstC2Flag != -1), "firstC2FlagIdx check failure\n");
- encodeBin(firstC2Flag, baseCtxMod[0]);
- }
-
- const int hiddenShift = (bHideFirstSign && signHidden) ? 1 : 0;
- encodeBinsEP((coeffSigns >> hiddenShift), numNonZero - hiddenShift);
-
- if (!c1 || numNonZero > C1FLAG_NUMBER)
- {
- if (!m_bitIf)
+ if (!c1 || numNonZero > C1FLAG_NUMBER)
{
- // Fast RD path
uint32_t sum = primitives.costCoeffRemain(absCoeff, numNonZero);
m_fracBits += ((uint64_t)sum << 15);
}
- else
+ }
+ // Standard path
+ else
+ {
+ idx = 0;
+ do
+ {
+ uint32_t symbol1 = absCoeff[idx] > 1;
+ uint32_t symbol2 = absCoeff[idx] > 2;
+ encodeBin(symbol1, baseCtxMod[c1]);
+
+ if (symbol1)
+ c1Next = 0;
+
+ if (symbol1 + firstC2Flag == 3)
+ firstC2Flag = symbol2;
+
+ c1 = (c1Next & 3);
+ c1Next >>= 2;
+ X265_CHECK(c1 <= 3, "c1 check failure\n");
+ idx++;
+ }
+ while(idx < numC1Flag);
+
+ if (!c1)
+ {
+ baseCtxMod = &m_contextState[(bIsLuma ? 0 : NUM_ABS_FLAG_CTX_LUMA) + OFF_ABS_FLAG_CTX + ctxSet];
+
+ X265_CHECK((firstC2Flag <= 1), "firstC2FlagIdx check failure\n");
+ encodeBin(firstC2Flag, baseCtxMod[0]);
+ }
+
+ const int hiddenShift = (bHideFirstSign && signHidden) ? 1 : 0;
+ encodeBinsEP((coeffSigns >> hiddenShift), numNonZero - hiddenShift);
+
+ if (!c1 || numNonZero > C1FLAG_NUMBER)
{
// Standard path
uint32_t goRiceParam = 0;
@@ -1776,8 +1816,9 @@
}
while(idx < numNonZero);
}
- }
- }
+ } // end of !bitIf
+ } // end of (numNonZero > 0)
+
// Initialize value for next loop
numNonZero = 0;
scanPosSigOff = (1 << MLS_CG_SIZE) - 1;
More information about the x265-devel mailing list