[x265] [PATCH 3 of 3] rdoQuant: optimize getSigCtxInc()
Min Chen
chenm003 at 163.com
Fri Apr 24 15:46:17 CEST 2015
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1429883161 -28800
# Node ID 9e1a28600afa7454f5742d99786ae30efd84f826
# Parent 1c2c6bc05addac68b4e8f4c94abe322a2a743dc0
rdoQuant: optimize getSigCtxInc()
---
source/common/quant.cpp | 71 ++++++++++++++++++++++++++++++++++++++--------
1 files changed, 58 insertions(+), 13 deletions(-)
diff -r 1c2c6bc05add -r 9e1a28600afa source/common/quant.cpp
--- a/source/common/quant.cpp Fri Apr 24 21:45:58 2015 +0800
+++ b/source/common/quant.cpp Fri Apr 24 21:46:01 2015 +0800
@@ -659,6 +659,45 @@
}
}
+ static const uint8_t table_cnt[5][SCAN_SET_SIZE] =
+ {
+ // patternSigCtx = 0
+ {
+ 2, 1, 1, 0,
+ 1, 1, 0, 0,
+ 1, 0, 0, 0,
+ 0, 0, 0, 0,
+ },
+ // patternSigCtx = 1
+ {
+ 2, 2, 2, 2,
+ 1, 1, 1, 1,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ },
+ // patternSigCtx = 2
+ {
+ 2, 1, 0, 0,
+ 2, 1, 0, 0,
+ 2, 1, 0, 0,
+ 2, 1, 0, 0,
+ },
+ // patternSigCtx = 3
+ {
+ 2, 2, 2, 2,
+ 2, 2, 2, 2,
+ 2, 2, 2, 2,
+ 2, 2, 2, 2,
+ },
+ // 4x4
+ {
+ 0, 1, 4, 5,
+ 2, 3, 4, 5,
+ 6, 6, 8, 8,
+ 7, 7, 8, 8
+ }
+ };
+
/* iterate over coding groups in reverse scan order */
for (int cgScanPos = cgLastScanPos; cgScanPos >= 0; cgScanPos--)
{
@@ -668,6 +707,7 @@
const uint32_t cgPosX = cgBlkPos - (cgPosY << codeParams.log2TrSizeCG);
const uint64_t cgBlkPosMask = ((uint64_t)1 << cgBlkPos);
const int patternSigCtx = calcPatternSigCtx(sigCoeffGroupFlag64, cgPosX, cgPosY, cgBlkPos, cgStride);
+ const int ctxSigOffset = codeParams.firstSignificanceMapContext + (cgScanPos && bIsLuma ? 3 : 0);
if (c1 == 0)
ctxSet++;
@@ -676,8 +716,8 @@
if (cgScanPos && (coeffNum[cgScanPos] == 0))
{
// TODO: does we need zero-coeff cost?
- uint32_t scanPosBase = (cgScanPos << MLS_CG_SIZE);
- uint32_t blkPos = codeParams.scan[scanPosBase];
+ const uint32_t scanPosBase = (cgScanPos << MLS_CG_SIZE);
+ uint32_t blkPos = codeParams.scan[scanPosBase];
if (usePsyMask)
{
@@ -697,10 +737,13 @@
totalUncodedCost += costUncoded[blkPos + x];
totalRdCost += costUncoded[blkPos + x];
- scanPos = scanPosBase + y * MLS_CG_SIZE + x;
- const uint32_t ctxSig = getSigCtxInc(patternSigCtx, log2TrSize, trSize, codeParams.scan[scanPos], bIsLuma, codeParams.firstSignificanceMapContext);
- costSig[scanPos] = SIGCOST(estBitsSbac.significantBits[ctxSig][0]);
- costCoeff[scanPos] = costUncoded[blkPos + x];
+ const uint32_t scanPosOffset = y * MLS_CG_SIZE + x;
+ const uint32_t ctxSig = table_cnt[patternSigCtx][g_scan4x4[codeParams.scanType][scanPosOffset]] + ctxSigOffset;
+ X265_CHECK(trSize > 4, "trSize check failure\n");
+ X265_CHECK(ctxSig == getSigCtxInc(patternSigCtx, log2TrSize, trSize, codeParams.scan[scanPosBase + scanPosOffset], bIsLuma, codeParams.firstSignificanceMapContext), "sigCtx check failure\n");
+
+ costSig[scanPosBase + scanPosOffset] = SIGCOST(estBitsSbac.significantBits[ctxSig][0]);
+ costCoeff[scanPosBase + scanPosOffset] = costUncoded[blkPos + x];
sigRateDelta[blkPos + x] = estBitsSbac.significantBits[ctxSig][1] - estBitsSbac.significantBits[ctxSig][0];
}
blkPos += trSize;
@@ -719,10 +762,13 @@
totalUncodedCost += costUncoded[blkPos + x];
totalRdCost += costUncoded[blkPos + x];
- scanPos = scanPosBase + y * MLS_CG_SIZE + x;
- const uint32_t ctxSig = getSigCtxInc(patternSigCtx, log2TrSize, trSize, codeParams.scan[scanPos], bIsLuma, codeParams.firstSignificanceMapContext);
- costSig[scanPos] = SIGCOST(estBitsSbac.significantBits[ctxSig][0]);
- costCoeff[scanPos] = costUncoded[blkPos + x];
+ const uint32_t scanPosOffset = y * MLS_CG_SIZE + x;
+ const uint32_t ctxSig = table_cnt[patternSigCtx][g_scan4x4[codeParams.scanType][scanPosOffset]] + ctxSigOffset;
+ X265_CHECK(trSize > 4, "trSize check failure\n");
+ X265_CHECK(ctxSig == getSigCtxInc(patternSigCtx, log2TrSize, trSize, codeParams.scan[scanPosBase + scanPosOffset], bIsLuma, codeParams.firstSignificanceMapContext), "sigCtx check failure\n");
+
+ costSig[scanPosBase + scanPosOffset] = SIGCOST(estBitsSbac.significantBits[ctxSig][0]);
+ costCoeff[scanPosBase + scanPosOffset] = costUncoded[blkPos + x];
sigRateDelta[blkPos + x] = estBitsSbac.significantBits[ctxSig][1] - estBitsSbac.significantBits[ctxSig][0];
}
blkPos += trSize;
@@ -770,6 +816,8 @@
// coefficient level estimation
const int* greaterOneBits = estBitsSbac.greaterOneBits[4 * ctxSet + c1];
+ const uint32_t ctxSig = (blkPos == 0) ? 0 : table_cnt[(trSize == 4) ? 4 : patternSigCtx][g_scan4x4[codeParams.scanType][scanPosinCG]] + ctxSigOffset;
+ X265_CHECK(ctxSig == getSigCtxInc(patternSigCtx, log2TrSize, trSize, blkPos, bIsLuma, codeParams.firstSignificanceMapContext), "sigCtx check failure\n");
// before find lastest non-zero coeff
if (scanPos > (uint32_t)lastScanPos)
@@ -786,8 +834,6 @@
else if (!(subFlagMask & 1))
{
// fast zero coeff path
- const uint32_t ctxSig = getSigCtxInc(patternSigCtx, log2TrSize, trSize, blkPos, bIsLuma, codeParams.firstSignificanceMapContext);
-
/* set default costs to uncoded costs */
costSig[scanPos] = SIGCOST(estBitsSbac.significantBits[ctxSig][0]);
costCoeff[scanPos] = costUncoded[blkPos] + costSig[scanPos];
@@ -819,7 +865,6 @@
sigRateDelta[blkPos] = 0;
else
{
- const uint32_t ctxSig = getSigCtxInc(patternSigCtx, log2TrSize, trSize, blkPos, bIsLuma, codeParams.firstSignificanceMapContext);
if (maxAbsLevel < 3)
{
/* set default costs to uncoded costs */
More information about the x265-devel
mailing list