[x265] [PATCH] improve codeCoeffNxN by calculating context in the scanLast loop
Min Chen
chenm003 at 163.com
Fri Jan 30 13:19:20 CET 2015
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1422620352 -28800
# Node ID e00bbb6f0fe2c1dadc70ff6b31f681f8847624fe
# Parent 5e5dc3763f6386da9722903033a2b9dd263a5226
improve codeCoeffNxN by calculating context in the scanLast loop
---
source/common/common.h | 1 +
source/encoder/entropy.cpp | 118 ++++++++++++++++++++++++++++---------------
2 files changed, 78 insertions(+), 41 deletions(-)
diff -r 5e5dc3763f63 -r e00bbb6f0fe2 source/common/common.h
--- a/source/common/common.h Thu Jan 29 10:37:54 2015 -0600
+++ b/source/common/common.h Fri Jan 30 20:19:12 2015 +0800
@@ -281,6 +281,7 @@
#define MLS_GRP_NUM 64 // Max number of coefficient groups, max(16, 64)
#define MLS_CG_SIZE 4 // Coefficient group size of 4x4
+#define MLS_CG_BLK_SIZE (MLS_CG_SIZE * MLS_CG_SIZE)
#define MLS_CG_LOG2_SIZE 2
#define QUANT_IQUANT_SHIFT 20 // Q(QP%6) * IQ(QP%6) = 2^20
diff -r 5e5dc3763f63 -r e00bbb6f0fe2 source/encoder/entropy.cpp
--- a/source/encoder/entropy.cpp Thu Jan 29 10:37:54 2015 -0600
+++ b/source/encoder/entropy.cpp Fri Jan 30 20:19:12 2015 +0800
@@ -1428,30 +1428,65 @@
TUEntropyCodingParameters codingParameters;
cu.getTUEntropyCodingParameters(codingParameters, absPartIdx, log2TrSize, bIsLuma);
+ uint8_t coeffNum[MLS_GRP_NUM]; // value range[0, 16]
+ uint16_t coeffSign[MLS_GRP_NUM]; // bit mask map for non-zero coeff sign
+ uint16_t coeffFlag[MLS_GRP_NUM]; // bit mask map for non-zero coeff
+ memset(coeffNum, 0, sizeof(coeffNum));
+ memset(coeffFlag, 0, sizeof(coeffFlag));
+ memset(coeffSign, 0, sizeof(coeffSign));
+
//----- encode significance map -----
// Find position of last coefficient
int scanPosLast = 0;
uint32_t posLast;
uint64_t sigCoeffGroupFlag64 = 0;
- const uint32_t maskPosXY = ((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1;
- assert((uint32_t)((1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1) == (((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1));
+ //const uint32_t maskPosXY = ((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1;
+ X265_CHECK((uint32_t)((1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1) == (((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1), "maskPosXY fault\n");
+
+ uint32_t cgBlkNum = 0;
do
{
+ const uint32_t cgBlkIdx = scanPosLast & (MLS_CG_BLK_SIZE - 1);
+ const uint32_t cgIdx = scanPosLast >> MLS_CG_SIZE;
+
posLast = codingParameters.scan[scanPosLast++];
- const uint32_t isNZCoeff = (coeff[posLast] != 0);
+ const int curCoeff = coeff[posLast];
+ const uint32_t isNZCoeff = (curCoeff != 0);
// get L1 sig map
// NOTE: the new algorithm is complicated, so I keep reference code here
//uint32_t posy = posLast >> log2TrSize;
//uint32_t posx = posLast - (posy << log2TrSize);
//uint32_t blkIdx0 = ((posy >> MLS_CG_LOG2_SIZE) << codingParameters.log2TrSizeCG) + (posx >> MLS_CG_LOG2_SIZE);
- const uint32_t blkIdx = ((posLast >> (2 * MLS_CG_LOG2_SIZE)) & ~maskPosXY) + ((posLast >> MLS_CG_LOG2_SIZE) & maskPosXY);
- sigCoeffGroupFlag64 |= ((uint64_t)isNZCoeff << blkIdx);
+ //const uint32_t blkIdx = ((posLast >> (2 * MLS_CG_LOG2_SIZE)) & ~maskPosXY) + ((posLast >> MLS_CG_LOG2_SIZE) & maskPosXY);
+ //sigCoeffGroupFlag64 |= ((uint64_t)isNZCoeff << blkIdx);
numSig -= isNZCoeff;
+
+ // TODO: optimize by instruction BTS
+ coeffSign[cgIdx] += (uint16_t)(((uint32_t)curCoeff >> 31) << cgBlkNum);
+ coeffFlag[cgIdx] = (coeffFlag[cgIdx] << 1) + (uint16_t)isNZCoeff;
+ cgBlkNum += isNZCoeff;
+ // TODO: reduce memory store operations, but avoid a conditional branch
+ coeffNum[cgIdx] = (uint8_t)cgBlkNum;
+
+ if (cgBlkIdx == (MLS_CG_BLK_SIZE - 1))
+ {
+ cgBlkNum = 0;
+ }
}
while (numSig > 0);
scanPosLast--;
+ const int lastScanSet = scanPosLast >> MLS_CG_SIZE;
+
+ // Calculate the CG block non-zero mask; the last CG is always flagged as non-zero in the CG scan loop
+ for(int idx = 0; idx < lastScanSet; idx++)
+ {
+ const uint8_t subSet = (uint8_t)codingParameters.scanCG[idx];
+ const uint8_t nonZero = (coeffNum[idx] != 0);
+ sigCoeffGroupFlag64 |= ((nonZero ? (uint64_t)1 : 0) << subSet);
+ }
+
// Code position of last coefficient
{
@@ -1492,33 +1527,33 @@
// code significance flag
uint8_t * const baseCoeffGroupCtx = &m_contextState[OFF_SIG_CG_FLAG_CTX + (bIsLuma ? 0 : NUM_SIG_CG_FLAG_CTX)];
uint8_t * const baseCtx = bIsLuma ? &m_contextState[OFF_SIG_FLAG_CTX] : &m_contextState[OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA];
- const int lastScanSet = scanPosLast >> MLS_CG_SIZE;
uint32_t c1 = 1;
uint32_t goRiceParam = 0;
- int scanPosSig = scanPosLast;
+ int scanPosSigOff = scanPosLast - (lastScanSet << MLS_CG_SIZE) - 1;
+ int absCoeff[1 << MLS_CG_SIZE];
+ int numNonZero = 1;
+ unsigned long lastNZPosInCG;
+ unsigned long firstNZPosInCG;
+
+ absCoeff[0] = int(abs(coeff[posLast]));
for (int subSet = lastScanSet; subSet >= 0; subSet--)
{
- int numNonZero = 0;
- int subPos = subSet << MLS_CG_SIZE;
+ const uint32_t subCoeffFlag = coeffFlag[subSet];
+ uint32_t scanFlagMask = subCoeffFlag;
+ int subPosBase = subSet << MLS_CG_SIZE;
goRiceParam = 0;
- int absCoeff[1 << MLS_CG_SIZE];
- uint32_t coeffSigns = 0;
- int lastNZPosInCG = -1;
- int firstNZPosInCG = 1 << MLS_CG_SIZE;
- if (scanPosSig == scanPosLast)
+
+ if (subSet == lastScanSet)
{
- absCoeff[0] = int(abs(coeff[posLast]));
- coeffSigns = (coeff[posLast] < 0);
- numNonZero = 1;
- lastNZPosInCG = scanPosSig;
- firstNZPosInCG = scanPosSig;
- scanPosSig--;
+ X265_CHECK(scanPosSigOff == scanPosLast - (lastScanSet << MLS_CG_SIZE) - 1, "scanPos mistake\n");
+ scanFlagMask >>= 1;
}
+
// encode significant_coeffgroup_flag
const int cgBlkPos = codingParameters.scanCG[subSet];
- const int cgPosY = cgBlkPos >> codingParameters.log2TrSizeCG;
- const int cgPosX = cgBlkPos - (cgPosY << codingParameters.log2TrSizeCG);
+ const int cgPosY = cgBlkPos >> (log2TrSize - MLS_CG_LOG2_SIZE);
+ const int cgPosX = cgBlkPos & ((1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1);
const uint64_t cgBlkPosMask = ((uint64_t)1 << cgBlkPos);
if (subSet == lastScanSet || !subSet)
@@ -1535,31 +1570,31 @@
{
const int patternSigCtx = Quant::calcPatternSigCtx(sigCoeffGroupFlag64, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
uint32_t blkPos, sig, ctxSig;
- for (; scanPosSig >= subPos; scanPosSig--)
+ for (; scanPosSigOff >= 0; scanPosSigOff--)
{
- blkPos = codingParameters.scan[scanPosSig];
- sig = (coeff[blkPos] != 0);
- if (scanPosSig > subPos || subSet == 0 || numNonZero)
+ blkPos = codingParameters.scan[subPosBase + scanPosSigOff];
+ sig = scanFlagMask & 1;
+ scanFlagMask >>= 1;
+ X265_CHECK((uint32_t)(coeff[blkPos] != 0) == sig, "sign bit mistake\n");
+ if (scanPosSigOff != 0 || subSet == 0 || numNonZero)
{
ctxSig = Quant::getSigCtxInc(patternSigCtx, log2TrSize, trSize, blkPos, bIsLuma, codingParameters.firstSignificanceMapContext);
encodeBin(sig, baseCtx[ctxSig]);
}
- if (sig)
- {
- absCoeff[numNonZero] = int(abs(coeff[blkPos]));
- coeffSigns = 2 * coeffSigns + ((uint32_t)coeff[blkPos] >> 31);
- numNonZero++;
- if (lastNZPosInCG < 0)
- lastNZPosInCG = scanPosSig;
- firstNZPosInCG = scanPosSig;
- }
+ absCoeff[numNonZero] = int(abs(coeff[blkPos]));
+ numNonZero += sig;
}
}
- else
- scanPosSig = subPos - 1;
+ X265_CHECK(coeffNum[subSet] == numNonZero, "coefNum mistake\n");
+ uint32_t coeffSigns = coeffSign[subSet];
+ numNonZero = coeffNum[subSet];
if (numNonZero > 0)
{
+ X265_CHECK(subCoeffFlag > 0, "subCoeffFlag is zero\n");
+ CLZ(lastNZPosInCG, subCoeffFlag);
+ CTZ(firstNZPosInCG, subCoeffFlag);
+
bool signHidden = (lastNZPosInCG - firstNZPosInCG >= SBH_THRESHOLD);
uint32_t ctxSet = (subSet > 0 && bIsLuma) ? 2 : 0;
@@ -1596,10 +1631,8 @@
}
}
- if (bHideFirstSign && signHidden)
- encodeBinsEP((coeffSigns >> 1), numNonZero - 1);
- else
- encodeBinsEP(coeffSigns, numNonZero);
+ const int hiddenShift = (bHideFirstSign && signHidden) ? 1 : 0;
+ encodeBinsEP((coeffSigns >> hiddenShift), numNonZero - hiddenShift);
int firstCoeff2 = 1;
if (!c1 || numNonZero > C1FLAG_NUMBER)
@@ -1619,6 +1652,9 @@
}
}
}
+ // Initialize value for next loop
+ numNonZero = 0;
+ scanPosSigOff = (1 << MLS_CG_SIZE) - 1;
}
}
More information about the x265-devel
mailing list