[x265] [PATCH 4 of 4] optimize: reduce memory and improve performance by replacing sigCoeffGroupFlag[] with sigCoeffGroupFlag64
Min Chen
chenm003 at 163.com
Wed Mar 12 19:03:24 CET 2014
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1394647361 25200
# Node ID f8fcbf42684bd8a5f6f1cc7bb401365d830f3f78
# Parent ad1470a0e17e48d3d198da09fb8b251b84c59614
optimize: reduce memory and improve performance by replacing sigCoeffGroupFlag[] with sigCoeffGroupFlag64
diff -r ad1470a0e17e -r f8fcbf42684b source/Lib/TLibCommon/TComTrQuant.cpp
--- a/source/Lib/TLibCommon/TComTrQuant.cpp Wed Mar 12 11:02:24 2014 -0700
+++ b/source/Lib/TLibCommon/TComTrQuant.cpp Wed Mar 12 11:02:41 2014 -0700
@@ -557,7 +557,7 @@
const uint32_t cgSize = (1 << MLS_CG_SIZE); // 16
double costCoeffGroupSig[MLS_GRP_NUM];
- uint32_t sigCoeffGroupFlag[MLS_GRP_NUM];
+ uint64_t sigCoeffGroupFlag64 = 0;
uint32_t ctxSet = 0;
int c1 = 1;
int c2 = 0;
@@ -568,18 +568,18 @@
int cgLastScanPos = -1;
int baseLevel;
uint32_t cgNum = 1 << codingParameters.log2TrSizeCG * 2;
- memset(sigCoeffGroupFlag, 0, sizeof(uint32_t) * cgNum);
int scanPos;
coeffGroupRDStats rdStats;
for (int cgScanPos = cgNum - 1; cgScanPos >= 0; cgScanPos--)
{
- uint32_t cgBlkPos = codingParameters.scanCG[cgScanPos];
- uint32_t cgPosY = cgBlkPos >> codingParameters.log2TrSizeCG;
- uint32_t cgPosX = cgBlkPos - (cgPosY << codingParameters.log2TrSizeCG);
+ const uint32_t cgBlkPos = codingParameters.scanCG[cgScanPos];
+ const uint32_t cgPosY = cgBlkPos >> codingParameters.log2TrSizeCG;
+ const uint32_t cgPosX = cgBlkPos - (cgPosY << codingParameters.log2TrSizeCG);
+ const uint64_t cgBlkPosMask = ((uint64_t)1 << cgBlkPos);
memset(&rdStats, 0, sizeof(coeffGroupRDStats));
- const int patternSigCtx = TComTrQuant::calcPatternSigCtx(sigCoeffGroupFlag, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
+ const int patternSigCtx = TComTrQuant::calcPatternSigCtx(sigCoeffGroupFlag64, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
for (int scanPosinCG = cgSize - 1; scanPosinCG >= 0; scanPosinCG--)
{
scanPos = cgScanPos * cgSize + scanPosinCG;
@@ -698,7 +698,7 @@
}
if (dstCoeff[blkPos])
{
- sigCoeffGroupFlag[cgBlkPos] = 1;
+ sigCoeffGroupFlag64 |= cgBlkPosMask;
rdStats.codedLevelAndDist += costCoeff[scanPos] - costSig[scanPos];
rdStats.uncodedDist += costCoeff0[scanPos];
if (scanPosinCG != 0)
@@ -713,9 +713,9 @@
costCoeffGroupSig[cgScanPos] = 0;
if (cgScanPos)
{
- if (sigCoeffGroupFlag[cgBlkPos] == 0)
+ if ((sigCoeffGroupFlag64 & cgBlkPosMask) == 0)
{
- uint32_t ctxSig = getSigCoeffGroupCtxInc(sigCoeffGroupFlag, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
+ uint32_t ctxSig = getSigCoeffGroupCtxInc(sigCoeffGroupFlag64, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
baseCost += xGetRateSigCoeffGroup(0, ctxSig) - rdStats.sigCost;
costCoeffGroupSig[cgScanPos] = xGetRateSigCoeffGroup(0, ctxSig);
}
@@ -732,7 +732,7 @@
double costZeroCG = baseCost;
// add SigCoeffGroupFlag cost to total cost
- uint32_t ctxSig = getSigCoeffGroupCtxInc(sigCoeffGroupFlag, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
+ uint32_t ctxSig = getSigCoeffGroupCtxInc(sigCoeffGroupFlag64, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
if (cgScanPos < cgLastScanPos)
{
baseCost += xGetRateSigCoeffGroup(1, ctxSig);
@@ -748,7 +748,7 @@
// if we can save cost, change this block to all-zero block
if (costZeroCG < baseCost)
{
- sigCoeffGroupFlag[cgBlkPos] = 0;
+ sigCoeffGroupFlag64 &= ~cgBlkPosMask;
baseCost = costZeroCG;
if (cgScanPos < cgLastScanPos)
{
@@ -772,7 +772,7 @@
}
else
{
- sigCoeffGroupFlag[cgBlkPos] = 1;
+ sigCoeffGroupFlag64 |= cgBlkPosMask;
}
}
} //end for (cgScanPos)
@@ -804,7 +804,7 @@
{
uint32_t cgBlkPos = codingParameters.scanCG[cgScanPos];
baseCost -= costCoeffGroupSig[cgScanPos];
- if (sigCoeffGroupFlag[cgBlkPos])
+ if (sigCoeffGroupFlag64 & ((uint64_t)1 << cgBlkPos))
{
for (int scanPosinCG = cgSize - 1; scanPosinCG >= 0; scanPosinCG--)
{
@@ -1002,14 +1002,15 @@
* \param height height of the block
* \returns pattern for current coefficient group
*/
-int TComTrQuant::calcPatternSigCtx(const uint32_t* sigCoeffGroupFlag, uint32_t cgPosX, uint32_t cgPosY, uint32_t log2TrSizeCG)
+int TComTrQuant::calcPatternSigCtx(const uint64_t sigCoeffGroupFlag64, uint32_t cgPosX, uint32_t cgPosY, uint32_t log2TrSizeCG) // bit (cgPosY << log2TrSizeCG) + cgPosX of the map is the flag of group (cgPosX, cgPosY)
{
if (log2TrSizeCG == 0) return 0;
const uint32_t trSizeCG = 1 << log2TrSizeCG;
- const uint32_t* sigPos = &sigCoeffGroupFlag[(cgPosY << log2TrSizeCG) + cgPosX];
- uint32_t sigRight = (cgPosX < (trSizeCG - 1)) && (sigPos[1] != 0);
- uint32_t sigLower = (cgPosY < (trSizeCG - 1)) && (sigPos[trSizeCG] != 0);
+ assert(trSizeCG <= 8); // a 64-bit map can hold at most 8x8 coefficient-group flags
+ const uint32_t sigPos = (uint32_t)((sigCoeffGroupFlag64 >> ((cgPosY << log2TrSizeCG) + cgPosX)) >> 1); // two-step shift: a single shift by (pos + 1) is undefined when pos == 63
+ uint32_t sigRight = (cgPosX < (trSizeCG - 1)) && ((sigPos & 1) != 0); // bit 0 of sigPos is the flag at pos + 1 (right neighbour)
+ uint32_t sigLower = (cgPosY < (trSizeCG - 1)) && ((sigPos & (1 << (trSizeCG - 1))) != 0); // bit (trSizeCG - 1) of sigPos is the flag at pos + trSizeCG (lower neighbour)
return sigRight + (sigLower << 1);
}
@@ -1332,15 +1333,16 @@
* \param uiLog2BlkSize log2 value of block size
* \returns ctxInc for current scan position
*/
-uint32_t TComTrQuant::getSigCoeffGroupCtxInc(const uint32_t* sigCoeffGroupFlag,
+uint32_t TComTrQuant::getSigCoeffGroupCtxInc(const uint64_t sigCoeffGroupFlag64, // bit (cgPosY << log2TrSizeCG) + cgPosX is the flag of group (cgPosX, cgPosY)
const uint32_t cgPosX,
const uint32_t cgPosY,
const uint32_t log2TrSizeCG)
{
const uint32_t trSizeCG = 1 << log2TrSizeCG;
- const uint32_t* sigPos = &sigCoeffGroupFlag[(cgPosY << log2TrSizeCG) + cgPosX];
- uint32_t sigRight = (cgPosX < (trSizeCG - 1)) && (sigPos[1] != 0);
- uint32_t sigLower = (cgPosY < (trSizeCG - 1)) && (sigPos[trSizeCG] != 0);
+ assert(trSizeCG <= 8); // a 64-bit map can hold at most 8x8 coefficient-group flags
+ const uint32_t sigPos = (uint32_t)((sigCoeffGroupFlag64 >> ((cgPosY << log2TrSizeCG) + cgPosX)) >> 1); // two-step shift: a single shift by (pos + 1) is undefined when pos == 63
+ uint32_t sigRight = (cgPosX < (trSizeCG - 1)) && ((sigPos & 1) != 0); // bit 0 of sigPos is the flag at pos + 1 (right neighbour)
+ uint32_t sigLower = (cgPosY < (trSizeCG - 1)) && ((sigPos & (1 << (trSizeCG - 1))) != 0); // bit (trSizeCG - 1) of sigPos is the flag at pos + trSizeCG (lower neighbour)
return sigRight | sigLower;
}
diff -r ad1470a0e17e -r f8fcbf42684b source/Lib/TLibCommon/TComTrQuant.h
--- a/source/Lib/TLibCommon/TComTrQuant.h Wed Mar 12 11:02:24 2014 -0700
+++ b/source/Lib/TLibCommon/TComTrQuant.h Wed Mar 12 11:02:41 2014 -0700
@@ -158,9 +158,9 @@
void setScalingList(TComScalingList *scalingList);
void processScalingListEnc(int32_t *coeff, int32_t *quantcoeff, int quantScales, uint32_t height, uint32_t width, uint32_t ratio, int sizuNum, uint32_t dc);
void processScalingListDec(int32_t *coeff, int32_t *dequantcoeff, int invQuantScales, uint32_t height, uint32_t width, uint32_t ratio, int sizuNum, uint32_t dc);
- static int calcPatternSigCtx(const uint32_t* sigCoeffGroupFlag, uint32_t cgPosX, uint32_t cgPosY, uint32_t log2TrSizeCG);
+ static int calcPatternSigCtx(const uint64_t sigCoeffGroupFlag64, uint32_t cgPosX, uint32_t cgPosY, uint32_t log2TrSizeCG);
static int getSigCtxInc(int patternSigCtx, const TUEntropyCodingParameters &codingParameters, const int scanPosition, const int log2TrSize, const TextType ttype);
- static uint32_t getSigCoeffGroupCtxInc(const uint32_t* sigCoeffGroupFlag, uint32_t cgPosX, uint32_t cgPosY, const uint32_t log2TrSizeCG);
+ static uint32_t getSigCoeffGroupCtxInc(const uint64_t sigCoeffGroupFlag64, uint32_t cgPosX, uint32_t cgPosY, const uint32_t log2TrSizeCG);
static void getTUEntropyCodingParameters(TComDataCU* cu, TUEntropyCodingParameters &result, uint32_t absPartIdx, uint32_t log2TrSize, TextType ttype);
estBitsSbacStruct* m_estBitsSbac;
diff -r ad1470a0e17e -r f8fcbf42684b source/Lib/TLibEncoder/TEncSbac.cpp
--- a/source/Lib/TLibEncoder/TEncSbac.cpp Wed Mar 12 11:02:24 2014 -0700
+++ b/source/Lib/TLibEncoder/TEncSbac.cpp Wed Mar 12 11:02:41 2014 -0700
@@ -2089,9 +2089,7 @@
// Find position of last coefficient
int scanPosLast = -1;
int posLast;
- uint32_t sigCoeffGroupFlag[MLS_GRP_NUM];
- uint32_t cgNum = 1 << codingParameters.log2TrSizeCG * 2;
- memset(sigCoeffGroupFlag, 0, sizeof(uint32_t) * cgNum);
+ uint64_t sigCoeffGroupFlag64 = 0;
const uint32_t maskPosXY = (1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1;
do
{
@@ -2104,7 +2102,7 @@
//uint32_t posx = posLast - (posy << log2TrSize);
//uint32_t blkIdx0 = ((posy >> MLS_CG_LOG2_SIZE) << codingParameters.log2TrSizeCG) + (posx >> MLS_CG_LOG2_SIZE);
uint32_t blkIdx = ((posLast >> (2 * MLS_CG_LOG2_SIZE)) & ~maskPosXY) + ((posLast >> MLS_CG_LOG2_SIZE) & maskPosXY);
- sigCoeffGroupFlag[blkIdx] = 1;
+ sigCoeffGroupFlag64 |= ((uint64_t)1 << blkIdx);
numSig--;
}
@@ -2142,24 +2140,25 @@
scanPosSig--;
}
// encode significant_coeffgroup_flag
- int cgBlkPos = codingParameters.scanCG[subSet];
- int cgPosY = cgBlkPos >> codingParameters.log2TrSizeCG;
- int cgPosX = cgBlkPos - (cgPosY << codingParameters.log2TrSizeCG);
+ const int cgBlkPos = codingParameters.scanCG[subSet];
+ const int cgPosY = cgBlkPos >> codingParameters.log2TrSizeCG;
+ const int cgPosX = cgBlkPos - (cgPosY << codingParameters.log2TrSizeCG);
+ const uint64_t cgBlkPosMask = ((uint64_t)1 << cgBlkPos);
if (subSet == lastScanSet || subSet == 0)
{
- sigCoeffGroupFlag[cgBlkPos] = 1;
+ sigCoeffGroupFlag64 |= cgBlkPosMask;
}
else
{
- uint32_t sigCoeffGroup = (sigCoeffGroupFlag[cgBlkPos] != 0);
- uint32_t ctxSig = TComTrQuant::getSigCoeffGroupCtxInc(sigCoeffGroupFlag, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
+ uint32_t sigCoeffGroup = ((sigCoeffGroupFlag64 & cgBlkPosMask) != 0);
+ uint32_t ctxSig = TComTrQuant::getSigCoeffGroupCtxInc(sigCoeffGroupFlag64, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
m_binIf->encodeBin(sigCoeffGroup, baseCoeffGroupCtx[ctxSig]);
}
// encode significant_coeff_flag
- if (sigCoeffGroupFlag[cgBlkPos])
+ if (sigCoeffGroupFlag64 & cgBlkPosMask)
{
- const int patternSigCtx = TComTrQuant::calcPatternSigCtx(sigCoeffGroupFlag, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
+ const int patternSigCtx = TComTrQuant::calcPatternSigCtx(sigCoeffGroupFlag64, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
uint32_t blkPos, sig, ctxSig;
for (; scanPosSig >= subPos; scanPosSig--)
{
More information about the x265-devel
mailing list