[x265] [PATCH 4 of 6] reduce stack space since log2TrSize is constant
Min Chen
chenm003 at 163.com
Thu Sep 10 00:33:43 CEST 2015
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1441837042 18000
# Node ID 5731695ca0c1c8e2c71bdf3cc3aa2b7fe13ea786
# Parent 7411ad14770ba8830c09789361fb48a6d4e5b44c
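reduce stack usage since log2TrSize is constant: size the cost/rate work arrays in quant.cpp by trSize and drop the log2TrSizeCG field from the TU entropy coding parameters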
---
source/common/cudata.cpp | 4 ++--
source/common/cudata.h | 1 -
source/common/quant.cpp | 25 ++++++++++++-------------
3 files changed, 14 insertions(+), 16 deletions(-)
diff -r 7411ad14770b -r 5731695ca0c1 source/common/cudata.cpp
--- a/source/common/cudata.cpp Wed Sep 09 17:17:19 2015 -0500
+++ b/source/common/cudata.cpp Wed Sep 09 17:17:22 2015 -0500
@@ -1951,7 +1951,7 @@
bool bIsIntra = isIntra(absPartIdx);
// set the group layout
- result.log2TrSizeCG = log2TrSize - 2;
+ const uint32_t log2TrSizeCG = log2TrSize - 2;
// set the scan orders
if (bIsIntra)
@@ -1979,7 +1979,7 @@
result.scanType = SCAN_DIAG;
result.scan = g_scanOrder[result.scanType][log2TrSize - 2];
- result.scanCG = g_scanOrderCG[result.scanType][result.log2TrSizeCG];
+ result.scanCG = g_scanOrderCG[result.scanType][log2TrSizeCG];
if (log2TrSize == 2)
result.firstSignificanceMapContext = 0;
diff -r 7411ad14770b -r 5731695ca0c1 source/common/cudata.h
--- a/source/common/cudata.h Wed Sep 09 17:17:19 2015 -0500
+++ b/source/common/cudata.h Wed Sep 09 17:17:22 2015 -0500
@@ -323,7 +323,6 @@
const uint16_t *scan;
const uint16_t *scanCG;
ScanType scanType;
- uint32_t log2TrSizeCG;
uint32_t firstSignificanceMapContext;
};
diff -r 7411ad14770b -r 5731695ca0c1 source/common/quant.cpp
--- a/source/common/quant.cpp Wed Sep 09 17:17:19 2015 -0500
+++ b/source/common/quant.cpp Wed Sep 09 17:17:22 2015 -0500
@@ -573,7 +573,7 @@
if (!numSig)
return 0;
- uint32_t trSize = 1 << log2TrSize;
+ const uint32_t trSize = 1 << log2TrSize;
int64_t lambda2 = m_qpParam[ttype].lambda2;
const int64_t psyScale = ((int64_t)m_psyRdoqScale * m_qpParam[ttype].lambda);
@@ -590,13 +590,13 @@
#define RDCOST(d, bits) ((((int64_t)d * d) << scaleBits) + SIGCOST(bits))
#define PSYVALUE(rec) ((psyScale * (rec)) >> (2 * transformShift + 1))
- int64_t costCoeff[32 * 32]; /* d*d + lambda * bits */
- int64_t costUncoded[32 * 32]; /* d*d + lambda * 0 */
- int64_t costSig[32 * 32]; /* lambda * bits */
+ int64_t costCoeff[trSize * trSize]; /* d*d + lambda * bits */
+ int64_t costUncoded[trSize * trSize]; /* d*d + lambda * 0 */
+ int64_t costSig[trSize * trSize]; /* lambda * bits */
- int rateIncUp[32 * 32]; /* signal overhead of increasing level */
- int rateIncDown[32 * 32]; /* signal overhead of decreasing level */
- int sigRateDelta[32 * 32]; /* signal difference between zero and non-zero */
+ int rateIncUp[trSize * trSize]; /* signal overhead of increasing level */
+ int rateIncDown[trSize * trSize]; /* signal overhead of decreasing level */
+ int sigRateDelta[trSize * trSize]; /* signal difference between zero and non-zero */
int64_t costCoeffGroupSig[MLS_GRP_NUM]; /* lambda * bits of group coding cost */
uint64_t sigCoeffGroupFlag64 = 0;
@@ -614,7 +614,8 @@
TUEntropyCodingParameters codeParams;
cu.getTUEntropyCodingParameters(codeParams, absPartIdx, log2TrSize, bIsLuma);
- const uint32_t cgNum = 1 << (codeParams.log2TrSizeCG * 2);
+ const uint32_t log2TrSizeCG = log2TrSize - 2;
+ const uint32_t cgNum = 1 << (log2TrSizeCG * 2);
const uint32_t cgStride = (trSize >> MLS_CG_LOG2_SIZE);
uint8_t coeffNum[MLS_GRP_NUM]; // value range[0, 16]
@@ -745,8 +746,8 @@
{
uint32_t ctxSet = (cgScanPos && bIsLuma) ? 2 : 0;
const uint32_t cgBlkPos = codeParams.scanCG[cgScanPos];
- const uint32_t cgPosY = cgBlkPos >> codeParams.log2TrSizeCG;
- const uint32_t cgPosX = cgBlkPos - (cgPosY << codeParams.log2TrSizeCG);
+ const uint32_t cgPosY = cgBlkPos >> log2TrSizeCG;
+ const uint32_t cgPosX = cgBlkPos - (cgPosY << log2TrSizeCG);
const uint64_t cgBlkPosMask = ((uint64_t)1 << cgBlkPos);
const int patternSigCtx = calcPatternSigCtx(sigCoeffGroupFlag64, cgPosX, cgPosY, cgBlkPos, cgStride);
const int ctxSigOffset = codeParams.firstSignificanceMapContext + (cgScanPos && bIsLuma ? 3 : 0);
@@ -860,9 +861,7 @@
const int* greaterOneBits = estBitsSbac.greaterOneBits[4 * ctxSet + c1];
//const uint32_t ctxSig = (blkPos == 0) ? 0 : table_cnt[(trSize == 4) ? 4 : patternSigCtx][g_scan4x4[codeParams.scanType][scanPosinCG]] + ctxSigOffset;
static const uint64_t table_cnt64[4] = {0x0000000100110112ULL, 0x0000000011112222ULL, 0x0012001200120012ULL, 0x2222222222222222ULL};
- uint64_t ctxCnt = table_cnt64[patternSigCtx];
- if (trSize == 4)
- ctxCnt = 0x8877886654325410ULL;
+ uint64_t ctxCnt = (trSize == 4) ? 0x8877886654325410ULL : table_cnt64[patternSigCtx];
const uint32_t ctxSig = (blkPos == 0) ? 0 : ((ctxCnt >> (4 * g_scan4x4[codeParams.scanType][scanPosinCG])) & 0xF) + ctxSigOffset;
// NOTE: above equal to 'table_cnt[(trSize == 4) ? 4 : patternSigCtx][g_scan4x4[codeParams.scanType][scanPosinCG]] + ctxSigOffset'
X265_CHECK(ctxSig == getSigCtxInc(patternSigCtx, log2TrSize, trSize, blkPos, bIsLuma, codeParams.firstSignificanceMapContext), "sigCtx check failure\n");
More information about the x265-devel mailing list