[x265] [PATCH 4 of 6] reduce stack space since log2TrSize is constant

Thu Sep 10 00:33:43 CEST 2015

# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1441837042 18000
# Node ID 5731695ca0c1c8e2c71bdf3cc3aa2b7fe13ea786
# Parent  7411ad14770ba8830c09789361fb48a6d4e5b44c
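reduce stack space since log2TrSize is constant

Size the per-coefficient cost and rate work arrays in quant.cpp by
trSize * trSize instead of the fixed 32 * 32 worst case, and drop the
log2TrSizeCG field from TUEntropyCodingParameters: it is always
log2TrSize - 2, so cudata.cpp and quant.cpp now compute it locally.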
---
 source/common/cudata.cpp |    4 ++--
 source/common/cudata.h   |    1 -
 source/common/quant.cpp  |   25 ++++++++++++-------------
 3 files changed, 14 insertions(+), 16 deletions(-)

diff -r 7411ad14770b -r 5731695ca0c1 source/common/cudata.cpp

--- a/source/common/cudata.cpp	Wed Sep 09 17:17:19 2015 -0500
+++ b/source/common/cudata.cpp	Wed Sep 09 17:17:22 2015 -0500
@@ -1951,7 +1951,7 @@
     bool bIsIntra = isIntra(absPartIdx);
 
     // set the group layout
-    result.log2TrSizeCG = log2TrSize - 2;
+    const uint32_t log2TrSizeCG = log2TrSize - 2;
 
     // set the scan orders
     if (bIsIntra)
@@ -1979,7 +1979,7 @@
         result.scanType = SCAN_DIAG;
 
     result.scan     = g_scanOrder[result.scanType][log2TrSize - 2];
-    result.scanCG   = g_scanOrderCG[result.scanType][result.log2TrSizeCG];
+    result.scanCG   = g_scanOrderCG[result.scanType][log2TrSizeCG];
 
     if (log2TrSize == 2)
         result.firstSignificanceMapContext = 0;
diff -r 7411ad14770b -r 5731695ca0c1 source/common/cudata.h
--- a/source/common/cudata.h	Wed Sep 09 17:17:19 2015 -0500
+++ b/source/common/cudata.h	Wed Sep 09 17:17:22 2015 -0500
@@ -323,7 +323,6 @@
     const uint16_t *scan;
     const uint16_t *scanCG;
     ScanType        scanType;
-    uint32_t        log2TrSizeCG;
     uint32_t        firstSignificanceMapContext;
 };
 
diff -r 7411ad14770b -r 5731695ca0c1 source/common/quant.cpp
--- a/source/common/quant.cpp	Wed Sep 09 17:17:19 2015 -0500
+++ b/source/common/quant.cpp	Wed Sep 09 17:17:22 2015 -0500
@@ -573,7 +573,7 @@
     if (!numSig)
         return 0;
 
-    uint32_t trSize = 1 << log2TrSize;
+    const uint32_t trSize = 1 << log2TrSize;
     int64_t lambda2 = m_qpParam[ttype].lambda2;
     const int64_t psyScale = ((int64_t)m_psyRdoqScale * m_qpParam[ttype].lambda);
 
@@ -590,13 +590,13 @@
 #define RDCOST(d, bits) ((((int64_t)d * d) << scaleBits) + SIGCOST(bits))
 #define PSYVALUE(rec)   ((psyScale * (rec)) >> (2 * transformShift + 1))
 
-    int64_t costCoeff[32 * 32];   /* d*d + lambda * bits */
-    int64_t costUncoded[32 * 32]; /* d*d + lambda * 0    */
-    int64_t costSig[32 * 32];     /* lambda * bits       */
+    int64_t costCoeff[trSize * trSize];   /* d*d + lambda * bits */
+    int64_t costUncoded[trSize * trSize]; /* d*d + lambda * 0    */
+    int64_t costSig[trSize * trSize];     /* lambda * bits       */
 
-    int rateIncUp[32 * 32];      /* signal overhead of increasing level */
-    int rateIncDown[32 * 32];    /* signal overhead of decreasing level */
-    int sigRateDelta[32 * 32];   /* signal difference between zero and non-zero */
+    int rateIncUp[trSize * trSize];      /* signal overhead of increasing level */
+    int rateIncDown[trSize * trSize];    /* signal overhead of decreasing level */
+    int sigRateDelta[trSize * trSize];   /* signal difference between zero and non-zero */
 
     int64_t costCoeffGroupSig[MLS_GRP_NUM]; /* lambda * bits of group coding cost */
     uint64_t sigCoeffGroupFlag64 = 0;
@@ -614,7 +614,8 @@
 
     TUEntropyCodingParameters codeParams;
     cu.getTUEntropyCodingParameters(codeParams, absPartIdx, log2TrSize, bIsLuma);
-    const uint32_t cgNum = 1 << (codeParams.log2TrSizeCG * 2);
+    const uint32_t log2TrSizeCG = log2TrSize - 2;
+    const uint32_t cgNum = 1 << (log2TrSizeCG * 2);
     const uint32_t cgStride = (trSize >> MLS_CG_LOG2_SIZE);
 
     uint8_t coeffNum[MLS_GRP_NUM];      // value range[0, 16]
@@ -745,8 +746,8 @@
     {
         uint32_t ctxSet = (cgScanPos && bIsLuma) ? 2 : 0;
         const uint32_t cgBlkPos = codeParams.scanCG[cgScanPos];
-        const uint32_t cgPosY   = cgBlkPos >> codeParams.log2TrSizeCG;
-        const uint32_t cgPosX   = cgBlkPos - (cgPosY << codeParams.log2TrSizeCG);
+        const uint32_t cgPosY   = cgBlkPos >> log2TrSizeCG;
+        const uint32_t cgPosX   = cgBlkPos - (cgPosY << log2TrSizeCG);
         const uint64_t cgBlkPosMask = ((uint64_t)1 << cgBlkPos);
         const int patternSigCtx = calcPatternSigCtx(sigCoeffGroupFlag64, cgPosX, cgPosY, cgBlkPos, cgStride);
         const int ctxSigOffset = codeParams.firstSignificanceMapContext + (cgScanPos && bIsLuma ? 3 : 0);
@@ -860,9 +861,7 @@
             const int* greaterOneBits = estBitsSbac.greaterOneBits[4 * ctxSet + c1];
             //const uint32_t ctxSig = (blkPos == 0) ? 0 : table_cnt[(trSize == 4) ? 4 : patternSigCtx][g_scan4x4[codeParams.scanType][scanPosinCG]] + ctxSigOffset;
             static const uint64_t table_cnt64[4] = {0x0000000100110112ULL, 0x0000000011112222ULL, 0x0012001200120012ULL, 0x2222222222222222ULL};
-            uint64_t ctxCnt = table_cnt64[patternSigCtx];
-            if (trSize == 4)
-                ctxCnt = 0x8877886654325410ULL;
+            uint64_t ctxCnt = (trSize == 4) ? 0x8877886654325410ULL : table_cnt64[patternSigCtx];
             const uint32_t ctxSig = (blkPos == 0) ? 0 : ((ctxCnt >> (4 * g_scan4x4[codeParams.scanType][scanPosinCG])) & 0xF) + ctxSigOffset;
             // NOTE: above equal to 'table_cnt[(trSize == 4) ? 4 : patternSigCtx][g_scan4x4[codeParams.scanType][scanPosinCG]] + ctxSigOffset'
             X265_CHECK(ctxSig == getSigCtxInc(patternSigCtx, log2TrSize, trSize, blkPos, bIsLuma, codeParams.firstSignificanceMapContext), "sigCtx check failure\n");