[x265] [PATCH 4 of 4] optimize: reduce memory and improve performance by replacing sigCoeffGroupFlag[] with sigCoeffGroupFlag64

Min Chen chenm003 at 163.com
Wed Mar 12 19:03:24 CET 2014


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1394647361 25200
# Node ID f8fcbf42684bd8a5f6f1cc7bb401365d830f3f78
# Parent  ad1470a0e17e48d3d198da09fb8b251b84c59614
optimize: reduce memory and improve performance by replacing sigCoeffGroupFlag[] with sigCoeffGroupFlag64

diff -r ad1470a0e17e -r f8fcbf42684b source/Lib/TLibCommon/TComTrQuant.cpp
--- a/source/Lib/TLibCommon/TComTrQuant.cpp	Wed Mar 12 11:02:24 2014 -0700
+++ b/source/Lib/TLibCommon/TComTrQuant.cpp	Wed Mar 12 11:02:41 2014 -0700
@@ -557,7 +557,7 @@
 
     const uint32_t cgSize = (1 << MLS_CG_SIZE); // 16
     double costCoeffGroupSig[MLS_GRP_NUM];
-    uint32_t sigCoeffGroupFlag[MLS_GRP_NUM];
+    uint64_t sigCoeffGroupFlag64 = 0;
     uint32_t   ctxSet    = 0;
     int    c1            = 1;
     int    c2            = 0;
@@ -568,18 +568,18 @@
     int    cgLastScanPos = -1;
     int    baseLevel;
     uint32_t cgNum = 1 << codingParameters.log2TrSizeCG * 2;
-    memset(sigCoeffGroupFlag, 0, sizeof(uint32_t) * cgNum);
 
     int scanPos;
     coeffGroupRDStats rdStats;
 
     for (int cgScanPos = cgNum - 1; cgScanPos >= 0; cgScanPos--)
     {
-        uint32_t cgBlkPos = codingParameters.scanCG[cgScanPos];
-        uint32_t cgPosY   = cgBlkPos >> codingParameters.log2TrSizeCG;
-        uint32_t cgPosX   = cgBlkPos - (cgPosY << codingParameters.log2TrSizeCG);
+        const uint32_t cgBlkPos = codingParameters.scanCG[cgScanPos];
+        const uint32_t cgPosY   = cgBlkPos >> codingParameters.log2TrSizeCG;
+        const uint32_t cgPosX   = cgBlkPos - (cgPosY << codingParameters.log2TrSizeCG);
+        const uint64_t cgBlkPosMask = ((uint64_t)1 << cgBlkPos);
         memset(&rdStats, 0, sizeof(coeffGroupRDStats));
-        const int patternSigCtx = TComTrQuant::calcPatternSigCtx(sigCoeffGroupFlag, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
+        const int patternSigCtx = TComTrQuant::calcPatternSigCtx(sigCoeffGroupFlag64, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
         for (int scanPosinCG = cgSize - 1; scanPosinCG >= 0; scanPosinCG--)
         {
             scanPos = cgScanPos * cgSize + scanPosinCG;
@@ -698,7 +698,7 @@
             }
             if (dstCoeff[blkPos])
             {
-                sigCoeffGroupFlag[cgBlkPos] = 1;
+                sigCoeffGroupFlag64 |= cgBlkPosMask;
                 rdStats.codedLevelAndDist += costCoeff[scanPos] - costSig[scanPos];
                 rdStats.uncodedDist += costCoeff0[scanPos];
                 if (scanPosinCG != 0)
@@ -713,9 +713,9 @@
             costCoeffGroupSig[cgScanPos] = 0;
             if (cgScanPos)
             {
-                if (sigCoeffGroupFlag[cgBlkPos] == 0)
+                if ((sigCoeffGroupFlag64 & cgBlkPosMask) == 0)
                 {
-                    uint32_t ctxSig = getSigCoeffGroupCtxInc(sigCoeffGroupFlag, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
+                    uint32_t ctxSig = getSigCoeffGroupCtxInc(sigCoeffGroupFlag64, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
                     baseCost += xGetRateSigCoeffGroup(0, ctxSig) - rdStats.sigCost;
                     costCoeffGroupSig[cgScanPos] = xGetRateSigCoeffGroup(0, ctxSig);
                 }
@@ -732,7 +732,7 @@
                         double costZeroCG = baseCost;
 
                         // add SigCoeffGroupFlag cost to total cost
-                        uint32_t ctxSig = getSigCoeffGroupCtxInc(sigCoeffGroupFlag, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
+                        uint32_t ctxSig = getSigCoeffGroupCtxInc(sigCoeffGroupFlag64, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
                         if (cgScanPos < cgLastScanPos)
                         {
                             baseCost  += xGetRateSigCoeffGroup(1, ctxSig);
@@ -748,7 +748,7 @@
                         // if we can save cost, change this block to all-zero block
                         if (costZeroCG < baseCost)
                         {
-                            sigCoeffGroupFlag[cgBlkPos] = 0;
+                            sigCoeffGroupFlag64 &= ~cgBlkPosMask;
                             baseCost = costZeroCG;
                             if (cgScanPos < cgLastScanPos)
                             {
@@ -772,7 +772,7 @@
             }
             else
             {
-                sigCoeffGroupFlag[cgBlkPos] = 1;
+                sigCoeffGroupFlag64 |= cgBlkPosMask;
             }
         }
     } //end for (cgScanPos)
@@ -804,7 +804,7 @@
     {
         uint32_t cgBlkPos = codingParameters.scanCG[cgScanPos];
         baseCost -= costCoeffGroupSig[cgScanPos];
-        if (sigCoeffGroupFlag[cgBlkPos])
+        if (sigCoeffGroupFlag64 & ((uint64_t)1 << cgBlkPos))
         {
             for (int scanPosinCG = cgSize - 1; scanPosinCG >= 0; scanPosinCG--)
             {
@@ -1002,14 +1002,15 @@
  * \param height height of the block
  * \returns pattern for current coefficient group
  */
-int TComTrQuant::calcPatternSigCtx(const uint32_t* sigCoeffGroupFlag, uint32_t cgPosX, uint32_t cgPosY, uint32_t log2TrSizeCG)
+int TComTrQuant::calcPatternSigCtx(const uint64_t sigCoeffGroupFlag64, uint32_t cgPosX, uint32_t cgPosY, uint32_t log2TrSizeCG)
 {
     if (log2TrSizeCG == 0) return 0;
 
     const uint32_t trSizeCG = 1 << log2TrSizeCG;
-    const uint32_t* sigPos = &sigCoeffGroupFlag[(cgPosY << log2TrSizeCG) + cgPosX];
-    uint32_t sigRight = (cgPosX < (trSizeCG - 1)) && (sigPos[1] != 0);
-    uint32_t sigLower = (cgPosY < (trSizeCG - 1)) && (sigPos[trSizeCG] != 0);
+    assert(trSizeCG <= 32);
+    const uint32_t sigPos = sigCoeffGroupFlag64 >> (1 + (cgPosY << log2TrSizeCG) + cgPosX);
+    uint32_t sigRight = (cgPosX < (trSizeCG - 1)) && ((sigPos & 1) != 0);
+    uint32_t sigLower = (cgPosY < (trSizeCG - 1)) && ((sigPos & (1 << (trSizeCG - 1))) != 0);
 
     return sigRight + (sigLower << 1);
 }
@@ -1332,15 +1333,16 @@
  * \param uiLog2BlkSize log2 value of block size
  * \returns ctxInc for current scan position
  */
-uint32_t TComTrQuant::getSigCoeffGroupCtxInc(const uint32_t* sigCoeffGroupFlag,
+uint32_t TComTrQuant::getSigCoeffGroupCtxInc(const uint64_t  sigCoeffGroupFlag64,
                                              const uint32_t  cgPosX,
                                              const uint32_t  cgPosY,
                                              const uint32_t  log2TrSizeCG)
 {
     const uint32_t trSizeCG = 1 << log2TrSizeCG;
-    const uint32_t* sigPos = &sigCoeffGroupFlag[(cgPosY << log2TrSizeCG) + cgPosX];
-    uint32_t sigRight = (cgPosX < (trSizeCG - 1)) && (sigPos[1] != 0);
-    uint32_t sigLower = (cgPosY < (trSizeCG - 1)) && (sigPos[trSizeCG] != 0);
+    assert(trSizeCG <= 32);
+    const uint32_t sigPos = sigCoeffGroupFlag64 >> (1 + (cgPosY << log2TrSizeCG) + cgPosX);
+    uint32_t sigRight = (cgPosX < (trSizeCG - 1)) && ((sigPos & 1) != 0);
+    uint32_t sigLower = (cgPosY < (trSizeCG - 1)) && ((sigPos & (1 << (trSizeCG - 1))) != 0);
 
     return sigRight | sigLower;
 }
diff -r ad1470a0e17e -r f8fcbf42684b source/Lib/TLibCommon/TComTrQuant.h
--- a/source/Lib/TLibCommon/TComTrQuant.h	Wed Mar 12 11:02:24 2014 -0700
+++ b/source/Lib/TLibCommon/TComTrQuant.h	Wed Mar 12 11:02:41 2014 -0700
@@ -158,9 +158,9 @@
     void setScalingList(TComScalingList *scalingList);
     void processScalingListEnc(int32_t *coeff, int32_t *quantcoeff, int quantScales, uint32_t height, uint32_t width, uint32_t ratio, int sizuNum, uint32_t dc);
     void processScalingListDec(int32_t *coeff, int32_t *dequantcoeff, int invQuantScales, uint32_t height, uint32_t width, uint32_t ratio, int sizuNum, uint32_t dc);
-    static int calcPatternSigCtx(const uint32_t* sigCoeffGroupFlag, uint32_t cgPosX, uint32_t cgPosY, uint32_t log2TrSizeCG);
+    static int calcPatternSigCtx(const uint64_t sigCoeffGroupFlag64, uint32_t cgPosX, uint32_t cgPosY, uint32_t log2TrSizeCG);
     static int getSigCtxInc(int patternSigCtx, const TUEntropyCodingParameters &codingParameters, const int scanPosition, const int log2TrSize, const TextType ttype);
-    static uint32_t getSigCoeffGroupCtxInc(const uint32_t* sigCoeffGroupFlag, uint32_t cgPosX, uint32_t cgPosY, const uint32_t log2TrSizeCG);
+    static uint32_t getSigCoeffGroupCtxInc(const uint64_t sigCoeffGroupFlag64, uint32_t cgPosX, uint32_t cgPosY, const uint32_t log2TrSizeCG);
     static void getTUEntropyCodingParameters(TComDataCU* cu, TUEntropyCodingParameters &result, uint32_t absPartIdx, uint32_t log2TrSize, TextType ttype);
     estBitsSbacStruct* m_estBitsSbac;
 
diff -r ad1470a0e17e -r f8fcbf42684b source/Lib/TLibEncoder/TEncSbac.cpp
--- a/source/Lib/TLibEncoder/TEncSbac.cpp	Wed Mar 12 11:02:24 2014 -0700
+++ b/source/Lib/TLibEncoder/TEncSbac.cpp	Wed Mar 12 11:02:41 2014 -0700
@@ -2089,9 +2089,7 @@
     // Find position of last coefficient
     int scanPosLast = -1;
     int posLast;
-    uint32_t sigCoeffGroupFlag[MLS_GRP_NUM];
-    uint32_t cgNum = 1 << codingParameters.log2TrSizeCG * 2;
-    memset(sigCoeffGroupFlag, 0, sizeof(uint32_t) * cgNum);
+    uint64_t sigCoeffGroupFlag64 = 0;
     const uint32_t maskPosXY = (1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1;
     do
     {
@@ -2104,7 +2102,7 @@
             //uint32_t posx   = posLast - (posy << log2TrSize);
             //uint32_t blkIdx0 = ((posy >> MLS_CG_LOG2_SIZE) << codingParameters.log2TrSizeCG) + (posx >> MLS_CG_LOG2_SIZE);
             uint32_t blkIdx = ((posLast >> (2 * MLS_CG_LOG2_SIZE)) & ~maskPosXY) + ((posLast >> MLS_CG_LOG2_SIZE) & maskPosXY);
-            sigCoeffGroupFlag[blkIdx] = 1;
+            sigCoeffGroupFlag64 |= ((uint64_t)1 << blkIdx);
 
             numSig--;
         }
@@ -2142,24 +2140,25 @@
             scanPosSig--;
         }
         // encode significant_coeffgroup_flag
-        int cgBlkPos = codingParameters.scanCG[subSet];
-        int cgPosY   = cgBlkPos >> codingParameters.log2TrSizeCG;
-        int cgPosX   = cgBlkPos - (cgPosY << codingParameters.log2TrSizeCG);
+        const int cgBlkPos = codingParameters.scanCG[subSet];
+        const int cgPosY   = cgBlkPos >> codingParameters.log2TrSizeCG;
+        const int cgPosX   = cgBlkPos - (cgPosY << codingParameters.log2TrSizeCG);
+        const uint64_t cgBlkPosMask = ((uint64_t)1 << cgBlkPos);
 
         if (subSet == lastScanSet || subSet == 0)
         {
-            sigCoeffGroupFlag[cgBlkPos] = 1;
+            sigCoeffGroupFlag64 |= cgBlkPosMask;
         }
         else
         {
-            uint32_t sigCoeffGroup = (sigCoeffGroupFlag[cgBlkPos] != 0);
-            uint32_t ctxSig = TComTrQuant::getSigCoeffGroupCtxInc(sigCoeffGroupFlag, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
+            uint32_t sigCoeffGroup = ((sigCoeffGroupFlag64 & cgBlkPosMask) != 0);
+            uint32_t ctxSig = TComTrQuant::getSigCoeffGroupCtxInc(sigCoeffGroupFlag64, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
             m_binIf->encodeBin(sigCoeffGroup, baseCoeffGroupCtx[ctxSig]);
         }
         // encode significant_coeff_flag
-        if (sigCoeffGroupFlag[cgBlkPos])
+        if (sigCoeffGroupFlag64 & cgBlkPosMask)
         {
-            const int patternSigCtx = TComTrQuant::calcPatternSigCtx(sigCoeffGroupFlag, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
+            const int patternSigCtx = TComTrQuant::calcPatternSigCtx(sigCoeffGroupFlag64, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
             uint32_t blkPos, sig, ctxSig;
             for (; scanPosSig >= subPos; scanPosSig--)
             {



More information about the x265-devel mailing list