[x265] [PATCH] optimize: reduce memory and improve performance by replacing sigCoeffGroupFlag[] with sigCoeffGroupFlag64

Min Chen chenm003 at 163.com
Wed Mar 12 19:09:21 CET 2014


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1394647754 25200
# Node ID 32a612f172f5e605ba2394ce914211156601d734
# Parent  6fb38d60fe3724eae6b9b40229f29e9c91012575
optimize: reduce memory and improve performance by replacing sigCoeffGroupFlag[] with sigCoeffGroupFlag64

diff -r 6fb38d60fe37 -r 32a612f172f5 source/Lib/TLibCommon/TComTrQuant.cpp
--- a/source/Lib/TLibCommon/TComTrQuant.cpp	Wed Mar 12 11:08:59 2014 -0700
+++ b/source/Lib/TLibCommon/TComTrQuant.cpp	Wed Mar 12 11:09:14 2014 -0700
@@ -559,7 +559,7 @@
 
     const uint32_t cgSize = (1 << MLS_CG_SIZE); // 16
     double costCoeffGroupSig[MLS_GRP_NUM];
-    uint32_t sigCoeffGroupFlag[MLS_GRP_NUM];
+    uint64_t sigCoeffGroupFlag64 = 0;
     uint32_t   ctxSet    = 0;
     int    c1            = 1;
     int    c2            = 0;
@@ -570,18 +570,18 @@
     int    cgLastScanPos = -1;
     int    baseLevel;
     uint32_t cgNum = 1 << codingParameters.log2TrSizeCG * 2;
-    memset(sigCoeffGroupFlag, 0, sizeof(uint32_t) * cgNum);
 
     int scanPos;
     coeffGroupRDStats rdStats;
 
     for (int cgScanPos = cgNum - 1; cgScanPos >= 0; cgScanPos--)
     {
-        uint32_t cgBlkPos = codingParameters.scanCG[cgScanPos];
-        uint32_t cgPosY   = cgBlkPos >> codingParameters.log2TrSizeCG;
-        uint32_t cgPosX   = cgBlkPos - (cgPosY << codingParameters.log2TrSizeCG);
+        const uint32_t cgBlkPos = codingParameters.scanCG[cgScanPos];
+        const uint32_t cgPosY   = cgBlkPos >> codingParameters.log2TrSizeCG;
+        const uint32_t cgPosX   = cgBlkPos - (cgPosY << codingParameters.log2TrSizeCG);
+        const uint64_t cgBlkPosMask = ((uint64_t)1 << cgBlkPos);
         memset(&rdStats, 0, sizeof(coeffGroupRDStats));
-        const int patternSigCtx = TComTrQuant::calcPatternSigCtx(sigCoeffGroupFlag, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
+        const int patternSigCtx = TComTrQuant::calcPatternSigCtx(sigCoeffGroupFlag64, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
         for (int scanPosinCG = cgSize - 1; scanPosinCG >= 0; scanPosinCG--)
         {
             scanPos = cgScanPos * cgSize + scanPosinCG;
@@ -700,7 +700,7 @@
             }
             if (dstCoeff[blkPos])
             {
-                sigCoeffGroupFlag[cgBlkPos] = 1;
+                sigCoeffGroupFlag64 |= cgBlkPosMask;
                 rdStats.codedLevelAndDist += costCoeff[scanPos] - costSig[scanPos];
                 rdStats.uncodedDist += costCoeff0[scanPos];
                 if (scanPosinCG != 0)
@@ -715,9 +715,9 @@
             costCoeffGroupSig[cgScanPos] = 0;
             if (cgScanPos)
             {
-                if (sigCoeffGroupFlag[cgBlkPos] == 0)
+                if ((sigCoeffGroupFlag64 & cgBlkPosMask) == 0)
                 {
-                    uint32_t ctxSig = getSigCoeffGroupCtxInc(sigCoeffGroupFlag, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
+                    uint32_t ctxSig = getSigCoeffGroupCtxInc(sigCoeffGroupFlag64, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
                     baseCost += xGetRateSigCoeffGroup(0, ctxSig) - rdStats.sigCost;
                     costCoeffGroupSig[cgScanPos] = xGetRateSigCoeffGroup(0, ctxSig);
                 }
@@ -734,7 +734,7 @@
                         double costZeroCG = baseCost;
 
                         // add SigCoeffGroupFlag cost to total cost
-                        uint32_t ctxSig = getSigCoeffGroupCtxInc(sigCoeffGroupFlag, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
+                        uint32_t ctxSig = getSigCoeffGroupCtxInc(sigCoeffGroupFlag64, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
                         if (cgScanPos < cgLastScanPos)
                         {
                             baseCost  += xGetRateSigCoeffGroup(1, ctxSig);
@@ -750,7 +750,7 @@
                         // if we can save cost, change this block to all-zero block
                         if (costZeroCG < baseCost)
                         {
-                            sigCoeffGroupFlag[cgBlkPos] = 0;
+                            sigCoeffGroupFlag64 &= ~cgBlkPosMask;
                             baseCost = costZeroCG;
                             if (cgScanPos < cgLastScanPos)
                             {
@@ -774,7 +774,7 @@
             }
             else
             {
-                sigCoeffGroupFlag[cgBlkPos] = 1;
+                sigCoeffGroupFlag64 |= cgBlkPosMask;
             }
         }
     } //end for (cgScanPos)
@@ -806,7 +806,7 @@
     {
         uint32_t cgBlkPos = codingParameters.scanCG[cgScanPos];
         baseCost -= costCoeffGroupSig[cgScanPos];
-        if (sigCoeffGroupFlag[cgBlkPos])
+        if (sigCoeffGroupFlag64 & ((uint64_t)1 << cgBlkPos))
         {
             for (int scanPosinCG = cgSize - 1; scanPosinCG >= 0; scanPosinCG--)
             {
@@ -1004,14 +1004,15 @@
  * \param height height of the block
  * \returns pattern for current coefficient group
  */
-int TComTrQuant::calcPatternSigCtx(const uint32_t* sigCoeffGroupFlag, uint32_t cgPosX, uint32_t cgPosY, uint32_t log2TrSizeCG)
+int TComTrQuant::calcPatternSigCtx(const uint64_t sigCoeffGroupFlag64, uint32_t cgPosX, uint32_t cgPosY, uint32_t log2TrSizeCG)
 {
     if (log2TrSizeCG == 0) return 0;
 
     const uint32_t trSizeCG = 1 << log2TrSizeCG;
-    const uint32_t* sigPos = &sigCoeffGroupFlag[(cgPosY << log2TrSizeCG) + cgPosX];
-    uint32_t sigRight = (cgPosX < (trSizeCG - 1)) && (sigPos[1] != 0);
-    uint32_t sigLower = (cgPosY < (trSizeCG - 1)) && (sigPos[trSizeCG] != 0);
+    assert(trSizeCG <= 8); // one bit per coefficient group: an 8x8 grid (64 groups) is the most a 64-bit mask can hold
+    const uint32_t sigPos = (uint32_t)((sigCoeffGroupFlag64 >> ((cgPosY << log2TrSizeCG) + cgPosX)) >> 1); // two-step shift: a single shift by (index + 1) would be 64 (undefined) for the last group
+    uint32_t sigRight = (cgPosX < (trSizeCG - 1)) && ((sigPos & 1) != 0); // bit 0 of sigPos is the group at index + 1 (right neighbour)
+    uint32_t sigLower = (cgPosY < (trSizeCG - 1)) && ((sigPos & (1u << (trSizeCG - 1))) != 0); // bit (trSizeCG - 1) of sigPos is the group at index + trSizeCG (lower neighbour)
 
     return sigRight + (sigLower << 1);
 }
@@ -1319,15 +1320,16 @@
  * \param uiLog2BlkSize log2 value of block size
  * \returns ctxInc for current scan position
  */
-uint32_t TComTrQuant::getSigCoeffGroupCtxInc(const uint32_t* sigCoeffGroupFlag,
+uint32_t TComTrQuant::getSigCoeffGroupCtxInc(const uint64_t  sigCoeffGroupFlag64,
                                              const uint32_t  cgPosX,
                                              const uint32_t  cgPosY,
                                              const uint32_t  log2TrSizeCG)
 {
     const uint32_t trSizeCG = 1 << log2TrSizeCG;
-    const uint32_t* sigPos = &sigCoeffGroupFlag[(cgPosY << log2TrSizeCG) + cgPosX];
-    uint32_t sigRight = (cgPosX < (trSizeCG - 1)) && (sigPos[1] != 0);
-    uint32_t sigLower = (cgPosY < (trSizeCG - 1)) && (sigPos[trSizeCG] != 0);
+    assert(trSizeCG <= 8); // one bit per coefficient group: an 8x8 grid (64 groups) is the most a 64-bit mask can hold
+    const uint32_t sigPos = (uint32_t)((sigCoeffGroupFlag64 >> ((cgPosY << log2TrSizeCG) + cgPosX)) >> 1); // two-step shift: a single shift by (index + 1) would be 64 (undefined) for the last group
+    uint32_t sigRight = (cgPosX < (trSizeCG - 1)) && ((sigPos & 1) != 0); // bit 0 of sigPos is the group at index + 1 (right neighbour)
+    uint32_t sigLower = (cgPosY < (trSizeCG - 1)) && ((sigPos & (1u << (trSizeCG - 1))) != 0); // bit (trSizeCG - 1) of sigPos is the group at index + trSizeCG (lower neighbour)
 
     return sigRight | sigLower;
 }
diff -r 6fb38d60fe37 -r 32a612f172f5 source/Lib/TLibCommon/TComTrQuant.h
--- a/source/Lib/TLibCommon/TComTrQuant.h	Wed Mar 12 11:08:59 2014 -0700
+++ b/source/Lib/TLibCommon/TComTrQuant.h	Wed Mar 12 11:09:14 2014 -0700
@@ -158,9 +158,9 @@
     void setScalingList(TComScalingList *scalingList);
     void processScalingListEnc(int32_t *coeff, int32_t *quantcoeff, int quantScales, uint32_t height, uint32_t width, uint32_t ratio, int sizuNum, uint32_t dc);
     void processScalingListDec(int32_t *coeff, int32_t *dequantcoeff, int invQuantScales, uint32_t height, uint32_t width, uint32_t ratio, int sizuNum, uint32_t dc);
-    static int calcPatternSigCtx(const uint32_t* sigCoeffGroupFlag, uint32_t cgPosX, uint32_t cgPosY, uint32_t log2TrSizeCG);
+    static int calcPatternSigCtx(const uint64_t sigCoeffGroupFlag64, uint32_t cgPosX, uint32_t cgPosY, uint32_t log2TrSizeCG);
     static int getSigCtxInc(int patternSigCtx, const TUEntropyCodingParameters &codingParameters, const int blkPos);
-    static uint32_t getSigCoeffGroupCtxInc(const uint32_t* sigCoeffGroupFlag, uint32_t cgPosX, uint32_t cgPosY, const uint32_t log2TrSizeCG);
+    static uint32_t getSigCoeffGroupCtxInc(const uint64_t sigCoeffGroupFlag64, uint32_t cgPosX, uint32_t cgPosY, const uint32_t log2TrSizeCG);
     static void getTUEntropyCodingParameters(TComDataCU* cu, TUEntropyCodingParameters &result, uint32_t absPartIdx, uint32_t log2TrSize, TextType ttype);
     estBitsSbacStruct* m_estBitsSbac;
 
diff -r 6fb38d60fe37 -r 32a612f172f5 source/Lib/TLibEncoder/TEncSbac.cpp
--- a/source/Lib/TLibEncoder/TEncSbac.cpp	Wed Mar 12 11:08:59 2014 -0700
+++ b/source/Lib/TLibEncoder/TEncSbac.cpp	Wed Mar 12 11:09:14 2014 -0700
@@ -2089,9 +2089,7 @@
     // Find position of last coefficient
     int scanPosLast = -1;
     int posLast;
-    uint32_t sigCoeffGroupFlag[MLS_GRP_NUM];
-    uint32_t cgNum = 1 << codingParameters.log2TrSizeCG * 2;
-    memset(sigCoeffGroupFlag, 0, sizeof(uint32_t) * cgNum);
+    uint64_t sigCoeffGroupFlag64 = 0;
     const uint32_t maskPosXY = (1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1;
     do
     {
@@ -2104,7 +2102,7 @@
             //uint32_t posx   = posLast - (posy << log2TrSize);
             //uint32_t blkIdx0 = ((posy >> MLS_CG_LOG2_SIZE) << codingParameters.log2TrSizeCG) + (posx >> MLS_CG_LOG2_SIZE);
             uint32_t blkIdx = ((posLast >> (2 * MLS_CG_LOG2_SIZE)) & ~maskPosXY) + ((posLast >> MLS_CG_LOG2_SIZE) & maskPosXY);
-            sigCoeffGroupFlag[blkIdx] = 1;
+            sigCoeffGroupFlag64 |= ((uint64_t)1 << blkIdx);
 
             numSig--;
         }
@@ -2142,24 +2140,25 @@
             scanPosSig--;
         }
         // encode significant_coeffgroup_flag
-        int cgBlkPos = codingParameters.scanCG[subSet];
-        int cgPosY   = cgBlkPos >> codingParameters.log2TrSizeCG;
-        int cgPosX   = cgBlkPos - (cgPosY << codingParameters.log2TrSizeCG);
+        const int cgBlkPos = codingParameters.scanCG[subSet];
+        const int cgPosY   = cgBlkPos >> codingParameters.log2TrSizeCG;
+        const int cgPosX   = cgBlkPos - (cgPosY << codingParameters.log2TrSizeCG);
+        const uint64_t cgBlkPosMask = ((uint64_t)1 << cgBlkPos);
 
         if (subSet == lastScanSet || subSet == 0)
         {
-            sigCoeffGroupFlag[cgBlkPos] = 1;
+            sigCoeffGroupFlag64 |= cgBlkPosMask;
         }
         else
         {
-            uint32_t sigCoeffGroup = (sigCoeffGroupFlag[cgBlkPos] != 0);
-            uint32_t ctxSig = TComTrQuant::getSigCoeffGroupCtxInc(sigCoeffGroupFlag, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
+            uint32_t sigCoeffGroup = ((sigCoeffGroupFlag64 & cgBlkPosMask) != 0);
+            uint32_t ctxSig = TComTrQuant::getSigCoeffGroupCtxInc(sigCoeffGroupFlag64, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
             m_binIf->encodeBin(sigCoeffGroup, baseCoeffGroupCtx[ctxSig]);
         }
         // encode significant_coeff_flag
-        if (sigCoeffGroupFlag[cgBlkPos])
+        if (sigCoeffGroupFlag64 & cgBlkPosMask)
         {
-            const int patternSigCtx = TComTrQuant::calcPatternSigCtx(sigCoeffGroupFlag, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
+            const int patternSigCtx = TComTrQuant::calcPatternSigCtx(sigCoeffGroupFlag64, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
             uint32_t blkPos, sig, ctxSig;
             for (; scanPosSig >= subPos; scanPosSig--)
             {



More information about the x265-devel mailing list