[x265] [PATCH] SAO: fixed max size of sao statistics buffer to 32

ashok at multicorewareinc.com ashok at multicorewareinc.com
Tue Feb 16 16:10:39 CET 2016


# HG changeset patch
# User Ashok Kumar Mishra<ashok at multicorewareinc.com>
# Date 1455632166 -19800
#      Tue Feb 16 19:46:06 2016 +0530
# Node ID dfc4d87df196e94be70757d648dfb935c100c32a
# Parent  11cb0f838b5a9ba2e38e1412e139d0cf94c66e7d
SAO: fixed max size of sao statistics buffer to 32
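This patch makes the band-offset class index 0-based (the "+ 1" bias is removed
and MAX_NUM_SAO_CLASS drops from 33 to 32) and comments out the SSE4
saoCuStatsBO primitive assignment. Please modify the asm primitive for
saoCuStatsBO to match the new indexing.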

diff -r 11cb0f838b5a -r dfc4d87df196 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Tue Feb 02 14:50:24 2016 +0530
+++ b/source/common/x86/asm-primitives.cpp	Tue Feb 16 19:46:06 2016 +0530
@@ -2530,7 +2530,7 @@
         p.pelFilterLumaStrong[0] = PFX(pelFilterLumaStrong_V_sse4);
         p.pelFilterLumaStrong[1] = PFX(pelFilterLumaStrong_H_sse4);
 
-        p.saoCuStatsBO = PFX(saoCuStatsBO_sse4);
+//        p.saoCuStatsBO = PFX(saoCuStatsBO_sse4);
         p.saoCuStatsE0 = PFX(saoCuStatsE0_sse4);
         p.saoCuStatsE1 = PFX(saoCuStatsE1_sse4);
         p.saoCuStatsE2 = PFX(saoCuStatsE2_sse4);
diff -r 11cb0f838b5a -r dfc4d87df196 source/encoder/sao.cpp
--- a/source/encoder/sao.cpp	Tue Feb 02 14:50:24 2016 +0530
+++ b/source/encoder/sao.cpp	Tue Feb 16 19:46:06 2016 +0530
@@ -1007,7 +1007,7 @@
         {
             for (x = (y < startY ? startX : 0); x < ctuWidth; x++)
             {
-                int classIdx = 1 + (rec[x] >> boShift);
+                int classIdx = rec[x] >> boShift;
                 stats[classIdx] += (fenc[x] - rec[x]);
                 count[classIdx]++;
             }
@@ -1306,11 +1306,11 @@
                 int typeIdx = mergeSrcParam->typeIdx;
                 if (typeIdx >= 0)
                 {
-                    int bandPos = (typeIdx == SAO_BO) ? mergeSrcParam->bandPos : 0;
+                    int bandPos = (typeIdx == SAO_BO) ? mergeSrcParam->bandPos : 1;
                     for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
                     {
                         int mergeOffset = mergeSrcParam->offset[classIdx];
-                        estDist += estSaoDist(m_count[plane][typeIdx][classIdx + bandPos + 1], mergeOffset, m_offsetOrg[plane][typeIdx][classIdx + bandPos + 1]);
+                        estDist += estSaoDist(m_count[plane][typeIdx][classIdx + bandPos], mergeOffset, m_offsetOrg[plane][typeIdx][classIdx + bandPos]);
                     }
                 }
 
@@ -1393,7 +1393,7 @@
     // BO
     for (int plane = 0; plane < planes; plane++)
     {
-        for (int classIdx = 1; classIdx < SAO_NUM_BO_CLASSES + 1; classIdx++)
+        for (int classIdx = 0; classIdx < SAO_NUM_BO_CLASSES; classIdx++)
         {
             int32_t  count     = m_count[plane][SAO_BO][classIdx];
             int32_t& offsetOrg = m_offsetOrg[plane][SAO_BO][classIdx];
@@ -1500,17 +1500,17 @@
 
     //BO RDO
     int64_t estDist = 0;
-    for (int classIdx = 1; classIdx < SAO_NUM_BO_CLASSES + 1; classIdx++)
+    for (int classIdx = 0; classIdx < SAO_NUM_BO_CLASSES; classIdx++)
     {
         int32_t  count     = m_count[0][SAO_BO][classIdx];
         int32_t& offsetOrg = m_offsetOrg[0][SAO_BO][classIdx];
         int32_t& offsetOut = m_offset[0][SAO_BO][classIdx];
 
-        distBOClasses[classIdx - 1] = 0;
-        costBOClasses[classIdx - 1] = m_lumaLambda;
+        distBOClasses[classIdx] = 0;
+        costBOClasses[classIdx] = m_lumaLambda;
 
         if (count)
-            offsetOut = estIterOffset(SAO_BO, m_lumaLambda, offsetOut, count, offsetOrg, distBOClasses[classIdx - 1], costBOClasses[classIdx - 1]);
+            offsetOut = estIterOffset(SAO_BO, m_lumaLambda, offsetOut, count, offsetOrg, distBOClasses[classIdx], costBOClasses[classIdx]);
         else
             offsetOut = 0;
     }
@@ -1538,7 +1538,7 @@
 
     m_entropyCoder.load(m_rdContexts.temp);
     m_entropyCoder.resetBits();
-    m_entropyCoder.codeSaoOffsetBO(m_offset[0][SAO_BO] + (bestClassBO + 1), bestClassBO, 0);
+    m_entropyCoder.codeSaoOffsetBO(m_offset[0][SAO_BO] + bestClassBO, bestClassBO, 0);
 
     uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
     double cost = (double)estDist + m_lumaLambda * (double)estRate;
@@ -1552,7 +1552,7 @@
         lclCtuParam->typeIdx = SAO_BO;
         lclCtuParam->bandPos = bestClassBO;
         for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
-            lclCtuParam->offset[classIdx] = (int)m_offset[0][SAO_BO][classIdx + bestClassBO + 1];
+            lclCtuParam->offset[classIdx] = (int)m_offset[0][SAO_BO][classIdx + bestClassBO];
     }
 
     mergeDist[0] = ((double)bestDist / m_lumaLambda);
@@ -1636,17 +1636,17 @@
     {
         double bestRDCostBO = MAX_DOUBLE;
 
-        for (int classIdx = 1; classIdx < SAO_NUM_BO_CLASSES + 1; classIdx++)
+        for (int classIdx = 0; classIdx < SAO_NUM_BO_CLASSES; classIdx++)
         {
             int32_t  count = m_count[compIdx][SAO_BO][classIdx];
             int32_t& offsetOrg = m_offsetOrg[compIdx][SAO_BO][classIdx];
             int32_t& offsetOut = m_offset[compIdx][SAO_BO][classIdx];
 
-            distBOClasses[classIdx - 1] = 0;
-            costBOClasses[classIdx - 1] = m_chromaLambda;
+            distBOClasses[classIdx] = 0;
+            costBOClasses[classIdx] = m_chromaLambda;
 
             if (count)
-                offsetOut = estIterOffset(SAO_BO, m_chromaLambda, offsetOut, count, offsetOrg, distBOClasses[classIdx - 1], costBOClasses[classIdx - 1]);
+                offsetOut = estIterOffset(SAO_BO, m_chromaLambda, offsetOut, count, offsetOrg, distBOClasses[classIdx], costBOClasses[classIdx]);
             else
                 offsetOut = 0;
         }
@@ -1673,7 +1673,7 @@
     m_entropyCoder.resetBits();
 
     for (int compIdx = 0; compIdx < 2; compIdx++)
-        m_entropyCoder.codeSaoOffsetBO(m_offset[compIdx + 1][SAO_BO] + (bestClassBO[compIdx] + 1), bestClassBO[compIdx], compIdx + 1);
+        m_entropyCoder.codeSaoOffsetBO(m_offset[compIdx + 1][SAO_BO] + bestClassBO[compIdx], bestClassBO[compIdx], compIdx + 1);
 
     uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
     double cost = (double)(estDist[0] + estDist[1]) + m_chromaLambda * (double)estRate;
@@ -1689,7 +1689,7 @@
             lclCtuParam[compIdx]->typeIdx = SAO_BO;
             lclCtuParam[compIdx]->bandPos = bestClassBO[compIdx];
             for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
-                lclCtuParam[compIdx]->offset[classIdx] = (int)m_offset[compIdx + 1][SAO_BO][classIdx + bestClassBO[compIdx] + 1];
+                lclCtuParam[compIdx]->offset[classIdx] = (int)m_offset[compIdx + 1][SAO_BO][classIdx + bestClassBO[compIdx]];
         }
     }
 
@@ -1703,14 +1703,13 @@
 // NOTE: must put in namespace X265_NS since we need class SAO
 void saoCuStatsBO_c(const int16_t *diff, const pixel *rec, intptr_t stride, int endX, int endY, int32_t *stats, int32_t *count)
 {
-    int x, y;
     const int boShift = X265_DEPTH - SAO_BO_BITS;
 
-    for (y = 0; y < endY; y++)
+    for (int y = 0; y < endY; y++)
     {
-        for (x = 0; x < endX; x++)
+        for (int x = 0; x < endX; x++)
         {
-            int classIdx = 1 + (rec[x] >> boShift);
+            int classIdx = rec[x] >> boShift;
             stats[classIdx] += diff[x];
             count[classIdx]++;
         }
@@ -1722,7 +1721,6 @@
 
 void saoCuStatsE0_c(const int16_t *diff, const pixel *rec, intptr_t stride, int endX, int endY, int32_t *stats, int32_t *count)
 {
-    int x, y;
     int32_t tmp_stats[SAO::NUM_EDGETYPE];
     int32_t tmp_count[SAO::NUM_EDGETYPE];
 
@@ -1731,10 +1729,10 @@
     memset(tmp_stats, 0, sizeof(tmp_stats));
     memset(tmp_count, 0, sizeof(tmp_count));
 
-    for (y = 0; y < endY; y++)
+    for (int y = 0; y < endY; y++)
     {
         int signLeft = signOf(rec[0] - rec[-1]);
-        for (x = 0; x < endX; x++)
+        for (int x = 0; x < endX; x++)
         {
             int signRight = signOf2(rec[x], rec[x + 1]);
             X265_CHECK(signRight == signOf(rec[x] - rec[x + 1]), "signDown check failure\n");
@@ -1750,7 +1748,7 @@
         rec += stride;
     }
 
-    for (x = 0; x < SAO::NUM_EDGETYPE; x++)
+    for (int x = 0; x < SAO::NUM_EDGETYPE; x++)
     {
         stats[SAO::s_eoTable[x]] += tmp_stats[x];
         count[SAO::s_eoTable[x]] += tmp_count[x];
@@ -1762,7 +1760,6 @@
     X265_CHECK(endX <= MAX_CU_SIZE, "endX check failure\n");
     X265_CHECK(endY <= MAX_CU_SIZE, "endY check failure\n");
 
-    int x, y;
     int32_t tmp_stats[SAO::NUM_EDGETYPE];
     int32_t tmp_count[SAO::NUM_EDGETYPE];
 
@@ -1770,9 +1767,9 @@
     memset(tmp_count, 0, sizeof(tmp_count));
 
     X265_CHECK(endX * endY <= (4096 - 16), "Assembly of saoE1 may overflow with this block size\n");
-    for (y = 0; y < endY; y++)
+    for (int y = 0; y < endY; y++)
     {
-        for (x = 0; x < endX; x++)
+        for (int x = 0; x < endX; x++)
         {
             int signDown = signOf2(rec[x], rec[x + stride]);
             X265_CHECK(signDown == signOf(rec[x] - rec[x + stride]), "signDown check failure\n");
@@ -1787,7 +1784,7 @@
         rec += stride;
     }
 
-    for (x = 0; x < SAO::NUM_EDGETYPE; x++)
+    for (int x = 0; x < SAO::NUM_EDGETYPE; x++)
     {
         stats[SAO::s_eoTable[x]] += tmp_stats[x];
         count[SAO::s_eoTable[x]] += tmp_count[x];
@@ -1799,17 +1796,16 @@
     X265_CHECK(endX < MAX_CU_SIZE, "endX check failure\n");
     X265_CHECK(endY < MAX_CU_SIZE, "endY check failure\n");
 
-    int x, y;
     int32_t tmp_stats[SAO::NUM_EDGETYPE];
     int32_t tmp_count[SAO::NUM_EDGETYPE];
 
     memset(tmp_stats, 0, sizeof(tmp_stats));
     memset(tmp_count, 0, sizeof(tmp_count));
 
-    for (y = 0; y < endY; y++)
+    for (int y = 0; y < endY; y++)
     {
         upBufft[0] = signOf(rec[stride] - rec[-1]);
-        for (x = 0; x < endX; x++)
+        for (int x = 0; x < endX; x++)
         {
             int signDown = signOf2(rec[x], rec[x + stride + 1]);
             X265_CHECK(signDown == signOf(rec[x] - rec[x + stride + 1]), "signDown check failure\n");
@@ -1825,7 +1821,7 @@
         diff += MAX_CU_SIZE;
     }
 
-    for (x = 0; x < SAO::NUM_EDGETYPE; x++)
+    for (int x = 0; x < SAO::NUM_EDGETYPE; x++)
     {
         stats[SAO::s_eoTable[x]] += tmp_stats[x];
         count[SAO::s_eoTable[x]] += tmp_count[x];
@@ -1837,16 +1833,15 @@
     X265_CHECK(endX < MAX_CU_SIZE, "endX check failure\n");
     X265_CHECK(endY < MAX_CU_SIZE, "endY check failure\n");
 
-    int x, y;
     int32_t tmp_stats[SAO::NUM_EDGETYPE];
     int32_t tmp_count[SAO::NUM_EDGETYPE];
 
     memset(tmp_stats, 0, sizeof(tmp_stats));
     memset(tmp_count, 0, sizeof(tmp_count));
 
-    for (y = 0; y < endY; y++)
+    for (int y = 0; y < endY; y++)
     {
-        for (x = 0; x < endX; x++)
+        for (int x = 0; x < endX; x++)
         {
             int signDown = signOf2(rec[x], rec[x + stride - 1]);
             X265_CHECK(signDown == signOf(rec[x] - rec[x + stride - 1]), "signDown check failure\n");
@@ -1864,7 +1859,7 @@
         diff += MAX_CU_SIZE;
     }
 
-    for (x = 0; x < SAO::NUM_EDGETYPE; x++)
+    for (int x = 0; x < SAO::NUM_EDGETYPE; x++)
     {
         stats[SAO::s_eoTable[x]] += tmp_stats[x];
         count[SAO::s_eoTable[x]] += tmp_count[x];
diff -r 11cb0f838b5a -r dfc4d87df196 source/encoder/sao.h
--- a/source/encoder/sao.h	Tue Feb 02 14:50:24 2016 +0530
+++ b/source/encoder/sao.h	Tue Feb 16 19:46:06 2016 +0530
@@ -56,7 +56,7 @@
 
     enum { SAO_MAX_DEPTH = 4 };
     enum { SAO_BO_BITS  = 5 };
-    enum { MAX_NUM_SAO_CLASS = 33 };
+    enum { MAX_NUM_SAO_CLASS = 32 };
     enum { SAO_BIT_INC = 0 }; /* in HM12.0, it wrote as X265_MAX(X265_DEPTH - 10, 0) */
     enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
     enum { NUM_EDGETYPE = 5 };


More information about the x265-devel mailing list