[x265] [PATCH] SAO: no need to unroll chroma to avoid increased code size

ashok at multicorewareinc.com ashok at multicorewareinc.com
Tue Feb 16 16:12:02 CET 2016


# HG changeset patch
# User Ashok Kumar Mishra<ashok at multicorewareinc.com>
# Date 1455633570 -19800
#      Tue Feb 16 20:09:30 2016 +0530
# Node ID 36751a3dce37e4f506f4bdec12e20ef665b42012
# Parent  33b02e2af5a4b38cd54d3f94f163aae640855dbf
SAO: process the chroma planes in a loop instead of unrolled code, to avoid increased code size

diff -r 33b02e2af5a4 -r 36751a3dce37 source/encoder/sao.cpp
--- a/source/encoder/sao.cpp	Tue Feb 16 20:09:30 2016 +0530
+++ b/source/encoder/sao.cpp	Tue Feb 16 20:09:30 2016 +0530
@@ -113,7 +113,6 @@
         m_clipTable = &(m_clipTableBase[rangeExt]);
 
         // Share with fast clip lookup table
-
         for (int i = 0; i < rangeExt; i++)
             m_clipTableBase[i] = 0;
 
@@ -638,13 +637,8 @@
 {
     PicYuv* reconPic = m_frame->m_reconPic;
     intptr_t stride = reconPic->m_strideC;
-    int ctuWidth  = g_maxCUSize;
-    int ctuHeight = g_maxCUSize;
-
-    {
-        ctuWidth  >>= m_hChromaShift;
-        ctuHeight >>= m_vChromaShift;
-    }
+    int ctuWidth  = g_maxCUSize >> m_hChromaShift;
+    int ctuHeight = g_maxCUSize >> m_vChromaShift;
 
     int addr = idxY * m_numCuInWidth + idxX;
     pixel* recCb = reconPic->getCbAddr(addr);
@@ -652,88 +646,53 @@
 
     if (idxX == 0)
     {
-        for (int i = 0; i < ctuHeight + 1; i++)
+        for (int i = 0; i < ctuHeight + 1; i++, recCb += stride, recCr += stride)
         {
             m_tmpL1[1][i] = recCb[0];
             m_tmpL1[2][i] = recCr[0];
-            recCb += stride;
-            recCr += stride;
         }
     }
 
-    bool mergeLeftFlagCb = (ctuParam[1][addr].mergeMode == SAO_MERGE_LEFT);
-    int typeIdxCb = ctuParam[1][addr].typeIdx;
-
-    bool mergeLeftFlagCr = (ctuParam[2][addr].mergeMode == SAO_MERGE_LEFT);
-    int typeIdxCr = ctuParam[2][addr].typeIdx;
-
     if (idxX != (m_numCuInWidth - 1))
     {
         recCb = reconPic->getCbAddr(addr);
         recCr = reconPic->getCrAddr(addr);
-        for (int i = 0; i < ctuHeight + 1; i++)
+        for (int i = 0; i < ctuHeight + 1; i++, recCb += stride, recCr += stride)
         {
             m_tmpL2[1][i] = recCb[ctuWidth - 1];
             m_tmpL2[2][i] = recCr[ctuWidth - 1];
-            recCb += stride;
-            recCr += stride;
         }
     }
 
-    // Process U
-    if (typeIdxCb >= 0)
+    for (int plane = 1; plane < 3; plane++)
     {
-        if (!mergeLeftFlagCb)
+        int typeIdx = ctuParam[plane][addr].typeIdx;
+        if (typeIdx >= 0)
         {
-            if (typeIdxCb == SAO_BO)
+            if (ctuParam[plane][addr].mergeMode != SAO_MERGE_LEFT)
             {
-                memset(m_offsetBo[1], 0, sizeof(m_offsetBo[0]));
+                if (typeIdx == SAO_BO)
+                {
+                    memset(m_offsetBo[plane], 0, sizeof(m_offsetBo[0]));
 
-                for (int i = 0; i < SAO_NUM_OFFSET; i++)
-                    m_offsetBo[1][((ctuParam[1][addr].bandPos + i) & (SAO_NUM_BO_CLASSES - 1))] = (int8_t)(ctuParam[1][addr].offset[i] << SAO_BIT_INC);
-            }
-            else // if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3)
-            {
-                int offset[NUM_EDGETYPE];
-                offset[0] = 0;
-                for (int i = 0; i < SAO_NUM_OFFSET; i++)
-                    offset[i + 1] = ctuParam[1][addr].offset[i] << SAO_BIT_INC;
+                    for (int i = 0; i < SAO_NUM_OFFSET; i++)
+                        m_offsetBo[plane][((ctuParam[plane][addr].bandPos + i) & (SAO_NUM_BO_CLASSES - 1))] = (int8_t)(ctuParam[plane][addr].offset[i] << SAO_BIT_INC);
+                }
+                else // if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3)
+                {
+                    int offset[NUM_EDGETYPE];
+                    offset[0] = 0;
+                    for (int i = 0; i < SAO_NUM_OFFSET; i++)
+                        offset[i + 1] = ctuParam[plane][addr].offset[i] << SAO_BIT_INC;
 
-                for (int edgeType = 0; edgeType < NUM_EDGETYPE; edgeType++)
-                    m_offsetEo[1][edgeType] = (int8_t)offset[s_eoTable[edgeType]];
+                    for (int edgeType = 0; edgeType < NUM_EDGETYPE; edgeType++)
+                        m_offsetEo[plane][edgeType] = (int8_t)offset[s_eoTable[edgeType]];
+                }
             }
         }
-        processSaoCu(addr, typeIdxCb, 1);
+        processSaoCu(addr, typeIdx, plane);
+        std::swap(m_tmpL1[plane], m_tmpL2[plane]);
     }
-
-    // Process V
-    if (typeIdxCr >= 0)
-    {
-        if (!mergeLeftFlagCr)
-        {
-            if (typeIdxCr == SAO_BO)
-            {
-                memset(m_offsetBo[2], 0, sizeof(m_offsetBo[0]));
-
-                for (int i = 0; i < SAO_NUM_OFFSET; i++)
-                    m_offsetBo[2][((ctuParam[2][addr].bandPos + i) & (SAO_NUM_BO_CLASSES - 1))] = (int8_t)(ctuParam[2][addr].offset[i] << SAO_BIT_INC);
-            }
-            else // if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3)
-            {
-                int offset[NUM_EDGETYPE];
-                offset[0] = 0;
-                for (int i = 0; i < SAO_NUM_OFFSET; i++)
-                    offset[i + 1] = ctuParam[2][addr].offset[i] << SAO_BIT_INC;
-
-                for (int edgeType = 0; edgeType < NUM_EDGETYPE; edgeType++)
-                    m_offsetEo[2][edgeType] = (int8_t)offset[s_eoTable[edgeType]];
-            }
-        }
-        processSaoCu(addr, typeIdxCb, 2);
-    }
-
-    std::swap(m_tmpL1[1], m_tmpL2[1]);
-    std::swap(m_tmpL1[2], m_tmpL2[2]);
 }
 
 /* Calculate SAO statistics for current CTU without non-crossing slice */


More information about the x265-devel mailing list