[x265] [PATCH] SAO: no need to unroll chroma to avoid increased code size

Wed Feb 17 04:58:07 CET 2016


At 2016-02-16 23:12:02,ashok at multicorewareinc.com wrote:
># HG changeset patch
># User Ashok Kumar Mishra<ashok at multicorewareinc.com>
># Date 1455633570 -19800
>#      Tue Feb 16 20:09:30 2016 +0530
># Node ID 36751a3dce37e4f506f4bdec12e20ef665b42012
># Parent  33b02e2af5a4b38cd54d3f94f163aae640855dbf
>SAO: no need to unroll chroma to avoid increased code size
>
>diff -r 33b02e2af5a4 -r 36751a3dce37 source/encoder/sao.cpp
>--- a/source/encoder/sao.cpp	Tue Feb 16 20:09:30 2016 +0530
>+++ b/source/encoder/sao.cpp	Tue Feb 16 20:09:30 2016 +0530
>@@ -113,7 +113,6 @@
>         m_clipTable = &(m_clipTableBase[rangeExt]);
> 
>         // Share with fast clip lookup table
>-
>         for (int i = 0; i < rangeExt; i++)
>             m_clipTableBase[i] = 0;
> 
>@@ -638,13 +637,8 @@
> {
>     PicYuv* reconPic = m_frame->m_reconPic;
>     intptr_t stride = reconPic->m_strideC;
>-    int ctuWidth  = g_maxCUSize;
>-    int ctuHeight = g_maxCUSize;
>-
>-    {
>-        ctuWidth  >>= m_hChromaShift;
>-        ctuHeight >>= m_vChromaShift;
>-    }
>+    int ctuWidth  = g_maxCUSize >> m_hChromaShift;
>+    int ctuHeight = g_maxCUSize >> m_vChromaShift;
> 
>     int addr = idxY * m_numCuInWidth + idxX;
>     pixel* recCb = reconPic->getCbAddr(addr);
>@@ -652,88 +646,53 @@
> 
>     if (idxX == 0)
>     {
>-        for (int i = 0; i < ctuHeight + 1; i++)
>+        for (int i = 0; i < ctuHeight + 1; i++, recCb += stride, recCr += stride)

this style is not good to readable

>         {
>             m_tmpL1[1][i] = recCb[0];
>             m_tmpL1[2][i] = recCr[0];
>-            recCb += stride;
>-            recCr += stride;
>         }
>     }
> 
>-    bool mergeLeftFlagCb = (ctuParam[1][addr].mergeMode == SAO_MERGE_LEFT);
>-    int typeIdxCb = ctuParam[1][addr].typeIdx;
in here, no multiplication  operator to access typeIdx, see below

>-
>-    bool mergeLeftFlagCr = (ctuParam[2][addr].mergeMode == SAO_MERGE_LEFT);
>-    int typeIdxCr = ctuParam[2][addr].typeIdx;
>-
>     if (idxX != (m_numCuInWidth - 1))
>     {
>         recCb = reconPic->getCbAddr(addr);
>         recCr = reconPic->getCrAddr(addr);
>-        for (int i = 0; i < ctuHeight + 1; i++)
>+        for (int i = 0; i < ctuHeight + 1; i++, recCb += stride, recCr += stride)
>         {
>             m_tmpL2[1][i] = recCb[ctuWidth - 1];
>             m_tmpL2[2][i] = recCr[ctuWidth - 1];
>-            recCb += stride;
>-            recCr += stride;
>         }
>     }
> 
>-    // Process U
>-    if (typeIdxCb >= 0)
>+    for (int plane = 1; plane < 3; plane++)
>     {
>-        if (!mergeLeftFlagCb)
>+        int typeIdx = ctuParam[plane][addr].typeIdx;
here necessary multiplication on array index access

>+        if (typeIdx >= 0)
>         {
>-            if (typeIdxCb == SAO_BO)
>+            if (ctuParam[plane][addr].mergeMode != SAO_MERGE_LEFT)
>             {
>-                memset(m_offsetBo[1], 0, sizeof(m_offsetBo[0]));
>+                if (typeIdx == SAO_BO)
>+                {
>+                    memset(m_offsetBo[plane], 0, sizeof(m_offsetBo[0]));
> 
>-                for (int i = 0; i < SAO_NUM_OFFSET; i++)
>-                    m_offsetBo[1][((ctuParam[1][addr].bandPos + i) & (SAO_NUM_BO_CLASSES - 1))] = (int8_t)(ctuParam[1][addr].offset[i] << SAO_BIT_INC);
>-            }
>-            else // if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3)
>-            {
>-                int offset[NUM_EDGETYPE];
>-                offset[0] = 0;
>-                for (int i = 0; i < SAO_NUM_OFFSET; i++)
>-                    offset[i + 1] = ctuParam[1][addr].offset[i] << SAO_BIT_INC;
>+                    for (int i = 0; i < SAO_NUM_OFFSET; i++)
>+                        m_offsetBo[plane][((ctuParam[plane][addr].bandPos + i) & (SAO_NUM_BO_CLASSES - 1))] = (int8_t)(ctuParam[plane][addr].offset[i] << SAO_BIT_INC);
>+                }
>+                else // if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3)
>+                {
>+                    int offset[NUM_EDGETYPE];
>+                    offset[0] = 0;
>+                    for (int i = 0; i < SAO_NUM_OFFSET; i++)
>+                        offset[i + 1] = ctuParam[plane][addr].offset[i] << SAO_BIT_INC;
the loop just 4 times, we may merge with below loop

> 
>-                for (int edgeType = 0; edgeType < NUM_EDGETYPE; edgeType++)
>-                    m_offsetEo[1][edgeType] = (int8_t)offset[s_eoTable[edgeType]];
>+                    for (int edgeType = 0; edgeType < NUM_EDGETYPE; edgeType++)
>+                        m_offsetEo[plane][edgeType] = (int8_t)offset[s_eoTable[edgeType]];
>+                }
>             }
>         }
>-        processSaoCu(addr, typeIdxCb, 1);
>+        processSaoCu(addr, typeIdx, plane);
>+        std::swap(m_tmpL1[plane], m_tmpL2[plane]);
>     }

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20160217/276b6352/attachment.html>