[x265] [PATCH 14 of 15] sao: reduce address operators by split into Luma and Chroma path

Min Chen chenm003 at 163.com
Wed Dec 2 18:28:37 CET 2015


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1449076380 21600
# Node ID a6d88a08af3d48cb804aa61819bd45ee685d1f59
# Parent  a3a9660c91b8eeb8f70869fc4022f939c01023f0
sao: reduce address operators by split into Luma and Chroma path
---
 source/encoder/framefilter.cpp |    7 +--
 source/encoder/sao.cpp         |  133 ++++++++++++++++++++++++++++++++++------
 source/encoder/sao.h           |    3 +-
 3 files changed, 118 insertions(+), 25 deletions(-)

diff -r a3a9660c91b8 -r a6d88a08af3d source/encoder/framefilter.cpp
--- a/source/encoder/framefilter.cpp	Wed Dec 02 11:12:57 2015 -0600
+++ b/source/encoder/framefilter.cpp	Wed Dec 02 11:13:00 2015 -0600
@@ -546,13 +546,10 @@
     for(uint32_t col = 0; col < numCols; col++)
     {
         if (saoParam->bSaoFlag[0])
-            m_parallelFilter[row].m_sao.processSaoUnitCu(saoParam->ctuParam[0], row, col, 0);
+            m_parallelFilter[row].m_sao.processSaoUnitCuLuma(saoParam->ctuParam[0], row, col);
 
         if (saoParam->bSaoFlag[1])
-        {
-            m_parallelFilter[row].m_sao.processSaoUnitCu(saoParam->ctuParam[1], row, col, 1);
-            m_parallelFilter[row].m_sao.processSaoUnitCu(saoParam->ctuParam[2], row, col, 2);
-        }
+            m_parallelFilter[row].m_sao.processSaoUnitCuChroma(saoParam->ctuParam, row, col);
     }
 
     if (encData.m_slice->m_pps->bTransquantBypassEnabled)
diff -r a3a9660c91b8 -r a6d88a08af3d source/encoder/sao.cpp
--- a/source/encoder/sao.cpp	Wed Dec 02 11:12:57 2015 -0600
+++ b/source/encoder/sao.cpp	Wed Dec 02 11:13:00 2015 -0600
@@ -674,29 +674,21 @@
 }
 
 /* Process SAO unit */
-void SAO::processSaoUnitCu(SaoCtuParam* ctuParam, int idxY, int idxX, int plane)
+void SAO::processSaoUnitCuLuma(SaoCtuParam* ctuParam, int idxY, int idxX)
 {
     PicYuv* reconPic = m_frame->m_reconPic;
-    intptr_t stride = plane ? reconPic->m_strideC : reconPic->m_stride;
-    uint32_t picWidth  = m_param->sourceWidth;
+    intptr_t stride = reconPic->m_stride;
     int ctuWidth  = g_maxCUSize;
     int ctuHeight = g_maxCUSize;
 
-    if (plane)
-    {
-        picWidth  >>= m_hChromaShift;
-        ctuWidth  >>= m_hChromaShift;
-        ctuHeight >>= m_vChromaShift;
-    }
-
     int addr = idxY * m_numCuInWidth + idxX;
-    pixel* rec = reconPic->getPlaneAddr(plane, addr);
+    pixel* rec = reconPic->getLumaAddr(addr);
 
     if (idxX == 0)
     {
         for (int i = 0; i < ctuHeight + 1; i++)
         {
-            m_tmpL1[plane][i] = rec[0];
+            m_tmpL1[0][i] = rec[0];
             rec += stride;
         }
     }
@@ -706,10 +698,10 @@
 
     if (idxX != (m_numCuInWidth - 1))
     {
-        rec = reconPic->getPlaneAddr(plane, addr);
+        rec = reconPic->getLumaAddr(addr);
         for (int i = 0; i < ctuHeight + 1; i++)
         {
-            m_tmpL2[plane][i] = rec[ctuWidth - 1];
+            m_tmpL2[0][i] = rec[ctuWidth - 1];
             rec += stride;
         }
     }
@@ -720,10 +712,10 @@
         {
             if (typeIdx == SAO_BO)
             {
-                memset(m_offsetBo[plane], 0, sizeof(m_offsetBo[0]));
+                memset(m_offsetBo[0], 0, sizeof(m_offsetBo[0]));
 
                 for (int i = 0; i < SAO_NUM_OFFSET; i++)
-                    m_offsetBo[plane][((ctuParam[addr].bandPos + i) & (SAO_NUM_BO_CLASSES - 1))] = (int8_t)(ctuParam[addr].offset[i] << SAO_BIT_INC);
+                    m_offsetBo[0][((ctuParam[addr].bandPos + i) & (SAO_NUM_BO_CLASSES - 1))] = (int8_t)(ctuParam[addr].offset[i] << SAO_BIT_INC);
             }
             else // if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3)
             {
@@ -733,12 +725,115 @@
                     offset[i + 1] = ctuParam[addr].offset[i] << SAO_BIT_INC;
 
                 for (int edgeType = 0; edgeType < NUM_EDGETYPE; edgeType++)
-                    m_offsetEo[plane][edgeType] = (int8_t)offset[s_eoTable[edgeType]];
+                    m_offsetEo[0][edgeType] = (int8_t)offset[s_eoTable[edgeType]];
             }
         }
-        processSaoCu(addr, typeIdx, plane);
+        processSaoCu(addr, typeIdx, 0);
     }
-    std::swap(m_tmpL1[plane], m_tmpL2[plane]);
+    std::swap(m_tmpL1[0], m_tmpL2[0]);
+}
+
+/* Process SAO for the Cb and Cr planes of the CTU at (idxY, idxX); ctuParam[1] / ctuParam[2] hold the per-CTU Cb / Cr parameters */
+void SAO::processSaoUnitCuChroma(SaoCtuParam* ctuParam[3], int idxY, int idxX)
+{
+    PicYuv* reconPic = m_frame->m_reconPic;
+    intptr_t stride = reconPic->m_strideC;
+    int ctuWidth  = g_maxCUSize;
+    int ctuHeight = g_maxCUSize;
+
+    {
+        ctuWidth  >>= m_hChromaShift;
+        ctuHeight >>= m_vChromaShift;
+    }
+
+    int addr = idxY * m_numCuInWidth + idxX;
+    pixel* recCb = reconPic->getCbAddr(addr);
+    pixel* recCr = reconPic->getCrAddr(addr);
+
+    if (idxX == 0)
+    {
+        for (int i = 0; i < ctuHeight + 1; i++)
+        {
+            m_tmpL1[1][i] = recCb[0];
+            m_tmpL1[2][i] = recCr[0];
+            recCb += stride;
+            recCr += stride;
+        }
+    }
+
+    bool mergeLeftFlagCb = (ctuParam[1][addr].mergeMode == SAO_MERGE_LEFT);
+    int typeIdxCb = ctuParam[1][addr].typeIdx;
+
+    bool mergeLeftFlagCr = (ctuParam[2][addr].mergeMode == SAO_MERGE_LEFT);
+    int typeIdxCr = ctuParam[2][addr].typeIdx;
+
+    if (idxX != (m_numCuInWidth - 1))
+    {
+        recCb = reconPic->getCbAddr(addr);
+        recCr = reconPic->getCrAddr(addr);
+        for (int i = 0; i < ctuHeight + 1; i++)
+        {
+            m_tmpL2[1][i] = recCb[ctuWidth - 1];
+            m_tmpL2[2][i] = recCr[ctuWidth - 1];
+            recCb += stride;
+            recCr += stride;
+        }
+    }
+
+    // Process U (Cb, plane 1); the offset tables are only rebuilt when not merging with the left CTU
+    if (typeIdxCb >= 0)
+    {
+        if (!mergeLeftFlagCb)
+        {
+            if (typeIdxCb == SAO_BO)
+            {
+                memset(m_offsetBo[1], 0, sizeof(m_offsetBo[0]));
+
+                for (int i = 0; i < SAO_NUM_OFFSET; i++)
+                    m_offsetBo[1][((ctuParam[1][addr].bandPos + i) & (SAO_NUM_BO_CLASSES - 1))] = (int8_t)(ctuParam[1][addr].offset[i] << SAO_BIT_INC);
+            }
+            else // if (typeIdxCb == SAO_EO_0 || typeIdxCb == SAO_EO_1 || typeIdxCb == SAO_EO_2 || typeIdxCb == SAO_EO_3)
+            {
+                int offset[NUM_EDGETYPE];
+                offset[0] = 0;
+                for (int i = 0; i < SAO_NUM_OFFSET; i++)
+                    offset[i + 1] = ctuParam[1][addr].offset[i] << SAO_BIT_INC;
+
+                for (int edgeType = 0; edgeType < NUM_EDGETYPE; edgeType++)
+                    m_offsetEo[1][edgeType] = (int8_t)offset[s_eoTable[edgeType]];
+            }
+        }
+        processSaoCu(addr, typeIdxCb, 1);
+    }
+
+    // Process V (Cr, plane 2); the offset tables are only rebuilt when not merging with the left CTU
+    if (typeIdxCr >= 0)
+    {
+        if (!mergeLeftFlagCr)
+        {
+            if (typeIdxCr == SAO_BO)
+            {
+                memset(m_offsetBo[2], 0, sizeof(m_offsetBo[0]));
+
+                for (int i = 0; i < SAO_NUM_OFFSET; i++)
+                    m_offsetBo[2][((ctuParam[2][addr].bandPos + i) & (SAO_NUM_BO_CLASSES - 1))] = (int8_t)(ctuParam[2][addr].offset[i] << SAO_BIT_INC);
+            }
+            else // if (typeIdxCr == SAO_EO_0 || typeIdxCr == SAO_EO_1 || typeIdxCr == SAO_EO_2 || typeIdxCr == SAO_EO_3)
+            {
+                int offset[NUM_EDGETYPE];
+                offset[0] = 0;
+                for (int i = 0; i < SAO_NUM_OFFSET; i++)
+                    offset[i + 1] = ctuParam[2][addr].offset[i] << SAO_BIT_INC;
+
+                for (int edgeType = 0; edgeType < NUM_EDGETYPE; edgeType++)
+                    m_offsetEo[2][edgeType] = (int8_t)offset[s_eoTable[edgeType]];
+            }
+        }
+        processSaoCu(addr, typeIdxCr, 2);
+    }
+
+    std::swap(m_tmpL1[1], m_tmpL2[1]);
+    std::swap(m_tmpL1[2], m_tmpL2[2]);
 }
 
 void SAO::copySaoUnit(SaoCtuParam* saoUnitDst, const SaoCtuParam* saoUnitSrc)
diff -r a3a9660c91b8 -r a6d88a08af3d source/encoder/sao.h
--- a/source/encoder/sao.h	Wed Dec 02 11:12:57 2015 -0600
+++ b/source/encoder/sao.h	Wed Dec 02 11:13:00 2015 -0600
@@ -132,7 +132,8 @@
     // CTU-based SAO process without slice granularity
     void processSaoCu(int addr, int typeIdx, int plane);
     void processSaoUnitRow(SaoCtuParam* ctuParam, int idxY, int plane);
-    void processSaoUnitCu(SaoCtuParam* ctuParam, int idxY, int idxX, int plane);
+    void processSaoUnitCuLuma(SaoCtuParam* ctuParam, int idxY, int idxX);
+    void processSaoUnitCuChroma(SaoCtuParam* ctuParam[3], int idxY, int idxX);
 
     void copySaoUnit(SaoCtuParam* saoUnitDst, const SaoCtuParam* saoUnitSrc);
 



More information about the x265-devel mailing list