[x265] [PATCH] SAO: removed redundant distortion calculation

ashok at multicorewareinc.com ashok at multicorewareinc.com
Wed Apr 6 09:42:49 CEST 2016


# HG changeset patch
# User Ashok Kumar Mishra<ashok at multicorewareinc.com>
# Date 1459237249 -19800
#      Tue Mar 29 13:10:49 2016 +0530
# Node ID 33ff2e5f6eb7c8cf4f3edaa265762f32aa9b6f0f
# Parent  5b01678f6fb4e89e23cd41295592a9aa5d51d4ba
SAO: removed redundant distortion calculation

diff -r 5b01678f6fb4 -r 33ff2e5f6eb7 source/encoder/entropy.cpp
--- a/source/encoder/entropy.cpp	Sat Apr 02 19:08:49 2016 +0100
+++ b/source/encoder/entropy.cpp	Tue Mar 29 13:10:49 2016 +0530
@@ -1000,10 +1000,10 @@
         enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
         if (typeIdx == SAO_BO)
         {
-            for (int i = 0; i < SAO_BO_LEN; i++)
+            for (int i = 0; i < SAO_NUM_OFFSET; i++)
                 codeSaoMaxUvlc(abs(ctuParam.offset[i]), OFFSET_THRESH - 1);
 
-            for (int i = 0; i < SAO_BO_LEN; i++)
+            for (int i = 0; i < SAO_NUM_OFFSET; i++)
                 if (ctuParam.offset[i] != 0)
                     encodeBinEP(ctuParam.offset[i] < 0);
 
@@ -1049,10 +1049,10 @@
 
     enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
 
-    for (int i = 0; i < SAO_BO_LEN; i++)
+    for (int i = 0; i < SAO_NUM_OFFSET; i++)
         codeSaoMaxUvlc(abs(offset[i]), OFFSET_THRESH - 1);
 
-    for (int i = 0; i < SAO_BO_LEN; i++)
+    for (int i = 0; i < SAO_NUM_OFFSET; i++)
         if (offset[i] != 0)
             encodeBinEP(offset[i] < 0);
 
diff -r 5b01678f6fb4 -r 33ff2e5f6eb7 source/encoder/sao.cpp
--- a/source/encoder/sao.cpp	Sat Apr 02 19:08:49 2016 +0100
+++ b/source/encoder/sao.cpp	Tue Mar 29 13:10:49 2016 +0530
@@ -600,7 +600,7 @@
                 memset(m_offsetBo[0], 0, sizeof(m_offsetBo[0]));
 
                 for (int i = 0; i < SAO_NUM_OFFSET; i++)
-                    m_offsetBo[0][((ctuParam[addr].bandPos + i) & (SAO_NUM_BO_CLASSES - 1))] = (int8_t)(ctuParam[addr].offset[i] << SAO_BIT_INC);
+                    m_offsetBo[0][((ctuParam[addr].bandPos + i) & (MAX_NUM_SAO_CLASS - 1))] = (int8_t)(ctuParam[addr].offset[i] << SAO_BIT_INC);
             }
             else // if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3)
             {
@@ -675,7 +675,7 @@
                 memset(m_offsetBo[1], 0, sizeof(m_offsetBo[0]));
 
                 for (int i = 0; i < SAO_NUM_OFFSET; i++)
-                    m_offsetBo[1][((ctuParam[1][addr].bandPos + i) & (SAO_NUM_BO_CLASSES - 1))] = (int8_t)(ctuParam[1][addr].offset[i] << SAO_BIT_INC);
+                    m_offsetBo[1][((ctuParam[1][addr].bandPos + i) & (MAX_NUM_SAO_CLASS - 1))] = (int8_t)(ctuParam[1][addr].offset[i] << SAO_BIT_INC);
             }
             else // if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3)
             {
@@ -701,7 +701,7 @@
                 memset(m_offsetBo[2], 0, sizeof(m_offsetBo[0]));
 
                 for (int i = 0; i < SAO_NUM_OFFSET; i++)
-                    m_offsetBo[2][((ctuParam[2][addr].bandPos + i) & (SAO_NUM_BO_CLASSES - 1))] = (int8_t)(ctuParam[2][addr].offset[i] << SAO_BIT_INC);
+                    m_offsetBo[2][((ctuParam[2][addr].bandPos + i) & (MAX_NUM_SAO_CLASS - 1))] = (int8_t)(ctuParam[2][addr].offset[i] << SAO_BIT_INC);
             }
             else // if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3)
             {
@@ -1211,13 +1211,6 @@
     bool chroma = m_param->internalCsp != X265_CSP_I400;
     int planes = chroma ? 3 : 1;
 
-    m_entropyCoder.load(m_rdContexts.cur);
-    if (allowMerge[0])
-        m_entropyCoder.codeSaoMerge(0);
-    if (allowMerge[1])
-        m_entropyCoder.codeSaoMerge(0);
-    m_entropyCoder.store(m_rdContexts.temp);
-
     // reset stats Y, Cb, Cr
     X265_CHECK(sizeof(PerPlane) == (sizeof(int32_t) * (NUM_PLANE * MAX_NUM_SAO_TYPE * MAX_NUM_SAO_CLASS)), "Found Padding space in struct PerPlane");
 
@@ -1247,30 +1240,25 @@
 
     saoStatsInitialOffset(chroma);
 
+    // SAO distortion calculation
+    m_entropyCoder.load(m_rdContexts.cur);
+    m_entropyCoder.resetBits();
+    if (allowMerge[0])
+        m_entropyCoder.codeSaoMerge(0);
+    if (allowMerge[1])
+        m_entropyCoder.codeSaoMerge(0);
+    m_entropyCoder.store(m_rdContexts.temp);
+
     double mergeDist[NUM_MERGE_MODE] = { 0.0 };
-    saoLumaComponentParamDist(saoParam, addr, mergeDist, lambda);
+    double bestCost = 0.0;
+
+    // Estimate distortion and cost of new SAO params
+    saoLumaComponentParamDist(saoParam, addr, mergeDist, lambda, bestCost);
     if (chroma)
-        saoChromaComponentParamDist(saoParam, addr, mergeDist, lambda);
+        saoChromaComponentParamDist(saoParam, addr, mergeDist, lambda, bestCost);
 
     if (saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1])
     {
-        // Cost of new SAO_params
-        m_entropyCoder.load(m_rdContexts.cur);
-        m_entropyCoder.resetBits();
-        if (allowMerge[0])
-            m_entropyCoder.codeSaoMerge(0);
-        if (allowMerge[1])
-            m_entropyCoder.codeSaoMerge(0);
-        for (int plane = 0; plane < planes; plane++)
-        {
-            if (saoParam->bSaoFlag[plane > 0])
-                m_entropyCoder.codeSaoOffset(saoParam->ctuParam[plane][addr], plane);
-        }
-
-        uint32_t rate = m_entropyCoder.getNumberOfWrittenBits();
-        double bestCost = mergeDist[0] + (double)rate;
-        m_entropyCoder.store(m_rdContexts.temp);
-
         // Cost of merge left or Up
         for (int mergeIdx = 0; mergeIdx < 2; ++mergeIdx)
         {
@@ -1302,7 +1290,7 @@
             if (allowMerge[1] && (mergeIdx == 1))
                 m_entropyCoder.codeSaoMerge(1);
 
-            rate = m_entropyCoder.getNumberOfWrittenBits();
+            int32_t rate = m_entropyCoder.getNumberOfWrittenBits();
             double mergeCost = mergeDist[mergeIdx + 1] + (double)rate;
             if (mergeCost < bestCost)
             {
@@ -1341,13 +1329,14 @@
 void SAO::saoStatsInitialOffset(bool chroma)
 {
     int planes = chroma ? 3 : 1;
+    memset(m_offset, 0, sizeof(m_offset));
 
     // EO
     for (int plane = 0; plane < planes; plane++)
     {
         for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE - 1; typeIdx++)
         {
-            for (int classIdx = 1; classIdx < SAO_EO_LEN + 1; classIdx++)
+            for (int classIdx = 1; classIdx < SAO_NUM_OFFSET + 1; classIdx++)
             {
                 int32_t  count     = m_count[plane][typeIdx][classIdx];
                 int32_t& offsetOrg = m_offsetOrg[plane][typeIdx][classIdx];
@@ -1370,7 +1359,7 @@
     // BO
     for (int plane = 0; plane < planes; plane++)
     {
-        for (int classIdx = 0; classIdx < SAO_NUM_BO_CLASSES; classIdx++)
+        for (int classIdx = 0; classIdx < MAX_NUM_SAO_CLASS; classIdx++)
         {
             int32_t  count     = m_count[plane][SAO_BO][classIdx];
             int32_t& offsetOrg = m_offsetOrg[plane][SAO_BO][classIdx];
@@ -1385,9 +1374,10 @@
     }
 }
 
-inline int SAO::estIterOffset(int typeIdx, double lambda, int offset, int32_t count, int32_t offsetOrg, int& distBOClasses, double& costBOClasses)
+void SAO::estIterOffset(int typeIdx, double lambda, int32_t count, int32_t offsetOrg, int& offset, int& distClasses, double& costClasses)
 {
     int bestOffset = 0;
+    distClasses    = 0;
 
     // Assuming sending quantized value 0 results in zero offset and sending the value zero needs 1 bit.
     // entropy coder can be used to measure the exact rate here.
@@ -1406,27 +1396,26 @@
         {
             bestCost = cost;
             bestOffset = offset;
-            if (typeIdx == SAO_BO)
-            {
-                distBOClasses = (int)dist;
-                costBOClasses = bestCost;
-            }
+            distClasses = (int)dist;
         }
         offset = (offset > 0) ? (offset - 1) : (offset + 1);
     }
-    return bestOffset;
+
+    costClasses = bestCost;
+    offset = bestOffset;
 }
 
-void SAO::saoLumaComponentParamDist(SAOParam* saoParam, int addr, double* mergeDist, double* lambda)
+void SAO::saoLumaComponentParamDist(SAOParam* saoParam, int addr, double* mergeDist, double* lambda, double &bestCost)
 {
     int64_t bestDist = 0;
     int bestTypeIdx = -1;
 
     SaoCtuParam* lclCtuParam = &saoParam->ctuParam[0][addr];
 
-    int    distBOClasses[MAX_NUM_SAO_CLASS];
-    double costBOClasses[MAX_NUM_SAO_CLASS];
+    int    distClasses[MAX_NUM_SAO_CLASS];
+    double costClasses[MAX_NUM_SAO_CLASS];
 
+    // RDO SAO_NA
     m_entropyCoder.load(m_rdContexts.temp);
     m_entropyCoder.resetBits();
     m_entropyCoder.codeSaoType(0);
@@ -1437,18 +1426,16 @@
     for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE - 1; typeIdx++)
     {
         int64_t estDist = 0;
-        for (int classIdx = 1; classIdx < SAO_EO_LEN + 1; classIdx++)
+        for (int classIdx = 1; classIdx < SAO_NUM_OFFSET + 1; classIdx++)
         {
             int32_t  count     = m_count[0][typeIdx][classIdx];
             int32_t& offsetOrg = m_offsetOrg[0][typeIdx][classIdx];
             int32_t& offsetOut = m_offset[0][typeIdx][classIdx];
 
-            if (count)
-                offsetOut = estIterOffset(typeIdx, lambda[0], offsetOut, count, offsetOrg, distBOClasses[0], costBOClasses[0]);
-            else
-                offsetOut = 0;
+            estIterOffset(typeIdx, lambda[0], count, offsetOrg, offsetOut, distClasses[classIdx], costClasses[classIdx]);
 
-            estDist += estSaoDist(count, (int)offsetOut << SAO_BIT_INC, offsetOrg);
+            //Calculate distortion
+            estDist += distClasses[classIdx];
         }
 
         m_entropyCoder.load(m_rdContexts.temp);
@@ -1477,30 +1464,24 @@
 
     //BO RDO
     int64_t estDist = 0;
-    for (int classIdx = 0; classIdx < SAO_NUM_BO_CLASSES; classIdx++)
+    for (int classIdx = 0; classIdx < MAX_NUM_SAO_CLASS; classIdx++)
     {
         int32_t  count     = m_count[0][SAO_BO][classIdx];
         int32_t& offsetOrg = m_offsetOrg[0][SAO_BO][classIdx];
         int32_t& offsetOut = m_offset[0][SAO_BO][classIdx];
 
-        distBOClasses[classIdx] = 0;
-        costBOClasses[classIdx] = lambda[0];
-
-        if (count)
-            offsetOut = estIterOffset(SAO_BO, lambda[0], offsetOut, count, offsetOrg, distBOClasses[classIdx], costBOClasses[classIdx]);
-        else
-            offsetOut = 0;
+        estIterOffset(SAO_BO, lambda[0], count, offsetOrg, offsetOut, distClasses[classIdx], costClasses[classIdx]);
     }
 
     // Estimate Best Position
     double bestRDCostBO = MAX_DOUBLE;
     int    bestClassBO  = 0;
 
-    for (int i = 0; i < SAO_NUM_BO_CLASSES - SAO_BO_LEN + 1; i++)
+    for (int i = 0; i < MAX_NUM_SAO_CLASS - SAO_NUM_OFFSET + 1; i++)
     {
         double currentRDCost = 0.0;
-        for (int j = i; j < i + SAO_BO_LEN; j++)
-            currentRDCost += costBOClasses[j];
+        for (int j = i; j < i + SAO_NUM_OFFSET; j++)
+            currentRDCost += costClasses[j];
 
         if (currentRDCost < bestRDCostBO)
         {
@@ -1510,8 +1491,8 @@
     }
 
     estDist = 0;
-    for (int classIdx = bestClassBO; classIdx < bestClassBO + SAO_BO_LEN; classIdx++)
-        estDist += distBOClasses[classIdx];
+    for (int classIdx = bestClassBO; classIdx < bestClassBO + SAO_NUM_OFFSET; classIdx++)
+        estDist += distClasses[classIdx];
 
     m_entropyCoder.load(m_rdContexts.temp);
     m_entropyCoder.resetBits();
@@ -1536,24 +1517,27 @@
     m_entropyCoder.load(m_rdContexts.temp);
     m_entropyCoder.codeSaoOffset(*lclCtuParam, 0);
     m_entropyCoder.store(m_rdContexts.temp);
+
+    uint32_t rate = m_entropyCoder.getNumberOfWrittenBits();
+    bestCost = mergeDist[0] + (double)rate;
 }
 
-void SAO::saoChromaComponentParamDist(SAOParam* saoParam, int addr, double* mergeDist, double* lambda)
+void SAO::saoChromaComponentParamDist(SAOParam* saoParam, int addr, double* mergeDist, double* lambda, double &bestCost)
 {
     int64_t bestDist = 0;
     int bestTypeIdx = -1;
 
     SaoCtuParam* lclCtuParam[2] = { &saoParam->ctuParam[1][addr], &saoParam->ctuParam[2][addr] };
 
-    double costBOClasses[MAX_NUM_SAO_CLASS];
-    int    distBOClasses[MAX_NUM_SAO_CLASS];
+    double costClasses[MAX_NUM_SAO_CLASS];
+    int    distClasses[MAX_NUM_SAO_CLASS];
     int    bestClassBO[2] = { 0, 0 };
 
     m_entropyCoder.load(m_rdContexts.temp);
     m_entropyCoder.resetBits();
     m_entropyCoder.codeSaoType(0);
 
-    double costPartBest = m_entropyCoder.getNumberOfWrittenBits() * lambda[1];
+    double dCostPartBest = m_entropyCoder.getNumberOfWrittenBits() * lambda[1];
 
     //EO RDO
     for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE - 1; typeIdx++)
@@ -1561,18 +1545,15 @@
         int64_t estDist[2] = {0, 0};
         for (int compIdx = 1; compIdx < 3; compIdx++)
         {
-            for (int classIdx = 1; classIdx < SAO_EO_LEN + 1; classIdx++)
+            for (int classIdx = 1; classIdx < SAO_NUM_OFFSET + 1; classIdx++)
             {
                 int32_t  count = m_count[compIdx][typeIdx][classIdx];
                 int32_t& offsetOrg = m_offsetOrg[compIdx][typeIdx][classIdx];
                 int32_t& offsetOut = m_offset[compIdx][typeIdx][classIdx];
 
-                if (count)
-                    offsetOut = estIterOffset(typeIdx, lambda[1], offsetOut, count, offsetOrg, distBOClasses[0], costBOClasses[0]);
-                else
-                    offsetOut = 0;
+                estIterOffset(typeIdx, lambda[1], count, offsetOrg, offsetOut, distClasses[classIdx], costClasses[classIdx]);
 
-                estDist[compIdx - 1] += estSaoDist(count, (int)offsetOut << SAO_BIT_INC, offsetOrg);
+                estDist[compIdx - 1] += distClasses[classIdx];
             }
         }
 
@@ -1585,9 +1566,9 @@
         uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
         double cost = (double)(estDist[0] + estDist[1]) + lambda[1] * (double)estRate;
 
-        if (cost < costPartBest)
+        if (cost < dCostPartBest)
         {
-            costPartBest = cost;
+            dCostPartBest = cost;
             bestDist = (estDist[0] + estDist[1]);
             bestTypeIdx = typeIdx;
         }
@@ -1613,26 +1594,20 @@
     {
         double bestRDCostBO = MAX_DOUBLE;
 
-        for (int classIdx = 0; classIdx < SAO_NUM_BO_CLASSES; classIdx++)
+        for (int classIdx = 0; classIdx < MAX_NUM_SAO_CLASS; classIdx++)
         {
             int32_t  count = m_count[compIdx][SAO_BO][classIdx];
             int32_t& offsetOrg = m_offsetOrg[compIdx][SAO_BO][classIdx];
             int32_t& offsetOut = m_offset[compIdx][SAO_BO][classIdx];
 
-            distBOClasses[classIdx] = 0;
-            costBOClasses[classIdx] = lambda[1];
-
-            if (count)
-                offsetOut = estIterOffset(SAO_BO, lambda[1], offsetOut, count, offsetOrg, distBOClasses[classIdx], costBOClasses[classIdx]);
-            else
-                offsetOut = 0;
+            estIterOffset(SAO_BO, lambda[1], count, offsetOrg, offsetOut, distClasses[classIdx], costClasses[classIdx]);
         }
 
-        for (int i = 0; i < SAO_NUM_BO_CLASSES - SAO_BO_LEN + 1; i++)
+        for (int i = 0; i < MAX_NUM_SAO_CLASS - SAO_NUM_OFFSET + 1; i++)
         {
             double currentRDCost = 0.0;
-            for (int j = i; j < i + SAO_BO_LEN; j++)
-                currentRDCost += costBOClasses[j];
+            for (int j = i; j < i + SAO_NUM_OFFSET; j++)
+                currentRDCost += costClasses[j];
 
             if (currentRDCost < bestRDCostBO)
             {
@@ -1642,8 +1617,8 @@
         }
 
         estDist[compIdx - 1] = 0;
-        for (int classIdx = bestClassBO[compIdx - 1]; classIdx < bestClassBO[compIdx - 1] + SAO_BO_LEN; classIdx++)
-            estDist[compIdx - 1] += distBOClasses[classIdx];
+        for (int classIdx = bestClassBO[compIdx - 1]; classIdx < bestClassBO[compIdx - 1] + SAO_NUM_OFFSET; classIdx++)
+            estDist[compIdx - 1] += distClasses[classIdx];
     }
 
     m_entropyCoder.load(m_rdContexts.temp);
@@ -1655,9 +1630,9 @@
     uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
     double cost = (double)(estDist[0] + estDist[1]) + lambda[1] * (double)estRate;
 
-    if (cost < costPartBest)
+    if (cost < dCostPartBest)
     {
-        costPartBest = cost;
+        dCostPartBest = cost;
         bestDist = (estDist[0] + estDist[1]);
 
         for (int compIdx = 0; compIdx < 2; compIdx++)
@@ -1672,9 +1647,13 @@
 
     mergeDist[0] += ((double)bestDist / lambda[1]);
     m_entropyCoder.load(m_rdContexts.temp);
+
     m_entropyCoder.codeSaoOffset(*lclCtuParam[0], 1);
     m_entropyCoder.codeSaoOffset(*lclCtuParam[1], 2);
     m_entropyCoder.store(m_rdContexts.temp);
+
+    uint32_t rate = m_entropyCoder.getNumberOfWrittenBits();
+    bestCost = mergeDist[0] + (double)rate;
 }
 
 // NOTE: must put in namespace X265_NS since we need class SAO
diff -r 5b01678f6fb4 -r 33ff2e5f6eb7 source/encoder/sao.h
--- a/source/encoder/sao.h	Sat Apr 02 19:08:49 2016 +0100
+++ b/source/encoder/sao.h	Tue Mar 29 13:10:49 2016 +0530
@@ -33,13 +33,6 @@
 namespace X265_NS {
 // private namespace
 
-enum SAOTypeLen
-{
-    SAO_EO_LEN = 4,
-    SAO_BO_LEN = 4,
-    SAO_NUM_BO_CLASSES = 32
-};
-
 enum SAOType
 {
     SAO_EO_0 = 0,
@@ -81,7 +74,7 @@
     PerPlane*   m_offsetOrgPreDblk;
 
     double*     m_depthSaoRate;
-    int8_t      m_offsetBo[NUM_PLANE][SAO_NUM_BO_CLASSES];
+    int8_t      m_offsetBo[NUM_PLANE][MAX_NUM_SAO_CLASS];
     int8_t      m_offsetEo[NUM_PLANE][NUM_EDGETYPE];
 
     int         m_chromaFormat;
@@ -134,11 +127,10 @@
     void calcSaoStatsCu(int addr, int plane);
     void calcSaoStatsCu_BeforeDblk(Frame* pic, int idxX, int idxY);
 
-    void saoLumaComponentParamDist(SAOParam* saoParam, int addr, double* mergeDist, double* lambda);
-    void saoChromaComponentParamDist(SAOParam* saoParam, int addr, double* mergeDist, double* lambda);
+    void saoLumaComponentParamDist(SAOParam* saoParam, int addr, double* mergeDist, double* lambda, double &bestCost);
+    void saoChromaComponentParamDist(SAOParam* saoParam, int addr, double* mergeDist, double* lambda, double &bestCost);
 
-    inline int estIterOffset(int typeIdx, double lambda, int offset, int32_t count, int32_t offsetOrg,
-                             int& currentDistortionTableBo, double& currentRdCostTableBo);
+    void estIterOffset(int typeIdx, double lambda, int32_t count, int32_t offsetOrg, int& offset, int& distClasses, double& costClasses);
     void rdoSaoUnitRowEnd(const SAOParam* saoParam, int numctus);
     void rdoSaoUnitCu(SAOParam* saoParam, int rowBaseAddr, int idxX, int addr);
 


More information about the x265-devel mailing list