[x265] [PATCH 1 of 2] sao: split SAO contexts into per-row instances

Min Chen chenm003 at 163.com
Mon Nov 23 23:48:38 CET 2015


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1448318031 21600
# Node ID ef7e244c3e996c356e97c038c77b87aa7e7ab621
# Parent  1f4de5949b3b38ab440dd3547aa9b0a12306efbe
sao: split SAO contexts into per-row instances
---
 source/encoder/frameencoder.cpp |   34 ++++++------
 source/encoder/framefilter.cpp  |   36 ++++++------
 source/encoder/framefilter.h    |   12 ++--
 source/encoder/sao.cpp          |  111 +++++++++++++++++++++------------------
 source/encoder/sao.h            |   60 +++++++++++-----------
 5 files changed, 131 insertions(+), 122 deletions(-)

diff -r 1f4de5949b3b -r ef7e244c3e99 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Mon Nov 23 16:19:30 2015 -0600
+++ b/source/encoder/frameencoder.cpp	Mon Nov 23 16:33:51 2015 -0600
@@ -1103,24 +1103,24 @@
             if (row > 0)
             {
                 // Waitting last threading finish
-                m_frameFilter.m_pdeblock[row - 1].waitForExit();
+                m_frameFilter.m_parallelFilter[row - 1].waitForExit();
 
                 // Processing new group
-                const int allowCol = ((row >= 2) ? X265_MIN(m_frameFilter.m_pdeblock[row - 2].m_lastCol.get(), (int)col) : col);
-                m_frameFilter.m_pdeblock[row - 1].m_allowedCol.set(allowCol);
-                m_frameFilter.m_pdeblock[row - 1].tryBondPeers(*this, 1);
+                const int allowCol = ((row >= 2) ? X265_MIN(m_frameFilter.m_parallelFilter[row - 2].m_lastCol.get(), (int)col) : col);
+                m_frameFilter.m_parallelFilter[row - 1].m_allowedCol.set(allowCol);
+                m_frameFilter.m_parallelFilter[row - 1].tryBondPeers(*this, 1);
             }
 
             // Last Row may start early
             if (row == m_numRows - 1)
             {
                 // Waitting last threading finish
-                m_frameFilter.m_pdeblock[row].waitForExit();
+                m_frameFilter.m_parallelFilter[row].waitForExit();
 
                 // Processing last row
-                const int allowCol = ((row >= 2) ? X265_MIN(m_frameFilter.m_pdeblock[row - 1].m_lastCol.get(), (int)col) : col);
-                m_frameFilter.m_pdeblock[row].m_allowedCol.set(allowCol);
-                m_frameFilter.m_pdeblock[row].tryBondPeers(*this, 1);
+                const int allowCol = ((row >= 2) ? X265_MIN(m_frameFilter.m_parallelFilter[row - 1].m_lastCol.get(), (int)col) : col);
+                m_frameFilter.m_parallelFilter[row].m_allowedCol.set(allowCol);
+                m_frameFilter.m_parallelFilter[row].tryBondPeers(*this, 1);
             }
         }
 
@@ -1188,17 +1188,17 @@
         if (m_param->bEnableLoopFilter & (row > 0))
         {
             /* TODO: Multiple Threading */
-            m_frameFilter.m_pdeblock[row - 1].waitForExit();
+            m_frameFilter.m_parallelFilter[row - 1].waitForExit();
 
             /* Check to avoid previous row process slower than current row */
             if (row >= 2)
             {
-                int prevCol = m_frameFilter.m_pdeblock[row - 2].m_lastCol.get();
+                int prevCol = m_frameFilter.m_parallelFilter[row - 2].m_lastCol.get();
                 while(prevCol != (int)numCols)
-                    prevCol = m_frameFilter.m_pdeblock[row - 2].m_lastCol.waitForChange(prevCol);
+                    prevCol = m_frameFilter.m_parallelFilter[row - 2].m_lastCol.waitForChange(prevCol);
             }
-            m_frameFilter.m_pdeblock[row - 1].m_allowedCol.set(numCols);
-            m_frameFilter.m_pdeblock[row - 1].processTasks(-1);
+            m_frameFilter.m_parallelFilter[row - 1].m_allowedCol.set(numCols);
+            m_frameFilter.m_parallelFilter[row - 1].processTasks(-1);
         }
 
         /* trigger row-wise loop filters */
@@ -1217,12 +1217,12 @@
             /* TODO: Early start last row */
             if (m_param->bEnableLoopFilter)
             {
-                X265_CHECK(m_frameFilter.m_pdeblock[row - 1].m_allowedCol.get() == (int)numCols, "Deblock m_EncodedCol check failed");
+                X265_CHECK(m_frameFilter.m_parallelFilter[row - 1].m_allowedCol.get() == (int)numCols, "Deblock m_EncodedCol check failed");
 
                 /* NOTE: Last Row not execute before, so didn't need wait */
-                m_frameFilter.m_pdeblock[row].waitForExit();
-                m_frameFilter.m_pdeblock[row].m_allowedCol.set(numCols);
-                m_frameFilter.m_pdeblock[row].processTasks(-1);
+                m_frameFilter.m_parallelFilter[row].waitForExit();
+                m_frameFilter.m_parallelFilter[row].m_allowedCol.set(numCols);
+                m_frameFilter.m_parallelFilter[row].processTasks(-1);
             }
 
             for (uint32_t i = m_numRows - m_filterRowDelay; i < m_numRows; i++)
diff -r 1f4de5949b3b -r ef7e244c3e99 source/encoder/framefilter.cpp
--- a/source/encoder/framefilter.cpp	Mon Nov 23 16:19:30 2015 -0600
+++ b/source/encoder/framefilter.cpp	Mon Nov 23 16:33:51 2015 -0600
@@ -35,7 +35,7 @@
 static uint64_t computeSSD(pixel *fenc, pixel *rec, intptr_t stride, uint32_t width, uint32_t height);
 static float calculateSSIM(pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2, uint32_t width, uint32_t height, void *buf, uint32_t& cnt);
 
-uint32_t FrameFilter::ParallelDeblock::numCols = 0;
+uint32_t FrameFilter::ParallelFilter::numCols = 0;
 
 void FrameFilter::destroy()
 {
@@ -44,10 +44,10 @@
 
     X265_FREE(m_ssimBuf);
 
-    if (m_pdeblock)
+    if (m_parallelFilter)
     {
-        delete[] m_pdeblock;
-        m_pdeblock = NULL;
+        delete[] m_parallelFilter;
+        m_parallelFilter = NULL;
     }
 }
 
@@ -71,19 +71,19 @@
         m_ssimBuf = X265_MALLOC(int, 8 * (m_param->sourceWidth / 4 + 3));
 
     if (m_param->bEnableLoopFilter)
-        m_pdeblock = new ParallelDeblock[numRows];
+        m_parallelFilter = new ParallelFilter[numRows];
 
-    if (m_pdeblock)
+    if (m_parallelFilter)
     {
         for(int row = 0; row < numRows; row++)
         {
-            m_pdeblock[row].m_rowAddr = row * numCols;
-            m_pdeblock[row].m_frameEncoder = m_frameEncoder;
+            m_parallelFilter[row].m_rowAddr = row * numCols;
+            m_parallelFilter[row].m_frameEncoder = m_frameEncoder;
         }
     }
 
     // Setting maximum columns
-    ParallelDeblock::numCols = numCols;
+    ParallelFilter::numCols = numCols;
 }
 
 void FrameFilter::start(Frame *frame, Entropy& initState, int qp)
@@ -91,22 +91,22 @@
     m_frame = frame;
 
     if (m_param->bEnableSAO)
-        m_sao.startSlice(frame, initState, qp);
+        m_sao.startSlice(frame, initState, m_numRows, qp);
 
     // Reset Deblock Data Struct
-    if (m_pdeblock)
+    if (m_parallelFilter)
     {
         for(int row = 0; row < m_numRows; row++)
         {
-            m_pdeblock[row].m_lastCol.set(0);
-            m_pdeblock[row].m_allowedCol.set(0);
-            m_pdeblock[row].m_encData = frame->m_encData;
+            m_parallelFilter[row].m_lastCol.set(0);
+            m_parallelFilter[row].m_allowedCol.set(0);
+            m_parallelFilter[row].m_encData = frame->m_encData;
         }
     }
 }
 
 // NOTE: Single Threading only
-void FrameFilter::ParallelDeblock::processTasks(int /*workerThreadId*/)
+void FrameFilter::ParallelFilter::processTasks(int /*workerThreadId*/)
 {
     const CUGeom* cuGeoms = m_frameEncoder->m_cuGeoms;
     const uint32_t* ctuGeomMap = m_frameEncoder->m_ctuGeomMap;
@@ -160,9 +160,9 @@
     SAOParam* saoParam = encData.m_saoParam;
     if (m_param->bEnableSAO)
     {
-        m_sao.m_entropyCoder.load(m_frameEncoder->m_initSliceContext);
-        m_sao.m_rdContexts.next.load(m_frameEncoder->m_initSliceContext);
-        m_sao.m_rdContexts.cur.load(m_frameEncoder->m_initSliceContext);
+        m_sao.m_entropyCoder[row].load(m_frameEncoder->m_initSliceContext);
+        m_sao.m_rdContexts[row].next.load(m_frameEncoder->m_initSliceContext);
+        m_sao.m_rdContexts[row].cur.load(m_frameEncoder->m_initSliceContext);
 
         m_sao.rdoSaoUnitRow(saoParam, row);
 
diff -r 1f4de5949b3b -r ef7e244c3e99 source/encoder/framefilter.h
--- a/source/encoder/framefilter.h	Mon Nov 23 16:19:30 2015 -0600
+++ b/source/encoder/framefilter.h	Mon Nov 23 16:33:51 2015 -0600
@@ -59,7 +59,7 @@
     void*         m_ssimBuf; /* Temp storage for ssim computation */
 
 #define MAX_PFILTER_CUS     (4) /* maximum CUs for every thread */
-    class ParallelDeblock : public BondedTaskGroup, public Deblock
+    class ParallelFilter : public BondedTaskGroup, public Deblock
     {
     public:
         static uint32_t     numCols;
@@ -69,31 +69,31 @@
         ThreadSafeInteger   m_lastCol;          /* The column that next to process */
         ThreadSafeInteger   m_allowedCol;       /* The column that processed from Encode pipeline */
 
-        ParallelDeblock()
+        ParallelFilter()
             : m_rowAddr(0)
             , m_frameEncoder(NULL)
             , m_encData(NULL)
         {
         }
 
-        ~ParallelDeblock()
+        ~ParallelFilter()
         { }
 
         void processTasks(int workerThreadId);
 
     protected:
 
-        ParallelDeblock operator=(const ParallelDeblock&);
+        ParallelFilter operator=(const ParallelFilter&);
     };
 
-    ParallelDeblock*    m_pdeblock;
+    ParallelFilter*    m_parallelFilter;
 
     FrameFilter()
         : m_param(NULL)
         , m_frame(NULL)
         , m_frameEncoder(NULL)
         , m_ssimBuf(NULL)
-        , m_pdeblock(NULL)
+        , m_parallelFilter(NULL)
     {
     }
 
diff -r 1f4de5949b3b -r ef7e244c3e99 source/encoder/sao.cpp
--- a/source/encoder/sao.cpp	Mon Nov 23 16:19:30 2015 -0600
+++ b/source/encoder/sao.cpp	Mon Nov 23 16:33:51 2015 -0600
@@ -117,6 +117,12 @@
     const pixel rangeExt = maxY >> 1;
     int numCtu = m_numCuInWidth * m_numCuInHeight;
 
+    m_entropyCoder = new Entropy[m_numCuInHeight];
+    X265_CHECK(m_entropyCoder != NULL, "memory alloc failed on m_entropyCoder");
+
+    m_rdContexts = new SAOContexts[m_numCuInHeight];
+    X265_CHECK(m_rdContexts != NULL, "memory alloc failed on m_rdContexts");
+
     CHECKED_MALLOC(m_clipTableBase,  pixel, maxY + 2 * rangeExt);
 
     CHECKED_MALLOC(m_tmpL1, pixel, g_maxCUSize + 1);
@@ -185,7 +191,7 @@
     saoParam->ctuParam[2] = new SaoCtuParam[m_numCuInHeight * m_numCuInWidth];
 }
 
-void SAO::startSlice(Frame* frame, Entropy& initState, int qp)
+void SAO::startSlice(Frame* frame, Entropy& initState, int numRows, int qp)
 {
     Slice* slice = frame->m_encData->m_slice;
     int qpCb = qp;
@@ -212,9 +218,12 @@
 
     resetStats();
 
-    m_entropyCoder.load(initState);
-    m_rdContexts.next.load(initState);
-    m_rdContexts.cur.load(initState);
+    for(int i = 0; i < numRows; i++)
+    {
+        m_entropyCoder[i].load(initState);
+        m_rdContexts[i].next.load(initState);
+        m_rdContexts[i].cur.load(initState);
+    }
 
     SAOParam* saoParam = frame->m_encData->m_saoParam;
     if (!saoParam)
@@ -1150,12 +1159,12 @@
         int addrLeft = idxX ? addr - 1 : -1;
         allowMerge[0] = (idxX > 0);
 
-        m_entropyCoder.load(m_rdContexts.cur);
+        m_entropyCoder[idxY].load(m_rdContexts[idxY].cur);
         if (allowMerge[0])
-            m_entropyCoder.codeSaoMerge(0);
+            m_entropyCoder[idxY].codeSaoMerge(0);
         if (allowMerge[1])
-            m_entropyCoder.codeSaoMerge(0);
-        m_entropyCoder.store(m_rdContexts.temp);
+            m_entropyCoder[idxY].codeSaoMerge(0);
+        m_entropyCoder[idxY].store(m_rdContexts[idxY].temp);
 
         // reset stats Y, Cb, Cr
         X265_CHECK(sizeof(PerPlane) == (sizeof(int32_t) * (NUM_PLANE * MAX_NUM_SAO_TYPE * MAX_NUM_SAO_CLASS)), "Found Padding space in struct PerPlane");
@@ -1186,28 +1195,28 @@
             calcSaoStatsCu(addr, 2);
         }
 
-        saoComponentParamDist(saoParam, addr, addrUp, addrLeft, &mergeSaoParam[0][0], mergeDist);
+        saoComponentParamDist(saoParam, idxY, addr, addrUp, addrLeft, &mergeSaoParam[0][0], mergeDist);
 
-        sao2ChromaParamDist(saoParam, addr, addrUp, addrLeft, mergeSaoParam, mergeDist);
+        sao2ChromaParamDist(saoParam, idxY, addr, addrUp, addrLeft, mergeSaoParam, mergeDist);
 
         if (saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1])
         {
             // Cost of new SAO_params
-            m_entropyCoder.load(m_rdContexts.cur);
-            m_entropyCoder.resetBits();
+            m_entropyCoder[idxY].load(m_rdContexts[idxY].cur);
+            m_entropyCoder[idxY].resetBits();
             if (allowMerge[0])
-                m_entropyCoder.codeSaoMerge(0);
+                m_entropyCoder[idxY].codeSaoMerge(0);
             if (allowMerge[1])
-                m_entropyCoder.codeSaoMerge(0);
+                m_entropyCoder[idxY].codeSaoMerge(0);
             for (int plane = 0; plane < 3; plane++)
             {
                 if (saoParam->bSaoFlag[plane > 0])
-                    m_entropyCoder.codeSaoOffset(saoParam->ctuParam[plane][addr], plane);
+                    m_entropyCoder[idxY].codeSaoOffset(saoParam->ctuParam[plane][addr], plane);
             }
 
-            uint32_t rate = m_entropyCoder.getNumberOfWrittenBits();
+            uint32_t rate = m_entropyCoder[idxY].getNumberOfWrittenBits();
             double bestCost = mergeDist[0] + (double)rate;
-            m_entropyCoder.store(m_rdContexts.temp);
+            m_entropyCoder[idxY].store(m_rdContexts[idxY].temp);
 
             // Cost of Merge
             for (int mergeIdx = 0; mergeIdx < 2; ++mergeIdx)
@@ -1215,20 +1224,20 @@
                 if (!allowMerge[mergeIdx])
                     continue;
 
-                m_entropyCoder.load(m_rdContexts.cur);
-                m_entropyCoder.resetBits();
+                m_entropyCoder[idxY].load(m_rdContexts[idxY].cur);
+                m_entropyCoder[idxY].resetBits();
                 if (allowMerge[0])
-                    m_entropyCoder.codeSaoMerge(1 - mergeIdx);
+                    m_entropyCoder[idxY].codeSaoMerge(1 - mergeIdx);
                 if (allowMerge[1] && (mergeIdx == 1))
-                    m_entropyCoder.codeSaoMerge(1);
+                    m_entropyCoder[idxY].codeSaoMerge(1);
 
-                rate = m_entropyCoder.getNumberOfWrittenBits();
+                rate = m_entropyCoder[idxY].getNumberOfWrittenBits();
                 double mergeCost = mergeDist[mergeIdx + 1] + (double)rate;
                 if (mergeCost < bestCost)
                 {
                     SaoMergeMode mergeMode = mergeIdx ? SAO_MERGE_UP : SAO_MERGE_LEFT;
                     bestCost = mergeCost;
-                    m_entropyCoder.store(m_rdContexts.temp);
+                    m_entropyCoder[idxY].store(m_rdContexts[idxY].temp);
                     for (int plane = 0; plane < 3; plane++)
                     {
                         mergeSaoParam[plane][mergeIdx].mergeMode = mergeMode;
@@ -1242,8 +1251,8 @@
                 m_numNoSao[0]++;
             if (saoParam->ctuParam[1][addr].typeIdx < 0)
                 m_numNoSao[1]++;
-            m_entropyCoder.load(m_rdContexts.temp);
-            m_entropyCoder.store(m_rdContexts.cur);
+            m_entropyCoder[idxY].load(m_rdContexts[idxY].temp);
+            m_entropyCoder[idxY].store(m_rdContexts[idxY].cur);
         }
     }
 }
@@ -1322,7 +1331,7 @@
     return offsetOut;
 }
 
-void SAO::saoComponentParamDist(SAOParam* saoParam, int addr, int addrUp, int addrLeft, SaoCtuParam* mergeSaoParam, double* mergeDist)
+void SAO::saoComponentParamDist(SAOParam* saoParam, int idxY, int addr, int addrUp, int addrLeft, SaoCtuParam* mergeSaoParam, double* mergeDist)
 {
     int64_t bestDist = 0;
 
@@ -1334,10 +1343,10 @@
     double currentRdCostTableBo[MAX_NUM_SAO_CLASS];
 
     resetSaoUnit(lclCtuParam);
-    m_entropyCoder.load(m_rdContexts.temp);
-    m_entropyCoder.resetBits();
-    m_entropyCoder.codeSaoOffset(*lclCtuParam, 0);
-    double dCostPartBest = m_entropyCoder.getNumberOfWrittenBits() * m_lumaLambda;
+    m_entropyCoder[idxY].load(m_rdContexts[idxY].temp);
+    m_entropyCoder[idxY].resetBits();
+    m_entropyCoder[idxY].codeSaoOffset(*lclCtuParam, 0);
+    double dCostPartBest = m_entropyCoder[idxY].getNumberOfWrittenBits() * m_lumaLambda;
 
     for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)
     {
@@ -1372,11 +1381,11 @@
         for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
             ctuParamRdo.offset[classIdx] = (int)m_offset[0][typeIdx][classIdx + ctuParamRdo.bandPos + 1];
 
-        m_entropyCoder.load(m_rdContexts.temp);
-        m_entropyCoder.resetBits();
-        m_entropyCoder.codeSaoOffset(ctuParamRdo, 0);
+        m_entropyCoder[idxY].load(m_rdContexts[idxY].temp);
+        m_entropyCoder[idxY].resetBits();
+        m_entropyCoder[idxY].codeSaoOffset(ctuParamRdo, 0);
 
-        uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
+        uint32_t estRate = m_entropyCoder[idxY].getNumberOfWrittenBits();
         double cost = (double)estDist + m_lumaLambda * (double)estRate;
 
         if (cost < dCostPartBest)
@@ -1388,9 +1397,9 @@
     }
 
     mergeDist[0] = ((double)bestDist / m_lumaLambda);
-    m_entropyCoder.load(m_rdContexts.temp);
-    m_entropyCoder.codeSaoOffset(*lclCtuParam, 0);
-    m_entropyCoder.store(m_rdContexts.temp);
+    m_entropyCoder[idxY].load(m_rdContexts[idxY].temp);
+    m_entropyCoder[idxY].codeSaoOffset(*lclCtuParam, 0);
+    m_entropyCoder[idxY].store(m_rdContexts[idxY].temp);
 
     // merge left or merge up
 
@@ -1425,7 +1434,7 @@
     }
 }
 
-void SAO::sao2ChromaParamDist(SAOParam* saoParam, int addr, int addrUp, int addrLeft, SaoCtuParam mergeSaoParam[][2], double* mergeDist)
+void SAO::sao2ChromaParamDist(SAOParam* saoParam, int idxY, int addr, int addrUp, int addrLeft, SaoCtuParam mergeSaoParam[][2], double* mergeDist)
 {
     int64_t bestDist = 0;
 
@@ -1437,12 +1446,12 @@
 
     resetSaoUnit(lclCtuParam[0]);
     resetSaoUnit(lclCtuParam[1]);
-    m_entropyCoder.load(m_rdContexts.temp);
-    m_entropyCoder.resetBits();
-    m_entropyCoder.codeSaoOffset(*lclCtuParam[0], 1);
-    m_entropyCoder.codeSaoOffset(*lclCtuParam[1], 2);
+    m_entropyCoder[idxY].load(m_rdContexts[idxY].temp);
+    m_entropyCoder[idxY].resetBits();
+    m_entropyCoder[idxY].codeSaoOffset(*lclCtuParam[0], 1);
+    m_entropyCoder[idxY].codeSaoOffset(*lclCtuParam[1], 2);
 
-    double costPartBest = m_entropyCoder.getNumberOfWrittenBits() * m_chromaLambda;
+    double costPartBest = m_entropyCoder[idxY].getNumberOfWrittenBits() * m_chromaLambda;
 
     for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)
     {
@@ -1480,8 +1489,8 @@
             estDist[1] = estSaoTypeDist(2, typeIdx, m_chromaLambda, currentDistortionTableBo, currentRdCostTableBo);
         }
 
-        m_entropyCoder.load(m_rdContexts.temp);
-        m_entropyCoder.resetBits();
+        m_entropyCoder[idxY].load(m_rdContexts[idxY].temp);
+        m_entropyCoder[idxY].resetBits();
 
         SaoCtuParam  ctuParamRdo[2];
         for (int compIdx = 0; compIdx < 2; compIdx++)
@@ -1492,10 +1501,10 @@
             for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
                 ctuParamRdo[compIdx].offset[classIdx] = (int)m_offset[compIdx + 1][typeIdx][classIdx + ctuParamRdo[compIdx].bandPos + 1];
 
-            m_entropyCoder.codeSaoOffset(ctuParamRdo[compIdx], compIdx + 1);
+            m_entropyCoder[idxY].codeSaoOffset(ctuParamRdo[compIdx], compIdx + 1);
         }
 
-        uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
+        uint32_t estRate = m_entropyCoder[idxY].getNumberOfWrittenBits();
         double cost = (double)(estDist[0] + estDist[1]) + m_chromaLambda * (double)estRate;
 
         if (cost < costPartBest)
@@ -1508,10 +1517,10 @@
     }
 
     mergeDist[0] += ((double)bestDist / m_chromaLambda);
-    m_entropyCoder.load(m_rdContexts.temp);
-    m_entropyCoder.codeSaoOffset(*lclCtuParam[0], 1);
-    m_entropyCoder.codeSaoOffset(*lclCtuParam[1], 2);
-    m_entropyCoder.store(m_rdContexts.temp);
+    m_entropyCoder[idxY].load(m_rdContexts[idxY].temp);
+    m_entropyCoder[idxY].codeSaoOffset(*lclCtuParam[0], 1);
+    m_entropyCoder[idxY].codeSaoOffset(*lclCtuParam[1], 2);
+    m_entropyCoder[idxY].store(m_rdContexts[idxY].temp);
 
     // merge left or merge up
 
diff -r 1f4de5949b3b -r ef7e244c3e99 source/encoder/sao.h
--- a/source/encoder/sao.h	Mon Nov 23 16:19:30 2015 -0600
+++ b/source/encoder/sao.h	Mon Nov 23 16:33:51 2015 -0600
@@ -71,31 +71,31 @@
 protected:
 
     /* allocated per part */
-    PerClass*   m_count;
-    PerClass*   m_offset;
-    PerClass*   m_offsetOrg;
+    PerClass*       m_count;
+    PerClass*       m_offset;
+    PerClass*       m_offsetOrg;
 
     /* allocated per CTU */
-    PerPlane*   m_countPreDblk;
-    PerPlane*   m_offsetOrgPreDblk;
+    PerPlane*       m_countPreDblk;
+    PerPlane*       m_offsetOrgPreDblk;
 
-    double      m_depthSaoRate[2][4];
-    int8_t      m_offsetBo[SAO_NUM_BO_CLASSES];
-    int8_t      m_offsetEo[NUM_EDGETYPE];
+    double          m_depthSaoRate[2][4];
+    int8_t          m_offsetBo[SAO_NUM_BO_CLASSES];
+    int8_t          m_offsetEo[NUM_EDGETYPE];
 
-    int         m_chromaFormat;
-    int         m_numCuInWidth;
-    int         m_numCuInHeight;
-    int         m_hChromaShift;
-    int         m_vChromaShift;
+    int             m_chromaFormat;
+    int             m_numCuInWidth;
+    int             m_numCuInHeight;
+    int             m_hChromaShift;
+    int             m_vChromaShift;
 
-    pixel*      m_clipTable;
-    pixel*      m_clipTableBase;
+    pixel*          m_clipTable;
+    pixel*          m_clipTableBase;
 
-    pixel*      m_tmpU1[3];
-    pixel*      m_tmpU2[3];
-    pixel*      m_tmpL1;
-    pixel*      m_tmpL2;
+    pixel*          m_tmpU1[3];
+    pixel*          m_tmpU2[3];
+    pixel*          m_tmpL1;
+    pixel*          m_tmpL2;
 
 public:
 
@@ -106,16 +106,16 @@
         Entropy temp;
     };
 
-    Frame*      m_frame;
-    Entropy     m_entropyCoder;
-    SAOContexts m_rdContexts;
+    Frame*          m_frame;
+    Entropy*        m_entropyCoder;
+    SAOContexts*    m_rdContexts;
 
-    x265_param* m_param;
-    int         m_refDepth;
-    int         m_numNoSao[2];
+    x265_param*     m_param;
+    int             m_refDepth;
+    int             m_numNoSao[2];
 
-    double      m_lumaLambda;
-    double      m_chromaLambda;
+    double          m_lumaLambda;
+    double          m_chromaLambda;
     /* TODO: No doubles for distortion */
 
     SAO();
@@ -125,7 +125,7 @@
 
     void allocSaoParam(SAOParam* saoParam) const;
 
-    void startSlice(Frame* pic, Entropy& initState, int qp);
+    void startSlice(Frame* pic, Entropy& initState, int numRows, int qp);
     void resetStats();
     void resetSaoUnit(SaoCtuParam* saoUnit);
 
@@ -138,8 +138,8 @@
     void calcSaoStatsCu(int addr, int plane);
     void calcSaoStatsCu_BeforeDblk(Frame* pic, int idxX, int idxY);
 
-    void saoComponentParamDist(SAOParam* saoParam, int addr, int addrUp, int addrLeft, SaoCtuParam mergeSaoParam[2], double* mergeDist);
-    void sao2ChromaParamDist(SAOParam* saoParam, int addr, int addrUp, int addrLeft, SaoCtuParam mergeSaoParam[][2], double* mergeDist);
+    void saoComponentParamDist(SAOParam* saoParam, int idxY, int addr, int addrUp, int addrLeft, SaoCtuParam mergeSaoParam[2], double* mergeDist);
+    void sao2ChromaParamDist(SAOParam* saoParam, int idxY, int addr, int addrUp, int addrLeft, SaoCtuParam mergeSaoParam[][2], double* mergeDist);
 
     inline int estIterOffset(int typeIdx, int classIdx, double lambda, int offset, int32_t count, int32_t offsetOrg,
                              int32_t* currentDistortionTableBo, double* currentRdCostTableBo);



More information about the x265-devel mailing list