[x265] sao: refine sao merge mode

Satoshi Nakagawa nakagawa424 at oki.com
Wed Oct 15 04:57:34 CEST 2014


# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1413341637 -32400
#      Wed Oct 15 11:53:57 2014 +0900
# Node ID 8014e8d2c321148c7d68942b6f4552b8eede6e1c
# Parent  02ff8eaad63232e958153e8b7cdcd5907141a7b6
sao: refine sao merge mode

diff -r 02ff8eaad632 -r 8014e8d2c321 source/common/common.h
--- a/source/common/common.h	Mon Oct 13 18:17:00 2014 +0530
+++ b/source/common/common.h	Wed Oct 15 11:53:57 2014 +0900
@@ -251,18 +251,23 @@
     uint32_t count[MAX_NUM_TR_CATEGORIES];
 };
 
+enum SaoMergeMode
+{
+    SAO_MERGE_NONE,
+    SAO_MERGE_LEFT,
+    SAO_MERGE_UP
+};
+
 struct SaoCtuParam
 {
-    bool mergeUpFlag;
-    bool mergeLeftFlag;
+    SaoMergeMode mergeMode;
     int  typeIdx;
     uint32_t bandPos;    // BO band position
     int  offset[SAO_NUM_OFFSET];
 
     void reset()
     {
-        mergeUpFlag = false;
-        mergeLeftFlag = false;
+        mergeMode = SAO_MERGE_NONE;
         typeIdx = -1;
         bandPos = 0;
         offset[0] = 0;
diff -r 02ff8eaad632 -r 8014e8d2c321 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Mon Oct 13 18:17:00 2014 +0530
+++ b/source/encoder/frameencoder.cpp	Wed Oct 15 11:53:57 2014 +0900
@@ -413,7 +413,7 @@
     const uint32_t lastCUAddr = (slice->m_endCUAddr + NUM_CU_PARTITIONS - 1) / NUM_CU_PARTITIONS;
     const int numSubstreams = m_param->bEnableWavefront ? m_frame->getPicSym()->getFrameHeightInCU() : 1;
 
-    SAOParam *saoParam = slice->m_pic->getPicSym()->m_saoParam;
+    SAOParam* saoParam = slice->m_sps->bUseSAO ? slice->m_pic->getPicSym()->m_saoParam : NULL;
     for (uint32_t cuAddr = 0; cuAddr < lastCUAddr; cuAddr++)
     {
         uint32_t col = cuAddr % widthInLCUs;
@@ -430,12 +430,12 @@
             m_entropyCoder.loadContexts(m_rows[lin - 1].bufferedEntropy);
         }
 
-        if (slice->m_sps->bUseSAO)
+        if (saoParam)
         {
             if (saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1])
             {
-                int mergeLeft = saoParam->ctuParam[0][cuAddr].mergeLeftFlag && col;
-                int mergeUp = saoParam->ctuParam[0][cuAddr].mergeUpFlag && lin;
+                int mergeLeft = col && saoParam->ctuParam[0][cuAddr].mergeMode == SAO_MERGE_LEFT;
+                int mergeUp = lin && saoParam->ctuParam[0][cuAddr].mergeMode == SAO_MERGE_UP;
                 if (col)
                     m_entropyCoder.codeSaoMerge(mergeLeft);
                 if (lin && !mergeLeft)
diff -r 02ff8eaad632 -r 8014e8d2c321 source/encoder/sao.cpp
--- a/source/encoder/sao.cpp	Mon Oct 13 18:17:00 2014 +0530
+++ b/source/encoder/sao.cpp	Wed Oct 15 11:53:57 2014 +0900
@@ -90,7 +90,7 @@
     m_depthSaoRate[1][3] = 0;
 }
 
-bool SAO::create(x265_param *param)
+bool SAO::create(x265_param* param)
 {
     m_param = param;
     m_hChromaShift = CHROMA_H_SHIFT(param->internalCsp);
@@ -161,7 +161,7 @@
 }
 
 /* allocate memory for SAO parameters */
-void SAO::allocSaoParam(SAOParam *saoParam) const
+void SAO::allocSaoParam(SAOParam* saoParam) const
 {
     saoParam->numCuInWidth  = m_numCuInWidth;
 
@@ -170,14 +170,7 @@
     saoParam->ctuParam[2] = new SaoCtuParam[m_numCuInHeight * m_numCuInWidth];
 }
 
-/* reset SAO parameters once per frame */
-void SAO::resetSAOParam(SAOParam *saoParam)
-{
-    saoParam->bSaoFlag[0] = false;
-    saoParam->bSaoFlag[1] = false;
-}
-
-void SAO::startSlice(Frame *pic, Entropy& initState, int qp)
+void SAO::startSlice(Frame* pic, Entropy& initState, int qp)
 {
     Slice* slice = pic->m_picSym->m_slice;
 
@@ -213,7 +206,6 @@
         pic->getPicSym()->m_saoParam = saoParam;
     }
 
-    resetSAOParam(saoParam);
     rdoSaoUnitRowInit(saoParam);
 
     // NOTE: Disable SAO automatic turn-off when frame parallelism is
@@ -229,7 +221,7 @@
 void SAO::processSaoCu(int addr, int typeIdx, int plane)
 {
     int x, y;
-    TComDataCU *cu = m_pic->getCU(addr);
+    const TComDataCU* cu = m_pic->getCU(addr);
     pixel* rec = m_pic->getPicYuvRec()->getPlaneAddr(plane, addr);
     int stride = plane ? m_pic->getCStride() : m_pic->getStride();
     uint32_t picWidth  = m_param->sourceWidth;
@@ -454,12 +446,12 @@
 
     if (!idxY)
     {
-        pixel *rec = plane ? m_pic->getPicYuvRec()->getChromaAddr(plane) : m_pic->getPicYuvRec()->getLumaAddr();
+        pixel* rec = plane ? m_pic->getPicYuvRec()->getChromaAddr(plane) : m_pic->getPicYuvRec()->getLumaAddr();
         memcpy(m_tmpU1[plane], rec, sizeof(pixel) * picWidth);
     }
 
     int addr = idxY * m_numCuInWidth;
-    pixel *rec = plane ? m_pic->getPicYuvRec()->getChromaAddr(plane, addr) : m_pic->getPicYuvRec()->getLumaAddr(addr);
+    pixel* rec = plane ? m_pic->getPicYuvRec()->getChromaAddr(plane, addr) : m_pic->getPicYuvRec()->getLumaAddr(addr);
 
     for (int i = 0; i < ctuHeight + 1; i++)
     {
@@ -477,8 +469,8 @@
     {
         addr = idxY * m_numCuInWidth + idxX;
 
+        bool mergeLeftFlag = ctuParam[addr].mergeMode == SAO_MERGE_LEFT;
         int typeIdx = ctuParam[addr].typeIdx;
-        bool mergeLeftFlag = ctuParam[addr].mergeLeftFlag;
 
         if (typeIdx >= 0)
         {
@@ -526,21 +518,19 @@
 
 void SAO::resetSaoUnit(SaoCtuParam* saoUnit)
 {
-    saoUnit->mergeUpFlag   = 0;
-    saoUnit->mergeLeftFlag = 0;
-    saoUnit->typeIdx       = -1;
-    saoUnit->bandPos       = 0;
+    saoUnit->mergeMode  = SAO_MERGE_NONE;
+    saoUnit->typeIdx    = -1;
+    saoUnit->bandPos    = 0;
 
     for (int i = 0; i < SAO_NUM_OFFSET; i++)
         saoUnit->offset[i] = 0;
 }
 
-void SAO::copySaoUnit(SaoCtuParam* saoUnitDst, SaoCtuParam* saoUnitSrc)
+void SAO::copySaoUnit(SaoCtuParam* saoUnitDst, const SaoCtuParam* saoUnitSrc)
 {
-    saoUnitDst->mergeLeftFlag = saoUnitSrc->mergeLeftFlag;
-    saoUnitDst->mergeUpFlag   = saoUnitSrc->mergeUpFlag;
-    saoUnitDst->typeIdx       = saoUnitSrc->typeIdx;
-    saoUnitDst->bandPos       = saoUnitSrc->bandPos;
+    saoUnitDst->mergeMode   = saoUnitSrc->mergeMode;
+    saoUnitDst->typeIdx     = saoUnitSrc->typeIdx;
+    saoUnitDst->bandPos     = saoUnitSrc->bandPos;
 
     for (int i = 0; i < SAO_NUM_OFFSET; i++)
         saoUnitDst->offset[i] = saoUnitSrc->offset[i];
@@ -550,7 +540,7 @@
 void SAO::calcSaoStatsCu(int addr, int plane)
 {
     int x, y;
-    TComDataCU *cu = m_pic->getCU(addr);
+    TComDataCU* cu = m_pic->getCU(addr);
     const pixel* fenc0 = m_pic->getPicYuvOrg()->getPlaneAddr(plane, addr);
     const pixel* rec0  = m_pic->getPicYuvRec()->getPlaneAddr(plane, addr);
     const pixel* fenc;
@@ -796,7 +786,7 @@
     int addr    = idxX + m_numCuInWidth * idxY;
 
     int x, y;
-    TComDataCU *cu = pic->getCU(addr);
+    TComDataCU* cu = pic->getCU(addr);
     const pixel* fenc;
     const pixel* rec;
     int stride = m_pic->getStride();
@@ -1066,7 +1056,7 @@
     memset(m_offsetOrg, 0, sizeof(PerClass) * NUM_PLANE);
 }
 
-void SAO::rdoSaoUnitRowInit(SAOParam *saoParam)
+void SAO::rdoSaoUnitRowInit(SAOParam* saoParam)
 {
     saoParam->bSaoFlag[0] = true;
     saoParam->bSaoFlag[1] = true;
@@ -1079,7 +1069,7 @@
         saoParam->bSaoFlag[1] = false;
 }
 
-void SAO::rdoSaoUnitRowEnd(SAOParam *saoParam, int numctus)
+void SAO::rdoSaoUnitRowEnd(const SAOParam* saoParam, int numctus)
 {
     if (!saoParam->bSaoFlag[0])
         m_depthSaoRate[0][m_refDepth] = 1.0;
@@ -1089,38 +1079,35 @@
     if (!saoParam->bSaoFlag[1])
         m_depthSaoRate[1][m_refDepth] = 1.0;
     else
-        m_depthSaoRate[1][m_refDepth] = m_numNoSao[1] / ((double)numctus * 2);
+        m_depthSaoRate[1][m_refDepth] = m_numNoSao[1] / ((double)numctus);
 }
 
-void SAO::rdoSaoUnitRow(SAOParam *saoParam, int idxY)
+void SAO::rdoSaoUnitRow(SAOParam* saoParam, int idxY)
 {
-    int j, k;
-    SaoCtuParam mergeSaoParam[3][2];
-    double compDistortion[3];
-    int allowMergeUp   = (idxY > 0);
+    SaoCtuParam mergeSaoParam[NUM_MERGE_MODE][2];
+    double mergeDist[NUM_MERGE_MODE];
+    bool allowMerge[2]; // left, up
+    allowMerge[1] = (idxY > 0);
 
     for (int idxX = 0; idxX < m_numCuInWidth; idxX++)
     {
         int addr     = idxX + idxY * m_numCuInWidth;
-        int addrUp   = idxY == 0 ? -1 : addr - m_numCuInWidth;
-        int addrLeft = idxX == 0 ? -1 : addr - 1;
-        int allowMergeLeft = (idxX > 0);
+        int addrUp   = idxY ? addr - m_numCuInWidth : -1;
+        int addrLeft = idxX ? addr - 1 : -1;
+        allowMerge[0] = (idxX > 0);
 
-        compDistortion[0] = 0;
-        compDistortion[1] = 0;
-        compDistortion[2] = 0;
         m_entropyCoder.load(m_rdContexts.cur);
-        if (allowMergeLeft)
+        if (allowMerge[0])
             m_entropyCoder.codeSaoMerge(0);
-        if (allowMergeUp)
+        if (allowMerge[1])
             m_entropyCoder.codeSaoMerge(0);
         m_entropyCoder.store(m_rdContexts.temp);
         // reset stats Y, Cb, Cr
         for (int plane = 0; plane < 3; plane++)
         {
-            for (j = 0; j < MAX_NUM_SAO_TYPE; j++)
+            for (int j = 0; j < MAX_NUM_SAO_TYPE; j++)
             {
-                for (k = 0; k < MAX_NUM_SAO_CLASS; k++)
+                for (int k = 0; k < MAX_NUM_SAO_CLASS; k++)
                 {
                     m_offset[plane][j][k] = 0;
                     if (m_param->bSaoNonDeblocked)
@@ -1136,64 +1123,63 @@
                 }
             }
 
-            saoParam->ctuParam[plane][addr].typeIdx       = -1;
-            saoParam->ctuParam[plane][addr].mergeUpFlag   = 0;
-            saoParam->ctuParam[plane][addr].mergeLeftFlag = 0;
-            saoParam->ctuParam[plane][addr].bandPos    = 0;
-            if ((plane == 0 && saoParam->bSaoFlag[0]) || (plane > 0 && saoParam->bSaoFlag[1]))
+            saoParam->ctuParam[plane][addr].mergeMode   = SAO_MERGE_NONE;
+            saoParam->ctuParam[plane][addr].typeIdx     = -1;
+            saoParam->ctuParam[plane][addr].bandPos     = 0;
+            if (saoParam->bSaoFlag[plane > 0])
                 calcSaoStatsCu(addr, plane);
         }
 
-        saoComponentParamDist(allowMergeLeft, allowMergeUp, saoParam, addr, addrUp, addrLeft,
-                              &mergeSaoParam[0][0], &compDistortion[0]);
+        saoComponentParamDist(saoParam, addr, addrUp, addrLeft,
+                              &mergeSaoParam[0][0], mergeDist);
 
-        sao2ChromaParamDist(allowMergeLeft, allowMergeUp, saoParam, addr, addrUp, addrLeft,
-                            &mergeSaoParam[1][0], &mergeSaoParam[2][0], &compDistortion[0]);
+        sao2ChromaParamDist(saoParam, addr, addrUp, addrLeft,
+                            mergeSaoParam, mergeDist);
 
         if (saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1])
         {
             // Cost of new SAO_params
             m_entropyCoder.load(m_rdContexts.cur);
             m_entropyCoder.resetBits();
-            if (allowMergeLeft)
+            if (allowMerge[0])
                 m_entropyCoder.codeSaoMerge(0);
-            if (allowMergeUp)
+            if (allowMerge[1])
                 m_entropyCoder.codeSaoMerge(0);
             for (int plane = 0; plane < 3; plane++)
             {
-                if ((plane == 0 && saoParam->bSaoFlag[0]) || (plane > 0 && saoParam->bSaoFlag[1]))
+                if (saoParam->bSaoFlag[plane > 0])
                     m_entropyCoder.codeSaoOffset(&saoParam->ctuParam[plane][addr], plane);
             }
 
             uint32_t rate = m_entropyCoder.getNumberOfWrittenBits();
-            double bestCost = compDistortion[0] + (double)rate;
+            double bestCost = mergeDist[0] + (double)rate;
             m_entropyCoder.store(m_rdContexts.temp);
 
             // Cost of Merge
-            for (int mergeUp = 0; mergeUp < 2; ++mergeUp)
+            for (int mergeIdx = 0; mergeIdx < 2; ++mergeIdx)
             {
-                if ((allowMergeLeft && !mergeUp) || (allowMergeUp && mergeUp))
+                if (!allowMerge[mergeIdx])
+                    continue;
+
+                m_entropyCoder.load(m_rdContexts.cur);
+                m_entropyCoder.resetBits();
+                if (allowMerge[0])
+                    m_entropyCoder.codeSaoMerge(1 - mergeIdx);
+                if (allowMerge[1] && (mergeIdx == 1))
+                    m_entropyCoder.codeSaoMerge(1);
+
+                rate = m_entropyCoder.getNumberOfWrittenBits();
+                double mergeCost = mergeDist[mergeIdx + 1] + (double)rate;
+                if (mergeCost < bestCost)
                 {
-                    m_entropyCoder.load(m_rdContexts.cur);
-                    m_entropyCoder.resetBits();
-                    if (allowMergeLeft)
-                        m_entropyCoder.codeSaoMerge(1 - mergeUp);
-                    if (allowMergeUp && (mergeUp == 1))
-                        m_entropyCoder.codeSaoMerge(1);
-
-                    rate = m_entropyCoder.getNumberOfWrittenBits();
-                    double mergeCost = compDistortion[mergeUp + 1] + (double)rate;
-                    if (mergeCost < bestCost)
+                    SaoMergeMode mergeMode = mergeIdx ? SAO_MERGE_UP : SAO_MERGE_LEFT;
+                    bestCost = mergeCost;
+                    m_entropyCoder.store(m_rdContexts.temp);
+                    for (int plane = 0; plane < 3; plane++)
                     {
-                        bestCost = mergeCost;
-                        m_entropyCoder.store(m_rdContexts.temp);
-                        for (int plane = 0; plane < 3; plane++)
-                        {
-                            mergeSaoParam[plane][mergeUp].mergeLeftFlag = !mergeUp;
-                            mergeSaoParam[plane][mergeUp].mergeUpFlag = !!mergeUp;
-                            if ((plane == 0 && saoParam->bSaoFlag[0]) || (plane > 0 && saoParam->bSaoFlag[1]))
-                                copySaoUnit(&saoParam->ctuParam[plane][addr], &mergeSaoParam[plane][mergeUp]);
-                        }
+                        mergeSaoParam[plane][mergeIdx].mergeMode = mergeMode;
+                        if (saoParam->bSaoFlag[plane > 0])
+                            copySaoUnit(&saoParam->ctuParam[plane][addr], &mergeSaoParam[plane][mergeIdx]);
                     }
                 }
             }
@@ -1201,7 +1187,7 @@
             if (saoParam->ctuParam[0][addr].typeIdx < 0)
                 m_numNoSao[0]++;
             if (saoParam->ctuParam[1][addr].typeIdx < 0)
-                m_numNoSao[1] += 2;
+                m_numNoSao[1]++;
             m_entropyCoder.load(m_rdContexts.temp);
             m_entropyCoder.store(m_rdContexts.cur);
         }
@@ -1209,7 +1195,7 @@
 }
 
 /** rate distortion optimization of SAO unit */
-inline int64_t SAO::estSaoTypeDist(int plane, int typeIdx, double lambda, int32_t *currentDistortionTableBo, double *currentRdCostTableBo)
+inline int64_t SAO::estSaoTypeDist(int plane, int typeIdx, double lambda, int32_t* currentDistortionTableBo, double* currentRdCostTableBo)
 {
     int64_t estDist = 0;
 
@@ -1249,7 +1235,7 @@
     return estDist;
 }
 
-inline int SAO::estIterOffset(int typeIdx, int classIdx, double lambda, int offset, int32_t count, int32_t offsetOrg, int32_t *currentDistortionTableBo, double *currentRdCostTableBo)
+inline int SAO::estIterOffset(int typeIdx, int classIdx, double lambda, int offset, int32_t count, int32_t offsetOrg, int32_t* currentDistortionTableBo, double* currentRdCostTableBo)
 {
     int offsetOut = 0;
 
@@ -1282,29 +1268,23 @@
     return offsetOut;
 }
 
-void SAO::saoComponentParamDist(int allowMergeLeft, int allowMergeUp, SAOParam *saoParam, int addr, int addrUp, int addrLeft,
-                                SaoCtuParam *compSaoParam, double *compDistortion)
+void SAO::saoComponentParamDist(SAOParam* saoParam, int addr, int addrUp, int addrLeft,
+                                SaoCtuParam* mergeSaoParam, double* mergeDist)
 {
     int64_t bestDist = 0;
 
     SaoCtuParam* lclCtuParam = &saoParam->ctuParam[0][addr];
-    SaoCtuParam  ctuParamRdo;
-
-    resetSaoUnit(&ctuParamRdo);
-    resetSaoUnit(&compSaoParam[0]);
-    resetSaoUnit(&compSaoParam[1]);
-    resetSaoUnit(lclCtuParam);
 
     double bestRDCostTableBo = MAX_DOUBLE;
     int    bestClassTableBo  = 0;
     int    currentDistortionTableBo[MAX_NUM_SAO_CLASS];
     double currentRdCostTableBo[MAX_NUM_SAO_CLASS];
 
+    resetSaoUnit(lclCtuParam);
     m_entropyCoder.load(m_rdContexts.temp);
     m_entropyCoder.resetBits();
-    m_entropyCoder.codeSaoOffset(&ctuParamRdo, 0);
+    m_entropyCoder.codeSaoOffset(lclCtuParam, 0);
     double dCostPartBest = m_entropyCoder.getNumberOfWrittenBits() * m_lumaLambda;
-    copySaoUnit(lclCtuParam, &ctuParamRdo);
 
     for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)
     {
@@ -1332,10 +1312,9 @@
             for (int classIdx = bestClassTableBo; classIdx < bestClassTableBo + SAO_BO_LEN; classIdx++)
                 estDist += currentDistortionTableBo[classIdx];
         }
-        resetSaoUnit(&ctuParamRdo);
+        SaoCtuParam  ctuParamRdo;
+        ctuParamRdo.mergeMode = SAO_MERGE_NONE;
         ctuParamRdo.typeIdx = typeIdx;
-        ctuParamRdo.mergeLeftFlag = 0;
-        ctuParamRdo.mergeUpFlag   = 0;
         ctuParamRdo.bandPos = (typeIdx == SAO_BO) ? bestClassTableBo : 0;
         for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
             ctuParamRdo.offset[classIdx] = (int)m_offset[0][typeIdx][classIdx + ctuParamRdo.bandPos + 1];
@@ -1355,78 +1334,63 @@
         }
     }
 
-    compDistortion[0] += ((double)bestDist / m_lumaLambda);
+    mergeDist[0] = ((double)bestDist / m_lumaLambda);
     m_entropyCoder.load(m_rdContexts.temp);
     m_entropyCoder.codeSaoOffset(lclCtuParam, 0);
     m_entropyCoder.store(m_rdContexts.temp);
 
     // merge left or merge up
 
-    for (int idxNeighbor = 0; idxNeighbor < 2; idxNeighbor++)
+    for (int mergeIdx = 0; mergeIdx < 2; mergeIdx++)
     {
-        SaoCtuParam* ctuParamNeighbor = NULL;
-        if (allowMergeLeft && addrLeft >= 0 && idxNeighbor == 0)
-            ctuParamNeighbor = &(saoParam->ctuParam[0][addrLeft]);
-        else if (allowMergeUp && addrUp >= 0 && idxNeighbor == 1)
-            ctuParamNeighbor = &(saoParam->ctuParam[0][addrUp]);
-        if (ctuParamNeighbor != NULL)
+        SaoCtuParam* mergeSrcParam = NULL;
+        if (addrLeft >= 0 && mergeIdx == 0)
+            mergeSrcParam = &(saoParam->ctuParam[0][addrLeft]);
+        else if (addrUp >= 0 && mergeIdx == 1)
+            mergeSrcParam = &(saoParam->ctuParam[0][addrUp]);
+        if (mergeSrcParam)
         {
             int64_t estDist = 0;
-            int typeIdx = ctuParamNeighbor->typeIdx;
+            int typeIdx = mergeSrcParam->typeIdx;
             if (typeIdx >= 0)
             {
-                int mergeBandPosition = (typeIdx == SAO_BO) ? ctuParamNeighbor->bandPos : 0;
+                int bandPos = (typeIdx == SAO_BO) ? mergeSrcParam->bandPos : 0;
                 for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
                 {
-                    int mergeOffset = ctuParamNeighbor->offset[classIdx];
-                    estDist += estSaoDist(m_count[0][typeIdx][classIdx + mergeBandPosition + 1], mergeOffset, m_offsetOrg[0][typeIdx][classIdx + mergeBandPosition + 1]);
+                    int mergeOffset = mergeSrcParam->offset[classIdx];
+                    estDist += estSaoDist(m_count[0][typeIdx][classIdx + bandPos + 1], mergeOffset, m_offsetOrg[0][typeIdx][classIdx + bandPos + 1]);
                 }
             }
 
-            copySaoUnit(&compSaoParam[idxNeighbor], ctuParamNeighbor);
-            compSaoParam[idxNeighbor].mergeUpFlag   = !!idxNeighbor;
-            compSaoParam[idxNeighbor].mergeLeftFlag = !idxNeighbor;
+            copySaoUnit(&mergeSaoParam[mergeIdx], mergeSrcParam);
+            mergeSaoParam[mergeIdx].mergeMode = mergeIdx ? SAO_MERGE_UP : SAO_MERGE_LEFT;
 
-            compDistortion[idxNeighbor + 1] += ((double)estDist / m_lumaLambda);
+            mergeDist[mergeIdx + 1] = ((double)estDist / m_lumaLambda);
         }
+        else
+            resetSaoUnit(&mergeSaoParam[mergeIdx]);
     }
 }
 
-void SAO::sao2ChromaParamDist(int allowMergeLeft, int allowMergeUp, SAOParam *saoParam, int addr, int addrUp, int addrLeft,
-                              SaoCtuParam *crSaoParam, SaoCtuParam *cbSaoParam, double *distortion)
+void SAO::sao2ChromaParamDist(SAOParam* saoParam, int addr, int addrUp, int addrLeft,
+                              SaoCtuParam mergeSaoParam[][2], double* mergeDist)
 {
     int64_t bestDist = 0;
 
     SaoCtuParam* lclCtuParam[2] = { &saoParam->ctuParam[1][addr], &saoParam->ctuParam[2][addr] };
-    SaoCtuParam* saoMergeParam[2][2];
-    SaoCtuParam  ctuParamRdo[2];
-
-    saoMergeParam[0][0] = &crSaoParam[0];
-    saoMergeParam[0][1] = &crSaoParam[1];
-    saoMergeParam[1][0] = &cbSaoParam[0];
-    saoMergeParam[1][1] = &cbSaoParam[1];
-
-    resetSaoUnit(lclCtuParam[0]);
-    resetSaoUnit(lclCtuParam[1]);
-    resetSaoUnit(saoMergeParam[0][0]);
-    resetSaoUnit(saoMergeParam[0][1]);
-    resetSaoUnit(saoMergeParam[1][0]);
-    resetSaoUnit(saoMergeParam[1][1]);
-    resetSaoUnit(&ctuParamRdo[0]);
-    resetSaoUnit(&ctuParamRdo[1]);
 
     double currentRdCostTableBo[MAX_NUM_SAO_CLASS];
     int    bestClassTableBo[2] = { 0, 0 };
     int    currentDistortionTableBo[MAX_NUM_SAO_CLASS];
 
+    resetSaoUnit(lclCtuParam[0]);
+    resetSaoUnit(lclCtuParam[1]);
     m_entropyCoder.load(m_rdContexts.temp);
     m_entropyCoder.resetBits();
-    m_entropyCoder.codeSaoOffset(&ctuParamRdo[0], 1);
-    m_entropyCoder.codeSaoOffset(&ctuParamRdo[1], 2);
+    m_entropyCoder.codeSaoOffset(lclCtuParam[0], 1);
+    m_entropyCoder.codeSaoOffset(lclCtuParam[1], 2);
 
     double costPartBest = m_entropyCoder.getNumberOfWrittenBits() * m_chromaLambda;
-    copySaoUnit(lclCtuParam[0], &ctuParamRdo[0]);
-    copySaoUnit(lclCtuParam[1], &ctuParamRdo[1]);
 
     for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)
     {
@@ -1467,12 +1431,11 @@
         m_entropyCoder.load(m_rdContexts.temp);
         m_entropyCoder.resetBits();
 
+        SaoCtuParam  ctuParamRdo[2];
         for (int compIdx = 0; compIdx < 2; compIdx++)
         {
-            resetSaoUnit(&ctuParamRdo[compIdx]);
+            ctuParamRdo[compIdx].mergeMode = SAO_MERGE_NONE;
             ctuParamRdo[compIdx].typeIdx = typeIdx;
-            ctuParamRdo[compIdx].mergeLeftFlag = 0;
-            ctuParamRdo[compIdx].mergeUpFlag   = 0;
             ctuParamRdo[compIdx].bandPos = (typeIdx == SAO_BO) ? bestClassTableBo[compIdx] : 0;
             for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
                 ctuParamRdo[compIdx].offset[classIdx] = (int)m_offset[compIdx + 1][typeIdx][classIdx + ctuParamRdo[compIdx].bandPos + 1];
@@ -1492,7 +1455,7 @@
         }
     }
 
-    distortion[0] += ((double)bestDist / m_chromaLambda);
+    mergeDist[0] += ((double)bestDist / m_chromaLambda);
     m_entropyCoder.load(m_rdContexts.temp);
     m_entropyCoder.codeSaoOffset(lclCtuParam[0], 1);
     m_entropyCoder.codeSaoOffset(lclCtuParam[1], 2);
@@ -1500,40 +1463,41 @@
 
     // merge left or merge up
 
-    for (int idxNeighbor = 0; idxNeighbor < 2; idxNeighbor++)
+    for (int mergeIdx = 0; mergeIdx < 2; mergeIdx++)
     {
         for (int compIdx = 0; compIdx < 2; compIdx++)
         {
             int plane = compIdx + 1;
-            SaoCtuParam* ctuParamNeighbor = NULL;
-            if (allowMergeLeft && addrLeft >= 0 && idxNeighbor == 0)
-                ctuParamNeighbor = &(saoParam->ctuParam[plane][addrLeft]);
-            else if (allowMergeUp && addrUp >= 0 && idxNeighbor == 1)
-                ctuParamNeighbor = &(saoParam->ctuParam[plane][addrUp]);
-            if (ctuParamNeighbor != NULL)
+            SaoCtuParam* mergeSrcParam = NULL;
+            if (addrLeft >= 0 && mergeIdx == 0)
+                mergeSrcParam = &(saoParam->ctuParam[plane][addrLeft]);
+            else if (addrUp >= 0 && mergeIdx == 1)
+                mergeSrcParam = &(saoParam->ctuParam[plane][addrUp]);
+            if (mergeSrcParam)
             {
                 int64_t estDist = 0;
-                int typeIdx = ctuParamNeighbor->typeIdx;
+                int typeIdx = mergeSrcParam->typeIdx;
                 if (typeIdx >= 0)
                 {
-                    int mergeBandPosition = (typeIdx == SAO_BO) ? ctuParamNeighbor->bandPos : 0;
+                    int bandPos = (typeIdx == SAO_BO) ? mergeSrcParam->bandPos : 0;
                     for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
                     {
-                        int mergeOffset = ctuParamNeighbor->offset[classIdx];
-                        estDist += estSaoDist(m_count[plane][typeIdx][classIdx + mergeBandPosition + 1], mergeOffset, m_offsetOrg[plane][typeIdx][classIdx + mergeBandPosition + 1]);
+                        int mergeOffset = mergeSrcParam->offset[classIdx];
+                        estDist += estSaoDist(m_count[plane][typeIdx][classIdx + bandPos + 1], mergeOffset, m_offsetOrg[plane][typeIdx][classIdx + bandPos + 1]);
                     }
                 }
 
-                copySaoUnit(saoMergeParam[compIdx][idxNeighbor], ctuParamNeighbor);
-                saoMergeParam[compIdx][idxNeighbor]->mergeUpFlag   = !!idxNeighbor;
-                saoMergeParam[compIdx][idxNeighbor]->mergeLeftFlag = !idxNeighbor;
-                distortion[idxNeighbor + 1] += ((double)estDist / m_chromaLambda);
+                copySaoUnit(&mergeSaoParam[plane][mergeIdx], mergeSrcParam);
+                mergeSaoParam[plane][mergeIdx].mergeMode = mergeIdx ? SAO_MERGE_UP : SAO_MERGE_LEFT;
+                mergeDist[mergeIdx + 1] += ((double)estDist / m_chromaLambda);
             }
+            else
+                resetSaoUnit(&mergeSaoParam[plane][mergeIdx]);
         }
     }
 }
 
-static void restoreOrigLosslessYuv(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth);
+static void restoreOrigLosslessYuv(const TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth);
 
 /* Original Lossless YUV LF disable process */
 void restoreLFDisabledOrigYuv(Frame* pic)
@@ -1542,7 +1506,7 @@
     {
         for (uint32_t cuAddr = 0; cuAddr < pic->getNumCUsInFrame(); cuAddr++)
         {
-            TComDataCU* cu = pic->getCU(cuAddr);
+            const TComDataCU* cu = pic->getCU(cuAddr);
 
             origCUSampleRestoration(cu, 0, 0);
         }
@@ -1550,7 +1514,7 @@
 }
 
 /* Original YUV restoration for CU in lossless coding */
-void origCUSampleRestoration(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth)
+void origCUSampleRestoration(const TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth)
 {
     // go to sub-CU
     if (cu->getDepth(absZOrderIdx) > depth)
@@ -1573,33 +1537,33 @@
         restoreOrigLosslessYuv(cu, absZOrderIdx, depth);
 }
 
-/* Original Lossless YUV sample restoration */
-static void restoreOrigLosslessYuv(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth)
-{
-    TComPicYuv* pcPicYuvRec  = cu->m_pic->getPicYuvRec();
-    TComPicYuv* pcPicYuvOrig = cu->m_pic->getPicYuvOrg();
-    int csp = pcPicYuvOrig->m_picCsp;
-
-    pixel* dst = pcPicYuvRec->getLumaAddr(cu->m_cuAddr, absZOrderIdx);
-    pixel* src = pcPicYuvOrig->getLumaAddr(cu->m_cuAddr, absZOrderIdx);
-    uint32_t dstStride = pcPicYuvRec->getStride();
-    uint32_t srcStride = pcPicYuvOrig->getStride();
-    uint32_t width  = (g_maxCUSize >> depth);
-    uint32_t height = (g_maxCUSize >> depth);
-    int part = partitionFromSizes(width, height);
-
-    primitives.luma_copy_pp[part](dst, dstStride, src, srcStride);
-   
-    pixel* dstCb = pcPicYuvRec->getCbAddr(cu->m_cuAddr, absZOrderIdx);
-    pixel* srcCb = pcPicYuvOrig->getCbAddr(cu->m_cuAddr, absZOrderIdx);
-
-    pixel* dstCr = pcPicYuvRec->getCrAddr(cu->m_cuAddr, absZOrderIdx);
-    pixel* srcCr = pcPicYuvOrig->getCrAddr(cu->m_cuAddr, absZOrderIdx);
-
-    dstStride = pcPicYuvRec->getCStride();
-    srcStride = pcPicYuvOrig->getCStride();
-    primitives.chroma[csp].copy_pp[part](dstCb, dstStride, srcCb, srcStride);
-    primitives.chroma[csp].copy_pp[part](dstCr, dstStride, srcCr, srcStride);
+/* Original Lossless YUV sample restoration */
+static void restoreOrigLosslessYuv(const TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth)
+{
+    TComPicYuv* pcPicYuvRec  = cu->m_pic->getPicYuvRec();
+    TComPicYuv* pcPicYuvOrig = cu->m_pic->getPicYuvOrg();
+    int csp = pcPicYuvOrig->m_picCsp;
+
+    pixel* dst = pcPicYuvRec->getLumaAddr(cu->m_cuAddr, absZOrderIdx);
+    pixel* src = pcPicYuvOrig->getLumaAddr(cu->m_cuAddr, absZOrderIdx);
+    uint32_t dstStride = pcPicYuvRec->getStride();
+    uint32_t srcStride = pcPicYuvOrig->getStride();
+    uint32_t width  = (g_maxCUSize >> depth);
+    uint32_t height = (g_maxCUSize >> depth);
+    int part = partitionFromSizes(width, height);
+
+    primitives.luma_copy_pp[part](dst, dstStride, src, srcStride);
+
+    pixel* dstCb = pcPicYuvRec->getCbAddr(cu->m_cuAddr, absZOrderIdx);
+    pixel* srcCb = pcPicYuvOrig->getCbAddr(cu->m_cuAddr, absZOrderIdx);
+
+    pixel* dstCr = pcPicYuvRec->getCrAddr(cu->m_cuAddr, absZOrderIdx);
+    pixel* srcCr = pcPicYuvOrig->getCrAddr(cu->m_cuAddr, absZOrderIdx);
+
+    dstStride = pcPicYuvRec->getCStride();
+    srcStride = pcPicYuvOrig->getCStride();
+    primitives.chroma[csp].copy_pp[part](dstCb, dstStride, srcCb, srcStride);
+    primitives.chroma[csp].copy_pp[part](dstCr, dstStride, srcCr, srcStride);
 }
 
 }
diff -r 02ff8eaad632 -r 8014e8d2c321 source/encoder/sao.h
--- a/source/encoder/sao.h	Mon Oct 13 18:17:00 2014 +0530
+++ b/source/encoder/sao.h	Wed Oct 15 11:53:57 2014 +0900
@@ -60,6 +60,7 @@
     enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
     enum { NUM_EDGETYPE = 5 };
     enum { NUM_PLANE = 3 };
+    enum { NUM_MERGE_MODE = 3 };
 
     static const uint32_t s_eoTable[NUM_EDGETYPE];
 
@@ -115,13 +116,12 @@
 
     SAO();
 
-    bool create(x265_param *param);
+    bool create(x265_param* param);
     void destroy();
 
     void allocSaoParam(SAOParam* saoParam) const;
 
-    void startSlice(Frame *pic, Entropy& initState, int qp);
-    void resetSAOParam(SAOParam* saoParam);
+    void startSlice(Frame* pic, Entropy& initState, int qp);
     void resetStats();
     void resetSaoUnit(SaoCtuParam* saoUnit);
 
@@ -129,27 +129,27 @@
     void processSaoCu(int addr, int typeIdx, int plane);
     void processSaoUnitRow(SaoCtuParam* ctuParam, int idxY, int plane);
 
-    void copySaoUnit(SaoCtuParam* saoUnitDst, SaoCtuParam* saoUnitSrc);
+    void copySaoUnit(SaoCtuParam* saoUnitDst, const SaoCtuParam* saoUnitSrc);
 
     void calcSaoStatsCu(int addr, int plane);
     void calcSaoStatsCu_BeforeDblk(Frame* pic, int idxX, int idxY);
 
-    void saoComponentParamDist(int allowMergeLeft, int allowMergeUp, SAOParam *saoParam, int addr, int addrUp, int addrLeft,
-                               SaoCtuParam *compSaoParam, double *distortion);
-    void sao2ChromaParamDist(int allowMergeLeft, int allowMergeUp, SAOParam *saoParam, int addr, int addrUp, int addrLeft,
-                             SaoCtuParam *crSaoParam, SaoCtuParam *cbSaoParam, double *distortion);
+    void saoComponentParamDist(SAOParam* saoParam, int addr, int addrUp, int addrLeft,
+                               SaoCtuParam mergeSaoParam[2], double* mergeDist);
+    void sao2ChromaParamDist(SAOParam* saoParam, int addr, int addrUp, int addrLeft,
+                             SaoCtuParam mergeSaoParam[][2], double* mergeDist);
 
     inline int estIterOffset(int typeIdx, int classIdx, double lambda, int offset, int32_t count, int32_t offsetOrg,
-                             int32_t *currentDistortionTableBo, double *currentRdCostTableBo);
-    inline int64_t estSaoTypeDist(int plane, int typeIdx, double lambda, int32_t *currentDistortionTableBo, double *currentRdCostTableBo);
+                             int32_t* currentDistortionTableBo, double* currentRdCostTableBo);
+    inline int64_t estSaoTypeDist(int plane, int typeIdx, double lambda, int32_t* currentDistortionTableBo, double* currentRdCostTableBo);
 
-    void rdoSaoUnitRowInit(SAOParam *saoParam);
-    void rdoSaoUnitRowEnd(SAOParam *saoParam, int numctus);
-    void rdoSaoUnitRow(SAOParam *saoParam, int idxY);
+    void rdoSaoUnitRowInit(SAOParam* saoParam);
+    void rdoSaoUnitRowEnd(const SAOParam* saoParam, int numctus);
+    void rdoSaoUnitRow(SAOParam* saoParam, int idxY);
 };
 
 void restoreLFDisabledOrigYuv(Frame* pic);
-void origCUSampleRestoration(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth);
+void origCUSampleRestoration(const TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth);
 
 }
 


More information about the x265-devel mailing list