[x265] sao: remove frame-based SAO

Satoshi Nakagawa nakagawa424 at oki.com
Tue Sep 30 02:52:08 CEST 2014


# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1412038092 -32400
#      Tue Sep 30 09:48:12 2014 +0900
# Node ID 3eacdaa304400b0100dcf1d1515ae1d24cbf4305
# Parent  5a6845566d1492d29af29ecc0cf75d644994735c
sao: remove frame-based SAO

diff -r 5a6845566d14 -r 3eacdaa30440 source/common/common.h
--- a/source/common/common.h	Mon Sep 29 17:37:47 2014 -0500
+++ b/source/common/common.h	Tue Sep 30 09:48:12 2014 +0900
@@ -212,34 +212,6 @@
     uint32_t count[8];
 };
 
-struct SAOQTPart
-{
-    enum { NUM_DOWN_PART = 4 };
-
-    int     bestType;
-    int     subTypeIdx;  // indicates EO class or BO band position
-    int     offset[SAO_NUM_OFFSET];
-    int     startCUX;
-    int     startCUY;
-    int     endCUX;
-    int     endCUY;
-
-    int     partIdx;
-    int     partLevel;
-    int     partCol;
-    int     partRow;
-
-    int     downPartsIdx[NUM_DOWN_PART];
-    int     upPartIdx;
-
-    bool    bSplit;
-
-    bool    bProcessed;
-    double  minCost;
-    int64_t minDist;
-    int     minRate;
-};
-
 struct SaoLcuParam
 {
     bool mergeUpFlag;
@@ -266,10 +238,7 @@
 struct SAOParam
 {
     SaoLcuParam* saoLcuParam[3];
-    SAOQTPart*   saoPart[3];
     bool         bSaoFlag[2];
-    bool         oneUnitFlag[3];
-    int          maxSplitLevel;
     int          numCuInHeight;
     int          numCuInWidth;
 
@@ -277,15 +246,11 @@
     {
         for (int i = 0; i < 3; i++)
         {
-            saoPart[i] = NULL;
             saoLcuParam[i] = NULL;
         }
     }
     ~SAOParam()
     {
-        delete[] saoPart[0];
-        delete[] saoPart[1];
-        delete[] saoPart[2];
         delete[] saoLcuParam[0];
         delete[] saoLcuParam[1];
         delete[] saoLcuParam[2];
diff -r 5a6845566d14 -r 3eacdaa30440 source/common/param.cpp
--- a/source/common/param.cpp	Mon Sep 29 17:37:47 2014 -0500
+++ b/source/common/param.cpp	Tue Sep 30 09:48:12 2014 +0900
@@ -169,7 +169,6 @@
     /* SAO Loop Filter */
     param->bEnableSAO = 1;
     param->saoLcuBoundary = 0;
-    param->saoLcuBasedOptimization = 1;
 
     /* Coding Quality */
     param->cbQpOffset = 0;
@@ -625,7 +624,6 @@
     OPT("lft") p->bEnableLoopFilter = atobool(value);
     OPT("sao") p->bEnableSAO = atobool(value);
     OPT("sao-lcu-bounds") p->saoLcuBoundary = atoi(value);
-    OPT("sao-lcu-opt") p->saoLcuBasedOptimization = atoi(value);
     OPT("ssim") p->bEnableSsim = atobool(value);
     OPT("psnr") p->bEnablePsnr = atobool(value);
     OPT("hash") p->decodedPictureHashSEI = atoi(value);
@@ -1165,13 +1163,7 @@
         fprintf(stderr, "nr=%d ", param->noiseReduction);
 
     TOOLOPT(param->bEnableLoopFilter, "lft");
-    if (param->bEnableSAO)
-    {
-        if (param->saoLcuBasedOptimization)
-            fprintf(stderr, "sao-lcu ");
-        else
-            fprintf(stderr, "sao-frame ");
-    }
+    TOOLOPT(param->bEnableSAO, "sao");
     TOOLOPT(param->bEnableSignHiding, "signhide");
     TOOLOPT(param->bCULossless, "cu-lossless");
     TOOLOPT(param->bEnableFastIntra, "fast-intra");
@@ -1245,7 +1237,6 @@
     BOOL(p->bEnableLoopFilter, "lft");
     BOOL(p->bEnableSAO, "sao");
     s += sprintf(s, " sao-lcu-bounds=%d", p->saoLcuBoundary);
-    s += sprintf(s, " sao-lcu-opt=%d", p->saoLcuBasedOptimization);
     BOOL(p->bBPyramid, "b-pyramid");
     BOOL(p->rc.cuTree, "cutree");
     s += sprintf(s, " rc=%s", p->rc.rateControlMode == X265_RC_ABR ? (
diff -r 5a6845566d14 -r 3eacdaa30440 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Mon Sep 29 17:37:47 2014 -0500
+++ b/source/encoder/encoder.cpp	Tue Sep 30 09:48:12 2014 +0900
@@ -1247,10 +1247,6 @@
         x265_log(p, X265_LOG_INFO, "Parallelism disabled, single thread mode\n");
         p->bEnableWavefront = 0;
     }
-    if (!p->saoLcuBasedOptimization && p->frameNumThreads > 1)
-    {
-        x265_log(p, X265_LOG_INFO, "Warning: picture-based SAO used with frame parallelism\n");
-    }
 
     if (p->keyframeMax < 0)
     {
diff -r 5a6845566d14 -r 3eacdaa30440 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Mon Sep 29 17:37:47 2014 -0500
+++ b/source/encoder/frameencoder.cpp	Tue Sep 30 09:48:12 2014 +0900
@@ -85,7 +85,7 @@
     m_param = top->m_param;
     m_numRows = numRows;
     m_numCols = numCols;
-    m_filterRowDelay = (m_param->bEnableSAO && m_param->saoLcuBasedOptimization && m_param->saoLcuBoundary) ?
+    m_filterRowDelay = (m_param->bEnableSAO && m_param->saoLcuBoundary) ?
                         2 : (m_param->bEnableSAO || m_param->bEnableLoopFilter ? 1 : 0);
     m_filterRowDelayCus = m_filterRowDelay * numCols;
 
@@ -323,17 +323,6 @@
         m_frameStats.percentSkip  = (double)totalSkip / totalCuCount;
     }
 
-    if (slice->m_sps->bUseSAO && !m_param->saoLcuBasedOptimization)
-    {
-        /* frame based SAO */
-        m_frameFilter.m_sao.SAOProcess(m_frame->getPicSym()->m_saoParam);
-        restoreLFDisabledOrigYuv(m_frame);
-
-        // Extend border after whole-frame SAO is finished
-        for (int row = 0; row < m_numRows; row++)
-            m_frameFilter.processRowPost(row);
-    }
-
     m_bs.resetBits();
     m_entropyCoder.load(m_initSliceContext);
     m_entropyCoder.setBitstream(&m_bs);
@@ -799,7 +788,7 @@
         }
 
         // NOTE: do CU level Filter
-        if (m_param->bEnableSAO && m_param->saoLcuBasedOptimization && m_param->saoLcuBoundary)
+        if (m_param->bEnableSAO && m_param->saoLcuBoundary)
             // SAO parameter estimation using non-deblocked pixels for LCU bottom and right boundary areas
             m_frameFilter.m_sao.calcSaoStatsCu_BeforeDblk(m_frame, col, row);
 
diff -r 5a6845566d14 -r 3eacdaa30440 source/encoder/framefilter.cpp
--- a/source/encoder/framefilter.cpp	Mon Sep 29 17:37:47 2014 -0500
+++ b/source/encoder/framefilter.cpp	Tue Sep 30 09:48:12 2014 +0900
@@ -115,20 +115,15 @@
     SAOParam* saoParam = m_frame->getPicSym()->m_saoParam;
     if (m_param->bEnableSAO)
     {
-        if (m_param->saoLcuBasedOptimization)
-        {
-            m_sao.m_entropyCoder.load(m_frameEncoder->m_initSliceContext);
-            m_sao.m_rdEntropyCoders[0][CI_NEXT_BEST].load(m_frameEncoder->m_initSliceContext);
-            m_sao.m_rdEntropyCoders[0][CI_CURR_BEST].load(m_frameEncoder->m_initSliceContext);
+        m_sao.m_entropyCoder.load(m_frameEncoder->m_initSliceContext);
+        m_sao.m_rdEntropyCoders[0][CI_NEXT_BEST].load(m_frameEncoder->m_initSliceContext);
+        m_sao.m_rdEntropyCoders[0][CI_CURR_BEST].load(m_frameEncoder->m_initSliceContext);
 
-            m_sao.rdoSaoUnitRow(saoParam, row);
+        m_sao.rdoSaoUnitRow(saoParam, row);
 
-            // NOTE: Delay a row because SAO decide need top row pixels at next row, is it HM's bug?
-            if (row >= m_saoRowDelay)
-                processSao(row - m_saoRowDelay);
-        }
-        else
-            return;
+        // NOTE: Delay a row because SAO decide need top row pixels at next row, is it HM's bug?
+        if (row >= m_saoRowDelay)
+            processSao(row - m_saoRowDelay);
     }
 
     // this row of CTUs has been encoded
@@ -138,7 +133,7 @@
 
     if (row == m_numRows - 1)
     {
-        if (m_param->bEnableSAO && m_param->saoLcuBasedOptimization)
+        if (m_param->bEnableSAO)
         {
             m_sao.rdoSaoUnitRowEnd(saoParam, m_frame->getNumCUsInFrame());
 
@@ -424,9 +419,6 @@
     const uint32_t lineStartCUAddr = row * numCols;
     SAOParam* saoParam = m_frame->getPicSym()->m_saoParam;
 
-    // NOTE: these flags are not used in this mode
-    X265_CHECK(!saoParam->oneUnitFlag[0] && !saoParam->oneUnitFlag[1] && !saoParam->oneUnitFlag[2], "invalid SAO flag");
-
     if (saoParam->bSaoFlag[0])
         m_sao.processSaoUnitRow(saoParam->saoLcuParam[0], row, 0);
 
diff -r 5a6845566d14 -r 3eacdaa30440 source/encoder/sao.cpp
--- a/source/encoder/sao.cpp	Mon Sep 29 17:37:47 2014 -0500
+++ b/source/encoder/sao.cpp	Tue Sep 30 09:48:12 2014 +0900
@@ -51,34 +51,11 @@
     return (x >> 31) | ((int)((((uint32_t)-x)) >> 31));
 }
 
-int convertLevelRowCol2Idx(int level, int row, int col)
-{
-    if (!level)
-        return 0;
-    else if (level == 1)
-        return 1 + row * 2 + col;
-    else if (level == 2)
-        return 5 + row * 4 + col;
-    else if (level == 3)
-        return 21 + row * 8 + col;
-    else // (level == 4)
-        return 85 + row * 16 + col;
-}
-
 } // end anonymous namespace
 
 
 namespace x265 {
 
-const int SAO::s_numCulPartsLevel[5] =
-{
-    1,   // level 0
-    5,   // level 1
-    21,  // level 2
-    85,  // level 3
-    341, // level 4
-};
-
 const uint32_t SAO::s_eoTable[NUM_EDGETYPE] =
 {
     1, // 0
@@ -95,17 +72,10 @@
     m_offsetOrg = NULL;
     m_countPreDblk = NULL;
     m_offsetOrgPreDblk = NULL;
-    m_rate = NULL;
-    m_dist = NULL;
-    m_cost = NULL;
-    m_costPartBest = NULL;
-    m_distOrg = NULL;
-    m_typePartBest = NULL;
     m_refDepth = 0;
     m_lumaLambda = 0;
     m_chromaLambda = 0;
     m_param = NULL;
-    m_numTotalParts = 0;
     m_clipTable = NULL;
     m_clipTableBase = NULL;
     m_offsetBo = NULL;
@@ -137,16 +107,6 @@
     m_numCuInWidth =  (m_param->sourceWidth + g_maxCUSize - 1) / g_maxCUSize;
     m_numCuInHeight = (m_param->sourceHeight + g_maxCUSize - 1) / g_maxCUSize;
 
-    int maxSplitLevelHeight = (int)(logf((float)m_numCuInHeight) / logf(2.0));
-    int maxSplitLevelWidth  = (int)(logf((float)m_numCuInWidth) / logf(2.0));
-
-    m_maxSplitLevel = maxSplitLevelHeight < maxSplitLevelWidth ? maxSplitLevelHeight : maxSplitLevelWidth;
-    m_maxSplitLevel = X265_MIN(m_maxSplitLevel, SAO_MAX_DEPTH);
-
-    /* various structures are overloaded to store per component data.
-     * m_numTotalParts must allow for sufficient storage in any allocated arrays */
-    m_numTotalParts = X265_MAX(3, s_numCulPartsLevel[m_maxSplitLevel]);
-
     const pixel maxY = (1 << X265_DEPTH) - 1;
     const pixel rangeExt = maxY >> 1;
     int numLcu = m_numCuInWidth * m_numCuInHeight;
@@ -163,17 +123,9 @@
         CHECKED_MALLOC(m_tmpU2[i], pixel, m_param->sourceWidth);
     }
 
-    CHECKED_MALLOC(m_distOrg, int64_t, m_numTotalParts);
-    CHECKED_MALLOC(m_costPartBest, double, m_numTotalParts);
-    CHECKED_MALLOC(m_typePartBest, int, m_numTotalParts);
-
-    CHECKED_MALLOC(m_rate, PerType, m_numTotalParts);
-    CHECKED_MALLOC(m_dist, PerType, m_numTotalParts);
-    CHECKED_MALLOC(m_cost, PerTypeD, m_numTotalParts);
-
-    CHECKED_MALLOC(m_count, PerClass, m_numTotalParts);
-    CHECKED_MALLOC(m_offset, PerClass, m_numTotalParts);
-    CHECKED_MALLOC(m_offsetOrg, PerClass, m_numTotalParts);
+    CHECKED_MALLOC(m_count, PerClass, NUM_PLANE);
+    CHECKED_MALLOC(m_offset, PerClass, NUM_PLANE);
+    CHECKED_MALLOC(m_offsetOrg, PerClass, NUM_PLANE);
 
     CHECKED_MALLOC(m_countPreDblk, PerPlane, numLcu);
     CHECKED_MALLOC(m_offsetOrgPreDblk, PerPlane, numLcu);
@@ -209,12 +161,6 @@
         X265_FREE(m_tmpU2[i]);
     }
 
-    X265_FREE(m_distOrg);
-    X265_FREE(m_costPartBest);
-    X265_FREE(m_typePartBest);
-    X265_FREE(m_rate);
-    X265_FREE(m_dist);
-    X265_FREE(m_cost);
     X265_FREE(m_count);
     X265_FREE(m_offset);
     X265_FREE(m_offsetOrg);
@@ -225,143 +171,22 @@
 /* allocate memory for SAO parameters */
 void SAO::allocSaoParam(SAOParam *saoParam) const
 {
-    saoParam->maxSplitLevel = m_maxSplitLevel;
     saoParam->numCuInWidth  = m_numCuInWidth;
     saoParam->numCuInHeight = m_numCuInHeight;
 
-    saoParam->saoPart[0] = new SAOQTPart[s_numCulPartsLevel[saoParam->maxSplitLevel]];
-    initSAOParam(saoParam, 0, 0, 0, -1, 0, m_numCuInWidth - 1,  0, m_numCuInHeight - 1, 0);
-
-    saoParam->saoPart[1] = new SAOQTPart[s_numCulPartsLevel[saoParam->maxSplitLevel]];
-    saoParam->saoPart[2] = new SAOQTPart[s_numCulPartsLevel[saoParam->maxSplitLevel]];
-    initSAOParam(saoParam, 0, 0, 0, -1, 0, m_numCuInWidth - 1,  0, m_numCuInHeight - 1, 1);
-    initSAOParam(saoParam, 0, 0, 0, -1, 0, m_numCuInWidth - 1,  0, m_numCuInHeight - 1, 2);
-
     saoParam->saoLcuParam[0] = new SaoLcuParam[m_numCuInHeight * m_numCuInWidth];
     saoParam->saoLcuParam[1] = new SaoLcuParam[m_numCuInHeight * m_numCuInWidth];
     saoParam->saoLcuParam[2] = new SaoLcuParam[m_numCuInHeight * m_numCuInWidth];
 }
 
-/* recursively initialize SAO parameters (only once) */
-void SAO::initSAOParam(SAOParam *saoParam, int partLevel, int partRow, int partCol, int parentPartIdx, int startCUX, int endCUX, int startCUY, int endCUY, int plane) const
-{
-    int partIdx = convertLevelRowCol2Idx(partLevel, partRow, partCol);
-
-    SAOQTPart* saoPart = &(saoParam->saoPart[plane][partIdx]);
-
-    saoPart->partIdx   = partIdx;
-    saoPart->partLevel = partLevel;
-    saoPart->partRow   = partRow;
-    saoPart->partCol   = partCol;
-
-    saoPart->startCUX  = startCUX;
-    saoPart->endCUX    = endCUX;
-    saoPart->startCUY  = startCUY;
-    saoPart->endCUY    = endCUY;
-
-    saoPart->upPartIdx = parentPartIdx;
-    saoPart->bestType  = -1;
-
-    saoPart->subTypeIdx = 0;
-
-    for (int j = 0; j < SAO_NUM_OFFSET; j++)
-        saoPart->offset[j] = 0;
-
-    if (saoPart->partLevel < m_maxSplitLevel)
-    {
-        int downLevel    = (partLevel + 1);
-        int downRowStart = (partRow << 1);
-        int downColStart = (partCol << 1);
-
-        int numCUWidth  = endCUX - startCUX + 1;
-        int numCUHeight = endCUY - startCUY + 1;
-        int numCULeft   = (numCUWidth  >> 1);
-        int numCUTop    = (numCUHeight >> 1);
-
-        int downStartCUX = startCUX;
-        int downEndCUX  = downStartCUX + numCULeft - 1;
-        int downStartCUY = startCUY;
-        int downEndCUY  = downStartCUY + numCUTop  - 1;
-        int downRowIdx = downRowStart + 0;
-        int downColIdx = downColStart + 0;
-
-        saoPart->downPartsIdx[0] = convertLevelRowCol2Idx(downLevel, downRowIdx, downColIdx);
-
-        initSAOParam(saoParam, downLevel, downRowIdx, downColIdx, partIdx, downStartCUX, downEndCUX, downStartCUY, downEndCUY, plane);
-
-        downStartCUX = startCUX + numCULeft;
-        downEndCUX   = endCUX;
-        downStartCUY = startCUY;
-        downEndCUY   = downStartCUY + numCUTop - 1;
-        downRowIdx  = downRowStart + 0;
-        downColIdx  = downColStart + 1;
-
-        saoPart->downPartsIdx[1] = convertLevelRowCol2Idx(downLevel, downRowIdx, downColIdx);
-
-        initSAOParam(saoParam, downLevel, downRowIdx, downColIdx, partIdx,  downStartCUX, downEndCUX, downStartCUY, downEndCUY, plane);
-
-        downStartCUX = startCUX;
-        downEndCUX   = downStartCUX + numCULeft - 1;
-        downStartCUY = startCUY + numCUTop;
-        downEndCUY   = endCUY;
-        downRowIdx  = downRowStart + 1;
-        downColIdx  = downColStart + 0;
-
-        saoPart->downPartsIdx[2] = convertLevelRowCol2Idx(downLevel, downRowIdx, downColIdx);
-
-        initSAOParam(saoParam, downLevel, downRowIdx, downColIdx, partIdx, downStartCUX, downEndCUX, downStartCUY, downEndCUY, plane);
-
-        downStartCUX = startCUX + numCULeft;
-        downEndCUX   = endCUX;
-        downStartCUY = startCUY + numCUTop;
-        downEndCUY   = endCUY;
-        downRowIdx  = downRowStart + 1;
-        downColIdx  = downColStart + 1;
-
-        saoPart->downPartsIdx[3] = convertLevelRowCol2Idx(downLevel, downRowIdx, downColIdx);
-
-        initSAOParam(saoParam, downLevel, downRowIdx, downColIdx, partIdx, downStartCUX, downEndCUX, downStartCUY, downEndCUY, plane);
-    }
-    else
-    {
-        saoPart->downPartsIdx[0] = saoPart->downPartsIdx[1] = saoPart->downPartsIdx[2] = saoPart->downPartsIdx[3] = -1;
-    }
-}
-
 /* reset SAO parameters once per frame */
 void SAO::resetSAOParam(SAOParam *saoParam)
 {
-    int numComponet = 3;
-
-    for (int c = 0; c < numComponet; c++)
-    {
-        if (c < 2)
-            saoParam->bSaoFlag[c] = false;
-
-        for (int i = 0; i < s_numCulPartsLevel[m_maxSplitLevel]; i++)
-        {
-            saoParam->saoPart[c][i].bestType     = -1;
-            saoParam->saoPart[c][i].bSplit       = false;
-            saoParam->saoPart[c][i].bProcessed   = false;
-            saoParam->saoPart[c][i].minCost      = MAX_DOUBLE;
-            saoParam->saoPart[c][i].minDist      = MAX_INT;
-            saoParam->saoPart[c][i].minRate      = MAX_INT;
-            saoParam->saoPart[c][i].subTypeIdx   = 0;
-            for (int j = 0; j < SAO_NUM_OFFSET; j++)
-            {
-                saoParam->saoPart[c][i].offset[j] = 0;
-                saoParam->saoPart[c][i].offset[j] = 0;
-                saoParam->saoPart[c][i].offset[j] = 0;
-            }
-        }
-
-        saoParam->oneUnitFlag[0] = 0;
-        saoParam->oneUnitFlag[1] = 0;
-        saoParam->oneUnitFlag[2] = 0;
-        resetLcuPart(saoParam->saoLcuParam[0]);
-        resetLcuPart(saoParam->saoLcuParam[1]);
-        resetLcuPart(saoParam->saoLcuParam[2]);
-    }
+    saoParam->bSaoFlag[0] = false;
+    saoParam->bSaoFlag[1] = false;
+    resetLcuPart(saoParam->saoLcuParam[0]);
+    resetLcuPart(saoParam->saoLcuParam[1]);
+    resetLcuPart(saoParam->saoLcuParam[2]);
 }
 
 void SAO::startSlice(Frame *pic, Entropy& initState, int qp)
@@ -647,133 +472,6 @@
 }
 
 /* Process SAO all units */
-void SAO::processSaoUnitAll(SaoLcuParam* saoLcuParam, bool oneUnitFlag, int plane)
-{
-    pixel *rec;
-    int picWidthTmp;
-
-    if (plane)
-    {
-        rec         = m_pic->getPicYuvRec()->getChromaAddr(plane);
-        picWidthTmp = m_param->sourceWidth >> m_hChromaShift;
-    }
-    else
-    {
-        rec         = m_pic->getPicYuvRec()->getLumaAddr();
-        picWidthTmp = m_param->sourceWidth;
-    }
-
-    memcpy(m_tmpU1[plane], rec, sizeof(pixel) * picWidthTmp);
-
-    int frameWidthInCU = m_pic->getFrameWidthInCU();
-    int frameHeightInCU = m_pic->getFrameHeightInCU();
-    int stride;
-    bool isChroma = !!plane;
-    uint32_t cuHeightTmp = isChroma ? (g_maxCUSize >> m_vChromaShift) : g_maxCUSize;
-
-    const int boShift = X265_DEPTH - SAO_BO_BITS;
-
-    for (int idxY = 0; idxY < frameHeightInCU; idxY++)
-    {
-        int addr = idxY * frameWidthInCU;
-        if (plane == 0)
-        {
-            rec = m_pic->getPicYuvRec()->getLumaAddr(addr);
-            stride = m_pic->getStride();
-            picWidthTmp = m_param->sourceWidth;
-        }
-        else
-        {
-            rec = m_pic->getPicYuvRec()->getChromaAddr(plane, addr);
-            stride = m_pic->getCStride();
-            picWidthTmp = m_param->sourceWidth >> m_hChromaShift;
-        }
-        for (uint32_t i = 0; i < cuHeightTmp + 1; i++)
-        {
-            m_tmpL1[i] = rec[0];
-            rec += stride;
-        }
-
-        rec -= (stride << 1);
-
-        memcpy(m_tmpU2[plane], rec, sizeof(pixel) * picWidthTmp);
-
-        for (int idxX = 0; idxX < frameWidthInCU; idxX++)
-        {
-            addr = idxY * frameWidthInCU + idxX;
-
-            int typeIdx;
-            bool mergeLeftFlag;
-
-            if (oneUnitFlag)
-            {
-                typeIdx = saoLcuParam[0].typeIdx;
-                mergeLeftFlag = (addr == 0) ? 0 : 1;
-            }
-            else
-            {
-                typeIdx = saoLcuParam[addr].typeIdx;
-                mergeLeftFlag = saoLcuParam[addr].mergeLeftFlag;
-            }
-            if (typeIdx >= 0)
-            {
-                if (!mergeLeftFlag)
-                {
-                    if (typeIdx == SAO_BO)
-                    {
-                        pixel* offsetBo = m_offsetBo;
-                        int offset[SAO_NUM_BO_CLASSES];
-                        memset(offset, 0, sizeof(offset));
-
-                        for (int i = 0; i < SAO_NUM_OFFSET; i++)
-                            offset[((saoLcuParam[addr].subTypeIdx + i) & (SAO_NUM_BO_CLASSES - 1))] = saoLcuParam[addr].offset[i] << SAO_BIT_INC;
-
-                        for (int i = 0; i < (1 << X265_DEPTH); i++)
-                            offsetBo[i] = m_clipTable[i + offset[i >> boShift]];
-                    }
-                    else // if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3)
-                    {
-                        int offset[NUM_EDGETYPE];
-                        offset[0] = 0;
-                        for (int i = 0; i < SAO_NUM_OFFSET; i++)
-                            offset[i + 1] = saoLcuParam[addr].offset[i] << SAO_BIT_INC;
-
-                        for (int edgeType = 0; edgeType < NUM_EDGETYPE; edgeType++)
-                            m_offsetEo[edgeType] = (int8_t)offset[s_eoTable[edgeType]];
-                    }
-                }
-                processSaoCu(addr, typeIdx, plane);
-            }
-            else
-            {
-                if (idxX != (frameWidthInCU - 1))
-                {
-                    if (isChroma)
-                    {
-                        rec = m_pic->getPicYuvRec()->getChromaAddr(plane, addr);
-                        stride = m_pic->getCStride();
-                    }
-                    else
-                    {
-                        rec = m_pic->getPicYuvRec()->getLumaAddr(addr);
-                        stride = m_pic->getStride();
-                    }
-
-                    int widthShift = isChroma ? (g_maxCUSize >> m_hChromaShift) : g_maxCUSize;
-                    for (uint32_t i = 0; i < cuHeightTmp + 1; i++)
-                    {
-                        m_tmpL1[i] = rec[widthShift - 1];
-                        rec += stride;
-                    }
-                }
-            }
-        }
-
-        std::swap(m_tmpU1[plane], m_tmpU2[plane]);
-    }
-}
-
-/* Process SAO all units */
 void SAO::processSaoUnitRow(SaoLcuParam* saoLcuParam, int idxY, int plane)
 {
     pixel *rec;
@@ -925,272 +623,8 @@
         saoUnitDst->offset[i] = saoUnitSrc->offset[i];
 }
 
-/* convert QP part to SAO unit */
-void SAO::convertQT2SaoUnit(SAOParam *saoParam, uint32_t partIdx, int plane)
-{
-    SAOQTPart* saoPart = &(saoParam->saoPart[plane][partIdx]);
-
-    if (!saoPart->bSplit)
-    {
-        convertOnePart2SaoUnit(saoParam, partIdx, plane);
-        return;
-    }
-
-    if (saoPart->partLevel < m_maxSplitLevel)
-    {
-        convertQT2SaoUnit(saoParam, saoPart->downPartsIdx[0], plane);
-        convertQT2SaoUnit(saoParam, saoPart->downPartsIdx[1], plane);
-        convertQT2SaoUnit(saoParam, saoPart->downPartsIdx[2], plane);
-        convertQT2SaoUnit(saoParam, saoPart->downPartsIdx[3], plane);
-    }
-}
-
-/* convert one SAO part to SAO unit */
-void SAO::convertOnePart2SaoUnit(SAOParam *saoParam, uint32_t partIdx, int plane)
-{
-    int frameWidthInCU = m_pic->getFrameWidthInCU();
-    SAOQTPart* saoQTPart = saoParam->saoPart[plane];
-    SaoLcuParam* saoLcuParam = saoParam->saoLcuParam[plane];
-
-    for (int idxY = saoQTPart[partIdx].startCUY; idxY <= saoQTPart[partIdx].endCUY; idxY++)
-    {
-        for (int idxX = saoQTPart[partIdx].startCUX; idxX <= saoQTPart[partIdx].endCUX; idxX++)
-        {
-            int addr = idxY * frameWidthInCU + idxX;
-            saoLcuParam[addr].partIdxTmp = (int)partIdx;
-            saoLcuParam[addr].typeIdx    = saoQTPart[partIdx].bestType;
-            saoLcuParam[addr].subTypeIdx = saoQTPart[partIdx].subTypeIdx;
-            if (saoLcuParam[addr].typeIdx >= 0)
-            {
-                for (int j = 0; j < SAO_NUM_OFFSET; j++)
-                    saoLcuParam[addr].offset[j] = saoQTPart[partIdx].offset[j];
-            }
-            else
-            {
-                saoLcuParam[addr].subTypeIdx = saoQTPart[partIdx].subTypeIdx;
-                for (int j = 0; j < SAO_NUM_OFFSET; j++)
-                    saoLcuParam[addr].offset[j] = 0;
-            }
-        }
-    }
-}
-
-/* process SAO for one partition */
-void SAO::rdoSaoOnePart(SAOQTPart *psQTPart, int partIdx, int plane)
-{
-    SAOQTPart* onePart = &(psQTPart[partIdx]);
-
-    int64_t estDist;
-
-    m_distOrg[partIdx] = 0;
-
-    int    bestClassTableBo = 0;
-    int    currentDistortionTableBo[MAX_NUM_SAO_CLASS];
-    double currentRdCostTableBo[MAX_NUM_SAO_CLASS];
-    double bestRDCostTableBo = MAX_DOUBLE;
-
-    int allowMergeLeft;
-    int allowMergeUp;
-    SaoLcuParam saoLcuParamRdo;
-
-    for (int typeIdx = -1; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)
-    {
-        m_entropyCoder.load(m_rdEntropyCoders[onePart->partLevel][CI_CURR_BEST]);
-        m_entropyCoder.resetBits();
-
-        if (typeIdx >= 0)
-        {
-            estDist = estSaoTypeDist(partIdx, typeIdx, 0, m_lumaLambda, currentDistortionTableBo, currentRdCostTableBo);
-            if (typeIdx == SAO_BO)
-            {
-                // Estimate Best Position
-                for (int i = 0; i < SAO_NUM_BO_CLASSES - SAO_BO_LEN + 1; i++)
-                {
-                    double currentRDCost = 0.0;
-                    for (int j = i; j < i + SAO_BO_LEN; j++)
-                        currentRDCost += currentRdCostTableBo[j];
-
-                    if (currentRDCost < bestRDCostTableBo)
-                    {
-                        bestRDCostTableBo = currentRDCost;
-                        bestClassTableBo  = i;
-                    }
-                }
-
-                // Recode all offsets
-                for (int classIdx = bestClassTableBo; classIdx < bestClassTableBo + SAO_BO_LEN; classIdx++)
-                    estDist += currentDistortionTableBo[classIdx];
-            }
-
-            for (int ry = onePart->startCUY; ry <= onePart->endCUY; ry++)
-            {
-                for (int rx = onePart->startCUX; rx <= onePart->endCUX; rx++)
-                {
-                    // get bits for typeIdx = -1
-                    allowMergeLeft = 1;
-                    allowMergeUp   = 1;
-
-                    // reset
-                    resetSaoUnit(&saoLcuParamRdo);
-
-                    // set merge flag
-                    saoLcuParamRdo.mergeUpFlag   = 1;
-                    saoLcuParamRdo.mergeLeftFlag = 1;
-
-                    if (ry == onePart->startCUY)
-                        saoLcuParamRdo.mergeUpFlag = 0;
-
-                    if (rx == onePart->startCUX)
-                        saoLcuParamRdo.mergeLeftFlag = 0;
-
-                    // set type and offsets
-                    saoLcuParamRdo.typeIdx = typeIdx;
-                    saoLcuParamRdo.subTypeIdx = (typeIdx == SAO_BO) ? bestClassTableBo : 0;
-                    for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
-                        saoLcuParamRdo.offset[classIdx] = (int)m_offset[partIdx][typeIdx][classIdx + saoLcuParamRdo.subTypeIdx + 1];
-
-                    m_entropyCoder.codeSaoUnitInterleaving(plane, 1, rx, ry, &saoLcuParamRdo, 1, 1, allowMergeLeft, allowMergeUp);
-                }
-            }
-
-            m_dist[partIdx][typeIdx] = estDist;
-            m_rate[partIdx][typeIdx] = m_entropyCoder.getNumberOfWrittenBits();
-
-            m_cost[partIdx][typeIdx] = (double)((double)m_dist[partIdx][typeIdx] + m_lumaLambda * (double)m_rate[partIdx][typeIdx]);
-
-            if (m_cost[partIdx][typeIdx] < m_costPartBest[partIdx])
-            {
-                m_distOrg[partIdx] = 0;
-                m_costPartBest[partIdx] = m_cost[partIdx][typeIdx];
-                m_typePartBest[partIdx] = typeIdx;
-                m_entropyCoder.store(m_rdEntropyCoders[onePart->partLevel][CI_TEMP_BEST]);
-            }
-        }
-        else
-        {
-            for (int ry = onePart->startCUY; ry <= onePart->endCUY; ry++)
-            {
-                for (int rx = onePart->startCUX; rx <= onePart->endCUX; rx++)
-                {
-                    // get bits for iTypeIdx = -1
-                    allowMergeLeft = 1;
-                    allowMergeUp   = 1;
-
-                    // reset
-                    resetSaoUnit(&saoLcuParamRdo);
-
-                    // set merge flag
-                    saoLcuParamRdo.mergeUpFlag   = 1;
-                    saoLcuParamRdo.mergeLeftFlag = 1;
-
-                    if (ry == onePart->startCUY)
-                        saoLcuParamRdo.mergeUpFlag = 0;
-
-                    if (rx == onePart->startCUX)
-                        saoLcuParamRdo.mergeLeftFlag = 0;
-
-                    m_entropyCoder.codeSaoUnitInterleaving(plane, 1, rx, ry,  &saoLcuParamRdo, 1,  1,  allowMergeLeft, allowMergeUp);
-                }
-            }
-            if (m_distOrg[partIdx] < m_costPartBest[partIdx])
-            {
-                m_costPartBest[partIdx] = (double)m_distOrg[partIdx] + m_entropyCoder.getNumberOfWrittenBits() * m_lumaLambda;
-                m_typePartBest[partIdx] = -1;
-                m_entropyCoder.store(m_rdEntropyCoders[onePart->partLevel][CI_TEMP_BEST]);
-            }
-        }
-    }
-
-    onePart->bProcessed = true;
-    onePart->bSplit    = false;
-    onePart->minDist   =       m_typePartBest[partIdx] >= 0 ? m_dist[partIdx][m_typePartBest[partIdx]] : m_distOrg[partIdx];
-    onePart->minRate   = (int)(m_typePartBest[partIdx] >= 0 ? m_rate[partIdx][m_typePartBest[partIdx]] : 0);
-    onePart->minCost   = onePart->minDist + m_lumaLambda * onePart->minRate;
-    onePart->bestType  = m_typePartBest[partIdx];
-
-    if (onePart->bestType != -1)
-    {
-        int minIndex = 0;
-        if (onePart->bestType == SAO_BO)
-        {
-            onePart->subTypeIdx = bestClassTableBo;
-            minIndex = onePart->subTypeIdx;
-        }
-        for (int i = 0; i < SAO_NUM_OFFSET; i++)
-            onePart->offset[i] = (int)m_offset[partIdx][onePart->bestType][minIndex + i + 1];
-    }
-}
-
-/* Run partition tree disable */
-void SAO::disablePartTree(SAOQTPart *psQTPart, int partIdx)
-{
-    SAOQTPart* pOnePart = &(psQTPart[partIdx]);
-
-    pOnePart->bSplit   = false;
-    pOnePart->bestType = -1;
-
-    if (pOnePart->partLevel < (int)m_maxSplitLevel)
-    {
-        for (int i = 0; i < SAOQTPart::NUM_DOWN_PART; i++)
-            disablePartTree(psQTPart, pOnePart->downPartsIdx[i]);
-    }
-}
-
-/* Run quadtree decision function */
-void SAO::runQuadTreeDecision(SAOQTPart *qtPart, int partIdx, double &costFinal, int maxLevel, int plane)
-{
-    SAOQTPart* onePart = &(qtPart[partIdx]);
-
-    uint32_t nextDepth = onePart->partLevel + 1;
-
-    if (!partIdx)
-        costFinal = 0;
-
-    // SAO for this part
-    if (!onePart->bProcessed)
-        rdoSaoOnePart(qtPart, partIdx, plane);
-
-    // SAO for sub 4 parts
-    if (onePart->partLevel < maxLevel)
-    {
-        double costNotSplit = m_lumaLambda + onePart->minCost;
-        double costSplit    = m_lumaLambda;
-
-        for (int i = 0; i < SAOQTPart::NUM_DOWN_PART; i++)
-        {
-            if (i) //initialize RD with previous depth buffer
-                m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[nextDepth][CI_NEXT_BEST]);
-            else
-                m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[onePart->partLevel][CI_CURR_BEST]);
-
-            runQuadTreeDecision(qtPart, onePart->downPartsIdx[i], costFinal, maxLevel, plane);
-            costSplit += costFinal;
-            m_rdEntropyCoders[nextDepth][CI_NEXT_BEST].load(m_rdEntropyCoders[nextDepth][CI_TEMP_BEST]);
-        }
-
-        if (costSplit < costNotSplit)
-        {
-            costFinal = costSplit;
-            onePart->bSplit   = true;
-            onePart->bestType = -1;
-            m_rdEntropyCoders[onePart->partLevel][CI_NEXT_BEST].load(m_rdEntropyCoders[nextDepth][CI_NEXT_BEST]);
-        }
-        else
-        {
-            costFinal = costNotSplit;
-            onePart->bSplit = false;
-            for (int i = 0; i < SAOQTPart::NUM_DOWN_PART; i++)
-                disablePartTree(qtPart, onePart->downPartsIdx[i]);
-
-            m_rdEntropyCoders[onePart->partLevel][CI_NEXT_BEST].load(m_rdEntropyCoders[onePart->partLevel][CI_TEMP_BEST]);
-        }
-    }
-    else
-        costFinal = onePart->minCost;
-}
-
 /* Calculate SAO statistics for current LCU without non-crossing slice */
-void SAO::calcSaoStatsCu(int addr, int partIdx, int plane)
+void SAO::calcSaoStatsCu(int addr, int plane)
 {
     int x, y;
     TComDataCU *cu = m_pic->getCU(addr);
@@ -1216,15 +650,8 @@
     int isLuma = !plane;
     int isChroma = !!plane;
     int numSkipLine = isChroma ? 4 - (2 * m_vChromaShift) : 4;
-
-    if (!m_param->saoLcuBasedOptimization)
-        numSkipLine = 0;
-
     int numSkipLineRight = isChroma ? 5 - (2 * m_hChromaShift) : 5;
 
-    if (!m_param->saoLcuBasedOptimization)
-        numSkipLineRight = 0;
-
     picWidthTmp  = isLuma ? m_param->sourceWidth  : m_param->sourceWidth  >> m_hChromaShift;
     picHeightTmp = isLuma ? m_param->sourceHeight : m_param->sourceHeight >> m_vChromaShift;
     lcuWidth     = isLuma ? g_maxCUSize : g_maxCUSize >> m_hChromaShift;
@@ -1244,13 +671,13 @@
     {
         const int boShift = X265_DEPTH - SAO_BO_BITS;
 
-        if (m_param->saoLcuBasedOptimization && m_param->saoLcuBoundary)
+        if (m_param->saoLcuBoundary)
         {
             numSkipLine      = isChroma ? 3 - (2 * m_vChromaShift) : 3;
             numSkipLineRight = isChroma ? 4 - (2 * m_hChromaShift) : 4;
         }
-        stats = m_offsetOrg[partIdx][SAO_BO];
-        counts = m_count[partIdx][SAO_BO];
+        stats = m_offsetOrg[plane][SAO_BO];
+        counts = m_count[plane][SAO_BO];
 
         fenc = m_pic->getPicYuvOrg()->getPlaneAddr(plane, addr);
         recon = m_pic->getPicYuvRec()->getPlaneAddr(plane, addr);
@@ -1278,13 +705,13 @@
     {
         //if (iSaoType == EO_0)
         {
-            if (m_param->saoLcuBasedOptimization && m_param->saoLcuBoundary)
+            if (m_param->saoLcuBoundary)
             {
                 numSkipLine      = isChroma ? 3 - (2 * m_vChromaShift) : 3;
                 numSkipLineRight = isChroma ? 5 - (2 * m_hChromaShift) : 5;
             }
-            stats = m_offsetOrg[partIdx][SAO_EO_0];
-            counts = m_count[partIdx][SAO_EO_0];
+            stats = m_offsetOrg[plane][SAO_EO_0];
+            counts = m_count[plane][SAO_EO_0];
 
             fenc = m_pic->getPicYuvOrg()->getPlaneAddr(plane, addr);
             recon = m_pic->getPicYuvRec()->getPlaneAddr(plane, addr);
@@ -1311,13 +738,13 @@
 
         //if (iSaoType == EO_1)
         {
-            if (m_param->saoLcuBasedOptimization && m_param->saoLcuBoundary)
+            if (m_param->saoLcuBoundary)
             {
                 numSkipLine      = isChroma ? 4 - (2 * m_vChromaShift) : 4;
                 numSkipLineRight = isChroma ? 4 - (2 * m_hChromaShift) : 4;
             }
-            stats = m_offsetOrg[partIdx][SAO_EO_1];
-            counts = m_count[partIdx][SAO_EO_1];
+            stats = m_offsetOrg[plane][SAO_EO_1];
+            counts = m_count[plane][SAO_EO_1];
 
             fenc = m_pic->getPicYuvOrg()->getPlaneAddr(plane, addr);
             recon = m_pic->getPicYuvRec()->getPlaneAddr(plane, addr);
@@ -1352,13 +779,13 @@
         }
         //if (iSaoType == EO_2)
         {
-            if (m_param->saoLcuBasedOptimization && m_param->saoLcuBoundary)
+            if (m_param->saoLcuBoundary)
             {
                 numSkipLine      = isChroma ? 4 - (2 * m_vChromaShift) : 4;
                 numSkipLineRight = isChroma ? 5 - (2 * m_hChromaShift) : 5;
             }
-            stats = m_offsetOrg[partIdx][SAO_EO_2];
-            counts = m_count[partIdx][SAO_EO_2];
+            stats = m_offsetOrg[plane][SAO_EO_2];
+            counts = m_count[plane][SAO_EO_2];
 
             fenc = m_pic->getPicYuvOrg()->getPlaneAddr(plane, addr);
             recon = m_pic->getPicYuvRec()->getPlaneAddr(plane, addr);
@@ -1398,13 +825,13 @@
         }
         //if (iSaoType == EO_3)
         {
-            if (m_param->saoLcuBasedOptimization && m_param->saoLcuBoundary)
+            if (m_param->saoLcuBoundary)
             {
                 numSkipLine      = isChroma ? 4 - (2 * m_vChromaShift) : 4;
                 numSkipLineRight = isChroma ? 5 - (2 * m_hChromaShift) : 5;
             }
-            stats = m_offsetOrg[partIdx][SAO_EO_3];
-            counts = m_count[partIdx][SAO_EO_3];
+            stats = m_offsetOrg[plane][SAO_EO_3];
+            counts = m_count[plane][SAO_EO_3];
 
             fenc = m_pic->getPicYuvOrg()->getPlaneAddr(plane, addr);
             recon = m_pic->getPicYuvRec()->getPlaneAddr(plane, addr);
@@ -1721,84 +1148,13 @@
     }
 }
 
-void SAO::getSaoStats(SAOQTPart *psQTPart, int plane)
-{
-    int levelIdx, partIdx;
-    int i;
-    int lcuIdx;
-    int lcuIdy;
-    int frameWidthInCU = m_pic->getFrameWidthInCU();
-    int downPartIdx;
-    int partStart;
-    int partEnd;
-    SAOQTPart* onePart;
-
-    if (!m_maxSplitLevel)
-    {
-        partIdx = 0;
-        onePart = &(psQTPart[partIdx]);
-        for (lcuIdy = onePart->startCUY; lcuIdy <= onePart->endCUY; lcuIdy++)
-        {
-            for (lcuIdx = onePart->startCUX; lcuIdx <= onePart->endCUX; lcuIdx++)
-            {
-                int addr = lcuIdy * frameWidthInCU + lcuIdx;
-                calcSaoStatsCu(addr, partIdx, plane);
-            }
-        }
-    }
-    else
-    {
-        for (partIdx = s_numCulPartsLevel[m_maxSplitLevel - 1]; partIdx < s_numCulPartsLevel[m_maxSplitLevel]; partIdx++)
-        {
-            onePart = &(psQTPart[partIdx]);
-            for (lcuIdy = onePart->startCUY; lcuIdy <= onePart->endCUY; lcuIdy++)
-            {
-                for (lcuIdx = onePart->startCUX; lcuIdx <= onePart->endCUX; lcuIdx++)
-                {
-                    int addr = lcuIdy * frameWidthInCU + lcuIdx;
-                    calcSaoStatsCu(addr, partIdx, plane);
-                }
-            }
-        }
-
-        for (levelIdx = m_maxSplitLevel - 1; levelIdx >= 0; levelIdx--)
-        {
-            partStart = (levelIdx > 0) ? s_numCulPartsLevel[levelIdx - 1] : 0;
-            partEnd   = s_numCulPartsLevel[levelIdx];
-
-            for (partIdx = partStart; partIdx < partEnd; partIdx++)
-            {
-                onePart = &(psQTPart[partIdx]);
-                for (i = 0; i < SAOQTPart::NUM_DOWN_PART; i++)
-                {
-                    downPartIdx = onePart->downPartsIdx[i];
-                    for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)
-                    {
-                        for (int classIdx = 0; classIdx < (typeIdx < SAO_BO ? SAO_EO_LEN : SAO_NUM_BO_CLASSES) + 1; classIdx++)
-                        {
-                            m_offsetOrg[partIdx][typeIdx][classIdx] += m_offsetOrg[downPartIdx][typeIdx][classIdx];
-                            m_count[partIdx][typeIdx][classIdx]    += m_count[downPartIdx][typeIdx][classIdx];
-                        }
-                    }
-                }
-            }
-        }
-    }
-}
-
 /* reset offset statistics */
 void SAO::resetStats()
 {
-    for (int i = 0; i < m_numTotalParts; i++)
+    for (int i = 0; i < NUM_PLANE; i++)
     {
-        m_costPartBest[i] = MAX_DOUBLE;
-        m_typePartBest[i] = -1;
-        m_distOrg[i] = 0;
         for (int j = 0; j < MAX_NUM_SAO_TYPE; j++)
         {
-            m_dist[i][j] = 0;
-            m_rate[i][j] = 0;
-            m_cost[i][j] = 0;
             for (int k = 0; k < MAX_NUM_SAO_CLASS; k++)
             {
                 m_count[i][j][k] = 0;
@@ -1809,31 +1165,6 @@
     }
 }
 
-/* Sample adaptive offset process */
-void SAO::SAOProcess(SAOParam *saoParam)
-{
-    X265_CHECK(!m_param->saoLcuBasedOptimization, "SAO LCU mode failure\n"); 
-    double costFinal = 0;
-    saoParam->bSaoFlag[0] = true;
-    saoParam->bSaoFlag[1] = false;
-
-    getSaoStats(saoParam->saoPart[0], 0);
-    runQuadTreeDecision(saoParam->saoPart[0], 0, costFinal, m_maxSplitLevel, 0);
-    saoParam->bSaoFlag[0] = costFinal < 0;
-
-    if (saoParam->bSaoFlag[0])
-    {
-        convertQT2SaoUnit(saoParam, 0, 0);
-        assignSaoUnitSyntax(saoParam->saoLcuParam[0], saoParam->saoPart[0], saoParam->oneUnitFlag[0]);
-        processSaoUnitAll(saoParam->saoLcuParam[0], saoParam->oneUnitFlag[0], 0);
-    }
-    if (saoParam->bSaoFlag[1])
-    {
-        processSaoUnitAll(saoParam->saoLcuParam[1], saoParam->oneUnitFlag[1], 1);
-        processSaoUnitAll(saoParam->saoLcuParam[2], saoParam->oneUnitFlag[2], 2);
-    }
-}
-
 /* Check merge SAO unit */
 void SAO::checkMerge(SaoLcuParam * saoUnitCurr, SaoLcuParam * saoUnitCheck, int dir)
 {
@@ -1885,65 +1216,10 @@
     }
 }
 
-/** Assign SAO unit syntax from picture-based algorithm */
-void SAO::assignSaoUnitSyntax(SaoLcuParam* saoLcuParam,  SAOQTPart* saoPart, bool &oneUnitFlag)
-{
-    if (saoPart->bSplit == 0)
-        oneUnitFlag = 1;
-    else
-    {
-        oneUnitFlag = 0;
-
-        int idxCount = -1;
-        saoLcuParam[0].mergeUpFlag = 0;
-        saoLcuParam[0].mergeLeftFlag = 0;
-
-        for (int j = 0; j < m_numCuInHeight; j++)
-        {
-            for (int i = 0; i < m_numCuInWidth; i++)
-            {
-                int addr     = i + j * m_numCuInWidth;
-                int addrUp   = (j == 0) ? -1 : addr - m_numCuInWidth;
-                int addrLeft = (i == 0) ? -1 : addr - 1;
-                int idx      = saoLcuParam[addr].partIdxTmp;
-                int idxLeft  = (addrLeft == -1) ? -1 : saoLcuParam[addrLeft].partIdxTmp;
-                int idxUp    = (addrUp == -1)   ? -1 : saoLcuParam[addrUp].partIdxTmp;
-
-                if (idx != idxLeft && idx != idxUp)
-                {
-                    saoLcuParam[addr].mergeUpFlag   = 0;
-                    idxCount++;
-                    saoLcuParam[addr].mergeLeftFlag = 0;
-                    saoLcuParam[addr].partIdx = idxCount;
-                }
-                else if (idx == idxLeft)
-                {
-                    saoLcuParam[addr].mergeUpFlag   = 1;
-                    saoLcuParam[addr].mergeLeftFlag = 1;
-                    saoLcuParam[addr].partIdx = saoLcuParam[addrLeft].partIdx;
-                }
-                else if (idx == idxUp)
-                {
-                    saoLcuParam[addr].mergeUpFlag   = 1;
-                    saoLcuParam[addr].mergeLeftFlag = 0;
-                    saoLcuParam[addr].partIdx = saoLcuParam[addrUp].partIdx;
-                }
-                if (addrUp != -1)
-                    checkMerge(&saoLcuParam[addr], &saoLcuParam[addrUp], 1);
-                if (addrLeft != -1)
-                    checkMerge(&saoLcuParam[addr], &saoLcuParam[addrLeft], 0);
-            }
-        }
-    }
-}
-
 void SAO::rdoSaoUnitRowInit(SAOParam *saoParam)
 {
     saoParam->bSaoFlag[0] = true;
     saoParam->bSaoFlag[1] = true;
-    saoParam->oneUnitFlag[0] = false;
-    saoParam->oneUnitFlag[1] = false;
-    saoParam->oneUnitFlag[2] = false;
 
     m_numNoSao[0] = 0; // Luma
     m_numNoSao[1] = 0; // Chroma
@@ -2005,7 +1281,7 @@
                 for (k = 0; k < MAX_NUM_SAO_CLASS; k++)
                 {
                     m_offset[compIdx][j][k] = 0;
-                    if (m_param->saoLcuBasedOptimization && m_param->saoLcuBoundary)
+                    if (m_param->saoLcuBoundary)
                     {
                         m_count[compIdx][j][k] = m_countPreDblk[addr][compIdx][j][k];
                         m_offsetOrg[compIdx][j][k] = m_offsetOrgPreDblk[addr][compIdx][j][k];
@@ -2023,10 +1299,10 @@
             saoParam->saoLcuParam[compIdx][addr].mergeLeftFlag = 0;
             saoParam->saoLcuParam[compIdx][addr].subTypeIdx    = 0;
             if ((compIdx == 0 && saoParam->bSaoFlag[0]) || (compIdx > 0 && saoParam->bSaoFlag[1]))
-                calcSaoStatsCu(addr, compIdx,  compIdx);
+                calcSaoStatsCu(addr, compIdx);
         }
 
-        saoComponentParamDist(allowMergeLeft, allowMergeUp, saoParam, addr, addrUp, addrLeft, 0, 
+        saoComponentParamDist(allowMergeLeft, allowMergeUp, saoParam, addr, addrUp, addrLeft,
                               &mergeSaoParam[0][0], &compDistortion[0]);
 
         sao2ChromaParamDist(allowMergeLeft, allowMergeUp, saoParam, addr, addrUp, addrLeft,
@@ -2169,13 +1445,13 @@
     return offsetOutput;
 }
 
-void SAO::saoComponentParamDist(int allowMergeLeft, int allowMergeUp, SAOParam *saoParam, int addr, int addrUp, int addrLeft, int plane,
+void SAO::saoComponentParamDist(int allowMergeLeft, int allowMergeUp, SAOParam *saoParam, int addr, int addrUp, int addrLeft,
                                 SaoLcuParam *compSaoParam, double *compDistortion)
 {
     int64_t estDist;
     int64_t bestDist;
 
-    SaoLcuParam* saoLcuParam = &(saoParam->saoLcuParam[plane][addr]);
+    SaoLcuParam* saoLcuParam = &(saoParam->saoLcuParam[0][addr]);
     SaoLcuParam* saoLcuParamNeighbor = NULL;
 
     resetSaoUnit(saoLcuParam);
@@ -2194,14 +1470,14 @@
 
     m_entropyCoder.load(m_rdEntropyCoders[0][CI_TEMP_BEST]);
     m_entropyCoder.resetBits();
-    m_entropyCoder.codeSaoOffset(&saoLcuParamRdo, plane);
+    m_entropyCoder.codeSaoOffset(&saoLcuParamRdo, 0);
     dCostPartBest = m_entropyCoder.getNumberOfWrittenBits() * m_lumaLambda;
     copySaoUnit(saoLcuParam, &saoLcuParamRdo);
     bestDist = 0;
 
     for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)
     {
-        estDist = estSaoTypeDist(plane, typeIdx, 0, m_lumaLambda, currentDistortionTableBo, currentRdCostTableBo);
+        estDist = estSaoTypeDist(0, typeIdx, 0, m_lumaLambda, currentDistortionTableBo, currentRdCostTableBo);
 
         if (typeIdx == SAO_BO)
         {
@@ -2231,18 +1507,18 @@
         saoLcuParamRdo.mergeUpFlag   = 0;
         saoLcuParamRdo.subTypeIdx = (typeIdx == SAO_BO) ? bestClassTableBo : 0;
         for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
-            saoLcuParamRdo.offset[classIdx] = (int)m_offset[plane][typeIdx][classIdx + saoLcuParamRdo.subTypeIdx + 1];
+            saoLcuParamRdo.offset[classIdx] = (int)m_offset[0][typeIdx][classIdx + saoLcuParamRdo.subTypeIdx + 1];
 
         m_entropyCoder.load(m_rdEntropyCoders[0][CI_TEMP_BEST]);
         m_entropyCoder.resetBits();
-        m_entropyCoder.codeSaoOffset(&saoLcuParamRdo, plane);
+        m_entropyCoder.codeSaoOffset(&saoLcuParamRdo, 0);
 
         uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
-        m_cost[plane][typeIdx] = (double)((double)estDist + m_lumaLambda * (double)estRate);
+        double cost = (double)((double)estDist + m_lumaLambda * (double)estRate);
 
-        if (m_cost[plane][typeIdx] < dCostPartBest)
+        if (cost < dCostPartBest)
         {
-            dCostPartBest = m_cost[plane][typeIdx];
+            dCostPartBest = cost;
             copySaoUnit(saoLcuParam, &saoLcuParamRdo);
             bestDist = estDist;
         }
@@ -2250,7 +1526,7 @@
 
     compDistortion[0] += ((double)bestDist / m_lumaLambda);
     m_entropyCoder.load(m_rdEntropyCoders[0][CI_TEMP_BEST]);
-    m_entropyCoder.codeSaoOffset(saoLcuParam, plane);
+    m_entropyCoder.codeSaoOffset(saoLcuParam, 0);
     m_entropyCoder.store(m_rdEntropyCoders[0][CI_TEMP_BEST]);
 
     // merge left or merge up
@@ -2259,9 +1535,9 @@
     {
         saoLcuParamNeighbor = NULL;
         if (allowMergeLeft && addrLeft >= 0 && idxNeighbor == 0)
-            saoLcuParamNeighbor = &(saoParam->saoLcuParam[plane][addrLeft]);
+            saoLcuParamNeighbor = &(saoParam->saoLcuParam[0][addrLeft]);
         else if (allowMergeUp && addrUp >= 0 && idxNeighbor == 1)
-            saoLcuParamNeighbor = &(saoParam->saoLcuParam[plane][addrUp]);
+            saoLcuParamNeighbor = &(saoParam->saoLcuParam[0][addrUp]);
         if (saoLcuParamNeighbor != NULL)
         {
             estDist = 0;
@@ -2273,7 +1549,7 @@
                 for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
                 {
                     mergeOffset = saoLcuParamNeighbor->offset[classIdx];
-                    estDist += estSaoDist(m_count[plane][typeIdx][classIdx + mergeBandPosition + 1], mergeOffset, m_offsetOrg[plane][typeIdx][classIdx + mergeBandPosition + 1],  0);
+                    estDist += estSaoDist(m_count[0][typeIdx][classIdx + mergeBandPosition + 1], mergeOffset, m_offsetOrg[0][typeIdx][classIdx + mergeBandPosition + 1],  0);
                 }
             }
             else
@@ -2382,11 +1658,11 @@
         }
 
         uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
-        m_cost[1][typeIdx] = (double)((double)(estDist[0] + estDist[1]) + m_chromaLambda * (double)estRate);
+        double cost = (double)((double)(estDist[0] + estDist[1]) + m_chromaLambda * (double)estRate);
 
-        if (m_cost[1][typeIdx] < costPartBest)
+        if (cost < costPartBest)
         {
-            costPartBest = m_cost[1][typeIdx];
+            costPartBest = cost;
             copySaoUnit(saoLcuParam[0], &saoLcuParamRdo[0]);
             copySaoUnit(saoLcuParam[1], &saoLcuParamRdo[1]);
             bestDist = (estDist[0] + estDist[1]);
diff -r 5a6845566d14 -r 3eacdaa30440 source/encoder/sao.h
--- a/source/encoder/sao.h	Mon Sep 29 17:37:47 2014 -0500
+++ b/source/encoder/sao.h	Tue Sep 30 09:48:12 2014 +0900
@@ -59,25 +59,18 @@
     enum { SAO_BIT_INC = X265_MAX(X265_DEPTH - 10, 0) };
     enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
     enum { NUM_EDGETYPE = 5 };
+    enum { NUM_PLANE = 3 };
 
-    static const int      s_numCulPartsLevel[5];
     static const uint32_t s_eoTable[NUM_EDGETYPE];
 
     typedef int64_t (PerClass[MAX_NUM_SAO_TYPE][MAX_NUM_SAO_CLASS]);
     typedef int64_t (PerType[MAX_NUM_SAO_TYPE]);
-    typedef double  (PerTypeD[MAX_NUM_SAO_TYPE]);
     typedef int64_t (PerPlane[3][MAX_NUM_SAO_TYPE][MAX_NUM_SAO_CLASS]);
 
-    /* allocated per part */
+    /* allocated per plane */
     PerClass*   m_count;
     PerClass*   m_offset;
     PerClass*   m_offsetOrg;
-    PerType*    m_rate;
-    PerType*    m_dist;
-    PerTypeD*   m_cost;
-    double*     m_costPartBest;
-    int64_t*    m_distOrg;
-    int*        m_typePartBest;
 
     /* allocated per LCU */
     PerPlane*   m_countPreDblk;
@@ -87,11 +80,8 @@
     pixel*      m_offsetBo;
     int8_t      m_offsetEo[NUM_EDGETYPE];
 
-    int         m_maxSplitLevel;
-
     int         m_numCuInWidth;
     int         m_numCuInHeight;
-    int         m_numTotalParts;
     int         m_hChromaShift;
     int         m_vChromaShift;
 
@@ -122,7 +112,6 @@
     bool create(x265_param *param);
     void destroy();
 
-    void initSAOParam(SAOParam* saoParam, int partLevel, int partRow, int partCol, int parentPartIdx, int startCUX, int endCUX, int startCUY, int endCUY, int plane) const;
     void allocSaoParam(SAOParam* saoParam) const;
 
     void startSlice(Frame *pic, Entropy& initState, int qp);
@@ -130,30 +119,19 @@
     void resetStats();
     void resetSaoUnit(SaoLcuParam* saoUnit);
 
-    void SAOProcess(SAOParam* saoParam);
-
-    // LCU-basd SAO process without slice granularity
+    // LCU-based SAO process without slice granularity
     void processSaoCu(int addr, int partIdx, int plane);
 
     void resetLcuPart(SaoLcuParam* saoLcuParam);
-    void convertQT2SaoUnit(SAOParam* saoParam, uint32_t partIdx, int plane);
-    void convertOnePart2SaoUnit(SAOParam *saoParam, uint32_t partIdx, int plane);
-    void processSaoUnitAll(SaoLcuParam* saoLcuParam, bool oneUnitFlag, int plane);
     void processSaoUnitRow(SaoLcuParam* saoLcuParam, int idxY, int plane);
 
     void copySaoUnit(SaoLcuParam* saoUnitDst, SaoLcuParam* saoUnitSrc);
 
-    void runQuadTreeDecision(SAOQTPart *psQTPart, int partIdx, double &costFinal, int maxLevel, int plane);
-    void rdoSaoOnePart(SAOQTPart *psQTPart, int partIdx, int plane);
-
-    void disablePartTree(SAOQTPart *psQTPart, int partIdx);
-    void getSaoStats(SAOQTPart *psQTPart, int plane);
-    void calcSaoStatsCu(int addr, int partIdx, int plane);
+    void calcSaoStatsCu(int addr, int plane);
     void calcSaoStatsCu_BeforeDblk(Frame* pic, int idxX, int idxY);
-    void assignSaoUnitSyntax(SaoLcuParam* saoLcuParam,  SAOQTPart* saoPart, bool &oneUnitFlag);
     void checkMerge(SaoLcuParam* lcuParamCurr, SaoLcuParam * lcuParamCheck, int dir);
 
-    void saoComponentParamDist(int allowMergeLeft, int allowMergeUp, SAOParam *saoParam, int addr, int addrUp, int addrLeft, int plane,
+    void saoComponentParamDist(int allowMergeLeft, int allowMergeUp, SAOParam *saoParam, int addr, int addrUp, int addrLeft,
                                SaoLcuParam *compSaoParam, double *distortion);
     void sao2ChromaParamDist(int allowMergeLeft, int allowMergeUp, SAOParam *saoParam, int addr, int addrUp, int addrLeft,
                             SaoLcuParam *crSaoParam, SaoLcuParam *cbSaoParam, double *distortion);
diff -r 5a6845566d14 -r 3eacdaa30440 source/x265.cpp
--- a/source/x265.cpp	Mon Sep 29 17:37:47 2014 -0500
+++ b/source/x265.cpp	Tue Sep 30 09:48:12 2014 +0900
@@ -457,7 +457,6 @@
     H0("   --[no-]lft                    Enable Deblocking Loop Filter. Default %s\n", OPT(param->bEnableLoopFilter));
     H0("   --[no-]sao                    Enable Sample Adaptive Offset. Default %s\n", OPT(param->bEnableSAO));
     H0("   --sao-lcu-bounds <integer>    0: right/bottom boundary areas skipped  1: non-deblocked pixels are used. Default %d\n", param->saoLcuBoundary);
-    H0("   --sao-lcu-opt <integer>       0: SAO picture-based optimization, 1: SAO LCU-based optimization. Default %d\n", param->saoLcuBasedOptimization);
     H0("\nVUI options:\n");
     H0("   --sar <width:height|int>      Sample Aspect Ratio, the ratio of width to height of an individual pixel.\n");
     H0("                                 Choose from 0=undef, 1=1:1(\"square\"), 2=12:11, 3=10:11, 4=16:11,\n");
diff -r 5a6845566d14 -r 3eacdaa30440 source/x265.h
--- a/source/x265.h	Mon Sep 29 17:37:47 2014 -0500
+++ b/source/x265.h	Tue Sep 30 09:48:12 2014 +0900
@@ -734,12 +734,6 @@
      * pixels are used entirely. Default is 0 */
     int       saoLcuBoundary;
 
-    /* Select the scope of the SAO optimization. If 0 SAO is performed over the
-     * entire output picture at once, this can severly restrict frame
-     * parallelism so it is not recommended for many-core machines.  If 1 SAO is
-     * performed on LCUs in series. Default is 1 */
-    int       saoLcuBasedOptimization;
-
     /* Generally a small signed integer which offsets the QP used to quantize
      * the Cb chroma residual (delta from luma QP specified by rate-control).
      * Default is 0, which is recommended */


More information about the x265-devel mailing list