[x265] sao: remove frame-based SAO

Steve Borho steve at borho.org
Tue Sep 30 05:19:02 CEST 2014


On 09/30, Satoshi Nakagawa wrote:
> # HG changeset patch
> # User Satoshi Nakagawa <nakagawa424 at oki.com>
> # Date 1412038092 -32400
> #      Tue Sep 30 09:48:12 2014 +0900
> # Node ID 3eacdaa304400b0100dcf1d1515ae1d24cbf4305
> # Parent  5a6845566d1492d29af29ecc0cf75d644994735c
> sao: remove frame-based SAO

Queued, together with the removal of the reST documentation and the getopt
parameter for this option.

Thanks

> diff -r 5a6845566d14 -r 3eacdaa30440 source/common/common.h
> --- a/source/common/common.h	Mon Sep 29 17:37:47 2014 -0500
> +++ b/source/common/common.h	Tue Sep 30 09:48:12 2014 +0900
> @@ -212,34 +212,6 @@
>      uint32_t count[8];
>  };
>  
> -struct SAOQTPart
> -{
> -    enum { NUM_DOWN_PART = 4 };
> -
> -    int     bestType;
> -    int     subTypeIdx;  // indicates EO class or BO band position
> -    int     offset[SAO_NUM_OFFSET];
> -    int     startCUX;
> -    int     startCUY;
> -    int     endCUX;
> -    int     endCUY;
> -
> -    int     partIdx;
> -    int     partLevel;
> -    int     partCol;
> -    int     partRow;
> -
> -    int     downPartsIdx[NUM_DOWN_PART];
> -    int     upPartIdx;
> -
> -    bool    bSplit;
> -
> -    bool    bProcessed;
> -    double  minCost;
> -    int64_t minDist;
> -    int     minRate;
> -};
> -
>  struct SaoLcuParam
>  {
>      bool mergeUpFlag;
> @@ -266,10 +238,7 @@
>  struct SAOParam
>  {
>      SaoLcuParam* saoLcuParam[3];
> -    SAOQTPart*   saoPart[3];
>      bool         bSaoFlag[2];
> -    bool         oneUnitFlag[3];
> -    int          maxSplitLevel;
>      int          numCuInHeight;
>      int          numCuInWidth;
>  
> @@ -277,15 +246,11 @@
>      {
>          for (int i = 0; i < 3; i++)
>          {
> -            saoPart[i] = NULL;
>              saoLcuParam[i] = NULL;
>          }
>      }
>      ~SAOParam()
>      {
> -        delete[] saoPart[0];
> -        delete[] saoPart[1];
> -        delete[] saoPart[2];
>          delete[] saoLcuParam[0];
>          delete[] saoLcuParam[1];
>          delete[] saoLcuParam[2];
> diff -r 5a6845566d14 -r 3eacdaa30440 source/common/param.cpp
> --- a/source/common/param.cpp	Mon Sep 29 17:37:47 2014 -0500
> +++ b/source/common/param.cpp	Tue Sep 30 09:48:12 2014 +0900
> @@ -169,7 +169,6 @@
>      /* SAO Loop Filter */
>      param->bEnableSAO = 1;
>      param->saoLcuBoundary = 0;
> -    param->saoLcuBasedOptimization = 1;
>  
>      /* Coding Quality */
>      param->cbQpOffset = 0;
> @@ -625,7 +624,6 @@
>      OPT("lft") p->bEnableLoopFilter = atobool(value);
>      OPT("sao") p->bEnableSAO = atobool(value);
>      OPT("sao-lcu-bounds") p->saoLcuBoundary = atoi(value);
> -    OPT("sao-lcu-opt") p->saoLcuBasedOptimization = atoi(value);
>      OPT("ssim") p->bEnableSsim = atobool(value);
>      OPT("psnr") p->bEnablePsnr = atobool(value);
>      OPT("hash") p->decodedPictureHashSEI = atoi(value);
> @@ -1165,13 +1163,7 @@
>          fprintf(stderr, "nr=%d ", param->noiseReduction);
>  
>      TOOLOPT(param->bEnableLoopFilter, "lft");
> -    if (param->bEnableSAO)
> -    {
> -        if (param->saoLcuBasedOptimization)
> -            fprintf(stderr, "sao-lcu ");
> -        else
> -            fprintf(stderr, "sao-frame ");
> -    }
> +    TOOLOPT(param->bEnableSAO, "sao");
>      TOOLOPT(param->bEnableSignHiding, "signhide");
>      TOOLOPT(param->bCULossless, "cu-lossless");
>      TOOLOPT(param->bEnableFastIntra, "fast-intra");
> @@ -1245,7 +1237,6 @@
>      BOOL(p->bEnableLoopFilter, "lft");
>      BOOL(p->bEnableSAO, "sao");
>      s += sprintf(s, " sao-lcu-bounds=%d", p->saoLcuBoundary);
> -    s += sprintf(s, " sao-lcu-opt=%d", p->saoLcuBasedOptimization);
>      BOOL(p->bBPyramid, "b-pyramid");
>      BOOL(p->rc.cuTree, "cutree");
>      s += sprintf(s, " rc=%s", p->rc.rateControlMode == X265_RC_ABR ? (
> diff -r 5a6845566d14 -r 3eacdaa30440 source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp	Mon Sep 29 17:37:47 2014 -0500
> +++ b/source/encoder/encoder.cpp	Tue Sep 30 09:48:12 2014 +0900
> @@ -1247,10 +1247,6 @@
>          x265_log(p, X265_LOG_INFO, "Parallelism disabled, single thread mode\n");
>          p->bEnableWavefront = 0;
>      }
> -    if (!p->saoLcuBasedOptimization && p->frameNumThreads > 1)
> -    {
> -        x265_log(p, X265_LOG_INFO, "Warning: picture-based SAO used with frame parallelism\n");
> -    }
>  
>      if (p->keyframeMax < 0)
>      {
> diff -r 5a6845566d14 -r 3eacdaa30440 source/encoder/frameencoder.cpp
> --- a/source/encoder/frameencoder.cpp	Mon Sep 29 17:37:47 2014 -0500
> +++ b/source/encoder/frameencoder.cpp	Tue Sep 30 09:48:12 2014 +0900
> @@ -85,7 +85,7 @@
>      m_param = top->m_param;
>      m_numRows = numRows;
>      m_numCols = numCols;
> -    m_filterRowDelay = (m_param->bEnableSAO && m_param->saoLcuBasedOptimization && m_param->saoLcuBoundary) ?
> +    m_filterRowDelay = (m_param->bEnableSAO && m_param->saoLcuBoundary) ?
>                          2 : (m_param->bEnableSAO || m_param->bEnableLoopFilter ? 1 : 0);
>      m_filterRowDelayCus = m_filterRowDelay * numCols;
>  
> @@ -323,17 +323,6 @@
>          m_frameStats.percentSkip  = (double)totalSkip / totalCuCount;
>      }
>  
> -    if (slice->m_sps->bUseSAO && !m_param->saoLcuBasedOptimization)
> -    {
> -        /* frame based SAO */
> -        m_frameFilter.m_sao.SAOProcess(m_frame->getPicSym()->m_saoParam);
> -        restoreLFDisabledOrigYuv(m_frame);
> -
> -        // Extend border after whole-frame SAO is finished
> -        for (int row = 0; row < m_numRows; row++)
> -            m_frameFilter.processRowPost(row);
> -    }
> -
>      m_bs.resetBits();
>      m_entropyCoder.load(m_initSliceContext);
>      m_entropyCoder.setBitstream(&m_bs);
> @@ -799,7 +788,7 @@
>          }
>  
>          // NOTE: do CU level Filter
> -        if (m_param->bEnableSAO && m_param->saoLcuBasedOptimization && m_param->saoLcuBoundary)
> +        if (m_param->bEnableSAO && m_param->saoLcuBoundary)
>              // SAO parameter estimation using non-deblocked pixels for LCU bottom and right boundary areas
>              m_frameFilter.m_sao.calcSaoStatsCu_BeforeDblk(m_frame, col, row);
>  
> diff -r 5a6845566d14 -r 3eacdaa30440 source/encoder/framefilter.cpp
> --- a/source/encoder/framefilter.cpp	Mon Sep 29 17:37:47 2014 -0500
> +++ b/source/encoder/framefilter.cpp	Tue Sep 30 09:48:12 2014 +0900
> @@ -115,20 +115,15 @@
>      SAOParam* saoParam = m_frame->getPicSym()->m_saoParam;
>      if (m_param->bEnableSAO)
>      {
> -        if (m_param->saoLcuBasedOptimization)
> -        {
> -            m_sao.m_entropyCoder.load(m_frameEncoder->m_initSliceContext);
> -            m_sao.m_rdEntropyCoders[0][CI_NEXT_BEST].load(m_frameEncoder->m_initSliceContext);
> -            m_sao.m_rdEntropyCoders[0][CI_CURR_BEST].load(m_frameEncoder->m_initSliceContext);
> +        m_sao.m_entropyCoder.load(m_frameEncoder->m_initSliceContext);
> +        m_sao.m_rdEntropyCoders[0][CI_NEXT_BEST].load(m_frameEncoder->m_initSliceContext);
> +        m_sao.m_rdEntropyCoders[0][CI_CURR_BEST].load(m_frameEncoder->m_initSliceContext);
>  
> -            m_sao.rdoSaoUnitRow(saoParam, row);
> +        m_sao.rdoSaoUnitRow(saoParam, row);
>  
> -            // NOTE: Delay a row because SAO decide need top row pixels at next row, is it HM's bug?
> -            if (row >= m_saoRowDelay)
> -                processSao(row - m_saoRowDelay);
> -        }
> -        else
> -            return;
> +        // NOTE: Delay a row because SAO decide need top row pixels at next row, is it HM's bug?
> +        if (row >= m_saoRowDelay)
> +            processSao(row - m_saoRowDelay);
>      }
>  
>      // this row of CTUs has been encoded
> @@ -138,7 +133,7 @@
>  
>      if (row == m_numRows - 1)
>      {
> -        if (m_param->bEnableSAO && m_param->saoLcuBasedOptimization)
> +        if (m_param->bEnableSAO)
>          {
>              m_sao.rdoSaoUnitRowEnd(saoParam, m_frame->getNumCUsInFrame());
>  
> @@ -424,9 +419,6 @@
>      const uint32_t lineStartCUAddr = row * numCols;
>      SAOParam* saoParam = m_frame->getPicSym()->m_saoParam;
>  
> -    // NOTE: these flags are not used in this mode
> -    X265_CHECK(!saoParam->oneUnitFlag[0] && !saoParam->oneUnitFlag[1] && !saoParam->oneUnitFlag[2], "invalid SAO flag");
> -
>      if (saoParam->bSaoFlag[0])
>          m_sao.processSaoUnitRow(saoParam->saoLcuParam[0], row, 0);
>  
> diff -r 5a6845566d14 -r 3eacdaa30440 source/encoder/sao.cpp
> --- a/source/encoder/sao.cpp	Mon Sep 29 17:37:47 2014 -0500
> +++ b/source/encoder/sao.cpp	Tue Sep 30 09:48:12 2014 +0900
> @@ -51,34 +51,11 @@
>      return (x >> 31) | ((int)((((uint32_t)-x)) >> 31));
>  }
>  
> -int convertLevelRowCol2Idx(int level, int row, int col)
> -{
> -    if (!level)
> -        return 0;
> -    else if (level == 1)
> -        return 1 + row * 2 + col;
> -    else if (level == 2)
> -        return 5 + row * 4 + col;
> -    else if (level == 3)
> -        return 21 + row * 8 + col;
> -    else // (level == 4)
> -        return 85 + row * 16 + col;
> -}
> -
>  } // end anonymous namespace
>  
>  
>  namespace x265 {
>  
> -const int SAO::s_numCulPartsLevel[5] =
> -{
> -    1,   // level 0
> -    5,   // level 1
> -    21,  // level 2
> -    85,  // level 3
> -    341, // level 4
> -};
> -
>  const uint32_t SAO::s_eoTable[NUM_EDGETYPE] =
>  {
>      1, // 0
> @@ -95,17 +72,10 @@
>      m_offsetOrg = NULL;
>      m_countPreDblk = NULL;
>      m_offsetOrgPreDblk = NULL;
> -    m_rate = NULL;
> -    m_dist = NULL;
> -    m_cost = NULL;
> -    m_costPartBest = NULL;
> -    m_distOrg = NULL;
> -    m_typePartBest = NULL;
>      m_refDepth = 0;
>      m_lumaLambda = 0;
>      m_chromaLambda = 0;
>      m_param = NULL;
> -    m_numTotalParts = 0;
>      m_clipTable = NULL;
>      m_clipTableBase = NULL;
>      m_offsetBo = NULL;
> @@ -137,16 +107,6 @@
>      m_numCuInWidth =  (m_param->sourceWidth + g_maxCUSize - 1) / g_maxCUSize;
>      m_numCuInHeight = (m_param->sourceHeight + g_maxCUSize - 1) / g_maxCUSize;
>  
> -    int maxSplitLevelHeight = (int)(logf((float)m_numCuInHeight) / logf(2.0));
> -    int maxSplitLevelWidth  = (int)(logf((float)m_numCuInWidth) / logf(2.0));
> -
> -    m_maxSplitLevel = maxSplitLevelHeight < maxSplitLevelWidth ? maxSplitLevelHeight : maxSplitLevelWidth;
> -    m_maxSplitLevel = X265_MIN(m_maxSplitLevel, SAO_MAX_DEPTH);
> -
> -    /* various structures are overloaded to store per component data.
> -     * m_numTotalParts must allow for sufficient storage in any allocated arrays */
> -    m_numTotalParts = X265_MAX(3, s_numCulPartsLevel[m_maxSplitLevel]);
> -
>      const pixel maxY = (1 << X265_DEPTH) - 1;
>      const pixel rangeExt = maxY >> 1;
>      int numLcu = m_numCuInWidth * m_numCuInHeight;
> @@ -163,17 +123,9 @@
>          CHECKED_MALLOC(m_tmpU2[i], pixel, m_param->sourceWidth);
>      }
>  
> -    CHECKED_MALLOC(m_distOrg, int64_t, m_numTotalParts);
> -    CHECKED_MALLOC(m_costPartBest, double, m_numTotalParts);
> -    CHECKED_MALLOC(m_typePartBest, int, m_numTotalParts);
> -
> -    CHECKED_MALLOC(m_rate, PerType, m_numTotalParts);
> -    CHECKED_MALLOC(m_dist, PerType, m_numTotalParts);
> -    CHECKED_MALLOC(m_cost, PerTypeD, m_numTotalParts);
> -
> -    CHECKED_MALLOC(m_count, PerClass, m_numTotalParts);
> -    CHECKED_MALLOC(m_offset, PerClass, m_numTotalParts);
> -    CHECKED_MALLOC(m_offsetOrg, PerClass, m_numTotalParts);
> +    CHECKED_MALLOC(m_count, PerClass, NUM_PLANE);
> +    CHECKED_MALLOC(m_offset, PerClass, NUM_PLANE);
> +    CHECKED_MALLOC(m_offsetOrg, PerClass, NUM_PLANE);
>  
>      CHECKED_MALLOC(m_countPreDblk, PerPlane, numLcu);
>      CHECKED_MALLOC(m_offsetOrgPreDblk, PerPlane, numLcu);
> @@ -209,12 +161,6 @@
>          X265_FREE(m_tmpU2[i]);
>      }
>  
> -    X265_FREE(m_distOrg);
> -    X265_FREE(m_costPartBest);
> -    X265_FREE(m_typePartBest);
> -    X265_FREE(m_rate);
> -    X265_FREE(m_dist);
> -    X265_FREE(m_cost);
>      X265_FREE(m_count);
>      X265_FREE(m_offset);
>      X265_FREE(m_offsetOrg);
> @@ -225,143 +171,22 @@
>  /* allocate memory for SAO parameters */
>  void SAO::allocSaoParam(SAOParam *saoParam) const
>  {
> -    saoParam->maxSplitLevel = m_maxSplitLevel;
>      saoParam->numCuInWidth  = m_numCuInWidth;
>      saoParam->numCuInHeight = m_numCuInHeight;
>  
> -    saoParam->saoPart[0] = new SAOQTPart[s_numCulPartsLevel[saoParam->maxSplitLevel]];
> -    initSAOParam(saoParam, 0, 0, 0, -1, 0, m_numCuInWidth - 1,  0, m_numCuInHeight - 1, 0);
> -
> -    saoParam->saoPart[1] = new SAOQTPart[s_numCulPartsLevel[saoParam->maxSplitLevel]];
> -    saoParam->saoPart[2] = new SAOQTPart[s_numCulPartsLevel[saoParam->maxSplitLevel]];
> -    initSAOParam(saoParam, 0, 0, 0, -1, 0, m_numCuInWidth - 1,  0, m_numCuInHeight - 1, 1);
> -    initSAOParam(saoParam, 0, 0, 0, -1, 0, m_numCuInWidth - 1,  0, m_numCuInHeight - 1, 2);
> -
>      saoParam->saoLcuParam[0] = new SaoLcuParam[m_numCuInHeight * m_numCuInWidth];
>      saoParam->saoLcuParam[1] = new SaoLcuParam[m_numCuInHeight * m_numCuInWidth];
>      saoParam->saoLcuParam[2] = new SaoLcuParam[m_numCuInHeight * m_numCuInWidth];
>  }
>  
> -/* recursively initialize SAO parameters (only once) */
> -void SAO::initSAOParam(SAOParam *saoParam, int partLevel, int partRow, int partCol, int parentPartIdx, int startCUX, int endCUX, int startCUY, int endCUY, int plane) const
> -{
> -    int partIdx = convertLevelRowCol2Idx(partLevel, partRow, partCol);
> -
> -    SAOQTPart* saoPart = &(saoParam->saoPart[plane][partIdx]);
> -
> -    saoPart->partIdx   = partIdx;
> -    saoPart->partLevel = partLevel;
> -    saoPart->partRow   = partRow;
> -    saoPart->partCol   = partCol;
> -
> -    saoPart->startCUX  = startCUX;
> -    saoPart->endCUX    = endCUX;
> -    saoPart->startCUY  = startCUY;
> -    saoPart->endCUY    = endCUY;
> -
> -    saoPart->upPartIdx = parentPartIdx;
> -    saoPart->bestType  = -1;
> -
> -    saoPart->subTypeIdx = 0;
> -
> -    for (int j = 0; j < SAO_NUM_OFFSET; j++)
> -        saoPart->offset[j] = 0;
> -
> -    if (saoPart->partLevel < m_maxSplitLevel)
> -    {
> -        int downLevel    = (partLevel + 1);
> -        int downRowStart = (partRow << 1);
> -        int downColStart = (partCol << 1);
> -
> -        int numCUWidth  = endCUX - startCUX + 1;
> -        int numCUHeight = endCUY - startCUY + 1;
> -        int numCULeft   = (numCUWidth  >> 1);
> -        int numCUTop    = (numCUHeight >> 1);
> -
> -        int downStartCUX = startCUX;
> -        int downEndCUX  = downStartCUX + numCULeft - 1;
> -        int downStartCUY = startCUY;
> -        int downEndCUY  = downStartCUY + numCUTop  - 1;
> -        int downRowIdx = downRowStart + 0;
> -        int downColIdx = downColStart + 0;
> -
> -        saoPart->downPartsIdx[0] = convertLevelRowCol2Idx(downLevel, downRowIdx, downColIdx);
> -
> -        initSAOParam(saoParam, downLevel, downRowIdx, downColIdx, partIdx, downStartCUX, downEndCUX, downStartCUY, downEndCUY, plane);
> -
> -        downStartCUX = startCUX + numCULeft;
> -        downEndCUX   = endCUX;
> -        downStartCUY = startCUY;
> -        downEndCUY   = downStartCUY + numCUTop - 1;
> -        downRowIdx  = downRowStart + 0;
> -        downColIdx  = downColStart + 1;
> -
> -        saoPart->downPartsIdx[1] = convertLevelRowCol2Idx(downLevel, downRowIdx, downColIdx);
> -
> -        initSAOParam(saoParam, downLevel, downRowIdx, downColIdx, partIdx,  downStartCUX, downEndCUX, downStartCUY, downEndCUY, plane);
> -
> -        downStartCUX = startCUX;
> -        downEndCUX   = downStartCUX + numCULeft - 1;
> -        downStartCUY = startCUY + numCUTop;
> -        downEndCUY   = endCUY;
> -        downRowIdx  = downRowStart + 1;
> -        downColIdx  = downColStart + 0;
> -
> -        saoPart->downPartsIdx[2] = convertLevelRowCol2Idx(downLevel, downRowIdx, downColIdx);
> -
> -        initSAOParam(saoParam, downLevel, downRowIdx, downColIdx, partIdx, downStartCUX, downEndCUX, downStartCUY, downEndCUY, plane);
> -
> -        downStartCUX = startCUX + numCULeft;
> -        downEndCUX   = endCUX;
> -        downStartCUY = startCUY + numCUTop;
> -        downEndCUY   = endCUY;
> -        downRowIdx  = downRowStart + 1;
> -        downColIdx  = downColStart + 1;
> -
> -        saoPart->downPartsIdx[3] = convertLevelRowCol2Idx(downLevel, downRowIdx, downColIdx);
> -
> -        initSAOParam(saoParam, downLevel, downRowIdx, downColIdx, partIdx, downStartCUX, downEndCUX, downStartCUY, downEndCUY, plane);
> -    }
> -    else
> -    {
> -        saoPart->downPartsIdx[0] = saoPart->downPartsIdx[1] = saoPart->downPartsIdx[2] = saoPart->downPartsIdx[3] = -1;
> -    }
> -}
> -
>  /* reset SAO parameters once per frame */
>  void SAO::resetSAOParam(SAOParam *saoParam)
>  {
> -    int numComponet = 3;
> -
> -    for (int c = 0; c < numComponet; c++)
> -    {
> -        if (c < 2)
> -            saoParam->bSaoFlag[c] = false;
> -
> -        for (int i = 0; i < s_numCulPartsLevel[m_maxSplitLevel]; i++)
> -        {
> -            saoParam->saoPart[c][i].bestType     = -1;
> -            saoParam->saoPart[c][i].bSplit       = false;
> -            saoParam->saoPart[c][i].bProcessed   = false;
> -            saoParam->saoPart[c][i].minCost      = MAX_DOUBLE;
> -            saoParam->saoPart[c][i].minDist      = MAX_INT;
> -            saoParam->saoPart[c][i].minRate      = MAX_INT;
> -            saoParam->saoPart[c][i].subTypeIdx   = 0;
> -            for (int j = 0; j < SAO_NUM_OFFSET; j++)
> -            {
> -                saoParam->saoPart[c][i].offset[j] = 0;
> -                saoParam->saoPart[c][i].offset[j] = 0;
> -                saoParam->saoPart[c][i].offset[j] = 0;
> -            }
> -        }
> -
> -        saoParam->oneUnitFlag[0] = 0;
> -        saoParam->oneUnitFlag[1] = 0;
> -        saoParam->oneUnitFlag[2] = 0;
> -        resetLcuPart(saoParam->saoLcuParam[0]);
> -        resetLcuPart(saoParam->saoLcuParam[1]);
> -        resetLcuPart(saoParam->saoLcuParam[2]);
> -    }
> +    saoParam->bSaoFlag[0] = false;
> +    saoParam->bSaoFlag[1] = false;
> +    resetLcuPart(saoParam->saoLcuParam[0]);
> +    resetLcuPart(saoParam->saoLcuParam[1]);
> +    resetLcuPart(saoParam->saoLcuParam[2]);
>  }
>  
>  void SAO::startSlice(Frame *pic, Entropy& initState, int qp)
> @@ -647,133 +472,6 @@
>  }
>  
>  /* Process SAO all units */
> -void SAO::processSaoUnitAll(SaoLcuParam* saoLcuParam, bool oneUnitFlag, int plane)
> -{
> -    pixel *rec;
> -    int picWidthTmp;
> -
> -    if (plane)
> -    {
> -        rec         = m_pic->getPicYuvRec()->getChromaAddr(plane);
> -        picWidthTmp = m_param->sourceWidth >> m_hChromaShift;
> -    }
> -    else
> -    {
> -        rec         = m_pic->getPicYuvRec()->getLumaAddr();
> -        picWidthTmp = m_param->sourceWidth;
> -    }
> -
> -    memcpy(m_tmpU1[plane], rec, sizeof(pixel) * picWidthTmp);
> -
> -    int frameWidthInCU = m_pic->getFrameWidthInCU();
> -    int frameHeightInCU = m_pic->getFrameHeightInCU();
> -    int stride;
> -    bool isChroma = !!plane;
> -    uint32_t cuHeightTmp = isChroma ? (g_maxCUSize >> m_vChromaShift) : g_maxCUSize;
> -
> -    const int boShift = X265_DEPTH - SAO_BO_BITS;
> -
> -    for (int idxY = 0; idxY < frameHeightInCU; idxY++)
> -    {
> -        int addr = idxY * frameWidthInCU;
> -        if (plane == 0)
> -        {
> -            rec = m_pic->getPicYuvRec()->getLumaAddr(addr);
> -            stride = m_pic->getStride();
> -            picWidthTmp = m_param->sourceWidth;
> -        }
> -        else
> -        {
> -            rec = m_pic->getPicYuvRec()->getChromaAddr(plane, addr);
> -            stride = m_pic->getCStride();
> -            picWidthTmp = m_param->sourceWidth >> m_hChromaShift;
> -        }
> -        for (uint32_t i = 0; i < cuHeightTmp + 1; i++)
> -        {
> -            m_tmpL1[i] = rec[0];
> -            rec += stride;
> -        }
> -
> -        rec -= (stride << 1);
> -
> -        memcpy(m_tmpU2[plane], rec, sizeof(pixel) * picWidthTmp);
> -
> -        for (int idxX = 0; idxX < frameWidthInCU; idxX++)
> -        {
> -            addr = idxY * frameWidthInCU + idxX;
> -
> -            int typeIdx;
> -            bool mergeLeftFlag;
> -
> -            if (oneUnitFlag)
> -            {
> -                typeIdx = saoLcuParam[0].typeIdx;
> -                mergeLeftFlag = (addr == 0) ? 0 : 1;
> -            }
> -            else
> -            {
> -                typeIdx = saoLcuParam[addr].typeIdx;
> -                mergeLeftFlag = saoLcuParam[addr].mergeLeftFlag;
> -            }
> -            if (typeIdx >= 0)
> -            {
> -                if (!mergeLeftFlag)
> -                {
> -                    if (typeIdx == SAO_BO)
> -                    {
> -                        pixel* offsetBo = m_offsetBo;
> -                        int offset[SAO_NUM_BO_CLASSES];
> -                        memset(offset, 0, sizeof(offset));
> -
> -                        for (int i = 0; i < SAO_NUM_OFFSET; i++)
> -                            offset[((saoLcuParam[addr].subTypeIdx + i) & (SAO_NUM_BO_CLASSES - 1))] = saoLcuParam[addr].offset[i] << SAO_BIT_INC;
> -
> -                        for (int i = 0; i < (1 << X265_DEPTH); i++)
> -                            offsetBo[i] = m_clipTable[i + offset[i >> boShift]];
> -                    }
> -                    else // if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3)
> -                    {
> -                        int offset[NUM_EDGETYPE];
> -                        offset[0] = 0;
> -                        for (int i = 0; i < SAO_NUM_OFFSET; i++)
> -                            offset[i + 1] = saoLcuParam[addr].offset[i] << SAO_BIT_INC;
> -
> -                        for (int edgeType = 0; edgeType < NUM_EDGETYPE; edgeType++)
> -                            m_offsetEo[edgeType] = (int8_t)offset[s_eoTable[edgeType]];
> -                    }
> -                }
> -                processSaoCu(addr, typeIdx, plane);
> -            }
> -            else
> -            {
> -                if (idxX != (frameWidthInCU - 1))
> -                {
> -                    if (isChroma)
> -                    {
> -                        rec = m_pic->getPicYuvRec()->getChromaAddr(plane, addr);
> -                        stride = m_pic->getCStride();
> -                    }
> -                    else
> -                    {
> -                        rec = m_pic->getPicYuvRec()->getLumaAddr(addr);
> -                        stride = m_pic->getStride();
> -                    }
> -
> -                    int widthShift = isChroma ? (g_maxCUSize >> m_hChromaShift) : g_maxCUSize;
> -                    for (uint32_t i = 0; i < cuHeightTmp + 1; i++)
> -                    {
> -                        m_tmpL1[i] = rec[widthShift - 1];
> -                        rec += stride;
> -                    }
> -                }
> -            }
> -        }
> -
> -        std::swap(m_tmpU1[plane], m_tmpU2[plane]);
> -    }
> -}
> -
> -/* Process SAO all units */
>  void SAO::processSaoUnitRow(SaoLcuParam* saoLcuParam, int idxY, int plane)
>  {
>      pixel *rec;
> @@ -925,272 +623,8 @@
>          saoUnitDst->offset[i] = saoUnitSrc->offset[i];
>  }
>  
> -/* convert QP part to SAO unit */
> -void SAO::convertQT2SaoUnit(SAOParam *saoParam, uint32_t partIdx, int plane)
> -{
> -    SAOQTPart* saoPart = &(saoParam->saoPart[plane][partIdx]);
> -
> -    if (!saoPart->bSplit)
> -    {
> -        convertOnePart2SaoUnit(saoParam, partIdx, plane);
> -        return;
> -    }
> -
> -    if (saoPart->partLevel < m_maxSplitLevel)
> -    {
> -        convertQT2SaoUnit(saoParam, saoPart->downPartsIdx[0], plane);
> -        convertQT2SaoUnit(saoParam, saoPart->downPartsIdx[1], plane);
> -        convertQT2SaoUnit(saoParam, saoPart->downPartsIdx[2], plane);
> -        convertQT2SaoUnit(saoParam, saoPart->downPartsIdx[3], plane);
> -    }
> -}
> -
> -/* convert one SAO part to SAO unit */
> -void SAO::convertOnePart2SaoUnit(SAOParam *saoParam, uint32_t partIdx, int plane)
> -{
> -    int frameWidthInCU = m_pic->getFrameWidthInCU();
> -    SAOQTPart* saoQTPart = saoParam->saoPart[plane];
> -    SaoLcuParam* saoLcuParam = saoParam->saoLcuParam[plane];
> -
> -    for (int idxY = saoQTPart[partIdx].startCUY; idxY <= saoQTPart[partIdx].endCUY; idxY++)
> -    {
> -        for (int idxX = saoQTPart[partIdx].startCUX; idxX <= saoQTPart[partIdx].endCUX; idxX++)
> -        {
> -            int addr = idxY * frameWidthInCU + idxX;
> -            saoLcuParam[addr].partIdxTmp = (int)partIdx;
> -            saoLcuParam[addr].typeIdx    = saoQTPart[partIdx].bestType;
> -            saoLcuParam[addr].subTypeIdx = saoQTPart[partIdx].subTypeIdx;
> -            if (saoLcuParam[addr].typeIdx >= 0)
> -            {
> -                for (int j = 0; j < SAO_NUM_OFFSET; j++)
> -                    saoLcuParam[addr].offset[j] = saoQTPart[partIdx].offset[j];
> -            }
> -            else
> -            {
> -                saoLcuParam[addr].subTypeIdx = saoQTPart[partIdx].subTypeIdx;
> -                for (int j = 0; j < SAO_NUM_OFFSET; j++)
> -                    saoLcuParam[addr].offset[j] = 0;
> -            }
> -        }
> -    }
> -}
> -
> -/* process SAO for one partition */
> -void SAO::rdoSaoOnePart(SAOQTPart *psQTPart, int partIdx, int plane)
> -{
> -    SAOQTPart* onePart = &(psQTPart[partIdx]);
> -
> -    int64_t estDist;
> -
> -    m_distOrg[partIdx] = 0;
> -
> -    int    bestClassTableBo = 0;
> -    int    currentDistortionTableBo[MAX_NUM_SAO_CLASS];
> -    double currentRdCostTableBo[MAX_NUM_SAO_CLASS];
> -    double bestRDCostTableBo = MAX_DOUBLE;
> -
> -    int allowMergeLeft;
> -    int allowMergeUp;
> -    SaoLcuParam saoLcuParamRdo;
> -
> -    for (int typeIdx = -1; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)
> -    {
> -        m_entropyCoder.load(m_rdEntropyCoders[onePart->partLevel][CI_CURR_BEST]);
> -        m_entropyCoder.resetBits();
> -
> -        if (typeIdx >= 0)
> -        {
> -            estDist = estSaoTypeDist(partIdx, typeIdx, 0, m_lumaLambda, currentDistortionTableBo, currentRdCostTableBo);
> -            if (typeIdx == SAO_BO)
> -            {
> -                // Estimate Best Position
> -                for (int i = 0; i < SAO_NUM_BO_CLASSES - SAO_BO_LEN + 1; i++)
> -                {
> -                    double currentRDCost = 0.0;
> -                    for (int j = i; j < i + SAO_BO_LEN; j++)
> -                        currentRDCost += currentRdCostTableBo[j];
> -
> -                    if (currentRDCost < bestRDCostTableBo)
> -                    {
> -                        bestRDCostTableBo = currentRDCost;
> -                        bestClassTableBo  = i;
> -                    }
> -                }
> -
> -                // Recode all offsets
> -                for (int classIdx = bestClassTableBo; classIdx < bestClassTableBo + SAO_BO_LEN; classIdx++)
> -                    estDist += currentDistortionTableBo[classIdx];
> -            }
> -
> -            for (int ry = onePart->startCUY; ry <= onePart->endCUY; ry++)
> -            {
> -                for (int rx = onePart->startCUX; rx <= onePart->endCUX; rx++)
> -                {
> -                    // get bits for typeIdx = -1
> -                    allowMergeLeft = 1;
> -                    allowMergeUp   = 1;
> -
> -                    // reset
> -                    resetSaoUnit(&saoLcuParamRdo);
> -
> -                    // set merge flag
> -                    saoLcuParamRdo.mergeUpFlag   = 1;
> -                    saoLcuParamRdo.mergeLeftFlag = 1;
> -
> -                    if (ry == onePart->startCUY)
> -                        saoLcuParamRdo.mergeUpFlag = 0;
> -
> -                    if (rx == onePart->startCUX)
> -                        saoLcuParamRdo.mergeLeftFlag = 0;
> -
> -                    // set type and offsets
> -                    saoLcuParamRdo.typeIdx = typeIdx;
> -                    saoLcuParamRdo.subTypeIdx = (typeIdx == SAO_BO) ? bestClassTableBo : 0;
> -                    for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
> -                        saoLcuParamRdo.offset[classIdx] = (int)m_offset[partIdx][typeIdx][classIdx + saoLcuParamRdo.subTypeIdx + 1];
> -
> -                    m_entropyCoder.codeSaoUnitInterleaving(plane, 1, rx, ry, &saoLcuParamRdo, 1, 1, allowMergeLeft, allowMergeUp);
> -                }
> -            }
> -
> -            m_dist[partIdx][typeIdx] = estDist;
> -            m_rate[partIdx][typeIdx] = m_entropyCoder.getNumberOfWrittenBits();
> -
> -            m_cost[partIdx][typeIdx] = (double)((double)m_dist[partIdx][typeIdx] + m_lumaLambda * (double)m_rate[partIdx][typeIdx]);
> -
> -            if (m_cost[partIdx][typeIdx] < m_costPartBest[partIdx])
> -            {
> -                m_distOrg[partIdx] = 0;
> -                m_costPartBest[partIdx] = m_cost[partIdx][typeIdx];
> -                m_typePartBest[partIdx] = typeIdx;
> -                m_entropyCoder.store(m_rdEntropyCoders[onePart->partLevel][CI_TEMP_BEST]);
> -            }
> -        }
> -        else
> -        {
> -            for (int ry = onePart->startCUY; ry <= onePart->endCUY; ry++)
> -            {
> -                for (int rx = onePart->startCUX; rx <= onePart->endCUX; rx++)
> -                {
> -                    // get bits for iTypeIdx = -1
> -                    allowMergeLeft = 1;
> -                    allowMergeUp   = 1;
> -
> -                    // reset
> -                    resetSaoUnit(&saoLcuParamRdo);
> -
> -                    // set merge flag
> -                    saoLcuParamRdo.mergeUpFlag   = 1;
> -                    saoLcuParamRdo.mergeLeftFlag = 1;
> -
> -                    if (ry == onePart->startCUY)
> -                        saoLcuParamRdo.mergeUpFlag = 0;
> -
> -                    if (rx == onePart->startCUX)
> -                        saoLcuParamRdo.mergeLeftFlag = 0;
> -
> -                    m_entropyCoder.codeSaoUnitInterleaving(plane, 1, rx, ry,  &saoLcuParamRdo, 1,  1,  allowMergeLeft, allowMergeUp);
> -                }
> -            }
> -            if (m_distOrg[partIdx] < m_costPartBest[partIdx])
> -            {
> -                m_costPartBest[partIdx] = (double)m_distOrg[partIdx] + m_entropyCoder.getNumberOfWrittenBits() * m_lumaLambda;
> -                m_typePartBest[partIdx] = -1;
> -                m_entropyCoder.store(m_rdEntropyCoders[onePart->partLevel][CI_TEMP_BEST]);
> -            }
> -        }
> -    }
> -
> -    onePart->bProcessed = true;
> -    onePart->bSplit    = false;
> -    onePart->minDist   =       m_typePartBest[partIdx] >= 0 ? m_dist[partIdx][m_typePartBest[partIdx]] : m_distOrg[partIdx];
> -    onePart->minRate   = (int)(m_typePartBest[partIdx] >= 0 ? m_rate[partIdx][m_typePartBest[partIdx]] : 0);
> -    onePart->minCost   = onePart->minDist + m_lumaLambda * onePart->minRate;
> -    onePart->bestType  = m_typePartBest[partIdx];
> -
> -    if (onePart->bestType != -1)
> -    {
> -        int minIndex = 0;
> -        if (onePart->bestType == SAO_BO)
> -        {
> -            onePart->subTypeIdx = bestClassTableBo;
> -            minIndex = onePart->subTypeIdx;
> -        }
> -        for (int i = 0; i < SAO_NUM_OFFSET; i++)
> -            onePart->offset[i] = (int)m_offset[partIdx][onePart->bestType][minIndex + i + 1];
> -    }
> -}
> -
> -/* Run partition tree disable */
> -void SAO::disablePartTree(SAOQTPart *psQTPart, int partIdx)
> -{
> -    SAOQTPart* pOnePart = &(psQTPart[partIdx]);
> -
> -    pOnePart->bSplit   = false;
> -    pOnePart->bestType = -1;
> -
> -    if (pOnePart->partLevel < (int)m_maxSplitLevel)
> -    {
> -        for (int i = 0; i < SAOQTPart::NUM_DOWN_PART; i++)
> -            disablePartTree(psQTPart, pOnePart->downPartsIdx[i]);
> -    }
> -}
> -
> -/* Run quadtree decision function */
> -void SAO::runQuadTreeDecision(SAOQTPart *qtPart, int partIdx, double &costFinal, int maxLevel, int plane)
> -{
> -    SAOQTPart* onePart = &(qtPart[partIdx]);
> -
> -    uint32_t nextDepth = onePart->partLevel + 1;
> -
> -    if (!partIdx)
> -        costFinal = 0;
> -
> -    // SAO for this part
> -    if (!onePart->bProcessed)
> -        rdoSaoOnePart(qtPart, partIdx, plane);
> -
> -    // SAO for sub 4 parts
> -    if (onePart->partLevel < maxLevel)
> -    {
> -        double costNotSplit = m_lumaLambda + onePart->minCost;
> -        double costSplit    = m_lumaLambda;
> -
> -        for (int i = 0; i < SAOQTPart::NUM_DOWN_PART; i++)
> -        {
> -            if (i) //initialize RD with previous depth buffer
> -                m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[nextDepth][CI_NEXT_BEST]);
> -            else
> -                m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[onePart->partLevel][CI_CURR_BEST]);
> -
> -            runQuadTreeDecision(qtPart, onePart->downPartsIdx[i], costFinal, maxLevel, plane);
> -            costSplit += costFinal;
> -            m_rdEntropyCoders[nextDepth][CI_NEXT_BEST].load(m_rdEntropyCoders[nextDepth][CI_TEMP_BEST]);
> -        }
> -
> -        if (costSplit < costNotSplit)
> -        {
> -            costFinal = costSplit;
> -            onePart->bSplit   = true;
> -            onePart->bestType = -1;
> -            m_rdEntropyCoders[onePart->partLevel][CI_NEXT_BEST].load(m_rdEntropyCoders[nextDepth][CI_NEXT_BEST]);
> -        }
> -        else
> -        {
> -            costFinal = costNotSplit;
> -            onePart->bSplit = false;
> -            for (int i = 0; i < SAOQTPart::NUM_DOWN_PART; i++)
> -                disablePartTree(qtPart, onePart->downPartsIdx[i]);
> -
> -            m_rdEntropyCoders[onePart->partLevel][CI_NEXT_BEST].load(m_rdEntropyCoders[onePart->partLevel][CI_TEMP_BEST]);
> -        }
> -    }
> -    else
> -        costFinal = onePart->minCost;
> -}
> -
>  /* Calculate SAO statistics for current LCU without non-crossing slice */
> -void SAO::calcSaoStatsCu(int addr, int partIdx, int plane)
> +void SAO::calcSaoStatsCu(int addr, int plane)
>  {
>      int x, y;
>      TComDataCU *cu = m_pic->getCU(addr);
> @@ -1216,15 +650,8 @@
>      int isLuma = !plane;
>      int isChroma = !!plane;
>      int numSkipLine = isChroma ? 4 - (2 * m_vChromaShift) : 4;
> -
> -    if (!m_param->saoLcuBasedOptimization)
> -        numSkipLine = 0;
> -
>      int numSkipLineRight = isChroma ? 5 - (2 * m_hChromaShift) : 5;
>  
> -    if (!m_param->saoLcuBasedOptimization)
> -        numSkipLineRight = 0;
> -
>      picWidthTmp  = isLuma ? m_param->sourceWidth  : m_param->sourceWidth  >> m_hChromaShift;
>      picHeightTmp = isLuma ? m_param->sourceHeight : m_param->sourceHeight >> m_vChromaShift;
>      lcuWidth     = isLuma ? g_maxCUSize : g_maxCUSize >> m_hChromaShift;
> @@ -1244,13 +671,13 @@
>      {
>          const int boShift = X265_DEPTH - SAO_BO_BITS;
>  
> -        if (m_param->saoLcuBasedOptimization && m_param->saoLcuBoundary)
> +        if (m_param->saoLcuBoundary)
>          {
>              numSkipLine      = isChroma ? 3 - (2 * m_vChromaShift) : 3;
>              numSkipLineRight = isChroma ? 4 - (2 * m_hChromaShift) : 4;
>          }
> -        stats = m_offsetOrg[partIdx][SAO_BO];
> -        counts = m_count[partIdx][SAO_BO];
> +        stats = m_offsetOrg[plane][SAO_BO];
> +        counts = m_count[plane][SAO_BO];
>  
>          fenc = m_pic->getPicYuvOrg()->getPlaneAddr(plane, addr);
>          recon = m_pic->getPicYuvRec()->getPlaneAddr(plane, addr);
> @@ -1278,13 +705,13 @@
>      {
>          //if (iSaoType == EO_0)
>          {
> -            if (m_param->saoLcuBasedOptimization && m_param->saoLcuBoundary)
> +            if (m_param->saoLcuBoundary)
>              {
>                  numSkipLine      = isChroma ? 3 - (2 * m_vChromaShift) : 3;
>                  numSkipLineRight = isChroma ? 5 - (2 * m_hChromaShift) : 5;
>              }
> -            stats = m_offsetOrg[partIdx][SAO_EO_0];
> -            counts = m_count[partIdx][SAO_EO_0];
> +            stats = m_offsetOrg[plane][SAO_EO_0];
> +            counts = m_count[plane][SAO_EO_0];
>  
>              fenc = m_pic->getPicYuvOrg()->getPlaneAddr(plane, addr);
>              recon = m_pic->getPicYuvRec()->getPlaneAddr(plane, addr);
> @@ -1311,13 +738,13 @@
>  
>          //if (iSaoType == EO_1)
>          {
> -            if (m_param->saoLcuBasedOptimization && m_param->saoLcuBoundary)
> +            if (m_param->saoLcuBoundary)
>              {
>                  numSkipLine      = isChroma ? 4 - (2 * m_vChromaShift) : 4;
>                  numSkipLineRight = isChroma ? 4 - (2 * m_hChromaShift) : 4;
>              }
> -            stats = m_offsetOrg[partIdx][SAO_EO_1];
> -            counts = m_count[partIdx][SAO_EO_1];
> +            stats = m_offsetOrg[plane][SAO_EO_1];
> +            counts = m_count[plane][SAO_EO_1];
>  
>              fenc = m_pic->getPicYuvOrg()->getPlaneAddr(plane, addr);
>              recon = m_pic->getPicYuvRec()->getPlaneAddr(plane, addr);
> @@ -1352,13 +779,13 @@
>          }
>          //if (iSaoType == EO_2)
>          {
> -            if (m_param->saoLcuBasedOptimization && m_param->saoLcuBoundary)
> +            if (m_param->saoLcuBoundary)
>              {
>                  numSkipLine      = isChroma ? 4 - (2 * m_vChromaShift) : 4;
>                  numSkipLineRight = isChroma ? 5 - (2 * m_hChromaShift) : 5;
>              }
> -            stats = m_offsetOrg[partIdx][SAO_EO_2];
> -            counts = m_count[partIdx][SAO_EO_2];
> +            stats = m_offsetOrg[plane][SAO_EO_2];
> +            counts = m_count[plane][SAO_EO_2];
>  
>              fenc = m_pic->getPicYuvOrg()->getPlaneAddr(plane, addr);
>              recon = m_pic->getPicYuvRec()->getPlaneAddr(plane, addr);
> @@ -1398,13 +825,13 @@
>          }
>          //if (iSaoType == EO_3)
>          {
> -            if (m_param->saoLcuBasedOptimization && m_param->saoLcuBoundary)
> +            if (m_param->saoLcuBoundary)
>              {
>                  numSkipLine      = isChroma ? 4 - (2 * m_vChromaShift) : 4;
>                  numSkipLineRight = isChroma ? 5 - (2 * m_hChromaShift) : 5;
>              }
> -            stats = m_offsetOrg[partIdx][SAO_EO_3];
> -            counts = m_count[partIdx][SAO_EO_3];
> +            stats = m_offsetOrg[plane][SAO_EO_3];
> +            counts = m_count[plane][SAO_EO_3];
>  
>              fenc = m_pic->getPicYuvOrg()->getPlaneAddr(plane, addr);
>              recon = m_pic->getPicYuvRec()->getPlaneAddr(plane, addr);
> @@ -1721,84 +1148,13 @@
>      }
>  }
>  
> -void SAO::getSaoStats(SAOQTPart *psQTPart, int plane)
> -{
> -    int levelIdx, partIdx;
> -    int i;
> -    int lcuIdx;
> -    int lcuIdy;
> -    int frameWidthInCU = m_pic->getFrameWidthInCU();
> -    int downPartIdx;
> -    int partStart;
> -    int partEnd;
> -    SAOQTPart* onePart;
> -
> -    if (!m_maxSplitLevel)
> -    {
> -        partIdx = 0;
> -        onePart = &(psQTPart[partIdx]);
> -        for (lcuIdy = onePart->startCUY; lcuIdy <= onePart->endCUY; lcuIdy++)
> -        {
> -            for (lcuIdx = onePart->startCUX; lcuIdx <= onePart->endCUX; lcuIdx++)
> -            {
> -                int addr = lcuIdy * frameWidthInCU + lcuIdx;
> -                calcSaoStatsCu(addr, partIdx, plane);
> -            }
> -        }
> -    }
> -    else
> -    {
> -        for (partIdx = s_numCulPartsLevel[m_maxSplitLevel - 1]; partIdx < s_numCulPartsLevel[m_maxSplitLevel]; partIdx++)
> -        {
> -            onePart = &(psQTPart[partIdx]);
> -            for (lcuIdy = onePart->startCUY; lcuIdy <= onePart->endCUY; lcuIdy++)
> -            {
> -                for (lcuIdx = onePart->startCUX; lcuIdx <= onePart->endCUX; lcuIdx++)
> -                {
> -                    int addr = lcuIdy * frameWidthInCU + lcuIdx;
> -                    calcSaoStatsCu(addr, partIdx, plane);
> -                }
> -            }
> -        }
> -
> -        for (levelIdx = m_maxSplitLevel - 1; levelIdx >= 0; levelIdx--)
> -        {
> -            partStart = (levelIdx > 0) ? s_numCulPartsLevel[levelIdx - 1] : 0;
> -            partEnd   = s_numCulPartsLevel[levelIdx];
> -
> -            for (partIdx = partStart; partIdx < partEnd; partIdx++)
> -            {
> -                onePart = &(psQTPart[partIdx]);
> -                for (i = 0; i < SAOQTPart::NUM_DOWN_PART; i++)
> -                {
> -                    downPartIdx = onePart->downPartsIdx[i];
> -                    for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)
> -                    {
> -                        for (int classIdx = 0; classIdx < (typeIdx < SAO_BO ? SAO_EO_LEN : SAO_NUM_BO_CLASSES) + 1; classIdx++)
> -                        {
> -                            m_offsetOrg[partIdx][typeIdx][classIdx] += m_offsetOrg[downPartIdx][typeIdx][classIdx];
> -                            m_count[partIdx][typeIdx][classIdx]    += m_count[downPartIdx][typeIdx][classIdx];
> -                        }
> -                    }
> -                }
> -            }
> -        }
> -    }
> -}
> -
>  /* reset offset statistics */
>  void SAO::resetStats()
>  {
> -    for (int i = 0; i < m_numTotalParts; i++)
> +    for (int i = 0; i < NUM_PLANE; i++)
>      {
> -        m_costPartBest[i] = MAX_DOUBLE;
> -        m_typePartBest[i] = -1;
> -        m_distOrg[i] = 0;
>          for (int j = 0; j < MAX_NUM_SAO_TYPE; j++)
>          {
> -            m_dist[i][j] = 0;
> -            m_rate[i][j] = 0;
> -            m_cost[i][j] = 0;
>              for (int k = 0; k < MAX_NUM_SAO_CLASS; k++)
>              {
>                  m_count[i][j][k] = 0;
> @@ -1809,31 +1165,6 @@
>      }
>  }
>  
> -/* Sample adaptive offset process */
> -void SAO::SAOProcess(SAOParam *saoParam)
> -{
> -    X265_CHECK(!m_param->saoLcuBasedOptimization, "SAO LCU mode failure\n"); 
> -    double costFinal = 0;
> -    saoParam->bSaoFlag[0] = true;
> -    saoParam->bSaoFlag[1] = false;
> -
> -    getSaoStats(saoParam->saoPart[0], 0);
> -    runQuadTreeDecision(saoParam->saoPart[0], 0, costFinal, m_maxSplitLevel, 0);
> -    saoParam->bSaoFlag[0] = costFinal < 0;
> -
> -    if (saoParam->bSaoFlag[0])
> -    {
> -        convertQT2SaoUnit(saoParam, 0, 0);
> -        assignSaoUnitSyntax(saoParam->saoLcuParam[0], saoParam->saoPart[0], saoParam->oneUnitFlag[0]);
> -        processSaoUnitAll(saoParam->saoLcuParam[0], saoParam->oneUnitFlag[0], 0);
> -    }
> -    if (saoParam->bSaoFlag[1])
> -    {
> -        processSaoUnitAll(saoParam->saoLcuParam[1], saoParam->oneUnitFlag[1], 1);
> -        processSaoUnitAll(saoParam->saoLcuParam[2], saoParam->oneUnitFlag[2], 2);
> -    }
> -}
> -
>  /* Check merge SAO unit */
>  void SAO::checkMerge(SaoLcuParam * saoUnitCurr, SaoLcuParam * saoUnitCheck, int dir)
>  {
> @@ -1885,65 +1216,10 @@
>      }
>  }
>  
> -/** Assign SAO unit syntax from picture-based algorithm */
> -void SAO::assignSaoUnitSyntax(SaoLcuParam* saoLcuParam,  SAOQTPart* saoPart, bool &oneUnitFlag)
> -{
> -    if (saoPart->bSplit == 0)
> -        oneUnitFlag = 1;
> -    else
> -    {
> -        oneUnitFlag = 0;
> -
> -        int idxCount = -1;
> -        saoLcuParam[0].mergeUpFlag = 0;
> -        saoLcuParam[0].mergeLeftFlag = 0;
> -
> -        for (int j = 0; j < m_numCuInHeight; j++)
> -        {
> -            for (int i = 0; i < m_numCuInWidth; i++)
> -            {
> -                int addr     = i + j * m_numCuInWidth;
> -                int addrUp   = (j == 0) ? -1 : addr - m_numCuInWidth;
> -                int addrLeft = (i == 0) ? -1 : addr - 1;
> -                int idx      = saoLcuParam[addr].partIdxTmp;
> -                int idxLeft  = (addrLeft == -1) ? -1 : saoLcuParam[addrLeft].partIdxTmp;
> -                int idxUp    = (addrUp == -1)   ? -1 : saoLcuParam[addrUp].partIdxTmp;
> -
> -                if (idx != idxLeft && idx != idxUp)
> -                {
> -                    saoLcuParam[addr].mergeUpFlag   = 0;
> -                    idxCount++;
> -                    saoLcuParam[addr].mergeLeftFlag = 0;
> -                    saoLcuParam[addr].partIdx = idxCount;
> -                }
> -                else if (idx == idxLeft)
> -                {
> -                    saoLcuParam[addr].mergeUpFlag   = 1;
> -                    saoLcuParam[addr].mergeLeftFlag = 1;
> -                    saoLcuParam[addr].partIdx = saoLcuParam[addrLeft].partIdx;
> -                }
> -                else if (idx == idxUp)
> -                {
> -                    saoLcuParam[addr].mergeUpFlag   = 1;
> -                    saoLcuParam[addr].mergeLeftFlag = 0;
> -                    saoLcuParam[addr].partIdx = saoLcuParam[addrUp].partIdx;
> -                }
> -                if (addrUp != -1)
> -                    checkMerge(&saoLcuParam[addr], &saoLcuParam[addrUp], 1);
> -                if (addrLeft != -1)
> -                    checkMerge(&saoLcuParam[addr], &saoLcuParam[addrLeft], 0);
> -            }
> -        }
> -    }
> -}
> -
>  void SAO::rdoSaoUnitRowInit(SAOParam *saoParam)
>  {
>      saoParam->bSaoFlag[0] = true;
>      saoParam->bSaoFlag[1] = true;
> -    saoParam->oneUnitFlag[0] = false;
> -    saoParam->oneUnitFlag[1] = false;
> -    saoParam->oneUnitFlag[2] = false;
>  
>      m_numNoSao[0] = 0; // Luma
>      m_numNoSao[1] = 0; // Chroma
> @@ -2005,7 +1281,7 @@
>                  for (k = 0; k < MAX_NUM_SAO_CLASS; k++)
>                  {
>                      m_offset[compIdx][j][k] = 0;
> -                    if (m_param->saoLcuBasedOptimization && m_param->saoLcuBoundary)
> +                    if (m_param->saoLcuBoundary)
>                      {
>                          m_count[compIdx][j][k] = m_countPreDblk[addr][compIdx][j][k];
>                          m_offsetOrg[compIdx][j][k] = m_offsetOrgPreDblk[addr][compIdx][j][k];
> @@ -2023,10 +1299,10 @@
>              saoParam->saoLcuParam[compIdx][addr].mergeLeftFlag = 0;
>              saoParam->saoLcuParam[compIdx][addr].subTypeIdx    = 0;
>              if ((compIdx == 0 && saoParam->bSaoFlag[0]) || (compIdx > 0 && saoParam->bSaoFlag[1]))
> -                calcSaoStatsCu(addr, compIdx,  compIdx);
> +                calcSaoStatsCu(addr, compIdx);
>          }
>  
> -        saoComponentParamDist(allowMergeLeft, allowMergeUp, saoParam, addr, addrUp, addrLeft, 0, 
> +        saoComponentParamDist(allowMergeLeft, allowMergeUp, saoParam, addr, addrUp, addrLeft,
>                                &mergeSaoParam[0][0], &compDistortion[0]);
>  
>          sao2ChromaParamDist(allowMergeLeft, allowMergeUp, saoParam, addr, addrUp, addrLeft,
> @@ -2169,13 +1445,13 @@
>      return offsetOutput;
>  }
>  
> -void SAO::saoComponentParamDist(int allowMergeLeft, int allowMergeUp, SAOParam *saoParam, int addr, int addrUp, int addrLeft, int plane,
> +void SAO::saoComponentParamDist(int allowMergeLeft, int allowMergeUp, SAOParam *saoParam, int addr, int addrUp, int addrLeft,
>                                  SaoLcuParam *compSaoParam, double *compDistortion)
>  {
>      int64_t estDist;
>      int64_t bestDist;
>  
> -    SaoLcuParam* saoLcuParam = &(saoParam->saoLcuParam[plane][addr]);
> +    SaoLcuParam* saoLcuParam = &(saoParam->saoLcuParam[0][addr]);
>      SaoLcuParam* saoLcuParamNeighbor = NULL;
>  
>      resetSaoUnit(saoLcuParam);
> @@ -2194,14 +1470,14 @@
>  
>      m_entropyCoder.load(m_rdEntropyCoders[0][CI_TEMP_BEST]);
>      m_entropyCoder.resetBits();
> -    m_entropyCoder.codeSaoOffset(&saoLcuParamRdo, plane);
> +    m_entropyCoder.codeSaoOffset(&saoLcuParamRdo, 0);
>      dCostPartBest = m_entropyCoder.getNumberOfWrittenBits() * m_lumaLambda;
>      copySaoUnit(saoLcuParam, &saoLcuParamRdo);
>      bestDist = 0;
>  
>      for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)
>      {
> -        estDist = estSaoTypeDist(plane, typeIdx, 0, m_lumaLambda, currentDistortionTableBo, currentRdCostTableBo);
> +        estDist = estSaoTypeDist(0, typeIdx, 0, m_lumaLambda, currentDistortionTableBo, currentRdCostTableBo);
>  
>          if (typeIdx == SAO_BO)
>          {
> @@ -2231,18 +1507,18 @@
>          saoLcuParamRdo.mergeUpFlag   = 0;
>          saoLcuParamRdo.subTypeIdx = (typeIdx == SAO_BO) ? bestClassTableBo : 0;
>          for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
> -            saoLcuParamRdo.offset[classIdx] = (int)m_offset[plane][typeIdx][classIdx + saoLcuParamRdo.subTypeIdx + 1];
> +            saoLcuParamRdo.offset[classIdx] = (int)m_offset[0][typeIdx][classIdx + saoLcuParamRdo.subTypeIdx + 1];
>  
>          m_entropyCoder.load(m_rdEntropyCoders[0][CI_TEMP_BEST]);
>          m_entropyCoder.resetBits();
> -        m_entropyCoder.codeSaoOffset(&saoLcuParamRdo, plane);
> +        m_entropyCoder.codeSaoOffset(&saoLcuParamRdo, 0);
>  
>          uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
> -        m_cost[plane][typeIdx] = (double)((double)estDist + m_lumaLambda * (double)estRate);
> +        double cost = (double)((double)estDist + m_lumaLambda * (double)estRate);
>  
> -        if (m_cost[plane][typeIdx] < dCostPartBest)
> +        if (cost < dCostPartBest)
>          {
> -            dCostPartBest = m_cost[plane][typeIdx];
> +            dCostPartBest = cost;
>              copySaoUnit(saoLcuParam, &saoLcuParamRdo);
>              bestDist = estDist;
>          }
> @@ -2250,7 +1526,7 @@
>  
>      compDistortion[0] += ((double)bestDist / m_lumaLambda);
>      m_entropyCoder.load(m_rdEntropyCoders[0][CI_TEMP_BEST]);
> -    m_entropyCoder.codeSaoOffset(saoLcuParam, plane);
> +    m_entropyCoder.codeSaoOffset(saoLcuParam, 0);
>      m_entropyCoder.store(m_rdEntropyCoders[0][CI_TEMP_BEST]);
>  
>      // merge left or merge up
> @@ -2259,9 +1535,9 @@
>      {
>          saoLcuParamNeighbor = NULL;
>          if (allowMergeLeft && addrLeft >= 0 && idxNeighbor == 0)
> -            saoLcuParamNeighbor = &(saoParam->saoLcuParam[plane][addrLeft]);
> +            saoLcuParamNeighbor = &(saoParam->saoLcuParam[0][addrLeft]);
>          else if (allowMergeUp && addrUp >= 0 && idxNeighbor == 1)
> -            saoLcuParamNeighbor = &(saoParam->saoLcuParam[plane][addrUp]);
> +            saoLcuParamNeighbor = &(saoParam->saoLcuParam[0][addrUp]);
>          if (saoLcuParamNeighbor != NULL)
>          {
>              estDist = 0;
> @@ -2273,7 +1549,7 @@
>                  for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
>                  {
>                      mergeOffset = saoLcuParamNeighbor->offset[classIdx];
> -                    estDist += estSaoDist(m_count[plane][typeIdx][classIdx + mergeBandPosition + 1], mergeOffset, m_offsetOrg[plane][typeIdx][classIdx + mergeBandPosition + 1],  0);
> +                    estDist += estSaoDist(m_count[0][typeIdx][classIdx + mergeBandPosition + 1], mergeOffset, m_offsetOrg[0][typeIdx][classIdx + mergeBandPosition + 1],  0);
>                  }
>              }
>              else
> @@ -2382,11 +1658,11 @@
>          }
>  
>          uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
> -        m_cost[1][typeIdx] = (double)((double)(estDist[0] + estDist[1]) + m_chromaLambda * (double)estRate);
> +        double cost = (double)((double)(estDist[0] + estDist[1]) + m_chromaLambda * (double)estRate);
>  
> -        if (m_cost[1][typeIdx] < costPartBest)
> +        if (cost < costPartBest)
>          {
> -            costPartBest = m_cost[1][typeIdx];
> +            costPartBest = cost;
>              copySaoUnit(saoLcuParam[0], &saoLcuParamRdo[0]);
>              copySaoUnit(saoLcuParam[1], &saoLcuParamRdo[1]);
>              bestDist = (estDist[0] + estDist[1]);
> diff -r 5a6845566d14 -r 3eacdaa30440 source/encoder/sao.h
> --- a/source/encoder/sao.h	Mon Sep 29 17:37:47 2014 -0500
> +++ b/source/encoder/sao.h	Tue Sep 30 09:48:12 2014 +0900
> @@ -59,25 +59,18 @@
>      enum { SAO_BIT_INC = X265_MAX(X265_DEPTH - 10, 0) };
>      enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
>      enum { NUM_EDGETYPE = 5 };
> +    enum { NUM_PLANE = 3 };
>  
> -    static const int      s_numCulPartsLevel[5];
>      static const uint32_t s_eoTable[NUM_EDGETYPE];
>  
>      typedef int64_t (PerClass[MAX_NUM_SAO_TYPE][MAX_NUM_SAO_CLASS]);
>      typedef int64_t (PerType[MAX_NUM_SAO_TYPE]);
> -    typedef double  (PerTypeD[MAX_NUM_SAO_TYPE]);
>      typedef int64_t (PerPlane[3][MAX_NUM_SAO_TYPE][MAX_NUM_SAO_CLASS]);
>  
>      /* allocated per part */
>      PerClass*   m_count;
>      PerClass*   m_offset;
>      PerClass*   m_offsetOrg;
> -    PerType*    m_rate;
> -    PerType*    m_dist;
> -    PerTypeD*   m_cost;
> -    double*     m_costPartBest;
> -    int64_t*    m_distOrg;
> -    int*        m_typePartBest;
>  
>      /* allocated per LCU */
>      PerPlane*   m_countPreDblk;
> @@ -87,11 +80,8 @@
>      pixel*      m_offsetBo;
>      int8_t      m_offsetEo[NUM_EDGETYPE];
>  
> -    int         m_maxSplitLevel;
> -
>      int         m_numCuInWidth;
>      int         m_numCuInHeight;
> -    int         m_numTotalParts;
>      int         m_hChromaShift;
>      int         m_vChromaShift;
>  
> @@ -122,7 +112,6 @@
>      bool create(x265_param *param);
>      void destroy();
>  
> -    void initSAOParam(SAOParam* saoParam, int partLevel, int partRow, int partCol, int parentPartIdx, int startCUX, int endCUX, int startCUY, int endCUY, int plane) const;
>      void allocSaoParam(SAOParam* saoParam) const;
>  
>      void startSlice(Frame *pic, Entropy& initState, int qp);
> @@ -130,30 +119,19 @@
>      void resetStats();
>      void resetSaoUnit(SaoLcuParam* saoUnit);
>  
> -    void SAOProcess(SAOParam* saoParam);
> -
>      // LCU-basd SAO process without slice granularity
>      void processSaoCu(int addr, int partIdx, int plane);
>  
>      void resetLcuPart(SaoLcuParam* saoLcuParam);
> -    void convertQT2SaoUnit(SAOParam* saoParam, uint32_t partIdx, int plane);
> -    void convertOnePart2SaoUnit(SAOParam *saoParam, uint32_t partIdx, int plane);
> -    void processSaoUnitAll(SaoLcuParam* saoLcuParam, bool oneUnitFlag, int plane);
>      void processSaoUnitRow(SaoLcuParam* saoLcuParam, int idxY, int plane);
>  
>      void copySaoUnit(SaoLcuParam* saoUnitDst, SaoLcuParam* saoUnitSrc);
>  
> -    void runQuadTreeDecision(SAOQTPart *psQTPart, int partIdx, double &costFinal, int maxLevel, int plane);
> -    void rdoSaoOnePart(SAOQTPart *psQTPart, int partIdx, int plane);
> -
> -    void disablePartTree(SAOQTPart *psQTPart, int partIdx);
> -    void getSaoStats(SAOQTPart *psQTPart, int plane);
> -    void calcSaoStatsCu(int addr, int partIdx, int plane);
> +    void calcSaoStatsCu(int addr, int plane);
>      void calcSaoStatsCu_BeforeDblk(Frame* pic, int idxX, int idxY);
> -    void assignSaoUnitSyntax(SaoLcuParam* saoLcuParam,  SAOQTPart* saoPart, bool &oneUnitFlag);
>      void checkMerge(SaoLcuParam* lcuParamCurr, SaoLcuParam * lcuParamCheck, int dir);
>  
> -    void saoComponentParamDist(int allowMergeLeft, int allowMergeUp, SAOParam *saoParam, int addr, int addrUp, int addrLeft, int plane,
> +    void saoComponentParamDist(int allowMergeLeft, int allowMergeUp, SAOParam *saoParam, int addr, int addrUp, int addrLeft,
>                                 SaoLcuParam *compSaoParam, double *distortion);
>      void sao2ChromaParamDist(int allowMergeLeft, int allowMergeUp, SAOParam *saoParam, int addr, int addrUp, int addrLeft,
>                              SaoLcuParam *crSaoParam, SaoLcuParam *cbSaoParam, double *distortion);
> diff -r 5a6845566d14 -r 3eacdaa30440 source/x265.cpp
> --- a/source/x265.cpp	Mon Sep 29 17:37:47 2014 -0500
> +++ b/source/x265.cpp	Tue Sep 30 09:48:12 2014 +0900
> @@ -457,7 +457,6 @@
>      H0("   --[no-]lft                    Enable Deblocking Loop Filter. Default %s\n", OPT(param->bEnableLoopFilter));
>      H0("   --[no-]sao                    Enable Sample Adaptive Offset. Default %s\n", OPT(param->bEnableSAO));
>      H0("   --sao-lcu-bounds <integer>    0: right/bottom boundary areas skipped  1: non-deblocked pixels are used. Default %d\n", param->saoLcuBoundary);
> -    H0("   --sao-lcu-opt <integer>       0: SAO picture-based optimization, 1: SAO LCU-based optimization. Default %d\n", param->saoLcuBasedOptimization);
>      H0("\nVUI options:\n");
>      H0("   --sar <width:height|int>      Sample Aspect Ratio, the ratio of width to height of an individual pixel.\n");
>      H0("                                 Choose from 0=undef, 1=1:1(\"square\"), 2=12:11, 3=10:11, 4=16:11,\n");
> diff -r 5a6845566d14 -r 3eacdaa30440 source/x265.h
> --- a/source/x265.h	Mon Sep 29 17:37:47 2014 -0500
> +++ b/source/x265.h	Tue Sep 30 09:48:12 2014 +0900
> @@ -734,12 +734,6 @@
>       * pixels are used entirely. Default is 0 */
>      int       saoLcuBoundary;
>  
> -    /* Select the scope of the SAO optimization. If 0 SAO is performed over the
> -     * entire output picture at once, this can severly restrict frame
> -     * parallelism so it is not recommended for many-core machines.  If 1 SAO is
> -     * performed on LCUs in series. Default is 1 */
> -    int       saoLcuBasedOptimization;
> -
>      /* Generally a small signed integer which offsets the QP used to quantize
>       * the Cb chroma residual (delta from luma QP specified by rate-control).
>       * Default is 0, which is recommended */
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel

-- 
Steve Borho


More information about the x265-devel mailing list