[x265] sao: remove frame-based SAO
Steve Borho
steve at borho.org
Tue Sep 30 05:19:02 CEST 2014
On 09/30, Satoshi Nakagawa wrote:
> # HG changeset patch
> # User Satoshi Nakagawa <nakagawa424 at oki.com>
> # Date 1412038092 -32400
> # Tue Sep 30 09:48:12 2014 +0900
> # Node ID 3eacdaa304400b0100dcf1d1515ae1d24cbf4305
> # Parent 5a6845566d1492d29af29ecc0cf75d644994735c
> sao: remove frame-based SAO
Queued together with the removal of the reST docs for this option and the
getopt parameter.
Thanks.
> diff -r 5a6845566d14 -r 3eacdaa30440 source/common/common.h
> --- a/source/common/common.h Mon Sep 29 17:37:47 2014 -0500
> +++ b/source/common/common.h Tue Sep 30 09:48:12 2014 +0900
> @@ -212,34 +212,6 @@
> uint32_t count[8];
> };
>
> -struct SAOQTPart
> -{
> - enum { NUM_DOWN_PART = 4 };
> -
> - int bestType;
> - int subTypeIdx; // indicates EO class or BO band position
> - int offset[SAO_NUM_OFFSET];
> - int startCUX;
> - int startCUY;
> - int endCUX;
> - int endCUY;
> -
> - int partIdx;
> - int partLevel;
> - int partCol;
> - int partRow;
> -
> - int downPartsIdx[NUM_DOWN_PART];
> - int upPartIdx;
> -
> - bool bSplit;
> -
> - bool bProcessed;
> - double minCost;
> - int64_t minDist;
> - int minRate;
> -};
> -
> struct SaoLcuParam
> {
> bool mergeUpFlag;
> @@ -266,10 +238,7 @@
> struct SAOParam
> {
> SaoLcuParam* saoLcuParam[3];
> - SAOQTPart* saoPart[3];
> bool bSaoFlag[2];
> - bool oneUnitFlag[3];
> - int maxSplitLevel;
> int numCuInHeight;
> int numCuInWidth;
>
> @@ -277,15 +246,11 @@
> {
> for (int i = 0; i < 3; i++)
> {
> - saoPart[i] = NULL;
> saoLcuParam[i] = NULL;
> }
> }
> ~SAOParam()
> {
> - delete[] saoPart[0];
> - delete[] saoPart[1];
> - delete[] saoPart[2];
> delete[] saoLcuParam[0];
> delete[] saoLcuParam[1];
> delete[] saoLcuParam[2];
> diff -r 5a6845566d14 -r 3eacdaa30440 source/common/param.cpp
> --- a/source/common/param.cpp Mon Sep 29 17:37:47 2014 -0500
> +++ b/source/common/param.cpp Tue Sep 30 09:48:12 2014 +0900
> @@ -169,7 +169,6 @@
> /* SAO Loop Filter */
> param->bEnableSAO = 1;
> param->saoLcuBoundary = 0;
> - param->saoLcuBasedOptimization = 1;
>
> /* Coding Quality */
> param->cbQpOffset = 0;
> @@ -625,7 +624,6 @@
> OPT("lft") p->bEnableLoopFilter = atobool(value);
> OPT("sao") p->bEnableSAO = atobool(value);
> OPT("sao-lcu-bounds") p->saoLcuBoundary = atoi(value);
> - OPT("sao-lcu-opt") p->saoLcuBasedOptimization = atoi(value);
> OPT("ssim") p->bEnableSsim = atobool(value);
> OPT("psnr") p->bEnablePsnr = atobool(value);
> OPT("hash") p->decodedPictureHashSEI = atoi(value);
> @@ -1165,13 +1163,7 @@
> fprintf(stderr, "nr=%d ", param->noiseReduction);
>
> TOOLOPT(param->bEnableLoopFilter, "lft");
> - if (param->bEnableSAO)
> - {
> - if (param->saoLcuBasedOptimization)
> - fprintf(stderr, "sao-lcu ");
> - else
> - fprintf(stderr, "sao-frame ");
> - }
> + TOOLOPT(param->bEnableSAO, "sao");
> TOOLOPT(param->bEnableSignHiding, "signhide");
> TOOLOPT(param->bCULossless, "cu-lossless");
> TOOLOPT(param->bEnableFastIntra, "fast-intra");
> @@ -1245,7 +1237,6 @@
> BOOL(p->bEnableLoopFilter, "lft");
> BOOL(p->bEnableSAO, "sao");
> s += sprintf(s, " sao-lcu-bounds=%d", p->saoLcuBoundary);
> - s += sprintf(s, " sao-lcu-opt=%d", p->saoLcuBasedOptimization);
> BOOL(p->bBPyramid, "b-pyramid");
> BOOL(p->rc.cuTree, "cutree");
> s += sprintf(s, " rc=%s", p->rc.rateControlMode == X265_RC_ABR ? (
> diff -r 5a6845566d14 -r 3eacdaa30440 source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp Mon Sep 29 17:37:47 2014 -0500
> +++ b/source/encoder/encoder.cpp Tue Sep 30 09:48:12 2014 +0900
> @@ -1247,10 +1247,6 @@
> x265_log(p, X265_LOG_INFO, "Parallelism disabled, single thread mode\n");
> p->bEnableWavefront = 0;
> }
> - if (!p->saoLcuBasedOptimization && p->frameNumThreads > 1)
> - {
> - x265_log(p, X265_LOG_INFO, "Warning: picture-based SAO used with frame parallelism\n");
> - }
>
> if (p->keyframeMax < 0)
> {
> diff -r 5a6845566d14 -r 3eacdaa30440 source/encoder/frameencoder.cpp
> --- a/source/encoder/frameencoder.cpp Mon Sep 29 17:37:47 2014 -0500
> +++ b/source/encoder/frameencoder.cpp Tue Sep 30 09:48:12 2014 +0900
> @@ -85,7 +85,7 @@
> m_param = top->m_param;
> m_numRows = numRows;
> m_numCols = numCols;
> - m_filterRowDelay = (m_param->bEnableSAO && m_param->saoLcuBasedOptimization && m_param->saoLcuBoundary) ?
> + m_filterRowDelay = (m_param->bEnableSAO && m_param->saoLcuBoundary) ?
> 2 : (m_param->bEnableSAO || m_param->bEnableLoopFilter ? 1 : 0);
> m_filterRowDelayCus = m_filterRowDelay * numCols;
>
> @@ -323,17 +323,6 @@
> m_frameStats.percentSkip = (double)totalSkip / totalCuCount;
> }
>
> - if (slice->m_sps->bUseSAO && !m_param->saoLcuBasedOptimization)
> - {
> - /* frame based SAO */
> - m_frameFilter.m_sao.SAOProcess(m_frame->getPicSym()->m_saoParam);
> - restoreLFDisabledOrigYuv(m_frame);
> -
> - // Extend border after whole-frame SAO is finished
> - for (int row = 0; row < m_numRows; row++)
> - m_frameFilter.processRowPost(row);
> - }
> -
> m_bs.resetBits();
> m_entropyCoder.load(m_initSliceContext);
> m_entropyCoder.setBitstream(&m_bs);
> @@ -799,7 +788,7 @@
> }
>
> // NOTE: do CU level Filter
> - if (m_param->bEnableSAO && m_param->saoLcuBasedOptimization && m_param->saoLcuBoundary)
> + if (m_param->bEnableSAO && m_param->saoLcuBoundary)
> // SAO parameter estimation using non-deblocked pixels for LCU bottom and right boundary areas
> m_frameFilter.m_sao.calcSaoStatsCu_BeforeDblk(m_frame, col, row);
>
> diff -r 5a6845566d14 -r 3eacdaa30440 source/encoder/framefilter.cpp
> --- a/source/encoder/framefilter.cpp Mon Sep 29 17:37:47 2014 -0500
> +++ b/source/encoder/framefilter.cpp Tue Sep 30 09:48:12 2014 +0900
> @@ -115,20 +115,15 @@
> SAOParam* saoParam = m_frame->getPicSym()->m_saoParam;
> if (m_param->bEnableSAO)
> {
> - if (m_param->saoLcuBasedOptimization)
> - {
> - m_sao.m_entropyCoder.load(m_frameEncoder->m_initSliceContext);
> - m_sao.m_rdEntropyCoders[0][CI_NEXT_BEST].load(m_frameEncoder->m_initSliceContext);
> - m_sao.m_rdEntropyCoders[0][CI_CURR_BEST].load(m_frameEncoder->m_initSliceContext);
> + m_sao.m_entropyCoder.load(m_frameEncoder->m_initSliceContext);
> + m_sao.m_rdEntropyCoders[0][CI_NEXT_BEST].load(m_frameEncoder->m_initSliceContext);
> + m_sao.m_rdEntropyCoders[0][CI_CURR_BEST].load(m_frameEncoder->m_initSliceContext);
>
> - m_sao.rdoSaoUnitRow(saoParam, row);
> + m_sao.rdoSaoUnitRow(saoParam, row);
>
> - // NOTE: Delay a row because SAO decide need top row pixels at next row, is it HM's bug?
> - if (row >= m_saoRowDelay)
> - processSao(row - m_saoRowDelay);
> - }
> - else
> - return;
> + // NOTE: Delay a row because SAO decide need top row pixels at next row, is it HM's bug?
> + if (row >= m_saoRowDelay)
> + processSao(row - m_saoRowDelay);
> }
>
> // this row of CTUs has been encoded
> @@ -138,7 +133,7 @@
>
> if (row == m_numRows - 1)
> {
> - if (m_param->bEnableSAO && m_param->saoLcuBasedOptimization)
> + if (m_param->bEnableSAO)
> {
> m_sao.rdoSaoUnitRowEnd(saoParam, m_frame->getNumCUsInFrame());
>
> @@ -424,9 +419,6 @@
> const uint32_t lineStartCUAddr = row * numCols;
> SAOParam* saoParam = m_frame->getPicSym()->m_saoParam;
>
> - // NOTE: these flags are not used in this mode
> - X265_CHECK(!saoParam->oneUnitFlag[0] && !saoParam->oneUnitFlag[1] && !saoParam->oneUnitFlag[2], "invalid SAO flag");
> -
> if (saoParam->bSaoFlag[0])
> m_sao.processSaoUnitRow(saoParam->saoLcuParam[0], row, 0);
>
> diff -r 5a6845566d14 -r 3eacdaa30440 source/encoder/sao.cpp
> --- a/source/encoder/sao.cpp Mon Sep 29 17:37:47 2014 -0500
> +++ b/source/encoder/sao.cpp Tue Sep 30 09:48:12 2014 +0900
> @@ -51,34 +51,11 @@
> return (x >> 31) | ((int)((((uint32_t)-x)) >> 31));
> }
>
> -int convertLevelRowCol2Idx(int level, int row, int col)
> -{
> - if (!level)
> - return 0;
> - else if (level == 1)
> - return 1 + row * 2 + col;
> - else if (level == 2)
> - return 5 + row * 4 + col;
> - else if (level == 3)
> - return 21 + row * 8 + col;
> - else // (level == 4)
> - return 85 + row * 16 + col;
> -}
> -
> } // end anonymous namespace
>
>
> namespace x265 {
>
> -const int SAO::s_numCulPartsLevel[5] =
> -{
> - 1, // level 0
> - 5, // level 1
> - 21, // level 2
> - 85, // level 3
> - 341, // level 4
> -};
> -
> const uint32_t SAO::s_eoTable[NUM_EDGETYPE] =
> {
> 1, // 0
> @@ -95,17 +72,10 @@
> m_offsetOrg = NULL;
> m_countPreDblk = NULL;
> m_offsetOrgPreDblk = NULL;
> - m_rate = NULL;
> - m_dist = NULL;
> - m_cost = NULL;
> - m_costPartBest = NULL;
> - m_distOrg = NULL;
> - m_typePartBest = NULL;
> m_refDepth = 0;
> m_lumaLambda = 0;
> m_chromaLambda = 0;
> m_param = NULL;
> - m_numTotalParts = 0;
> m_clipTable = NULL;
> m_clipTableBase = NULL;
> m_offsetBo = NULL;
> @@ -137,16 +107,6 @@
> m_numCuInWidth = (m_param->sourceWidth + g_maxCUSize - 1) / g_maxCUSize;
> m_numCuInHeight = (m_param->sourceHeight + g_maxCUSize - 1) / g_maxCUSize;
>
> - int maxSplitLevelHeight = (int)(logf((float)m_numCuInHeight) / logf(2.0));
> - int maxSplitLevelWidth = (int)(logf((float)m_numCuInWidth) / logf(2.0));
> -
> - m_maxSplitLevel = maxSplitLevelHeight < maxSplitLevelWidth ? maxSplitLevelHeight : maxSplitLevelWidth;
> - m_maxSplitLevel = X265_MIN(m_maxSplitLevel, SAO_MAX_DEPTH);
> -
> - /* various structures are overloaded to store per component data.
> - * m_numTotalParts must allow for sufficient storage in any allocated arrays */
> - m_numTotalParts = X265_MAX(3, s_numCulPartsLevel[m_maxSplitLevel]);
> -
> const pixel maxY = (1 << X265_DEPTH) - 1;
> const pixel rangeExt = maxY >> 1;
> int numLcu = m_numCuInWidth * m_numCuInHeight;
> @@ -163,17 +123,9 @@
> CHECKED_MALLOC(m_tmpU2[i], pixel, m_param->sourceWidth);
> }
>
> - CHECKED_MALLOC(m_distOrg, int64_t, m_numTotalParts);
> - CHECKED_MALLOC(m_costPartBest, double, m_numTotalParts);
> - CHECKED_MALLOC(m_typePartBest, int, m_numTotalParts);
> -
> - CHECKED_MALLOC(m_rate, PerType, m_numTotalParts);
> - CHECKED_MALLOC(m_dist, PerType, m_numTotalParts);
> - CHECKED_MALLOC(m_cost, PerTypeD, m_numTotalParts);
> -
> - CHECKED_MALLOC(m_count, PerClass, m_numTotalParts);
> - CHECKED_MALLOC(m_offset, PerClass, m_numTotalParts);
> - CHECKED_MALLOC(m_offsetOrg, PerClass, m_numTotalParts);
> + CHECKED_MALLOC(m_count, PerClass, NUM_PLANE);
> + CHECKED_MALLOC(m_offset, PerClass, NUM_PLANE);
> + CHECKED_MALLOC(m_offsetOrg, PerClass, NUM_PLANE);
>
> CHECKED_MALLOC(m_countPreDblk, PerPlane, numLcu);
> CHECKED_MALLOC(m_offsetOrgPreDblk, PerPlane, numLcu);
> @@ -209,12 +161,6 @@
> X265_FREE(m_tmpU2[i]);
> }
>
> - X265_FREE(m_distOrg);
> - X265_FREE(m_costPartBest);
> - X265_FREE(m_typePartBest);
> - X265_FREE(m_rate);
> - X265_FREE(m_dist);
> - X265_FREE(m_cost);
> X265_FREE(m_count);
> X265_FREE(m_offset);
> X265_FREE(m_offsetOrg);
> @@ -225,143 +171,22 @@
> /* allocate memory for SAO parameters */
> void SAO::allocSaoParam(SAOParam *saoParam) const
> {
> - saoParam->maxSplitLevel = m_maxSplitLevel;
> saoParam->numCuInWidth = m_numCuInWidth;
> saoParam->numCuInHeight = m_numCuInHeight;
>
> - saoParam->saoPart[0] = new SAOQTPart[s_numCulPartsLevel[saoParam->maxSplitLevel]];
> - initSAOParam(saoParam, 0, 0, 0, -1, 0, m_numCuInWidth - 1, 0, m_numCuInHeight - 1, 0);
> -
> - saoParam->saoPart[1] = new SAOQTPart[s_numCulPartsLevel[saoParam->maxSplitLevel]];
> - saoParam->saoPart[2] = new SAOQTPart[s_numCulPartsLevel[saoParam->maxSplitLevel]];
> - initSAOParam(saoParam, 0, 0, 0, -1, 0, m_numCuInWidth - 1, 0, m_numCuInHeight - 1, 1);
> - initSAOParam(saoParam, 0, 0, 0, -1, 0, m_numCuInWidth - 1, 0, m_numCuInHeight - 1, 2);
> -
> saoParam->saoLcuParam[0] = new SaoLcuParam[m_numCuInHeight * m_numCuInWidth];
> saoParam->saoLcuParam[1] = new SaoLcuParam[m_numCuInHeight * m_numCuInWidth];
> saoParam->saoLcuParam[2] = new SaoLcuParam[m_numCuInHeight * m_numCuInWidth];
> }
>
> -/* recursively initialize SAO parameters (only once) */
> -void SAO::initSAOParam(SAOParam *saoParam, int partLevel, int partRow, int partCol, int parentPartIdx, int startCUX, int endCUX, int startCUY, int endCUY, int plane) const
> -{
> - int partIdx = convertLevelRowCol2Idx(partLevel, partRow, partCol);
> -
> - SAOQTPart* saoPart = &(saoParam->saoPart[plane][partIdx]);
> -
> - saoPart->partIdx = partIdx;
> - saoPart->partLevel = partLevel;
> - saoPart->partRow = partRow;
> - saoPart->partCol = partCol;
> -
> - saoPart->startCUX = startCUX;
> - saoPart->endCUX = endCUX;
> - saoPart->startCUY = startCUY;
> - saoPart->endCUY = endCUY;
> -
> - saoPart->upPartIdx = parentPartIdx;
> - saoPart->bestType = -1;
> -
> - saoPart->subTypeIdx = 0;
> -
> - for (int j = 0; j < SAO_NUM_OFFSET; j++)
> - saoPart->offset[j] = 0;
> -
> - if (saoPart->partLevel < m_maxSplitLevel)
> - {
> - int downLevel = (partLevel + 1);
> - int downRowStart = (partRow << 1);
> - int downColStart = (partCol << 1);
> -
> - int numCUWidth = endCUX - startCUX + 1;
> - int numCUHeight = endCUY - startCUY + 1;
> - int numCULeft = (numCUWidth >> 1);
> - int numCUTop = (numCUHeight >> 1);
> -
> - int downStartCUX = startCUX;
> - int downEndCUX = downStartCUX + numCULeft - 1;
> - int downStartCUY = startCUY;
> - int downEndCUY = downStartCUY + numCUTop - 1;
> - int downRowIdx = downRowStart + 0;
> - int downColIdx = downColStart + 0;
> -
> - saoPart->downPartsIdx[0] = convertLevelRowCol2Idx(downLevel, downRowIdx, downColIdx);
> -
> - initSAOParam(saoParam, downLevel, downRowIdx, downColIdx, partIdx, downStartCUX, downEndCUX, downStartCUY, downEndCUY, plane);
> -
> - downStartCUX = startCUX + numCULeft;
> - downEndCUX = endCUX;
> - downStartCUY = startCUY;
> - downEndCUY = downStartCUY + numCUTop - 1;
> - downRowIdx = downRowStart + 0;
> - downColIdx = downColStart + 1;
> -
> - saoPart->downPartsIdx[1] = convertLevelRowCol2Idx(downLevel, downRowIdx, downColIdx);
> -
> - initSAOParam(saoParam, downLevel, downRowIdx, downColIdx, partIdx, downStartCUX, downEndCUX, downStartCUY, downEndCUY, plane);
> -
> - downStartCUX = startCUX;
> - downEndCUX = downStartCUX + numCULeft - 1;
> - downStartCUY = startCUY + numCUTop;
> - downEndCUY = endCUY;
> - downRowIdx = downRowStart + 1;
> - downColIdx = downColStart + 0;
> -
> - saoPart->downPartsIdx[2] = convertLevelRowCol2Idx(downLevel, downRowIdx, downColIdx);
> -
> - initSAOParam(saoParam, downLevel, downRowIdx, downColIdx, partIdx, downStartCUX, downEndCUX, downStartCUY, downEndCUY, plane);
> -
> - downStartCUX = startCUX + numCULeft;
> - downEndCUX = endCUX;
> - downStartCUY = startCUY + numCUTop;
> - downEndCUY = endCUY;
> - downRowIdx = downRowStart + 1;
> - downColIdx = downColStart + 1;
> -
> - saoPart->downPartsIdx[3] = convertLevelRowCol2Idx(downLevel, downRowIdx, downColIdx);
> -
> - initSAOParam(saoParam, downLevel, downRowIdx, downColIdx, partIdx, downStartCUX, downEndCUX, downStartCUY, downEndCUY, plane);
> - }
> - else
> - {
> - saoPart->downPartsIdx[0] = saoPart->downPartsIdx[1] = saoPart->downPartsIdx[2] = saoPart->downPartsIdx[3] = -1;
> - }
> -}
> -
> /* reset SAO parameters once per frame */
> void SAO::resetSAOParam(SAOParam *saoParam)
> {
> - int numComponet = 3;
> -
> - for (int c = 0; c < numComponet; c++)
> - {
> - if (c < 2)
> - saoParam->bSaoFlag[c] = false;
> -
> - for (int i = 0; i < s_numCulPartsLevel[m_maxSplitLevel]; i++)
> - {
> - saoParam->saoPart[c][i].bestType = -1;
> - saoParam->saoPart[c][i].bSplit = false;
> - saoParam->saoPart[c][i].bProcessed = false;
> - saoParam->saoPart[c][i].minCost = MAX_DOUBLE;
> - saoParam->saoPart[c][i].minDist = MAX_INT;
> - saoParam->saoPart[c][i].minRate = MAX_INT;
> - saoParam->saoPart[c][i].subTypeIdx = 0;
> - for (int j = 0; j < SAO_NUM_OFFSET; j++)
> - {
> - saoParam->saoPart[c][i].offset[j] = 0;
> - saoParam->saoPart[c][i].offset[j] = 0;
> - saoParam->saoPart[c][i].offset[j] = 0;
> - }
> - }
> -
> - saoParam->oneUnitFlag[0] = 0;
> - saoParam->oneUnitFlag[1] = 0;
> - saoParam->oneUnitFlag[2] = 0;
> - resetLcuPart(saoParam->saoLcuParam[0]);
> - resetLcuPart(saoParam->saoLcuParam[1]);
> - resetLcuPart(saoParam->saoLcuParam[2]);
> - }
> + saoParam->bSaoFlag[0] = false;
> + saoParam->bSaoFlag[1] = false;
> + resetLcuPart(saoParam->saoLcuParam[0]);
> + resetLcuPart(saoParam->saoLcuParam[1]);
> + resetLcuPart(saoParam->saoLcuParam[2]);
> }
>
> void SAO::startSlice(Frame *pic, Entropy& initState, int qp)
> @@ -647,133 +472,6 @@
> }
>
> /* Process SAO all units */
> -void SAO::processSaoUnitAll(SaoLcuParam* saoLcuParam, bool oneUnitFlag, int plane)
> -{
> - pixel *rec;
> - int picWidthTmp;
> -
> - if (plane)
> - {
> - rec = m_pic->getPicYuvRec()->getChromaAddr(plane);
> - picWidthTmp = m_param->sourceWidth >> m_hChromaShift;
> - }
> - else
> - {
> - rec = m_pic->getPicYuvRec()->getLumaAddr();
> - picWidthTmp = m_param->sourceWidth;
> - }
> -
> - memcpy(m_tmpU1[plane], rec, sizeof(pixel) * picWidthTmp);
> -
> - int frameWidthInCU = m_pic->getFrameWidthInCU();
> - int frameHeightInCU = m_pic->getFrameHeightInCU();
> - int stride;
> - bool isChroma = !!plane;
> - uint32_t cuHeightTmp = isChroma ? (g_maxCUSize >> m_vChromaShift) : g_maxCUSize;
> -
> - const int boShift = X265_DEPTH - SAO_BO_BITS;
> -
> - for (int idxY = 0; idxY < frameHeightInCU; idxY++)
> - {
> - int addr = idxY * frameWidthInCU;
> - if (plane == 0)
> - {
> - rec = m_pic->getPicYuvRec()->getLumaAddr(addr);
> - stride = m_pic->getStride();
> - picWidthTmp = m_param->sourceWidth;
> - }
> - else
> - {
> - rec = m_pic->getPicYuvRec()->getChromaAddr(plane, addr);
> - stride = m_pic->getCStride();
> - picWidthTmp = m_param->sourceWidth >> m_hChromaShift;
> - }
> - for (uint32_t i = 0; i < cuHeightTmp + 1; i++)
> - {
> - m_tmpL1[i] = rec[0];
> - rec += stride;
> - }
> -
> - rec -= (stride << 1);
> -
> - memcpy(m_tmpU2[plane], rec, sizeof(pixel) * picWidthTmp);
> -
> - for (int idxX = 0; idxX < frameWidthInCU; idxX++)
> - {
> - addr = idxY * frameWidthInCU + idxX;
> -
> - int typeIdx;
> - bool mergeLeftFlag;
> -
> - if (oneUnitFlag)
> - {
> - typeIdx = saoLcuParam[0].typeIdx;
> - mergeLeftFlag = (addr == 0) ? 0 : 1;
> - }
> - else
> - {
> - typeIdx = saoLcuParam[addr].typeIdx;
> - mergeLeftFlag = saoLcuParam[addr].mergeLeftFlag;
> - }
> - if (typeIdx >= 0)
> - {
> - if (!mergeLeftFlag)
> - {
> - if (typeIdx == SAO_BO)
> - {
> - pixel* offsetBo = m_offsetBo;
> - int offset[SAO_NUM_BO_CLASSES];
> - memset(offset, 0, sizeof(offset));
> -
> - for (int i = 0; i < SAO_NUM_OFFSET; i++)
> - offset[((saoLcuParam[addr].subTypeIdx + i) & (SAO_NUM_BO_CLASSES - 1))] = saoLcuParam[addr].offset[i] << SAO_BIT_INC;
> -
> - for (int i = 0; i < (1 << X265_DEPTH); i++)
> - offsetBo[i] = m_clipTable[i + offset[i >> boShift]];
> - }
> - else // if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3)
> - {
> - int offset[NUM_EDGETYPE];
> - offset[0] = 0;
> - for (int i = 0; i < SAO_NUM_OFFSET; i++)
> - offset[i + 1] = saoLcuParam[addr].offset[i] << SAO_BIT_INC;
> -
> - for (int edgeType = 0; edgeType < NUM_EDGETYPE; edgeType++)
> - m_offsetEo[edgeType] = (int8_t)offset[s_eoTable[edgeType]];
> - }
> - }
> - processSaoCu(addr, typeIdx, plane);
> - }
> - else
> - {
> - if (idxX != (frameWidthInCU - 1))
> - {
> - if (isChroma)
> - {
> - rec = m_pic->getPicYuvRec()->getChromaAddr(plane, addr);
> - stride = m_pic->getCStride();
> - }
> - else
> - {
> - rec = m_pic->getPicYuvRec()->getLumaAddr(addr);
> - stride = m_pic->getStride();
> - }
> -
> - int widthShift = isChroma ? (g_maxCUSize >> m_hChromaShift) : g_maxCUSize;
> - for (uint32_t i = 0; i < cuHeightTmp + 1; i++)
> - {
> - m_tmpL1[i] = rec[widthShift - 1];
> - rec += stride;
> - }
> - }
> - }
> - }
> -
> - std::swap(m_tmpU1[plane], m_tmpU2[plane]);
> - }
> -}
> -
> -/* Process SAO all units */
> void SAO::processSaoUnitRow(SaoLcuParam* saoLcuParam, int idxY, int plane)
> {
> pixel *rec;
> @@ -925,272 +623,8 @@
> saoUnitDst->offset[i] = saoUnitSrc->offset[i];
> }
>
> -/* convert QP part to SAO unit */
> -void SAO::convertQT2SaoUnit(SAOParam *saoParam, uint32_t partIdx, int plane)
> -{
> - SAOQTPart* saoPart = &(saoParam->saoPart[plane][partIdx]);
> -
> - if (!saoPart->bSplit)
> - {
> - convertOnePart2SaoUnit(saoParam, partIdx, plane);
> - return;
> - }
> -
> - if (saoPart->partLevel < m_maxSplitLevel)
> - {
> - convertQT2SaoUnit(saoParam, saoPart->downPartsIdx[0], plane);
> - convertQT2SaoUnit(saoParam, saoPart->downPartsIdx[1], plane);
> - convertQT2SaoUnit(saoParam, saoPart->downPartsIdx[2], plane);
> - convertQT2SaoUnit(saoParam, saoPart->downPartsIdx[3], plane);
> - }
> -}
> -
> -/* convert one SAO part to SAO unit */
> -void SAO::convertOnePart2SaoUnit(SAOParam *saoParam, uint32_t partIdx, int plane)
> -{
> - int frameWidthInCU = m_pic->getFrameWidthInCU();
> - SAOQTPart* saoQTPart = saoParam->saoPart[plane];
> - SaoLcuParam* saoLcuParam = saoParam->saoLcuParam[plane];
> -
> - for (int idxY = saoQTPart[partIdx].startCUY; idxY <= saoQTPart[partIdx].endCUY; idxY++)
> - {
> - for (int idxX = saoQTPart[partIdx].startCUX; idxX <= saoQTPart[partIdx].endCUX; idxX++)
> - {
> - int addr = idxY * frameWidthInCU + idxX;
> - saoLcuParam[addr].partIdxTmp = (int)partIdx;
> - saoLcuParam[addr].typeIdx = saoQTPart[partIdx].bestType;
> - saoLcuParam[addr].subTypeIdx = saoQTPart[partIdx].subTypeIdx;
> - if (saoLcuParam[addr].typeIdx >= 0)
> - {
> - for (int j = 0; j < SAO_NUM_OFFSET; j++)
> - saoLcuParam[addr].offset[j] = saoQTPart[partIdx].offset[j];
> - }
> - else
> - {
> - saoLcuParam[addr].subTypeIdx = saoQTPart[partIdx].subTypeIdx;
> - for (int j = 0; j < SAO_NUM_OFFSET; j++)
> - saoLcuParam[addr].offset[j] = 0;
> - }
> - }
> - }
> -}
> -
> -/* process SAO for one partition */
> -void SAO::rdoSaoOnePart(SAOQTPart *psQTPart, int partIdx, int plane)
> -{
> - SAOQTPart* onePart = &(psQTPart[partIdx]);
> -
> - int64_t estDist;
> -
> - m_distOrg[partIdx] = 0;
> -
> - int bestClassTableBo = 0;
> - int currentDistortionTableBo[MAX_NUM_SAO_CLASS];
> - double currentRdCostTableBo[MAX_NUM_SAO_CLASS];
> - double bestRDCostTableBo = MAX_DOUBLE;
> -
> - int allowMergeLeft;
> - int allowMergeUp;
> - SaoLcuParam saoLcuParamRdo;
> -
> - for (int typeIdx = -1; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)
> - {
> - m_entropyCoder.load(m_rdEntropyCoders[onePart->partLevel][CI_CURR_BEST]);
> - m_entropyCoder.resetBits();
> -
> - if (typeIdx >= 0)
> - {
> - estDist = estSaoTypeDist(partIdx, typeIdx, 0, m_lumaLambda, currentDistortionTableBo, currentRdCostTableBo);
> - if (typeIdx == SAO_BO)
> - {
> - // Estimate Best Position
> - for (int i = 0; i < SAO_NUM_BO_CLASSES - SAO_BO_LEN + 1; i++)
> - {
> - double currentRDCost = 0.0;
> - for (int j = i; j < i + SAO_BO_LEN; j++)
> - currentRDCost += currentRdCostTableBo[j];
> -
> - if (currentRDCost < bestRDCostTableBo)
> - {
> - bestRDCostTableBo = currentRDCost;
> - bestClassTableBo = i;
> - }
> - }
> -
> - // Recode all offsets
> - for (int classIdx = bestClassTableBo; classIdx < bestClassTableBo + SAO_BO_LEN; classIdx++)
> - estDist += currentDistortionTableBo[classIdx];
> - }
> -
> - for (int ry = onePart->startCUY; ry <= onePart->endCUY; ry++)
> - {
> - for (int rx = onePart->startCUX; rx <= onePart->endCUX; rx++)
> - {
> - // get bits for typeIdx = -1
> - allowMergeLeft = 1;
> - allowMergeUp = 1;
> -
> - // reset
> - resetSaoUnit(&saoLcuParamRdo);
> -
> - // set merge flag
> - saoLcuParamRdo.mergeUpFlag = 1;
> - saoLcuParamRdo.mergeLeftFlag = 1;
> -
> - if (ry == onePart->startCUY)
> - saoLcuParamRdo.mergeUpFlag = 0;
> -
> - if (rx == onePart->startCUX)
> - saoLcuParamRdo.mergeLeftFlag = 0;
> -
> - // set type and offsets
> - saoLcuParamRdo.typeIdx = typeIdx;
> - saoLcuParamRdo.subTypeIdx = (typeIdx == SAO_BO) ? bestClassTableBo : 0;
> - for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
> - saoLcuParamRdo.offset[classIdx] = (int)m_offset[partIdx][typeIdx][classIdx + saoLcuParamRdo.subTypeIdx + 1];
> -
> - m_entropyCoder.codeSaoUnitInterleaving(plane, 1, rx, ry, &saoLcuParamRdo, 1, 1, allowMergeLeft, allowMergeUp);
> - }
> - }
> -
> - m_dist[partIdx][typeIdx] = estDist;
> - m_rate[partIdx][typeIdx] = m_entropyCoder.getNumberOfWrittenBits();
> -
> - m_cost[partIdx][typeIdx] = (double)((double)m_dist[partIdx][typeIdx] + m_lumaLambda * (double)m_rate[partIdx][typeIdx]);
> -
> - if (m_cost[partIdx][typeIdx] < m_costPartBest[partIdx])
> - {
> - m_distOrg[partIdx] = 0;
> - m_costPartBest[partIdx] = m_cost[partIdx][typeIdx];
> - m_typePartBest[partIdx] = typeIdx;
> - m_entropyCoder.store(m_rdEntropyCoders[onePart->partLevel][CI_TEMP_BEST]);
> - }
> - }
> - else
> - {
> - for (int ry = onePart->startCUY; ry <= onePart->endCUY; ry++)
> - {
> - for (int rx = onePart->startCUX; rx <= onePart->endCUX; rx++)
> - {
> - // get bits for iTypeIdx = -1
> - allowMergeLeft = 1;
> - allowMergeUp = 1;
> -
> - // reset
> - resetSaoUnit(&saoLcuParamRdo);
> -
> - // set merge flag
> - saoLcuParamRdo.mergeUpFlag = 1;
> - saoLcuParamRdo.mergeLeftFlag = 1;
> -
> - if (ry == onePart->startCUY)
> - saoLcuParamRdo.mergeUpFlag = 0;
> -
> - if (rx == onePart->startCUX)
> - saoLcuParamRdo.mergeLeftFlag = 0;
> -
> - m_entropyCoder.codeSaoUnitInterleaving(plane, 1, rx, ry, &saoLcuParamRdo, 1, 1, allowMergeLeft, allowMergeUp);
> - }
> - }
> - if (m_distOrg[partIdx] < m_costPartBest[partIdx])
> - {
> - m_costPartBest[partIdx] = (double)m_distOrg[partIdx] + m_entropyCoder.getNumberOfWrittenBits() * m_lumaLambda;
> - m_typePartBest[partIdx] = -1;
> - m_entropyCoder.store(m_rdEntropyCoders[onePart->partLevel][CI_TEMP_BEST]);
> - }
> - }
> - }
> -
> - onePart->bProcessed = true;
> - onePart->bSplit = false;
> - onePart->minDist = m_typePartBest[partIdx] >= 0 ? m_dist[partIdx][m_typePartBest[partIdx]] : m_distOrg[partIdx];
> - onePart->minRate = (int)(m_typePartBest[partIdx] >= 0 ? m_rate[partIdx][m_typePartBest[partIdx]] : 0);
> - onePart->minCost = onePart->minDist + m_lumaLambda * onePart->minRate;
> - onePart->bestType = m_typePartBest[partIdx];
> -
> - if (onePart->bestType != -1)
> - {
> - int minIndex = 0;
> - if (onePart->bestType == SAO_BO)
> - {
> - onePart->subTypeIdx = bestClassTableBo;
> - minIndex = onePart->subTypeIdx;
> - }
> - for (int i = 0; i < SAO_NUM_OFFSET; i++)
> - onePart->offset[i] = (int)m_offset[partIdx][onePart->bestType][minIndex + i + 1];
> - }
> -}
> -
> -/* Run partition tree disable */
> -void SAO::disablePartTree(SAOQTPart *psQTPart, int partIdx)
> -{
> - SAOQTPart* pOnePart = &(psQTPart[partIdx]);
> -
> - pOnePart->bSplit = false;
> - pOnePart->bestType = -1;
> -
> - if (pOnePart->partLevel < (int)m_maxSplitLevel)
> - {
> - for (int i = 0; i < SAOQTPart::NUM_DOWN_PART; i++)
> - disablePartTree(psQTPart, pOnePart->downPartsIdx[i]);
> - }
> -}
> -
> -/* Run quadtree decision function */
> -void SAO::runQuadTreeDecision(SAOQTPart *qtPart, int partIdx, double &costFinal, int maxLevel, int plane)
> -{
> - SAOQTPart* onePart = &(qtPart[partIdx]);
> -
> - uint32_t nextDepth = onePart->partLevel + 1;
> -
> - if (!partIdx)
> - costFinal = 0;
> -
> - // SAO for this part
> - if (!onePart->bProcessed)
> - rdoSaoOnePart(qtPart, partIdx, plane);
> -
> - // SAO for sub 4 parts
> - if (onePart->partLevel < maxLevel)
> - {
> - double costNotSplit = m_lumaLambda + onePart->minCost;
> - double costSplit = m_lumaLambda;
> -
> - for (int i = 0; i < SAOQTPart::NUM_DOWN_PART; i++)
> - {
> - if (i) //initialize RD with previous depth buffer
> - m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[nextDepth][CI_NEXT_BEST]);
> - else
> - m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[onePart->partLevel][CI_CURR_BEST]);
> -
> - runQuadTreeDecision(qtPart, onePart->downPartsIdx[i], costFinal, maxLevel, plane);
> - costSplit += costFinal;
> - m_rdEntropyCoders[nextDepth][CI_NEXT_BEST].load(m_rdEntropyCoders[nextDepth][CI_TEMP_BEST]);
> - }
> -
> - if (costSplit < costNotSplit)
> - {
> - costFinal = costSplit;
> - onePart->bSplit = true;
> - onePart->bestType = -1;
> - m_rdEntropyCoders[onePart->partLevel][CI_NEXT_BEST].load(m_rdEntropyCoders[nextDepth][CI_NEXT_BEST]);
> - }
> - else
> - {
> - costFinal = costNotSplit;
> - onePart->bSplit = false;
> - for (int i = 0; i < SAOQTPart::NUM_DOWN_PART; i++)
> - disablePartTree(qtPart, onePart->downPartsIdx[i]);
> -
> - m_rdEntropyCoders[onePart->partLevel][CI_NEXT_BEST].load(m_rdEntropyCoders[onePart->partLevel][CI_TEMP_BEST]);
> - }
> - }
> - else
> - costFinal = onePart->minCost;
> -}
> -
> /* Calculate SAO statistics for current LCU without non-crossing slice */
> -void SAO::calcSaoStatsCu(int addr, int partIdx, int plane)
> +void SAO::calcSaoStatsCu(int addr, int plane)
> {
> int x, y;
> TComDataCU *cu = m_pic->getCU(addr);
> @@ -1216,15 +650,8 @@
> int isLuma = !plane;
> int isChroma = !!plane;
> int numSkipLine = isChroma ? 4 - (2 * m_vChromaShift) : 4;
> -
> - if (!m_param->saoLcuBasedOptimization)
> - numSkipLine = 0;
> -
> int numSkipLineRight = isChroma ? 5 - (2 * m_hChromaShift) : 5;
>
> - if (!m_param->saoLcuBasedOptimization)
> - numSkipLineRight = 0;
> -
> picWidthTmp = isLuma ? m_param->sourceWidth : m_param->sourceWidth >> m_hChromaShift;
> picHeightTmp = isLuma ? m_param->sourceHeight : m_param->sourceHeight >> m_vChromaShift;
> lcuWidth = isLuma ? g_maxCUSize : g_maxCUSize >> m_hChromaShift;
> @@ -1244,13 +671,13 @@
> {
> const int boShift = X265_DEPTH - SAO_BO_BITS;
>
> - if (m_param->saoLcuBasedOptimization && m_param->saoLcuBoundary)
> + if (m_param->saoLcuBoundary)
> {
> numSkipLine = isChroma ? 3 - (2 * m_vChromaShift) : 3;
> numSkipLineRight = isChroma ? 4 - (2 * m_hChromaShift) : 4;
> }
> - stats = m_offsetOrg[partIdx][SAO_BO];
> - counts = m_count[partIdx][SAO_BO];
> + stats = m_offsetOrg[plane][SAO_BO];
> + counts = m_count[plane][SAO_BO];
>
> fenc = m_pic->getPicYuvOrg()->getPlaneAddr(plane, addr);
> recon = m_pic->getPicYuvRec()->getPlaneAddr(plane, addr);
> @@ -1278,13 +705,13 @@
> {
> //if (iSaoType == EO_0)
> {
> - if (m_param->saoLcuBasedOptimization && m_param->saoLcuBoundary)
> + if (m_param->saoLcuBoundary)
> {
> numSkipLine = isChroma ? 3 - (2 * m_vChromaShift) : 3;
> numSkipLineRight = isChroma ? 5 - (2 * m_hChromaShift) : 5;
> }
> - stats = m_offsetOrg[partIdx][SAO_EO_0];
> - counts = m_count[partIdx][SAO_EO_0];
> + stats = m_offsetOrg[plane][SAO_EO_0];
> + counts = m_count[plane][SAO_EO_0];
>
> fenc = m_pic->getPicYuvOrg()->getPlaneAddr(plane, addr);
> recon = m_pic->getPicYuvRec()->getPlaneAddr(plane, addr);
> @@ -1311,13 +738,13 @@
>
> //if (iSaoType == EO_1)
> {
> - if (m_param->saoLcuBasedOptimization && m_param->saoLcuBoundary)
> + if (m_param->saoLcuBoundary)
> {
> numSkipLine = isChroma ? 4 - (2 * m_vChromaShift) : 4;
> numSkipLineRight = isChroma ? 4 - (2 * m_hChromaShift) : 4;
> }
> - stats = m_offsetOrg[partIdx][SAO_EO_1];
> - counts = m_count[partIdx][SAO_EO_1];
> + stats = m_offsetOrg[plane][SAO_EO_1];
> + counts = m_count[plane][SAO_EO_1];
>
> fenc = m_pic->getPicYuvOrg()->getPlaneAddr(plane, addr);
> recon = m_pic->getPicYuvRec()->getPlaneAddr(plane, addr);
> @@ -1352,13 +779,13 @@
> }
> //if (iSaoType == EO_2)
> {
> - if (m_param->saoLcuBasedOptimization && m_param->saoLcuBoundary)
> + if (m_param->saoLcuBoundary)
> {
> numSkipLine = isChroma ? 4 - (2 * m_vChromaShift) : 4;
> numSkipLineRight = isChroma ? 5 - (2 * m_hChromaShift) : 5;
> }
> - stats = m_offsetOrg[partIdx][SAO_EO_2];
> - counts = m_count[partIdx][SAO_EO_2];
> + stats = m_offsetOrg[plane][SAO_EO_2];
> + counts = m_count[plane][SAO_EO_2];
>
> fenc = m_pic->getPicYuvOrg()->getPlaneAddr(plane, addr);
> recon = m_pic->getPicYuvRec()->getPlaneAddr(plane, addr);
> @@ -1398,13 +825,13 @@
> }
> //if (iSaoType == EO_3)
> {
> - if (m_param->saoLcuBasedOptimization && m_param->saoLcuBoundary)
> + if (m_param->saoLcuBoundary)
> {
> numSkipLine = isChroma ? 4 - (2 * m_vChromaShift) : 4;
> numSkipLineRight = isChroma ? 5 - (2 * m_hChromaShift) : 5;
> }
> - stats = m_offsetOrg[partIdx][SAO_EO_3];
> - counts = m_count[partIdx][SAO_EO_3];
> + stats = m_offsetOrg[plane][SAO_EO_3];
> + counts = m_count[plane][SAO_EO_3];
>
> fenc = m_pic->getPicYuvOrg()->getPlaneAddr(plane, addr);
> recon = m_pic->getPicYuvRec()->getPlaneAddr(plane, addr);
> @@ -1721,84 +1148,13 @@
> }
> }
>
> -void SAO::getSaoStats(SAOQTPart *psQTPart, int plane)
> -{
> - int levelIdx, partIdx;
> - int i;
> - int lcuIdx;
> - int lcuIdy;
> - int frameWidthInCU = m_pic->getFrameWidthInCU();
> - int downPartIdx;
> - int partStart;
> - int partEnd;
> - SAOQTPart* onePart;
> -
> - if (!m_maxSplitLevel)
> - {
> - partIdx = 0;
> - onePart = &(psQTPart[partIdx]);
> - for (lcuIdy = onePart->startCUY; lcuIdy <= onePart->endCUY; lcuIdy++)
> - {
> - for (lcuIdx = onePart->startCUX; lcuIdx <= onePart->endCUX; lcuIdx++)
> - {
> - int addr = lcuIdy * frameWidthInCU + lcuIdx;
> - calcSaoStatsCu(addr, partIdx, plane);
> - }
> - }
> - }
> - else
> - {
> - for (partIdx = s_numCulPartsLevel[m_maxSplitLevel - 1]; partIdx < s_numCulPartsLevel[m_maxSplitLevel]; partIdx++)
> - {
> - onePart = &(psQTPart[partIdx]);
> - for (lcuIdy = onePart->startCUY; lcuIdy <= onePart->endCUY; lcuIdy++)
> - {
> - for (lcuIdx = onePart->startCUX; lcuIdx <= onePart->endCUX; lcuIdx++)
> - {
> - int addr = lcuIdy * frameWidthInCU + lcuIdx;
> - calcSaoStatsCu(addr, partIdx, plane);
> - }
> - }
> - }
> -
> - for (levelIdx = m_maxSplitLevel - 1; levelIdx >= 0; levelIdx--)
> - {
> - partStart = (levelIdx > 0) ? s_numCulPartsLevel[levelIdx - 1] : 0;
> - partEnd = s_numCulPartsLevel[levelIdx];
> -
> - for (partIdx = partStart; partIdx < partEnd; partIdx++)
> - {
> - onePart = &(psQTPart[partIdx]);
> - for (i = 0; i < SAOQTPart::NUM_DOWN_PART; i++)
> - {
> - downPartIdx = onePart->downPartsIdx[i];
> - for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)
> - {
> - for (int classIdx = 0; classIdx < (typeIdx < SAO_BO ? SAO_EO_LEN : SAO_NUM_BO_CLASSES) + 1; classIdx++)
> - {
> - m_offsetOrg[partIdx][typeIdx][classIdx] += m_offsetOrg[downPartIdx][typeIdx][classIdx];
> - m_count[partIdx][typeIdx][classIdx] += m_count[downPartIdx][typeIdx][classIdx];
> - }
> - }
> - }
> - }
> - }
> - }
> -}
> -
> /* reset offset statistics */
> void SAO::resetStats()
> {
> - for (int i = 0; i < m_numTotalParts; i++)
> + for (int i = 0; i < NUM_PLANE; i++)
> {
> - m_costPartBest[i] = MAX_DOUBLE;
> - m_typePartBest[i] = -1;
> - m_distOrg[i] = 0;
> for (int j = 0; j < MAX_NUM_SAO_TYPE; j++)
> {
> - m_dist[i][j] = 0;
> - m_rate[i][j] = 0;
> - m_cost[i][j] = 0;
> for (int k = 0; k < MAX_NUM_SAO_CLASS; k++)
> {
> m_count[i][j][k] = 0;
> @@ -1809,31 +1165,6 @@
> }
> }
>
> -/* Sample adaptive offset process */
> -void SAO::SAOProcess(SAOParam *saoParam)
> -{
> - X265_CHECK(!m_param->saoLcuBasedOptimization, "SAO LCU mode failure\n");
> - double costFinal = 0;
> - saoParam->bSaoFlag[0] = true;
> - saoParam->bSaoFlag[1] = false;
> -
> - getSaoStats(saoParam->saoPart[0], 0);
> - runQuadTreeDecision(saoParam->saoPart[0], 0, costFinal, m_maxSplitLevel, 0);
> - saoParam->bSaoFlag[0] = costFinal < 0;
> -
> - if (saoParam->bSaoFlag[0])
> - {
> - convertQT2SaoUnit(saoParam, 0, 0);
> - assignSaoUnitSyntax(saoParam->saoLcuParam[0], saoParam->saoPart[0], saoParam->oneUnitFlag[0]);
> - processSaoUnitAll(saoParam->saoLcuParam[0], saoParam->oneUnitFlag[0], 0);
> - }
> - if (saoParam->bSaoFlag[1])
> - {
> - processSaoUnitAll(saoParam->saoLcuParam[1], saoParam->oneUnitFlag[1], 1);
> - processSaoUnitAll(saoParam->saoLcuParam[2], saoParam->oneUnitFlag[2], 2);
> - }
> -}
> -
> /* Check merge SAO unit */
> void SAO::checkMerge(SaoLcuParam * saoUnitCurr, SaoLcuParam * saoUnitCheck, int dir)
> {
> @@ -1885,65 +1216,10 @@
> }
> }
>
> -/** Assign SAO unit syntax from picture-based algorithm */
> -void SAO::assignSaoUnitSyntax(SaoLcuParam* saoLcuParam, SAOQTPart* saoPart, bool &oneUnitFlag)
> -{
> - if (saoPart->bSplit == 0)
> - oneUnitFlag = 1;
> - else
> - {
> - oneUnitFlag = 0;
> -
> - int idxCount = -1;
> - saoLcuParam[0].mergeUpFlag = 0;
> - saoLcuParam[0].mergeLeftFlag = 0;
> -
> - for (int j = 0; j < m_numCuInHeight; j++)
> - {
> - for (int i = 0; i < m_numCuInWidth; i++)
> - {
> - int addr = i + j * m_numCuInWidth;
> - int addrUp = (j == 0) ? -1 : addr - m_numCuInWidth;
> - int addrLeft = (i == 0) ? -1 : addr - 1;
> - int idx = saoLcuParam[addr].partIdxTmp;
> - int idxLeft = (addrLeft == -1) ? -1 : saoLcuParam[addrLeft].partIdxTmp;
> - int idxUp = (addrUp == -1) ? -1 : saoLcuParam[addrUp].partIdxTmp;
> -
> - if (idx != idxLeft && idx != idxUp)
> - {
> - saoLcuParam[addr].mergeUpFlag = 0;
> - idxCount++;
> - saoLcuParam[addr].mergeLeftFlag = 0;
> - saoLcuParam[addr].partIdx = idxCount;
> - }
> - else if (idx == idxLeft)
> - {
> - saoLcuParam[addr].mergeUpFlag = 1;
> - saoLcuParam[addr].mergeLeftFlag = 1;
> - saoLcuParam[addr].partIdx = saoLcuParam[addrLeft].partIdx;
> - }
> - else if (idx == idxUp)
> - {
> - saoLcuParam[addr].mergeUpFlag = 1;
> - saoLcuParam[addr].mergeLeftFlag = 0;
> - saoLcuParam[addr].partIdx = saoLcuParam[addrUp].partIdx;
> - }
> - if (addrUp != -1)
> - checkMerge(&saoLcuParam[addr], &saoLcuParam[addrUp], 1);
> - if (addrLeft != -1)
> - checkMerge(&saoLcuParam[addr], &saoLcuParam[addrLeft], 0);
> - }
> - }
> - }
> -}
> -
> void SAO::rdoSaoUnitRowInit(SAOParam *saoParam)
> {
> saoParam->bSaoFlag[0] = true;
> saoParam->bSaoFlag[1] = true;
> - saoParam->oneUnitFlag[0] = false;
> - saoParam->oneUnitFlag[1] = false;
> - saoParam->oneUnitFlag[2] = false;
>
> m_numNoSao[0] = 0; // Luma
> m_numNoSao[1] = 0; // Chroma
> @@ -2005,7 +1281,7 @@
> for (k = 0; k < MAX_NUM_SAO_CLASS; k++)
> {
> m_offset[compIdx][j][k] = 0;
> - if (m_param->saoLcuBasedOptimization && m_param->saoLcuBoundary)
> + if (m_param->saoLcuBoundary)
> {
> m_count[compIdx][j][k] = m_countPreDblk[addr][compIdx][j][k];
> m_offsetOrg[compIdx][j][k] = m_offsetOrgPreDblk[addr][compIdx][j][k];
> @@ -2023,10 +1299,10 @@
> saoParam->saoLcuParam[compIdx][addr].mergeLeftFlag = 0;
> saoParam->saoLcuParam[compIdx][addr].subTypeIdx = 0;
> if ((compIdx == 0 && saoParam->bSaoFlag[0]) || (compIdx > 0 && saoParam->bSaoFlag[1]))
> - calcSaoStatsCu(addr, compIdx, compIdx);
> + calcSaoStatsCu(addr, compIdx);
> }
>
> - saoComponentParamDist(allowMergeLeft, allowMergeUp, saoParam, addr, addrUp, addrLeft, 0,
> + saoComponentParamDist(allowMergeLeft, allowMergeUp, saoParam, addr, addrUp, addrLeft,
> &mergeSaoParam[0][0], &compDistortion[0]);
>
> sao2ChromaParamDist(allowMergeLeft, allowMergeUp, saoParam, addr, addrUp, addrLeft,
> @@ -2169,13 +1445,13 @@
> return offsetOutput;
> }
>
> -void SAO::saoComponentParamDist(int allowMergeLeft, int allowMergeUp, SAOParam *saoParam, int addr, int addrUp, int addrLeft, int plane,
> +void SAO::saoComponentParamDist(int allowMergeLeft, int allowMergeUp, SAOParam *saoParam, int addr, int addrUp, int addrLeft,
> SaoLcuParam *compSaoParam, double *compDistortion)
> {
> int64_t estDist;
> int64_t bestDist;
>
> - SaoLcuParam* saoLcuParam = &(saoParam->saoLcuParam[plane][addr]);
> + SaoLcuParam* saoLcuParam = &(saoParam->saoLcuParam[0][addr]);
> SaoLcuParam* saoLcuParamNeighbor = NULL;
>
> resetSaoUnit(saoLcuParam);
> @@ -2194,14 +1470,14 @@
>
> m_entropyCoder.load(m_rdEntropyCoders[0][CI_TEMP_BEST]);
> m_entropyCoder.resetBits();
> - m_entropyCoder.codeSaoOffset(&saoLcuParamRdo, plane);
> + m_entropyCoder.codeSaoOffset(&saoLcuParamRdo, 0);
> dCostPartBest = m_entropyCoder.getNumberOfWrittenBits() * m_lumaLambda;
> copySaoUnit(saoLcuParam, &saoLcuParamRdo);
> bestDist = 0;
>
> for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)
> {
> - estDist = estSaoTypeDist(plane, typeIdx, 0, m_lumaLambda, currentDistortionTableBo, currentRdCostTableBo);
> + estDist = estSaoTypeDist(0, typeIdx, 0, m_lumaLambda, currentDistortionTableBo, currentRdCostTableBo);
>
> if (typeIdx == SAO_BO)
> {
> @@ -2231,18 +1507,18 @@
> saoLcuParamRdo.mergeUpFlag = 0;
> saoLcuParamRdo.subTypeIdx = (typeIdx == SAO_BO) ? bestClassTableBo : 0;
> for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
> - saoLcuParamRdo.offset[classIdx] = (int)m_offset[plane][typeIdx][classIdx + saoLcuParamRdo.subTypeIdx + 1];
> + saoLcuParamRdo.offset[classIdx] = (int)m_offset[0][typeIdx][classIdx + saoLcuParamRdo.subTypeIdx + 1];
>
> m_entropyCoder.load(m_rdEntropyCoders[0][CI_TEMP_BEST]);
> m_entropyCoder.resetBits();
> - m_entropyCoder.codeSaoOffset(&saoLcuParamRdo, plane);
> + m_entropyCoder.codeSaoOffset(&saoLcuParamRdo, 0);
>
> uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
> - m_cost[plane][typeIdx] = (double)((double)estDist + m_lumaLambda * (double)estRate);
> + double cost = (double)((double)estDist + m_lumaLambda * (double)estRate);
>
> - if (m_cost[plane][typeIdx] < dCostPartBest)
> + if (cost < dCostPartBest)
> {
> - dCostPartBest = m_cost[plane][typeIdx];
> + dCostPartBest = cost;
> copySaoUnit(saoLcuParam, &saoLcuParamRdo);
> bestDist = estDist;
> }
> @@ -2250,7 +1526,7 @@
>
> compDistortion[0] += ((double)bestDist / m_lumaLambda);
> m_entropyCoder.load(m_rdEntropyCoders[0][CI_TEMP_BEST]);
> - m_entropyCoder.codeSaoOffset(saoLcuParam, plane);
> + m_entropyCoder.codeSaoOffset(saoLcuParam, 0);
> m_entropyCoder.store(m_rdEntropyCoders[0][CI_TEMP_BEST]);
>
> // merge left or merge up
> @@ -2259,9 +1535,9 @@
> {
> saoLcuParamNeighbor = NULL;
> if (allowMergeLeft && addrLeft >= 0 && idxNeighbor == 0)
> - saoLcuParamNeighbor = &(saoParam->saoLcuParam[plane][addrLeft]);
> + saoLcuParamNeighbor = &(saoParam->saoLcuParam[0][addrLeft]);
> else if (allowMergeUp && addrUp >= 0 && idxNeighbor == 1)
> - saoLcuParamNeighbor = &(saoParam->saoLcuParam[plane][addrUp]);
> + saoLcuParamNeighbor = &(saoParam->saoLcuParam[0][addrUp]);
> if (saoLcuParamNeighbor != NULL)
> {
> estDist = 0;
> @@ -2273,7 +1549,7 @@
> for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
> {
> mergeOffset = saoLcuParamNeighbor->offset[classIdx];
> - estDist += estSaoDist(m_count[plane][typeIdx][classIdx + mergeBandPosition + 1], mergeOffset, m_offsetOrg[plane][typeIdx][classIdx + mergeBandPosition + 1], 0);
> + estDist += estSaoDist(m_count[0][typeIdx][classIdx + mergeBandPosition + 1], mergeOffset, m_offsetOrg[0][typeIdx][classIdx + mergeBandPosition + 1], 0);
> }
> }
> else
> @@ -2382,11 +1658,11 @@
> }
>
> uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
> - m_cost[1][typeIdx] = (double)((double)(estDist[0] + estDist[1]) + m_chromaLambda * (double)estRate);
> + double cost = (double)((double)(estDist[0] + estDist[1]) + m_chromaLambda * (double)estRate);
>
> - if (m_cost[1][typeIdx] < costPartBest)
> + if (cost < costPartBest)
> {
> - costPartBest = m_cost[1][typeIdx];
> + costPartBest = cost;
> copySaoUnit(saoLcuParam[0], &saoLcuParamRdo[0]);
> copySaoUnit(saoLcuParam[1], &saoLcuParamRdo[1]);
> bestDist = (estDist[0] + estDist[1]);
> diff -r 5a6845566d14 -r 3eacdaa30440 source/encoder/sao.h
> --- a/source/encoder/sao.h Mon Sep 29 17:37:47 2014 -0500
> +++ b/source/encoder/sao.h Tue Sep 30 09:48:12 2014 +0900
> @@ -59,25 +59,18 @@
> enum { SAO_BIT_INC = X265_MAX(X265_DEPTH - 10, 0) };
> enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
> enum { NUM_EDGETYPE = 5 };
> + enum { NUM_PLANE = 3 };
>
> - static const int s_numCulPartsLevel[5];
> static const uint32_t s_eoTable[NUM_EDGETYPE];
>
> typedef int64_t (PerClass[MAX_NUM_SAO_TYPE][MAX_NUM_SAO_CLASS]);
> typedef int64_t (PerType[MAX_NUM_SAO_TYPE]);
> - typedef double (PerTypeD[MAX_NUM_SAO_TYPE]);
> typedef int64_t (PerPlane[3][MAX_NUM_SAO_TYPE][MAX_NUM_SAO_CLASS]);
>
> /* allocated per part */
> PerClass* m_count;
> PerClass* m_offset;
> PerClass* m_offsetOrg;
> - PerType* m_rate;
> - PerType* m_dist;
> - PerTypeD* m_cost;
> - double* m_costPartBest;
> - int64_t* m_distOrg;
> - int* m_typePartBest;
>
> /* allocated per LCU */
> PerPlane* m_countPreDblk;
> @@ -87,11 +80,8 @@
> pixel* m_offsetBo;
> int8_t m_offsetEo[NUM_EDGETYPE];
>
> - int m_maxSplitLevel;
> -
> int m_numCuInWidth;
> int m_numCuInHeight;
> - int m_numTotalParts;
> int m_hChromaShift;
> int m_vChromaShift;
>
> @@ -122,7 +112,6 @@
> bool create(x265_param *param);
> void destroy();
>
> - void initSAOParam(SAOParam* saoParam, int partLevel, int partRow, int partCol, int parentPartIdx, int startCUX, int endCUX, int startCUY, int endCUY, int plane) const;
> void allocSaoParam(SAOParam* saoParam) const;
>
> void startSlice(Frame *pic, Entropy& initState, int qp);
> @@ -130,30 +119,19 @@
> void resetStats();
> void resetSaoUnit(SaoLcuParam* saoUnit);
>
> - void SAOProcess(SAOParam* saoParam);
> -
> // LCU-basd SAO process without slice granularity
> void processSaoCu(int addr, int partIdx, int plane);
>
> void resetLcuPart(SaoLcuParam* saoLcuParam);
> - void convertQT2SaoUnit(SAOParam* saoParam, uint32_t partIdx, int plane);
> - void convertOnePart2SaoUnit(SAOParam *saoParam, uint32_t partIdx, int plane);
> - void processSaoUnitAll(SaoLcuParam* saoLcuParam, bool oneUnitFlag, int plane);
> void processSaoUnitRow(SaoLcuParam* saoLcuParam, int idxY, int plane);
>
> void copySaoUnit(SaoLcuParam* saoUnitDst, SaoLcuParam* saoUnitSrc);
>
> - void runQuadTreeDecision(SAOQTPart *psQTPart, int partIdx, double &costFinal, int maxLevel, int plane);
> - void rdoSaoOnePart(SAOQTPart *psQTPart, int partIdx, int plane);
> -
> - void disablePartTree(SAOQTPart *psQTPart, int partIdx);
> - void getSaoStats(SAOQTPart *psQTPart, int plane);
> - void calcSaoStatsCu(int addr, int partIdx, int plane);
> + void calcSaoStatsCu(int addr, int plane);
> void calcSaoStatsCu_BeforeDblk(Frame* pic, int idxX, int idxY);
> - void assignSaoUnitSyntax(SaoLcuParam* saoLcuParam, SAOQTPart* saoPart, bool &oneUnitFlag);
> void checkMerge(SaoLcuParam* lcuParamCurr, SaoLcuParam * lcuParamCheck, int dir);
>
> - void saoComponentParamDist(int allowMergeLeft, int allowMergeUp, SAOParam *saoParam, int addr, int addrUp, int addrLeft, int plane,
> + void saoComponentParamDist(int allowMergeLeft, int allowMergeUp, SAOParam *saoParam, int addr, int addrUp, int addrLeft,
> SaoLcuParam *compSaoParam, double *distortion);
> void sao2ChromaParamDist(int allowMergeLeft, int allowMergeUp, SAOParam *saoParam, int addr, int addrUp, int addrLeft,
> SaoLcuParam *crSaoParam, SaoLcuParam *cbSaoParam, double *distortion);
> diff -r 5a6845566d14 -r 3eacdaa30440 source/x265.cpp
> --- a/source/x265.cpp Mon Sep 29 17:37:47 2014 -0500
> +++ b/source/x265.cpp Tue Sep 30 09:48:12 2014 +0900
> @@ -457,7 +457,6 @@
> H0(" --[no-]lft Enable Deblocking Loop Filter. Default %s\n", OPT(param->bEnableLoopFilter));
> H0(" --[no-]sao Enable Sample Adaptive Offset. Default %s\n", OPT(param->bEnableSAO));
> H0(" --sao-lcu-bounds <integer> 0: right/bottom boundary areas skipped 1: non-deblocked pixels are used. Default %d\n", param->saoLcuBoundary);
> - H0(" --sao-lcu-opt <integer> 0: SAO picture-based optimization, 1: SAO LCU-based optimization. Default %d\n", param->saoLcuBasedOptimization);
> H0("\nVUI options:\n");
> H0(" --sar <width:height|int> Sample Aspect Ratio, the ratio of width to height of an individual pixel.\n");
> H0(" Choose from 0=undef, 1=1:1(\"square\"), 2=12:11, 3=10:11, 4=16:11,\n");
> diff -r 5a6845566d14 -r 3eacdaa30440 source/x265.h
> --- a/source/x265.h Mon Sep 29 17:37:47 2014 -0500
> +++ b/source/x265.h Tue Sep 30 09:48:12 2014 +0900
> @@ -734,12 +734,6 @@
> * pixels are used entirely. Default is 0 */
> int saoLcuBoundary;
>
> - /* Select the scope of the SAO optimization. If 0 SAO is performed over the
> - * entire output picture at once, this can severly restrict frame
> - * parallelism so it is not recommended for many-core machines. If 1 SAO is
> - * performed on LCUs in series. Default is 1 */
> - int saoLcuBasedOptimization;
> -
> /* Generally a small signed integer which offsets the QP used to quantize
> * the Cb chroma residual (delta from luma QP specified by rate-control).
> * Default is 0, which is recommended */
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
--
Steve Borho
More information about the x265-devel mailing list