[x265] sao: refine, fix sao-non-deblock [CHANGES OUTPUT (RExt, sao-non-deblock)]
Steve Borho
steve at borho.org
Sun Oct 5 23:11:35 CEST 2014
On 10/05, Satoshi Nakagawa wrote:
> # HG changeset patch
> # User Satoshi Nakagawa <nakagawa424 at oki.com>
> # Date 1412500756 -32400
> # Sun Oct 05 18:19:16 2014 +0900
> # Node ID 64ea900398eb29ddd1c12df8126fa9866a280c81
> # Parent b6d49505b179cb509aa76f3a065192f0b4926579
> sao: refine, fix sao-non-deblock [CHANGES OUTPUT (RExt, sao-non-deblock)]
This looks really good, but unfortunately it collides with a number of
refactors I was working on at the same time. Could you rebase it onto the
current tip and resend?
> diff -r b6d49505b179 -r 64ea900398eb source/common/common.h
> --- a/source/common/common.h Thu Oct 02 16:47:55 2014 -0500
> +++ b/source/common/common.h Sun Oct 05 18:19:16 2014 +0900
> @@ -132,6 +132,12 @@
> return std::min<T>(std::max<T>(minVal, a), maxVal);
> }
>
> +template<typename T>
> +inline T x265_min(T a, T b) { return a < b ? a : b; }
> +
> +template<typename T>
> +inline T x265_max(T a, T b) { return a > b ? a : b; }
> +
> typedef int16_t coeff_t; // transform coefficient
>
> #define X265_MIN(a, b) ((a) < (b) ? (a) : (b))
> @@ -224,17 +230,15 @@
> bool mergeUpFlag;
> bool mergeLeftFlag;
> int typeIdx;
> - int subTypeIdx; // indicates EO class or BO band position
> + uint32_t bandPos; // BO band position
> int offset[SAO_NUM_OFFSET];
> - int partIdx;
> - int partIdxTmp;
>
> void reset()
> {
> mergeUpFlag = false;
> mergeLeftFlag = false;
> typeIdx = -1;
> - subTypeIdx = 0;
> + bandPos = 0;
> offset[0] = 0;
> offset[1] = 0;
> offset[2] = 0;
> @@ -246,7 +250,6 @@
> {
> SaoCtuParam* ctuParam[3];
> bool bSaoFlag[2];
> - int numCuInHeight;
> int numCuInWidth;
>
> SAOParam()
> @@ -254,6 +257,7 @@
> for (int i = 0; i < 3; i++)
> ctuParam[i] = NULL;
> }
> +
> ~SAOParam()
> {
> delete[] ctuParam[0];
> diff -r b6d49505b179 -r 64ea900398eb source/encoder/entropy.cpp
> --- a/source/encoder/entropy.cpp Thu Oct 02 16:47:55 2014 -0500
> +++ b/source/encoder/entropy.cpp Sun Oct 05 18:19:16 2014 +0900
> @@ -511,7 +511,7 @@
> }
>
> // We need to split, so don't try these modes.
> - if (cuSplitFlag)
> + if (cuSplitFlag)
> codeSplitFlag(ctu, absPartIdx, depth);
>
> if (depth < ctu->getDepth(absPartIdx) && depth < g_maxCUDepth)
> @@ -863,74 +863,40 @@
> encodeTransform(cu, state, lumaOffset, chromaOffset, absPartIdx, absPartIdxStep, depth, log2CUSize, 0, bCodeDQP, depthRange);
> }
>
> -void Entropy::codeSaoOffset(SaoCtuParam* saoLcuParam, uint32_t compIdx)
> +void Entropy::codeSaoOffset(const SaoCtuParam* saoLcuParam, int plane)
> {
> - uint32_t symbol;
> - int i;
> + int typeIdx = saoLcuParam->typeIdx;
>
> - symbol = saoLcuParam->typeIdx + 1;
> - if (compIdx != 2)
> - codeSaoTypeIdx(symbol);
> + if (plane != 2)
> + {
> + encodeBin(typeIdx >= 0, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
> + if (typeIdx >= 0)
> + encodeBinEP(typeIdx < SAO_BO ? 1 : 0);
> + }
>
> - if (symbol)
> + if (typeIdx >= 0)
> {
> - if (saoLcuParam->typeIdx < SAO_BO && compIdx != 2)
> - saoLcuParam->subTypeIdx = saoLcuParam->typeIdx;
> + enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
>
> - int offsetTh = 1 << X265_MIN(X265_DEPTH - 5, 5);
> - if (saoLcuParam->typeIdx == SAO_BO)
> + if (typeIdx == SAO_BO)
> {
> - for (i = 0; i < SAO_BO_LEN; i++)
> - {
> - uint32_t absOffset = ((saoLcuParam->offset[i] < 0) ? -saoLcuParam->offset[i] : saoLcuParam->offset[i]);
> - codeSaoMaxUvlc(absOffset, offsetTh - 1);
> - }
> + for (int i = 0; i < SAO_BO_LEN; i++)
> + codeSaoMaxUvlc(abs(saoLcuParam->offset[i]), OFFSET_THRESH - 1);
>
> - for (i = 0; i < SAO_BO_LEN; i++)
> - {
> + for (int i = 0; i < SAO_BO_LEN; i++)
> if (saoLcuParam->offset[i] != 0)
> - {
> - uint32_t sign = (saoLcuParam->offset[i] < 0) ? 1 : 0;
> - codeSAOSign(sign);
> - }
> - }
> + encodeBinEP(saoLcuParam->offset[i] < 0);
>
> - symbol = (uint32_t)(saoLcuParam->subTypeIdx);
> - codeSaoUflc(5, symbol);
> + encodeBinsEP(saoLcuParam->bandPos, 5);
> }
> - else // if (saoLcuParam->typeIdx < SAO_BO)
> + else // if (typeIdx < SAO_BO)
> {
> - codeSaoMaxUvlc(saoLcuParam->offset[0], offsetTh - 1);
> - codeSaoMaxUvlc(saoLcuParam->offset[1], offsetTh - 1);
> - codeSaoMaxUvlc(-saoLcuParam->offset[2], offsetTh - 1);
> - codeSaoMaxUvlc(-saoLcuParam->offset[3], offsetTh - 1);
> - if (compIdx != 2)
> - {
> - symbol = (uint32_t)(saoLcuParam->subTypeIdx);
> - codeSaoUflc(2, symbol);
> - }
> - }
> - }
> -}
> -
> -void Entropy::codeSaoUnitInterleaving(int compIdx, bool saoFlag, int rx, int ry, SaoCtuParam* saoLcuParam, int cuAddrInSlice, int cuAddrUpInSlice, int allowMergeLeft, int allowMergeUp)
> -{
> - if (saoFlag)
> - {
> - if (rx > 0 && cuAddrInSlice != 0 && allowMergeLeft)
> - codeSaoMerge(saoLcuParam->mergeLeftFlag);
> - else
> - saoLcuParam->mergeLeftFlag = 0;
> -
> - if (!saoLcuParam->mergeLeftFlag)
> - {
> - if ((ry > 0) && (cuAddrUpInSlice >= 0) && allowMergeUp)
> - codeSaoMerge(saoLcuParam->mergeUpFlag);
> - else
> - saoLcuParam->mergeUpFlag = 0;
> -
> - if (!saoLcuParam->mergeUpFlag)
> - codeSaoOffset(saoLcuParam, compIdx);
> + codeSaoMaxUvlc(saoLcuParam->offset[0], OFFSET_THRESH - 1);
> + codeSaoMaxUvlc(saoLcuParam->offset[1], OFFSET_THRESH - 1);
> + codeSaoMaxUvlc(-saoLcuParam->offset[2], OFFSET_THRESH - 1);
> + codeSaoMaxUvlc(-saoLcuParam->offset[3], OFFSET_THRESH - 1);
> + if (plane != 2)
> + encodeBinsEP((uint32_t)(typeIdx), 2);
> }
> }
> }
> @@ -1584,7 +1550,7 @@
>
> if (cu->m_slice->m_pps->bTransformSkipEnabled)
> codeTransformSkipFlags(cu, absPartIdx, trSize, ttype);
> -
> +
> bool bIsLuma = ttype == TEXT_LUMA;
>
> // select scans
> @@ -1758,12 +1724,12 @@
> {
> X265_CHECK(maxSymbol > 0, "maxSymbol too small\n");
>
> - uint32_t isCodeLast = (maxSymbol > code) ? 1 : 0;
> - uint32_t isCodeNonZero = (code != 0) ? 1 : 0;
> + uint32_t isCodeNonZero = !!code;
>
> encodeBinEP(isCodeNonZero);
> if (isCodeNonZero)
> {
> + uint32_t isCodeLast = (maxSymbol > code);
> uint32_t mask = (1 << (code - 1)) - 1;
> uint32_t len = code - 1 + isCodeLast;
> mask <<= isCodeLast;
> @@ -1772,14 +1738,6 @@
> }
> }
>
> -/** Code SAO type index */
> -void Entropy::codeSaoTypeIdx(uint32_t code)
> -{
> - encodeBin((code == 0) ? 0 : 1, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
> - if (code)
> - encodeBinEP(code <= 4 ? 1 : 0);
> -}
> -
> /* estimate bit cost for CBP, significant map and significant coefficients */
> void Entropy::estBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma)
> {
> diff -r b6d49505b179 -r 64ea900398eb source/encoder/entropy.h
> --- a/source/encoder/entropy.h Thu Oct 02 16:47:55 2014 -0500
> +++ b/source/encoder/entropy.h Sun Oct 05 18:19:16 2014 +0900
> @@ -39,7 +39,6 @@
> class TComDataCU;
> class ScalingList;
>
> -
> enum SplitType
> {
> DONT_SPLIT = 0,
> @@ -149,8 +148,7 @@
> void finishSlice() { encodeBinTrm(1); finish(); dynamic_cast<Bitstream*>(m_bitIf)->writeByteAlignment(); }
>
> void encodeCTU(TComDataCU* cu);
> - void codeSaoOffset(SaoCtuParam* saoLcuParam, uint32_t compIdx);
> - void codeSaoUnitInterleaving(int compIdx, bool saoFlag, int rx, int ry, SaoCtuParam* saoLcuParam, int cuAddrInSlice, int cuAddrUpInSlice, int allowMergeLeft, int allowMergeUp);
> + void codeSaoOffset(const SaoCtuParam* saoLcuParam, int plane);
> void codeSaoMerge(uint32_t code) { encodeBin(code, m_contextState[OFF_SAO_MERGE_FLAG_CTX]); }
>
> void codeCUTransquantBypassFlag(uint32_t symbol);
> @@ -215,9 +213,6 @@
> void codeRefFrmIdx(TComDataCU* cu, uint32_t absPartIdx, int list);
>
> void codeSaoMaxUvlc(uint32_t code, uint32_t maxSymbol);
> - void codeSaoTypeIdx(uint32_t code);
> - void codeSaoUflc(uint32_t length, uint32_t code) { encodeBinsEP(code, length); }
> - void codeSAOSign(uint32_t code) { encodeBinEP(code); }
>
> void codeDeltaQP(TComDataCU* cu, uint32_t absPartIdx);
> void codeLastSignificantXY(uint32_t posx, uint32_t posy, uint32_t log2TrSize, bool bIsLuma, uint32_t scanIdx);
> @@ -230,7 +225,7 @@
> uint32_t bakAbsPartIdxCU;
> };
>
> - void encodeTransform(TComDataCU* cu, CoeffCodeState& state, uint32_t offsetLumaOffset, uint32_t offsetChroma,
> + void encodeTransform(TComDataCU* cu, CoeffCodeState& state, uint32_t offsetLumaOffset, uint32_t offsetChroma,
> uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t depth, uint32_t log2TrSize, uint32_t uiTrIdx, bool& bCodeDQP, uint32_t* depthRange);
>
> void copyFrom(Entropy& src);
> diff -r b6d49505b179 -r 64ea900398eb source/encoder/sao.cpp
> --- a/source/encoder/sao.cpp Thu Oct 02 16:47:55 2014 -0500
> +++ b/source/encoder/sao.cpp Sun Oct 05 18:19:16 2014 +0900
> @@ -27,22 +27,9 @@
>
> namespace {
>
> -#if HIGH_BIT_DEPTH
> -inline double roundIDBI2(double x)
> +inline int32_t roundIBDI(int32_t num, int32_t den)
> {
> - return ((x) > 0) ? (int)(((int)(x) + (1 << (X265_DEPTH - 8 - 1))) / (1 << (X265_DEPTH - 8))) :
> - ((int)(((int)(x) - (1 << (X265_DEPTH - 8 - 1))) / (1 << (X265_DEPTH - 8))));
> -}
> -#endif
> -
> -/* rounding with IBDI */
> -inline double roundIDBI(double x)
> -{
> -#if HIGH_BIT_DEPTH
> - return X265_DEPTH > 8 ? roundIDBI2(x) : ((x) >= 0 ? ((int)((x) + 0.5)) : ((int)((x) - 0.5)));
> -#else
> - return (x) >= 0 ? ((int)((x) + 0.5)) : ((int)((x) - 0.5));
> -#endif
> + return num >= 0 ? ((num * 2 + den) / (den * 2)) : -((-num * 2 + den) / (den * 2));
> }
>
> /* get the sign of input variable (TODO: this is a dup, make common) */
> @@ -51,6 +38,11 @@
> return (x >> 31) | ((int)((((uint32_t)-x)) >> 31));
> }
>
> +inline int64_t estSaoDist(int32_t count, int offset, int32_t offsetOrg)
> +{
> + return (count * offset - offsetOrg * 2) * offset;
> +}
> +
> } // end anonymous namespace
>
>
> @@ -172,7 +164,6 @@
> void SAO::allocSaoParam(SAOParam *saoParam) const
> {
> saoParam->numCuInWidth = m_numCuInWidth;
> - saoParam->numCuInHeight = m_numCuInHeight;
>
> saoParam->ctuParam[0] = new SaoCtuParam[m_numCuInHeight * m_numCuInWidth];
> saoParam->ctuParam[1] = new SaoCtuParam[m_numCuInHeight * m_numCuInWidth];
> @@ -184,9 +175,11 @@
> {
> saoParam->bSaoFlag[0] = false;
> saoParam->bSaoFlag[1] = false;
> +#if 0
> resetCtuPart(saoParam->ctuParam[0]);
> resetCtuPart(saoParam->ctuParam[1]);
> resetCtuPart(saoParam->ctuParam[2]);
> +#endif
> }
>
> void SAO::startSlice(Frame *pic, Entropy& initState, int qp)
> @@ -238,64 +231,45 @@
> }
>
> // CTU-based SAO process without slice granularity
> -void SAO::processSaoCu(int addr, int saoType, int plane)
> +void SAO::processSaoCu(int addr, int typeIdx, int plane)
> {
> int x, y;
> - TComDataCU *tmpCu = m_pic->getCU(addr);
> - pixel* rec;
> - int stride;
> - int ctuWidth;
> - int ctuHeight;
> - int rpelx;
> - int bpely;
> - int picWidthTmp;
> - int picHeightTmp;
> + TComDataCU *cu = m_pic->getCU(addr);
> + pixel* rec = m_pic->getPicYuvRec()->getPlaneAddr(plane, addr);
> + int stride = plane ? m_pic->getCStride() : m_pic->getStride();
> + uint32_t picWidth = m_param->sourceWidth;
> + uint32_t picHeight = m_param->sourceHeight;
> + int ctuWidth = g_maxCUSize;
> + int ctuHeight = g_maxCUSize;
> + uint32_t lpelx = cu->getCUPelX();
> + uint32_t tpely = cu->getCUPelY();
> + if (plane)
> + {
> + picWidth >>= m_hChromaShift;
> + picHeight >>= m_vChromaShift;
> + ctuWidth >>= m_hChromaShift;
> + ctuHeight >>= m_vChromaShift;
> + lpelx >>= m_hChromaShift;
> + tpely >>= m_vChromaShift;
> + }
> + uint32_t rpelx = x265_min(lpelx + ctuWidth, picWidth);
> + uint32_t bpely = x265_min(tpely + ctuHeight, picHeight);
> + ctuWidth = rpelx - lpelx;
> + ctuHeight = bpely - tpely;
> +
> int startX;
> int startY;
> int endX;
> int endY;
> pixel* tmpL;
> pixel* tmpU;
> - uint32_t lpelx = tmpCu->getCUPelX();
> - uint32_t tpely = tmpCu->getCUPelY();
> - bool isLuma = !plane;
> -
> - picWidthTmp = isLuma ? m_param->sourceWidth : m_param->sourceWidth >> m_hChromaShift;
> - picHeightTmp = isLuma ? m_param->sourceHeight : m_param->sourceHeight >> m_vChromaShift;
> - ctuWidth = isLuma ? g_maxCUSize : g_maxCUSize >> m_hChromaShift;
> - ctuHeight = isLuma ? g_maxCUSize : g_maxCUSize >> m_vChromaShift;
> - lpelx = isLuma ? lpelx : lpelx >> m_hChromaShift;
> - tpely = isLuma ? tpely : tpely >> m_vChromaShift;
> -
> - rpelx = lpelx + ctuWidth;
> - bpely = tpely + ctuHeight;
> - rpelx = rpelx > picWidthTmp ? picWidthTmp : rpelx;
> - bpely = bpely > picHeightTmp ? picHeightTmp : bpely;
> - ctuWidth = rpelx - lpelx;
> - ctuHeight = bpely - tpely;
> -
> - if (!tmpCu->m_pic)
> - return;
> -
> - if (plane)
> - {
> - rec = m_pic->getPicYuvRec()->getChromaAddr(plane, addr);
> - stride = m_pic->getCStride();
> - }
> - else
> - {
> - rec = m_pic->getPicYuvRec()->getLumaAddr(addr);
> - stride = m_pic->getStride();
> - }
>
> int32_t _upBuff1[MAX_CU_SIZE + 2], *upBuff1 = _upBuff1 + 1;
> int32_t _upBufft[MAX_CU_SIZE + 2], *upBufft = _upBufft + 1;
>
> -// if (iSaoType!=SAO_BO_0 || iSaoType!=SAO_BO_1)
> {
> - int cuHeightTmp = isLuma ? g_maxCUSize : (g_maxCUSize >> m_vChromaShift);
> - pixel* recR = &rec[isLuma ? (g_maxCUSize - 1) : ((g_maxCUSize >> m_hChromaShift) - 1)];
> - for (int i = 0; i < cuHeightTmp + 1; i++)
> + const pixel* recR = &rec[ctuWidth - 1];
> + for (int i = 0; i < ctuHeight + 1; i++)
> {
> m_tmpL2[i] = *recR;
> recR += stride;
> @@ -305,13 +279,13 @@
> tmpU = &(m_tmpU1[plane][lpelx]);
> }
>
> - switch (saoType)
> + switch (typeIdx)
> {
> case SAO_EO_0: // dir: -
> {
> pixel firstPxl = 0, lastPxl = 0;
> startX = !lpelx;
> - endX = (rpelx == picWidthTmp) ? ctuWidth - 1 : ctuWidth;
> + endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
> if (ctuWidth & 15)
> {
> for (y = 0; y < ctuHeight; y++)
> @@ -338,7 +312,7 @@
> if (!lpelx)
> firstPxl = rec[0];
>
> - if (rpelx == picWidthTmp)
> + if (rpelx == picWidth)
> lastPxl = rec[ctuWidth - 1];
>
> primitives.saoCuOrgE0(rec, m_offsetEo, ctuWidth, (int8_t)signLeft);
> @@ -346,7 +320,7 @@
> if (!lpelx)
> rec[0] = firstPxl;
>
> - if (rpelx == picWidthTmp)
> + if (rpelx == picWidth)
> rec[ctuWidth - 1] = lastPxl;
>
> rec += stride;
> @@ -357,7 +331,7 @@
> case SAO_EO_1: // dir: |
> {
> startY = !tpely;
> - endY = (bpely == picHeightTmp) ? ctuHeight - 1 : ctuHeight;
> + endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
> if (!tpely)
> rec += stride;
>
> @@ -383,10 +357,10 @@
> case SAO_EO_2: // dir: 135
> {
> startX = !lpelx;
> - endX = (rpelx == picWidthTmp) ? ctuWidth - 1 : ctuWidth;
> + endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
>
> startY = !tpely;
> - endY = (bpely == picHeightTmp) ? ctuHeight - 1 : ctuHeight;
> + endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
>
> if (!tpely)
> rec += stride;
> @@ -396,17 +370,15 @@
>
> for (y = startY; y < endY; y++)
> {
> - int signDown2 = signOf(rec[stride + startX] - tmpL[y]);
> + upBufft[startX] = signOf(rec[stride + startX] - tmpL[y]);
> for (x = startX; x < endX; x++)
> {
> - int signDown1 = signOf(rec[x] - rec[x + stride + 1]);
> - int edgeType = signDown1 + upBuff1[x] + 2;
> - upBufft[x + 1] = -signDown1;
> + int signDown = signOf(rec[x] - rec[x + stride + 1]);
> + int edgeType = signDown + upBuff1[x] + 2;
> + upBufft[x + 1] = -signDown;
> rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
> }
>
> - upBufft[startX] = signDown2;
> -
> std::swap(upBuff1, upBufft);
>
> rec += stride;
> @@ -416,13 +388,13 @@
> }
> case SAO_EO_3: // dir: 45
> {
> - startX = (lpelx == 0) ? 1 : 0;
> - endX = (rpelx == picWidthTmp) ? ctuWidth - 1 : ctuWidth;
> + startX = !lpelx;
> + endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
>
> - startY = (tpely == 0) ? 1 : 0;
> - endY = (bpely == picHeightTmp) ? ctuHeight - 1 : ctuHeight;
> + startY = !tpely;
> + endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
>
> - if (startY == 1)
> + if (!tpely)
> rec += stride;
>
> for (x = startX - 1; x < endX; x++)
> @@ -431,15 +403,15 @@
> for (y = startY; y < endY; y++)
> {
> x = startX;
> - int signDown1 = signOf(rec[x] - tmpL[y + 1]);
> - int edgeType = signDown1 + upBuff1[x] + 2;
> - upBuff1[x - 1] = -signDown1;
> + int signDown = signOf(rec[x] - tmpL[y + 1]);
> + int edgeType = signDown + upBuff1[x] + 2;
> + upBuff1[x - 1] = -signDown;
> rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
> for (x = startX + 1; x < endX; x++)
> {
> - signDown1 = signOf(rec[x] - rec[x + stride - 1]);
> - edgeType = signDown1 + upBuff1[x] + 2;
> - upBuff1[x - 1] = -signDown1;
> + signDown = signOf(rec[x] - rec[x + stride - 1]);
> + edgeType = signDown + upBuff1[x] + 2;
> + upBuff1[x - 1] = -signDown;
> rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
> }
>
> @@ -474,44 +446,27 @@
> /* Process SAO all units */
> void SAO::processSaoUnitRow(SaoCtuParam* ctuParam, int idxY, int plane)
> {
> - pixel *rec;
> - int picWidthTmp;
> -
> + int stride = plane ? m_pic->getCStride() : m_pic->getStride();
> + uint32_t picWidth = m_param->sourceWidth;
> + int ctuWidth = g_maxCUSize;
> + int ctuHeight = g_maxCUSize;
> if (plane)
> {
> - rec = m_pic->getPicYuvRec()->getChromaAddr(plane);
> - picWidthTmp = m_param->sourceWidth >> m_hChromaShift;
> - }
> - else
> - {
> - rec = m_pic->getPicYuvRec()->getLumaAddr();
> - picWidthTmp = m_param->sourceWidth;
> + picWidth >>= m_hChromaShift;
> + ctuWidth >>= m_hChromaShift;
> + ctuHeight >>= m_vChromaShift;
> }
>
> if (!idxY)
> - memcpy(m_tmpU1[plane], rec, sizeof(pixel) * picWidthTmp);
> + {
> + pixel *rec = plane ? m_pic->getPicYuvRec()->getChromaAddr(plane) : m_pic->getPicYuvRec()->getLumaAddr();
> + memcpy(m_tmpU1[plane], rec, sizeof(pixel) * picWidth);
> + }
>
> - int frameWidthInCU = m_pic->getFrameWidthInCU();
> - int stride;
> - bool isChroma = !!plane;
> + int addr = idxY * m_numCuInWidth;
> + pixel *rec = plane ? m_pic->getPicYuvRec()->getChromaAddr(plane, addr) : m_pic->getPicYuvRec()->getLumaAddr(addr);
>
> - const int boShift = X265_DEPTH - SAO_BO_BITS;
> -
> - int addr = idxY * frameWidthInCU;
> - if (isChroma)
> - {
> - rec = m_pic->getPicYuvRec()->getChromaAddr(plane, addr);
> - stride = m_pic->getCStride();
> - picWidthTmp = m_param->sourceWidth >> m_hChromaShift;
> - }
> - else
> - {
> - rec = m_pic->getPicYuvRec()->getLumaAddr(addr);
> - stride = m_pic->getStride();
> - picWidthTmp = m_param->sourceWidth;
> - }
> - int maxCUHeight = isChroma ? (g_maxCUSize >> m_vChromaShift) : g_maxCUSize;
> - for (int i = 0; i < maxCUHeight + 1; i++)
> + for (int i = 0; i < ctuHeight + 1; i++)
> {
> m_tmpL1[i] = rec[0];
> rec += stride;
> @@ -519,11 +474,13 @@
>
> rec -= (stride << 1);
>
> - memcpy(m_tmpU2[plane], rec, sizeof(pixel) * picWidthTmp);
> + memcpy(m_tmpU2[plane], rec, sizeof(pixel) * picWidth);
>
> - for (int idxX = 0; idxX < frameWidthInCU; idxX++)
> + const int boShift = X265_DEPTH - SAO_BO_BITS;
> +
> + for (int idxX = 0; idxX < m_numCuInWidth; idxX++)
> {
> - addr = idxY * frameWidthInCU + idxX;
> + addr = idxY * m_numCuInWidth + idxX;
>
> int typeIdx = ctuParam[addr].typeIdx;
> bool mergeLeftFlag = ctuParam[addr].mergeLeftFlag;
> @@ -539,7 +496,7 @@
> memset(offset, 0, sizeof(offset));
>
> for (int i = 0; i < SAO_NUM_OFFSET; i++)
> - offset[((ctuParam[addr].subTypeIdx + i) & (SAO_NUM_BO_CLASSES - 1))] = ctuParam[addr].offset[i] << SAO_BIT_INC;
> + offset[((ctuParam[addr].bandPos + i) & (SAO_NUM_BO_CLASSES - 1))] = ctuParam[addr].offset[i] << SAO_BIT_INC;
>
> for (int i = 0; i < (1 << X265_DEPTH); i++)
> offsetBo[i] = m_clipTable[i + offset[i >> boShift]];
> @@ -557,27 +514,14 @@
> }
> processSaoCu(addr, typeIdx, plane);
> }
> - else
> + else if (idxX != (m_numCuInWidth - 1))
> {
> - if (idxX != (frameWidthInCU - 1))
> + rec = plane ? m_pic->getPicYuvRec()->getChromaAddr(plane, addr) : m_pic->getPicYuvRec()->getLumaAddr(addr);
> +
> + for (int i = 0; i < ctuHeight + 1; i++)
> {
> - if (isChroma)
> - {
> - rec = m_pic->getPicYuvRec()->getChromaAddr(plane, addr);
> - stride = m_pic->getCStride();
> - }
> - else
> - {
> - rec = m_pic->getPicYuvRec()->getLumaAddr(addr);
> - stride = m_pic->getStride();
> - }
> -
> - int widthShift = isChroma ? (g_maxCUSize >> m_hChromaShift) : g_maxCUSize;
> - for (int i = 0; i < maxCUHeight + 1; i++)
> - {
> - m_tmpL1[i] = rec[widthShift - 1];
> - rec += stride;
> - }
> + m_tmpL1[i] = rec[ctuWidth - 1];
> + rec += stride;
> }
> }
> }
> @@ -591,9 +535,8 @@
> {
> ctuParam[i].mergeUpFlag = 1;
> ctuParam[i].mergeLeftFlag = 0;
> - ctuParam[i].partIdx = 0;
> ctuParam[i].typeIdx = -1;
> - ctuParam[i].subTypeIdx = 0;
> + ctuParam[i].bandPos = 0;
> for (int j = 0; j < SAO_NUM_OFFSET; j++)
> ctuParam[i].offset[j] = 0;
> }
> @@ -603,10 +546,8 @@
> {
> saoUnit->mergeUpFlag = 0;
> saoUnit->mergeLeftFlag = 0;
> - saoUnit->partIdx = 0;
> - saoUnit->partIdxTmp = 0;
> saoUnit->typeIdx = -1;
> - saoUnit->subTypeIdx = 0;
> + saoUnit->bandPos = 0;
>
> for (int i = 0; i < SAO_NUM_OFFSET; i++)
> saoUnit->offset[i] = 0;
> @@ -617,8 +558,8 @@
> saoUnitDst->mergeLeftFlag = saoUnitSrc->mergeLeftFlag;
> saoUnitDst->mergeUpFlag = saoUnitSrc->mergeUpFlag;
> saoUnitDst->typeIdx = saoUnitSrc->typeIdx;
> + saoUnitDst->bandPos = saoUnitSrc->bandPos;
>
> - saoUnitDst->subTypeIdx = saoUnitSrc->subTypeIdx;
> for (int i = 0; i < SAO_NUM_OFFSET; i++)
> saoUnitDst->offset[i] = saoUnitSrc->offset[i];
> }
> @@ -628,242 +569,240 @@
> {
> int x, y;
> TComDataCU *cu = m_pic->getCU(addr);
> + const pixel* fenc0 = m_pic->getPicYuvOrg()->getPlaneAddr(plane, addr);
> + const pixel* rec0 = m_pic->getPicYuvRec()->getPlaneAddr(plane, addr);
> + const pixel* fenc;
> + const pixel* rec;
> + int stride = plane ? m_pic->getCStride() : m_pic->getStride();
> + uint32_t picWidth = m_param->sourceWidth;
> + uint32_t picHeight = m_param->sourceHeight;
> + int ctuWidth = g_maxCUSize;
> + int ctuHeight = g_maxCUSize;
> + uint32_t lpelx = cu->getCUPelX();
> + uint32_t tpely = cu->getCUPelY();
> + if (plane)
> + {
> + picWidth >>= m_hChromaShift;
> + picHeight >>= m_vChromaShift;
> + ctuWidth >>= m_hChromaShift;
> + ctuHeight >>= m_vChromaShift;
> + lpelx >>= m_hChromaShift;
> + tpely >>= m_vChromaShift;
> + }
> + uint32_t rpelx = x265_min(lpelx + ctuWidth, picWidth);
> + uint32_t bpely = x265_min(tpely + ctuHeight, picHeight);
> + ctuWidth = rpelx - lpelx;
> + ctuHeight = bpely - tpely;
>
> - pixel* fenc;
> - pixel* recon;
> - int stride;
> - int ctuHeight;
> - int ctuWidth;
> - uint32_t rpelx;
> - uint32_t bpely;
> - uint32_t picWidthTmp;
> - uint32_t picHeightTmp;
> - int64_t* stats;
> - int64_t* counts;
> int startX;
> int startY;
> int endX;
> int endY;
> - uint32_t lpelx = cu->getCUPelX();
> - uint32_t tpely = cu->getCUPelY();
> + int32_t* stats;
> + int32_t* count;
>
> - int isLuma = !plane;
> - int isChroma = !!plane;
> - int numSkipLine = isChroma ? 4 - (2 * m_vChromaShift) : 4;
> - int numSkipLineRight = isChroma ? 5 - (2 * m_hChromaShift) : 5;
> + int skipB = plane ? 2 : 4;
> + int skipR = plane ? 3 : 5;
>
> - picWidthTmp = isLuma ? m_param->sourceWidth : m_param->sourceWidth >> m_hChromaShift;
> - picHeightTmp = isLuma ? m_param->sourceHeight : m_param->sourceHeight >> m_vChromaShift;
> - ctuWidth = isLuma ? g_maxCUSize : g_maxCUSize >> m_hChromaShift;
> - ctuHeight = isLuma ? g_maxCUSize : g_maxCUSize >> m_vChromaShift;
> - lpelx = isLuma ? lpelx : lpelx >> m_hChromaShift;
> - tpely = isLuma ? tpely : tpely >> m_vChromaShift;
> + int32_t _upBuff1[MAX_CU_SIZE + 2], *upBuff1 = _upBuff1 + 1;
> + int32_t _upBufft[MAX_CU_SIZE + 2], *upBufft = _upBufft + 1;
>
> - rpelx = lpelx + ctuWidth;
> - bpely = tpely + ctuHeight;
> - rpelx = rpelx > picWidthTmp ? picWidthTmp : rpelx;
> - bpely = bpely > picHeightTmp ? picHeightTmp : bpely;
> - ctuWidth = rpelx - lpelx;
> - ctuHeight = bpely - tpely;
> - stride = (plane == 0) ? m_pic->getStride() : m_pic->getCStride();
> -
> - //if(iSaoType == BO_0 || iSaoType == BO_1)
> + // SAO_BO:
> {
> const int boShift = X265_DEPTH - SAO_BO_BITS;
>
> if (m_param->bSaoNonDeblocked)
> {
> - numSkipLine = isChroma ? 3 - (2 * m_vChromaShift) : 3;
> - numSkipLineRight = isChroma ? 4 - (2 * m_hChromaShift) : 4;
> + skipB = plane ? 1 : 3;
> + skipR = plane ? 2 : 4;
> }
> stats = m_offsetOrg[plane][SAO_BO];
> - counts = m_count[plane][SAO_BO];
> + count = m_count[plane][SAO_BO];
>
> - fenc = m_pic->getPicYuvOrg()->getPlaneAddr(plane, addr);
> - recon = m_pic->getPicYuvRec()->getPlaneAddr(plane, addr);
> + fenc = fenc0;
> + rec = rec0;
>
> - endX = (rpelx == picWidthTmp) ? ctuWidth : ctuWidth - numSkipLineRight;
> - endY = (bpely == picHeightTmp) ? ctuHeight : ctuHeight - numSkipLine;
> + endX = (rpelx == picWidth) ? ctuWidth : ctuWidth - skipR;
> + endY = (bpely == picHeight) ? ctuHeight : ctuHeight - skipB;
> +
> for (y = 0; y < endY; y++)
> {
> for (x = 0; x < endX; x++)
> {
> - int classIdx = 1 + (recon[x] >> boShift);
> - stats[classIdx] += (fenc[x] - recon[x]);
> - counts[classIdx]++;
> + int classIdx = 1 + (rec[x] >> boShift);
> + stats[classIdx] += (fenc[x] - rec[x]);
> + count[classIdx]++;
> }
>
> fenc += stride;
> - recon += stride;
> + rec += stride;
> }
> }
>
> - int32_t _upBuff1[MAX_CU_SIZE + 2], *upBuff1 = _upBuff1 + 1;
> - int32_t _upBufft[MAX_CU_SIZE + 2], *upBufft = _upBufft + 1;
> -
> - //if (iSaoType == EO_0 || iSaoType == EO_1 || iSaoType == EO_2 || iSaoType == EO_3)
> {
> - //if (iSaoType == EO_0)
> + // SAO_EO_0: // dir: -
> {
> if (m_param->bSaoNonDeblocked)
> {
> - numSkipLine = isChroma ? 3 - (2 * m_vChromaShift) : 3;
> - numSkipLineRight = isChroma ? 5 - (2 * m_hChromaShift) : 5;
> + skipB = plane ? 1 : 3;
> + skipR = plane ? 3 : 5;
> }
> stats = m_offsetOrg[plane][SAO_EO_0];
> - counts = m_count[plane][SAO_EO_0];
> + count = m_count[plane][SAO_EO_0];
>
> - fenc = m_pic->getPicYuvOrg()->getPlaneAddr(plane, addr);
> - recon = m_pic->getPicYuvRec()->getPlaneAddr(plane, addr);
> + fenc = fenc0;
> + rec = rec0;
>
> - startX = (lpelx == 0) ? 1 : 0;
> - endX = (rpelx == picWidthTmp) ? ctuWidth - 1 : ctuWidth - numSkipLineRight;
> - for (y = 0; y < ctuHeight - numSkipLine; y++)
> + startX = !lpelx;
> + endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR;
> + for (y = 0; y < ctuHeight - skipB; y++)
> {
> - int signLeft = signOf(recon[startX] - recon[startX - 1]);
> + int signLeft = signOf(rec[startX] - rec[startX - 1]);
> for (x = startX; x < endX; x++)
> {
> - int signRight = signOf(recon[x] - recon[x + 1]);
> + int signRight = signOf(rec[x] - rec[x + 1]);
> int edgeType = signRight + signLeft + 2;
> signLeft = -signRight;
>
> - stats[s_eoTable[edgeType]] += (fenc[x] - recon[x]);
> - counts[s_eoTable[edgeType]]++;
> + stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
> + count[s_eoTable[edgeType]]++;
> }
>
> fenc += stride;
> - recon += stride;
> + rec += stride;
> }
> }
>
> - //if (iSaoType == EO_1)
> + // SAO_EO_1: // dir: |
> {
> if (m_param->bSaoNonDeblocked)
> {
> - numSkipLine = isChroma ? 4 - (2 * m_vChromaShift) : 4;
> - numSkipLineRight = isChroma ? 4 - (2 * m_hChromaShift) : 4;
> + skipB = plane ? 2 : 4;
> + skipR = plane ? 2 : 4;
> }
> stats = m_offsetOrg[plane][SAO_EO_1];
> - counts = m_count[plane][SAO_EO_1];
> + count = m_count[plane][SAO_EO_1];
>
> - fenc = m_pic->getPicYuvOrg()->getPlaneAddr(plane, addr);
> - recon = m_pic->getPicYuvRec()->getPlaneAddr(plane, addr);
> + fenc = fenc0;
> + rec = rec0;
>
> - startY = (tpely == 0) ? 1 : 0;
> - endX = (rpelx == picWidthTmp) ? ctuWidth : ctuWidth - numSkipLineRight;
> - endY = (bpely == picHeightTmp) ? ctuHeight - 1 : ctuHeight - numSkipLine;
> - if (tpely == 0)
> + startY = !tpely;
> + endX = (rpelx == picWidth) ? ctuWidth : ctuWidth - skipR;
> + endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB;
> + if (!tpely)
> {
> fenc += stride;
> - recon += stride;
> + rec += stride;
> }
>
> for (x = 0; x < ctuWidth; x++)
> - upBuff1[x] = signOf(recon[x] - recon[x - stride]);
> + upBuff1[x] = signOf(rec[x] - rec[x - stride]);
>
> for (y = startY; y < endY; y++)
> {
> for (x = 0; x < endX; x++)
> {
> - int signDown = signOf(recon[x] - recon[x + stride]);
> + int signDown = signOf(rec[x] - rec[x + stride]);
> int edgeType = signDown + upBuff1[x] + 2;
> upBuff1[x] = -signDown;
>
> - stats[s_eoTable[edgeType]] += (fenc[x] - recon[x]);
> - counts[s_eoTable[edgeType]]++;
> + stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
> + count[s_eoTable[edgeType]]++;
> }
>
> fenc += stride;
> - recon += stride;
> + rec += stride;
> }
> }
> - //if (iSaoType == EO_2)
> +
> + // SAO_EO_2: // dir: 135
> {
> if (m_param->bSaoNonDeblocked)
> {
> - numSkipLine = isChroma ? 4 - (2 * m_vChromaShift) : 4;
> - numSkipLineRight = isChroma ? 5 - (2 * m_hChromaShift) : 5;
> + skipB = plane ? 2 : 4;
> + skipR = plane ? 3 : 5;
> }
> stats = m_offsetOrg[plane][SAO_EO_2];
> - counts = m_count[plane][SAO_EO_2];
> + count = m_count[plane][SAO_EO_2];
>
> - fenc = m_pic->getPicYuvOrg()->getPlaneAddr(plane, addr);
> - recon = m_pic->getPicYuvRec()->getPlaneAddr(plane, addr);
> + fenc = fenc0;
> + rec = rec0;
>
> - startX = (lpelx == 0) ? 1 : 0;
> - endX = (rpelx == picWidthTmp) ? ctuWidth - 1 : ctuWidth - numSkipLineRight;
> + startX = !lpelx;
> + endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR;
>
> - startY = (tpely == 0) ? 1 : 0;
> - endY = (bpely == picHeightTmp) ? ctuHeight - 1 : ctuHeight - numSkipLine;
> - if (tpely == 0)
> + startY = !tpely;
> + endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB;
> + if (!tpely)
> {
> fenc += stride;
> - recon += stride;
> + rec += stride;
> }
>
> for (x = startX; x < endX; x++)
> - upBuff1[x] = signOf(recon[x] - recon[x - stride - 1]);
> + upBuff1[x] = signOf(rec[x] - rec[x - stride - 1]);
>
> for (y = startY; y < endY; y++)
> {
> - int signDown2 = signOf(recon[stride + startX] - recon[startX - 1]);
> + upBufft[startX] = signOf(rec[startX + stride] - rec[startX - 1]);
> for (x = startX; x < endX; x++)
> {
> - int signDown1 = signOf(recon[x] - recon[x + stride + 1]);
> - int edgeType = signDown1 + upBuff1[x] + 2;
> - upBufft[x + 1] = -signDown1;
> - stats[s_eoTable[edgeType]] += (fenc[x] - recon[x]);
> - counts[s_eoTable[edgeType]]++;
> + int signDown = signOf(rec[x] - rec[x + stride + 1]);
> + int edgeType = signDown + upBuff1[x] + 2;
> + upBufft[x + 1] = -signDown;
> + stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
> + count[s_eoTable[edgeType]]++;
> }
>
> - upBufft[startX] = signDown2;
> std::swap(upBuff1, upBufft);
>
> - recon += stride;
> + rec += stride;
> fenc += stride;
> }
> }
> - //if (iSaoType == EO_3)
> +
> + // SAO_EO_3: // dir: 45
> {
> if (m_param->bSaoNonDeblocked)
> {
> - numSkipLine = isChroma ? 4 - (2 * m_vChromaShift) : 4;
> - numSkipLineRight = isChroma ? 5 - (2 * m_hChromaShift) : 5;
> + skipB = plane ? 2 : 4;
> + skipR = plane ? 3 : 5;
> }
> stats = m_offsetOrg[plane][SAO_EO_3];
> - counts = m_count[plane][SAO_EO_3];
> + count = m_count[plane][SAO_EO_3];
>
> - fenc = m_pic->getPicYuvOrg()->getPlaneAddr(plane, addr);
> - recon = m_pic->getPicYuvRec()->getPlaneAddr(plane, addr);
> + fenc = fenc0;
> + rec = rec0;
>
> - startX = (lpelx == 0) ? 1 : 0;
> - endX = (rpelx == picWidthTmp) ? ctuWidth - 1 : ctuWidth - numSkipLineRight;
> + startX = !lpelx;
> + endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR;
>
> - startY = (tpely == 0) ? 1 : 0;
> - endY = (bpely == picHeightTmp) ? ctuHeight - 1 : ctuHeight - numSkipLine;
> - if (startY == 1)
> + startY = !tpely;
> + endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB;
> +
> + if (!tpely)
> {
> fenc += stride;
> - recon += stride;
> + rec += stride;
> }
>
> for (x = startX - 1; x < endX; x++)
> - upBuff1[x] = signOf(recon[x] - recon[x - stride + 1]);
> + upBuff1[x] = signOf(rec[x] - rec[x - stride + 1]);
>
> for (y = startY; y < endY; y++)
> {
> for (x = startX; x < endX; x++)
> {
> - int signDown1 = signOf(recon[x] - recon[x + stride - 1]);
> - int edgeType = signDown1 + upBuff1[x] + 2;
> - upBuff1[x - 1] = -signDown1;
> - stats[s_eoTable[edgeType]] += (fenc[x] - recon[x]);
> - counts[s_eoTable[edgeType]]++;
> + int signDown = signOf(rec[x] - rec[x + stride - 1]);
> + int edgeType = signDown + upBuff1[x] + 2;
> + upBuff1[x - 1] = -signDown;
> + stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
> + count[s_eoTable[edgeType]]++;
> }
>
> - upBuff1[endX - 1] = signOf(recon[endX - 1 + stride] - recon[endX]);
> + upBuff1[endX - 1] = signOf(rec[endX - 1 + stride] - rec[endX]);
>
> - recon += stride;
> + rec += stride;
> fenc += stride;
> }
> }
> @@ -872,277 +811,266 @@
>
> void SAO::calcSaoStatsCu_BeforeDblk(Frame* pic, int idxX, int idxY)
> {
> + int addr = idxX + m_numCuInWidth * idxY;
> +
> int x, y;
> + TComDataCU *cu = pic->getCU(addr);
> + const pixel* fenc;
> + const pixel* rec;
> + int stride = m_pic->getStride();
> + uint32_t picWidth = m_param->sourceWidth;
> + uint32_t picHeight = m_param->sourceHeight;
> + int ctuWidth = g_maxCUSize;
> + int ctuHeight = g_maxCUSize;
> + uint32_t lpelx = cu->getCUPelX();
> + uint32_t tpely = cu->getCUPelY();
> + uint32_t rpelx = x265_min(lpelx + ctuWidth, picWidth);
> + uint32_t bpely = x265_min(tpely + ctuHeight, picHeight);
> + ctuWidth = rpelx - lpelx;
> + ctuHeight = bpely - tpely;
>
> - pixel* fenc;
> - pixel* recon;
> - int stride;
> - uint32_t rPelX;
> - uint32_t bPelY;
> - int64_t* stats;
> - int64_t* count;
> int startX;
> int startY;
> int endX;
> int endY;
> int firstX, firstY;
> + int32_t* stats;
> + int32_t* count;
>
> - int frameWidthInCU = m_numCuInWidth;
> + int skipB, skipR;
>
> - int isChroma;
> - int numSkipLine, numSkipLineRight;
> -
> - uint32_t lPelX, tPelY;
> - TComDataCU *cu;
> int32_t _upBuff1[MAX_CU_SIZE + 2], *upBuff1 = _upBuff1 + 1;
> int32_t _upBufft[MAX_CU_SIZE + 2], *upBufft = _upBufft + 1;
>
> const int boShift = X265_DEPTH - SAO_BO_BITS;
>
> - // NOTE: Row
> + memset(m_countPreDblk[addr], 0, sizeof(PerPlane));
> + memset(m_offsetOrgPreDblk[addr], 0, sizeof(PerPlane));
> +
> + for (int plane = 0; plane < NUM_PLANE; plane++)
> {
> - // NOTE: Col
> + if (plane == 1)
> {
> - int addr = idxX + frameWidthInCU * idxY;
> - cu = pic->getCU(addr);
> + stride = pic->getCStride();
> + picWidth >>= m_hChromaShift;
> + picHeight >>= m_vChromaShift;
> + ctuWidth >>= m_hChromaShift;
> + ctuHeight >>= m_vChromaShift;
> + lpelx >>= m_hChromaShift;
> + tpely >>= m_vChromaShift;
> + rpelx >>= m_hChromaShift;
> + bpely >>= m_vChromaShift;
> + }
>
> - uint32_t picWidthTmp = m_param->sourceWidth;
> - uint32_t picHeightTmp = m_param->sourceHeight;
> - int ctuWidth = g_maxCUSize;
> - int ctuHeight = g_maxCUSize;
> - lPelX = cu->getCUPelX();
> - tPelY = cu->getCUPelY();
> - rPelX = lPelX + ctuWidth;
> - bPelY = tPelY + ctuHeight;
> - rPelX = rPelX > picWidthTmp ? picWidthTmp : rPelX;
> - bPelY = bPelY > picHeightTmp ? picHeightTmp : bPelY;
> - ctuWidth = rPelX - lPelX;
> - ctuHeight = bPelY - tPelY;
> + // SAO_BO:
>
> - memset(m_countPreDblk[addr], 0, sizeof(PerPlane));
> - memset(m_offsetOrgPreDblk[addr], 0, sizeof(PerPlane));
> + skipB = plane ? 1 : 3;
> + skipR = plane ? 2 : 4;
>
> - for (int plane = 0; plane < 3; plane++)
> + stats = m_offsetOrgPreDblk[addr][plane][SAO_BO];
> + count = m_countPreDblk[addr][plane][SAO_BO];
> +
> + const pixel* fenc0 = m_pic->getPicYuvOrg()->getPlaneAddr(plane, addr);
> + const pixel* rec0 = m_pic->getPicYuvRec()->getPlaneAddr(plane, addr);
> + fenc = fenc0;
> + rec = rec0;
> +
> + startX = (rpelx == picWidth) ? ctuWidth : ctuWidth - skipR;
> + startY = (bpely == picHeight) ? ctuHeight : ctuHeight - skipB;
> +
> + for (y = 0; y < ctuHeight; y++)
> + {
> + for (x = (y < startY ? startX : 0); x < ctuWidth; x++)
> {
> - isChroma = !!plane;
> - if (plane == 1)
> + int classIdx = 1 + (rec[x] >> boShift);
> + stats[classIdx] += (fenc[x] - rec[x]);
> + count[classIdx]++;
> + }
> +
> + fenc += stride;
> + rec += stride;
> + }
> +
> + // SAO_EO_0: // dir: -
> + {
> + skipB = plane ? 1 : 3;
> + skipR = plane ? 3 : 5;
> +
> + stats = m_offsetOrgPreDblk[addr][plane][SAO_EO_0];
> + count = m_countPreDblk[addr][plane][SAO_EO_0];
> +
> + fenc = fenc0;
> + rec = rec0;
> +
> + startX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR;
> + startY = (bpely == picHeight) ? ctuHeight : ctuHeight - skipB;
> + firstX = !lpelx;
> + // endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
> + endX = ctuWidth - 1; // not refer right CTU
> +
> + for (y = 0; y < ctuHeight; y++)
> + {
> + x = (y < startY ? startX : firstX);
> + int signLeft = signOf(rec[x] - rec[x - 1]);
> + for (; x < endX; x++)
> {
> - picWidthTmp >>= m_hChromaShift;
> - picHeightTmp >>= m_vChromaShift;
> - ctuWidth >>= m_hChromaShift;
> - ctuHeight >>= m_vChromaShift;
> - lPelX >>= m_hChromaShift;
> - tPelY >>= m_vChromaShift;
> - rPelX = lPelX + ctuWidth;
> - bPelY = tPelY + ctuHeight;
> + int signRight = signOf(rec[x] - rec[x + 1]);
> + int edgeType = signRight + signLeft + 2;
> + signLeft = -signRight;
> +
> + stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
> + count[s_eoTable[edgeType]]++;
> }
>
> - stride = (plane == 0) ? pic->getStride() : pic->getCStride();
> + fenc += stride;
> + rec += stride;
> + }
> + }
>
> - //if(iSaoType == BO)
> + // SAO_EO_1: // dir: |
> + {
> + skipB = plane ? 2 : 4;
> + skipR = plane ? 2 : 4;
>
> - numSkipLine = isChroma ? 1 : 3;
> - numSkipLineRight = isChroma ? 2 : 4;
> + stats = m_offsetOrgPreDblk[addr][plane][SAO_EO_1];
> + count = m_countPreDblk[addr][plane][SAO_EO_1];
>
> - stats = m_offsetOrgPreDblk[addr][plane][SAO_BO];
> - count = m_countPreDblk[addr][plane][SAO_BO];
> + fenc = fenc0;
> + rec = rec0;
>
> - fenc = m_pic->getPicYuvOrg()->getPlaneAddr(plane, addr);
> - recon = m_pic->getPicYuvRec()->getPlaneAddr(plane, addr);
> + startX = (rpelx == picWidth) ? ctuWidth : ctuWidth - skipR;
> + startY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB;
> + firstY = !tpely;
> + // endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
> + endY = ctuHeight - 1; // not refer below CTU
> + if (!tpely)
> + {
> + fenc += stride;
> + rec += stride;
> + }
>
> - startX = (rPelX == picWidthTmp) ? ctuWidth : ctuWidth - numSkipLineRight;
> - startY = (bPelY == picHeightTmp) ? ctuHeight : ctuHeight - numSkipLine;
> + for (x = startX; x < ctuWidth; x++)
> + upBuff1[x] = signOf(rec[x] - rec[x - stride]);
>
> - for (y = 0; y < ctuHeight; y++)
> + for (y = firstY; y < endY; y++)
> + {
> + for (x = (y < startY - 1 ? startX : 0); x < ctuWidth; x++)
> {
> - for (x = 0; x < ctuWidth; x++)
> - {
> - if (x < startX && y < startY)
> - continue;
> + int signDown = signOf(rec[x] - rec[x + stride]);
> + int edgeType = signDown + upBuff1[x] + 2;
> + upBuff1[x] = -signDown;
>
> - int classIdx = 1 + (recon[x] >> boShift);
> - stats[classIdx] += (fenc[x] - recon[x]);
> - count[classIdx]++;
> - }
> + if (x < startX && y < startY)
> + continue;
>
> - fenc += stride;
> - recon += stride;
> + stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
> + count[s_eoTable[edgeType]]++;
> }
>
> - //if (iSaoType == EO_0)
> + fenc += stride;
> + rec += stride;
> + }
> + }
>
> - numSkipLine = isChroma ? 1 : 3;
> - numSkipLineRight = isChroma ? 3 : 5;
> + // SAO_EO_2: // dir: 135
> + {
> + skipB = plane ? 2 : 4;
> + skipR = plane ? 3 : 5;
>
> - stats = m_offsetOrgPreDblk[addr][plane][SAO_EO_0];
> - count = m_countPreDblk[addr][plane][SAO_EO_0];
> + stats = m_offsetOrgPreDblk[addr][plane][SAO_EO_2];
> + count = m_countPreDblk[addr][plane][SAO_EO_2];
>
> - fenc = m_pic->getPicYuvOrg()->getPlaneAddr(plane, addr);
> - recon = m_pic->getPicYuvRec()->getPlaneAddr(plane, addr);
> + fenc = fenc0;
> + rec = rec0;
>
> - startX = (rPelX == picWidthTmp) ? ctuWidth - 1 : ctuWidth - numSkipLineRight;
> - startY = (bPelY == picHeightTmp) ? ctuHeight : ctuHeight - numSkipLine;
> - firstX = (lPelX == 0) ? 1 : 0;
> - endX = (rPelX == picWidthTmp) ? ctuWidth - 1 : ctuWidth;
> + startX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR;
> + startY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB;
> + firstX = !lpelx;
> + firstY = !tpely;
> + // endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
> + // endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
> + endX = ctuWidth - 1; // not refer right CTU
> + endY = ctuHeight - 1; // not refer below CTU
> + if (!tpely)
> + {
> + fenc += stride;
> + rec += stride;
> + }
>
> - for (y = 0; y < ctuHeight; y++)
> + for (x = startX; x < endX; x++)
> + upBuff1[x] = signOf(rec[x] - rec[x - stride - 1]);
> +
> + for (y = firstY; y < endY; y++)
> + {
> + x = (y < startY - 1 ? startX : firstX);
> + upBufft[x] = signOf(rec[x + stride] - rec[x - 1]);
> + for (; x < endX; x++)
> {
> - int signLeft = signOf(recon[firstX] - recon[firstX - 1]);
> - for (x = firstX; x < endX; x++)
> - {
> - int signRight = signOf(recon[x] - recon[x + 1]);
> - int edgeType = signRight + signLeft + 2;
> - signLeft = -signRight;
> + int signDown = signOf(rec[x] - rec[x + stride + 1]);
> + int edgeType = signDown + upBuff1[x] + 2;
> + upBufft[x + 1] = -signDown;
>
> - if (x < startX && y < startY)
> - continue;
> + if (x < startX && y < startY)
> + continue;
>
> - stats[s_eoTable[edgeType]] += (fenc[x] - recon[x]);
> - count[s_eoTable[edgeType]]++;
> - }
> -
> - fenc += stride;
> - recon += stride;
> + stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
> + count[s_eoTable[edgeType]]++;
> }
>
> - //if (iSaoType == EO_1)
> + std::swap(upBuff1, upBufft);
>
> - numSkipLine = isChroma ? 2 : 4;
> - numSkipLineRight = isChroma ? 2 : 4;
> + rec += stride;
> + fenc += stride;
> + }
> + }
>
> - stats = m_offsetOrgPreDblk[addr][plane][SAO_EO_1];
> - count = m_countPreDblk[addr][plane][SAO_EO_1];
> + // SAO_EO_3: // dir: 45
> + {
> + skipB = plane ? 2 : 4;
> + skipR = plane ? 3 : 5;
>
> - fenc = m_pic->getPicYuvOrg()->getPlaneAddr(plane, addr);
> - recon = m_pic->getPicYuvRec()->getPlaneAddr(plane, addr);
> + stats = m_offsetOrgPreDblk[addr][plane][SAO_EO_3];
> + count = m_countPreDblk[addr][plane][SAO_EO_3];
>
> - startX = (rPelX == picWidthTmp) ? ctuWidth : ctuWidth - numSkipLineRight;
> - startY = (bPelY == picHeightTmp) ? ctuHeight - 1 : ctuHeight - numSkipLine;
> - firstY = (tPelY == 0) ? 1 : 0;
> - endY = (bPelY == picHeightTmp) ? ctuHeight - 1 : ctuHeight;
> - if (firstY == 1)
> + fenc = fenc0;
> + rec = rec0;
> +
> + startX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR;
> + startY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB;
> + firstX = !lpelx;
> + firstY = !tpely;
> + // endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
> + // endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
> + endX = ctuWidth - 1; // not refer right CTU
> + endY = ctuHeight - 1; // not refer below CTU
> + if (!tpely)
> + {
> + fenc += stride;
> + rec += stride;
> + }
> +
> + for (x = startX - 1; x < endX; x++)
> + upBuff1[x] = signOf(rec[x] - rec[x - stride + 1]);
> +
> + for (y = firstY; y < endY; y++)
> + {
> + for (x = (y < startY - 1 ? startX : firstX); x < endX; x++)
> {
> - fenc += stride;
> - recon += stride;
> + int signDown = signOf(rec[x] - rec[x + stride - 1]);
> + int edgeType = signDown + upBuff1[x] + 2;
> + upBuff1[x - 1] = -signDown;
> +
> + if (x < startX && y < startY)
> + continue;
> +
> + stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
> + count[s_eoTable[edgeType]]++;
> }
>
> - for (x = 0; x < ctuWidth; x++)
> - upBuff1[x] = signOf(recon[x] - recon[x - stride]);
> + upBuff1[endX - 1] = signOf(rec[endX - 1 + stride] - rec[endX]);
>
> - for (y = firstY; y < endY; y++)
> - {
> - for (x = 0; x < ctuWidth; x++)
> - {
> - int signDown = signOf(recon[x] - recon[x + stride]);
> - int edgeType = signDown + upBuff1[x] + 2;
> - upBuff1[x] = -signDown;
> -
> - if (x < startX && y < startY)
> - continue;
> -
> - stats[s_eoTable[edgeType]] += (fenc[x] - recon[x]);
> - count[s_eoTable[edgeType]]++;
> - }
> -
> - fenc += stride;
> - recon += stride;
> - }
> -
> - //if (iSaoType == EO_2)
> -
> - numSkipLine = isChroma ? 2 : 4;
> - numSkipLineRight = isChroma ? 3 : 5;
> -
> - stats = m_offsetOrgPreDblk[addr][plane][SAO_EO_2];
> - count = m_countPreDblk[addr][plane][SAO_EO_2];
> -
> - fenc = m_pic->getPicYuvOrg()->getPlaneAddr(plane, addr);
> - recon = m_pic->getPicYuvRec()->getPlaneAddr(plane, addr);
> -
> - startX = (rPelX == picWidthTmp) ? ctuWidth - 1 : ctuWidth - numSkipLineRight;
> - startY = (bPelY == picHeightTmp) ? ctuHeight - 1 : ctuHeight - numSkipLine;
> - firstX = (lPelX == 0) ? 1 : 0;
> - firstY = (tPelY == 0) ? 1 : 0;
> - endX = (rPelX == picWidthTmp) ? ctuWidth - 1 : ctuWidth;
> - endY = (bPelY == picHeightTmp) ? ctuHeight - 1 : ctuHeight;
> - if (firstY == 1)
> - {
> - fenc += stride;
> - recon += stride;
> - }
> -
> - for (x = firstX; x < endX; x++)
> - upBuff1[x] = signOf(recon[x] - recon[x - stride - 1]);
> -
> - for (y = firstY; y < endY; y++)
> - {
> - int signDown2 = signOf(recon[stride + startX] - recon[startX - 1]);
> - for (x = firstX; x < endX; x++)
> - {
> - int signDown1 = signOf(recon[x] - recon[x + stride + 1]);
> - int edgeType = signDown1 + upBuff1[x] + 2;
> - upBufft[x + 1] = -signDown1;
> -
> - if (x < startX && y < startY)
> - continue;
> -
> - stats[s_eoTable[edgeType]] += (fenc[x] - recon[x]);
> - count[s_eoTable[edgeType]]++;
> - }
> -
> - upBufft[firstX] = signDown2;
> - std::swap(upBuff1, upBufft);
> -
> - recon += stride;
> - fenc += stride;
> - }
> -
> - //if (iSaoType == EO_3)
> -
> - numSkipLine = isChroma ? 2 : 4;
> - numSkipLineRight = isChroma ? 3 : 5;
> -
> - stats = m_offsetOrgPreDblk[addr][plane][SAO_EO_3];
> - count = m_countPreDblk[addr][plane][SAO_EO_3];
> -
> - fenc = m_pic->getPicYuvOrg()->getPlaneAddr(plane, addr);
> - recon = m_pic->getPicYuvRec()->getPlaneAddr(plane, addr);
> -
> - startX = (rPelX == picWidthTmp) ? ctuWidth - 1 : ctuWidth - numSkipLineRight;
> - startY = (bPelY == picHeightTmp) ? ctuHeight - 1 : ctuHeight - numSkipLine;
> - firstX = (lPelX == 0) ? 1 : 0;
> - firstY = (tPelY == 0) ? 1 : 0;
> - endX = (rPelX == picWidthTmp) ? ctuWidth - 1 : ctuWidth;
> - endY = (bPelY == picHeightTmp) ? ctuHeight - 1 : ctuHeight;
> - if (firstY == 1)
> - {
> - fenc += stride;
> - recon += stride;
> - }
> -
> - for (x = firstX - 1; x < endX; x++)
> - upBuff1[x] = signOf(recon[x] - recon[x - stride + 1]);
> -
> - for (y = firstY; y < endY; y++)
> - {
> - for (x = firstX; x < endX; x++)
> - {
> - int signDown1 = signOf(recon[x] - recon[x + stride - 1]);
> - int edgeType = signDown1 + upBuff1[x] + 2;
> - upBuff1[x - 1] = -signDown1;
> -
> - if (x < startX && y < startY)
> - continue;
> -
> - stats[s_eoTable[edgeType]] += (fenc[x] - recon[x]);
> - count[s_eoTable[edgeType]]++;
> - }
> -
> - upBuff1[endX - 1] = signOf(recon[endX - 1 + stride] - recon[endX]);
> -
> - recon += stride;
> - fenc += stride;
> - }
> + rec += stride;
> + fenc += stride;
> }
> }
> }
> @@ -1151,69 +1079,9 @@
> /* reset offset statistics */
> void SAO::resetStats()
> {
> - for (int i = 0; i < NUM_PLANE; i++)
> - {
> - for (int j = 0; j < MAX_NUM_SAO_TYPE; j++)
> - {
> - for (int k = 0; k < MAX_NUM_SAO_CLASS; k++)
> - {
> - m_count[i][j][k] = 0;
> - m_offset[i][j][k] = 0;
> - m_offsetOrg[i][j][k] = 0;
> - }
> - }
> - }
> -}
> -
> -/* Check merge SAO unit */
> -void SAO::checkMerge(SaoCtuParam * saoUnitCurr, SaoCtuParam * saoUnitCheck, int dir)
> -{
> - int countDiff = 0;
> -
> - if (saoUnitCurr->partIdx != saoUnitCheck->partIdx)
> - {
> - if (saoUnitCurr->typeIdx >= 0)
> - {
> - if (saoUnitCurr->typeIdx == saoUnitCheck->typeIdx)
> - {
> - for (int i = 0; i < SAO_NUM_OFFSET; i++)
> - countDiff += (saoUnitCurr->offset[i] != saoUnitCheck->offset[i]);
> -
> - countDiff += (saoUnitCurr->subTypeIdx != saoUnitCheck->subTypeIdx);
> - if (countDiff == 0)
> - {
> - saoUnitCurr->partIdx = saoUnitCheck->partIdx;
> - if (dir == 1)
> - {
> - saoUnitCurr->mergeUpFlag = 1;
> - saoUnitCurr->mergeLeftFlag = 0;
> - }
> - else
> - {
> - saoUnitCurr->mergeUpFlag = 0;
> - saoUnitCurr->mergeLeftFlag = 1;
> - }
> - }
> - }
> - }
> - else
> - {
> - if (saoUnitCurr->typeIdx == saoUnitCheck->typeIdx)
> - {
> - saoUnitCurr->partIdx = saoUnitCheck->partIdx;
> - if (dir == 1)
> - {
> - saoUnitCurr->mergeUpFlag = 1;
> - saoUnitCurr->mergeLeftFlag = 0;
> - }
> - else
> - {
> - saoUnitCurr->mergeUpFlag = 0;
> - saoUnitCurr->mergeLeftFlag = 1;
> - }
> - }
> - }
> - }
> + memset(m_count, 0, sizeof(PerClass) * NUM_PLANE);
> + memset(m_offset, 0, sizeof(PerClass) * NUM_PLANE);
> + memset(m_offsetOrg, 0, sizeof(PerClass) * NUM_PLANE);
> }
>
> void SAO::rdoSaoUnitRowInit(SAOParam *saoParam)
> @@ -1244,25 +1112,17 @@
>
> void SAO::rdoSaoUnitRow(SAOParam *saoParam, int idxY)
> {
> - int frameWidthInCU = saoParam->numCuInWidth;
> int j, k;
> - int compIdx = 0;
> SaoCtuParam mergeSaoParam[3][2];
> double compDistortion[3];
> + int allowMergeUp = (idxY > 0);
>
> - for (int idxX = 0; idxX < frameWidthInCU; idxX++)
> + for (int idxX = 0; idxX < m_numCuInWidth; idxX++)
> {
> - int addr = idxX + idxY * frameWidthInCU;
> - int addrUp = idxY == 0 ? -1 : addr - frameWidthInCU;
> + int addr = idxX + idxY * m_numCuInWidth;
> + int addrUp = idxY == 0 ? -1 : addr - m_numCuInWidth;
> int addrLeft = idxX == 0 ? -1 : addr - 1;
> - int allowMergeLeft = 1;
> - int allowMergeUp = 1;
> - uint32_t rate;
> - double bestCost, mergeCost;
> - if (idxX == 0)
> - allowMergeLeft = 0;
> - if (idxY == 0)
> - allowMergeUp = 0;
> + int allowMergeLeft = (idxX > 0);
>
> compDistortion[0] = 0;
> compDistortion[1] = 0;
> @@ -1274,32 +1134,32 @@
> m_entropyCoder.codeSaoMerge(0);
> m_entropyCoder.store(m_rdEntropyCoders[0][CI_TEMP_BEST]);
> // reset stats Y, Cb, Cr
> - for (compIdx = 0; compIdx < 3; compIdx++)
> + for (int plane = 0; plane < 3; plane++)
> {
> for (j = 0; j < MAX_NUM_SAO_TYPE; j++)
> {
> for (k = 0; k < MAX_NUM_SAO_CLASS; k++)
> {
> - m_offset[compIdx][j][k] = 0;
> + m_offset[plane][j][k] = 0;
> if (m_param->bSaoNonDeblocked)
> {
> - m_count[compIdx][j][k] = m_countPreDblk[addr][compIdx][j][k];
> - m_offsetOrg[compIdx][j][k] = m_offsetOrgPreDblk[addr][compIdx][j][k];
> + m_count[plane][j][k] = m_countPreDblk[addr][plane][j][k];
> + m_offsetOrg[plane][j][k] = m_offsetOrgPreDblk[addr][plane][j][k];
> }
> else
> {
> - m_count[compIdx][j][k] = 0;
> - m_offsetOrg[compIdx][j][k] = 0;
> + m_count[plane][j][k] = 0;
> + m_offsetOrg[plane][j][k] = 0;
> }
> }
> }
>
> - saoParam->ctuParam[compIdx][addr].typeIdx = -1;
> - saoParam->ctuParam[compIdx][addr].mergeUpFlag = 0;
> - saoParam->ctuParam[compIdx][addr].mergeLeftFlag = 0;
> - saoParam->ctuParam[compIdx][addr].subTypeIdx = 0;
> - if ((compIdx == 0 && saoParam->bSaoFlag[0]) || (compIdx > 0 && saoParam->bSaoFlag[1]))
> - calcSaoStatsCu(addr, compIdx);
> + saoParam->ctuParam[plane][addr].typeIdx = -1;
> + saoParam->ctuParam[plane][addr].mergeUpFlag = 0;
> + saoParam->ctuParam[plane][addr].mergeLeftFlag = 0;
> + saoParam->ctuParam[plane][addr].bandPos = 0;
> + if ((plane == 0 && saoParam->bSaoFlag[0]) || (plane > 0 && saoParam->bSaoFlag[1]))
> + calcSaoStatsCu(addr, plane);
> }
>
> saoComponentParamDist(allowMergeLeft, allowMergeUp, saoParam, addr, addrUp, addrLeft,
> @@ -1317,14 +1177,14 @@
> m_entropyCoder.codeSaoMerge(0);
> if (allowMergeUp)
> m_entropyCoder.codeSaoMerge(0);
> - for (compIdx = 0; compIdx < 3; compIdx++)
> + for (int plane = 0; plane < 3; plane++)
> {
> - if ((compIdx == 0 && saoParam->bSaoFlag[0]) || (compIdx > 0 && saoParam->bSaoFlag[1]))
> - m_entropyCoder.codeSaoOffset(&saoParam->ctuParam[compIdx][addr], compIdx);
> + if ((plane == 0 && saoParam->bSaoFlag[0]) || (plane > 0 && saoParam->bSaoFlag[1]))
> + m_entropyCoder.codeSaoOffset(&saoParam->ctuParam[plane][addr], plane);
> }
>
> - rate = m_entropyCoder.getNumberOfWrittenBits();
> - bestCost = compDistortion[0] + (double)rate;
> + uint32_t rate = m_entropyCoder.getNumberOfWrittenBits();
> + double bestCost = compDistortion[0] + (double)rate;
> m_entropyCoder.store(m_rdEntropyCoders[0][CI_TEMP_BEST]);
>
> // Cost of Merge
> @@ -1340,17 +1200,17 @@
> m_entropyCoder.codeSaoMerge(1);
>
> rate = m_entropyCoder.getNumberOfWrittenBits();
> - mergeCost = compDistortion[mergeUp + 1] + (double)rate;
> + double mergeCost = compDistortion[mergeUp + 1] + (double)rate;
> if (mergeCost < bestCost)
> {
> bestCost = mergeCost;
> m_entropyCoder.store(m_rdEntropyCoders[0][CI_TEMP_BEST]);
> - for (compIdx = 0; compIdx < 3; compIdx++)
> + for (int plane = 0; plane < 3; plane++)
> {
> - mergeSaoParam[compIdx][mergeUp].mergeLeftFlag = !mergeUp;
> - mergeSaoParam[compIdx][mergeUp].mergeUpFlag = !!mergeUp;
> - if ((compIdx == 0 && saoParam->bSaoFlag[0]) || (compIdx > 0 && saoParam->bSaoFlag[1]))
> - copySaoUnit(&saoParam->ctuParam[compIdx][addr], &mergeSaoParam[compIdx][mergeUp]);
> + mergeSaoParam[plane][mergeUp].mergeLeftFlag = !mergeUp;
> + mergeSaoParam[plane][mergeUp].mergeUpFlag = !!mergeUp;
> + if ((plane == 0 && saoParam->bSaoFlag[0]) || (plane > 0 && saoParam->bSaoFlag[1]))
> + copySaoUnit(&saoParam->ctuParam[plane][addr], &mergeSaoParam[plane][mergeUp]);
> }
> }
> }
> @@ -1367,92 +1227,85 @@
> }
>
> /** rate distortion optimization of SAO unit */
> -inline int64_t SAO::estSaoTypeDist(int compIdx, int typeIdx, int shift, double lambda, int32_t *currentDistortionTableBo, double *currentRdCostTableBo)
> +inline int64_t SAO::estSaoTypeDist(int plane, int typeIdx, double lambda, int32_t *currentDistortionTableBo, double *currentRdCostTableBo)
> {
> int64_t estDist = 0;
>
> for (int classIdx = 1; classIdx < ((typeIdx < SAO_BO) ? SAO_EO_LEN + 1 : SAO_NUM_BO_CLASSES + 1); classIdx++)
> {
> + int32_t count = m_count[plane][typeIdx][classIdx];
> + int32_t& offsetOrg = m_offsetOrg[plane][typeIdx][classIdx];
> + int32_t& offsetOut = m_offset[plane][typeIdx][classIdx];
> +
> if (typeIdx == SAO_BO)
> {
> currentDistortionTableBo[classIdx - 1] = 0;
> currentRdCostTableBo[classIdx - 1] = lambda;
> }
> - if (m_count[compIdx][typeIdx][classIdx])
> + if (count)
> {
> - m_offset[compIdx][typeIdx][classIdx] = (int64_t)roundIDBI((double)(m_offsetOrg[compIdx][typeIdx][classIdx] << (X265_DEPTH - 8)) / (double)(m_count[compIdx][typeIdx][classIdx] << SAO_BIT_INC));
> - m_offset[compIdx][typeIdx][classIdx] = Clip3(-OFFSET_THRESH + 1, OFFSET_THRESH - 1, (int)m_offset[compIdx][typeIdx][classIdx]);
> + int offset = roundIBDI(offsetOrg, count << SAO_BIT_INC);
> + offset = Clip3(-OFFSET_THRESH + 1, OFFSET_THRESH - 1, offset);
> if (typeIdx < SAO_BO)
> {
> - if (m_offset[compIdx][typeIdx][classIdx] < 0 && classIdx < 3)
> - m_offset[compIdx][typeIdx][classIdx] = 0;
> - if (m_offset[compIdx][typeIdx][classIdx] > 0 && classIdx >= 3)
> - m_offset[compIdx][typeIdx][classIdx] = 0;
> + if (classIdx < 3)
> + offset = X265_MAX(offset, 0);
> + else
> + offset = X265_MIN(offset, 0);
> }
> - m_offset[compIdx][typeIdx][classIdx] = estIterOffset(typeIdx, classIdx, lambda, m_offset[compIdx][typeIdx][classIdx], m_count[compIdx][typeIdx][classIdx], m_offsetOrg[compIdx][typeIdx][classIdx], shift, SAO_BIT_INC, currentDistortionTableBo, currentRdCostTableBo, OFFSET_THRESH);
> + offsetOut = estIterOffset(typeIdx, classIdx, lambda, offset, count, offsetOrg, currentDistortionTableBo, currentRdCostTableBo);
> }
> else
> {
> - m_offsetOrg[compIdx][typeIdx][classIdx] = 0;
> - m_offset[compIdx][typeIdx][classIdx] = 0;
> + offsetOrg = 0;
> + offsetOut = 0;
> }
> if (typeIdx != SAO_BO)
> - estDist += estSaoDist(m_count[compIdx][typeIdx][classIdx], m_offset[compIdx][typeIdx][classIdx] << SAO_BIT_INC, m_offsetOrg[compIdx][typeIdx][classIdx], shift);
> + estDist += estSaoDist(count, (int)offsetOut << SAO_BIT_INC, offsetOrg);
> }
>
> return estDist;
> }
>
> -inline int64_t SAO::estSaoDist(int64_t count, int64_t offset, int64_t offsetOrg, int shift)
> +inline int SAO::estIterOffset(int typeIdx, int classIdx, double lambda, int offset, int32_t count, int32_t offsetOrg, int32_t *currentDistortionTableBo, double *currentRdCostTableBo)
> {
> - return (count * offset * offset - offsetOrg * offset * 2) >> shift;
> -}
> + int offsetOut = 0;
>
> -inline int64_t SAO::estIterOffset(int typeIdx, int classIdx, double lambda, int64_t offsetInput, int64_t count, int64_t offsetOrg, int shift, int bitIncrease, int32_t *currentDistortionTableBo, double *currentRdCostTableBo, int offsetTh)
> -{
> - //Clean up, best_q_offset.
> - int64_t iterOffset, tempOffset;
> - int64_t tempDist, tempRate;
> - int64_t offsetOutput = 0;
> -
> - iterOffset = offsetInput;
> // Assuming sending quantized value 0 results in zero offset and sending the value zero needs 1 bit. entropy coder can be used to measure the exact rate here.
> double tempMinCost = lambda;
> - while (iterOffset != 0)
> + while (offset != 0)
> {
> // Calculate the bits required for signalling the offset
> - tempRate = (typeIdx == SAO_BO) ? (abs((int)iterOffset) + 2) : (abs((int)iterOffset) + 1);
> - if (abs((int)iterOffset) == offsetTh - 1)
> + int tempRate = (typeIdx == SAO_BO) ? (abs(offset) + 2) : (abs(offset) + 1);
> + if (abs(offset) == OFFSET_THRESH - 1)
> tempRate--;
>
> // Do the dequntization before distorion calculation
> - tempOffset = iterOffset << bitIncrease;
> - tempDist = estSaoDist(count, tempOffset, offsetOrg, shift);
> + int tempOffset = offset << SAO_BIT_INC;
> + int64_t tempDist = estSaoDist(count, tempOffset, offsetOrg);
> double tempCost = ((double)tempDist + lambda * (double)tempRate);
> if (tempCost < tempMinCost)
> {
> tempMinCost = tempCost;
> - offsetOutput = iterOffset;
> + offsetOut = offset;
> if (typeIdx == SAO_BO)
> {
> currentDistortionTableBo[classIdx - 1] = (int)tempDist;
> currentRdCostTableBo[classIdx - 1] = tempCost;
> }
> }
> - iterOffset = (iterOffset > 0) ? (iterOffset - 1) : (iterOffset + 1);
> + offset = (offset > 0) ? (offset - 1) : (offset + 1);
> }
>
> - return offsetOutput;
> + return offsetOut;
> }
>
> void SAO::saoComponentParamDist(int allowMergeLeft, int allowMergeUp, SAOParam *saoParam, int addr, int addrUp, int addrLeft,
> SaoCtuParam *compSaoParam, double *compDistortion)
> {
> - int64_t estDist;
> - int64_t bestDist;
> + int64_t bestDist = 0;
>
> SaoCtuParam* lclCtuParam = &saoParam->ctuParam[0][addr];
> - SaoCtuParam* ctuParamNeighbor = NULL;
> SaoCtuParam ctuParamRdo;
>
> resetSaoUnit(&ctuParamRdo);
> @@ -1460,7 +1313,6 @@
> resetSaoUnit(&compSaoParam[1]);
> resetSaoUnit(lclCtuParam);
>
> - double dCostPartBest = MAX_DOUBLE;
> double bestRDCostTableBo = MAX_DOUBLE;
> int bestClassTableBo = 0;
> int currentDistortionTableBo[MAX_NUM_SAO_CLASS];
> @@ -1469,13 +1321,12 @@
> m_entropyCoder.load(m_rdEntropyCoders[0][CI_TEMP_BEST]);
> m_entropyCoder.resetBits();
> m_entropyCoder.codeSaoOffset(&ctuParamRdo, 0);
> - dCostPartBest = m_entropyCoder.getNumberOfWrittenBits() * m_lumaLambda;
> + double dCostPartBest = m_entropyCoder.getNumberOfWrittenBits() * m_lumaLambda;
> copySaoUnit(lclCtuParam, &ctuParamRdo);
> - bestDist = 0;
>
> for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)
> {
> - estDist = estSaoTypeDist(0, typeIdx, 0, m_lumaLambda, currentDistortionTableBo, currentRdCostTableBo);
> + int64_t estDist = estSaoTypeDist(0, typeIdx, m_lumaLambda, currentDistortionTableBo, currentRdCostTableBo);
>
> if (typeIdx == SAO_BO)
> {
> @@ -1503,16 +1354,16 @@
> ctuParamRdo.typeIdx = typeIdx;
> ctuParamRdo.mergeLeftFlag = 0;
> ctuParamRdo.mergeUpFlag = 0;
> - ctuParamRdo.subTypeIdx = (typeIdx == SAO_BO) ? bestClassTableBo : 0;
> + ctuParamRdo.bandPos = (typeIdx == SAO_BO) ? bestClassTableBo : 0;
> for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
> - ctuParamRdo.offset[classIdx] = (int)m_offset[0][typeIdx][classIdx + ctuParamRdo.subTypeIdx + 1];
> + ctuParamRdo.offset[classIdx] = (int)m_offset[0][typeIdx][classIdx + ctuParamRdo.bandPos + 1];
>
> m_entropyCoder.load(m_rdEntropyCoders[0][CI_TEMP_BEST]);
> m_entropyCoder.resetBits();
> m_entropyCoder.codeSaoOffset(&ctuParamRdo, 0);
>
> uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
> - double cost = (double)((double)estDist + m_lumaLambda * (double)estRate);
> + double cost = (double)estDist + m_lumaLambda * (double)estRate;
>
> if (cost < dCostPartBest)
> {
> @@ -1531,27 +1382,24 @@
>
> for (int idxNeighbor = 0; idxNeighbor < 2; idxNeighbor++)
> {
> - ctuParamNeighbor = NULL;
> + SaoCtuParam* ctuParamNeighbor = NULL;
> if (allowMergeLeft && addrLeft >= 0 && idxNeighbor == 0)
> ctuParamNeighbor = &(saoParam->ctuParam[0][addrLeft]);
> else if (allowMergeUp && addrUp >= 0 && idxNeighbor == 1)
> ctuParamNeighbor = &(saoParam->ctuParam[0][addrUp]);
> if (ctuParamNeighbor != NULL)
> {
> - estDist = 0;
> + int64_t estDist = 0;
> int typeIdx = ctuParamNeighbor->typeIdx;
> if (typeIdx >= 0)
> {
> - int mergeBandPosition = (typeIdx == SAO_BO) ? ctuParamNeighbor->subTypeIdx : 0;
> - int mergeOffset;
> + int mergeBandPosition = (typeIdx == SAO_BO) ? ctuParamNeighbor->bandPos : 0;
> for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
> {
> - mergeOffset = ctuParamNeighbor->offset[classIdx];
> - estDist += estSaoDist(m_count[0][typeIdx][classIdx + mergeBandPosition + 1], mergeOffset, m_offsetOrg[0][typeIdx][classIdx + mergeBandPosition + 1], 0);
> + int mergeOffset = ctuParamNeighbor->offset[classIdx];
> + estDist += estSaoDist(m_count[0][typeIdx][classIdx + mergeBandPosition + 1], mergeOffset, m_offsetOrg[0][typeIdx][classIdx + mergeBandPosition + 1]);
> }
> }
> - else
> - estDist = 0;
>
> copySaoUnit(&compSaoParam[idxNeighbor], ctuParamNeighbor);
> compSaoParam[idxNeighbor].mergeUpFlag = !!idxNeighbor;
> @@ -1565,11 +1413,9 @@
> void SAO::sao2ChromaParamDist(int allowMergeLeft, int allowMergeUp, SAOParam *saoParam, int addr, int addrUp, int addrLeft,
> SaoCtuParam *crSaoParam, SaoCtuParam *cbSaoParam, double *distortion)
> {
> - int64_t estDist[2];
> int64_t bestDist = 0;
>
> SaoCtuParam* lclCtuParam[2] = { &saoParam->ctuParam[1][addr], &saoParam->ctuParam[2][addr] };
> - SaoCtuParam* ctuParamNeighbor[2] = { NULL, NULL };
> SaoCtuParam* saoMergeParam[2][2];
> SaoCtuParam ctuParamRdo[2];
>
> @@ -1587,8 +1433,6 @@
> resetSaoUnit(&ctuParamRdo[0]);
> resetSaoUnit(&ctuParamRdo[1]);
>
> - double costPartBest = MAX_DOUBLE;
> - double bestRDCostTableBo;
> double currentRdCostTableBo[MAX_NUM_SAO_CLASS];
> int bestClassTableBo[2] = { 0, 0 };
> int currentDistortionTableBo[MAX_NUM_SAO_CLASS];
> @@ -1598,19 +1442,20 @@
> m_entropyCoder.codeSaoOffset(&ctuParamRdo[0], 1);
> m_entropyCoder.codeSaoOffset(&ctuParamRdo[1], 2);
>
> - costPartBest = m_entropyCoder.getNumberOfWrittenBits() * m_chromaLambda;
> + double costPartBest = m_entropyCoder.getNumberOfWrittenBits() * m_chromaLambda;
> copySaoUnit(lclCtuParam[0], &ctuParamRdo[0]);
> copySaoUnit(lclCtuParam[1], &ctuParamRdo[1]);
>
> for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)
> {
> + int64_t estDist[2];
> if (typeIdx == SAO_BO)
> {
> // Estimate Best Position
> for (int compIdx = 0; compIdx < 2; compIdx++)
> {
> - bestRDCostTableBo = MAX_DOUBLE;
> - estDist[compIdx] = estSaoTypeDist(compIdx + 1, typeIdx, 0, m_chromaLambda, currentDistortionTableBo, currentRdCostTableBo);
> + double bestRDCostTableBo = MAX_DOUBLE;
> + estDist[compIdx] = estSaoTypeDist(compIdx + 1, typeIdx, m_chromaLambda, currentDistortionTableBo, currentRdCostTableBo);
> for (int i = 0; i < SAO_NUM_BO_CLASSES - SAO_BO_LEN + 1; i++)
> {
> double currentRDCost = 0.0;
> @@ -1633,8 +1478,8 @@
> }
> else
> {
> - estDist[0] = estSaoTypeDist(1, typeIdx, 0, m_chromaLambda, currentDistortionTableBo, currentRdCostTableBo);
> - estDist[1] = estSaoTypeDist(2, typeIdx, 0, m_chromaLambda, currentDistortionTableBo, currentRdCostTableBo);
> + estDist[0] = estSaoTypeDist(1, typeIdx, m_chromaLambda, currentDistortionTableBo, currentRdCostTableBo);
> + estDist[1] = estSaoTypeDist(2, typeIdx, m_chromaLambda, currentDistortionTableBo, currentRdCostTableBo);
> }
>
> m_entropyCoder.load(m_rdEntropyCoders[0][CI_TEMP_BEST]);
> @@ -1646,15 +1491,15 @@
> ctuParamRdo[compIdx].typeIdx = typeIdx;
> ctuParamRdo[compIdx].mergeLeftFlag = 0;
> ctuParamRdo[compIdx].mergeUpFlag = 0;
> - ctuParamRdo[compIdx].subTypeIdx = (typeIdx == SAO_BO) ? bestClassTableBo[compIdx] : 0;
> + ctuParamRdo[compIdx].bandPos = (typeIdx == SAO_BO) ? bestClassTableBo[compIdx] : 0;
> for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
> - ctuParamRdo[compIdx].offset[classIdx] = (int)m_offset[compIdx + 1][typeIdx][classIdx + ctuParamRdo[compIdx].subTypeIdx + 1];
> + ctuParamRdo[compIdx].offset[classIdx] = (int)m_offset[compIdx + 1][typeIdx][classIdx + ctuParamRdo[compIdx].bandPos + 1];
>
> m_entropyCoder.codeSaoOffset(&ctuParamRdo[compIdx], compIdx + 1);
> }
>
> uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
> - double cost = (double)((double)(estDist[0] + estDist[1]) + m_chromaLambda * (double)estRate);
> + double cost = (double)(estDist[0] + estDist[1]) + m_chromaLambda * (double)estRate;
>
> if (cost < costPartBest)
> {
> @@ -1677,31 +1522,30 @@
> {
> for (int compIdx = 0; compIdx < 2; compIdx++)
> {
> - ctuParamNeighbor[compIdx] = NULL;
> + int plane = compIdx + 1;
> + SaoCtuParam* ctuParamNeighbor = NULL;
> if (allowMergeLeft && addrLeft >= 0 && idxNeighbor == 0)
> - ctuParamNeighbor[compIdx] = &(saoParam->ctuParam[compIdx + 1][addrLeft]);
> + ctuParamNeighbor = &(saoParam->ctuParam[plane][addrLeft]);
> else if (allowMergeUp && addrUp >= 0 && idxNeighbor == 1)
> - ctuParamNeighbor[compIdx] = &(saoParam->ctuParam[compIdx + 1][addrUp]);
> - if (ctuParamNeighbor[compIdx] != NULL)
> + ctuParamNeighbor = &(saoParam->ctuParam[plane][addrUp]);
> + if (ctuParamNeighbor != NULL)
> {
> - estDist[compIdx] = 0;
> - int typeIdx = ctuParamNeighbor[compIdx]->typeIdx;
> + int64_t estDist = 0;
> + int typeIdx = ctuParamNeighbor->typeIdx;
> if (typeIdx >= 0)
> {
> - int mergeBandPosition = (typeIdx == SAO_BO) ? ctuParamNeighbor[compIdx]->subTypeIdx : 0;
> + int mergeBandPosition = (typeIdx == SAO_BO) ? ctuParamNeighbor->bandPos : 0;
> for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
> {
> - int mergeOffset = ctuParamNeighbor[compIdx]->offset[classIdx];
> - estDist[compIdx] += estSaoDist(m_count[compIdx + 1][typeIdx][classIdx + mergeBandPosition + 1], mergeOffset, m_offsetOrg[compIdx + 1][typeIdx][classIdx + mergeBandPosition + 1], 0);
> + int mergeOffset = ctuParamNeighbor->offset[classIdx];
> + estDist += estSaoDist(m_count[plane][typeIdx][classIdx + mergeBandPosition + 1], mergeOffset, m_offsetOrg[plane][typeIdx][classIdx + mergeBandPosition + 1]);
> }
> }
> - else
> - estDist[compIdx] = 0;
>
> - copySaoUnit(saoMergeParam[compIdx][idxNeighbor], ctuParamNeighbor[compIdx]);
> + copySaoUnit(saoMergeParam[compIdx][idxNeighbor], ctuParamNeighbor);
> saoMergeParam[compIdx][idxNeighbor]->mergeUpFlag = !!idxNeighbor;
> saoMergeParam[compIdx][idxNeighbor]->mergeLeftFlag = !idxNeighbor;
> - distortion[idxNeighbor + 1] += ((double)estDist[compIdx] / m_chromaLambda);
> + distortion[idxNeighbor + 1] += ((double)estDist / m_chromaLambda);
> }
> }
> }
> diff -r b6d49505b179 -r 64ea900398eb source/encoder/sao.h
> --- a/source/encoder/sao.h Thu Oct 02 16:47:55 2014 -0500
> +++ b/source/encoder/sao.h Sun Oct 05 18:19:16 2014 +0900
> @@ -63,9 +63,8 @@
>
> static const uint32_t s_eoTable[NUM_EDGETYPE];
>
> - typedef int64_t (PerClass[MAX_NUM_SAO_TYPE][MAX_NUM_SAO_CLASS]);
> - typedef int64_t (PerType[MAX_NUM_SAO_TYPE]);
> - typedef int64_t (PerPlane[3][MAX_NUM_SAO_TYPE][MAX_NUM_SAO_CLASS]);
> + typedef int32_t (PerClass[MAX_NUM_SAO_TYPE][MAX_NUM_SAO_CLASS]);
> + typedef int32_t (PerPlane[NUM_PLANE][MAX_NUM_SAO_TYPE][MAX_NUM_SAO_CLASS]);
>
> /* allocated per part */
> PerClass* m_count;
> @@ -102,7 +101,7 @@
> x265_param* m_param;
> int m_refDepth;
> int m_numNoSao[2];
> -
> +
> double m_lumaLambda;
> double m_chromaLambda;
> /* TODO: No doubles for distortion */
> @@ -120,7 +119,7 @@
> void resetSaoUnit(SaoCtuParam* saoUnit);
>
> // CTU-based SAO process without slice granularity
> - void processSaoCu(int addr, int partIdx, int plane);
> + void processSaoCu(int addr, int typeIdx, int plane);
>
> void resetCtuPart(SaoCtuParam* ctuParam);
> void processSaoUnitRow(SaoCtuParam* ctuParam, int idxY, int plane);
> @@ -129,17 +128,15 @@
>
> void calcSaoStatsCu(int addr, int plane);
> void calcSaoStatsCu_BeforeDblk(Frame* pic, int idxX, int idxY);
> - void checkMerge(SaoCtuParam* paramCurr, SaoCtuParam* paramCheck, int dir);
>
> void saoComponentParamDist(int allowMergeLeft, int allowMergeUp, SAOParam *saoParam, int addr, int addrUp, int addrLeft,
> SaoCtuParam *compSaoParam, double *distortion);
> void sao2ChromaParamDist(int allowMergeLeft, int allowMergeUp, SAOParam *saoParam, int addr, int addrUp, int addrLeft,
> SaoCtuParam *crSaoParam, SaoCtuParam *cbSaoParam, double *distortion);
>
> - inline int64_t estSaoDist(int64_t count, int64_t offset, int64_t offsetOrg, int shift);
> - inline int64_t estIterOffset(int typeIdx, int classIdx, double lambda, int64_t offsetInput, int64_t count, int64_t offsetOrg, int shift,
> - int bitIncrease, int32_t *currentDistortionTableBo, double *currentRdCostTableBo, int offsetTh);
> - inline int64_t estSaoTypeDist(int compIdx, int typeIdx, int shift, double lambda, int32_t *currentDistortionTableBo, double *currentRdCostTableBo);
> + inline int estIterOffset(int typeIdx, int classIdx, double lambda, int offset, int32_t count, int32_t offsetOrg,
> + int32_t *currentDistortionTableBo, double *currentRdCostTableBo);
> + inline int64_t estSaoTypeDist(int plane, int typeIdx, double lambda, int32_t *currentDistortionTableBo, double *currentRdCostTableBo);
>
> void rdoSaoUnitRowInit(SAOParam *saoParam);
> void rdoSaoUnitRowEnd(SAOParam *saoParam, int numctus);
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
--
Steve Borho
More information about the x265-devel mailing list