[x265] refine deblocking filter
Steve Borho
steve at borho.org
Thu Sep 25 03:26:06 CEST 2014
On 09/24, Satoshi Nakagawa wrote:
> # HG changeset patch
> # User Satoshi Nakagawa <nakagawa424 at oki.com>
> # Date 1411549726 -32400
> # Wed Sep 24 18:08:46 2014 +0900
> # Node ID 9f96fc8374d834d424190b0b1581054996985b67
> # Parent b2b7072ddbf73085d457bd6a71bca946e505dea8
> refine deblocking filter
Hi Satoshi,
I pushed this one since it looked harmless, but the automated tests are
reporting decoder hash mismatches which bisect to this commit. Can you
take a look?
repro:
vc11-x86_64-8bpp-Release
x265 BasketballDrive_1920x1080_50.y4m o.bin --preset superfast -f 30 --hash 1
> diff -r b2b7072ddbf7 -r 9f96fc8374d8 source/Lib/TLibCommon/TComPicYuv.h
> --- a/source/Lib/TLibCommon/TComPicYuv.h Wed Sep 24 11:48:15 2014 +0530
> +++ b/source/Lib/TLibCommon/TComPicYuv.h Wed Sep 24 18:08:46 2014 +0900
> @@ -155,6 +155,8 @@
>
> pixel* getChromaAddr(uint32_t chromaId, int cuAddr, int absZOrderIdx) { return m_picOrg[chromaId] + m_cuOffsetC[cuAddr] + m_buOffsetC[absZOrderIdx]; }
>
> + int32_t getChromaAddrOffset(int cuAddr, int absZOrderIdx) { return m_cuOffsetC[cuAddr] + m_buOffsetC[absZOrderIdx]; }
> +
> uint32_t getCUHeight(int rowNum);
>
> void copyFromPicture(const x265_picture&, int padx, int pady);
> diff -r b2b7072ddbf7 -r 9f96fc8374d8 source/common/deblock.cpp
> --- a/source/common/deblock.cpp Wed Sep 24 11:48:15 2014 +0530
> +++ b/source/common/deblock.cpp Wed Sep 24 18:08:46 2014 +0900
> @@ -48,7 +48,7 @@
> return;
>
> Frame* pic = cu->m_pic;
> - uint32_t curNumParts = pic->getNumPartInCU() >> (depth << 1);
> + uint32_t curNumParts = m_numPartitions >> (depth * 2);
>
> if (cu->getDepth(absZOrderIdx) > depth)
> {
> @@ -56,35 +56,34 @@
> uint32_t xmax = cu->m_slice->m_sps->picWidthInLumaSamples - cu->getCUPelX();
> uint32_t ymax = cu->m_slice->m_sps->picHeightInLumaSamples - cu->getCUPelY();
> for (uint32_t partIdx = 0; partIdx < 4; partIdx++, absZOrderIdx += qNumParts)
> - {
> if (g_zscanToPelX[absZOrderIdx] < xmax && g_zscanToPelY[absZOrderIdx] < ymax)
> deblockCU(cu, absZOrderIdx, depth + 1, dir, edgeFilter, blockingStrength);
> - }
> return;
> }
>
> Param params;
> setLoopfilterParam(cu, absZOrderIdx, &params);
> - setEdgefilterTU(cu, absZOrderIdx, absZOrderIdx, depth, dir, edgeFilter, blockingStrength);
> + setEdgefilterTU(cu, absZOrderIdx, depth, dir, edgeFilter, blockingStrength);
> setEdgefilterPU(cu, absZOrderIdx, dir, &params, edgeFilter, blockingStrength);
>
> for (uint32_t partIdx = absZOrderIdx; partIdx < absZOrderIdx + curNumParts; partIdx++)
> {
> - uint32_t bsCheck = (dir == EDGE_VER ? !(partIdx & 1) : !(partIdx & 2));
> + uint32_t bsCheck = !(partIdx & (1 << dir));
>
> - if (edgeFilter[partIdx] && bsCheck)
> + if (bsCheck && edgeFilter[partIdx])
> getBoundaryStrengthSingle(cu, dir, partIdx, blockingStrength);
> }
>
> - uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK >> LOG2_UNIT_SIZE;
> + const uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK >> LOG2_UNIT_SIZE;
> uint32_t sizeInPU = pic->getNumPartInCUSize() >> depth;
> uint32_t shiftFactor = (dir == EDGE_VER) ? cu->getHorzChromaShift() : cu->getVertChromaShift();
> - const bool alwaysDoChroma = cu->getChromaFormat() == X265_CSP_I444;
> -
> + uint32_t chromaMask = ((DEBLOCK_SMALLEST_BLOCK << shiftFactor) >> LOG2_UNIT_SIZE) - 1;
> + uint32_t e0 = (dir == EDGE_VER ? g_zscanToPelX[absZOrderIdx] : g_zscanToPelY[absZOrderIdx]) >> LOG2_UNIT_SIZE;
> +
> for (uint32_t e = 0; e < sizeInPU; e += partIdxIncr)
> {
> edgeFilterLuma(cu, absZOrderIdx, depth, dir, e, blockingStrength);
> - if (alwaysDoChroma || !(e % ((DEBLOCK_SMALLEST_BLOCK << shiftFactor) >> LOG2_UNIT_SIZE)))
> + if (!((e0 + e) & chromaMask))
> edgeFilterChroma(cu, absZOrderIdx, depth, dir, e, blockingStrength);
> }
> }
> @@ -115,66 +114,60 @@
> }
> }
>
> -void Deblock::setEdgefilterTU(TComDataCU* cu, uint32_t absTUPartIdx, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, bool edgeFilter[], uint8_t blockingStrength[])
> +void Deblock::setEdgefilterTU(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, bool edgeFilter[], uint8_t blockingStrength[])
> {
> if (cu->getTransformIdx(absZOrderIdx) + cu->getDepth(absZOrderIdx) > (uint8_t)depth)
> {
> - const uint32_t curNumParts = cu->m_pic->getNumPartInCU() >> (depth << 1);
> + const uint32_t curNumParts = m_numPartitions >> (depth * 2);
> const uint32_t qNumParts = curNumParts >> 2;
>
> for (uint32_t partIdx = 0; partIdx < 4; partIdx++, absZOrderIdx += qNumParts)
> - {
> - uint32_t nsAddr = absZOrderIdx;
> - setEdgefilterTU(cu, nsAddr, absZOrderIdx, depth + 1, dir, edgeFilter, blockingStrength);
> - }
> + setEdgefilterTU(cu, absZOrderIdx, depth + 1, dir, edgeFilter, blockingStrength);
> return;
> }
>
> uint32_t widthInBaseUnits = 1 << (cu->getLog2CUSize(absZOrderIdx) - cu->getTransformIdx(absZOrderIdx) - LOG2_UNIT_SIZE);
> - setEdgefilterMultiple(cu, absTUPartIdx, depth, dir, 0, true, edgeFilter, blockingStrength, widthInBaseUnits);
> + setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, 0, true, edgeFilter, blockingStrength, widthInBaseUnits);
> }
>
> void Deblock::setEdgefilterPU(TComDataCU* cu, uint32_t absZOrderIdx, int32_t dir, Param *params, bool edgeFilter[], uint8_t blockingStrength[])
> {
> const uint32_t depth = cu->getDepth(absZOrderIdx);
> const uint32_t widthInBaseUnits = cu->m_pic->getNumPartInCUSize() >> depth;
> - const uint32_t hWidthInBaseUnits = widthInBaseUnits >> 1;
> - const uint32_t qWidthInBaseUnits = widthInBaseUnits >> 2;
> + const uint32_t hWidthInBaseUnits = widthInBaseUnits >> 1;
> + const uint32_t qWidthInBaseUnits = widthInBaseUnits >> 2;
>
> setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, 0, (dir == EDGE_VER ? params->leftEdge : params->topEdge), edgeFilter, blockingStrength);
>
> - int32_t mode = cu->getPartitionSize(absZOrderIdx);
> - switch (mode)
> + switch (cu->getPartitionSize(absZOrderIdx))
> {
> case SIZE_2NxN:
> + if (EDGE_HOR == dir)
> + setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, hWidthInBaseUnits, true, edgeFilter, blockingStrength);
> + break;
> case SIZE_Nx2N:
> - {
> - const int32_t realDir = (mode == SIZE_2NxN ? EDGE_HOR : EDGE_VER);
> - if (realDir == dir)
> - setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, hWidthInBaseUnits, true, edgeFilter, blockingStrength);
> - break;
> - }
> + if (EDGE_VER == dir)
> + setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, hWidthInBaseUnits, true, edgeFilter, blockingStrength);
> + break;
> case SIZE_NxN:
> - {
> - setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, hWidthInBaseUnits, true, edgeFilter, blockingStrength);
> - break;
> - }
> + setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, hWidthInBaseUnits, true, edgeFilter, blockingStrength);
> + break;
> case SIZE_2NxnU:
> + if (EDGE_HOR == dir)
> + setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, qWidthInBaseUnits, true, edgeFilter, blockingStrength);
> + break;
> case SIZE_nLx2N:
> - {
> - const int32_t realDir = (mode == SIZE_2NxnU ? EDGE_HOR : EDGE_VER);
> - if (realDir == dir)
> - setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, qWidthInBaseUnits, true, edgeFilter, blockingStrength);
> - break;
> - }
> + if (EDGE_VER == dir)
> + setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, qWidthInBaseUnits, true, edgeFilter, blockingStrength);
> + break;
> case SIZE_2NxnD:
> + if (EDGE_HOR == dir)
> + setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, widthInBaseUnits - qWidthInBaseUnits, true, edgeFilter, blockingStrength);
> + break;
> case SIZE_nRx2N:
> - {
> - const int32_t realDir = (mode == SIZE_2NxnD ? EDGE_HOR : EDGE_VER);
> - if (realDir == dir)
> - setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, widthInBaseUnits - qWidthInBaseUnits, true, edgeFilter, blockingStrength);
> - break;
> - }
> + if (EDGE_VER == dir)
> + setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, widthInBaseUnits - qWidthInBaseUnits, true, edgeFilter, blockingStrength);
> + break;
>
> case SIZE_2Nx2N:
> default:
> @@ -338,15 +331,15 @@
> return abs(static_cast<int32_t>(src[0]) - 2 * src[offset] + src[offset * 2]);
> }
>
> -static inline bool useStrongFiltering(int32_t offset, int32_t d, int32_t beta, int32_t tc, pixel* src)
> +static inline bool useStrongFiltering(int32_t offset, int32_t beta, int32_t tc, pixel* src)
> {
> + int16_t m0 = (int16_t)src[-offset * 4];
> + int16_t m3 = (int16_t)src[-offset];
> int16_t m4 = (int16_t)src[0];
> - int16_t m3 = (int16_t)src[-offset];
> int16_t m7 = (int16_t)src[offset * 3];
> - int16_t m0 = (int16_t)src[-offset * 4];
> int32_t strong = abs(m0 - m3) + abs(m7 - m4);
>
> - return (strong < (beta >> 3)) && (d < (beta >> 2)) && (abs(m3 - m4) < ((tc * 5 + 1) >> 1));
> + return (strong < (beta >> 3)) && (abs(m3 - m4) < ((tc * 5 + 1) >> 1));
> }
>
> /* Deblocking for the luminance component with strong or weak filter
> @@ -362,63 +355,61 @@
> static inline void pelFilterLuma(pixel* src, int32_t offset, int32_t tc, bool sw, bool partPNoFilter, bool partQNoFilter,
> int32_t thrCut, bool filterSecondP, bool filterSecondQ)
> {
> - int32_t delta;
> -
> + int16_t m1 = (int16_t)src[-offset * 3];
> + int16_t m2 = (int16_t)src[-offset * 2];
> + int16_t m3 = (int16_t)src[-offset];
> int16_t m4 = (int16_t)src[0];
> - int16_t m3 = (int16_t)src[-offset];
> int16_t m5 = (int16_t)src[offset];
> - int16_t m2 = (int16_t)src[-offset * 2];
> int16_t m6 = (int16_t)src[offset * 2];
> - int16_t m1 = (int16_t)src[-offset * 3];
> - int16_t m7 = (int16_t)src[offset * 3];
> - int16_t m0 = (int16_t)src[-offset * 4];
>
> if (sw)
> {
> - src[-offset] = (pixel)Clip3(m3 - 2 * tc, m3 + 2 * tc, ((m1 + 2 * m2 + 2 * m3 + 2 * m4 + m5 + 4) >> 3));
> - src[0] = (pixel)Clip3(m4 - 2 * tc, m4 + 2 * tc, ((m2 + 2 * m3 + 2 * m4 + 2 * m5 + m6 + 4) >> 3));
> - src[-offset * 2] = (pixel)Clip3(m2 - 2 * tc, m2 + 2 * tc, ((m1 + m2 + m3 + m4 + 2) >> 2));
> - src[offset] = (pixel)Clip3(m5 - 2 * tc, m5 + 2 * tc, ((m3 + m4 + m5 + m6 + 2) >> 2));
> - src[-offset * 3] = (pixel)Clip3(m1 - 2 * tc, m1 + 2 * tc, ((2 * m0 + 3 * m1 + m2 + m3 + m4 + 4) >> 3));
> - src[offset * 2] = (pixel)Clip3(m6 - 2 * tc, m6 + 2 * tc, ((m3 + m4 + m5 + 3 * m6 + 2 * m7 + 4) >> 3));
> + int16_t m0 = (int16_t)src[-offset * 4];
> + int16_t m7 = (int16_t)src[offset * 3];
> + int32_t tc2 = 2 * tc;
> + if (!partPNoFilter)
> + {
> + src[-offset * 3] = (pixel)(Clip3(-tc2, tc2, ((2 * m0 + 3 * m1 + m2 + m3 + m4 + 4) >> 3) - m1) + m1);
> + src[-offset * 2] = (pixel)(Clip3(-tc2, tc2, ((m1 + m2 + m3 + m4 + 2) >> 2) - m2) + m2);
> + src[-offset] = (pixel)(Clip3(-tc2, tc2, ((m1 + 2 * m2 + 2 * m3 + 2 * m4 + m5 + 4) >> 3) - m3) + m3);
> + }
> + if (!partQNoFilter)
> + {
> + src[0] = (pixel)(Clip3(-tc2, tc2, ((m2 + 2 * m3 + 2 * m4 + 2 * m5 + m6 + 4) >> 3) - m4) + m4);
> + src[offset] = (pixel)(Clip3(-tc2, tc2, ((m3 + m4 + m5 + m6 + 2) >> 2) - m5) + m5);
> + src[offset * 2] = (pixel)(Clip3(-tc2, tc2, ((m3 + m4 + m5 + 3 * m6 + 2 * m7 + 4) >> 3) - m6) + m6);
> + }
> }
> else
> {
> /* Weak filter */
> - delta = (9 * (m4 - m3) - 3 * (m5 - m2) + 8) >> 4;
> + int32_t delta = (9 * (m4 - m3) - 3 * (m5 - m2) + 8) >> 4;
>
> if (abs(delta) < thrCut)
> {
> delta = Clip3(-tc, tc, delta);
> - src[-offset] = Clip(m3 + delta);
> - src[0] = Clip(m4 - delta);
>
> int32_t tc2 = tc >> 1;
> - if (filterSecondP)
> + if (!partPNoFilter)
> {
> - int32_t delta1 = Clip3(-tc2, tc2, ((((m1 + m3 + 1) >> 1) - m2 + delta) >> 1));
> - src[-offset * 2] = Clip(m2 + delta1);
> + src[-offset] = Clip(m3 + delta);
> + if (filterSecondP)
> + {
> + int32_t delta1 = Clip3(-tc2, tc2, ((((m1 + m3 + 1) >> 1) - m2 + delta) >> 1));
> + src[-offset * 2] = Clip(m2 + delta1);
> + }
> }
> - if (filterSecondQ)
> + if (!partQNoFilter)
> {
> - int32_t delta2 = Clip3(-tc2, tc2, ((((m6 + m4 + 1) >> 1) - m5 - delta) >> 1));
> - src[offset] = Clip(m5 + delta2);
> + src[0] = Clip(m4 - delta);
> + if (filterSecondQ)
> + {
> + int32_t delta2 = Clip3(-tc2, tc2, ((((m6 + m4 + 1) >> 1) - m5 - delta) >> 1));
> + src[offset] = Clip(m5 + delta2);
> + }
> }
> }
> }
> -
> - if (partPNoFilter)
> - {
> - src[-offset] = (pixel)m3;
> - src[-offset * 2] = (pixel)m2;
> - src[-offset * 3] = (pixel)m1;
> - }
> - if (partQNoFilter)
> - {
> - src[0] = (pixel)m4;
> - src[offset] = (pixel)m5;
> - src[offset * 2] = (pixel)m6;
> - }
> }
>
> /* Deblocking of one line/column for the chrominance component
> @@ -429,34 +420,26 @@
> * \param partQNoFilter indicator to disable filtering on partQ */
> static inline void pelFilterChroma(pixel* src, int32_t offset, int32_t tc, bool partPNoFilter, bool partQNoFilter)
> {
> - int32_t delta;
> + int16_t m2 = (int16_t)src[-offset * 2];
> + int16_t m3 = (int16_t)src[-offset];
> + int16_t m4 = (int16_t)src[0];
> + int16_t m5 = (int16_t)src[offset];
>
> - int16_t m4 = (int16_t)src[0];
> - int16_t m3 = (int16_t)src[-offset];
> - int16_t m5 = (int16_t)src[offset];
> - int16_t m2 = (int16_t)src[-offset * 2];
> -
> - delta = Clip3(-tc, tc, ((((m4 - m3) << 2) + m2 - m5 + 4) >> 3));
> - src[-offset] = Clip(m3 + delta);
> - src[0] = Clip(m4 - delta);
> -
> - if (partPNoFilter)
> - src[-offset] = (pixel)m3;
> - if (partQNoFilter)
> - src[0] = (pixel)m4;
> + int32_t delta = Clip3(-tc, tc, ((((m4 - m3) << 2) + m2 - m5 + 4) >> 3));
> + if (!partPNoFilter)
> + src[-offset] = Clip(m3 + delta);
> + if (!partQNoFilter)
> + src[0] = Clip(m4 - delta);
> }
>
> void Deblock::edgeFilterLuma(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, int32_t edge, uint8_t blockingStrength[])
> {
> TComPicYuv* reconYuv = cu->m_pic->getPicYuvRec();
> pixel* src = reconYuv->getLumaAddr(cu->getAddr(), absZOrderIdx);
> - pixel* tmpsrc = src;
>
> int32_t stride = reconYuv->getStride();
> uint32_t numParts = cu->m_pic->getNumPartInCUSize() >> depth;
>
> - uint32_t blocksInPart = (LOG2_UNIT_SIZE - 2) > 0 ? 1 << (LOG2_UNIT_SIZE - 2) : 1;
> - uint32_t bsAbsIdx = 0, bs = 0;
> int32_t offset, srcStep;
>
> bool partPNoFilter = false;
> @@ -472,20 +455,20 @@
> {
> offset = 1;
> srcStep = stride;
> - tmpsrc += (edge << LOG2_UNIT_SIZE);
> + src += (edge << LOG2_UNIT_SIZE);
> }
> else // (dir == EDGE_HOR)
> {
> offset = stride;
> srcStep = 1;
> - tmpsrc += (edge << LOG2_UNIT_SIZE) * stride;
> + src += (edge << LOG2_UNIT_SIZE) * stride;
> }
>
> for (uint32_t idx = 0; idx < numParts; idx++)
> {
> - uint32_t partOffset = idx << LOG2_UNIT_SIZE;
> - bsAbsIdx = calcBsIdx(cu, absZOrderIdx, dir, edge, idx);
> - bs = blockingStrength[bsAbsIdx];
> + uint32_t unitOffset = idx << LOG2_UNIT_SIZE;
> + uint32_t bsAbsIdx = calcBsIdx(cu, absZOrderIdx, dir, edge, idx);
> + uint32_t bs = blockingStrength[bsAbsIdx];
> if (bs)
> {
> int32_t qpQ = cu->getQP(bsAbsIdx);
> @@ -499,29 +482,25 @@
>
> int32_t qpP = cuP->getQP(partP);
> int32_t qp = (qpP + qpQ + 1) >> 1;
> - int32_t bitdepthScale = 1 << (X265_DEPTH - 8);
>
> - int32_t indexTC = Clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset));
> int32_t indexB = Clip3(0, QP_MAX_SPEC, qp + betaOffset);
>
> - int32_t tc = s_tcTable[indexTC] * bitdepthScale;
> - int32_t beta = s_betaTable[indexB] * bitdepthScale;
> - int32_t sideThreshold = (beta + (beta >> 1)) >> 3;
> - int32_t thrCut = tc * 10;
> + const int32_t bitdepthShift = X265_DEPTH - 8;
> + int32_t beta = s_betaTable[indexB] << bitdepthShift;
>
> - for (uint32_t blkIdx = 0; blkIdx < blocksInPart; blkIdx++)
> + int32_t dp0 = calcDP(src + srcStep * (unitOffset + 0), offset);
> + int32_t dq0 = calcDQ(src + srcStep * (unitOffset + 0), offset);
> + int32_t dp3 = calcDP(src + srcStep * (unitOffset + 3), offset);
> + int32_t dq3 = calcDQ(src + srcStep * (unitOffset + 3), offset);
> + int32_t d0 = dp0 + dq0;
> + int32_t d3 = dp3 + dq3;
> +
> + int32_t dp = dp0 + dp3;
> + int32_t dq = dq0 + dq3;
> + int32_t d = d0 + d3;
> +
> + if (d < beta)
> {
> - int32_t dp0 = calcDP(tmpsrc + srcStep * (partOffset + blkIdx * 4 + 0), offset);
> - int32_t dq0 = calcDQ(tmpsrc + srcStep * (partOffset + blkIdx * 4 + 0), offset);
> - int32_t dp3 = calcDP(tmpsrc + srcStep * (partOffset + blkIdx * 4 + 3), offset);
> - int32_t dq3 = calcDQ(tmpsrc + srcStep * (partOffset + blkIdx * 4 + 3), offset);
> - int32_t d0 = dp0 + dq0;
> - int32_t d3 = dp3 + dq3;
> -
> - int32_t dp = dp0 + dp3;
> - int32_t dq = dq0 + dq3;
> - int32_t d = d0 + d3;
> -
> if (cu->m_slice->m_pps->bTransquantBypassEnabled)
> {
> // check if each of PUs is lossless coded
> @@ -529,17 +508,21 @@
> partQNoFilter = cuQ->getCUTransquantBypass(partQ);
> }
>
> - if (d < beta)
> - {
> - bool filterP = (dp < sideThreshold);
> - bool filterQ = (dq < sideThreshold);
> + int32_t indexTC = Clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset));
> + int32_t tc = s_tcTable[indexTC] << bitdepthShift;
> + int32_t sideThreshold = (beta + (beta >> 1)) >> 3;
> + int32_t thrCut = tc * 10;
>
> - bool sw = useStrongFiltering(offset, 2 * d0, beta, tc, tmpsrc + srcStep * (partOffset + blkIdx * 4 + 0))
> - && useStrongFiltering(offset, 2 * d3, beta, tc, tmpsrc + srcStep * (partOffset + blkIdx * 4 + 3));
> + bool filterP = (dp < sideThreshold);
> + bool filterQ = (dq < sideThreshold);
>
> - for (int32_t i = 0; i < DEBLOCK_SMALLEST_BLOCK / 2; i++)
> - pelFilterLuma(tmpsrc + srcStep * (partOffset + blkIdx * 4 + i), offset, tc, sw, partPNoFilter, partQNoFilter, thrCut, filterP, filterQ);
> - }
> + bool sw = (2 * d0 < (beta >> 2) &&
> + 2 * d3 < (beta >> 2) &&
> + useStrongFiltering(offset, beta, tc, src + srcStep * (unitOffset + 0)) &&
> + useStrongFiltering(offset, beta, tc, src + srcStep * (unitOffset + 3)));
> +
> + for (int32_t i = 0; i < UNIT_SIZE; i++)
> + pelFilterLuma(src + srcStep * (unitOffset + i), offset, tc, sw, partPNoFilter, partQNoFilter, thrCut, filterP, filterQ);
> }
> }
> }
> @@ -548,17 +531,7 @@
> void Deblock::edgeFilterChroma(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, int32_t edge, uint8_t blockingStrength[])
> {
> int32_t chFmt = cu->getChromaFormat();
> - TComPicYuv* reconYuv = cu->m_pic->getPicYuvRec();
> - int32_t stride = reconYuv->getCStride();
> - pixel* srcCb = reconYuv->getCbAddr(cu->getAddr(), absZOrderIdx);
> - pixel* srcCr = reconYuv->getCrAddr(cu->getAddr(), absZOrderIdx);
> - uint32_t log2UnitSizeH = LOG2_UNIT_SIZE - cu->getHorzChromaShift();
> - uint32_t log2UnitSizeV = LOG2_UNIT_SIZE - cu->getVertChromaShift();
> - uint32_t sizeChromaH = 1 << log2UnitSizeH;
> - uint32_t sizeChromaV = 1 << log2UnitSizeV;
> - int32_t offset, srcStep;
> -
> - const uint32_t lcuWidthInBaseUnits = cu->m_pic->getNumPartInCUSize();
> + int32_t offset, srcStep, chromaShift;
>
> bool partPNoFilter = false;
> bool partQNoFilter = false;
> @@ -568,44 +541,42 @@
> TComDataCU* cuQ = cu;
> int32_t tcOffset = cu->m_slice->m_pps->deblockingFilterTcOffsetDiv2 << 1;
>
> - // Vertical Position
> - uint32_t edgeNumInLCUVert = g_zscanToRaster[absZOrderIdx] % lcuWidthInBaseUnits + edge;
> - uint32_t edgeNumInLCUHor = g_zscanToRaster[absZOrderIdx] / lcuWidthInBaseUnits + edge;
> + X265_CHECK(((dir == EDGE_VER)
> + ? ((g_zscanToPelX[absZOrderIdx] + edge * UNIT_SIZE) >> cu->getHorzChromaShift())
> + : ((g_zscanToPelY[absZOrderIdx] + edge * UNIT_SIZE) >> cu->getVertChromaShift())) % DEBLOCK_SMALLEST_BLOCK == 0,
> + "invalid edge\n");
>
> - if ((sizeChromaH < DEBLOCK_SMALLEST_BLOCK) && (sizeChromaV < DEBLOCK_SMALLEST_BLOCK) &&
> - (((edgeNumInLCUVert % (DEBLOCK_SMALLEST_BLOCK >> log2UnitSizeH)) && !dir) ||
> - ((edgeNumInLCUHor % (DEBLOCK_SMALLEST_BLOCK >> log2UnitSizeV)) && dir)))
> - return;
>
> - uint32_t numParts = cu->m_pic->getNumPartInCUSize() >> depth;
> - uint32_t bsAbsIdx;
> - uint8_t bs;
> -
> - pixel* tmpSrcCb = srcCb;
> - pixel* tmpSrcCr = srcCr;
> - uint32_t loopLength;
> + TComPicYuv* reconYuv = cu->m_pic->getPicYuvRec();
> + int32_t stride = reconYuv->getCStride();
> + int32_t srcOffset = reconYuv->getChromaAddrOffset(cu->getAddr(), absZOrderIdx);
>
> if (dir == EDGE_VER)
> {
> + chromaShift = cu->getVertChromaShift();
> + srcOffset += (edge << (LOG2_UNIT_SIZE - cu->getHorzChromaShift()));
> offset = 1;
> srcStep = stride;
> - tmpSrcCb += (edge << log2UnitSizeH);
> - tmpSrcCr += (edge << log2UnitSizeH);
> - loopLength = sizeChromaV;
> }
> else // (dir == EDGE_HOR)
> {
> + chromaShift = cu->getHorzChromaShift();
> + srcOffset += edge * stride << (LOG2_UNIT_SIZE - cu->getVertChromaShift());
> offset = stride;
> srcStep = 1;
> - tmpSrcCb += edge * stride << log2UnitSizeV;
> - tmpSrcCr += edge * stride << log2UnitSizeV;
> - loopLength = sizeChromaH;
> }
>
> - for (uint32_t idx = 0; idx < numParts; idx++)
> + pixel* srcChroma[2];
> + srcChroma[0] = reconYuv->getCbAddr() + srcOffset;
> + srcChroma[1] = reconYuv->getCrAddr() + srcOffset;
> +
> + uint32_t numUnits = cu->m_pic->getNumPartInCUSize() >> (depth + chromaShift);
> +
> + for (uint32_t idx = 0; idx < numUnits; idx++)
> {
> - bsAbsIdx = calcBsIdx(cu, absZOrderIdx, dir, edge, idx);
> - bs = blockingStrength[bsAbsIdx];
> + uint32_t unitOffset = idx << LOG2_UNIT_SIZE;
> + uint32_t bsAbsIdx = calcBsIdx(cu, absZOrderIdx, dir, edge, idx << chromaShift);
> + uint32_t bs = blockingStrength[bsAbsIdx];
>
> if (bs > 1)
> {
> @@ -630,7 +601,6 @@
> for (uint32_t chromaIdx = 0; chromaIdx < 2; chromaIdx++)
> {
> int32_t chromaQPOffset = !chromaIdx ? cu->m_slice->m_pps->chromaCbQpOffset : cu->m_slice->m_pps->chromaCrQpOffset;
> - pixel* tmpSrcChroma = !chromaIdx ? tmpSrcCb : tmpSrcCr;
> int32_t qp = ((qpP + qpQ + 1) >> 1) + chromaQPOffset;
> if (qp >= 30)
> {
> @@ -640,12 +610,13 @@
> qp = X265_MIN(qp, 51);
> }
>
> - int32_t bitdepthScale = 1 << (X265_DEPTH - 8);
> - int32_t indexTC = Clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset);
> - int32_t tc = s_tcTable[indexTC] * bitdepthScale;
> + int32_t indexTC = Clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET + tcOffset));
> + const int32_t bitdepthShift = X265_DEPTH - 8;
> + int32_t tc = s_tcTable[indexTC] << bitdepthShift;
> + pixel* srcC = srcChroma[chromaIdx];
>
> - for (uint32_t step = 0; step < loopLength; step++)
> - pelFilterChroma(tmpSrcChroma + srcStep * (step + idx * loopLength), offset, tc, partPNoFilter, partQNoFilter);
> + for (int32_t i = 0; i < UNIT_SIZE; i++)
> + pelFilterChroma(srcC + srcStep * (unitOffset + i), offset, tc, partPNoFilter, partQNoFilter);
> }
> }
> }
> diff -r b2b7072ddbf7 -r 9f96fc8374d8 source/common/deblock.h
> --- a/source/common/deblock.h Wed Sep 24 11:48:15 2014 +0530
> +++ b/source/common/deblock.h Wed Sep 24 18:08:46 2014 +0900
> @@ -58,7 +58,7 @@
>
> // set filtering functions
> void setLoopfilterParam(TComDataCU* cu, uint32_t absZOrderIdx, Param *params);
> - void setEdgefilterTU(TComDataCU* cu, uint32_t absTUPartIdx, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, bool edgeFilter[], uint8_t blockingStrength[]);
> + void setEdgefilterTU(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, bool edgeFilter[], uint8_t blockingStrength[]);
> void setEdgefilterPU(TComDataCU* cu, uint32_t absZOrderIdx, int32_t dir, Param *params, bool edgeFilter[], uint8_t blockingStrength[]);
> void setEdgefilterMultiple(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, int32_t edgeIdx, bool value, bool edgeFilter[], uint8_t blockingStrength[], uint32_t widthInBaseUnits = 0);
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
--
Steve Borho
More information about the x265-devel
mailing list