[x265] refine deblocking filter
Satoshi Nakagawa
nakagawa424 at oki.com
Wed Sep 24 11:19:03 CEST 2014
# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1411549726 -32400
# Wed Sep 24 18:08:46 2014 +0900
# Node ID 9f96fc8374d834d424190b0b1581054996985b67
# Parent b2b7072ddbf73085d457bd6a71bca946e505dea8
refine deblocking filter
diff -r b2b7072ddbf7 -r 9f96fc8374d8 source/Lib/TLibCommon/TComPicYuv.h
--- a/source/Lib/TLibCommon/TComPicYuv.h Wed Sep 24 11:48:15 2014 +0530
+++ b/source/Lib/TLibCommon/TComPicYuv.h Wed Sep 24 18:08:46 2014 +0900
@@ -155,6 +155,8 @@
pixel* getChromaAddr(uint32_t chromaId, int cuAddr, int absZOrderIdx) { return m_picOrg[chromaId] + m_cuOffsetC[cuAddr] + m_buOffsetC[absZOrderIdx]; }
+ int32_t getChromaAddrOffset(int cuAddr, int absZOrderIdx) { return m_cuOffsetC[cuAddr] + m_buOffsetC[absZOrderIdx]; }
+
uint32_t getCUHeight(int rowNum);
void copyFromPicture(const x265_picture&, int padx, int pady);
diff -r b2b7072ddbf7 -r 9f96fc8374d8 source/common/deblock.cpp
--- a/source/common/deblock.cpp Wed Sep 24 11:48:15 2014 +0530
+++ b/source/common/deblock.cpp Wed Sep 24 18:08:46 2014 +0900
@@ -48,7 +48,7 @@
return;
Frame* pic = cu->m_pic;
- uint32_t curNumParts = pic->getNumPartInCU() >> (depth << 1);
+ uint32_t curNumParts = m_numPartitions >> (depth * 2);
if (cu->getDepth(absZOrderIdx) > depth)
{
@@ -56,35 +56,34 @@
uint32_t xmax = cu->m_slice->m_sps->picWidthInLumaSamples - cu->getCUPelX();
uint32_t ymax = cu->m_slice->m_sps->picHeightInLumaSamples - cu->getCUPelY();
for (uint32_t partIdx = 0; partIdx < 4; partIdx++, absZOrderIdx += qNumParts)
- {
if (g_zscanToPelX[absZOrderIdx] < xmax && g_zscanToPelY[absZOrderIdx] < ymax)
deblockCU(cu, absZOrderIdx, depth + 1, dir, edgeFilter, blockingStrength);
- }
return;
}
Param params;
setLoopfilterParam(cu, absZOrderIdx, &params);
- setEdgefilterTU(cu, absZOrderIdx, absZOrderIdx, depth, dir, edgeFilter, blockingStrength);
+ setEdgefilterTU(cu, absZOrderIdx, depth, dir, edgeFilter, blockingStrength);
setEdgefilterPU(cu, absZOrderIdx, dir, &params, edgeFilter, blockingStrength);
for (uint32_t partIdx = absZOrderIdx; partIdx < absZOrderIdx + curNumParts; partIdx++)
{
- uint32_t bsCheck = (dir == EDGE_VER ? !(partIdx & 1) : !(partIdx & 2));
+ uint32_t bsCheck = !(partIdx & (1 << dir));
- if (edgeFilter[partIdx] && bsCheck)
+ if (bsCheck && edgeFilter[partIdx])
getBoundaryStrengthSingle(cu, dir, partIdx, blockingStrength);
}
- uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK >> LOG2_UNIT_SIZE;
+ const uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK >> LOG2_UNIT_SIZE;
uint32_t sizeInPU = pic->getNumPartInCUSize() >> depth;
uint32_t shiftFactor = (dir == EDGE_VER) ? cu->getHorzChromaShift() : cu->getVertChromaShift();
- const bool alwaysDoChroma = cu->getChromaFormat() == X265_CSP_I444;
-
+ uint32_t chromaMask = ((DEBLOCK_SMALLEST_BLOCK << shiftFactor) >> LOG2_UNIT_SIZE) - 1;
+ uint32_t e0 = (dir == EDGE_VER ? g_zscanToPelX[absZOrderIdx] : g_zscanToPelY[absZOrderIdx]) >> LOG2_UNIT_SIZE;
+
for (uint32_t e = 0; e < sizeInPU; e += partIdxIncr)
{
edgeFilterLuma(cu, absZOrderIdx, depth, dir, e, blockingStrength);
- if (alwaysDoChroma || !(e % ((DEBLOCK_SMALLEST_BLOCK << shiftFactor) >> LOG2_UNIT_SIZE)))
+ if (!((e0 + e) & chromaMask))
edgeFilterChroma(cu, absZOrderIdx, depth, dir, e, blockingStrength);
}
}
@@ -115,66 +114,60 @@
}
}
-void Deblock::setEdgefilterTU(TComDataCU* cu, uint32_t absTUPartIdx, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, bool edgeFilter[], uint8_t blockingStrength[])
+void Deblock::setEdgefilterTU(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, bool edgeFilter[], uint8_t blockingStrength[])
{
if (cu->getTransformIdx(absZOrderIdx) + cu->getDepth(absZOrderIdx) > (uint8_t)depth)
{
- const uint32_t curNumParts = cu->m_pic->getNumPartInCU() >> (depth << 1);
+ const uint32_t curNumParts = m_numPartitions >> (depth * 2);
const uint32_t qNumParts = curNumParts >> 2;
for (uint32_t partIdx = 0; partIdx < 4; partIdx++, absZOrderIdx += qNumParts)
- {
- uint32_t nsAddr = absZOrderIdx;
- setEdgefilterTU(cu, nsAddr, absZOrderIdx, depth + 1, dir, edgeFilter, blockingStrength);
- }
+ setEdgefilterTU(cu, absZOrderIdx, depth + 1, dir, edgeFilter, blockingStrength);
return;
}
uint32_t widthInBaseUnits = 1 << (cu->getLog2CUSize(absZOrderIdx) - cu->getTransformIdx(absZOrderIdx) - LOG2_UNIT_SIZE);
- setEdgefilterMultiple(cu, absTUPartIdx, depth, dir, 0, true, edgeFilter, blockingStrength, widthInBaseUnits);
+ setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, 0, true, edgeFilter, blockingStrength, widthInBaseUnits);
}
void Deblock::setEdgefilterPU(TComDataCU* cu, uint32_t absZOrderIdx, int32_t dir, Param *params, bool edgeFilter[], uint8_t blockingStrength[])
{
const uint32_t depth = cu->getDepth(absZOrderIdx);
const uint32_t widthInBaseUnits = cu->m_pic->getNumPartInCUSize() >> depth;
- const uint32_t hWidthInBaseUnits = widthInBaseUnits >> 1;
- const uint32_t qWidthInBaseUnits = widthInBaseUnits >> 2;
+ const uint32_t hWidthInBaseUnits = widthInBaseUnits >> 1;
+ const uint32_t qWidthInBaseUnits = widthInBaseUnits >> 2;
setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, 0, (dir == EDGE_VER ? params->leftEdge : params->topEdge), edgeFilter, blockingStrength);
- int32_t mode = cu->getPartitionSize(absZOrderIdx);
- switch (mode)
+ switch (cu->getPartitionSize(absZOrderIdx))
{
case SIZE_2NxN:
+ if (EDGE_HOR == dir)
+ setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, hWidthInBaseUnits, true, edgeFilter, blockingStrength);
+ break;
case SIZE_Nx2N:
- {
- const int32_t realDir = (mode == SIZE_2NxN ? EDGE_HOR : EDGE_VER);
- if (realDir == dir)
- setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, hWidthInBaseUnits, true, edgeFilter, blockingStrength);
- break;
- }
+ if (EDGE_VER == dir)
+ setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, hWidthInBaseUnits, true, edgeFilter, blockingStrength);
+ break;
case SIZE_NxN:
- {
- setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, hWidthInBaseUnits, true, edgeFilter, blockingStrength);
- break;
- }
+ setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, hWidthInBaseUnits, true, edgeFilter, blockingStrength);
+ break;
case SIZE_2NxnU:
+ if (EDGE_HOR == dir)
+ setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, qWidthInBaseUnits, true, edgeFilter, blockingStrength);
+ break;
case SIZE_nLx2N:
- {
- const int32_t realDir = (mode == SIZE_2NxnU ? EDGE_HOR : EDGE_VER);
- if (realDir == dir)
- setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, qWidthInBaseUnits, true, edgeFilter, blockingStrength);
- break;
- }
+ if (EDGE_VER == dir)
+ setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, qWidthInBaseUnits, true, edgeFilter, blockingStrength);
+ break;
case SIZE_2NxnD:
+ if (EDGE_HOR == dir)
+ setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, widthInBaseUnits - qWidthInBaseUnits, true, edgeFilter, blockingStrength);
+ break;
case SIZE_nRx2N:
- {
- const int32_t realDir = (mode == SIZE_2NxnD ? EDGE_HOR : EDGE_VER);
- if (realDir == dir)
- setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, widthInBaseUnits - qWidthInBaseUnits, true, edgeFilter, blockingStrength);
- break;
- }
+ if (EDGE_VER == dir)
+ setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, widthInBaseUnits - qWidthInBaseUnits, true, edgeFilter, blockingStrength);
+ break;
case SIZE_2Nx2N:
default:
@@ -338,15 +331,15 @@
return abs(static_cast<int32_t>(src[0]) - 2 * src[offset] + src[offset * 2]);
}
-static inline bool useStrongFiltering(int32_t offset, int32_t d, int32_t beta, int32_t tc, pixel* src)
+static inline bool useStrongFiltering(int32_t offset, int32_t beta, int32_t tc, pixel* src)
{
+ int16_t m0 = (int16_t)src[-offset * 4];
+ int16_t m3 = (int16_t)src[-offset];
int16_t m4 = (int16_t)src[0];
- int16_t m3 = (int16_t)src[-offset];
int16_t m7 = (int16_t)src[offset * 3];
- int16_t m0 = (int16_t)src[-offset * 4];
int32_t strong = abs(m0 - m3) + abs(m7 - m4);
- return (strong < (beta >> 3)) && (d < (beta >> 2)) && (abs(m3 - m4) < ((tc * 5 + 1) >> 1));
+ return (strong < (beta >> 3)) && (abs(m3 - m4) < ((tc * 5 + 1) >> 1));
}
/* Deblocking for the luminance component with strong or weak filter
@@ -362,63 +355,61 @@
static inline void pelFilterLuma(pixel* src, int32_t offset, int32_t tc, bool sw, bool partPNoFilter, bool partQNoFilter,
int32_t thrCut, bool filterSecondP, bool filterSecondQ)
{
- int32_t delta;
-
+ int16_t m1 = (int16_t)src[-offset * 3];
+ int16_t m2 = (int16_t)src[-offset * 2];
+ int16_t m3 = (int16_t)src[-offset];
int16_t m4 = (int16_t)src[0];
- int16_t m3 = (int16_t)src[-offset];
int16_t m5 = (int16_t)src[offset];
- int16_t m2 = (int16_t)src[-offset * 2];
int16_t m6 = (int16_t)src[offset * 2];
- int16_t m1 = (int16_t)src[-offset * 3];
- int16_t m7 = (int16_t)src[offset * 3];
- int16_t m0 = (int16_t)src[-offset * 4];
if (sw)
{
- src[-offset] = (pixel)Clip3(m3 - 2 * tc, m3 + 2 * tc, ((m1 + 2 * m2 + 2 * m3 + 2 * m4 + m5 + 4) >> 3));
- src[0] = (pixel)Clip3(m4 - 2 * tc, m4 + 2 * tc, ((m2 + 2 * m3 + 2 * m4 + 2 * m5 + m6 + 4) >> 3));
- src[-offset * 2] = (pixel)Clip3(m2 - 2 * tc, m2 + 2 * tc, ((m1 + m2 + m3 + m4 + 2) >> 2));
- src[offset] = (pixel)Clip3(m5 - 2 * tc, m5 + 2 * tc, ((m3 + m4 + m5 + m6 + 2) >> 2));
- src[-offset * 3] = (pixel)Clip3(m1 - 2 * tc, m1 + 2 * tc, ((2 * m0 + 3 * m1 + m2 + m3 + m4 + 4) >> 3));
- src[offset * 2] = (pixel)Clip3(m6 - 2 * tc, m6 + 2 * tc, ((m3 + m4 + m5 + 3 * m6 + 2 * m7 + 4) >> 3));
+ int16_t m0 = (int16_t)src[-offset * 4];
+ int16_t m7 = (int16_t)src[offset * 3];
+ int32_t tc2 = 2 * tc;
+ if (!partPNoFilter)
+ {
+ src[-offset * 3] = (pixel)(Clip3(-tc2, tc2, ((2 * m0 + 3 * m1 + m2 + m3 + m4 + 4) >> 3) - m1) + m1);
+ src[-offset * 2] = (pixel)(Clip3(-tc2, tc2, ((m1 + m2 + m3 + m4 + 2) >> 2) - m2) + m2);
+ src[-offset] = (pixel)(Clip3(-tc2, tc2, ((m1 + 2 * m2 + 2 * m3 + 2 * m4 + m5 + 4) >> 3) - m3) + m3);
+ }
+ if (!partQNoFilter)
+ {
+ src[0] = (pixel)(Clip3(-tc2, tc2, ((m2 + 2 * m3 + 2 * m4 + 2 * m5 + m6 + 4) >> 3) - m4) + m4);
+ src[offset] = (pixel)(Clip3(-tc2, tc2, ((m3 + m4 + m5 + m6 + 2) >> 2) - m5) + m5);
+ src[offset * 2] = (pixel)(Clip3(-tc2, tc2, ((m3 + m4 + m5 + 3 * m6 + 2 * m7 + 4) >> 3) - m6) + m6);
+ }
}
else
{
/* Weak filter */
- delta = (9 * (m4 - m3) - 3 * (m5 - m2) + 8) >> 4;
+ int32_t delta = (9 * (m4 - m3) - 3 * (m5 - m2) + 8) >> 4;
if (abs(delta) < thrCut)
{
delta = Clip3(-tc, tc, delta);
- src[-offset] = Clip(m3 + delta);
- src[0] = Clip(m4 - delta);
int32_t tc2 = tc >> 1;
- if (filterSecondP)
+ if (!partPNoFilter)
{
- int32_t delta1 = Clip3(-tc2, tc2, ((((m1 + m3 + 1) >> 1) - m2 + delta) >> 1));
- src[-offset * 2] = Clip(m2 + delta1);
+ src[-offset] = Clip(m3 + delta);
+ if (filterSecondP)
+ {
+ int32_t delta1 = Clip3(-tc2, tc2, ((((m1 + m3 + 1) >> 1) - m2 + delta) >> 1));
+ src[-offset * 2] = Clip(m2 + delta1);
+ }
}
- if (filterSecondQ)
+ if (!partQNoFilter)
{
- int32_t delta2 = Clip3(-tc2, tc2, ((((m6 + m4 + 1) >> 1) - m5 - delta) >> 1));
- src[offset] = Clip(m5 + delta2);
+ src[0] = Clip(m4 - delta);
+ if (filterSecondQ)
+ {
+ int32_t delta2 = Clip3(-tc2, tc2, ((((m6 + m4 + 1) >> 1) - m5 - delta) >> 1));
+ src[offset] = Clip(m5 + delta2);
+ }
}
}
}
-
- if (partPNoFilter)
- {
- src[-offset] = (pixel)m3;
- src[-offset * 2] = (pixel)m2;
- src[-offset * 3] = (pixel)m1;
- }
- if (partQNoFilter)
- {
- src[0] = (pixel)m4;
- src[offset] = (pixel)m5;
- src[offset * 2] = (pixel)m6;
- }
}
/* Deblocking of one line/column for the chrominance component
@@ -429,34 +420,26 @@
* \param partQNoFilter indicator to disable filtering on partQ */
static inline void pelFilterChroma(pixel* src, int32_t offset, int32_t tc, bool partPNoFilter, bool partQNoFilter)
{
- int32_t delta;
+ int16_t m2 = (int16_t)src[-offset * 2];
+ int16_t m3 = (int16_t)src[-offset];
+ int16_t m4 = (int16_t)src[0];
+ int16_t m5 = (int16_t)src[offset];
- int16_t m4 = (int16_t)src[0];
- int16_t m3 = (int16_t)src[-offset];
- int16_t m5 = (int16_t)src[offset];
- int16_t m2 = (int16_t)src[-offset * 2];
-
- delta = Clip3(-tc, tc, ((((m4 - m3) << 2) + m2 - m5 + 4) >> 3));
- src[-offset] = Clip(m3 + delta);
- src[0] = Clip(m4 - delta);
-
- if (partPNoFilter)
- src[-offset] = (pixel)m3;
- if (partQNoFilter)
- src[0] = (pixel)m4;
+ int32_t delta = Clip3(-tc, tc, ((((m4 - m3) << 2) + m2 - m5 + 4) >> 3));
+ if (!partPNoFilter)
+ src[-offset] = Clip(m3 + delta);
+ if (!partQNoFilter)
+ src[0] = Clip(m4 - delta);
}
void Deblock::edgeFilterLuma(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, int32_t edge, uint8_t blockingStrength[])
{
TComPicYuv* reconYuv = cu->m_pic->getPicYuvRec();
pixel* src = reconYuv->getLumaAddr(cu->getAddr(), absZOrderIdx);
- pixel* tmpsrc = src;
int32_t stride = reconYuv->getStride();
uint32_t numParts = cu->m_pic->getNumPartInCUSize() >> depth;
- uint32_t blocksInPart = (LOG2_UNIT_SIZE - 2) > 0 ? 1 << (LOG2_UNIT_SIZE - 2) : 1;
- uint32_t bsAbsIdx = 0, bs = 0;
int32_t offset, srcStep;
bool partPNoFilter = false;
@@ -472,20 +455,20 @@
{
offset = 1;
srcStep = stride;
- tmpsrc += (edge << LOG2_UNIT_SIZE);
+ src += (edge << LOG2_UNIT_SIZE);
}
else // (dir == EDGE_HOR)
{
offset = stride;
srcStep = 1;
- tmpsrc += (edge << LOG2_UNIT_SIZE) * stride;
+ src += (edge << LOG2_UNIT_SIZE) * stride;
}
for (uint32_t idx = 0; idx < numParts; idx++)
{
- uint32_t partOffset = idx << LOG2_UNIT_SIZE;
- bsAbsIdx = calcBsIdx(cu, absZOrderIdx, dir, edge, idx);
- bs = blockingStrength[bsAbsIdx];
+ uint32_t unitOffset = idx << LOG2_UNIT_SIZE;
+ uint32_t bsAbsIdx = calcBsIdx(cu, absZOrderIdx, dir, edge, idx);
+ uint32_t bs = blockingStrength[bsAbsIdx];
if (bs)
{
int32_t qpQ = cu->getQP(bsAbsIdx);
@@ -499,29 +482,25 @@
int32_t qpP = cuP->getQP(partP);
int32_t qp = (qpP + qpQ + 1) >> 1;
- int32_t bitdepthScale = 1 << (X265_DEPTH - 8);
- int32_t indexTC = Clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset));
int32_t indexB = Clip3(0, QP_MAX_SPEC, qp + betaOffset);
- int32_t tc = s_tcTable[indexTC] * bitdepthScale;
- int32_t beta = s_betaTable[indexB] * bitdepthScale;
- int32_t sideThreshold = (beta + (beta >> 1)) >> 3;
- int32_t thrCut = tc * 10;
+ const int32_t bitdepthShift = X265_DEPTH - 8;
+ int32_t beta = s_betaTable[indexB] << bitdepthShift;
- for (uint32_t blkIdx = 0; blkIdx < blocksInPart; blkIdx++)
+ int32_t dp0 = calcDP(src + srcStep * (unitOffset + 0), offset);
+ int32_t dq0 = calcDQ(src + srcStep * (unitOffset + 0), offset);
+ int32_t dp3 = calcDP(src + srcStep * (unitOffset + 3), offset);
+ int32_t dq3 = calcDQ(src + srcStep * (unitOffset + 3), offset);
+ int32_t d0 = dp0 + dq0;
+ int32_t d3 = dp3 + dq3;
+
+ int32_t dp = dp0 + dp3;
+ int32_t dq = dq0 + dq3;
+ int32_t d = d0 + d3;
+
+ if (d < beta)
{
- int32_t dp0 = calcDP(tmpsrc + srcStep * (partOffset + blkIdx * 4 + 0), offset);
- int32_t dq0 = calcDQ(tmpsrc + srcStep * (partOffset + blkIdx * 4 + 0), offset);
- int32_t dp3 = calcDP(tmpsrc + srcStep * (partOffset + blkIdx * 4 + 3), offset);
- int32_t dq3 = calcDQ(tmpsrc + srcStep * (partOffset + blkIdx * 4 + 3), offset);
- int32_t d0 = dp0 + dq0;
- int32_t d3 = dp3 + dq3;
-
- int32_t dp = dp0 + dp3;
- int32_t dq = dq0 + dq3;
- int32_t d = d0 + d3;
-
if (cu->m_slice->m_pps->bTransquantBypassEnabled)
{
// check if each of PUs is lossless coded
@@ -529,17 +508,21 @@
partQNoFilter = cuQ->getCUTransquantBypass(partQ);
}
- if (d < beta)
- {
- bool filterP = (dp < sideThreshold);
- bool filterQ = (dq < sideThreshold);
+ int32_t indexTC = Clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset));
+ int32_t tc = s_tcTable[indexTC] << bitdepthShift;
+ int32_t sideThreshold = (beta + (beta >> 1)) >> 3;
+ int32_t thrCut = tc * 10;
- bool sw = useStrongFiltering(offset, 2 * d0, beta, tc, tmpsrc + srcStep * (partOffset + blkIdx * 4 + 0))
- && useStrongFiltering(offset, 2 * d3, beta, tc, tmpsrc + srcStep * (partOffset + blkIdx * 4 + 3));
+ bool filterP = (dp < sideThreshold);
+ bool filterQ = (dq < sideThreshold);
- for (int32_t i = 0; i < DEBLOCK_SMALLEST_BLOCK / 2; i++)
- pelFilterLuma(tmpsrc + srcStep * (partOffset + blkIdx * 4 + i), offset, tc, sw, partPNoFilter, partQNoFilter, thrCut, filterP, filterQ);
- }
+ bool sw = (2 * d0 < (beta >> 2) &&
+ 2 * d3 < (beta >> 2) &&
+ useStrongFiltering(offset, beta, tc, src + srcStep * (unitOffset + 0)) &&
+ useStrongFiltering(offset, beta, tc, src + srcStep * (unitOffset + 3)));
+
+ for (int32_t i = 0; i < UNIT_SIZE; i++)
+ pelFilterLuma(src + srcStep * (unitOffset + i), offset, tc, sw, partPNoFilter, partQNoFilter, thrCut, filterP, filterQ);
}
}
}
@@ -548,17 +531,7 @@
void Deblock::edgeFilterChroma(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, int32_t edge, uint8_t blockingStrength[])
{
int32_t chFmt = cu->getChromaFormat();
- TComPicYuv* reconYuv = cu->m_pic->getPicYuvRec();
- int32_t stride = reconYuv->getCStride();
- pixel* srcCb = reconYuv->getCbAddr(cu->getAddr(), absZOrderIdx);
- pixel* srcCr = reconYuv->getCrAddr(cu->getAddr(), absZOrderIdx);
- uint32_t log2UnitSizeH = LOG2_UNIT_SIZE - cu->getHorzChromaShift();
- uint32_t log2UnitSizeV = LOG2_UNIT_SIZE - cu->getVertChromaShift();
- uint32_t sizeChromaH = 1 << log2UnitSizeH;
- uint32_t sizeChromaV = 1 << log2UnitSizeV;
- int32_t offset, srcStep;
-
- const uint32_t lcuWidthInBaseUnits = cu->m_pic->getNumPartInCUSize();
+ int32_t offset, srcStep, chromaShift;
bool partPNoFilter = false;
bool partQNoFilter = false;
@@ -568,44 +541,42 @@
TComDataCU* cuQ = cu;
int32_t tcOffset = cu->m_slice->m_pps->deblockingFilterTcOffsetDiv2 << 1;
- // Vertical Position
- uint32_t edgeNumInLCUVert = g_zscanToRaster[absZOrderIdx] % lcuWidthInBaseUnits + edge;
- uint32_t edgeNumInLCUHor = g_zscanToRaster[absZOrderIdx] / lcuWidthInBaseUnits + edge;
+ X265_CHECK(((dir == EDGE_VER)
+ ? ((g_zscanToPelX[absZOrderIdx] + edge * UNIT_SIZE) >> cu->getHorzChromaShift())
+ : ((g_zscanToPelY[absZOrderIdx] + edge * UNIT_SIZE) >> cu->getVertChromaShift())) % DEBLOCK_SMALLEST_BLOCK == 0,
+ "invalid edge\n");
- if ((sizeChromaH < DEBLOCK_SMALLEST_BLOCK) && (sizeChromaV < DEBLOCK_SMALLEST_BLOCK) &&
- (((edgeNumInLCUVert % (DEBLOCK_SMALLEST_BLOCK >> log2UnitSizeH)) && !dir) ||
- ((edgeNumInLCUHor % (DEBLOCK_SMALLEST_BLOCK >> log2UnitSizeV)) && dir)))
- return;
- uint32_t numParts = cu->m_pic->getNumPartInCUSize() >> depth;
- uint32_t bsAbsIdx;
- uint8_t bs;
-
- pixel* tmpSrcCb = srcCb;
- pixel* tmpSrcCr = srcCr;
- uint32_t loopLength;
+ TComPicYuv* reconYuv = cu->m_pic->getPicYuvRec();
+ int32_t stride = reconYuv->getCStride();
+ int32_t srcOffset = reconYuv->getChromaAddrOffset(cu->getAddr(), absZOrderIdx);
if (dir == EDGE_VER)
{
+ chromaShift = cu->getVertChromaShift();
+ srcOffset += (edge << (LOG2_UNIT_SIZE - cu->getHorzChromaShift()));
offset = 1;
srcStep = stride;
- tmpSrcCb += (edge << log2UnitSizeH);
- tmpSrcCr += (edge << log2UnitSizeH);
- loopLength = sizeChromaV;
}
else // (dir == EDGE_HOR)
{
+ chromaShift = cu->getHorzChromaShift();
+ srcOffset += edge * stride << (LOG2_UNIT_SIZE - cu->getVertChromaShift());
offset = stride;
srcStep = 1;
- tmpSrcCb += edge * stride << log2UnitSizeV;
- tmpSrcCr += edge * stride << log2UnitSizeV;
- loopLength = sizeChromaH;
}
- for (uint32_t idx = 0; idx < numParts; idx++)
+ pixel* srcChroma[2];
+ srcChroma[0] = reconYuv->getCbAddr() + srcOffset;
+ srcChroma[1] = reconYuv->getCrAddr() + srcOffset;
+
+ uint32_t numUnits = cu->m_pic->getNumPartInCUSize() >> (depth + chromaShift);
+
+ for (uint32_t idx = 0; idx < numUnits; idx++)
{
- bsAbsIdx = calcBsIdx(cu, absZOrderIdx, dir, edge, idx);
- bs = blockingStrength[bsAbsIdx];
+ uint32_t unitOffset = idx << LOG2_UNIT_SIZE;
+ uint32_t bsAbsIdx = calcBsIdx(cu, absZOrderIdx, dir, edge, idx << chromaShift);
+ uint32_t bs = blockingStrength[bsAbsIdx];
if (bs > 1)
{
@@ -630,7 +601,6 @@
for (uint32_t chromaIdx = 0; chromaIdx < 2; chromaIdx++)
{
int32_t chromaQPOffset = !chromaIdx ? cu->m_slice->m_pps->chromaCbQpOffset : cu->m_slice->m_pps->chromaCrQpOffset;
- pixel* tmpSrcChroma = !chromaIdx ? tmpSrcCb : tmpSrcCr;
int32_t qp = ((qpP + qpQ + 1) >> 1) + chromaQPOffset;
if (qp >= 30)
{
@@ -640,12 +610,13 @@
qp = X265_MIN(qp, 51);
}
- int32_t bitdepthScale = 1 << (X265_DEPTH - 8);
- int32_t indexTC = Clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset);
- int32_t tc = s_tcTable[indexTC] * bitdepthScale;
+ int32_t indexTC = Clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET + tcOffset));
+ const int32_t bitdepthShift = X265_DEPTH - 8;
+ int32_t tc = s_tcTable[indexTC] << bitdepthShift;
+ pixel* srcC = srcChroma[chromaIdx];
- for (uint32_t step = 0; step < loopLength; step++)
- pelFilterChroma(tmpSrcChroma + srcStep * (step + idx * loopLength), offset, tc, partPNoFilter, partQNoFilter);
+ for (int32_t i = 0; i < UNIT_SIZE; i++)
+ pelFilterChroma(srcC + srcStep * (unitOffset + i), offset, tc, partPNoFilter, partQNoFilter);
}
}
}
diff -r b2b7072ddbf7 -r 9f96fc8374d8 source/common/deblock.h
--- a/source/common/deblock.h Wed Sep 24 11:48:15 2014 +0530
+++ b/source/common/deblock.h Wed Sep 24 18:08:46 2014 +0900
@@ -58,7 +58,7 @@
// set filtering functions
void setLoopfilterParam(TComDataCU* cu, uint32_t absZOrderIdx, Param *params);
- void setEdgefilterTU(TComDataCU* cu, uint32_t absTUPartIdx, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, bool edgeFilter[], uint8_t blockingStrength[]);
+ void setEdgefilterTU(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, bool edgeFilter[], uint8_t blockingStrength[]);
void setEdgefilterPU(TComDataCU* cu, uint32_t absZOrderIdx, int32_t dir, Param *params, bool edgeFilter[], uint8_t blockingStrength[]);
void setEdgefilterMultiple(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, int32_t edgeIdx, bool value, bool edgeFilter[], uint8_t blockingStrength[], uint32_t widthInBaseUnits = 0);
More information about the x265-devel
mailing list