[x265] refine deblocking filter
Satoshi Nakagawa
nakagawa424 at oki.com
Fri Sep 26 12:36:19 CEST 2014
# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1411727676 -32400
# Fri Sep 26 19:34:36 2014 +0900
# Node ID 06237deb460b629d6100d5b613d42033cc3477bd
# Parent 7dccbbed034970de161b361cd6e17ed4efca7226
refine deblocking filter
diff -r 7dccbbed0349 -r 06237deb460b source/Lib/TLibCommon/TComPicYuv.h
--- a/source/Lib/TLibCommon/TComPicYuv.h Wed Sep 24 18:26:45 2014 -0500
+++ b/source/Lib/TLibCommon/TComPicYuv.h Fri Sep 26 19:34:36 2014 +0900
@@ -155,6 +155,8 @@
pixel* getChromaAddr(uint32_t chromaId, int cuAddr, int absZOrderIdx) { return m_picOrg[chromaId] + m_cuOffsetC[cuAddr] + m_buOffsetC[absZOrderIdx]; }
+ int32_t getChromaAddrOffset(int cuAddr, int absZOrderIdx) { return m_cuOffsetC[cuAddr] + m_buOffsetC[absZOrderIdx]; }
+
uint32_t getCUHeight(int rowNum);
void copyFromPicture(const x265_picture&, int padx, int pady);
diff -r 7dccbbed0349 -r 06237deb460b source/Lib/TLibCommon/TComRom.cpp
--- a/source/Lib/TLibCommon/TComRom.cpp Wed Sep 24 18:26:45 2014 -0500
+++ b/source/Lib/TLibCommon/TComRom.cpp Fri Sep 26 19:34:36 2014 +0900
@@ -115,10 +115,10 @@
uint32_t g_maxCUSize = MAX_CU_SIZE;
uint32_t g_maxFullDepth = NUM_FULL_DEPTH - 1;
uint32_t g_maxCUDepth = NUM_CU_DEPTH - 1;
-uint32_t g_zscanToRaster[MAX_NUM_SPU_W * MAX_NUM_SPU_W] = { 0, };
-uint32_t g_rasterToZscan[MAX_NUM_SPU_W * MAX_NUM_SPU_W] = { 0, };
+uint32_t g_zscanToRaster[MAX_NUM_PARTITIONS] = { 0, };
+uint32_t g_rasterToZscan[MAX_NUM_PARTITIONS] = { 0, };
-const uint8_t g_zscanToPelX[MAX_NUM_SPU_W * MAX_NUM_SPU_W] =
+const uint8_t g_zscanToPelX[MAX_NUM_PARTITIONS] =
{
0, 4, 0, 4, 8, 12, 8, 12, 0, 4, 0, 4, 8, 12, 8, 12,
16, 20, 16, 20, 24, 28, 24, 28, 16, 20, 16, 20, 24, 28, 24, 28,
@@ -138,7 +138,7 @@
48, 52, 48, 52, 56, 60, 56, 60, 48, 52, 48, 52, 56, 60, 56, 60
};
-const uint8_t g_zscanToPelY[MAX_NUM_SPU_W * MAX_NUM_SPU_W] =
+const uint8_t g_zscanToPelY[MAX_NUM_PARTITIONS] =
{
0, 0, 4, 4, 0, 0, 4, 4, 8, 8, 12, 12, 8, 8, 12, 12,
0, 0, 4, 4, 0, 0, 4, 4, 8, 8, 12, 12, 8, 8, 12, 12,
diff -r 7dccbbed0349 -r 06237deb460b source/Lib/TLibCommon/TComRom.h
--- a/source/Lib/TLibCommon/TComRom.h Wed Sep 24 18:26:45 2014 -0500
+++ b/source/Lib/TLibCommon/TComRom.h Fri Sep 26 19:34:36 2014 +0900
@@ -54,6 +54,8 @@
#define UNIT_SIZE (1 << LOG2_UNIT_SIZE) // unit size of CU partition
#define TMVP_UNIT_MASK 0xF0 // mask for mapping index to CompressMV field
+#define MAX_NUM_PARTITIONS 256
+
#define MIN_PU_SIZE 4
#define MIN_TU_SIZE 4
#define MAX_NUM_SPU_W (MAX_CU_SIZE / MIN_PU_SIZE) // maximum number of SPU in horizontal line
@@ -75,15 +77,15 @@
extern const uint8_t g_chroma422IntraAngleMappingTable[36];
// flexible conversion from relative to absolute index
-extern uint32_t g_zscanToRaster[MAX_NUM_SPU_W * MAX_NUM_SPU_W];
-extern uint32_t g_rasterToZscan[MAX_NUM_SPU_W * MAX_NUM_SPU_W];
+extern uint32_t g_zscanToRaster[MAX_NUM_PARTITIONS];
+extern uint32_t g_rasterToZscan[MAX_NUM_PARTITIONS];
void initZscanToRaster(uint32_t maxFullDepth, uint32_t depth, uint32_t startVal, uint32_t*& curIdx);
void initRasterToZscan(uint32_t maxFullDepth);
// conversion of partition index to picture pel position
-extern const uint8_t g_zscanToPelX[MAX_NUM_SPU_W * MAX_NUM_SPU_W];
-extern const uint8_t g_zscanToPelY[MAX_NUM_SPU_W * MAX_NUM_SPU_W];
+extern const uint8_t g_zscanToPelX[MAX_NUM_PARTITIONS];
+extern const uint8_t g_zscanToPelY[MAX_NUM_PARTITIONS];
// global variable (LCU width/height, max. CU depth)
extern uint32_t g_maxLog2CUSize;
diff -r 7dccbbed0349 -r 06237deb460b source/common/deblock.cpp
--- a/source/common/deblock.cpp Wed Sep 24 18:26:45 2014 -0500
+++ b/source/common/deblock.cpp Fri Sep 26 19:34:36 2014 +0900
@@ -32,23 +32,24 @@
#define DEBLOCK_SMALLEST_BLOCK 8
#define DEFAULT_INTRA_TC_OFFSET 2
-void Deblock::deblockCTU(TComDataCU* cu, int32_t dir, bool edgeFilter[], uint8_t blockingStrength[])
+void Deblock::deblockCTU(TComDataCU* cu, int32_t dir)
{
+ uint8_t blockingStrength[MAX_NUM_PARTITIONS];
+
memset(blockingStrength, 0, sizeof(uint8_t) * m_numPartitions);
- memset(edgeFilter, 0, sizeof(bool) * m_numPartitions);
- deblockCU(cu, 0, 0, dir, edgeFilter, blockingStrength);
+ deblockCU(cu, 0, 0, dir, blockingStrength);
}
/* Deblocking filter process in CU-based (the same function as conventional's)
* param Edge the direction of the edge in block boundary (horizonta/vertical), which is added newly */
-void Deblock::deblockCU(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, const int32_t dir, bool edgeFilter[], uint8_t blockingStrength[])
+void Deblock::deblockCU(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, const int32_t dir, uint8_t blockingStrength[])
{
if (!cu->m_pic || cu->getPartitionSize(absZOrderIdx) == SIZE_NONE)
return;
Frame* pic = cu->m_pic;
- uint32_t curNumParts = pic->getNumPartInCU() >> (depth << 1);
+ uint32_t curNumParts = m_numPartitions >> (depth * 2);
if (cu->getDepth(absZOrderIdx) > depth)
{
@@ -56,35 +57,36 @@
uint32_t xmax = cu->m_slice->m_sps->picWidthInLumaSamples - cu->getCUPelX();
uint32_t ymax = cu->m_slice->m_sps->picHeightInLumaSamples - cu->getCUPelY();
for (uint32_t partIdx = 0; partIdx < 4; partIdx++, absZOrderIdx += qNumParts)
- {
if (g_zscanToPelX[absZOrderIdx] < xmax && g_zscanToPelY[absZOrderIdx] < ymax)
- deblockCU(cu, absZOrderIdx, depth + 1, dir, edgeFilter, blockingStrength);
- }
+ deblockCU(cu, absZOrderIdx, depth + 1, dir, blockingStrength);
return;
}
+ const uint32_t widthInBaseUnits = cu->m_pic->getNumPartInCUSize() >> depth;
Param params;
setLoopfilterParam(cu, absZOrderIdx, &params);
- setEdgefilterTU(cu, absZOrderIdx, absZOrderIdx, depth, dir, edgeFilter, blockingStrength);
- setEdgefilterPU(cu, absZOrderIdx, dir, &params, edgeFilter, blockingStrength);
+ setEdgefilterPU(cu, absZOrderIdx, dir, blockingStrength, widthInBaseUnits);
+ setEdgefilterTU(cu, absZOrderIdx, depth, dir, blockingStrength);
+ setEdgefilterMultiple(cu, absZOrderIdx, dir, 0, (dir == EDGE_VER ? params.leftEdge : params.topEdge), blockingStrength, widthInBaseUnits);
for (uint32_t partIdx = absZOrderIdx; partIdx < absZOrderIdx + curNumParts; partIdx++)
{
- uint32_t bsCheck = (dir == EDGE_VER ? !(partIdx & 1) : !(partIdx & 2));
+ uint32_t bsCheck = !(partIdx & (1 << dir));
- if (edgeFilter[partIdx] && bsCheck)
+ if (bsCheck && blockingStrength[partIdx])
getBoundaryStrengthSingle(cu, dir, partIdx, blockingStrength);
}
- uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK >> LOG2_UNIT_SIZE;
+ const uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK >> LOG2_UNIT_SIZE;
uint32_t sizeInPU = pic->getNumPartInCUSize() >> depth;
uint32_t shiftFactor = (dir == EDGE_VER) ? cu->getHorzChromaShift() : cu->getVertChromaShift();
- const bool alwaysDoChroma = cu->getChromaFormat() == X265_CSP_I444;
-
+ uint32_t chromaMask = ((DEBLOCK_SMALLEST_BLOCK << shiftFactor) >> LOG2_UNIT_SIZE) - 1;
+ uint32_t e0 = (dir == EDGE_VER ? g_zscanToPelX[absZOrderIdx] : g_zscanToPelY[absZOrderIdx]) >> LOG2_UNIT_SIZE;
+
for (uint32_t e = 0; e < sizeInPU; e += partIdxIncr)
{
edgeFilterLuma(cu, absZOrderIdx, depth, dir, e, blockingStrength);
- if (alwaysDoChroma || !(e % ((DEBLOCK_SMALLEST_BLOCK << shiftFactor) >> LOG2_UNIT_SIZE)))
+ if (!((e0 + e) & chromaMask))
edgeFilterChroma(cu, absZOrderIdx, depth, dir, e, blockingStrength);
}
}
@@ -99,82 +101,67 @@
return g_rasterToZscan[g_zscanToRaster[absZOrderIdx] + baseUnitIdx * lcuWidthInBaseUnits + edgeIdx];
}
-void Deblock::setEdgefilterMultiple(TComDataCU* cu, uint32_t scanIdx, uint32_t depth, int32_t dir, int32_t edgeIdx, bool value, bool edgeFilter[], uint8_t blockingStrength[], uint32_t widthInBaseUnits)
+void Deblock::setEdgefilterMultiple(TComDataCU* cu, uint32_t scanIdx, int32_t dir, int32_t edgeIdx, uint8_t value, uint8_t blockingStrength[], uint32_t widthInBaseUnits)
{
- if (!widthInBaseUnits)
- widthInBaseUnits = cu->m_pic->getNumPartInCUSize() >> depth;
-
const uint32_t numElem = widthInBaseUnits;
X265_CHECK(numElem > 0, "numElem edge filter check\n");
for (uint32_t i = 0; i < numElem; i++)
{
const uint32_t bsidx = calcBsIdx(cu, scanIdx, dir, edgeIdx, i);
- edgeFilter[bsidx] = value;
- if (!edgeIdx)
- blockingStrength[bsidx] = value;
+ blockingStrength[bsidx] = value;
}
}
-void Deblock::setEdgefilterTU(TComDataCU* cu, uint32_t absTUPartIdx, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, bool edgeFilter[], uint8_t blockingStrength[])
+void Deblock::setEdgefilterTU(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, uint8_t blockingStrength[])
{
if (cu->getTransformIdx(absZOrderIdx) + cu->getDepth(absZOrderIdx) > (uint8_t)depth)
{
- const uint32_t curNumParts = cu->m_pic->getNumPartInCU() >> (depth << 1);
+ const uint32_t curNumParts = m_numPartitions >> (depth * 2);
const uint32_t qNumParts = curNumParts >> 2;
for (uint32_t partIdx = 0; partIdx < 4; partIdx++, absZOrderIdx += qNumParts)
- {
- uint32_t nsAddr = absZOrderIdx;
- setEdgefilterTU(cu, nsAddr, absZOrderIdx, depth + 1, dir, edgeFilter, blockingStrength);
- }
+ setEdgefilterTU(cu, absZOrderIdx, depth + 1, dir, blockingStrength);
return;
}
uint32_t widthInBaseUnits = 1 << (cu->getLog2CUSize(absZOrderIdx) - cu->getTransformIdx(absZOrderIdx) - LOG2_UNIT_SIZE);
- setEdgefilterMultiple(cu, absTUPartIdx, depth, dir, 0, true, edgeFilter, blockingStrength, widthInBaseUnits);
+ setEdgefilterMultiple(cu, absZOrderIdx, dir, 0, 2, blockingStrength, widthInBaseUnits);
}
-void Deblock::setEdgefilterPU(TComDataCU* cu, uint32_t absZOrderIdx, int32_t dir, Param *params, bool edgeFilter[], uint8_t blockingStrength[])
+void Deblock::setEdgefilterPU(TComDataCU* cu, uint32_t absZOrderIdx, int32_t dir, uint8_t blockingStrength[], uint32_t widthInBaseUnits)
{
- const uint32_t depth = cu->getDepth(absZOrderIdx);
- const uint32_t widthInBaseUnits = cu->m_pic->getNumPartInCUSize() >> depth;
- const uint32_t hWidthInBaseUnits = widthInBaseUnits >> 1;
- const uint32_t qWidthInBaseUnits = widthInBaseUnits >> 2;
+ const uint32_t hWidthInBaseUnits = widthInBaseUnits >> 1;
+ const uint32_t qWidthInBaseUnits = widthInBaseUnits >> 2;
- setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, 0, (dir == EDGE_VER ? params->leftEdge : params->topEdge), edgeFilter, blockingStrength);
-
- int32_t mode = cu->getPartitionSize(absZOrderIdx);
- switch (mode)
+ switch (cu->getPartitionSize(absZOrderIdx))
{
case SIZE_2NxN:
+ if (EDGE_HOR == dir)
+ setEdgefilterMultiple(cu, absZOrderIdx, dir, hWidthInBaseUnits, 1, blockingStrength, widthInBaseUnits);
+ break;
case SIZE_Nx2N:
- {
- const int32_t realDir = (mode == SIZE_2NxN ? EDGE_HOR : EDGE_VER);
- if (realDir == dir)
- setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, hWidthInBaseUnits, true, edgeFilter, blockingStrength);
- break;
- }
+ if (EDGE_VER == dir)
+ setEdgefilterMultiple(cu, absZOrderIdx, dir, hWidthInBaseUnits, 1, blockingStrength, widthInBaseUnits);
+ break;
case SIZE_NxN:
- {
- setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, hWidthInBaseUnits, true, edgeFilter, blockingStrength);
- break;
- }
+ setEdgefilterMultiple(cu, absZOrderIdx, dir, hWidthInBaseUnits, 1, blockingStrength, widthInBaseUnits);
+ break;
case SIZE_2NxnU:
+ if (EDGE_HOR == dir)
+ setEdgefilterMultiple(cu, absZOrderIdx, dir, qWidthInBaseUnits, 1, blockingStrength, widthInBaseUnits);
+ break;
case SIZE_nLx2N:
- {
- const int32_t realDir = (mode == SIZE_2NxnU ? EDGE_HOR : EDGE_VER);
- if (realDir == dir)
- setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, qWidthInBaseUnits, true, edgeFilter, blockingStrength);
- break;
- }
+ if (EDGE_VER == dir)
+ setEdgefilterMultiple(cu, absZOrderIdx, dir, qWidthInBaseUnits, 1, blockingStrength, widthInBaseUnits);
+ break;
case SIZE_2NxnD:
+ if (EDGE_HOR == dir)
+ setEdgefilterMultiple(cu, absZOrderIdx, dir, widthInBaseUnits - qWidthInBaseUnits, 1, blockingStrength, widthInBaseUnits);
+ break;
case SIZE_nRx2N:
- {
- const int32_t realDir = (mode == SIZE_2NxnD ? EDGE_HOR : EDGE_VER);
- if (realDir == dir)
- setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, widthInBaseUnits - qWidthInBaseUnits, true, edgeFilter, blockingStrength);
- break;
- }
+ if (EDGE_VER == dir)
+ setEdgefilterMultiple(cu, absZOrderIdx, dir, widthInBaseUnits - qWidthInBaseUnits, 1, blockingStrength, widthInBaseUnits);
+ break;
case SIZE_2Nx2N:
default:
@@ -191,25 +178,25 @@
uint32_t tempPartIdx;
if (!x)
- params->leftEdge = false;
+ params->leftEdge = 0;
else
{
tempCU = cu->getPULeft(tempPartIdx, absZOrderIdx);
if (tempCU)
- params->leftEdge = true;
+ params->leftEdge = 2;
else
- params->leftEdge = false;
+ params->leftEdge = 0;
}
if (!y)
- params->topEdge = false;
+ params->topEdge = 0;
else
{
tempCU = cu->getPUAbove(tempPartIdx, absZOrderIdx);
if (tempCU)
- params->topEdge = true;
+ params->topEdge = 2;
else
- params->topEdge = false;
+ params->topEdge = 0;
}
}
@@ -239,8 +226,9 @@
uint32_t nsPartQ = partQ;
uint32_t nsPartP = partP;
- if (blockingStrength[absPartIdx] && (cuQ->getCbf(nsPartQ, TEXT_LUMA, cuQ->getTransformIdx(nsPartQ)) ||
- cuP->getCbf(nsPartP, TEXT_LUMA, cuP->getTransformIdx(nsPartP))))
+ if (blockingStrength[absPartIdx] > 1 &&
+ (cuQ->getCbf(nsPartQ, TEXT_LUMA, cuQ->getTransformIdx(nsPartQ)) ||
+ cuP->getCbf(nsPartP, TEXT_LUMA, cuP->getTransformIdx(nsPartP))))
bs = 1;
else
{
@@ -338,7 +326,7 @@
return abs(static_cast<int32_t>(src[0]) - 2 * src[offset] + src[offset * 2]);
}
-static inline bool useStrongFiltering(int32_t offset, int32_t d, int32_t beta, int32_t tc, pixel* src)
+static inline bool useStrongFiltering(int32_t offset, int32_t beta, int32_t tc, pixel* src)
{
int16_t m4 = (int16_t)src[0];
int16_t m3 = (int16_t)src[-offset];
@@ -346,79 +334,87 @@
int16_t m0 = (int16_t)src[-offset * 4];
int32_t strong = abs(m0 - m3) + abs(m7 - m4);
- return (strong < (beta >> 3)) && (d < (beta >> 2)) && (abs(m3 - m4) < ((tc * 5 + 1) >> 1));
+ return (strong < (beta >> 3)) && (abs(m3 - m4) < ((tc * 5 + 1) >> 1));
}
/* Deblocking for the luminance component with strong or weak filter
* \param src pointer to picture data
* \param offset offset value for picture data
* \param tc tc value
- * \param sw decision strong/weak filter
* \param partPNoFilter indicator to disable filtering on partP
* \param partQNoFilter indicator to disable filtering on partQ
- * \param thrCut threshold value for weak filter decision
* \param filterSecondP decision weak filter/no filter for partP
* \param filterSecondQ decision weak filter/no filter for partQ */
-static inline void pelFilterLuma(pixel* src, int32_t offset, int32_t tc, bool sw, bool partPNoFilter, bool partQNoFilter,
- int32_t thrCut, bool filterSecondP, bool filterSecondQ)
+static inline void pelFilterLumaStrong(pixel* src, int32_t srcStep, int32_t offset, int32_t tc, bool partPNoFilter, bool partQNoFilter)
{
- int32_t delta;
+ for (int32_t i = 0; i < UNIT_SIZE; i++, src += srcStep)
+ {
+ int16_t m4 = (int16_t)src[0];
+ int16_t m3 = (int16_t)src[-offset];
+ int16_t m5 = (int16_t)src[offset];
+ int16_t m2 = (int16_t)src[-offset * 2];
+ int32_t tc2 = 2 * tc;
+ if (!partPNoFilter)
+ {
+ int16_t m1 = (int16_t)src[-offset * 3];
+ int16_t m0 = (int16_t)src[-offset * 4];
+ src[-offset * 3] = (pixel)(Clip3(-tc2, tc2, ((2 * m0 + 3 * m1 + m2 + m3 + m4 + 4) >> 3) - m1) + m1);
+ src[-offset * 2] = (pixel)(Clip3(-tc2, tc2, ((m1 + m2 + m3 + m4 + 2) >> 2) - m2) + m2);
+ src[-offset] = (pixel)(Clip3(-tc2, tc2, ((m1 + 2 * m2 + 2 * m3 + 2 * m4 + m5 + 4) >> 3) - m3) + m3);
+ }
+ if (!partQNoFilter)
+ {
+ int16_t m6 = (int16_t)src[offset * 2];
+ int16_t m7 = (int16_t)src[offset * 3];
+ src[0] = (pixel)(Clip3(-tc2, tc2, ((m2 + 2 * m3 + 2 * m4 + 2 * m5 + m6 + 4) >> 3) - m4) + m4);
+ src[offset] = (pixel)(Clip3(-tc2, tc2, ((m3 + m4 + m5 + m6 + 2) >> 2) - m5) + m5);
+ src[offset * 2] = (pixel)(Clip3(-tc2, tc2, ((m3 + m4 + m5 + 3 * m6 + 2 * m7 + 4) >> 3) - m6) + m6);
+ }
+ }
+}
- int16_t m4 = (int16_t)src[0];
- int16_t m3 = (int16_t)src[-offset];
- int16_t m5 = (int16_t)src[offset];
- int16_t m2 = (int16_t)src[-offset * 2];
- int16_t m6 = (int16_t)src[offset * 2];
- int16_t m1 = (int16_t)src[-offset * 3];
- int16_t m7 = (int16_t)src[offset * 3];
- int16_t m0 = (int16_t)src[-offset * 4];
+/* Weak filter */
+static inline void pelFilterLuma(pixel* src, int32_t srcStep, int32_t offset, int32_t tc, bool partPNoFilter, bool partQNoFilter,
+ bool filterSecondP, bool filterSecondQ)
+{
+ int32_t thrCut = tc * 10;
- if (sw)
+ for (int32_t i = 0; i < UNIT_SIZE; i++, src += srcStep)
{
- src[-offset] = (pixel)Clip3(m3 - 2 * tc, m3 + 2 * tc, ((m1 + 2 * m2 + 2 * m3 + 2 * m4 + m5 + 4) >> 3));
- src[0] = (pixel)Clip3(m4 - 2 * tc, m4 + 2 * tc, ((m2 + 2 * m3 + 2 * m4 + 2 * m5 + m6 + 4) >> 3));
- src[-offset * 2] = (pixel)Clip3(m2 - 2 * tc, m2 + 2 * tc, ((m1 + m2 + m3 + m4 + 2) >> 2));
- src[offset] = (pixel)Clip3(m5 - 2 * tc, m5 + 2 * tc, ((m3 + m4 + m5 + m6 + 2) >> 2));
- src[-offset * 3] = (pixel)Clip3(m1 - 2 * tc, m1 + 2 * tc, ((2 * m0 + 3 * m1 + m2 + m3 + m4 + 4) >> 3));
- src[offset * 2] = (pixel)Clip3(m6 - 2 * tc, m6 + 2 * tc, ((m3 + m4 + m5 + 3 * m6 + 2 * m7 + 4) >> 3));
- }
- else
- {
- /* Weak filter */
- delta = (9 * (m4 - m3) - 3 * (m5 - m2) + 8) >> 4;
+ int16_t m4 = (int16_t)src[0];
+ int16_t m3 = (int16_t)src[-offset];
+ int16_t m5 = (int16_t)src[offset];
+ int16_t m2 = (int16_t)src[-offset * 2];
+
+ int32_t delta = (9 * (m4 - m3) - 3 * (m5 - m2) + 8) >> 4;
if (abs(delta) < thrCut)
{
delta = Clip3(-tc, tc, delta);
- src[-offset] = Clip(m3 + delta);
- src[0] = Clip(m4 - delta);
int32_t tc2 = tc >> 1;
- if (filterSecondP)
+ if (!partPNoFilter)
{
- int32_t delta1 = Clip3(-tc2, tc2, ((((m1 + m3 + 1) >> 1) - m2 + delta) >> 1));
- src[-offset * 2] = Clip(m2 + delta1);
+ src[-offset] = Clip(m3 + delta);
+ if (filterSecondP)
+ {
+ int16_t m1 = (int16_t)src[-offset * 3];
+ int32_t delta1 = Clip3(-tc2, tc2, ((((m1 + m3 + 1) >> 1) - m2 + delta) >> 1));
+ src[-offset * 2] = Clip(m2 + delta1);
+ }
}
- if (filterSecondQ)
+ if (!partQNoFilter)
{
- int32_t delta2 = Clip3(-tc2, tc2, ((((m6 + m4 + 1) >> 1) - m5 - delta) >> 1));
- src[offset] = Clip(m5 + delta2);
+ src[0] = Clip(m4 - delta);
+ if (filterSecondQ)
+ {
+ int16_t m6 = (int16_t)src[offset * 2];
+ int32_t delta2 = Clip3(-tc2, tc2, ((((m6 + m4 + 1) >> 1) - m5 - delta) >> 1));
+ src[offset] = Clip(m5 + delta2);
+ }
}
}
}
-
- if (partPNoFilter)
- {
- src[-offset] = (pixel)m3;
- src[-offset * 2] = (pixel)m2;
- src[-offset * 3] = (pixel)m1;
- }
- if (partQNoFilter)
- {
- src[0] = (pixel)m4;
- src[offset] = (pixel)m5;
- src[offset * 2] = (pixel)m6;
- }
}
/* Deblocking of one line/column for the chrominance component
@@ -427,36 +423,31 @@
* \param tc tc value
* \param partPNoFilter indicator to disable filtering on partP
* \param partQNoFilter indicator to disable filtering on partQ */
-static inline void pelFilterChroma(pixel* src, int32_t offset, int32_t tc, bool partPNoFilter, bool partQNoFilter)
+static inline void pelFilterChroma(pixel* src, int32_t srcStep, int32_t offset, int32_t tc, bool partPNoFilter, bool partQNoFilter)
{
- int32_t delta;
+ for (int32_t i = 0; i < UNIT_SIZE; i++, src += srcStep)
+ {
+ int16_t m4 = (int16_t)src[0];
+ int16_t m3 = (int16_t)src[-offset];
+ int16_t m5 = (int16_t)src[offset];
+ int16_t m2 = (int16_t)src[-offset * 2];
- int16_t m4 = (int16_t)src[0];
- int16_t m3 = (int16_t)src[-offset];
- int16_t m5 = (int16_t)src[offset];
- int16_t m2 = (int16_t)src[-offset * 2];
-
- delta = Clip3(-tc, tc, ((((m4 - m3) << 2) + m2 - m5 + 4) >> 3));
- src[-offset] = Clip(m3 + delta);
- src[0] = Clip(m4 - delta);
-
- if (partPNoFilter)
- src[-offset] = (pixel)m3;
- if (partQNoFilter)
- src[0] = (pixel)m4;
+ int32_t delta = Clip3(-tc, tc, ((((m4 - m3) << 2) + m2 - m5 + 4) >> 3));
+ if (!partPNoFilter)
+ src[-offset] = Clip(m3 + delta);
+ if (!partQNoFilter)
+ src[0] = Clip(m4 - delta);
+ }
}
-void Deblock::edgeFilterLuma(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, int32_t edge, uint8_t blockingStrength[])
+void Deblock::edgeFilterLuma(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockingStrength[])
{
TComPicYuv* reconYuv = cu->m_pic->getPicYuvRec();
pixel* src = reconYuv->getLumaAddr(cu->getAddr(), absZOrderIdx);
- pixel* tmpsrc = src;
int32_t stride = reconYuv->getStride();
uint32_t numParts = cu->m_pic->getNumPartInCUSize() >> depth;
- uint32_t blocksInPart = (LOG2_UNIT_SIZE - 2) > 0 ? 1 << (LOG2_UNIT_SIZE - 2) : 1;
- uint32_t bsAbsIdx = 0, bs = 0;
int32_t offset, srcStep;
bool partPNoFilter = false;
@@ -472,20 +463,20 @@
{
offset = 1;
srcStep = stride;
- tmpsrc += (edge << LOG2_UNIT_SIZE);
+ src += (edge << LOG2_UNIT_SIZE);
}
else // (dir == EDGE_HOR)
{
offset = stride;
srcStep = 1;
- tmpsrc += (edge << LOG2_UNIT_SIZE) * stride;
+ src += (edge << LOG2_UNIT_SIZE) * stride;
}
for (uint32_t idx = 0; idx < numParts; idx++)
{
- uint32_t partOffset = idx << LOG2_UNIT_SIZE;
- bsAbsIdx = calcBsIdx(cu, absZOrderIdx, dir, edge, idx);
- bs = blockingStrength[bsAbsIdx];
+ uint32_t unitOffset = idx << LOG2_UNIT_SIZE;
+ uint32_t bsAbsIdx = calcBsIdx(cu, absZOrderIdx, dir, edge, idx);
+ uint32_t bs = blockingStrength[bsAbsIdx];
if (bs)
{
int32_t qpQ = cu->getQP(bsAbsIdx);
@@ -499,29 +490,23 @@
int32_t qpP = cuP->getQP(partP);
int32_t qp = (qpP + qpQ + 1) >> 1;
- int32_t bitdepthScale = 1 << (X265_DEPTH - 8);
- int32_t indexTC = Clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset));
int32_t indexB = Clip3(0, QP_MAX_SPEC, qp + betaOffset);
- int32_t tc = s_tcTable[indexTC] * bitdepthScale;
- int32_t beta = s_betaTable[indexB] * bitdepthScale;
- int32_t sideThreshold = (beta + (beta >> 1)) >> 3;
- int32_t thrCut = tc * 10;
+ const int32_t bitdepthShift = X265_DEPTH - 8;
+ int32_t beta = s_betaTable[indexB] << bitdepthShift;
- for (uint32_t blkIdx = 0; blkIdx < blocksInPart; blkIdx++)
+ int32_t dp0 = calcDP(src + srcStep * (unitOffset + 0), offset);
+ int32_t dq0 = calcDQ(src + srcStep * (unitOffset + 0), offset);
+ int32_t dp3 = calcDP(src + srcStep * (unitOffset + 3), offset);
+ int32_t dq3 = calcDQ(src + srcStep * (unitOffset + 3), offset);
+ int32_t d0 = dp0 + dq0;
+ int32_t d3 = dp3 + dq3;
+
+ int32_t d = d0 + d3;
+
+ if (d < beta)
{
- int32_t dp0 = calcDP(tmpsrc + srcStep * (partOffset + blkIdx * 4 + 0), offset);
- int32_t dq0 = calcDQ(tmpsrc + srcStep * (partOffset + blkIdx * 4 + 0), offset);
- int32_t dp3 = calcDP(tmpsrc + srcStep * (partOffset + blkIdx * 4 + 3), offset);
- int32_t dq3 = calcDQ(tmpsrc + srcStep * (partOffset + blkIdx * 4 + 3), offset);
- int32_t d0 = dp0 + dq0;
- int32_t d3 = dp3 + dq3;
-
- int32_t dp = dp0 + dp3;
- int32_t dq = dq0 + dq3;
- int32_t d = d0 + d3;
-
if (cu->m_slice->m_pps->bTransquantBypassEnabled)
{
// check if each of PUs is lossless coded
@@ -529,36 +514,35 @@
partQNoFilter = cuQ->getCUTransquantBypass(partQ);
}
- if (d < beta)
+ int32_t indexTC = Clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset));
+ int32_t tc = s_tcTable[indexTC] << bitdepthShift;
+
+ bool sw = (2 * d0 < (beta >> 2) &&
+ 2 * d3 < (beta >> 2) &&
+ useStrongFiltering(offset, beta, tc, src + srcStep * (unitOffset + 0)) &&
+ useStrongFiltering(offset, beta, tc, src + srcStep * (unitOffset + 3)));
+
+ if (sw)
+ pelFilterLumaStrong(src + srcStep * unitOffset, srcStep, offset, tc, partPNoFilter, partQNoFilter);
+ else
{
+ int32_t sideThreshold = (beta + (beta >> 1)) >> 3;
+ int32_t dp = dp0 + dp3;
+ int32_t dq = dq0 + dq3;
bool filterP = (dp < sideThreshold);
bool filterQ = (dq < sideThreshold);
- bool sw = useStrongFiltering(offset, 2 * d0, beta, tc, tmpsrc + srcStep * (partOffset + blkIdx * 4 + 0))
- && useStrongFiltering(offset, 2 * d3, beta, tc, tmpsrc + srcStep * (partOffset + blkIdx * 4 + 3));
-
- for (int32_t i = 0; i < DEBLOCK_SMALLEST_BLOCK / 2; i++)
- pelFilterLuma(tmpsrc + srcStep * (partOffset + blkIdx * 4 + i), offset, tc, sw, partPNoFilter, partQNoFilter, thrCut, filterP, filterQ);
+ pelFilterLuma(src + srcStep * unitOffset, srcStep, offset, tc, partPNoFilter, partQNoFilter, filterP, filterQ);
}
}
}
}
}
-void Deblock::edgeFilterChroma(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, int32_t edge, uint8_t blockingStrength[])
+void Deblock::edgeFilterChroma(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockingStrength[])
{
int32_t chFmt = cu->getChromaFormat();
- TComPicYuv* reconYuv = cu->m_pic->getPicYuvRec();
- int32_t stride = reconYuv->getCStride();
- pixel* srcCb = reconYuv->getCbAddr(cu->getAddr(), absZOrderIdx);
- pixel* srcCr = reconYuv->getCrAddr(cu->getAddr(), absZOrderIdx);
- uint32_t log2UnitSizeH = LOG2_UNIT_SIZE - cu->getHorzChromaShift();
- uint32_t log2UnitSizeV = LOG2_UNIT_SIZE - cu->getVertChromaShift();
- uint32_t sizeChromaH = 1 << log2UnitSizeH;
- uint32_t sizeChromaV = 1 << log2UnitSizeV;
- int32_t offset, srcStep;
-
- const uint32_t lcuWidthInBaseUnits = cu->m_pic->getNumPartInCUSize();
+ int32_t offset, srcStep, chromaShift;
bool partPNoFilter = false;
bool partQNoFilter = false;
@@ -568,44 +552,42 @@
TComDataCU* cuQ = cu;
int32_t tcOffset = cu->m_slice->m_pps->deblockingFilterTcOffsetDiv2 << 1;
- // Vertical Position
- uint32_t edgeNumInLCUVert = g_zscanToRaster[absZOrderIdx] % lcuWidthInBaseUnits + edge;
- uint32_t edgeNumInLCUHor = g_zscanToRaster[absZOrderIdx] / lcuWidthInBaseUnits + edge;
+ X265_CHECK(((dir == EDGE_VER)
+ ? ((g_zscanToPelX[absZOrderIdx] + edge * UNIT_SIZE) >> cu->getHorzChromaShift())
+ : ((g_zscanToPelY[absZOrderIdx] + edge * UNIT_SIZE) >> cu->getVertChromaShift())) % DEBLOCK_SMALLEST_BLOCK == 0,
+ "invalid edge\n");
- if ((sizeChromaH < DEBLOCK_SMALLEST_BLOCK) && (sizeChromaV < DEBLOCK_SMALLEST_BLOCK) &&
- (((edgeNumInLCUVert % (DEBLOCK_SMALLEST_BLOCK >> log2UnitSizeH)) && !dir) ||
- ((edgeNumInLCUHor % (DEBLOCK_SMALLEST_BLOCK >> log2UnitSizeV)) && dir)))
- return;
- uint32_t numParts = cu->m_pic->getNumPartInCUSize() >> depth;
- uint32_t bsAbsIdx;
- uint8_t bs;
-
- pixel* tmpSrcCb = srcCb;
- pixel* tmpSrcCr = srcCr;
- uint32_t loopLength;
+ TComPicYuv* reconYuv = cu->m_pic->getPicYuvRec();
+ int32_t stride = reconYuv->getCStride();
+ int32_t srcOffset = reconYuv->getChromaAddrOffset(cu->getAddr(), absZOrderIdx);
if (dir == EDGE_VER)
{
+ chromaShift = cu->getVertChromaShift();
+ srcOffset += (edge << (LOG2_UNIT_SIZE - cu->getHorzChromaShift()));
offset = 1;
srcStep = stride;
- tmpSrcCb += (edge << log2UnitSizeH);
- tmpSrcCr += (edge << log2UnitSizeH);
- loopLength = sizeChromaV;
}
else // (dir == EDGE_HOR)
{
+ chromaShift = cu->getHorzChromaShift();
+ srcOffset += edge * stride << (LOG2_UNIT_SIZE - cu->getVertChromaShift());
offset = stride;
srcStep = 1;
- tmpSrcCb += edge * stride << log2UnitSizeV;
- tmpSrcCr += edge * stride << log2UnitSizeV;
- loopLength = sizeChromaH;
}
- for (uint32_t idx = 0; idx < numParts; idx++)
+ pixel* srcChroma[2];
+ srcChroma[0] = reconYuv->getCbAddr() + srcOffset;
+ srcChroma[1] = reconYuv->getCrAddr() + srcOffset;
+
+ uint32_t numUnits = cu->m_pic->getNumPartInCUSize() >> (depth + chromaShift);
+
+ for (uint32_t idx = 0; idx < numUnits; idx++)
{
- bsAbsIdx = calcBsIdx(cu, absZOrderIdx, dir, edge, idx);
- bs = blockingStrength[bsAbsIdx];
+ uint32_t unitOffset = idx << LOG2_UNIT_SIZE;
+ uint32_t bsAbsIdx = calcBsIdx(cu, absZOrderIdx, dir, edge, idx << chromaShift);
+ uint32_t bs = blockingStrength[bsAbsIdx];
if (bs > 1)
{
@@ -630,7 +612,6 @@
for (uint32_t chromaIdx = 0; chromaIdx < 2; chromaIdx++)
{
int32_t chromaQPOffset = !chromaIdx ? cu->m_slice->m_pps->chromaCbQpOffset : cu->m_slice->m_pps->chromaCrQpOffset;
- pixel* tmpSrcChroma = !chromaIdx ? tmpSrcCb : tmpSrcCr;
int32_t qp = ((qpP + qpQ + 1) >> 1) + chromaQPOffset;
if (qp >= 30)
{
@@ -640,12 +621,12 @@
qp = X265_MIN(qp, 51);
}
- int32_t bitdepthScale = 1 << (X265_DEPTH - 8);
- int32_t indexTC = Clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset);
- int32_t tc = s_tcTable[indexTC] * bitdepthScale;
+ int32_t indexTC = Clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET + tcOffset));
+ const int32_t bitdepthShift = X265_DEPTH - 8;
+ int32_t tc = s_tcTable[indexTC] << bitdepthShift;
+ pixel* srcC = srcChroma[chromaIdx];
- for (uint32_t step = 0; step < loopLength; step++)
- pelFilterChroma(tmpSrcChroma + srcStep * (step + idx * loopLength), offset, tc, partPNoFilter, partQNoFilter);
+ pelFilterChroma(srcC + srcStep * unitOffset, srcStep, offset, tc, partPNoFilter, partQNoFilter);
}
}
}
diff -r 7dccbbed0349 -r 06237deb460b source/common/deblock.h
--- a/source/common/deblock.h Wed Sep 24 18:26:45 2014 -0500
+++ b/source/common/deblock.h Fri Sep 26 19:34:36 2014 +0900
@@ -43,31 +43,31 @@
void init() { m_numPartitions = 1 << (g_maxFullDepth * 2); }
- void deblockCTU(TComDataCU* cu, int32_t dir, bool edgeFilter[], uint8_t blockingStrength[]);
+ void deblockCTU(TComDataCU* cu, int32_t dir);
protected:
// CU-level deblocking function
- void deblockCU(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, const int32_t Edge, bool edgeFilter[], uint8_t blockingStrength[]);
+ void deblockCU(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, const int32_t Edge, uint8_t blockingStrength[]);
struct Param
{
- bool leftEdge;
- bool topEdge;
+ uint8_t leftEdge;
+ uint8_t topEdge;
};
// set filtering functions
void setLoopfilterParam(TComDataCU* cu, uint32_t absZOrderIdx, Param *params);
- void setEdgefilterTU(TComDataCU* cu, uint32_t absTUPartIdx, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, bool edgeFilter[], uint8_t blockingStrength[]);
- void setEdgefilterPU(TComDataCU* cu, uint32_t absZOrderIdx, int32_t dir, Param *params, bool edgeFilter[], uint8_t blockingStrength[]);
- void setEdgefilterMultiple(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, int32_t edgeIdx, bool value, bool edgeFilter[], uint8_t blockingStrength[], uint32_t widthInBaseUnits = 0);
+ void setEdgefilterTU(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, uint8_t blockingStrength[]);
+ void setEdgefilterPU(TComDataCU* cu, uint32_t absZOrderIdx, int32_t dir, uint8_t blockingStrength[], uint32_t widthInBaseUnits);
+ void setEdgefilterMultiple(TComDataCU* cu, uint32_t absZOrderIdx, int32_t dir, int32_t edgeIdx, uint8_t value, uint8_t blockingStrength[], uint32_t widthInBaseUnits);
// get filtering functions
void getBoundaryStrengthSingle(TComDataCU* cu, int32_t dir, uint32_t partIdx, uint8_t blockingStrength[]);
// filter luma/chroma functions
- void edgeFilterLuma(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, int32_t edge, uint8_t blockingStrength[]);
- void edgeFilterChroma(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, int32_t edge, uint8_t blockingStrength[]);
+ void edgeFilterLuma(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockingStrength[]);
+ void edgeFilterChroma(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockingStrength[]);
static const uint8_t s_tcTable[54];
static const uint8_t s_betaTable[52];
diff -r 7dccbbed0349 -r 06237deb460b source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp Wed Sep 24 18:26:45 2014 -0500
+++ b/source/encoder/frameencoder.cpp Fri Sep 26 19:34:36 2014 +0900
@@ -584,7 +584,7 @@
processRowEncoder(realRow, tld);
else
{
- processRowFilter(realRow, tld);
+ processRowFilter(realRow);
// NOTE: Active next row
if (realRow != m_numRows - 1)
diff -r 7dccbbed0349 -r 06237deb460b source/encoder/frameencoder.h
--- a/source/encoder/frameencoder.h Wed Sep 24 18:26:45 2014 -0500
+++ b/source/encoder/frameencoder.h Fri Sep 26 19:34:36 2014 +0900
@@ -49,10 +49,6 @@
{
Analysis analysis;
- // NOTE: the maximum LCU 64x64 have 256 4x4 partitions
- bool edgeFilter[256];
- uint8_t blockingStrength[256];
-
~ThreadLocalData() { analysis.destroy(); }
};
@@ -185,7 +181,7 @@
/* Called by WaveFront::findJob() */
void processRow(int row, int threadId);
void processRowEncoder(int row, ThreadLocalData& tld);
- void processRowFilter(int row, ThreadLocalData& tld) { m_frameFilter.processRow(row, tld); }
+ void processRowFilter(int row) { m_frameFilter.processRow(row); }
void enqueueRowEncoder(int row) { WaveFront::enqueueRow(row * 2 + 0); }
void enqueueRowFilter(int row) { WaveFront::enqueueRow(row * 2 + 1); }
diff -r 7dccbbed0349 -r 06237deb460b source/encoder/framefilter.cpp
--- a/source/encoder/framefilter.cpp Wed Sep 24 18:26:45 2014 -0500
+++ b/source/encoder/framefilter.cpp Fri Sep 26 19:34:36 2014 +0900
@@ -78,7 +78,7 @@
m_sao.startSlice(pic, initState, qp);
}
-void FrameFilter::processRow(int row, ThreadLocalData& tld)
+void FrameFilter::processRow(int row)
{
PPAScopeEvent(Thread_filterCU);
@@ -98,17 +98,17 @@
const uint32_t cuAddr = lineStartCUAddr + col;
TComDataCU* cu = m_frame->getCU(cuAddr);
- m_deblock.deblockCTU(cu, Deblock::EDGE_VER, tld.edgeFilter, tld.blockingStrength);
+ m_deblock.deblockCTU(cu, Deblock::EDGE_VER);
if (col > 0)
{
TComDataCU* cu_prev = m_frame->getCU(cuAddr - 1);
- m_deblock.deblockCTU(cu_prev, Deblock::EDGE_HOR, tld.edgeFilter, tld.blockingStrength);
+ m_deblock.deblockCTU(cu_prev, Deblock::EDGE_HOR);
}
}
TComDataCU* cu_prev = m_frame->getCU(lineStartCUAddr + numCols - 1);
- m_deblock.deblockCTU(cu_prev, Deblock::EDGE_HOR, tld.edgeFilter, tld.blockingStrength);
+ m_deblock.deblockCTU(cu_prev, Deblock::EDGE_HOR);
}
// SAO
diff -r 7dccbbed0349 -r 06237deb460b source/encoder/framefilter.h
--- a/source/encoder/framefilter.h Wed Sep 24 18:26:45 2014 -0500
+++ b/source/encoder/framefilter.h Fri Sep 26 19:34:36 2014 +0900
@@ -64,7 +64,7 @@
void start(Frame *pic, Entropy& initState, int qp);
- void processRow(int row, ThreadLocalData& tld);
+ void processRow(int row);
void processRowPost(int row);
void processSao(int row);
};
More information about the x265-devel
mailing list