[x265] simplify intra filter (with fix for da61cf406f16) (Re: primitives: intra_pred[4][35] => intra_pred[35][4] (avoid *35))
Satoshi Nakagawa
nakagawa424 at oki.com
Mon Sep 22 14:34:01 CEST 2014
# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1411388939 -32400
# Mon Sep 22 21:28:59 2014 +0900
# Node ID 3f229951f826e1d09dd0258721ef5a1f9fdc4392
# Parent fd435504f15e0b13dabba9efe0aa94e7047060b5
simplify intra filter (with fix for da61cf406f16)
diff -r fd435504f15e -r 3f229951f826 source/Lib/TLibCommon/TComPattern.cpp
--- a/source/Lib/TLibCommon/TComPattern.cpp Mon Sep 22 13:14:54 2014 +0530
+++ b/source/Lib/TLibCommon/TComPattern.cpp Mon Sep 22 21:28:59 2014 +0900
@@ -52,133 +52,96 @@
void TComPattern::initAdiPattern(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth, pixel* adiBuf,
pixel* refAbove, pixel* refLeft, pixel* refAboveFlt, pixel* refLeftFlt, int dirMode)
{
- pixel* roiOrigin;
- pixel* adiTemp;
-
- int picStride = cu->m_pic->getStride();
-
IntraNeighbors intraNeighbors;
initIntraNeighbors(cu, zOrderIdxInPart, partDepth, true, &intraNeighbors);
uint32_t tuSize = intraNeighbors.tuSize;
uint32_t tuSize2 = tuSize << 1;
- roiOrigin = cu->m_pic->getPicYuvRec()->getLumaAddr(cu->getAddr(), cu->getZorderIdxInCU() + zOrderIdxInPart);
- adiTemp = adiBuf;
+ pixel* adiOrigin = cu->m_pic->getPicYuvRec()->getLumaAddr(cu->getAddr(), cu->getZorderIdxInCU() + zOrderIdxInPart);
+ int picStride = cu->m_pic->getStride();
- fillReferenceSamples(roiOrigin, picStride, adiTemp, intraNeighbors);
+ fillReferenceSamples(adiOrigin, picStride, adiBuf, intraNeighbors);
+ // initialization of ADI buffers
+ const int bufOffset = tuSize - 1;
+ refAbove += bufOffset;
+ refLeft += bufOffset;
+
+ // ADI_BUF_STRIDE * (2 * tuSize + 1);
+ memcpy(refAbove, adiBuf, (tuSize2 + 1) * sizeof(pixel));
+ for (int k = 0; k < tuSize2 + 1; k++)
+ refLeft[k] = adiBuf[k * ADI_BUF_STRIDE];
+
bool bUseFilteredPredictions = (dirMode == ALL_IDX ? (8 | 16 | 32) & tuSize : g_intraFilterFlags[dirMode] & tuSize);
if (bUseFilteredPredictions)
{
// generate filtered intra prediction samples
- // left and left above border + above and above right border + top left corner = length of 3. filter buffer
- int bufSize = tuSize2 + tuSize2 + 1;
- uint32_t wh = ADI_BUF_STRIDE * (tuSize2 + 1); // number of elements in one buffer
+ refAboveFlt += bufOffset;
+ refLeftFlt += bufOffset;
- pixel* filterBuf = adiBuf + wh; // buffer for 2. filtering (sequential)
- pixel* filterBufN = filterBuf + bufSize; // buffer for 1. filtering (sequential)
+ bool bStrongSmoothing = (tuSize == 32 && cu->m_slice->m_sps->bUseStrongIntraSmoothing);
- int l = 0;
- // left border from bottom to top
- for (int i = 0; i < tuSize2; i++)
+ if (bStrongSmoothing)
{
- filterBuf[l++] = adiTemp[ADI_BUF_STRIDE * (tuSize2 - i)];
- }
+ const int trSize = 32;
+ const int trSize2 = 32 * 2;
+ const int threshold = 1 << (X265_DEPTH - 5);
+ int refBL = refLeft[trSize2];
+ int refTL = refAbove[0];
+ int refTR = refAbove[trSize2];
+ bStrongSmoothing = (abs(refBL + refTL - 2 * refLeft[trSize]) < threshold &&
+ abs(refTL + refTR - 2 * refAbove[trSize]) < threshold);
- // top left corner
- filterBuf[l++] = adiTemp[0];
+ if (bStrongSmoothing)
+ {
+ // bilinear interpolation
+ const int shift = 5 + 1; // intraNeighbors.log2TrSize + 1;
+ int init = (refTL << shift) + tuSize;
+ int delta;
- // above border from left to right
- memcpy(&filterBuf[l], &adiTemp[1], tuSize2 * sizeof(*filterBuf));
+ refLeftFlt[0] = refAboveFlt[0] = refAbove[0];
- if (tuSize >= 32 && cu->m_slice->m_sps->bUseStrongIntraSmoothing)
- {
- int bottomLeft = filterBuf[0];
- int topLeft = filterBuf[tuSize2];
- int topRight = filterBuf[bufSize - 1];
- int threshold = 1 << (X265_DEPTH - 5);
- bool bilinearLeft = abs(bottomLeft + topLeft - 2 * filterBuf[tuSize]) < threshold;
- bool bilinearAbove = abs(topLeft + topRight - 2 * filterBuf[tuSize2 + tuSize]) < threshold;
+ //TODO: Performance Primitive???
+ delta = refBL - refTL;
+ for (int i = 1; i < trSize2; i++)
+ refLeftFlt[i] = (init + delta * i) >> shift;
+ refLeftFlt[trSize2] = refLeft[trSize2];
- if (bilinearLeft && bilinearAbove)
- {
- int shift = intraNeighbors.log2TrSize + 1;
- filterBufN[0] = filterBuf[0];
- filterBufN[tuSize2] = filterBuf[tuSize2];
- filterBufN[bufSize - 1] = filterBuf[bufSize - 1];
- //TODO: Performance Primitive???
- for (int i = 1; i < tuSize2; i++)
- {
- filterBufN[i] = ((tuSize2 - i) * bottomLeft + i * topLeft + tuSize) >> shift;
- }
+ delta = refTR - refTL;
+ for (int i = 1; i < trSize2; i++)
+ refAboveFlt[i] = (init + delta * i) >> shift;
+ refAboveFlt[trSize2] = refAbove[trSize2];
- for (int i = 1; i < tuSize2; i++)
- {
- filterBufN[tuSize2 + i] = ((tuSize2 - i) * topLeft + i * topRight + tuSize) >> shift;
- }
- }
- else
- {
- // 1. filtering with [1 2 1]
- filterBufN[0] = filterBuf[0];
- filterBufN[bufSize - 1] = filterBuf[bufSize - 1];
- for (int i = 1; i < bufSize - 1; i++)
- {
- filterBufN[i] = (filterBuf[i - 1] + 2 * filterBuf[i] + filterBuf[i + 1] + 2) >> 2;
- }
- }
- }
- else
- {
- // 1. filtering with [1 2 1]
- filterBufN[0] = filterBuf[0];
- filterBufN[bufSize - 1] = filterBuf[bufSize - 1];
- for (int i = 1; i < bufSize - 1; i++)
- {
- filterBufN[i] = (filterBuf[i - 1] + 2 * filterBuf[i] + filterBuf[i + 1] + 2) >> 2;
+ return;
}
}
- // initialization of ADI buffers
- refAboveFlt += tuSize - 1;
- refLeftFlt += tuSize - 1;
- memcpy(refAboveFlt, filterBufN + tuSize2, (tuSize2 + 1) * sizeof(pixel));
- for (int k = 0; k < tuSize2 + 1; k++)
- {
- refLeftFlt[k] = filterBufN[tuSize2 - k]; // Smoothened
- }
- }
+ refLeft[-1] = refAbove[1];
+ for (int i = 0; i < tuSize2; i++)
+ refLeftFlt[i] = (refLeft[i - 1] + 2 * refLeft[i] + refLeft[i + 1] + 2) >> 2;
+ refLeftFlt[tuSize2] = refLeft[tuSize2];
- // initialization of ADI buffers
- refAbove += tuSize - 1;
- refLeft += tuSize - 1;
-
- // ADI_BUF_STRIDE * (2 * tuSize + 1);
- memcpy(refAbove, adiBuf, (tuSize2 + 1) * sizeof(pixel));
- for (int k = 0; k < tuSize2 + 1; k++)
- {
- refLeft[k] = adiBuf[k * ADI_BUF_STRIDE];
+ refAboveFlt[0] = refLeftFlt[0];
+ for (int i = 1; i < tuSize2; i++)
+ refAboveFlt[i] = (refAbove[i - 1] + 2 * refAbove[i] + refAbove[i + 1] + 2) >> 2;
+ refAboveFlt[tuSize2] = refAbove[tuSize2];
}
}
void TComPattern::initAdiPatternChroma(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth, pixel* adiBuf, uint32_t chromaId)
{
- pixel* roiOrigin;
- pixel* adiTemp;
-
- int picStride = cu->m_pic->getCStride();
-
IntraNeighbors intraNeighbors;
initIntraNeighbors(cu, zOrderIdxInPart, partDepth, false, &intraNeighbors);
uint32_t tuSize = intraNeighbors.tuSize;
- roiOrigin = cu->m_pic->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), cu->getZorderIdxInCU() + zOrderIdxInPart);
- adiTemp = getAdiChromaBuf(chromaId, tuSize, adiBuf);
+ pixel* adiOrigin = cu->m_pic->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), cu->getZorderIdxInCU() + zOrderIdxInPart);
+ int picStride = cu->m_pic->getCStride();
+ pixel* adiRef = getAdiChromaBuf(chromaId, tuSize, adiBuf);
- fillReferenceSamples(roiOrigin, picStride, adiTemp, intraNeighbors);
+ fillReferenceSamples(adiOrigin, picStride, adiRef, intraNeighbors);
}
void TComPattern::initIntraNeighbors(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth, bool isLuma, IntraNeighbors *intraNeighbors)
@@ -226,14 +189,13 @@
intraNeighbors->log2TrSize = log2TrSize;
}
-void TComPattern::fillReferenceSamples(pixel* roiOrigin, int picStride, pixel* adiTemp, const IntraNeighbors& intraNeighbors)
+void TComPattern::fillReferenceSamples(pixel* adiOrigin, int picStride, pixel* adiRef, const IntraNeighbors& intraNeighbors)
{
int numIntraNeighbor = intraNeighbors.numIntraNeighbor;
int totalUnits = intraNeighbors.totalUnits;
uint32_t tuSize = intraNeighbors.tuSize;
uint32_t refSize = tuSize * 2 + 1;
- pixel* roiTemp;
int i, j;
int dcValue = 1 << (X265_DEPTH - 1);
@@ -241,27 +203,23 @@
{
// Fill border with DC value
for (i = 0; i < refSize; i++)
- {
- adiTemp[i] = dcValue;
- }
+ adiRef[i] = dcValue;
for (i = 1; i < refSize; i++)
- {
- adiTemp[i * ADI_BUF_STRIDE] = dcValue;
- }
+ adiRef[i * ADI_BUF_STRIDE] = dcValue;
}
else if (numIntraNeighbor == totalUnits)
{
// Fill top border with rec. samples
- roiTemp = roiOrigin - picStride - 1;
- memcpy(adiTemp, roiTemp, refSize * sizeof(*adiTemp));
+ pixel* adiTemp = adiOrigin - picStride - 1;
+ memcpy(adiRef, adiTemp, refSize * sizeof(*adiRef));
// Fill left border with rec. samples
- roiTemp = roiOrigin - 1;
+ adiTemp = adiOrigin - 1;
for (i = 1; i < refSize; i++)
{
- adiTemp[i * ADI_BUF_STRIDE] = roiTemp[0];
- roiTemp += picStride;
+ adiRef[i * ADI_BUF_STRIDE] = adiTemp[0];
+ adiTemp += picStride;
}
}
else // reference samples are partially available
@@ -284,12 +242,12 @@
}
// Fill top-left sample
- roiTemp = roiOrigin - picStride - 1;
+ pixel* adiTemp = adiOrigin - picStride - 1;
pAdiLineTemp = pAdiLine + (leftUnits * unitHeight);
pNeighborFlags = bNeighborFlags + leftUnits;
if (*pNeighborFlags)
{
- pixel topLeftVal = roiTemp[0];
+ pixel topLeftVal = adiTemp[0];
for (i = 0; i < unitWidth; i++)
{
pAdiLineTemp[i] = topLeftVal;
@@ -297,7 +255,7 @@
}
// Fill left & below-left samples
- roiTemp += picStride;
+ adiTemp += picStride;
pAdiLineTemp--;
pNeighborFlags--;
for (j = 0; j < leftUnits; j++)
@@ -306,25 +264,23 @@
{
for (i = 0; i < unitHeight; i++)
{
- pAdiLineTemp[-i] = roiTemp[i * picStride];
+ pAdiLineTemp[-i] = adiTemp[i * picStride];
}
}
- roiTemp += unitHeight * picStride;
+ adiTemp += unitHeight * picStride;
pAdiLineTemp -= unitHeight;
pNeighborFlags--;
}
// Fill above & above-right samples
- roiTemp = roiOrigin - picStride;
+ adiTemp = adiOrigin - picStride;
pAdiLineTemp = pAdiLine + (leftUnits * unitHeight) + unitWidth;
pNeighborFlags = bNeighborFlags + leftUnits + 1;
for (j = 0; j < aboveUnits; j++)
{
if (*pNeighborFlags)
- {
- memcpy(pAdiLineTemp, roiTemp, unitWidth * sizeof(*adiTemp));
- }
- roiTemp += unitWidth;
+ memcpy(pAdiLineTemp, adiTemp, unitWidth * sizeof(*adiTemp));
+ adiTemp += unitWidth;
pAdiLineTemp += unitWidth;
pNeighborFlags++;
}
@@ -395,12 +351,12 @@
// Copy processed samples
pAdiLineTemp = pAdiLine + refSize + unitWidth - 2;
- memcpy(adiTemp, pAdiLineTemp, refSize * sizeof(*adiTemp));
+ memcpy(adiRef, pAdiLineTemp, refSize * sizeof(*adiRef));
pAdiLineTemp = pAdiLine + refSize - 1;
for (i = 1; i < refSize; i++)
{
- adiTemp[i * ADI_BUF_STRIDE] = pAdiLineTemp[-i];
+ adiRef[i * ADI_BUF_STRIDE] = pAdiLineTemp[-i];
}
}
}
diff -r fd435504f15e -r 3f229951f826 source/Lib/TLibCommon/TComPattern.h
--- a/source/Lib/TLibCommon/TComPattern.h Mon Sep 22 13:14:54 2014 +0530
+++ b/source/Lib/TLibCommon/TComPattern.h Mon Sep 22 21:28:59 2014 +0900
@@ -95,7 +95,7 @@
private:
/// padding of unavailable reference samples for intra prediction
- static void fillReferenceSamples(pixel* roiOrigin, int picStride, pixel* adiTemp, const IntraNeighbors& intraNeighbors);
+ static void fillReferenceSamples(pixel* adiOrigin, int picStride, pixel* adiRef, const IntraNeighbors& intraNeighbors);
/// constrained intra prediction
static bool isAboveLeftAvailable(TComDataCU* cu, uint32_t partIdxLT);
diff -r fd435504f15e -r 3f229951f826 source/encoder/predict.cpp
--- a/source/encoder/predict.cpp Mon Sep 22 13:14:54 2014 +0530
+++ b/source/encoder/predict.cpp Mon Sep 22 21:28:59 2014 +0900
@@ -106,58 +106,45 @@
void Predict::predIntraChromaAng(pixel* src, uint32_t dirMode, pixel* dst, intptr_t stride, uint32_t log2TrSizeC, int chFmt)
{
int tuSize = 1 << log2TrSizeC;
- uint32_t tuSize2 = tuSize << 1;
+ int tuSize2 = tuSize << 1;
// Create the prediction
- pixel* refAbv;
- pixel refLft[3 * MAX_CU_SIZE];
+ const int bufOffset = tuSize - 1;
+ pixel buf0[3 * MAX_CU_SIZE];
+ pixel buf1[3 * MAX_CU_SIZE];
+ pixel* above;
+ pixel* left = buf0 + bufOffset;
+
+ int limit = (dirMode <= 25 && dirMode >= 11) ? (tuSize + 1 + 1) : (tuSize2 + 1);
+ for (int k = 0; k < limit; k++)
+ left[k] = src[k * ADI_BUF_STRIDE];
bool bUseFilteredPredictions = (chFmt == X265_CSP_I444 && (g_intraFilterFlags[dirMode] & tuSize));
if (bUseFilteredPredictions)
{
// generate filtered intra prediction samples
- // left and left above border + above and above right border + top left corner = length of 3. filter buffer
- int bufSize = tuSize2 + tuSize2 + 1;
- uint32_t wh = ADI_BUF_STRIDE * (tuSize2 + 1); // number of elements in one buffer
+ buf0[bufOffset - 1] = src[1];
+ left = buf1 + bufOffset;
+ for (int i = 0; i < tuSize2; i++)
+ left[i] = (buf0[bufOffset + i - 1] + 2 * buf0[bufOffset + i] + buf0[bufOffset + i + 1] + 2) >> 2;
+ left[tuSize2] = buf0[bufOffset + tuSize2];
- pixel* filterBuf = src + wh; // buffer for 2. filtering (sequential)
- pixel* filterBufN = filterBuf + bufSize; // buffer for 1. filtering (sequential)
-
- int l = 0;
- // left border from bottom to top
- for (uint32_t i = 0; i < tuSize2; i++)
- filterBuf[l++] = src[ADI_BUF_STRIDE * (tuSize2 - i)];
-
- // top left corner
- filterBuf[l++] = src[0];
-
- // above border from left to right
- memcpy(&filterBuf[l], &src[1], tuSize2 * sizeof(*filterBuf));
-
- // 1. filtering with [1 2 1]
- filterBufN[0] = filterBuf[0];
- filterBufN[bufSize - 1] = filterBuf[bufSize - 1];
- for (int i = 1; i < bufSize - 1; i++)
- filterBufN[i] = (filterBuf[i - 1] + 2 * filterBuf[i] + filterBuf[i + 1] + 2) >> 2;
-
- // initialization of ADI buffers
- int limit = tuSize2 + 1;
- refAbv = filterBufN + tuSize2;
- for (int k = 0; k < limit; k++)
- refLft[k + tuSize - 1] = filterBufN[tuSize2 - k]; // Smoothened
+ above = buf0 + bufOffset;
+ above[0] = left[0];
+ for (int i = 1; i < tuSize2; i++)
+ above[i] = (src[i - 1] + 2 * src[i] + src[i + 1] + 2) >> 2;
+ above[tuSize2] = src[tuSize2];
}
else
{
- int limit = (dirMode <= 25 && dirMode >= 11) ? (tuSize + 1 + 1) : (tuSize2 + 1);
- refAbv = src;
- for (int k = 0; k < limit; k++)
- refLft[k + tuSize - 1] = src[k * ADI_BUF_STRIDE];
+ above = buf1 + bufOffset;
+ memcpy(above, src, (tuSize2 + 1) * sizeof(pixel));
}
int sizeIdx = log2TrSizeC - 2;
X265_CHECK(sizeIdx >= 0 && sizeIdx < 4, "intra block size is out of range\n");
- primitives.intra_pred[dirMode][sizeIdx](dst, stride, refLft + tuSize - 1, refAbv, dirMode, 0);
+ primitives.intra_pred[dirMode][sizeIdx](dst, stride, left, above, dirMode, 0);
}
bool Predict::checkIdenticalMotion()
More information about the x265-devel mailing list