[x265] refine intra reference samples
Satoshi Nakagawa
nakagawa424 at oki.com
Sat May 10 08:29:33 CEST 2014
# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1399703246 -32400
# Sat May 10 15:27:26 2014 +0900
# Node ID dc0599b4da9e9b050bdceefb3c91418a6cfece64
# Parent d0acf82a77f9ce2aaa08255f69dba0bceb9f4598
refine intra reference samples
diff -r d0acf82a77f9 -r dc0599b4da9e source/Lib/TLibCommon/CommonDef.h
--- a/source/Lib/TLibCommon/CommonDef.h Thu May 08 18:52:17 2014 +0900
+++ b/source/Lib/TLibCommon/CommonDef.h Sat May 10 15:27:26 2014 +0900
@@ -100,6 +100,7 @@
#define FAST_UDI_MAX_RDMODE_NUM 35 ///< maximum number of RD comparison in fast-UDI estimation loop
+#define ALL_IDX -1
#define PLANAR_IDX 0
#define VER_IDX 26 // index for intra VERTICAL mode
#define HOR_IDX 10 // index for intra HORIZONTAL mode
diff -r d0acf82a77f9 -r dc0599b4da9e source/Lib/TLibCommon/TComPattern.cpp
--- a/source/Lib/TLibCommon/TComPattern.cpp Thu May 08 18:52:17 2014 +0900
+++ b/source/Lib/TLibCommon/TComPattern.cpp Sat May 10 15:27:26 2014 +0900
@@ -38,6 +38,7 @@
#include "TComPic.h"
#include "TComPattern.h"
#include "TComDataCU.h"
+#include "TComPrediction.h"
using namespace x265;
@@ -49,116 +50,84 @@
// ====================================================================================================================
void TComPattern::initAdiPattern(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth, pixel* adiBuf,
- int strideOrig, int heightOrig)
+ pixel* refAbove, pixel* refLeft, pixel* refAboveFlt, pixel* refLeftFlt, int dirMode)
{
pixel* roiOrigin;
pixel* adiTemp;
- uint32_t cuWidth = cu->getCUSize(0) >> partDepth;
- uint32_t cuHeight = cu->getCUSize(0) >> partDepth;
- uint32_t cuWidth2 = cuWidth << 1;
- uint32_t cuHeight2 = cuHeight << 1;
- uint32_t width;
- uint32_t height;
- int picStride = cu->getPic()->getStride();
- bool bNeighborFlags[4 * MAX_NUM_SPU_W + 1];
- int numIntraNeighbor = 0;
+ int picStride = cu->getPic()->getStride();
- uint32_t partIdxLT, partIdxRT, partIdxLB;
+ IntraNeighbors intraNeighbors;
- cu->deriveLeftRightTopIdxAdi(partIdxLT, partIdxRT, zOrderIdxInPart, partDepth);
-
- int partIdxStride = cu->getPic()->getNumPartInCUSize();
- int baseUnitSize = g_maxCUSize >> g_maxCUDepth;
- int unitWidth = baseUnitSize;
- int unitHeight = baseUnitSize;
- int cuHeightInUnits = cuHeight / unitHeight;
- int cuWidthInUnits = cuWidth / unitWidth;
- int iAboveUnits = cuWidthInUnits << 1;
- int leftUnits = cuHeightInUnits << 1;
- partIdxLB = g_rasterToZscan[g_zscanToRaster[partIdxLT] + ((cuHeightInUnits - 1) * partIdxStride)];
-
- if (!cu->getSlice()->getPPS()->getConstrainedIntraPred())
- {
- bNeighborFlags[leftUnits] = isAboveLeftAvailable(cu, partIdxLT);
- numIntraNeighbor += (int)(bNeighborFlags[leftUnits]);
- numIntraNeighbor += isAboveAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1));
- numIntraNeighbor += isAboveRightAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1 + cuWidthInUnits));
- numIntraNeighbor += isLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1));
- numIntraNeighbor += isBelowLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1 - cuHeightInUnits));
- }
- else
- {
- bNeighborFlags[leftUnits] = isAboveLeftAvailableCIP(cu, partIdxLT);
- numIntraNeighbor += (int)(bNeighborFlags[leftUnits]);
- numIntraNeighbor += isAboveAvailableCIP(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1));
- numIntraNeighbor += isAboveRightAvailableCIP(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1 + cuWidthInUnits));
- numIntraNeighbor += isLeftAvailableCIP(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1));
- numIntraNeighbor += isBelowLeftAvailableCIP(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1 - cuHeightInUnits));
- }
-
- width = cuWidth2 + 1;
- height = cuHeight2 + 1;
-
- if (((width << 2) > strideOrig) || ((height << 2) > heightOrig))
- {
- return;
- }
+ initIntraNeighbors(cu, zOrderIdxInPart, partDepth, TEXT_LUMA, &intraNeighbors);
+ uint32_t tuSize = intraNeighbors.tuSize;
+ uint32_t tuSize2 = tuSize << 1;
roiOrigin = cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), cu->getZorderIdxInCU() + zOrderIdxInPart);
adiTemp = adiBuf;
- fillReferenceSamples(roiOrigin, adiTemp, bNeighborFlags, numIntraNeighbor, unitWidth, unitHeight, iAboveUnits, leftUnits,
- cuWidth, cuHeight, width, height, picStride);
+ fillReferenceSamples(roiOrigin, picStride, adiTemp, intraNeighbors);
- // generate filtered intra prediction samples
- // left and left above border + above and above right border + top left corner = length of 3. filter buffer
- int bufSize = cuHeight2 + cuWidth2 + 1;
- uint32_t wh = ADI_BUF_STRIDE * height; // number of elements in one buffer
+ bool bUseFilteredPredictions = (dirMode == ALL_IDX || TComPrediction::filteringIntraReferenceSamples(dirMode, tuSize));
- pixel* filteredBuf1 = adiBuf + wh; // 1. filter buffer
- pixel* filteredBuf2 = filteredBuf1 + wh; // 2. filter buffer
- pixel* filterBuf = filteredBuf2 + wh; // buffer for 2. filtering (sequential)
- pixel* filterBufN = filterBuf + bufSize; // buffer for 1. filtering (sequential)
+ if (bUseFilteredPredictions && 8 <= tuSize && tuSize <= 32)
+ {
+ // generate filtered intra prediction samples
+ // left and left above border + above and above right border + top left corner = length of 3. filter buffer
+ int bufSize = tuSize2 + tuSize2 + 1;
+ uint32_t wh = ADI_BUF_STRIDE * (tuSize2 + 1); // number of elements in one buffer
- int l = 0;
- // left border from bottom to top
- for (int i = 0; i < cuHeight2; i++)
- {
- filterBuf[l++] = adiTemp[ADI_BUF_STRIDE * (cuHeight2 - i)];
- }
+ pixel* filterBuf = adiBuf + wh; // buffer for 2. filtering (sequential)
+ pixel* filterBufN = filterBuf + bufSize; // buffer for 1. filtering (sequential)
- // top left corner
- filterBuf[l++] = adiTemp[0];
+ int l = 0;
+ // left border from bottom to top
+ for (int i = 0; i < tuSize2; i++)
+ {
+ filterBuf[l++] = adiTemp[ADI_BUF_STRIDE * (tuSize2 - i)];
+ }
- // above border from left to right
- memcpy(&filterBuf[l], &adiTemp[1], cuWidth2 * sizeof(*filterBuf));
+ // top left corner
+ filterBuf[l++] = adiTemp[0];
- if (cu->getSlice()->getSPS()->getUseStrongIntraSmoothing())
- {
- int blkSize = 32;
- int bottomLeft = filterBuf[0];
- int topLeft = filterBuf[cuHeight2];
- int topRight = filterBuf[bufSize - 1];
- int threshold = 1 << (X265_DEPTH - 5);
- bool bilinearLeft = abs(bottomLeft + topLeft - 2 * filterBuf[cuHeight]) < threshold;
- bool bilinearAbove = abs(topLeft + topRight - 2 * filterBuf[cuHeight2 + cuHeight]) < threshold;
+ // above border from left to right
+ memcpy(&filterBuf[l], &adiTemp[1], tuSize2 * sizeof(*filterBuf));
- if (cuWidth >= blkSize && (bilinearLeft && bilinearAbove))
+ if (tuSize >= 32 && cu->getSlice()->getSPS()->getUseStrongIntraSmoothing())
{
- int shift = g_convertToBit[cuWidth] + 3; // log2(uiCuHeight2)
- filterBufN[0] = filterBuf[0];
- filterBufN[cuHeight2] = filterBuf[cuHeight2];
- filterBufN[bufSize - 1] = filterBuf[bufSize - 1];
- //TODO: Performance Primitive???
- for (int i = 1; i < cuHeight2; i++)
+ int bottomLeft = filterBuf[0];
+ int topLeft = filterBuf[tuSize2];
+ int topRight = filterBuf[bufSize - 1];
+ int threshold = 1 << (X265_DEPTH - 5);
+ bool bilinearLeft = abs(bottomLeft + topLeft - 2 * filterBuf[tuSize]) < threshold;
+ bool bilinearAbove = abs(topLeft + topRight - 2 * filterBuf[tuSize2 + tuSize]) < threshold;
+
+ if (bilinearLeft && bilinearAbove)
{
- filterBufN[i] = ((cuHeight2 - i) * bottomLeft + i * topLeft + cuHeight) >> shift;
+ int shift = g_convertToBit[tuSize] + 3; // log2(tuSize2)
+ filterBufN[0] = filterBuf[0];
+ filterBufN[tuSize2] = filterBuf[tuSize2];
+ filterBufN[bufSize - 1] = filterBuf[bufSize - 1];
+ //TODO: Performance Primitive???
+ for (int i = 1; i < tuSize2; i++)
+ {
+ filterBufN[i] = ((tuSize2 - i) * bottomLeft + i * topLeft + tuSize) >> shift;
+ }
+
+ for (int i = 1; i < tuSize2; i++)
+ {
+ filterBufN[tuSize2 + i] = ((tuSize2 - i) * topLeft + i * topRight + tuSize) >> shift;
+ }
}
-
- for (int i = 1; i < cuWidth2; i++)
+ else
{
- filterBufN[cuHeight2 + i] = ((cuWidth2 - i) * topLeft + i * topRight + cuWidth) >> shift;
+ // 1. filtering with [1 2 1]
+ filterBufN[0] = filterBuf[0];
+ filterBufN[bufSize - 1] = filterBuf[bufSize - 1];
+ for (int i = 1; i < bufSize - 1; i++)
+ {
+ filterBufN[i] = (filterBuf[i - 1] + 2 * filterBuf[i] + filterBuf[i + 1] + 2) >> 2;
+ }
}
}
else
@@ -171,165 +140,151 @@
filterBufN[i] = (filterBuf[i - 1] + 2 * filterBuf[i] + filterBuf[i + 1] + 2) >> 2;
}
}
- }
- else
- {
- // 1. filtering with [1 2 1]
- filterBufN[0] = filterBuf[0];
- filterBufN[bufSize - 1] = filterBuf[bufSize - 1];
- for (int i = 1; i < bufSize - 1; i++)
+
+ // initialization of ADI buffers
+ refAboveFlt += tuSize - 1;
+ refLeftFlt += tuSize - 1;
+ memcpy(refAboveFlt, filterBufN + tuSize2, (tuSize2 + 1) * sizeof(pixel));
+ for (int k = 0; k < tuSize2 + 1; k++)
{
- filterBufN[i] = (filterBuf[i - 1] + 2 * filterBuf[i] + filterBuf[i + 1] + 2) >> 2;
+ refLeftFlt[k] = filterBufN[tuSize2 - k]; // Smoothened
}
}
- // fill 1. filter buffer with filtered values
- l = 0;
- for (int i = 0; i < cuHeight2; i++)
- {
- filteredBuf1[ADI_BUF_STRIDE * (cuHeight2 - i)] = filterBufN[l++];
- }
+ // initialization of ADI buffers
+ refAbove += tuSize - 1;
+ refLeft += tuSize - 1;
- filteredBuf1[0] = filterBufN[l++];
- memcpy(&filteredBuf1[1], &filterBufN[l], cuWidth2 * sizeof(*filteredBuf1));
-}
-
-// Overloaded initialization of ADI buffers to support buffered references for xpredIntraAngBufRef
-void TComPattern::initAdiPattern(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth, pixel* adiBuf, int strideOrig, int heightOrig,
- pixel* refAbove, pixel* refLeft, pixel* refAboveFlt, pixel* refLeftFlt)
-{
- initAdiPattern(cu, zOrderIdxInPart, partDepth, adiBuf, strideOrig, heightOrig);
- uint32_t cuWidth = cu->getCUSize(0) >> partDepth;
- uint32_t cuHeight = cu->getCUSize(0) >> partDepth;
- uint32_t cuWidth2 = cuWidth << 1;
- uint32_t cuHeight2 = cuHeight << 1;
-
- refAbove += cuWidth - 1;
- refAboveFlt += cuWidth - 1;
- refLeft += cuWidth - 1;
- refLeftFlt += cuWidth - 1;
-
- // ADI_BUF_STRIDE * (2 * height + 1);
- memcpy(refAbove, adiBuf, (cuWidth2 + 1) * sizeof(pixel));
- memcpy(refAboveFlt, adiBuf + ADI_BUF_STRIDE * (2 * cuHeight + 1), (cuWidth2 + 1) * sizeof(pixel));
-
- for (int k = 0; k < cuHeight2 + 1; k++)
+ // ADI_BUF_STRIDE * (2 * tuSize + 1);
+ memcpy(refAbove, adiBuf, (tuSize2 + 1) * sizeof(pixel));
+ for (int k = 0; k < tuSize2 + 1; k++)
{
refLeft[k] = adiBuf[k * ADI_BUF_STRIDE];
- refLeftFlt[k] = (adiBuf + ADI_BUF_STRIDE * (cuHeight2 + 1))[k * ADI_BUF_STRIDE]; // Smoothened
}
}
-void TComPattern::initAdiPatternChroma(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth, pixel* adiBuf, int strideOrig, int heightOrig, int chromaId)
+void TComPattern::initAdiPatternChroma(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth, pixel* adiBuf, int chromaId)
{
pixel* roiOrigin;
pixel* adiTemp;
- uint32_t cuWidth = cu->getCUSize(0) >> (partDepth + cu->getHorzChromaShift());
- uint32_t cuHeight = cu->getCUSize(0) >> (partDepth + cu->getVertChromaShift());
- cuHeight = (cuWidth != cuHeight) ? cuHeight >> 1 : cuHeight;
+ int picStride = cu->getPic()->getCStride();
- uint32_t width;
- uint32_t height;
- int picStride = cu->getPic()->getCStride();
+ IntraNeighbors intraNeighbors;
- bool bNeighborFlags[4 * MAX_NUM_SPU_W + 1];
+ initIntraNeighbors(cu, zOrderIdxInPart, partDepth, TEXT_CHROMA, &intraNeighbors);
+ uint32_t tuSize = intraNeighbors.tuSize;
+
+ roiOrigin = (chromaId == 1) ? cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), cu->getZorderIdxInCU() + zOrderIdxInPart) : cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), cu->getZorderIdxInCU() + zOrderIdxInPart);
+ adiTemp = getAdiChromaBuf(chromaId, tuSize, adiBuf);
+
+ fillReferenceSamples(roiOrigin, picStride, adiTemp, intraNeighbors);
+}
+
+void TComPattern::initIntraNeighbors(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth, TextType cType, IntraNeighbors *intraNeighbors)
+{
+ uint32_t tuSize = cu->getCUSize(0) >> partDepth;
+ int baseUnitSize = g_maxCUSize >> g_maxCUDepth;
+ int unitWidth = baseUnitSize;
+ int unitHeight = baseUnitSize;
+
+ if (cType != TEXT_LUMA)
+ {
+ tuSize >>= cu->getHorzChromaShift();
+ unitWidth >>= cu->getHorzChromaShift();
+ unitHeight >>= cu->getVertChromaShift();
+ }
+
int numIntraNeighbor = 0;
+ bool *bNeighborFlags = intraNeighbors->bNeighborFlags;
uint32_t partIdxLT, partIdxRT, partIdxLB;
cu->deriveLeftRightTopIdxAdi(partIdxLT, partIdxRT, zOrderIdxInPart, partDepth);
int partIdxStride = cu->getPic()->getNumPartInCUSize();
- int baseUnitSize = g_maxCUSize >> g_maxCUDepth;
- int unitWidth = baseUnitSize >> cu->getHorzChromaShift();
- int unitHeight = baseUnitSize >> cu->getVertChromaShift();
- int cuHeightInUnits = cuHeight / unitHeight;
- int cuWidthInUnits = cuWidth / unitWidth;
- int aboveUnits = cuWidthInUnits << 1;
- int leftUnits = cuHeightInUnits << 1;
- partIdxLB = g_rasterToZscan[g_zscanToRaster[partIdxLT] + ((cuHeightInUnits - 1) * partIdxStride)];
+ int tuHeightInUnits = tuSize / unitHeight;
+ int tuWidthInUnits = tuSize / unitWidth;
+ int aboveUnits = tuWidthInUnits << 1;
+ int leftUnits = tuHeightInUnits << 1;
+ partIdxLB = g_rasterToZscan[g_zscanToRaster[partIdxLT] + ((tuHeightInUnits - 1) * partIdxStride)];
if (!cu->getSlice()->getPPS()->getConstrainedIntraPred())
{
bNeighborFlags[leftUnits] = isAboveLeftAvailable(cu, partIdxLT);
numIntraNeighbor += (int)(bNeighborFlags[leftUnits]);
numIntraNeighbor += isAboveAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1));
- numIntraNeighbor += isAboveRightAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1 + cuWidthInUnits));
+ numIntraNeighbor += isAboveRightAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1 + tuWidthInUnits));
numIntraNeighbor += isLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1));
- numIntraNeighbor += isBelowLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1 - cuHeightInUnits));
+ numIntraNeighbor += isBelowLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1 - tuHeightInUnits));
}
else
{
bNeighborFlags[leftUnits] = isAboveLeftAvailableCIP(cu, partIdxLT);
numIntraNeighbor += (int)(bNeighborFlags[leftUnits]);
numIntraNeighbor += isAboveAvailableCIP(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1));
- numIntraNeighbor += isAboveRightAvailableCIP(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1 + cuWidthInUnits));
+ numIntraNeighbor += isAboveRightAvailableCIP(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1 + tuWidthInUnits));
numIntraNeighbor += isLeftAvailableCIP(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1));
- numIntraNeighbor += isBelowLeftAvailableCIP(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1 - cuHeightInUnits));
+ numIntraNeighbor += isBelowLeftAvailableCIP(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1 - tuHeightInUnits));
}
-
- width = cuWidth * 2 + 1;
- height = cuHeight * 2 + 1;
-
- if ((4 * width > strideOrig) || (4 * height > heightOrig))
- {
- return;
- }
- roiOrigin = (chromaId == 1) ? cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), cu->getZorderIdxInCU() + zOrderIdxInPart) : cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), cu->getZorderIdxInCU() + zOrderIdxInPart);
- adiTemp = (chromaId == 1) ? adiBuf : (adiBuf + 2 * ADI_BUF_STRIDE * height);
-
- fillReferenceSamples(roiOrigin, adiTemp, bNeighborFlags, numIntraNeighbor, unitWidth, unitHeight, aboveUnits, leftUnits,
- cuWidth, cuHeight, width, height, picStride);
+ intraNeighbors->numIntraNeighbor = numIntraNeighbor;
+ intraNeighbors->totalUnits = aboveUnits + leftUnits + 1;
+ intraNeighbors->aboveUnits = aboveUnits;
+ intraNeighbors->leftUnits = leftUnits;
+ intraNeighbors->tuSize = tuSize;
+ intraNeighbors->unitWidth = unitWidth;
+ intraNeighbors->unitHeight = unitHeight;
}
-void TComPattern::fillReferenceSamples(pixel* roiOrigin, pixel* adiTemp, bool* bNeighborFlags, int numIntraNeighbor, int unitWidth, int unitHeight, int aboveUnits, int leftUnits, uint32_t cuWidth, uint32_t cuHeight, uint32_t width, uint32_t height, int picStride)
+void TComPattern::fillReferenceSamples(pixel* roiOrigin, int picStride, pixel* adiTemp, const IntraNeighbors& intraNeighbors)
{
+ int numIntraNeighbor = intraNeighbors.numIntraNeighbor;
+ int totalUnits = intraNeighbors.totalUnits;
+ uint32_t tuSize = intraNeighbors.tuSize;
+
+ uint32_t refSize = tuSize * 2 + 1;
pixel* roiTemp;
int i, j;
int dcValue = 1 << (X265_DEPTH - 1);
- int totalUnits = aboveUnits + leftUnits + 1;
if (numIntraNeighbor == 0)
{
// Fill border with DC value
- for (i = 0; i < width; i++)
+ for (i = 0; i < refSize; i++)
{
adiTemp[i] = dcValue;
}
- for (i = 1; i < height; i++)
+ for (i = 1; i < refSize; i++)
{
adiTemp[i * ADI_BUF_STRIDE] = dcValue;
}
}
else if (numIntraNeighbor == totalUnits)
{
- // Fill top-left border with rec. samples
+ // Fill top border with rec. samples
roiTemp = roiOrigin - picStride - 1;
- adiTemp[0] = roiTemp[0];
+ memcpy(adiTemp, roiTemp, refSize * sizeof(*adiTemp));
// Fill left border with rec. samples
- // Fill below left border with rec. samples
roiTemp = roiOrigin - 1;
-
- for (i = 0; i < 2 * cuHeight; i++)
+ for (i = 1; i < refSize; i++)
{
- adiTemp[(1 + i) * ADI_BUF_STRIDE] = roiTemp[0];
+ adiTemp[i * ADI_BUF_STRIDE] = roiTemp[0];
roiTemp += picStride;
}
-
- // Fill top border with rec. samples
- // Fill top right border with rec. samples
- roiTemp = roiOrigin - picStride;
- memcpy(&adiTemp[1], roiTemp, 2 * cuWidth * sizeof(*adiTemp));
}
else // reference samples are partially available
{
+ const bool *bNeighborFlags = intraNeighbors.bNeighborFlags;
+ int aboveUnits = intraNeighbors.aboveUnits;
+ int leftUnits = intraNeighbors.leftUnits;
+ int unitWidth = intraNeighbors.unitWidth;
+ int unitHeight = intraNeighbors.unitHeight;
int totalSamples = (leftUnits * unitHeight) + ((aboveUnits + 1) * unitWidth);
pixel pAdiLine[5 * MAX_CU_SIZE];
pixel *pAdiLineTemp;
- bool *pNeighborFlags;
+ const bool *pNeighborFlags;
int next, curr;
// Initialize
@@ -449,11 +404,11 @@
}
// Copy processed samples
- pAdiLineTemp = pAdiLine + height + unitWidth - 2;
- memcpy(adiTemp, pAdiLineTemp, width * sizeof(*adiTemp));
+ pAdiLineTemp = pAdiLine + refSize + unitWidth - 2;
+ memcpy(adiTemp, pAdiLineTemp, refSize * sizeof(*adiTemp));
- pAdiLineTemp = pAdiLine + height - 1;
- for (i = 1; i < height; i++)
+ pAdiLineTemp = pAdiLine + refSize - 1;
+ for (i = 1; i < refSize; i++)
{
adiTemp[i * ADI_BUF_STRIDE] = pAdiLineTemp[-i];
}
diff -r d0acf82a77f9 -r dc0599b4da9e source/Lib/TLibCommon/TComPattern.h
--- a/source/Lib/TLibCommon/TComPattern.h Thu May 08 18:52:17 2014 +0900
+++ b/source/Lib/TLibCommon/TComPattern.h Sat May 10 15:27:26 2014 +0900
@@ -52,15 +52,27 @@
class TComDataCU;
+struct IntraNeighbors
+{
+ int numIntraNeighbor;
+ int totalUnits;
+ int aboveUnits;
+ int leftUnits;
+ int tuSize;
+ int unitWidth;
+ int unitHeight;
+ bool bNeighborFlags[4 * MAX_NUM_SPU_W + 1];
+};
+
/// neighboring pixel access class for all components
class TComPattern
{
public:
// access functions of ADI buffers
- static pixel* getAdiChromaBuf(int chromaId, int cuHeight, pixel* adiBuf)
+ static pixel* getAdiChromaBuf(int chromaId, int tuSize, pixel* adiBuf)
{
- return adiBuf + (chromaId == 1 ? 0 : 2 * ADI_BUF_STRIDE * (cuHeight * 2 + 1));
+ return adiBuf + (chromaId == 1 ? 0 : 2 * ADI_BUF_STRIDE * (tuSize * 2 + 1));
}
// -------------------------------------------------------------------------------------------------------------------
@@ -69,21 +81,19 @@
/// set parameters from pixel buffers for accessing neighboring pixels
static void initAdiPattern(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth, pixel* adiBuf,
- int strideOrig, int heightOrig, pixel* refAbove, pixel* refLeft,
- pixel* refAboveFlt, pixel* refLeftFlt);
-
- /// set luma parameters from CU data for accessing ADI data
- static void initAdiPattern(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth, pixel* adiBuf,
- int strideOrig, int heightOrig);
+ pixel* refAbove, pixel* refLeft,
+ pixel* refAboveFlt, pixel* refLeftFlt, int dirMode);
/// set chroma parameters from CU data for accessing ADI data
- static void initAdiPatternChroma(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth,
- pixel* adiBuf, int strideOrig, int heightOrig, int chromaId);
+ static void initAdiPatternChroma(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth,
+ pixel* adiBuf, int chromaId);
+
+ static void initIntraNeighbors(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth, TextType cType, IntraNeighbors *IntraNeighbors);
private:
/// padding of unavailable reference samples for intra prediction
- static void fillReferenceSamples(pixel* roiOrigin, pixel* adiTemp, bool* bNeighborFlags, int numIntraNeighbor, int unitWidth, int unitHeight, int aboveUnits, int leftUnits, uint32_t cuWidth, uint32_t cuHeight, uint32_t width, uint32_t height, int picStride);
+ static void fillReferenceSamples(pixel* roiOrigin, int picStride, pixel* adiTemp, const IntraNeighbors& intraNeighbors);
/// constrained intra prediction
static bool isAboveLeftAvailable(TComDataCU* cu, uint32_t partIdxLT);
diff -r d0acf82a77f9 -r dc0599b4da9e source/Lib/TLibCommon/TComPrediction.cpp
--- a/source/Lib/TLibCommon/TComPrediction.cpp Thu May 08 18:52:17 2014 +0900
+++ b/source/Lib/TLibCommon/TComPrediction.cpp Sat May 10 15:27:26 2014 +0900
@@ -90,9 +90,9 @@
if (m_predBuf == NULL)
{
- m_predBufHeight = ((MAX_CU_SIZE + 2) << 4);
- m_predBufStride = ((MAX_CU_SIZE + 8) << 4);
- m_predBuf = X265_MALLOC(pixel, m_predBufStride * m_predBufHeight);
+ int predBufHeight = ((MAX_CU_SIZE + 2) << 4);
+ int predBufStride = ((MAX_CU_SIZE + 8) << 4);
+ m_predBuf = X265_MALLOC(pixel, predBufStride * predBufHeight);
m_refAbove = X265_MALLOC(pixel, 3 * MAX_CU_SIZE);
m_refAboveFlt = X265_MALLOC(pixel, 3 * MAX_CU_SIZE);
@@ -113,7 +113,7 @@
// Public member functions
// ====================================================================================================================
-bool TComPrediction::filteringIntraReferenceSamples(uint32_t dirMode, uint32_t width)
+bool TComPrediction::filteringIntraReferenceSamples(uint32_t dirMode, uint32_t tuSize)
{
bool bFilter;
@@ -124,39 +124,38 @@
else
{
int diff = std::min<int>(abs((int)dirMode - HOR_IDX), abs((int)dirMode - VER_IDX));
- uint32_t sizeIndex = g_convertToBit[width];
+ uint32_t sizeIndex = g_convertToBit[tuSize];
bFilter = diff > intraFilterThreshold[sizeIndex];
}
return bFilter;
}
-void TComPrediction::predIntraLumaAng(uint32_t dirMode, pixel* dst, intptr_t stride, int width)
+void TComPrediction::predIntraLumaAng(uint32_t dirMode, pixel* dst, intptr_t stride, int tuSize)
{
- assert(width >= 4 && width <= 64);
- int log2BlkSize = g_convertToBit[width];
- bool bUseFilteredPredictions = TComPrediction::filteringIntraReferenceSamples(dirMode, width);
+ assert(tuSize >= 4 && tuSize <= 64);
+ int log2BlkSize = g_convertToBit[tuSize];
+ bool bUseFilteredPredictions = TComPrediction::filteringIntraReferenceSamples(dirMode, tuSize);
pixel *refLft, *refAbv;
- refLft = m_refLeft + width - 1;
- refAbv = m_refAbove + width - 1;
+ refLft = m_refLeft + tuSize - 1;
+ refAbv = m_refAbove + tuSize - 1;
- pixel *src = m_predBuf;
if (bUseFilteredPredictions)
{
- src += ADI_BUF_STRIDE * (2 * width + 1);
- refLft = m_refLeftFlt + width - 1;
- refAbv = m_refAboveFlt + width - 1;
+ refLft = m_refLeftFlt + tuSize - 1;
+ refAbv = m_refAboveFlt + tuSize - 1;
}
- bool bFilter = width <= 16 && dirMode != PLANAR_IDX;
+ bool bFilter = tuSize <= 16 && dirMode != PLANAR_IDX;
primitives.intra_pred[log2BlkSize][dirMode](dst, stride, refLft, refAbv, dirMode, bFilter);
}
// Angular chroma
-void TComPrediction::predIntraChromaAng(pixel* src, uint32_t dirMode, pixel* dst, intptr_t stride, int width, int height, int chFmt)
+void TComPrediction::predIntraChromaAng(pixel* src, uint32_t dirMode, pixel* dst, intptr_t stride, int tuSize, int chFmt)
{
- int log2BlkSize = g_convertToBit[width];
+ int log2BlkSize = g_convertToBit[tuSize];
+ uint32_t tuSize2 = tuSize << 1;
// Create the prediction
pixel refAbv[3 * MAX_CU_SIZE];
@@ -170,36 +169,32 @@
}
else
{
- assert(width >= 4 && height >= 4 && width < 128 && height < 128);
- bUseFilteredPredictions = TComPrediction::filteringIntraReferenceSamples(dirMode, width);
+ assert(tuSize >= 4 && tuSize < 128);
+ bUseFilteredPredictions = TComPrediction::filteringIntraReferenceSamples(dirMode, tuSize);
}
if (bUseFilteredPredictions)
{
- uint32_t cuWidth2 = width << 1;
- uint32_t cuHeight2 = height << 1;
// generate filtered intra prediction samples
// left and left above border + above and above right border + top left corner = length of 3. filter buffer
- int bufSize = cuHeight2 + cuWidth2 + 1;
- uint32_t wh = ADI_BUF_STRIDE * height; // number of elements in one buffer
+ int bufSize = tuSize2 + tuSize2 + 1;
+ uint32_t wh = ADI_BUF_STRIDE * (tuSize2 + 1); // number of elements in one buffer
- pixel* filteredBuf1 = src + wh; // 1. filter buffer
- pixel* filteredBuf2 = filteredBuf1 + wh; // 2. filter buffer
- pixel* filterBuf = filteredBuf2 + wh; // buffer for 2. filtering (sequential)
- pixel* filterBufN = filterBuf + bufSize; // buffer for 1. filtering (sequential)
+ pixel* filterBuf = src + wh; // buffer for 2. filtering (sequential)
+ pixel* filterBufN = filterBuf + bufSize; // buffer for 1. filtering (sequential)
int l = 0;
// left border from bottom to top
- for (int i = 0; i < cuHeight2; i++)
+ for (int i = 0; i < tuSize2; i++)
{
- filterBuf[l++] = src[ADI_BUF_STRIDE * (cuHeight2 - i)];
+ filterBuf[l++] = src[ADI_BUF_STRIDE * (tuSize2 - i)];
}
// top left corner
filterBuf[l++] = src[0];
// above border from left to right
- memcpy(&filterBuf[l], &src[1], cuWidth2 * sizeof(*filterBuf));
+ memcpy(&filterBuf[l], &src[1], tuSize2 * sizeof(*filterBuf));
// 1. filtering with [1 2 1]
filterBufN[0] = filterBuf[0];
@@ -209,35 +204,25 @@
filterBufN[i] = (filterBuf[i - 1] + 2 * filterBuf[i] + filterBuf[i + 1] + 2) >> 2;
}
- // fill 1. filter buffer with filtered values
- l = 0;
- for (int i = 0; i < cuHeight2; i++)
- {
- filteredBuf1[ADI_BUF_STRIDE * (cuHeight2 - i)] = filterBufN[l++];
- }
-
- filteredBuf1[0] = filterBufN[l++];
- memcpy(&filteredBuf1[1], &filterBufN[l], cuWidth2 * sizeof(*filteredBuf1));
-
- int limit = (2 * width + 1);
- src += wh;
- memcpy(refAbv + width - 1, src, (limit) * sizeof(pixel));
+ // initialization of ADI buffers
+ int limit = tuSize2 + 1;
+ memcpy(refAbv + tuSize - 1, filterBufN + tuSize2, limit * sizeof(pixel));
for (int k = 0; k < limit; k++)
{
- refLft[k + width - 1] = src[k * ADI_BUF_STRIDE];
+ refLft[k + tuSize - 1] = filterBufN[tuSize2 - k]; // Smoothened
}
}
else
{
- int limit = (dirMode <= 25 && dirMode >= 11) ? (width + 1 + 1) : (2 * width + 1);
- memcpy(refAbv + width - 1, src, (limit) * sizeof(pixel));
+ int limit = (dirMode <= 25 && dirMode >= 11) ? (tuSize + 1 + 1) : (tuSize2 + 1);
+ memcpy(refAbv + tuSize - 1, src, (limit) * sizeof(pixel));
for (int k = 0; k < limit; k++)
{
- refLft[k + width - 1] = src[k * ADI_BUF_STRIDE];
+ refLft[k + tuSize - 1] = src[k * ADI_BUF_STRIDE];
}
}
- primitives.intra_pred[log2BlkSize][dirMode](dst, stride, refLft + width - 1, refAbv + width - 1, dirMode, 0);
+ primitives.intra_pred[log2BlkSize][dirMode](dst, stride, refLft + tuSize - 1, refAbv + tuSize - 1, dirMode, 0);
}
/** Function for checking identical motion.
diff -r d0acf82a77f9 -r dc0599b4da9e source/Lib/TLibCommon/TComPrediction.h
--- a/source/Lib/TLibCommon/TComPrediction.h Thu May 08 18:52:17 2014 +0900
+++ b/source/Lib/TLibCommon/TComPrediction.h Sat May 10 15:27:26 2014 +0900
@@ -93,8 +93,6 @@
pixel* m_refAboveFlt;
pixel* m_refLeft;
pixel* m_refLeftFlt;
- int m_predBufStride;
- int m_predBufHeight;
TComPrediction();
virtual ~TComPrediction();
@@ -105,9 +103,9 @@
void motionCompensation(TComDataCU* cu, TComYuv* predYuv, int picList = REF_PIC_LIST_X, int partIdx = -1, bool bLuma = true, bool bChroma = true);
// Angular Intra
- void predIntraLumaAng(uint32_t dirMode, pixel* pred, intptr_t stride, int width);
- void predIntraChromaAng(pixel* src, uint32_t dirMode, pixel* pred, intptr_t stride, int width, int height, int chFmt);
- bool filteringIntraReferenceSamples(uint32_t dirMode, uint32_t width);
+ void predIntraLumaAng(uint32_t dirMode, pixel* pred, intptr_t stride, int tuSize);
+ void predIntraChromaAng(pixel* src, uint32_t dirMode, pixel* pred, intptr_t stride, int tuSize, int chFmt);
+ static bool filteringIntraReferenceSamples(uint32_t dirMode, uint32_t tuSize);
};
}
//! \}
diff -r d0acf82a77f9 -r dc0599b4da9e source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Thu May 08 18:52:17 2014 +0900
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Sat May 10 15:27:26 2014 +0900
@@ -396,13 +396,12 @@
bool bReusePred)
{
uint32_t fullDepth = cu->getDepth(0) + trDepth;
- uint32_t width = cu->getCUSize(0) >> trDepth;
- uint32_t height = cu->getCUSize(0) >> trDepth;
+ uint32_t tuSize = cu->getCUSize(0) >> trDepth;
uint32_t stride = fencYuv->getStride();
pixel* fenc = fencYuv->getLumaAddr(absPartIdx);
pixel* pred = predYuv->getLumaAddr(absPartIdx);
int16_t* residual = resiYuv->getLumaAddr(absPartIdx);
- int part = partitionFromSizes(width, height);
+ int part = partitionFromSizes(tuSize, tuSize);
uint32_t trSizeLog2 = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUSize() >> fullDepth] + 2;
uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
@@ -421,23 +420,23 @@
if (!bReusePred)
{
//===== init availability pattern =====
- TComPattern::initAdiPattern(cu, absPartIdx, trDepth, m_predBuf, m_predBufStride, m_predBufHeight, m_refAbove, m_refLeft, m_refAboveFlt, m_refLeftFlt);
uint32_t lumaPredMode = cu->getLumaIntraDir(absPartIdx);
+ TComPattern::initAdiPattern(cu, absPartIdx, trDepth, m_predBuf, m_refAbove, m_refLeft, m_refAboveFlt, m_refLeftFlt, lumaPredMode);
//===== get prediction signal =====
- predIntraLumaAng(lumaPredMode, pred, stride, width);
+ predIntraLumaAng(lumaPredMode, pred, stride, tuSize);
}
//===== get residual signal =====
- assert(!((uint32_t)(size_t)fenc & (width - 1)));
- assert(!((uint32_t)(size_t)pred & (width - 1)));
- assert(!((uint32_t)(size_t)residual & (width - 1)));
- primitives.calcresidual[(int)g_convertToBit[width]](fenc, pred, residual, stride);
+ assert(!((uint32_t)(size_t)fenc & (tuSize - 1)));
+ assert(!((uint32_t)(size_t)pred & (tuSize - 1)));
+ assert(!((uint32_t)(size_t)residual & (tuSize - 1)));
+ primitives.calcresidual[(int)g_convertToBit[tuSize]](fenc, pred, residual, stride);
//===== transform and quantization =====
//--- init rate estimation arrays for RDOQ ---
if (useTransformSkip ? m_cfg->bEnableRDOQTS : m_cfg->bEnableRDOQ)
{
- m_entropyCoder->estimateBit(m_trQuant->m_estBitsSbac, width, TEXT_LUMA);
+ m_entropyCoder->estimateBit(m_trQuant->m_estBitsSbac, tuSize, TEXT_LUMA);
}
//--- transform and quantization ---
@@ -449,27 +448,27 @@
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
m_trQuant->selectLambda(TEXT_LUMA);
- absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, width, TEXT_LUMA, absPartIdx, &lastPos, useTransformSkip);
+ absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, tuSize, TEXT_LUMA, absPartIdx, &lastPos, useTransformSkip);
//--- set coded block flag ---
cu->setCbfSubParts((absSum ? 1 : 0) << trDepth, TEXT_LUMA, absPartIdx, fullDepth);
//--- inverse transform ---
- int size = g_convertToBit[width];
+ int size = g_convertToBit[tuSize];
if (absSum)
{
int scalingListType = 0 + TEXT_LUMA;
assert(scalingListType < 6);
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), cu->getLumaIntraDir(absPartIdx), residual, stride, coeff, width, scalingListType, useTransformSkip, lastPos);
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), cu->getLumaIntraDir(absPartIdx), residual, stride, coeff, tuSize, scalingListType, useTransformSkip, lastPos);
}
else
{
int16_t* resiTmp = residual;
- memset(coeff, 0, sizeof(coeff_t) * width * height);
+ memset(coeff, 0, sizeof(coeff_t) * tuSize * tuSize);
primitives.blockfill_s[size](resiTmp, stride, 0);
}
- assert(width <= 32);
+ assert(tuSize <= 32);
//===== reconstruction =====
primitives.calcrecon[size](pred, residual, reconQt, reconIPred, stride, MAX_CU_SIZE, reconIPredStride);
//===== update distortion =====
@@ -507,8 +506,7 @@
}
TextType ttype = (chromaId == 1) ? TEXT_CHROMA_U : TEXT_CHROMA_V;
- uint32_t width = cu->getCUSize(absPartIdx) >> (trDepth + m_hChromaShift);
- uint32_t height = width;
+ uint32_t tuSize = cu->getCUSize(0) >> (trDepth + m_hChromaShift);
uint32_t stride = fencYuv->getCStride();
pixel* fenc = (chromaId == 1) ? fencYuv->getCbAddr(absPartIdx) : fencYuv->getCrAddr(absPartIdx);
pixel* pred = (chromaId == 1) ? predYuv->getCbAddr(absPartIdx) : predYuv->getCrAddr(absPartIdx);
@@ -523,13 +521,13 @@
pixel* reconIPred = (chromaId == 1) ? cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder) : cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder);
uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getCStride();
bool useTransformSkipChroma = cu->getTransformSkip(absPartIdx, ttype);
- int part = partitionFromSizes(width, height);
+ int part = partitionFromSizes(tuSize, tuSize);
if (!bReusePred)
{
//===== init availability pattern =====
- TComPattern::initAdiPatternChroma(cu, absPartIdx, trDepth, m_predBuf, m_predBufStride, m_predBufHeight, chromaId);
- pixel* chromaPred = TComPattern::getAdiChromaBuf(chromaId, height, m_predBuf);
+ TComPattern::initAdiPatternChroma(cu, absPartIdx, trDepth, m_predBuf, chromaId);
+ pixel* chromaPred = TComPattern::getAdiChromaBuf(chromaId, tuSize, m_predBuf);
uint32_t chromaPredMode = cu->getChromaIntraDir(absPartIdx);
//===== update chroma mode =====
@@ -540,14 +538,14 @@
}
chromaPredMode = (chFmt == CHROMA_422) ? g_chroma422IntraAngleMappingTable[chromaPredMode] : chromaPredMode;
//===== get prediction signal =====
- predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, width, height, chFmt);
+ predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, tuSize, chFmt);
}
//===== get residual signal =====
- assert(!((uint32_t)(size_t)fenc & (width - 1)));
- assert(!((uint32_t)(size_t)pred & (width - 1)));
- assert(!((uint32_t)(size_t)residual & (width - 1)));
- int size = g_convertToBit[width];
+ assert(!((uint32_t)(size_t)fenc & (tuSize - 1)));
+ assert(!((uint32_t)(size_t)pred & (tuSize - 1)));
+ assert(!((uint32_t)(size_t)residual & (tuSize - 1)));
+ int size = g_convertToBit[tuSize];
primitives.calcresidual[size](fenc, pred, residual, stride);
//===== transform and quantization =====
@@ -555,7 +553,7 @@
//--- init rate estimation arrays for RDOQ ---
if (useTransformSkipChroma ? m_cfg->bEnableRDOQTS : m_cfg->bEnableRDOQ)
{
- m_entropyCoder->estimateBit(m_trQuant->m_estBitsSbac, width, ttype);
+ m_entropyCoder->estimateBit(m_trQuant->m_estBitsSbac, tuSize, ttype);
}
//--- transform and quantization ---
uint32_t absSum = 0;
@@ -574,7 +572,7 @@
m_trQuant->selectLambda(TEXT_CHROMA);
- absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, width, ttype, absPartIdx, &lastPos, useTransformSkipChroma);
+ absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, tuSize, ttype, absPartIdx, &lastPos, useTransformSkipChroma);
//--- set coded block flag ---
cu->setCbfPartRange((((absSum > 0) ? 1 : 0) << origTrDepth), ttype, absPartIdx, absPartIdxStep);
@@ -584,18 +582,18 @@
{
int scalingListType = 0 + ttype;
assert(scalingListType < 6);
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, residual, stride, coeff, width, scalingListType, useTransformSkipChroma, lastPos);
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, residual, stride, coeff, tuSize, scalingListType, useTransformSkipChroma, lastPos);
}
else
{
int16_t* resiTmp = residual;
- memset(coeff, 0, sizeof(coeff_t) * width * height);
+ memset(coeff, 0, sizeof(coeff_t) * tuSize * tuSize);
primitives.blockfill_s[size](resiTmp, stride, 0);
}
}
- assert(((intptr_t)residual & (width - 1)) == 0);
- assert(width <= 32);
+ assert(((intptr_t)residual & (tuSize - 1)) == 0);
+ assert(tuSize <= 32);
//===== reconstruction =====
primitives.calcrecon[size](pred, residual, reconQt, reconIPred, stride, reconQtStride, reconIPredStride);
//===== update distortion =====
@@ -852,8 +850,7 @@
//----- code luma block with given intra prediction mode and store Cbf-----
uint32_t lumaPredMode = cu->getLumaIntraDir(absPartIdx);
- uint32_t width = cu->getCUSize(0) >> trDepth;
- uint32_t height = cu->getCUSize(0) >> trDepth;
+ uint32_t tuSize = cu->getCUSize(0) >> trDepth;
int chFmt = cu->getChromaFormat();
uint32_t stride = fencYuv->getStride();
pixel* fenc = fencYuv->getLumaAddr(absPartIdx);
@@ -871,16 +868,15 @@
bool useTransformSkip = cu->getTransformSkip(absPartIdx, TEXT_LUMA);
//===== init availability pattern =====
-
- TComPattern::initAdiPattern(cu, absPartIdx, trDepth, m_predBuf, m_predBufStride, m_predBufHeight, m_refAbove, m_refLeft, m_refAboveFlt, m_refLeftFlt);
+ TComPattern::initAdiPattern(cu, absPartIdx, trDepth, m_predBuf, m_refAbove, m_refLeft, m_refAboveFlt, m_refLeftFlt, lumaPredMode);
//===== get prediction signal =====
- predIntraLumaAng(lumaPredMode, pred, stride, width);
+ predIntraLumaAng(lumaPredMode, pred, stride, tuSize);
//===== get residual signal =====
- assert(!((uint32_t)(size_t)fenc & (width - 1)));
- assert(!((uint32_t)(size_t)pred & (width - 1)));
- assert(!((uint32_t)(size_t)residual & (width - 1)));
- primitives.calcresidual[(int)g_convertToBit[width]](fenc, pred, residual, stride);
+ assert(!((uint32_t)(size_t)fenc & (tuSize - 1)));
+ assert(!((uint32_t)(size_t)pred & (tuSize - 1)));
+ assert(!((uint32_t)(size_t)residual & (tuSize - 1)));
+ primitives.calcresidual[(int)g_convertToBit[tuSize]](fenc, pred, residual, stride);
//===== transform and quantization =====
uint32_t absSum = 0;
@@ -889,31 +885,31 @@
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
m_trQuant->selectLambda(TEXT_LUMA);
- absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, width, TEXT_LUMA, absPartIdx, &lastPos, useTransformSkip);
+ absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, tuSize, TEXT_LUMA, absPartIdx, &lastPos, useTransformSkip);
//--- set coded block flag ---
cu->setCbfSubParts((absSum ? 1 : 0) << trDepth, TEXT_LUMA, absPartIdx, fullDepth);
//--- inverse transform ---
- int size = g_convertToBit[width];
+ int size = g_convertToBit[tuSize];
if (absSum)
{
int scalingListType = 0 + TEXT_LUMA;
assert(scalingListType < 6);
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), cu->getLumaIntraDir(absPartIdx), residual, stride, coeff, width, scalingListType, useTransformSkip, lastPos);
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), cu->getLumaIntraDir(absPartIdx), residual, stride, coeff, tuSize, scalingListType, useTransformSkip, lastPos);
}
else
{
int16_t* resiTmp = residual;
- memset(coeff, 0, sizeof(coeff_t) * width * height);
+ memset(coeff, 0, sizeof(coeff_t) * tuSize * tuSize);
primitives.blockfill_s[size](resiTmp, stride, 0);
}
//Generate Recon
- assert(width <= 32);
- int part = partitionFromSizes(width, height);
+ assert(tuSize <= 32);
+ int part = partitionFromSizes(tuSize, tuSize);
primitives.luma_add_ps[part](recon, stride, pred, residual, stride, stride);
- primitives.blockcpy_pp(width, height, reconIPred, reconIPredStride, recon, stride);
+ primitives.blockcpy_pp(tuSize, tuSize, reconIPred, reconIPredStride, recon, stride);
}
if (bCheckSplit && !bCheckFull)
@@ -1417,8 +1413,7 @@
}
}
- uint32_t width = cu->getCUSize(0) >> (actualTrDepth + m_hChromaShift);
- uint32_t height = width;
+ uint32_t tuSize = cu->getCUSize(0) >> (actualTrDepth + m_hChromaShift);
uint32_t stride = fencYuv->getCStride();
const bool splitIntoSubTUs = (chFmt == CHROMA_422);
@@ -1455,17 +1450,17 @@
}
chromaPredMode = (chFmt == CHROMA_422) ? g_chroma422IntraAngleMappingTable[chromaPredMode] : chromaPredMode;
//===== init availability pattern =====
- TComPattern::initAdiPatternChroma(cu, absTUPartIdxC, actualTrDepth, m_predBuf, m_predBufStride, m_predBufHeight, chromaId);
- pixel* chromaPred = TComPattern::getAdiChromaBuf(chromaId, height, m_predBuf);
+ TComPattern::initAdiPatternChroma(cu, absTUPartIdxC, actualTrDepth, m_predBuf, chromaId);
+ pixel* chromaPred = TComPattern::getAdiChromaBuf(chromaId, tuSize, m_predBuf);
//===== get prediction signal =====
- predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, width, height, chFmt);
+ predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, tuSize, chFmt);
//===== get residual signal =====
- assert(!((uint32_t)(size_t)fenc & (width - 1)));
- assert(!((uint32_t)(size_t)pred & (width - 1)));
- assert(!((uint32_t)(size_t)residual & (width - 1)));
- int size = g_convertToBit[width];
+ assert(!((uint32_t)(size_t)fenc & (tuSize - 1)));
+ assert(!((uint32_t)(size_t)pred & (tuSize - 1)));
+ assert(!((uint32_t)(size_t)residual & (tuSize - 1)));
+ int size = g_convertToBit[tuSize];
primitives.calcresidual[size](fenc, pred, residual, stride);
//--- transform and quantization ---
@@ -1485,7 +1480,7 @@
m_trQuant->selectLambda(TEXT_CHROMA);
- absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, width, ttype, absTUPartIdxC, &lastPos, useTransformSkipChroma);
+ absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, tuSize, ttype, absTUPartIdxC, &lastPos, useTransformSkipChroma);
//--- set coded block flag ---
cu->setCbfPartRange((((absSum > 0) ? 1 : 0) << origTrDepth), ttype, absTUPartIdxC, tuIterator.m_absPartIdxStep);
@@ -1495,21 +1490,21 @@
{
int scalingListType = 0 + ttype;
assert(scalingListType < 6);
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absTUPartIdxC), REG_DCT, residual, stride, coeff, width, scalingListType, useTransformSkipChroma, lastPos);
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absTUPartIdxC), REG_DCT, residual, stride, coeff, tuSize, scalingListType, useTransformSkipChroma, lastPos);
}
else
{
int16_t* resiTmp = residual;
- memset(coeff, 0, sizeof(coeff_t) * width * height);
+ memset(coeff, 0, sizeof(coeff_t) * tuSize * tuSize);
primitives.blockfill_s[size](resiTmp, stride, 0);
}
//===== reconstruction =====
- assert(((intptr_t)residual & (width - 1)) == 0);
- assert(width <= 32);
+ assert(((intptr_t)residual & (tuSize - 1)) == 0);
+ assert(tuSize <= 32);
// use square primitive
- int part = partitionFromSizes(width, width);
+ int part = partitionFromSizes(tuSize, tuSize);
primitives.chroma[CHROMA_444].add_ps[part](recon, stride, pred, residual, stride, stride);
primitives.chroma[CHROMA_444].copy_pp[part](reconIPred, reconIPredStride, recon, stride);
}
@@ -1547,13 +1542,13 @@
uint32_t depth = cu->getDepth(0);
uint32_t initTrDepth = cu->getPartitionSize(0) == SIZE_2Nx2N ? 0 : 1;
uint32_t numPU = 1 << (2 * initTrDepth);
- uint32_t puSize = cu->getCUSize(0) >> initTrDepth;
+ uint32_t tuSize = cu->getCUSize(0) >> initTrDepth;
uint32_t qNumParts = cu->getTotalNumPart() >> 2;
uint32_t qPartNum = cu->getPic()->getNumPartInCU() >> ((depth + initTrDepth) << 1);
uint32_t overallDistY = 0;
uint32_t candNum;
uint64_t candCostList[FAST_UDI_MAX_RDMODE_NUM];
- uint32_t puSizeIdx = g_convertToBit[puSize]; // log2(puSize) - 2
+ uint32_t tuSizeIdx = g_convertToBit[tuSize]; // log2(tuSize) - 2
static const uint8_t intraModeNumFast[] = { 8, 8, 3, 3, 3 }; // 4x4, 8x8, 16x16, 32x32, 64x64
//===== loop over partitions =====
@@ -1562,14 +1557,14 @@
for (uint32_t pu = 0; pu < numPU; pu++, partOffset += qNumParts)
{
// Reference sample smoothing
- TComPattern::initAdiPattern(cu, partOffset, initTrDepth, m_predBuf, m_predBufStride, m_predBufHeight, m_refAbove, m_refLeft, m_refAboveFlt, m_refLeftFlt);
+ TComPattern::initAdiPattern(cu, partOffset, initTrDepth, m_predBuf, m_refAbove, m_refLeft, m_refAboveFlt, m_refLeftFlt, ALL_IDX);
//===== determine set of modes to be tested (using prediction signal only) =====
const int numModesAvailable = 35; //total number of Intra modes
- pixel* fenc = fencYuv->getLumaAddr(pu, puSize);
+ pixel* fenc = fencYuv->getLumaAddr(pu, tuSize);
uint32_t stride = predYuv->getStride();
uint32_t rdModeList[FAST_UDI_MAX_RDMODE_NUM];
- int numModesForFullRD = intraModeNumFast[puSizeIdx];
+ int numModesForFullRD = intraModeNumFast[tuSizeIdx];
bool doFastSearch = (numModesForFullRD != numModesAvailable);
if (doFastSearch)
@@ -1584,10 +1579,10 @@
candNum = 0;
uint32_t modeCosts[35];
- pixel *above = m_refAbove + puSize - 1;
- pixel *aboveFiltered = m_refAboveFlt + puSize - 1;
- pixel *left = m_refLeft + puSize - 1;
- pixel *leftFiltered = m_refLeftFlt + puSize - 1;
+ pixel *above = m_refAbove + tuSize - 1;
+ pixel *aboveFiltered = m_refAboveFlt + tuSize - 1;
+ pixel *left = m_refLeft + tuSize - 1;
+ pixel *leftFiltered = m_refLeftFlt + tuSize - 1;
// 33 Angle modes once
ALIGN_VAR_32(pixel, buf_trans[32 * 32]);
@@ -1595,14 +1590,15 @@
ALIGN_VAR_32(pixel, bufScale[32 * 32]);
pixel _above[4 * 32 + 1];
pixel _left[4 * 32 + 1];
- pixel *aboveScale = _above + 2 * 32;
- pixel *leftScale = _left + 2 * 32;
- int scaleSize = puSize;
+ int scaleTuSize = tuSize;
int scaleStride = stride;
int costShift = 0;
- if (puSize > 32)
+ if (tuSize > 32)
{
+ pixel *aboveScale = _above + 2 * 32;
+ pixel *leftScale = _left + 2 * 32;
+
// origin is 64x64, we scale to 32x32 and setup required parameters
primitives.scale2D_64to32(bufScale, fenc, stride);
fenc = bufScale;
@@ -1613,7 +1609,7 @@
primitives.scale1D_128to64(aboveScale + 1, above + 1, 0);
primitives.scale1D_128to64(leftScale + 1, left + 1, 0);
- scaleSize = 32;
+ scaleTuSize = 32;
scaleStride = 32;
costShift = 2;
@@ -1624,17 +1620,17 @@
leftFiltered = leftScale;
}
- int log2SizeMinus2 = g_convertToBit[scaleSize];
+ int log2SizeMinus2 = g_convertToBit[scaleTuSize];
pixelcmp_t sa8d = primitives.sa8d[log2SizeMinus2];
// DC
- primitives.intra_pred[log2SizeMinus2][DC_IDX](tmp, scaleStride, left, above, 0, (scaleSize <= 16));
+ primitives.intra_pred[log2SizeMinus2][DC_IDX](tmp, scaleStride, left, above, 0, (scaleTuSize <= 16));
modeCosts[DC_IDX] = sa8d(fenc, scaleStride, tmp, scaleStride) << costShift;
pixel *abovePlanar = above;
pixel *leftPlanar = left;
- if (puSize >= 8 && puSize <= 32)
+ if (tuSize >= 8 && tuSize <= 32)
{
abovePlanar = aboveFiltered;
leftPlanar = leftFiltered;
@@ -1647,14 +1643,14 @@
// Transpose NxN
primitives.transpose[log2SizeMinus2](buf_trans, fenc, scaleStride);
- primitives.intra_pred_allangs[log2SizeMinus2](tmp, above, left, aboveFiltered, leftFiltered, (scaleSize <= 16));
+ primitives.intra_pred_allangs[log2SizeMinus2](tmp, above, left, aboveFiltered, leftFiltered, (scaleTuSize <= 16));
for (uint32_t mode = 2; mode < numModesAvailable; mode++)
{
bool modeHor = (mode < 18);
pixel *cmp = (modeHor ? buf_trans : fenc);
- intptr_t srcStride = (modeHor ? scaleSize : scaleStride);
- modeCosts[mode] = sa8d(cmp, srcStride, &tmp[(mode - 2) * (scaleSize * scaleSize)], scaleSize) << costShift;
+ intptr_t srcStride = (modeHor ? scaleTuSize : scaleStride);
+ modeCosts[mode] = sa8d(cmp, srcStride, &tmp[(mode - 2) * (scaleTuSize * scaleTuSize)], scaleTuSize) << costShift;
}
// Find N least cost modes. N = numModesForFullRD
@@ -1770,7 +1766,7 @@
if (pu != numPU - 1)
{
uint32_t zorder = cu->getZorderIdxInCU() + partOffset;
- int part = partitionFromSizes(puSize, puSize);
+ int part = partitionFromSizes(tuSize, tuSize);
pixel* dst = cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);
uint32_t dststride = cu->getPic()->getPicYuvRec()->getStride();
pixel* src = reconYuv->getLumaAddr(partOffset);
@@ -1817,44 +1813,44 @@
uint32_t maxMode = NUM_CHROMA_MODE;
uint32_t modeList[NUM_CHROMA_MODE];
- uint32_t width = cu->getCUSize(0) >> (trDepth + m_hChromaShift);
- uint32_t height = cu->getCUSize(0) >> (trDepth + m_vChromaShift);
+ uint32_t tuSize = cu->getCUSize(0) >> (trDepth + m_hChromaShift);
int chFmt = cu->getChromaFormat();
uint32_t stride = fencYuv->getCStride();
- int scaleWidth = width;
+ int scaleTuSize = tuSize;
int scaleStride = stride;
int costShift = 0;
- if (width > 32)
+ if (tuSize > 32)
{
- scaleWidth = 32;
+ scaleTuSize = 32;
scaleStride = 32;
costShift = 2;
}
- TComPattern::initAdiPatternChroma(cu, absPartIdx, trDepth, m_predBuf, m_predBufStride, m_predBufHeight, 1);
- TComPattern::initAdiPatternChroma(cu, absPartIdx, trDepth, m_predBuf, m_predBufStride, m_predBufHeight, 2);
+ TComPattern::initAdiPatternChroma(cu, absPartIdx, trDepth, m_predBuf, 1);
+ TComPattern::initAdiPatternChroma(cu, absPartIdx, trDepth, m_predBuf, 2);
cu->getAllowedChromaDir(0, modeList);
//----- check chroma modes -----
for (uint32_t mode = minMode; mode < maxMode; mode++)
{
+ uint32_t chromaPredMode = modeList[mode];
+ if (chromaPredMode == DM_CHROMA_IDX)
+ {
+ chromaPredMode = cu->getLumaIntraDir(0);
+ }
+ chromaPredMode = (chFmt == CHROMA_422) ? g_chroma422IntraAngleMappingTable[chromaPredMode] : chromaPredMode;
uint64_t cost = 0;
for (int chromaId = 0; chromaId < 2; chromaId++)
{
- int sad = 0;
- uint32_t chromaPredMode = modeList[mode];
- if (chromaPredMode == DM_CHROMA_IDX)
- chromaPredMode = cu->getLumaIntraDir(0);
pixel* fenc = (chromaId > 0 ? fencYuv->getCrAddr(absPartIdx) : fencYuv->getCbAddr(absPartIdx));
pixel* pred = (chromaId > 0 ? predYuv->getCrAddr(absPartIdx) : predYuv->getCbAddr(absPartIdx));
- pixel* chromaPred = TComPattern::getAdiChromaBuf(chromaId + 1, height, m_predBuf);
+ pixel* chromaPred = TComPattern::getAdiChromaBuf(chromaId + 1, tuSize, m_predBuf);
//===== get prediction signal =====
- predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, width, height, chFmt);
- int log2SizeMinus2 = g_convertToBit[scaleWidth];
+ predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, scaleTuSize, chFmt);
+ int log2SizeMinus2 = g_convertToBit[scaleTuSize];
pixelcmp_t sa8d = primitives.sa8d[log2SizeMinus2];
- sad = sa8d(fenc, scaleStride, pred, scaleStride) << costShift;
- cost += sad;
+ cost += sa8d(fenc, stride, pred, stride) << costShift;
}
//----- compare -----
diff -r d0acf82a77f9 -r dc0599b4da9e source/encoder/compress.cpp
--- a/source/encoder/compress.cpp Thu May 08 18:52:17 2014 +0900
+++ b/source/encoder/compress.cpp Sat May 10 15:27:26 2014 +0900
@@ -82,21 +82,21 @@
cu->setCUTransquantBypassSubParts(m_CUTransquantBypassFlagValue, 0, depth);
uint32_t initTrDepth = cu->getPartitionSize(0) == SIZE_2Nx2N ? 0 : 1;
- uint32_t width = cu->getCUSize(0) >> initTrDepth;
- uint32_t partOffset = 0;
+ uint32_t tuSize = cu->getCUSize(0) >> initTrDepth;
+ const uint32_t partOffset = 0;
// Reference sample smoothing
- TComPattern::initAdiPattern(cu, partOffset, initTrDepth, m_search->m_predBuf, m_search->m_predBufStride,
- m_search->m_predBufHeight, m_search->m_refAbove, m_search->m_refLeft,
- m_search->m_refAboveFlt, m_search->m_refLeftFlt);
+ TComPattern::initAdiPattern(cu, partOffset, initTrDepth, m_search->m_predBuf,
+ m_search->m_refAbove, m_search->m_refLeft,
+ m_search->m_refAboveFlt, m_search->m_refLeftFlt, ALL_IDX);
pixel* fenc = m_origYuv[depth]->getLumaAddr();
uint32_t stride = m_modePredYuv[5][depth]->getStride();
- pixel *above = m_search->m_refAbove + width - 1;
- pixel *aboveFiltered = m_search->m_refAboveFlt + width - 1;
- pixel *left = m_search->m_refLeft + width - 1;
- pixel *leftFiltered = m_search->m_refLeftFlt + width - 1;
+ pixel *above = m_search->m_refAbove + tuSize - 1;
+ pixel *aboveFiltered = m_search->m_refAboveFlt + tuSize - 1;
+ pixel *left = m_search->m_refLeft + tuSize - 1;
+ pixel *leftFiltered = m_search->m_refLeftFlt + tuSize - 1;
int sad, bsad;
uint32_t bits, bbits, mode, bmode;
uint64_t cost, bcost;
@@ -104,11 +104,11 @@
// 33 Angle modes once
ALIGN_VAR_32(pixel, buf_trans[32 * 32]);
ALIGN_VAR_32(pixel, tmp[33 * 32 * 32]);
- int scaleWidth = width;
+ int scaleTuSize = tuSize;
int scaleStride = stride;
int costMultiplier = 1;
- if (width > 32)
+ if (tuSize > 32)
{
// origin is 64x64, we scale to 32x32 and setup required parameters
ALIGN_VAR_32(pixel, bufScale[32 * 32]);
@@ -125,7 +125,7 @@
primitives.scale1D_128to64(aboveScale + 1, above + 1, 0);
primitives.scale1D_128to64(leftScale + 1, left + 1, 0);
- scaleWidth = 32;
+ scaleTuSize = 32;
scaleStride = 32;
costMultiplier = 4;
@@ -136,11 +136,11 @@
leftFiltered = leftScale;
}
- int log2SizeMinus2 = g_convertToBit[scaleWidth];
+ int log2SizeMinus2 = g_convertToBit[scaleTuSize];
pixelcmp_t sa8d = primitives.sa8d[log2SizeMinus2];
// DC
- primitives.intra_pred[log2SizeMinus2][DC_IDX](tmp, scaleStride, left, above, 0, (scaleWidth <= 16));
+ primitives.intra_pred[log2SizeMinus2][DC_IDX](tmp, scaleStride, left, above, 0, (scaleTuSize <= 16));
bsad = costMultiplier * sa8d(fenc, scaleStride, tmp, scaleStride);
bmode = mode = DC_IDX;
bbits = m_search->xModeBitsIntra(cu, mode, partOffset, depth, initTrDepth);
@@ -149,7 +149,7 @@
pixel *abovePlanar = above;
pixel *leftPlanar = left;
- if (width >= 8 && width <= 32)
+ if (tuSize >= 8 && tuSize <= 32)
{
abovePlanar = aboveFiltered;
leftPlanar = leftFiltered;
@@ -166,14 +166,14 @@
// Transpose NxN
primitives.transpose[log2SizeMinus2](buf_trans, fenc, scaleStride);
- primitives.intra_pred_allangs[log2SizeMinus2](tmp, above, left, aboveFiltered, leftFiltered, (scaleWidth <= 16));
+ primitives.intra_pred_allangs[log2SizeMinus2](tmp, above, left, aboveFiltered, leftFiltered, (scaleTuSize <= 16));
for (mode = 2; mode < 35; mode++)
{
bool modeHor = (mode < 18);
pixel *cmp = (modeHor ? buf_trans : fenc);
- intptr_t srcStride = (modeHor ? scaleWidth : scaleStride);
- sad = costMultiplier * sa8d(cmp, srcStride, &tmp[(mode - 2) * (scaleWidth * scaleWidth)], scaleWidth);
+ intptr_t srcStride = (modeHor ? scaleTuSize : scaleStride);
+ sad = costMultiplier * sa8d(cmp, srcStride, &tmp[(mode - 2) * (scaleTuSize * scaleTuSize)], scaleTuSize);
bits = m_search->xModeBitsIntra(cu, mode, partOffset, depth, initTrDepth);
cost = m_rdCost->calcRdSADCost(sad, bits);
COPY4_IF_LT(bcost, cost, bmode, mode, bsad, sad, bbits, bits);
More information about the x265-devel mailing list