[x265] [PATCH 02 of 29] intra refactoring: intra prediction with modified planar, angular prediction functions
dnyaneshwar at multicorewareinc.com
dnyaneshwar at multicorewareinc.com
Tue Jan 13 08:11:10 CET 2015
# HG changeset patch
# User Ashok Kumar Mishra<ashok at multicorewareinc.com>
# Date 1421075255 -19800
# Mon Jan 12 20:37:35 2015 +0530
# Node ID cf29bf7824491d35e20df5249810ff9a1520d3e3
# Parent 9ec15ed0fe2a56eaf0c45954b750cac7293ffcd9
intra refactoring: intra prediction with modified planar, angular prediction functions
The buffer used for intra prediction is reduced from roughly (1152 * 1056 + 768) bytes to around 2 * (1 + 2 * 128) bytes.
The planar and angular prediction functions are modified. This patch uses the modified C functions for all intra prediction functions.
diff -r 9ec15ed0fe2a -r cf29bf782449 source/common/intrapred.cpp
--- a/source/common/intrapred.cpp Mon Jan 12 14:49:22 2015 +0530
+++ b/source/common/intrapred.cpp Mon Jan 12 20:37:35 2015 +0530
@@ -76,6 +76,25 @@
dcPredFilter(above + 1, left + 1, dst, dstStride, width);
}
+template<int width>
+void intra_pred_dc_c_new(pixel* dst, intptr_t dstStride, pixel* srcPix, int /*dirMode*/, int bFilter)
+{ // DC intra prediction: fills the width x width block at dst with the rounded mean of two runs of `width` neighbours read from srcPix; dirMode is unused.
+ int k, l;
+
+ int dcVal = width; // start at `width` (half the divisor) so the division below rounds to nearest
+ for (int i = 0; i < width; i++)
+ dcVal += srcPix[1 + i] + srcPix[2 * width + 1 + i]; // first run starts at srcPix[1], second run at srcPix[2 * width + 1]
+
+ dcVal = dcVal / (width + width); // 2 * width samples were accumulated
+ for (k = 0; k < width; k++)
+ for (l = 0; l < width; l++)
+ dst[k * dstStride + l] = (pixel)dcVal;
+
+ if (bFilter) // optional post-filter, delegated to dcPredFilter with the same two neighbour runs
+ dcPredFilter(srcPix + 1, srcPix + (2 * width + 1), dst, dstStride, width);
+
+}
+
template<int log2Size>
void planar_pred_c(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int /*dirMode*/, int /*bFilter*/)
{
@@ -122,7 +141,20 @@
}
}
}
+template<int log2Size>
+void planar_pred_c_new(pixel* dst, intptr_t dstStride, pixel* srcPix, int /*dirMode*/, int /*bFilter*/)
+{ // Planar intra prediction for a (1 << log2Size)-square block written to dst; dirMode and bFilter are unused.
+ const int blkSize = 1 << log2Size;
+ pixel* above = srcPix + 1; // above neighbours start right after srcPix[0]
+ pixel* left = srcPix + (2 * blkSize + 1); // left neighbours follow the 2 * blkSize above entries
+
+ pixel topRight = above[blkSize]; // first above sample past the block's own width
+ pixel bottomLeft = left[blkSize]; // first left sample past the block's own height
+ for (int y = 0; y < blkSize; y++)
+ for (int x = 0; x < blkSize; x++)
+ dst[y * dstStride + x] = (pixel) (((blkSize - 1 - x) * left[y] + (blkSize - 1 -y) * above[x] + (x + 1) * topRight + (y + 1) * bottomLeft + blkSize) >> (log2Size + 1)); // the four weights sum to 2 * blkSize; + blkSize rounds before the shift by log2Size + 1
+}
template<int width>
void intra_pred_ang_c(pixel* dst, intptr_t dstStride, pixel *refLeft, pixel *refAbove, int dirMode, int bFilter)
{
@@ -228,6 +260,107 @@
}
}
+template<int width>
+void intra_pred_ang_c_new(pixel* dst, intptr_t dstStride, pixel *srcPix, int dirMode, int bFilter)
+{ // Angular intra prediction of a width x width block from the single neighbour array srcPix (srcPix[0], then a run at srcPix[1], then a run at srcPix[2 * width + 1]).
+ int width2 = width << 1;
+ // Flip the neighbours in the horizontal case.
+ int horMode = dirMode < 18; // modes below 18 are predicted as their vertical counterpart and transposed at the end
+ pixel neighbourBuf[129]; // indices 0 .. 4 * width are written below, i.e. 129 entries when width == 32
+
+ if (horMode)
+ {
+ neighbourBuf[0] = srcPix[0];
+ for (int i = 0; i < width << 1; i++)
+ {
+ neighbourBuf[1 + i] = srcPix[width2 + 1 + i]; // second run moves to the front
+ neighbourBuf[width2 + 1 + i] = srcPix[1 + i]; // first run moves to the back
+ }
+ srcPix = neighbourBuf; // from here on only the swapped local copy is read
+ }
+
+ // Intra prediction angle and inverse angle tables.
+ const int8_t angleTable[17] = { -32, -26, -21, -17, -13, -9, -5, -2, 0, 2, 5, 9, 13, 17, 21, 26, 32 };
+ const int16_t invAngleTable[8] = { 4096, 1638, 910, 630, 482, 390, 315, 256 };
+
+ // Get the prediction angle.
+ int angleOffset = horMode ? 10 - dirMode : dirMode - 26; // modes 10 and 26 give offset 0, i.e. angle 0
+ int angle = angleTable[8 + angleOffset];
+
+ // Vertical Prediction.
+ if (!angle)
+ {
+ for (int y = 0; y < width; y++)
+ for (int x = 0; x < width; x++)
+ dst[y * dstStride + x] = srcPix[1 + x]; // every row is a copy of the first run
+
+ if (bFilter)
+ {
+ int topLeft = srcPix[0], top = srcPix[1];
+ for (int y = 0; y < width; y++)
+ dst[y * dstStride] = x265_clip((int16_t)(top + ((srcPix[width2 + 1 + y] - topLeft) >> 1))); // first column only: top plus half the gradient along the second run, clipped
+ }
+ }
+ else // Angular prediction.
+ {
+ // Get the reference pixels. The reference base is the first pixel to the top (neighbourBuf[1]).
+ pixel refBuf[64], *ref;
+
+ // Use the projected left neighbours and the top neighbours.
+ if (angle < 0)
+ {
+ // Number of neighbours projected.
+ int nbProjected = -((width * angle) >> 5) - 1;
+ ref = refBuf + nbProjected + 1; // leaves room for nbProjected samples plus srcPix[0] below ref[0]
+
+ // Project the neighbours.
+ int invAngle = invAngleTable[- angleOffset - 1]; // angleOffset is -8 .. -1 here, so the index is 0 .. 7
+ int invAngleSum = 128; // 128 rounds the >> 8 below
+ for (int i = 0; i < nbProjected; i++)
+ {
+ invAngleSum += invAngle;
+ ref[- 2 - i] = srcPix[width2 + (invAngleSum >> 8)]; // sample taken from the second run
+ }
+
+ // Copy the top-left and top pixels.
+ for (int i = 0; i < width + 1; i++)
+ ref[-1 + i] = srcPix[i];
+ }
+ else // Use the top and top-right neighbours.
+ ref = srcPix + 1;
+
+ // Pass every row.
+ int angleSum = 0;
+ for (int y = 0; y < width; y++)
+ {
+ angleSum += angle;
+ int offset = angleSum >> 5; // whole-sample displacement for this row
+ int fraction = angleSum & 31; // remaining displacement in 1/32 sample units
+
+ if (fraction) // Interpolate
+ for (int x = 0; x < width; x++)
+ dst[y * dstStride + x] = (pixel)(((32 - fraction) * ref[offset + x] + fraction * ref[offset + x + 1] + 16) >> 5); // linear blend of two adjacent references, + 16 rounds
+ else // Copy.
+ for (int x = 0; x < width; x++)
+ dst[y * dstStride + x] = ref[offset + x];
+ }
+ }
+
+ // Flip for horizontal.
+ if (horMode)
+ {
+ for (int y = 0; y < width - 1; y++) // in-place transpose: swap each element above the diagonal with its mirror
+ {
+ for (int x = y + 1; x < width; x++)
+ {
+ pixel tmp = dst[y * dstStride + x];
+ dst[y * dstStride + x] = dst[x * dstStride + y];
+ dst[x * dstStride + y] = tmp;
+ }
+ }
+ }
+}
+
template<int log2Size>
void all_angs_pred_c(pixel *dest, pixel *above0, pixel *left0, pixel *above1, pixel *left1, int bLuma)
{
@@ -258,6 +391,36 @@
}
}
}
+
+template<int log2Size>
+void all_angs_pred_c_new(pixel *dest, pixel *refPix, pixel *filtPix, int bLuma)
+{ // Runs intra_pred_ang_c_new for modes 2..34 and stores the 33 size x size results back to back in dest.
+ const int size = 1 << log2Size;
+ for (int mode = 2; mode <= 34; mode++)
+ {
+ pixel *srcPix = (g_intraFilterFlags[mode] & size ? filtPix : refPix); // filtPix is used when the flag table has this block size set for the mode
+ pixel *out = dest + ((mode - 2) << (log2Size * 2)); // each mode gets size * size pixels, stride == size
+
+ intra_pred_ang_c_new<size>(out, size, srcPix, mode, bLuma); // bLuma is forwarded as the bFilter argument
+
+ // intra_pred_ang_c_new already transposes horizontal modes; the swap below transposes them a second time.
+ bool modeHor = (mode < 18);
+
+ // transpose the block if this is a horizontal mode
+ if (modeHor)
+ {
+ for (int k = 0; k < size - 1; k++)
+ {
+ for (int l = k + 1; l < size; l++)
+ {
+ pixel tmp = out[k * size + l];
+ out[k * size + l] = out[l * size + k];
+ out[l * size + k] = tmp;
+ }
+ }
+ }
+ }
+}
}
namespace x265 {
@@ -270,22 +433,43 @@
p.intra_pred[0][BLOCK_16x16] = planar_pred_c<4>;
p.intra_pred[0][BLOCK_32x32] = planar_pred_c<5>;
+ p.intra_pred_new[0][BLOCK_4x4] = planar_pred_c_new<2>;
+ p.intra_pred_new[0][BLOCK_8x8] = planar_pred_c_new<3>;
+ p.intra_pred_new[0][BLOCK_16x16] = planar_pred_c_new<4>;
+ p.intra_pred_new[0][BLOCK_32x32] = planar_pred_c_new<5>;
+
// Intra Prediction DC
p.intra_pred[1][BLOCK_4x4] = intra_pred_dc_c<4>;
p.intra_pred[1][BLOCK_8x8] = intra_pred_dc_c<8>;
p.intra_pred[1][BLOCK_16x16] = intra_pred_dc_c<16>;
p.intra_pred[1][BLOCK_32x32] = intra_pred_dc_c<32>;
+
+ p.intra_pred_new[1][BLOCK_4x4] = intra_pred_dc_c_new<4>;
+ p.intra_pred_new[1][BLOCK_8x8] = intra_pred_dc_c_new<8>;
+ p.intra_pred_new[1][BLOCK_16x16] = intra_pred_dc_c_new<16>;
+ p.intra_pred_new[1][BLOCK_32x32] = intra_pred_dc_c_new<32>;
+
for (int i = 2; i < NUM_INTRA_MODE; i++)
{
p.intra_pred[i][BLOCK_4x4] = intra_pred_ang_c<4>;
p.intra_pred[i][BLOCK_8x8] = intra_pred_ang_c<8>;
p.intra_pred[i][BLOCK_16x16] = intra_pred_ang_c<16>;
p.intra_pred[i][BLOCK_32x32] = intra_pred_ang_c<32>;
+
+ p.intra_pred_new[i][BLOCK_4x4] = intra_pred_ang_c_new<4>;
+ p.intra_pred_new[i][BLOCK_8x8] = intra_pred_ang_c_new<8>;
+ p.intra_pred_new[i][BLOCK_16x16] = intra_pred_ang_c_new<16>;
+ p.intra_pred_new[i][BLOCK_32x32] = intra_pred_ang_c_new<32>;
}
p.intra_pred_allangs[BLOCK_4x4] = all_angs_pred_c<2>;
p.intra_pred_allangs[BLOCK_8x8] = all_angs_pred_c<3>;
p.intra_pred_allangs[BLOCK_16x16] = all_angs_pred_c<4>;
p.intra_pred_allangs[BLOCK_32x32] = all_angs_pred_c<5>;
+
+ p.intra_pred_allangs_new[BLOCK_4x4] = all_angs_pred_c_new<2>;
+ p.intra_pred_allangs_new[BLOCK_8x8] = all_angs_pred_c_new<3>;
+ p.intra_pred_allangs_new[BLOCK_16x16] = all_angs_pred_c_new<4>;
+ p.intra_pred_allangs_new[BLOCK_32x32] = all_angs_pred_c_new<5>;
}
}
diff -r 9ec15ed0fe2a -r cf29bf782449 source/common/pixel.cpp
--- a/source/common/pixel.cpp Mon Jan 12 14:49:22 2015 +0530
+++ b/source/common/pixel.cpp Mon Jan 12 20:37:35 2015 +0530
@@ -643,6 +643,32 @@
}
}
+void scale1D_128to64_new(pixel *dst, const pixel *src, intptr_t /*stride*/)
+{ // Halves two consecutive 128-sample runs in src into two consecutive 64-sample runs in dst by rounded averaging of adjacent pairs; stride is unused.
+ int x;
+ const pixel* src1 = src; // first input run: src[0 .. 127]
+ const pixel* src2 = src + 128; // second input run: src[128 .. 255]
+
+ pixel* dst1 = dst; // first output run: dst[0 .. 63]
+ pixel* dst2 = dst + 64/*128*/; // second output run is packed directly after the first: dst[64 .. 127]
+
+ for (x = 0; x < 128; x += 2)
+ {
+ // Top pixel
+ pixel pix0 = src1[(x + 0)];
+ pixel pix1 = src1[(x + 1)];
+
+ // Left pixel
+ pixel pix2 = src2[(x + 0)];
+ pixel pix3 = src2[(x + 1)];
+ int sum1 = pix0 + pix1;
+ int sum2 = pix2 + pix3;
+
+ dst1[x >> 1] = (pixel)((sum1 + 1) >> 1); // + 1 rounds the two-sample average
+ dst2[x >> 1] = (pixel)((sum2 + 1) >> 1);
+ }
+}
+
void scale2D_64to32(pixel* dst, const pixel* src, intptr_t stride)
{
uint32_t x, y;
@@ -1366,6 +1392,7 @@
p.weight_sp = weight_sp_c;
p.scale1D_128to64 = scale1D_128to64;
+ p.scale1D_128to64_new = scale1D_128to64_new;
p.scale2D_64to32 = scale2D_64to32;
p.frameInitLowres = frame_init_lowres_core;
p.ssim_4x4x2_core = ssim_4x4x2_core;
diff -r 9ec15ed0fe2a -r cf29bf782449 source/common/predict.cpp
--- a/source/common/predict.cpp Mon Jan 12 14:49:22 2015 +0530
+++ b/source/common/predict.cpp Mon Jan 12 20:37:35 2015 +0530
@@ -40,18 +40,11 @@
Predict::Predict()
{
- m_predBuf = NULL;
- m_refAbove = NULL;
- m_refAboveFlt = NULL;
- m_refLeft = NULL;
- m_refLeftFlt = NULL;
m_immedVals = NULL;
}
Predict::~Predict()
{
- X265_FREE(m_predBuf);
- X265_FREE(m_refAbove);
X265_FREE(m_immedVals);
m_predShortYuv[0].destroy();
m_predShortYuv[1].destroy();
@@ -62,16 +55,7 @@
m_csp = csp;
m_hChromaShift = CHROMA_H_SHIFT(csp);
m_vChromaShift = CHROMA_V_SHIFT(csp);
-
- int predBufHeight = ((MAX_CU_SIZE + 2) << 4);
- int predBufStride = ((MAX_CU_SIZE + 8) << 4);
- CHECKED_MALLOC(m_predBuf, pixel, predBufStride * predBufHeight);
CHECKED_MALLOC(m_immedVals, int16_t, 64 * (64 + NTAPS_LUMA - 1));
- CHECKED_MALLOC(m_refAbove, pixel, 12 * MAX_CU_SIZE);
-
- m_refAboveFlt = m_refAbove + 3 * MAX_CU_SIZE;
- m_refLeft = m_refAboveFlt + 3 * MAX_CU_SIZE;
- m_refLeftFlt = m_refLeft + 3 * MAX_CU_SIZE;
return m_predShortYuv[0].create(MAX_CU_SIZE, csp) && m_predShortYuv[1].create(MAX_CU_SIZE, csp);
@@ -82,68 +66,48 @@
void Predict::predIntraLumaAng(uint32_t dirMode, pixel* dst, intptr_t stride, uint32_t log2TrSize)
{
int tuSize = 1 << log2TrSize;
-
- pixel *refLft, *refAbv;
-
- if (!(g_intraFilterFlags[dirMode] & tuSize))
- {
- refLft = m_refLeft + tuSize - 1;
- refAbv = m_refAbove + tuSize - 1;
- }
- else
- {
- refLft = m_refLeftFlt + tuSize - 1;
- refAbv = m_refAboveFlt + tuSize - 1;
- }
+ pixel* srcPix = (!(g_intraFilterFlags[dirMode] & tuSize)) ? intraNeighbourBuf[0] : intraNeighbourBuf[1];
bool bFilter = log2TrSize <= 4;
int sizeIdx = log2TrSize - 2;
X265_CHECK(sizeIdx >= 0 && sizeIdx < 4, "intra block size is out of range\n");
- primitives.intra_pred[dirMode][sizeIdx](dst, stride, refLft, refAbv, dirMode, bFilter);
+// primitives.intra_pred[dirMode][sizeIdx](dst, stride, refLft, refAbv, dirMode, bFilter);
+ primitives.intra_pred_new[dirMode][sizeIdx](dst, stride, srcPix, dirMode, bFilter);
}
-void Predict::predIntraChromaAng(pixel* src, uint32_t dirMode, pixel* dst, intptr_t stride, uint32_t log2TrSizeC, int chFmt)
+void Predict::predIntraChromaAng(uint32_t dirMode, pixel* dst, intptr_t stride, uint32_t log2TrSizeC, int chFmt)
{
int tuSize = 1 << log2TrSizeC;
int tuSize2 = tuSize << 1;
- // Create the prediction
- const int bufOffset = tuSize - 1;
- pixel buf0[3 * MAX_CU_SIZE];
- pixel buf1[3 * MAX_CU_SIZE];
- pixel* above;
- pixel* left = buf0 + bufOffset;
-
- int limit = (dirMode <= 25 && dirMode >= 11) ? (tuSize + 1 + 1) : (tuSize2 + 1);
-
- left[0] = src[0];
- for (int k = 1; k < limit; k++)
- left[k] = src[k + tuSize2];
+ pixel* srcBuf = intraNeighbourBuf[0];
if (chFmt == X265_CSP_I444 && (g_intraFilterFlags[dirMode] & tuSize))
{
- // generate filtered intra prediction samples
- buf0[bufOffset - 1] = src[1];
- left = buf1 + bufOffset;
- for (int i = 0; i < tuSize2; i++)
- left[i] = (buf0[bufOffset + i - 1] + 2 * buf0[bufOffset + i] + buf0[bufOffset + i + 1] + 2) >> 2;
- left[tuSize2] = buf0[bufOffset + tuSize2];
+ pixel* fltBuf = intraNeighbourBuf[1];
+ pixel topLeft = srcBuf[0], topLast = srcBuf[tuSize2], leftLast = srcBuf[tuSize2 + tuSize2];
- above = buf0 + bufOffset;
- above[0] = left[0];
+ // filtering top
for (int i = 1; i < tuSize2; i++)
- above[i] = (src[i - 1] + 2 * src[i] + src[i + 1] + 2) >> 2;
- above[tuSize2] = src[tuSize2];
- }
- else
- {
- above = buf1 + bufOffset;
- memcpy(above, src, (tuSize2 + 1) * sizeof(pixel));
+ fltBuf[i] = ((srcBuf[i] << 1) + srcBuf[i - 1] + srcBuf[i + 1] + 2) >> 2;
+ fltBuf[tuSize2] = topLast;
+
+ // filtering top-left
+ fltBuf[0] = ((srcBuf[0] << 1) + srcBuf[1] + srcBuf[tuSize2 + 1] + 2) >> 2;
+
+ //filtering left
+ fltBuf[tuSize2 + 1] = ((srcBuf[tuSize2 + 1] << 1) + topLeft + srcBuf[tuSize2 + 2] + 2) >> 2;
+ for (int i = tuSize2 + 2; i < tuSize2 + tuSize2; i++)
+ fltBuf[i] = ((srcBuf[i] << 1) + srcBuf[i - 1] + srcBuf[i + 1] + 2) >> 2;
+ fltBuf[tuSize2 + tuSize2] = leftLast;
+
+ srcBuf = intraNeighbourBuf[1];
}
int sizeIdx = log2TrSizeC - 2;
X265_CHECK(sizeIdx >= 0 && sizeIdx < 4, "intra block size is out of range\n");
- primitives.intra_pred[dirMode][sizeIdx](dst, stride, left, above, dirMode, 0);
+// primitives.intra_pred[dirMode][sizeIdx](dst, stride, left, above, dirMode, 0);
+ primitives.intra_pred_new[dirMode][sizeIdx](dst, stride, srcBuf, dirMode, 0);
}
void Predict::initMotionCompensation(const CUData& cu, const CUGeom& cuGeom, int partIdx)
@@ -651,37 +615,22 @@
void Predict::initAdiPattern(const CUData& cu, const CUGeom& cuGeom, uint32_t absPartIdx, const IntraNeighbors& intraNeighbors, int dirMode)
{
- pixel* adiBuf = m_predBuf;
- pixel* refAbove = m_refAbove;
- pixel* refLeft = m_refLeft;
- pixel* refAboveFlt = m_refAboveFlt;
- pixel* refLeftFlt = m_refLeftFlt;
-
int tuSize = intraNeighbors.tuSize;
int tuSize2 = tuSize << 1;
pixel* adiOrigin = cu.m_encData->m_reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.encodeIdx + absPartIdx);
intptr_t picStride = cu.m_encData->m_reconPic->m_stride;
- fillReferenceSamples(adiOrigin, picStride, adiBuf, intraNeighbors);
+ fillReferenceSamples(adiOrigin, picStride, intraNeighbors, intraNeighbourBuf[0]);
- // initialization of ADI buffers
- const int bufOffset = tuSize - 1;
- refAbove += bufOffset;
- refLeft += bufOffset;
+ pixel* refBuf = intraNeighbourBuf[0];
+ pixel* fltBuf = intraNeighbourBuf[1];
- memcpy(refAbove, adiBuf, (tuSize2 + 1) * sizeof(pixel));
-
- refLeft[0] = adiBuf[0];
- for (int k = 1; k < tuSize2 + 1 ; k++)
- refLeft[k] = adiBuf[k + tuSize2];
+ pixel topLeft = refBuf[0], topLast = refBuf[tuSize2], leftLast = refBuf[tuSize2 + tuSize2];
if (dirMode == ALL_IDX ? (8 | 16 | 32) & tuSize : g_intraFilterFlags[dirMode] & tuSize)
{
// generate filtered intra prediction samples
- refAboveFlt += bufOffset;
- refLeftFlt += bufOffset;
-
bool bStrongSmoothing = (tuSize == 32 && cu.m_slice->m_sps->bUseStrongIntraSmoothing);
if (bStrongSmoothing)
@@ -689,56 +638,57 @@
const int trSize = 32;
const int trSize2 = trSize << 1;
const int threshold = 1 << (X265_DEPTH - 5);
- int refBL = refLeft[trSize2];
- int refTL = refAbove[0];
- int refTR = refAbove[trSize2];
- bStrongSmoothing = (abs(refBL + refTL - (refLeft[trSize] << 1)) < threshold &&
- abs(refTL + refTR - (refAbove[trSize] << 1)) < threshold);
+
+ pixel topMiddle = refBuf[32], leftMiddle = refBuf[tuSize2 + 32];
+
+ bStrongSmoothing = abs (topLeft + topLast - (topMiddle << 1)) < threshold &&
+ abs (topLeft + leftLast - (leftMiddle << 1)) < threshold;
if (bStrongSmoothing)
{
// bilinear interpolation
- const int shift = 5 + 1; // log2TrSize + 1;
- int init = (refTL << shift) + tuSize;
+ const int shift = 5 + 1;
+ int init = (topLeft << shift) + tuSize;
int deltaL, deltaR;
- refLeftFlt[0] = refAboveFlt[0] = refAbove[0];
+ //TODO: Performance Primitive???
+ deltaL = leftLast - topLeft; deltaR = topLast - topLeft;
- //TODO: Performance Primitive???
- deltaL = refBL - refTL; deltaR = refTR - refTL;
+ fltBuf[0] = topLeft;
for (int i = 1; i < trSize2; i++)
{
- refLeftFlt[i] = (pixel)((init + deltaL * i) >> shift);
- refAboveFlt[i] = (pixel)((init + deltaR * i) >> shift);
+ fltBuf[i + tuSize2] = (pixel)((init + deltaL * i) >> shift); //Left Filtering
+ fltBuf[i] = (pixel)((init + deltaR * i) >> shift); //Above Filtering
}
- refLeftFlt[trSize2] = refLeft[trSize2];
- refAboveFlt[trSize2] = refAbove[trSize2];
+ fltBuf[trSize2] = topLast;
+ fltBuf[tuSize2 + trSize2] = leftLast;
return;
}
}
- refLeftFlt[0] = (refAbove[1] + (refLeft[0] << 1) + refLeft[1] + 2) >> 2;
+ // filtering top
for (int i = 1; i < tuSize2; i++)
- refLeftFlt[i] = (refLeft[i - 1] + (refLeft[i] << 1) + refLeft[i + 1] + 2) >> 2;
- refLeftFlt[tuSize2] = refLeft[tuSize2];
+ fltBuf[i] = ((refBuf[i] << 1) + refBuf[i - 1] + refBuf[i + 1] + 2) >> 2;
+ fltBuf[tuSize2] = topLast;
- refAboveFlt[0] = refLeftFlt[0];
- for (int i = 1; i < tuSize2; i++)
- refAboveFlt[i] = (refAbove[i - 1] + (refAbove[i] << 1) + refAbove[i + 1] + 2) >> 2;
- refAboveFlt[tuSize2] = refAbove[tuSize2];
+ // filtering top-left
+ fltBuf[0] = ((topLeft << 1) + refBuf[1] + refBuf[tuSize2 + 1] + 2) >> 2;
+
+ //filtering left
+ fltBuf[tuSize2 + 1] = ((refBuf[tuSize2 + 1] << 1) + topLeft + refBuf[tuSize2 + 2] + 2) >> 2;
+ for (int i = tuSize2 + 2; i < tuSize2 + tuSize2; i++)
+ fltBuf[i] = ((refBuf[i] << 1) + refBuf[i - 1] + refBuf[i + 1] + 2) >> 2;
+ fltBuf[tuSize2 + tuSize2] = leftLast;
}
}
void Predict::initAdiPatternChroma(const CUData& cu, const CUGeom& cuGeom, uint32_t absPartIdx, const IntraNeighbors& intraNeighbors, uint32_t chromaId)
{
- uint32_t tuSize = intraNeighbors.tuSize;
-
const pixel* adiOrigin = cu.m_encData->m_reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.encodeIdx + absPartIdx);
intptr_t picStride = cu.m_encData->m_reconPic->m_strideC;
- pixel* adiRef = getAdiChromaBuf(chromaId, tuSize);
- fillReferenceSamples(adiOrigin, picStride, adiRef, intraNeighbors);
+ fillReferenceSamples(adiOrigin, picStride, intraNeighbors, intraNeighbourBuf[0]);
}
void Predict::initIntraNeighbors(const CUData& cu, uint32_t absPartIdx, uint32_t tuDepth, bool isLuma, IntraNeighbors *intraNeighbors)
@@ -797,7 +747,7 @@
intraNeighbors->tuSize = tuSize;
}
-void Predict::fillReferenceSamples(const pixel* adiOrigin, intptr_t picStride, pixel* adiRef, const IntraNeighbors& intraNeighbors)
+void Predict::fillReferenceSamples(const pixel* adiOrigin, intptr_t picStride, const IntraNeighbors& intraNeighbors, pixel dst[258])
{
const pixel dcValue = (pixel)(1 << (X265_DEPTH - 1));
int numIntraNeighbor = intraNeighbors.numIntraNeighbor;
@@ -808,39 +758,39 @@
// Nothing is available, perform DC prediction.
if (numIntraNeighbor == 0)
{
- // Fill border with DC value
+ // Fill top border with DC value
for (uint32_t i = 0; i < refSize; i++)
- adiRef[i] = dcValue;
+ dst[i] = dcValue;
+ // Fill left border with DC value
for (uint32_t i = 0; i < refSize - 1; i++)
- adiRef[i + refSize] = dcValue;
+ dst[i + refSize] = dcValue;
}
else if (numIntraNeighbor == totalUnits)
{
// Fill top border with rec. samples
const pixel* adiTemp = adiOrigin - picStride - 1;
- memcpy(adiRef, adiTemp, refSize * sizeof(*adiRef));
+ memcpy(dst, adiTemp, refSize * sizeof(pixel));
// Fill left border with rec. samples
adiTemp = adiOrigin - 1;
-
for (uint32_t i = 0; i < refSize - 1; i++)
{
- adiRef[i + refSize] = adiTemp[0];
+ dst[i + refSize] = adiTemp[0];
adiTemp += picStride;
}
}
else // reference samples are partially available
{
- const bool* bNeighborFlags = intraNeighbors.bNeighborFlags;
- const bool* pNeighborFlags;
+ const bool *bNeighborFlags = intraNeighbors.bNeighborFlags;
+ const bool *pNeighborFlags;
int aboveUnits = intraNeighbors.aboveUnits;
int leftUnits = intraNeighbors.leftUnits;
int unitWidth = intraNeighbors.unitWidth;
int unitHeight = intraNeighbors.unitHeight;
int totalSamples = (leftUnits * unitHeight) + ((aboveUnits + 1) * unitWidth);
pixel adiLineBuffer[5 * MAX_CU_SIZE];
- pixel* adi;
+ pixel *adi;
// Initialize
for (int i = 0; i < totalSamples; i++)
@@ -943,11 +893,11 @@
// Copy processed samples
adi = adiLineBuffer + refSize + unitWidth - 2;
- memcpy(adiRef, adi, refSize * sizeof(*adiRef));
+ memcpy(dst, adi, refSize * sizeof(pixel));
adi = adiLineBuffer + refSize - 1;
for (int i = 0; i < (int)refSize - 1; i++)
- adiRef[i + refSize] = adi[-(i + 1)];
+ dst[i + refSize] = adi[-(i + 1)];
}
}
diff -r 9ec15ed0fe2a -r cf29bf782449 source/common/predict.h
--- a/source/common/predict.h Mon Jan 12 14:49:22 2015 +0530
+++ b/source/common/predict.h Mon Jan 12 20:37:35 2015 +0530
@@ -63,13 +63,8 @@
ShortYuv m_predShortYuv[2]; /* temporary storage for weighted prediction */
int16_t* m_immedVals;
- /* Intra prediction buffers */
- pixel* m_predBuf;
- pixel* m_refAbove;
- pixel* m_refAboveFlt;
- pixel* m_refLeft;
- pixel* m_refLeftFlt;
-
+ // Unfiltered/filtered neighbours of the current partition.
+ pixel intraNeighbourBuf[2][258];
/* Slice information */
const Slice* m_predSlice;
int m_csp;
@@ -105,8 +100,7 @@
/* Intra prediction helper functions */
static void initIntraNeighbors(const CUData& cu, uint32_t absPartIdx, uint32_t tuDepth, bool isLuma, IntraNeighbors *IntraNeighbors);
- static void fillReferenceSamples(const pixel* adiOrigin, intptr_t picStride, pixel* adiRef, const IntraNeighbors& intraNeighbors);
-
+ static void fillReferenceSamples(const pixel* adiOrigin, intptr_t picStride, const IntraNeighbors& intraNeighbors, pixel dst[258]);
template<bool cip>
static bool isAboveLeftAvailable(const CUData& cu, uint32_t partIdxLT);
template<bool cip>
@@ -127,14 +121,9 @@
/* Angular Intra */
void predIntraLumaAng(uint32_t dirMode, pixel* pred, intptr_t stride, uint32_t log2TrSize);
- void predIntraChromaAng(pixel* src, uint32_t dirMode, pixel* pred, intptr_t stride, uint32_t log2TrSizeC, int chFmt);
-
+ void predIntraChromaAng(uint32_t dirMode, pixel* pred, intptr_t stride, uint32_t log2TrSizeC, int chFmt);
void initAdiPattern(const CUData& cu, const CUGeom& cuGeom, uint32_t absPartIdx, const IntraNeighbors& intraNeighbors, int dirMode);
void initAdiPatternChroma(const CUData& cu, const CUGeom& cuGeom, uint32_t absPartIdx, const IntraNeighbors& intraNeighbors, uint32_t chromaId);
- pixel* getAdiChromaBuf(uint32_t chromaId, int tuSize)
- {
- return m_predBuf + (chromaId == 1 ? 0 : 2 * ADI_BUF_STRIDE * (tuSize * 2 + 1));
- }
};
}
diff -r 9ec15ed0fe2a -r cf29bf782449 source/common/primitives.h
--- a/source/common/primitives.h Mon Jan 12 14:49:22 2015 +0530
+++ b/source/common/primitives.h Mon Jan 12 20:37:35 2015 +0530
@@ -121,7 +121,9 @@
typedef void (*blockfill_s_t)(int16_t* dst, intptr_t dstride, int16_t val);
typedef void (*intra_pred_t)(pixel* dst, intptr_t dstStride, pixel* refLeft, pixel* refAbove, int dirMode, int bFilter);
+typedef void (*intra_pred_new_t)(pixel* dst, intptr_t dstStride, pixel *srcPix, int dirMode, int bFilter);
typedef void (*intra_allangs_t)(pixel* dst, pixel* above0, pixel* left0, pixel* above1, pixel* left1, int bLuma);
+typedef void (*intra_allangs_new_t)(pixel *dst, pixel *refPix, pixel *filtPix, int bLuma);
typedef void (*cpy2Dto1D_shl_t)(int16_t* dst, const int16_t* src, intptr_t srcStride, int shift);
typedef void (*cpy2Dto1D_shr_t)(int16_t* dst, const int16_t* src, intptr_t srcStride, int shift);
@@ -250,8 +252,11 @@
denoiseDct_t denoiseDct;
intra_pred_t intra_pred[NUM_INTRA_MODE][NUM_TR_SIZE]; /* todo: move to CU */
+ intra_pred_new_t intra_pred_new[NUM_INTRA_MODE][NUM_TR_SIZE];
intra_allangs_t intra_pred_allangs[NUM_TR_SIZE]; /* todo: move to CU */
+ intra_allangs_new_t intra_pred_allangs_new[NUM_TR_SIZE];
scale_t scale1D_128to64;
+ scale_t scale1D_128to64_new;
scale_t scale2D_64to32;
ssim_4x4x2_core_t ssim_4x4x2_core;
diff -r 9ec15ed0fe2a -r cf29bf782449 source/encoder/search.cpp
--- a/source/encoder/search.cpp Mon Jan 12 14:49:22 2015 +0530
+++ b/source/encoder/search.cpp Mon Jan 12 20:37:35 2015 +0530
@@ -777,7 +777,6 @@
ShortYuv& resiYuv = m_rqt[cuGeom.depth].tmpResiYuv;
uint32_t qtLayer = log2TrSize - 2;
- uint32_t tuSize = 1 << log2TrSizeC;
uint32_t stride = mode.fencYuv->m_csize;
const uint32_t sizeIdxC = log2TrSizeC - 2;
uint32_t outDist = 0;
@@ -815,11 +814,9 @@
// init availability pattern
initAdiPatternChroma(cu, cuGeom, absPartIdxC, intraNeighbors, chromaId);
- pixel* chromaPred = getAdiChromaBuf(chromaId, tuSize);
// get prediction signal
- predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, log2TrSizeC, m_csp);
-
+ predIntraChromaAng(chromaPredMode, pred, stride, log2TrSizeC, m_csp);
cu.setTransformSkipPartRange(0, ttype, absPartIdxC, tuIterator.absPartIdxStep);
primitives.cu[sizeIdxC].calcresidual(fenc, pred, residual, stride);
@@ -863,7 +860,6 @@
uint32_t fullDepth = cu.m_cuDepth[0] + tuDepth;
uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
const uint32_t log2TrSizeC = 2;
- uint32_t tuSize = 4;
uint32_t qtLayer = log2TrSize - 2;
uint32_t outDist = 0;
@@ -903,7 +899,6 @@
// init availability pattern
initAdiPatternChroma(cu, cuGeom, absPartIdxC, intraNeighbors, chromaId);
- pixel* chromaPred = getAdiChromaBuf(chromaId, tuSize);
uint32_t chromaPredMode = cu.m_chromaIntraDir[absPartIdxC];
if (chromaPredMode == DM_CHROMA_IDX)
@@ -912,7 +907,7 @@
chromaPredMode = g_chroma422IntraAngleMappingTable[chromaPredMode];
// get prediction signal
- predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, log2TrSizeC, m_csp);
+ predIntraChromaAng(chromaPredMode, pred, stride, log2TrSizeC, m_csp);
uint64_t bCost = MAX_INT64;
uint32_t bDist = 0;
@@ -1076,7 +1071,6 @@
}
ShortYuv& resiYuv = m_rqt[cuGeom.depth].tmpResiYuv;
- uint32_t tuSize = 1 << log2TrSizeC;
uint32_t stride = mode.fencYuv->m_csize;
const uint32_t sizeIdxC = log2TrSizeC - 2;
@@ -1111,10 +1105,9 @@
// init availability pattern
initAdiPatternChroma(cu, cuGeom, absPartIdxC, intraNeighbors, chromaId);
- pixel* chromaPred = getAdiChromaBuf(chromaId, tuSize);
// get prediction signal
- predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, log2TrSizeC, m_csp);
+ predIntraChromaAng(chromaPredMode, pred, stride, log2TrSizeC, m_csp);
X265_CHECK(!cu.m_transformSkip[ttype][0], "transform skip not supported at low RD levels\n");
@@ -1208,10 +1201,6 @@
const pixel* fenc = intraMode.fencYuv->m_buf[0];
uint32_t stride = intraMode.fencYuv->m_size;
- pixel* above = m_refAbove + tuSize - 1;
- pixel* aboveFiltered = m_refAboveFlt + tuSize - 1;
- pixel* left = m_refLeft + tuSize - 1;
- pixel* leftFiltered = m_refLeftFlt + tuSize - 1;
int sad, bsad;
uint32_t bits, bbits, mode, bmode;
uint64_t cost, bcost;
@@ -1231,26 +1220,23 @@
primitives.scale2D_64to32(bufScale, fenc, stride);
fenc = bufScale;
- // reserve space in case primitives need to store data in above
- // or left buffers
- pixel _above[4 * 32 + 1];
- pixel _left[4 * 32 + 1];
- pixel* aboveScale = _above + 2 * 32;
- pixel* leftScale = _left + 2 * 32;
- aboveScale[0] = leftScale[0] = above[0];
- primitives.scale1D_128to64(aboveScale + 1, above + 1, 0);
- primitives.scale1D_128to64(leftScale + 1, left + 1, 0);
+ pixel nScale[129];
+ intraNeighbourBuf[1][0] = intraNeighbourBuf[0][0];
+ primitives.scale1D_128to64_new(nScale + 1, intraNeighbourBuf[0] + 1, 0);
+
+ //TO DO: primitive
+ for (int x = 1; x < 65; x++)
+ {
+ intraNeighbourBuf[0][x] = nScale[x]; // Top pixel
+ intraNeighbourBuf[0][x + 64] = nScale[x + 64]; // Left pixel
+ intraNeighbourBuf[1][x] = nScale[x]; // Top pixel
+ intraNeighbourBuf[1][x + 64] = nScale[x + 64]; // Left pixel
+ }
scaleTuSize = 32;
scaleStride = 32;
costShift = 2;
sizeIdx = 5 - 2; // log2(scaleTuSize) - 2
-
- // Filtered and Unfiltered refAbove and refLeft pointing to above and left.
- above = aboveScale;
- left = leftScale;
- aboveFiltered = aboveScale;
- leftFiltered = leftScale;
}
pixelcmp_t sa8d = primitives.cu[sizeIdx].sa8d;
@@ -1267,23 +1253,20 @@
uint32_t rbits = getIntraRemModeBits(cu, absPartIdx, preds, mpms);
// DC
- primitives.intra_pred[DC_IDX][sizeIdx](tmp, scaleStride, left, above, 0, (scaleTuSize <= 16));
+// primitives.intra_pred[DC_IDX][sizeIdx](tmp, scaleStride, left, above, 0, (scaleTuSize <= 16));
+ primitives.intra_pred_new[DC_IDX][sizeIdx](tmp, scaleStride, intraNeighbourBuf[0], 0, (scaleTuSize <= 16));
bsad = sa8d(fenc, scaleStride, tmp, scaleStride) << costShift;
bmode = mode = DC_IDX;
bbits = (mpms & ((uint64_t)1 << mode)) ? m_entropyCoder.bitsIntraModeMPM(preds, mode) : rbits;
bcost = m_rdCost.calcRdSADCost(bsad, bbits);
- pixel* abovePlanar = above;
- pixel* leftPlanar = left;
-
+ // PLANAR
+ pixel* planar = intraNeighbourBuf[0];
if (tuSize & (8 | 16 | 32))
- {
- abovePlanar = aboveFiltered;
- leftPlanar = leftFiltered;
- }
-
- // PLANAR
- primitives.intra_pred[PLANAR_IDX][sizeIdx](tmp, scaleStride, leftPlanar, abovePlanar, 0, 0);
+ planar = intraNeighbourBuf[1];
+
+// primitives.intra_pred[PLANAR_IDX][sizeIdx](tmp, scaleStride, leftPlanar, abovePlanar, 0, 0);
+ primitives.intra_pred_new[PLANAR_IDX][sizeIdx](tmp, scaleStride, planar, 0, 0);
sad = sa8d(fenc, scaleStride, tmp, scaleStride) << costShift;
mode = PLANAR_IDX;
bits = (mpms & ((uint64_t)1 << mode)) ? m_entropyCoder.bitsIntraModeMPM(preds, mode) : rbits;
@@ -1294,7 +1277,8 @@
if (primitives.intra_pred_allangs[sizeIdx])
{
primitives.cu[sizeIdx].transpose(bufTrans, fenc, scaleStride);
- primitives.intra_pred_allangs[sizeIdx](tmp, above, left, aboveFiltered, leftFiltered, (scaleTuSize <= 16));
+// primitives.intra_pred_allangs[sizeIdx](tmp, above, left, aboveFiltered, leftFiltered, (scaleTuSize <= 16));
+ primitives.intra_pred_allangs_new[sizeIdx](tmp, intraNeighbourBuf[0], intraNeighbourBuf[1], (scaleTuSize <= 16));
}
else
allangs = false;
@@ -1309,9 +1293,9 @@
cost = m_rdCost.calcRdSADCost(sad, bits); \
} else { \
if (g_intraFilterFlags[angle] & scaleTuSize) \
- primitives.intra_pred[angle][sizeIdx](tmp, scaleTuSize, leftFiltered, aboveFiltered, angle, scaleTuSize <= 16); \
+ primitives.intra_pred_new[angle][sizeIdx](tmp, scaleTuSize, intraNeighbourBuf[1], angle, scaleTuSize <= 16); \
else \
- primitives.intra_pred[angle][sizeIdx](tmp, scaleTuSize, left, above, angle, scaleTuSize <= 16); \
+ primitives.intra_pred_new[angle][sizeIdx](tmp, scaleTuSize, intraNeighbourBuf[0], angle, scaleTuSize <= 16); \
sad = sa8d(fenc, scaleStride, tmp, scaleTuSize) << costShift; \
bits = (mpms & ((uint64_t)1 << angle)) ? m_entropyCoder.bitsIntraModeMPM(preds, angle) : rbits; \
cost = m_rdCost.calcRdSADCost(sad, bits); \
@@ -1453,46 +1437,38 @@
const pixel* fenc = fencYuv->getLumaAddr(absPartIdx);
uint32_t stride = predYuv->m_size;
- pixel* above = m_refAbove + tuSize - 1;
- pixel* aboveFiltered = m_refAboveFlt + tuSize - 1;
- pixel* left = m_refLeft + tuSize - 1;
- pixel* leftFiltered = m_refLeftFlt + tuSize - 1;
-
// 33 Angle modes once
ALIGN_VAR_32(pixel, buf_trans[32 * 32]);
ALIGN_VAR_32(pixel, tmp[33 * 32 * 32]);
- ALIGN_VAR_32(pixel, bufScale[32 * 32]);
- pixel _above[4 * 32 + 1];
- pixel _left[4 * 32 + 1];
+
int scaleTuSize = tuSize;
int scaleStride = stride;
int costShift = 0;
if (tuSize > 32)
{
- pixel* aboveScale = _above + 2 * 32;
- pixel* leftScale = _left + 2 * 32;
-
// origin is 64x64, we scale to 32x32 and setup required parameters
+ ALIGN_VAR_32(pixel, bufScale[32 * 32]);
primitives.scale2D_64to32(bufScale, fenc, stride);
fenc = bufScale;
- // reserve space in case primitives need to store data in above
- // or left buffers
- aboveScale[0] = leftScale[0] = above[0];
- primitives.scale1D_128to64(aboveScale + 1, above + 1, 0);
- primitives.scale1D_128to64(leftScale + 1, left + 1, 0);
+ pixel nScale[129];
+ intraNeighbourBuf[1][0] = intraNeighbourBuf[0][0];
+ primitives.scale1D_128to64_new(nScale + 1, intraNeighbourBuf[0] + 1, 0);
+
+ // TO DO: primitive
+ for (int x = 1; x < 65; x++)
+ {
+ intraNeighbourBuf[0][x] = nScale[x]; // Top pixel
+ intraNeighbourBuf[0][x + 64] = nScale[x + 64]; // Left pixel
+ intraNeighbourBuf[1][x] = nScale[x]; // Top pixel
+ intraNeighbourBuf[1][x + 64] = nScale[x + 64]; // Left pixel
+ }
scaleTuSize = 32;
scaleStride = 32;
costShift = 2;
sizeIdx = 5 - 2; // log2(scaleTuSize) - 2
-
- // Filtered and Unfiltered refAbove and refLeft pointing to above and left.
- above = aboveScale;
- left = leftScale;
- aboveFiltered = aboveScale;
- leftFiltered = leftScale;
}
m_entropyCoder.loadIntraDirModeLuma(m_rqt[depth].cur);
@@ -1510,29 +1486,29 @@
uint64_t bcost;
// DC
- primitives.intra_pred[DC_IDX][sizeIdx](tmp, scaleStride, left, above, 0, (scaleTuSize <= 16));
+// primitives.intra_pred[DC_IDX][sizeIdx](tmp, scaleStride, left, above, 0, (scaleTuSize <= 16));
+ primitives.intra_pred_new[DC_IDX][sizeIdx](tmp, scaleStride, intraNeighbourBuf[0], 0, (scaleTuSize <= 16));
uint32_t bits = (mpms & ((uint64_t)1 << DC_IDX)) ? m_entropyCoder.bitsIntraModeMPM(preds, DC_IDX) : rbits;
uint32_t sad = sa8d(fenc, scaleStride, tmp, scaleStride) << costShift;
modeCosts[DC_IDX] = bcost = m_rdCost.calcRdSADCost(sad, bits);
// PLANAR
- pixel* abovePlanar = above;
- pixel* leftPlanar = left;
+ pixel* planar = intraNeighbourBuf[0];
if (tuSize >= 8 && tuSize <= 32)
- {
- abovePlanar = aboveFiltered;
- leftPlanar = leftFiltered;
- }
- primitives.intra_pred[PLANAR_IDX][sizeIdx](tmp, scaleStride, leftPlanar, abovePlanar, 0, 0);
+ planar = intraNeighbourBuf[1];
+
+// primitives.intra_pred[PLANAR_IDX][sizeIdx](tmp, scaleStride, leftPlanar, abovePlanar, 0, 0);
+ primitives.intra_pred_new[PLANAR_IDX][sizeIdx](tmp, scaleStride, planar, 0, 0);
bits = (mpms & ((uint64_t)1 << PLANAR_IDX)) ? m_entropyCoder.bitsIntraModeMPM(preds, PLANAR_IDX) : rbits;
sad = sa8d(fenc, scaleStride, tmp, scaleStride) << costShift;
modeCosts[PLANAR_IDX] = m_rdCost.calcRdSADCost(sad, bits);
COPY1_IF_LT(bcost, modeCosts[PLANAR_IDX]);
// angular predictions
- if (primitives.intra_pred_allangs[sizeIdx])
+ if (primitives.intra_pred_allangs_new[sizeIdx])
{
- primitives.intra_pred_allangs[sizeIdx](tmp, above, left, aboveFiltered, leftFiltered, (scaleTuSize <= 16));
+// primitives.intra_pred_allangs[sizeIdx](tmp, above, left, aboveFiltered, leftFiltered, (scaleTuSize <= 16));
+ primitives.intra_pred_allangs_new[sizeIdx](tmp, intraNeighbourBuf[0], intraNeighbourBuf[1], (scaleTuSize <= 16));
primitives.cu[sizeIdx].transpose(buf_trans, fenc, scaleStride);
for (int mode = 2; mode < 35; mode++)
{
@@ -1550,9 +1526,11 @@
for (int mode = 2; mode < 35; mode++)
{
if (g_intraFilterFlags[mode] & scaleTuSize)
- primitives.intra_pred[mode][sizeIdx](tmp, scaleTuSize, leftFiltered, aboveFiltered, mode, scaleTuSize <= 16);
+// primitives.intra_pred[mode][sizeIdx](tmp, scaleTuSize, leftFiltered, aboveFiltered, mode, scaleTuSize <= 16);
+ primitives.intra_pred_new[mode][sizeIdx](tmp, scaleTuSize, intraNeighbourBuf[1], mode, scaleTuSize <= 16);
else
- primitives.intra_pred[mode][sizeIdx](tmp, scaleTuSize, left, above, mode, scaleTuSize <= 16);
+// primitives.intra_pred[mode][sizeIdx](tmp, scaleTuSize, left, above, mode, scaleTuSize <= 16);
+ primitives.intra_pred_new[mode][sizeIdx](tmp, scaleTuSize, intraNeighbourBuf[0], mode, scaleTuSize <= 16);
bits = (mpms & ((uint64_t)1 << mode)) ? m_entropyCoder.bitsIntraModeMPM(preds, mode) : rbits;
sad = sa8d(fenc, scaleStride, tmp, scaleTuSize) << costShift;
modeCosts[mode] = m_rdCost.calcRdSADCost(sad, bits);
@@ -1663,8 +1641,6 @@
IntraNeighbors intraNeighbors;
initIntraNeighbors(cu, 0, tuDepth, false, &intraNeighbors);
- Predict::initAdiPatternChroma(cu, cuGeom, 0, intraNeighbors, 1); // U
- Predict::initAdiPatternChroma(cu, cuGeom, 0, intraNeighbors, 2); // V
cu.getAllowedChromaDir(0, modeList);
// check chroma modes
@@ -1681,10 +1657,9 @@
{
const pixel* fenc = fencYuv->m_buf[chromaId];
pixel* pred = predYuv->m_buf[chromaId];
- pixel* chromaPred = getAdiChromaBuf(chromaId, scaleTuSize);
-
+ Predict::initAdiPatternChroma(cu, cuGeom, 0, intraNeighbors, chromaId);
// get prediction signal
- predIntraChromaAng(chromaPred, chromaPredMode, pred, fencYuv->m_csize, log2TrSizeC, m_csp);
+ predIntraChromaAng(chromaPredMode, pred, fencYuv->m_csize, log2TrSizeC, m_csp);
cost += primitives.cu[log2TrSizeC - 2].sa8d(fenc, predYuv->m_csize, pred, fencYuv->m_csize) << costShift;
}
diff -r 9ec15ed0fe2a -r cf29bf782449 source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp Mon Jan 12 14:49:22 2015 +0530
+++ b/source/encoder/slicetype.cpp Mon Jan 12 20:37:35 2015 +0530
@@ -1668,37 +1668,37 @@
if (!fenc->bIntraCalculated)
{
const int sizeIdx = X265_LOWRES_CU_BITS - 2; // partition size
-
- pixel _above0[X265_LOWRES_CU_SIZE * 4 + 1], *const above0 = _above0 + 2 * X265_LOWRES_CU_SIZE;
- pixel _above1[X265_LOWRES_CU_SIZE * 4 + 1], *const above1 = _above1 + 2 * X265_LOWRES_CU_SIZE;
- pixel _left0[X265_LOWRES_CU_SIZE * 4 + 1], *const left0 = _left0 + 2 * X265_LOWRES_CU_SIZE;
- pixel _left1[X265_LOWRES_CU_SIZE * 4 + 1], *const left1 = _left1 + 2 * X265_LOWRES_CU_SIZE;
+ const int cuSize2 = cuSize << 1;
+ pixel neighbours[2][X265_LOWRES_CU_SIZE * 4 + 1];
pixel *pix_cur = fenc->lowresPlane[0] + pelOffset;
// Copy Above
- memcpy(above0, pix_cur - 1 - fenc->lumaStride, (cuSize + 1) * sizeof(pixel));
+ memcpy(neighbours[0], pix_cur - 1 - fenc->lumaStride, (cuSize + 1) * sizeof(pixel));
// Copy Left
- for (int i = 0; i < cuSize + 1; i++)
- left0[i] = pix_cur[-1 - fenc->lumaStride + i * fenc->lumaStride];
+ for (int i = 1; i < cuSize + 1; i++)
+ neighbours[0][i + cuSize2] = pix_cur[-1 - fenc->lumaStride + i * fenc->lumaStride];
for (int i = 0; i < cuSize; i++)
{
- above0[cuSize + i + 1] = above0[cuSize];
- left0[cuSize + i + 1] = left0[cuSize];
+ // Copy above-last pixel
+ neighbours[0][i + cuSize + 1] = neighbours[0][cuSize]; //neighbours[0][i + 9] = neighbours[0][8]
+ // Copy left-last pixel
+ neighbours[0][i + cuSize2 + cuSize + 1] = neighbours[0][cuSize2 + cuSize]; //neighbours[0][i + 25] = neighbours[0][24]
}
- // filtering with [1 2 1]
- // assume getUseStrongIntraSmoothing() is disabled
- above1[0] = above0[0];
- above1[2 * cuSize] = above0[2 * cuSize];
- left1[0] = left0[0];
- left1[2 * cuSize] = left0[2 * cuSize];
- for (int i = 1; i < 2 * cuSize; i++)
+ // Filter neighbour pixels with [1-2-1]
+ neighbours[1][0] = neighbours[0][0]; // Copy top-left pixel
+ neighbours[1][cuSize2] = neighbours[0][cuSize2]; //Copy top-right pixel
+ neighbours[1][cuSize2 << 1] = neighbours[0][cuSize2 << 1]; // Bottom-left pixel
+
+ neighbours[1][1] = (neighbours[0][0] + (neighbours[0][1] << 1) + neighbours[0][2] + 2) >> 2;
+ neighbours[1][cuSize2 + 1] = (neighbours[0][0] + (neighbours[0][cuSize2 + 1] << 1) + neighbours[0][cuSize2 + 1 + 1] + 2) >> 2;
+ for (int i = 2; i < cuSize2; i++)
{
- above1[i] = (above0[i - 1] + 2 * above0[i] + above0[i + 1] + 2) >> 2;
- left1[i] = (left0[i - 1] + 2 * left0[i] + left0[i + 1] + 2) >> 2;
+ neighbours[1][i] = (neighbours[0][i - 1] + (neighbours[0][i] << 1) + neighbours[0][i + 1] + 2) >> 2;
+ neighbours[1][cuSize2 + i] = (neighbours[0][cuSize2 + i - 1] + (neighbours[0][cuSize2 + i] << 1) + neighbours[0][cuSize2 + i + 1] + 2) >> 2;
}
int predsize = cuSize * cuSize;
@@ -1706,23 +1706,25 @@
// generate 35 intra predictions into m_predictions
pixelcmp_t satd = primitives.pu[partitionFromLog2Size(X265_LOWRES_CU_BITS)].satd;
int icost = m_me.COST_MAX;
- primitives.intra_pred[DC_IDX][sizeIdx](m_predictions, cuSize, left0, above0, 0, (cuSize <= 16));
+// primitives.intra_pred[DC_IDX][sizeIdx](m_predictions, cuSize, left0, above0, 0, (cuSize <= 16));
+ primitives.intra_pred_new[DC_IDX][sizeIdx](m_predictions, cuSize, neighbours[0], 0, (cuSize <= 16));
int cost = m_me.bufSATD(m_predictions, cuSize);
if (cost < icost)
icost = cost;
- pixel *above = (cuSize >= 8) ? above1 : above0;
- pixel *left = (cuSize >= 8) ? left1 : left0;
- primitives.intra_pred[PLANAR_IDX][sizeIdx](m_predictions, cuSize, left, above, 0, 0);
+
+ pixel *planar = (cuSize >= 8) ? neighbours[1] : neighbours[0];
+ primitives.intra_pred_new[PLANAR_IDX][sizeIdx](m_predictions, cuSize, planar, 0, 0);
cost = m_me.bufSATD(m_predictions, cuSize);
if (cost < icost)
icost = cost;
uint32_t mode, lowmode = 4;
- if (primitives.intra_pred_allangs[sizeIdx])
+ if (primitives.intra_pred_allangs_new[sizeIdx])
{
ALIGN_VAR_32(pixel, buf_trans[32 * 32]);
- primitives.intra_pred_allangs[sizeIdx](m_predictions + 2 * predsize, above0, left0, above1, left1, (cuSize <= 16));
+// primitives.intra_pred_allangs[sizeIdx](m_predictions + 2 * predsize, above0, left0, above1, left1, (cuSize <= 16));
+ primitives.intra_pred_allangs_new[sizeIdx](m_predictions + 2 * predsize, neighbours[0], neighbours[1], (cuSize <= 16));
primitives.cu[sizeIdx].transpose(buf_trans, m_me.fencPUYuv.m_buf[0], FENC_STRIDE);
int acost = m_me.COST_MAX;
@@ -1762,9 +1764,11 @@
for (mode = 5; mode < 35; mode += 5)
{
if (g_intraFilterFlags[mode] & cuSize)
- primitives.intra_pred[mode][sizeIdx](m_predictions, cuSize, left1, above1, mode, cuSize <= 16);
+// primitives.intra_pred[mode][sizeIdx](m_predictions, cuSize, left1, above1, mode, cuSize <= 16);
+ primitives.intra_pred_new[mode][sizeIdx](m_predictions, cuSize, neighbours[1], mode, cuSize <= 16);
else
- primitives.intra_pred[mode][sizeIdx](m_predictions, cuSize, left0, above0, mode, cuSize <= 16);
+// primitives.intra_pred[mode][sizeIdx](m_predictions, cuSize, left0, above0, mode, cuSize <= 16);
+ primitives.intra_pred_new[mode][sizeIdx](m_predictions, cuSize, neighbours[0], mode, cuSize <= 16);
cost = m_me.bufSATD(m_predictions, cuSize);
COPY2_IF_LT(acost, cost, lowmode, mode);
}
@@ -1775,17 +1779,21 @@
mode = minusmode;
if (g_intraFilterFlags[mode] & cuSize)
- primitives.intra_pred[mode][sizeIdx](m_predictions, cuSize, left1, above1, mode, cuSize <= 16);
+// primitives.intra_pred[mode][sizeIdx](m_predictions, cuSize, left1, above1, mode, cuSize <= 16);
+ primitives.intra_pred_new[mode][sizeIdx](m_predictions, cuSize, neighbours[1], mode, cuSize <= 16);
else
- primitives.intra_pred[mode][sizeIdx](m_predictions, cuSize, left0, above0, mode, cuSize <= 16);
+// primitives.intra_pred[mode][sizeIdx](m_predictions, cuSize, left0, above0, mode, cuSize <= 16);
+ primitives.intra_pred_new[mode][sizeIdx](m_predictions, cuSize, neighbours[0], mode, cuSize <= 16);
cost = m_me.bufSATD(m_predictions, cuSize);
COPY2_IF_LT(acost, cost, lowmode, mode);
mode = plusmode;
if (g_intraFilterFlags[mode] & cuSize)
- primitives.intra_pred[mode][sizeIdx](m_predictions, cuSize, left1, above1, mode, cuSize <= 16);
+// primitives.intra_pred[mode][sizeIdx](m_predictions, cuSize, left1, above1, mode, cuSize <= 16);
+ primitives.intra_pred_new[mode][sizeIdx](m_predictions, cuSize, neighbours[1], mode, cuSize <= 16);
else
- primitives.intra_pred[mode][sizeIdx](m_predictions, cuSize, left0, above0, mode, cuSize <= 16);
+// primitives.intra_pred[mode][sizeIdx](m_predictions, cuSize, left0, above0, mode, cuSize <= 16);
+ primitives.intra_pred_new[mode][sizeIdx](m_predictions, cuSize, neighbours[0], mode, cuSize <= 16);
cost = m_me.bufSATD(m_predictions, cuSize);
COPY2_IF_LT(acost, cost, lowmode, mode);
}
More information about the x265-devel
mailing list