[x265] [PATCH] framepp: row based interpolate
Min Chen
chenm003 at 163.com
Mon Aug 26 12:49:26 CEST 2013
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1377514125 -28800
# Node ID aecd81195710c367d3a21b1ec877c76368afd614
# Parent 797c13ec5d2ae7985027f59dcf4d4c5f86c1d367
framepp: row based interpolate
diff -r 797c13ec5d2a -r aecd81195710 source/Lib/TLibCommon/TComPicYuv.cpp
--- a/source/Lib/TLibCommon/TComPicYuv.cpp Sun Aug 25 21:53:55 2013 -0500
+++ b/source/Lib/TLibCommon/TComPicYuv.cpp Mon Aug 26 18:48:45 2013 +0800
@@ -76,8 +76,11 @@
m_cuWidth = maxCUWidth;
m_cuHeight = maxCUHeight;
- Int numCuInWidth = m_picWidth / m_cuWidth + (m_picWidth % m_cuWidth != 0);
- Int numCuInHeight = m_picHeight / m_cuHeight + (m_picHeight % m_cuHeight != 0);
+ Int numCuInWidth = (m_picWidth + m_cuWidth - 1) / m_cuWidth;
+ Int numCuInHeight = (m_picHeight + m_cuHeight - 1) / m_cuHeight;
+
+ m_numCuInWidth = numCuInWidth;
+ m_numCuInHeight = numCuInHeight;
m_lumaMarginX = g_maxCUWidth + 16; // for 16-byte alignment
m_lumaMarginY = g_maxCUHeight + 16; // margin for 8-tap filter and infinite padding
diff -r 797c13ec5d2a -r aecd81195710 source/Lib/TLibCommon/TComPicYuv.h
--- a/source/Lib/TLibCommon/TComPicYuv.h Sun Aug 25 21:53:55 2013 -0500
+++ b/source/Lib/TLibCommon/TComPicYuv.h Mon Aug 26 18:48:45 2013 +0800
@@ -77,7 +77,6 @@
// ------------------------------------------------------------------------------------------------
// Parameter for general YUV buffer usage
// ------------------------------------------------------------------------------------------------
-
Int m_picWidth; ///< Width of picture
Int m_picHeight; ///< Height of picture
@@ -98,6 +97,8 @@
Bool m_bIsBorderExtended;
public:
+ Int m_numCuInWidth;
+ Int m_numCuInHeight;
TComPicYuv();
virtual ~TComPicYuv();
diff -r 797c13ec5d2a -r aecd81195710 source/common/ipfilter.cpp
--- a/source/common/ipfilter.cpp Sun Aug 25 21:53:55 2013 -0500
+++ b/source/common/ipfilter.cpp Mon Aug 26 18:48:45 2013 +0800
@@ -478,31 +478,6 @@
}
}
-void filterVerticalMultiplaneExtend(short *src, int srcStride, pixel *dstE, pixel *dstI, pixel *dstP, int dstStride, int block_width, int block_height, int marginX, int marginY)
-{
- filterVertical_s_p<8>(src, srcStride, dstI, dstStride, block_width, block_height, g_lumaFilter[2]);
- filterVertical_s_p<8>(src, srcStride, dstE, dstStride, block_width, block_height, g_lumaFilter[1]);
- filterVertical_s_p<8>(src, srcStride, dstP, dstStride, block_width, block_height, g_lumaFilter[3]);
- extendPicCompBorder(dstE, dstStride, block_width, block_height, marginX, marginY);
- extendPicCompBorder(dstI, dstStride, block_width, block_height, marginX, marginY);
- extendPicCompBorder(dstP, dstStride, block_width, block_height, marginX, marginY);
-}
-
-void filterHorizontalMultiplaneExtend(pixel *src, int srcStride, short *midF, short* midA, short* midB, short* midC, int midStride, pixel *pDstA, pixel *pDstB, pixel *pDstC, int pDstStride, int block_width, int block_height, int marginX, int marginY)
-{
- filterConvertPelToShort(src, srcStride, midF, midStride, block_width, block_height);
- filterHorizontal_p_s<8>(src, srcStride, midB, midStride, block_width, block_height, g_lumaFilter[2]);
- filterHorizontal_p_s<8>(src, srcStride, midA, midStride, block_width, block_height, g_lumaFilter[1]);
- filterHorizontal_p_s<8>(src, srcStride, midC, midStride, block_width, block_height, g_lumaFilter[3]);
- filterConvertShortToPel(midA, midStride, pDstA, pDstStride, block_width, block_height);
- filterConvertShortToPel(midB, midStride, pDstB, pDstStride, block_width, block_height);
- filterConvertShortToPel(midC, midStride, pDstC, pDstStride, block_width, block_height);
-
- extendPicCompBorder(pDstA, pDstStride, block_width, block_height, marginX, marginY);
- extendPicCompBorder(pDstB, pDstStride, block_width, block_height, marginX, marginY);
- extendPicCompBorder(pDstC, pDstStride, block_width, block_height, marginX, marginY);
-}
-
void filterHorizontalExtendCol(pixel *src, int srcStride, short *midF, short* midA, short* midB, short* midC, int midStride, pixel *pDstA, pixel *pDstB, pixel *pDstC, int pDstStride, int block_width, int block_height, int marginX)
{
filterConvertPelToShort(src, srcStride, midF, midStride, block_width, block_height);
@@ -594,6 +569,147 @@
}
}
+void filterRowH(pixel *src, int srcStride, short* midA, short* midB, short* midC, int midStride, pixel *dstA, pixel *dstB, pixel *dstC, int width, int height, int marginX, int marginY, int row, int isLastRow)
+{ // Horizontal pass over one band of `height` rows: pads the full-pel source, filters it with g_lumaFilter[1..3] into midA/midB/midC (short) and dstA/dstB/dstC (pixel), then pads those outputs.
+ // Extend FullPel Left and Right (by marginX; done before the top/bottom copies below, which read the margins too)
+ extendCURowColBorder(src, srcStride, width, height, marginX);
+
+ // Extend FullPel Top: only for the first band (row == 0)
+ if (row == 0)
+ {
+ for(int y = 0; y < marginY; y++)
+ {
+ ::memcpy(src - marginX - (y + 1) * srcStride, src - marginX, sizeof(pixel) * (width + (marginX << 1))); // replicate first line, margins included, into line -(y + 1)
+ }
+ }
+
+ // Extend FullPel Bottom: only when the caller flags this band as the last one
+ if (isLastRow)
+ {
+ for(int y = 0; y < marginY; y++)
+ {
+ ::memcpy(src - marginX + (height + y) * srcStride, src - marginX + (height - 1) * srcStride, sizeof(pixel) * (width + (marginX << 1))); // replicate line (height - 1) into line (height + y)
+ }
+ }
+ filterHorizontal_p_s<8>(src, srcStride, midA, midStride, width, height, g_lumaFilter[1]); // midA/midB/midC receive coefficient sets 1/2/3 (presumably the 1/4, 2/4, 3/4 positions - confirm)
+ filterHorizontal_p_s<8>(src, srcStride, midB, midStride, width, height, g_lumaFilter[2]);
+ filterHorizontal_p_s<8>(src, srcStride, midC, midStride, width, height, g_lumaFilter[3]);
+ filterConvertShortToPel(midA, midStride, dstA, srcStride, width, height); // dst planes are written with srcStride: there is no separate dstStride parameter
+ filterConvertShortToPel(midB, midStride, dstB, srcStride, width, height);
+ filterConvertShortToPel(midC, midStride, dstC, srcStride, width, height);
+
+ // Extend SubPel Left and Right
+ extendCURowColBorder(dstA, srcStride, width, height, marginX);
+ extendCURowColBorder(dstB, srcStride, width, height, marginX);
+ extendCURowColBorder(dstC, srcStride, width, height, marginX);
+
+ if (row == 0)
+ {
+ // Extend SubPel Top
+ for(int y = 0; y < marginY; y++)
+ {
+ ::memcpy(dstA - marginX - (y + 1) * srcStride, dstA - marginX, sizeof(pixel) * srcStride); // NOTE(review): copies srcStride pixels, whereas the full-pel copy above uses width + 2 * marginX; same only if srcStride == width + 2 * marginX - confirm
+ ::memcpy(dstB - marginX - (y + 1) * srcStride, dstB - marginX, sizeof(pixel) * srcStride);
+ ::memcpy(dstC - marginX - (y + 1) * srcStride, dstC - marginX, sizeof(pixel) * srcStride);
+ }
+
+ // Extend midPel Top(only 3 rows)
+ for(int y = 0; y < 3; y++)
+ {
+ ::memcpy(midA - (y + 1) * midStride, midA, midStride * sizeof(short)); // writes above midA: the caller must provide at least 3 lines of headroom before it
+ ::memcpy(midB - (y + 1) * midStride, midB, midStride * sizeof(short));
+ ::memcpy(midC - (y + 1) * midStride, midC, midStride * sizeof(short));
+ }
+ }
+
+ if (isLastRow)
+ {
+ // Extend SubPel Bottom
+ for(int y = 0; y < marginY; y++)
+ {
+ ::memcpy(dstA - marginX + (height + y) * srcStride, dstA - marginX + (height - 1) * srcStride, sizeof(pixel) * srcStride); // replicate line (height - 1) downwards
+ ::memcpy(dstB - marginX + (height + y) * srcStride, dstB - marginX + (height - 1) * srcStride, sizeof(pixel) * srcStride);
+ ::memcpy(dstC - marginX + (height + y) * srcStride, dstC - marginX + (height - 1) * srcStride, sizeof(pixel) * srcStride);
+ }
+
+ // Extend midPel Bottom(only 4 rows)
+ for(int y = 0; y < 4; y++)
+ {
+ ::memcpy(midA + (height + y) * midStride, midA + (height - 1) * midStride, midStride * sizeof(short)); // writes past line (height - 1): the caller must provide at least 4 lines of tail room
+ ::memcpy(midB + (height + y) * midStride, midB + (height - 1) * midStride, midStride * sizeof(short));
+ ::memcpy(midC + (height + y) * midStride, midC + (height - 1) * midStride, midStride * sizeof(short));
+ }
+ }
+}
+
+void filterRowV_0(pixel *src, int srcStride, pixel *dstA, pixel *dstB, pixel *dstC, int width, int height, int marginX, int marginY, int row, int isLastRow)
+{ // Vertical pass straight from the pixel plane `src`: filters one band of `height` rows with g_lumaFilter[1..3] into dstA/dstB/dstC, then pads those outputs.
+ filterVertical_p_p<8>(src, srcStride, dstA, srcStride, width, height, g_lumaFilter[1]); // source and destinations both use srcStride: there is no separate dstStride parameter
+ filterVertical_p_p<8>(src, srcStride, dstB, srcStride, width, height, g_lumaFilter[2]);
+ filterVertical_p_p<8>(src, srcStride, dstC, srcStride, width, height, g_lumaFilter[3]);
+
+ // Extend SubPel Left and Right (by marginX; done before the top/bottom copies below, which read the margins too)
+ extendCURowColBorder(dstA, srcStride, width, height, marginX);
+ extendCURowColBorder(dstB, srcStride, width, height, marginX);
+ extendCURowColBorder(dstC, srcStride, width, height, marginX);
+
+ if (row == 0)
+ {
+ // Extend SubPel Top: only for the first band
+ for(int y = 0; y < marginY; y++)
+ {
+ ::memcpy(dstA - marginX - (y + 1) * srcStride, dstA - marginX, sizeof(pixel) * srcStride); // NOTE(review): copies a full srcStride pixels starting at the left margin; assumes srcStride == width + 2 * marginX - confirm
+ ::memcpy(dstB - marginX - (y + 1) * srcStride, dstB - marginX, sizeof(pixel) * srcStride);
+ ::memcpy(dstC - marginX - (y + 1) * srcStride, dstC - marginX, sizeof(pixel) * srcStride);
+ }
+ }
+
+ if (isLastRow)
+ {
+ // Extend SubPel Bottom: only when the caller flags this band as the last one
+ for(int y = 0; y < marginY; y++)
+ {
+ ::memcpy(dstA - marginX + (height + y) * srcStride, dstA - marginX + (height - 1) * srcStride, sizeof(pixel) * srcStride); // replicate line (height - 1) into line (height + y)
+ ::memcpy(dstB - marginX + (height + y) * srcStride, dstB - marginX + (height - 1) * srcStride, sizeof(pixel) * srcStride);
+ ::memcpy(dstC - marginX + (height + y) * srcStride, dstC - marginX + (height - 1) * srcStride, sizeof(pixel) * srcStride);
+ }
+ }
+}
+
+void filterRowV_N(short *midA, int midStride, pixel *dstA, pixel *dstB, pixel *dstC, int dstStride, int width, int height, int marginX, int marginY, int row, int isLastRow)
+{ // Vertical pass from a 16-bit intermediate plane: filters one band of `height` rows of `midA` with g_lumaFilter[1..3] into dstA/dstB/dstC, then pads those outputs.
+ filterVertical_s_p<8>(midA, midStride, dstA, dstStride, width, height, g_lumaFilter[1]); // the same short source feeds all three outputs; only the coefficient set differs
+ filterVertical_s_p<8>(midA, midStride, dstB, dstStride, width, height, g_lumaFilter[2]);
+ filterVertical_s_p<8>(midA, midStride, dstC, dstStride, width, height, g_lumaFilter[3]);
+
+ // Extend SubPel Left and Right (by marginX; done before the top/bottom copies below, which read the margins too)
+ extendCURowColBorder(dstA, dstStride, width, height, marginX);
+ extendCURowColBorder(dstB, dstStride, width, height, marginX);
+ extendCURowColBorder(dstC, dstStride, width, height, marginX);
+
+ if (row == 0)
+ {
+ // Extend SubPel Top: only for the first band
+ for(int y = 0; y < marginY; y++)
+ {
+ ::memcpy(dstA - marginX - (y + 1) * dstStride, dstA - marginX, sizeof(pixel) * dstStride); // NOTE(review): copies a full dstStride pixels starting at the left margin; assumes dstStride == width + 2 * marginX - confirm
+ ::memcpy(dstB - marginX - (y + 1) * dstStride, dstB - marginX, sizeof(pixel) * dstStride);
+ ::memcpy(dstC - marginX - (y + 1) * dstStride, dstC - marginX, sizeof(pixel) * dstStride);
+ }
+ }
+
+ if (isLastRow)
+ {
+ // Extend SubPel Bottom: only when the caller flags this band as the last one
+ for(int y = 0; y < marginY; y++)
+ {
+ ::memcpy(dstA - marginX + (height + y) * dstStride, dstA - marginX + (height - 1) * dstStride, sizeof(pixel) * dstStride); // replicate line (height - 1) into line (height + y)
+ ::memcpy(dstB - marginX + (height + y) * dstStride, dstB - marginX + (height - 1) * dstStride, sizeof(pixel) * dstStride);
+ ::memcpy(dstC - marginX + (height + y) * dstStride, dstC - marginX + (height - 1) * dstStride, sizeof(pixel) * dstStride);
+ }
+ }
+}
+
namespace x265 {
// x265 private namespace
@@ -615,8 +731,9 @@
p.ipfilter_p2s = filterConvertPelToShort;
p.ipfilter_s2p = filterConvertShortToPel;
- p.filterVmulti = filterVerticalMultiplaneExtend;
- p.filterHmulti = filterHorizontalMultiplaneExtend;
+ p.filterRowH = filterRowH;
+ p.filterRowV_0 = filterRowV_0;
+ p.filterRowV_N = filterRowV_N;
p.filterVwghtd = filterVerticalWeighted;
p.filterHwghtd = filterHorizontalWeighted;
diff -r 797c13ec5d2a -r aecd81195710 source/common/primitives.h
--- a/source/common/primitives.h Sun Aug 25 21:53:55 2013 -0500
+++ b/source/common/primitives.h Mon Aug 26 18:48:45 2013 +0800
@@ -218,15 +218,15 @@
typedef void (*dequant_t)(const int* src, int* dst, int width, int height, int mcqp_miper, int mcqp_mirem, bool useScalingList,
unsigned int trSizeLog2, int *dequantCoef);
-typedef void (*filterVmulti_t)(short *src, int srcStride, pixel *dstE, pixel *dstI, pixel *dstP, int dstStride,
- int block_width, int block_height, int marginX, int marginY);
typedef void (*filterVwghtd_t)(short *src, int srcStride, pixel *dstE, pixel *dstI, pixel *dstP, int dstStride, int block_width,
int block_height, int marginX, int marginY, int w, int roundw, int shiftw, int offsetw);
-typedef void (*filterHmulti_t)(pixel *src, int srcStride, short *midF, short* midA, short* midB, short* midC, int midStride,
- pixel *dstA, pixel *dstB, pixel *dstC, int dstStride, int block_width, int block_height, int marginX, int marginY);
typedef void (*filterHwghtd_t)(pixel *src, int srcStride, short *midF, short* midA, short* midB, short* midC, int midStride,
pixel *dstF, pixel *dstA, pixel *dstB, pixel *dstC, int dstStride, int block_width, int block_height,
int marginX, int marginY, int w, int roundw, int shiftw, int offsetw);
+typedef void (*filterRowH_t)(pixel *src, int srcStride, short* midA, short* midB, short* midC, int midStride, pixel *dstA, pixel *dstB, pixel *dstC, int width, int height, int marginX, int marginY, int row, int isLastRow);
+typedef void (*filterRowV_0_t)(pixel *src, int srcStride, pixel *dstA, pixel *dstB, pixel *dstC, int width, int height, int marginX, int marginY, int row, int isLastRow);
+typedef void (*filterRowV_N_t)(short *midA, int midStride, pixel *dstA, pixel *dstB, pixel *dstC, int dstStride, int width, int height, int marginX, int marginY, int row, int isLastRow);
+
typedef void (*weightpUni_t)(short *src, pixel *dst, int srcStride, int dstStride, int width, int height, int w0, int round, int shift, int offset);
typedef void (*scale_t)(pixel *dst, pixel *src, intptr_t stride);
typedef void (*downscale_t)(pixel *src0, pixel *dstf, pixel *dsth, pixel *dstv, pixel *dstc,
@@ -266,8 +266,9 @@
ipfilter_ss_t ipfilter_ss[NUM_IPFILTER_S_S];
ipfilter_p2s_t ipfilter_p2s;
ipfilter_s2p_t ipfilter_s2p;
- filterVmulti_t filterVmulti;
- filterHmulti_t filterHmulti;
+ filterRowH_t filterRowH;
+ filterRowV_0_t filterRowV_0;
+ filterRowV_N_t filterRowV_N;
intra_dc_t intra_pred_dc;
intra_planar_t intra_pred_planar;
diff -r 797c13ec5d2a -r aecd81195710 source/common/reference.cpp
--- a/source/common/reference.cpp Sun Aug 25 21:53:55 2013 -0500
+++ b/source/common/reference.cpp Mon Aug 26 18:48:45 2013 +0800
@@ -79,6 +79,13 @@
size_t padwidth = width + pic->m_lumaMarginX * 2;
size_t padheight = height + pic->m_lumaMarginY * 2;
+ for (int i = 0; i < 4; i++)
+ {
+ // TODO: it is unclear whether [0] needs storage when weighting is used; it is allocated anyway to be safe, but [0] is no longer filled
+ m_midBuf[i] = new short[width * (height + 3 + 4)]; // middle buffer extend size: left(0), right(0), top(3), bottom(4)
+ m_midBuf[i] += 3 * width;
+ }
+
if (w)
{
setWeight(*w);
@@ -111,8 +118,16 @@
{
JobProvider::flush();
+ int width = m_reconPic->getWidth();
+
for (int i = 0; i < 4; i++)
{
+ m_midBuf[i] -= 3 * width;
+ if (m_midBuf[i])
+ {
+ delete[] m_midBuf[i];
+ m_midBuf[i] = NULL;
+ }
for (int j = 0; j < 4; j++)
{
if (i == 0 && j == 0 && !isWeighted)
@@ -158,15 +173,29 @@
}
else
{
- primitives.filterHmulti(srcPtr, lumaStride, // source buffer
- intPtrF, intPtrA, intPtrB, intPtrC, m_intStride, // 4 intermediate HPEL buffers
- lumaPlane[1][0] + bufOffset,
- lumaPlane[2][0] + bufOffset,
- lumaPlane[3][0] + bufOffset, lumaStride, // 3 (x=n, y=0) output buffers (no V interp)
- m_filterWidth + (2 * s_intMarginX), // filter dimensions with margins
- m_filterHeight + (2 * s_intMarginY),
- m_reconPic->m_lumaMarginX - s_tmpMarginX - s_intMarginX, // pixel extension margins
- m_reconPic->m_lumaMarginY - s_tmpMarginY - s_intMarginY);
+ int midStride = m_reconPic->getWidth();
+ for(int i = 0; i < m_reconPic->m_numCuInHeight; i++ )
+ {
+ int isLast = (i == m_reconPic->m_numCuInHeight - 1);
+ int rowAddr = i * g_maxCUHeight * lumaStride;
+ int rowAddrMid = (i * g_maxCUHeight) * midStride;
+
+ primitives.filterRowH(m_reconPic->getLumaAddr() + rowAddr,
+ lumaStride,
+ m_midBuf[1] + rowAddrMid,
+ m_midBuf[2] + rowAddrMid,
+ m_midBuf[3] + rowAddrMid,
+ midStride,
+ lumaPlane[1][0] + rowAddr,
+ lumaPlane[2][0] + rowAddr,
+ lumaPlane[3][0] + rowAddr,
+ m_reconPic->getWidth(),
+ g_maxCUHeight,
+ m_reconPic->getLumaMarginX(),
+ m_reconPic->getLumaMarginY(),
+ i,
+ isLast);
+ }
}
}
@@ -214,7 +243,7 @@
return false;
}
-void MotionReference::generateReferencePlane(int x)
+void MotionReference::generateReferencePlane(const int x)
{
PPAScopeEvent(GenerateReferencePlanes);
@@ -235,7 +264,44 @@
}
else
{
- primitives.filterVmulti(intPtr, m_intStride, dstPtr1, dstPtr2, dstPtr3, lumaStride, m_filterWidth, m_filterHeight,
- m_reconPic->m_lumaMarginX - s_tmpMarginX, m_reconPic->m_lumaMarginY - s_tmpMarginY);
+ int midStride = m_reconPic->getWidth();
+ for(int i = 0; i < m_reconPic->m_numCuInHeight; i++ )
+ {
+ int isLast = (i == m_reconPic->m_numCuInHeight - 1);
+ int proch = g_maxCUHeight + (i == 0 ? -4 : 0) + (isLast ? 4 : 0);
+ int offset = (i == 0 ? 0 : 4);
+ int rowAddr = (i * g_maxCUHeight - offset) * lumaStride;
+ int rowAddrMid = (i * g_maxCUHeight - offset) * midStride;
+
+ if (x == 0)
+ {
+ primitives.filterRowV_0(m_reconPic->getLumaAddr() + rowAddr,
+ lumaStride,
+ lumaPlane[0][1] + rowAddr,
+ lumaPlane[0][2] + rowAddr,
+ lumaPlane[0][3] + rowAddr,
+ m_reconPic->getWidth(),
+ proch,
+ m_reconPic->getLumaMarginX(),
+ m_reconPic->getLumaMarginY(),
+ i,
+ isLast);
+ }
+ else
+ {
+ primitives.filterRowV_N(m_midBuf[x] + rowAddrMid,
+ midStride,
+ lumaPlane[x][1] + rowAddr,
+ lumaPlane[x][2] + rowAddr,
+ lumaPlane[x][3] + rowAddr,
+ lumaStride,
+ m_reconPic->getWidth(),
+ proch,
+ m_reconPic->getLumaMarginX(),
+ m_reconPic->getLumaMarginY(),
+ i,
+ isLast);
+ }
+ }
}
}
diff -r 797c13ec5d2a -r aecd81195710 source/common/reference.h
--- a/source/common/reference.h Sun Aug 25 21:53:55 2013 -0500
+++ b/source/common/reference.h Mon Aug 26 18:48:45 2013 +0800
@@ -68,7 +68,7 @@
protected:
bool findJob();
- void generateReferencePlane(int idx);
+ void generateReferencePlane(const int idx);
intptr_t m_startPad;
TComPicYuv *m_reconPic;
@@ -89,6 +89,7 @@
int m_filterWidth;
int m_filterHeight;
short *m_intermediateValues;
+ short *m_midBuf[4]; // 0: Full, 1:1/4, 2:2/4, 3:3/4
MotionReference& operator =(const MotionReference&);
};
diff -r 797c13ec5d2a -r aecd81195710 source/common/vec/ipfilter.inc
--- a/source/common/vec/ipfilter.inc Sun Aug 25 21:53:55 2013 -0500
+++ b/source/common/vec/ipfilter.inc Mon Aug 26 18:48:45 2013 +0800
@@ -61,10 +61,8 @@
p.ipfilter_s2p = filterConvertShortToPel;
#if !HIGH_BIT_DEPTH && INSTRSET >= X265_CPU_LEVEL_SSE41
- p.filterVmulti = filterVerticalMultiplaneExtend;
p.filterVwghtd = filterVerticalWeighted;
#if !(defined(_MSC_VER) && _MSC_VER == 1500 && X86_64)
- p.filterHmulti = filterHorizontalMultiplaneExtend;
p.filterHwghtd = filterHorizontalWeighted;
p.filterHCU = filterHorizontalExtendCol;
#endif
More information about the x265-devel
mailing list