[x265] [PATCH RFC] FrameFilter : H Filter and Border Extend for Each CU Row
gopu at multicorewareinc.com
gopu at multicorewareinc.com
Fri Aug 23 13:33:30 CEST 2013
# HG changeset patch
# User ggopu
# Date 1377257287 -19800
# Node ID 3b88b507fb12cf768209a8ba05ae72dc33f5f021
# Parent 77b53186d568de6623ca7826f141b7b099880100
FrameFilter : H Filter and Border Extend for Each CU Row
diff -r 77b53186d568 -r 3b88b507fb12 source/encoder/framefilter.cpp
--- a/source/encoder/framefilter.cpp Fri Aug 23 14:19:54 2013 +0530
+++ b/source/encoder/framefilter.cpp Fri Aug 23 16:58:07 2013 +0530
@@ -117,6 +117,25 @@
{
JobProvider::enqueue();
}
+
+ /* Create buffers for Hpel/Qpel Planes */
+
+ size_t width = m_pic->getPicYuvRec()->getWidth() + m_pic->getPicYuvRec()->getLumaMarginX() * 2;
+ size_t height = g_maxCUHeight + m_pic->getPicYuvRec()->getLumaMarginY() * 2;
+ size_t stride = m_pic->getPicYuvRec()->getWidth() + 4 * 4;
+ startPad = m_pic->getPicYuvRec()->getStride() + m_pic->getPicYuvRec()->getLumaMarginX();
+
+ m_lumaPlane[0][0] = m_pic->getPicYuvRec()->getBufY() + startPad;
+ m_intermediateValues = (short*)X265_MALLOC(short, 4 * stride * (m_pic->getNumCUsInFrame() + 0 * 4));
+ for (int i = 0; i < 4; i++)
+ {
+ for (int j = 0; j < 4; j++)
+ {
+ if (i == 0 && j == 0)
+ continue;
+ m_lumaPlane[i][j] = (pixel*)X265_MALLOC(pixel, width * height) + startPad;
+ }
+ }
}
void FrameFilter::wait()
@@ -132,6 +151,22 @@
{
m_sao.destroyPicSaoInfo();
}
+
+ if (m_intermediateValues)
+ X265_FREE(m_intermediateValues);
+
+ for (int i = 0; i < 4; i++)
+ {
+ for (int j = 0; j < 4; j++)
+ {
+ if (i == 0 && j == 0)
+ continue;
+ if (m_lumaPlane[i][j])
+ {
+ X265_FREE(m_lumaPlane[i][j] - startPad);
+ }
+ }
+ }
}
void FrameFilter::enqueueRow(int row)
@@ -148,7 +183,7 @@
// Called by worker threads
- // NOTE: We are here only active both of loopfilter and sao, and row 0 always finished, so we can safe to reuse row[0]'s data
+ // NOTE: We are here only active both of loopfilter and sao, and row 0 always finished, so we can safe to reuse row[0]'s data
if (row == 0)
{
// CHECK_ME: I think the SAO uses a temp Sbac only, so I always use [0], am I right?
@@ -222,6 +257,13 @@
}
}
+ hFilterip(m_pic->getPicYuvRec()->getLumaAddr(row),
+ m_pic->getPicYuvRec()->getStride(),
+ m_pic->getPicYuvRec()->getWidth(),
+ g_maxCUHeight,
+ m_pic->getPicYuvRec()->getLumaMarginX(),
+ m_pic->getPicYuvRec()->getLumaMarginY(), row);
+
// this row of CTUs has been encoded
if (row == m_numRows - 1)
{
@@ -238,6 +280,153 @@
m_sao.processSaoUnitRow(saoParam->saoLcuParam[2], row, 2);
}
+ hFilterip(m_pic->getPicYuvRec()->getLumaAddr(row),
+ m_pic->getPicYuvRec()->getStride(),
+ m_pic->getPicYuvRec()->getWidth(),
+ g_maxCUHeight,
+ m_pic->getPicYuvRec()->getLumaMarginX(),
+ m_pic->getPicYuvRec()->getLumaMarginY(), -1);
+
m_completionEvent.trigger();
}
}
+
+/**
+ * Pads one CU row of the luma plane and runs the horizontal sub-pel filter on it.
+ *
+ * Steps, in order:
+ *   1. replicate the first/last pixel of every line into the left/right margin;
+ *   2. when row is TOP or BOTTOM, replicate one padded line vertically;
+ *   3. call primitives.filterHCU(), which is handed the four intermediate
+ *      buffers and m_lumaPlane[1..3][0] as outputs;
+ *   4. when row is TOP or BOTTOM, replicate one line of m_lumaPlane[0..3][0]
+ *      vertically.
+ *
+ * @param insrc        start of the CU row inside the luma plane
+ * @param lumaStride   pitch of the luma plane, in pixels
+ * @param width        number of pixels per line to process
+ * @param height       number of lines in the CU row
+ * @param lumaMarginX  horizontal padding on each side, in pixels
+ * @param lumaMarginY  vertical padding, in lines
+ * @param row          TOP (0) or BOTTOM (-1) enables the vertical padding
+ *                     steps; any other value skips them
+ */
+void FrameFilter::hFilterip(pixel *insrc, int lumaStride, int width, int height, int lumaMarginX, int lumaMarginY, int row)
+{
+    // Sub-pels are generated with a margin of tmpMargin around the block
+    static const int tmpMarginX = 4;
+    static const int tmpMarginY = 4;
+
+    // Extra margin for horizontal filter
+    static const int intMarginX = 0;
+    static const int intMarginY = 4;
+
+    // Pitch used to carve m_intermediateValues into four buffers
+    int stride = width + tmpMarginX * 4;
+
+    short* intPtrF = m_intermediateValues;
+    short* intPtrA = m_intermediateValues + 1 * stride * (height + 0 * 4);
+    short* intPtrB = m_intermediateValues + 2 * stride * (height + 0 * 4);
+    short* intPtrC = m_intermediateValues + 3 * stride * (height + 0 * 4);
+
+    int filterWidth = width + tmpMarginX * 2;
+    int filterHeight = height + tmpMarginY * 2;
+
+    int marginX = lumaMarginX - tmpMarginX - intMarginX;
+    int marginY = lumaMarginY - tmpMarginY - intMarginY;
+
+    // The former blockcpy_pp() call copied the row onto itself (same pointer,
+    // same stride), so it has been dropped.
+    pixel *src = insrc;
+
+    /* HPEL generation requires luma integer plane to already be extended:
+     * replicate the edge pixels of each line into the left and right margins */
+    for (int y = 0; y < height; y++)
+    {
+        for (int x = 0; x < lumaMarginX; x++)
+        {
+            src[-lumaMarginX + x] = src[0];
+            src[width + x] = src[width - 1];
+        }
+
+        src += lumaStride;
+    }
+
+    // src now points one line past the CU row
+    const size_t paddedLineBytes = sizeof(Pel) * (width + (lumaMarginX << 1));
+
+    /* NOTE(review): for TOP this copies the LAST padded line of the CU row into
+     * the lines below it, and for BOTTOM (below) a line is copied upwards; the
+     * two directions look swapped relative to the names. Pointer arithmetic is
+     * kept as submitted -- confirm the intent. */
+    if (row == TOP)
+    {
+        src -= (lumaStride + lumaMarginX);
+        for (int y = 0; y < lumaMarginY; y++)
+        {
+            ::memcpy(src + (y + 1) * lumaStride, src, paddedLineBytes);
+        }
+    }
+
+    /* NOTE(review): when only this branch runs, src starts one line past the
+     * row, so stepping back (height - 1) lines lands on the second line and
+     * without the -lumaMarginX shift used in the TOP branch -- confirm. */
+    if (row == BOTTOM)
+    {
+        src -= ((height - 1) * lumaStride);
+        // Vertical replication counts lines, so the bound is lumaMarginY
+        // (it was lumaMarginX).
+        for (int y = 0; y < lumaMarginY; y++)
+        {
+            ::memcpy(src - (y + 1) * lumaStride, src, paddedLineBytes);
+        }
+    }
+
+    /* NOTE(review): src has been advanced by the padding loop (and possibly
+     * moved again above), so srcPtr is not derived from the start of the CU
+     * row -- confirm this is where filterHCU is meant to read from. */
+    int bufOffset = -(tmpMarginX + intMarginX);
+    pixel *srcPtr = src + bufOffset;
+
+    primitives.filterHCU(srcPtr, lumaStride,                         // source buffer
+                         intPtrF, intPtrA, intPtrB, intPtrC, stride, // 4 intermediate HPEL buffers
+                         m_lumaPlane[1][0] + bufOffset,
+                         m_lumaPlane[2][0] + bufOffset,
+                         m_lumaPlane[3][0] + bufOffset, lumaStride,  // 3 (x=n, y=0) output buffers (no V interp)
+                         filterWidth,                                // filter dimensions
+                         filterHeight,
+                         marginX                                     // pixel extension margins only for left and right
+                         );
+
+    // memcpy takes a byte count; scale by sizeof(pixel) so that builds where a
+    // pixel is wider than one byte copy the whole line.
+    const size_t planeLineBytes = sizeof(pixel) * (filterWidth + marginX * 2);
+
+    /* NOTE(review): the loops below step by 'stride' (the intermediate-buffer
+     * pitch) although filterHCU was given lumaStride for these planes --
+     * confirm which pitch the planes really use. */
+
+    // Extending top rows of planes [0..3][0]
+    if (row == TOP)
+    {
+        for (int p = 0; p < 4; p++)
+        {
+            pixel *line = m_lumaPlane[p][0] - marginX;
+            for (int y = 1; y <= marginY; y++)
+            {
+                memcpy(line - y * stride, line, planeLineBytes);
+            }
+        }
+    }
+
+    // Extending bottom rows of planes [0..3][0]
+    if (row == BOTTOM)
+    {
+        for (int p = 0; p < 4; p++)
+        {
+            pixel *line = m_lumaPlane[p][0] + (filterHeight - 1) * stride - marginX;
+            for (int y = 1; y <= marginY; y++)
+            {
+                memcpy(line + y * stride, line, planeLineBytes);
+            }
+        }
+    }
+}
diff -r 77b53186d568 -r 3b88b507fb12 source/encoder/framefilter.h
--- a/source/encoder/framefilter.h Fri Aug 23 14:19:54 2013 +0530
+++ b/source/encoder/framefilter.h Fri Aug 23 16:58:07 2013 +0530
@@ -32,6 +32,9 @@
#include "threading.h"
#include "wavefront.h"
+#define TOP 0       // 'row' value that turns on the first-row border extension in FrameFilter::hFilterip(); equals CU row index 0
+#define BOTTOM -1   // 'row' value that turns on the last-row border extension in FrameFilter::hFilterip(); not a real row index
+
class TEncTop;
namespace x265 {
@@ -62,6 +65,8 @@
void processRow(int row);
+ void hFilterip(pixel *src, int lumaStride, int width, int height, int lumaMarginX, int lumaMarginY, int row);
+
protected:
TEncCfg* m_cfg;
@@ -75,6 +80,9 @@
TEncEntropy* m_entropyCoder;
TEncSbac* m_rdGoOnSbacCoder;
int m_numRows;
+ short* m_intermediateValues; // intermediate values between the H and V filters; hFilterip() splits it into four buffers
+ pixel *m_lumaPlane[4][4]; // reference planes for a CU row; [0][0] points into the reconstructed luma buffer, the others are allocated separately
+ size_t startPad; // offset added to each allocated plane pointer, and subtracted again before the plane is freed
// TODO: if you want thread priority logic, add col here
volatile int row_ready;
@@ -82,7 +90,6 @@
Event m_completionEvent;
};
-
}
#endif // ifndef __FRAMEFILTER__
More information about the x265-devel
mailing list