[x265] [PATCH RFC] FrameFilter : H Filter and Border Extend for Each CU Row

gopu at multicorewareinc.com gopu at multicorewareinc.com
Fri Aug 23 13:33:30 CEST 2013


# HG changeset patch
# User ggopu
# Date 1377257287 -19800
# Node ID 3b88b507fb12cf768209a8ba05ae72dc33f5f021
# Parent  77b53186d568de6623ca7826f141b7b099880100
FrameFilter : H Filter and Border Extend for Each CU Row

diff -r 77b53186d568 -r 3b88b507fb12 source/encoder/framefilter.cpp
--- a/source/encoder/framefilter.cpp	Fri Aug 23 14:19:54 2013 +0530
+++ b/source/encoder/framefilter.cpp	Fri Aug 23 16:58:07 2013 +0530
@@ -117,6 +117,25 @@
     {
         JobProvider::enqueue();
     }
+
+    /* Create buffers for Hpel/Qpel Planes */
+
+    size_t width = m_pic->getPicYuvRec()->getWidth() + m_pic->getPicYuvRec()->getLumaMarginX() * 2;
+    size_t height = g_maxCUHeight + m_pic->getPicYuvRec()->getLumaMarginY() * 2;
+    size_t stride = m_pic->getPicYuvRec()->getWidth() + 4 * 4;
+    startPad = m_pic->getPicYuvRec()->getStride() + m_pic->getPicYuvRec()->getLumaMarginX();
+
+    m_lumaPlane[0][0] = m_pic->getPicYuvRec()->getBufY() + startPad;
+    m_intermediateValues = (short*)X265_MALLOC(short, 4 *  stride * (m_pic->getNumCUsInFrame() + 0 * 4));
+    for (int i = 0; i < 4; i++)
+    {
+        for (int j = 0; j < 4; j++)
+        {
+            if (i == 0 && j == 0)
+                continue;
+            m_lumaPlane[i][j] = (pixel*)X265_MALLOC(pixel,  width * height) + startPad;
+        }
+    }
 }
 
 void FrameFilter::wait()
@@ -132,6 +151,22 @@
     {
         m_sao.destroyPicSaoInfo();
     }
+
+    if (m_intermediateValues)
+        X265_FREE(m_intermediateValues);
+
+    for (int i = 0; i < 4; i++)
+    {
+        for (int j = 0; j < 4; j++)
+        {
+            if (i == 0 && j == 0)
+                continue;
+            if (m_lumaPlane[i][j])
+            {
+                X265_FREE(m_lumaPlane[i][j] - startPad);
+            }
+        }
+    }
 }
 
 void FrameFilter::enqueueRow(int row)
@@ -148,7 +183,7 @@
 
     // Called by worker threads
 
-    // NOTE: We are here only active both of loopfilter and sao, and row 0 always finished, so we can safe to reuse row[0]'s data 
+    // NOTE: We are here only active both of loopfilter and sao, and row 0 always finished, so we can safe to reuse row[0]'s data
     if (row == 0)
     {
         // CHECK_ME: I think the SAO uses a temp Sbac only, so I always use [0], am I right?
@@ -222,6 +257,13 @@
         }
     }
 
+    hFilterip(m_pic->getPicYuvRec()->getLumaAddr(row),
+              m_pic->getPicYuvRec()->getStride(),
+              m_pic->getPicYuvRec()->getWidth(),
+              g_maxCUHeight,
+              m_pic->getPicYuvRec()->getLumaMarginX(),
+              m_pic->getPicYuvRec()->getLumaMarginY(), row);
+
     // this row of CTUs has been encoded
     if (row == m_numRows - 1)
     {
@@ -238,6 +280,153 @@
             m_sao.processSaoUnitRow(saoParam->saoLcuParam[2], row, 2);
         }
 
+        hFilterip(m_pic->getPicYuvRec()->getLumaAddr(row),
+                  m_pic->getPicYuvRec()->getStride(),
+                  m_pic->getPicYuvRec()->getWidth(),
+                  g_maxCUHeight,
+                  m_pic->getPicYuvRec()->getLumaMarginX(),
+                  m_pic->getPicYuvRec()->getLumaMarginY(), -1);
+
         m_completionEvent.trigger();
     }
 }
+
+/* Horizontally filters one CU row of reconstructed luma and pads its borders.
+ *
+ * Steps, in order:
+ *   1. replicate the first/last pixel of each row into the left/right margins
+ *   2. for the TOP / BOTTOM row, replicate the outer line into the vertical margin
+ *   3. run primitives.filterHCU over the CU row
+ *   4. for the TOP / BOTTOM row, extend the m_lumaPlane[0..3][0] planes vertically
+ *
+ * Writes through insrc and into the m_lumaPlane buffers; returns nothing.
+ *
+ * insrc       - first luma sample of the CU row; the margins around it are written
+ * lumaStride  - distance, in pixels, between consecutive luma rows
+ * width       - number of valid pixels in each row
+ * height      - number of luma rows in the CU row
+ * lumaMarginX - horizontal padding of the luma plane, in pixels
+ * lumaMarginY - vertical padding of the luma plane, in rows
+ * row         - TOP (0): also pad above the CU row; BOTTOM (-1): also pad below
+ *               it; any other value: pad left and right only */
+
+void FrameFilter::hFilterip(pixel *insrc, int lumaStride, int width, int height, int lumaMarginX, int lumaMarginY, int row)
+{
+    // Margin kept around the block by the sub-pel filter
+    static const int tmpMarginX = 4;
+    static const int tmpMarginY = 4;
+
+    // Extra margin for horizontal filter
+    static const int intMarginX = 0;
+    static const int intMarginY = 4;
+
+    // Row stride, in shorts, of the intermediate buffers handed to filterHCU
+    int stride = width + tmpMarginX * 4;
+
+    // m_intermediateValues is split into four regions of stride * height shorts
+    short* intPtrF = m_intermediateValues;
+    short* intPtrA = m_intermediateValues + 1 * stride * (height + 0 * 4);
+    short* intPtrB = m_intermediateValues + 2 * stride * (height + 0 * 4);
+    short* intPtrC = m_intermediateValues + 3 * stride * (height + 0 * 4);
+
+    // NOTE(review): filterHeight is larger than the `height` rows reserved per
+    // region above -- confirm filterHCU cannot run past an intermediate region
+    int filterWidth = width + tmpMarginX * 2;
+    int filterHeight = height + tmpMarginY * 2;
+
+    // Bytes in one fully padded luma row (left margin + width + right margin)
+    const size_t paddedRowBytes = sizeof(Pel) * (width + (lumaMarginX << 1));
+
+    /* HPEL generation requires luma integer plane to already be extended:
+     * replicate the first and last pixel of every row into the side margins */
+    pixel *line = insrc;
+    for (int y = 0; y < height; y++)
+    {
+        for (int x = 0; x < lumaMarginX; x++)
+        {
+            line[-lumaMarginX + x] = line[0];
+            line[width + x] = line[width - 1];
+        }
+
+        line += lumaStride;
+    }
+
+    /* The vertical padding below is addressed from insrc, not from the cursor
+     * used in the loop above, so it always starts on the intended line */
+
+    /* First CU row: replicate its first (already side-padded) line upwards */
+    if (row == TOP)
+    {
+        pixel *top = insrc - lumaMarginX;
+        for (int y = 0; y < lumaMarginY; y++)
+        {
+            ::memcpy(top - (y + 1) * lumaStride, top, paddedRowBytes);
+        }
+    }
+
+    /* Last CU row: replicate its last (already side-padded) line downwards */
+    if (row == BOTTOM)
+    {
+        pixel *bottom = insrc + (height - 1) * lumaStride - lumaMarginX;
+        for (int y = 0; y < lumaMarginY; y++)
+        {
+            ::memcpy(bottom + (y + 1) * lumaStride, bottom, paddedRowBytes);
+        }
+    }
+
+    // The filter input starts tmpMarginX + intMarginX pixels left of the CU row
+    int bufOffset = -(tmpMarginX + intMarginX);
+    pixel *srcPtr = insrc + bufOffset;
+
+    // Filter the CU row; planes 1..3 receive the output, all using lumaStride
+    primitives.filterHCU(srcPtr, lumaStride,            // source buffer
+                         intPtrF, intPtrA, intPtrB, intPtrC, stride,    // 4 intermediate HPEL buffers
+                         m_lumaPlane[1][0] + bufOffset,
+                         m_lumaPlane[2][0] + bufOffset,
+                         m_lumaPlane[3][0] + bufOffset, lumaStride,    // 3 (x=n, y=0) output buffers (no V interp)
+                         filterWidth,    // filter dimensions
+                         filterHeight,
+                         lumaMarginX - tmpMarginX - intMarginX    // pixel extension margins only for left and right
+                         );
+
+    // Margin still to be filled by replication once the filter margins are deducted
+    int marginX = lumaMarginX - tmpMarginX - intMarginX;
+    int marginY = lumaMarginY - tmpMarginY - intMarginY;
+
+    // memcpy counts bytes, so the pixel count is scaled by sizeof(pixel)
+    const size_t extendBytes = sizeof(pixel) * (filterWidth + marginX * 2);
+
+    // lumaStride is the stride passed to filterHCU for these planes, so rows
+    // are stepped with lumaStride here as well.
+    // NOTE(review): each row is taken to begin marginX pixels left of the plane
+    // origin, as before -- confirm against filterHCU's output layout
+
+    // Extend the top rows of planes 0..3
+    if (row == TOP)
+    {
+        for (int p = 0; p < 4; p++)
+        {
+            // Line to replicate: row 0 of plane p
+            pixel *first = m_lumaPlane[p][0] - marginX;
+            for (int y = 1; y <= marginY; y++)
+            {
+                memcpy(first - y * lumaStride, first, extendBytes);
+            }
+        }
+    }
+
+    // Extend the bottom rows of planes 0..3
+    if (row == BOTTOM)
+    {
+        for (int p = 0; p < 4; p++)
+        {
+            // Line to replicate: row (filterHeight - 1) of plane p, counted
+            // from the plane origin
+            pixel *last = m_lumaPlane[p][0] + (filterHeight - 1) * lumaStride - marginX;
+            for (int y = 1; y <= marginY; y++)
+            {
+                memcpy(last + y * lumaStride, last, extendBytes);
+            }
+        }
+    }
+}
diff -r 77b53186d568 -r 3b88b507fb12 source/encoder/framefilter.h
--- a/source/encoder/framefilter.h	Fri Aug 23 14:19:54 2013 +0530
+++ b/source/encoder/framefilter.h	Fri Aug 23 16:58:07 2013 +0530
@@ -32,6 +32,9 @@
 #include "threading.h"
 #include "wavefront.h"
 
+#define TOP 0
+#define BOTTOM -1
+
 class TEncTop;
 
 namespace x265 {
@@ -62,6 +65,8 @@
 
     void processRow(int row);
 
+    void hFilterip(pixel *src, int lumaStride, int width, int height, int lumaMarginX, int lumaMarginY, int row);
+
 protected:
 
     TEncCfg*            m_cfg;
@@ -75,6 +80,9 @@
     TEncEntropy*                m_entropyCoder;
     TEncSbac*                   m_rdGoOnSbacCoder;
     int                         m_numRows;
+    short*                      m_intermediateValues;  // Intermediate values between H and V
+    pixel                       *m_lumaPlane[4][4];    // reference planes for CU row
+    size_t                      startPad;
 
     // TODO: if you want thread priority logic, add col here
     volatile int                row_ready;
@@ -82,7 +90,6 @@
 
     Event                       m_completionEvent;
 };
-
 }
 
 #endif // ifndef __FRAMEFILTER__


More information about the x265-devel mailing list