[x265] [PATCH] framepp: row based interpolate

Min Chen chenm003 at 163.com
Mon Aug 26 12:49:26 CEST 2013


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1377514125 -28800
# Node ID aecd81195710c367d3a21b1ec877c76368afd614
# Parent  797c13ec5d2ae7985027f59dcf4d4c5f86c1d367
framepp: row based interpolate

diff -r 797c13ec5d2a -r aecd81195710 source/Lib/TLibCommon/TComPicYuv.cpp
--- a/source/Lib/TLibCommon/TComPicYuv.cpp	Sun Aug 25 21:53:55 2013 -0500
+++ b/source/Lib/TLibCommon/TComPicYuv.cpp	Mon Aug 26 18:48:45 2013 +0800
@@ -76,8 +76,11 @@
     m_cuWidth  = maxCUWidth;
     m_cuHeight = maxCUHeight;
 
-    Int numCuInWidth  = m_picWidth  / m_cuWidth  + (m_picWidth  % m_cuWidth  != 0);
-    Int numCuInHeight = m_picHeight / m_cuHeight + (m_picHeight % m_cuHeight != 0);
+    Int numCuInWidth  = (m_picWidth + m_cuWidth - 1)  / m_cuWidth;
+    Int numCuInHeight = (m_picHeight + m_cuHeight - 1) / m_cuHeight;
+
+    m_numCuInWidth = numCuInWidth;
+    m_numCuInHeight = numCuInHeight;
 
     m_lumaMarginX = g_maxCUWidth  + 16; // for 16-byte alignment
     m_lumaMarginY = g_maxCUHeight + 16; // margin for 8-tap filter and infinite padding
diff -r 797c13ec5d2a -r aecd81195710 source/Lib/TLibCommon/TComPicYuv.h
--- a/source/Lib/TLibCommon/TComPicYuv.h	Sun Aug 25 21:53:55 2013 -0500
+++ b/source/Lib/TLibCommon/TComPicYuv.h	Mon Aug 26 18:48:45 2013 +0800
@@ -77,7 +77,6 @@
     // ------------------------------------------------------------------------------------------------
     //  Parameter for general YUV buffer usage
     // ------------------------------------------------------------------------------------------------
-
     Int   m_picWidth;          ///< Width of picture
     Int   m_picHeight;         ///< Height of picture
 
@@ -98,6 +97,8 @@
     Bool  m_bIsBorderExtended;
 
 public:
+    Int   m_numCuInWidth;
+    Int   m_numCuInHeight;
 
     TComPicYuv();
     virtual ~TComPicYuv();
diff -r 797c13ec5d2a -r aecd81195710 source/common/ipfilter.cpp
--- a/source/common/ipfilter.cpp	Sun Aug 25 21:53:55 2013 -0500
+++ b/source/common/ipfilter.cpp	Mon Aug 26 18:48:45 2013 +0800
@@ -478,31 +478,6 @@
     }
 }
 
-void filterVerticalMultiplaneExtend(short *src, int srcStride, pixel *dstE, pixel *dstI, pixel *dstP, int dstStride, int block_width, int block_height, int marginX, int marginY)
-{
-    filterVertical_s_p<8>(src, srcStride, dstI, dstStride, block_width, block_height, g_lumaFilter[2]);
-    filterVertical_s_p<8>(src, srcStride, dstE, dstStride, block_width, block_height, g_lumaFilter[1]);
-    filterVertical_s_p<8>(src, srcStride, dstP, dstStride, block_width, block_height, g_lumaFilter[3]);
-    extendPicCompBorder(dstE, dstStride, block_width, block_height, marginX, marginY);
-    extendPicCompBorder(dstI, dstStride, block_width, block_height, marginX, marginY);
-    extendPicCompBorder(dstP, dstStride, block_width, block_height, marginX, marginY);
-}
-
-void filterHorizontalMultiplaneExtend(pixel *src, int srcStride, short *midF, short* midA, short* midB, short* midC, int midStride, pixel *pDstA, pixel *pDstB, pixel *pDstC, int pDstStride, int block_width, int block_height, int marginX, int marginY)
-{
-    filterConvertPelToShort(src, srcStride, midF, midStride, block_width, block_height);
-    filterHorizontal_p_s<8>(src, srcStride, midB, midStride, block_width, block_height, g_lumaFilter[2]);
-    filterHorizontal_p_s<8>(src, srcStride, midA, midStride, block_width, block_height, g_lumaFilter[1]);
-    filterHorizontal_p_s<8>(src, srcStride, midC, midStride, block_width, block_height, g_lumaFilter[3]);
-    filterConvertShortToPel(midA, midStride, pDstA, pDstStride, block_width, block_height);
-    filterConvertShortToPel(midB, midStride, pDstB, pDstStride, block_width, block_height);
-    filterConvertShortToPel(midC, midStride, pDstC, pDstStride, block_width, block_height);
-
-    extendPicCompBorder(pDstA, pDstStride, block_width, block_height, marginX, marginY);
-    extendPicCompBorder(pDstB, pDstStride, block_width, block_height, marginX, marginY);
-    extendPicCompBorder(pDstC, pDstStride, block_width, block_height, marginX, marginY);
-}
-
 void filterHorizontalExtendCol(pixel *src, int srcStride, short *midF, short* midA, short* midB, short* midC, int midStride, pixel *pDstA, pixel *pDstB, pixel *pDstC, int pDstStride, int block_width, int block_height, int marginX)
 {
     filterConvertPelToShort(src, srcStride, midF, midStride, block_width, block_height);
@@ -594,6 +569,147 @@
 }
 }
 
+void filterRowH(pixel *src, int srcStride, short* midA, short* midB, short* midC, int midStride, pixel *dstA, pixel *dstB, pixel *dstC, int width, int height, int marginX, int marginY, int row, int isLastRow)
+{   // Horizontal pass over one band of `height` rows: pads src, filters it with g_lumaFilter[1..3] into midA/B/C (short) and dstA/B/C (pixel), then pads the outputs
+    // Extend the full-pel source rows of this band to the left and right (helper is given marginX)
+    extendCURowColBorder(src, srcStride, width, height, marginX);
+
+    // First band only (row == 0): copy the top source row, side margins included (width + 2 * marginX pixels), into the marginY rows above it
+    if (row == 0)
+    {
+        for(int y = 0; y < marginY; y++)
+        {
+            ::memcpy(src - marginX - (y + 1) * srcStride, src - marginX, sizeof(pixel) * (width + (marginX << 1)));
+        }
+    }
+
+    // Last band only: copy source row (height - 1) into the marginY rows below it. NOTE(review): the visible caller passes g_maxCUHeight as height for every band - confirm row (height - 1) is valid when the picture height is not a multiple of it
+    if (isLastRow)
+    {
+        for(int y = 0; y < marginY; y++)
+        {
+            ::memcpy(src - marginX + (height + y) * srcStride, src - marginX + (height - 1) * srcStride, sizeof(pixel) * (width + (marginX << 1)));
+        }
+    }
+    filterHorizontal_p_s<8>(src, srcStride, midA, midStride, width, height, g_lumaFilter[1]); // pixel -> short intermediate, coefficient set 1
+    filterHorizontal_p_s<8>(src, srcStride, midB, midStride, width, height, g_lumaFilter[2]); // coefficient set 2
+    filterHorizontal_p_s<8>(src, srcStride, midC, midStride, width, height, g_lumaFilter[3]); // coefficient set 3
+    filterConvertShortToPel(midA, midStride, dstA, srcStride, width, height); // dst planes are addressed with srcStride; there is no separate dst stride parameter
+    filterConvertShortToPel(midB, midStride, dstB, srcStride, width, height);
+    filterConvertShortToPel(midC, midStride, dstC, srcStride, width, height);
+
+    // Extend the three sub-pel output planes to the left and right, same helper and margin as for src
+    extendCURowColBorder(dstA, srcStride, width, height, marginX);
+    extendCURowColBorder(dstB, srcStride, width, height, marginX);
+    extendCURowColBorder(dstC, srcStride, width, height, marginX);
+
+    if (row == 0)
+    {
+        // Sub-pel top: copy srcStride pixels starting at (dst - marginX) into each of the marginY rows above. NOTE(review): src above copies width + 2 * marginX instead; the two match only if srcStride == width + 2 * marginX - confirm
+        for(int y = 0; y < marginY; y++)
+        {
+            ::memcpy(dstA - marginX - (y + 1) * srcStride, dstA - marginX, sizeof(pixel) * srcStride);
+            ::memcpy(dstB - marginX - (y + 1) * srcStride, dstB - marginX, sizeof(pixel) * srcStride);
+            ::memcpy(dstC - marginX - (y + 1) * srcStride, dstC - marginX, sizeof(pixel) * srcStride);
+        }
+
+        // Intermediate (short) top: copy the first mid row (midStride shorts, no side margin) into the 3 rows above it; presumably the rows an 8-tap vertical filter reads above row 0 - confirm
+        for(int y = 0; y < 3; y++)
+        {
+            ::memcpy(midA - (y + 1) * midStride, midA, midStride * sizeof(short));
+            ::memcpy(midB - (y + 1) * midStride, midB, midStride * sizeof(short));
+            ::memcpy(midC - (y + 1) * midStride, midC, midStride * sizeof(short));
+        }
+    }
+
+    if (isLastRow)
+    {
+        // Sub-pel bottom: copy row (height - 1), srcStride pixels from (dst - marginX), into each of the marginY rows below
+        for(int y = 0; y < marginY; y++)
+        {
+            ::memcpy(dstA - marginX + (height + y) * srcStride, dstA - marginX + (height - 1) * srcStride, sizeof(pixel) * srcStride);
+            ::memcpy(dstB - marginX + (height + y) * srcStride, dstB - marginX + (height - 1) * srcStride, sizeof(pixel) * srcStride);
+            ::memcpy(dstC - marginX + (height + y) * srcStride, dstC - marginX + (height - 1) * srcStride, sizeof(pixel) * srcStride);
+        }
+
+        // Intermediate (short) bottom: copy mid row (height - 1) into the 4 rows below it; the caller must have allocated those rows
+        for(int y = 0; y < 4; y++)
+        {
+            ::memcpy(midA + (height + y) * midStride, midA + (height - 1) * midStride, midStride * sizeof(short));
+            ::memcpy(midB + (height + y) * midStride, midB + (height - 1) * midStride, midStride * sizeof(short));
+            ::memcpy(midC + (height + y) * midStride, midC + (height - 1) * midStride, midStride * sizeof(short));
+        }
+    }
+}
+
+void filterRowV_0(pixel *src, int srcStride, pixel *dstA, pixel *dstB, pixel *dstC, int width, int height, int marginX, int marginY, int row, int isLastRow)
+{   // Vertical pass straight from the pixel plane src: writes g_lumaFilter[1..3] results to dstA/B/C for `height` rows, then pads the outputs
+    filterVertical_p_p<8>(src, srcStride, dstA, srcStride, width, height, g_lumaFilter[1]); // srcStride is used as both source and destination stride
+    filterVertical_p_p<8>(src, srcStride, dstB, srcStride, width, height, g_lumaFilter[2]);
+    filterVertical_p_p<8>(src, srcStride, dstC, srcStride, width, height, g_lumaFilter[3]);
+
+    // Extend the three sub-pel output planes to the left and right (helper is given marginX)
+    extendCURowColBorder(dstA, srcStride, width, height, marginX);
+    extendCURowColBorder(dstB, srcStride, width, height, marginX);
+    extendCURowColBorder(dstC, srcStride, width, height, marginX);
+
+    if (row == 0)
+    {
+        // Sub-pel top (first band only): copy srcStride pixels starting at (dst - marginX) into each of the marginY rows above; assumes srcStride covers width plus both side margins - confirm
+        for(int y = 0; y < marginY; y++)
+        {
+            ::memcpy(dstA - marginX - (y + 1) * srcStride, dstA - marginX, sizeof(pixel) * srcStride);
+            ::memcpy(dstB - marginX - (y + 1) * srcStride, dstB - marginX, sizeof(pixel) * srcStride);
+            ::memcpy(dstC - marginX - (y + 1) * srcStride, dstC - marginX, sizeof(pixel) * srcStride);
+        }
+    }
+
+    if (isLastRow)
+    {
+        // Sub-pel bottom (last band only): copy row (height - 1) into each of the marginY rows below it
+        for(int y = 0; y < marginY; y++)
+        {
+            ::memcpy(dstA - marginX + (height + y) * srcStride, dstA - marginX + (height - 1) * srcStride, sizeof(pixel) * srcStride);
+            ::memcpy(dstB - marginX + (height + y) * srcStride, dstB - marginX + (height - 1) * srcStride, sizeof(pixel) * srcStride);
+            ::memcpy(dstC - marginX + (height + y) * srcStride, dstC - marginX + (height - 1) * srcStride, sizeof(pixel) * srcStride);
+        }
+    }
+}
+
+void filterRowV_N(short *midA, int midStride, pixel *dstA, pixel *dstB, pixel *dstC, int dstStride, int width, int height, int marginX, int marginY, int row, int isLastRow)
+{   // Vertical pass from one short intermediate plane (midA; the visible caller passes m_midBuf[x]): writes g_lumaFilter[1..3] results to dstA/B/C, then pads the outputs
+    filterVertical_s_p<8>(midA, midStride, dstA, dstStride, width, height, g_lumaFilter[1]); // short intermediate -> pixel, coefficient set 1
+    filterVertical_s_p<8>(midA, midStride, dstB, dstStride, width, height, g_lumaFilter[2]); // coefficient set 2
+    filterVertical_s_p<8>(midA, midStride, dstC, dstStride, width, height, g_lumaFilter[3]); // coefficient set 3
+
+    // Extend the three sub-pel output planes to the left and right (helper is given marginX)
+    extendCURowColBorder(dstA, dstStride, width, height, marginX);
+    extendCURowColBorder(dstB, dstStride, width, height, marginX);
+    extendCURowColBorder(dstC, dstStride, width, height, marginX);
+
+    if (row == 0)
+    {
+        // Sub-pel top (first band only): copy dstStride pixels starting at (dst - marginX) into each of the marginY rows above; assumes dstStride covers width plus both side margins - confirm
+        for(int y = 0; y < marginY; y++)
+        {
+            ::memcpy(dstA - marginX - (y + 1) * dstStride, dstA - marginX, sizeof(pixel) * dstStride);
+            ::memcpy(dstB - marginX - (y + 1) * dstStride, dstB - marginX, sizeof(pixel) * dstStride);
+            ::memcpy(dstC - marginX - (y + 1) * dstStride, dstC - marginX, sizeof(pixel) * dstStride);
+        }
+    }
+
+    if (isLastRow)
+    {
+        // Sub-pel bottom (last band only): copy row (height - 1) into each of the marginY rows below it
+        for(int y = 0; y < marginY; y++)
+        {
+            ::memcpy(dstA - marginX + (height + y) * dstStride, dstA - marginX + (height - 1) * dstStride, sizeof(pixel) * dstStride);
+            ::memcpy(dstB - marginX + (height + y) * dstStride, dstB - marginX + (height - 1) * dstStride, sizeof(pixel) * dstStride);
+            ::memcpy(dstC - marginX + (height + y) * dstStride, dstC - marginX + (height - 1) * dstStride, sizeof(pixel) * dstStride);
+        }
+    }
+}
+
 namespace x265 {
 // x265 private namespace
 
@@ -615,8 +731,9 @@
     p.ipfilter_p2s = filterConvertPelToShort;
     p.ipfilter_s2p = filterConvertShortToPel;
 
-    p.filterVmulti = filterVerticalMultiplaneExtend;
-    p.filterHmulti = filterHorizontalMultiplaneExtend;
+    p.filterRowH = filterRowH;
+    p.filterRowV_0 = filterRowV_0;
+    p.filterRowV_N = filterRowV_N;
 
     p.filterVwghtd = filterVerticalWeighted;         
     p.filterHwghtd = filterHorizontalWeighted;
diff -r 797c13ec5d2a -r aecd81195710 source/common/primitives.h
--- a/source/common/primitives.h	Sun Aug 25 21:53:55 2013 -0500
+++ b/source/common/primitives.h	Mon Aug 26 18:48:45 2013 +0800
@@ -218,15 +218,15 @@
 typedef void (*dequant_t)(const int* src, int* dst, int width, int height, int mcqp_miper, int mcqp_mirem, bool useScalingList,
                           unsigned int trSizeLog2, int *dequantCoef);
 
-typedef void (*filterVmulti_t)(short *src, int srcStride, pixel *dstE, pixel *dstI, pixel *dstP, int dstStride,
-                               int block_width, int block_height, int marginX, int marginY);
 typedef void (*filterVwghtd_t)(short *src, int srcStride, pixel *dstE, pixel *dstI, pixel *dstP, int dstStride, int block_width,
                                int block_height, int marginX, int marginY, int w, int roundw, int shiftw, int offsetw);
-typedef void (*filterHmulti_t)(pixel *src, int srcStride, short *midF, short* midA, short* midB, short* midC, int midStride,
-                               pixel *dstA, pixel *dstB, pixel *dstC, int dstStride, int block_width, int block_height, int marginX, int marginY);
 typedef void (*filterHwghtd_t)(pixel *src, int srcStride, short *midF, short* midA, short* midB, short* midC, int midStride,
                                pixel *dstF, pixel *dstA, pixel *dstB, pixel *dstC, int dstStride, int block_width, int block_height,
                                int marginX, int marginY, int w, int roundw, int shiftw, int offsetw);
+typedef void (*filterRowH_t)(pixel *src, int srcStride, short* midA, short* midB, short* midC, int midStride, pixel *dstA, pixel *dstB, pixel *dstC, int width, int height, int marginX, int marginY, int row, int isLastRow);
+typedef void (*filterRowV_0_t)(pixel *src, int srcStride, pixel *dstA, pixel *dstB, pixel *dstC, int width, int height, int marginX, int marginY, int row, int isLastRow);
+typedef void (*filterRowV_N_t)(short *midA, int midStride, pixel *dstA, pixel *dstB, pixel *dstC, int dstStride, int width, int height, int marginX, int marginY, int row, int isLastRow);
+
 typedef void (*weightpUni_t)(short *src, pixel *dst, int srcStride, int dstStride, int width, int height, int w0, int round, int shift, int offset);
 typedef void (*scale_t)(pixel *dst, pixel *src, intptr_t stride);
 typedef void (*downscale_t)(pixel *src0, pixel *dstf, pixel *dsth, pixel *dstv, pixel *dstc,
@@ -266,8 +266,9 @@
     ipfilter_ss_t   ipfilter_ss[NUM_IPFILTER_S_S];
     ipfilter_p2s_t  ipfilter_p2s;
     ipfilter_s2p_t  ipfilter_s2p;
-    filterVmulti_t  filterVmulti;
-    filterHmulti_t  filterHmulti;
+    filterRowH_t    filterRowH;
+    filterRowV_0_t  filterRowV_0;
+    filterRowV_N_t  filterRowV_N;
 
     intra_dc_t      intra_pred_dc;
     intra_planar_t  intra_pred_planar;
diff -r 797c13ec5d2a -r aecd81195710 source/common/reference.cpp
--- a/source/common/reference.cpp	Sun Aug 25 21:53:55 2013 -0500
+++ b/source/common/reference.cpp	Mon Aug 26 18:48:45 2013 +0800
@@ -79,6 +79,13 @@
     size_t padwidth = width + pic->m_lumaMarginX * 2;
     size_t padheight = height + pic->m_lumaMarginY * 2;
 
+    for (int i = 0; i < 4; i++)
+    {
+        // TODO: not sure whether [0] needs a buffer when weighting is used; it is allocated anyway to be safe, but [0] is no longer filled
+        m_midBuf[i] = new short[width * (height + 3 + 4)];  // middle buffer extend size: left(0), right(0), top(3), bottom(4)
+        m_midBuf[i] += 3 * width;
+    }
+
     if (w) 
     {
         setWeight(*w);
@@ -111,8 +118,16 @@
 {
     JobProvider::flush();
 
+    int width = m_reconPic->getWidth();
+
     for (int i = 0; i < 4; i++)
     {
+        m_midBuf[i] -= 3 * width;
+        if (m_midBuf[i])
+        {
+            delete[] m_midBuf[i];
+            m_midBuf[i] = NULL;
+        }
         for (int j = 0; j < 4; j++)
         {
             if (i == 0 && j == 0 && !isWeighted)
@@ -158,15 +173,29 @@
        }
        else
        {
-            primitives.filterHmulti(srcPtr, lumaStride,                        // source buffer
-                                intPtrF, intPtrA, intPtrB, intPtrC, m_intStride, // 4 intermediate HPEL buffers
-                                lumaPlane[1][0] + bufOffset,
-                                lumaPlane[2][0] + bufOffset,
-                                lumaPlane[3][0] + bufOffset, lumaStride,     // 3 (x=n, y=0) output buffers (no V interp)
-                                m_filterWidth + (2 * s_intMarginX),              // filter dimensions with margins
-                                m_filterHeight + (2 * s_intMarginY),
-                                m_reconPic->m_lumaMarginX - s_tmpMarginX - s_intMarginX, // pixel extension margins
-                                m_reconPic->m_lumaMarginY - s_tmpMarginY - s_intMarginY);
+            int midStride = m_reconPic->getWidth();
+            for(int i = 0; i < m_reconPic->m_numCuInHeight; i++ )
+            {
+                int isLast = (i == m_reconPic->m_numCuInHeight - 1);
+                int rowAddr = i * g_maxCUHeight * lumaStride;
+                int rowAddrMid = (i * g_maxCUHeight) * midStride;
+
+                primitives.filterRowH(m_reconPic->getLumaAddr() + rowAddr,
+                                      lumaStride,
+                                      m_midBuf[1] + rowAddrMid,
+                                      m_midBuf[2] + rowAddrMid,
+                                      m_midBuf[3] + rowAddrMid,
+                                      midStride,
+                                      lumaPlane[1][0] + rowAddr,
+                                      lumaPlane[2][0] + rowAddr,
+                                      lumaPlane[3][0] + rowAddr,
+                                      m_reconPic->getWidth(),
+                                      g_maxCUHeight,
+                                      m_reconPic->getLumaMarginX(),
+                                      m_reconPic->getLumaMarginY(),
+                                      i,
+                                      isLast);
+            }
         }
     }
 
@@ -214,7 +243,7 @@
     return false;
 }
 
-void MotionReference::generateReferencePlane(int x)
+void MotionReference::generateReferencePlane(const int x)
 {
     PPAScopeEvent(GenerateReferencePlanes);
 
@@ -235,7 +264,44 @@
     }
     else
     {
-        primitives.filterVmulti(intPtr, m_intStride, dstPtr1, dstPtr2, dstPtr3, lumaStride, m_filterWidth, m_filterHeight,
-                                m_reconPic->m_lumaMarginX - s_tmpMarginX, m_reconPic->m_lumaMarginY - s_tmpMarginY);
+        int midStride = m_reconPic->getWidth();
+        for(int i = 0; i < m_reconPic->m_numCuInHeight; i++ )
+        {
+            int isLast = (i == m_reconPic->m_numCuInHeight - 1);
+            int proch = g_maxCUHeight + (i == 0 ? -4 : 0) + (isLast ? 4 : 0);
+            int offset = (i == 0 ? 0 : 4);
+            int rowAddr = (i * g_maxCUHeight - offset) * lumaStride;
+            int rowAddrMid = (i * g_maxCUHeight - offset) * midStride;
+
+            if (x == 0)
+            {
+                primitives.filterRowV_0(m_reconPic->getLumaAddr() + rowAddr,
+                                        lumaStride,
+                                        lumaPlane[0][1] + rowAddr,
+                                        lumaPlane[0][2] + rowAddr,
+                                        lumaPlane[0][3] + rowAddr,
+                                        m_reconPic->getWidth(),
+                                        proch,
+                                        m_reconPic->getLumaMarginX(),
+                                        m_reconPic->getLumaMarginY(),
+                                        i,
+                                        isLast);
+            }
+            else
+            {
+                primitives.filterRowV_N(m_midBuf[x] + rowAddrMid,
+                                        midStride,
+                                        lumaPlane[x][1] + rowAddr,
+                                        lumaPlane[x][2] + rowAddr,
+                                        lumaPlane[x][3] + rowAddr,
+                                        lumaStride,
+                                        m_reconPic->getWidth(),
+                                        proch,
+                                        m_reconPic->getLumaMarginX(),
+                                        m_reconPic->getLumaMarginY(),
+                                        i,
+                                        isLast);
+            }
+        }
     }
 }
diff -r 797c13ec5d2a -r aecd81195710 source/common/reference.h
--- a/source/common/reference.h	Sun Aug 25 21:53:55 2013 -0500
+++ b/source/common/reference.h	Mon Aug 26 18:48:45 2013 +0800
@@ -68,7 +68,7 @@
 protected:
 
     bool findJob();
-    void generateReferencePlane(int idx);
+    void generateReferencePlane(const int idx);
 
     intptr_t     m_startPad;
     TComPicYuv  *m_reconPic;
@@ -89,6 +89,7 @@
     int         m_filterWidth;
     int         m_filterHeight;
     short      *m_intermediateValues;
+    short      *m_midBuf[4];  // 0: Full, 1:1/4, 2:2/4, 3:3/4
 
     MotionReference& operator =(const MotionReference&);
 };
diff -r 797c13ec5d2a -r aecd81195710 source/common/vec/ipfilter.inc
--- a/source/common/vec/ipfilter.inc	Sun Aug 25 21:53:55 2013 -0500
+++ b/source/common/vec/ipfilter.inc	Mon Aug 26 18:48:45 2013 +0800
@@ -61,10 +61,8 @@
     p.ipfilter_s2p = filterConvertShortToPel;
 
 #if !HIGH_BIT_DEPTH && INSTRSET >= X265_CPU_LEVEL_SSE41
-    p.filterVmulti = filterVerticalMultiplaneExtend;
     p.filterVwghtd = filterVerticalWeighted;
 #if !(defined(_MSC_VER) && _MSC_VER == 1500 && X86_64)
-    p.filterHmulti = filterHorizontalMultiplaneExtend;
     p.filterHwghtd = filterHorizontalWeighted;
     p.filterHCU     = filterHorizontalExtendCol;
 #endif



More information about the x265-devel mailing list