[x265] [PATCH] Enable weighted prediction for half-pel and full-pel motion estimation

Wed Oct 9 09:54:13 CEST 2013

# HG changeset patch
# User Shazeb Nawaz Khan <shazeb at multicorewareinc.com>
# Date 1381305224 -19800
#      Wed Oct 09 13:23:44 2013 +0530
# Node ID 233ac9a05362715f2523f51e39a5961af46813de
# Parent  fc7fbdd18bc0d6d7f98180332e065d83c054fe02
Enable weighted prediction for half-pel and full-pel motion estimation

diff -r fc7fbdd18bc0 -r 233ac9a05362 source/common/reference.cpp

--- a/source/common/reference.cpp	Wed Oct 09 00:00:10 2013 -0500
+++ b/source/common/reference.cpp	Wed Oct 09 13:23:44 2013 +0530
@@ -58,6 +58,7 @@
 int MotionReference::init(TComPicYuv* pic, wpScalingParam *w)
 {
     m_reconPic = pic;
+    unweightedFPelPlane = pic->getLumaAddr();
     lumaStride = pic->getStride();
     m_startPad = pic->m_lumaMarginY * lumaStride + pic->m_lumaMarginX;
     m_next = NULL;
@@ -89,21 +90,19 @@
         X265_FREE(fpelPlane - m_startPad);
 }
 
-void MotionReference::applyWeight(TComPic* ref, int rows, int numRows)
+void MotionReference::applyWeight(int rows, int numRows)
 {
     rows = X265_MIN(rows, numRows-1);
     if (m_numWeightedRows >= rows)
         return;
-
-    TComPicYuv* pic = ref->getPicYuvRec();
-    int marginX = pic->m_lumaMarginX;
-    int marginY = pic->m_lumaMarginY;
-    pixel* src = (pixel*) pic->getLumaAddr() + (m_numWeightedRows * (int)g_maxCUHeight * lumaStride);
+    int marginX = m_reconPic->m_lumaMarginX;
+    int marginY = m_reconPic->m_lumaMarginY;
+    pixel* src = (pixel*) m_reconPic->getLumaAddr() + (m_numWeightedRows * (int)g_maxCUHeight * lumaStride);
     pixel* dst = fpelPlane + ((m_numWeightedRows * (int)g_maxCUHeight) * lumaStride);
-    int width = pic->getWidth();
+    int width = m_reconPic->getWidth();
     int height = ((rows - m_numWeightedRows) * g_maxCUHeight);
     if (rows == numRows - 1)
-        height = ((pic->getHeight() % g_maxCUHeight) ? (pic->getHeight() % g_maxCUHeight) : g_maxCUHeight);
+        height = ((m_reconPic->getHeight() % g_maxCUHeight) ? (m_reconPic->getHeight() % g_maxCUHeight) : g_maxCUHeight);
     size_t dstStride = lumaStride;
 
     // Computing weighted CU rows
@@ -128,7 +127,7 @@
     // Extending Bottom
     if (rows == (numRows - 1))
     {
-        pixel *pixY = fpelPlane - marginX + (pic->getHeight() - 1) * dstStride;
+        pixel *pixY = fpelPlane - marginX + (m_reconPic->getHeight() - 1) * dstStride;
         for (int y = 0; y < marginY; y++)
         {
             memcpy(pixY + (y + 1) * dstStride, pixY, dstStride * sizeof(pixel));
diff -r fc7fbdd18bc0 -r 233ac9a05362 source/common/reference.h
--- a/source/common/reference.h	Wed Oct 09 00:00:10 2013 -0500
+++ b/source/common/reference.h	Wed Oct 09 13:23:44 2013 +0530
@@ -43,6 +43,7 @@
 
     pixel* fpelPlane;
     pixel* lowresPlane[4];
+    pixel* unweightedFPelPlane;
 
     bool isWeighted;
     bool isLowres;
@@ -60,7 +61,7 @@
     MotionReference();
     ~MotionReference();
     int  init(TComPicYuv*, wpScalingParam* w = NULL);
-    void applyWeight(TComPic* src, int rows, int numRows);
+    void applyWeight(int rows, int numRows);
 
     MotionReference *m_next;
     TComPicYuv      *m_reconPic;
diff -r fc7fbdd18bc0 -r 233ac9a05362 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Wed Oct 09 00:00:10 2013 -0500
+++ b/source/encoder/frameencoder.cpp	Wed Oct 09 13:23:44 2013 +0530
@@ -972,6 +972,10 @@
                     {
                         refpic->m_reconRowWait.wait();
                     }
+                    if (slice->getPPS()->getUseWP() && (slice->getSliceType() == P_SLICE))
+                    {
+                        slice->m_mref[list][ref]->applyWeight(row + refLagRows, m_numRows);
+                    }
                 }
             }
 
@@ -1004,6 +1008,10 @@
                         {
                             refpic->m_reconRowWait.wait();
                         }
+                        if (slice->getPPS()->getUseWP() && (slice->getSliceType() == P_SLICE))
+                        {
+                            slice->m_mref[list][ref]->applyWeight(i + refLagRows, m_numRows);
+                        }
                     }
                 }
 
diff -r fc7fbdd18bc0 -r 233ac9a05362 source/encoder/motion.cpp
--- a/source/encoder/motion.cpp	Wed Oct 09 00:00:10 2013 -0500
+++ b/source/encoder/motion.cpp	Wed Oct 09 13:23:44 2013 +0530
@@ -89,6 +89,7 @@
     fenc = (pixel*)X265_MALLOC(pixel, MAX_CU_SIZE * MAX_CU_SIZE);
     subpelbuf = (pixel*)X265_MALLOC(pixel, (MAX_CU_SIZE + 1) * (MAX_CU_SIZE + 1));
     immedVal = (short*)X265_MALLOC(short, (MAX_CU_SIZE + 1) * (MAX_CU_SIZE + 1 + NTAPS_LUMA - 1));
+    immedVal2 = (int16_t*)X265_MALLOC(int16_t, (MAX_CU_SIZE + 1) * (MAX_CU_SIZE + 1 + NTAPS_LUMA - 1));
 }
 
 MotionEstimate::~MotionEstimate()
@@ -96,6 +97,7 @@
     X265_FREE(fenc);
     X265_FREE(subpelbuf);
     X265_FREE(immedVal);
+    X265_FREE(immedVal2);
 }
 
 void MotionEstimate::setSourcePU(int offset, int width, int height)
@@ -831,7 +833,7 @@
                 }
                 else
                 {
-                    subpelInterpolate(fqref, ref->lumaStride, xFrac, yFrac, dir);
+                    subpelInterpolate(ref, qmv0, dir);
                     cost0 = hpelcomp(fenc, FENC_STRIDE, subpelbuf, FENC_STRIDE + (dir == 2)) + mvcost0;
                     cost1 = hpelcomp(fenc, FENC_STRIDE, subpelbuf + (dir == 2) + (dir == 1 ? FENC_STRIDE : 0), FENC_STRIDE + (dir == 2)) + mvcost1;
                 }
@@ -1140,47 +1142,61 @@
         {
             return cmp(fenc, FENC_STRIDE, fref, ref->lumaStride);
         }
-        else if (yFrac == 0)
+        else
         {
-            primitives.ipfilter_pp[FILTER_H_P_P_8](fref, ref->lumaStride, subpelbuf, FENC_STRIDE, blockwidth, blockheight, g_lumaFilter[xFrac]);
+            subpelInterpolate(ref, qmv, 0);
+        }
+        return cmp(fenc, FENC_STRIDE, subpelbuf, FENC_STRIDE);
+    }
+}
+
+void MotionEstimate::subpelInterpolate(ReferencePlanes *ref, MV qmv, int dir)
+{
+    int xFrac = qmv.x & 0x3;
+    int yFrac = qmv.y & 0x3;
+    assert(yFrac | xFrac);
+    int realWidth = blockwidth + (dir == 2);
+    int realHeight = blockheight + (dir == 1);
+    intptr_t realStride = FENC_STRIDE + (dir == 2);
+    pixel *fref = ref->unweightedFPelPlane + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * ref->lumaStride;
+
+    if (ref->isWeighted)
+    {
+        if (yFrac == 0)
+        {
+            primitives.ipfilter_ps[FILTER_H_P_S_8](fref, ref->lumaStride, immedVal, realStride, realWidth, realHeight, g_lumaFilter[xFrac]);
+            primitives.weightpUni(immedVal, subpelbuf, realStride, realStride, realWidth, realHeight, ref->weight, ref->round, ref->shift, ref->offset);
         }
         else if (xFrac == 0)
         {
-            primitives.ipfilter_pp[FILTER_V_P_P_8](fref, ref->lumaStride, subpelbuf, FENC_STRIDE, blockwidth, blockheight, g_lumaFilter[yFrac]);
+            primitives.ipfilter_ps[FILTER_V_P_S_8](fref, ref->lumaStride, immedVal, realStride, realWidth, realHeight, g_lumaFilter[yFrac]);
+            primitives.weightpUni(immedVal, subpelbuf, realStride, realStride, realWidth, realHeight, ref->weight, ref->round, ref->shift, ref->offset);
         }
         else
         {
             int filterSize = NTAPS_LUMA;
             int halfFilterSize = (filterSize >> 1);
-            primitives.ipfilter_ps[FILTER_H_P_S_8](fref - (halfFilterSize - 1) * ref->lumaStride, ref->lumaStride, immedVal, blockwidth, blockwidth, blockheight + filterSize - 1, g_lumaFilter[xFrac]);
-            primitives.ipfilter_sp[FILTER_V_S_P_8](immedVal + (halfFilterSize - 1) * blockwidth, blockwidth, subpelbuf, FENC_STRIDE, blockwidth, blockheight, g_lumaFilter[yFrac]);
+            primitives.ipfilter_ps[FILTER_H_P_S_8](fref - (halfFilterSize - 1) * ref->lumaStride, ref->lumaStride, immedVal, realWidth, realWidth, realHeight + filterSize - 1, g_lumaFilter[xFrac]);
+            primitives.ipfilter_ss[FILTER_V_S_S_8](immedVal + (halfFilterSize - 1) * realWidth, realWidth, immedVal2, realStride, realWidth, realHeight, g_lumaFilter[yFrac]);
+            primitives.weightpUni(immedVal2, subpelbuf, realStride, realStride, realWidth, realHeight, ref->weight, ref->round, ref->shift, ref->offset);
         }
-
-        return cmp(fenc, FENC_STRIDE, subpelbuf, FENC_STRIDE);
-    }
-}
-
-void MotionEstimate::subpelInterpolate(pixel *fref, intptr_t lumaStride, int xFrac, int yFrac, int dir)
-{
-    assert(yFrac | xFrac);
-
-    int realWidth = blockwidth + (dir == 2);
-    int realHeight = blockheight + (dir == 1);
-    intptr_t realStride = FENC_STRIDE + (dir == 2);
-
-    if (yFrac == 0)
-    {
-        primitives.ipfilter_pp[FILTER_H_P_P_8](fref, lumaStride, subpelbuf, realStride, realWidth, realHeight, g_lumaFilter[xFrac]);
-    }
-    else if (xFrac == 0)
-    {
-        primitives.ipfilter_pp[FILTER_V_P_P_8](fref, lumaStride, subpelbuf, realStride, realWidth, realHeight, g_lumaFilter[yFrac]);
     }
     else
     {
-        int filterSize = NTAPS_LUMA;
-        int halfFilterSize = (filterSize >> 1);
-        primitives.ipfilter_ps[FILTER_H_P_S_8](fref - (halfFilterSize - 1) * lumaStride, lumaStride, immedVal, realWidth, realWidth, realHeight + filterSize - 1, g_lumaFilter[xFrac]);
-        primitives.ipfilter_sp[FILTER_V_S_P_8](immedVal + (halfFilterSize - 1) * realWidth, realWidth, subpelbuf, realStride, realWidth, realHeight, g_lumaFilter[yFrac]);
+        if (yFrac == 0)
+        {
+            primitives.ipfilter_pp[FILTER_H_P_P_8](fref, ref->lumaStride, subpelbuf, realStride, realWidth, realHeight, g_lumaFilter[xFrac]);
+        }
+        else if (xFrac == 0)
+        {
+            primitives.ipfilter_pp[FILTER_V_P_P_8](fref, ref->lumaStride, subpelbuf, realStride, realWidth, realHeight, g_lumaFilter[yFrac]);
+        }
+        else
+        {
+            int filterSize = NTAPS_LUMA;
+            int halfFilterSize = (filterSize >> 1);
+            primitives.ipfilter_ps[FILTER_H_P_S_8](fref - (halfFilterSize - 1) * ref->lumaStride, ref->lumaStride, immedVal, realWidth, realWidth, realHeight + filterSize - 1, g_lumaFilter[xFrac]);
+            primitives.ipfilter_sp[FILTER_V_S_P_8](immedVal + (halfFilterSize - 1) * realWidth, realWidth, subpelbuf, realStride, realWidth, realHeight, g_lumaFilter[yFrac]);
+        }
     }
 }
diff -r fc7fbdd18bc0 -r 233ac9a05362 source/encoder/motion.h
--- a/source/encoder/motion.h	Wed Oct 09 00:00:10 2013 -0500
+++ b/source/encoder/motion.h	Wed Oct 09 13:23:44 2013 +0530
@@ -54,6 +54,7 @@
     /* subpel generation buffers */
     pixel *subpelbuf;
     short *immedVal;
+    int16_t *immedVal2;
     int blockwidth;
     int blockheight;
 
@@ -96,7 +97,7 @@
 
     int subpelCompare(ReferencePlanes *ref, const MV & qmv, pixelcmp_t);
 
-    void subpelInterpolate(pixel *fref, intptr_t lumaStride, int xFrac, int yFrac, int dir);
+    inline void subpelInterpolate(ReferencePlanes *ref, MV qmv, int dir);
 
 protected: