[x265] [PATCH RFC] Calling C primitive for weighted-IPFilter and using x265 ME

deepthidevaki at multicorewareinc.com deepthidevaki at multicorewareinc.com
Thu Jul 18 07:50:37 CEST 2013


# HG changeset patch
# User Deepthi Devaki
# Date 1374126601 -19800
# Node ID 0b14a9a0468bbe26ff038027d735f5f97a4fafea
# Parent  031c4c889edc4d2af969e2a717cd62c5a950e61a
Calling C primitive for weighted-IPFilter and using x265 ME

diff -r 031c4c889edc -r 0b14a9a0468b source/Lib/TLibCommon/TComPicYuv.cpp
--- a/source/Lib/TLibCommon/TComPicYuv.cpp	Thu Jul 18 00:40:54 2013 -0500
+++ b/source/Lib/TLibCommon/TComPicYuv.cpp	Thu Jul 18 11:20:01 2013 +0530
@@ -251,6 +251,24 @@
     m_bIsBorderExtended = true;
 }
 
+Void TComPicYuv::extendPicBorder(x265::ThreadPool *pool, wpScalingParam *w)
+{
+    /* Pads the luma and both chroma planes, then generates the weighted reference planes
+     * from w.  Does nothing when the picture is already flagged as border-extended. */
+    if (!m_bIsBorderExtended)
+    {
+        /* luma goes first: HPEL generation requires the integer luma plane to be extended */
+        xExtendPicCompBorder(getLumaAddr(), getStride(), getWidth(), getHeight(), m_lumaMarginX, m_lumaMarginY);
+        xExtendPicCompBorder(getCbAddr(), getCStride(), getWidth() >> 1, getHeight() >> 1, m_chromaMarginX, m_chromaMarginY);
+        xExtendPicCompBorder(getCrAddr(), getCStride(), getWidth() >> 1, getHeight() >> 1, m_chromaMarginX, m_chromaMarginY);
+        /* the MotionReference is allocated on first use only and kept in m_refList */
+        if (!m_refList)
+            m_refList = new x265::MotionReference(this, pool);
+        m_refList->generateReferencePlanes(w);
+        m_bIsBorderExtended = true;
+    }
+}
+
 Void TComPicYuv::xExtendPicCompBorder(Pel* recon, Int stride, Int width, Int height, Int iMarginX, Int iMarginY)
 {
     Int   x, y;
diff -r 031c4c889edc -r 0b14a9a0468b source/Lib/TLibCommon/TComPicYuv.h
--- a/source/Lib/TLibCommon/TComPicYuv.h	Thu Jul 18 00:40:54 2013 -0500
+++ b/source/Lib/TLibCommon/TComPicYuv.h	Thu Jul 18 11:20:01 2013 +0530
@@ -184,6 +184,7 @@
 
     //  Extend function of picture buffer
     Void  extendPicBorder(x265::ThreadPool *pool);
+    Void  extendPicBorder(x265::ThreadPool *pool, wpScalingParam *w);
 
     //  Dump picture
     Void  dump(Char* pFileName, Bool bAdd = false);
diff -r 031c4c889edc -r 0b14a9a0468b source/Lib/TLibCommon/TComSlice.h
--- a/source/Lib/TLibCommon/TComSlice.h	Thu Jul 18 00:40:54 2013 -0500
+++ b/source/Lib/TLibCommon/TComSlice.h	Thu Jul 18 11:20:01 2013 +0530
@@ -1413,7 +1413,6 @@
     UInt        m_sliceSegmentBits;
     Bool        m_bFinalized;
 
-    wpScalingParam  m_weightPredTable[2][MAX_NUM_REF][3]; // [REF_PIC_LIST_0 or REF_PIC_LIST_1][refIdx][0:Y, 1:U, 2:V]
     wpACDCParam     m_weightACDCParam[3];                 // [0:Y, 1:U, 2:V]
 
     std::vector<UInt> m_tileByteLocation;
@@ -1431,6 +1430,8 @@
 
 public:
 
+    wpScalingParam  m_weightPredTable[2][MAX_NUM_REF][3]; // [REF_PIC_LIST_0 or REF_PIC_LIST_1][refIdx][0:Y, 1:U, 2:V]
+
     TComSlice();
     virtual ~TComSlice();
     Void      initSlice();
diff -r 031c4c889edc -r 0b14a9a0468b source/Lib/TLibEncoder/TEncSlice.cpp
--- a/source/Lib/TLibEncoder/TEncSlice.cpp	Thu Jul 18 00:40:54 2013 -0500
+++ b/source/Lib/TLibEncoder/TEncSlice.cpp	Thu Jul 18 11:20:01 2013 +0530
@@ -424,13 +424,30 @@
 
     Int numPredDir = slice->isInterP() ? 1 : 2;
 
-    for (Int refList = 0; refList < numPredDir; refList++)
+    wpexplicit = (slice->getSliceType() == P_SLICE && slice->getPPS()->getUseWP()) ;
+
+    if(wpexplicit)
     {
-        RefPicList  picList = (refList ? REF_PIC_LIST_1 : REF_PIC_LIST_0);
-        for (Int refIdxTemp = 0; refIdxTemp < slice->getNumRefIdx(picList); refIdxTemp++)
+        for (Int refList = 0; refList < numPredDir; refList++)
         {
-            // To do: Call the merged IP + weighted frames if weighted prediction enabled
-            slice->getRefPic(picList, refIdxTemp)->getPicYuvRec()->extendPicBorder(x265::ThreadPool::getThreadPool()); 
+            RefPicList  picList = (refList ? REF_PIC_LIST_1 : REF_PIC_LIST_0);
+            for (Int refIdxTemp = 0; refIdxTemp < slice->getNumRefIdx(picList); refIdxTemp++)
+            {
+                //Call the merged IP + weighted frames if weighted prediction enabled
+                wpScalingParam *w = &(slice->m_weightPredTable[picList][refIdxTemp][0]);
+                slice->getRefPic(picList, refIdxTemp)->getPicYuvRec()->extendPicBorder(x265::ThreadPool::getThreadPool(), w); 
+            }
+        }
+    }
+    else
+    {
+        for (Int refList = 0; refList < numPredDir; refList++)
+        {
+            RefPicList  picList = (refList ? REF_PIC_LIST_1 : REF_PIC_LIST_0);
+            for (Int refIdxTemp = 0; refIdxTemp < slice->getNumRefIdx(picList); refIdxTemp++)
+            {                
+                slice->getRefPic(picList, refIdxTemp)->getPicYuvRec()->extendPicBorder(x265::ThreadPool::getThreadPool());                 
+            }
         }
     }
 
diff -r 031c4c889edc -r 0b14a9a0468b source/common/reference.cpp
--- a/source/common/reference.cpp	Thu Jul 18 00:40:54 2013 -0500
+++ b/source/common/reference.cpp	Thu Jul 18 11:20:01 2013 +0530
@@ -141,6 +141,50 @@
     xFree(m_intermediateValues);
 }
 
+void MotionReference::generateReferencePlanes(wpScalingParam *w)
+{
+    /* Weighted variant: the weight, rounding term, shift and offset derived from w are
+     * handed to the horizontal and vertical filter primitives that build the luma planes */
+    PPAScopeEvent(GenerateIntermediates);
+    /* one allocation holds four equally sized short planes, laid out back to back */
+    const intptr_t planeSize = m_intStride * (m_reconPic->getHeight() + s_tmpMarginY * 4);
+    m_intermediateValues = (short*)xMalloc(short, 4 * planeSize);
+    short* const planeF = m_intermediateValues;
+    short* const planeA = planeF + planeSize;
+    short* const planeB = planeA + planeSize;
+    short* const planeC = planeB + planeSize;
+
+    /* step back (s_tmpMarginY + s_intMarginY) rows and (s_tmpMarginX + s_intMarginX) columns */
+    const int bufOffset = -(s_tmpMarginY + s_intMarginY) * m_lumaStride - (s_tmpMarginX + s_intMarginX);
+    pixel *srcPtr = (pixel*)m_reconPic->getLumaAddr() + bufOffset;
+
+    /* weighting terms read from w; the input offset is scaled by 1 << (g_bitDepth - 8) */
+    const int shift  = w->log2WeightDenom;
+    const int weight = w->inputWeight;
+    const int offset = w->inputOffset * (1 << (g_bitDepth - 8));
+    const int round  = (w->log2WeightDenom >= 1) ? (1 << (w->log2WeightDenom - 1)) : (0);
+
+    /* A single horizontal pass produces the four intermediate (short) planes, one per QPEL
+     * offset, and also the three Y=0 pixel planes, which need no vertical interpolation */
+    primitives.filterHwghtd(srcPtr, m_lumaStride,                                    // source buffer
+                            planeF, planeA, planeB, planeC, m_intStride,             // 4 intermediate buffers
+                            m_lumaPlane[1][0] + bufOffset,
+                            m_lumaPlane[2][0] + bufOffset,
+                            m_lumaPlane[3][0] + bufOffset, m_lumaStride,             // 3 (x=n, y=0) output buffers
+                            m_filterWidth + (2 * s_intMarginX),                      // filter dimensions with margins
+                            m_filterHeight + (2 * s_intMarginY),
+                            m_reconPic->m_lumaMarginX - s_tmpMarginX - s_intMarginX, // pixel extension margins
+                            m_reconPic->m_lumaMarginY - s_tmpMarginY - s_intMarginY,
+                            weight, round, shift, offset);
+
+    /* vertical pass: ALWAYS run serially for now, until weightp works with the thread pool */
+    for (int plane = 0; plane < 4; plane++)
+        generateReferencePlane(plane, weight, round, shift, offset);
+
+    xFree(m_intermediateValues);
+}
+
+
 bool MotionReference::findJob()
 {
     /* Called by thread pool worker threads */
@@ -175,3 +219,19 @@
 
     primitives.filterVmulti(intPtr, m_intStride, dstPtr1, dstPtr2, dstPtr3, m_lumaStride, m_filterWidth, m_filterHeight, m_reconPic->m_lumaMarginX - s_tmpMarginX, m_reconPic->m_lumaMarginY - s_tmpMarginY);
 }
+
+void MotionReference::generateReferencePlane(int x, int w, int roundw, int shiftw, int offsetw)
+{
+    /* Weighted vertical pass for horizontal offset x (called with x = 0, 1, 2, 3): reads
+     * intermediate plane x and writes m_lumaPlane[x][1..3] using w, roundw, shiftw, offsetw */
+    PPAScopeEvent(GenerateReferencePlanes);
+    /* find plane x in the shared intermediate buffer, then skip s_intMarginY rows / s_intMarginX columns */
+    const intptr_t planeSize = m_intStride * (m_reconPic->getHeight() + s_tmpMarginY * 4);
+    short* intPtr = m_intermediateValues + x * planeSize + s_intMarginY * m_intStride + s_intMarginX;
+    /* the Y=0 plane was generated during horizontal interpolation; outputs start s_tmpMargin before each plane */
+    const intptr_t dstOffset = -s_tmpMarginY * m_lumaStride - s_tmpMarginX;
+    primitives.filterVwghtd(intPtr, m_intStride,
+                            m_lumaPlane[x][1] + dstOffset, m_lumaPlane[x][2] + dstOffset, m_lumaPlane[x][3] + dstOffset, m_lumaStride,
+                            m_filterWidth, m_filterHeight, m_reconPic->m_lumaMarginX - s_tmpMarginX, m_reconPic->m_lumaMarginY - s_tmpMarginY,
+                            w, roundw, shiftw, offsetw);
+}
\ No newline at end of file
diff -r 031c4c889edc -r 0b14a9a0468b source/common/reference.h
--- a/source/common/reference.h	Thu Jul 18 00:40:54 2013 -0500
+++ b/source/common/reference.h	Thu Jul 18 11:20:01 2013 +0530
@@ -28,6 +28,8 @@
 #include "threading.h"
 #include "threadpool.h"
 
+#include "TLibCommon/TComSlice.h"
+
 class TComPicYuv;
 
 namespace x265 {
@@ -42,6 +44,8 @@
     ~MotionReference();
 
     void generateReferencePlanes();
+    void generateReferencePlanes(wpScalingParam *w);
+
 
     /* indexed by [hpelx|qpelx][hpely|qpely] */
     pixel* m_lumaPlane[4][4];
@@ -54,6 +58,8 @@
 
     bool findJob();
     void generateReferencePlane(int idx);
+    void generateReferencePlane(int x, int w, int roundw, int shiftw, int offsetw);
+
 
     intptr_t     m_startPad;
     TComPicYuv  *m_reconPic;
diff -r 031c4c889edc -r 0b14a9a0468b source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Thu Jul 18 00:40:54 2013 -0500
+++ b/source/encoder/encoder.cpp	Thu Jul 18 11:20:01 2013 +0530
@@ -196,7 +196,7 @@
     setQP(param->qp);
 
     //====== Motion search ========
-    if (param->searchMethod != X265_ORIG_SEARCH && (param->bEnableWeightedPred || param->bEnableWeightedBiPred))
+    if (param->searchMethod != X265_ORIG_SEARCH && (param->bEnableWeightedBiPred))
     {
         x265_log(param, X265_LOG_WARNING, "Weighted prediction only supported by HM ME, forcing --me 4\n");
         param->searchMethod = X265_ORIG_SEARCH;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: xhevc_deepthid.patch
Type: text/x-patch
Size: 10056 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20130718/564ac81a/attachment-0001.bin>


More information about the x265-devel mailing list