[x265] [PATCH RFC] Calling C primitive for weighted-IPFilter and using x265 ME
deepthidevaki at multicorewareinc.com
deepthidevaki at multicorewareinc.com
Thu Jul 18 07:50:37 CEST 2013
# HG changeset patch
# User Deepthi Devaki
# Date 1374126601 -19800
# Node ID 0b14a9a0468bbe26ff038027d735f5f97a4fafea
# Parent 031c4c889edc4d2af969e2a717cd62c5a950e61a
Calling C primitive for weighted-IPFilter and using x265 ME
diff -r 031c4c889edc -r 0b14a9a0468b source/Lib/TLibCommon/TComPicYuv.cpp
--- a/source/Lib/TLibCommon/TComPicYuv.cpp Thu Jul 18 00:40:54 2013 -0500
+++ b/source/Lib/TLibCommon/TComPicYuv.cpp Thu Jul 18 11:20:01 2013 +0530
@@ -251,6 +251,24 @@
m_bIsBorderExtended = true;
}
+/** Pad the luma and chroma planes out to their margins, then build the
+ *  weighted reference planes for this picture.
+ *  Returns immediately when m_bIsBorderExtended is already set.
+ *  \param pool thread pool passed to the MotionReference created on first use
+ *  \param w    weighting parameters forwarded to generateReferencePlanes() */
+Void TComPicYuv::extendPicBorder(x265::ThreadPool *pool, wpScalingParam *w)
+{
+    // NOTE(review): when the flag is already set (presumably also by the
+    // one-argument overload), no planes are generated for `w` - confirm intended.
+    if (m_bIsBorderExtended)
+    {
+        return;
+    }
+
+    /* The integer luma plane has to be padded before any HPEL planes are derived from it */
+    xExtendPicCompBorder(getLumaAddr(), getStride(), getWidth(), getHeight(), m_lumaMarginX, m_lumaMarginY);
+
+    /* Cb and Cr are padded at half the luma width and height */
+    Int chromaWidth = getWidth() >> 1;
+    Int chromaHeight = getHeight() >> 1;
+    xExtendPicCompBorder(getCbAddr(), getCStride(), chromaWidth, chromaHeight, m_chromaMarginX, m_chromaMarginY);
+    xExtendPicCompBorder(getCrAddr(), getCStride(), chromaWidth, chromaHeight, m_chromaMarginX, m_chromaMarginY);
+
+    /* Create the motion reference on first use, then fill its weighted planes */
+    if (!m_refList)
+    {
+        m_refList = new x265::MotionReference(this, pool);
+    }
+    m_refList->generateReferencePlanes(w);
+
+    m_bIsBorderExtended = true;
+}
+
Void TComPicYuv::xExtendPicCompBorder(Pel* recon, Int stride, Int width, Int height, Int iMarginX, Int iMarginY)
{
Int x, y;
diff -r 031c4c889edc -r 0b14a9a0468b source/Lib/TLibCommon/TComPicYuv.h
--- a/source/Lib/TLibCommon/TComPicYuv.h Thu Jul 18 00:40:54 2013 -0500
+++ b/source/Lib/TLibCommon/TComPicYuv.h Thu Jul 18 11:20:01 2013 +0530
@@ -184,6 +184,7 @@
// Extend function of picture buffer
Void extendPicBorder(x265::ThreadPool *pool);
+ Void extendPicBorder(x265::ThreadPool *pool, wpScalingParam *w);
// Dump picture
Void dump(Char* pFileName, Bool bAdd = false);
diff -r 031c4c889edc -r 0b14a9a0468b source/Lib/TLibCommon/TComSlice.h
--- a/source/Lib/TLibCommon/TComSlice.h Thu Jul 18 00:40:54 2013 -0500
+++ b/source/Lib/TLibCommon/TComSlice.h Thu Jul 18 11:20:01 2013 +0530
@@ -1413,7 +1413,6 @@
UInt m_sliceSegmentBits;
Bool m_bFinalized;
- wpScalingParam m_weightPredTable[2][MAX_NUM_REF][3]; // [REF_PIC_LIST_0 or REF_PIC_LIST_1][refIdx][0:Y, 1:U, 2:V]
wpACDCParam m_weightACDCParam[3]; // [0:Y, 1:U, 2:V]
std::vector<UInt> m_tileByteLocation;
@@ -1431,6 +1430,8 @@
public:
+ wpScalingParam m_weightPredTable[2][MAX_NUM_REF][3]; // [REF_PIC_LIST_0 or REF_PIC_LIST_1][refIdx][0:Y, 1:U, 2:V]
+
TComSlice();
virtual ~TComSlice();
Void initSlice();
diff -r 031c4c889edc -r 0b14a9a0468b source/Lib/TLibEncoder/TEncSlice.cpp
--- a/source/Lib/TLibEncoder/TEncSlice.cpp Thu Jul 18 00:40:54 2013 -0500
+++ b/source/Lib/TLibEncoder/TEncSlice.cpp Thu Jul 18 11:20:01 2013 +0530
@@ -424,13 +424,30 @@
Int numPredDir = slice->isInterP() ? 1 : 2;
- for (Int refList = 0; refList < numPredDir; refList++)
+ wpexplicit = (slice->getSliceType() == P_SLICE && slice->getPPS()->getUseWP()) ;
+
+ if(wpexplicit)
{
- RefPicList picList = (refList ? REF_PIC_LIST_1 : REF_PIC_LIST_0);
- for (Int refIdxTemp = 0; refIdxTemp < slice->getNumRefIdx(picList); refIdxTemp++)
+ for (Int refList = 0; refList < numPredDir; refList++)
{
- // To do: Call the merged IP + weighted frames if weighted prediction enabled
- slice->getRefPic(picList, refIdxTemp)->getPicYuvRec()->extendPicBorder(x265::ThreadPool::getThreadPool());
+ RefPicList picList = (refList ? REF_PIC_LIST_1 : REF_PIC_LIST_0);
+ for (Int refIdxTemp = 0; refIdxTemp < slice->getNumRefIdx(picList); refIdxTemp++)
+ {
+ //Call the merged IP + weighted frames if weighted prediction enabled
+ wpScalingParam *w = &(slice->m_weightPredTable[picList][refIdxTemp][0]);
+ slice->getRefPic(picList, refIdxTemp)->getPicYuvRec()->extendPicBorder(x265::ThreadPool::getThreadPool(), w);
+ }
+ }
+ }
+ else
+ {
+ for (Int refList = 0; refList < numPredDir; refList++)
+ {
+ RefPicList picList = (refList ? REF_PIC_LIST_1 : REF_PIC_LIST_0);
+ for (Int refIdxTemp = 0; refIdxTemp < slice->getNumRefIdx(picList); refIdxTemp++)
+ {
+ slice->getRefPic(picList, refIdxTemp)->getPicYuvRec()->extendPicBorder(x265::ThreadPool::getThreadPool());
+ }
}
}
diff -r 031c4c889edc -r 0b14a9a0468b source/common/reference.cpp
--- a/source/common/reference.cpp Thu Jul 18 00:40:54 2013 -0500
+++ b/source/common/reference.cpp Thu Jul 18 11:20:01 2013 +0530
@@ -141,6 +141,50 @@
xFree(m_intermediateValues);
}
+/** Generate the weighted luma reference planes from the reconstructed picture.
+ *  A horizontal weighted pass fills four intermediate planes and the y=0 output
+ *  planes; a vertical weighted pass is then run once per intermediate plane.
+ *  \param w weighting parameters; inputWeight, inputOffset and log2WeightDenom are read */
+void MotionReference::generateReferencePlanes(wpScalingParam *w)
+{
+    PPAScopeEvent(GenerateIntermediates);
+
+    /* A single scratch allocation holds four equally sized planes of shorts */
+    const intptr_t planeSize = m_intStride * (m_reconPic->getHeight() + s_tmpMarginY * 4);
+    m_intermediateValues = (short*)xMalloc(short, 4 * planeSize);
+
+    short* planeF = m_intermediateValues;
+    short* planeA = m_intermediateValues + 1 * planeSize;
+    short* planeB = m_intermediateValues + 2 * planeSize;
+    short* planeC = m_intermediateValues + 3 * planeSize;
+
+    /* Step back by both margins so the filters also cover the padded border */
+    int bufOffset = -(s_tmpMarginY + s_intMarginY) * m_lumaStride - (s_tmpMarginX + s_intMarginX);
+    pixel *src = (pixel*)m_reconPic->getLumaAddr() + bufOffset;
+
+    /* Weighting terms; the offset is scaled up from 8-bit units to g_bitDepth */
+    int weight = w->inputWeight;
+    int shift = w->log2WeightDenom;
+    int round = (shift >= 1) ? (1 << (shift - 1)) : 0;
+    int offset = w->inputOffset * (1 << (g_bitDepth - 8));
+
+    /* Horizontal pass: one call writes the four intermediate (short) planes and,
+     * since they need no vertical interpolation, the three y=0 pixel planes */
+    primitives.filterHwghtd(src, m_lumaStride,                                       // source buffer
+                            planeF, planeA, planeB, planeC, m_intStride,             // 4 intermediate buffers
+                            m_lumaPlane[1][0] + bufOffset,
+                            m_lumaPlane[2][0] + bufOffset,
+                            m_lumaPlane[3][0] + bufOffset, m_lumaStride,             // 3 (x=n, y=0) output buffers
+                            m_filterWidth + (2 * s_intMarginX),                      // filter dimensions with margins
+                            m_filterHeight + (2 * s_intMarginY),
+                            m_reconPic->m_lumaMarginX - s_tmpMarginX - s_intMarginX, // pixel extension margins
+                            m_reconPic->m_lumaMarginY - s_tmpMarginY - s_intMarginY,
+                            weight, round, shift, offset);
+
+    /* Vertical pass: always run serially for now, until weightp works with the thread pool */
+    for (int plane = 0; plane < 4; ++plane)
+    {
+        generateReferencePlane(plane, weight, round, shift, offset);
+    }
+
+    xFree(m_intermediateValues);
+}
+
+
bool MotionReference::findJob()
{
/* Called by thread pool worker threads */
@@ -175,3 +219,19 @@
primitives.filterVmulti(intPtr, m_intStride, dstPtr1, dstPtr2, dstPtr3, m_lumaStride, m_filterWidth, m_filterHeight, m_reconPic->m_lumaMarginX - s_tmpMarginX, m_reconPic->m_lumaMarginY - s_tmpMarginY);
}
+
+/** Weighted vertical pass for one of the four intermediate planes.
+ *  Reads plane x of m_intermediateValues and writes m_lumaPlane[x][1..3].
+ *  \param x       intermediate plane index; the weighted generateReferencePlanes() passes 0..3
+ *  \param w       weight handed to the filter primitive
+ *  \param roundw  rounding term handed to the filter primitive
+ *  \param shiftw  shift handed to the filter primitive
+ *  \param offsetw offset handed to the filter primitive */
+void MotionReference::generateReferencePlane(int x, int w, int roundw, int shiftw, int offsetw)
+{
+    PPAScopeEvent(GenerateReferencePlanes);
+
+    /* Locate plane x in the shared scratch buffer, then skip its integer margins */
+    short* planeBase = m_intermediateValues + x * m_intStride * (m_reconPic->getHeight() + s_tmpMarginY * 4);
+    short* src = planeBase + s_intMarginY * m_intStride + s_intMarginX;
+
+    /* Only y = 1..3 are produced here; the y=0 plane came out of the horizontal pass */
+    pixel* dst[3];
+    for (int y = 1; y <= 3; ++y)
+    {
+        dst[y - 1] = m_lumaPlane[x][y] - s_tmpMarginY * m_lumaStride - s_tmpMarginX;
+    }
+
+    primitives.filterVwghtd(src, m_intStride,
+                            dst[0], dst[1], dst[2], m_lumaStride,
+                            m_filterWidth, m_filterHeight,
+                            m_reconPic->m_lumaMarginX - s_tmpMarginX,
+                            m_reconPic->m_lumaMarginY - s_tmpMarginY,
+                            w, roundw, shiftw, offsetw);
+}
\ No newline at end of file
diff -r 031c4c889edc -r 0b14a9a0468b source/common/reference.h
--- a/source/common/reference.h Thu Jul 18 00:40:54 2013 -0500
+++ b/source/common/reference.h Thu Jul 18 11:20:01 2013 +0530
@@ -28,6 +28,8 @@
#include "threading.h"
#include "threadpool.h"
+#include "TLibCommon/TComSlice.h"
+
class TComPicYuv;
namespace x265 {
@@ -42,6 +44,8 @@
~MotionReference();
void generateReferencePlanes();
+ void generateReferencePlanes(wpScalingParam *w);
+
/* indexed by [hpelx|qpelx][hpely|qpely] */
pixel* m_lumaPlane[4][4];
@@ -54,6 +58,8 @@
bool findJob();
void generateReferencePlane(int idx);
+ void generateReferencePlane(int x, int w, int roundw, int shiftw, int offsetw);
+
intptr_t m_startPad;
TComPicYuv *m_reconPic;
diff -r 031c4c889edc -r 0b14a9a0468b source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Thu Jul 18 00:40:54 2013 -0500
+++ b/source/encoder/encoder.cpp Thu Jul 18 11:20:01 2013 +0530
@@ -196,7 +196,7 @@
setQP(param->qp);
//====== Motion search ========
- if (param->searchMethod != X265_ORIG_SEARCH && (param->bEnableWeightedPred || param->bEnableWeightedBiPred))
+ if (param->searchMethod != X265_ORIG_SEARCH && (param->bEnableWeightedBiPred))
{
x265_log(param, X265_LOG_WARNING, "Weighted prediction only supported by HM ME, forcing --me 4\n");
param->searchMethod = X265_ORIG_SEARCH;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: xhevc_deepthid.patch
Type: text/x-patch
Size: 10056 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20130718/564ac81a/attachment-0001.bin>
More information about the x265-devel
mailing list