[x265] [PATCH RFC] Calling C primitive for weighted-IPFilter and using x265 ME
Steve Borho
steve at borho.org
Thu Jul 18 09:24:01 CEST 2013
On Thu, Jul 18, 2013 at 12:50 AM, <deepthidevaki at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Deepthi Devaki
> # Date 1374126601 -19800
> # Node ID 0b14a9a0468bbe26ff038027d735f5f97a4fafea
> # Parent 031c4c889edc4d2af969e2a717cd62c5a950e61a
> Calling C primitive for weighted-IPFilter and using x265 ME
>
> diff -r 031c4c889edc -r 0b14a9a0468b source/Lib/TLibCommon/TComPicYuv.cpp
> --- a/source/Lib/TLibCommon/TComPicYuv.cpp Thu Jul 18 00:40:54 2013
> -0500
> +++ b/source/Lib/TLibCommon/TComPicYuv.cpp Thu Jul 18 11:20:01 2013
> +0530
> @@ -251,6 +251,24 @@
> m_bIsBorderExtended = true;
> }
>
> +Void TComPicYuv::extendPicBorder(x265::ThreadPool *pool, wpScalingParam
> *w)
> +{
> + if (m_bIsBorderExtended)
> + return;
>
We can't return early here anymore. Even when the border has already been
extended, we need to make sure a MotionReference exists for this weight w.
> + /* HPEL generation requires luma integer plane to already be extended
> */
> + xExtendPicCompBorder(getLumaAddr(), getStride(), getWidth(),
> getHeight(), m_lumaMarginX, m_lumaMarginY);
> +
> + xExtendPicCompBorder(getCbAddr(), getCStride(), getWidth() >> 1,
> getHeight() >> 1, m_chromaMarginX, m_chromaMarginY);
> + xExtendPicCompBorder(getCrAddr(), getCStride(), getWidth() >> 1,
> getHeight() >> 1, m_chromaMarginX, m_chromaMarginY);
> +
> + if (m_refList == NULL)
> + m_refList = new x265::MotionReference(this, pool);
> + m_refList->generateReferencePlanes(w);
>
Similarly here, m_refList needs to be turned into a real linked list now.
If there isn't a MotionReference with weight w, we should insert a new
MotionReference to the start of the list and interpolate with weight w.
The code that frees m_refList will need to be made aware that it is a
list, to avoid leaking memory.
> + m_bIsBorderExtended = true;
> +}
> +
> Void TComPicYuv::xExtendPicCompBorder(Pel* recon, Int stride, Int width,
> Int height, Int iMarginX, Int iMarginY)
> {
> Int x, y;
> diff -r 031c4c889edc -r 0b14a9a0468b source/Lib/TLibCommon/TComPicYuv.h
> --- a/source/Lib/TLibCommon/TComPicYuv.h Thu Jul 18 00:40:54 2013
> -0500
> +++ b/source/Lib/TLibCommon/TComPicYuv.h Thu Jul 18 11:20:01 2013
> +0530
> @@ -184,6 +184,7 @@
>
> // Extend function of picture buffer
> Void extendPicBorder(x265::ThreadPool *pool);
> + Void extendPicBorder(x265::ThreadPool *pool, wpScalingParam *w);
>
> // Dump picture
> Void dump(Char* pFileName, Bool bAdd = false);
> diff -r 031c4c889edc -r 0b14a9a0468b source/Lib/TLibCommon/TComSlice.h
> --- a/source/Lib/TLibCommon/TComSlice.h Thu Jul 18 00:40:54 2013 -0500
> +++ b/source/Lib/TLibCommon/TComSlice.h Thu Jul 18 11:20:01 2013 +0530
> @@ -1413,7 +1413,6 @@
> UInt m_sliceSegmentBits;
> Bool m_bFinalized;
>
> - wpScalingParam m_weightPredTable[2][MAX_NUM_REF][3]; //
> [REF_PIC_LIST_0 or REF_PIC_LIST_1][refIdx][0:Y, 1:U, 2:V]
> wpACDCParam m_weightACDCParam[3]; // [0:Y, 1:U,
> 2:V]
>
> std::vector<UInt> m_tileByteLocation;
> @@ -1431,6 +1430,8 @@
>
> public:
>
> + wpScalingParam m_weightPredTable[2][MAX_NUM_REF][3]; //
> [REF_PIC_LIST_0 or REF_PIC_LIST_1][refIdx][0:Y, 1:U, 2:V]
> +
> TComSlice();
> virtual ~TComSlice();
> Void initSlice();
> diff -r 031c4c889edc -r 0b14a9a0468b source/Lib/TLibEncoder/TEncSlice.cpp
> --- a/source/Lib/TLibEncoder/TEncSlice.cpp Thu Jul 18 00:40:54 2013
> -0500
> +++ b/source/Lib/TLibEncoder/TEncSlice.cpp Thu Jul 18 11:20:01 2013
> +0530
> @@ -424,13 +424,30 @@
>
> Int numPredDir = slice->isInterP() ? 1 : 2;
>
> - for (Int refList = 0; refList < numPredDir; refList++)
> + wpexplicit = (slice->getSliceType() == P_SLICE &&
> slice->getPPS()->getUseWP()) ;
> +
> + if(wpexplicit)
> {
> - RefPicList picList = (refList ? REF_PIC_LIST_1 : REF_PIC_LIST_0);
> - for (Int refIdxTemp = 0; refIdxTemp <
> slice->getNumRefIdx(picList); refIdxTemp++)
> + for (Int refList = 0; refList < numPredDir; refList++)
> {
> - // To do: Call the merged IP + weighted frames if weighted
> prediction enabled
> - slice->getRefPic(picList,
> refIdxTemp)->getPicYuvRec()->extendPicBorder(x265::ThreadPool::getThreadPool());
> + RefPicList picList = (refList ? REF_PIC_LIST_1 :
> REF_PIC_LIST_0);
> + for (Int refIdxTemp = 0; refIdxTemp <
> slice->getNumRefIdx(picList); refIdxTemp++)
> + {
> + //Call the merged IP + weighted frames if weighted
> prediction enabled
> + wpScalingParam *w =
> &(slice->m_weightPredTable[picList][refIdxTemp][0]);
> + slice->getRefPic(picList,
> refIdxTemp)->getPicYuvRec()->extendPicBorder(x265::ThreadPool::getThreadPool(),
> w);
> + }
> + }
> + }
> + else
> + {
> + for (Int refList = 0; refList < numPredDir; refList++)
> + {
> + RefPicList picList = (refList ? REF_PIC_LIST_1 :
> REF_PIC_LIST_0);
> + for (Int refIdxTemp = 0; refIdxTemp <
> slice->getNumRefIdx(picList); refIdxTemp++)
> + {
> + slice->getRefPic(picList,
> refIdxTemp)->getPicYuvRec()->extendPicBorder(x265::ThreadPool::getThreadPool());
> + }
> }
> }
>
ok
>
> diff -r 031c4c889edc -r 0b14a9a0468b source/common/reference.cpp
> --- a/source/common/reference.cpp Thu Jul 18 00:40:54 2013 -0500
> +++ b/source/common/reference.cpp Thu Jul 18 11:20:01 2013 +0530
> @@ -141,6 +141,50 @@
> xFree(m_intermediateValues);
> }
>
> +void MotionReference::generateReferencePlanes(wpScalingParam *w)
> +{
> + PPAScopeEvent(GenerateIntermediates);
> + m_intermediateValues = (short*)xMalloc(short, 4 * m_intStride *
> (m_reconPic->getHeight() + s_tmpMarginY * 4));
> +
> + short* intPtrF = m_intermediateValues;
> + short* intPtrA = m_intermediateValues + 1 * m_intStride *
> (m_reconPic->getHeight() + s_tmpMarginY * 4);
> + short* intPtrB = m_intermediateValues + 2 * m_intStride *
> (m_reconPic->getHeight() + s_tmpMarginY * 4);
> + short* intPtrC = m_intermediateValues + 3 * m_intStride *
> (m_reconPic->getHeight() + s_tmpMarginY * 4);
> +
> + int bufOffset = -(s_tmpMarginY + s_intMarginY) * m_lumaStride -
> (s_tmpMarginX + s_intMarginX);
> + pixel *srcPtr = (pixel*)m_reconPic->getLumaAddr() + bufOffset;
> +
> + int weight = w->inputWeight;
> + int offset = w->inputOffset * (1 << (g_bitDepth - 8));
> + int shift = w->log2WeightDenom;
> + int round = (w->log2WeightDenom >= 1) ? (1 <<
> (w->log2WeightDenom - 1)) : (0);
> +
> +
> + /* This one function call generates the four intermediate (short)
> planes for each
> + * QPEL offset in the horizontal direction. At the same time it
> outputs the three
> + * Y=0 output (padded pixel) planes since they require no
> vertical interpolation */
> +
> + primitives.filterHwghtd(srcPtr, m_lumaStride, //
> source buffer
> + intPtrF, intPtrA, intPtrB, intPtrC, m_intStride,
> // 4 intermediate HPEL buffers
> + m_lumaPlane[1][0] + bufOffset,
> + m_lumaPlane[2][0] + bufOffset,
> + m_lumaPlane[3][0] + bufOffset, m_lumaStride,
> // 3 (x=n, y=0) output buffers (no V interp)
> + m_filterWidth + (2 * s_intMarginX),
> // filter dimensions with margins
> + m_filterHeight + (2 * s_intMarginY),
> + m_reconPic->m_lumaMarginX - s_tmpMarginX -
> s_intMarginX, // pixel extension margins
> + m_reconPic->m_lumaMarginY - s_tmpMarginY -
> s_intMarginY,
> + weight, round, shift, offset);
> +
> + /* serial path for when no thread pool is present: ALWAYS calling
> serial path temporarily until weightp+thread fix */
>
What happens when it runs in parallel? Is it just that you need to store
the weights in the MotionReference so it can pass them along to the worker
threads?
> + for (int i = 0; i < 4; i++)
> + {
> + generateReferencePlane(i, weight, round, shift, offset);
> + }
> +
> + xFree(m_intermediateValues);
> +}
> +
> +
> bool MotionReference::findJob()
> {
> /* Called by thread pool worker threads */
> @@ -175,3 +219,19 @@
>
> primitives.filterVmulti(intPtr, m_intStride, dstPtr1, dstPtr2,
> dstPtr3, m_lumaStride, m_filterWidth, m_filterHeight,
> m_reconPic->m_lumaMarginX - s_tmpMarginX, m_reconPic->m_lumaMarginY -
> s_tmpMarginY);
> }
>
Might as well pass the wpScalingParam to this function too.
> +void MotionReference::generateReferencePlane(int x, int w, int roundw,
> int shiftw, int offsetw)
> +{
> + PPAScopeEvent(GenerateReferencePlanes);
> +
> + /* this function will be called by 4 threads, with x = 0, 1, 2, 3 */
> + short* filteredBlockTmp = m_intermediateValues + x * m_intStride *
> (m_reconPic->getHeight() + s_tmpMarginY * 4);
> + short* intPtr = filteredBlockTmp + s_intMarginY * m_intStride +
> s_intMarginX;
> +
> + /* the Y=0 plane was generated during horizontal interpolation */
> + pixel *dstPtr1 = m_lumaPlane[x][1] - s_tmpMarginY * m_lumaStride -
> s_tmpMarginX;
> + pixel *dstPtr2 = m_lumaPlane[x][2] - s_tmpMarginY * m_lumaStride -
> s_tmpMarginX;
> + pixel *dstPtr3 = m_lumaPlane[x][3] - s_tmpMarginY * m_lumaStride -
> s_tmpMarginX;
> +
> + primitives.filterVwghtd(intPtr, m_intStride, dstPtr1, dstPtr2,
> dstPtr3, m_lumaStride, m_filterWidth, m_filterHeight,
> m_reconPic->m_lumaMarginX - s_tmpMarginX, m_reconPic->m_lumaMarginY -
> s_tmpMarginY, w, roundw, shiftw, offsetw);
> +}
> \ No newline at end of file
> diff -r 031c4c889edc -r 0b14a9a0468b source/common/reference.h
> --- a/source/common/reference.h Thu Jul 18 00:40:54 2013 -0500
> +++ b/source/common/reference.h Thu Jul 18 11:20:01 2013 +0530
> @@ -28,6 +28,8 @@
> #include "threading.h"
> #include "threadpool.h"
>
> +#include "TLibCommon/TComSlice.h"
> +
> class TComPicYuv;
>
> namespace x265 {
> @@ -42,6 +44,8 @@
> ~MotionReference();
>
> void generateReferencePlanes();
> + void generateReferencePlanes(wpScalingParam *w);
> +
>
> /* indexed by [hpelx|qpelx][hpely|qpely] */
> pixel* m_lumaPlane[4][4];
> @@ -54,6 +58,8 @@
>
> bool findJob();
> void generateReferencePlane(int idx);
> + void generateReferencePlane(int x, int w, int roundw, int shiftw, int
> offsetw);
> +
>
> intptr_t m_startPad;
> TComPicYuv *m_reconPic;
> diff -r 031c4c889edc -r 0b14a9a0468b source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp Thu Jul 18 00:40:54 2013 -0500
> +++ b/source/encoder/encoder.cpp Thu Jul 18 11:20:01 2013 +0530
> @@ -196,7 +196,7 @@
> setQP(param->qp);
>
> //====== Motion search ========
> - if (param->searchMethod != X265_ORIG_SEARCH &&
> (param->bEnableWeightedPred || param->bEnableWeightedBiPred))
>
Probably too early for this. Once this works, most of the HM search code
will be removed.
> + if (param->searchMethod != X265_ORIG_SEARCH &&
> (param->bEnableWeightedBiPred))
> {
> x265_log(param, X265_LOG_WARNING, "Weighted prediction only
> supported by HM ME, forcing --me 4\n");
> param->searchMethod = X265_ORIG_SEARCH;
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> http://mailman.videolan.org/listinfo/x265-devel
>
>
--
Steve Borho
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20130718/ce6d5019/attachment.html>
More information about the x265-devel
mailing list