<div dir="ltr"><br><div class="gmail_extra"><br><br><div class="gmail_quote">On Thu, Jul 18, 2013 at 12:50 AM, <span dir="ltr">&lt;<a href="mailto:deepthidevaki@multicorewareinc.com" target="_blank">deepthidevaki@multicorewareinc.com</a>&gt;</span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex"># HG changeset patch<br>
# User Deepthi Devaki<br>
# Date 1374126601 -19800<br>
# Node ID 0b14a9a0468bbe26ff038027d735f5f97a4fafea<br>
# Parent 031c4c889edc4d2af969e2a717cd62c5a950e61a<br>
Calling C primitive for weighted-IPFilter and using x265 ME<br>
<br>
diff -r 031c4c889edc -r 0b14a9a0468b source/Lib/TLibCommon/TComPicYuv.cpp<br>
--- a/source/Lib/TLibCommon/TComPicYuv.cpp Thu Jul 18 00:40:54 2013 -0500<br>
+++ b/source/Lib/TLibCommon/TComPicYuv.cpp Thu Jul 18 11:20:01 2013 +0530<br>
@@ -251,6 +251,24 @@<br>
m_bIsBorderExtended = true;<br>
}<br>
<br>
+Void TComPicYuv::extendPicBorder(x265::ThreadPool *pool, wpScalingParam *w)<br>
+{<br>
+ if (m_bIsBorderExtended)<br>
+ return;<br></blockquote><div><br></div><div>We can't early out anymore here. We need to make sure there exists a MotionReference for this weight w.</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">
+ /* HPEL generation requires luma integer plane to already be extended */<br>
+ xExtendPicCompBorder(getLumaAddr(), getStride(), getWidth(), getHeight(), m_lumaMarginX, m_lumaMarginY);<br>
+<br>
+ xExtendPicCompBorder(getCbAddr(), getCStride(), getWidth() >> 1, getHeight() >> 1, m_chromaMarginX, m_chromaMarginY);<br>
+ xExtendPicCompBorder(getCrAddr(), getCStride(), getWidth() >> 1, getHeight() >> 1, m_chromaMarginX, m_chromaMarginY);<br>
+<br>
+ if (m_refList == NULL)<br>
+ m_refList = new x265::MotionReference(this, pool);<br>
+ m_refList->generateReferencePlanes(w);<br></blockquote><div><br></div><div>Similarly here, m_refList needs to be turned into a real linked list now. If there isn't a MotionReference with weight w, we should insert a new MotionReference at the start of the list and interpolate with weight w. The code that frees m_refList will need to be made aware that it is a list, to avoid leaking memory.</div>
<div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">
+ m_bIsBorderExtended = true;<br>
+}<br>
+<br>
Void TComPicYuv::xExtendPicCompBorder(Pel* recon, Int stride, Int width, Int height, Int iMarginX, Int iMarginY)<br>
{<br>
Int x, y;<br>
diff -r 031c4c889edc -r 0b14a9a0468b source/Lib/TLibCommon/TComPicYuv.h<br>
--- a/source/Lib/TLibCommon/TComPicYuv.h Thu Jul 18 00:40:54 2013 -0500<br>
+++ b/source/Lib/TLibCommon/TComPicYuv.h Thu Jul 18 11:20:01 2013 +0530<br>
@@ -184,6 +184,7 @@<br>
<br>
// Extend function of picture buffer<br>
Void extendPicBorder(x265::ThreadPool *pool);<br>
+ Void extendPicBorder(x265::ThreadPool *pool, wpScalingParam *w);<br>
<br>
// Dump picture<br>
Void dump(Char* pFileName, Bool bAdd = false);<br>
diff -r 031c4c889edc -r 0b14a9a0468b source/Lib/TLibCommon/TComSlice.h<br>
--- a/source/Lib/TLibCommon/TComSlice.h Thu Jul 18 00:40:54 2013 -0500<br>
+++ b/source/Lib/TLibCommon/TComSlice.h Thu Jul 18 11:20:01 2013 +0530<br>
@@ -1413,7 +1413,6 @@<br>
UInt m_sliceSegmentBits;<br>
Bool m_bFinalized;<br>
<br>
- wpScalingParam m_weightPredTable[2][MAX_NUM_REF][3]; // [REF_PIC_LIST_0 or REF_PIC_LIST_1][refIdx][0:Y, 1:U, 2:V]<br>
wpACDCParam m_weightACDCParam[3]; // [0:Y, 1:U, 2:V]<br>
<br>
std::vector&lt;UInt&gt; m_tileByteLocation;<br>
@@ -1431,6 +1430,8 @@<br>
<br>
public:<br>
<br>
+ wpScalingParam m_weightPredTable[2][MAX_NUM_REF][3]; // [REF_PIC_LIST_0 or REF_PIC_LIST_1][refIdx][0:Y, 1:U, 2:V]<br>
+<br>
TComSlice();<br>
virtual ~TComSlice();<br>
Void initSlice();<br>
diff -r 031c4c889edc -r 0b14a9a0468b source/Lib/TLibEncoder/TEncSlice.cpp<br>
--- a/source/Lib/TLibEncoder/TEncSlice.cpp Thu Jul 18 00:40:54 2013 -0500<br>
+++ b/source/Lib/TLibEncoder/TEncSlice.cpp Thu Jul 18 11:20:01 2013 +0530<br>
@@ -424,13 +424,30 @@<br>
<br>
Int numPredDir = slice->isInterP() ? 1 : 2;<br>
<br>
- for (Int refList = 0; refList < numPredDir; refList++)<br>
+ wpexplicit = (slice->getSliceType() == P_SLICE && slice->getPPS()->getUseWP()) ;<br>
+<br>
+ if(wpexplicit)<br>
{<br>
- RefPicList picList = (refList ? REF_PIC_LIST_1 : REF_PIC_LIST_0);<br>
- for (Int refIdxTemp = 0; refIdxTemp < slice->getNumRefIdx(picList); refIdxTemp++)<br>
+ for (Int refList = 0; refList < numPredDir; refList++)<br>
{<br>
- // To do: Call the merged IP + weighted frames if weighted prediction enabled<br>
- slice->getRefPic(picList, refIdxTemp)->getPicYuvRec()->extendPicBorder(x265::ThreadPool::getThreadPool());<br>
+ RefPicList picList = (refList ? REF_PIC_LIST_1 : REF_PIC_LIST_0);<br>
+ for (Int refIdxTemp = 0; refIdxTemp < slice->getNumRefIdx(picList); refIdxTemp++)<br>
+ {<br>
+ //Call the merged IP + weighted frames if weighted prediction enabled<br>
+ wpScalingParam *w = &(slice->m_weightPredTable[picList][refIdxTemp][0]);<br>
+ slice->getRefPic(picList, refIdxTemp)->getPicYuvRec()->extendPicBorder(x265::ThreadPool::getThreadPool(), w);<br>
+ }<br>
+ }<br>
+ }<br>
+ else<br>
+ {<br>
+ for (Int refList = 0; refList < numPredDir; refList++)<br>
+ {<br>
+ RefPicList picList = (refList ? REF_PIC_LIST_1 : REF_PIC_LIST_0);<br>
+ for (Int refIdxTemp = 0; refIdxTemp < slice->getNumRefIdx(picList); refIdxTemp++)<br>
+ {<br>
+ slice->getRefPic(picList, refIdxTemp)->getPicYuvRec()->extendPicBorder(x265::ThreadPool::getThreadPool());<br>
+ }<br>
}<br>
}<br></blockquote><div><br></div><div>ok</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">
<br>
diff -r 031c4c889edc -r 0b14a9a0468b source/common/reference.cpp<br>
--- a/source/common/reference.cpp Thu Jul 18 00:40:54 2013 -0500<br>
+++ b/source/common/reference.cpp Thu Jul 18 11:20:01 2013 +0530<br>
@@ -141,6 +141,50 @@<br>
xFree(m_intermediateValues);<br>
}<br>
<br>
+void MotionReference::generateReferencePlanes(wpScalingParam *w)<br>
+{<br>
+ PPAScopeEvent(GenerateIntermediates);<br>
+ m_intermediateValues = (short*)xMalloc(short, 4 * m_intStride * (m_reconPic->getHeight() + s_tmpMarginY * 4));<br>
+<br>
+ short* intPtrF = m_intermediateValues;<br>
+ short* intPtrA = m_intermediateValues + 1 * m_intStride * (m_reconPic->getHeight() + s_tmpMarginY * 4);<br>
+ short* intPtrB = m_intermediateValues + 2 * m_intStride * (m_reconPic->getHeight() + s_tmpMarginY * 4);<br>
+ short* intPtrC = m_intermediateValues + 3 * m_intStride * (m_reconPic->getHeight() + s_tmpMarginY * 4);<br>
+<br>
+ int bufOffset = -(s_tmpMarginY + s_intMarginY) * m_lumaStride - (s_tmpMarginX + s_intMarginX);<br>
+ pixel *srcPtr = (pixel*)m_reconPic->getLumaAddr() + bufOffset;<br>
+<br>
+ int weight = w->inputWeight;<br>
+ int offset = w->inputOffset * (1 << (g_bitDepth - 8));<br>
+ int shift = w->log2WeightDenom;<br>
+ int round = (w->log2WeightDenom >= 1) ? (1 << (w->log2WeightDenom - 1)) : (0);<br>
+<br>
+<br>
+ /* This one function call generates the four intermediate (short) planes for each<br>
+ * QPEL offset in the horizontal direction. At the same time it outputs the three<br>
+ * Y=0 output (padded pixel) planes since they require no vertical interpolation */<br>
+<br>
+ primitives.filterHwghtd(srcPtr, m_lumaStride, // source buffer<br>
+ intPtrF, intPtrA, intPtrB, intPtrC, m_intStride, // 4 intermediate HPEL buffers<br>
+ m_lumaPlane[1][0] + bufOffset,<br>
+ m_lumaPlane[2][0] + bufOffset,<br>
+ m_lumaPlane[3][0] + bufOffset, m_lumaStride, // 3 (x=n, y=0) output buffers (no V interp)<br>
+ m_filterWidth + (2 * s_intMarginX), // filter dimensions with margins<br>
+ m_filterHeight + (2 * s_intMarginY),<br>
+ m_reconPic->m_lumaMarginX - s_tmpMarginX - s_intMarginX, // pixel extension margins<br>
+ m_reconPic->m_lumaMarginY - s_tmpMarginY - s_intMarginY,<br>
+ weight, round, shift, offset);<br>
+<br>
+ /* serial path for when no thread pool is present: ALWAYS calling serial path temporarily until weightp+thread fix */<br></blockquote><div><br></div><div>What happens when it runs in parallel? Is it just that you need to store the weights in the MotionReference so it can pass them along to the worker threads?</div>
<div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">
+ for (int i = 0; i < 4; i++)<br>
+ {<br>
+ generateReferencePlane(i, weight, round, shift, offset);<br>
+ }<br>
+<br>
+ xFree(m_intermediateValues);<br>
+}<br>
+<br>
+<br>
bool MotionReference::findJob()<br>
{<br>
/* Called by thread pool worker threads */<br>
@@ -175,3 +219,19 @@<br>
<br>
primitives.filterVmulti(intPtr, m_intStride, dstPtr1, dstPtr2, dstPtr3, m_lumaStride, m_filterWidth, m_filterHeight, m_reconPic->m_lumaMarginX - s_tmpMarginX, m_reconPic->m_lumaMarginY - s_tmpMarginY);<br>
}<br></blockquote><div><br></div><div>Might as well pass wpScalingParam to this function as well.</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">
+void MotionReference::generateReferencePlane(int x, int w, int roundw, int shiftw, int offsetw)<br>
+{<br>
+ PPAScopeEvent(GenerateReferencePlanes);<br>
+<br>
+ /* this function will be called by 4 threads, with x = 0, 1, 2, 3 */<br>
+ short* filteredBlockTmp = m_intermediateValues + x * m_intStride * (m_reconPic->getHeight() + s_tmpMarginY * 4);<br>
+ short* intPtr = filteredBlockTmp + s_intMarginY * m_intStride + s_intMarginX;<br>
+<br>
+ /* the Y=0 plane was generated during horizontal interpolation */<br>
+ pixel *dstPtr1 = m_lumaPlane[x][1] - s_tmpMarginY * m_lumaStride - s_tmpMarginX;<br>
+ pixel *dstPtr2 = m_lumaPlane[x][2] - s_tmpMarginY * m_lumaStride - s_tmpMarginX;<br>
+ pixel *dstPtr3 = m_lumaPlane[x][3] - s_tmpMarginY * m_lumaStride - s_tmpMarginX;<br>
+<br>
+ primitives.filterVwghtd(intPtr, m_intStride, dstPtr1, dstPtr2, dstPtr3, m_lumaStride, m_filterWidth, m_filterHeight, m_reconPic->m_lumaMarginX - s_tmpMarginX, m_reconPic->m_lumaMarginY - s_tmpMarginY, w, roundw, shiftw, offsetw);<br>
+}<br>
\ No newline at end of file<br>
diff -r 031c4c889edc -r 0b14a9a0468b source/common/reference.h<br>
--- a/source/common/reference.h Thu Jul 18 00:40:54 2013 -0500<br>
+++ b/source/common/reference.h Thu Jul 18 11:20:01 2013 +0530<br>
@@ -28,6 +28,8 @@<br>
#include "threading.h"<br>
#include "threadpool.h"<br>
<br>
+#include "TLibCommon/TComSlice.h"<br>
+<br>
class TComPicYuv;<br>
<br>
namespace x265 {<br>
@@ -42,6 +44,8 @@<br>
~MotionReference();<br>
<br>
void generateReferencePlanes();<br>
+ void generateReferencePlanes(wpScalingParam *w);<br>
+<br>
<br>
/* indexed by [hpelx|qpelx][hpely|qpely] */<br>
pixel* m_lumaPlane[4][4];<br>
@@ -54,6 +58,8 @@<br>
<br>
bool findJob();<br>
void generateReferencePlane(int idx);<br>
+ void generateReferencePlane(int x, int w, int roundw, int shiftw, int offsetw);<br>
+<br>
<br>
intptr_t m_startPad;<br>
TComPicYuv *m_reconPic;<br>
diff -r 031c4c889edc -r 0b14a9a0468b source/encoder/encoder.cpp<br>
--- a/source/encoder/encoder.cpp Thu Jul 18 00:40:54 2013 -0500<br>
+++ b/source/encoder/encoder.cpp Thu Jul 18 11:20:01 2013 +0530<br>
@@ -196,7 +196,7 @@<br>
setQP(param->qp);<br>
<br>
//====== Motion search ========<br>
- if (param->searchMethod != X265_ORIG_SEARCH && (param->bEnableWeightedPred || param->bEnableWeightedBiPred))<br></blockquote><div><br></div><div>Probably too early for this. Once this works, most of the HM search code will be removed.</div>
<div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">
+ if (param->searchMethod != X265_ORIG_SEARCH && (param->bEnableWeightedBiPred))<br>
{<br>
x265_log(param, X265_LOG_WARNING, "Weighted prediction only supported by HM ME, forcing --me 4\n");<br>
param->searchMethod = X265_ORIG_SEARCH;<br>
<br>_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="http://mailman.videolan.org/listinfo/x265-devel" target="_blank">http://mailman.videolan.org/listinfo/x265-devel</a><br>
<br></blockquote></div><br><br clear="all"><div><br></div>-- <br>Steve Borho
</div></div>