[x265] [PATCH] weightp: pass struct to tryCommonDenom() to cache motion compensated reference planes for reuse
kavitha at multicorewareinc.com
kavitha at multicorewareinc.com
Fri Feb 21 19:50:24 CET 2014
# HG changeset patch
# User Kavitha Sampath <kavitha at multicorewareinc.com>
# Date 1393008408 -19800
# Sat Feb 22 00:16:48 2014 +0530
# Node ID e0f5813915e6b96078eada45f352c8e247d23b32
# Parent 5e2043f89aa11363dffe33a0ff06550a7d862326
weightp: pass struct to tryCommonDenom() to cache motion compensated reference planes for reuse
diff -r 5e2043f89aa1 -r e0f5813915e6 source/encoder/weightPrediction.cpp
--- a/source/encoder/weightPrediction.cpp Fri Feb 21 03:05:48 2014 -0600
+++ b/source/encoder/weightPrediction.cpp Sat Feb 22 00:16:48 2014 +0530
@@ -32,6 +32,14 @@
using namespace x265;
namespace weightp {
+
+struct cache // per-slice state shared across successive tryCommonDenom() calls
+{
+ pixel *mcRef[2][MAX_NUM_REF][3]; // motion compensated ref plane per [list][ref][plane]; NULL until first needed, freed by caller
+ uint32_t unweightedCost[2][MAX_NUM_REF][3]; // weightCost() with no weight (NULL) per [list][ref][plane]; 0 means not yet computed
+ pixel *weightTemp; // scratch buffer handed to weightCost(); allocated and freed by the caller
+};
+
/* make a motion compensated copy of lowres ref into mcout with the same stride.
* The borders of mcout are not extended */
void mcLuma(pixel * mcout,
@@ -213,7 +221,7 @@
bool tryCommonDenom(TComSlice& slice,
x265_param& param,
wpScalingParam wp[2][MAX_NUM_REF][3],
- pixel * temp,
+ cache& cacheData,
int indenom)
{
TComPic *pic = slice.getPic();
@@ -221,12 +229,6 @@
Lowres& fenc = pic->m_lowres;
int curPoc = slice.getPOC();
- /* caller provides temp space for two full-pel planes. Split it
- * in half for motion compensation of the reference and then the
- * weighting */
- pixel *mcTemp = temp;
- pixel *weightTemp = temp + picorig->getStride() * picorig->getHeight();
-
int log2denom[3] = { indenom };
int csp = picorig->m_picCsp;
int hshift = CHROMA_H_SHIFT(csp);
@@ -327,6 +329,8 @@
pixel *fref;
int origstride, frefstride;
int width, height;
+ pixel* &buf = cacheData.mcRef[list][ref][yuv];
+ uint32_t& origscore = cacheData.unweightedCost[list][ref][yuv];
switch (yuv)
{
case 0:
@@ -338,8 +342,12 @@
if (bMotionCompensate)
{
- mcLuma(mcTemp, refLowres, mvCosts, fenc.intraCost, mvs);
- fref = mcTemp;
+ if (!buf)
+ {
+ buf = X265_MALLOC(pixel, picorig->getStride() * picorig->getHeight());
+ mcLuma(buf, refLowres, mvCosts, fenc.intraCost, mvs);
+ }
+ fref = buf;
}
break;
@@ -359,8 +367,12 @@
if (bMotionCompensate)
{
- mcChroma(mcTemp, fref, fenc, frefstride, mvCosts, fenc.intraCost, mvs, height, width, csp);
- fref = mcTemp;
+ if (!buf)
+ {
+ buf = X265_MALLOC(pixel, picorig->getStride() * picorig->getHeight());
+ mcChroma(buf, fref, fenc, frefstride, mvCosts, fenc.intraCost, mvs, height, width, csp);
+ }
+ fref = buf;
}
break;
@@ -373,8 +385,12 @@
if (bMotionCompensate)
{
- mcChroma(mcTemp, fref, fenc, frefstride, mvCosts, fenc.intraCost, mvs, height, width, csp);
- fref = mcTemp;
+ if (!buf)
+ {
+ buf = X265_MALLOC(pixel, picorig->getStride() * picorig->getHeight());
+ mcChroma(buf, fref, fenc, frefstride, mvCosts, fenc.intraCost, mvs, height, width, csp);
+ }
+ fref = buf;
}
break;
@@ -389,9 +405,12 @@
int minscale = w.inputWeight;
int minoff = 0;
- uint32_t origscore = weightCost(orig, origstride, fref, frefstride, weightTemp, width, height, NULL);
- if (!origscore)
- continue;
+ if (origscore == 0)
+ {
+ origscore = weightCost(orig, origstride, fref, frefstride, cacheData.weightTemp, width, height, NULL);
+ if (!origscore)
+ continue;
+ }
uint32_t minscore = origscore;
bool bFound = false;
@@ -431,7 +450,7 @@
}
SET_WEIGHT(w, true, curScale, mindenom, ioff);
- uint32_t s = weightCost(orig, origstride, fref, frefstride, weightTemp, width, height, &w);
+ uint32_t s = weightCost(orig, origstride, fref, frefstride, cacheData.weightTemp, width, height, &w);
COPY4_IF_LT(minscore, s, minscale, curScale, minoff, ioff, bFound, true);
if (minoff == curOffset - oD && ioff != curOffset - oD)
break;
@@ -481,13 +500,11 @@
wpScalingParam wp[2][MAX_NUM_REF][3];
int numPredDir = slice.isInterP() ? 1 : 2;
- /* TODO: perf - collect some of this data into a struct which is passed to
- * tryCommonDenom() to avoid recalculating some data. Motion compensated
- * reference planes can be cached this way */
-
+ weightp::cache cacheData;
+ memset(&cacheData, 0, sizeof(cacheData));
TComPicYuv *orig = slice.getPic()->getPicYuvOrg();
- pixel *temp = X265_MALLOC(pixel, 2 * orig->getStride() * orig->getHeight());
- if (temp)
+ cacheData.weightTemp = X265_MALLOC(pixel, orig->getStride() * orig->getHeight());
+ if (cacheData.weightTemp)
{
int denom = slice.getNumRefIdx(REF_PIC_LIST_0) > 3 ? 7 : 6;
do
@@ -503,13 +520,23 @@
}
}
- if (weightp::tryCommonDenom(slice, param, wp, temp, denom))
+ if (weightp::tryCommonDenom(slice, param, wp, cacheData, denom))
break;
denom--; // decrement to satisfy the range limitation
}
while (denom > 0);
- X265_FREE(temp);
+ X265_FREE(cacheData.weightTemp);
+ for (int list = 0; list < numPredDir; list++)
+ {
+ for (int ref = 0; ref < slice.getNumRefIdx(list); ref++)
+ {
+ for (int yuv = 0; yuv < 3; yuv++)
+ {
+ X265_FREE(cacheData.mcRef[list][ref][yuv]);
+ }
+ }
+ }
}
if (param.logLevel >= X265_LOG_DEBUG)
More information about the x265-devel
mailing list