[x265] [PATCH] weightp: pass struct to tryCommonDenom() to cache motion compensated reference planes for reuse
kavitha at multicorewareinc.com
kavitha at multicorewareinc.com
Thu Feb 20 13:27:32 CET 2014
# HG changeset patch
# User Kavitha Sampath <kavitha at multicorewareinc.com>
# Date 1392899156 -19800
# Thu Feb 20 17:55:56 2014 +0530
# Node ID a359217a5a5dbd2c8183c41dbf3f7193a09e0269
# Parent 3389061b75a486e004409ab628c46fed39d03b72
weightp: pass struct to tryCommonDenom() to cache motion compensated reference planes for reuse
diff -r 3389061b75a4 -r a359217a5a5d source/encoder/weightPrediction.cpp
--- a/source/encoder/weightPrediction.cpp Wed Feb 19 17:03:21 2014 -0600
+++ b/source/encoder/weightPrediction.cpp Thu Feb 20 17:55:56 2014 +0530
@@ -32,6 +32,13 @@
using namespace x265;
namespace weightp {
+
+struct cache
+{
+ pixel *mcRef[2][MAX_NUM_REF][3];
+ uint32_t weightcost[2][MAX_NUM_REF][3];
+};
+
/* make a motion compensated copy of lowres ref into mcout with the same stride.
* The borders of mcout are not extended */
void mcLuma(pixel * mcout,
@@ -213,7 +220,7 @@
bool tryCommonDenom(TComSlice& slice,
x265_param& param,
wpScalingParam wp[2][MAX_NUM_REF][3],
- pixel * temp,
+ cache& cacheData,
int indenom)
{
TComPic *pic = slice.getPic();
@@ -221,12 +228,6 @@
Lowres& fenc = pic->m_lowres;
int curPoc = slice.getPOC();
- /* caller provides temp space for two full-pel planes. Split it
- * in half for motion compensation of the reference and then the
- * weighting */
- pixel *mcTemp = temp;
- pixel *weightTemp = temp + picorig->getStride() * picorig->getHeight();
-
int log2denom[3] = { indenom };
int csp = picorig->m_picCsp;
int hshift = CHROMA_H_SHIFT(csp);
@@ -327,6 +328,8 @@
pixel *fref;
int origstride, frefstride;
int width, height;
+ pixel* &buf = cacheData.mcRef[list][ref][yuv];
+ uint32_t& origscore = cacheData.weightcost[list][ref][yuv];
switch (yuv)
{
case 0:
@@ -338,8 +341,12 @@
if (bMotionCompensate)
{
- mcLuma(mcTemp, refLowres, mvCosts, fenc.intraCost, mvs);
- fref = mcTemp;
+ if (!buf)
+ {
+ buf = X265_MALLOC(pixel, picorig->getStride() * picorig->getHeight());
+ mcLuma(buf, refLowres, mvCosts, fenc.intraCost, mvs);
+ }
+ fref = buf;
}
break;
@@ -359,8 +366,12 @@
if (bMotionCompensate)
{
- mcChroma(mcTemp, fref, fenc, frefstride, mvCosts, fenc.intraCost, mvs, height, width, csp);
- fref = mcTemp;
+ if (!buf)
+ {
+ buf = X265_MALLOC(pixel, picorig->getStride() * picorig->getHeight());
+ mcChroma(buf, fref, fenc, frefstride, mvCosts, fenc.intraCost, mvs, height, width, csp);
+ }
+ fref = buf;
}
break;
@@ -373,8 +384,12 @@
if (bMotionCompensate)
{
- mcChroma(mcTemp, fref, fenc, frefstride, mvCosts, fenc.intraCost, mvs, height, width, csp);
- fref = mcTemp;
+ if (!buf)
+ {
+ buf = X265_MALLOC(pixel, picorig->getStride() * picorig->getHeight());
+ mcChroma(buf, fref, fenc, frefstride, mvCosts, fenc.intraCost, mvs, height, width, csp);
+ }
+ fref = buf;
}
break;
@@ -388,10 +403,14 @@
int mindenom = w.log2WeightDenom;
int minscale = w.inputWeight;
int minoff = 0;
+ pixel *weightTemp = X265_MALLOC(pixel, picorig->getStride() * picorig->getHeight());
- uint32_t origscore = weightCost(orig, origstride, fref, frefstride, weightTemp, width, height, NULL);
- if (!origscore)
- continue;
+ if (origscore == 0)
+ {
+ origscore = weightCost(orig, origstride, fref, frefstride, weightTemp, width, height, NULL);
+ if (!origscore)
+ continue;
+ }
uint32_t minscore = origscore;
bool bFound = false;
@@ -437,6 +456,7 @@
break;
}
}
+ X265_FREE(weightTemp);
// if chroma denoms diverged, we must start over
if (mindenom < log2denom[yuv])
@@ -481,48 +501,38 @@
wpScalingParam wp[2][MAX_NUM_REF][3];
int numPredDir = slice.isInterP() ? 1 : 2;
- /* TODO: perf - collect some of this data into a struct which is passed to
- * tryCommonDenom() to avoid recalculating some data. Motion compensated
- * reference planes can be cached this way */
-
- TComPicYuv *orig = slice.getPic()->getPicYuvOrg();
- pixel *temp = X265_MALLOC(pixel, 2 * orig->getStride() * orig->getHeight());
- if (temp)
+ weightp::cache cacheData;
+ memset(&cacheData, 0, sizeof(cacheData));
+ int denom = slice.getNumRefIdx(REF_PIC_LIST_0) > 3 ? 7 : 6;
+ do
{
- int denom = slice.getNumRefIdx(REF_PIC_LIST_0) > 3 ? 7 : 6;
- do
- {
- /* reset weight states */
- for (int list = 0; list < numPredDir; list++)
- {
- for (int ref = 0; ref < slice.getNumRefIdx(list); ref++)
- {
- SET_WEIGHT(wp[list][ref][0], false, 1 << denom, denom, 0);
- SET_WEIGHT(wp[list][ref][1], false, 1 << denom, denom, 0);
- SET_WEIGHT(wp[list][ref][2], false, 1 << denom, denom, 0);
- }
- }
-
- if (weightp::tryCommonDenom(slice, param, wp, temp, denom))
- break;
- denom--; // decrement to satisfy the range limitation
- }
- while (denom > 0);
-
- X265_FREE(temp);
- }
-
- if (param.logLevel >= X265_LOG_DEBUG)
- {
- char buf[1024];
- int p = 0;
- bool bWeighted = false;
-
- p = sprintf(buf, "poc: %d weights:", slice.getPOC());
+ /* reset weight states */
for (int list = 0; list < numPredDir; list++)
{
for (int ref = 0; ref < slice.getNumRefIdx(list); ref++)
{
+ SET_WEIGHT(wp[list][ref][0], false, 1 << denom, denom, 0);
+ SET_WEIGHT(wp[list][ref][1], false, 1 << denom, denom, 0);
+ SET_WEIGHT(wp[list][ref][2], false, 1 << denom, denom, 0);
+ }
+ }
+
+ if (weightp::tryCommonDenom(slice, param, wp, cacheData, denom))
+ break;
+ denom--; // decrement to satisfy the range limitation
+ }
+ while (denom > 0);
+
+ char buf[1024];
+ int p = 0;
+ bool bWeighted = false;
+ p = sprintf(buf, "poc: %d weights:", slice.getPOC());
+ for (int list = 0; list < numPredDir; list++)
+ {
+ for (int ref = 0; ref < slice.getNumRefIdx(list); ref++)
+ {
+ if (param.logLevel >= X265_LOG_DEBUG)
+ {
wpScalingParam* w = &wp[list][ref][0];
if (w[0].bPresentFlag || w[1].bPresentFlag || w[2].bPresentFlag)
{
@@ -537,13 +547,18 @@
p += sprintf(buf + p, "]");
}
}
+ for (int yuv = 0; yuv < 3; yuv++)
+ {
+ if(cacheData.mcRef[list][ref][yuv])
+ X265_FREE(cacheData.mcRef[list][ref][yuv]);
+ }
}
- if (bWeighted)
- {
- if (p < 80) // pad with spaces to ensure progress line overwritten
- sprintf(buf + p, "%*s", 80-p, " ");
- x265_log(&param, X265_LOG_DEBUG, "%s\n", buf);
- }
+ }
+ if (param.logLevel >= X265_LOG_DEBUG && bWeighted)
+ {
+ if (p < 80) // pad with spaces to ensure progress line overwritten
+ sprintf(buf + p, "%*s", 80-p, " ");
+ x265_log(&param, X265_LOG_DEBUG, "%s\n", buf);
}
slice.setWpScaling(wp);
}
More information about the x265-devel
mailing list