[x265] [PATCH] weightp: pass struct to tryCommonDenom() to cache motion compensated reference planes for reuse
Steve Borho
steve at borho.org
Fri Feb 21 20:31:08 CET 2014
On Fri, Feb 21, 2014 at 12:50 PM, <kavitha at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Kavitha Sampath <kavitha at multicorewareinc.com>
> # Date 1393008408 -19800
> # Sat Feb 22 00:16:48 2014 +0530
> # Node ID e0f5813915e6b96078eada45f352c8e247d23b32
> # Parent 5e2043f89aa11363dffe33a0ff06550a7d862326
> weightp: pass struct to tryCommonDenom() to cache motion compensated reference planes for reuse
>
> diff -r 5e2043f89aa1 -r e0f5813915e6 source/encoder/weightPrediction.cpp
> --- a/source/encoder/weightPrediction.cpp Fri Feb 21 03:05:48 2014 -0600
> +++ b/source/encoder/weightPrediction.cpp Sat Feb 22 00:16:48 2014 +0530
> @@ -32,6 +32,14 @@
> using namespace x265;
>
> namespace weightp {
> +
> +struct cache
> +{
> + pixel *mcRef[2][MAX_NUM_REF][3];
> + uint32_t unweightedCost[2][MAX_NUM_REF][3];
> + pixel *weightTemp;
> +};
> +
> /* make a motion compensated copy of lowres ref into mcout with the same stride.
> * The borders of mcout are not extended */
> void mcLuma(pixel * mcout,
> @@ -213,7 +221,7 @@
> bool tryCommonDenom(TComSlice& slice,
> x265_param& param,
> wpScalingParam wp[2][MAX_NUM_REF][3],
> - pixel * temp,
> + cache& cacheData,
> int indenom)
> {
> TComPic *pic = slice.getPic();
> @@ -221,12 +229,6 @@
> Lowres& fenc = pic->m_lowres;
> int curPoc = slice.getPOC();
>
> - /* caller provides temp space for two full-pel planes. Split it
> - * in half for motion compensation of the reference and then the
> - * weighting */
> - pixel *mcTemp = temp;
> - pixel *weightTemp = temp + picorig->getStride() * picorig->getHeight();
> -
> int log2denom[3] = { indenom };
> int csp = picorig->m_picCsp;
> int hshift = CHROMA_H_SHIFT(csp);
> @@ -327,6 +329,8 @@
> pixel *fref;
> int origstride, frefstride;
> int width, height;
> + pixel* &buf = cacheData.mcRef[list][ref][yuv];
> + uint32_t& origscore = cacheData.unweightedCost[list][ref][yuv];
> switch (yuv)
> {
> case 0:
> @@ -338,8 +342,12 @@
>
> if (bMotionCompensate)
> {
> - mcLuma(mcTemp, refLowres, mvCosts, fenc.intraCost, mvs);
> - fref = mcTemp;
> + if (!buf)
> + {
> + buf = X265_MALLOC(pixel, picorig->getStride() * picorig->getHeight());
> + mcLuma(buf, refLowres, mvCosts, fenc.intraCost, mvs);
> + }
> + fref = buf;
> }
> break;
>
> @@ -359,8 +367,12 @@
>
> if (bMotionCompensate)
> {
> - mcChroma(mcTemp, fref, fenc, frefstride, mvCosts, fenc.intraCost, mvs, height, width, csp);
> - fref = mcTemp;
> + if (!buf)
> + {
> + buf = X265_MALLOC(pixel, picorig->getStride() * picorig->getHeight());
> + mcChroma(buf, fref, fenc, frefstride, mvCosts, fenc.intraCost, mvs, height, width, csp);
> + }
> + fref = buf;
> }
> break;
>
> @@ -373,8 +385,12 @@
>
> if (bMotionCompensate)
> {
> - mcChroma(mcTemp, fref, fenc, frefstride, mvCosts, fenc.intraCost, mvs, height, width, csp);
> - fref = mcTemp;
> + if (!buf)
> + {
> + buf = X265_MALLOC(pixel, picorig->getStride() * picorig->getHeight());
> + mcChroma(buf, fref, fenc, frefstride, mvCosts, fenc.intraCost, mvs, height, width, csp);
> + }
> + fref = buf;
> }
> break;
>
> @@ -389,9 +405,12 @@
> int minscale = w.inputWeight;
> int minoff = 0;
>
> - uint32_t origscore = weightCost(orig, origstride, fref, frefstride, weightTemp, width, height, NULL);
> - if (!origscore)
> - continue;
> + if (origscore == 0)
> + {
> + origscore = weightCost(orig, origstride, fref, frefstride, cacheData.weightTemp, width, height, NULL);
> + if (!origscore)
> + continue;
> + }
So now it will cache the unweighted cost unless it was zero, in which
case it will be measured again on every call? That seems a bit odd.
>
> uint32_t minscore = origscore;
> bool bFound = false;
> @@ -431,7 +450,7 @@
> }
>
> SET_WEIGHT(w, true, curScale, mindenom, ioff);
> - uint32_t s = weightCost(orig, origstride, fref, frefstride, weightTemp, width, height, &w);
> + uint32_t s = weightCost(orig, origstride, fref, frefstride, cacheData.weightTemp, width, height, &w);
> COPY4_IF_LT(minscore, s, minscale, curScale, minoff, ioff, bFound, true);
> if (minoff == curOffset - oD && ioff != curOffset - oD)
> break;
> @@ -481,13 +500,11 @@
> wpScalingParam wp[2][MAX_NUM_REF][3];
> int numPredDir = slice.isInterP() ? 1 : 2;
>
> - /* TODO: perf - collect some of this data into a struct which is passed to
> - * tryCommonDenom() to avoid recalculating some data. Motion compensated
> - * reference planes can be cached this way */
> -
> + weightp::cache cacheData;
> + memset(&cacheData, 0, sizeof(cacheData));
> TComPicYuv *orig = slice.getPic()->getPicYuvOrg();
> - pixel *temp = X265_MALLOC(pixel, 2 * orig->getStride() * orig->getHeight());
> - if (temp)
> + cacheData.weightTemp = X265_MALLOC(pixel, orig->getStride() * orig->getHeight());
> + if (cacheData.weightTemp)
> {
> int denom = slice.getNumRefIdx(REF_PIC_LIST_0) > 3 ? 7 : 6;
> do
> @@ -503,13 +520,23 @@
> }
> }
>
> - if (weightp::tryCommonDenom(slice, param, wp, temp, denom))
> + if (weightp::tryCommonDenom(slice, param, wp, cacheData, denom))
> break;
> denom--; // decrement to satisfy the range limitation
> }
> while (denom > 0);
>
> - X265_FREE(temp);
> + X265_FREE(cacheData.weightTemp);
> + for (int list = 0; list < numPredDir; list++)
> + {
> + for (int ref = 0; ref < slice.getNumRefIdx(list); ref++)
> + {
> + for (int yuv = 0; yuv < 3; yuv++)
> + {
> + X265_FREE(cacheData.mcRef[list][ref][yuv]);
> + }
> + }
> + }
> }
>
> if (param.logLevel >= X265_LOG_DEBUG)
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
--
Steve Borho
More information about the x265-devel mailing list