[x265] [PATCH] weightp: pass struct to tryCommonDenom() to cache motion compensated reference planes for reuse

Steve Borho steve at borho.org
Fri Feb 21 20:31:08 CET 2014


On Fri, Feb 21, 2014 at 12:50 PM,  <kavitha at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Kavitha Sampath <kavitha at multicorewareinc.com>
> # Date 1393008408 -19800
> #      Sat Feb 22 00:16:48 2014 +0530
> # Node ID e0f5813915e6b96078eada45f352c8e247d23b32
> # Parent  5e2043f89aa11363dffe33a0ff06550a7d862326
> weightp: pass struct to tryCommonDenom() to cache motion compensated reference planes for reuse
>
> diff -r 5e2043f89aa1 -r e0f5813915e6 source/encoder/weightPrediction.cpp
> --- a/source/encoder/weightPrediction.cpp       Fri Feb 21 03:05:48 2014 -0600
> +++ b/source/encoder/weightPrediction.cpp       Sat Feb 22 00:16:48 2014 +0530
> @@ -32,6 +32,14 @@
>  using namespace x265;
>
>  namespace weightp {
> +
> +struct cache
> +{
> +    pixel    *mcRef[2][MAX_NUM_REF][3];
> +    uint32_t unweightedCost[2][MAX_NUM_REF][3];
> +    pixel    *weightTemp;
> +};
> +
>  /* make a motion compensated copy of lowres ref into mcout with the same stride.
>   * The borders of mcout are not extended */
>  void mcLuma(pixel *    mcout,
> @@ -213,7 +221,7 @@
>  bool tryCommonDenom(TComSlice&     slice,
>                      x265_param&    param,
>                      wpScalingParam wp[2][MAX_NUM_REF][3],
> -                    pixel *        temp,
> +                    cache&         cacheData,
>                      int            indenom)
>  {
>      TComPic *pic = slice.getPic();
> @@ -221,12 +229,6 @@
>      Lowres& fenc = pic->m_lowres;
>      int curPoc = slice.getPOC();
>
> -    /* caller provides temp space for two full-pel planes. Split it
> -     * in half for motion compensation of the reference and then the
> -     * weighting */
> -    pixel *mcTemp = temp;
> -    pixel *weightTemp = temp + picorig->getStride() * picorig->getHeight();
> -
>      int log2denom[3] = { indenom };
>      int csp = picorig->m_picCsp;
>      int hshift = CHROMA_H_SHIFT(csp);
> @@ -327,6 +329,8 @@
>                  pixel *fref;
>                  int    origstride, frefstride;
>                  int    width, height;
> +                pixel* &buf = cacheData.mcRef[list][ref][yuv];
> +                uint32_t& origscore = cacheData.unweightedCost[list][ref][yuv];
>                  switch (yuv)
>                  {
>                  case 0:
> @@ -338,8 +342,12 @@
>
>                      if (bMotionCompensate)
>                      {
> -                        mcLuma(mcTemp, refLowres, mvCosts, fenc.intraCost, mvs);
> -                        fref = mcTemp;
> +                        if (!buf)
> +                        {
> +                            buf = X265_MALLOC(pixel, picorig->getStride() * picorig->getHeight());
> +                            mcLuma(buf, refLowres, mvCosts, fenc.intraCost, mvs);
> +                        }
> +                        fref = buf;
>                      }
>                      break;
>
> @@ -359,8 +367,12 @@
>
>                      if (bMotionCompensate)
>                      {
> -                        mcChroma(mcTemp, fref, fenc, frefstride, mvCosts, fenc.intraCost, mvs, height, width, csp);
> -                        fref = mcTemp;
> +                        if (!buf)
> +                        {
> +                            buf = X265_MALLOC(pixel, picorig->getStride() * picorig->getHeight());
> +                            mcChroma(buf, fref, fenc, frefstride, mvCosts, fenc.intraCost, mvs, height, width, csp);
> +                        }
> +                        fref = buf;
>                      }
>                      break;
>
> @@ -373,8 +385,12 @@
>
>                      if (bMotionCompensate)
>                      {
> -                        mcChroma(mcTemp, fref, fenc, frefstride, mvCosts, fenc.intraCost, mvs, height, width, csp);
> -                        fref = mcTemp;
> +                        if (!buf)
> +                        {
> +                            buf = X265_MALLOC(pixel, picorig->getStride() * picorig->getHeight());
> +                            mcChroma(buf, fref, fenc, frefstride, mvCosts, fenc.intraCost, mvs, height, width, csp);
> +                        }
> +                        fref = buf;
>                      }
>                      break;
>
> @@ -389,9 +405,12 @@
>                  int minscale = w.inputWeight;
>                  int minoff = 0;
>
> -                uint32_t origscore = weightCost(orig, origstride, fref, frefstride, weightTemp, width, height, NULL);
> -                if (!origscore)
> -                    continue;
> +                if (origscore == 0)
> +                {
> +                    origscore = weightCost(orig, origstride, fref, frefstride, cacheData.weightTemp, width, height, NULL);
> +                    if (!origscore)
> +                        continue;
> +                }

So now it will cache the unweighted cost unless it was zero, in which case
it will measure it again on every call?  That seems a bit odd.

>
>                  uint32_t minscore = origscore;
>                  bool bFound = false;
> @@ -431,7 +450,7 @@
>                          }
>
>                          SET_WEIGHT(w, true, curScale, mindenom, ioff);
> -                        uint32_t s = weightCost(orig, origstride, fref, frefstride, weightTemp, width, height, &w);
> +                        uint32_t s = weightCost(orig, origstride, fref, frefstride, cacheData.weightTemp, width, height, &w);
>                          COPY4_IF_LT(minscore, s, minscale, curScale, minoff, ioff, bFound, true);
>                          if (minoff == curOffset - oD && ioff != curOffset - oD)
>                              break;
> @@ -481,13 +500,11 @@
>      wpScalingParam wp[2][MAX_NUM_REF][3];
>      int numPredDir = slice.isInterP() ? 1 : 2;
>
> -    /* TODO: perf - collect some of this data into a struct which is passed to
> -     * tryCommonDenom() to avoid recalculating some data.  Motion compensated
> -     * reference planes can be cached this way */
> -
> +    weightp::cache cacheData;
> +    memset(&cacheData, 0, sizeof(cacheData));
>      TComPicYuv *orig = slice.getPic()->getPicYuvOrg();
> -    pixel *temp = X265_MALLOC(pixel, 2 * orig->getStride() * orig->getHeight());
> -    if (temp)
> +    cacheData.weightTemp = X265_MALLOC(pixel, orig->getStride() * orig->getHeight());
> +    if (cacheData.weightTemp)
>      {
>          int denom = slice.getNumRefIdx(REF_PIC_LIST_0) > 3 ? 7 : 6;
>          do
> @@ -503,13 +520,23 @@
>                  }
>              }
>
> -            if (weightp::tryCommonDenom(slice, param, wp, temp, denom))
> +            if (weightp::tryCommonDenom(slice, param, wp, cacheData, denom))
>                  break;
>              denom--; // decrement to satisfy the range limitation
>          }
>          while (denom > 0);
>
> -        X265_FREE(temp);
> +        X265_FREE(cacheData.weightTemp);
> +        for (int list = 0; list < numPredDir; list++)
> +        {
> +            for (int ref = 0; ref < slice.getNumRefIdx(list); ref++)
> +            {
> +                for (int yuv = 0; yuv < 3; yuv++)
> +                {
> +                    X265_FREE(cacheData.mcRef[list][ref][yuv]);
> +                }
> +            }
> +        }
>      }
>
>      if (param.logLevel >= X265_LOG_DEBUG)
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel



-- 
Steve Borho


More information about the x265-devel mailing list