[x265] [PATCH 4 of 4] Enabling weight prediction for half and full pel

Tue Oct 8 21:25:05 CEST 2013

On Tue, Oct 8, 2013 at 6:41 AM, <shazeb at multicorewareinc.com> wrote:

> # HG changeset patch
> # User Shazeb Nawaz Khan <shazeb at multicorewareinc.com>
> # Date 1381232285 -19800
> #      Tue Oct 08 17:08:05 2013 +0530
> # Node ID ad8608b0a64869c3b8bbc32e0bb418f7b1dde4bb
> # Parent  18a5d7c3464d1a3fa98afa95a0fe7a8894bcd3d2
> Enabling weight prediction for half and full pel
>
> diff -r 18a5d7c3464d -r ad8608b0a648 source/common/reference.cpp
> --- a/source/common/reference.cpp       Tue Oct 08 16:56:39 2013 +0530
> +++ b/source/common/reference.cpp       Tue Oct 08 17:08:05 2013 +0530
> @@ -58,6 +58,7 @@
>  int MotionReference::init(TComPicYuv* pic, wpScalingParam *w)
>  {
>      m_reconPic = pic;
> +    unweightedFPelPlane = pic->getLumaAddr();
>

The motion reference object already has m_reconPic, so it can call
m_reconPic->getLumaAddr() at any time.  I don't see the need for another
pointer.

>      lumaStride = pic->getStride();
>      m_startPad = pic->m_lumaMarginY * lumaStride + pic->m_lumaMarginX;
>      m_next = NULL;
> diff -r 18a5d7c3464d -r ad8608b0a648 source/common/reference.h
> --- a/source/common/reference.h Tue Oct 08 16:56:39 2013 +0530
> +++ b/source/common/reference.h Tue Oct 08 17:08:05 2013 +0530
> @@ -43,6 +43,7 @@
>
>      pixel* fpelPlane;
>      pixel* lowresPlane[4];
> +    pixel* unweightedFPelPlane;
>
>      bool isWeighted;
>      bool isLowres;
> diff -r 18a5d7c3464d -r ad8608b0a648 source/encoder/frameencoder.cpp
> --- a/source/encoder/frameencoder.cpp   Tue Oct 08 16:56:39 2013 +0530
> +++ b/source/encoder/frameencoder.cpp   Tue Oct 08 17:08:05 2013 +0530
> @@ -909,6 +909,10 @@
>                      {
>                          refpic->m_reconRowWait.wait();
>                      }
> +                    if(slice->getPPS()->getUseWP() &&
> (slice->getSliceType() == P_SLICE))
>

White-space: `if(` should be `if (`, with a space after the keyword.

> +                    {
> +                        slice->m_mref[list][ref]->applyWeight(refpic, row
> + refLagRows, m_numRows);
> +                    }
>                  }
>              }
>
> @@ -941,6 +945,10 @@
>                          {
>                              refpic->m_reconRowWait.wait();
>                          }
> +                        if(slice->getPPS()->getUseWP() &&
> (slice->getSliceType() == P_SLICE))
>

White-space: `if(` should be `if (`, with a space after the keyword.

> +                        {
> +                            slice->m_mref[list][ref]->applyWeight(refpic,
> i + refLagRows, m_numRows);
> +                        }
>                      }
>                  }
>
> diff -r 18a5d7c3464d -r ad8608b0a648 source/encoder/motion.cpp
> --- a/source/encoder/motion.cpp Tue Oct 08 16:56:39 2013 +0530
> +++ b/source/encoder/motion.cpp Tue Oct 08 17:08:05 2013 +0530
> @@ -89,6 +89,7 @@
>      fenc = (pixel*)X265_MALLOC(pixel, MAX_CU_SIZE * MAX_CU_SIZE);
>      subpelbuf = (pixel*)X265_MALLOC(pixel, (MAX_CU_SIZE + 1) *
> (MAX_CU_SIZE + 1));
>      immedVal = (short*)X265_MALLOC(short, (MAX_CU_SIZE + 1) *
> (MAX_CU_SIZE + 1 + NTAPS_LUMA - 1));
> +    immedVal2 = (int16_t*)X265_MALLOC(int16_t, (MAX_CU_SIZE + 1) *
> (MAX_CU_SIZE + 1 + NTAPS_LUMA - 1));
>

This is the same type as immedVal (short == int16_t almost everywhere).  It
would be better to change immedVal's type to int16_t* and to cast it to short
wherever necessary in the short term, until we can fix all the
interpolation primitives to use int16_t instead of short.

>  }
>
>  MotionEstimate::~MotionEstimate()
> @@ -96,6 +97,7 @@
>      X265_FREE(fenc);
>      X265_FREE(subpelbuf);
>      X265_FREE(immedVal);
> +    X265_FREE(immedVal2);
>  }
>
>  void MotionEstimate::setSourcePU(int offset, int width, int height)
> @@ -831,7 +833,7 @@
>                  }
>                  else
>                  {
> -                    subpelInterpolate(fqref, ref->lumaStride, xFrac,
> yFrac, dir);
> +                    subpelInterpolate(ref, qmv0, dir);
>                      cost0 = hpelcomp(fenc, FENC_STRIDE, subpelbuf,
> FENC_STRIDE + (dir == 2)) + mvcost0;
>                      cost1 = hpelcomp(fenc, FENC_STRIDE, subpelbuf + (dir
> == 2) + (dir == 1 ? FENC_STRIDE : 0), FENC_STRIDE + (dir == 2)) + mvcost1;
>                  }
> @@ -1140,47 +1142,61 @@
>          {
>              return cmp(fenc, FENC_STRIDE, fref, ref->lumaStride);
>          }
> -        else if (yFrac == 0)
> +        else
>          {
> -            primitives.ipfilter_pp[FILTER_H_P_P_8](fref, ref->lumaStride,
> subpelbuf, FENC_STRIDE, blockwidth, blockheight, g_lumaFilter[xFrac]);
> +            subpelInterpolate(ref, qmv, 0);
>

As a follow-up step, we should try to declare subpelInterpolate as inline.

> +        }
> +        return cmp(fenc, FENC_STRIDE, subpelbuf, FENC_STRIDE);
> +    }
> +}
> +
> +void MotionEstimate::subpelInterpolate(ReferencePlanes *ref, MV qmv, int
> dir)
> +{
> +    int xFrac = qmv.x & 0x3;
> +    int yFrac = qmv.y & 0x3;
> +    assert(yFrac | xFrac);
> +    int realWidth = blockwidth + (dir == 2);
> +    int realHeight = blockheight + (dir == 1);
> +    intptr_t realStride = FENC_STRIDE + (dir == 2);
> +    pixel *fref = ref->unweightedFPelPlane + blockOffset + (qmv.x >> 2) +
> (qmv.y >> 2) * ref->lumaStride;
> +
> +    if (ref->isWeighted)
> +    {
> +        if (yFrac == 0)
> +        {
> +            primitives.ipfilter_ps[FILTER_H_P_S_8](fref, ref->lumaStride,
> immedVal, realStride, realWidth, realHeight, g_lumaFilter[xFrac]);
> +            primitives.weightpUni(immedVal, subpelbuf, realStride,
> realStride, realWidth, realHeight, ref->weight, ref->round, ref->shift,
> ref->offset);
>          }
>          else if (xFrac == 0)
>          {
> -            primitives.ipfilter_pp[FILTER_V_P_P_8](fref, ref->lumaStride,
> subpelbuf, FENC_STRIDE, blockwidth, blockheight, g_lumaFilter[yFrac]);
> +            primitives.ipfilter_ps[FILTER_V_P_S_8](fref, ref->lumaStride,
> immedVal, realStride, realWidth, realHeight, g_lumaFilter[yFrac]);
> +            primitives.weightpUni(immedVal, subpelbuf, realStride,
> realStride, realWidth, realHeight, ref->weight, ref->round, ref->shift,
> ref->offset);
>          }
>          else
>          {
>              int filterSize = NTAPS_LUMA;
>              int halfFilterSize = (filterSize >> 1);
> -            primitives.ipfilter_ps[FILTER_H_P_S_8](fref - (halfFilterSize
> - 1) * ref->lumaStride, ref->lumaStride, immedVal, blockwidth, blockwidth,
> blockheight + filterSize - 1, g_lumaFilter[xFrac]);
> -            primitives.ipfilter_sp[FILTER_V_S_P_8](immedVal +
> (halfFilterSize - 1) * blockwidth, blockwidth, subpelbuf, FENC_STRIDE,
> blockwidth, blockheight, g_lumaFilter[yFrac]);
> +            primitives.ipfilter_ps[FILTER_H_P_S_8](fref - (halfFilterSize
> - 1) * ref->lumaStride, ref->lumaStride, immedVal, realWidth, realWidth,
> realHeight + filterSize - 1, g_lumaFilter[xFrac]);
> +            primitives.ipfilter_ss[FILTER_V_S_S_8](immedVal +
> (halfFilterSize - 1) * realWidth, realWidth, immedVal2, realStride,
> realWidth, realHeight, g_lumaFilter[yFrac]);
> +            primitives.weightpUni(immedVal2, subpelbuf, realStride,
> realStride, realWidth, realHeight, ref->weight, ref->round, ref->shift,
> ref->offset);
>          }
> -
> -        return cmp(fenc, FENC_STRIDE, subpelbuf, FENC_STRIDE);
> -    }
> -}
> -
> -void MotionEstimate::subpelInterpolate(pixel *fref, intptr_t lumaStride,
> int xFrac, int yFrac, int dir)
> -{
> -    assert(yFrac | xFrac);
> -
> -    int realWidth = blockwidth + (dir == 2);
> -    int realHeight = blockheight + (dir == 1);
> -    intptr_t realStride = FENC_STRIDE + (dir == 2);
> -
> -    if (yFrac == 0)
> -    {
> -        primitives.ipfilter_pp[FILTER_H_P_P_8](fref, lumaStride,
> subpelbuf, realStride, realWidth, realHeight, g_lumaFilter[xFrac]);
> -    }
> -    else if (xFrac == 0)
> -    {
> -        primitives.ipfilter_pp[FILTER_V_P_P_8](fref, lumaStride,
> subpelbuf, realStride, realWidth, realHeight, g_lumaFilter[yFrac]);
>      }
>      else
>      {
> -        int filterSize = NTAPS_LUMA;
> -        int halfFilterSize = (filterSize >> 1);
> -        primitives.ipfilter_ps[FILTER_H_P_S_8](fref - (halfFilterSize -
> 1) * lumaStride, lumaStride, immedVal, realWidth, realWidth, realHeight +
> filterSize - 1, g_lumaFilter[xFrac]);
> -        primitives.ipfilter_sp[FILTER_V_S_P_8](immedVal + (halfFilterSize
> - 1) * realWidth, realWidth, subpelbuf, realStride, realWidth, realHeight,
> g_lumaFilter[yFrac]);
> +        if (yFrac == 0)
> +        {
> +            primitives.ipfilter_pp[FILTER_H_P_P_8](fref, ref->lumaStride,
> subpelbuf, realStride, realWidth, realHeight, g_lumaFilter[xFrac]);
> +        }
> +        else if (xFrac == 0)
> +        {
> +            primitives.ipfilter_pp[FILTER_V_P_P_8](fref, ref->lumaStride,
> subpelbuf, realStride, realWidth, realHeight, g_lumaFilter[yFrac]);
> +        }
> +        else
> +        {
> +            int filterSize = NTAPS_LUMA;
> +            int halfFilterSize = (filterSize >> 1);
> +            primitives.ipfilter_ps[FILTER_H_P_S_8](fref - (halfFilterSize
> - 1) * ref->lumaStride, ref->lumaStride, immedVal, realWidth, realWidth,
> realHeight + filterSize - 1, g_lumaFilter[xFrac]);
> +            primitives.ipfilter_sp[FILTER_V_S_P_8](immedVal +
> (halfFilterSize - 1) * realWidth, realWidth, subpelbuf, realStride,
> realWidth, realHeight, g_lumaFilter[yFrac]);
> +        }
>      }
>  }
> diff -r 18a5d7c3464d -r ad8608b0a648 source/encoder/motion.h
> --- a/source/encoder/motion.h   Tue Oct 08 16:56:39 2013 +0530
> +++ b/source/encoder/motion.h   Tue Oct 08 17:08:05 2013 +0530
> @@ -54,6 +54,7 @@
>      /* subpel generation buffers */
>      pixel *subpelbuf;
>      short *immedVal;
> +    int16_t *immedVal2;
>      int blockwidth;
>      int blockheight;
>
> @@ -96,7 +97,7 @@
>
>      int subpelCompare(ReferencePlanes *ref, const MV & qmv, pixelcmp_t);
>
> -    void subpelInterpolate(pixel *fref, intptr_t lumaStride, int xFrac,
> int yFrac, int dir);
> +    void subpelInterpolate(ReferencePlanes *ref, MV qmv, int dir);
>
>  protected:
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>

-- 
Steve Borho
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131008/e44df8d4/attachment-0001.html>