[x265] [PATCH 4 of 4] Enabling weight prediction for half and full pel

Wed Oct 9 08:10:42 CEST 2013

On Wed, Oct 9, 2013 at 12:55 AM, Shazeb Khan <shazeb at multicorewareinc.com> wrote:

>
>
>
> On Wed, Oct 9, 2013 at 12:55 AM, Steve Borho <steve at borho.org> wrote:
>
>>
>>
>>
>> On Tue, Oct 8, 2013 at 6:41 AM, <shazeb at multicorewareinc.com> wrote:
>>
>>> # HG changeset patch
>>> # User Shazeb Nawaz Khan <shazeb at multicorewareinc.com>
>>> # Date 1381232285 -19800
>>> #      Tue Oct 08 17:08:05 2013 +0530
>>> # Node ID ad8608b0a64869c3b8bbc32e0bb418f7b1dde4bb
>>> # Parent  18a5d7c3464d1a3fa98afa95a0fe7a8894bcd3d2
>>> Enabling weight prediction for half and full pel
>>>
>>> diff -r 18a5d7c3464d -r ad8608b0a648 source/common/reference.cpp
>>> --- a/source/common/reference.cpp       Tue Oct 08 16:56:39 2013 +0530
>>> +++ b/source/common/reference.cpp       Tue Oct 08 17:08:05 2013 +0530
>>> @@ -58,6 +58,7 @@
>>>  int MotionReference::init(TComPicYuv* pic, wpScalingParam *w)
>>>  {
>>>      m_reconPic = pic;
>>> +    unweightedFPelPlane = pic->getLumaAddr();
>>>
>>
>> the motion reference object has m_reconPic, so it can call
>> m_reconPic->getLumaAddr() at any time.  I don't see the need for another
>> pointer
>>
>
> It's not possible to refer to m_reconPic in subpelInterpolate(), since the
> parameter is received as a ReferencePlanes pointer rather than a
> MotionReference pointer. A pointer is therefore required in the superclass.
> Can there be another way?
>

Not safely. I see the need to add it to ReferencePlanes now.

>
>
>>
>>
>>>      lumaStride = pic->getStride();
>>>      m_startPad = pic->m_lumaMarginY * lumaStride + pic->m_lumaMarginX;
>>>      m_next = NULL;
>>> diff -r 18a5d7c3464d -r ad8608b0a648 source/common/reference.h
>>> --- a/source/common/reference.h Tue Oct 08 16:56:39 2013 +0530
>>> +++ b/source/common/reference.h Tue Oct 08 17:08:05 2013 +0530
>>> @@ -43,6 +43,7 @@
>>>
>>>      pixel* fpelPlane;
>>>      pixel* lowresPlane[4];
>>> +    pixel* unweightedFPelPlane;
>>>
>>>      bool isWeighted;
>>>      bool isLowres;
>>> diff -r 18a5d7c3464d -r ad8608b0a648 source/encoder/frameencoder.cpp
>>> --- a/source/encoder/frameencoder.cpp   Tue Oct 08 16:56:39 2013 +0530
>>> +++ b/source/encoder/frameencoder.cpp   Tue Oct 08 17:08:05 2013 +0530
>>> @@ -909,6 +909,10 @@
>>>                      {
>>>                          refpic->m_reconRowWait.wait();
>>>                      }
>>> +                    if(slice->getPPS()->getUseWP() &&
>>> (slice->getSliceType() == P_SLICE))
>>>
>>
>> white-space
>>
>>
>>> +                    {
>>> +                        slice->m_mref[list][ref]->applyWeight(refpic,
>>> row + refLagRows, m_numRows);
>>> +                    }
>>>                  }
>>>              }
>>>
>>> @@ -941,6 +945,10 @@
>>>                          {
>>>                              refpic->m_reconRowWait.wait();
>>>                          }
>>> +                        if(slice->getPPS()->getUseWP() &&
>>> (slice->getSliceType() == P_SLICE))
>>>
>>
>> white-space
>>
>>
>>> +                        {
>>> +
>>>  slice->m_mref[list][ref]->applyWeight(refpic, i + refLagRows, m_numRows);
>>> +                        }
>>>                      }
>>>                  }
>>>
>>> diff -r 18a5d7c3464d -r ad8608b0a648 source/encoder/motion.cpp
>>> --- a/source/encoder/motion.cpp Tue Oct 08 16:56:39 2013 +0530
>>> +++ b/source/encoder/motion.cpp Tue Oct 08 17:08:05 2013 +0530
>>> @@ -89,6 +89,7 @@
>>>      fenc = (pixel*)X265_MALLOC(pixel, MAX_CU_SIZE * MAX_CU_SIZE);
>>>      subpelbuf = (pixel*)X265_MALLOC(pixel, (MAX_CU_SIZE + 1) *
>>> (MAX_CU_SIZE + 1));
>>>      immedVal = (short*)X265_MALLOC(short, (MAX_CU_SIZE + 1) *
>>> (MAX_CU_SIZE + 1 + NTAPS_LUMA - 1));
>>> +    immedVal2 = (int16_t*)X265_MALLOC(int16_t, (MAX_CU_SIZE + 1) *
>>> (MAX_CU_SIZE + 1 + NTAPS_LUMA - 1));
>>>
>>
>> this is the same as immedVal (short == int16_t almost everywhere).  It would
>> be better to change immedVal's type to int16_t* and to cast it to short
>> wherever necessary in the short term, until we can fix all the
>> interpolation primitives to use int16_t instead of short.
>>
>
> In subpelInterpolate
>
> pixel      --[interpolatePS]-->  immedVal
> immedVal   --[interpolateSS]-->  immedVal2
> immedVal2  --[weightp]-------->  subpelbuf
>
> immedVal2 serves as intermediate buffer.
>

Got it.  Let's rename them to immedH and immedV then, to make it more clear
how they are used.  I still think they should both be allocated as int16_t
type.  We shouldn't be adding any new references to short type to the code.

>
>
>>
>>
>>>  }
>>>
>>>  MotionEstimate::~MotionEstimate()
>>> @@ -96,6 +97,7 @@
>>>      X265_FREE(fenc);
>>>      X265_FREE(subpelbuf);
>>>      X265_FREE(immedVal);
>>> +    X265_FREE(immedVal2);
>>>  }
>>>
>>>  void MotionEstimate::setSourcePU(int offset, int width, int height)
>>> @@ -831,7 +833,7 @@
>>>                  }
>>>                  else
>>>                  {
>>> -                    subpelInterpolate(fqref, ref->lumaStride, xFrac,
>>> yFrac, dir);
>>> +                    subpelInterpolate(ref, qmv0, dir);
>>>                      cost0 = hpelcomp(fenc, FENC_STRIDE, subpelbuf,
>>> FENC_STRIDE + (dir == 2)) + mvcost0;
>>>                      cost1 = hpelcomp(fenc, FENC_STRIDE, subpelbuf +
>>> (dir == 2) + (dir == 1 ? FENC_STRIDE : 0), FENC_STRIDE + (dir == 2)) +
>>> mvcost1;
>>>                  }
>>> @@ -1140,47 +1142,61 @@
>>>          {
>>>              return cmp(fenc, FENC_STRIDE, fref, ref->lumaStride);
>>>          }
>>> -        else if (yFrac == 0)
>>> +        else
>>>          {
>>> -            primitives.ipfilter_pp[FILTER_H_P_P_8](fref,
>>> ref->lumaStride, subpelbuf, FENC_STRIDE, blockwidth, blockheight,
>>> g_lumaFilter[xFrac]);
>>> +            subpelInterpolate(ref, qmv, 0);
>>>
>>
>> as a follow-up step, we should try to declare subpelInterpolate as inline
>>
>>
>>>  +        }
>>> +        return cmp(fenc, FENC_STRIDE, subpelbuf, FENC_STRIDE);
>>> +    }
>>> +}
>>> +
>>> +void MotionEstimate::subpelInterpolate(ReferencePlanes *ref, MV qmv,
>>> int dir)
>>> +{
>>> +    int xFrac = qmv.x & 0x3;
>>> +    int yFrac = qmv.y & 0x3;
>>> +    assert(yFrac | xFrac);
>>> +    int realWidth = blockwidth + (dir == 2);
>>> +    int realHeight = blockheight + (dir == 1);
>>> +    intptr_t realStride = FENC_STRIDE + (dir == 2);
>>> +    pixel *fref = ref->unweightedFPelPlane + blockOffset + (qmv.x >> 2)
>>> + (qmv.y >> 2) * ref->lumaStride;
>>> +
>>> +    if (ref->isWeighted)
>>> +    {
>>> +        if (yFrac == 0)
>>> +        {
>>> +            primitives.ipfilter_ps[FILTER_H_P_S_8](fref,
>>> ref->lumaStride, immedVal, realStride, realWidth, realHeight,
>>> g_lumaFilter[xFrac]);
>>> +            primitives.weightpUni(immedVal, subpelbuf, realStride,
>>> realStride, realWidth, realHeight, ref->weight, ref->round, ref->shift,
>>> ref->offset);
>>>          }
>>>          else if (xFrac == 0)
>>>          {
>>> -            primitives.ipfilter_pp[FILTER_V_P_P_8](fref,
>>> ref->lumaStride, subpelbuf, FENC_STRIDE, blockwidth, blockheight,
>>> g_lumaFilter[yFrac]);
>>> +            primitives.ipfilter_ps[FILTER_V_P_S_8](fref,
>>> ref->lumaStride, immedVal, realStride, realWidth, realHeight,
>>> g_lumaFilter[yFrac]);
>>> +            primitives.weightpUni(immedVal, subpelbuf, realStride,
>>> realStride, realWidth, realHeight, ref->weight, ref->round, ref->shift,
>>> ref->offset);
>>>          }
>>>          else
>>>          {
>>>              int filterSize = NTAPS_LUMA;
>>>              int halfFilterSize = (filterSize >> 1);
>>> -            primitives.ipfilter_ps[FILTER_H_P_S_8](fref -
>>> (halfFilterSize - 1) * ref->lumaStride, ref->lumaStride, immedVal,
>>> blockwidth, blockwidth, blockheight + filterSize - 1, g_lumaFilter[xFrac]);
>>> -            primitives.ipfilter_sp[FILTER_V_S_P_8](immedVal +
>>> (halfFilterSize - 1) * blockwidth, blockwidth, subpelbuf, FENC_STRIDE,
>>> blockwidth, blockheight, g_lumaFilter[yFrac]);
>>> +            primitives.ipfilter_ps[FILTER_H_P_S_8](fref -
>>> (halfFilterSize - 1) * ref->lumaStride, ref->lumaStride, immedVal,
>>> realWidth, realWidth, realHeight + filterSize - 1, g_lumaFilter[xFrac]);
>>> +            primitives.ipfilter_ss[FILTER_V_S_S_8](immedVal +
>>> (halfFilterSize - 1) * realWidth, realWidth, immedVal2, realStride,
>>> realWidth, realHeight, g_lumaFilter[yFrac]);
>>> +            primitives.weightpUni(immedVal2, subpelbuf, realStride,
>>> realStride, realWidth, realHeight, ref->weight, ref->round, ref->shift,
>>> ref->offset);
>>>          }
>>> -
>>> -        return cmp(fenc, FENC_STRIDE, subpelbuf, FENC_STRIDE);
>>> -    }
>>> -}
>>> -
>>> -void MotionEstimate::subpelInterpolate(pixel *fref, intptr_t
>>> lumaStride, int xFrac, int yFrac, int dir)
>>> -{
>>> -    assert(yFrac | xFrac);
>>> -
>>> -    int realWidth = blockwidth + (dir == 2);
>>> -    int realHeight = blockheight + (dir == 1);
>>> -    intptr_t realStride = FENC_STRIDE + (dir == 2);
>>> -
>>> -    if (yFrac == 0)
>>> -    {
>>> -        primitives.ipfilter_pp[FILTER_H_P_P_8](fref, lumaStride,
>>> subpelbuf, realStride, realWidth, realHeight, g_lumaFilter[xFrac]);
>>> -    }
>>> -    else if (xFrac == 0)
>>> -    {
>>> -        primitives.ipfilter_pp[FILTER_V_P_P_8](fref, lumaStride,
>>> subpelbuf, realStride, realWidth, realHeight, g_lumaFilter[yFrac]);
>>>      }
>>>      else
>>>      {
>>> -        int filterSize = NTAPS_LUMA;
>>> -        int halfFilterSize = (filterSize >> 1);
>>> -        primitives.ipfilter_ps[FILTER_H_P_S_8](fref - (halfFilterSize -
>>> 1) * lumaStride, lumaStride, immedVal, realWidth, realWidth, realHeight +
>>> filterSize - 1, g_lumaFilter[xFrac]);
>>> -        primitives.ipfilter_sp[FILTER_V_S_P_8](immedVal +
>>> (halfFilterSize - 1) * realWidth, realWidth, subpelbuf, realStride,
>>> realWidth, realHeight, g_lumaFilter[yFrac]);
>>> +        if (yFrac == 0)
>>> +        {
>>> +            primitives.ipfilter_pp[FILTER_H_P_P_8](fref,
>>> ref->lumaStride, subpelbuf, realStride, realWidth, realHeight,
>>> g_lumaFilter[xFrac]);
>>> +        }
>>> +        else if (xFrac == 0)
>>> +        {
>>> +            primitives.ipfilter_pp[FILTER_V_P_P_8](fref,
>>> ref->lumaStride, subpelbuf, realStride, realWidth, realHeight,
>>> g_lumaFilter[yFrac]);
>>> +        }
>>> +        else
>>> +        {
>>> +            int filterSize = NTAPS_LUMA;
>>> +            int halfFilterSize = (filterSize >> 1);
>>> +            primitives.ipfilter_ps[FILTER_H_P_S_8](fref -
>>> (halfFilterSize - 1) * ref->lumaStride, ref->lumaStride, immedVal,
>>> realWidth, realWidth, realHeight + filterSize - 1, g_lumaFilter[xFrac]);
>>> +            primitives.ipfilter_sp[FILTER_V_S_P_8](immedVal +
>>> (halfFilterSize - 1) * realWidth, realWidth, subpelbuf, realStride,
>>> realWidth, realHeight, g_lumaFilter[yFrac]);
>>> +        }
>>>      }
>>>  }
>>> diff -r 18a5d7c3464d -r ad8608b0a648 source/encoder/motion.h
>>> --- a/source/encoder/motion.h   Tue Oct 08 16:56:39 2013 +0530
>>> +++ b/source/encoder/motion.h   Tue Oct 08 17:08:05 2013 +0530
>>> @@ -54,6 +54,7 @@
>>>      /* subpel generation buffers */
>>>      pixel *subpelbuf;
>>>      short *immedVal;
>>> +    int16_t *immedVal2;
>>>      int blockwidth;
>>>      int blockheight;
>>>
>>> @@ -96,7 +97,7 @@
>>>
>>>      int subpelCompare(ReferencePlanes *ref, const MV & qmv, pixelcmp_t);
>>>
>>> -    void subpelInterpolate(pixel *fref, intptr_t lumaStride, int xFrac,
>>> int yFrac, int dir);
>>> +    void subpelInterpolate(ReferencePlanes *ref, MV qmv, int dir);
>>>
>>>  protected:
>>>
>>> _______________________________________________
>>> x265-devel mailing list
>>> x265-devel at videolan.org
>>> https://mailman.videolan.org/listinfo/x265-devel
>>>
>>
>>
>>
>> --
>> Steve Borho
>>
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
>>
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>

-- 
Steve Borho
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131009/7ce29a11/attachment-0001.html>