[x265] [PATCH 3 of 3] Reduce half HPEL interpolate works by merge nest point

Mon Sep 23 19:25:24 CEST 2013

On Sun, Sep 22, 2013 at 11:40 PM, Min Chen <chenm003 at 163.com> wrote:

> # HG changeset patch
> # User Min Chen <chenm003 at 163.com>
> # Date 1379911059 -28800
> # Node ID b009957b4443ef790936c80f9df538e75d742306
> # Parent  cec05efee900c68cb09ddd4135133836bb2e9c3b
> Reduce half HPEL interpolate works by merge nest point
>

This is interesting, but it needs a better commit message, probably some
more comments, and definitely a better method name.  I can probably clean
this up pretty quickly on my end; I just need to discuss a few details with
you.

>
> diff -r cec05efee900 -r b009957b4443 source/common/vec/ipfilter8.inc
> --- a/source/common/vec/ipfilter8.inc   Mon Sep 23 12:37:26 2013 +0800
> +++ b/source/common/vec/ipfilter8.inc   Mon Sep 23 12:37:39 2013 +0800
> @@ -679,7 +679,8 @@
>
>      int row, col;
>
> -    assert(height % 2 == 0);
> +    if (N == 4)
> +        assert(height % 2 == 0);
>
>      uint32_t leftCols = (8 - (width & 7)) * 8;
>      uint32_t mask_shift = ((uint32_t)~0 >> leftCols);
> diff -r cec05efee900 -r b009957b4443 source/encoder/motion.cpp
> --- a/source/encoder/motion.cpp Mon Sep 23 12:37:26 2013 +0800
> +++ b/source/encoder/motion.cpp Mon Sep 23 12:37:39 2013 +0800
> @@ -87,8 +87,8 @@
>          init_scales();
>
>      fenc = (pixel*)X265_MALLOC(pixel, MAX_CU_SIZE * MAX_CU_SIZE);
> -    subpelbuf = (pixel*)X265_MALLOC(pixel, MAX_CU_SIZE * MAX_CU_SIZE);
> -    immedVal = (short*)X265_MALLOC(short, MAX_CU_SIZE * (MAX_CU_SIZE +
> NTAPS_LUMA - 1));
> +    subpelbuf = (pixel*)X265_MALLOC(pixel, (MAX_CU_SIZE + 1) *
> (MAX_CU_SIZE + 1));
> +    immedVal = (short*)X265_MALLOC(short, (MAX_CU_SIZE + 1) *
> (MAX_CU_SIZE + 1 + NTAPS_LUMA - 1));
>  }
>
>  MotionEstimate::~MotionEstimate()
> @@ -122,6 +122,7 @@
>  static const MV hex2[8] = { MV(-1, -2), MV(-2, 0), MV(-1, 2), MV(1, 2),
> MV(2, 0), MV(1, -2), MV(-1, -2), MV(-2, 0) };
>  static const uint8_t mod6m1[8] = { 5, 0, 1, 2, 3, 4, 5, 0 };  /* (x-1)%6
> */
>  static const MV square1[9] = { MV(0, 0), MV(0, -1), MV(0, 1), MV(-1, 0),
> MV(1, 0), MV(-1, -1), MV(-1, 1), MV(1, -1), MV(1, 1) };
> +static const int square1_dir[9] = { 0, 1, 1, 2, 2, 1, 1, 1, 1 };
>  static const MV hex4[16] =
>  {
>      MV(0, -4),  MV(0, 4),  MV(-2, -3), MV(2, -3),
> @@ -793,17 +794,55 @@
>      else
>          hpelcomp = sad;
>
> -    for (int iter = 0; iter < wl.hpel_iters; iter++)
> +    if (ref->isLowres)
>      {
> -        int bdir = 0, cost;
> -        for (int i = 1; i <= wl.hpel_dirs; i++)
> +        for (int iter = 0; iter < wl.hpel_iters; iter++)
>          {
> -            MV qmv = bmv + square1[i] * 2;
> -            cost = subpelCompare(ref, qmv, hpelcomp) + mvcost(qmv);
> -            COPY2_IF_LT(bcost, cost, bdir, i);
> +            int bdir = 0, cost;
> +            for (int i = 1; i <= wl.hpel_dirs; i++)
> +            {
> +                MV qmv = bmv + square1[i] * 2;
> +                cost = subpelCompare(ref, qmv, hpelcomp) + mvcost(qmv);
> +                COPY2_IF_LT(bcost, cost, bdir, i+0);
> +            }
> +            bmv += square1[bdir] * 2;
>          }
> +    }
> +    else
> +    {
> +        for (int iter = 0; iter < wl.hpel_iters; iter++)
> +        {
> +            int bdir = 0, cost0, cost1;
> +            for (int i = 1; i <= wl.hpel_dirs; i+=2)
> +            {
> +                MV qmv0 = bmv + square1[i  ] * 2;
> +                MV qmv1 = bmv + square1[i+1] * 2;
> +                int mvcost0 = mvcost(qmv0);
> +                int mvcost1 = mvcost(qmv1);
> +                int dir = square1_dir[i];
>
> -        bmv += square1[bdir] * 2;
> +                pixel *fref = ref->fpelPlane + blockOffset + (qmv0.x >>
> 2) + (qmv0.y >> 2) * ref->lumaStride;
> +                int xFrac = qmv0.x & 0x3;
> +                int yFrac = qmv0.y & 0x3;
> +
> +                // TODO: sad_x2
> +                if (xFrac == 0 && yFrac == 0)
> +                {
> +                    intptr_t offset = (dir == 2) + (dir == 1 ?
> ref->lumaStride : 0);
> +                    cost0 = hpelcomp(fenc, FENC_STRIDE, fref,
> ref->lumaStride) + mvcost0;
> +                    cost1 = hpelcomp(fenc, FENC_STRIDE, fref + offset,
> ref->lumaStride) + mvcost1;
> +                }
> +                else
> +                {
> +                    subpelInterpolate2(fref, ref->lumaStride, xFrac,
> yFrac, dir);
> +                    cost0 = hpelcomp(fenc, FENC_STRIDE, subpelbuf,
> FENC_STRIDE + (dir == 2)) + mvcost0;
> +                    cost1 = hpelcomp(fenc, FENC_STRIDE, subpelbuf + (dir
> == 2) + (dir == 1 ? FENC_STRIDE : 0), FENC_STRIDE + (dir == 2)) + mvcost1;
> +                }
> +                COPY2_IF_LT(bcost, cost0, bdir, i+0);
> +                COPY2_IF_LT(bcost, cost1, bdir, i+1);
> +            }
> +            bmv += square1[bdir] * 2;
> +        }
>      }
>      /* if HPEL search used SAD, remeasure with SATD before QPEL */
>      if (!wl.hpel_satd)
> @@ -1125,3 +1164,28 @@
>          return cmp(fenc, FENC_STRIDE, subpelbuf, FENC_STRIDE);
>      }
>  }
> +
> +void MotionEstimate::subpelInterpolate2(pixel *fref, intptr_t lumaStride,
> int xFrac, int yFrac, int dir)
> +{
> +    assert(yFrac | xFrac);
> +
> +    int realWidth = blockwidth + (dir == 2);
> +    int realHeight = blockheight + (dir == 1);
> +    intptr_t realStride = FENC_STRIDE + (dir == 2);
> +
> +    if (yFrac == 0)
> +    {
> +        primitives.ipfilter_pp[FILTER_H_P_P_8](fref, lumaStride,
> subpelbuf, realStride, realWidth, realHeight, g_lumaFilter[xFrac]);
> +    }
> +    else if (xFrac == 0)
> +    {
> +        primitives.ipfilter_pp[FILTER_V_P_P_8](fref, lumaStride,
> subpelbuf, realStride, realWidth, realHeight, g_lumaFilter[yFrac]);
> +    }
> +    else
> +    {
> +        int filterSize = NTAPS_LUMA;
> +        int halfFilterSize = (filterSize >> 1);
> +        primitives.ipfilter_ps[FILTER_H_P_S_8](fref - (halfFilterSize -
> 1) * lumaStride, lumaStride, immedVal, realWidth, realWidth, realHeight +
> filterSize - 1, g_lumaFilter[xFrac]);
> +        primitives.ipfilter_sp[FILTER_V_S_P_8](immedVal + (halfFilterSize
> - 1) * realWidth, realWidth, subpelbuf, realStride, realWidth, realHeight,
> g_lumaFilter[yFrac]);
> +    }
> +}
> diff -r cec05efee900 -r b009957b4443 source/encoder/motion.h
> --- a/source/encoder/motion.h   Mon Sep 23 12:37:26 2013 +0800
> +++ b/source/encoder/motion.h   Mon Sep 23 12:37:39 2013 +0800
> @@ -95,6 +95,7 @@
>      int motionEstimate(ReferencePlanes *ref, const MV & mvmin, const MV &
> mvmax, const MV & qmvp, int numCandidates, const MV * mvc, int merange, MV
> & outQMv);
>
>      int subpelCompare(ReferencePlanes *ref, const MV & qmv, pixelcmp_t);
> +    void subpelInterpolate2(pixel *fref, intptr_t lumaStride, int xFrac,
> int yFrac, int dir);
>
>  protected:
>
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>

-- 
Steve Borho
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20130923/0ed7f55a/attachment-0001.html>