[x265] [PATCH] Parameter passing optimisation

N Vijay Anand nvijay.anand at trispacetech.com
Sun Aug 7 11:18:26 CEST 2016


On Sun, Aug 7, 2016 at 11:35 AM, <nvijay.anand at trispacetech.com> wrote:

> # HG changeset patch
> # User N Vijay Anand <nvijay.anand at trispacetech.com>
> # Date 1470549890 -19800
> #      Sun Aug 07 11:34:50 2016 +0530
> # Node ID 340dd470d7ebbdd6e9532b9ea6e830627600d3bf
> # Parent  f46e843a27cbaa4a1c79f1a43d41a04d63f601c4
> Parameter passing optimisation
>
> diff -r f46e843a27cb -r 340dd470d7eb source/encoder/motion.cpp
> --- a/source/encoder/motion.cpp Fri Aug 05 17:02:17 2016 +0530
> +++ b/source/encoder/motion.cpp Sun Aug 07 11:34:50 2016 +0530
> @@ -100,23 +100,21 @@
>
>  }
>
> -#define MAX_NUM_BESTVECTORS     (16)
> -
> -inline void PushToBMVStack(MV  *bStack, MV & bv, int *bCostStack, int bcost, int maxNumBmv)
> +inline void PushToBMVStack(BmvStack *bStack, MV & bv, int bcost)
>  {
> -    for (int i=0; i<maxNumBmv; i++)
> +    for (int i=0; i<bStack->maxNumBmv; i++)
>      {
> -        if((bCostStack[i] == bcost) && (bv == bStack[i]))
> +        if((bStack->bmvCostStack[i] == bcost) && (bv == bStack->bmvStack[i]))
>              break;
> -        if((bCostStack[i] >= bcost) && (bv != bStack[i]))
> +        if((bStack->bmvCostStack[i] >= bcost) && (bv != bStack->bmvStack[i]))
>          {
> -            for (int j=maxNumBmv-1; j>i; j--)
> +            for (int j=bStack->maxNumBmv-1; j>i; j--)
>              {
> -                bStack[j] = bStack[j-1];
> -                bCostStack[j] = bCostStack[j-1];
> +                bStack->bmvStack[j] = bStack->bmvStack[j-1];
> +                bStack->bmvCostStack[j] = bStack->bmvCostStack[j-1];
>              }
> -            bStack[i] = bv;
> -            bCostStack[i] = bcost;
> +            bStack->bmvStack[i] = bv;
> +            bStack->bmvCostStack[i] = bcost;
>              break;
>          }
>      }
> @@ -247,7 +245,7 @@
>              bmv = tmv; \
>              bPointNr = point; \
>              bDistance = dist; \
> -            PushToBMVStack(bmvStack, tmv, bmvCostStack, cost, maxNumBmv); \
> +            PushToBMVStack(bmvStack, tmv, cost); \
>          } \
>      } while (0)
>
> @@ -257,7 +255,7 @@
>          int cost = sad(fenc, FENC_STRIDE, fref + (mx) + (my) * stride, stride); \
>          cost += mvcost(MV(mx, my) << 2); \
>          COPY2_IF_LT(bcost, cost, bmv, MV(mx, my)); \
> -        PushToBMVStack(bmvStack, MV(mx,my), bmvCostStack, cost, maxNumBmv); \
> +        PushToBMVStack(bmvStack, MV(mx,my), cost); \
>      } while (0)
>
>  #define COST_MV_X3_DIR(m0x, m0y, m1x, m1y, m2x, m2y, costs) \
> @@ -271,9 +269,9 @@
>          (costs)[0] += mvcost((bmv + MV(m0x, m0y)) << 2); \
>          (costs)[1] += mvcost((bmv + MV(m1x, m1y)) << 2); \
>          (costs)[2] += mvcost((bmv + MV(m2x, m2y)) << 2); \
> -        PushToBMVStack(bmvStack, bmv+MV(m0x,m0y), bmvCostStack, (costs)[0], maxNumBmv); \
> -        PushToBMVStack(bmvStack, bmv+MV(m1x,m1y), bmvCostStack, (costs)[1], maxNumBmv); \
> -        PushToBMVStack(bmvStack, bmv+MV(m2x,m2y), bmvCostStack, (costs)[2], maxNumBmv); \
> +        PushToBMVStack(bmvStack, bmv+MV(m0x,m0y), (costs)[0]); \
> +        PushToBMVStack(bmvStack, bmv+MV(m1x,m1y), (costs)[1]); \
> +        PushToBMVStack(bmvStack, bmv+MV(m2x,m2y), (costs)[2]); \
>      }
>
>  #define COST_MV_PT_DIST_X4(m0x, m0y, p0, d0, m1x, m1y, p1, d1, m2x, m2y, p2, d2, m3x, m3y, p3, d3) \
> @@ -289,13 +287,13 @@
>          (costs)[2] += mvcost(MV(m2x, m2y) << 2); \
>          (costs)[3] += mvcost(MV(m3x, m3y) << 2); \
>          COPY4_IF_LT(bcost, costs[0], bmv, MV(m0x, m0y), bPointNr, p0, bDistance, d0); \
> -        PushToBMVStack(bmvStack, MV(m0x,m0y), bmvCostStack, (costs)[0], maxNumBmv); \
> +        PushToBMVStack(bmvStack, MV(m0x,m0y), (costs)[0]); \
>          COPY4_IF_LT(bcost, costs[1], bmv, MV(m1x, m1y), bPointNr, p1, bDistance, d1); \
> -        PushToBMVStack(bmvStack, MV(m1x,m1y), bmvCostStack, (costs)[1], maxNumBmv); \
> +        PushToBMVStack(bmvStack, MV(m1x,m1y), (costs)[1]); \
>          COPY4_IF_LT(bcost, costs[2], bmv, MV(m2x, m2y), bPointNr, p2, bDistance, d2); \
> -        PushToBMVStack(bmvStack, MV(m2x,m2y), bmvCostStack, (costs)[2], maxNumBmv); \
> +        PushToBMVStack(bmvStack, MV(m2x,m2y), (costs)[2]); \
>          COPY4_IF_LT(bcost, costs[3], bmv, MV(m3x, m3y), bPointNr, p3, bDistance, d3); \
> -        PushToBMVStack(bmvStack, MV(m3x,m3y), bmvCostStack, (costs)[3], maxNumBmv); \
> +        PushToBMVStack(bmvStack, MV(m3x,m3y), (costs)[3]); \
>      }
>
>  #define COST_MV_X4(m0x, m0y, m1x, m1y, m2x, m2y, m3x, m3y) \
> @@ -312,13 +310,13 @@
>          costs[2] += mvcost((omv + MV(m2x, m2y)) << 2); \
>          costs[3] += mvcost((omv + MV(m3x, m3y)) << 2); \
>          COPY2_IF_LT(bcost, costs[0], bmv, omv + MV(m0x, m0y)); \
> -        PushToBMVStack(bmvStack, omv+MV(m0x,m0y), bmvCostStack, costs[0], maxNumBmv); \
> +        PushToBMVStack(bmvStack, omv+MV(m0x,m0y), costs[0]); \
>          COPY2_IF_LT(bcost, costs[1], bmv, omv + MV(m1x, m1y)); \
> -        PushToBMVStack(bmvStack, omv+MV(m1x,m1y), bmvCostStack, costs[1], maxNumBmv); \
> +        PushToBMVStack(bmvStack, omv+MV(m1x,m1y), costs[1]); \
>          COPY2_IF_LT(bcost, costs[2], bmv, omv + MV(m2x, m2y)); \
> -        PushToBMVStack(bmvStack, omv+MV(m2x,m2y), bmvCostStack, costs[2], maxNumBmv); \
> +        PushToBMVStack(bmvStack, omv+MV(m2x,m2y), costs[2]); \
>          COPY2_IF_LT(bcost, costs[3], bmv, omv + MV(m3x, m3y)); \
> -        PushToBMVStack(bmvStack, omv+MV(m3x,m3y), bmvCostStack, costs[3], maxNumBmv); \
> +        PushToBMVStack(bmvStack, omv+MV(m3x,m3y), costs[3]); \
>      }
>
>  #define COST_MV_X4_DIR(m0x, m0y, m1x, m1y, m2x, m2y, m3x, m3y, costs) \
> @@ -334,10 +332,10 @@
>          (costs)[1] += mvcost((bmv + MV(m1x, m1y)) << 2); \
>          (costs)[2] += mvcost((bmv + MV(m2x, m2y)) << 2); \
>          (costs)[3] += mvcost((bmv + MV(m3x, m3y)) << 2); \
> -        PushToBMVStack(bmvStack, bmv+MV(m0x,m0y), bmvCostStack, (costs)[0], maxNumBmv); \
> -        PushToBMVStack(bmvStack, bmv+MV(m1x,m1y), bmvCostStack, (costs)[1], maxNumBmv); \
> -        PushToBMVStack(bmvStack, bmv+MV(m2x,m2y), bmvCostStack, (costs)[2], maxNumBmv); \
> -        PushToBMVStack(bmvStack, bmv+MV(m3x,m3y), bmvCostStack, (costs)[3], maxNumBmv); \
> +        PushToBMVStack(bmvStack, bmv+MV(m0x,m0y), (costs)[0]); \
> +        PushToBMVStack(bmvStack, bmv+MV(m1x,m1y), (costs)[1]); \
> +        PushToBMVStack(bmvStack, bmv+MV(m2x,m2y), (costs)[2]); \
> +        PushToBMVStack(bmvStack, bmv+MV(m3x,m3y), (costs)[3]); \
>      }
>
>  #define DIA1_ITER(mx, my) \
> @@ -377,9 +375,7 @@
>                                         const MV &       mvmax,
>                                         MV &             bmv,
>                                         int &            bcost,
> -                                       MV              *bmvStack,
> -                                       int             *bmvCostStack,
> -                                       int              maxNumBmv,
> +                                       BmvStack        *bmvStack,
>                                         int &            bPointNr,
>                                         int &            bDistance,
>                                         int              earlyExitIters,
> @@ -658,23 +654,24 @@
>
>      /* re-measure full pel rounded MVP with SAD as search start point */
>      MV bmv = pmv.roundToFPel();
> -    MV bmvStack[MAX_NUM_BESTVECTORS];
> -    int bmvCostStack[MAX_NUM_BESTVECTORS];
> -    int bcost = bprecost;
> -    const int maxNumBmv = 1 << searchMethod;
> +    BmvStack *bmvStack, bMVStack;
> +    int bcost = bprecost;
>
> -    bmvStack[0] = bmv;
> -    bmvCostStack[0] = bprecost;
> -    for (int i=1 ; i < maxNumBmv; i++)
> +    bmvStack = &bMVStack;
> +    bmvStack->bmvStack[0] = bmv;
> +    bmvStack->bmvCostStack[0] = bprecost;
> +    bmvStack->maxNumBmv = 1 << searchMethod;
> +
> +    for (int i=1 ; i < bmvStack->maxNumBmv; i++)
>      {
> -        bmvStack[i] = bmv;
> -        bmvCostStack[i] = 0x7fffffff;
> +        bmvStack->bmvStack[i] = bmv;
> +        bmvStack->bmvCostStack[i] = 0x7fffffff;
>      }
>
>      if (pmv.isSubpel())
>      {
>          bcost = sad(fenc, FENC_STRIDE, fref + bmv.x + bmv.y * stride, stride) + mvcost(bmv << 2);
> -        bmvCostStack[0] = bcost;
> +        bmvStack->bmvCostStack[0] = bcost;
>      }
>
>      // measure SAD cost at MV(0) if MVP is not zero
> @@ -685,8 +682,8 @@
>          {
>              bcost = cost;
>              bmv = 0;
> -            bmvStack[0] = bmv;
> -            bmvCostStack[0] = bcost;
> +            bmvStack->bmvStack[0] = bmv;
> +            bmvStack->bmvCostStack[0] = bcost;
>          }
>      }
>
> @@ -816,8 +813,8 @@
>          /* refine predictors */
>          omv = bmv;
>          ucost1 = bcost;
> -        bmvStack[0] = bmv;
> -        bmvCostStack[0] = bcost;
> +        bmvStack->bmvStack[0] = bmv;
> +        bmvStack->bmvCostStack[0] = bcost;
>          DIA1_ITER(pmv.x, pmv.y);
>          if (pmv.notZero())
>              DIA1_ITER(0, 0);
> @@ -945,7 +942,7 @@
>      do \
>      {  \
>          COPY2_IF_LT(bcost, costs[k], dir, x * 16 + (y & 15)); \
> -        PushToBMVStack(bmvStack, omv+MV(x*i,y*i), bmvCostStack, costs[k], maxNumBmv); \
> +        PushToBMVStack(bmvStack, omv+MV(x*i,y*i), costs[k]); \
>      } while (0)
>
>                  SADS(0, +0, -4, +0, +4, -2, -3, +2, -3);
> @@ -1007,7 +1004,7 @@
>          int bDistance = 0;
>
>          const int EarlyExitIters = 3;
> -        StarPatternSearch(ref, mvmin, mvmax, bmv, bcost, bmvStack, bmvCostStack, maxNumBmv, bPointNr, bDistance, EarlyExitIters, merange);
> +        StarPatternSearch(ref, mvmin, mvmax, bmv, bcost, bmvStack, bPointNr, bDistance, EarlyExitIters, merange);
>
>          if (bDistance == 1)
>          {
> @@ -1059,19 +1056,19 @@
>                                 stride, costs);
>                          costs[0] += mvcost(tmv << 2);
>                          COPY2_IF_LT(bcost, costs[0], bmv, tmv);
> -                        PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[0], maxNumBmv);
> +                        PushToBMVStack(bmvStack, tmv, (costs)[0]);
>                          tmv.x += RasterDistance;
>                          costs[1] += mvcost(tmv << 2);
>                          COPY2_IF_LT(bcost, costs[1], bmv, tmv);
> -                        PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[1], maxNumBmv);
> +                        PushToBMVStack(bmvStack, tmv, (costs)[1]);
>                          tmv.x += RasterDistance;
>                          costs[2] += mvcost(tmv << 2);
>                          COPY2_IF_LT(bcost, costs[2], bmv, tmv);
> -                        PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[2], maxNumBmv);
> +                        PushToBMVStack(bmvStack, tmv, (costs)[2]);
>                          tmv.x += RasterDistance;
>                          costs[3] += mvcost(tmv << 3);
>                          COPY2_IF_LT(bcost, costs[3], bmv, tmv);
> -                        PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[3], maxNumBmv);
> +                        PushToBMVStack(bmvStack, tmv, (costs)[3]);
>                      }
>                      else
>                          COST_MV(tmv.x, tmv.y);
> @@ -1085,7 +1082,7 @@
>              bDistance = 0;
>              bPointNr = 0;
>              const int MaxIters = 32;
> -            StarPatternSearch(ref, mvmin, mvmax, bmv, bcost, bmvStack, bmvCostStack, maxNumBmv, bPointNr, bDistance, MaxIters, merange);
> +            StarPatternSearch(ref, mvmin, mvmax, bmv, bcost, bmvStack, bPointNr, bDistance, MaxIters, merange);
>
>              if (bDistance == 1)
>              {
> @@ -1135,19 +1132,19 @@
>                             stride, costs);
>                      costs[0] += mvcost(tmv << 2);
>                      COPY2_IF_LT(bcost, costs[0], bmv, tmv);
> -                    PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[0], maxNumBmv);
> +                    PushToBMVStack(bmvStack, tmv, (costs)[0]);
>                      tmv.x++;
>                      costs[1] += mvcost(tmv << 2);
>                      COPY2_IF_LT(bcost, costs[1], bmv, tmv);
> -                    PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[1], maxNumBmv);
> +                    PushToBMVStack(bmvStack, tmv, (costs)[1]);
>                      tmv.x++;
>                      costs[2] += mvcost(tmv << 2);
>                      COPY2_IF_LT(bcost, costs[2], bmv, tmv);
> -                    PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[2], maxNumBmv);
> +                    PushToBMVStack(bmvStack, tmv, (costs)[2]);
>                      tmv.x++;
>                      costs[3] += mvcost(tmv << 2);
>                      COPY2_IF_LT(bcost, costs[3], bmv, tmv);
> -                    PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[3], maxNumBmv);
> +                    PushToBMVStack(bmvStack, tmv, (costs)[3]);
>                  }
>                  else
>                      COST_MV(tmv.x, tmv.y);
> @@ -1166,14 +1163,14 @@
>      {
>          bmv = bestpre;
>          bcost = bprecost;
> -        PushToBMVStack(bmvStack, bmv, bmvCostStack, bcost, maxNumBmv);
> +        PushToBMVStack(bmvStack, bmv, bcost);
>      }
>      else
>      {
>          bmv = bmv.toQPel(); // promote search bmv to qpel
> -        for (int i=0; i<maxNumBmv; i++)
> +        for (int i=0; i<bmvStack->maxNumBmv; i++)
>          {
> -          bmvStack[i] = bmvStack[i].toQPel();
> +          bmvStack->bmvStack[i] = bmvStack->bmvStack[i].toQPel();
>          }
>      }
>
> @@ -1188,11 +1185,11 @@
>      else if (ref->isLowres)
>      {
>          int bdir = 0;
> -        for (int nBmv=0; nBmv<maxNumBmv; nBmv++)
> +        for (int nBmv=0; nBmv<bmvStack->maxNumBmv; nBmv++)
>          {
>            bdir = 0;
> -          bmv =  bmvStack[nBmv];
> -          bcost = bmvCostStack[nBmv];
> +          bmv =  bmvStack->bmvStack[nBmv];
> +          bcost = bmvStack->bmvCostStack[nBmv];
>
>            for (int i = 1; i <= wl.hpel_dirs; i++)
>            {
> @@ -1213,18 +1210,18 @@
>            }
>
>            bmv += square1[bdir];
> -          bmvStack[nBmv] = bmv;
> -          bmvCostStack[nBmv] = bcost;
> +          bmvStack->bmvStack[nBmv] = bmv;
> +          bmvStack->bmvCostStack[nBmv] = bcost;
>          }
>
> -        bmv = bmvStack[0];
> -        bcost = bmvCostStack[0];
> -        for (int i=1; i<maxNumBmv; i++)
> +        bmv = bmvStack->bmvStack[0];
> +        bcost = bmvStack->bmvCostStack[0];
> +        for (int i=1; i<bmvStack->maxNumBmv; i++)
>          {
> -          if (bmvCostStack[i]<bcost)
> +          if (bmvStack->bmvCostStack[i]<bcost)
>            {
> -            bmv = bmvStack[i];
> -            bcost = bmvCostStack[i];
> +            bmv = bmvStack->bmvStack[i];
> +            bcost = bmvStack->bmvCostStack[i];
>            }
>          }
>      }
> @@ -1232,10 +1229,10 @@
>      {
>          pixelcmp_t hpelcomp;
>
> -        for (int nBmv=0; nBmv<maxNumBmv; nBmv++)
> +        for (int nBmv=0; nBmv<bmvStack->maxNumBmv; nBmv++)
>          {
> -          bmv =  bmvStack[nBmv];
> -          bcost = bmvCostStack[nBmv];
> +          bmv =  bmvStack->bmvStack[nBmv];
> +          bcost = bmvStack->bmvCostStack[nBmv];
>
>            if (wl.hpel_satd)
>            {
> @@ -1281,18 +1278,18 @@
>                    break;
>            }
>
> -          bmvStack[nBmv] = bmv;
> -          bmvCostStack[nBmv] = bcost;
> +          bmvStack->bmvStack[nBmv] = bmv;
> +          bmvStack->bmvCostStack[nBmv] = bcost;
>          }
>
> -        bmv = bmvStack[0];
> -        bcost = bmvCostStack[0];
> -        for (int i=1; i<maxNumBmv; i++)
> +        bmv = bmvStack->bmvStack[0];
> +        bcost = bmvStack->bmvCostStack[0];
> +        for (int i=1; i<bmvStack->maxNumBmv; i++)
>          {
> -          if (bmvCostStack[i]<bcost)
> +          if (bmvStack->bmvCostStack[i]<bcost)
>            {
> -            bmv = bmvStack[i];
> -            bcost = bmvCostStack[i];
> +            bmv = bmvStack->bmvStack[i];
> +            bcost = bmvStack->bmvCostStack[i];
>            }
>          }
>      }
> diff -r f46e843a27cb -r 340dd470d7eb source/encoder/motion.h
> --- a/source/encoder/motion.h   Fri Aug 05 17:02:17 2016 +0530
> +++ b/source/encoder/motion.h   Sun Aug 07 11:34:50 2016 +0530
> @@ -34,6 +34,15 @@
>  namespace X265_NS {
>  // private x265 namespace
>
> +#define MAX_NUM_BESTVECTORS     (16)
> +
> +typedef struct _BmvStack
> +{
> +    MV  bmvStack[MAX_NUM_BESTVECTORS];
> +    int bmvCostStack[MAX_NUM_BESTVECTORS];
> +    int maxNumBmv;
> +}BmvStack;
> +
>  class MotionEstimate : public BitCost
>  {
>  protected:
> @@ -101,9 +110,7 @@
>                                    const MV &       mvmax,
>                                    MV &             bmv,
>                                    int &            bcost,
> -                                  MV              *bmvStack,
> -                                  int             *bCostStack,
> -                                  int              maxNumBmv,
> +                                  BmvStack        *bmvStack,
>                                    int &            bPointNr,
>                                    int &            bDistance,
>                                    int              earlyExitIters,
>



-- 
Principal Architect and Director,
TriSpace Technologies Pvt Ltd.,
Bangalore

CONFIDENTIALITY NOTE : The information in this e-mail is confidential and
privileged; it is intended for use solely by the individual or entity named
as the recipient hereof. Disclosure, copying, distribution, or use of the
contents of this e-mail by persons other than the intended recipient is
strictly prohibited and may violate applicable laws. If you have received
this e-mail in error, please delete the original message and notify us by
return email or collect call immediately. Thank you. TriSpace Technologies
Pvt. Ltd.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20160807/efc18544/attachment-0001.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: X265_2.pptx
Type: application/vnd.openxmlformats-officedocument.presentationml.presentation
Size: 67615 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20160807/efc18544/attachment-0001.pptx>


More information about the x265-devel mailing list