[x265] [PATCH] Parameter passing optimisation
N Vijay Anand
nvijay.anand at trispacetech.com
Sun Aug 7 11:18:26 CEST 2016
On Sun, Aug 7, 2016 at 11:35 AM, <nvijay.anand at trispacetech.com> wrote:
> # HG changeset patch
> # User N Vijay Anand <nvijay.anand at trispacetech.com>
> # Date 1470549890 -19800
> # Sun Aug 07 11:34:50 2016 +0530
> # Node ID 340dd470d7ebbdd6e9532b9ea6e830627600d3bf
> # Parent f46e843a27cbaa4a1c79f1a43d41a04d63f601c4
> Parameter passing optimisation
>
> diff -r f46e843a27cb -r 340dd470d7eb source/encoder/motion.cpp
> --- a/source/encoder/motion.cpp Fri Aug 05 17:02:17 2016 +0530
> +++ b/source/encoder/motion.cpp Sun Aug 07 11:34:50 2016 +0530
> @@ -100,23 +100,21 @@
>
> }
>
> -#define MAX_NUM_BESTVECTORS (16)
> -
> -inline void PushToBMVStack(MV *bStack, MV & bv, int *bCostStack, int bcost, int maxNumBmv)
> +inline void PushToBMVStack(BmvStack *bStack, MV & bv, int bcost)
> {
> - for (int i=0; i<maxNumBmv; i++)
> + for (int i=0; i<bStack->maxNumBmv; i++)
> {
> - if((bCostStack[i] == bcost) && (bv == bStack[i]))
> + if((bStack->bmvCostStack[i] == bcost) && (bv == bStack->bmvStack[i]))
> break;
> - if((bCostStack[i] >= bcost) && (bv != bStack[i]))
> + if((bStack->bmvCostStack[i] >= bcost) && (bv != bStack->bmvStack[i]))
> {
> - for (int j=maxNumBmv-1; j>i; j--)
> + for (int j=bStack->maxNumBmv-1; j>i; j--)
> {
> - bStack[j] = bStack[j-1];
> - bCostStack[j] = bCostStack[j-1];
> + bStack->bmvStack[j] = bStack->bmvStack[j-1];
> + bStack->bmvCostStack[j] = bStack->bmvCostStack[j-1];
> }
> - bStack[i] = bv;
> - bCostStack[i] = bcost;
> + bStack->bmvStack[i] = bv;
> + bStack->bmvCostStack[i] = bcost;
> break;
> }
> }
> @@ -247,7 +245,7 @@
> bmv = tmv; \
> bPointNr = point; \
> bDistance = dist; \
> - PushToBMVStack(bmvStack, tmv, bmvCostStack, cost, maxNumBmv);
> \
> + PushToBMVStack(bmvStack, tmv, cost); \
> } \
> } while (0)
>
> @@ -257,7 +255,7 @@
> int cost = sad(fenc, FENC_STRIDE, fref + (mx) + (my) * stride,
> stride); \
> cost += mvcost(MV(mx, my) << 2); \
> COPY2_IF_LT(bcost, cost, bmv, MV(mx, my)); \
> - PushToBMVStack(bmvStack, MV(mx,my), bmvCostStack, cost,
> maxNumBmv); \
> + PushToBMVStack(bmvStack, MV(mx,my), cost); \
> } while (0)
>
> #define COST_MV_X3_DIR(m0x, m0y, m1x, m1y, m2x, m2y, costs) \
> @@ -271,9 +269,9 @@
> (costs)[0] += mvcost((bmv + MV(m0x, m0y)) << 2); \
> (costs)[1] += mvcost((bmv + MV(m1x, m1y)) << 2); \
> (costs)[2] += mvcost((bmv + MV(m2x, m2y)) << 2); \
> - PushToBMVStack(bmvStack, bmv+MV(m0x,m0y), bmvCostStack,
> (costs)[0], maxNumBmv); \
> - PushToBMVStack(bmvStack, bmv+MV(m1x,m1y), bmvCostStack,
> (costs)[1], maxNumBmv); \
> - PushToBMVStack(bmvStack, bmv+MV(m2x,m2y), bmvCostStack,
> (costs)[2], maxNumBmv); \
> + PushToBMVStack(bmvStack, bmv+MV(m0x,m0y), (costs)[0]); \
> + PushToBMVStack(bmvStack, bmv+MV(m1x,m1y), (costs)[1]); \
> + PushToBMVStack(bmvStack, bmv+MV(m2x,m2y), (costs)[2]); \
> }
>
> #define COST_MV_PT_DIST_X4(m0x, m0y, p0, d0, m1x, m1y, p1, d1, m2x, m2y,
> p2, d2, m3x, m3y, p3, d3) \
> @@ -289,13 +287,13 @@
> (costs)[2] += mvcost(MV(m2x, m2y) << 2); \
> (costs)[3] += mvcost(MV(m3x, m3y) << 2); \
> COPY4_IF_LT(bcost, costs[0], bmv, MV(m0x, m0y), bPointNr, p0,
> bDistance, d0); \
> - PushToBMVStack(bmvStack, MV(m0x,m0y), bmvCostStack, (costs)[0],
> maxNumBmv); \
> + PushToBMVStack(bmvStack, MV(m0x,m0y), (costs)[0]); \
> COPY4_IF_LT(bcost, costs[1], bmv, MV(m1x, m1y), bPointNr, p1,
> bDistance, d1); \
> - PushToBMVStack(bmvStack, MV(m1x,m1y), bmvCostStack, (costs)[1],
> maxNumBmv); \
> + PushToBMVStack(bmvStack, MV(m1x,m1y), (costs)[1]); \
> COPY4_IF_LT(bcost, costs[2], bmv, MV(m2x, m2y), bPointNr, p2,
> bDistance, d2); \
> - PushToBMVStack(bmvStack, MV(m2x,m2y), bmvCostStack, (costs)[2],
> maxNumBmv); \
> + PushToBMVStack(bmvStack, MV(m2x,m2y), (costs)[2]); \
> COPY4_IF_LT(bcost, costs[3], bmv, MV(m3x, m3y), bPointNr, p3,
> bDistance, d3); \
> - PushToBMVStack(bmvStack, MV(m3x,m3y), bmvCostStack, (costs)[3],
> maxNumBmv); \
> + PushToBMVStack(bmvStack, MV(m3x,m3y), (costs)[3]); \
> }
>
> #define COST_MV_X4(m0x, m0y, m1x, m1y, m2x, m2y, m3x, m3y) \
> @@ -312,13 +310,13 @@
> costs[2] += mvcost((omv + MV(m2x, m2y)) << 2); \
> costs[3] += mvcost((omv + MV(m3x, m3y)) << 2); \
> COPY2_IF_LT(bcost, costs[0], bmv, omv + MV(m0x, m0y)); \
> - PushToBMVStack(bmvStack, omv+MV(m0x,m0y), bmvCostStack, costs[0],
> maxNumBmv); \
> + PushToBMVStack(bmvStack, omv+MV(m0x,m0y), costs[0]); \
> COPY2_IF_LT(bcost, costs[1], bmv, omv + MV(m1x, m1y)); \
> - PushToBMVStack(bmvStack, omv+MV(m1x,m1y), bmvCostStack, costs[1],
> maxNumBmv); \
> + PushToBMVStack(bmvStack, omv+MV(m1x,m1y), costs[1]); \
> COPY2_IF_LT(bcost, costs[2], bmv, omv + MV(m2x, m2y)); \
> - PushToBMVStack(bmvStack, omv+MV(m2x,m2y), bmvCostStack, costs[2],
> maxNumBmv); \
> + PushToBMVStack(bmvStack, omv+MV(m2x,m2y), costs[2]); \
> COPY2_IF_LT(bcost, costs[3], bmv, omv + MV(m3x, m3y)); \
> - PushToBMVStack(bmvStack, omv+MV(m3x,m3y), bmvCostStack, costs[3],
> maxNumBmv); \
> + PushToBMVStack(bmvStack, omv+MV(m3x,m3y), costs[3]); \
> }
>
> #define COST_MV_X4_DIR(m0x, m0y, m1x, m1y, m2x, m2y, m3x, m3y, costs) \
> @@ -334,10 +332,10 @@
> (costs)[1] += mvcost((bmv + MV(m1x, m1y)) << 2); \
> (costs)[2] += mvcost((bmv + MV(m2x, m2y)) << 2); \
> (costs)[3] += mvcost((bmv + MV(m3x, m3y)) << 2); \
> - PushToBMVStack(bmvStack, bmv+MV(m0x,m0y), bmvCostStack,
> (costs)[0], maxNumBmv); \
> - PushToBMVStack(bmvStack, bmv+MV(m1x,m1y), bmvCostStack,
> (costs)[1], maxNumBmv); \
> - PushToBMVStack(bmvStack, bmv+MV(m2x,m2y), bmvCostStack,
> (costs)[2], maxNumBmv); \
> - PushToBMVStack(bmvStack, bmv+MV(m3x,m3y), bmvCostStack,
> (costs)[3], maxNumBmv); \
> + PushToBMVStack(bmvStack, bmv+MV(m0x,m0y), (costs)[0]); \
> + PushToBMVStack(bmvStack, bmv+MV(m1x,m1y), (costs)[1]); \
> + PushToBMVStack(bmvStack, bmv+MV(m2x,m2y), (costs)[2]); \
> + PushToBMVStack(bmvStack, bmv+MV(m3x,m3y), (costs)[3]); \
> }
>
> #define DIA1_ITER(mx, my) \
> @@ -377,9 +375,7 @@
> const MV & mvmax,
> MV & bmv,
> int & bcost,
> - MV *bmvStack,
> - int *bmvCostStack,
> - int maxNumBmv,
> + BmvStack *bmvStack,
> int & bPointNr,
> int & bDistance,
> int earlyExitIters,
> @@ -658,23 +654,24 @@
>
> /* re-measure full pel rounded MVP with SAD as search start point */
> MV bmv = pmv.roundToFPel();
> - MV bmvStack[MAX_NUM_BESTVECTORS];
> - int bmvCostStack[MAX_NUM_BESTVECTORS];
> - int bcost = bprecost;
> - const int maxNumBmv = 1 << searchMethod;
> + BmvStack *bmvStack, bMVStack;
> + int bcost = bprecost;
>
> - bmvStack[0] = bmv;
> - bmvCostStack[0] = bprecost;
> - for (int i=1 ; i < maxNumBmv; i++)
> + bmvStack = &bMVStack;
> + bmvStack->bmvStack[0] = bmv;
> + bmvStack->bmvCostStack[0] = bprecost;
> + bmvStack->maxNumBmv = 1 << searchMethod;
> +
> + for (int i=1 ; i < bmvStack->maxNumBmv; i++)
> {
> - bmvStack[i] = bmv;
> - bmvCostStack[i] = 0x7fffffff;
> + bmvStack->bmvStack[i] = bmv;
> + bmvStack->bmvCostStack[i] = 0x7fffffff;
> }
>
> if (pmv.isSubpel())
> {
> bcost = sad(fenc, FENC_STRIDE, fref + bmv.x + bmv.y * stride,
> stride) + mvcost(bmv << 2);
> - bmvCostStack[0] = bcost;
> + bmvStack->bmvCostStack[0] = bcost;
> }
>
> // measure SAD cost at MV(0) if MVP is not zero
> @@ -685,8 +682,8 @@
> {
> bcost = cost;
> bmv = 0;
> - bmvStack[0] = bmv;
> - bmvCostStack[0] = bcost;
> + bmvStack->bmvStack[0] = bmv;
> + bmvStack->bmvCostStack[0] = bcost;
> }
> }
>
> @@ -816,8 +813,8 @@
> /* refine predictors */
> omv = bmv;
> ucost1 = bcost;
> - bmvStack[0] = bmv;
> - bmvCostStack[0] = bcost;
> + bmvStack->bmvStack[0] = bmv;
> + bmvStack->bmvCostStack[0] = bcost;
> DIA1_ITER(pmv.x, pmv.y);
> if (pmv.notZero())
> DIA1_ITER(0, 0);
> @@ -945,7 +942,7 @@
> do \
> { \
> COPY2_IF_LT(bcost, costs[k], dir, x * 16 + (y & 15)); \
> - PushToBMVStack(bmvStack, omv+MV(x*i,y*i), bmvCostStack, costs[k],
> maxNumBmv); \
> + PushToBMVStack(bmvStack, omv+MV(x*i,y*i), costs[k]); \
> } while (0)
>
> SADS(0, +0, -4, +0, +4, -2, -3, +2, -3);
> @@ -1007,7 +1004,7 @@
> int bDistance = 0;
>
> const int EarlyExitIters = 3;
> - StarPatternSearch(ref, mvmin, mvmax, bmv, bcost, bmvStack,
> bmvCostStack, maxNumBmv, bPointNr, bDistance, EarlyExitIters, merange);
> + StarPatternSearch(ref, mvmin, mvmax, bmv, bcost, bmvStack,
> bPointNr, bDistance, EarlyExitIters, merange);
>
> if (bDistance == 1)
> {
> @@ -1059,19 +1056,19 @@
> stride, costs);
> costs[0] += mvcost(tmv << 2);
> COPY2_IF_LT(bcost, costs[0], bmv, tmv);
> - PushToBMVStack(bmvStack, tmv, bmvCostStack,
> (costs)[0], maxNumBmv);
> + PushToBMVStack(bmvStack, tmv, (costs)[0]);
> tmv.x += RasterDistance;
> costs[1] += mvcost(tmv << 2);
> COPY2_IF_LT(bcost, costs[1], bmv, tmv);
> - PushToBMVStack(bmvStack, tmv, bmvCostStack,
> (costs)[1], maxNumBmv);
> + PushToBMVStack(bmvStack, tmv, (costs)[1]);
> tmv.x += RasterDistance;
> costs[2] += mvcost(tmv << 2);
> COPY2_IF_LT(bcost, costs[2], bmv, tmv);
> - PushToBMVStack(bmvStack, tmv, bmvCostStack,
> (costs)[2], maxNumBmv);
> + PushToBMVStack(bmvStack, tmv, (costs)[2]);
> tmv.x += RasterDistance;
> costs[3] += mvcost(tmv << 3);
> COPY2_IF_LT(bcost, costs[3], bmv, tmv);
> - PushToBMVStack(bmvStack, tmv, bmvCostStack,
> (costs)[3], maxNumBmv);
> + PushToBMVStack(bmvStack, tmv, (costs)[3]);
> }
> else
> COST_MV(tmv.x, tmv.y);
> @@ -1085,7 +1082,7 @@
> bDistance = 0;
> bPointNr = 0;
> const int MaxIters = 32;
> - StarPatternSearch(ref, mvmin, mvmax, bmv, bcost, bmvStack,
> bmvCostStack, maxNumBmv, bPointNr, bDistance, MaxIters, merange);
> + StarPatternSearch(ref, mvmin, mvmax, bmv, bcost, bmvStack,
> bPointNr, bDistance, MaxIters, merange);
>
> if (bDistance == 1)
> {
> @@ -1135,19 +1132,19 @@
> stride, costs);
> costs[0] += mvcost(tmv << 2);
> COPY2_IF_LT(bcost, costs[0], bmv, tmv);
> - PushToBMVStack(bmvStack, tmv, bmvCostStack,
> (costs)[0], maxNumBmv);
> + PushToBMVStack(bmvStack, tmv, (costs)[0]);
> tmv.x++;
> costs[1] += mvcost(tmv << 2);
> COPY2_IF_LT(bcost, costs[1], bmv, tmv);
> - PushToBMVStack(bmvStack, tmv, bmvCostStack,
> (costs)[1], maxNumBmv);
> + PushToBMVStack(bmvStack, tmv, (costs)[1]);
> tmv.x++;
> costs[2] += mvcost(tmv << 2);
> COPY2_IF_LT(bcost, costs[2], bmv, tmv);
> - PushToBMVStack(bmvStack, tmv, bmvCostStack,
> (costs)[2], maxNumBmv);
> + PushToBMVStack(bmvStack, tmv, (costs)[2]);
> tmv.x++;
> costs[3] += mvcost(tmv << 2);
> COPY2_IF_LT(bcost, costs[3], bmv, tmv);
> - PushToBMVStack(bmvStack, tmv, bmvCostStack,
> (costs)[3], maxNumBmv);
> + PushToBMVStack(bmvStack, tmv, (costs)[3]);
> }
> else
> COST_MV(tmv.x, tmv.y);
> @@ -1166,14 +1163,14 @@
> {
> bmv = bestpre;
> bcost = bprecost;
> - PushToBMVStack(bmvStack, bmv, bmvCostStack, bcost, maxNumBmv);
> + PushToBMVStack(bmvStack, bmv, bcost);
> }
> else
> {
> bmv = bmv.toQPel(); // promote search bmv to qpel
> - for (int i=0; i<maxNumBmv; i++)
> + for (int i=0; i<bmvStack->maxNumBmv; i++)
> {
> - bmvStack[i] = bmvStack[i].toQPel();
> + bmvStack->bmvStack[i] = bmvStack->bmvStack[i].toQPel();
> }
> }
>
> @@ -1188,11 +1185,11 @@
> else if (ref->isLowres)
> {
> int bdir = 0;
> - for (int nBmv=0; nBmv<maxNumBmv; nBmv++)
> + for (int nBmv=0; nBmv<bmvStack->maxNumBmv; nBmv++)
> {
> bdir = 0;
> - bmv = bmvStack[nBmv];
> - bcost = bmvCostStack[nBmv];
> + bmv = bmvStack->bmvStack[nBmv];
> + bcost = bmvStack->bmvCostStack[nBmv];
>
> for (int i = 1; i <= wl.hpel_dirs; i++)
> {
> @@ -1213,18 +1210,18 @@
> }
>
> bmv += square1[bdir];
> - bmvStack[nBmv] = bmv;
> - bmvCostStack[nBmv] = bcost;
> + bmvStack->bmvStack[nBmv] = bmv;
> + bmvStack->bmvCostStack[nBmv] = bcost;
> }
>
> - bmv = bmvStack[0];
> - bcost = bmvCostStack[0];
> - for (int i=1; i<maxNumBmv; i++)
> + bmv = bmvStack->bmvStack[0];
> + bcost = bmvStack->bmvCostStack[0];
> + for (int i=1; i<bmvStack->maxNumBmv; i++)
> {
> - if (bmvCostStack[i]<bcost)
> + if (bmvStack->bmvCostStack[i]<bcost)
> {
> - bmv = bmvStack[i];
> - bcost = bmvCostStack[i];
> + bmv = bmvStack->bmvStack[i];
> + bcost = bmvStack->bmvCostStack[i];
> }
> }
> }
> @@ -1232,10 +1229,10 @@
> {
> pixelcmp_t hpelcomp;
>
> - for (int nBmv=0; nBmv<maxNumBmv; nBmv++)
> + for (int nBmv=0; nBmv<bmvStack->maxNumBmv; nBmv++)
> {
> - bmv = bmvStack[nBmv];
> - bcost = bmvCostStack[nBmv];
> + bmv = bmvStack->bmvStack[nBmv];
> + bcost = bmvStack->bmvCostStack[nBmv];
>
> if (wl.hpel_satd)
> {
> @@ -1281,18 +1278,18 @@
> break;
> }
>
> - bmvStack[nBmv] = bmv;
> - bmvCostStack[nBmv] = bcost;
> + bmvStack->bmvStack[nBmv] = bmv;
> + bmvStack->bmvCostStack[nBmv] = bcost;
> }
>
> - bmv = bmvStack[0];
> - bcost = bmvCostStack[0];
> - for (int i=1; i<maxNumBmv; i++)
> + bmv = bmvStack->bmvStack[0];
> + bcost = bmvStack->bmvCostStack[0];
> + for (int i=1; i<bmvStack->maxNumBmv; i++)
> {
> - if (bmvCostStack[i]<bcost)
> + if (bmvStack->bmvCostStack[i]<bcost)
> {
> - bmv = bmvStack[i];
> - bcost = bmvCostStack[i];
> + bmv = bmvStack->bmvStack[i];
> + bcost = bmvStack->bmvCostStack[i];
> }
> }
> }
> diff -r f46e843a27cb -r 340dd470d7eb source/encoder/motion.h
> --- a/source/encoder/motion.h Fri Aug 05 17:02:17 2016 +0530
> +++ b/source/encoder/motion.h Sun Aug 07 11:34:50 2016 +0530
> @@ -34,6 +34,15 @@
> namespace X265_NS {
> // private x265 namespace
>
> +#define MAX_NUM_BESTVECTORS (16)
> +
> +typedef struct _BmvStack
> +{
> + MV bmvStack[MAX_NUM_BESTVECTORS];
> + int bmvCostStack[MAX_NUM_BESTVECTORS];
> + int maxNumBmv;
> +}BmvStack;
> +
> class MotionEstimate : public BitCost
> {
> protected:
> @@ -101,9 +110,7 @@
> const MV & mvmax,
> MV & bmv,
> int & bcost,
> - MV *bmvStack,
> - int *bCostStack,
> - int maxNumBmv,
> + BmvStack *bmvStack,
> int & bPointNr,
> int & bDistance,
> int earlyExitIters,
>
--
Principal Architect and Director,
TriSpace Technologies Pvt Ltd.,
Bangalore
CONFIDENTIALITY NOTE : The information in this e-mail is confidential and
privileged; it is intended for use solely by the individual or entity named
as the recipient hereof. Disclosure, copying, distribution, or use of the
contents of this e-mail by persons other than the intended recipient is
strictly prohibited and may violate applicable laws. If you have received
this e-mail in error, please delete the original message and notify us by
return email or collect call immediately. Thank you. TriSpace Technologies
Pvt. Ltd.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20160807/efc18544/attachment-0001.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: X265_2.pptx
Type: application/vnd.openxmlformats-officedocument.presentationml.presentation
Size: 67615 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20160807/efc18544/attachment-0001.pptx>
More information about the x265-devel mailing list