[x265] [PATCH] Parameter passing optimisation
nvijay.anand at trispacetech.com
nvijay.anand at trispacetech.com
Sun Aug 7 08:05:39 CEST 2016
# HG changeset patch
# User N Vijay Anand <nvijay.anand at trispacetech.com>
# Date 1470549890 -19800
# Sun Aug 07 11:34:50 2016 +0530
# Node ID 340dd470d7ebbdd6e9532b9ea6e830627600d3bf
# Parent f46e843a27cbaa4a1c79f1a43d41a04d63f601c4
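Parameter passing optimisation

Bundle the best-MV stack arrays (bmvStack, bmvCostStack) and their length (maxNumBmv) into a single BmvStack struct, so that PushToBMVStack and StarPatternSearch take one pointer instead of three separate arguments.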
diff -r f46e843a27cb -r 340dd470d7eb source/encoder/motion.cpp
--- a/source/encoder/motion.cpp Fri Aug 05 17:02:17 2016 +0530
+++ b/source/encoder/motion.cpp Sun Aug 07 11:34:50 2016 +0530
@@ -100,23 +100,21 @@
}
-#define MAX_NUM_BESTVECTORS (16)
-
-inline void PushToBMVStack(MV *bStack, MV & bv, int *bCostStack, int bcost, int maxNumBmv)
+inline void PushToBMVStack(BmvStack *bStack, MV & bv, int bcost)
{
- for (int i=0; i<maxNumBmv; i++)
+ for (int i=0; i<bStack->maxNumBmv; i++)
{
- if((bCostStack[i] == bcost) && (bv == bStack[i]))
+ if((bStack->bmvCostStack[i] == bcost) && (bv == bStack->bmvStack[i]))
break;
- if((bCostStack[i] >= bcost) && (bv != bStack[i]))
+ if((bStack->bmvCostStack[i] >= bcost) && (bv != bStack->bmvStack[i]))
{
- for (int j=maxNumBmv-1; j>i; j--)
+ for (int j=bStack->maxNumBmv-1; j>i; j--)
{
- bStack[j] = bStack[j-1];
- bCostStack[j] = bCostStack[j-1];
+ bStack->bmvStack[j] = bStack->bmvStack[j-1];
+ bStack->bmvCostStack[j] = bStack->bmvCostStack[j-1];
}
- bStack[i] = bv;
- bCostStack[i] = bcost;
+ bStack->bmvStack[i] = bv;
+ bStack->bmvCostStack[i] = bcost;
break;
}
}
@@ -247,7 +245,7 @@
bmv = tmv; \
bPointNr = point; \
bDistance = dist; \
- PushToBMVStack(bmvStack, tmv, bmvCostStack, cost, maxNumBmv); \
+ PushToBMVStack(bmvStack, tmv, cost); \
} \
} while (0)
@@ -257,7 +255,7 @@
int cost = sad(fenc, FENC_STRIDE, fref + (mx) + (my) * stride, stride); \
cost += mvcost(MV(mx, my) << 2); \
COPY2_IF_LT(bcost, cost, bmv, MV(mx, my)); \
- PushToBMVStack(bmvStack, MV(mx,my), bmvCostStack, cost, maxNumBmv); \
+ PushToBMVStack(bmvStack, MV(mx,my), cost); \
} while (0)
#define COST_MV_X3_DIR(m0x, m0y, m1x, m1y, m2x, m2y, costs) \
@@ -271,9 +269,9 @@
(costs)[0] += mvcost((bmv + MV(m0x, m0y)) << 2); \
(costs)[1] += mvcost((bmv + MV(m1x, m1y)) << 2); \
(costs)[2] += mvcost((bmv + MV(m2x, m2y)) << 2); \
- PushToBMVStack(bmvStack, bmv+MV(m0x,m0y), bmvCostStack, (costs)[0], maxNumBmv); \
- PushToBMVStack(bmvStack, bmv+MV(m1x,m1y), bmvCostStack, (costs)[1], maxNumBmv); \
- PushToBMVStack(bmvStack, bmv+MV(m2x,m2y), bmvCostStack, (costs)[2], maxNumBmv); \
+ PushToBMVStack(bmvStack, bmv+MV(m0x,m0y), (costs)[0]); \
+ PushToBMVStack(bmvStack, bmv+MV(m1x,m1y), (costs)[1]); \
+ PushToBMVStack(bmvStack, bmv+MV(m2x,m2y), (costs)[2]); \
}
#define COST_MV_PT_DIST_X4(m0x, m0y, p0, d0, m1x, m1y, p1, d1, m2x, m2y, p2, d2, m3x, m3y, p3, d3) \
@@ -289,13 +287,13 @@
(costs)[2] += mvcost(MV(m2x, m2y) << 2); \
(costs)[3] += mvcost(MV(m3x, m3y) << 2); \
COPY4_IF_LT(bcost, costs[0], bmv, MV(m0x, m0y), bPointNr, p0, bDistance, d0); \
- PushToBMVStack(bmvStack, MV(m0x,m0y), bmvCostStack, (costs)[0], maxNumBmv); \
+ PushToBMVStack(bmvStack, MV(m0x,m0y), (costs)[0]); \
COPY4_IF_LT(bcost, costs[1], bmv, MV(m1x, m1y), bPointNr, p1, bDistance, d1); \
- PushToBMVStack(bmvStack, MV(m1x,m1y), bmvCostStack, (costs)[1], maxNumBmv); \
+ PushToBMVStack(bmvStack, MV(m1x,m1y), (costs)[1]); \
COPY4_IF_LT(bcost, costs[2], bmv, MV(m2x, m2y), bPointNr, p2, bDistance, d2); \
- PushToBMVStack(bmvStack, MV(m2x,m2y), bmvCostStack, (costs)[2], maxNumBmv); \
+ PushToBMVStack(bmvStack, MV(m2x,m2y), (costs)[2]); \
COPY4_IF_LT(bcost, costs[3], bmv, MV(m3x, m3y), bPointNr, p3, bDistance, d3); \
- PushToBMVStack(bmvStack, MV(m3x,m3y), bmvCostStack, (costs)[3], maxNumBmv); \
+ PushToBMVStack(bmvStack, MV(m3x,m3y), (costs)[3]); \
}
#define COST_MV_X4(m0x, m0y, m1x, m1y, m2x, m2y, m3x, m3y) \
@@ -312,13 +310,13 @@
costs[2] += mvcost((omv + MV(m2x, m2y)) << 2); \
costs[3] += mvcost((omv + MV(m3x, m3y)) << 2); \
COPY2_IF_LT(bcost, costs[0], bmv, omv + MV(m0x, m0y)); \
- PushToBMVStack(bmvStack, omv+MV(m0x,m0y), bmvCostStack, costs[0], maxNumBmv); \
+ PushToBMVStack(bmvStack, omv+MV(m0x,m0y), costs[0]); \
COPY2_IF_LT(bcost, costs[1], bmv, omv + MV(m1x, m1y)); \
- PushToBMVStack(bmvStack, omv+MV(m1x,m1y), bmvCostStack, costs[1], maxNumBmv); \
+ PushToBMVStack(bmvStack, omv+MV(m1x,m1y), costs[1]); \
COPY2_IF_LT(bcost, costs[2], bmv, omv + MV(m2x, m2y)); \
- PushToBMVStack(bmvStack, omv+MV(m2x,m2y), bmvCostStack, costs[2], maxNumBmv); \
+ PushToBMVStack(bmvStack, omv+MV(m2x,m2y), costs[2]); \
COPY2_IF_LT(bcost, costs[3], bmv, omv + MV(m3x, m3y)); \
- PushToBMVStack(bmvStack, omv+MV(m3x,m3y), bmvCostStack, costs[3], maxNumBmv); \
+ PushToBMVStack(bmvStack, omv+MV(m3x,m3y), costs[3]); \
}
#define COST_MV_X4_DIR(m0x, m0y, m1x, m1y, m2x, m2y, m3x, m3y, costs) \
@@ -334,10 +332,10 @@
(costs)[1] += mvcost((bmv + MV(m1x, m1y)) << 2); \
(costs)[2] += mvcost((bmv + MV(m2x, m2y)) << 2); \
(costs)[3] += mvcost((bmv + MV(m3x, m3y)) << 2); \
- PushToBMVStack(bmvStack, bmv+MV(m0x,m0y), bmvCostStack, (costs)[0], maxNumBmv); \
- PushToBMVStack(bmvStack, bmv+MV(m1x,m1y), bmvCostStack, (costs)[1], maxNumBmv); \
- PushToBMVStack(bmvStack, bmv+MV(m2x,m2y), bmvCostStack, (costs)[2], maxNumBmv); \
- PushToBMVStack(bmvStack, bmv+MV(m3x,m3y), bmvCostStack, (costs)[3], maxNumBmv); \
+ PushToBMVStack(bmvStack, bmv+MV(m0x,m0y), (costs)[0]); \
+ PushToBMVStack(bmvStack, bmv+MV(m1x,m1y), (costs)[1]); \
+ PushToBMVStack(bmvStack, bmv+MV(m2x,m2y), (costs)[2]); \
+ PushToBMVStack(bmvStack, bmv+MV(m3x,m3y), (costs)[3]); \
}
#define DIA1_ITER(mx, my) \
@@ -377,9 +375,7 @@
const MV & mvmax,
MV & bmv,
int & bcost,
- MV *bmvStack,
- int *bmvCostStack,
- int maxNumBmv,
+ BmvStack *bmvStack,
int & bPointNr,
int & bDistance,
int earlyExitIters,
@@ -658,23 +654,24 @@
/* re-measure full pel rounded MVP with SAD as search start point */
MV bmv = pmv.roundToFPel();
- MV bmvStack[MAX_NUM_BESTVECTORS];
- int bmvCostStack[MAX_NUM_BESTVECTORS];
- int bcost = bprecost;
- const int maxNumBmv = 1 << searchMethod;
+ BmvStack *bmvStack, bMVStack;
+ int bcost = bprecost;
- bmvStack[0] = bmv;
- bmvCostStack[0] = bprecost;
- for (int i=1 ; i < maxNumBmv; i++)
+ bmvStack = &bMVStack;
+ bmvStack->bmvStack[0] = bmv;
+ bmvStack->bmvCostStack[0] = bprecost;
+ bmvStack->maxNumBmv = 1 << searchMethod;
+
+ for (int i=1 ; i < bmvStack->maxNumBmv; i++)
{
- bmvStack[i] = bmv;
- bmvCostStack[i] = 0x7fffffff;
+ bmvStack->bmvStack[i] = bmv;
+ bmvStack->bmvCostStack[i] = 0x7fffffff;
}
if (pmv.isSubpel())
{
bcost = sad(fenc, FENC_STRIDE, fref + bmv.x + bmv.y * stride, stride) + mvcost(bmv << 2);
- bmvCostStack[0] = bcost;
+ bmvStack->bmvCostStack[0] = bcost;
}
// measure SAD cost at MV(0) if MVP is not zero
@@ -685,8 +682,8 @@
{
bcost = cost;
bmv = 0;
- bmvStack[0] = bmv;
- bmvCostStack[0] = bcost;
+ bmvStack->bmvStack[0] = bmv;
+ bmvStack->bmvCostStack[0] = bcost;
}
}
@@ -816,8 +813,8 @@
/* refine predictors */
omv = bmv;
ucost1 = bcost;
- bmvStack[0] = bmv;
- bmvCostStack[0] = bcost;
+ bmvStack->bmvStack[0] = bmv;
+ bmvStack->bmvCostStack[0] = bcost;
DIA1_ITER(pmv.x, pmv.y);
if (pmv.notZero())
DIA1_ITER(0, 0);
@@ -945,7 +942,7 @@
do \
{ \
COPY2_IF_LT(bcost, costs[k], dir, x * 16 + (y & 15)); \
- PushToBMVStack(bmvStack, omv+MV(x*i,y*i), bmvCostStack, costs[k], maxNumBmv); \
+ PushToBMVStack(bmvStack, omv+MV(x*i,y*i), costs[k]); \
} while (0)
SADS(0, +0, -4, +0, +4, -2, -3, +2, -3);
@@ -1007,7 +1004,7 @@
int bDistance = 0;
const int EarlyExitIters = 3;
- StarPatternSearch(ref, mvmin, mvmax, bmv, bcost, bmvStack, bmvCostStack, maxNumBmv, bPointNr, bDistance, EarlyExitIters, merange);
+ StarPatternSearch(ref, mvmin, mvmax, bmv, bcost, bmvStack, bPointNr, bDistance, EarlyExitIters, merange);
if (bDistance == 1)
{
@@ -1059,19 +1056,19 @@
stride, costs);
costs[0] += mvcost(tmv << 2);
COPY2_IF_LT(bcost, costs[0], bmv, tmv);
- PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[0], maxNumBmv);
+ PushToBMVStack(bmvStack, tmv, (costs)[0]);
tmv.x += RasterDistance;
costs[1] += mvcost(tmv << 2);
COPY2_IF_LT(bcost, costs[1], bmv, tmv);
- PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[1], maxNumBmv);
+ PushToBMVStack(bmvStack, tmv, (costs)[1]);
tmv.x += RasterDistance;
costs[2] += mvcost(tmv << 2);
COPY2_IF_LT(bcost, costs[2], bmv, tmv);
- PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[2], maxNumBmv);
+ PushToBMVStack(bmvStack, tmv, (costs)[2]);
tmv.x += RasterDistance;
costs[3] += mvcost(tmv << 3);
COPY2_IF_LT(bcost, costs[3], bmv, tmv);
- PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[3], maxNumBmv);
+ PushToBMVStack(bmvStack, tmv, (costs)[3]);
}
else
COST_MV(tmv.x, tmv.y);
@@ -1085,7 +1082,7 @@
bDistance = 0;
bPointNr = 0;
const int MaxIters = 32;
- StarPatternSearch(ref, mvmin, mvmax, bmv, bcost, bmvStack, bmvCostStack, maxNumBmv, bPointNr, bDistance, MaxIters, merange);
+ StarPatternSearch(ref, mvmin, mvmax, bmv, bcost, bmvStack, bPointNr, bDistance, MaxIters, merange);
if (bDistance == 1)
{
@@ -1135,19 +1132,19 @@
stride, costs);
costs[0] += mvcost(tmv << 2);
COPY2_IF_LT(bcost, costs[0], bmv, tmv);
- PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[0], maxNumBmv);
+ PushToBMVStack(bmvStack, tmv, (costs)[0]);
tmv.x++;
costs[1] += mvcost(tmv << 2);
COPY2_IF_LT(bcost, costs[1], bmv, tmv);
- PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[1], maxNumBmv);
+ PushToBMVStack(bmvStack, tmv, (costs)[1]);
tmv.x++;
costs[2] += mvcost(tmv << 2);
COPY2_IF_LT(bcost, costs[2], bmv, tmv);
- PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[2], maxNumBmv);
+ PushToBMVStack(bmvStack, tmv, (costs)[2]);
tmv.x++;
costs[3] += mvcost(tmv << 2);
COPY2_IF_LT(bcost, costs[3], bmv, tmv);
- PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[3], maxNumBmv);
+ PushToBMVStack(bmvStack, tmv, (costs)[3]);
}
else
COST_MV(tmv.x, tmv.y);
@@ -1166,14 +1163,14 @@
{
bmv = bestpre;
bcost = bprecost;
- PushToBMVStack(bmvStack, bmv, bmvCostStack, bcost, maxNumBmv);
+ PushToBMVStack(bmvStack, bmv, bcost);
}
else
{
bmv = bmv.toQPel(); // promote search bmv to qpel
- for (int i=0; i<maxNumBmv; i++)
+ for (int i=0; i<bmvStack->maxNumBmv; i++)
{
- bmvStack[i] = bmvStack[i].toQPel();
+ bmvStack->bmvStack[i] = bmvStack->bmvStack[i].toQPel();
}
}
@@ -1188,11 +1185,11 @@
else if (ref->isLowres)
{
int bdir = 0;
- for (int nBmv=0; nBmv<maxNumBmv; nBmv++)
+ for (int nBmv=0; nBmv<bmvStack->maxNumBmv; nBmv++)
{
bdir = 0;
- bmv = bmvStack[nBmv];
- bcost = bmvCostStack[nBmv];
+ bmv = bmvStack->bmvStack[nBmv];
+ bcost = bmvStack->bmvCostStack[nBmv];
for (int i = 1; i <= wl.hpel_dirs; i++)
{
@@ -1213,18 +1210,18 @@
}
bmv += square1[bdir];
- bmvStack[nBmv] = bmv;
- bmvCostStack[nBmv] = bcost;
+ bmvStack->bmvStack[nBmv] = bmv;
+ bmvStack->bmvCostStack[nBmv] = bcost;
}
- bmv = bmvStack[0];
- bcost = bmvCostStack[0];
- for (int i=1; i<maxNumBmv; i++)
+ bmv = bmvStack->bmvStack[0];
+ bcost = bmvStack->bmvCostStack[0];
+ for (int i=1; i<bmvStack->maxNumBmv; i++)
{
- if (bmvCostStack[i]<bcost)
+ if (bmvStack->bmvCostStack[i]<bcost)
{
- bmv = bmvStack[i];
- bcost = bmvCostStack[i];
+ bmv = bmvStack->bmvStack[i];
+ bcost = bmvStack->bmvCostStack[i];
}
}
}
@@ -1232,10 +1229,10 @@
{
pixelcmp_t hpelcomp;
- for (int nBmv=0; nBmv<maxNumBmv; nBmv++)
+ for (int nBmv=0; nBmv<bmvStack->maxNumBmv; nBmv++)
{
- bmv = bmvStack[nBmv];
- bcost = bmvCostStack[nBmv];
+ bmv = bmvStack->bmvStack[nBmv];
+ bcost = bmvStack->bmvCostStack[nBmv];
if (wl.hpel_satd)
{
@@ -1281,18 +1278,18 @@
break;
}
- bmvStack[nBmv] = bmv;
- bmvCostStack[nBmv] = bcost;
+ bmvStack->bmvStack[nBmv] = bmv;
+ bmvStack->bmvCostStack[nBmv] = bcost;
}
- bmv = bmvStack[0];
- bcost = bmvCostStack[0];
- for (int i=1; i<maxNumBmv; i++)
+ bmv = bmvStack->bmvStack[0];
+ bcost = bmvStack->bmvCostStack[0];
+ for (int i=1; i<bmvStack->maxNumBmv; i++)
{
- if (bmvCostStack[i]<bcost)
+ if (bmvStack->bmvCostStack[i]<bcost)
{
- bmv = bmvStack[i];
- bcost = bmvCostStack[i];
+ bmv = bmvStack->bmvStack[i];
+ bcost = bmvStack->bmvCostStack[i];
}
}
}
diff -r f46e843a27cb -r 340dd470d7eb source/encoder/motion.h
--- a/source/encoder/motion.h Fri Aug 05 17:02:17 2016 +0530
+++ b/source/encoder/motion.h Sun Aug 07 11:34:50 2016 +0530
@@ -34,6 +34,15 @@
namespace X265_NS {
// private x265 namespace
+#define MAX_NUM_BESTVECTORS (16)
+
+typedef struct _BmvStack // best-MV candidate list passed as one unit to PushToBMVStack / StarPatternSearch
+{
+ MV bmvStack[MAX_NUM_BESTVECTORS]; // candidate motion vectors; entry i pairs with bmvCostStack[i]
+ int bmvCostStack[MAX_NUM_BESTVECTORS]; // cost recorded for the vector in bmvStack[i]
+ int maxNumBmv; // number of entries in use; must not exceed MAX_NUM_BESTVECTORS
+}BmvStack;
+
class MotionEstimate : public BitCost
{
protected:
@@ -101,9 +110,7 @@
const MV & mvmax,
MV & bmv,
int & bcost,
- MV *bmvStack,
- int *bCostStack,
- int maxNumBmv,
+ BmvStack *bmvStack,
int & bPointNr,
int & bDistance,
int earlyExitIters,
More information about the x265-devel mailing list