[x265] [PATCH] Parameter passing optimisation

nvijay.anand at trispacetech.com nvijay.anand at trispacetech.com
Sun Aug 7 08:05:39 CEST 2016


# HG changeset patch
# User N Vijay Anand <nvijay.anand at trispacetech.com>
# Date 1470549890 -19800
#      Sun Aug 07 11:34:50 2016 +0530
# Node ID 340dd470d7ebbdd6e9532b9ea6e830627600d3bf
# Parent  f46e843a27cbaa4a1c79f1a43d41a04d63f601c4
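Parameter passing optimisation

Bundle the best-MV stack, its cost stack and maxNumBmv into a single BmvStack
struct, so that PushToBMVStack() and StarPatternSearch() take one pointer
instead of three separate arguments.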

diff -r f46e843a27cb -r 340dd470d7eb source/encoder/motion.cpp
--- a/source/encoder/motion.cpp	Fri Aug 05 17:02:17 2016 +0530
+++ b/source/encoder/motion.cpp	Sun Aug 07 11:34:50 2016 +0530
@@ -100,23 +100,21 @@
 
 }
 
-#define MAX_NUM_BESTVECTORS     (16)
-
-inline void PushToBMVStack(MV  *bStack, MV & bv, int *bCostStack, int bcost, int maxNumBmv)
+inline void PushToBMVStack(BmvStack *bStack, MV & bv, int bcost)
 {
-    for (int i=0; i<maxNumBmv; i++)
+    for (int i=0; i<bStack->maxNumBmv; i++)
     {
-        if((bCostStack[i] == bcost) && (bv == bStack[i]))
+        if((bStack->bmvCostStack[i] == bcost) && (bv == bStack->bmvStack[i]))
             break;
-        if((bCostStack[i] >= bcost) && (bv != bStack[i]))
+        if((bStack->bmvCostStack[i] >= bcost) && (bv != bStack->bmvStack[i]))
         {
-            for (int j=maxNumBmv-1; j>i; j--)
+            for (int j=bStack->maxNumBmv-1; j>i; j--)
             {
-                bStack[j] = bStack[j-1];
-                bCostStack[j] = bCostStack[j-1];
+                bStack->bmvStack[j] = bStack->bmvStack[j-1];
+                bStack->bmvCostStack[j] = bStack->bmvCostStack[j-1];
             }
-            bStack[i] = bv;
-            bCostStack[i] = bcost;
+            bStack->bmvStack[i] = bv;
+            bStack->bmvCostStack[i] = bcost;
             break;
         }
     }
@@ -247,7 +245,7 @@
             bmv = tmv; \
             bPointNr = point; \
             bDistance = dist; \
-            PushToBMVStack(bmvStack, tmv, bmvCostStack, cost, maxNumBmv); \
+            PushToBMVStack(bmvStack, tmv, cost); \
         } \
     } while (0)
 
@@ -257,7 +255,7 @@
         int cost = sad(fenc, FENC_STRIDE, fref + (mx) + (my) * stride, stride); \
         cost += mvcost(MV(mx, my) << 2); \
         COPY2_IF_LT(bcost, cost, bmv, MV(mx, my)); \
-        PushToBMVStack(bmvStack, MV(mx,my), bmvCostStack, cost, maxNumBmv); \
+        PushToBMVStack(bmvStack, MV(mx,my), cost); \
     } while (0)
 
 #define COST_MV_X3_DIR(m0x, m0y, m1x, m1y, m2x, m2y, costs) \
@@ -271,9 +269,9 @@
         (costs)[0] += mvcost((bmv + MV(m0x, m0y)) << 2); \
         (costs)[1] += mvcost((bmv + MV(m1x, m1y)) << 2); \
         (costs)[2] += mvcost((bmv + MV(m2x, m2y)) << 2); \
-        PushToBMVStack(bmvStack, bmv+MV(m0x,m0y), bmvCostStack, (costs)[0], maxNumBmv); \
-        PushToBMVStack(bmvStack, bmv+MV(m1x,m1y), bmvCostStack, (costs)[1], maxNumBmv); \
-        PushToBMVStack(bmvStack, bmv+MV(m2x,m2y), bmvCostStack, (costs)[2], maxNumBmv); \
+        PushToBMVStack(bmvStack, bmv+MV(m0x,m0y), (costs)[0]); \
+        PushToBMVStack(bmvStack, bmv+MV(m1x,m1y), (costs)[1]); \
+        PushToBMVStack(bmvStack, bmv+MV(m2x,m2y), (costs)[2]); \
     }
 
 #define COST_MV_PT_DIST_X4(m0x, m0y, p0, d0, m1x, m1y, p1, d1, m2x, m2y, p2, d2, m3x, m3y, p3, d3) \
@@ -289,13 +287,13 @@
         (costs)[2] += mvcost(MV(m2x, m2y) << 2); \
         (costs)[3] += mvcost(MV(m3x, m3y) << 2); \
         COPY4_IF_LT(bcost, costs[0], bmv, MV(m0x, m0y), bPointNr, p0, bDistance, d0); \
-        PushToBMVStack(bmvStack, MV(m0x,m0y), bmvCostStack, (costs)[0], maxNumBmv); \
+        PushToBMVStack(bmvStack, MV(m0x,m0y), (costs)[0]); \
         COPY4_IF_LT(bcost, costs[1], bmv, MV(m1x, m1y), bPointNr, p1, bDistance, d1); \
-        PushToBMVStack(bmvStack, MV(m1x,m1y), bmvCostStack, (costs)[1], maxNumBmv); \
+        PushToBMVStack(bmvStack, MV(m1x,m1y), (costs)[1]); \
         COPY4_IF_LT(bcost, costs[2], bmv, MV(m2x, m2y), bPointNr, p2, bDistance, d2); \
-        PushToBMVStack(bmvStack, MV(m2x,m2y), bmvCostStack, (costs)[2], maxNumBmv); \
+        PushToBMVStack(bmvStack, MV(m2x,m2y), (costs)[2]); \
         COPY4_IF_LT(bcost, costs[3], bmv, MV(m3x, m3y), bPointNr, p3, bDistance, d3); \
-        PushToBMVStack(bmvStack, MV(m3x,m3y), bmvCostStack, (costs)[3], maxNumBmv); \
+        PushToBMVStack(bmvStack, MV(m3x,m3y), (costs)[3]); \
     }
 
 #define COST_MV_X4(m0x, m0y, m1x, m1y, m2x, m2y, m3x, m3y) \
@@ -312,13 +310,13 @@
         costs[2] += mvcost((omv + MV(m2x, m2y)) << 2); \
         costs[3] += mvcost((omv + MV(m3x, m3y)) << 2); \
         COPY2_IF_LT(bcost, costs[0], bmv, omv + MV(m0x, m0y)); \
-        PushToBMVStack(bmvStack, omv+MV(m0x,m0y), bmvCostStack, costs[0], maxNumBmv); \
+        PushToBMVStack(bmvStack, omv+MV(m0x,m0y), costs[0]); \
         COPY2_IF_LT(bcost, costs[1], bmv, omv + MV(m1x, m1y)); \
-        PushToBMVStack(bmvStack, omv+MV(m1x,m1y), bmvCostStack, costs[1], maxNumBmv); \
+        PushToBMVStack(bmvStack, omv+MV(m1x,m1y), costs[1]); \
         COPY2_IF_LT(bcost, costs[2], bmv, omv + MV(m2x, m2y)); \
-        PushToBMVStack(bmvStack, omv+MV(m2x,m2y), bmvCostStack, costs[2], maxNumBmv); \
+        PushToBMVStack(bmvStack, omv+MV(m2x,m2y), costs[2]); \
         COPY2_IF_LT(bcost, costs[3], bmv, omv + MV(m3x, m3y)); \
-        PushToBMVStack(bmvStack, omv+MV(m3x,m3y), bmvCostStack, costs[3], maxNumBmv); \
+        PushToBMVStack(bmvStack, omv+MV(m3x,m3y), costs[3]); \
     }
 
 #define COST_MV_X4_DIR(m0x, m0y, m1x, m1y, m2x, m2y, m3x, m3y, costs) \
@@ -334,10 +332,10 @@
         (costs)[1] += mvcost((bmv + MV(m1x, m1y)) << 2); \
         (costs)[2] += mvcost((bmv + MV(m2x, m2y)) << 2); \
         (costs)[3] += mvcost((bmv + MV(m3x, m3y)) << 2); \
-        PushToBMVStack(bmvStack, bmv+MV(m0x,m0y), bmvCostStack, (costs)[0], maxNumBmv); \
-        PushToBMVStack(bmvStack, bmv+MV(m1x,m1y), bmvCostStack, (costs)[1], maxNumBmv); \
-        PushToBMVStack(bmvStack, bmv+MV(m2x,m2y), bmvCostStack, (costs)[2], maxNumBmv); \
-        PushToBMVStack(bmvStack, bmv+MV(m3x,m3y), bmvCostStack, (costs)[3], maxNumBmv); \
+        PushToBMVStack(bmvStack, bmv+MV(m0x,m0y), (costs)[0]); \
+        PushToBMVStack(bmvStack, bmv+MV(m1x,m1y), (costs)[1]); \
+        PushToBMVStack(bmvStack, bmv+MV(m2x,m2y), (costs)[2]); \
+        PushToBMVStack(bmvStack, bmv+MV(m3x,m3y), (costs)[3]); \
     }
 
 #define DIA1_ITER(mx, my) \
@@ -377,9 +375,7 @@
                                        const MV &       mvmax,
                                        MV &             bmv,
                                        int &            bcost,
-                                       MV              *bmvStack,
-                                       int             *bmvCostStack,
-                                       int              maxNumBmv,
+                                       BmvStack        *bmvStack,
                                        int &            bPointNr,
                                        int &            bDistance,
                                        int              earlyExitIters,
@@ -658,23 +654,24 @@
 
     /* re-measure full pel rounded MVP with SAD as search start point */
     MV bmv = pmv.roundToFPel();
-    MV bmvStack[MAX_NUM_BESTVECTORS];
-    int bmvCostStack[MAX_NUM_BESTVECTORS];
-    int bcost = bprecost;    
-    const int maxNumBmv = 1 << searchMethod;
+    BmvStack *bmvStack, bMVStack;
+    int bcost = bprecost;
 
-    bmvStack[0] = bmv;
-    bmvCostStack[0] = bprecost;
-    for (int i=1 ; i < maxNumBmv; i++)
+    bmvStack = &bMVStack;
+    bmvStack->bmvStack[0] = bmv;
+    bmvStack->bmvCostStack[0] = bprecost;
+    bmvStack->maxNumBmv = 1 << searchMethod;
+
+    for (int i=1 ; i < bmvStack->maxNumBmv; i++)
     {
-        bmvStack[i] = bmv;
-        bmvCostStack[i] = 0x7fffffff;
+        bmvStack->bmvStack[i] = bmv;
+        bmvStack->bmvCostStack[i] = 0x7fffffff;
     }
 
     if (pmv.isSubpel())
     {
         bcost = sad(fenc, FENC_STRIDE, fref + bmv.x + bmv.y * stride, stride) + mvcost(bmv << 2);
-        bmvCostStack[0] = bcost;
+        bmvStack->bmvCostStack[0] = bcost;
     }
 
     // measure SAD cost at MV(0) if MVP is not zero
@@ -685,8 +682,8 @@
         {
             bcost = cost;
             bmv = 0;
-            bmvStack[0] = bmv;
-            bmvCostStack[0] = bcost;
+            bmvStack->bmvStack[0] = bmv;
+            bmvStack->bmvCostStack[0] = bcost;
         }
     }
 
@@ -816,8 +813,8 @@
         /* refine predictors */
         omv = bmv;
         ucost1 = bcost;
-        bmvStack[0] = bmv;
-        bmvCostStack[0] = bcost;
+        bmvStack->bmvStack[0] = bmv;
+        bmvStack->bmvCostStack[0] = bcost;
         DIA1_ITER(pmv.x, pmv.y);
         if (pmv.notZero())
             DIA1_ITER(0, 0);
@@ -945,7 +942,7 @@
     do \
     {  \
         COPY2_IF_LT(bcost, costs[k], dir, x * 16 + (y & 15)); \
-        PushToBMVStack(bmvStack, omv+MV(x*i,y*i), bmvCostStack, costs[k], maxNumBmv); \
+        PushToBMVStack(bmvStack, omv+MV(x*i,y*i), costs[k]); \
     } while (0)
 
                 SADS(0, +0, -4, +0, +4, -2, -3, +2, -3);
@@ -1007,7 +1004,7 @@
         int bDistance = 0;
 
         const int EarlyExitIters = 3;
-        StarPatternSearch(ref, mvmin, mvmax, bmv, bcost, bmvStack, bmvCostStack, maxNumBmv, bPointNr, bDistance, EarlyExitIters, merange);
+        StarPatternSearch(ref, mvmin, mvmax, bmv, bcost, bmvStack, bPointNr, bDistance, EarlyExitIters, merange);
 
         if (bDistance == 1)
         {
@@ -1059,19 +1056,19 @@
                                stride, costs);
                         costs[0] += mvcost(tmv << 2);
                         COPY2_IF_LT(bcost, costs[0], bmv, tmv);
-                        PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[0], maxNumBmv);
+                        PushToBMVStack(bmvStack, tmv, (costs)[0]);
                         tmv.x += RasterDistance;
                         costs[1] += mvcost(tmv << 2);
                         COPY2_IF_LT(bcost, costs[1], bmv, tmv);
-                        PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[1], maxNumBmv);
+                        PushToBMVStack(bmvStack, tmv, (costs)[1]);
                         tmv.x += RasterDistance;
                         costs[2] += mvcost(tmv << 2);
                         COPY2_IF_LT(bcost, costs[2], bmv, tmv);
-                        PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[2], maxNumBmv);
+                        PushToBMVStack(bmvStack, tmv, (costs)[2]);
                         tmv.x += RasterDistance;
                         costs[3] += mvcost(tmv << 3);
                         COPY2_IF_LT(bcost, costs[3], bmv, tmv);
-                        PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[3], maxNumBmv);
+                        PushToBMVStack(bmvStack, tmv, (costs)[3]);
                     }
                     else
                         COST_MV(tmv.x, tmv.y);
@@ -1085,7 +1082,7 @@
             bDistance = 0;
             bPointNr = 0;
             const int MaxIters = 32;
-            StarPatternSearch(ref, mvmin, mvmax, bmv, bcost, bmvStack, bmvCostStack, maxNumBmv, bPointNr, bDistance, MaxIters, merange);
+            StarPatternSearch(ref, mvmin, mvmax, bmv, bcost, bmvStack, bPointNr, bDistance, MaxIters, merange);
 
             if (bDistance == 1)
             {
@@ -1135,19 +1132,19 @@
                            stride, costs);
                     costs[0] += mvcost(tmv << 2);
                     COPY2_IF_LT(bcost, costs[0], bmv, tmv);
-                    PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[0], maxNumBmv);
+                    PushToBMVStack(bmvStack, tmv, (costs)[0]);
                     tmv.x++;
                     costs[1] += mvcost(tmv << 2);
                     COPY2_IF_LT(bcost, costs[1], bmv, tmv);
-                    PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[1], maxNumBmv);
+                    PushToBMVStack(bmvStack, tmv, (costs)[1]);
                     tmv.x++;
                     costs[2] += mvcost(tmv << 2);
                     COPY2_IF_LT(bcost, costs[2], bmv, tmv);
-                    PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[2], maxNumBmv);
+                    PushToBMVStack(bmvStack, tmv, (costs)[2]);
                     tmv.x++;
                     costs[3] += mvcost(tmv << 2);
                     COPY2_IF_LT(bcost, costs[3], bmv, tmv);
-                    PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[3], maxNumBmv);
+                    PushToBMVStack(bmvStack, tmv, (costs)[3]);
                 }
                 else
                     COST_MV(tmv.x, tmv.y);
@@ -1166,14 +1163,14 @@
     {
         bmv = bestpre;
         bcost = bprecost;
-        PushToBMVStack(bmvStack, bmv, bmvCostStack, bcost, maxNumBmv);
+        PushToBMVStack(bmvStack, bmv, bcost);
     }
     else
     {
         bmv = bmv.toQPel(); // promote search bmv to qpel
-        for (int i=0; i<maxNumBmv; i++)
+        for (int i=0; i<bmvStack->maxNumBmv; i++)
         {
-          bmvStack[i] = bmvStack[i].toQPel();
+          bmvStack->bmvStack[i] = bmvStack->bmvStack[i].toQPel();
         }
     }
 
@@ -1188,11 +1185,11 @@
     else if (ref->isLowres)
     {
         int bdir = 0;
-        for (int nBmv=0; nBmv<maxNumBmv; nBmv++)
+        for (int nBmv=0; nBmv<bmvStack->maxNumBmv; nBmv++)
         {
           bdir = 0;
-          bmv =  bmvStack[nBmv];
-          bcost = bmvCostStack[nBmv];
+          bmv =  bmvStack->bmvStack[nBmv];
+          bcost = bmvStack->bmvCostStack[nBmv];
 
           for (int i = 1; i <= wl.hpel_dirs; i++)
           {
@@ -1213,18 +1210,18 @@
           }
 
           bmv += square1[bdir];
-          bmvStack[nBmv] = bmv;
-          bmvCostStack[nBmv] = bcost;
+          bmvStack->bmvStack[nBmv] = bmv;
+          bmvStack->bmvCostStack[nBmv] = bcost;
         }
 
-        bmv = bmvStack[0];
-        bcost = bmvCostStack[0];
-        for (int i=1; i<maxNumBmv; i++)
+        bmv = bmvStack->bmvStack[0];
+        bcost = bmvStack->bmvCostStack[0];
+        for (int i=1; i<bmvStack->maxNumBmv; i++)
         {
-          if (bmvCostStack[i]<bcost)
+          if (bmvStack->bmvCostStack[i]<bcost)
           {
-            bmv = bmvStack[i];
-            bcost = bmvCostStack[i];
+            bmv = bmvStack->bmvStack[i];
+            bcost = bmvStack->bmvCostStack[i];
           }
         }
     }
@@ -1232,10 +1229,10 @@
     {
         pixelcmp_t hpelcomp;
 
-        for (int nBmv=0; nBmv<maxNumBmv; nBmv++)
+        for (int nBmv=0; nBmv<bmvStack->maxNumBmv; nBmv++)
         {
-          bmv =  bmvStack[nBmv];
-          bcost = bmvCostStack[nBmv];
+          bmv =  bmvStack->bmvStack[nBmv];
+          bcost = bmvStack->bmvCostStack[nBmv];
 
           if (wl.hpel_satd)
           {
@@ -1281,18 +1278,18 @@
                   break;
           }
 
-          bmvStack[nBmv] = bmv;
-          bmvCostStack[nBmv] = bcost;
+          bmvStack->bmvStack[nBmv] = bmv;
+          bmvStack->bmvCostStack[nBmv] = bcost;
         }
 
-        bmv = bmvStack[0];
-        bcost = bmvCostStack[0];
-        for (int i=1; i<maxNumBmv; i++)
+        bmv = bmvStack->bmvStack[0];
+        bcost = bmvStack->bmvCostStack[0];
+        for (int i=1; i<bmvStack->maxNumBmv; i++)
         {
-          if (bmvCostStack[i]<bcost)
+          if (bmvStack->bmvCostStack[i]<bcost)
           {
-            bmv = bmvStack[i];
-            bcost = bmvCostStack[i];
+            bmv = bmvStack->bmvStack[i];
+            bcost = bmvStack->bmvCostStack[i];
           }
         }
     }
diff -r f46e843a27cb -r 340dd470d7eb source/encoder/motion.h
--- a/source/encoder/motion.h	Fri Aug 05 17:02:17 2016 +0530
+++ b/source/encoder/motion.h	Sun Aug 07 11:34:50 2016 +0530
@@ -34,6 +34,15 @@
 namespace X265_NS {
 // private x265 namespace
 
+#define MAX_NUM_BESTVECTORS     (16)
+
+typedef struct _BmvStack  // best-MV candidate list, passed as one pointer instead of three arguments
+{
+    MV  bmvStack[MAX_NUM_BESTVECTORS];  // candidate motion vectors
+    int bmvCostStack[MAX_NUM_BESTVECTORS];  // bmvCostStack[i] is the cost recorded for bmvStack[i]
+    int maxNumBmv;  // number of entries in use; motion.cpp sets it to 1 << searchMethod
+}BmvStack;
+
 class MotionEstimate : public BitCost
 {
 protected:
@@ -101,9 +110,7 @@
                                   const MV &       mvmax,
                                   MV &             bmv,
                                   int &            bcost,
-                                  MV              *bmvStack,
-                                  int             *bCostStack,
-                                  int              maxNumBmv,
+                                  BmvStack        *bmvStack,
                                   int &            bPointNr,
                                   int &            bDistance,
                                   int              earlyExitIters,


More information about the x265-devel mailing list