[x265] [PATCH] max num bmv parameterized

chen chenm003 at 163.com
Fri Aug 5 17:17:41 CEST 2016


At 2016-08-05 19:32:53,nvijay.anand at trispacetech.com wrote:
># HG changeset patch
># User N Vijay Anand <nvijay.anand at trispacetech.com>
># Date 1470396737 -19800
>#      Fri Aug 05 17:02:17 2016 +0530
># Node ID f46e843a27cbaa4a1c79f1a43d41a04d63f601c4
># Parent  72f16a34946f5f03da875c67a75124dfec1b4ecb
>max num bmv parameterized
>
>diff -r 72f16a34946f -r f46e843a27cb source/encoder/motion.cpp
>--- a/source/encoder/motion.cpp	Sat Jul 30 09:48:59 2016 +0530
>+++ b/source/encoder/motion.cpp	Fri Aug 05 17:02:17 2016 +0530
>@@ -100,17 +100,17 @@
> 
> }
> 
>-#define MAX_NUM_BESTVECTORS     (4)
>+#define MAX_NUM_BESTVECTORS     (16)
In your new patch, you never use this constant anymore. I guess Pradeep wants to keep it because passing maxNumBmv through the function needs extra instructions to set up the parameter registers.

> 
>-inline void PushToBMVStack(MV  *bStack, MV & bv, int *bCostStack, int bcost)
>+inline void PushToBMVStack(MV  *bStack, MV & bv, int *bCostStack, int bcost, int maxNumBmv)
> {
>-    for (int i=0; i<MAX_NUM_BESTVECTORS; i++)
>+    for (int i=0; i<maxNumBmv; i++)
>     {
>         if((bCostStack[i] == bcost) && (bv == bStack[i]))
>             break;
>-        if((bCostStack[i] > bcost) && (bv != bStack[i]))
>+        if((bCostStack[i] >= bcost) && (bv != bStack[i]))
>         {
>-            for (int j=MAX_NUM_BESTVECTORS-1; j>i; j--)
>+            for (int j=maxNumBmv-1; j>i; j--)
>             {
>                 bStack[j] = bStack[j-1];
>                 bCostStack[j] = bCostStack[j-1];
>@@ -247,7 +247,7 @@
>             bmv = tmv; \
>             bPointNr = point; \
>             bDistance = dist; \
>-            PushToBMVStack(bmvStack, tmv, bmvCostStack, cost); \
>+            PushToBMVStack(bmvStack, tmv, bmvCostStack, cost, maxNumBmv); \
>         } \
>     } while (0)
> 
>@@ -257,7 +257,7 @@
>         int cost = sad(fenc, FENC_STRIDE, fref + (mx) + (my) * stride, stride); \
>         cost += mvcost(MV(mx, my) << 2); \
>         COPY2_IF_LT(bcost, cost, bmv, MV(mx, my)); \
>-        PushToBMVStack(bmvStack, MV(mx,my), bmvCostStack, cost); \
>+        PushToBMVStack(bmvStack, MV(mx,my), bmvCostStack, cost, maxNumBmv); \
>     } while (0)
> 
> #define COST_MV_X3_DIR(m0x, m0y, m1x, m1y, m2x, m2y, costs) \
>@@ -271,9 +271,9 @@
>         (costs)[0] += mvcost((bmv + MV(m0x, m0y)) << 2); \
>         (costs)[1] += mvcost((bmv + MV(m1x, m1y)) << 2); \
>         (costs)[2] += mvcost((bmv + MV(m2x, m2y)) << 2); \
>-        PushToBMVStack(bmvStack, bmv+MV(m0x,m0y), bmvCostStack, (costs)[0]); \
>-        PushToBMVStack(bmvStack, bmv+MV(m1x,m1y), bmvCostStack, (costs)[1]); \
>-        PushToBMVStack(bmvStack, bmv+MV(m2x,m2y), bmvCostStack, (costs)[2]); \
>+        PushToBMVStack(bmvStack, bmv+MV(m0x,m0y), bmvCostStack, (costs)[0], maxNumBmv); \
>+        PushToBMVStack(bmvStack, bmv+MV(m1x,m1y), bmvCostStack, (costs)[1], maxNumBmv); \
>+        PushToBMVStack(bmvStack, bmv+MV(m2x,m2y), bmvCostStack, (costs)[2], maxNumBmv); \
>     }
> 
> #define COST_MV_PT_DIST_X4(m0x, m0y, p0, d0, m1x, m1y, p1, d1, m2x, m2y, p2, d2, m3x, m3y, p3, d3) \
>@@ -289,13 +289,13 @@
>         (costs)[2] += mvcost(MV(m2x, m2y) << 2); \
>         (costs)[3] += mvcost(MV(m3x, m3y) << 2); \
>         COPY4_IF_LT(bcost, costs[0], bmv, MV(m0x, m0y), bPointNr, p0, bDistance, d0); \
>-        PushToBMVStack(bmvStack, MV(m0x,m0y), bmvCostStack, (costs)[0]); \
>+        PushToBMVStack(bmvStack, MV(m0x,m0y), bmvCostStack, (costs)[0], maxNumBmv); \
>         COPY4_IF_LT(bcost, costs[1], bmv, MV(m1x, m1y), bPointNr, p1, bDistance, d1); \
>-        PushToBMVStack(bmvStack, MV(m1x,m1y), bmvCostStack, (costs)[1]); \
>+        PushToBMVStack(bmvStack, MV(m1x,m1y), bmvCostStack, (costs)[1], maxNumBmv); \
>         COPY4_IF_LT(bcost, costs[2], bmv, MV(m2x, m2y), bPointNr, p2, bDistance, d2); \
>-        PushToBMVStack(bmvStack, MV(m2x,m2y), bmvCostStack, (costs)[2]); \
>+        PushToBMVStack(bmvStack, MV(m2x,m2y), bmvCostStack, (costs)[2], maxNumBmv); \
>         COPY4_IF_LT(bcost, costs[3], bmv, MV(m3x, m3y), bPointNr, p3, bDistance, d3); \
>-        PushToBMVStack(bmvStack, MV(m3x,m3y), bmvCostStack, (costs)[3]); \
>+        PushToBMVStack(bmvStack, MV(m3x,m3y), bmvCostStack, (costs)[3], maxNumBmv); \
>     }
> 
> #define COST_MV_X4(m0x, m0y, m1x, m1y, m2x, m2y, m3x, m3y) \
>@@ -312,13 +312,13 @@
>         costs[2] += mvcost((omv + MV(m2x, m2y)) << 2); \
>         costs[3] += mvcost((omv + MV(m3x, m3y)) << 2); \
>         COPY2_IF_LT(bcost, costs[0], bmv, omv + MV(m0x, m0y)); \
>-        PushToBMVStack(bmvStack, omv+MV(m0x,m0y), bmvCostStack, (costs)[0]); \
>+        PushToBMVStack(bmvStack, omv+MV(m0x,m0y), bmvCostStack, costs[0], maxNumBmv); \
>         COPY2_IF_LT(bcost, costs[1], bmv, omv + MV(m1x, m1y)); \
>-        PushToBMVStack(bmvStack, omv+MV(m1x,m1y), bmvCostStack, (costs)[1]); \
>+        PushToBMVStack(bmvStack, omv+MV(m1x,m1y), bmvCostStack, costs[1], maxNumBmv); \
>         COPY2_IF_LT(bcost, costs[2], bmv, omv + MV(m2x, m2y)); \
>-        PushToBMVStack(bmvStack, omv+MV(m2x,m2y), bmvCostStack, (costs)[2]); \
>+        PushToBMVStack(bmvStack, omv+MV(m2x,m2y), bmvCostStack, costs[2], maxNumBmv); \
>         COPY2_IF_LT(bcost, costs[3], bmv, omv + MV(m3x, m3y)); \
>-        PushToBMVStack(bmvStack, omv+MV(m3x,m3y), bmvCostStack, (costs)[3]); \
>+        PushToBMVStack(bmvStack, omv+MV(m3x,m3y), bmvCostStack, costs[3], maxNumBmv); \
>     }
> 
> #define COST_MV_X4_DIR(m0x, m0y, m1x, m1y, m2x, m2y, m3x, m3y, costs) \
>@@ -334,10 +334,10 @@
>         (costs)[1] += mvcost((bmv + MV(m1x, m1y)) << 2); \
>         (costs)[2] += mvcost((bmv + MV(m2x, m2y)) << 2); \
>         (costs)[3] += mvcost((bmv + MV(m3x, m3y)) << 2); \
>-        PushToBMVStack(bmvStack, bmv+MV(m0x,m0y), bmvCostStack, (costs)[0]); \
>-        PushToBMVStack(bmvStack, bmv+MV(m1x,m1y), bmvCostStack, (costs)[1]); \
>-        PushToBMVStack(bmvStack, bmv+MV(m2x,m2y), bmvCostStack, (costs)[2]); \
>-        PushToBMVStack(bmvStack, bmv+MV(m3x,m3y), bmvCostStack, (costs)[3]); \
>+        PushToBMVStack(bmvStack, bmv+MV(m0x,m0y), bmvCostStack, (costs)[0], maxNumBmv); \
>+        PushToBMVStack(bmvStack, bmv+MV(m1x,m1y), bmvCostStack, (costs)[1], maxNumBmv); \
>+        PushToBMVStack(bmvStack, bmv+MV(m2x,m2y), bmvCostStack, (costs)[2], maxNumBmv); \
>+        PushToBMVStack(bmvStack, bmv+MV(m3x,m3y), bmvCostStack, (costs)[3], maxNumBmv); \
>     }
> 
> #define DIA1_ITER(mx, my) \
>@@ -379,6 +379,7 @@
>                                        int &            bcost,
>                                        MV              *bmvStack,
>                                        int             *bmvCostStack,
>+                                       int              maxNumBmv,
>                                        int &            bPointNr,
>                                        int &            bDistance,
>                                        int              earlyExitIters,
>@@ -657,9 +658,19 @@
> 
>     /* re-measure full pel rounded MVP with SAD as search start point */
>     MV bmv = pmv.roundToFPel();
>-    MV bmvStack[4] = {bmv, bmv, bmv, bmv};
>-    int bmvCostStack[4] = {bprecost, 0x7fffffff, 0x7fffffff, 0x7fffffff};
>-    int bcost = bprecost;
>+    MV bmvStack[MAX_NUM_BESTVECTORS];
>+    int bmvCostStack[MAX_NUM_BESTVECTORS];
>+    int bcost = bprecost;    
>+    const int maxNumBmv = 1 << searchMethod;
>+
>+    bmvStack[0] = bmv;
>+    bmvCostStack[0] = bprecost;
>+    for (int i=1 ; i < maxNumBmv; i++)
>+    {
>+        bmvStack[i] = bmv;
>+        bmvCostStack[i] = 0x7fffffff;
>+    }
>+
>     if (pmv.isSubpel())
>     {
>         bcost = sad(fenc, FENC_STRIDE, fref + bmv.x + bmv.y * stride, stride) + mvcost(bmv << 2);
>@@ -934,7 +945,7 @@
>     do \
>     {  \
>         COPY2_IF_LT(bcost, costs[k], dir, x * 16 + (y & 15)); \
>-        PushToBMVStack(bmvStack, bmv+MV(x*i,y*i), bmvCostStack, costs[k]); \
>+        PushToBMVStack(bmvStack, omv+MV(x*i,y*i), bmvCostStack, costs[k], maxNumBmv); \
>     } while (0)
> 
>                 SADS(0, +0, -4, +0, +4, -2, -3, +2, -3);
>@@ -996,7 +1007,7 @@
>         int bDistance = 0;
> 
>         const int EarlyExitIters = 3;
>-        StarPatternSearch(ref, mvmin, mvmax, bmv, bcost, bmvStack, bmvCostStack, bPointNr, bDistance, EarlyExitIters, merange);
>+        StarPatternSearch(ref, mvmin, mvmax, bmv, bcost, bmvStack, bmvCostStack, maxNumBmv, bPointNr, bDistance, EarlyExitIters, merange);
> 
>         if (bDistance == 1)
>         {
>@@ -1048,19 +1059,19 @@
>                                stride, costs);
>                         costs[0] += mvcost(tmv << 2);
>                         COPY2_IF_LT(bcost, costs[0], bmv, tmv);
>-                        PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[0]);
>+                        PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[0], maxNumBmv);
>                         tmv.x += RasterDistance;
>                         costs[1] += mvcost(tmv << 2);
>                         COPY2_IF_LT(bcost, costs[1], bmv, tmv);
>-                        PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[1]);
>+                        PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[1], maxNumBmv);
>                         tmv.x += RasterDistance;
>                         costs[2] += mvcost(tmv << 2);
>                         COPY2_IF_LT(bcost, costs[2], bmv, tmv);
>-                        PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[2]);
>+                        PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[2], maxNumBmv);
>                         tmv.x += RasterDistance;
>                         costs[3] += mvcost(tmv << 3);
>                         COPY2_IF_LT(bcost, costs[3], bmv, tmv);
>-                        PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[3]);
>+                        PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[3], maxNumBmv);
>                     }
>                     else
>                         COST_MV(tmv.x, tmv.y);
>@@ -1074,7 +1085,7 @@
>             bDistance = 0;
>             bPointNr = 0;
>             const int MaxIters = 32;
>-            StarPatternSearch(ref, mvmin, mvmax, bmv, bcost, bmvStack, bmvCostStack, bPointNr, bDistance, MaxIters, merange);
>+            StarPatternSearch(ref, mvmin, mvmax, bmv, bcost, bmvStack, bmvCostStack, maxNumBmv, bPointNr, bDistance, MaxIters, merange);
> 
>             if (bDistance == 1)
>             {
>@@ -1124,19 +1135,19 @@
>                            stride, costs);
>                     costs[0] += mvcost(tmv << 2);
>                     COPY2_IF_LT(bcost, costs[0], bmv, tmv);
>-                    PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[0]);
>+                    PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[0], maxNumBmv);
>                     tmv.x++;
>                     costs[1] += mvcost(tmv << 2);
>                     COPY2_IF_LT(bcost, costs[1], bmv, tmv);
>-                    PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[1]);
>+                    PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[1], maxNumBmv);
>                     tmv.x++;
>                     costs[2] += mvcost(tmv << 2);
>                     COPY2_IF_LT(bcost, costs[2], bmv, tmv);
>-                    PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[2]);
>+                    PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[2], maxNumBmv);
>                     tmv.x++;
>                     costs[3] += mvcost(tmv << 2);
>                     COPY2_IF_LT(bcost, costs[3], bmv, tmv);
>-                    PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[3]);
>+                    PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[3], maxNumBmv);
>                 }
>                 else
>                     COST_MV(tmv.x, tmv.y);
>@@ -1155,12 +1166,12 @@
>     {
>         bmv = bestpre;
>         bcost = bprecost;
>-        PushToBMVStack(bmvStack, bmv, bmvCostStack, bcost);
>+        PushToBMVStack(bmvStack, bmv, bmvCostStack, bcost, maxNumBmv);
>     }
>     else
>     {
>         bmv = bmv.toQPel(); // promote search bmv to qpel
>-        for (int i=0; i<4; i++)
>+        for (int i=0; i<maxNumBmv; i++)
>         {
>           bmvStack[i] = bmvStack[i].toQPel();
>         }
>@@ -1177,7 +1188,7 @@
>     else if (ref->isLowres)
>     {
>         int bdir = 0;
>-        for (int nBmv=0; nBmv<MAX_NUM_BESTVECTORS; nBmv++)
>+        for (int nBmv=0; nBmv<maxNumBmv; nBmv++)
>         {
>           bdir = 0;
>           bmv =  bmvStack[nBmv];
>@@ -1208,7 +1219,7 @@
> 
>         bmv = bmvStack[0];
>         bcost = bmvCostStack[0];
>-        for (int i=1; i<MAX_NUM_BESTVECTORS; i++)
>+        for (int i=1; i<maxNumBmv; i++)
>         {
>           if (bmvCostStack[i]<bcost)
>           {
>@@ -1221,9 +1232,10 @@
>     {
>         pixelcmp_t hpelcomp;
> 
>-        for (int nBmv=0; nBmv<MAX_NUM_BESTVECTORS; nBmv++)
>+        for (int nBmv=0; nBmv<maxNumBmv; nBmv++)
>         {
>           bmv =  bmvStack[nBmv];
>+          bcost = bmvCostStack[nBmv];
> 
>           if (wl.hpel_satd)
>           {
>@@ -1275,7 +1287,7 @@
> 
>         bmv = bmvStack[0];
>         bcost = bmvCostStack[0];
>-        for (int i=1; i<MAX_NUM_BESTVECTORS; i++)
>+        for (int i=1; i<maxNumBmv; i++)
>         {
>           if (bmvCostStack[i]<bcost)
>           {
>@@ -1287,6 +1299,7 @@
> 
>     x265_emms();
>     outQMv = bmv;
>+
>     return bcost;
> }
> 
>diff -r 72f16a34946f -r f46e843a27cb source/encoder/motion.h
>--- a/source/encoder/motion.h	Sat Jul 30 09:48:59 2016 +0530
>+++ b/source/encoder/motion.h	Fri Aug 05 17:02:17 2016 +0530
>@@ -103,6 +103,7 @@
>                                   int &            bcost,
>                                   MV              *bmvStack,
>                                   int             *bCostStack,
>+                                  int              maxNumBmv,
>                                   int &            bPointNr,
>                                   int &            bDistance,
>                                   int              earlyExitIters,
>_______________________________________________
>x265-devel mailing list
>x265-devel at videolan.org
>https://mailman.videolan.org/listinfo/x265-devel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20160805/dc331651/attachment-0001.html>


More information about the x265-devel mailing list