[x265] [PATCH 1 of 2] Compression Gains improved by ~10%

nvijay.anand at trispacetech.com nvijay.anand at trispacetech.com
Wed Jul 27 19:17:43 CEST 2016


# HG changeset patch
# User N Vijay Anand <nvijay.anand at trispacetech.com>
# Date 1469638855 -19800
#      Wed Jul 27 22:30:55 2016 +0530
# Node ID 837738a747ead31f905421c3ff413e36f9022ab9
# Parent  5a0e139e29386ecebafc9c555aedcd3e0f61c70c
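Compression gains improved by ~10%.
Up to four low-cost full-pel motion vector candidates found during the search are kept,
and sub-pel refinement is run on each of them before the best result is chosen.
PSNR values still need to be reconfirmed.
Subjective quality is the same as unmodified x265.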

diff -r 5a0e139e2938 -r 837738a747ea source/encoder/motion.cpp
--- a/source/encoder/motion.cpp	Fri Jul 22 13:13:42 2016 +0530
+++ b/source/encoder/motion.cpp	Wed Jul 27 22:30:55 2016 +0530
@@ -99,6 +99,27 @@
 
 }
 
+/* Insert candidate (bv, bcost) ahead of the first of the four entries whose recorded cost is
+ * higher. bv is a const reference because macro call sites pass temporaries such as MV(mx, my). */
+inline void PushToBMVStack(MV *bStack, const MV &bv, int *bCostStack, int bcost)
+{
+    for (int i = 0; i < 4; i++)
+    {
+        if ((bCostStack[i] == bcost) && (bv == bStack[i])) break; /* already recorded */
+        if ((bCostStack[i] > bcost) && (bv != bStack[i]))
+        {
+            for (int j = 3; j > i; j--) /* shift worse entries down; entry 3 falls off */
+            {
+                bStack[j] = bStack[j - 1];
+                bCostStack[j] = bCostStack[j - 1];
+            }
+            bStack[i] = bv;
+            bCostStack[i] = bcost;
+            break;
+        }
+    }
+}
+
 MotionEstimate::MotionEstimate()
 {
     ctuAddr = -1;
@@ -223,6 +244,7 @@
             bmv = tmv; \
             bPointNr = point; \
             bDistance = dist; \
+            PushToBMVStack(bmvStack, tmv, bmvCostStack, cost); \
         } \
     } while (0)
 
@@ -232,6 +254,7 @@
         int cost = sad(fenc, FENC_STRIDE, fref + (mx) + (my) * stride, stride); \
         cost += mvcost(MV(mx, my) << 2); \
         COPY2_IF_LT(bcost, cost, bmv, MV(mx, my)); \
+        PushToBMVStack(bmvStack, MV(mx,my), bmvCostStack, cost); \
     } while (0)
 
 #define COST_MV_X3_DIR(m0x, m0y, m1x, m1y, m2x, m2y, costs) \
@@ -245,6 +268,9 @@
         (costs)[0] += mvcost((bmv + MV(m0x, m0y)) << 2); \
         (costs)[1] += mvcost((bmv + MV(m1x, m1y)) << 2); \
         (costs)[2] += mvcost((bmv + MV(m2x, m2y)) << 2); \
+        PushToBMVStack(bmvStack, bmv+MV(m0x,m0y), bmvCostStack, (costs)[0]); \
+        PushToBMVStack(bmvStack, bmv+MV(m1x,m1y), bmvCostStack, (costs)[1]); \
+        PushToBMVStack(bmvStack, bmv+MV(m2x,m2y), bmvCostStack, (costs)[2]); \
     }
 
 #define COST_MV_PT_DIST_X4(m0x, m0y, p0, d0, m1x, m1y, p1, d1, m2x, m2y, p2, d2, m3x, m3y, p3, d3) \
@@ -260,9 +286,13 @@
         (costs)[2] += mvcost(MV(m2x, m2y) << 2); \
         (costs)[3] += mvcost(MV(m3x, m3y) << 2); \
         COPY4_IF_LT(bcost, costs[0], bmv, MV(m0x, m0y), bPointNr, p0, bDistance, d0); \
+        PushToBMVStack(bmvStack, MV(m0x,m0y), bmvCostStack, (costs)[0]); \
         COPY4_IF_LT(bcost, costs[1], bmv, MV(m1x, m1y), bPointNr, p1, bDistance, d1); \
+        PushToBMVStack(bmvStack, MV(m1x,m1y), bmvCostStack, (costs)[1]); \
         COPY4_IF_LT(bcost, costs[2], bmv, MV(m2x, m2y), bPointNr, p2, bDistance, d2); \
+        PushToBMVStack(bmvStack, MV(m2x,m2y), bmvCostStack, (costs)[2]); \
         COPY4_IF_LT(bcost, costs[3], bmv, MV(m3x, m3y), bPointNr, p3, bDistance, d3); \
+        PushToBMVStack(bmvStack, MV(m3x,m3y), bmvCostStack, (costs)[3]); \
     }
 
 #define COST_MV_X4(m0x, m0y, m1x, m1y, m2x, m2y, m3x, m3y) \
@@ -279,9 +309,13 @@
         costs[2] += mvcost((omv + MV(m2x, m2y)) << 2); \
         costs[3] += mvcost((omv + MV(m3x, m3y)) << 2); \
         COPY2_IF_LT(bcost, costs[0], bmv, omv + MV(m0x, m0y)); \
+        PushToBMVStack(bmvStack, omv+MV(m0x,m0y), bmvCostStack, (costs)[0]); \
         COPY2_IF_LT(bcost, costs[1], bmv, omv + MV(m1x, m1y)); \
+        PushToBMVStack(bmvStack, omv+MV(m1x,m1y), bmvCostStack, (costs)[1]); \
         COPY2_IF_LT(bcost, costs[2], bmv, omv + MV(m2x, m2y)); \
+        PushToBMVStack(bmvStack, omv+MV(m2x,m2y), bmvCostStack, (costs)[2]); \
         COPY2_IF_LT(bcost, costs[3], bmv, omv + MV(m3x, m3y)); \
+        PushToBMVStack(bmvStack, omv+MV(m3x,m3y), bmvCostStack, (costs)[3]); \
     }
 
 #define COST_MV_X4_DIR(m0x, m0y, m1x, m1y, m2x, m2y, m3x, m3y, costs) \
@@ -297,6 +331,10 @@
         (costs)[1] += mvcost((bmv + MV(m1x, m1y)) << 2); \
         (costs)[2] += mvcost((bmv + MV(m2x, m2y)) << 2); \
         (costs)[3] += mvcost((bmv + MV(m3x, m3y)) << 2); \
+        PushToBMVStack(bmvStack, bmv+MV(m0x,m0y), bmvCostStack, (costs)[0]); \
+        PushToBMVStack(bmvStack, bmv+MV(m1x,m1y), bmvCostStack, (costs)[1]); \
+        PushToBMVStack(bmvStack, bmv+MV(m2x,m2y), bmvCostStack, (costs)[2]); \
+        PushToBMVStack(bmvStack, bmv+MV(m3x,m3y), bmvCostStack, (costs)[3]); \
     }
 
 #define DIA1_ITER(mx, my) \
@@ -336,6 +374,8 @@
                                        const MV &       mvmax,
                                        MV &             bmv,
                                        int &            bcost,
+                                       MV              *bmvStack,
+                                       int             *bmvCostStack,
                                        int &            bPointNr,
                                        int &            bDistance,
                                        int              earlyExitIters,
@@ -614,6 +654,8 @@
 
     /* re-measure full pel rounded MVP with SAD as search start point */
     MV bmv = pmv.roundToFPel();
+    MV bmvStack[4] = {bmv, bmv, bmv, bmv};
+    int bmvCostStack[4] = {0x7fff, 0x7fff, 0x7fff, 0x7fff};
     int bcost = bprecost;
     if (pmv.isSubpel())
         bcost = sad(fenc, FENC_STRIDE, fref + bmv.x + bmv.y * stride, stride) + mvcost(bmv << 2);
@@ -755,6 +797,8 @@
         /* refine predictors */
         omv = bmv;
         ucost1 = bcost;
+        bmvStack[0] = bmv;
+        bmvCostStack[0] = bcost;
         DIA1_ITER(pmv.x, pmv.y);
         if (pmv.notZero())
             DIA1_ITER(0, 0);
@@ -878,7 +922,12 @@
            stride, costs + 4 * k); \
     fref_base += 2 * dy;
 #define ADD_MVCOST(k, x, y) costs[k] += p_cost_omvx[x * 4 * i] + p_cost_omvy[y * 4 * i]
-#define MIN_MV(k, x, y)     COPY2_IF_LT(bcost, costs[k], dir, x * 16 + (y & 15))
+#define MIN_MV(k, x, y)  \
+    do \
+    {  \
+        COPY2_IF_LT(bcost, costs[k], dir, x * 16 + (y & 15)); \
+        PushToBMVStack(bmvStack, bmv+MV(x*i,y*i), bmvCostStack, costs[k]); \
+    } while (0)
 
                 SADS(0, +0, -4, +0, +4, -2, -3, +2, -3);
                 SADS(1, -4, -2, +4, -2, -4, -1, +4, -1);
@@ -916,6 +965,7 @@
                 MIN_MV(13, 4, 2);
                 MIN_MV(14, -2, 3);
                 MIN_MV(15, 2, 3);
+
 #undef SADS
 #undef ADD_MVCOST
 #undef MIN_MV
@@ -938,7 +988,8 @@
         int bDistance = 0;
 
         const int EarlyExitIters = 3;
-        StarPatternSearch(ref, mvmin, mvmax, bmv, bcost, bPointNr, bDistance, EarlyExitIters, merange);
+        StarPatternSearch(ref, mvmin, mvmax, bmv, bcost, bmvStack, bmvCostStack, bPointNr, bDistance, EarlyExitIters, merange);
+
         if (bDistance == 1)
         {
             // if best distance was only 1, check two missing points.  If no new point is found, stop
@@ -989,15 +1040,19 @@
                                stride, costs);
                         costs[0] += mvcost(tmv << 2);
                         COPY2_IF_LT(bcost, costs[0], bmv, tmv);
+                        PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[0]);
                         tmv.x += RasterDistance;
                         costs[1] += mvcost(tmv << 2);
                         COPY2_IF_LT(bcost, costs[1], bmv, tmv);
+                        PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[1]);
                         tmv.x += RasterDistance;
                         costs[2] += mvcost(tmv << 2);
                         COPY2_IF_LT(bcost, costs[2], bmv, tmv);
+                        PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[2]);
                         tmv.x += RasterDistance;
                         costs[3] += mvcost(tmv << 3);
                         COPY2_IF_LT(bcost, costs[3], bmv, tmv);
+                        PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[3]);
                     }
                     else
                         COST_MV(tmv.x, tmv.y);
@@ -1011,7 +1066,7 @@
             bDistance = 0;
             bPointNr = 0;
             const int MaxIters = 32;
-            StarPatternSearch(ref, mvmin, mvmax, bmv, bcost, bPointNr, bDistance, MaxIters, merange);
+            StarPatternSearch(ref, mvmin, mvmax, bmv, bcost, bmvStack, bmvCostStack, bPointNr, bDistance, MaxIters, merange);
 
             if (bDistance == 1)
             {
@@ -1061,15 +1116,19 @@
                            stride, costs);
                     costs[0] += mvcost(tmv << 2);
                     COPY2_IF_LT(bcost, costs[0], bmv, tmv);
+                    PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[0]);
                     tmv.x++;
                     costs[1] += mvcost(tmv << 2);
                     COPY2_IF_LT(bcost, costs[1], bmv, tmv);
+                    PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[1]);
                     tmv.x++;
                     costs[2] += mvcost(tmv << 2);
                     COPY2_IF_LT(bcost, costs[2], bmv, tmv);
+                    PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[2]);
                     tmv.x++;
                     costs[3] += mvcost(tmv << 2);
                     COPY2_IF_LT(bcost, costs[3], bmv, tmv);
+                    PushToBMVStack(bmvStack, tmv, bmvCostStack, (costs)[3]);
                 }
                 else
                     COST_MV(tmv.x, tmv.y);
@@ -1090,7 +1149,13 @@
         bcost = bprecost;
     }
     else
+    {
         bmv = bmv.toQPel(); // promote search bmv to qpel
+        for (int i=0; i<4; i++)
+        {
+          bmvStack[i] = bmvStack[i].toQPel();
+        }
+    }
 
     const SubpelWorkload& wl = workload[this->subpelRefine];
 
@@ -1103,72 +1168,111 @@
     else if (ref->isLowres)
     {
         int bdir = 0;
-        for (int i = 1; i <= wl.hpel_dirs; i++)
+        for (int nBmv=0; nBmv<4; nBmv++)
         {
-            MV qmv = bmv + square1[i] * 2;
-            int cost = ref->lowresQPelCost(fenc, blockOffset, qmv, sad) + mvcost(qmv);
-            COPY2_IF_LT(bcost, cost, bdir, i);
+          bdir = 0;
+          bmv =  bmvStack[nBmv];
+          bcost = bmvCostStack[nBmv];
+
+          for (int i = 1; i <= wl.hpel_dirs; i++)
+          {
+              MV qmv = bmv + square1[i] * 2;
+              int cost = ref->lowresQPelCost(fenc, blockOffset, qmv, sad) + mvcost(qmv);
+              COPY2_IF_LT(bcost, cost, bdir, i);
+          }
+
+          bmv += square1[bdir] * 2;
+          bcost = ref->lowresQPelCost(fenc, blockOffset, bmv, satd) + mvcost(bmv);
+
+          bdir = 0;
+          for (int i = 1; i <= wl.qpel_dirs; i++)
+          {
+              MV qmv = bmv + square1[i];
+              int cost = ref->lowresQPelCost(fenc, blockOffset, qmv, satd) + mvcost(qmv);
+              COPY2_IF_LT(bcost, cost, bdir, i);
+          }
+
+          bmv += square1[bdir];
+          bmvStack[nBmv] = bmv;
+          bmvCostStack[nBmv] = bcost;
         }
 
-        bmv += square1[bdir] * 2;
-        bcost = ref->lowresQPelCost(fenc, blockOffset, bmv, satd) + mvcost(bmv);
-
-        bdir = 0;
-        for (int i = 1; i <= wl.qpel_dirs; i++)
+        bmv = bmvStack[0];
+        bcost = bmvCostStack[0];
+        for (int i=1; i<4; i++)
         {
-            MV qmv = bmv + square1[i];
-            int cost = ref->lowresQPelCost(fenc, blockOffset, qmv, satd) + mvcost(qmv);
-            COPY2_IF_LT(bcost, cost, bdir, i);
+          if (bmvCostStack[i]<bcost)
+          {
+            bmv = bmvStack[i];
+            bcost = bmvCostStack[i];
+          }
         }
-
-        bmv += square1[bdir];
     }
     else
     {
         pixelcmp_t hpelcomp;
 
-        if (wl.hpel_satd)
-        {
-            bcost = subpelCompare(ref, bmv, satd) + mvcost(bmv);
-            hpelcomp = satd;
-        }
-        else
-            hpelcomp = sad;
-
-        for (int iter = 0; iter < wl.hpel_iters; iter++)
+        for (int nBmv=0; nBmv<4; nBmv++)
         {
-            int bdir = 0;
-            for (int i = 1; i <= wl.hpel_dirs; i++)
-            {
-                MV qmv = bmv + square1[i] * 2;
-                int cost = subpelCompare(ref, qmv, hpelcomp) + mvcost(qmv);
-                COPY2_IF_LT(bcost, cost, bdir, i);
-            }
+          bmv =  bmvStack[nBmv];
 
-            if (bdir)
-                bmv += square1[bdir] * 2;
-            else
-                break;
+          if (wl.hpel_satd)
+          {
+              bcost = subpelCompare(ref, bmv, satd) + mvcost(bmv);
+              hpelcomp = satd;
+          }
+          else
+              hpelcomp = sad;
+
+          for (int iter = 0; iter < wl.hpel_iters; iter++)
+          {
+              int bdir = 0;
+              for (int i = 1; i <= wl.hpel_dirs; i++)
+              {
+                  MV qmv = bmv + square1[i] * 2;
+                  int cost = subpelCompare(ref, qmv, hpelcomp) + mvcost(qmv);
+                  COPY2_IF_LT(bcost, cost, bdir, i);
+              }
+
+              if (bdir)
+                  bmv += square1[bdir] * 2;
+              else
+                  break;
+          }
+
+          /* if HPEL search used SAD, remeasure with SATD before QPEL */
+          if (!wl.hpel_satd)
+              bcost = subpelCompare(ref, bmv, satd) + mvcost(bmv);
+
+          for (int iter = 0; iter < wl.qpel_iters; iter++)
+          {
+              int bdir = 0;
+              for (int i = 1; i <= wl.qpel_dirs; i++)
+              {
+                  MV qmv = bmv + square1[i];
+                  int cost = subpelCompare(ref, qmv, satd) + mvcost(qmv);
+                  COPY2_IF_LT(bcost, cost, bdir, i);
+              }
+
+              if (bdir)
+                  bmv += square1[bdir];
+              else
+                  break;
+          }
+
+          bmvStack[nBmv] = bmv;
+          bmvCostStack[nBmv] = bcost;
         }
 
-        /* if HPEL search used SAD, remeasure with SATD before QPEL */
-        if (!wl.hpel_satd)
-            bcost = subpelCompare(ref, bmv, satd) + mvcost(bmv);
-
-        for (int iter = 0; iter < wl.qpel_iters; iter++)
+        bmv = bmvStack[0];
+        bcost = bmvCostStack[0];
+        for (int i=1; i<4; i++)
         {
-            int bdir = 0;
-            for (int i = 1; i <= wl.qpel_dirs; i++)
-            {
-                MV qmv = bmv + square1[i];
-                int cost = subpelCompare(ref, qmv, satd) + mvcost(qmv);
-                COPY2_IF_LT(bcost, cost, bdir, i);
-            }
-
-            if (bdir)
-                bmv += square1[bdir];
-            else
-                break;
+          if (bmvCostStack[i]<bcost)
+          {
+            bmv = bmvStack[i];
+            bcost = bmvCostStack[i];
+          }
         }
     }
 


More information about the x265-devel mailing list