[x265] [PATCH 3 of 3] improve Diamond Search by reducing duplicate SAD [OUTPUT CHANGED]

Min Chen chenm003 at 163.com
Sat Jun 27 04:13:23 CEST 2015


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1435370058 25200
# Node ID fb038742dabd93a1993562c093839e5362ad3d46
# Parent  a8198965fbf18d7ced2cd8e8acd07fcc4ee6a2b3
improve Diamond Search by reducing duplicate SAD [OUTPUT CHANGED]
Side effect: the DIA ME template (candidate order) is modified:
  0           0
2   3  -->  3   1
  1           2
---
 source/encoder/motion.cpp |   41 +++++++++++++++++++++++++++++++++++++++++
 1 files changed, 41 insertions(+), 0 deletions(-)

diff -r a8198965fbf1 -r fb038742dabd source/encoder/motion.cpp
--- a/source/encoder/motion.cpp	Fri Jun 26 18:54:15 2015 -0700
+++ b/source/encoder/motion.cpp	Fri Jun 26 18:54:18 2015 -0700
@@ -2,6 +2,7 @@
  * Copyright (C) 2013 x265 project
  *
  * Authors: Steve Borho <steve at borho.org>
+ *          Min Chen <chenm003 at 163.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -59,6 +60,8 @@
 int sizeScale[NUM_PU_SIZES];
 #define SAD_THRESH(v) (bcost < (((v >> 4) * sizeScale[partEnum])))
 
+const MV dia2[6] = { MV(1, 0), MV(0, -1), MV(-1, 0), MV(0, 1), MV(1, 0), MV(0, -1) }; /* radius 1 diamond (unit steps); the first two entries are repeated so dia2[dir + 2] needs no mod4 */
+
 /* radius 2 hexagon. repeated entries are to avoid having to compute mod6 every time. */
 const MV hex2[8] = { MV(-1, -2), MV(-2, 0), MV(-1, 2), MV(1, 2), MV(2, 0), MV(1, -2), MV(-1, -2), MV(-2, 0) };
 const uint8_t mod6m1[8] = { 5, 0, 1, 2, 3, 4, 5, 0 };  /* (x-1)%6 */
@@ -681,6 +684,7 @@
     case X265_DIA_SEARCH:
     {
         /* diamond search, radius 1 */
+#if 0
         bcost <<= 4;
         int i = merange;
         do
@@ -699,6 +703,43 @@
         while (--i && bmv.checkRange(mvmin, mvmax));
         bcost >>= 4;
         break;
+
+#else // if 0
+
+        /* same diamond walk as above, but after each move only the three candidates not yet evaluated are tested */
+        bcost <<= 3; // low 3 bits of bcost carry the tag of the winning candidate (0 = no candidate won)
+        COST_MV_X4_DIR(0, -1, -1, 0, 0, 1, 1, 0, costs); // candidate order must match dia2[1..4]: tag t is decoded as dia2[t - 1]
+        COPY1_IF_LT(bcost, (costs[0] << 3) + 2);
+        COPY1_IF_LT(bcost, (costs[1] << 3) + 3);
+        COPY1_IF_LT(bcost, (costs[2] << 3) + 4);
+        COPY1_IF_LT(bcost, (costs[3] << 3) + 5);
+
+        if (bcost & 7)
+        {
+            int dir = (bcost & 7) - 2; // invariant from here on: dia2[dir + 1] is the direction of the last move
+            bmv += dia2[dir + 1];
+
+            for (int i = merange - 1; i > 0 && bmv.checkRange(mvmin, mvmax); i--)
+            {
+                COST_MV_X3_DIR(dia2[dir + 0].x, dia2[dir + 0].y,
+                               dia2[dir + 1].x, dia2[dir + 1].y,
+                               dia2[dir + 2].x, dia2[dir + 2].y,
+                               costs); // dia2[dir + 3], the step back to the previous position, is not re-tested
+                bcost &= ~7; // drop the previous tag so a zero tag below means "no improvement"
+                COPY1_IF_LT(bcost, (costs[0] << 3) + 1);
+                COPY1_IF_LT(bcost, (costs[1] << 3) + 2);
+                COPY1_IF_LT(bcost, (costs[2] << 3) + 3);
+                if (!(bcost & 7))
+                    break;
+                dir += (bcost & 7) - 1; // index (0..5) into dia2 of the winning direction
+                X265_CHECK(dir >= 0 && dir <= 5, "dir check failure\n");
+                dir = (3 & (dir + 3));  // (dir-1)%4, restoring the invariant: dia2[dir + 1] is the winning direction
+                bmv += dia2[dir + 1]; // step in the winning direction (dia2[dir] would be its neighbour, 90 degrees off)
+            }
+        }
+        bcost >>= 3; // strip the tag bits, leaving the plain cost
+        break;
+#endif // if 0
     }
 
     case X265_HEX_SEARCH:



More information about the x265-devel mailing list