[x265] [PATCH 3 of 3] improve Diamond Search by reducing duplicate SAD [OUTPUT CHANGED]
Min Chen
chenm003 at 163.com
Sat Jun 27 04:13:23 CEST 2015
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1435370058 25200
# Node ID fb038742dabd93a1993562c093839e5362ad3d46
# Parent a8198965fbf18d7ced2cd8e8acd07fcc4ee6a2b3
improve Diamond Search by reducing duplicate SAD [OUTPUT CHANGED]
Side effect of modifying the DIA ME template:
0 0
2 3 --> 3 1
1 2
---
source/encoder/motion.cpp | 41 +++++++++++++++++++++++++++++++++++++++++
1 files changed, 41 insertions(+), 0 deletions(-)
diff -r a8198965fbf1 -r fb038742dabd source/encoder/motion.cpp
--- a/source/encoder/motion.cpp Fri Jun 26 18:54:15 2015 -0700
+++ b/source/encoder/motion.cpp Fri Jun 26 18:54:18 2015 -0700
@@ -2,6 +2,7 @@
* Copyright (C) 2013 x265 project
*
* Authors: Steve Borho <steve at borho.org>
+ * Min Chen <chenm003 at 163.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -59,6 +60,8 @@
int sizeScale[NUM_PU_SIZES];
#define SAD_THRESH(v) (bcost < (((v >> 4) * sizeScale[partEnum])))
+const MV dia2[6] = { MV(1, 0), MV(0, -1), MV(-1, 0), MV(0, 1), MV(1, 0), MV(0, -1) };
+
/* radius 2 hexagon. repeated entries are to avoid having to compute mod6 every time. */
const MV hex2[8] = { MV(-1, -2), MV(-2, 0), MV(-1, 2), MV(1, 2), MV(2, 0), MV(1, -2), MV(-1, -2), MV(-2, 0) };
const uint8_t mod6m1[8] = { 5, 0, 1, 2, 3, 4, 5, 0 }; /* (x-1)%6 */
@@ -681,6 +684,7 @@
case X265_DIA_SEARCH:
{
/* diamond search, radius 1 */
+#if 0
bcost <<= 4;
int i = merange;
do
@@ -699,6 +703,43 @@
while (--i && bmv.checkRange(mvmin, mvmax));
bcost >>= 4;
break;
+
+#else // if 0
+
+ /* approach equivalent to the above, but eliminates duplicate candidates */
+ bcost <<= 3;
+ COST_MV_X4_DIR(0, -1, 0, 1, -1, 0, 1, 0, costs);
+ COPY1_IF_LT(bcost, (costs[0] << 3) + 2);
+ COPY1_IF_LT(bcost, (costs[1] << 3) + 3);
+ COPY1_IF_LT(bcost, (costs[2] << 3) + 4);
+ COPY1_IF_LT(bcost, (costs[3] << 3) + 5);
+
+ if (bcost & 7)
+ {
+ int dir = (bcost & 7) - 2;
+ bmv += dia2[dir + 1];
+
+ for (int i = merange - 1; i > 0 && bmv.checkRange(mvmin, mvmax); i--)
+ {
+ COST_MV_X3_DIR(dia2[dir + 0].x, dia2[dir + 0].y,
+ dia2[dir + 1].x, dia2[dir + 1].y,
+ dia2[dir + 2].x, dia2[dir + 2].y,
+ costs);
+ bcost &= ~7;
+ COPY1_IF_LT(bcost, (costs[0] << 3) + 1);
+ COPY1_IF_LT(bcost, (costs[1] << 3) + 2);
+ COPY1_IF_LT(bcost, (costs[2] << 3) + 3);
+ if (!(bcost & 7))
+ break;
+ dir += (bcost & 7) - 1;
+ X265_CHECK(dir >= 0 && dir <= 5, "dir check failure\n");
+ dir = (3 & (dir + 3)); // (dir-1)%4
+ bmv += dia2[dir];
+ }
+ }
+ bcost >>= 3;
+ break;
+#endif // if 0
}
case X265_HEX_SEARCH:
More information about the x265-devel
mailing list