[x264-devel] commit: Faster MV prediction (Jason Garrett-Glaser)

Sat Jun 20 01:28:59 CEST 2009

x264 | branch: master | Jason Garrett-Glaser <darkshikari at gmail.com> | Fri Jun 19 16:02:39 2009 -0700| [803c9d94641e57544932114f61f523e19bba6b4d] | committer: Jason Garrett-Glaser 

Faster MV prediction
Smaller code size, plus I get to use goto.

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=803c9d94641e57544932114f61f523e19bba6b4d
---

 common/macroblock.c |   78 +++++++++++++++++++++++++--------------------------
 1 files changed, 38 insertions(+), 40 deletions(-)

diff --git a/common/macroblock.c b/common/macroblock.c
index 792d40a..836d203 100644
--- a/common/macroblock.c
+++ b/common/macroblock.c
@@ -36,9 +36,9 @@ void x264_mb_predict_mv( x264_t *h, int i_list, int idx, int i_width, int16_t mv
     int     i_refc = h->mb.cache.ref[i_list][i8 - 8 + i_width ];
     int16_t *mv_c  = h->mb.cache.mv[i_list][i8 - 8 + i_width];
 
-    int i_count;
+    int i_count = 0;
 
-    if( (idx&0x03) == 3 || ( i_width == 2 && (idx&0x3) == 2 )|| i_refc == -2 )
+    if( (idx&3) >= 2 + (i_width&1) || i_refc == -2 )
     {
         i_refc = h->mb.cache.ref[i_list][i8 - 8 - 1];
         mv_c   = h->mb.cache.mv[i_list][i8 - 8 - 1];
@@ -46,38 +46,52 @@ void x264_mb_predict_mv( x264_t *h, int i_list, int idx, int i_width, int16_t mv
 
     if( h->mb.i_partition == D_16x8 )
     {
-        if( idx == 0 && i_refb == i_ref )
+        if( idx == 0 )
         {
-            *(uint32_t*)mvp = *(uint32_t*)mv_b;
-            return;
+            if( i_refb == i_ref )
+            {
+                *(uint32_t*)mvp = *(uint32_t*)mv_b;
+                return;
+            }
         }
-        else if( idx != 0 && i_refa == i_ref )
+        else
         {
-            *(uint32_t*)mvp = *(uint32_t*)mv_a;
-            return;
+            if( i_refa == i_ref )
+            {
+                *(uint32_t*)mvp = *(uint32_t*)mv_a;
+                return;
+            }
         }
     }
     else if( h->mb.i_partition == D_8x16 )
     {
-        if( idx == 0 && i_refa == i_ref )
+        if( idx == 0 )
         {
-            *(uint32_t*)mvp = *(uint32_t*)mv_a;
-            return;
+            if( i_refa == i_ref )
+            {
+                *(uint32_t*)mvp = *(uint32_t*)mv_a;
+                return;
+            }
         }
-        else if( idx != 0 && i_refc == i_ref )
+        else
         {
-            *(uint32_t*)mvp = *(uint32_t*)mv_c;
-            return;
+            if( i_refc == i_ref )
+            {
+                *(uint32_t*)mvp = *(uint32_t*)mv_c;
+                return;
+            }
         }
     }
 
-    i_count = 0;
     if( i_refa == i_ref ) i_count++;
     if( i_refb == i_ref ) i_count++;
     if( i_refc == i_ref ) i_count++;
 
     if( i_count > 1 )
+    {
+median:
         x264_median_mv( mvp, mv_a, mv_b, mv_c );
+    }
     else if( i_count == 1 )
     {
         if( i_refa == i_ref )
@@ -90,7 +104,7 @@ void x264_mb_predict_mv( x264_t *h, int i_list, int idx, int i_width, int16_t mv
     else if( i_refb == -2 && i_refc == -2 && i_refa != -2 )
         *(uint32_t*)mvp = *(uint32_t*)mv_a;
     else
-        x264_median_mv( mvp, mv_a, mv_b, mv_c );
+        goto median;
 }
 
 void x264_mb_predict_mv_16x16( x264_t *h, int i_list, int i_ref, int16_t mvp[2] )
@@ -102,7 +116,7 @@ void x264_mb_predict_mv_16x16( x264_t *h, int i_list, int i_ref, int16_t mvp[2]
     int     i_refc = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8 + 4];
     int16_t *mv_c  = h->mb.cache.mv[i_list][X264_SCAN8_0 - 8 + 4];
 
-    int i_count;
+    int i_count = 0;
 
     if( i_refc == -2 )
     {
@@ -110,13 +124,15 @@ void x264_mb_predict_mv_16x16( x264_t *h, int i_list, int i_ref, int16_t mvp[2]
         mv_c   = h->mb.cache.mv[i_list][X264_SCAN8_0 - 8 - 1];
     }
 
-    i_count = 0;
     if( i_refa == i_ref ) i_count++;
     if( i_refb == i_ref ) i_count++;
     if( i_refc == i_ref ) i_count++;
 
     if( i_count > 1 )
+    {
+median:
         x264_median_mv( mvp, mv_a, mv_b, mv_c );
+    }
     else if( i_count == 1 )
     {
         if( i_refa == i_ref )
@@ -129,7 +145,7 @@ void x264_mb_predict_mv_16x16( x264_t *h, int i_list, int i_ref, int16_t mvp[2]
     else if( i_refb == -2 && i_refc == -2 && i_refa != -2 )
         *(uint32_t*)mvp = *(uint32_t*)mv_a;
     else
-        x264_median_mv( mvp, mv_a, mv_b, mv_c );
+        goto median;
 }
 
 
@@ -156,7 +172,7 @@ static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h )
 {
     int i_mb_4x4 = 16 * h->mb.i_mb_stride * h->mb.i_mb_y + 4 * h->mb.i_mb_x;
     int i_mb_8x8 =  4 * h->mb.i_mb_stride * h->mb.i_mb_y + 2 * h->mb.i_mb_x;
-    int i8, i4;
+    int i8;
     const int type_col = h->fref1[0]->mb_type[ h->mb.i_mb_xy ];
 
     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, 0 );
@@ -182,6 +198,8 @@ static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h )
             const int16_t *mv_col = h->fref1[0]->mv[0][ i_mb_4x4 + 3*x8 + 3*y8 * h->mb.i_b4_stride];
             const int l0x = ( dist_scale_factor * mv_col[0] + 128 ) >> 8;
             const int l0y = ( dist_scale_factor * mv_col[1] + 128 ) >> 8;
+            if( h->param.i_threads > 1 && (l0y > h->mb.mv_max_spel[1] || l0y-mv_col[1] > h->mb.mv_max_spel[1]) )
+                return 0;
             x264_macroblock_cache_ref( h, 2*x8, 2*y8, 2, 2, 0, i_ref );
             x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, pack16to32_mask(l0x, l0y) );
             x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 1, pack16to32_mask(l0x-mv_col[0], l0y-mv_col[1]) );
@@ -197,26 +215,6 @@ static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h )
         }
     }
 
-    if( h->param.i_threads > 1 )
-    {
-        for( i4=0; i4<16; i4+=4 )
-        {
-            if( h->mb.cache.mv[0][x264_scan8[i4]][1] > h->mb.mv_max_spel[1]
-             || h->mb.cache.mv[1][x264_scan8[i4]][1] > h->mb.mv_max_spel[1] )
-            {
-#if 0
-                fprintf(stderr, "direct_temporal: (%d,%d) (%d,%d) > %d \n",
-                        h->mb.cache.mv[0][x264_scan8[i4]][0],
-                        h->mb.cache.mv[0][x264_scan8[i4]][1],
-                        h->mb.cache.mv[1][x264_scan8[i4]][0],
-                        h->mb.cache.mv[1][x264_scan8[i4]][1],
-                        h->mb.mv_max_spel[1]);
-#endif
-                return 0;
-            }
-        }
-    }
-
     return 1;
 }