[x264-devel] commit: Improve temporal MV prediction (Jason Garrett-Glaser)

Thu May 6 07:49:22 CEST 2010

x264 | branch: master | Jason Garrett-Glaser <darkshikari at gmail.com> | Sun May  2 11:41:36 2010 -0700| [3cf182b2d0b5fd38538a3b6e5102b925a5747687] | committer: Jason Garrett-Glaser 

Improve temporal MV prediction
Predict based on the results of p16x16 search, not final MVs.
This lets us get predictions even if mode decision chose intra.
Also improves cache locality.

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=3cf182b2d0b5fd38538a3b6e5102b925a5747687
---

 common/frame.c      |    2 ++
 common/frame.h      |    3 ++-
 common/macroblock.c |   35 ++++++++++++++---------------------
 common/mvpred.c     |   18 +++++++-----------
 4 files changed, 25 insertions(+), 33 deletions(-)

diff --git a/common/frame.c b/common/frame.c
index 66ebee8..5d2cd57 100644
--- a/common/frame.c
+++ b/common/frame.c
@@ -105,6 +105,7 @@ x264_frame_t *x264_frame_new( x264_t *h, int b_fdec )
         CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
         CHECKED_MALLOC( frame->mb_partition, i_mb_count * sizeof(uint8_t));
         CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
+        CHECKED_MALLOC( frame->mv16x16, 2*i_mb_count * sizeof(int16_t) );
         CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
         if( h->param.i_bframe )
         {
@@ -206,6 +207,7 @@ void x264_frame_delete( x264_frame_t *frame )
         x264_free( frame->mb_partition );
         x264_free( frame->mv[0] );
         x264_free( frame->mv[1] );
+        x264_free( frame->mv16x16 );
         x264_free( frame->ref[0] );
         x264_free( frame->ref[1] );
         x264_pthread_mutex_destroy( &frame->mutex );
diff --git a/common/frame.h b/common/frame.h
index 2d6ea0b..6093075 100644
--- a/common/frame.h
+++ b/common/frame.h
@@ -83,6 +83,7 @@ typedef struct x264_frame
     int8_t  *mb_type;
     uint8_t *mb_partition;
     int16_t (*mv[2])[2];
+    int16_t (*mv16x16)[2];
     int16_t (*lowres_mvs[2][X264_BFRAME_MAX+1])[2];
 
     /* Stored as (lists_used << LOWRES_COST_SHIFT) + (cost).
@@ -96,7 +97,7 @@ typedef struct x264_frame
     int8_t  *ref[2];
     int     i_ref[2];
     int     ref_poc[2][16];
-    int16_t inv_ref_poc[2][32]; // inverse values (list0 only) to avoid divisions in MB encoding
+    int16_t inv_ref_poc[2]; // inverse values of ref0 poc to avoid divisions in temporal MV prediction
 
     /* for adaptive B-frame decision.
      * contains the SATD cost of the lowres frame encoded in various modes
diff --git a/common/macroblock.c b/common/macroblock.c
index 58837dd..f402588 100644
--- a/common/macroblock.c
+++ b/common/macroblock.c
@@ -25,24 +25,6 @@
 #include "common.h"
 #include "encoder/me.h"
 
-/* Set up a lookup table for delta pocs to reduce an IDIV to an IMUL */
-static void setup_inverse_delta_pocs( x264_t *h )
-{
-    for( int field = 0; field <= h->sh.b_mbaff; field++ )
-    {
-        int curpoc = h->fdec->i_poc + field*h->sh.i_delta_poc_bottom;
-        for( int i = 0; i < (h->i_ref0<<h->sh.b_mbaff); i++ )
-        {
-            int refpoc = h->fref0[i>>h->sh.b_mbaff]->i_poc;
-            if( h->sh.b_mbaff && field^(i&1) )
-                refpoc += h->sh.i_delta_poc_bottom;
-            int delta = curpoc - refpoc;
-
-            h->fdec->inv_ref_poc[field][i] = (256 + delta/2) / delta;
-        }
-    }
-}
-
 static NOINLINE void x264_mb_mc_0xywh( x264_t *h, int x, int y, int width, int height )
 {
     int i8    = x264_scan8[0]+x+8*y;
@@ -268,7 +250,7 @@ int x264_macroblock_cache_allocate( x264_t *h )
         else if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_BLIND )
             i_refs = X264_MIN(16, i_refs + 1); //blind weights add one duplicate frame
 
-        for( int j = 0; j < i_refs; j++ )
+        for( int j = !i; j < i_refs; j++ )
             CHECKED_MALLOC( h->mb.mvr[i][j], 2 * i_mb_count * sizeof(int16_t) );
     }
 
@@ -318,7 +300,7 @@ fail: return -1;
 void x264_macroblock_cache_free( x264_t *h )
 {
     for( int i = 0; i < 2; i++ )
-        for( int j = 0; j < 32; j++ )
+        for( int j = !i; j < 32; j++ )
             x264_free( h->mb.mvr[i][j] );
     for( int i = 0; i < 16; i++ )
         x264_free( h->mb.p_weight_buf[i] );
@@ -382,6 +364,7 @@ void x264_macroblock_slice_init( x264_t *h )
 {
     h->mb.mv[0] = h->fdec->mv[0];
     h->mb.mv[1] = h->fdec->mv[1];
+    h->mb.mvr[0][0] = h->fdec->mv16x16;
     h->mb.ref[0] = h->fdec->ref[0];
     h->mb.ref[1] = h->fdec->ref[1];
     h->mb.type = h->fdec->mb_type;
@@ -416,7 +399,17 @@ void x264_macroblock_slice_init( x264_t *h )
     /* init with not available (for top right idx=7,15) */
     memset( h->mb.cache.ref, -2, sizeof( h->mb.cache.ref ) );
 
-    setup_inverse_delta_pocs( h );
+    if( h->i_ref0 > 0 )
+        for( int field = 0; field <= h->sh.b_mbaff; field++ )
+        {
+            int curpoc = h->fdec->i_poc + field*h->sh.i_delta_poc_bottom;
+            int refpoc = h->fref0[0]->i_poc;
+            if( h->sh.b_mbaff && field )
+                refpoc += h->sh.i_delta_poc_bottom;
+            int delta = curpoc - refpoc;
+
+            h->fdec->inv_ref_poc[field] = (256 + delta/2) / delta;
+        }
 
     h->mb.i_neighbour4[6] =
     h->mb.i_neighbour4[9] =
diff --git a/common/mvpred.c b/common/mvpred.c
index c7133d5..de91826 100755
--- a/common/mvpred.c
+++ b/common/mvpred.c
@@ -445,17 +445,13 @@ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t mvc[
         if( h->sh.b_mbaff && field^(i_ref&1) )
             refpoc += h->sh.i_delta_poc_bottom;
 
-#define SET_TMVP(dx, dy) { \
-            int i_b4 = h->mb.i_b4_xy + dx*4 + dy*4*h->mb.i_b4_stride; \
-            int i_b8 = h->mb.i_b8_xy + dx*2 + dy*2*h->mb.i_b8_stride; \
-            int ref_col = l0->ref[0][i_b8]; \
-            if( ref_col >= 0 ) \
-            { \
-                int scale = (curpoc - refpoc) * l0->inv_ref_poc[h->mb.b_interlaced&field][ref_col];\
-                mvc[i][0] = (l0->mv[0][i_b4][0]*scale + 128) >> 8;\
-                mvc[i][1] = (l0->mv[0][i_b4][1]*scale + 128) >> 8;\
-                i++; \
-            } \
+#define SET_TMVP( dx, dy )\
+        { \
+            int mb_index = h->mb.i_mb_xy + dx + dy*h->mb.i_mb_stride; \
+            int scale = (curpoc - refpoc) * l0->inv_ref_poc[h->mb.b_interlaced&field];\
+            mvc[i][0] = (l0->mv16x16[mb_index][0]*scale + 128) >> 8;\
+            mvc[i][1] = (l0->mv16x16[mb_index][1]*scale + 128) >> 8;\
+            i++;\
         }
 
         SET_TMVP(0,0);