[x264-devel] commit: Add subme=0 (fullpel motion estimation only) (Jason Garrett-Glaser)

Thu Nov 13 08:08:54 CET 2008

x264 | branch: master | Jason Garrett-Glaser <darkshikari at gmail.com> | Mon Nov 10 23:34:02 2008 -0800| [aa14719bf2b78f8fd3da7bbabb0faf142313dae1] | committer: Jason Garrett-Glaser 

Add subme=0 (fullpel motion estimation only)
Only for experimental purposes and ultra-fast encoding.  Probably not a good idea for the first pass.

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=aa14719bf2b78f8fd3da7bbabb0faf142313dae1
---

 common/frame.c      |   16 ++++++++++++----
 common/macroblock.c |    3 ---
 encoder/encoder.c   |   19 ++++++++++++++-----
 encoder/me.c        |   12 ++++++------
 x264.c              |    3 ++-
 5 files changed, 34 insertions(+), 19 deletions(-)

diff --git a/common/frame.c b/common/frame.c
index 4c20cdd..b190de7 100644
--- a/common/frame.c
+++ b/common/frame.c
@@ -61,10 +61,18 @@ x264_frame_t *x264_frame_new( x264_t *h )
     }
     /* all 4 luma planes allocated together, since the cacheline split code
      * requires them to be in-phase wrt cacheline alignment. */
-    CHECKED_MALLOC( frame->buffer[0], 4*luma_plane_size);
-    for( i = 0; i < 4; i++ )
-        frame->filtered[i] = frame->buffer[0] + i*luma_plane_size + frame->i_stride[0] * i_padv + PADH;
-    frame->plane[0] = frame->filtered[0];
+    if( h->param.analyse.i_subpel_refine )
+    {
+        CHECKED_MALLOC( frame->buffer[0], 4*luma_plane_size);
+        for( i = 0; i < 4; i++ )
+            frame->filtered[i] = frame->buffer[0] + i*luma_plane_size + frame->i_stride[0] * i_padv + PADH;
+        frame->plane[0] = frame->filtered[0];
+    }
+    else
+    {
+        CHECKED_MALLOC( frame->buffer[0], luma_plane_size);
+        frame->plane[0] = frame->buffer[0] + frame->i_stride[0] * i_padv + PADH;
+    }
 
     if( h->frames.b_have_lowres )
     {
diff --git a/common/macroblock.c b/common/macroblock.c
index 32ea5b0..25683f1 100644
--- a/common/macroblock.c
+++ b/common/macroblock.c
@@ -488,9 +488,6 @@ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t mvc[
 #undef SET_TMVP
     }
 
-    if(i == 0)
-        *(uint32_t*)mvc[i] = 0;
-
     *i_mvc = i;
 }
 
diff --git a/encoder/encoder.c b/encoder/encoder.c
index c22f7b0..fbb2ea1 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -441,7 +441,11 @@ static int x264_validate_parameters( x264_t *h )
     if( h->param.i_keyint_max <= 0 )
         h->param.i_keyint_max = 1;
     h->param.i_keyint_min = x264_clip3( h->param.i_keyint_min, 1, h->param.i_keyint_max/2+1 );
-
+    if( !h->param.analyse.i_subpel_refine && h->param.analyse.i_direct_mv_pred > X264_DIRECT_PRED_SPATIAL )
+    {
+        x264_log( h, X264_LOG_WARNING, "subme=0 + direct=temporal is not supported\n" );
+        h->param.analyse.i_direct_mv_pred = X264_DIRECT_PRED_SPATIAL;
+    }
     h->param.i_bframe = x264_clip3( h->param.i_bframe, 0, X264_BFRAME_MAX );
     h->param.i_bframe_bias = x264_clip3( h->param.i_bframe_bias, -90, 100 );
     h->param.b_bframe_pyramid = h->param.b_bframe_pyramid && h->param.i_bframe > 1;
@@ -474,7 +478,7 @@ static int x264_validate_parameters( x264_t *h )
     if( h->param.analyse.i_me_method == X264_ME_TESA &&
         (h->mb.b_lossless || h->param.analyse.i_subpel_refine <= 1) )
         h->param.analyse.i_me_method = X264_ME_ESA;
-    h->param.analyse.i_subpel_refine = x264_clip3( h->param.analyse.i_subpel_refine, 1, 9 );
+    h->param.analyse.i_subpel_refine = x264_clip3( h->param.analyse.i_subpel_refine, 0, 9 );
     h->param.analyse.b_mixed_references = h->param.analyse.b_mixed_references && h->param.i_frame_reference > 1;
     h->param.analyse.inter &= X264_ANALYSE_PSUB16x16|X264_ANALYSE_PSUB8x8|X264_ANALYSE_BSUB16x16|
                               X264_ANALYSE_I4x4|X264_ANALYSE_I8x8;
@@ -820,7 +824,9 @@ int x264_encoder_reconfig( x264_t *h, x264_param_t *param )
     COPY( analyse.i_direct_mv_pred );
     COPY( analyse.i_me_range );
     COPY( analyse.i_noise_reduction );
-    COPY( analyse.i_subpel_refine );
+    /* We can't switch out of subme=0 during encoding. */
+    if( h->param.analyse.i_subpel_refine )
+        COPY( analyse.i_subpel_refine );
     COPY( analyse.i_trellis );
     COPY( analyse.b_chroma_me );
     COPY( analyse.b_dct_decimate );
@@ -1002,8 +1008,11 @@ static void x264_fdec_filter_row( x264_t *h, int mb_y )
     if( b_hpel )
     {
         x264_frame_expand_border( h, h->fdec, min_y, b_end );
-        x264_frame_filter( h, h->fdec, min_y, b_end );
-        x264_frame_expand_border_filtered( h, h->fdec, min_y, b_end );
+        if( h->param.analyse.i_subpel_refine )
+        {
+            x264_frame_filter( h, h->fdec, min_y, b_end );
+            x264_frame_expand_border_filtered( h, h->fdec, min_y, b_end );
+        }
     }
 
     if( h->param.i_threads > 1 && h->fdec->b_kept_as_ref )
diff --git a/encoder/me.c b/encoder/me.c
index 4acae2e..d86bc03 100644
--- a/encoder/me.c
+++ b/encoder/me.c
@@ -32,7 +32,7 @@
  * the subme=8,9 values are much higher because any amount of satd search makes
  * up its time by reducing the number of qpel-rd iterations. */
 static const int subpel_iterations[][4] =
-   {{1,0,0,0},
+   {{0,0,0,0},
     {1,1,0,0},
     {0,1,1,0},
     {0,2,1,0},
@@ -165,7 +165,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
     uint8_t *p_fref = m->p_fref[0];
     DECLARE_ALIGNED_16( uint8_t pix[16*16] );
 
-    int i = 0, j;
+    int i, j;
     int dir;
     int costs[6];
 
@@ -190,7 +190,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
     {
         uint32_t bmv = pack16to32_mask(bmx,bmy);
         COST_MV_HPEL( bmx, bmy );
-        do
+        for( i = 0; i < i_mvc; i++ )
         {
             if( *(uint32_t*)mvc[i] && (bmv - *(uint32_t*)mvc[i]) )
             {
@@ -198,7 +198,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
                 int my = x264_clip3( mvc[i][1], mv_y_min*4, mv_y_max*4 );
                 COST_MV_HPEL( mx, my );
             }
-        } while( ++i < i_mvc );
+        }
         bmx = ( bpred_mx + 2 ) >> 2;
         bmy = ( bpred_my + 2 ) >> 2;
         COST_MV( bmx, bmy );
@@ -214,7 +214,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
          * sensible to remove the cost of the MV from the rounded MVP to avoid unfairly
          * biasing against use of the predicted motion vector. */
         bcost -= BITS_MVD( pmx, pmy );
-        do
+        for( i = 0; i < i_mvc; i++ )
         {
             int mx = (mvc[i][0] + 2) >> 2;
             int my = (mvc[i][1] + 2) >> 2;
@@ -224,7 +224,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
                 my = x264_clip3( my, mv_y_min, mv_y_max );
                 COST_MV( mx, my );
             }
-        } while( ++i < i_mvc );
+        }
     }
     COST_MV( 0, 0 );
 
diff --git a/x264.c b/x264.c
index 3bc4028..e8febc2 100644
--- a/x264.c
+++ b/x264.c
@@ -249,7 +249,8 @@ static void Help( x264_param_t *defaults, int b_longhelp )
     H1( "      --mvrange <integer>     Maximum motion vector length [-1 (auto)]\n" );
     H1( "      --mvrange-thread <int>  Minimum buffer between threads [-1 (auto)]\n" );
     H0( "  -m, --subme <integer>       Subpixel motion estimation and mode decision [%d]\n", defaults->analyse.i_subpel_refine );
-    H1( "                                  - 1: SAD mode decision, one qpel iteration\n"
+    H1( "                                  - 0: fullpel only (not recommended)\n"
+        "                                  - 1: SAD mode decision, one qpel iteration\n"
         "                                  - 2: SATD mode decision\n"
         "                                  - 3-5: Progressively more qpel\n"
         "                                  - 6: RD mode decision for I/P-frames\n"