[x264-devel] analyse: Reduce the size of the cost_mv arrays

Mon Jun 26 21:58:17 CEST 2017

x264 | branch: master | Henrik Gramner <henrik at gramner.com> | Sun Feb 19 10:48:33 2017 +0100| [c9d2c1c80b25c6ae15c41b200ec44ac2dabce725] | committer: Anton Mitrofanov

analyse: Reduce the size of the cost_mv arrays

Use a dynamic size depending on the MV range. Reduces memory consumption by
up to a few megabytes.

Drop a related old miscompilation check since it may otherwise cause an
out-of-bounds memory access.

Also remove an unused extern variable declaration.

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=c9d2c1c80b25c6ae15c41b200ec44ac2dabce725
---

 encoder/analyse.c | 30 +++++++++++++++++-------------
 encoder/encoder.c |  8 --------
 encoder/me.h      |  2 --
 3 files changed, 17 insertions(+), 23 deletions(-)

diff --git a/encoder/analyse.c b/encoder/analyse.c
index 8bb83fbf..036d6c15 100644
--- a/encoder/analyse.c
+++ b/encoder/analyse.c
@@ -264,29 +264,31 @@ static uint16_t x264_cost_i4x4_mode[(QP_MAX+2)*32];
 
 static int init_costs( x264_t *h, float *logs, int qp )
 {
-    int lambda = x264_lambda_tab[qp];
     if( h->cost_mv[qp] )
         return 0;
+
+    int mv_range = h->param.analyse.i_mv_range;
+    int lambda = x264_lambda_tab[qp];
     /* factor of 4 from qpel, 2 from sign, and 2 because mv can be opposite from mvp */
-    CHECKED_MALLOC( h->cost_mv[qp], (4*4*2048 + 1) * sizeof(uint16_t) );
-    h->cost_mv[qp] += 2*4*2048;
-    for( int i = 0; i <= 2*4*2048; i++ )
+    CHECKED_MALLOC( h->cost_mv[qp], (4*4*mv_range + 1) * sizeof(uint16_t) );
+    h->cost_mv[qp] += 2*4*mv_range;
+    for( int i = 0; i <= 2*4*mv_range; i++ )
     {
         h->cost_mv[qp][-i] =
-        h->cost_mv[qp][i]  = X264_MIN( lambda * logs[i] + .5f, (1<<16)-1 );
+        h->cost_mv[qp][i]  = X264_MIN( (int)(lambda * logs[i] + .5f), UINT16_MAX );
     }
     x264_pthread_mutex_lock( &cost_ref_mutex );
     for( int i = 0; i < 3; i++ )
         for( int j = 0; j < 33; j++ )
-            x264_cost_ref[qp][i][j] = X264_MIN( i ? lambda * bs_size_te( i, j ) : 0, (1<<16)-1 );
+            x264_cost_ref[qp][i][j] = i ? X264_MIN( lambda * bs_size_te( i, j ), UINT16_MAX ) : 0;
     x264_pthread_mutex_unlock( &cost_ref_mutex );
     if( h->param.analyse.i_me_method >= X264_ME_ESA && !h->cost_mv_fpel[qp][0] )
     {
         for( int j = 0; j < 4; j++ )
         {
-            CHECKED_MALLOC( h->cost_mv_fpel[qp][j], (4*2048 + 1) * sizeof(uint16_t) );
-            h->cost_mv_fpel[qp][j] += 2*2048;
-            for( int i = -2*2048; i < 2*2048; i++ )
+            CHECKED_MALLOC( h->cost_mv_fpel[qp][j], (4*mv_range + 1) * sizeof(uint16_t) );
+            h->cost_mv_fpel[qp][j] += 2*mv_range;
+            for( int i = -2*mv_range; i < 2*mv_range; i++ )
                 h->cost_mv_fpel[qp][j][i] = h->cost_mv[qp][i*4+j];
         }
     }
@@ -300,12 +302,13 @@ fail:
 
 int x264_analyse_init_costs( x264_t *h )
 {
-    float *logs = x264_malloc( (2*4*2048+1) * sizeof(float) );
+    int mv_range = h->param.analyse.i_mv_range;
+    float *logs = x264_malloc( (2*4*mv_range+1) * sizeof(float) );
     if( !logs )
         return -1;
 
     logs[0] = 0.718f;
-    for( int i = 1; i <= 2*4*2048; i++ )
+    for( int i = 1; i <= 2*4*mv_range; i++ )
         logs[i] = log2f( i+1 ) * 2.0f + 1.718f;
 
     for( int qp = X264_MIN( h->param.rc.i_qp_min, QP_MAX_SPEC ); qp <= h->param.rc.i_qp_max; qp++ )
@@ -324,13 +327,14 @@ fail:
 
 void x264_analyse_free_costs( x264_t *h )
 {
+    int mv_range = h->param.analyse.i_mv_range;
     for( int i = 0; i < QP_MAX+1; i++ )
     {
         if( h->cost_mv[i] )
-            x264_free( h->cost_mv[i] - 2*4*2048 );
+            x264_free( h->cost_mv[i] - 2*4*mv_range );
         if( h->cost_mv_fpel[i][0] )
             for( int j = 0; j < 4; j++ )
-                x264_free( h->cost_mv_fpel[i][j] - 2*2048 );
+                x264_free( h->cost_mv_fpel[i][j] - 2*mv_range );
     }
 }
 
diff --git a/encoder/encoder.c b/encoder/encoder.c
index 053b4cf2..0b379aef 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -1593,14 +1593,6 @@ x264_t *x264_encoder_open( x264_param_t *param )
     if( x264_analyse_init_costs( h ) )
         goto fail;
 
-    static const uint16_t cost_mv_correct[7] = { 24, 47, 95, 189, 379, 757, 1515 };
-    /* Checks for known miscompilation issues. */
-    if( h->cost_mv[X264_LOOKAHEAD_QP][2013] != cost_mv_correct[BIT_DEPTH-8] )
-    {
-        x264_log( h, X264_LOG_ERROR, "MV cost test failed: x264 has been miscompiled!\n" );
-        goto fail;
-    }
-
     /* Must be volatile or else GCC will optimize it out. */
     volatile int temp = 392;
     if( x264_clz( temp ) != 23 )
diff --git a/encoder/me.h b/encoder/me.h
index 7f48cc66..505e3ce1 100644
--- a/encoder/me.h
+++ b/encoder/me.h
@@ -66,8 +66,6 @@ void x264_me_refine_bidir_rd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_wei
 void x264_me_refine_bidir_satd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight );
 uint64_t x264_rd_cost_part( x264_t *h, int i_lambda2, int i8, int i_pixel );
 
-extern uint16_t *x264_cost_mv_fpel[QP_MAX+1][4];
-
 #define COPY1_IF_LT(x,y)\
 if( (y) < (x) )\
     (x) = (y);