[x264-devel] analyse: Reduce the size of the cost_mv arrays
Henrik Gramner
git at videolan.org
Mon Jun 26 21:58:17 CEST 2017
x264 | branch: master | Henrik Gramner <henrik at gramner.com> | Sun Feb 19 10:48:33 2017 +0100| [c9d2c1c80b25c6ae15c41b200ec44ac2dabce725] | committer: Anton Mitrofanov
analyse: Reduce the size of the cost_mv arrays
Use a dynamic size depending on the MV range. Reduces memory consumption by
up to a few megabytes.
Drop a related old miscompilation check since it may otherwise cause an
out-of-bounds memory access.
Also remove an unused extern variable declaration.
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=c9d2c1c80b25c6ae15c41b200ec44ac2dabce725
---
encoder/analyse.c | 30 +++++++++++++++++-------------
encoder/encoder.c | 8 --------
encoder/me.h | 2 --
3 files changed, 17 insertions(+), 23 deletions(-)
diff --git a/encoder/analyse.c b/encoder/analyse.c
index 8bb83fbf..036d6c15 100644
--- a/encoder/analyse.c
+++ b/encoder/analyse.c
@@ -264,29 +264,31 @@ static uint16_t x264_cost_i4x4_mode[(QP_MAX+2)*32];
static int init_costs( x264_t *h, float *logs, int qp )
{
- int lambda = x264_lambda_tab[qp];
if( h->cost_mv[qp] )
return 0;
+
+ int mv_range = h->param.analyse.i_mv_range;
+ int lambda = x264_lambda_tab[qp];
/* factor of 4 from qpel, 2 from sign, and 2 because mv can be opposite from mvp */
- CHECKED_MALLOC( h->cost_mv[qp], (4*4*2048 + 1) * sizeof(uint16_t) );
- h->cost_mv[qp] += 2*4*2048;
- for( int i = 0; i <= 2*4*2048; i++ )
+ CHECKED_MALLOC( h->cost_mv[qp], (4*4*mv_range + 1) * sizeof(uint16_t) );
+ h->cost_mv[qp] += 2*4*mv_range;
+ for( int i = 0; i <= 2*4*mv_range; i++ )
{
h->cost_mv[qp][-i] =
- h->cost_mv[qp][i] = X264_MIN( lambda * logs[i] + .5f, (1<<16)-1 );
+ h->cost_mv[qp][i] = X264_MIN( (int)(lambda * logs[i] + .5f), UINT16_MAX );
}
x264_pthread_mutex_lock( &cost_ref_mutex );
for( int i = 0; i < 3; i++ )
for( int j = 0; j < 33; j++ )
- x264_cost_ref[qp][i][j] = X264_MIN( i ? lambda * bs_size_te( i, j ) : 0, (1<<16)-1 );
+ x264_cost_ref[qp][i][j] = i ? X264_MIN( lambda * bs_size_te( i, j ), UINT16_MAX ) : 0;
x264_pthread_mutex_unlock( &cost_ref_mutex );
if( h->param.analyse.i_me_method >= X264_ME_ESA && !h->cost_mv_fpel[qp][0] )
{
for( int j = 0; j < 4; j++ )
{
- CHECKED_MALLOC( h->cost_mv_fpel[qp][j], (4*2048 + 1) * sizeof(uint16_t) );
- h->cost_mv_fpel[qp][j] += 2*2048;
- for( int i = -2*2048; i < 2*2048; i++ )
+ CHECKED_MALLOC( h->cost_mv_fpel[qp][j], (4*mv_range + 1) * sizeof(uint16_t) );
+ h->cost_mv_fpel[qp][j] += 2*mv_range;
+ for( int i = -2*mv_range; i < 2*mv_range; i++ )
h->cost_mv_fpel[qp][j][i] = h->cost_mv[qp][i*4+j];
}
}
@@ -300,12 +302,13 @@ fail:
int x264_analyse_init_costs( x264_t *h )
{
- float *logs = x264_malloc( (2*4*2048+1) * sizeof(float) );
+ int mv_range = h->param.analyse.i_mv_range;
+ float *logs = x264_malloc( (2*4*mv_range+1) * sizeof(float) );
if( !logs )
return -1;
logs[0] = 0.718f;
- for( int i = 1; i <= 2*4*2048; i++ )
+ for( int i = 1; i <= 2*4*mv_range; i++ )
logs[i] = log2f( i+1 ) * 2.0f + 1.718f;
for( int qp = X264_MIN( h->param.rc.i_qp_min, QP_MAX_SPEC ); qp <= h->param.rc.i_qp_max; qp++ )
@@ -324,13 +327,14 @@ fail:
void x264_analyse_free_costs( x264_t *h )
{
+ int mv_range = h->param.analyse.i_mv_range;
for( int i = 0; i < QP_MAX+1; i++ )
{
if( h->cost_mv[i] )
- x264_free( h->cost_mv[i] - 2*4*2048 );
+ x264_free( h->cost_mv[i] - 2*4*mv_range );
if( h->cost_mv_fpel[i][0] )
for( int j = 0; j < 4; j++ )
- x264_free( h->cost_mv_fpel[i][j] - 2*2048 );
+ x264_free( h->cost_mv_fpel[i][j] - 2*mv_range );
}
}
diff --git a/encoder/encoder.c b/encoder/encoder.c
index 053b4cf2..0b379aef 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -1593,14 +1593,6 @@ x264_t *x264_encoder_open( x264_param_t *param )
if( x264_analyse_init_costs( h ) )
goto fail;
- static const uint16_t cost_mv_correct[7] = { 24, 47, 95, 189, 379, 757, 1515 };
- /* Checks for known miscompilation issues. */
- if( h->cost_mv[X264_LOOKAHEAD_QP][2013] != cost_mv_correct[BIT_DEPTH-8] )
- {
- x264_log( h, X264_LOG_ERROR, "MV cost test failed: x264 has been miscompiled!\n" );
- goto fail;
- }
-
/* Must be volatile or else GCC will optimize it out. */
volatile int temp = 392;
if( x264_clz( temp ) != 23 )
diff --git a/encoder/me.h b/encoder/me.h
index 7f48cc66..505e3ce1 100644
--- a/encoder/me.h
+++ b/encoder/me.h
@@ -66,8 +66,6 @@ void x264_me_refine_bidir_rd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_wei
void x264_me_refine_bidir_satd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight );
uint64_t x264_rd_cost_part( x264_t *h, int i_lambda2, int i8, int i_pixel );
-extern uint16_t *x264_cost_mv_fpel[QP_MAX+1][4];
-
#define COPY1_IF_LT(x,y)\
if( (y) < (x) )\
(x) = (y);
More information about the x264-devel
mailing list