diff --git a/Makefile b/Makefile index fb97d25..0cfec2c 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ SRCS = common/mc.c common/predict.c common/pixel.c common/macroblock.c \ common/frame.c common/dct.c common/cpu.c common/cabac.c \ common/common.c common/mdate.c common/set.c \ common/quant.c common/vlc.c \ - encoder/analyse.c encoder/me.c encoder/ratecontrol.c \ + encoder/analyse.c encoder/me.c encoder/ratecontrol.c encoder/lookahead.c\ encoder/set.c encoder/macroblock.c encoder/cabac.c \ encoder/cavlc.c encoder/encoder.c @@ -19,6 +19,7 @@ ifeq ($(VIS),yes) SRCS += common/visualize.c common/display-x11.c endif + # MMX/SSE optims ifneq ($(AS),) X86SRC0 = cabac-a.asm dct-a.asm deblock-a.asm mc-a.asm mc-a2.asm \ diff --git a/common/common.c b/common/common.c index c163e09..15d4f7d 100644 --- a/common/common.c +++ b/common/common.c @@ -533,6 +533,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value ) p->rc.f_ip_factor = atof(value); OPT2("pbratio", "pb-factor") p->rc.f_pb_factor = atof(value); + OPT("lookahead") + p->rc.i_lookahead = atoi(value); OPT("aq-mode") p->rc.i_aq_mode = atoi(value); OPT("aq-strength") @@ -718,6 +720,16 @@ void *x264_malloc( int i_size ) } /**************************************************************************** + * x264_mallocz: zero-filled x264_malloc; returns NULL if the allocation fails + ****************************************************************************/ +void *x264_mallocz( int i_size ) +{ + void *p = x264_malloc( i_size ); + if( p ) memset( p, 0, i_size ); /* never memset a failed allocation */ + return p; +} + +/**************************************************************************** * x264_free: ****************************************************************************/ void x264_free( void *p ) @@ -770,9 +782,9 @@ void x264_reduce_fraction( int *n, int *d ) c = a % b; while(c) { - a = b; - b = c; - c = a % b; + a = b; + b = c; + c = a % b; } *n /= b; *d /= b; @@ -878,6 +890,9 @@ char *x264_param2string( x264_param_t *p, int b_res ) if( p->rc.i_vbv_buffer_size ) s += sprintf( s, " 
vbv_maxrate=%d vbv_bufsize=%d", p->rc.i_vbv_max_bitrate, p->rc.i_vbv_buffer_size ); + if( !p->rc.b_stat_read ) + s += sprintf( s, " lookahead=%d", p->rc.i_lookahead ); + } else if( p->rc.i_rc_method == X264_RC_CQP ) s += sprintf( s, " qp=%d", p->rc.i_qp_constant ); diff --git a/common/common.h b/common/common.h index 78b1efb..f94878f 100644 --- a/common/common.h +++ b/common/common.h @@ -24,6 +24,14 @@ #ifndef X264_COMMON_H #define X264_COMMON_H + +/**************************************************************************** + * Error handling + ****************************************************************************/ +#define X264_SUCCESS 0 +#define X264_ERROR_UNSPECIFIED 1 +#define X264_ERROR 1 + /**************************************************************************** * Macros ****************************************************************************/ @@ -82,6 +90,7 @@ /* x264_malloc : will do or emulate a memalign * you have to use x264_free for buffers allocated with x264_malloc */ void *x264_malloc( int ); +void *x264_mallocz( int ); void *x264_realloc( void *p, int i_size ); void x264_free( void * ); @@ -209,6 +218,21 @@ typedef struct } x264_slice_header_t; +typedef struct x264_lookahead_t +{ + int b_thread_active; + int b_exit_thread; + int i_last_idr_planned; + x264_pthread_t thread_handle; + x264_frame_t *last_nonb_planned; + /* Using a synch list here since encoder.c pushes frames back to this list */ + synch_frame_list_t next; + /* Input buffer (for next) */ + synch_frame_list_t ifbuf; + /* Output buffer (frames moved out of next once their types are decided) */ + synch_frame_list_t ofbuf; +} x264_lookahead_t; + /* From ffmpeg */ #define X264_SCAN8_SIZE (6*8) @@ -318,11 +342,11 @@ struct x264_t struct { /* Frames to be encoded (whose types have been decided) */ - x264_frame_t *current[X264_BFRAME_MAX*4+3]; + x264_frame_t **current; /* Temporary buffer (frames types not yet decided) */ x264_frame_t *next[X264_BFRAME_MAX*4+3]; /* Unused frames */ - x264_frame_t *unused[X264_BFRAME_MAX*4 + 
X264_THREAD_MAX*2 + 16+4]; + x264_frame_t **unused; /* For adaptive B decision */ x264_frame_t *last_nonb; @@ -340,7 +364,6 @@ struct x264_t int b_have_lowres; /* Whether 1/2 resolution luma planes are being used */ int b_have_sub8x8_esa; } frames; - /* current frame being encoded */ x264_frame_t *fenc; @@ -623,6 +646,7 @@ struct x264_t #if VISUALIZE struct visualize_t *visualize; #endif + x264_lookahead_t *lookahead; }; // included at the end because it needs x264_t diff --git a/common/frame.c b/common/frame.c index a51dc29..721e3be 100644 --- a/common/frame.c +++ b/common/frame.c @@ -935,8 +935,7 @@ void x264_frame_push_unused( x264_t *h, x264_frame_t *frame ) assert( frame->i_reference_count > 0 ); frame->i_reference_count--; if( frame->i_reference_count == 0 ) - x264_frame_push( h->frames.unused, frame ); - assert( h->frames.unused[ sizeof(h->frames.unused) / sizeof(*h->frames.unused) - 1 ] == NULL ); + x264_frame_push( h->frames.unused, frame ); } x264_frame_t *x264_frame_pop_unused( x264_t *h ) @@ -971,3 +970,114 @@ void x264_frame_sort( x264_frame_t **list, int b_dts ) } } while( !b_ok ); } + +void x264_frame_delete_list( x264_frame_t **frame_list ) +{ + int i; + + if ( !frame_list ) + return; + + for( i = 0; frame_list[i]; i++ ) + x264_frame_delete( frame_list[i] ); + + x264_free( frame_list ); +} + +int x264_synch_frame_list_init( synch_frame_list_t *slist, int max_size ) +{ + slist->i_max_size = max_size; + assert( slist->i_max_size ); + slist->list = x264_mallocz( (max_size + 3) * sizeof(x264_frame_t*) ); + if ( !slist->list ) + return X264_ERROR; + + if ( x264_pthread_mutex_init( &slist->mutex, NULL ) + || x264_pthread_cond_init( &slist->cv_full, NULL ) + || x264_pthread_cond_init( &slist->cv_empty, NULL ) ) + { + return X264_ERROR; + } + return X264_SUCCESS; +} + +void x264_synch_frame_list_invalidate( synch_frame_list_t *slist ) +{ + slist->i_max_size = -1; //safe -- not changed since initialization + + x264_pthread_cond_broadcast( &slist->cv_full ); 
+ x264_pthread_cond_broadcast( &slist->cv_empty ); +} + +int x264_synch_frame_list_destroy( synch_frame_list_t *slist ) +{ + + if ( slist->i_max_size > 0 ) + return X264_ERROR; + + if ( x264_pthread_mutex_destroy( &slist->mutex ) + || x264_pthread_cond_destroy( &slist->cv_full ) + || x264_pthread_cond_destroy( &slist->cv_empty ) ) + { + return X264_ERROR; + } + + x264_frame_delete_list( slist->list ); + slist->list = NULL; + + return X264_SUCCESS; +} + +void x264_synch_frame_list_put( synch_frame_list_t *slist, x264_frame_t *frame ) +{ + if ( slist->i_max_size < 1 ) return; + x264_pthread_mutex_lock( &slist->mutex ); /* cond_wait below must be entered with the mutex held */ + while ( ( slist->i_max_size > 0 ) && (slist->list[slist->i_max_size - 1] ) ) + x264_pthread_cond_wait( &slist->cv_empty, &slist->mutex ); + + x264_frame_push( slist->list,frame ); + x264_pthread_mutex_unlock( &slist->mutex ); + if ( x264_synch_frame_list_get_size(slist) >= slist->i_max_size) + x264_pthread_cond_broadcast( &slist->cv_full ); +} + +x264_frame_t * x264_synch_frame_list_get( synch_frame_list_t *slist ) +{ + x264_frame_t *frame = NULL; + if ( slist->i_max_size < 1 ) return NULL; + x264_pthread_mutex_lock( &slist->mutex ); + + while ( ( slist->i_max_size > 0 ) && (!slist->list[0]) ) + x264_pthread_cond_wait( &slist->cv_full, &slist->mutex ); + + frame = x264_frame_shift( slist->list ); + x264_pthread_cond_broadcast( &slist->cv_empty ); + + x264_pthread_mutex_unlock( &slist->mutex ); + + return frame; +} + +int x264_synch_frame_list_get_size( synch_frame_list_t *slist ) +{ + int fno = 0; + + x264_pthread_mutex_lock( &slist->mutex ); + while (slist->list[fno]) fno++; + x264_pthread_mutex_unlock( &slist->mutex ); + return fno; +} + +void x264_synch_frame_list_sort_pts( synch_frame_list_t *slist ) +{ + x264_pthread_mutex_lock( &slist->mutex ); + x264_frame_sort_pts(slist->list); + x264_pthread_mutex_unlock( &slist->mutex ); +} + +void x264_synch_frame_list_unshift( synch_frame_list_t *slist, x264_frame_t *frame ) +{ + x264_pthread_mutex_lock( 
&slist->mutex ); + x264_frame_unshift( slist->list, frame ); + x264_pthread_mutex_unlock( &slist->mutex ); +} diff --git a/common/frame.h b/common/frame.h index aad77f5..a6f3a1d 100644 --- a/common/frame.h +++ b/common/frame.h @@ -93,6 +93,17 @@ typedef struct } x264_frame_t; +// synchronized frame list +typedef struct +{ + x264_frame_t **list; + char name[0x100]; + int i_max_size; + x264_pthread_mutex_t mutex; + x264_pthread_cond_t cv_full; + x264_pthread_cond_t cv_empty; +} synch_frame_list_t; + typedef void (*x264_deblock_inter_t)( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ); typedef void (*x264_deblock_intra_t)( uint8_t *pix, int stride, int alpha, int beta ); typedef struct @@ -135,7 +146,21 @@ x264_frame_t *x264_frame_shift( x264_frame_t **list ); void x264_frame_push_unused( x264_t *h, x264_frame_t *frame ); x264_frame_t *x264_frame_pop_unused( x264_t *h ); void x264_frame_sort( x264_frame_t **list, int b_dts ); +void x264_frame_delete_list( x264_frame_t **frame_list ); + +int x264_synch_frame_list_init( synch_frame_list_t *slist, int nelem ); +void x264_synch_frame_list_invalidate( synch_frame_list_t *slist ); +int x264_synch_frame_list_destroy( synch_frame_list_t *slist ); +void x264_synch_frame_list_put( synch_frame_list_t *slist, x264_frame_t *frame ); +x264_frame_t *x264_synch_frame_list_get( synch_frame_list_t *slist ); + +int x264_synch_frame_list_get_size( synch_frame_list_t *slist ); +void x264_synch_frame_list_sort_pts( synch_frame_list_t *slist ); +void x264_synch_frame_list_unshift( synch_frame_list_t *slist, x264_frame_t *frame ); + #define x264_frame_sort_dts(list) x264_frame_sort(list, 1) #define x264_frame_sort_pts(list) x264_frame_sort(list, 0) + + #endif diff --git a/common/osdep.h b/common/osdep.h index c9dd3c4..6aa04c4 100644 --- a/common/osdep.h +++ b/common/osdep.h @@ -84,6 +84,14 @@ #define NOINLINE #endif +#if ( defined(__MINGW32__) || defined(_MSC_VER)) +#define usleep(t) + +#else +#include <unistd.h> +#endif //usleep + + /* 
threads */ #if defined(SYS_BEOS) #include @@ -108,19 +116,25 @@ #endif //SYS_* #ifdef USE_REAL_PTHREAD -#define x264_pthread_t pthread_t -#define x264_pthread_create pthread_create -#define x264_pthread_join pthread_join -#define x264_pthread_mutex_t pthread_mutex_t -#define x264_pthread_mutex_init pthread_mutex_init -#define x264_pthread_mutex_destroy pthread_mutex_destroy -#define x264_pthread_mutex_lock pthread_mutex_lock -#define x264_pthread_mutex_unlock pthread_mutex_unlock -#define x264_pthread_cond_t pthread_cond_t -#define x264_pthread_cond_init pthread_cond_init -#define x264_pthread_cond_destroy pthread_cond_destroy -#define x264_pthread_cond_broadcast pthread_cond_broadcast -#define x264_pthread_cond_wait pthread_cond_wait +#define x264_pthread_t pthread_t +#define x264_pthread_create pthread_create +#define x264_pthread_join pthread_join +#define x264_pthread_mutex_t pthread_mutex_t +#define x264_pthread_mutex_init pthread_mutex_init +#define x264_pthread_mutex_destroy pthread_mutex_destroy +#define x264_pthread_mutex_lock pthread_mutex_lock +#define x264_pthread_mutex_unlock pthread_mutex_unlock +#define x264_pthread_cond_t pthread_cond_t +#define x264_pthread_cond_init pthread_cond_init +#define x264_pthread_cond_destroy pthread_cond_destroy +#define x264_pthread_cond_broadcast pthread_cond_broadcast +#define x264_pthread_cond_signal pthread_cond_signal +#define x264_pthread_cond_wait pthread_cond_wait +#define x264_pthread_cond_timedwait pthread_cond_timedwait +#define x264_pthread_self pthread_self +#define x264_pthread_mutexattr_init pthread_mutexattr_init +#define x264_pthread_mutexattr_destroy pthread_mutexattr_destroy +#define x264_pthread_mutexattr_settype pthread_mutexattr_settype #else #define x264_pthread_mutex_t int #define x264_pthread_mutex_init(m,f) @@ -131,7 +145,10 @@ #define x264_pthread_cond_init(c,f) #define x264_pthread_cond_destroy(c) #define x264_pthread_cond_broadcast(c) +#define x264_pthread_cond_signal(c) #define 
x264_pthread_cond_wait(c,m) +#define x264_pthread_cond_timedwait(c,m,t) usleep(100) +#define x264_pthread_self() #endif #define WORD_SIZE sizeof(void*) diff --git a/encoder/analyse.h b/encoder/analyse.h index b8c828f..f89dbd7 100644 --- a/encoder/analyse.h +++ b/encoder/analyse.h @@ -27,4 +27,13 @@ void x264_macroblock_analyse( x264_t *h ); void x264_slicetype_decide( x264_t *h ); +// lookahead functions +int x264_lookahead_init( x264_t *h ); +int x264_lookahead_empty( x264_t *h ); +void x264_lookahead_put_frame( x264_t *h, x264_frame_t *frame ); +void x264_lookahead_get_frames( x264_t *h ); +int x264_lookahead_done_frames( x264_t *h ); +int x264_lookahead_try_frames( x264_t *h ); +int x264_lookahead_destroy( x264_t *h, int thread_handle); + #endif diff --git a/encoder/encoder.c b/encoder/encoder.c index 9326a39..c3eebf2 100644 --- a/encoder/encoder.c +++ b/encoder/encoder.c @@ -372,6 +372,9 @@ static int x264_validate_parameters( x264_t *h ) #endif } + if( h->param.rc.i_lookahead < 0 || h->param.i_threads < 2 ) + h->param.rc.i_lookahead = 0; + if( h->param.b_interlaced ) { if( h->param.analyse.i_me_method >= X264_ME_ESA ) @@ -699,9 +702,10 @@ x264_t *x264_encoder_open ( x264_param_t *param ) /* Init frames. 
*/ if( h->param.i_bframe_adaptive == X264_B_ADAPT_TRELLIS ) - h->frames.i_delay = X264_MAX(h->param.i_bframe,3)*4 + h->param.i_threads - 1; + h->frames.i_delay = h->param.rc.i_lookahead + X264_MAX(h->param.i_bframe,3)*4 + h->param.i_threads - 1; else - h->frames.i_delay = h->param.i_bframe + h->param.i_threads - 1; + h->frames.i_delay = h->param.rc.i_lookahead + h->param.i_bframe + h->param.i_threads - 1; + h->frames.i_max_ref0 = h->param.i_frame_reference; h->frames.i_max_ref1 = h->sps->vui.i_num_reorder_frames; h->frames.i_max_dpb = h->sps->vui.i_max_dec_frame_buffering; @@ -714,9 +718,13 @@ x264_t *x264_encoder_open ( x264_param_t *param ) h->frames.b_have_sub8x8_esa = !!(h->param.analyse.inter & X264_ANALYSE_PSUB8x8); h->frames.i_last_idr = - h->param.i_keyint_max; + h->frames.i_input = 0; h->frames.last_nonb = NULL; + h->frames.current = x264_mallocz( (h->param.rc.i_lookahead + h->param.i_bframe + 3) * sizeof(x264_frame_t*) ); + h->frames.unused = x264_mallocz( (h->param.rc.i_lookahead + h->param.i_bframe + h->param.i_threads*2 + 16+4) * sizeof(x264_frame_t*) ); + h->i_ref0 = 0; h->i_ref1 = 0; @@ -768,10 +776,10 @@ x264_t *x264_encoder_open ( x264_param_t *param ) h->thread[0] = h; h->i_thread_num = 0; - for( i = 1; i < h->param.i_threads; i++ ) - h->thread[i] = x264_malloc( sizeof(x264_t) ); + for( i = 1; i <= h->param.i_threads; i++ ) + h->thread[i] = x264_mallocz( sizeof(x264_t) ); - for( i = 0; i < h->param.i_threads; i++ ) + for( i = 0; i <= h->param.i_threads; i++ ) { if( i > 0 ) *h->thread[i] = *h; @@ -781,6 +789,12 @@ x264_t *x264_encoder_open ( x264_param_t *param ) return NULL; } + if( x264_lookahead_init( h ) != X264_SUCCESS ) + { + x264_log(h, X264_LOG_ERROR, "%s:%d: x264_lookahead_init( h ) failed\n", __FILE__, __LINE__); + return NULL; + } + if( x264_ratecontrol_new( h ) < 0 ) return NULL; @@ -1078,8 +1092,8 @@ static inline void x264_reference_update( x264_t *h ) } /* adaptive B decision needs a pointer, since it can't use the ref lists */ - if( 
h->sh.i_type != SLICE_TYPE_B ) - h->frames.last_nonb = h->fdec; + if( (h->param.rc.i_lookahead == 0) && (h->sh.i_type != SLICE_TYPE_B) ) + h->lookahead->last_nonb_planned = h->fdec; /* move frame in the buffer */ x264_frame_push( h->frames.reference, h->fdec ); @@ -1303,6 +1317,7 @@ static void x264_thread_sync_context( x264_t *dst, x264_t *src ) // copy everything except the per-thread pointers and the constants. memcpy( &dst->i_frame, &src->i_frame, offsetof(x264_t, mb.type) - offsetof(x264_t, i_frame) ); + dst->stat = src->stat; } @@ -1364,7 +1379,6 @@ int x264_encoder_encode( x264_t *h, x264_t *thread_current, *thread_prev, *thread_oldest; int i_nal_type; int i_nal_ref_idc; - int i_global_qp; if( h->param.i_threads > 1) @@ -1374,10 +1388,11 @@ int x264_encoder_encode( x264_t *h, thread_current = h->thread[ i%t ]; thread_prev = h->thread[ (i-1)%t ]; thread_oldest = h->thread[ (i+1)%t ]; + x264_thread_sync_context( thread_current, thread_prev ); x264_thread_sync_ratecontrol( thread_current, thread_prev, thread_oldest ); + h = thread_current; -// fprintf(stderr, "current: %p prev: %p oldest: %p \n", thread_current, thread_prev, thread_oldest); } else { @@ -1393,7 +1408,6 @@ int x264_encoder_encode( x264_t *h, /* no data out */ *pi_nal = 0; *pp_nal = NULL; - /* ------------------- Setup new frame from picture -------------------- */ if( pic_in != NULL ) { @@ -1407,56 +1421,40 @@ int x264_encoder_encode( x264_t *h, h->param.i_height != 16 * h->sps->i_mb_height ) x264_frame_expand_border_mod16( h, fenc ); - fenc->i_frame = h->frames.i_input++; - - x264_frame_push( h->frames.next, fenc ); - if( h->frames.b_have_lowres ) x264_frame_init_lowres( h, fenc ); if( h->param.rc.i_aq_mode ) x264_adaptive_quant_frame( h, fenc ); - if( h->frames.i_input <= h->frames.i_delay + 1 - h->param.i_threads ) - { - /* Nothing yet to encode */ - /* waiting for filling bframe buffer */ - pic_out->i_type = X264_TYPE_AUTO; - return 0; - } + /* 2: Place the frame into the queue for its slice 
type decision */ + x264_lookahead_put_frame( h, fenc ); } - if( h->frames.current[0] == NULL ) + if( h->frames.i_input <= h->frames.i_delay + 1 - h->param.i_threads ) { - int bframes = 0; - /* 2: Select frame types */ - if( h->frames.next[0] == NULL ) - { - x264_encoder_frame_end( thread_oldest, thread_current, pp_nal, pi_nal, pic_out ); - return 0; - } - - x264_stack_align( x264_slicetype_decide, h ); + /* Nothing yet to encode */ + /* waiting for filling bframe buffer */ + pic_out->i_type = X264_TYPE_AUTO; + return 0; + } - /* 3: move some B-frames and 1 non-B to encode queue */ - while( IS_X264_TYPE_B( h->frames.next[bframes]->i_type ) ) - bframes++; - x264_frame_push( h->frames.current, x264_frame_shift( &h->frames.next[bframes] ) ); - /* FIXME: when max B-frames > 3, BREF may no longer be centered after GOP closing */ - if( h->param.b_bframe_pyramid && bframes > 1 ) - { - x264_frame_t *mid = x264_frame_shift( &h->frames.next[bframes/2] ); - mid->i_type = X264_TYPE_BREF; - x264_frame_push( h->frames.current, mid ); - bframes--; - } - while( bframes-- ) - x264_frame_push( h->frames.current, x264_frame_shift( h->frames.next ) ); + if( !h->frames.current[0] && x264_lookahead_empty( h ) ) + { + x264_encoder_frame_end( thread_oldest, thread_current, pp_nal, pi_nal, pic_out ); + return 0; } + + /* 3: The picture is analyzed in the lookahead (or when x264_lookahead_get_frames is called if threads==1 */ + x264_lookahead_get_frames( h ); /* ------------------- Get frame to be encoded ------------------------- */ /* 4: get picture to encode */ + if( h->frames.current[0] == NULL) + return 0; /* Nothing to do yet */ + h->fenc = x264_frame_shift( h->frames.current ); + if( h->fenc == NULL ) { /* Nothing yet to encode (ex: waiting for I/P with B frames) */ @@ -1470,6 +1468,7 @@ do_encode: if( h->fenc->i_type == X264_TYPE_IDR ) { h->frames.i_last_idr = h->fenc->i_frame; + h->i_frame_num = 0; } /* ------------------- Setup frame context ----------------------------- */ @@ 
-1666,7 +1665,7 @@ do_encode: if( h->param.i_bframe_adaptive || b > 1 ) h->fenc->i_type = X264_TYPE_AUTO; x264_frame_sort_pts( h->frames.current ); - x264_frame_unshift( h->frames.next, h->fenc ); + x264_synch_frame_list_unshift( &h->lookahead->next, h->fenc ); h->fenc = h->frames.current[b-1]; h->frames.current[b-1] = NULL; h->fenc->i_type = X264_TYPE_P; @@ -1675,17 +1674,14 @@ do_encode: /* Do IDR if needed */ else if( i_gop_size >= h->param.i_keyint_min ) { - /* Reset */ - h->i_frame_num = 0; - /* Reinit field of fenc */ h->fenc->i_type = X264_TYPE_IDR; h->fenc->i_poc = 0; /* Put enqueued frames back in the pool */ while( h->frames.current[0] ) - x264_frame_push( h->frames.next, x264_frame_shift( h->frames.current ) ); - x264_frame_sort_pts( h->frames.next ); + x264_synch_frame_list_put( &h->lookahead->next, x264_frame_shift( h->frames.current ) ); + x264_synch_frame_list_sort_pts( &h->lookahead->next ); } else { @@ -1881,6 +1877,8 @@ void x264_encoder_close ( x264_t *h ) || h->stat.i_mb_count[SLICE_TYPE_P][I_PCM] || h->stat.i_mb_count[SLICE_TYPE_B][I_PCM]; + x264_lookahead_destroy(h->thread[h->param.i_threads], h->thread_handle); + for( i=0; i<h->param.i_threads; i++ ) { // don't strictly have to wait for the other threads, but it's simpler than canceling them @@ -2075,6 +2073,8 @@ void x264_encoder_close ( x264_t *h ) x264_log( h, X264_LOG_INFO, "kb/s:%.1f\n", f_bitrate ); } + + /* rc */ x264_ratecontrol_delete( h ); @@ -2129,4 +2129,6 @@ void x264_encoder_close ( x264_t *h ) x264_free( h->thread[i]->out.p_bitstream ); x264_free( h->thread[i] ); } + x264_free( h->frames.current ); + x264_free( h->frames.unused ); } diff --git a/encoder/lookahead.c b/encoder/lookahead.c new file mode 100644 index 0000000..b8888d8 --- /dev/null +++ b/encoder/lookahead.c @@ -0,0 +1,286 @@ +/***************************************************************************** + * c: h264 encoder library + ***************************************************************************** + * 
Copyright (C) 2008 Avail Media + * + * Authors: Alex Giladi + * Michael Kazmier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. + *****************************************************************************/ + +/* LOOKAHEAD (threaded and non-threaded mode + * + * Lookahead types: + * [1] Slice type / scene cut; + * + * In non-threaded mode, we run the existing slicetype decision code as it was + * In threaded mode, we run in a separate thread, that lives between the calls + * to x264_encoder_open() and x264_encoder_close(), and performs lookahead for + * the number of frames specified in rc_lookahead + */ + +#include "common/osdep.h" +#include "common/common.h" +#include "common/cpu.h" + +#include "analyse.h" +#include "ratecontrol.h" + +#define LA_FBUF_SIZE (h->param.rc.i_lookahead + h->param.i_bframe + h->param.i_threads*2 + 6) + +void x264_lookahead_thread( x264_t *h ); +void x264_lookahead_thread_worker( x264_t *h ); +void x264_lookahead_shift(x264_t *h, x264_frame_t **dst , x264_frame_t **src); + +void x264_lookahead_shift(x264_t *h, x264_frame_t **dst , x264_frame_t **src) +{ + int bframes = 0; + int i_frames = 0; + + while( src[i_frames] ) + { + if ( IS_X264_TYPE_B( src[i_frames]->i_type ) ) + bframes++; + else + break; + i_frames++; + } + x264_frame_push( dst, x264_frame_shift( &src[bframes] 
) ); + /* FIXME: when max B-frames > 3, BREF may no longer be centered after GOP closing */ + if( h->param.b_bframe_pyramid && bframes > 1 ) + { + x264_frame_t *mid = x264_frame_shift( &src[bframes/2] ); + mid->i_type = X264_TYPE_BREF; + x264_frame_push( dst, mid ); + bframes--; + } + while( bframes-- ) + x264_frame_push( dst, x264_frame_shift( src ) ); +} + +int x264_lookahead_init( x264_t *h ) +{ + x264_lookahead_t *h_lookahead = x264_mallocz( sizeof( x264_lookahead_t ) ); + int i; + for( i=0; i <= h->param.i_threads; i++ ) + h->thread[i]->lookahead = h_lookahead; + + h_lookahead->b_thread_active = 0; + h_lookahead->thread_handle = 0; + h_lookahead->b_exit_thread = 1; + h_lookahead->i_last_idr_planned = - h->param.i_keyint_max; + h_lookahead->last_nonb_planned = NULL; + h_lookahead->ifbuf.i_max_size = -1; + h_lookahead->ofbuf.i_max_size = -1; + + if ( (x264_synch_frame_list_init( &h_lookahead->ifbuf,LA_FBUF_SIZE ) != X264_SUCCESS ) + || ( x264_synch_frame_list_init( &h_lookahead->ofbuf,LA_FBUF_SIZE ) != X264_SUCCESS ) + || ( x264_synch_frame_list_init( &h_lookahead->next,LA_FBUF_SIZE ) != X264_SUCCESS ) ) + { + x264_log( h, X264_LOG_ERROR, + "failed to initialize synchronized lists\n"); + + return X264_ERROR; + } + + sprintf(h_lookahead->ifbuf.name, "ifbuf"); + sprintf(h_lookahead->ofbuf.name, "ofbuf"); + sprintf(h_lookahead->next.name, "next"); + + if ( h->param.i_bframe == 0 ) + { + x264_log( h, X264_LOG_ERROR, "currently slicetype lookahead does not work with bframes=0 and is disabled\n"); + h->param.rc.i_lookahead = 0; + return X264_SUCCESS; + } + + if ( ( h->param.rc.i_lookahead < 1 ) || ( h->param.i_threads < 2 ) ) + return X264_SUCCESS; + +#ifdef HAVE_PTHREAD + + h_lookahead->b_exit_thread = 0; + + if ( x264_pthread_create( + &h->thread_handle, NULL, + (void *)x264_lookahead_thread, (void *)h->thread[h->param.i_threads] ) ) + { + x264_log( h, X264_LOG_ERROR, + "failed to create lookahead thread\n"); + return X264_ERROR; + } + + while ( 
!h_lookahead->b_thread_active ) + usleep(100); + +#endif //HAVE_PTHREAD + + return X264_SUCCESS; +} + + +void x264_lookahead_put_frame( x264_t *h, x264_frame_t *frame ) +{ + frame->i_frame = h->frames.i_input++; + + if ( h->lookahead->b_thread_active ) + { + x264_synch_frame_list_put(&h->lookahead->ifbuf,frame); + x264_pthread_cond_broadcast( &h->lookahead->ifbuf.cv_full ); + } + else + { + x264_synch_frame_list_put(&h->lookahead->next, frame ); + } +} + +int x264_lookahead_empty( x264_t *h ) +{ + return ( ( x264_synch_frame_list_get_size( &h->lookahead->ifbuf ) == 0 ) + && ( x264_synch_frame_list_get_size( &h->lookahead->ofbuf ) == 0 ) + && ( x264_synch_frame_list_get_size( &h->lookahead->next ) == 0 ) ); +} + +int x264_lookahead_try_frames( x264_t *h ) +{ + return ( x264_synch_frame_list_get_size( &h->lookahead->ifbuf ) ); +} + +int x264_lookahead_done_frames( x264_t *h ) +{ + return ( x264_synch_frame_list_get_size( &h->lookahead->ofbuf ) ); +} + +void x264_lookahead_get_frames( x264_t *h ) +{ + if ( h->lookahead->b_thread_active ) + { + if ( h->frames.current[0] || !h->lookahead->ofbuf.list[0] ) + return; + + x264_pthread_mutex_lock( &h->lookahead->ofbuf.mutex ); + x264_lookahead_shift( h, h->frames.current, h->lookahead->ofbuf.list); + x264_pthread_mutex_unlock( &h->lookahead->ofbuf.mutex ); + } + else + { + if ( h->frames.current[0] != NULL ) + return; + + x264_stack_align( x264_slicetype_decide, h ); + + int bframes=0; + while( IS_X264_TYPE_B( h->lookahead->next.list[bframes]->i_type ) ) + bframes++; + + h->lookahead->last_nonb_planned = h->lookahead->next.list[bframes]; + + while (bframes--) + x264_frame_push( h->lookahead->ofbuf.list, x264_frame_shift( h->lookahead->next.list) ); // move all b frames + x264_frame_push( h->lookahead->ofbuf.list, x264_frame_shift( h->lookahead->next.list) ); // and the last non-brframe + x264_lookahead_shift( h, h->frames.current, h->lookahead->ofbuf.list); // Now make all b and the last non-bframe available to the current 
list + } +} + +void x264_lookahead_thread( x264_t *h ) +{ +#ifdef HAVE_MMX + /* Misalign mask has to be set separately for each thread. */ + if( h->param.cpu&X264_CPU_SSE_MISALIGN ) + x264_cpu_mask_misalign_sse(); +#endif + x264_stack_align( x264_lookahead_thread_worker, h ); + +} + +void x264_lookahead_thread_worker( x264_t *h) +{ + int quit = 0; + h->lookahead->b_thread_active = 1; +#define QUIT_THREAD(q) ( q = h->lookahead->b_exit_thread ) + while ( !QUIT_THREAD(quit) ) + { + if( ( h->frames.i_input >= h->param.rc.i_lookahead ) && ( h->lookahead->ofbuf.list[0] ) ) // FIXME: can we safely call frames.i_input in this context? + { + x264_pthread_cond_broadcast( &h->lookahead->ofbuf.cv_full ); + } + if( !h->lookahead->ifbuf.list[0] && !h->lookahead->next.list[0]) + usleep(1000); /* Arbitrary number to keep lookahead thread from spinning with nothing to do. */ + + while( ( !QUIT_THREAD(quit) ) + && ( h->lookahead->ifbuf.list[0] ) ) + { + x264_frame_t *frame = x264_synch_frame_list_get( &h->lookahead->ifbuf ); + h->frames.i_input = frame->i_frame; + x264_synch_frame_list_put( &h->lookahead->next, frame); + } + if ( QUIT_THREAD(quit) ) + break; + + int next_list_size = x264_synch_frame_list_get_size( &h->lookahead->next ); + + if ( ( (next_list_size > h->param.i_bframe + 1) && h->lookahead->next.list[0] ) + || ( (h->lookahead->next.list[0] && !h->lookahead->ifbuf.list[0] && !h->lookahead->ofbuf.list[0] && h->frames.i_input >= h->param.i_bframe ) ) ) + { + + x264_pthread_mutex_lock( &h->lookahead->next.mutex ); + + x264_stack_align( x264_slicetype_decide, h ); + int bframes=0; + while( IS_X264_TYPE_B( h->lookahead->next.list[bframes]->i_type ) ) + bframes++; + h->lookahead->last_nonb_planned = h->lookahead->next.list[bframes]; // Set in x264_reference_update when not threading + + x264_pthread_mutex_lock( &h->lookahead->ofbuf.mutex ); + while (bframes--) + x264_frame_push( h->lookahead->ofbuf.list, x264_frame_shift( h->lookahead->next.list) ); // move all b frames + 
x264_frame_push( h->lookahead->ofbuf.list, x264_frame_shift( h->lookahead->next.list) ); // and the last non-bframe + + x264_pthread_mutex_unlock( &h->lookahead->next.mutex ); + x264_pthread_mutex_unlock( &h->lookahead->ofbuf.mutex ); + } + if ( QUIT_THREAD(quit) ) + break; + } + h->lookahead->b_thread_active = 0; +} + + +int x264_lookahead_destroy( x264_t *h, int thread_handle ) +{ + +#ifdef HAVE_PTHREAD + if ( h->b_thread_active ) + { + h->lookahead->b_exit_thread = 1; + usleep(100); + + x264_synch_frame_list_invalidate(&h->lookahead->ofbuf); + x264_synch_frame_list_invalidate(&h->lookahead->ifbuf); + x264_synch_frame_list_invalidate(&h->lookahead->next); + + x264_pthread_join( thread_handle, NULL ); + h->lookahead->thread_handle = 0; + + x264_synch_frame_list_destroy(&h->lookahead->ofbuf); + x264_synch_frame_list_destroy(&h->lookahead->ifbuf); + x264_synch_frame_list_destroy(&h->lookahead->next); + + } +#endif // HAVE_PTHREAD + + return 0; +} diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c index af0a00a..b512d8b 100644 --- a/encoder/ratecontrol.c +++ b/encoder/ratecontrol.c @@ -271,8 +271,7 @@ int x264_ratecontrol_new( x264_t *h ) x264_emms(); - rc = h->rc = x264_malloc( h->param.i_threads * sizeof(x264_ratecontrol_t) ); - memset( rc, 0, h->param.i_threads * sizeof(x264_ratecontrol_t) ); + rc = h->rc = x264_mallocz( h->param.i_threads * sizeof(x264_ratecontrol_t) ); rc->b_abr = h->param.rc.i_rc_method != X264_RC_CQP && !h->param.rc.b_stat_read; rc->b_2pass = h->param.rc.i_rc_method == X264_RC_ABR && h->param.rc.b_stat_read; @@ -490,8 +489,7 @@ int x264_ratecontrol_new( x264_t *h ) return -1; } - rc->entry = (ratecontrol_entry_t*) x264_malloc(rc->num_entries * sizeof(ratecontrol_entry_t)); - memset(rc->entry, 0, rc->num_entries * sizeof(ratecontrol_entry_t)); + rc->entry = (ratecontrol_entry_t*) x264_mallocz(rc->num_entries * sizeof(ratecontrol_entry_t)); /* init all to skipped p frames */ for(i=0; i<rc->num_entries; i++) diff --git a/encoder/slicetype.c 
b/encoder/slicetype.c index e00be6f..3e6741a 100644 --- a/encoder/slicetype.c +++ b/encoder/slicetype.c @@ -439,7 +439,7 @@ static int scenecut( x264_t *h, x264_frame_t *frame, int pdist ) int icost = frame->i_cost_est[0][0]; int pcost = frame->i_cost_est[pdist][0]; float f_bias; - int i_gop_size = frame->i_frame - h->frames.i_last_idr; + int i_gop_size = frame->i_frame - h->lookahead->i_last_idr_planned; float f_thresh_max = h->param.i_scenecut_threshold / 100.0; /* magic numbers pulled out of thin air */ float f_thresh_min = f_thresh_max * h->param.i_keyint_min @@ -483,21 +483,22 @@ static void x264_slicetype_analyse( x264_t *h ) int i_mb_count = NUM_MBS; int cost1p0, cost2p0, cost1b1, cost2p1; int idr_frame_type; + x264_frame_t *last_nonb = h->lookahead->last_nonb_planned; assert( h->frames.b_have_lowres ); - if( !h->frames.last_nonb ) + if( !last_nonb ) return; - frames[0] = h->frames.last_nonb; - for( j = 0; h->frames.next[j] && h->frames.next[j]->i_type == X264_TYPE_AUTO; j++ ) - frames[j+1] = h->frames.next[j]; - keyint_limit = h->param.i_keyint_max - frames[0]->i_frame + h->frames.i_last_idr - 1; + + frames[0] = last_nonb; + for( j = 0; h->lookahead->next.list[j] && h->lookahead->next.list[j]->i_type == X264_TYPE_AUTO; j++ ) + frames[j+1] = h->lookahead->next.list[j]; + keyint_limit = h->param.i_keyint_max - frames[0]->i_frame + h->lookahead->i_last_idr_planned - 1; num_frames = X264_MIN( j, keyint_limit ); if( num_frames == 0 ) return; - x264_lowres_context_init( h, &a ); - idr_frame_type = frames[1]->i_frame - h->frames.i_last_idr >= h->param.i_keyint_min ? X264_TYPE_IDR : X264_TYPE_I; + idr_frame_type = frames[1]->i_frame - h->lookahead->i_last_idr_planned >= h->param.i_keyint_min ? 
X264_TYPE_IDR : X264_TYPE_I; if( num_frames == 1 ) { @@ -509,6 +510,7 @@ no_b_frames: if( scenecut( h, frames[1], 1 ) ) frames[1]->i_type = idr_frame_type; } + return; } @@ -552,7 +554,6 @@ no_b_frames: if( cost1p0 + cost2p0 < cost1b1 + cost2p1 ) goto no_b_frames; - // arbitrary and untuned #define INTER_THRESH 300 #define P_SENS_BIAS (50 - h->param.i_bframe_bias) @@ -580,15 +581,15 @@ void x264_slicetype_decide( x264_t *h ) int bframes; int i; - if( h->frames.next[0] == NULL ) + if( h->lookahead->next.list[0] == NULL ) return; if( h->param.rc.b_stat_read ) { /* Use the frame types from the first pass */ - for( i = 0; h->frames.next[i] != NULL; i++ ) - h->frames.next[i]->i_type = - x264_ratecontrol_slice_type( h, h->frames.next[i]->i_frame ); + for( i = 0; h->lookahead->next.list[i] != NULL; i++ ) + h->lookahead->next.list[i]->i_type = + x264_ratecontrol_slice_type( h, h->lookahead->next.list[i]->i_frame ); } else if( (h->param.i_bframe && h->param.i_bframe_adaptive) || h->param.b_pre_scenecut ) @@ -596,10 +597,9 @@ void x264_slicetype_decide( x264_t *h ) for( bframes = 0;; bframes++ ) { - frm = h->frames.next[bframes]; - + frm = h->lookahead->next.list[bframes]; /* Limit GOP size */ - if( frm->i_frame - h->frames.i_last_idr >= h->param.i_keyint_max ) + if( frm->i_frame - h->lookahead->i_last_idr_planned >= h->param.i_keyint_max ) { if( frm->i_type == X264_TYPE_AUTO ) frm->i_type = X264_TYPE_IDR; @@ -609,19 +609,16 @@ void x264_slicetype_decide( x264_t *h ) if( frm->i_type == X264_TYPE_IDR ) { /* Close GOP */ + h->lookahead->i_last_idr_planned = frm->i_frame; if( bframes > 0 ) { bframes--; - h->frames.next[bframes]->i_type = X264_TYPE_P; - } - else - { - h->i_frame_num = 0; + h->lookahead->next.list[bframes]->i_type = X264_TYPE_P; } } if( bframes == h->param.i_bframe - || h->frames.next[bframes+1] == NULL ) + || h->lookahead->next.list[bframes+1] == NULL ) { if( IS_X264_TYPE_B( frm->i_type ) ) x264_log( h, X264_LOG_WARNING, "specified frame type is not compatible 
with max B-frames\n" ); diff --git a/x264.c b/x264.c index 04bb44c..31b9f2c 100644 --- a/x264.c +++ b/x264.c @@ -197,6 +197,7 @@ static void Help( x264_param_t *defaults, int b_longhelp ) H0( " --ratetol Allowed variance of average bitrate [%.1f]\n", defaults->rc.f_rate_tolerance ); H0( " --ipratio QP factor between I and P [%.2f]\n", defaults->rc.f_ip_factor ); H0( " --pbratio QP factor between P and B [%.2f]\n", defaults->rc.f_pb_factor ); + H0( " --lookahead Buffer within which to run VBV planning [%d]\n", defaults->rc.i_lookahead ); H1( " --chroma-qp-offset QP difference between chroma and luma [%d]\n", defaults->analyse.i_chroma_qp_offset ); H1( " --aq-mode AQ method [%d]\n" " - 0: Disabled\n" @@ -444,6 +445,7 @@ static int Parse( int argc, char **argv, { "vbv-init", required_argument,NULL, 0 }, { "ipratio", required_argument, NULL, 0 }, { "pbratio", required_argument, NULL, 0 }, + { "lookahead", required_argument, NULL, 0 }, { "chroma-qp-offset", required_argument, NULL, 0 }, { "pass", required_argument, NULL, 'p' }, { "stats", required_argument, NULL, 0 }, @@ -715,13 +717,13 @@ static void parse_qpfile( cli_opt_t *opt, x264_picture_t *pic, int i_frame ) { file_pos = ftell( opt->qpfile ); ret = fscanf( opt->qpfile, "%d %c %d\n", &num, &type, &qp ); - if( num > i_frame || ret == EOF ) - { - pic->i_type = X264_TYPE_AUTO; - pic->i_qpplus1 = 0; - fseek( opt->qpfile , file_pos , SEEK_SET ); - break; - } + if( num > i_frame || ret == EOF ) + { + pic->i_type = X264_TYPE_AUTO; + pic->i_qpplus1 = 0; + fseek( opt->qpfile , file_pos , SEEK_SET ); + break; + } if( num < i_frame ) continue; pic->i_qpplus1 = qp+1; diff --git a/x264.h b/x264.h index 820185f..a63fe83 100644 --- a/x264.h +++ b/x264.h @@ -269,6 +269,7 @@ typedef struct x264_param_t float f_vbv_buffer_init; /* <=1: fraction of buffer_size. >1: kbit */ float f_ip_factor; float f_pb_factor; + int i_lookahead; int i_aq_mode; /* psy adaptive QP. (X264_AQ_*) */ float f_aq_strength;