diff --git a/common/common.h b/common/common.h index f2a0c54..fd45f22 100644 --- a/common/common.h +++ b/common/common.h @@ -268,9 +268,30 @@ struct x264_t bs_t bs; int i_frame_size; } out; +//XXX + /* cabac context */ + x264_cabac_t cabac; + + /* Current MB DCT coeffs */ + struct + { + DECLARE_ALIGNED_16( int16_t luma16x16_dc[16] ); + DECLARE_ALIGNED_16( int16_t chroma_dc[2][4] ); + // FIXME share memory? + DECLARE_ALIGNED_16( int16_t luma8x8[4][64] ); + DECLARE_ALIGNED_16( int16_t luma4x4[16+8][16] ); + } dct; + + /* We use only one SPS and one PPS */ + x264_sps_t sps_array[1]; + x264_sps_t *sps; + x264_pps_t pps_array[1]; + x264_pps_t *pps; +//XXX /**** thread synchronization starts here ****/ +/// DON'T MOVE i_frame !!! /* frame number/poc */ int i_frame; @@ -284,11 +305,6 @@ struct x264_t int i_nal_type; /* threads only */ int i_nal_ref_idc; /* threads only */ - /* We use only one SPS and one PPS */ - x264_sps_t sps_array[1]; - x264_sps_t *sps; - x264_pps_t pps_array[1]; - x264_pps_t *pps; int i_idr_pic_id; /* quantization matrix for decoding, [cqm][qp%6][coef_y][coef_x] */ @@ -312,8 +328,6 @@ struct x264_t /* Slice header */ x264_slice_header_t sh; - /* cabac context */ - x264_cabac_t cabac; struct { @@ -355,15 +369,6 @@ struct x264_t - /* Current MB DCT coeffs */ - struct - { - DECLARE_ALIGNED_16( int16_t luma16x16_dc[16] ); - DECLARE_ALIGNED_16( int16_t chroma_dc[2][4] ); - // FIXME share memory? 
- DECLARE_ALIGNED_16( int16_t luma8x8[4][64] ); - DECLARE_ALIGNED_16( int16_t luma4x4[16+8][16] ); - } dct; /* MB table and cache for current frame/mb */ struct @@ -375,50 +380,18 @@ struct x264_t int i_b8_stride; int i_b4_stride; - /* Current index */ - int i_mb_x; - int i_mb_y; - int i_mb_xy; - int i_b8_xy; - int i_b4_xy; - /* Search parameters */ - int i_me_method; - int i_subpel_refine; - int b_chroma_me; - int b_trellis; - int b_noise_reduction; int i_psy_rd; /* Psy RD strength--fixed point value*/ int i_psy_trellis; /* Psy trellis strength--fixed point value*/ int b_interlaced; - /* Allowed qpel MV range to stay within the picture + emulated edge pixels */ - int mv_min[2]; - int mv_max[2]; - /* Subpel MV range for motion search. - * same mv_min/max but includes levels' i_mv_range. */ - int mv_min_spel[2]; - int mv_max_spel[2]; - /* Fullpel MV range for motion search */ - int mv_min_fpel[2]; - int mv_max_fpel[2]; - /* neighboring MBs */ - unsigned int i_neighbour; - unsigned int i_neighbour8[4]; /* neighbours of each 8x8 or 4x4 block that are available */ - unsigned int i_neighbour4[16]; /* at the time the block is coded */ - int i_mb_type_top; - int i_mb_type_left; - int i_mb_type_topleft; - int i_mb_type_topright; - int i_mb_prev_xy; - int i_mb_top_xy; /**** thread synchronization ends here ****/ /* subsequent variables are either thread-local or constant, * and won't be copied from one thread to another */ - +// DON'T MOVE mb.type!!! x264_thread_sync_context() memcpy()s everything from i_frame up to, but not including, mb.type 
/* mb table */ int8_t *type; /* mb type */ int8_t *qp; /* mb qp */ @@ -457,6 +430,47 @@ struct x264_t /* if we've already done MC, we don't need to do it again */ int b_skip_mc; +//XXX + /* Current index */ + int i_mb_x; + int i_mb_y; + int i_mb_xy; + int i_b8_xy; + int i_b4_xy; + + /* neighboring MBs */ + unsigned int i_neighbour; + unsigned int i_neighbour8[4]; /* neighbours of each 8x8 or 4x4 block that are available */ + unsigned int i_neighbour4[16]; /* at the time the block is coded */ + int i_mb_type_top; + int i_mb_type_left; + int i_mb_type_topleft; + int i_mb_type_topright; + int i_mb_prev_xy; + int i_mb_top_xy; + + /* Search parameters */ + int i_me_method; + int i_subpel_refine; + int b_chroma_me; + int b_trellis; + int b_noise_reduction; + + /* Allowed qpel MV range to stay within the picture + emulated edge pixels */ + int mv_min[2]; + int mv_max[2]; + /* Subpel MV range for motion search. + * same mv_min/max but includes levels' i_mv_range. */ + int mv_min_spel[2]; + int mv_max_spel[2]; + /* Fullpel MV range for motion search */ + int mv_min_fpel[2]; + int mv_max_fpel[2]; + + + +//XXX + struct { /* space for p_fenc and p_fdec */ diff --git a/common/cpu.c b/common/cpu.c index d8ed4d3..541a885 100644 --- a/common/cpu.c +++ b/common/cpu.c @@ -275,7 +275,8 @@ int x264_cpu_num_processors( void ) return 1; #elif defined(_WIN32) - return pthread_num_processors_np(); + //return pthread_num_processors_np(); + return 1; #elif defined(SYS_LINUX) unsigned int bit; diff --git a/common/osdep.h b/common/osdep.h index 25bb138..696dce8 100644 --- a/common/osdep.h +++ b/common/osdep.h @@ -83,6 +83,7 @@ #define NOINLINE #endif + /* threads */ #if defined(SYS_BEOS) #include @@ -96,6 +97,90 @@ #endif #define HAVE_PTHREAD 1 +#elif defined(_OPENMP) + +#include +#if defined(_WIN32) || defined(_WIN64) +#include // for SwitchToThread XXX +#endif +typedef struct +{ + int status; // 0 == completed, 1 = running + omp_lock_t handle; // what to wait for +} x264_pthread_t; + 
+#define x264_pthread_create(t, u, f, d) { \ + omp_init_lock(&(t)->handle); \ + (t)->status = 1; \ + __pragma(omp task) \ + { \ + ((void (*)(void *))(f))(d); \ + omp_set_lock(&(t)->handle); \ + (t)->status = 0; \ + omp_unset_lock(&(t)->handle); \ + } \ +} + +#define x264_pthread_join(t,s) { \ + omp_set_lock(&(t).handle); \ + while ((t).status != 0) { \ + omp_unset_lock(&(t).handle); \ + SwitchToThread(); \ + omp_set_lock(&(t).handle); \ + } \ + omp_unset_lock(&(t).handle); \ + omp_destroy_lock(&(t).handle); \ +} + +typedef omp_lock_t x264_pthread_mutex_t; +#define x264_pthread_mutex_init(m,f) omp_init_lock(m) +#define x264_pthread_mutex_destroy(m) omp_destroy_lock(m) + +typedef int x264_pthread_cond_t; +#define x264_pthread_cond_init(c,f) (*(c) = 1) +#define x264_pthread_cond_destroy(c) (*(c) = 0) + +#define x264_pthread_mutex_lock(m) omp_set_lock(m) +#define x264_pthread_mutex_unlock(m) omp_unset_lock(m) +#define x264_pthread_cond_broadcast(c) +#define x264_pthread_cond_wait(c,m) (omp_unset_lock(m), SwitchToThread(), omp_set_lock(m)) +#define HAVE_PTHREAD 1 +#define ALREADY_DEFINED_PTHREAD + +#elif defined(_WIN32) || defined(_WIN64) +#include + +#define HAVE_PTHREAD 1 +#define x264_pthread_t HANDLE +#define x264_pthread_create(t,u,f,d) (*(t)=CreateThread(0,0,f,d,0,0)) +#define x264_pthread_join(t,s) (WaitForSingleObject(t,INFINITE) != WAIT_OBJECT_0 || !CloseHandle(t)) +typedef CRITICAL_SECTION x264_pthread_mutex_t; +#define x264_pthread_mutex_init(m,f) InitializeCriticalSection(m) +#define x264_pthread_mutex_destroy(m) DeleteCriticalSection(m) + +#if defined(VISTA) +typedef CONDITION_VARIABLE x264_pthread_cond_t; +#define x264_pthread_cond_init(c,f) InitializeConditionVariable(c) +#define x264_pthread_cond_destroy(c) +#else +typedef int x264_pthread_cond_t; +#define x264_pthread_cond_init(c,f) (*(c) = 0) +#define x264_pthread_cond_destroy(c) +#endif + +#define x264_pthread_mutex_lock(m) EnterCriticalSection(m) +#define x264_pthread_mutex_unlock(m) 
LeaveCriticalSection(m) + +#if defined(VISTA) +#define x264_pthread_cond_broadcast(c) WakeAllConditionVariable(c) +#define x264_pthread_cond_wait(c,m) SleepConditionVariableCS(c,m,INFINITE) +#else +// No real condition variable without VISTA: broadcast is a no-op and wait only drops the lock, yields, and re-takes it, so a waiter must re-check its condition in a loop (busy-polling). Wasteful, perhaps +#define x264_pthread_cond_broadcast(c) +#define x264_pthread_cond_wait(c,m) (LeaveCriticalSection(m), SwitchToThread(), EnterCriticalSection(m)) + +#endif +#define ALREADY_DEFINED_PTHREAD #elif defined(HAVE_PTHREAD) #include #define USE_REAL_PTHREAD @@ -120,7 +205,7 @@ #define x264_pthread_cond_destroy pthread_cond_destroy #define x264_pthread_cond_broadcast pthread_cond_broadcast #define x264_pthread_cond_wait pthread_cond_wait -#else +#elif !defined(ALREADY_DEFINED_PTHREAD) #define x264_pthread_mutex_t int #define x264_pthread_mutex_init(m,f) #define x264_pthread_mutex_destroy(m) @@ -132,7 +217,6 @@ #define x264_pthread_cond_broadcast(c) #define x264_pthread_cond_wait(c,m) #endif - #define WORD_SIZE sizeof(void*) #if !defined(_WIN64) && !defined(__LP64__) diff --git a/encoder/analyse.c b/encoder/analyse.c index 8457c38..ac9a6e3 100644 --- a/encoder/analyse.c +++ b/encoder/analyse.c @@ -270,8 +270,13 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp ) int i_ref = i ? h->i_ref1 : h->i_ref0; for( j=0; jmutex ); + while( fref[j]->i_lines_completed < thresh) + x264_pthread_cond_wait( &fref[j]->cv, + &fref[j]->mutex ); thread_mvy_range = X264_MIN( thread_mvy_range, fref[j]->i_lines_completed - pix_y ); + x264_pthread_mutex_unlock( &fref[j]->mutex ); } } if( h->param.b_deterministic ) diff --git a/encoder/encoder.c b/encoder/encoder.c index 0999dc2..87330cd 100644 --- a/encoder/encoder.c +++ b/encoder/encoder.c @@ -1300,7 +1300,8 @@ static void x264_thread_sync_context( x264_t *dst, x264_t *src ) // copy everything except the per-thread pointers and the constants. 
memcpy( &dst->i_frame, &src->i_frame, offsetof(x264_t, mb.type) - offsetof(x264_t, i_frame) ); - dst->stat = src->stat; + //dst->stat = src->stat; + memcpy(&dst->stat.i_slice_count, &src->stat.i_slice_count, offsetof(x264_t, predict_16x16) - offsetof(x264_t, stat.i_slice_count)); } static void x264_thread_sync_stat( x264_t *dst, x264_t *src ) diff --git a/x264.c b/x264.c index a1a8c94..65fb81b 100644 --- a/x264.c +++ b/x264.c @@ -828,6 +828,11 @@ static int Encode( x264_param_t *param, cli_opt_t *opt ) i_start = x264_mdate(); /* Encode frames */ +#if defined(_OPENMP) +#pragma omp parallel +#pragma omp single +{ +#endif for( i_frame = 0, i_file = 0; b_ctrl_c == 0 && (i_frame < i_frame_total || i_frame_total == 0); ) { if( p_read_frame( &pic, opt->hin, i_frame + opt->i_seek ) ) @@ -876,6 +881,10 @@ static int Encode( x264_param_t *param, cli_opt_t *opt ) i_frame_size = Encode_frame( h, opt->hout, NULL ); } while( i_frame_size ); +#if defined(_OPENMP) +} +#endif + i_end = x264_mdate(); x264_picture_clean( &pic ); /* Erase progress indicator before printing encoding stats. */