diff --git a/common/macroblock.c b/common/macroblock.c index 836d203..e32f694 100644 --- a/common/macroblock.c +++ b/common/macroblock.c @@ -666,6 +666,7 @@ int x264_macroblock_cache_init( x264_t *h ) { int i, j; int i_mb_count = h->mb.i_mb_count; + int buf_hpel, buf_ssim, me_range, buf_tesa; h->mb.i_mb_stride = h->sps->i_mb_width; h->mb.i_b8_stride = h->sps->i_mb_width * 2; @@ -738,10 +739,10 @@ int x264_macroblock_cache_init( x264_t *h ) h->mb.i_neighbour4[15] = h->mb.i_neighbour8[3] = MB_LEFT|MB_TOP|MB_TOPLEFT; - int buf_hpel = (h->param.i_width+48) * sizeof(int16_t); - int buf_ssim = h->param.analyse.b_ssim * 8 * (h->param.i_width/4+3) * sizeof(int); - int me_range = X264_MIN(h->param.analyse.i_me_range, h->param.analyse.i_mv_range); - int buf_tesa = (h->param.analyse.i_me_method >= X264_ME_ESA) * + buf_hpel = (h->param.i_width+48) * sizeof(int16_t); + buf_ssim = h->param.analyse.b_ssim * 8 * (h->param.i_width/4+3) * sizeof(int); + me_range = X264_MIN(h->param.analyse.i_me_range, h->param.analyse.i_mv_range); + buf_tesa = (h->param.analyse.i_me_method >= X264_ME_ESA) * ((me_range*2+18) * sizeof(int16_t) + (me_range+4) * (me_range+1) * 4 * sizeof(mvsad_t)); CHECKED_MALLOC( h->scratch_buffer, X264_MAX3( buf_hpel, buf_ssim, buf_tesa ) ); diff --git a/common/osdep.h b/common/osdep.h index 915ec05..6a96aa9 100644 --- a/common/osdep.h +++ b/common/osdep.h @@ -48,6 +48,8 @@ #define ftell _ftelli64 #define isfinite _finite #define strtok_r strtok_s +#define PRIx32 "lx" +#define log2f(a) (logf(a)/logf(2)) #define _CRT_SECURE_NO_DEPRECATE #define X264_VERSION "" // no configure script for msvc #endif diff --git a/encoder/me.c b/encoder/me.c index f13e84b..d9b3a8e 100644 --- a/encoder/me.c +++ b/encoder/me.c @@ -451,93 +451,95 @@ me_hex2: /* hexagon grid */ omx = bmx; omy = bmy; - const int16_t *p_cost_omvx = p_cost_mvx + omx*4; - const int16_t *p_cost_omvy = p_cost_mvy + omy*4; - i = 1; - do { - static const int hex4[16][2] = { - { 0,-4}, { 0, 4}, {-2,-3}, { 2,-3}, - 
{-4,-2}, { 4,-2}, {-4,-1}, { 4,-1}, - {-4, 0}, { 4, 0}, {-4, 1}, { 4, 1}, - {-4, 2}, { 4, 2}, {-2, 3}, { 2, 3}, - }; - - if( 4*i > X264_MIN4( mv_x_max-omx, omx-mv_x_min, - mv_y_max-omy, omy-mv_y_min ) ) + const int16_t *p_cost_omvx = p_cost_mvx + omx*4; + const int16_t *p_cost_omvy = p_cost_mvy + omy*4; + i = 1; + do { - for( j = 0; j < 16; j++ ) + static const int hex4[16][2] = { + { 0,-4}, { 0, 4}, {-2,-3}, { 2,-3}, + {-4,-2}, { 4,-2}, {-4,-1}, { 4,-1}, + {-4, 0}, { 4, 0}, {-4, 1}, { 4, 1}, + {-4, 2}, { 4, 2}, {-2, 3}, { 2, 3}, + }; + + if( 4*i > X264_MIN4( mv_x_max-omx, omx-mv_x_min, + mv_y_max-omy, omy-mv_y_min ) ) { - int mx = omx + hex4[j][0]*i; - int my = omy + hex4[j][1]*i; - if( CHECK_MVRANGE(mx, my) ) - COST_MV( mx, my ); + for( j = 0; j < 16; j++ ) + { + int mx = omx + hex4[j][0]*i; + int my = omy + hex4[j][1]*i; + if( CHECK_MVRANGE(mx, my) ) + COST_MV( mx, my ); + } } - } - else - { - int dir = 0; - uint8_t *pix_base = p_fref + omx + (omy-4*i)*stride; - int dy = i*stride; -#define SADS(k,x0,y0,x1,y1,x2,y2,x3,y3)\ - h->pixf.fpelcmp_x4[i_pixel]( p_fenc,\ - pix_base x0*i+(y0-2*k+4)*dy,\ - pix_base x1*i+(y1-2*k+4)*dy,\ - pix_base x2*i+(y2-2*k+4)*dy,\ - pix_base x3*i+(y3-2*k+4)*dy,\ - stride, costs+4*k );\ - pix_base += 2*dy; -#define ADD_MVCOST(k,x,y) costs[k] += p_cost_omvx[x*4*i] + p_cost_omvy[y*4*i] -#define MIN_MV(k,x,y) COPY2_IF_LT( bcost, costs[k], dir, x*16+(y&15) ) - SADS( 0, +0,-4, +0,+4, -2,-3, +2,-3 ); - SADS( 1, -4,-2, +4,-2, -4,-1, +4,-1 ); - SADS( 2, -4,+0, +4,+0, -4,+1, +4,+1 ); - SADS( 3, -4,+2, +4,+2, -2,+3, +2,+3 ); - ADD_MVCOST( 0, 0,-4 ); - ADD_MVCOST( 1, 0, 4 ); - ADD_MVCOST( 2,-2,-3 ); - ADD_MVCOST( 3, 2,-3 ); - ADD_MVCOST( 4,-4,-2 ); - ADD_MVCOST( 5, 4,-2 ); - ADD_MVCOST( 6,-4,-1 ); - ADD_MVCOST( 7, 4,-1 ); - ADD_MVCOST( 8,-4, 0 ); - ADD_MVCOST( 9, 4, 0 ); - ADD_MVCOST( 10,-4, 1 ); - ADD_MVCOST( 11, 4, 1 ); - ADD_MVCOST( 12,-4, 2 ); - ADD_MVCOST( 13, 4, 2 ); - ADD_MVCOST( 14,-2, 3 ); - ADD_MVCOST( 15, 2, 3 ); - MIN_MV( 0, 0,-4 ); - 
MIN_MV( 1, 0, 4 ); - MIN_MV( 2,-2,-3 ); - MIN_MV( 3, 2,-3 ); - MIN_MV( 4,-4,-2 ); - MIN_MV( 5, 4,-2 ); - MIN_MV( 6,-4,-1 ); - MIN_MV( 7, 4,-1 ); - MIN_MV( 8,-4, 0 ); - MIN_MV( 9, 4, 0 ); - MIN_MV( 10,-4, 1 ); - MIN_MV( 11, 4, 1 ); - MIN_MV( 12,-4, 2 ); - MIN_MV( 13, 4, 2 ); - MIN_MV( 14,-2, 3 ); - MIN_MV( 15, 2, 3 ); -#undef SADS -#undef ADD_MVCOST -#undef MIN_MV - if(dir) + else { - bmx = omx + i*(dir>>4); - bmy = omy + i*((dir<<28)>>28); + int dir = 0; + uint8_t *pix_base = p_fref + omx + (omy-4*i)*stride; + int dy = i*stride; + #define SADS(k,x0,y0,x1,y1,x2,y2,x3,y3)\ + h->pixf.fpelcmp_x4[i_pixel]( p_fenc,\ + pix_base x0*i+(y0-2*k+4)*dy,\ + pix_base x1*i+(y1-2*k+4)*dy,\ + pix_base x2*i+(y2-2*k+4)*dy,\ + pix_base x3*i+(y3-2*k+4)*dy,\ + stride, costs+4*k );\ + pix_base += 2*dy; + #define ADD_MVCOST(k,x,y) costs[k] += p_cost_omvx[x*4*i] + p_cost_omvy[y*4*i] + #define MIN_MV(k,x,y) COPY2_IF_LT( bcost, costs[k], dir, x*16+(y&15) ) + SADS( 0, +0,-4, +0,+4, -2,-3, +2,-3 ); + SADS( 1, -4,-2, +4,-2, -4,-1, +4,-1 ); + SADS( 2, -4,+0, +4,+0, -4,+1, +4,+1 ); + SADS( 3, -4,+2, +4,+2, -2,+3, +2,+3 ); + ADD_MVCOST( 0, 0,-4 ); + ADD_MVCOST( 1, 0, 4 ); + ADD_MVCOST( 2,-2,-3 ); + ADD_MVCOST( 3, 2,-3 ); + ADD_MVCOST( 4,-4,-2 ); + ADD_MVCOST( 5, 4,-2 ); + ADD_MVCOST( 6,-4,-1 ); + ADD_MVCOST( 7, 4,-1 ); + ADD_MVCOST( 8,-4, 0 ); + ADD_MVCOST( 9, 4, 0 ); + ADD_MVCOST( 10,-4, 1 ); + ADD_MVCOST( 11, 4, 1 ); + ADD_MVCOST( 12,-4, 2 ); + ADD_MVCOST( 13, 4, 2 ); + ADD_MVCOST( 14,-2, 3 ); + ADD_MVCOST( 15, 2, 3 ); + MIN_MV( 0, 0,-4 ); + MIN_MV( 1, 0, 4 ); + MIN_MV( 2,-2,-3 ); + MIN_MV( 3, 2,-3 ); + MIN_MV( 4,-4,-2 ); + MIN_MV( 5, 4,-2 ); + MIN_MV( 6,-4,-1 ); + MIN_MV( 7, 4,-1 ); + MIN_MV( 8,-4, 0 ); + MIN_MV( 9, 4, 0 ); + MIN_MV( 10,-4, 1 ); + MIN_MV( 11, 4, 1 ); + MIN_MV( 12,-4, 2 ); + MIN_MV( 13, 4, 2 ); + MIN_MV( 14,-2, 3 ); + MIN_MV( 15, 2, 3 ); + #undef SADS + #undef ADD_MVCOST + #undef MIN_MV + if(dir) + { + bmx = omx + i*(dir>>4); + bmy = omy + i*((dir<<28)>>28); + } } - } - } while( 
++i <= i_me_range/4 ); - if( bmy <= mv_y_max ) - goto me_hex2; - break; + } while( ++i <= i_me_range/4 ); + if( bmy <= mv_y_max ) + goto me_hex2; + break; + } } case X264_ME_ESA: diff --git a/encoder/slicetype.c b/encoder/slicetype.c index 2c16429..d716e47 100644 --- a/encoder/slicetype.c +++ b/encoder/slicetype.c @@ -438,10 +438,7 @@ static int x264_slicetype_path_search( x264_t *h, x264_mb_analysis_t *a, x264_fr static int scenecut( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int p0, int p1 ) { x264_frame_t *frame = frames[p1]; - x264_slicetype_frame_cost( h, a, frames, p0, p1, p1, 0 ); - int icost = frame->i_cost_est[0][0]; - int pcost = frame->i_cost_est[p1-p0][0]; float f_bias; int i_gop_size = frame->i_frame - h->frames.i_last_idr; float f_thresh_max = h->param.i_scenecut_threshold / 100.0; @@ -450,6 +447,12 @@ static int scenecut( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, in / ( h->param.i_keyint_max * 4 ); int res; + int icost,pcost; + x264_slicetype_frame_cost( h, a, frames, p0, p1, p1, 0 ); + + icost = frame->i_cost_est[0][0]; + pcost = frame->i_cost_est[p1-p0][0]; + if( h->param.i_keyint_min == h->param.i_keyint_max ) f_thresh_min= f_thresh_max; if( i_gop_size < h->param.i_keyint_min / 4 )