diff --git a/common/macroblock.c b/common/macroblock.c
index 836d203..e32f694 100644
--- a/common/macroblock.c
+++ b/common/macroblock.c
@@ -666,6 +666,7 @@ int x264_macroblock_cache_init( x264_t *h )
 {
     int i, j;
     int i_mb_count = h->mb.i_mb_count;
+    int buf_hpel, buf_ssim, me_range, buf_tesa;
 
     h->mb.i_mb_stride = h->sps->i_mb_width;
     h->mb.i_b8_stride = h->sps->i_mb_width * 2;
@@ -738,10 +739,10 @@ int x264_macroblock_cache_init( x264_t *h )
     h->mb.i_neighbour4[15] =
     h->mb.i_neighbour8[3] = MB_LEFT|MB_TOP|MB_TOPLEFT;
 
-    int buf_hpel = (h->param.i_width+48) * sizeof(int16_t);
-    int buf_ssim = h->param.analyse.b_ssim * 8 * (h->param.i_width/4+3) * sizeof(int);
-    int me_range = X264_MIN(h->param.analyse.i_me_range, h->param.analyse.i_mv_range);
-    int buf_tesa = (h->param.analyse.i_me_method >= X264_ME_ESA) *
+    buf_hpel = (h->param.i_width+48) * sizeof(int16_t);
+    buf_ssim = h->param.analyse.b_ssim * 8 * (h->param.i_width/4+3) * sizeof(int);
+    me_range = X264_MIN(h->param.analyse.i_me_range, h->param.analyse.i_mv_range);
+    buf_tesa = (h->param.analyse.i_me_method >= X264_ME_ESA) *
         ((me_range*2+18) * sizeof(int16_t) + (me_range+4) * (me_range+1) * 4 * sizeof(mvsad_t));
     CHECKED_MALLOC( h->scratch_buffer, X264_MAX3( buf_hpel, buf_ssim, buf_tesa ) );
 
diff --git a/common/osdep.h b/common/osdep.h
index 915ec05..6a96aa9 100644
--- a/common/osdep.h
+++ b/common/osdep.h
@@ -48,6 +48,8 @@
 #define ftell _ftelli64
 #define isfinite _finite
 #define strtok_r strtok_s
+#define PRIx32 "lx"
+#define log2f(a) (logf(a)/logf(2))
 #define _CRT_SECURE_NO_DEPRECATE
 #define X264_VERSION "" // no configure script for msvc
 #endif
diff --git a/encoder/me.c b/encoder/me.c
index f13e84b..d9b3a8e 100644
--- a/encoder/me.c
+++ b/encoder/me.c
@@ -451,93 +451,95 @@ me_hex2:
 
             /* hexagon grid */
             omx = bmx; omy = bmy;
-            const int16_t *p_cost_omvx = p_cost_mvx + omx*4;
-            const int16_t *p_cost_omvy = p_cost_mvy + omy*4;
-            i = 1;
-            do
             {
-                static const int hex4[16][2] = {
-                    { 0,-4}, { 0, 4}, {-2,-3}, { 2,-3},
-                    {-4,-2}, { 4,-2}, {-4,-1}, { 4,-1},
-                    {-4, 0}, { 4, 0}, {-4, 1}, { 4, 1},
-                    {-4, 2}, { 4, 2}, {-2, 3}, { 2, 3},
-                };
-
-                if( 4*i > X264_MIN4( mv_x_max-omx, omx-mv_x_min,
-                                     mv_y_max-omy, omy-mv_y_min ) )
+                const int16_t *p_cost_omvx = p_cost_mvx + omx*4;
+                const int16_t *p_cost_omvy = p_cost_mvy + omy*4;
+                i = 1;
+                do
                 {
-                    for( j = 0; j < 16; j++ )
+                    static const int hex4[16][2] = {
+                        { 0,-4}, { 0, 4}, {-2,-3}, { 2,-3},
+                        {-4,-2}, { 4,-2}, {-4,-1}, { 4,-1},
+                        {-4, 0}, { 4, 0}, {-4, 1}, { 4, 1},
+                        {-4, 2}, { 4, 2}, {-2, 3}, { 2, 3},
+                    };
+
+                    if( 4*i > X264_MIN4( mv_x_max-omx, omx-mv_x_min,
+                                         mv_y_max-omy, omy-mv_y_min ) )
                     {
-                        int mx = omx + hex4[j][0]*i;
-                        int my = omy + hex4[j][1]*i;
-                        if( CHECK_MVRANGE(mx, my) )
-                            COST_MV( mx, my );
+                        for( j = 0; j < 16; j++ )
+                        {
+                            int mx = omx + hex4[j][0]*i;
+                            int my = omy + hex4[j][1]*i;
+                            if( CHECK_MVRANGE(mx, my) )
+                                COST_MV( mx, my );
+                        }
                     }
-                }
-                else
-                {
-                    int dir = 0;
-                    uint8_t *pix_base = p_fref + omx + (omy-4*i)*stride;
-                    int dy = i*stride;
-#define SADS(k,x0,y0,x1,y1,x2,y2,x3,y3)\
-                    h->pixf.fpelcmp_x4[i_pixel]( p_fenc,\
-                            pix_base x0*i+(y0-2*k+4)*dy,\
-                            pix_base x1*i+(y1-2*k+4)*dy,\
-                            pix_base x2*i+(y2-2*k+4)*dy,\
-                            pix_base x3*i+(y3-2*k+4)*dy,\
-                            stride, costs+4*k );\
-                    pix_base += 2*dy;
-#define ADD_MVCOST(k,x,y) costs[k] += p_cost_omvx[x*4*i] + p_cost_omvy[y*4*i]
-#define MIN_MV(k,x,y)     COPY2_IF_LT( bcost, costs[k], dir, x*16+(y&15) )
-                    SADS( 0, +0,-4, +0,+4, -2,-3, +2,-3 );
-                    SADS( 1, -4,-2, +4,-2, -4,-1, +4,-1 );
-                    SADS( 2, -4,+0, +4,+0, -4,+1, +4,+1 );
-                    SADS( 3, -4,+2, +4,+2, -2,+3, +2,+3 );
-                    ADD_MVCOST(  0, 0,-4 );
-                    ADD_MVCOST(  1, 0, 4 );
-                    ADD_MVCOST(  2,-2,-3 );
-                    ADD_MVCOST(  3, 2,-3 );
-                    ADD_MVCOST(  4,-4,-2 );
-                    ADD_MVCOST(  5, 4,-2 );
-                    ADD_MVCOST(  6,-4,-1 );
-                    ADD_MVCOST(  7, 4,-1 );
-                    ADD_MVCOST(  8,-4, 0 );
-                    ADD_MVCOST(  9, 4, 0 );
-                    ADD_MVCOST( 10,-4, 1 );
-                    ADD_MVCOST( 11, 4, 1 );
-                    ADD_MVCOST( 12,-4, 2 );
-                    ADD_MVCOST( 13, 4, 2 );
-                    ADD_MVCOST( 14,-2, 3 );
-                    ADD_MVCOST( 15, 2, 3 );
-                    MIN_MV(  0, 0,-4 );
-                    MIN_MV(  1, 0, 4 );
-                    MIN_MV(  2,-2,-3 );
-                    MIN_MV(  3, 2,-3 );
-                    MIN_MV(  4,-4,-2 );
-                    MIN_MV(  5, 4,-2 );
-                    MIN_MV(  6,-4,-1 );
-                    MIN_MV(  7, 4,-1 );
-                    MIN_MV(  8,-4, 0 );
-                    MIN_MV(  9, 4, 0 );
-                    MIN_MV( 10,-4, 1 );
-                    MIN_MV( 11, 4, 1 );
-                    MIN_MV( 12,-4, 2 );
-                    MIN_MV( 13, 4, 2 );
-                    MIN_MV( 14,-2, 3 );
-                    MIN_MV( 15, 2, 3 );
-#undef SADS
-#undef ADD_MVCOST
-#undef MIN_MV
-                    if(dir)
+                    else
                     {
-                        bmx = omx + i*(dir>>4);
-                        bmy = omy + i*((dir<<28)>>28);
+                        int dir = 0;
+                        uint8_t *pix_base = p_fref + omx + (omy-4*i)*stride;
+                        int dy = i*stride;
+    #define SADS(k,x0,y0,x1,y1,x2,y2,x3,y3)\
+                        h->pixf.fpelcmp_x4[i_pixel]( p_fenc,\
+                                pix_base x0*i+(y0-2*k+4)*dy,\
+                                pix_base x1*i+(y1-2*k+4)*dy,\
+                                pix_base x2*i+(y2-2*k+4)*dy,\
+                                pix_base x3*i+(y3-2*k+4)*dy,\
+                                stride, costs+4*k );\
+                        pix_base += 2*dy;
+    #define ADD_MVCOST(k,x,y) costs[k] += p_cost_omvx[x*4*i] + p_cost_omvy[y*4*i]
+    #define MIN_MV(k,x,y)     COPY2_IF_LT( bcost, costs[k], dir, x*16+(y&15) )
+                        SADS( 0, +0,-4, +0,+4, -2,-3, +2,-3 );
+                        SADS( 1, -4,-2, +4,-2, -4,-1, +4,-1 );
+                        SADS( 2, -4,+0, +4,+0, -4,+1, +4,+1 );
+                        SADS( 3, -4,+2, +4,+2, -2,+3, +2,+3 );
+                        ADD_MVCOST(  0, 0,-4 );
+                        ADD_MVCOST(  1, 0, 4 );
+                        ADD_MVCOST(  2,-2,-3 );
+                        ADD_MVCOST(  3, 2,-3 );
+                        ADD_MVCOST(  4,-4,-2 );
+                        ADD_MVCOST(  5, 4,-2 );
+                        ADD_MVCOST(  6,-4,-1 );
+                        ADD_MVCOST(  7, 4,-1 );
+                        ADD_MVCOST(  8,-4, 0 );
+                        ADD_MVCOST(  9, 4, 0 );
+                        ADD_MVCOST( 10,-4, 1 );
+                        ADD_MVCOST( 11, 4, 1 );
+                        ADD_MVCOST( 12,-4, 2 );
+                        ADD_MVCOST( 13, 4, 2 );
+                        ADD_MVCOST( 14,-2, 3 );
+                        ADD_MVCOST( 15, 2, 3 );
+                        MIN_MV(  0, 0,-4 );
+                        MIN_MV(  1, 0, 4 );
+                        MIN_MV(  2,-2,-3 );
+                        MIN_MV(  3, 2,-3 );
+                        MIN_MV(  4,-4,-2 );
+                        MIN_MV(  5, 4,-2 );
+                        MIN_MV(  6,-4,-1 );
+                        MIN_MV(  7, 4,-1 );
+                        MIN_MV(  8,-4, 0 );
+                        MIN_MV(  9, 4, 0 );
+                        MIN_MV( 10,-4, 1 );
+                        MIN_MV( 11, 4, 1 );
+                        MIN_MV( 12,-4, 2 );
+                        MIN_MV( 13, 4, 2 );
+                        MIN_MV( 14,-2, 3 );
+                        MIN_MV( 15, 2, 3 );
+    #undef SADS
+    #undef ADD_MVCOST
+    #undef MIN_MV
+                        if(dir)
+                        {
+                            bmx = omx + i*(dir>>4);
+                            bmy = omy + i*((dir<<28)>>28);
+                        }
                     }
-                }
-            } while( ++i <= i_me_range/4 );
-            if( bmy <= mv_y_max )
-                goto me_hex2;
-            break;
+                } while( ++i <= i_me_range/4 );
+                if( bmy <= mv_y_max )
+                    goto me_hex2;
+                break;
+            }
         }
 
     case X264_ME_ESA:
diff --git a/encoder/slicetype.c b/encoder/slicetype.c
index 2c16429..d716e47 100644
--- a/encoder/slicetype.c
+++ b/encoder/slicetype.c
@@ -438,10 +438,7 @@ static int x264_slicetype_path_search( x264_t *h, x264_mb_analysis_t *a, x264_fr
 static int scenecut( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int p0, int p1 )
 {
     x264_frame_t *frame = frames[p1];
-    x264_slicetype_frame_cost( h, a, frames, p0, p1, p1, 0 );
 
-    int icost = frame->i_cost_est[0][0];
-    int pcost = frame->i_cost_est[p1-p0][0];
     float f_bias;
     int i_gop_size = frame->i_frame - h->frames.i_last_idr;
     float f_thresh_max = h->param.i_scenecut_threshold / 100.0;
@@ -450,6 +447,12 @@ static int scenecut( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, in
                          / ( h->param.i_keyint_max * 4 );
     int res;
 
+    int icost,pcost;
+    x264_slicetype_frame_cost( h, a, frames, p0, p1, p1, 0 );
+
+    icost = frame->i_cost_est[0][0];
+    pcost = frame->i_cost_est[p1-p0][0];
+
     if( h->param.i_keyint_min == h->param.i_keyint_max )
         f_thresh_min= f_thresh_max;
     if( i_gop_size < h->param.i_keyint_min / 4 )