[x264-devel] [Git][videolan/x264][master] 11 commits: Fix heap-buffer-overflow read detected by ASan with interlaced encoding

Wed Jul 17 20:23:28 CEST 2019


Anton Mitrofanov pushed to branch master at VideoLAN / x264


Commits:
6381798d by Anton Mitrofanov at 2019-07-17T17:15:34Z
Fix heap-buffer-overflow read detected by ASan with interlaced encoding

Bug report by Hongxu Chen.

- - - - -
3147fa43 by Anton Mitrofanov at 2019-07-17T17:15:34Z
checkasm: Fix heap-buffer-overflow read detected by ASan

- - - - -
f06062f5 by Anton Mitrofanov at 2019-07-17T17:15:34Z
Fix integer overflow detected by UBSan in --weightp analysis

Bug report by Xuezhi Yan.

- - - - -
6b1170cb by Anton Mitrofanov at 2019-07-17T17:15:34Z
Shut up UBSan about uninitialized data read

Result was never used in that case.

- - - - -
6d494708 by Anton Mitrofanov at 2019-07-17T17:15:34Z
Fix x264_picture_alloc with X264_CSP_I400 colorspace

- - - - -
f9af2a0f by Anton Mitrofanov at 2019-07-17T17:15:34Z
Revert r2959: Signal Progressive and Constrained profiles

Some hardware decoders refuse to decode streams with non-zero
constraint_set4_flag/constraint_set5_flag.

- - - - -
34c06d1c by Anton Mitrofanov at 2019-07-17T17:15:34Z
Strip git-hash from version in x264.pc

pkg-config doesn't like spaces in version string.

- - - - -
b5bc5d69 by Henrik Gramner at 2019-07-17T17:15:35Z
x86: Perform stack realignment in C instead of assembly

Simplifies a lot of code and avoids having to export public asm functions.

Note that the force_align_arg_pointer function attribute is broken in clang
versions prior to 6.0.1 which may result in crashes, so make sure to either
use a newer clang version or a different compiler.

- - - - -
a615f027 by Anton Mitrofanov at 2019-07-17T17:19:23Z
Explicitly mark DSO public API symbols and hide all others with -fvisibility=hidden

Removes need for -Bsymbolic during linking.

- - - - -
76c5afc2 by Anton Mitrofanov at 2019-07-17T17:19:23Z
Fix MSVS build with ./configure --enable-shared --system-libx264

- - - - -
3759fcb7 by Anton Mitrofanov at 2019-07-17T17:19:23Z
Remove use of CRT objects across DLL boundaries

Fix crash of MSVC builds compiled with --system-libx264 and /MT (default) CRT.

- - - - -


22 changed files:

- Makefile
- common/base.c
- common/base.h
- common/cpu.h
- common/frame.h
- common/osdep.c
- common/osdep.h
- common/set.h
- common/tables.h
- common/threadpool.c
- common/threadpool.h
- common/x86/cpu-a.asm
- configure
- encoder/analyse.c
- encoder/api.c
- encoder/encoder.c
- encoder/lookahead.c
- encoder/set.c
- encoder/slicetype.c
- tools/checkasm.c
- x264.c
- x264.h


Changes:

=====================================
Makefile
=====================================
@@ -246,6 +246,8 @@ $(LIBX264): $(GENERATED) .depend $(OBJS) $(OBJASM)
 $(SONAME): $(GENERATED) .depend $(OBJS) $(OBJASM) $(OBJSO)
 	$(LD)$@ $(OBJS) $(OBJASM) $(OBJSO) $(SOFLAGS) $(LDFLAGS)
 
+$(IMPLIBNAME): $(SONAME)
+
 ifneq ($(EXE),)
 .PHONY: x264 checkasm8 checkasm10 example
 x264: x264$(EXE)
@@ -266,6 +268,9 @@ checkasm10$(EXE): $(GENERATED) .depend $(OBJCHK) $(OBJCHK_10) $(LIBX264)
 example$(EXE): $(GENERATED) .depend $(OBJEXAMPLE) $(LIBX264)
 	$(LD)$@ $(OBJEXAMPLE) $(LIBX264) $(LDFLAGS)
 
+$(OBJS) $(OBJSO): CFLAGS += $(CFLAGSSO)
+$(OBJCLI): CFLAGS += $(CFLAGSCLI)
+
 $(OBJS) $(OBJASM) $(OBJSO) $(OBJCLI) $(OBJCHK) $(OBJCHK_8) $(OBJCHK_10) $(OBJEXAMPLE): .depend
 
 %.o: %.c


=====================================
common/base.c
=====================================
@@ -196,7 +196,7 @@ error:
 /****************************************************************************
  * x264_picture_init:
  ****************************************************************************/
-static void picture_init( x264_picture_t *pic )
+REALIGN_STACK void x264_picture_init( x264_picture_t *pic )
 {
     memset( pic, 0, sizeof( x264_picture_t ) );
     pic->i_type = X264_TYPE_AUTO;
@@ -204,15 +204,10 @@ static void picture_init( x264_picture_t *pic )
     pic->i_pic_struct = PIC_STRUCT_AUTO;
 }
 
-void x264_picture_init( x264_picture_t *pic )
-{
-    x264_stack_align( picture_init, pic );
-}
-
 /****************************************************************************
  * x264_picture_alloc:
  ****************************************************************************/
-static int picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height )
+REALIGN_STACK int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height )
 {
     typedef struct
     {
@@ -223,6 +218,7 @@ static int picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_hei
 
     static const x264_csp_tab_t csp_tab[] =
     {
+        [X264_CSP_I400] = { 1, { 256*1 },               { 256*1 }               },
         [X264_CSP_I420] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256/2, 256/2 } },
         [X264_CSP_YV12] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256/2, 256/2 } },
         [X264_CSP_NV12] = { 2, { 256*1, 256*1 },        { 256*1, 256/2 },       },
@@ -242,7 +238,7 @@ static int picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_hei
     int csp = i_csp & X264_CSP_MASK;
     if( csp <= X264_CSP_NONE || csp >= X264_CSP_MAX || csp == X264_CSP_V210 )
         return -1;
-    picture_init( pic );
+    x264_picture_init( pic );
     pic->img.i_csp = i_csp;
     pic->img.i_plane = csp_tab[csp].planes;
     int depth_factor = i_csp & X264_CSP_HIGH_DEPTH ? 2 : 1;
@@ -264,15 +260,10 @@ static int picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_hei
     return 0;
 }
 
-int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height )
-{
-    return x264_stack_align( picture_alloc, pic, i_csp, i_width, i_height );
-}
-
 /****************************************************************************
  * x264_picture_clean:
  ****************************************************************************/
-static void picture_clean( x264_picture_t *pic )
+REALIGN_STACK void x264_picture_clean( x264_picture_t *pic )
 {
     x264_free( pic->img.plane[0] );
 
@@ -280,15 +271,10 @@ static void picture_clean( x264_picture_t *pic )
     memset( pic, 0, sizeof( x264_picture_t ) );
 }
 
-void x264_picture_clean( x264_picture_t *pic )
-{
-    x264_stack_align( picture_clean, pic );
-}
-
 /****************************************************************************
  * x264_param_default:
  ****************************************************************************/
-static void param_default( x264_param_t *param )
+REALIGN_STACK void x264_param_default( x264_param_t *param )
 {
     /* */
     memset( param, 0, sizeof( x264_param_t ) );
@@ -433,11 +419,6 @@ static void param_default( x264_param_t *param )
     param->i_avcintra_flavor = X264_AVCINTRA_FLAVOR_PANASONIC;
 }
 
-void x264_param_default( x264_param_t *param )
-{
-    x264_stack_align( param_default, param );
-}
-
 static int param_apply_preset( x264_param_t *param, const char *preset )
 {
     char *end;
@@ -655,9 +636,9 @@ static int param_apply_tune( x264_param_t *param, const char *tune )
     return 0;
 }
 
-static int param_default_preset( x264_param_t *param, const char *preset, const char *tune )
+REALIGN_STACK int x264_param_default_preset( x264_param_t *param, const char *preset, const char *tune )
 {
-    param_default( param );
+    x264_param_default( param );
 
     if( preset && param_apply_preset( param, preset ) < 0 )
         return -1;
@@ -666,12 +647,7 @@ static int param_default_preset( x264_param_t *param, const char *preset, const
     return 0;
 }
 
-int x264_param_default_preset( x264_param_t *param, const char *preset, const char *tune )
-{
-    return x264_stack_align( param_default_preset, param, preset, tune );
-}
-
-static void param_apply_fastfirstpass( x264_param_t *param )
+REALIGN_STACK void x264_param_apply_fastfirstpass( x264_param_t *param )
 {
     /* Set faster options in case of turbo firstpass. */
     if( param->rc.b_stat_write && !param->rc.b_stat_read )
@@ -686,11 +662,6 @@ static void param_apply_fastfirstpass( x264_param_t *param )
     }
 }
 
-void x264_param_apply_fastfirstpass( x264_param_t *param )
-{
-    x264_stack_align( param_apply_fastfirstpass, param );
-}
-
 static int profile_string_to_int( const char *str )
 {
     if( !strcasecmp( str, "baseline" ) )
@@ -708,7 +679,7 @@ static int profile_string_to_int( const char *str )
     return -1;
 }
 
-static int param_apply_profile( x264_param_t *param, const char *profile )
+REALIGN_STACK int x264_param_apply_profile( x264_param_t *param, const char *profile )
 {
     if( !profile )
         return 0;
@@ -775,11 +746,6 @@ static int param_apply_profile( x264_param_t *param, const char *profile )
     return 0;
 }
 
-int x264_param_apply_profile( x264_param_t *param, const char *profile )
-{
-    return x264_stack_align( param_apply_profile, param, profile );
-}
-
 static int parse_enum( const char *arg, const char * const *names, int *dst )
 {
     for( int i = 0; names[i]; i++ )
@@ -841,7 +807,7 @@ static double atof_internal( const char *str, int *b_error )
 #define atoi(str) atoi_internal( str, &b_error )
 #define atof(str) atof_internal( str, &b_error )
 
-static int param_parse( x264_param_t *p, const char *name, const char *value )
+REALIGN_STACK int x264_param_parse( x264_param_t *p, const char *name, const char *value )
 {
     char *name_buf = NULL;
     int b_error = 0;
@@ -1342,11 +1308,6 @@ static int param_parse( x264_param_t *p, const char *name, const char *value )
     return b_error ? errortype : 0;
 }
 
-int x264_param_parse( x264_param_t *param, const char *name, const char *value )
-{
-    return x264_stack_align( param_parse, param, name, value );
-}
-
 /****************************************************************************
  * x264_param2string:
  ****************************************************************************/


=====================================
common/base.h
=====================================
@@ -47,7 +47,6 @@
 #include <string.h>
 #include <assert.h>
 #include <limits.h>
-#include "x264.h"
 
 /****************************************************************************
  * Macros
@@ -256,23 +255,23 @@ static ALWAYS_INLINE uint16_t x264_cabac_mvd_sum( uint8_t *mvdleft, uint8_t *mvd
 /****************************************************************************
  * General functions
  ****************************************************************************/
-void x264_reduce_fraction( uint32_t *n, uint32_t *d );
-void x264_reduce_fraction64( uint64_t *n, uint64_t *d );
+X264_API void x264_reduce_fraction( uint32_t *n, uint32_t *d );
+X264_API void x264_reduce_fraction64( uint64_t *n, uint64_t *d );
 
-void x264_log_default( void *p_unused, int i_level, const char *psz_fmt, va_list arg );
-void x264_log_internal( int i_level, const char *psz_fmt, ... );
+X264_API void x264_log_default( void *p_unused, int i_level, const char *psz_fmt, va_list arg );
+X264_API void x264_log_internal( int i_level, const char *psz_fmt, ... );
 
 /* x264_malloc : will do or emulate a memalign
  * you have to use x264_free for buffers allocated with x264_malloc */
-void *x264_malloc( int );
-void  x264_free( void * );
+X264_API void *x264_malloc( int );
+X264_API void  x264_free( void * );
 
 /* x264_slurp_file: malloc space for the whole file and read it */
-char *x264_slurp_file( const char *filename );
+X264_API char *x264_slurp_file( const char *filename );
 
 /* x264_param2string: return a (malloced) string containing most of
  * the encoding options */
-char *x264_param2string( x264_param_t *p, int b_res );
+X264_API char *x264_param2string( x264_param_t *p, int b_res );
 
 /****************************************************************************
  * Macros


=====================================
common/cpu.h
=====================================
@@ -26,8 +26,8 @@
 #ifndef X264_CPU_H
 #define X264_CPU_H
 
-uint32_t x264_cpu_detect( void );
-int      x264_cpu_num_processors( void );
+X264_API uint32_t x264_cpu_detect( void );
+X264_API int      x264_cpu_num_processors( void );
 void     x264_cpu_emms( void );
 void     x264_cpu_sfence( void );
 #if HAVE_MMX
@@ -46,28 +46,11 @@ void     x264_cpu_sfence( void );
 #endif
 #define x264_sfence x264_cpu_sfence
 
-/* kludge:
- * gcc can't give variables any greater alignment than the stack frame has.
- * We need 32 byte alignment for AVX2, so here we make sure that the stack is
- * aligned to 32 bytes.
- * gcc 4.2 introduced __attribute__((force_align_arg_pointer)) to fix this
- * problem, but I don't want to require such a new version.
- * aligning to 32 bytes only works if the compiler supports keeping that
- * alignment between functions (osdep.h handles manual alignment of arrays
- * if it doesn't).
- */
-#if HAVE_MMX && (STACK_ALIGNMENT > 16 || (ARCH_X86 && STACK_ALIGNMENT > 4))
-intptr_t x264_stack_align( void (*func)(), ... );
-#define x264_stack_align(func,...) x264_stack_align((void (*)())func, __VA_ARGS__)
-#else
-#define x264_stack_align(func,...) func(__VA_ARGS__)
-#endif
-
 typedef struct
 {
     const char *name;
     uint32_t flags;
 } x264_cpu_name_t;
-extern const x264_cpu_name_t x264_cpu_names[];
+X264_API extern const x264_cpu_name_t x264_cpu_names[];
 
 #endif


=====================================
common/frame.h
=====================================
@@ -261,13 +261,14 @@ void          x264_threadslice_cond_broadcast( x264_t *h, int pass );
 void          x264_threadslice_cond_wait( x264_t *h, int pass );
 
 #define x264_frame_push x264_template(frame_push)
-void          x264_frame_push( x264_frame_t **list, x264_frame_t *frame );
+X264_API void          x264_frame_push( x264_frame_t **list, x264_frame_t *frame );
 #define x264_frame_pop x264_template(frame_pop)
-x264_frame_t *x264_frame_pop( x264_frame_t **list );
+X264_API x264_frame_t *x264_frame_pop( x264_frame_t **list );
 #define x264_frame_unshift x264_template(frame_unshift)
-void          x264_frame_unshift( x264_frame_t **list, x264_frame_t *frame );
+X264_API void          x264_frame_unshift( x264_frame_t **list, x264_frame_t *frame );
 #define x264_frame_shift x264_template(frame_shift)
-x264_frame_t *x264_frame_shift( x264_frame_t **list );
+X264_API x264_frame_t *x264_frame_shift( x264_frame_t **list );
+
 #define x264_frame_push_unused x264_template(frame_push_unused)
 void          x264_frame_push_unused( x264_t *h, x264_frame_t *frame );
 #define x264_frame_push_blank_unused x264_template(frame_push_blank_unused)


=====================================
common/osdep.c
=====================================
@@ -27,11 +27,6 @@
 
 #include "osdep.h"
 
-#ifdef _WIN32
-#include <windows.h>
-#include <io.h>
-#endif
-
 #if SYS_WINDOWS
 #include <sys/types.h>
 #include <sys/timeb.h>
@@ -111,114 +106,3 @@ int x264_threading_init( void )
     return 0;
 }
 #endif
-
-#ifdef _WIN32
-/* Functions for dealing with Unicode on Windows. */
-FILE *x264_fopen( const char *filename, const char *mode )
-{
-    wchar_t filename_utf16[MAX_PATH];
-    wchar_t mode_utf16[16];
-    if( utf8_to_utf16( filename, filename_utf16 ) && utf8_to_utf16( mode, mode_utf16 ) )
-        return _wfopen( filename_utf16, mode_utf16 );
-    return NULL;
-}
-
-int x264_rename( const char *oldname, const char *newname )
-{
-    wchar_t oldname_utf16[MAX_PATH];
-    wchar_t newname_utf16[MAX_PATH];
-    if( utf8_to_utf16( oldname, oldname_utf16 ) && utf8_to_utf16( newname, newname_utf16 ) )
-    {
-        /* POSIX says that rename() removes the destination, but Win32 doesn't. */
-        _wunlink( newname_utf16 );
-        return _wrename( oldname_utf16, newname_utf16 );
-    }
-    return -1;
-}
-
-int x264_stat( const char *path, x264_struct_stat *buf )
-{
-    wchar_t path_utf16[MAX_PATH];
-    if( utf8_to_utf16( path, path_utf16 ) )
-        return _wstati64( path_utf16, buf );
-    return -1;
-}
-
-#if !HAVE_WINRT
-int x264_vfprintf( FILE *stream, const char *format, va_list arg )
-{
-    HANDLE console = NULL;
-    DWORD mode;
-
-    if( stream == stdout )
-        console = GetStdHandle( STD_OUTPUT_HANDLE );
-    else if( stream == stderr )
-        console = GetStdHandle( STD_ERROR_HANDLE );
-
-    /* Only attempt to convert to UTF-16 when writing to a non-redirected console screen buffer. */
-    if( GetConsoleMode( console, &mode ) )
-    {
-        char buf[4096];
-        wchar_t buf_utf16[4096];
-        va_list arg2;
-
-        va_copy( arg2, arg );
-        int length = vsnprintf( buf, sizeof(buf), format, arg2 );
-        va_end( arg2 );
-
-        if( length > 0 && length < sizeof(buf) )
-        {
-            /* WriteConsoleW is the most reliable way to output Unicode to a console. */
-            int length_utf16 = MultiByteToWideChar( CP_UTF8, 0, buf, length, buf_utf16, sizeof(buf_utf16)/sizeof(wchar_t) );
-            DWORD written;
-            WriteConsoleW( console, buf_utf16, length_utf16, &written, NULL );
-            return length;
-        }
-    }
-    return vfprintf( stream, format, arg );
-}
-
-int x264_is_pipe( const char *path )
-{
-    wchar_t path_utf16[MAX_PATH];
-    if( utf8_to_utf16( path, path_utf16 ) )
-        return WaitNamedPipeW( path_utf16, 0 );
-    return 0;
-}
-#endif
-
-#if defined(_MSC_VER) && _MSC_VER < 1900
-/* MSVC pre-VS2015 has broken snprintf/vsnprintf implementations which are incompatible with C99. */
-int x264_snprintf( char *s, size_t n, const char *fmt, ... )
-{
-    va_list arg;
-    va_start( arg, fmt );
-    int length = x264_vsnprintf( s, n, fmt, arg );
-    va_end( arg );
-    return length;
-}
-
-int x264_vsnprintf( char *s, size_t n, const char *fmt, va_list arg )
-{
-    int length = -1;
-
-    if( n )
-    {
-        va_list arg2;
-        va_copy( arg2, arg );
-        length = _vsnprintf( s, n, fmt, arg2 );
-        va_end( arg2 );
-
-        /* _(v)snprintf adds a null-terminator only if the length is less than the buffer size. */
-        if( length < 0 || length >= n )
-            s[n-1] = '\0';
-    }
-
-    /* _(v)snprintf returns a negative number if the length is greater than the buffer size. */
-    if( length < 0 )
-        return _vscprintf( fmt, arg );
-
-    return length;
-}
-#endif
-#endif


=====================================
common/osdep.h
=====================================
@@ -43,6 +43,13 @@
 #include <math.h>
 #endif
 
+#ifdef _WIN32
+#include <windows.h>
+#include <io.h>
+#endif
+
+#include "x264.h"
+
 #if !HAVE_LOG2F
 #define log2f(x) (logf(x)/0.693147180559945f)
 #define log2(x) (log(x)/0.693147180559945)
@@ -54,12 +61,6 @@
 #define strncasecmp _strnicmp
 #define strtok_r strtok_s
 #define S_ISREG(x) (((x) & S_IFMT) == S_IFREG)
-#if _MSC_VER < 1900
-int x264_snprintf( char *s, size_t n, const char *fmt, ... );
-int x264_vsnprintf( char *s, size_t n, const char *fmt, va_list arg );
-#define snprintf  x264_snprintf
-#define vsnprintf x264_vsnprintf
-#endif
 #else
 #include <strings.h>
 #endif
@@ -76,14 +77,81 @@ int x264_vsnprintf( char *s, size_t n, const char *fmt, va_list arg );
 #define strtok_r(str,delim,save) strtok(str,delim)
 #endif
 
+#if defined(_MSC_VER) && _MSC_VER < 1900
+/* MSVC pre-VS2015 has broken snprintf/vsnprintf implementations which are incompatible with C99. */
+static inline int x264_vsnprintf( char *s, size_t n, const char *fmt, va_list arg )
+{
+    int length = -1;
+
+    if( n )
+    {
+        va_list arg2;
+        va_copy( arg2, arg );
+        length = _vsnprintf( s, n, fmt, arg2 );
+        va_end( arg2 );
+
+        /* _(v)snprintf adds a null-terminator only if the length is less than the buffer size. */
+        if( length < 0 || length >= n )
+            s[n-1] = '\0';
+    }
+
+    /* _(v)snprintf returns a negative number if the length is greater than the buffer size. */
+    if( length < 0 )
+        return _vscprintf( fmt, arg );
+
+    return length;
+}
+
+static inline int x264_snprintf( char *s, size_t n, const char *fmt, ... )
+{
+    va_list arg;
+    va_start( arg, fmt );
+    int length = x264_vsnprintf( s, n, fmt, arg );
+    va_end( arg );
+    return length;
+}
+
+#define snprintf  x264_snprintf
+#define vsnprintf x264_vsnprintf
+#endif
+
 #ifdef _WIN32
 #define utf8_to_utf16( utf8, utf16 )\
     MultiByteToWideChar( CP_UTF8, MB_ERR_INVALID_CHARS, utf8, -1, utf16, sizeof(utf16)/sizeof(wchar_t) )
-FILE *x264_fopen( const char *filename, const char *mode );
-int x264_rename( const char *oldname, const char *newname );
+
+/* Functions for dealing with Unicode on Windows. */
+static inline FILE *x264_fopen( const char *filename, const char *mode )
+{
+    wchar_t filename_utf16[MAX_PATH];
+    wchar_t mode_utf16[16];
+    if( utf8_to_utf16( filename, filename_utf16 ) && utf8_to_utf16( mode, mode_utf16 ) )
+        return _wfopen( filename_utf16, mode_utf16 );
+    return NULL;
+}
+
+static inline int x264_rename( const char *oldname, const char *newname )
+{
+    wchar_t oldname_utf16[MAX_PATH];
+    wchar_t newname_utf16[MAX_PATH];
+    if( utf8_to_utf16( oldname, oldname_utf16 ) && utf8_to_utf16( newname, newname_utf16 ) )
+    {
+        /* POSIX says that rename() removes the destination, but Win32 doesn't. */
+        _wunlink( newname_utf16 );
+        return _wrename( oldname_utf16, newname_utf16 );
+    }
+    return -1;
+}
+
 #define x264_struct_stat struct _stati64
 #define x264_fstat _fstati64
-int x264_stat( const char *path, x264_struct_stat *buf );
+
+static inline int x264_stat( const char *path, x264_struct_stat *buf )
+{
+    wchar_t path_utf16[MAX_PATH];
+    if( utf8_to_utf16( path, path_utf16 ) )
+        return _wstati64( path_utf16, buf );
+    return -1;
+}
 #else
 #define x264_fopen       fopen
 #define x264_rename      rename
@@ -93,11 +161,49 @@ int x264_stat( const char *path, x264_struct_stat *buf );
 #endif
 
 /* mdate: return the current date in microsecond */
-int64_t x264_mdate( void );
+X264_API int64_t x264_mdate( void );
 
 #if defined(_WIN32) && !HAVE_WINRT
-int x264_vfprintf( FILE *stream, const char *format, va_list arg );
-int x264_is_pipe( const char *path );
+static inline int x264_vfprintf( FILE *stream, const char *format, va_list arg )
+{
+    HANDLE console = NULL;
+    DWORD mode;
+
+    if( stream == stdout )
+        console = GetStdHandle( STD_OUTPUT_HANDLE );
+    else if( stream == stderr )
+        console = GetStdHandle( STD_ERROR_HANDLE );
+
+    /* Only attempt to convert to UTF-16 when writing to a non-redirected console screen buffer. */
+    if( GetConsoleMode( console, &mode ) )
+    {
+        char buf[4096];
+        wchar_t buf_utf16[4096];
+        va_list arg2;
+
+        va_copy( arg2, arg );
+        int length = vsnprintf( buf, sizeof(buf), format, arg2 );
+        va_end( arg2 );
+
+        if( length > 0 && length < sizeof(buf) )
+        {
+            /* WriteConsoleW is the most reliable way to output Unicode to a console. */
+            int length_utf16 = MultiByteToWideChar( CP_UTF8, 0, buf, length, buf_utf16, sizeof(buf_utf16)/sizeof(wchar_t) );
+            DWORD written;
+            WriteConsoleW( console, buf_utf16, length_utf16, &written, NULL );
+            return length;
+        }
+    }
+    return vfprintf( stream, format, arg );
+}
+
+static inline int x264_is_pipe( const char *path )
+{
+    wchar_t path_utf16[MAX_PATH];
+    if( utf8_to_utf16( path, path_utf16 ) )
+        return WaitNamedPipeW( path_utf16, 0 );
+    return 0;
+}
 #else
 #define x264_vfprintf vfprintf
 #define x264_is_pipe(x) 0
@@ -163,6 +269,12 @@ int x264_is_pipe( const char *path );
 #define ALIGNED_ARRAY_64 ALIGNED_ARRAY_16
 #endif
 
+#if STACK_ALIGNMENT > 16 || (ARCH_X86 && STACK_ALIGNMENT > 4)
+#define REALIGN_STACK __attribute__((force_align_arg_pointer))
+#else
+#define REALIGN_STACK
+#endif
+
 #if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
 #define UNUSED __attribute__((unused))
 #define ALWAYS_INLINE __attribute__((always_inline)) inline
@@ -247,7 +359,7 @@ static inline int x264_pthread_create( x264_pthread_t *t, void *a, void *(*f)(vo
 #endif
 
 #if HAVE_WIN32THREAD || PTW32_STATIC_LIB
-int x264_threading_init( void );
+X264_API int x264_threading_init( void );
 #else
 #define x264_threading_init() 0
 #endif


=====================================
common/set.h
=====================================
@@ -53,8 +53,6 @@ typedef struct
     int b_constraint_set1;
     int b_constraint_set2;
     int b_constraint_set3;
-    int b_constraint_set4;
-    int b_constraint_set5;
 
     int i_log2_max_frame_num;
 


=====================================
common/tables.h
=====================================
@@ -33,7 +33,7 @@ typedef struct
     uint8_t i_size;
 } vlc_t;
 
-extern const x264_level_t x264_levels[];
+X264_API extern const x264_level_t x264_levels[];
 
 extern const uint8_t x264_exp2_lut[64];
 extern const float   x264_log2_lut[128];


=====================================
common/threadpool.c
=====================================
@@ -47,7 +47,7 @@ struct x264_threadpool_t
     x264_sync_frame_list_t done;   /* list of jobs that have finished processing */
 };
 
-static void *threadpool_thread_internal( x264_threadpool_t *pool )
+REALIGN_STACK static void *threadpool_thread( x264_threadpool_t *pool )
 {
     if( pool->init_func )
         pool->init_func( pool->init_arg );
@@ -72,11 +72,6 @@ static void *threadpool_thread_internal( x264_threadpool_t *pool )
     return NULL;
 }
 
-static void *threadpool_thread( x264_threadpool_t *pool )
-{
-    return (void*)x264_stack_align( threadpool_thread_internal, pool );
-}
-
 int x264_threadpool_init( x264_threadpool_t **p_pool, int threads,
                           void (*init_func)(void *), void *init_arg )
 {


=====================================
common/threadpool.h
=====================================
@@ -30,14 +30,14 @@ typedef struct x264_threadpool_t x264_threadpool_t;
 
 #if HAVE_THREAD
 #define x264_threadpool_init x264_template(threadpool_init)
-int   x264_threadpool_init( x264_threadpool_t **p_pool, int threads,
-                            void (*init_func)(void *), void *init_arg );
+X264_API int   x264_threadpool_init( x264_threadpool_t **p_pool, int threads,
+                                     void (*init_func)(void *), void *init_arg );
 #define x264_threadpool_run x264_template(threadpool_run)
-void  x264_threadpool_run( x264_threadpool_t *pool, void *(*func)(void *), void *arg );
+X264_API void  x264_threadpool_run( x264_threadpool_t *pool, void *(*func)(void *), void *arg );
 #define x264_threadpool_wait x264_template(threadpool_wait)
-void *x264_threadpool_wait( x264_threadpool_t *pool, void *arg );
+X264_API void *x264_threadpool_wait( x264_threadpool_t *pool, void *arg );
 #define x264_threadpool_delete x264_template(threadpool_delete)
-void  x264_threadpool_delete( x264_threadpool_t *pool );
+X264_API void  x264_threadpool_delete( x264_threadpool_t *pool );
 #else
 #define x264_threadpool_init(p,t,f,a) -1
 #define x264_threadpool_run(p,f,a)


=====================================
common/x86/cpu-a.asm
=====================================
@@ -78,33 +78,7 @@ cglobal cpu_sfence
     sfence
     ret
 
-%if ARCH_X86_64
-
-;-----------------------------------------------------------------------------
-; intptr_t stack_align( void (*func)(void*), ... ); (up to 5 args)
-;-----------------------------------------------------------------------------
-cvisible stack_align
-    mov      rax, r0mp
-    mov       r0, r1mp
-    mov       r1, r2mp
-    mov       r2, r3mp
-    mov       r3, r4mp
-    mov       r4, r5mp
-    push     rbp
-    mov      rbp, rsp
-%if WIN64
-    sub      rsp, 40 ; shadow space + r4
-%endif
-    and      rsp, ~(STACK_ALIGNMENT-1)
-%if WIN64
-    mov [rsp+32], r4
-%endif
-    call     rax
-    leave
-    ret
-
-%else
-
+%if ARCH_X86_64 == 0
 ;-----------------------------------------------------------------------------
 ; int cpu_cpuid_test( void )
 ; return 0 if unsupported
@@ -130,24 +104,4 @@ cglobal cpu_cpuid_test
     pop     ebx
     popfd
     ret
-
-cvisible stack_align
-    push      ebp
-    mov       ebp, esp
-    sub       esp, 20
-    and       esp, ~(STACK_ALIGNMENT-1)
-    mov        r0, [ebp+12]
-    mov        r1, [ebp+16]
-    mov        r2, [ebp+20]
-    mov  [esp+ 0], r0
-    mov  [esp+ 4], r1
-    mov  [esp+ 8], r2
-    mov        r0, [ebp+24]
-    mov        r1, [ebp+28]
-    mov  [esp+12], r0
-    mov  [esp+16], r1
-    call [ebp+ 8]
-    leave
-    ret
-
 %endif


=====================================
configure
=====================================
@@ -151,9 +151,9 @@ cc_check() {
     done
     echo "int main (void) { $3 return 0; }" >> conftest.c
     if [ $compiler_style = MS ]; then
-        cc_cmd="$CC conftest.c $(cc_cflags $CFLAGS $CHECK_CFLAGS $2) -link $(cl_ldflags $2 $LDFLAGSCLI $LDFLAGS)"
+        cc_cmd="$CC conftest.c $(cc_cflags $CFLAGS $CFLAGSCLI $CHECK_CFLAGS $2) -link $(cl_ldflags $2 $LDFLAGSCLI $LDFLAGS)"
     else
-        cc_cmd="$CC conftest.c $CFLAGS $CHECK_CFLAGS $2 $LDFLAGSCLI $LDFLAGS -o conftest"
+        cc_cmd="$CC conftest.c $CFLAGS $CFLAGSCLI $CHECK_CFLAGS $2 $LDFLAGSCLI $LDFLAGS -o conftest"
     fi
     if $cc_cmd >conftest.log 2>&1; then
         res=$?
@@ -380,6 +380,8 @@ opencl="yes"
 vsx="auto"
 
 CFLAGS="$CFLAGS -Wall -I. -I\$(SRCPATH)"
+CFLAGSSO="$CFLAGSSO -DX264_API_EXPORTS"
+CFLAGSCLI="$CFLAGSCLI"
 LDFLAGS="$LDFLAGS"
 LDFLAGSCLI="$LDFLAGSCLI"
 ASFLAGS="$ASFLAGS -I. -I\$(SRCPATH)"
@@ -1007,6 +1009,7 @@ if [ "$cli_libx264" = "system" -a "$shared" != "yes" ] ; then
     [ "$static" = "yes" ] && die "Option --system-libx264 can not be used together with --enable-static"
     if pkg_check x264 ; then
         X264_LIBS="$($PKGCONFIG --libs x264)"
+        X264_CFLAGS="$($PKGCONFIG --cflags x264)"
         X264_INCLUDE_DIR="${X264_INCLUDE_DIR-$($PKGCONFIG --variable=includedir x264)}"
         configure_system_override "$X264_INCLUDE_DIR" || die "Detection of system libx264 configuration failed"
     else
@@ -1308,6 +1311,10 @@ if cc_check '' -Wmaybe-uninitialized ; then
     CFLAGS="-Wno-maybe-uninitialized $CFLAGS"
 fi
 
+if [ $compiler = GNU ] && cc_check '' -fvisibility=hidden ; then
+    CFLAGS="$CFLAGS -fvisibility=hidden"
+fi
+
 if [ $compiler = ICC -o $compiler = ICL ] ; then
     if cc_check 'extras/intel_dispatcher.h' '' 'x264_intel_dispatcher_override();' ; then
         define HAVE_INTEL_DISPATCHER
@@ -1367,10 +1374,16 @@ ${SRCPATH}/version.sh >> x264_config.h
 
 if [ "$cli_libx264" = "system" ] ; then
     if [ "$shared" = "yes" ]; then
-        CLI_LIBX264='$(SONAME)'
+        if [ "$SYS" = "WINDOWS" -o "$SYS" = "CYGWIN" ]; then
+            CLI_LIBX264='$(IMPLIBNAME)'
+        else
+            CLI_LIBX264='$(SONAME)'
+        fi
+        CFLAGSCLI="$CFLAGSCLI -DX264_API_IMPORTS"
     else
         CLI_LIBX264=
         LDFLAGSCLI="$X264_LIBS $LDFLAGSCLI"
+        CFLAGSCLI="$CFLAGSCLI $X264_CFLAGS"
         cc_check 'stdint.h x264.h' '' 'x264_encoder_open(0);' || die "System libx264 can't be used for compilation of this version"
     fi
 else
@@ -1409,7 +1422,11 @@ else # gcc/icc
     LIBX264=libx264.a
     [ -n "$RC" ] && RCFLAGS="$RCFLAGS -I. -o "
 fi
-[ $compiler != GNU ] && CFLAGS="$(cc_cflags $CFLAGS)"
+if [ $compiler != GNU ]; then
+    CFLAGS="$(cc_cflags $CFLAGS)"
+    CFLAGSSO="$(cc_cflags $CFLAGSSO)"
+    CFLAGSCLI="$(cc_cflags $CFLAGSCLI)"
+fi
 if [ $compiler = ICC -o $compiler = ICL ]; then
     # icc does not define __SSE__ until SSE2 optimization and icl never defines it or _M_IX86_FP
     [ \( $ARCH = X86_64 -o $ARCH = X86 \) -a $asm = yes ] && ! cpp_check "" "" "defined(__SSE__)" && define __SSE__
@@ -1448,13 +1465,17 @@ SYS_ARCH=$ARCH
 SYS=$SYS
 CC=$CC
 CFLAGS=$CFLAGS
+CFLAGSSO=$CFLAGSSO
+CFLAGSCLI=$CFLAGSCLI
 COMPILER=$compiler
 COMPILER_STYLE=$compiler_style
 DEPMM=$DEPMM
 DEPMT=$DEPMT
 LD=$LD
 LDFLAGS=$LDFLAGS
+LDFLAGSCLI=$LDFLAGSCLI
 LIBX264=$LIBX264
+CLI_LIBX264=$CLI_LIBX264
 AR=$AR
 RANLIB=$RANLIB
 STRIP=$STRIP
@@ -1490,14 +1511,7 @@ if [ "$shared" = "yes" ]; then
         echo "SONAME=libx264-$API.dll" >> config.mak
         if [ $compiler_style = MS ]; then
             echo 'IMPLIBNAME=libx264.dll.lib' >> config.mak
-            # GNU ld on windows defaults to exporting all global functions if there are no explicit __declspec(dllexport) declarations
-            # MSVC link does not act similarly, so it is required to make an export definition out of x264.h and use it at link time
-            echo "SOFLAGS=-dll -def:x264.def -implib:\$(IMPLIBNAME) $SOFLAGS" >> config.mak
-            echo "EXPORTS" > x264.def
-            # export API functions
-            grep "^\(int\|void\|x264_t\).*x264" ${SRCPATH}/x264.h | sed -e "s/.*\(x264.*\)(.*/\1/;s/open/open_$API/g" >> x264.def
-            # export API variables/data. must be flagged with the DATA keyword
-            grep "extern.*x264" ${SRCPATH}/x264.h | sed -e "s/.*\(x264\w*\)\W.*/\1 DATA/;" >> x264.def
+            echo "SOFLAGS=-dll -implib:\$(IMPLIBNAME) $SOFLAGS" >> config.mak
         else
             echo 'IMPLIBNAME=libx264.dll.a' >> config.mak
             echo "SOFLAGS=-shared -Wl,--out-implib,\$(IMPLIBNAME) $SOFLAGS" >> config.mak
@@ -1524,9 +1538,6 @@ if [ "$static" = "yes" ]; then
     echo 'install: install-lib-static' >> config.mak
 fi
 
-echo "LDFLAGSCLI = $LDFLAGSCLI" >> config.mak
-echo "CLI_LIBX264 = $CLI_LIBX264" >> config.mak
-
 cat > x264.pc << EOF
 prefix=$prefix
 exec_prefix=$exec_prefix
@@ -1535,10 +1546,10 @@ includedir=$includedir
 
 Name: x264
 Description: H.264 (MPEG4 AVC) encoder library
-Version: $(grep POINTVER < x264_config.h | sed -e 's/.* "//; s/".*//')
+Version: $(grep POINTVER < x264_config.h | sed -e 's/.* "//; s/".*//; s/ .*//')
 Libs: -L$libdir -lx264 $([ "$shared" = "yes" ] || echo $libpthread $libm $libdl)
 Libs.private: $([ "$shared" = "yes" ] && echo $libpthread $libm $libdl)
-Cflags: -I$includedir
+Cflags: -I$includedir $([ "$shared" = "yes" ] && echo "-DX264_API_IMPORTS")
 EOF
 
 filters="crop select_every"


=====================================
encoder/analyse.c
=====================================
@@ -145,7 +145,7 @@ static int init_costs( x264_t *h, float *logs, int qp )
     if( h->cost_mv[qp] )
         return 0;
 
-    int mv_range = h->param.analyse.i_mv_range;
+    int mv_range = h->param.analyse.i_mv_range << PARAM_INTERLACED;
     int lambda = x264_lambda_tab[qp];
     /* factor of 4 from qpel, 2 from sign, and 2 because mv can be opposite from mvp */
     CHECKED_MALLOC( h->cost_mv[qp], (4*4*mv_range + 1) * sizeof(uint16_t) );
@@ -178,7 +178,7 @@ fail:
 
 int x264_analyse_init_costs( x264_t *h )
 {
-    int mv_range = h->param.analyse.i_mv_range;
+    int mv_range = h->param.analyse.i_mv_range << PARAM_INTERLACED;
     float *logs = x264_malloc( (2*4*mv_range+1) * sizeof(float) );
     if( !logs )
         return -1;
@@ -203,7 +203,7 @@ fail:
 
 void x264_analyse_free_costs( x264_t *h )
 {
-    int mv_range = h->param.analyse.i_mv_range;
+    int mv_range = h->param.analyse.i_mv_range << PARAM_INTERLACED;
     for( int i = 0; i < QP_MAX+1; i++ )
     {
         if( h->cost_mv[i] )
@@ -783,10 +783,11 @@ static void mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_inter
             {
                 if( !h->mb.b_lossless && predict_mode[5] >= 0 )
                 {
-                    ALIGNED_ARRAY_16( int32_t, satd,[9] );
+                    ALIGNED_ARRAY_16( int32_t, satd,[4] );
                     h->pixf.intra_mbcmp_x3_8x8( p_src_by, edge, satd );
                     int favor_vertical = satd[I_PRED_4x4_H] > satd[I_PRED_4x4_V];
-                    satd[i_pred_mode] -= 3 * lambda;
+                    if( i_pred_mode < 3 )
+                        satd[i_pred_mode] -= 3 * lambda;
                     for( int i = 2; i >= 0; i-- )
                     {
                         int cost = satd[i];
@@ -901,10 +902,11 @@ static void mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_inter
             {
                 if( !h->mb.b_lossless && predict_mode[5] >= 0 )
                 {
-                    ALIGNED_ARRAY_16( int32_t, satd,[9] );
+                    ALIGNED_ARRAY_16( int32_t, satd,[4] );
                     h->pixf.intra_mbcmp_x3_4x4( p_src_by, p_dst_by, satd );
                     int favor_vertical = satd[I_PRED_4x4_H] > satd[I_PRED_4x4_V];
-                    satd[i_pred_mode] -= 3 * lambda;
+                    if( i_pred_mode < 3 )
+                        satd[i_pred_mode] -= 3 * lambda;
                     i_best = satd[I_PRED_4x4_DC]; a->i_predict4x4[idx] = I_PRED_4x4_DC;
                     COPY2_IF_LT( i_best, satd[I_PRED_4x4_H], a->i_predict4x4[idx], I_PRED_4x4_H );
                     COPY2_IF_LT( i_best, satd[I_PRED_4x4_V], a->i_predict4x4[idx], I_PRED_4x4_V );


=====================================
encoder/api.c
=====================================
@@ -73,7 +73,7 @@ typedef struct x264_api_t
     int  (*encoder_invalidate_reference)( x264_t *, int64_t pts );
 } x264_api_t;
 
-static x264_api_t *encoder_open( x264_param_t *param )
+REALIGN_STACK x264_t *x264_encoder_open( x264_param_t *param )
 {
     x264_api_t *api = calloc( 1, sizeof( x264_api_t ) );
     if( !api )
@@ -118,82 +118,77 @@ static x264_api_t *encoder_open( x264_param_t *param )
         return NULL;
     }
 
-    return api;
-}
-
-x264_t *x264_encoder_open( x264_param_t *param )
-{
     /* x264_t is opaque */
-    return (x264_t *)x264_stack_align( encoder_open, param );
+    return (x264_t *)api;
 }
 
-void x264_encoder_close( x264_t *h )
+REALIGN_STACK void x264_encoder_close( x264_t *h )
 {
     x264_api_t *api = (x264_api_t *)h;
 
-    x264_stack_align( api->encoder_close, api->x264 );
+    api->encoder_close( api->x264 );
     free( api );
 }
 
-void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal )
+REALIGN_STACK void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal )
 {
     x264_api_t *api = (x264_api_t *)h;
 
-    x264_stack_align( api->nal_encode, api->x264, dst, nal );
+    api->nal_encode( api->x264, dst, nal );
 }
 
-int x264_encoder_reconfig( x264_t *h, x264_param_t *param)
+REALIGN_STACK int x264_encoder_reconfig( x264_t *h, x264_param_t *param)
 {
     x264_api_t *api = (x264_api_t *)h;
 
-    return x264_stack_align( api->encoder_reconfig, api->x264, param );
+    return api->encoder_reconfig( api->x264, param );
 }
 
-void x264_encoder_parameters( x264_t *h, x264_param_t *param )
+REALIGN_STACK void x264_encoder_parameters( x264_t *h, x264_param_t *param )
 {
     x264_api_t *api = (x264_api_t *)h;
 
-    x264_stack_align( api->encoder_parameters, api->x264, param );
+    api->encoder_parameters( api->x264, param );
 }
 
-int x264_encoder_headers( x264_t *h, x264_nal_t **pp_nal, int *pi_nal )
+REALIGN_STACK int x264_encoder_headers( x264_t *h, x264_nal_t **pp_nal, int *pi_nal )
 {
     x264_api_t *api = (x264_api_t *)h;
 
-    return x264_stack_align( api->encoder_headers, api->x264, pp_nal, pi_nal );
+    return api->encoder_headers( api->x264, pp_nal, pi_nal );
 }
 
-int x264_encoder_encode( x264_t *h, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_in, x264_picture_t *pic_out )
+REALIGN_STACK int x264_encoder_encode( x264_t *h, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_in, x264_picture_t *pic_out )
 {
     x264_api_t *api = (x264_api_t *)h;
 
-    return x264_stack_align( api->encoder_encode, api->x264, pp_nal, pi_nal, pic_in, pic_out );
+    return api->encoder_encode( api->x264, pp_nal, pi_nal, pic_in, pic_out );
 }
 
-int x264_encoder_delayed_frames( x264_t *h )
+REALIGN_STACK int x264_encoder_delayed_frames( x264_t *h )
 {
     x264_api_t *api = (x264_api_t *)h;
 
-    return x264_stack_align( api->encoder_delayed_frames, api->x264 );
+    return api->encoder_delayed_frames( api->x264 );
 }
 
-int x264_encoder_maximum_delayed_frames( x264_t *h )
+REALIGN_STACK int x264_encoder_maximum_delayed_frames( x264_t *h )
 {
     x264_api_t *api = (x264_api_t *)h;
 
-    return x264_stack_align( api->encoder_maximum_delayed_frames, api->x264 );
+    return api->encoder_maximum_delayed_frames( api->x264 );
 }
 
-void x264_encoder_intra_refresh( x264_t *h )
+REALIGN_STACK void x264_encoder_intra_refresh( x264_t *h )
 {
     x264_api_t *api = (x264_api_t *)h;
 
-    x264_stack_align( api->encoder_intra_refresh, api->x264 );
+    api->encoder_intra_refresh( api->x264 );
 }
 
-int x264_encoder_invalidate_reference( x264_t *h, int64_t pts )
+REALIGN_STACK int x264_encoder_invalidate_reference( x264_t *h, int64_t pts )
 {
     x264_api_t *api = (x264_api_t *)h;
 
-    return x264_stack_align( api->encoder_invalidate_reference, api->x264, pts );
+    return api->encoder_invalidate_reference( api->x264, pts );
 }


=====================================
encoder/encoder.c
=====================================
@@ -1757,10 +1757,9 @@ x264_t *x264_encoder_open( x264_param_t *param )
 
     const char *profile = h->sps->i_profile_idc == PROFILE_BASELINE ? "Constrained Baseline" :
                           h->sps->i_profile_idc == PROFILE_MAIN ? "Main" :
-                          h->sps->i_profile_idc == PROFILE_HIGH ?
-                              (h->sps->b_constraint_set4 ? (h->sps->b_constraint_set5 ? "Constrained High" : "Progressive High") : "High") :
+                          h->sps->i_profile_idc == PROFILE_HIGH ? "High" :
                           h->sps->i_profile_idc == PROFILE_HIGH10 ?
-                              (h->sps->b_constraint_set3 ? "High 10 Intra" : (h->sps->b_constraint_set4 ? "Progressive High 10" : "High 10")) :
+                              (h->sps->b_constraint_set3 ? "High 10 Intra" : "High 10") :
                           h->sps->i_profile_idc == PROFILE_HIGH422 ?
                               (h->sps->b_constraint_set3 ? "High 4:2:2 Intra" : "High 4:2:2") :
                           h->sps->b_constraint_set3 ? "High 4:4:4 Intra" : "High 4:4:4 Predictive";


=====================================
encoder/lookahead.c
=====================================
@@ -87,7 +87,7 @@ static void lookahead_slicetype_decide( x264_t *h )
     x264_pthread_mutex_unlock( &h->lookahead->ofbuf.mutex );
 }
 
-static void *lookahead_thread_internal( x264_t *h )
+REALIGN_STACK static void *lookahead_thread( x264_t *h )
 {
     while( !h->lookahead->b_exit_thread )
     {
@@ -122,10 +122,6 @@ static void *lookahead_thread_internal( x264_t *h )
     return NULL;
 }
 
-static void *lookahead_thread( x264_t *h )
-{
-    return (void*)x264_stack_align( lookahead_thread_internal, h );
-}
 #endif
 
 int x264_lookahead_init( x264_t *h, int i_slicetype_length )


=====================================
encoder/set.c
=====================================
@@ -133,8 +133,6 @@ void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param )
     /* Never set constraint_set2, it is not necessary and not used in real world. */
     sps->b_constraint_set2  = 0;
     sps->b_constraint_set3  = 0;
-    sps->b_constraint_set4  = sps->i_profile_idc >= PROFILE_MAIN && sps->i_profile_idc <= PROFILE_HIGH10 && sps->b_frame_mbs_only;
-    sps->b_constraint_set5  = (sps->i_profile_idc == PROFILE_MAIN || sps->i_profile_idc == PROFILE_HIGH) && param->i_bframe == 0;
 
     sps->i_level_idc = param->i_level_idc;
     if( param->i_level_idc == 9 && ( sps->i_profile_idc == PROFILE_BASELINE || sps->i_profile_idc == PROFILE_MAIN ) )
@@ -311,10 +309,8 @@ void x264_sps_write( bs_t *s, x264_sps_t *sps )
     bs_write1( s, sps->b_constraint_set1 );
     bs_write1( s, sps->b_constraint_set2 );
     bs_write1( s, sps->b_constraint_set3 );
-    bs_write1( s, sps->b_constraint_set4 );
-    bs_write1( s, sps->b_constraint_set5 );
 
-    bs_write( s, 2, 0 );    /* reserved */
+    bs_write( s, 4, 0 );    /* reserved */
 
     bs_write( s, 8, sps->i_level_idc );
 


=====================================
encoder/slicetype.c
=====================================
@@ -405,8 +405,7 @@ void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int
                  * because scale has a much wider range than offset (because of denom), so
                  * it should almost never need to be clamped. */
                 cur_offset = x264_clip3( cur_offset, -128, 127 );
-                cur_scale = (1 << mindenom) * (fenc_mean[plane] - cur_offset) / ref_mean[plane] + 0.5f;
-                cur_scale = x264_clip3( cur_scale, 0, 127 );
+                cur_scale = x264_clip3f( (1 << mindenom) * (fenc_mean[plane] - cur_offset) / ref_mean[plane] + 0.5f, 0, 127 );
             }
             int start_offset = x264_clip3( cur_offset - offset_dist, -128, 127 );
             int end_offset   = x264_clip3( cur_offset + offset_dist, -128, 127 );


=====================================
tools/checkasm.c
=====================================
@@ -1578,13 +1578,14 @@ static int check_mc( int cpu_ref, int cpu_new )
             intptr_t src_stride = plane_specs[i].src_stride;
             intptr_t dst_stride = ALIGN( w, 16 );
             intptr_t offv = dst_stride*h + 16;
+            pixel *src1 = pbuf1 + X264_MAX(0, -src_stride) * (h-1);
 
             for( int pw = 3; pw <= 4; pw++ )
             {
                 memset( pbuf3, 0, 0x1000 );
                 memset( pbuf4, 0, 0x1000 );
-                call_c( mc_c.plane_copy_deinterleave_rgb, pbuf3, dst_stride, pbuf3+offv, dst_stride, pbuf3+2*offv, dst_stride, pbuf1, src_stride, pw, w, h );
-                call_a( mc_a.plane_copy_deinterleave_rgb, pbuf4, dst_stride, pbuf4+offv, dst_stride, pbuf4+2*offv, dst_stride, pbuf1, src_stride, pw, w, h );
+                call_c( mc_c.plane_copy_deinterleave_rgb, pbuf3, dst_stride, pbuf3+offv, dst_stride, pbuf3+2*offv, dst_stride, src1, src_stride, pw, w, h );
+                call_a( mc_a.plane_copy_deinterleave_rgb, pbuf4, dst_stride, pbuf4+offv, dst_stride, pbuf4+2*offv, dst_stride, src1, src_stride, pw, w, h );
                 for( int y = 0; y < h; y++ )
                     if( memcmp( pbuf3+y*dst_stride+0*offv, pbuf4+y*dst_stride+0*offv, w ) ||
                         memcmp( pbuf3+y*dst_stride+1*offv, pbuf4+y*dst_stride+1*offv, w ) ||
@@ -2913,7 +2914,7 @@ static int check_all_flags( void )
     return ret;
 }
 
-static int main_internal( int argc, char **argv )
+REALIGN_STACK int main( int argc, char **argv )
 {
 #ifdef _WIN32
     /* Disable the Windows Error Reporting dialog */
@@ -2972,8 +2973,3 @@ static int main_internal( int argc, char **argv )
         print_bench();
     return 0;
 }
-
-int main( int argc, char **argv )
-{
-    return x264_stack_align( main_internal, argc, argv );
-}


=====================================
x264.c
=====================================
@@ -373,7 +373,7 @@ static void print_version_info( void )
 #endif
 }
 
-static int main_internal( int argc, char **argv )
+REALIGN_STACK int main( int argc, char **argv )
 {
     if( argc == 4 && !strcmp( argv[1], "--autocomplete" ) )
         return x264_cli_autocomplete( argv[2], argv[3] );
@@ -428,11 +428,6 @@ static int main_internal( int argc, char **argv )
     return ret;
 }
 
-int main( int argc, char **argv )
-{
-    return x264_stack_align( main_internal, argc, argv );
-}
-
 static char const *strtable_lookup( const char * const table[], int idx )
 {
     int i = 0; while( table[i] ) i++;


=====================================
x264.h
=====================================
@@ -45,7 +45,20 @@ extern "C" {
 
 #include "x264_config.h"
 
-#define X264_BUILD 157
+#define X264_BUILD 158
+
+#ifdef _WIN32
+#   define X264_DLL_IMPORT __declspec(dllimport)
+#   define X264_DLL_EXPORT __declspec(dllexport)
+#else
+#   if defined(__GNUC__) && (__GNUC__ >= 4)
+#       define X264_DLL_IMPORT
+#       define X264_DLL_EXPORT __attribute__((visibility("default")))
+#   else
+#       define X264_DLL_IMPORT
+#       define X264_DLL_EXPORT
+#   endif
+#endif
 
 /* Application developers planning to link against a shared library version of
  * libx264 from a Microsoft Visual Studio or similar development environment
@@ -53,9 +66,13 @@ extern "C" {
  * This clause does not apply to MinGW, similar development environments, or non
  * Windows platforms. */
 #ifdef X264_API_IMPORTS
-#define X264_API __declspec(dllimport)
+#   define X264_API X264_DLL_IMPORT
 #else
-#define X264_API
+#   ifdef X264_API_EXPORTS
+#       define X264_API X264_DLL_EXPORT
+#   else
+#       define X264_API
+#   endif
 #endif
 
 /* x264_t:
@@ -568,7 +585,7 @@ typedef struct x264_param_t
     void (*nalu_process)( x264_t *h, x264_nal_t *nal, void *opaque );
 } x264_param_t;
 
-void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal );
+X264_API void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal );
 
 /****************************************************************************
  * H.264 level restriction information
@@ -600,7 +617,7 @@ X264_API extern const x264_level_t x264_levels[];
 
 /* x264_param_default:
  *      fill x264_param_t with default values and do CPU detection */
-void    x264_param_default( x264_param_t * );
+X264_API void x264_param_default( x264_param_t * );
 
 /* x264_param_parse:
  *  set one parameter by name.
@@ -611,7 +628,7 @@ void    x264_param_default( x264_param_t * );
  *  value=NULL means "true" for boolean options, but is a BAD_VALUE for non-booleans. */
 #define X264_PARAM_BAD_NAME  (-1)
 #define X264_PARAM_BAD_VALUE (-2)
-int x264_param_parse( x264_param_t *, const char *name, const char *value );
+X264_API int x264_param_parse( x264_param_t *, const char *name, const char *value );
 
 /****************************************************************************
  * Advanced parameter handling functions
@@ -655,13 +672,13 @@ static const char * const x264_tune_names[] = { "film", "animation", "grain", "s
  *      film, animation, grain, stillimage, psnr, and ssim are psy tunings.
  *
  *      returns 0 on success, negative on failure (e.g. invalid preset/tune name). */
-int     x264_param_default_preset( x264_param_t *, const char *preset, const char *tune );
+X264_API int x264_param_default_preset( x264_param_t *, const char *preset, const char *tune );
 
 /* x264_param_apply_fastfirstpass:
  *      If first-pass mode is set (rc.b_stat_read == 0, rc.b_stat_write == 1),
  *      modify the encoder settings to disable options generally not useful on
  *      the first pass. */
-void    x264_param_apply_fastfirstpass( x264_param_t * );
+X264_API void x264_param_apply_fastfirstpass( x264_param_t * );
 
 /* x264_param_apply_profile:
  *      Applies the restrictions of the given profile.
@@ -676,7 +693,7 @@ static const char * const x264_profile_names[] = { "baseline", "main", "high", "
  *      decrease them.
  *
  *      returns 0 on success, negative on failure (e.g. invalid profile name). */
-int     x264_param_apply_profile( x264_param_t *, const char *profile );
+X264_API int x264_param_apply_profile( x264_param_t *, const char *profile );
 
 /****************************************************************************
  * Picture structures and functions
@@ -846,17 +863,17 @@ typedef struct x264_picture_t
 /* x264_picture_init:
  *  initialize an x264_picture_t.  Needs to be done if the calling application
  *  allocates its own x264_picture_t as opposed to using x264_picture_alloc. */
-void x264_picture_init( x264_picture_t *pic );
+X264_API void x264_picture_init( x264_picture_t *pic );
 
 /* x264_picture_alloc:
  *  alloc data for a picture. You must call x264_picture_clean on it.
  *  returns 0 on success, or -1 on malloc failure or invalid colorspace. */
-int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height );
+X264_API int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height );
 
 /* x264_picture_clean:
  *  free associated resource for a x264_picture_t allocated with
  *  x264_picture_alloc ONLY */
-void x264_picture_clean( x264_picture_t *pic );
+X264_API void x264_picture_clean( x264_picture_t *pic );
 
 /****************************************************************************
  * Encoder functions
@@ -871,7 +888,7 @@ void x264_picture_clean( x264_picture_t *pic );
 
 /* x264_encoder_open:
  *      create a new encoder handler, all parameters from x264_param_t are copied */
-x264_t *x264_encoder_open( x264_param_t * );
+X264_API x264_t *x264_encoder_open( x264_param_t * );
 
 /* x264_encoder_reconfig:
  *      various parameters from x264_param_t are copied.
@@ -886,7 +903,7 @@ x264_t *x264_encoder_open( x264_param_t * );
  *      more so than for other presets, many of the speed shortcuts used in ultrafast cannot be
  *      switched out of; using reconfig to switch between ultrafast and other presets is not
  *      recommended without a more fine-grained breakdown of parameters to take this into account. */
-int     x264_encoder_reconfig( x264_t *, x264_param_t * );
+X264_API int x264_encoder_reconfig( x264_t *, x264_param_t * );
 /* x264_encoder_parameters:
  *      copies the current internal set of parameters to the pointer provided
  *      by the caller.  useful when the calling application needs to know
@@ -894,32 +911,32 @@ int     x264_encoder_reconfig( x264_t *, x264_param_t * );
  *      of the encoder after multiple x264_encoder_reconfig calls.
  *      note that the data accessible through pointers in the returned param struct
  *      (e.g. filenames) should not be modified by the calling application. */
-void    x264_encoder_parameters( x264_t *, x264_param_t * );
+X264_API void x264_encoder_parameters( x264_t *, x264_param_t * );
 /* x264_encoder_headers:
  *      return the SPS and PPS that will be used for the whole stream.
  *      *pi_nal is the number of NAL units outputted in pp_nal.
  *      returns the number of bytes in the returned NALs.
  *      returns negative on error.
  *      the payloads of all output NALs are guaranteed to be sequential in memory. */
-int     x264_encoder_headers( x264_t *, x264_nal_t **pp_nal, int *pi_nal );
+X264_API int x264_encoder_headers( x264_t *, x264_nal_t **pp_nal, int *pi_nal );
 /* x264_encoder_encode:
  *      encode one picture.
  *      *pi_nal is the number of NAL units outputted in pp_nal.
  *      returns the number of bytes in the returned NALs.
  *      returns negative on error and zero if no NAL units returned.
  *      the payloads of all output NALs are guaranteed to be sequential in memory. */
-int     x264_encoder_encode( x264_t *, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_in, x264_picture_t *pic_out );
+X264_API int x264_encoder_encode( x264_t *, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_in, x264_picture_t *pic_out );
 /* x264_encoder_close:
  *      close an encoder handler */
-void    x264_encoder_close( x264_t * );
+X264_API void x264_encoder_close( x264_t * );
 /* x264_encoder_delayed_frames:
  *      return the number of currently delayed (buffered) frames
  *      this should be used at the end of the stream, to know when you have all the encoded frames. */
-int     x264_encoder_delayed_frames( x264_t * );
+X264_API int x264_encoder_delayed_frames( x264_t * );
 /* x264_encoder_maximum_delayed_frames( x264_t * ):
  *      return the maximum number of delayed (buffered) frames that can occur with the current
  *      parameters. */
-int     x264_encoder_maximum_delayed_frames( x264_t * );
+X264_API int x264_encoder_maximum_delayed_frames( x264_t * );
 /* x264_encoder_intra_refresh:
  *      If an intra refresh is not in progress, begin one with the next P-frame.
  *      If an intra refresh is in progress, begin one as soon as the current one finishes.
@@ -933,7 +950,7 @@ int     x264_encoder_maximum_delayed_frames( x264_t * );
  *      behavior is undefined.
  *
  *      Should not be called during an x264_encoder_encode. */
-void    x264_encoder_intra_refresh( x264_t * );
+X264_API void x264_encoder_intra_refresh( x264_t * );
 /* x264_encoder_invalidate_reference:
  *      An interactive error resilience tool, designed for use in a low-latency one-encoder-few-clients
  *      system.  When the client has packet loss or otherwise incorrectly decodes a frame, the encoder
@@ -956,7 +973,7 @@ void    x264_encoder_intra_refresh( x264_t * );
  *      Should not be called during an x264_encoder_encode, but multiple calls can be made simultaneously.
  *
  *      Returns 0 on success, negative on failure. */
-int x264_encoder_invalidate_reference( x264_t *, int64_t pts );
+X264_API int x264_encoder_invalidate_reference( x264_t *, int64_t pts );
 
 #ifdef __cplusplus
 }



View it on GitLab: https://code.videolan.org/videolan/x264/compare/bd8a88be426baa903427a10de9f9ddb5e7c32812...3759fcb7b48037a5169715ab89f80a0ab4801cdf

-- 
View it on GitLab: https://code.videolan.org/videolan/x264/compare/bd8a88be426baa903427a10de9f9ddb5e7c32812...3759fcb7b48037a5169715ab89f80a0ab4801cdf
You're receiving this email because of your account on code.videolan.org.