[x264-devel] x86: 32-byte align the stack if possible
Jason Garrett-Glaser
git at videolan.org
Mon May 20 23:06:47 CEST 2013
x264 | branch: master | Jason Garrett-Glaser <jason at x264.com> | Thu May 2 17:10:26 2013 -0700| [2a716040eb8b89efd92ea61ab08ecc41bf0b8623] | committer: Jason Garrett-Glaser
x86: 32-byte align the stack if possible
Avoids the need for manual 32-byte array alignment on compilers that support
-mpreferred-stack-boundary.
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=2a716040eb8b89efd92ea61ab08ecc41bf0b8623
---
common/cpu.h | 14 ++++++++------
common/osdep.h | 6 ++++++
common/x86/cpu-a.asm | 29 +++++++++++++++++++++++------
configure | 4 ++++
4 files changed, 41 insertions(+), 12 deletions(-)
diff --git a/common/cpu.h b/common/cpu.h
index 7f76e43..27d1339 100644
--- a/common/cpu.h
+++ b/common/cpu.h
@@ -48,15 +48,17 @@ void x264_cpu_sfence( void );
void x264_cpu_mask_misalign_sse( void );
void x264_safe_intel_cpu_indicator_init( void );
-/* kluge:
+/* kludge:
* gcc can't give variables any greater alignment than the stack frame has.
- * We need 16 byte alignment for SSE2, so here we make sure that the stack is
- * aligned to 16 bytes.
+ * We need 32 byte alignment for AVX2, so here we make sure that the stack is
+ * aligned to 32 bytes.
* gcc 4.2 introduced __attribute__((force_align_arg_pointer)) to fix this
* problem, but I don't want to require such a new version.
- * This applies only to x86_32, since other architectures that need alignment
- * either have ABIs that ensure aligned stack, or don't support it at all. */
-#if ARCH_X86 && HAVE_MMX
+ * aligning to 32 bytes only works if the compiler supports keeping that
+ * alignment between functions (osdep.h handles manual alignment of arrays
+ * if it doesn't).
+ */
+#if (ARCH_X86 || HAVE_32B_STACK_ALIGNMENT) && HAVE_MMX
int x264_stack_align( void (*func)(), ... );
#define x264_stack_align(func,...) x264_stack_align((void (*)())func, __VA_ARGS__)
#else
diff --git a/common/osdep.h b/common/osdep.h
index e4b22f0..cdc0f61 100644
--- a/common/osdep.h
+++ b/common/osdep.h
@@ -111,7 +111,13 @@
#define EXPAND(x) x
+#if HAVE_32B_STACK_ALIGNMENT
+#define ALIGNED_ARRAY_32( type, name, sub1, ... )\
+ ALIGNED_32( type name sub1 __VA_ARGS__ )
+#else
#define ALIGNED_ARRAY_32( ... ) EXPAND( ALIGNED_ARRAY_EMU( 31, __VA_ARGS__ ) )
+#endif
+
#define ALIGNED_ARRAY_64( ... ) EXPAND( ALIGNED_ARRAY_EMU( 63, __VA_ARGS__ ) )
/* For AVX2 */
diff --git a/common/x86/cpu-a.asm b/common/x86/cpu-a.asm
index 6e26b96..b765571 100644
--- a/common/x86/cpu-a.asm
+++ b/common/x86/cpu-a.asm
@@ -66,7 +66,27 @@ cglobal cpu_xgetbv, 3,7
mov [r4], edx
RET
-%if ARCH_X86_64 == 0
+%if ARCH_X86_64
+
+;-----------------------------------------------------------------------------
+; void stack_align( void (*func)(void*), void *arg );
+;-----------------------------------------------------------------------------
+cglobal stack_align
+ push rbp
+ mov rbp, rsp
+%if WIN64
+ sub rsp, 32 ; shadow space
+%endif
+ and rsp, ~31
+ mov rax, r0
+ mov r0, r1
+ mov r1, r2
+ mov r2, r3
+ call rax
+ leave
+ ret
+
+%else
;-----------------------------------------------------------------------------
; int cpu_cpuid_test( void )
@@ -94,14 +114,11 @@ cglobal cpu_cpuid_test
popfd
ret
-;-----------------------------------------------------------------------------
-; void stack_align( void (*func)(void*), void *arg );
-;-----------------------------------------------------------------------------
cglobal stack_align
push ebp
mov ebp, esp
sub esp, 12
- and esp, ~15
+ and esp, ~31
mov ecx, [ebp+8]
mov edx, [ebp+12]
mov [esp], edx
@@ -168,7 +185,7 @@ cglobal safe_intel_cpu_indicator_init
%if WIN64
sub rsp, 32 ; shadow space
%endif
- and rsp, ~15
+ and rsp, ~31
call intel_cpu_indicator_init
leave
%if ARCH_X86_64
diff --git a/configure b/configure
index 7fea421..3d14850 100755
--- a/configure
+++ b/configure
@@ -717,6 +717,10 @@ if [ $asm = auto -a \( $ARCH = X86 -o $ARCH = X86_64 \) ] ; then
exit 1
fi
define HAVE_MMX
+ if cc_check '' -mpreferred-stack-boundary=5 ; then
+ CFLAGS="$CFLAGS -mpreferred-stack-boundary=5"
+ define HAVE_32B_STACK_ALIGNMENT
+ fi
fi
if [ $asm = auto -a $ARCH = ARM ] ; then
More information about the x264-devel
mailing list