[x264-devel] x86: 32-byte align the stack if possible

Jason Garrett-Glaser git at videolan.org
Mon May 20 23:06:47 CEST 2013


x264 | branch: master | Jason Garrett-Glaser <jason at x264.com> | Thu May  2 17:10:26 2013 -0700| [2a716040eb8b89efd92ea61ab08ecc41bf0b8623] | committer: Jason Garrett-Glaser

x86: 32-byte align the stack if possible

Avoids the need for manual 32 byte array alignment on compilers that support
-mpreferred-stack-boundary.

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=2a716040eb8b89efd92ea61ab08ecc41bf0b8623
---

 common/cpu.h         |   14 ++++++++------
 common/osdep.h       |    6 ++++++
 common/x86/cpu-a.asm |   29 +++++++++++++++++++++++------
 configure            |    4 ++++
 4 files changed, 41 insertions(+), 12 deletions(-)

diff --git a/common/cpu.h b/common/cpu.h
index 7f76e43..27d1339 100644
--- a/common/cpu.h
+++ b/common/cpu.h
@@ -48,15 +48,17 @@ void     x264_cpu_sfence( void );
 void     x264_cpu_mask_misalign_sse( void );
 void     x264_safe_intel_cpu_indicator_init( void );
 
-/* kluge:
+/* kludge:
  * gcc can't give variables any greater alignment than the stack frame has.
- * We need 16 byte alignment for SSE2, so here we make sure that the stack is
- * aligned to 16 bytes.
+ * We need 32 byte alignment for AVX2, so here we make sure that the stack is
+ * aligned to 32 bytes.
  * gcc 4.2 introduced __attribute__((force_align_arg_pointer)) to fix this
  * problem, but I don't want to require such a new version.
- * This applies only to x86_32, since other architectures that need alignment
- * either have ABIs that ensure aligned stack, or don't support it at all. */
-#if ARCH_X86 && HAVE_MMX
+ * Aligning to 32 bytes only works if the compiler supports keeping that
+ * alignment between functions (osdep.h handles manual alignment of arrays
+ * if it doesn't).
+ */
+#if (ARCH_X86 || HAVE_32B_STACK_ALIGNMENT) && HAVE_MMX
 int x264_stack_align( void (*func)(), ... );
 #define x264_stack_align(func,...) x264_stack_align((void (*)())func, __VA_ARGS__)
 #else
diff --git a/common/osdep.h b/common/osdep.h
index e4b22f0..cdc0f61 100644
--- a/common/osdep.h
+++ b/common/osdep.h
@@ -111,7 +111,13 @@
 
 #define EXPAND(x) x
 
+#if HAVE_32B_STACK_ALIGNMENT
+#define ALIGNED_ARRAY_32( type, name, sub1, ... )\
+    ALIGNED_32( type name sub1 __VA_ARGS__ )
+#else
 #define ALIGNED_ARRAY_32( ... ) EXPAND( ALIGNED_ARRAY_EMU( 31, __VA_ARGS__ ) )
+#endif
+
 #define ALIGNED_ARRAY_64( ... ) EXPAND( ALIGNED_ARRAY_EMU( 63, __VA_ARGS__ ) )
 
 /* For AVX2 */
diff --git a/common/x86/cpu-a.asm b/common/x86/cpu-a.asm
index 6e26b96..b765571 100644
--- a/common/x86/cpu-a.asm
+++ b/common/x86/cpu-a.asm
@@ -66,7 +66,27 @@ cglobal cpu_xgetbv, 3,7
     mov [r4], edx
     RET
 
-%if ARCH_X86_64 == 0
+%if ARCH_X86_64
+
+;-----------------------------------------------------------------------------
+; int stack_align( void (*func)(), ... ); calls func on a 32-byte aligned stack
+;-----------------------------------------------------------------------------
+cglobal stack_align
+    push rbp             ; keep the caller's frame pointer
+    mov  rbp, rsp        ; remember the unaligned rsp so leave can restore it
+%if WIN64
+    sub  rsp, 32 ; shadow space (reserved before masking, so >= 32 bytes remain)
+%endif
+    and  rsp, ~31        ; round rsp down to a 32-byte boundary
+    mov  rax, r0         ; func pointer (r0-r3 presumably x86inc arg registers)
+    mov   r0, r1         ; shift the next three arguments down one slot so
+    mov   r1, r2         ; func receives them as its own first three
+    mov   r2, r3
+    call rax             ; rax set by func is not touched below: it is our result
+    leave                ; rsp = rbp, pop rbp: drops the alignment padding
+    ret
+
+%else
 
 ;-----------------------------------------------------------------------------
 ; int cpu_cpuid_test( void )
@@ -94,14 +114,11 @@ cglobal cpu_cpuid_test
     popfd
     ret
 
-;-----------------------------------------------------------------------------
-; void stack_align( void (*func)(void*), void *arg );
-;-----------------------------------------------------------------------------
 cglobal stack_align
     push ebp
     mov  ebp, esp
     sub  esp, 12
-    and  esp, ~15
+    and  esp, ~31
     mov  ecx, [ebp+8]
     mov  edx, [ebp+12]
     mov  [esp], edx
@@ -168,7 +185,7 @@ cglobal safe_intel_cpu_indicator_init
 %if WIN64
     sub  rsp, 32 ; shadow space
 %endif
-    and  rsp, ~15
+    and  rsp, ~31
     call intel_cpu_indicator_init
     leave
 %if ARCH_X86_64
diff --git a/configure b/configure
index 7fea421..3d14850 100755
--- a/configure
+++ b/configure
@@ -717,6 +717,10 @@ if [ $asm = auto -a \( $ARCH = X86 -o $ARCH = X86_64 \) ] ; then
         exit 1
     fi
     define HAVE_MMX
+    if cc_check '' -mpreferred-stack-boundary=5 ; then
+        CFLAGS="$CFLAGS -mpreferred-stack-boundary=5"
+        define HAVE_32B_STACK_ALIGNMENT
+    fi
 fi
 
 if [ $asm = auto -a $ARCH = ARM ] ; then



More information about the x264-devel mailing list