[x265] [PATCH RFC] manually align the stack for GCC x86_32 builds

Steve Borho steve at borho.org
Thu Apr 17 21:16:04 CEST 2014


# HG changeset patch
# User Steve Borho <steve at borho.org>
# Date 1397762142 18000
#      Thu Apr 17 14:15:42 2014 -0500
# Node ID 171b8496ccc6604691f7fb5cc01348d8d4f888cf
# Parent  adb708655a30e7f3b2d1fd391740214c2cb2fb1c
manually align the stack for GCC x86_32 builds

This needs testing on GCC-built x86_32 platforms; any volunteers?

For all threads x265 creates I'm hoping we can align the stack immediately in
the call to threadMain().

At first glance, it seems only the call to x265_encoder_encode() needs to be
stack aligned.

diff -r adb708655a30 -r 171b8496ccc6 source/cmake/CMakeASM_YASMInformation.cmake
--- a/source/cmake/CMakeASM_YASMInformation.cmake	Wed Apr 16 14:44:12 2014 +0530
+++ b/source/cmake/CMakeASM_YASMInformation.cmake	Thu Apr 17 14:15:42 2014 -0500
@@ -21,8 +21,7 @@
     endif()
 endif()
 
-# we cannot assume 16-byte stack alignment on x86_32 even with GCC
-if(GCC AND X64)
+if(GCC)
     set(ASM_FLAGS "${ASM_FLAGS} -DHAVE_ALIGNED_STACK=1")
 else()
     set(ASM_FLAGS "${ASM_FLAGS} -DHAVE_ALIGNED_STACK=0")
diff -r adb708655a30 -r 171b8496ccc6 source/common/common.h
--- a/source/common/common.h	Wed Apr 16 14:44:12 2014 +0530
+++ b/source/common/common.h	Thu Apr 17 14:15:42 2014 -0500
@@ -47,10 +47,21 @@
 #define ALIGN_VAR_8(T, var)  T var __attribute__((aligned(8)))
 #define ALIGN_VAR_16(T, var) T var __attribute__((aligned(16)))
 #define ALIGN_VAR_32(T, var) T var __attribute__((aligned(32)))
+
+#if X265_ARCH_X86 && !defined(X86_64)
+extern "C" intptr_t x265_stack_align(void (*func)(), ...); /* defined outside C++ (presumably asm - confirm); needs an unmangled name */
+#define x265_stack_align(func,...) x265_stack_align((void (*)())func, __VA_ARGS__)
+#else
+#define x265_stack_align(func,...) func(__VA_ARGS__)
+#endif
+
 #elif defined(_MSC_VER)
+
 #define ALIGN_VAR_8(T, var)  __declspec(align(8)) T var
 #define ALIGN_VAR_16(T, var) __declspec(align(16)) T var
 #define ALIGN_VAR_32(T, var) __declspec(align(32)) T var
+#define x265_stack_align(func,...) func(__VA_ARGS__)
+
 #endif // if defined(__GNUC__)
 
 #if HIGH_BIT_DEPTH
diff -r adb708655a30 -r 171b8496ccc6 source/common/threading.cpp
--- a/source/common/threading.cpp	Wed Apr 16 14:44:12 2014 +0530
+++ b/source/common/threading.cpp	Thu Apr 17 14:15:42 2014 -0500
@@ -28,12 +28,18 @@
 namespace x265 {
 // x265 private namespace
 
+/* Non-member shim so x265_stack_align() can invoke threadMain() through a plain function pointer */
+static void stackAlignMain(Thread *instance)
+{
+    instance->threadMain();
+}
+
 #if _WIN32
 
 static DWORD WINAPI ThreadShim(Thread *instance)
 {
     // defer processing to the virtual function implemented in the derived class
-    instance->threadMain();
+    x265_stack_align(stackAlignMain, instance);
 
     return 0;
 }
@@ -70,7 +76,7 @@
     // defer processing to the virtual function implemented in the derived class
     Thread *instance = reinterpret_cast<Thread *>(opaque);
 
-    instance->threadMain();
+    x265_stack_align(stackAlignMain, instance);
 
     return NULL;
 }
diff -r adb708655a30 -r 171b8496ccc6 source/encoder/api.cpp
--- a/source/encoder/api.cpp	Wed Apr 16 14:44:12 2014 +0530
+++ b/source/encoder/api.cpp	Thu Apr 17 14:15:42 2014 -0500
@@ -103,6 +103,15 @@
     return ret;
 }
 
+#if defined(__GNUC__) && X265_ARCH_X86 && !defined(X86_64)
+/* Free-function wrapper for Encoder::encode() so x265_stack_align() can align
+ * the stack before entry (the caller may not have); returns encode()'s result */
+static int encode_stack_frame(Encoder *enc, bool bEos, const x265_picture* pic, x265_picture *pic_out, NALUnitEBSP **nalunits)
+{
+    return enc->encode(bEos, pic, pic_out, nalunits);
+}
+#endif
+
 extern "C"
 int x265_encoder_encode(x265_encoder *enc, x265_nal **pp_nal, uint32_t *pi_nal, x265_picture *pic_in, x265_picture *pic_out)
 {
@@ -111,7 +120,12 @@
 
     Encoder *encoder = static_cast<Encoder*>(enc);
     NALUnitEBSP *nalunits[MAX_NAL_UNITS] = { 0, 0, 0, 0, 0 };
+
+#if defined(__GNUC__) && X265_ARCH_X86 && !defined(X86_64)
+    int numEncoded = x265_stack_align(encode_stack_frame, encoder, !pic_in, pic_in, pic_out, nalunits);
+#else
     int numEncoded = encoder->encode(!pic_in, pic_in, pic_out, nalunits);
+#endif
 
     if (pp_nal && numEncoded > 0)
     {


More information about the x265-devel mailing list