[x265] [PATCH RFC V2] manually align the stack for GCC x86_32 builds

Steve Borho steve at borho.org
Fri Apr 18 21:10:10 CEST 2014


# HG changeset patch
# User Steve Borho <steve at borho.org>
# Date 1397762142 18000
#      Thu Apr 17 14:15:42 2014 -0500
# Node ID df76c716a254ba1b3fdc563d9e7803c4f4df1829
# Parent  1fab04de065a3f7f5fedc128f572b860d6df0de2
manually align the stack for GCC x86_32 builds

This version declares x265_stack_align as extern "C", since it is an assembly
function, and fixes the return of the count from Encoder::encode.

This needs testing on GCC-built x86_32 platforms. Any volunteers?

For all threads that x265 creates, I'm hoping we can align the stack
immediately in the call to threadMain().

At first glance, it seems only the call to x265_encoder_encode() needs to be
stack aligned.

diff -r 1fab04de065a -r df76c716a254 source/cmake/CMakeASM_YASMInformation.cmake
--- a/source/cmake/CMakeASM_YASMInformation.cmake	Fri Apr 18 18:00:58 2014 +0530
+++ b/source/cmake/CMakeASM_YASMInformation.cmake	Thu Apr 17 14:15:42 2014 -0500
@@ -21,8 +21,7 @@
     endif()
 endif()
 
-# we cannot assume 16-byte stack alignment on x86_32 even with GCC
-if(GCC AND X64)
+if(GCC)
     set(ASM_FLAGS "${ASM_FLAGS} -DHAVE_ALIGNED_STACK=1")
 else()
     set(ASM_FLAGS "${ASM_FLAGS} -DHAVE_ALIGNED_STACK=0")
diff -r 1fab04de065a -r df76c716a254 source/common/common.h
--- a/source/common/common.h	Fri Apr 18 18:00:58 2014 +0530
+++ b/source/common/common.h	Thu Apr 17 14:15:42 2014 -0500
@@ -47,10 +47,21 @@
 #define ALIGN_VAR_8(T, var)  T var __attribute__((aligned(8)))
 #define ALIGN_VAR_16(T, var) T var __attribute__((aligned(16)))
 #define ALIGN_VAR_32(T, var) T var __attribute__((aligned(32)))
+
+#if X265_ARCH_X86 && !defined(X86_64)
+extern "C" intptr_t x265_stack_align( void (*func)(), ... );
+#define x265_stack_align(func,...) x265_stack_align((void (*)())func, __VA_ARGS__)
+#else
+#define x265_stack_align(func,...) func(__VA_ARGS__)
+#endif
+
 #elif defined(_MSC_VER)
+
 #define ALIGN_VAR_8(T, var)  __declspec(align(8)) T var
 #define ALIGN_VAR_16(T, var) __declspec(align(16)) T var
 #define ALIGN_VAR_32(T, var) __declspec(align(32)) T var
+#define x265_stack_align(func,...) func(__VA_ARGS__)
+
 #endif // if defined(__GNUC__)
 
 #if HIGH_BIT_DEPTH
diff -r 1fab04de065a -r df76c716a254 source/common/threading.cpp
--- a/source/common/threading.cpp	Fri Apr 18 18:00:58 2014 +0530
+++ b/source/common/threading.cpp	Thu Apr 17 14:15:42 2014 -0500
@@ -28,12 +28,18 @@
 namespace x265 {
 // x265 private namespace
 
+/* C shim for forced stack alignment */
+static void stackAlignMain(Thread *instance)
+{
+    instance->threadMain();
+}
+
 #if _WIN32
 
 static DWORD WINAPI ThreadShim(Thread *instance)
 {
     // defer processing to the virtual function implemented in the derived class
-    instance->threadMain();
+    x265_stack_align(stackAlignMain, instance);
 
     return 0;
 }
@@ -70,7 +76,7 @@
     // defer processing to the virtual function implemented in the derived class
     Thread *instance = reinterpret_cast<Thread *>(opaque);
 
-    instance->threadMain();
+    x265_stack_align(stackAlignMain, instance);
 
     return NULL;
 }
diff -r 1fab04de065a -r df76c716a254 source/encoder/api.cpp
--- a/source/encoder/api.cpp	Fri Apr 18 18:00:58 2014 +0530
+++ b/source/encoder/api.cpp	Thu Apr 17 14:15:42 2014 -0500
@@ -103,6 +103,15 @@
     return ret;
 }
 
+#if defined(__GNUC__) && X265_ARCH_X86 && !defined(X86_64)
+/* C wrapper for Encoder::encode() so we can align the stack prior to entry
+ * since the caller may not have aligned the stack enough for us */
+static intptr_t encode_stack_frame(Encoder *enc, bool bEos, const x265_picture* pic, x265_picture *pic_out, NALUnitEBSP **nalunits)
+{
+    return (intptr_t)enc->encode(bEos, pic, pic_out, nalunits);
+}
+#endif
+
 extern "C"
 int x265_encoder_encode(x265_encoder *enc, x265_nal **pp_nal, uint32_t *pi_nal, x265_picture *pic_in, x265_picture *pic_out)
 {
@@ -111,7 +120,12 @@
 
     Encoder *encoder = static_cast<Encoder*>(enc);
     NALUnitEBSP *nalunits[MAX_NAL_UNITS] = { 0, 0, 0, 0, 0 };
+
+#if defined(__GNUC__) && X265_ARCH_X86 && !defined(X86_64)
+    int numEncoded = (int)x265_stack_align(encode_stack_frame, encoder, !pic_in, pic_in, pic_out, nalunits);
+#else
     int numEncoded = encoder->encode(!pic_in, pic_in, pic_out, nalunits);
+#endif
 
     if (pp_nal && numEncoded > 0)
     {


More information about the x265-devel mailing list