[x265] [PATCH RFC V2] manually align the stack for GCC x86_32 builds

Mon Apr 21 22:15:19 CEST 2014

On 04/18/2014 12:10 PM, Steve Borho wrote:
> # HG changeset patch
> # User Steve Borho <steve at borho.org>
> # Date 1397762142 18000
> #      Thu Apr 17 14:15:42 2014 -0500
> # Node ID df76c716a254ba1b3fdc563d9e7803c4f4df1829
> # Parent  1fab04de065a3f7f5fedc128f572b860d6df0de2
> manually align the stack for GCC x86_32 builds
>
> This version declares x265_stack_align as extern "C" since it is an assembly
> function, fixes the return of count from Encoder::encode
>
> This needs testing on GCC built x86_32 platforms, any volunteers?
>
> For all threads x265 creates I'm hoping we can align the stack immediately in
> the call to threadMain().
>
> At first glance, it seems only the call to x265_encoder_encode() needs to be
> stack aligned.
>
> diff -r 1fab04de065a -r df76c716a254 source/cmake/CMakeASM_YASMInformation.cmake
> --- a/source/cmake/CMakeASM_YASMInformation.cmake	Fri Apr 18 18:00:58 2014 +0530
> +++ b/source/cmake/CMakeASM_YASMInformation.cmake	Thu Apr 17 14:15:42 2014 -0500
> @@ -21,8 +21,7 @@
>       endif()
>   endif()
>   
> -# we cannot assume 16-byte stack alignment on x86_32 even with GCC
> -if(GCC AND X64)
> +if(GCC)
>       set(ASM_FLAGS "${ASM_FLAGS} -DHAVE_ALIGNED_STACK=1")
>   else()
>       set(ASM_FLAGS "${ASM_FLAGS} -DHAVE_ALIGNED_STACK=0")
> diff -r 1fab04de065a -r df76c716a254 source/common/common.h
> --- a/source/common/common.h	Fri Apr 18 18:00:58 2014 +0530
> +++ b/source/common/common.h	Thu Apr 17 14:15:42 2014 -0500
> @@ -47,10 +47,21 @@
>   #define ALIGN_VAR_8(T, var)  T var __attribute__((aligned(8)))
>   #define ALIGN_VAR_16(T, var) T var __attribute__((aligned(16)))
>   #define ALIGN_VAR_32(T, var) T var __attribute__((aligned(32)))
> +
> +#if X265_ARCH_X86 && !defined(X86_64)
> +extern "C" intptr_t x265_stack_align( void (*func)(), ... );
> +#define x265_stack_align(func,...) x265_stack_align((void (*)())func, __VA_ARGS__)
> +#else
> +#define x265_stack_align(func,...) func(__VA_ARGS__)
> +#endif
> +
>   #elif defined(_MSC_VER)
> +
>   #define ALIGN_VAR_8(T, var)  __declspec(align(8)) T var
>   #define ALIGN_VAR_16(T, var) __declspec(align(16)) T var
>   #define ALIGN_VAR_32(T, var) __declspec(align(32)) T var
> +#define x265_stack_align(func,...) func(__VA_ARGS__)
> +
>   #endif // if defined(__GNUC__)
>   
>   #if HIGH_BIT_DEPTH
> diff -r 1fab04de065a -r df76c716a254 source/common/threading.cpp
> --- a/source/common/threading.cpp	Fri Apr 18 18:00:58 2014 +0530
> +++ b/source/common/threading.cpp	Thu Apr 17 14:15:42 2014 -0500
> @@ -28,12 +28,18 @@
>   namespace x265 {
>   // x265 private namespace
>   
> +/* C shim for forced stack alignment */
> +static void stackAlignMain(Thread *instance)
> +{
> +    instance->threadMain();
> +}
> +
>   #if _WIN32
>   
>   static DWORD WINAPI ThreadShim(Thread *instance)
>   {
>       // defer processing to the virtual function implemented in the derived class
> -    instance->threadMain();
> +    x265_stack_align(stackAlignMain, instance);
>   
>       return 0;
>   }
> @@ -70,7 +76,7 @@
>       // defer processing to the virtual function implemented in the derived class
>       Thread *instance = reinterpret_cast<Thread *>(opaque);
>   
> -    instance->threadMain();
> +    x265_stack_align(stackAlignMain, instance);
>   
>       return NULL;
>   }
> diff -r 1fab04de065a -r df76c716a254 source/encoder/api.cpp
> --- a/source/encoder/api.cpp	Fri Apr 18 18:00:58 2014 +0530
> +++ b/source/encoder/api.cpp	Thu Apr 17 14:15:42 2014 -0500
> @@ -103,6 +103,15 @@
>       return ret;
>   }
>   
> +#if defined(__GNUC__) && X265_ARCH_X86 && !defined(X86_64)
> +/* C wrapper for Encoder::encode() so we can align the stack prior to entry
> + * since the caller may not have aligned the stack enough for us */
> +static intptr_t encode_stack_frame(Encoder *enc, bool bEos, const x265_picture* pic, x265_picture *pic_out, NALUnitEBSP **nalunits)
> +{
> +    return (intptr_t)enc->encode(bEos, pic, pic_out, nalunits);
> +}
> +#endif
> +
>   extern "C"
>   int x265_encoder_encode(x265_encoder *enc, x265_nal **pp_nal, uint32_t *pi_nal, x265_picture *pic_in, x265_picture *pic_out)
>   {
> @@ -111,7 +120,12 @@
>   
>       Encoder *encoder = static_cast<Encoder*>(enc);
>       NALUnitEBSP *nalunits[MAX_NAL_UNITS] = { 0, 0, 0, 0, 0 };
> +
> +#if defined(__GNUC__) && X265_ARCH_X86 && !defined(X86_64)
> +    int numEncoded = (int)x265_stack_align(encode_stack_frame, encoder, !pic_in, pic_in, pic_out, nalunits);
> +#else
>       int numEncoded = encoder->encode(!pic_in, pic_in, pic_out, nalunits);
> +#endif
>   
>       if (pp_nal && numEncoded > 0)
>       {
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
I resolved the build dependencies for 32-bit on my system, but it seems like
cmake is primarily set up to just build for the host system. It's
ignoring "-f elf32" for CMAKE_ASM_YASM_FLAGS and generating "-f
elf64...".  GCC and ld are accepting -m32, but then ld fails to link
yasm's 64-bit object code with GCC's 32-bit code.

I tried tinkering with the cmake-generated build.make file to get yasm
to create 32-bit code, but then yasm doesn't like the 64-bit register symbols.
How do you get yasm to build 32-bit object code from 64-bit source code?

Am I wasting my time trying to build x265 for 32-bit Linux from 64-bit
Linux, or is this something that is needed?