[x264-devel] x86: Avoid self-relative expressions on macho64

Tue May 23 18:04:34 CEST 2017

x264 | branch: master | Henrik Gramner <henrik at gramner.com> | Tue May 23 16:40:26 2017 +0200| [df79067c0cf33da712d344b5f8869be7eaf326f3] | committer: Henrik Gramner

x86: Avoid self-relative expressions on macho64

Functions that uses self-relative expressions in the form of [foo-$$]
appears to cause issues on 64-bit Mach-O systems when assembled with nasm.
Temporarily disable those functions on macho64 for the time being until
we've figured out the root cause.

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=df79067c0cf33da712d344b5f8869be7eaf326f3
---

 common/bitstream.c |  4 ++--
 common/pixel.c     | 10 ++++++++++
 common/quant.c     |  4 ++++
 encoder/cabac.c    |  6 +++---
 encoder/rdo.c      |  2 +-
 5 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/common/bitstream.c b/common/bitstream.c
index 34e643ce..cc763000 100644
--- a/common/bitstream.c
+++ b/common/bitstream.c
@@ -119,7 +119,7 @@ void x264_bitstream_init( int cpu, x264_bitstream_function_t *pf )
 
     pf->nal_escape = x264_nal_escape_c;
 #if HAVE_MMX
-#if ARCH_X86_64
+#if ARCH_X86_64 && !defined( __MACH__ )
     pf->cabac_block_residual_internal = x264_cabac_block_residual_internal_sse2;
     pf->cabac_block_residual_rd_internal = x264_cabac_block_residual_rd_internal_sse2;
     pf->cabac_block_residual_8x8_rd_internal = x264_cabac_block_residual_8x8_rd_internal_sse2;
@@ -132,7 +132,7 @@ void x264_bitstream_init( int cpu, x264_bitstream_function_t *pf )
         if( cpu&X264_CPU_SSE2_IS_FAST )
             pf->nal_escape = x264_nal_escape_sse2;
     }
-#if ARCH_X86_64
+#if ARCH_X86_64 && !defined( __MACH__ )
     if( cpu&X264_CPU_LZCNT )
     {
         pf->cabac_block_residual_internal = x264_cabac_block_residual_internal_lzcnt;
diff --git a/common/pixel.c b/common/pixel.c
index 14891d71..b4fe182d 100644
--- a/common/pixel.c
+++ b/common/pixel.c
@@ -959,7 +959,9 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
         INIT7( sad, _ssse3 );
         INIT7( sad_x3, _ssse3 );
         INIT7( sad_x4, _ssse3 );
+#if ARCH_X86 || !defined( __MACH__ )
         INIT_ADS( _ssse3 );
+#endif
         INIT6( satd, _ssse3 );
         pixf->satd[PIXEL_4x16] = x264_pixel_satd_4x16_ssse3;
 
@@ -1000,7 +1002,9 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
     if( cpu&X264_CPU_AVX )
     {
         INIT5_NAME( sad_aligned, sad, _ssse3 ); /* AVX-capable CPUs doesn't benefit from an aligned version */
+#if ARCH_X86 || !defined( __MACH__ )
         INIT_ADS( _avx );
+#endif
         INIT6( satd, _avx );
         pixf->satd[PIXEL_4x16] = x264_pixel_satd_4x16_avx;
         if( !(cpu&X264_CPU_STACK_MOD4) )
@@ -1196,7 +1200,9 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
             pixf->intra_sa8d_x9_8x8 = x264_intra_sa8d_x9_8x8_ssse3;
 #endif
         }
+#if ARCH_X86 || !defined( __MACH__ )
         INIT_ADS( _ssse3 );
+#endif
         if( cpu&X264_CPU_SLOW_ATOM )
         {
             pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_ssse3_atom;
@@ -1279,7 +1285,9 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
         INIT8( satd, _avx );
         INIT7( satd_x3, _avx );
         INIT7( satd_x4, _avx );
+#if ARCH_X86 || !defined( __MACH__ )
         INIT_ADS( _avx );
+#endif
         INIT4( hadamard_ac, _avx );
         if( !(cpu&X264_CPU_STACK_MOD4) )
         {
@@ -1332,7 +1340,9 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
         INIT2( sad_x4, _avx2 );
         INIT4( satd, _avx2 );
         INIT2( hadamard_ac, _avx2 );
+#if ARCH_X86 || !defined( __MACH__ )
         INIT_ADS( _avx2 );
+#endif
         pixf->sa8d[PIXEL_8x8]  = x264_pixel_sa8d_8x8_avx2;
         pixf->var[PIXEL_16x16] = x264_pixel_var_16x16_avx2;
         pixf->var2[PIXEL_8x16]  = x264_pixel_var2_8x16_avx2;
diff --git a/common/quant.c b/common/quant.c
index 5af38764..ae962226 100644
--- a/common/quant.c
+++ b/common/quant.c
@@ -660,6 +660,7 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
         pf->decimate_score16 = x264_decimate_score16_ssse3;
         pf->decimate_score64 = x264_decimate_score64_ssse3;
         INIT_TRELLIS( ssse3 );
+#if ARCH_X86 || !defined( __MACH__ )
         pf->coeff_level_run4 = x264_coeff_level_run4_ssse3;
         pf->coeff_level_run8 = x264_coeff_level_run8_ssse3;
         pf->coeff_level_run[ DCT_LUMA_AC] = x264_coeff_level_run15_ssse3;
@@ -671,6 +672,7 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
             pf->coeff_level_run[ DCT_LUMA_AC] = x264_coeff_level_run15_ssse3_lzcnt;
             pf->coeff_level_run[DCT_LUMA_4x4] = x264_coeff_level_run16_ssse3_lzcnt;
         }
+#endif
     }
 
     if( cpu&X264_CPU_SSE4 )
@@ -721,8 +723,10 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
         pf->decimate_score64 = x264_decimate_score64_avx2;
         pf->denoise_dct = x264_denoise_dct_avx2;
         pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_avx2;
+#if ARCH_X86 || !defined( __MACH__ )
         pf->coeff_level_run[ DCT_LUMA_AC] = x264_coeff_level_run15_avx2;
         pf->coeff_level_run[DCT_LUMA_4x4] = x264_coeff_level_run16_avx2;
+#endif
     }
     if( cpu&X264_CPU_AVX512 )
     {
diff --git a/encoder/cabac.c b/encoder/cabac.c
index 27052cdb..d96c0991 100644
--- a/encoder/cabac.c
+++ b/encoder/cabac.c
@@ -801,7 +801,7 @@ void x264_cabac_block_residual_c( x264_t *h, x264_cabac_t *cb, int ctx_block_cat
 
 static void ALWAYS_INLINE x264_cabac_block_residual( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
 {
-#if ARCH_X86_64 && HAVE_MMX
+#if ARCH_X86_64 && HAVE_MMX && !defined( __MACH__ )
     h->bsf.cabac_block_residual_internal( l, MB_INTERLACED, ctx_block_cat, cb );
 #else
     x264_cabac_block_residual_c( h, cb, ctx_block_cat, l );
@@ -915,7 +915,7 @@ void x264_cabac_block_residual_rd_c( x264_t *h, x264_cabac_t *cb, int ctx_block_
 
 static ALWAYS_INLINE void x264_cabac_block_residual_8x8( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
 {
-#if ARCH_X86_64 && HAVE_MMX
+#if ARCH_X86_64 && HAVE_MMX && !defined( __MACH__ )
     h->bsf.cabac_block_residual_8x8_rd_internal( l, MB_INTERLACED, ctx_block_cat, cb );
 #else
     x264_cabac_block_residual_8x8_rd_c( h, cb, ctx_block_cat, l );
@@ -923,7 +923,7 @@ static ALWAYS_INLINE void x264_cabac_block_residual_8x8( x264_t *h, x264_cabac_t
 }
 static ALWAYS_INLINE void x264_cabac_block_residual( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
 {
-#if ARCH_X86_64 && HAVE_MMX
+#if ARCH_X86_64 && HAVE_MMX && !defined( __MACH__ )
     h->bsf.cabac_block_residual_rd_internal( l, MB_INTERLACED, ctx_block_cat, cb );
 #else
     x264_cabac_block_residual_rd_c( h, cb, ctx_block_cat, l );
diff --git a/encoder/rdo.c b/encoder/rdo.c
index ef780c44..a6865bd3 100644
--- a/encoder/rdo.c
+++ b/encoder/rdo.c
@@ -694,7 +694,7 @@ int quant_trellis_cabac( x264_t *h, dctcoef *dct,
         return !!dct[0];
     }
 
-#if HAVE_MMX && ARCH_X86_64
+#if HAVE_MMX && ARCH_X86_64 && !defined( __MACH__ )
 #define TRELLIS_ARGS unquant_mf, zigzag, lambda2, last_nnz, orig_coefs, quant_coefs, dct,\
                      cabac_state_sig, cabac_state_last, M64(cabac_state), M16(cabac_state+8)
     if( num_coefs == 16 && !dc )