[x264-devel] x86: Avoid self-relative expressions on macho64
Henrik Gramner
git at videolan.org
Tue May 23 18:04:34 CEST 2017
x264 | branch: master | Henrik Gramner <henrik at gramner.com> | Tue May 23 16:40:26 2017 +0200| [df79067c0cf33da712d344b5f8869be7eaf326f3] | committer: Henrik Gramner
x86: Avoid self-relative expressions on macho64
Functions that uses self-relative expressions in the form of [foo-$$]
appears to cause issues on 64-bit Mach-O systems when assembled with nasm.
Temporarily disable those functions on macho64 for the time being until
we've figured out the root cause.
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=df79067c0cf33da712d344b5f8869be7eaf326f3
---
common/bitstream.c | 4 ++--
common/pixel.c | 10 ++++++++++
common/quant.c | 4 ++++
encoder/cabac.c | 6 +++---
encoder/rdo.c | 2 +-
5 files changed, 20 insertions(+), 6 deletions(-)
diff --git a/common/bitstream.c b/common/bitstream.c
index 34e643ce..cc763000 100644
--- a/common/bitstream.c
+++ b/common/bitstream.c
@@ -119,7 +119,7 @@ void x264_bitstream_init( int cpu, x264_bitstream_function_t *pf )
pf->nal_escape = x264_nal_escape_c;
#if HAVE_MMX
-#if ARCH_X86_64
+#if ARCH_X86_64 && !defined( __MACH__ )
pf->cabac_block_residual_internal = x264_cabac_block_residual_internal_sse2;
pf->cabac_block_residual_rd_internal = x264_cabac_block_residual_rd_internal_sse2;
pf->cabac_block_residual_8x8_rd_internal = x264_cabac_block_residual_8x8_rd_internal_sse2;
@@ -132,7 +132,7 @@ void x264_bitstream_init( int cpu, x264_bitstream_function_t *pf )
if( cpu&X264_CPU_SSE2_IS_FAST )
pf->nal_escape = x264_nal_escape_sse2;
}
-#if ARCH_X86_64
+#if ARCH_X86_64 && !defined( __MACH__ )
if( cpu&X264_CPU_LZCNT )
{
pf->cabac_block_residual_internal = x264_cabac_block_residual_internal_lzcnt;
diff --git a/common/pixel.c b/common/pixel.c
index 14891d71..b4fe182d 100644
--- a/common/pixel.c
+++ b/common/pixel.c
@@ -959,7 +959,9 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
INIT7( sad, _ssse3 );
INIT7( sad_x3, _ssse3 );
INIT7( sad_x4, _ssse3 );
+#if ARCH_X86 || !defined( __MACH__ )
INIT_ADS( _ssse3 );
+#endif
INIT6( satd, _ssse3 );
pixf->satd[PIXEL_4x16] = x264_pixel_satd_4x16_ssse3;
@@ -1000,7 +1002,9 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
if( cpu&X264_CPU_AVX )
{
INIT5_NAME( sad_aligned, sad, _ssse3 ); /* AVX-capable CPUs doesn't benefit from an aligned version */
+#if ARCH_X86 || !defined( __MACH__ )
INIT_ADS( _avx );
+#endif
INIT6( satd, _avx );
pixf->satd[PIXEL_4x16] = x264_pixel_satd_4x16_avx;
if( !(cpu&X264_CPU_STACK_MOD4) )
@@ -1196,7 +1200,9 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
pixf->intra_sa8d_x9_8x8 = x264_intra_sa8d_x9_8x8_ssse3;
#endif
}
+#if ARCH_X86 || !defined( __MACH__ )
INIT_ADS( _ssse3 );
+#endif
if( cpu&X264_CPU_SLOW_ATOM )
{
pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_ssse3_atom;
@@ -1279,7 +1285,9 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
INIT8( satd, _avx );
INIT7( satd_x3, _avx );
INIT7( satd_x4, _avx );
+#if ARCH_X86 || !defined( __MACH__ )
INIT_ADS( _avx );
+#endif
INIT4( hadamard_ac, _avx );
if( !(cpu&X264_CPU_STACK_MOD4) )
{
@@ -1332,7 +1340,9 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
INIT2( sad_x4, _avx2 );
INIT4( satd, _avx2 );
INIT2( hadamard_ac, _avx2 );
+#if ARCH_X86 || !defined( __MACH__ )
INIT_ADS( _avx2 );
+#endif
pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_avx2;
pixf->var[PIXEL_16x16] = x264_pixel_var_16x16_avx2;
pixf->var2[PIXEL_8x16] = x264_pixel_var2_8x16_avx2;
diff --git a/common/quant.c b/common/quant.c
index 5af38764..ae962226 100644
--- a/common/quant.c
+++ b/common/quant.c
@@ -660,6 +660,7 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
pf->decimate_score16 = x264_decimate_score16_ssse3;
pf->decimate_score64 = x264_decimate_score64_ssse3;
INIT_TRELLIS( ssse3 );
+#if ARCH_X86 || !defined( __MACH__ )
pf->coeff_level_run4 = x264_coeff_level_run4_ssse3;
pf->coeff_level_run8 = x264_coeff_level_run8_ssse3;
pf->coeff_level_run[ DCT_LUMA_AC] = x264_coeff_level_run15_ssse3;
@@ -671,6 +672,7 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
pf->coeff_level_run[ DCT_LUMA_AC] = x264_coeff_level_run15_ssse3_lzcnt;
pf->coeff_level_run[DCT_LUMA_4x4] = x264_coeff_level_run16_ssse3_lzcnt;
}
+#endif
}
if( cpu&X264_CPU_SSE4 )
@@ -721,8 +723,10 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
pf->decimate_score64 = x264_decimate_score64_avx2;
pf->denoise_dct = x264_denoise_dct_avx2;
pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_avx2;
+#if ARCH_X86 || !defined( __MACH__ )
pf->coeff_level_run[ DCT_LUMA_AC] = x264_coeff_level_run15_avx2;
pf->coeff_level_run[DCT_LUMA_4x4] = x264_coeff_level_run16_avx2;
+#endif
}
if( cpu&X264_CPU_AVX512 )
{
diff --git a/encoder/cabac.c b/encoder/cabac.c
index 27052cdb..d96c0991 100644
--- a/encoder/cabac.c
+++ b/encoder/cabac.c
@@ -801,7 +801,7 @@ void x264_cabac_block_residual_c( x264_t *h, x264_cabac_t *cb, int ctx_block_cat
static void ALWAYS_INLINE x264_cabac_block_residual( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
{
-#if ARCH_X86_64 && HAVE_MMX
+#if ARCH_X86_64 && HAVE_MMX && !defined( __MACH__ )
h->bsf.cabac_block_residual_internal( l, MB_INTERLACED, ctx_block_cat, cb );
#else
x264_cabac_block_residual_c( h, cb, ctx_block_cat, l );
@@ -915,7 +915,7 @@ void x264_cabac_block_residual_rd_c( x264_t *h, x264_cabac_t *cb, int ctx_block_
static ALWAYS_INLINE void x264_cabac_block_residual_8x8( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
{
-#if ARCH_X86_64 && HAVE_MMX
+#if ARCH_X86_64 && HAVE_MMX && !defined( __MACH__ )
h->bsf.cabac_block_residual_8x8_rd_internal( l, MB_INTERLACED, ctx_block_cat, cb );
#else
x264_cabac_block_residual_8x8_rd_c( h, cb, ctx_block_cat, l );
@@ -923,7 +923,7 @@ static ALWAYS_INLINE void x264_cabac_block_residual_8x8( x264_t *h, x264_cabac_t
}
static ALWAYS_INLINE void x264_cabac_block_residual( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
{
-#if ARCH_X86_64 && HAVE_MMX
+#if ARCH_X86_64 && HAVE_MMX && !defined( __MACH__ )
h->bsf.cabac_block_residual_rd_internal( l, MB_INTERLACED, ctx_block_cat, cb );
#else
x264_cabac_block_residual_rd_c( h, cb, ctx_block_cat, l );
diff --git a/encoder/rdo.c b/encoder/rdo.c
index ef780c44..a6865bd3 100644
--- a/encoder/rdo.c
+++ b/encoder/rdo.c
@@ -694,7 +694,7 @@ int quant_trellis_cabac( x264_t *h, dctcoef *dct,
return !!dct[0];
}
-#if HAVE_MMX && ARCH_X86_64
+#if HAVE_MMX && ARCH_X86_64 && !defined( __MACH__ )
#define TRELLIS_ARGS unquant_mf, zigzag, lambda2, last_nnz, orig_coefs, quant_coefs, dct,\
cabac_state_sig, cabac_state_last, M64(cabac_state), M16(cabac_state+8)
if( num_coefs == 16 && !dc )
More information about the x264-devel
mailing list