[x264-devel] x86: Update intel compiler cpu dispatcher override for new versions of ICC/ICL
Anton Mitrofanov
git at videolan.org
Sat Dec 20 21:10:50 CET 2014
x264 | branch: master | Anton Mitrofanov <BugMaster at narod.ru> | Mon Dec 15 18:49:23 2014 +0300| [40bb56814e56ed342040bdbf30258aab39ee9e89] | committer: Anton Mitrofanov
x86: Update intel compiler cpu dispatcher override for new versions of ICC/ICL
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=40bb56814e56ed342040bdbf30258aab39ee9e89
---
common/cpu.h | 1 -
common/osdep.c | 45 ----------------------------------------
common/x86/cpu-a.asm | 50 ---------------------------------------------
configure | 8 +++++++-
encoder/encoder.c | 7 +++++++
extras/intel_dispatcher.h | 46 +++++++++++++++++++++++++++++++++++++++++
6 files changed, 60 insertions(+), 97 deletions(-)
diff --git a/common/cpu.h b/common/cpu.h
index 6d08027..07e5c6c 100644
--- a/common/cpu.h
+++ b/common/cpu.h
@@ -45,7 +45,6 @@ void x264_cpu_sfence( void );
#define x264_emms()
#endif
#define x264_sfence x264_cpu_sfence
-void x264_safe_intel_cpu_indicator_init( void );
/* kludge:
* gcc can't give variables any greater alignment than the stack frame has.
diff --git a/common/osdep.c b/common/osdep.c
index e97aaed..91f3fdd 100644
--- a/common/osdep.c
+++ b/common/osdep.c
@@ -94,51 +94,6 @@ int x264_threading_init( void )
}
#endif
-#if HAVE_MMX
-#ifdef __INTEL_COMPILER
-/* Agner's patch to Intel's CPU dispatcher from pages 131-132 of
- * http://agner.org/optimize/optimizing_cpp.pdf (2011-01-30)
- * adapted to x264's cpu schema. */
-
-// Global variable indicating cpu
-int __intel_cpu_indicator = 0;
-// CPU dispatcher function
-void x264_intel_cpu_indicator_init( void )
-{
- unsigned int cpu = x264_cpu_detect();
- if( cpu&X264_CPU_AVX )
- __intel_cpu_indicator = 0x20000;
- else if( cpu&X264_CPU_SSE42 )
- __intel_cpu_indicator = 0x8000;
- else if( cpu&X264_CPU_SSE4 )
- __intel_cpu_indicator = 0x2000;
- else if( cpu&X264_CPU_SSSE3 )
- __intel_cpu_indicator = 0x1000;
- else if( cpu&X264_CPU_SSE3 )
- __intel_cpu_indicator = 0x800;
- else if( cpu&X264_CPU_SSE2 && !(cpu&X264_CPU_SSE2_IS_SLOW) )
- __intel_cpu_indicator = 0x200;
- else if( cpu&X264_CPU_SSE )
- __intel_cpu_indicator = 0x80;
- else if( cpu&X264_CPU_MMX2 )
- __intel_cpu_indicator = 8;
- else
- __intel_cpu_indicator = 1;
-}
-
-/* __intel_cpu_indicator_init appears to have a non-standard calling convention that
- * assumes certain registers aren't preserved, so we'll route it through a function
- * that backs up all the registers. */
-void __intel_cpu_indicator_init( void )
-{
- x264_safe_intel_cpu_indicator_init();
-}
-#else
-void x264_intel_cpu_indicator_init( void )
-{}
-#endif
-#endif
-
#ifdef _WIN32
/* Functions for dealing with Unicode on Windows. */
FILE *x264_fopen( const char *filename, const char *mode )
diff --git a/common/x86/cpu-a.asm b/common/x86/cpu-a.asm
index bcf6c43..4dfd775 100644
--- a/common/x86/cpu-a.asm
+++ b/common/x86/cpu-a.asm
@@ -145,53 +145,3 @@ cglobal cpu_emms
cglobal cpu_sfence
sfence
ret
-
-cextern intel_cpu_indicator_init
-
-;-----------------------------------------------------------------------------
-; void safe_intel_cpu_indicator_init( void );
-;-----------------------------------------------------------------------------
-cglobal safe_intel_cpu_indicator_init
- push r0
- push r1
- push r2
- push r3
- push r4
- push r5
- push r6
-%if ARCH_X86_64
- push r7
- push r8
- push r9
- push r10
- push r11
- push r12
- push r13
- push r14
-%endif
- push rbp
- mov rbp, rsp
-%if WIN64
- sub rsp, 32 ; shadow space
-%endif
- and rsp, ~31
- call intel_cpu_indicator_init
- leave
-%if ARCH_X86_64
- pop r14
- pop r13
- pop r12
- pop r11
- pop r10
- pop r9
- pop r8
- pop r7
-%endif
- pop r6
- pop r5
- pop r4
- pop r3
- pop r2
- pop r1
- pop r0
- ret
diff --git a/configure b/configure
index 02fe4be..e2977bd 100755
--- a/configure
+++ b/configure
@@ -311,7 +311,7 @@ NL="
# list of all preprocessor HAVE values we can define
CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F SWSCALE \
- LAVF FFMS GPAC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL THP LSMASH X86_INLINE_ASM AS_FUNC"
+ LAVF FFMS GPAC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL THP LSMASH X86_INLINE_ASM AS_FUNC INTEL_DISPATCHER"
# parse options
@@ -1093,6 +1093,12 @@ if cc_check '' -Wmaybe-uninitialized ; then
CFLAGS="-Wno-maybe-uninitialized $CFLAGS"
fi
+if [ $compiler = ICC -o $compiler = ICL ] ; then
+ if cc_check 'extras/intel_dispatcher.h' '' 'x264_intel_dispatcher_override();' ; then
+ define HAVE_INTEL_DISPATCHER
+ fi
+fi
+
if [ "$bit_depth" -gt "8" ]; then
define HIGH_BIT_DEPTH
ASFLAGS="$ASFLAGS -DHIGH_BIT_DEPTH=1"
diff --git a/encoder/encoder.c b/encoder/encoder.c
index c98a900..54d2e5a 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -32,6 +32,9 @@
#include "ratecontrol.h"
#include "macroblock.h"
#include "me.h"
+#if HAVE_INTEL_DISPATCHER
+#include "extras/intel_dispatcher.h"
+#endif
//#define DEBUG_MB_TYPE
@@ -1390,6 +1393,10 @@ x264_t *x264_encoder_open( x264_param_t *param )
if( param->param_free )
param->param_free( param );
+#if HAVE_INTEL_DISPATCHER
+ x264_intel_dispatcher_override();
+#endif
+
if( x264_threading_init() )
{
x264_log( h, X264_LOG_ERROR, "unable to initialize threading\n" );
diff --git a/extras/intel_dispatcher.h b/extras/intel_dispatcher.h
new file mode 100644
index 0000000..8837c62
--- /dev/null
+++ b/extras/intel_dispatcher.h
@@ -0,0 +1,46 @@
+/*****************************************************************************
+ * intel_dispatcher.h: intel compiler cpu dispatcher override
+ *****************************************************************************
+ * Copyright (C) 2014 x264 project
+ *
+ * Authors: Anton Mitrofanov <BugMaster at narod.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing at x264.com.
+ *****************************************************************************/
+
+#ifndef X264_INTEL_DISPATCHER_H
+#define X264_INTEL_DISPATCHER_H
+
+/* Feature flags using _FEATURE_* defines from immintrin.h */
+extern unsigned long long __intel_cpu_feature_indicator;
+extern unsigned long long __intel_cpu_feature_indicator_x;
+
+/* CPU vendor independent version of dispatcher */
+void __intel_cpu_features_init_x( void );
+
+static void x264_intel_dispatcher_override( void )
+{
+ if( __intel_cpu_feature_indicator & ~1ULL )
+ return;
+ __intel_cpu_feature_indicator = 0;
+ __intel_cpu_feature_indicator_x = 0;
+ __intel_cpu_features_init_x();
+ __intel_cpu_feature_indicator = __intel_cpu_feature_indicator_x;
+}
+
+#endif
More information about the x264-devel mailing list