[x264-devel] x86: Update intel compiler cpu dispatcher override for new versions of ICC/ICL

Anton Mitrofanov git at videolan.org
Sat Dec 20 21:10:50 CET 2014


x264 | branch: master | Anton Mitrofanov <BugMaster at narod.ru> | Mon Dec 15 18:49:23 2014 +0300| [40bb56814e56ed342040bdbf30258aab39ee9e89] | committer: Anton Mitrofanov

x86: Update intel compiler cpu dispatcher override for new versions of ICC/ICL

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=40bb56814e56ed342040bdbf30258aab39ee9e89
---

 common/cpu.h              |    1 -
 common/osdep.c            |   45 ----------------------------------------
 common/x86/cpu-a.asm      |   50 ---------------------------------------------
 configure                 |    8 +++++++-
 encoder/encoder.c         |    7 +++++++
 extras/intel_dispatcher.h |   46 +++++++++++++++++++++++++++++++++++++++++
 6 files changed, 60 insertions(+), 97 deletions(-)

diff --git a/common/cpu.h b/common/cpu.h
index 6d08027..07e5c6c 100644
--- a/common/cpu.h
+++ b/common/cpu.h
@@ -45,7 +45,6 @@ void     x264_cpu_sfence( void );
 #define x264_emms()
 #endif
 #define x264_sfence x264_cpu_sfence
-void     x264_safe_intel_cpu_indicator_init( void );
 
 /* kludge:
  * gcc can't give variables any greater alignment than the stack frame has.
diff --git a/common/osdep.c b/common/osdep.c
index e97aaed..91f3fdd 100644
--- a/common/osdep.c
+++ b/common/osdep.c
@@ -94,51 +94,6 @@ int x264_threading_init( void )
 }
 #endif
 
-#if HAVE_MMX
-#ifdef __INTEL_COMPILER
-/* Agner's patch to Intel's CPU dispatcher from pages 131-132 of
- * http://agner.org/optimize/optimizing_cpp.pdf (2011-01-30)
- * adapted to x264's cpu schema. */
-
-// Global variable indicating cpu
-int __intel_cpu_indicator = 0;
-// CPU dispatcher function
-void x264_intel_cpu_indicator_init( void )
-{
-    unsigned int cpu = x264_cpu_detect();
-    if( cpu&X264_CPU_AVX )
-        __intel_cpu_indicator = 0x20000;
-    else if( cpu&X264_CPU_SSE42 )
-        __intel_cpu_indicator = 0x8000;
-    else if( cpu&X264_CPU_SSE4 )
-        __intel_cpu_indicator = 0x2000;
-    else if( cpu&X264_CPU_SSSE3 )
-        __intel_cpu_indicator = 0x1000;
-    else if( cpu&X264_CPU_SSE3 )
-        __intel_cpu_indicator = 0x800;
-    else if( cpu&X264_CPU_SSE2 && !(cpu&X264_CPU_SSE2_IS_SLOW) )
-        __intel_cpu_indicator = 0x200;
-    else if( cpu&X264_CPU_SSE )
-        __intel_cpu_indicator = 0x80;
-    else if( cpu&X264_CPU_MMX2 )
-        __intel_cpu_indicator = 8;
-    else
-        __intel_cpu_indicator = 1;
-}
-
-/* __intel_cpu_indicator_init appears to have a non-standard calling convention that
- * assumes certain registers aren't preserved, so we'll route it through a function
- * that backs up all the registers. */
-void __intel_cpu_indicator_init( void )
-{
-    x264_safe_intel_cpu_indicator_init();
-}
-#else
-void x264_intel_cpu_indicator_init( void )
-{}
-#endif
-#endif
-
 #ifdef _WIN32
 /* Functions for dealing with Unicode on Windows. */
 FILE *x264_fopen( const char *filename, const char *mode )
diff --git a/common/x86/cpu-a.asm b/common/x86/cpu-a.asm
index bcf6c43..4dfd775 100644
--- a/common/x86/cpu-a.asm
+++ b/common/x86/cpu-a.asm
@@ -145,53 +145,3 @@ cglobal cpu_emms
 cglobal cpu_sfence
     sfence
     ret
-
-cextern intel_cpu_indicator_init
-
-;-----------------------------------------------------------------------------
-; void safe_intel_cpu_indicator_init( void );
-;-----------------------------------------------------------------------------
-cglobal safe_intel_cpu_indicator_init
-    push r0
-    push r1
-    push r2
-    push r3
-    push r4
-    push r5
-    push r6
-%if ARCH_X86_64
-    push r7
-    push r8
-    push r9
-    push r10
-    push r11
-    push r12
-    push r13
-    push r14
-%endif
-    push rbp
-    mov  rbp, rsp
-%if WIN64
-    sub  rsp, 32 ; shadow space
-%endif
-    and  rsp, ~31
-    call intel_cpu_indicator_init
-    leave
-%if ARCH_X86_64
-    pop r14
-    pop r13
-    pop r12
-    pop r11
-    pop r10
-    pop r9
-    pop r8
-    pop r7
-%endif
-    pop r6
-    pop r5
-    pop r4
-    pop r3
-    pop r2
-    pop r1
-    pop r0
-    ret
diff --git a/configure b/configure
index 02fe4be..e2977bd 100755
--- a/configure
+++ b/configure
@@ -311,7 +311,7 @@ NL="
 
 # list of all preprocessor HAVE values we can define
 CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F SWSCALE \
-             LAVF FFMS GPAC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL THP LSMASH X86_INLINE_ASM AS_FUNC"
+             LAVF FFMS GPAC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL THP LSMASH X86_INLINE_ASM AS_FUNC INTEL_DISPATCHER"
 
 # parse options
 
@@ -1093,6 +1093,12 @@ if cc_check '' -Wmaybe-uninitialized ; then
     CFLAGS="-Wno-maybe-uninitialized $CFLAGS"
 fi
 
+if [ $compiler = ICC -o $compiler = ICL ] ; then
+    if cc_check 'extras/intel_dispatcher.h' '' 'x264_intel_dispatcher_override();' ; then
+        define HAVE_INTEL_DISPATCHER
+    fi
+fi
+
 if [ "$bit_depth" -gt "8" ]; then
     define HIGH_BIT_DEPTH
     ASFLAGS="$ASFLAGS -DHIGH_BIT_DEPTH=1"
diff --git a/encoder/encoder.c b/encoder/encoder.c
index c98a900..54d2e5a 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -32,6 +32,9 @@
 #include "ratecontrol.h"
 #include "macroblock.h"
 #include "me.h"
+#if HAVE_INTEL_DISPATCHER
+#include "extras/intel_dispatcher.h"
+#endif
 
 //#define DEBUG_MB_TYPE
 
@@ -1390,6 +1393,10 @@ x264_t *x264_encoder_open( x264_param_t *param )
     if( param->param_free )
         param->param_free( param );
 
+#if HAVE_INTEL_DISPATCHER
+    x264_intel_dispatcher_override();
+#endif
+
     if( x264_threading_init() )
     {
         x264_log( h, X264_LOG_ERROR, "unable to initialize threading\n" );
diff --git a/extras/intel_dispatcher.h b/extras/intel_dispatcher.h
new file mode 100644
index 0000000..8837c62
--- /dev/null
+++ b/extras/intel_dispatcher.h
@@ -0,0 +1,46 @@
+/*****************************************************************************
+ * intel_dispatcher.h: intel compiler cpu dispatcher override
+ *****************************************************************************
+ * Copyright (C) 2014 x264 project
+ *
+ * Authors: Anton Mitrofanov <BugMaster at narod.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing at x264.com.
+ *****************************************************************************/
+
+#ifndef X264_INTEL_DISPATCHER_H
+#define X264_INTEL_DISPATCHER_H
+
+/* Feature flags using _FEATURE_* defines from immintrin.h */
+extern unsigned long long __intel_cpu_feature_indicator;
+extern unsigned long long __intel_cpu_feature_indicator_x;
+
+/* CPU vendor independent version of dispatcher */
+void __intel_cpu_features_init_x( void );
+
+static void x264_intel_dispatcher_override( void )
+{
+    if( __intel_cpu_feature_indicator & ~1ULL )
+        return;
+    __intel_cpu_feature_indicator = 0;
+    __intel_cpu_feature_indicator_x = 0;
+    __intel_cpu_features_init_x();
+    __intel_cpu_feature_indicator = __intel_cpu_feature_indicator_x;
+}
+
+#endif



More information about the x264-devel mailing list