[x264-devel] Check for OS AVX support in addition to CPUID
Jason Garrett-Glaser
git at videolan.org
Thu Jan 27 15:07:55 CET 2011
x264 | branch: master | Jason Garrett-Glaser <jason at x264.com> | Thu Jan 27 05:33:25 2011 -0800| [f6d0c95b964d52780891c39f6ec93022b6ec1cb0] | committer: Jason Garrett-Glaser
Check for OS AVX support in addition to CPUID
Even if not using ymm registers, AVX operations will cause SIGILLs on unsupported OSs.
On Windows, AVX is only available on Windows 7 SP1 or later.
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=f6d0c95b964d52780891c39f6ec93022b6ec1cb0
---
common/cpu.c | 13 ++++++++--
common/x86/cpu-a.asm | 62 +++++++++++++++++++++++++-------------------------
x264.h | 5 +--
3 files changed, 43 insertions(+), 37 deletions(-)
diff --git a/common/cpu.c b/common/cpu.c
index 6885746..e77253d 100644
--- a/common/cpu.c
+++ b/common/cpu.c
@@ -94,7 +94,8 @@ static void sigill_handler( int sig )
#if HAVE_MMX
int x264_cpu_cpuid_test( void );
-uint32_t x264_cpu_cpuid( uint32_t op, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx );
+void x264_cpu_cpuid( uint32_t op, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx );
+void x264_cpu_xgetbv( uint32_t op, int *eax, int *edx );
uint32_t x264_cpu_detect( void )
{
@@ -130,8 +131,14 @@ uint32_t x264_cpu_detect( void )
cpu |= X264_CPU_SSE4;
if( ecx&0x00100000 )
cpu |= X264_CPU_SSE42;
- if( ecx&0x10000000 )
- cpu |= X264_CPU_AVX;
+ /* Check OSXSAVE and AVX bits */
+ if( (ecx&0x18000000) == 0x18000000 )
+ {
+ /* Check for OS support */
+ x264_cpu_xgetbv( 0, &eax, &edx );
+ if( (eax&0x6) == 0x6 )
+ cpu |= X264_CPU_AVX;
+ }
if( cpu & X264_CPU_SSSE3 )
cpu |= X264_CPU_SSE2_IS_FAST;
diff --git a/common/x86/cpu-a.asm b/common/x86/cpu-a.asm
index c2dd72d..02265bc 100644
--- a/common/x86/cpu-a.asm
+++ b/common/x86/cpu-a.asm
@@ -29,27 +29,43 @@
SECTION .text
-%ifdef ARCH_X86_64
-
;-----------------------------------------------------------------------------
-; int cpu_cpuid( int op, int *eax, int *ebx, int *ecx, int *edx )
+; void cpu_cpuid( int op, int *eax, int *ebx, int *ecx, int *edx )
;-----------------------------------------------------------------------------
cglobal cpu_cpuid, 5,7
- push rbx
- mov r11, r1
- mov r10, r2
- movifnidn r9, r3
- movifnidn r8, r4
- mov eax, r0d
+ push rbx
+ push r4
+ push r3
+ push r2
+ push r1
+ mov eax, r0d
cpuid
- mov [r11], eax
- mov [r10], ebx
- mov [r9], ecx
- mov [r8], edx
- pop rbx
+ pop rsi
+ mov [rsi], eax
+ pop rsi
+ mov [rsi], ebx
+ pop rsi
+ mov [rsi], ecx
+ pop rsi
+ mov [rsi], edx
+ pop rbx
+ RET
+
+;-----------------------------------------------------------------------------
+; void cpu_xgetbv( int op, int *eax, int *edx )
+;-----------------------------------------------------------------------------
+cglobal cpu_xgetbv, 3,7
+ push r2
+ push r1
+ mov ecx, r0d
+ xgetbv
+ pop rsi
+ mov [rsi], eax
+ pop rsi
+ mov [rsi], edx
RET
-%else
+%ifndef ARCH_X86_64
;-----------------------------------------------------------------------------
; int cpu_cpuid_test( void )
@@ -78,22 +94,6 @@ cglobal cpu_cpuid_test
ret
;-----------------------------------------------------------------------------
-; int cpu_cpuid( int op, int *eax, int *ebx, int *ecx, int *edx )
-;-----------------------------------------------------------------------------
-cglobal cpu_cpuid, 0,6
- mov eax, r0m
- cpuid
- mov esi, r1m
- mov [esi], eax
- mov esi, r2m
- mov [esi], ebx
- mov esi, r3m
- mov [esi], ecx
- mov esi, r4m
- mov [esi], edx
- RET
-
-;-----------------------------------------------------------------------------
; void stack_align( void (*func)(void*), void *arg );
;-----------------------------------------------------------------------------
cglobal stack_align
diff --git a/x264.h b/x264.h
index 5234bc0..8f39497 100644
--- a/x264.h
+++ b/x264.h
@@ -122,9 +122,8 @@ typedef struct
#define X264_CPU_FAST_NEON_MRC 0x080000 /* Transfer from NEON to ARM register is fast (Cortex-A9) */
#define X264_CPU_SLOW_CTZ 0x100000 /* BSR/BSF x86 instructions are really slow on some CPUs */
#define X264_CPU_SLOW_ATOM 0x200000 /* The Atom just sucks */
-#define X264_CPU_AVX 0x400000 /* AVX support -- we don't currently use YMM registers, just
- * the 3-operand capability, so we don't require OS support
- * for AVX. */
+#define X264_CPU_AVX 0x400000 /* AVX support: requires OS support even if YMM registers
+ * aren't used. */
/* Analyse flags
*/
More information about the x264-devel
mailing list