[vlc-commits] Check for SSE4 at build time where possible
Rémi Denis-Courmont
git at videolan.org
Sat Aug 4 16:02:09 CEST 2012
vlc | branch: master | Rémi Denis-Courmont <remi at remlab.net> | Sat Aug 4 16:45:10 2012 +0300| [0ed12fd66fe7dbd390ed90f33fd137ba0f51908d] | committer: Rémi Denis-Courmont
Check for SSE4 at build time where possible
> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=0ed12fd66fe7dbd390ed90f33fd137ba0f51908d
---
include/vlc_cpu.h | 24 +++++++++++++++++++++---
modules/codec/avcodec/avcodec.c | 7 +++----
modules/codec/avcodec/copy.c | 7 ++++++-
modules/codec/avcodec/encoder.c | 7 +++----
modules/stream_out/switcher.c | 8 ++++----
src/misc/cpu.c | 18 +++++++-----------
src/posix/linux_cpu.c | 16 +++-------------
7 files changed, 47 insertions(+), 40 deletions(-)
diff --git a/include/vlc_cpu.h b/include/vlc_cpu.h
index 9753687..a87589d 100644
--- a/include/vlc_cpu.h
+++ b/include/vlc_cpu.h
@@ -37,9 +37,9 @@ VLC_API unsigned vlc_CPU(void);
# define VLC_CPU_SSE2 128
# define VLC_CPU_SSE3 256
# define VLC_CPU_SSSE3 512
-# define CPU_CAPABILITY_SSE4_1 (1<<10)
-# define CPU_CAPABILITY_SSE4_2 (1<<11)
-# define CPU_CAPABILITY_SSE4A (1<<12)
+# define VLC_CPU_SSE4_1 1024
+# define VLC_CPU_SSE4_2 2048
+# define VLC_CPU_SSE4A 4096
# if defined (__MMX__)
# define vlc_CPU_MMX() (1)
@@ -85,6 +85,24 @@ VLC_API unsigned vlc_CPU(void);
# define vlc_CPU_SSSE3() ((vlc_CPU() & VLC_CPU_SSSE3) != 0)
# endif
+# ifdef __SSE4_1__
+# define vlc_CPU_SSE4_1() (1)
+# else
+# define vlc_CPU_SSE4_1() ((vlc_CPU() & VLC_CPU_SSE4_1) != 0)
+# endif
+
+# ifdef __SSE4_2__
+# define vlc_CPU_SSE4_2() (1)
+# else
+# define vlc_CPU_SSE4_2() ((vlc_CPU() & VLC_CPU_SSE4_2) != 0)
+# endif
+
+# ifdef __SSE4A__
+# define vlc_CPU_SSE4A() (1)
+# else
+# define vlc_CPU_SSE4A() ((vlc_CPU() & VLC_CPU_SSE4A) != 0)
+# endif
+
# elif defined (__ppc__) || defined (__ppc64__) || defined (__powerpc__)
# define HAVE_FPU 1
# define VLC_CPU_ALTIVEC 2
diff --git a/modules/codec/avcodec/avcodec.c b/modules/codec/avcodec/avcodec.c
index a4e3d82..4159cc0 100644
--- a/modules/codec/avcodec/avcodec.c
+++ b/modules/codec/avcodec/avcodec.c
@@ -331,12 +331,11 @@ static int OpenDecoder( vlc_object_t *p_this )
/* Set CPU capabilities */
p_context->dsp_mask = 0;
#if defined (__i386__) || defined (__x86_64__)
- unsigned i_cpu = vlc_CPU();
if( !vlc_CPU_MMX() )
p_context->dsp_mask |= AV_CPU_FLAG_MMX;
if( !vlc_CPU_MMXEXT() )
p_context->dsp_mask |= AV_CPU_FLAG_MMX2;
- if( !(i_cpu & CPU_CAPABILITY_3DNOW) )
+ if( !(vlc_CPU() & CPU_CAPABILITY_3DNOW) )
p_context->dsp_mask |= AV_CPU_FLAG_3DNOW;
if( !vlc_CPU_SSE() )
p_context->dsp_mask |= AV_CPU_FLAG_SSE;
@@ -351,11 +350,11 @@ static int OpenDecoder( vlc_object_t *p_this )
p_context->dsp_mask |= AV_CPU_FLAG_SSSE3;
# endif
# ifdef AV_CPU_FLAG_SSE4
- if( !(i_cpu & CPU_CAPABILITY_SSE4_1) )
+ if( !vlc_CPU_SSE4_1() )
p_context->dsp_mask |= AV_CPU_FLAG_SSE4;
# endif
# ifdef AV_CPU_FLAG_SSE42
- if( !(i_cpu & CPU_CAPABILITY_SSE4_2) )
+ if( !vlc_CPU_SSE4_2() )
p_context->dsp_mask |= AV_CPU_FLAG_SSE42;
# endif
#endif
diff --git a/modules/codec/avcodec/copy.c b/modules/codec/avcodec/copy.c
index 71758ef..2828923 100644
--- a/modules/codec/avcodec/copy.c
+++ b/modules/codec/avcodec/copy.c
@@ -47,6 +47,11 @@
store " %%xmm4, 48(%[dst])\n" \
: : [dst]"r"(dstp), [src]"r"(srcp) : "memory")
+#ifndef __SSE4A__
+# undef vlc_CPU_SSE4A
+# define vlc_CPU_SSE4A() ((cpu & VLC_CPU_SSE4A) != 0)
+#endif
+
#ifndef __SSSE3__
# undef vlc_CPU_SSSE3
# define vlc_CPU_SSSE3() ((cpu & VLC_CPU_SSSE3) != 0)
@@ -88,7 +93,7 @@ static void CopyFromUswc(uint8_t *dst, size_t dst_pitch,
dst[x] = src[x];
#ifdef CAN_COMPILE_SSE4_1
- if (cpu & CPU_CAPABILITY_SSE4_1) {
+ if (vlc_CPU_SSE4_1()) {
if (!unaligned) {
for (; x+63 < width; x += 64)
COPY64(&dst[x], &src[x], "movntdqa", "movdqa");
diff --git a/modules/codec/avcodec/encoder.c b/modules/codec/avcodec/encoder.c
index 7101df1..02888ef 100644
--- a/modules/codec/avcodec/encoder.c
+++ b/modules/codec/avcodec/encoder.c
@@ -325,12 +325,11 @@ int OpenEncoder( vlc_object_t *p_this )
/* Set CPU capabilities */
p_context->dsp_mask = 0;
#if defined (__i386__) || defined (__x86_64__)
- unsigned i_cpu = vlc_CPU();
if( !vlc_CPU_MMX() )
p_context->dsp_mask |= AV_CPU_FLAG_MMX;
if( !vlc_CPU_MMXEXT() )
p_context->dsp_mask |= AV_CPU_FLAG_MMX2;
- if( !(i_cpu & CPU_CAPABILITY_3DNOW) )
+ if( !(vlc_CPU() & CPU_CAPABILITY_3DNOW) )
p_context->dsp_mask |= AV_CPU_FLAG_3DNOW;
if( !vlc_CPU_SSE() )
p_context->dsp_mask |= AV_CPU_FLAG_SSE;
@@ -345,11 +344,11 @@ int OpenEncoder( vlc_object_t *p_this )
p_context->dsp_mask |= AV_CPU_FLAG_SSSE3;
# endif
# ifdef AV_CPU_FLAG_SSE4
- if( !(i_cpu & CPU_CAPABILITY_SSE4_1) )
+ if( !vlc_CPU_SSE4_1() )
p_context->dsp_mask |= AV_CPU_FLAG_SSE4;
# endif
# ifdef AV_CPU_FLAG_SSE42
- if( !(i_cpu & CPU_CAPABILITY_SSE4_2) )
+ if( !vlc_CPU_SSE4_2() )
p_context->dsp_mask |= AV_CPU_FLAG_SSE42;
# endif
#endif
diff --git a/modules/stream_out/switcher.c b/modules/stream_out/switcher.c
index b6ebf52..bb0c1b3 100644
--- a/modules/stream_out/switcher.c
+++ b/modules/stream_out/switcher.c
@@ -400,11 +400,11 @@ static sout_stream_id_t *Add( sout_stream_t *p_stream, es_format_t *p_fmt )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSSE3;
# endif
# ifdef AV_CPU_FLAG_SSE4
- if( !(i_cpu & CPU_CAPABILITY_SSE4_1) )
+ if( !vlc_CPU_SSE4_1() )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE4;
# endif
# ifdef AV_CPU_FLAG_SSE42
- if( !(i_cpu & CPU_CAPABILITY_SSE4_2) )
+ if( !vlc_CPU_SSE4_2() )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE42;
# endif
#endif
@@ -821,11 +821,11 @@ static mtime_t VideoCommand( sout_stream_t *p_stream, sout_stream_id_t *id )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSSE3;
# endif
# ifdef AV_CPU_FLAG_SSE4
- if( !(i_cpu & CPU_CAPABILITY_SSE4_1) )
+ if( !vlc_CPU_SSE4_1() )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE4;
# endif
# ifdef AV_CPU_FLAG_SSE42
- if( !(i_cpu & CPU_CAPABILITY_SSE4_2) )
+ if( !vlc_CPU_SSE4_2() )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE42;
# endif
#endif
diff --git a/src/misc/cpu.c b/src/misc/cpu.c
index e0ae7b9..f7e9bfe 100644
--- a/src/misc/cpu.c
+++ b/src/misc/cpu.c
@@ -247,18 +247,14 @@ void vlc_CPU_init (void)
i_capabilities |= VLC_CPU_SSSE3;
# endif
-# if defined (__SSE4_1__)
- i_capabilities |= CPU_CAPABILITY_SSE4_1;
-# elif defined (CAN_COMPILE_SSE4_1)
+# if defined (CAN_COMPILE_SSE4_1)
if ((i_ecx & 0x00080000) && vlc_CPU_check ("SSE4.1", SSE4_1_test))
- i_capabilities |= CPU_CAPABILITY_SSE4_1;
+ i_capabilities |= VLC_CPU_SSE4_1;
# endif
-# if defined (__SSE4_2__)
- i_capabilities |= CPU_CAPABILITY_SSE4_2;
-# elif defined (CAN_COMPILE_SSE4_2)
+# if defined (CAN_COMPILE_SSE4_2)
if ((i_ecx & 0x00100000) && vlc_CPU_check ("SSE4.2", SSE4_2_test))
- i_capabilities |= CPU_CAPABILITY_SSE4_2;
+ i_capabilities |= VLC_CPU_SSE4_2;
# endif
/* test for additional capabilities */
@@ -345,9 +341,9 @@ void vlc_CPU_dump (vlc_object_t *obj)
if (vlc_CPU_SSE2()) p += sprintf (p, "SSE2 ");;
if (vlc_CPU_SSE3()) p += sprintf (p, "SSE2 ");;
if (vlc_CPU_SSSE3()) p += sprintf (p, "SSSE3 ");;
- PRINT_CAPABILITY(CPU_CAPABILITY_SSE4_1, "SSE4.1");
- PRINT_CAPABILITY(CPU_CAPABILITY_SSE4_2, "SSE4.2");
- PRINT_CAPABILITY(CPU_CAPABILITY_SSE4A, "SSE4A");
+ if (vlc_CPU_SSE4_1()) p += sprintf (p, "SSE4.1 ");;
+ if (vlc_CPU_SSE4_2()) p += sprintf (p, "SSE4.2 ");;
+ if (vlc_CPU_SSE4A()) p += sprintf (p, "SSE4A ");;
PRINT_CAPABILITY(CPU_CAPABILITY_3DNOW, "3DNow!");
#elif defined (__powerpc__) || defined (__ppc__) || defined (__ppc64__)
diff --git a/src/posix/linux_cpu.c b/src/posix/linux_cpu.c
index f239d2c..edb4f24 100644
--- a/src/posix/linux_cpu.c
+++ b/src/posix/linux_cpu.c
@@ -79,16 +79,12 @@ static void vlc_CPU_init (void)
core_caps |= VLC_CPU_SSE3;
if (!strcmp (cap, "ssse3"))
core_caps |= VLC_CPU_SSSE3;
-# ifndef __SSE4_1__
if (!strcmp (cap, "sse4_1"))
- core_caps |= CPU_CAPABILITY_SSE4_1;
-# endif
-# ifndef __SSE4_2__
+ core_caps |= VLC_CPU_SSE4_1;
if (!strcmp (cap, "sse4_2"))
- core_caps |= CPU_CAPABILITY_SSE4_1;
-# endif
+ core_caps |= VLC_CPU_SSE4_2; /* "sse4_2" cpuinfo flag maps to SSE4.2, not SSE4.1 */
if (!strcmp (cap, "sse4a"))
- core_caps |= CPU_CAPABILITY_SSE4A;
+ core_caps |= VLC_CPU_SSE4A;
# ifndef __3dNOW__
if (!strcmp (cap, "3dnow"))
core_caps |= CPU_CAPABILITY_3DNOW;
@@ -111,12 +107,6 @@ static void vlc_CPU_init (void)
/* Always enable capabilities that were forced during compilation */
#if defined (__i386__) || defined (__x86_64__)
-# ifdef __SSE4_1__
- all_caps |= CPU_CAPABILITY_SSE4_1;
-# endif
-# ifdef __SSE4_2__
- all_caps |= CPU_CAPABILITY_SSE4_2;
-# endif
# ifdef __3dNOW__
all_caps |= CPU_CAPABILITY_3DNOW;
# endif
More information about the vlc-commits
mailing list