[vlc-commits] Check for SSE4 at build time where possible

Rémi Denis-Courmont git at videolan.org
Sat Aug 4 16:02:09 CEST 2012


vlc | branch: master | Rémi Denis-Courmont <remi at remlab.net> | Sat Aug  4 16:45:10 2012 +0300| [0ed12fd66fe7dbd390ed90f33fd137ba0f51908d] | committer: Rémi Denis-Courmont

Check for SSE4 at build time where possible

> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=0ed12fd66fe7dbd390ed90f33fd137ba0f51908d
---

 include/vlc_cpu.h               |   24 +++++++++++++++++++++---
 modules/codec/avcodec/avcodec.c |    7 +++----
 modules/codec/avcodec/copy.c    |    7 ++++++-
 modules/codec/avcodec/encoder.c |    7 +++----
 modules/stream_out/switcher.c   |    8 ++++----
 src/misc/cpu.c                  |   18 +++++++-----------
 src/posix/linux_cpu.c           |   16 +++-------------
 7 files changed, 47 insertions(+), 40 deletions(-)

diff --git a/include/vlc_cpu.h b/include/vlc_cpu.h
index 9753687..a87589d 100644
--- a/include/vlc_cpu.h
+++ b/include/vlc_cpu.h
@@ -37,9 +37,9 @@ VLC_API unsigned vlc_CPU(void);
 #  define VLC_CPU_SSE2   128
 #  define VLC_CPU_SSE3   256
 #  define VLC_CPU_SSSE3  512
-#  define CPU_CAPABILITY_SSE4_1  (1<<10)
-#  define CPU_CAPABILITY_SSE4_2  (1<<11)
-#  define CPU_CAPABILITY_SSE4A   (1<<12)
+#  define VLC_CPU_SSE4_1 1024
+#  define VLC_CPU_SSE4_2 2048
+#  define VLC_CPU_SSE4A  4096
 
 # if defined (__MMX__)
 #  define vlc_CPU_MMX() (1)
@@ -85,6 +85,24 @@ VLC_API unsigned vlc_CPU(void);
 #  define vlc_CPU_SSSE3() ((vlc_CPU() & VLC_CPU_SSSE3) != 0)
 # endif
 
+# ifdef __SSE4_1__
+#  define vlc_CPU_SSE4_1() (1)
+# else
+#  define vlc_CPU_SSE4_1() ((vlc_CPU() & VLC_CPU_SSE4_1) != 0)
+# endif
+
+# ifdef __SSE4_2__
+#  define vlc_CPU_SSE4_2() (1)
+# else
+#  define vlc_CPU_SSE4_2() ((vlc_CPU() & VLC_CPU_SSE4_2) != 0)
+# endif
+
+# ifdef __SSE4A__
+#  define vlc_CPU_SSE4A() (1)
+# else
+#  define vlc_CPU_SSE4A() ((vlc_CPU() & VLC_CPU_SSE4A) != 0)
+# endif
+
 # elif defined (__ppc__) || defined (__ppc64__) || defined (__powerpc__)
 #  define HAVE_FPU 1
 #  define VLC_CPU_ALTIVEC 2
diff --git a/modules/codec/avcodec/avcodec.c b/modules/codec/avcodec/avcodec.c
index a4e3d82..4159cc0 100644
--- a/modules/codec/avcodec/avcodec.c
+++ b/modules/codec/avcodec/avcodec.c
@@ -331,12 +331,11 @@ static int OpenDecoder( vlc_object_t *p_this )
     /* Set CPU capabilities */
     p_context->dsp_mask = 0;
 #if defined (__i386__) || defined (__x86_64__)
-    unsigned i_cpu = vlc_CPU();
     if( !vlc_CPU_MMX() )
         p_context->dsp_mask |= AV_CPU_FLAG_MMX;
     if( !vlc_CPU_MMXEXT() )
         p_context->dsp_mask |= AV_CPU_FLAG_MMX2;
-    if( !(i_cpu & CPU_CAPABILITY_3DNOW) )
+    if( !(vlc_CPU() & CPU_CAPABILITY_3DNOW) )
         p_context->dsp_mask |= AV_CPU_FLAG_3DNOW;
     if( !vlc_CPU_SSE() )
         p_context->dsp_mask |= AV_CPU_FLAG_SSE;
@@ -351,11 +350,11 @@ static int OpenDecoder( vlc_object_t *p_this )
         p_context->dsp_mask |= AV_CPU_FLAG_SSSE3;
 # endif
 # ifdef AV_CPU_FLAG_SSE4
-    if( !(i_cpu & CPU_CAPABILITY_SSE4_1) )
+    if( !vlc_CPU_SSE4_1() )
         p_context->dsp_mask |= AV_CPU_FLAG_SSE4;
 # endif
 # ifdef AV_CPU_FLAG_SSE42
-    if( !(i_cpu & CPU_CAPABILITY_SSE4_2) )
+    if( !vlc_CPU_SSE4_2() )
         p_context->dsp_mask |= AV_CPU_FLAG_SSE42;
 # endif
 #endif
diff --git a/modules/codec/avcodec/copy.c b/modules/codec/avcodec/copy.c
index 71758ef..2828923 100644
--- a/modules/codec/avcodec/copy.c
+++ b/modules/codec/avcodec/copy.c
@@ -47,6 +47,11 @@
         store " %%xmm4,   48(%[dst])\n" \
         : : [dst]"r"(dstp), [src]"r"(srcp) : "memory")
 
+#ifndef __SSE4_1__ /* not forced at build time: test the `cpu` mask in scope at the call site */
+# undef vlc_CPU_SSE4_1
+# define vlc_CPU_SSE4_1() ((cpu & VLC_CPU_SSE4_1) != 0)
+#endif
+
 #ifndef __SSSE3__
 # undef vlc_CPU_SSSE3
 # define vlc_CPU_SSSE3() ((cpu & VLC_CPU_SSSE3) != 0)
@@ -88,7 +93,7 @@ static void CopyFromUswc(uint8_t *dst, size_t dst_pitch,
             dst[x] = src[x];
 
 #ifdef CAN_COMPILE_SSE4_1
-        if (cpu & CPU_CAPABILITY_SSE4_1) {
+        if (vlc_CPU_SSE4_1()) {
             if (!unaligned) {
                 for (; x+63 < width; x += 64)
                     COPY64(&dst[x], &src[x], "movntdqa", "movdqa");
diff --git a/modules/codec/avcodec/encoder.c b/modules/codec/avcodec/encoder.c
index 7101df1..02888ef 100644
--- a/modules/codec/avcodec/encoder.c
+++ b/modules/codec/avcodec/encoder.c
@@ -325,12 +325,11 @@ int OpenEncoder( vlc_object_t *p_this )
     /* Set CPU capabilities */
     p_context->dsp_mask = 0;
 #if defined (__i386__) || defined (__x86_64__)
-    unsigned i_cpu = vlc_CPU();
     if( !vlc_CPU_MMX() )
         p_context->dsp_mask |= AV_CPU_FLAG_MMX;
     if( !vlc_CPU_MMXEXT() )
         p_context->dsp_mask |= AV_CPU_FLAG_MMX2;
-    if( !(i_cpu & CPU_CAPABILITY_3DNOW) )
+    if( !(vlc_CPU() & CPU_CAPABILITY_3DNOW) )
         p_context->dsp_mask |= AV_CPU_FLAG_3DNOW;
     if( !vlc_CPU_SSE() )
         p_context->dsp_mask |= AV_CPU_FLAG_SSE;
@@ -345,11 +344,11 @@ int OpenEncoder( vlc_object_t *p_this )
         p_context->dsp_mask |= AV_CPU_FLAG_SSSE3;
 # endif
 # ifdef AV_CPU_FLAG_SSE4
-    if( !(i_cpu & CPU_CAPABILITY_SSE4_1) )
+    if( !vlc_CPU_SSE4_1() )
         p_context->dsp_mask |= AV_CPU_FLAG_SSE4;
 # endif
 # ifdef AV_CPU_FLAG_SSE42
-    if( !(i_cpu & CPU_CAPABILITY_SSE4_2) )
+    if( !vlc_CPU_SSE4_2() )
         p_context->dsp_mask |= AV_CPU_FLAG_SSE42;
 # endif
 #endif
diff --git a/modules/stream_out/switcher.c b/modules/stream_out/switcher.c
index b6ebf52..bb0c1b3 100644
--- a/modules/stream_out/switcher.c
+++ b/modules/stream_out/switcher.c
@@ -400,11 +400,11 @@ static sout_stream_id_t *Add( sout_stream_t *p_stream, es_format_t *p_fmt )
             id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSSE3;
 # endif
 # ifdef AV_CPU_FLAG_SSE4
-        if( !(i_cpu & CPU_CAPABILITY_SSE4_1) )
+        if( !vlc_CPU_SSE4_1() )
             id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE4;
 # endif
 # ifdef AV_CPU_FLAG_SSE42
-        if( !(i_cpu & CPU_CAPABILITY_SSE4_2) )
+        if( !vlc_CPU_SSE4_2() )
             id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE42;
 # endif
 #endif
@@ -821,11 +821,11 @@ static mtime_t VideoCommand( sout_stream_t *p_stream, sout_stream_id_t *id )
             id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSSE3;
 # endif
 # ifdef AV_CPU_FLAG_SSE4
-        if( !(i_cpu & CPU_CAPABILITY_SSE4_1) )
+        if( !vlc_CPU_SSE4_1() )
             id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE4;
 # endif
 # ifdef AV_CPU_FLAG_SSE42
-        if( !(i_cpu & CPU_CAPABILITY_SSE4_2) )
+        if( !vlc_CPU_SSE4_2() )
             id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE42;
 # endif
 #endif
diff --git a/src/misc/cpu.c b/src/misc/cpu.c
index e0ae7b9..f7e9bfe 100644
--- a/src/misc/cpu.c
+++ b/src/misc/cpu.c
@@ -247,18 +247,14 @@ void vlc_CPU_init (void)
         i_capabilities |= VLC_CPU_SSSE3;
 # endif
 
-# if defined (__SSE4_1__)
-    i_capabilities |= CPU_CAPABILITY_SSE4_1;
-# elif defined (CAN_COMPILE_SSE4_1)
+# if defined (CAN_COMPILE_SSE4_1)
     if ((i_ecx & 0x00080000) && vlc_CPU_check ("SSE4.1", SSE4_1_test))
-        i_capabilities |= CPU_CAPABILITY_SSE4_1;
+        i_capabilities |= VLC_CPU_SSE4_1;
 # endif
 
-# if defined (__SSE4_2__)
-    i_capabilities |= CPU_CAPABILITY_SSE4_2;
-# elif defined (CAN_COMPILE_SSE4_2)
+# if defined (CAN_COMPILE_SSE4_2)
     if ((i_ecx & 0x00100000) && vlc_CPU_check ("SSE4.2", SSE4_2_test))
-        i_capabilities |= CPU_CAPABILITY_SSE4_2;
+        i_capabilities |= VLC_CPU_SSE4_2;
 # endif
 
     /* test for additional capabilities */
@@ -345,9 +341,9 @@ void vlc_CPU_dump (vlc_object_t *obj)
     if (vlc_CPU_SSE2()) p += sprintf (p, "SSE2 ");;
     if (vlc_CPU_SSE3()) p += sprintf (p, "SSE2 ");;
     if (vlc_CPU_SSSE3()) p += sprintf (p, "SSSE3 ");;
-    PRINT_CAPABILITY(CPU_CAPABILITY_SSE4_1, "SSE4.1");
-    PRINT_CAPABILITY(CPU_CAPABILITY_SSE4_2, "SSE4.2");
-    PRINT_CAPABILITY(CPU_CAPABILITY_SSE4A,  "SSE4A");
+    if (vlc_CPU_SSE4_1()) p += sprintf (p, "SSE4.1 ");;
+    if (vlc_CPU_SSE4_2()) p += sprintf (p, "SSE4.2 ");;
+    if (vlc_CPU_SSE4A()) p += sprintf (p, "SSE4A ");;
     PRINT_CAPABILITY(CPU_CAPABILITY_3DNOW, "3DNow!");
 
 #elif defined (__powerpc__) || defined (__ppc__) || defined (__ppc64__)
diff --git a/src/posix/linux_cpu.c b/src/posix/linux_cpu.c
index f239d2c..edb4f24 100644
--- a/src/posix/linux_cpu.c
+++ b/src/posix/linux_cpu.c
@@ -79,16 +79,12 @@ static void vlc_CPU_init (void)
                 core_caps |= VLC_CPU_SSE3;
             if (!strcmp (cap, "ssse3"))
                 core_caps |= VLC_CPU_SSSE3;
-# ifndef __SSE4_1__
             if (!strcmp (cap, "sse4_1"))
-                core_caps |= CPU_CAPABILITY_SSE4_1;
-# endif
-# ifndef __SSE4_2__
+                core_caps |= VLC_CPU_SSE4_1;
             if (!strcmp (cap, "sse4_2"))
-                core_caps |= CPU_CAPABILITY_SSE4_1;
-# endif
+                core_caps |= VLC_CPU_SSE4_2; /* SSE4.2 has its own bit, distinct from SSE4.1 */
             if (!strcmp (cap, "sse4a"))
-                core_caps |= CPU_CAPABILITY_SSE4A;
+                core_caps |= VLC_CPU_SSE4A;
 # ifndef __3dNOW__
             if (!strcmp (cap, "3dnow"))
                 core_caps |= CPU_CAPABILITY_3DNOW;
@@ -111,12 +107,6 @@ static void vlc_CPU_init (void)
 
     /* Always enable capabilities that were forced during compilation */
 #if defined (__i386__) || defined (__x86_64__)
-# ifdef __SSE4_1__
-    all_caps |= CPU_CAPABILITY_SSE4_1;
-# endif
-# ifdef __SSE4_2__
-    all_caps |= CPU_CAPABILITY_SSE4_2;
-# endif
 # ifdef __3dNOW__
     all_caps |= CPU_CAPABILITY_3DNOW;
 # endif



More information about the vlc-commits mailing list