[vlc-devel] [PATCH 09/16] ssse3: add deinterleaving macro and SSSE3 option

glenvt18 glenvt18 at gmail.com
Fri Jun 26 13:20:03 CEST 2015


---
 configure.ac        |  5 +++++
 src/dvbcsa_bs.h     |  3 +++
 src/dvbcsa_bs_sse.h | 19 +++++++++++++++++++
 3 files changed, 27 insertions(+)

diff --git a/configure.ac b/configure.ac
index 4dd0726..f978a02 100644
--- a/configure.ac
+++ b/configure.ac
@@ -13,6 +13,7 @@ AC_ARG_ENABLE(uint32, AC_HELP_STRING(--enable-uint32, [Use native 32 bits intege
 AC_ARG_ENABLE(uint64, AC_HELP_STRING(--enable-uint64, [Use native 64 bits integers for bitslice]), enable_uint64=$enableval, enable_uint64=no)
 AC_ARG_ENABLE(mmx, AC_HELP_STRING(--enable-mmx, [Use MMX for bitslice]), mmx_debug=$enableval, enable_mmx=no)
 AC_ARG_ENABLE(sse2, AC_HELP_STRING(--enable-sse2, [Use SSE2 for bitslice]), sse2_debug=$enableval, enable_sse2=no)
+AC_ARG_ENABLE(ssse3, AC_HELP_STRING(--enable-ssse3, [Use SSSE3 for bitslice]), ssse3_debug=$enableval, enable_ssse3=no)
 AC_ARG_ENABLE(altivec, AC_HELP_STRING(--enable-altivec, [Use AltiVec for bitslice]), altivec_debug=$enableval, enable_altivec=no)
 AC_ARG_ENABLE(neon, AC_HELP_STRING(--enable-neon, [Use NEON for bitslice]), neon_debug=$enableval, enable_neon=no)
 
@@ -47,6 +48,10 @@ elif test "$enable_sse2" = "yes" ; then
      AC_DEFINE(DVBCSA_USE_SSE, 1, Using SSE2 bitslice.)
      GCC_CFLAGS="$GCC_CFLAGS -msse -msse2"
 
+elif test "$enable_ssse3" = "yes" ; then
+     AC_DEFINE(DVBCSA_USE_SSSE3, 1, Using SSSE3 bitslice.)
+     GCC_CFLAGS="$GCC_CFLAGS -mssse3"
+
 elif test "$enable_altivec" = "yes" ; then
      AC_DEFINE(DVBCSA_USE_ALTIVEC, 1, Using AltiVec bitslice.)
      GCC_CFLAGS="$GCC_CFLAGS -maltivec -mabi=altivec"
diff --git a/src/dvbcsa_bs.h b/src/dvbcsa_bs.h
index 7145048..8162405 100644
--- a/src/dvbcsa_bs.h
+++ b/src/dvbcsa_bs.h
@@ -40,6 +40,9 @@
 #elif defined(DVBCSA_USE_SSE)
 # include "dvbcsa_bs_sse.h"
 
+#elif defined(DVBCSA_USE_SSSE3)
+# include "dvbcsa_bs_sse.h"
+
 #elif defined(DVBCSA_USE_ALTIVEC)
 # include "dvbcsa_bs_altivec.h"
 
diff --git a/src/dvbcsa_bs_sse.h b/src/dvbcsa_bs_sse.h
index f1b0c79..02ecb1b 100644
--- a/src/dvbcsa_bs_sse.h
+++ b/src/dvbcsa_bs_sse.h
@@ -29,6 +29,10 @@
 #include <xmmintrin.h>
 #include <emmintrin.h>
 
+#ifdef DVBCSA_USE_SSSE3
+#include <tmmintrin.h>
+#endif
+
 typedef __m128i dvbcsa_bs_word_t;
 
 #define BS_BATCH_SIZE 128
@@ -54,4 +58,19 @@ typedef __m128i dvbcsa_bs_word_t;
 
 #define BS_EMPTY()
 
+#ifdef DVBCSA_USE_SSSE3
+/* block cipher 2-word aligned load with byte-deinterleaving: var_lo gets the 16
+ * even-indexed bytes at ptr, var_hi the 16 odd-indexed; ptr is evaluated once */
+#define BS_LOAD_DEINTERLEAVE_8(ptr, var_lo, var_hi) \
+      {\
+      const dvbcsa_bs_word_t *bs_ld8_p = (ptr); \
+      const dvbcsa_bs_word_t bs_ld8_m = _mm_set_epi8(15,13,11,9,7,5,3,1,14,12,10,8,6,4,2,0); \
+      dvbcsa_bs_word_t bs_ld8_a = _mm_shuffle_epi8(_mm_load_si128(bs_ld8_p), bs_ld8_m); \
+      dvbcsa_bs_word_t bs_ld8_b = _mm_shuffle_epi8(_mm_load_si128(bs_ld8_p + 1), bs_ld8_m); \
+      (var_lo) = _mm_unpacklo_epi64(bs_ld8_a, bs_ld8_b); \
+      (var_hi) = _mm_unpackhi_epi64(bs_ld8_a, bs_ld8_b); \
+      }
 #endif
+
+#endif
+
-- 
1.9.1




More information about the vlc-devel mailing list