[vlc-devel] [PATCH 09/16] ssse3: add deinterleaving macro and SSSE3 option
glenvt18
glenvt18 at gmail.com
Fri Jun 26 13:20:03 CEST 2015
---
configure.ac | 5 +++++
src/dvbcsa_bs.h | 3 +++
src/dvbcsa_bs_sse.h | 19 +++++++++++++++++++
3 files changed, 27 insertions(+)
diff --git a/configure.ac b/configure.ac
index 4dd0726..f978a02 100644
--- a/configure.ac
+++ b/configure.ac
@@ -13,6 +13,7 @@ AC_ARG_ENABLE(uint32, AC_HELP_STRING(--enable-uint32, [Use native 32 bits intege
AC_ARG_ENABLE(uint64, AC_HELP_STRING(--enable-uint64, [Use native 64 bits integers for bitslice]), enable_uint64=$enableval, enable_uint64=no)
AC_ARG_ENABLE(mmx, AC_HELP_STRING(--enable-mmx, [Use MMX for bitslice]), mmx_debug=$enableval, enable_mmx=no)
AC_ARG_ENABLE(sse2, AC_HELP_STRING(--enable-sse2, [Use SSE2 for bitslice]), sse2_debug=$enableval, enable_sse2=no)
+AC_ARG_ENABLE(ssse3, AC_HELP_STRING(--enable-ssse3, [Use SSSE3 for bitslice]), ssse3_debug=$enableval, enable_ssse3=no)
AC_ARG_ENABLE(altivec, AC_HELP_STRING(--enable-altivec, [Use AltiVec for bitslice]), altivec_debug=$enableval, enable_altivec=no)
AC_ARG_ENABLE(neon, AC_HELP_STRING(--enable-neon, [Use NEON for bitslice]), neon_debug=$enableval, enable_neon=no)
@@ -47,6 +48,10 @@ elif test "$enable_sse2" = "yes" ; then
AC_DEFINE(DVBCSA_USE_SSE, 1, Using SSE2 bitslice.)
GCC_CFLAGS="$GCC_CFLAGS -msse -msse2"
+elif test "$enable_ssse3" = "yes" ; then
+ AC_DEFINE(DVBCSA_USE_SSSE3, 1, Using SSSE3 bitslice.)
+ GCC_CFLAGS="$GCC_CFLAGS -mssse3"
+
elif test "$enable_altivec" = "yes" ; then
AC_DEFINE(DVBCSA_USE_ALTIVEC, 1, Using AltiVec bitslice.)
GCC_CFLAGS="$GCC_CFLAGS -maltivec -mabi=altivec"
diff --git a/src/dvbcsa_bs.h b/src/dvbcsa_bs.h
index 7145048..8162405 100644
--- a/src/dvbcsa_bs.h
+++ b/src/dvbcsa_bs.h
@@ -40,6 +40,9 @@
#elif defined(DVBCSA_USE_SSE)
# include "dvbcsa_bs_sse.h"
+#elif defined(DVBCSA_USE_SSSE3)
+# include "dvbcsa_bs_sse.h"
+
#elif defined(DVBCSA_USE_ALTIVEC)
# include "dvbcsa_bs_altivec.h"
diff --git a/src/dvbcsa_bs_sse.h b/src/dvbcsa_bs_sse.h
index f1b0c79..02ecb1b 100644
--- a/src/dvbcsa_bs_sse.h
+++ b/src/dvbcsa_bs_sse.h
@@ -29,6 +29,10 @@
#include <xmmintrin.h>
#include <emmintrin.h>
+#ifdef DVBCSA_USE_SSSE3
+#include <tmmintrin.h>
+#endif
+
typedef __m128i dvbcsa_bs_word_t;
#define BS_BATCH_SIZE 128
@@ -54,4 +58,19 @@ typedef __m128i dvbcsa_bs_word_t;
#define BS_EMPTY()
+#ifdef DVBCSA_USE_SSSE3
/*
 * Block cipher 2-word load with byte-deinterleaving.
 *
 * Loads two consecutive 128-bit words from (ptr) and (ptr) + 1, then
 * splits their 32 bytes by parity of the byte index:
 *
 *   var_lo = even-indexed bytes: word 0 bytes 0,2,..,14 in the low 64 bits,
 *            word 1 bytes 0,2,..,14 in the high 64 bits
 *   var_hi = odd-indexed bytes:  word 0 bytes 1,3,..,15 in the low 64 bits,
 *            word 1 bytes 1,3,..,15 in the high 64 bits
 *
 * How: _mm_set_epi8 lists its arguments from the most significant byte
 * down, so the shuffle mask gathers the even source bytes into the low
 * half of each word and the odd source bytes into the high half; the
 * 64-bit unpacklo/unpackhi then pair up the matching halves of both words.
 *
 * ptr is used as a pointer to dvbcsa_bs_word_t; var_lo and var_hi are
 * lvalues that are assigned to.
 *
 * NOTE(review): _mm_load_si128 is the aligned load, so ptr presumably has
 * to be 16-byte aligned - confirm against callers.
 * NOTE(review): the expansion is a bare { } block (not do { } while (0)),
 * it evaluates ptr twice, and it declares locals named a and b, so an
 * argument expression that itself mentions a or b would be captured.
 */
#define BS_LOAD_DEINTERLEAVE_8(ptr, var_lo, var_hi) \
+ {\
+ dvbcsa_bs_word_t a, b; \
+ a = _mm_load_si128((ptr)); \
+ b = _mm_load_si128((ptr) + 1); \
+ a = _mm_shuffle_epi8(a, _mm_set_epi8(15,13,11,9,7,5,3,1,14,12,10,8,6,4,2,0)); \
+ b = _mm_shuffle_epi8(b, _mm_set_epi8(15,13,11,9,7,5,3,1,14,12,10,8,6,4,2,0)); \
+ var_lo = _mm_unpacklo_epi64(a, b); \
+ var_hi = _mm_unpackhi_epi64(a, b); \
+ }
#endif
+
+#endif
+
--
1.9.1
More information about the vlc-devel
mailing list