[vlc-devel] [PATCH 09/16] ssse3: add deinterleaving macro and SSSE3 option

Jean-Baptiste Kempf jb at videolan.org
Mon Jul 6 18:14:34 CEST 2015


Can't we get autodetection of SSSE3 support at runtime, rather than a configure-time --enable-ssse3 option, please?

On 26 Jun, glenvt18 wrote:
> ---
>  configure.ac        |  5 +++++
>  src/dvbcsa_bs.h     |  3 +++
>  src/dvbcsa_bs_sse.h | 19 +++++++++++++++++++
>  3 files changed, 27 insertions(+)
> 
> diff --git a/configure.ac b/configure.ac
> index 4dd0726..f978a02 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -13,6 +13,7 @@ AC_ARG_ENABLE(uint32, AC_HELP_STRING(--enable-uint32, [Use native 32 bits intege
>  AC_ARG_ENABLE(uint64, AC_HELP_STRING(--enable-uint64, [Use native 64 bits integers for bitslice]), enable_uint64=$enableval, enable_uint64=no)
>  AC_ARG_ENABLE(mmx, AC_HELP_STRING(--enable-mmx, [Use MMX for bitslice]), mmx_debug=$enableval, enable_mmx=no)
>  AC_ARG_ENABLE(sse2, AC_HELP_STRING(--enable-sse2, [Use SSE2 for bitslice]), sse2_debug=$enableval, enable_sse2=no)
> +AC_ARG_ENABLE(ssse3, AC_HELP_STRING(--enable-ssse3, [Use SSSE3 for bitslice]), ssse3_debug=$enableval, enable_ssse3=no)
>  AC_ARG_ENABLE(altivec, AC_HELP_STRING(--enable-altivec, [Use AltiVec for bitslice]), altivec_debug=$enableval, enable_altivec=no)
>  AC_ARG_ENABLE(neon, AC_HELP_STRING(--enable-neon, [Use NEON for bitslice]), neon_debug=$enableval, enable_neon=no)
>  
> @@ -47,6 +48,10 @@ elif test "$enable_sse2" = "yes" ; then
>       AC_DEFINE(DVBCSA_USE_SSE, 1, Using SSE2 bitslice.)
>       GCC_CFLAGS="$GCC_CFLAGS -msse -msse2"
>  
> +elif test "$enable_ssse3" = "yes" ; then
> +     AC_DEFINE(DVBCSA_USE_SSSE3, 1, Using SSSE3 bitslice.)
> +     GCC_CFLAGS="$GCC_CFLAGS -mssse3"
> +
>  elif test "$enable_altivec" = "yes" ; then
>       AC_DEFINE(DVBCSA_USE_ALTIVEC, 1, Using AltiVec bitslice.)
>       GCC_CFLAGS="$GCC_CFLAGS -maltivec -mabi=altivec"
> diff --git a/src/dvbcsa_bs.h b/src/dvbcsa_bs.h
> index 7145048..8162405 100644
> --- a/src/dvbcsa_bs.h
> +++ b/src/dvbcsa_bs.h
> @@ -40,6 +40,9 @@
>  #elif defined(DVBCSA_USE_SSE)
>  # include "dvbcsa_bs_sse.h"
>  
> +#elif defined(DVBCSA_USE_SSSE3)
> +# include "dvbcsa_bs_sse.h"
> +
>  #elif defined(DVBCSA_USE_ALTIVEC)
>  # include "dvbcsa_bs_altivec.h"
>  
> diff --git a/src/dvbcsa_bs_sse.h b/src/dvbcsa_bs_sse.h
> index f1b0c79..02ecb1b 100644
> --- a/src/dvbcsa_bs_sse.h
> +++ b/src/dvbcsa_bs_sse.h
> @@ -29,6 +29,10 @@
>  #include <xmmintrin.h>
>  #include <emmintrin.h>
>  
> +#ifdef DVBCSA_USE_SSSE3
> +#include <tmmintrin.h>
> +#endif
> +
>  typedef __m128i dvbcsa_bs_word_t;
>  
>  #define BS_BATCH_SIZE 128
> @@ -54,4 +58,19 @@ typedef __m128i dvbcsa_bs_word_t;
>  
>  #define BS_EMPTY()
>  
> +#ifdef DVBCSA_USE_SSSE3
> +/* block cipher 2-word load with byte-deinterleaving */
> +#define BS_LOAD_DEINTERLEAVE_8(ptr, var_lo, var_hi) \
> +      {\
> +      dvbcsa_bs_word_t a, b; \
> +      a = _mm_load_si128((ptr)); \
> +      b = _mm_load_si128((ptr) + 1); \
> +      a = _mm_shuffle_epi8(a, _mm_set_epi8(15,13,11,9,7,5,3,1,14,12,10,8,6,4,2,0)); \
> +      b = _mm_shuffle_epi8(b, _mm_set_epi8(15,13,11,9,7,5,3,1,14,12,10,8,6,4,2,0)); \
> +      var_lo = _mm_unpacklo_epi64(a, b); \
> +      var_hi = _mm_unpackhi_epi64(a, b); \
> +      }
>  #endif
> +
> +#endif
> +
> -- 
> 1.9.1
> 
> _______________________________________________
> vlc-devel mailing list
> To unsubscribe or modify your subscription options:
> https://mailman.videolan.org/listinfo/vlc-devel

-- 
With my kindest regards,

-- 
Jean-Baptiste Kempf
http://www.jbkempf.com/ - +33 672 704 734
Sent from my Electronic Device



More information about the vlc-devel mailing list