[vlc-devel] [PATCH 04/16] Add ARM NEON support

Serg Chernyavskiy glenvt18 at gmail.com
Tue Jul 7 04:41:00 CEST 2015


Would be nice. I tried to stick to the original API, and we don't have a
dvbcsa_lib_init() function there. And quite a lot of projects use this API.
Hmm, checking on every decrypt/encrypt... Wouldn't it be costly? But it's
certainly good for binary packages and should be implemented. My plan was
to first make it run fast, then make it more convenient. Now we are at the
first stage.

2015-07-06 19:13 GMT+03:00 Jean-Baptiste Kempf <jb at videolan.org>:

> Can't we have NEON runtime detection, btw?
>
> On 26 Jun, glenvt18 wrote :
> > ---
> >  configure.ac         |  6 ++++++
> >  src/Makefile.am      |  2 +-
> >  src/dvbcsa_bs.h      |  3 +++
> >  src/dvbcsa_bs_neon.h | 56
> ++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  test/testbsops.c     | 29 +++++++++++++++++++++++++++
> >  5 files changed, 95 insertions(+), 1 deletion(-)
> >  create mode 100644 src/dvbcsa_bs_neon.h
> >
> > diff --git a/configure.ac b/configure.ac
> > index ed8a1ad..cefdf8a 100644
> > --- a/configure.ac
> > +++ b/configure.ac
> > @@ -14,6 +14,7 @@ AC_ARG_ENABLE(uint64, AC_HELP_STRING(--enable-uint64,
> [Use native 64 bits intege
> >  AC_ARG_ENABLE(mmx, AC_HELP_STRING(--enable-mmx, [Use MMX for
> bitslice]), mmx_debug=$enableval, enable_mmx=no)
> >  AC_ARG_ENABLE(sse2, AC_HELP_STRING(--enable-sse2, [Use SSE2 for
> bitslice]), sse2_debug=$enableval, enable_sse2=no)
> >  AC_ARG_ENABLE(altivec, AC_HELP_STRING(--enable-altivec, [Use AltiVec
> for bitslice]), altivec_debug=$enableval, enable_altivec=no)
> > +AC_ARG_ENABLE(neon, AC_HELP_STRING(--enable-neon, [Use NEON for
> bitslice]), neon_debug=$enableval, enable_neon=no)
> >
> >  AM_INIT_AUTOMAKE(libdvbcsa, 1.1.0)
> >  AC_CONFIG_HEADERS(config.h)
> > @@ -46,6 +47,11 @@ elif test "$enable_altivec" = "yes" ; then
> >       AC_DEFINE(DVBCSA_USE_ALTIVEC, 1, Using AltiVec bitslice.)
> >       GCC_CFLAGS="$GCC_CFLAGS -maltivec -mabi=altivec"
> >
> > +elif test "$enable_neon" = "yes" ; then
> > +     transpose_128=yes
> > +     AC_DEFINE(DVBCSA_USE_NEON, 1, Using NEON bitslice.)
> > +     GCC_CFLAGS="$GCC_CFLAGS -mfpu=neon"
> > +
> >  elif test "$enable_uint32" = "yes" ; then
> >       transpose_32=yes
> >       AC_DEFINE(DVBCSA_USE_UINT32, 1, Using 32 bits integer bitslice.)
> > diff --git a/src/Makefile.am b/src/Makefile.am
> > index dec4f55..3bad07a 100644
> > --- a/src/Makefile.am
> > +++ b/src/Makefile.am
> > @@ -7,7 +7,7 @@ libdvbcsa_la_SOURCES = dvbcsa_algo.c dvbcsa_block.c
> dvbcsa_bs_algo.c  \
> >       dvbcsa_bs_block.c dvbcsa_bs_key.c dvbcsa_bs_stream.c            \
> >       dvbcsa_stream.c dvbcsa_bs.h dvbcsa_pv.h dvbcsa_bs_uint64.h      \
> >       dvbcsa_bs_uint32.h dvbcsa_bs_mmx.h dvbcsa_bs_sse.h              \
> > -     dvbcsa_bs_altivec.h dvbcsa_bs_transpose.c dvbcsa_key.c  \
> > +     dvbcsa_bs_altivec.h dvbcsa_bs_neon.h dvbcsa_bs_transpose.c
> dvbcsa_key.c \
> >       dvbcsa_bs_stream_kernel.inc dvbcsa_bs_stream_kernel.h
> >
> >  if TRANSPOSE_128
> > diff --git a/src/dvbcsa_bs.h b/src/dvbcsa_bs.h
> > index 75cabc9..7145048 100644
> > --- a/src/dvbcsa_bs.h
> > +++ b/src/dvbcsa_bs.h
> > @@ -43,6 +43,9 @@
> >  #elif defined(DVBCSA_USE_ALTIVEC)
> >  # include "dvbcsa_bs_altivec.h"
> >
> > +#elif defined(DVBCSA_USE_NEON)
> > +# include "dvbcsa_bs_neon.h"
> > +
> >  #else
> >  # error No dvbcsa word size defined
> >  #endif
> > diff --git a/src/dvbcsa_bs_neon.h b/src/dvbcsa_bs_neon.h
> > new file mode 100644
> > index 0000000..7bf0156
> > --- /dev/null
> > +++ b/src/dvbcsa_bs_neon.h
> > @@ -0,0 +1,56 @@
> > +/*
> > +
> > +    This file is part of libdvbcsa.
> > +
> > +    libdvbcsa is free software; you can redistribute it and/or modify
> > +    it under the terms of the GNU General Public License as published
> > +    by the Free Software Foundation; either version 2 of the License,
> > +    or (at your option) any later version.
> > +
> > +    libdvbcsa is distributed in the hope that it will be useful, but
> > +    WITHOUT ANY WARRANTY; without even the implied warranty of
> > +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > +    General Public License for more details.
> > +
> > +    You should have received a copy of the GNU General Public License
> > +    along with libdvbcsa; if not, write to the Free Software
> > +    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
> > +    02111-1307 USA
> > +
> > +    Based on FFdecsa, Copyright (C) 2003-2004  fatih89r
> > +
> > +    (c) 2006-2008 Alexandre Becoulet <alexandre.becoulet at free.fr>
> > +
> > +*/
> > +
> > +#ifndef DVBCSA_NEON_H_
> > +#define DVBCSA_NEON_H_
> > +
> > +# include <arm_neon.h>
> > +
> > +typedef uint64x2_t dvbcsa_bs_word_t;
> > +
> > +#define BS_BATCH_SIZE 128
> > +#define BS_BATCH_BYTES 16
> > +
> > +#define BS_VAL(n, m)    vcombine_u64((uint64_t)(m), (uint64_t)(n))
> > +#define BS_VAL64(n)     vdupq_n_u64(0x##n##ULL)
> > +#define BS_VAL32(n)     vreinterpretq_u64_u32(vdupq_n_u32(0x##n))
> > +#define BS_VAL16(n)     vreinterpretq_u64_u16(vdupq_n_u16(0x##n))
> > +#define BS_VAL8(n)      vreinterpretq_u64_u8(vdupq_n_u8(0x##n))
> > +
> > +#define BS_AND(a, b)    vandq_u64 ((a), (b))
> > +#define BS_OR(a, b)     vorrq_u64 ((a), (b))
> > +#define BS_XOR(a, b)    veorq_u64 ((a), (b))
> > +#define BS_NOT(a)
>  vreinterpretq_u64_u8(vmvnq_u8(vreinterpretq_u8_u64(a)))
> > +
> > +#define BS_SHL(a, n)    vshlq_n_u64 ((a), n)
> > +#define BS_SHR(a, n)    vshrq_n_u64 ((a), n)
> > +#define BS_SHL8(a, n)   BS_SHL(a, 8 * (n))
> > +#define BS_SHR8(a, n)   BS_SHR(a, 8 * (n))
> > +#define BS_EXTRACT8(a, n) (((uint8_t*)&(a))[n])
> > +
> > +#define BS_EMPTY()
> > +
> > +#endif
> > +
> > diff --git a/test/testbsops.c b/test/testbsops.c
> > index 50748ab..a8588bd 100644
> > --- a/test/testbsops.c
> > +++ b/test/testbsops.c
> > @@ -248,17 +248,30 @@ main            (void)
> >    vec_testeq(b, c);
> >
> >    a = BS_VAL(0xaaaaaaaaaaaaaaaaLL, 0x5555555555555555LL);
> > +  /* there is no neon instruction to shift a whole register */
> > +#ifdef DVBCSA_USE_NEON
> > +  b = BS_VAL(0xaaaaaaaaaaaaaa00LL, 0x5555555555555500LL);
> > +#else
> >    b = BS_VAL(0xaaaaaaaaaaaaaa55LL, 0x5555555555555500LL);
> > +#endif
> >    c = BS_SHL8(a, 1);
> >    vec_testeq(b, c);
> >
> >    a = BS_VAL(0xffffffffffffffffLL, 0xffffffffffffffffLL);
> > +#ifdef DVBCSA_USE_NEON
> > +  b = BS_VAL(0xffffffffffffff00LL, 0xffffffffffffff00LL);
> > +#else
> >    b = BS_VAL(0xffffffffffffffffLL, 0xffffffffffffff00LL);
> > +#endif
> >    c = BS_SHL8(a, 1);
> >    vec_testeq(b, c);
> >
> >    a = BS_VAL(0xffffffffffffffffLL, 0xffffffffffffffffLL);
> > +#ifdef DVBCSA_USE_NEON
> > +  b = BS_VAL(0xffffffff00000000LL, 0xffffffff00000000LL);
> > +#else
> >    b = BS_VAL(0xffffffffffffffffLL, 0xffffffff00000000LL);
> > +#endif
> >    c = BS_SHL8(a, 4);
> >    vec_testeq(b, c);
> >
> > @@ -270,22 +283,38 @@ main            (void)
> >    vec_testeq(b, c);
> >
> >    a = BS_VAL(0xaaaaaaaaaaaaaaaaLL, 0x5555555555555555LL);
> > +#ifdef DVBCSA_USE_NEON
> > +  b = BS_VAL(0x00aaaaaaaaaaaaaaLL, 0x0055555555555555LL);
> > +#else
> >    b = BS_VAL(0x00aaaaaaaaaaaaaaLL, 0xaa55555555555555LL);
> > +#endif
> >    c = BS_SHR8(a, 1);
> >    vec_testeq(b, c);
> >
> >    a = BS_VAL(0xffffffffffffffffLL, 0xffffffffffffffffLL);
> > +#ifdef DVBCSA_USE_NEON
> > +  b = BS_VAL(0x00ffffffffffffffLL, 0x00ffffffffffffffLL);
> > +#else
> >    b = BS_VAL(0x00ffffffffffffffLL, 0xffffffffffffffffLL);
> > +#endif
> >    c = BS_SHR8(a, 1);
> >    vec_testeq(b, c);
> >
> >    a = BS_VAL(0xffffffffffffffffLL, 0xffffffffffffffffLL);
> > +#ifdef DVBCSA_USE_NEON
> > +  b = BS_VAL(0x00000000ffffffffLL, 0x00000000ffffffffLL);
> > +#else
> >    b = BS_VAL(0x00000000ffffffffLL, 0xffffffffffffffffLL);
> > +#endif
> >    c = BS_SHR8(a, 4);
> >    vec_testeq(b, c);
> >
> >    a = BS_VAL(0xffffffffffffffffLL, 0xffffffffffffffffLL);
> > +#ifdef DVBCSA_USE_NEON
> > +  b = BS_VAL(0x000000ffffffffffLL, 0x000000ffffffffffLL);
> > +#else
> >    b = BS_VAL(0x000000ffffffffffLL, 0xffffffffffffffffLL);
> > +#endif
> >    c = BS_SHR8(a, 3);
> >    vec_testeq(b, c);
> >
> > --
> > 1.9.1
> >
> > _______________________________________________
> > vlc-devel mailing list
> > To unsubscribe or modify your subscription options:
> > https://mailman.videolan.org/listinfo/vlc-devel
>
> --
> With my kindest regards,
>
> --
> Jean-Baptiste Kempf
> http://www.jbkempf.com/ - +33 672 704 734
> Sent from my Electronic Device
> _______________________________________________
> vlc-devel mailing list
> To unsubscribe or modify your subscription options:
> https://mailman.videolan.org/listinfo/vlc-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/vlc-devel/attachments/20150707/69d047de/attachment.html>


More information about the vlc-devel mailing list