[vlc-devel] [PATCH 04/16] Add ARM NEON support

Serg Chernyavskiy glenvt18 at gmail.com
Thu Jul 30 14:13:37 CEST 2015


Let's discuss NEON runtime detection and implement it afterwards. Do you agree?

2015-07-06 19:13 GMT+03:00 Jean-Baptiste Kempf <jb at videolan.org>:
> Can't we have NEON runtime detection, btw?
>
> On 26 Jun, glenvt18 wrote :
>> ---
>>  configure.ac         |  6 ++++++
>>  src/Makefile.am      |  2 +-
>>  src/dvbcsa_bs.h      |  3 +++
>>  src/dvbcsa_bs_neon.h | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>>  test/testbsops.c     | 29 +++++++++++++++++++++++++++
>>  5 files changed, 95 insertions(+), 1 deletion(-)
>>  create mode 100644 src/dvbcsa_bs_neon.h
>>
>> diff --git a/configure.ac b/configure.ac
>> index ed8a1ad..cefdf8a 100644
>> --- a/configure.ac
>> +++ b/configure.ac
>> @@ -14,6 +14,7 @@ AC_ARG_ENABLE(uint64, AC_HELP_STRING(--enable-uint64, [Use native 64 bits intege
>>  AC_ARG_ENABLE(mmx, AC_HELP_STRING(--enable-mmx, [Use MMX for bitslice]), mmx_debug=$enableval, enable_mmx=no)
>>  AC_ARG_ENABLE(sse2, AC_HELP_STRING(--enable-sse2, [Use SSE2 for bitslice]), sse2_debug=$enableval, enable_sse2=no)
>>  AC_ARG_ENABLE(altivec, AC_HELP_STRING(--enable-altivec, [Use AltiVec for bitslice]), altivec_debug=$enableval, enable_altivec=no)
>> +AC_ARG_ENABLE(neon, AC_HELP_STRING(--enable-neon, [Use NEON for bitslice]), neon_debug=$enableval, enable_neon=no)
>>
>>  AM_INIT_AUTOMAKE(libdvbcsa, 1.1.0)
>>  AC_CONFIG_HEADERS(config.h)
>> @@ -46,6 +47,11 @@ elif test "$enable_altivec" = "yes" ; then
>>       AC_DEFINE(DVBCSA_USE_ALTIVEC, 1, Using AltiVec bitslice.)
>>       GCC_CFLAGS="$GCC_CFLAGS -maltivec -mabi=altivec"
>>
>> +elif test "$enable_neon" = "yes" ; then
>> +     transpose_128=yes
>> +     AC_DEFINE(DVBCSA_USE_NEON, 1, Using NEON bitslice.)
>> +     GCC_CFLAGS="$GCC_CFLAGS -mfpu=neon"
>> +
>>  elif test "$enable_uint32" = "yes" ; then
>>       transpose_32=yes
>>       AC_DEFINE(DVBCSA_USE_UINT32, 1, Using 32 bits integer bitslice.)
>> diff --git a/src/Makefile.am b/src/Makefile.am
>> index dec4f55..3bad07a 100644
>> --- a/src/Makefile.am
>> +++ b/src/Makefile.am
>> @@ -7,7 +7,7 @@ libdvbcsa_la_SOURCES = dvbcsa_algo.c dvbcsa_block.c dvbcsa_bs_algo.c  \
>>       dvbcsa_bs_block.c dvbcsa_bs_key.c dvbcsa_bs_stream.c            \
>>       dvbcsa_stream.c dvbcsa_bs.h dvbcsa_pv.h dvbcsa_bs_uint64.h      \
>>       dvbcsa_bs_uint32.h dvbcsa_bs_mmx.h dvbcsa_bs_sse.h              \
>> -     dvbcsa_bs_altivec.h dvbcsa_bs_transpose.c dvbcsa_key.c  \
>> +     dvbcsa_bs_altivec.h dvbcsa_bs_neon.h dvbcsa_bs_transpose.c dvbcsa_key.c \
>>       dvbcsa_bs_stream_kernel.inc dvbcsa_bs_stream_kernel.h
>>
>>  if TRANSPOSE_128
>> diff --git a/src/dvbcsa_bs.h b/src/dvbcsa_bs.h
>> index 75cabc9..7145048 100644
>> --- a/src/dvbcsa_bs.h
>> +++ b/src/dvbcsa_bs.h
>> @@ -43,6 +43,9 @@
>>  #elif defined(DVBCSA_USE_ALTIVEC)
>>  # include "dvbcsa_bs_altivec.h"
>>
>> +#elif defined(DVBCSA_USE_NEON)
>> +# include "dvbcsa_bs_neon.h"
>> +
>>  #else
>>  # error No dvbcsa word size defined
>>  #endif
>> diff --git a/src/dvbcsa_bs_neon.h b/src/dvbcsa_bs_neon.h
>> new file mode 100644
>> index 0000000..7bf0156
>> --- /dev/null
>> +++ b/src/dvbcsa_bs_neon.h
>> @@ -0,0 +1,56 @@
>> +/*
>> +
>> +    This file is part of libdvbcsa.
>> +
>> +    libdvbcsa is free software; you can redistribute it and/or modify
>> +    it under the terms of the GNU General Public License as published
>> +    by the Free Software Foundation; either version 2 of the License,
>> +    or (at your option) any later version.
>> +
>> +    libdvbcsa is distributed in the hope that it will be useful, but
>> +    WITHOUT ANY WARRANTY; without even the implied warranty of
>> +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> +    General Public License for more details.
>> +
>> +    You should have received a copy of the GNU General Public License
>> +    along with libdvbcsa; if not, write to the Free Software
>> +    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
>> +    02111-1307 USA
>> +
>> +    Based on FFdecsa, Copyright (C) 2003-2004  fatih89r
>> +
>> +    (c) 2006-2008 Alexandre Becoulet <alexandre.becoulet at free.fr>
>> +
>> +*/
>> +
>> +#ifndef DVBCSA_NEON_H_
>> +#define DVBCSA_NEON_H_
>> +
>> +# include <arm_neon.h>
>> +
>> +typedef uint64x2_t dvbcsa_bs_word_t;
>> +
>> +#define BS_BATCH_SIZE 128
>> +#define BS_BATCH_BYTES 16
>> +
>> +#define BS_VAL(n, m)    vcombine_u64((uint64_t)(m), (uint64_t)(n))
>> +#define BS_VAL64(n)     vdupq_n_u64(0x##n##ULL)
>> +#define BS_VAL32(n)     vreinterpretq_u64_u32(vdupq_n_u32(0x##n))
>> +#define BS_VAL16(n)     vreinterpretq_u64_u16(vdupq_n_u16(0x##n))
>> +#define BS_VAL8(n)      vreinterpretq_u64_u8(vdupq_n_u8(0x##n))
>> +
>> +#define BS_AND(a, b)    vandq_u64 ((a), (b))
>> +#define BS_OR(a, b)     vorrq_u64 ((a), (b))
>> +#define BS_XOR(a, b)    veorq_u64 ((a), (b))
>> +#define BS_NOT(a)       vreinterpretq_u64_u8(vmvnq_u8(vreinterpretq_u8_u64(a)))
>> +
>> +#define BS_SHL(a, n)    vshlq_n_u64 ((a), n)
>> +#define BS_SHR(a, n)    vshrq_n_u64 ((a), n)
>> +#define BS_SHL8(a, n)   BS_SHL(a, 8 * (n))
>> +#define BS_SHR8(a, n)   BS_SHR(a, 8 * (n))
>> +#define BS_EXTRACT8(a, n) (((uint8_t*)&(a))[n])
>> +
>> +#define BS_EMPTY()
>> +
>> +#endif
>> +
>> diff --git a/test/testbsops.c b/test/testbsops.c
>> index 50748ab..a8588bd 100644
>> --- a/test/testbsops.c
>> +++ b/test/testbsops.c
>> @@ -248,17 +248,30 @@ main            (void)
>>    vec_testeq(b, c);
>>
>>    a = BS_VAL(0xaaaaaaaaaaaaaaaaLL, 0x5555555555555555LL);
>> +  /* there is no neon instruction to shift a whole register */
>> +#ifdef DVBCSA_USE_NEON
>> +  b = BS_VAL(0xaaaaaaaaaaaaaa00LL, 0x5555555555555500LL);
>> +#else
>>    b = BS_VAL(0xaaaaaaaaaaaaaa55LL, 0x5555555555555500LL);
>> +#endif
>>    c = BS_SHL8(a, 1);
>>    vec_testeq(b, c);
>>
>>    a = BS_VAL(0xffffffffffffffffLL, 0xffffffffffffffffLL);
>> +#ifdef DVBCSA_USE_NEON
>> +  b = BS_VAL(0xffffffffffffff00LL, 0xffffffffffffff00LL);
>> +#else
>>    b = BS_VAL(0xffffffffffffffffLL, 0xffffffffffffff00LL);
>> +#endif
>>    c = BS_SHL8(a, 1);
>>    vec_testeq(b, c);
>>
>>    a = BS_VAL(0xffffffffffffffffLL, 0xffffffffffffffffLL);
>> +#ifdef DVBCSA_USE_NEON
>> +  b = BS_VAL(0xffffffff00000000LL, 0xffffffff00000000LL);
>> +#else
>>    b = BS_VAL(0xffffffffffffffffLL, 0xffffffff00000000LL);
>> +#endif
>>    c = BS_SHL8(a, 4);
>>    vec_testeq(b, c);
>>
>> @@ -270,22 +283,38 @@ main            (void)
>>    vec_testeq(b, c);
>>
>>    a = BS_VAL(0xaaaaaaaaaaaaaaaaLL, 0x5555555555555555LL);
>> +#ifdef DVBCSA_USE_NEON
>> +  b = BS_VAL(0x00aaaaaaaaaaaaaaLL, 0x0055555555555555LL);
>> +#else
>>    b = BS_VAL(0x00aaaaaaaaaaaaaaLL, 0xaa55555555555555LL);
>> +#endif
>>    c = BS_SHR8(a, 1);
>>    vec_testeq(b, c);
>>
>>    a = BS_VAL(0xffffffffffffffffLL, 0xffffffffffffffffLL);
>> +#ifdef DVBCSA_USE_NEON
>> +  b = BS_VAL(0x00ffffffffffffffLL, 0x00ffffffffffffffLL);
>> +#else
>>    b = BS_VAL(0x00ffffffffffffffLL, 0xffffffffffffffffLL);
>> +#endif
>>    c = BS_SHR8(a, 1);
>>    vec_testeq(b, c);
>>
>>    a = BS_VAL(0xffffffffffffffffLL, 0xffffffffffffffffLL);
>> +#ifdef DVBCSA_USE_NEON
>> +  b = BS_VAL(0x00000000ffffffffLL, 0x00000000ffffffffLL);
>> +#else
>>    b = BS_VAL(0x00000000ffffffffLL, 0xffffffffffffffffLL);
>> +#endif
>>    c = BS_SHR8(a, 4);
>>    vec_testeq(b, c);
>>
>>    a = BS_VAL(0xffffffffffffffffLL, 0xffffffffffffffffLL);
>> +#ifdef DVBCSA_USE_NEON
>> +  b = BS_VAL(0x000000ffffffffffLL, 0x000000ffffffffffLL);
>> +#else
>>    b = BS_VAL(0x000000ffffffffffLL, 0xffffffffffffffffLL);
>> +#endif
>>    c = BS_SHR8(a, 3);
>>    vec_testeq(b, c);
>>
>> --
>> 1.9.1
>>
>> _______________________________________________
>> vlc-devel mailing list
>> To unsubscribe or modify your subscription options:
>> https://mailman.videolan.org/listinfo/vlc-devel
>
> --
> With my kindest regards,
>
> --
> Jean-Baptiste Kempf
> http://www.jbkempf.com/ - +33 672 704 734
> Sent from my Electronic Device
> _______________________________________________
> vlc-devel mailing list
> To unsubscribe or modify your subscription options:
> https://mailman.videolan.org/listinfo/vlc-devel



More information about the vlc-devel mailing list