[vlc-devel] [PATCH 06/16] neon: add matrix transpose macros

Serg Chernyavskiy glenvt18 at gmail.com
Thu Jul 30 14:14:18 CEST 2015


Please review.

2015-06-26 14:20 GMT+03:00 glenvt18 <glenvt18 at gmail.com>:
> ---
>  src/dvbcsa_bs_neon.h | 31 +++++++++++++++++++++++++++++++
>  1 file changed, 31 insertions(+)
>
> diff --git a/src/dvbcsa_bs_neon.h b/src/dvbcsa_bs_neon.h
> index 7bf0156..32b8e18 100644
> --- a/src/dvbcsa_bs_neon.h
> +++ b/src/dvbcsa_bs_neon.h
> @@ -52,5 +52,36 @@ typedef uint64x2_t dvbcsa_bs_word_t;
>
>  #define BS_EMPTY()
>
> +/* 2x2 matrix transpose: view t and b as 32-bit lanes, run vtrnq_u32 on the pair, then store val[0] back to t and val[1] back to b. Any earlier definition is replaced. NOTE(review): expands to a bare { } block with a local 'tmp'; t and b must be assignable and each appears twice. */
> +#ifdef BS_SWAP32_LE
> +#undef BS_SWAP32_LE
> +#endif
> +#define BS_SWAP32_LE(t, b) \
> +    { \
> +    uint32x4x2_t tmp = vtrnq_u32(vreinterpretq_u32_u64(t), vreinterpretq_u32_u64(b)); \
> +    t = vreinterpretq_u64_u32(tmp.val[0]); \
> +    b = vreinterpretq_u64_u32(tmp.val[1]); \
> +    }
> +
> +#ifdef BS_SWAP16_LE /* 16-bit-lane variant: vtrnq_u16 on t and b, val[0] -> t, val[1] -> b; any earlier definition is replaced. NOTE(review): bare { } block with a local 'tmp'; t and b must be assignable. */
> +#undef BS_SWAP16_LE
> +#endif
> +#define BS_SWAP16_LE(t, b) \
> +    { \
> +    uint16x8x2_t tmp = vtrnq_u16(vreinterpretq_u16_u64(t), vreinterpretq_u16_u64(b)); \
> +    t = vreinterpretq_u64_u16(tmp.val[0]); \
> +    b = vreinterpretq_u64_u16(tmp.val[1]); \
> +    }
> +
> +#ifdef BS_SWAP8_LE /* 8-bit-lane variant: vtrnq_u8 on t and b, val[0] -> t, val[1] -> b; any earlier definition is replaced. NOTE(review): bare { } block with a local 'tmp'; t and b must be assignable. */
> +#undef BS_SWAP8_LE
> +#endif
> +#define BS_SWAP8_LE(t, b) \
> +    { \
> +    uint8x16x2_t tmp = vtrnq_u8(vreinterpretq_u8_u64(t), vreinterpretq_u8_u64(b)); \
> +    t = vreinterpretq_u64_u8(tmp.val[0]); \
> +    b = vreinterpretq_u64_u8(tmp.val[1]); \
> +    }
> +
>  #endif
>
> --
> 1.9.1
>



More information about the vlc-devel mailing list