[vlc-devel] [PATCH 06/16] neon: add matrix transpose macros
Serg Chernyavskiy
glenvt18 at gmail.com
Thu Jul 30 14:14:18 CEST 2015
Please review.
2015-06-26 14:20 GMT+03:00 glenvt18 <glenvt18 at gmail.com>:
> ---
> src/dvbcsa_bs_neon.h | 31 +++++++++++++++++++++++++++++++
> 1 file changed, 31 insertions(+)
>
> diff --git a/src/dvbcsa_bs_neon.h b/src/dvbcsa_bs_neon.h
> index 7bf0156..32b8e18 100644
> --- a/src/dvbcsa_bs_neon.h
> +++ b/src/dvbcsa_bs_neon.h
> @@ -52,5 +52,36 @@ typedef uint64x2_t dvbcsa_bs_word_t;
>
> #define BS_EMPTY()
>
> +/* 2x2 matrix transpose */
> +#ifdef BS_SWAP32_LE /* drop any earlier definition before installing the NEON one */
> +#undef BS_SWAP32_LE
> +#endif /* BS_SWAP32_LE */
> +#define BS_SWAP32_LE(t, b) \
> + { /* t, b: uint64x2_t lvalues, updated in place; each is expanded more than once, so pass side-effect-free expressions */ \
> + uint32x4x2_t bs_swap32_le_tmp_ = vtrnq_u32(vreinterpretq_u32_u64(t), vreinterpretq_u32_u64(b)); /* unique name: must not capture a caller variable called tmp */ \
> + (t) = vreinterpretq_u64_u32(bs_swap32_le_tmp_.val[0]); \
> + (b) = vreinterpretq_u64_u32(bs_swap32_le_tmp_.val[1]); \
> + }
> +
> +#ifdef BS_SWAP16_LE /* drop any earlier definition before installing the NEON one */
> +#undef BS_SWAP16_LE
> +#endif /* BS_SWAP16_LE */
> +#define BS_SWAP16_LE(t, b) \
> + { /* t, b: uint64x2_t lvalues, updated in place with the 16-bit-lane transpose of the pair; each is expanded more than once */ \
> + uint16x8x2_t bs_swap16_le_tmp_ = vtrnq_u16(vreinterpretq_u16_u64(t), vreinterpretq_u16_u64(b)); /* unique name: must not capture a caller variable called tmp */ \
> + (t) = vreinterpretq_u64_u16(bs_swap16_le_tmp_.val[0]); \
> + (b) = vreinterpretq_u64_u16(bs_swap16_le_tmp_.val[1]); \
> + }
> +
> +#ifdef BS_SWAP8_LE /* drop any earlier definition before installing the NEON one */
> +#undef BS_SWAP8_LE
> +#endif /* BS_SWAP8_LE */
> +#define BS_SWAP8_LE(t, b) \
> + { /* t, b: uint64x2_t lvalues, updated in place with the 8-bit-lane transpose of the pair; each is expanded more than once */ \
> + uint8x16x2_t bs_swap8_le_tmp_ = vtrnq_u8(vreinterpretq_u8_u64(t), vreinterpretq_u8_u64(b)); /* unique name: must not capture a caller variable called tmp */ \
> + (t) = vreinterpretq_u64_u8(bs_swap8_le_tmp_.val[0]); \
> + (b) = vreinterpretq_u64_u8(bs_swap8_le_tmp_.val[1]); \
> + }
> +
> #endif
>
> --
> 1.9.1
>
More information about the vlc-devel
mailing list