[vlc-devel] [PATCH 06/16] neon: add matrix transpose macros

glenvt18 glenvt18 at gmail.com
Fri Jun 26 13:20:00 CEST 2015


---
 src/dvbcsa_bs_neon.h | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/src/dvbcsa_bs_neon.h b/src/dvbcsa_bs_neon.h
index 7bf0156..32b8e18 100644
--- a/src/dvbcsa_bs_neon.h
+++ b/src/dvbcsa_bs_neon.h
@@ -52,5 +52,36 @@ typedef uint64x2_t dvbcsa_bs_word_t;
 
 #define BS_EMPTY()
 
+/* 2x2 matrix transpose on 32-bit lanes (NEON VTRN.32): on exit
+ * t = {t0, b0, t2, b2} and b = {t1, b1, t3, b3}. Expands to a bare { } block
+ * and uses each argument twice, so pass plain uint64x2_t lvalues. */
+#undef BS_SWAP32_LE
+#define BS_SWAP32_LE(t, b) \
+    { \
+        uint32x4x2_t bs_trn_ = vtrnq_u32(vreinterpretq_u32_u64(t), vreinterpretq_u32_u64(b)); \
+        (t) = vreinterpretq_u64_u32(bs_trn_.val[0]); \
+        (b) = vreinterpretq_u64_u32(bs_trn_.val[1]); \
+    }
+
+/* 2x2 transpose on 16-bit lanes (VTRN.16): t = {t0, b0, t2, b2, ...},
+ * b = {t1, b1, t3, b3, ...}. Bare { } block; each argument is used twice. */
+#undef BS_SWAP16_LE
+#define BS_SWAP16_LE(t, b) \
+    { \
+        uint16x8x2_t bs_trn_ = vtrnq_u16(vreinterpretq_u16_u64(t), vreinterpretq_u16_u64(b)); \
+        (t) = vreinterpretq_u64_u16(bs_trn_.val[0]); \
+        (b) = vreinterpretq_u64_u16(bs_trn_.val[1]); \
+    }
+
+/* 2x2 transpose on 8-bit lanes (VTRN.8): t = {t0, b0, t2, b2, ...},
+ * b = {t1, b1, t3, b3, ...}. Bare { } block; each argument is used twice. */
+#undef BS_SWAP8_LE
+#define BS_SWAP8_LE(t, b) \
+    { \
+        uint8x16x2_t bs_trn_ = vtrnq_u8(vreinterpretq_u8_u64(t), vreinterpretq_u8_u64(b)); \
+        (t) = vreinterpretq_u64_u8(bs_trn_.val[0]); \
+        (b) = vreinterpretq_u64_u8(bs_trn_.val[1]); \
+    }
+
 #endif
 
-- 
1.9.1




More information about the vlc-devel mailing list