[x264-devel] [PATCH] ppc: Add compat macros to use vec_xxpermdi
Luca Barbato
lu_zero at gentoo.org
Thu Jul 12 10:53:39 CEST 2018
---
Yes, the clang people made the same mistake the gcc people made before
apparently...
common/ppc/ppccommon.h | 31 +++++++++++++++++++++++++++++++
1 file changed, 31 insertions(+)
diff --git a/common/ppc/ppccommon.h b/common/ppc/ppccommon.h
index 35ea8379..7735e2a1 100644
--- a/common/ppc/ppccommon.h
+++ b/common/ppc/ppccommon.h
@@ -308,3 +308,34 @@ p2 += i2;
#ifndef __POWER9_VECTOR__
#define vec_absd( a, b ) vec_sub( vec_max( a, b ), vec_min( a, b ) )
#endif
+
+// vec_xxpermdi is quite useful but some version of clang do not expose it
+#if !HAVE_VSX || (defined(__clang__) && __clang_major__ < 6)
+static const uint8x16_t xxpermdi0_perm = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
+ 0x06, 0x07, 0x10, 0x11, 0x12, 0x13,
+ 0x14, 0x15, 0x16, 0x17 };
+static const uint8x16_t xxpermdi1_perm = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
+ 0x06, 0x07, 0x18, 0x19, 0x1A, 0x1B,
+ 0x1C, 0x1D, 0x1E, 0x1F };
+static const uint8x16_t xxpermdi2_perm = { 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D,
+ 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13,
+ 0x14, 0x15, 0x16, 0x17 };
+static const uint8x16_t xxpermdi3_perm = { 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D,
+ 0x0E, 0x0F, 0x18, 0x19, 0x1A, 0x1B,
+ 0x1C, 0x1D, 0x1E, 0x1F };
+#define xxpermdi(a, b, c) vec_perm(a, b, xxpermdi##c##_perm)
+#elif defined(__GNUC__) && \
+ (__GNUC__ > 6 || (__GNUC__ == 6 && __GNUC_MINOR__ >= 3))
+#define xxpermdi(a, b, c) vec_xxpermdi(a, b, c)
+#endif
+
+// vec_xxpermdi has its endianness bias exposed in early gcc and clang
+#ifdef WORDS_BIGENDIAN
+#ifndef xxpermdi
+#define xxpermdi(a, b, c) vec_xxpermdi(a, b, c)
+#endif
+#else
+#ifndef xxpermdi
+#define xxpermdi(a, b, c) vec_xxpermdi(b, a, ((c >> 1) | (c & 1) << 1) ^ 3)
+#endif
+#endif
--
2.12.2
More information about the x264-devel
mailing list