[x264-devel] [PATCH] ppc: Add compat macros to use vec_xxpermdi

Luca Barbato lu_zero at gentoo.org
Thu Jul 12 10:53:39 CEST 2018


---

Yes, the clang people made the same mistake the gcc people made before
apparently...

 common/ppc/ppccommon.h | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/common/ppc/ppccommon.h b/common/ppc/ppccommon.h
index 35ea8379..7735e2a1 100644
--- a/common/ppc/ppccommon.h
+++ b/common/ppc/ppccommon.h
@@ -308,3 +308,34 @@ p2 += i2;
 #ifndef __POWER9_VECTOR__
 #define vec_absd( a, b ) vec_sub( vec_max( a, b ), vec_min( a, b ) )
 #endif
+
+// vec_xxpermdi is quite useful but some version of clang do not expose it
+#if !HAVE_VSX || (defined(__clang__) && __clang_major__ < 6)
+static const uint8x16_t xxpermdi0_perm = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
+                                           0x06, 0x07, 0x10, 0x11, 0x12, 0x13,
+                                           0x14, 0x15, 0x16, 0x17 };
+static const uint8x16_t xxpermdi1_perm = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
+                                           0x06, 0x07, 0x18, 0x19, 0x1A, 0x1B,
+                                           0x1C, 0x1D, 0x1E, 0x1F };
+static const uint8x16_t xxpermdi2_perm = { 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D,
+                                           0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13,
+                                           0x14, 0x15, 0x16, 0x17 };
+static const uint8x16_t xxpermdi3_perm = { 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D,
+                                           0x0E, 0x0F, 0x18, 0x19, 0x1A, 0x1B,
+                                           0x1C, 0x1D, 0x1E, 0x1F };
+#define xxpermdi(a, b, c) vec_perm(a, b, xxpermdi##c##_perm)
+#elif defined(__GNUC__) && \
+    (__GNUC__ > 6 || (__GNUC__ == 6 && __GNUC_MINOR__ >= 3))
+#define xxpermdi(a, b, c) vec_xxpermdi(a, b, c)
+#endif
+
+// vec_xxpermdi has its endianness bias exposed in early gcc and clang
+#ifdef WORDS_BIGENDIAN
+#ifndef xxpermdi
+#define xxpermdi(a, b, c) vec_xxpermdi(a, b, c)
+#endif
+#else
+#ifndef xxpermdi
+#define xxpermdi(a, b, c) vec_xxpermdi(b, a, ((c >> 1) | (c & 1) << 1) ^ 3)
+#endif
+#endif
--
2.12.2



More information about the x264-devel mailing list