[vlc-devel] [PATCH] startcode_helper: ensure dependencies between asm blocks
Francois Cartegnie
fcvlcdev at free.fr
Mon Sep 14 10:42:57 CEST 2020
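Previously the zero register (xmm1) was set up in a separate asm statement,
with nothing tying it to the asm block in the loop that reads it.
Use __has_attribute(__vector_size__) checks and a vector type declaration so
the zero vector is passed to the asm block as an explicit input operand.
When the attribute is unavailable, xmm1 is zeroed (and declared clobbered)
inside the same asm block that uses it.
Verified with clang and gcc.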
---
modules/packetizer/startcode_helper.h | 29 +++++++++++++++++++--------
1 file changed, 21 insertions(+), 8 deletions(-)
diff --git a/modules/packetizer/startcode_helper.h b/modules/packetizer/startcode_helper.h
index 2b61e5cf98..32296f33e8 100644
--- a/modules/packetizer/startcode_helper.h
+++ b/modules/packetizer/startcode_helper.h
@@ -23,7 +23,11 @@
#include <vlc_cpu.h>
#if !defined(CAN_COMPILE_SSE2) && defined(HAVE_SSE2_INTRINSICS)
- #include <emmintrin.h>
+# include <emmintrin.h>
+#endif
+
+#if __has_attribute(__vector_size__)
+typedef unsigned char v16qu __attribute__((__vector_size__(16)));
#endif
/* Looks up efficiently for an AnnexB startcode 0x00 0x00 0x01
@@ -63,26 +67,35 @@ static inline const uint8_t * startcode_FindAnnexB_SSE2( const uint8_t *p, const
alignedend = end - ((intptr_t) end & 15);
if( alignedend > p )
{
-#ifdef CAN_COMPILE_SSE2
- asm volatile(
- "pxor %%xmm1, %%xmm1\n"
- ::: "xmm1"
- );
+#if defined(CAN_COMPILE_SSE2) && __has_attribute(__vector_size__)
+ const v16qu zeros = { 0 };
#else
- __m128i zeros = _mm_set1_epi8( 0x00 );
+ const __m128i zeros = _mm_set1_epi8( 0x00 );
#endif
for( ; p < alignedend; p += 16)
{
uint32_t match;
#ifdef CAN_COMPILE_SSE2
+# if __has_attribute(__vector_size__)
+ asm volatile(
+ "movdqa 0(%[v]), %%xmm0\n"
+ "pcmpeqb %[czero], %%xmm0\n"
+ "pmovmskb %%xmm0, %[match]\n"
+ : [match]"=r"(match)
+ : [v]"r"(p), [czero]"x"(zeros)
+ : "xmm0"
+ );
+# else
asm volatile(
"movdqa 0(%[v]), %%xmm0\n"
+ "pxor %%xmm1, %%xmm1\n"
"pcmpeqb %%xmm1, %%xmm0\n"
"pmovmskb %%xmm0, %[match]\n"
: [match]"=r"(match)
: [v]"r"(p)
- : "xmm0"
+ : "xmm0", "xmm1"
);
+# endif
#else
__m128i v = _mm_load_si128((__m128i*)p);
__m128i res = _mm_cmpeq_epi8( zeros, v );
--
2.25.4
More information about the vlc-devel
mailing list