[vlc-devel] [PATCH 1/3] packetizer: startcode_helper: prefer intrinsics
Francois Cartegnie
fcvlcdev at free.fr
Thu Sep 10 22:18:19 CEST 2020
There is no universal, cross-compiler way to store
a vector constant and have that dependency listed in the
next assembly block. With pure assembly, there is a risk
of the constant being clobbered if the compiler
auto-vectorizes the TRY_MATCH sections.
---
modules/packetizer/startcode_helper.h | 24 ++++++++++--------------
1 file changed, 10 insertions(+), 14 deletions(-)
diff --git a/modules/packetizer/startcode_helper.h b/modules/packetizer/startcode_helper.h
index 2b61e5cf98..fd7b8249d2 100644
--- a/modules/packetizer/startcode_helper.h
+++ b/modules/packetizer/startcode_helper.h
@@ -22,7 +22,7 @@
#include <vlc_cpu.h>
-#if !defined(CAN_COMPILE_SSE2) && defined(HAVE_SSE2_INTRINSICS)
+#if defined(HAVE_SSE2_INTRINSICS)
#include <emmintrin.h>
#endif
@@ -63,30 +63,26 @@ static inline const uint8_t * startcode_FindAnnexB_SSE2( const uint8_t *p, const
alignedend = end - ((intptr_t) end & 15);
if( alignedend > p )
{
-#ifdef CAN_COMPILE_SSE2
- asm volatile(
- "pxor %%xmm1, %%xmm1\n"
- ::: "xmm1"
- );
-#else
- __m128i zeros = _mm_set1_epi8( 0x00 );
+#ifdef HAVE_SSE2_INTRINSICS
+ __m128i zeros = _mm_set1_epi8( 0x00 );
#endif
for( ; p < alignedend; p += 16)
{
uint32_t match;
-#ifdef CAN_COMPILE_SSE2
+#ifdef HAVE_SSE2_INTRINSICS
+ __m128i v = _mm_load_si128((__m128i*)p);
+ __m128i res = _mm_cmpeq_epi8( zeros, v );
+ match = _mm_movemask_epi8( res ); /* mask will be in reversed match order */
+#else
asm volatile(
+ "pxor %%xmm1, %%xmm1\n"
"movdqa 0(%[v]), %%xmm0\n"
"pcmpeqb %%xmm1, %%xmm0\n"
"pmovmskb %%xmm0, %[match]\n"
: [match]"=r"(match)
: [v]"r"(p)
- : "xmm0"
+ : "xmm0", "xmm1"
);
-#else
- __m128i v = _mm_load_si128((__m128i*)p);
- __m128i res = _mm_cmpeq_epi8( zeros, v );
- match = _mm_movemask_epi8( res ); /* mask will be in reversed match order */
#endif
if( match & 0x000F )
TRY_MATCH(p, 0);
--
2.25.4
More information about the vlc-devel
mailing list