[vlc-devel] [PATCH 13/25] packetizer: add startcode_FindAnnexB SSSE3 asm
Victorien Le Couviour--Tuffet
victorien.lecouviour.tuffet at gmail.com
Tue Apr 14 12:40:24 CEST 2020
---
modules/packetizer/startcode.asm | 82 ++++++++++++++++++++++++++-
modules/packetizer/startcode_helper.h | 6 +-
test/modules/packetizer/helpers.c | 8 +++
3 files changed, 93 insertions(+), 3 deletions(-)
diff --git a/modules/packetizer/startcode.asm b/modules/packetizer/startcode.asm
index 55f294fcb5..cac5eea116 100644
--- a/modules/packetizer/startcode.asm
+++ b/modules/packetizer/startcode.asm
@@ -22,7 +22,10 @@
SECTION_RODATA 16
-pd_0x01000000: times 4 dd 0x01000000
+shuf_pad2dw: db 0x00, 0x01, 0x02, 0x80, 0x03, 0x04, 0x05, 0x80
+ db 0x06, 0x07, 0x08, 0x80, 0x09, 0x0A, 0x0B, 0x80
+
+pd_0x00010000: times 4 dd 0x00010000
SECTION .text
@@ -33,8 +36,9 @@ cglobal startcode_FindAnnexB, 2, 7, 6, ptr, end, size
cmp sized, 16
jl .end
- LEA r6q, pd_0x01000000
+ LEA r6q, pd_0x00010000
mova m0, [r6q]
+ pslld m0, 8
.loop:
movu m1, [ptrq]
psrldq m2, m1, 1
@@ -93,3 +97,77 @@ DEFINE_ARGS ptr, _, size, tmp
.found:
lea rax, [ptrq+r3q]
RET
+; SSSE3 variant: scan [ptr, end) for the 3-byte sequence 00 00 01, 12 candidate positions per iteration.
+INIT_XMM ssse3                               ; uses pshufb to test 3-byte groups as dwords
+cglobal startcode_FindAnnexB, 2, 7, 5, ptr, end, size ; args: ptr, end; size is scratch
+%define base r6q-shuf_pad2dw
+ mov sizeq, endq
+ sub sizeq, ptrq                             ; size = end - ptr
+ cmp sized, 16                               ; only the low 32 bits of size are compared (signed)
+ jl .end                                     ; fewer than 16 bytes: go straight to the scalar tail
+; r6q holds the address of shuf_pad2dw; 'base' lets both constants be addressed relative to it.
+ LEA r6q, shuf_pad2dw
+ mova m0, [base+shuf_pad2dw]                 ; m0 = shuffle mask: bytes 3i..3i+2 -> dword i, 0x80 zeroes the top byte
+ mova m1, [base+pd_0x00010000]               ; m1 = 0x00010000 per dword, i.e. bytes 00 00 01 + zero pad
+.loop:
+ movu m2, [ptrq]                             ; unaligned load of 16 input bytes
+ psrldq m3, m2, 1                            ; m3 = input shifted down by 1 byte
+ psrldq m4, m2, 2                            ; m4 = input shifted down by 2 bytes
+ pshufb m2, m0                               ; dword i = bytes at offsets 3i+0 .. 3i+2
+ pshufb m3, m0                               ; dword i = bytes at offsets 3i+1 .. 3i+3
+ pshufb m4, m0                               ; dword i = bytes at offsets 3i+2 .. 3i+4
+ pcmpeqd m2, m1                              ; lane = all ones where the 3 bytes are 00 00 01
+ pcmpeqd m3, m1
+ pcmpeqd m4, m1
+ movmskps r3d, m2                            ; 4-bit lane mask for byte offsets 0, 3, 6, 9
+ movmskps r4d, m3                            ; 4-bit lane mask for byte offsets 1, 4, 7, 10
+ movmskps r5d, m4                            ; 4-bit lane mask for byte offsets 2, 5, 8, 11
+ tzcnt r3d, r3d                              ; first matching lane; the code below expects 32 for an empty mask
+ tzcnt r4d, r4d                              ; NOTE(review): without BMI1 this encoding executes as bsf, whose result
+ tzcnt r5d, r5d                              ; for a zero source is undefined -- confirm this cannot hit the no-match case
+ cmp r4d, r3d
+ cmovle r3d, r4d                             ; r3d = min(r3d, r4d)
+ setle r4b                                   ; r4b = 1 if the +1 vector supplied the minimum (flags still from cmp)
+ cmp r5d, r3d
+ cmovle r3d, r5d                             ; r3d = min over all three vectors
+ setle r5b                                   ; r5b = 1 if the +2 vector supplied the minimum
+ test r3d, 32                                ; bit 5 is set only when all three counts were 32 (no lane matched)
+ jz .found
+ add ptrq, 12                                ; offsets 0..11 have been checked
+ sub sized, 12
+ cmp sized, 16
+ jge .loop                                   ; keep going while a full 16-byte load is still available
+; Scalar tail: check one position per iteration. tmp presumably aliases r3 via DEFINE_ARGS -- confirm in x86inc.
+DEFINE_ARGS ptr, _, size, tmp
+.end:
+ sub sized, 3                                ; size now counts remaining candidate positions minus one
+ jl .ret_null                                ; fewer than 3 bytes left: no start code possible
+.end_loop:
+ xor tmpd, tmpd                              ; tmp = 0
+ test word [ptrq], 0xFFFF                    ; ZF = 1 when bytes [0] and [1] are both zero
+ cmovz tmpw, [ptrq+1]                        ; then tmpw = bytes [1],[2] as a little-endian word
+ xor tmpd, 0x0100                            ; zero only if tmpw == 0x0100, i.e. byte [1] = 00 and byte [2] = 01
+ jz .ret                                     ; 00 00 01 found at ptr
+ inc ptrq
+ dec sized
+ jge .end_loop                               ; continue while at least 3 bytes remain
+.ret_null:
+ xor ptrq, ptrq                              ; not found: return NULL
+.ret:
+%if ARCH_X86_32
+ mov eax, ptrd
+%else
+ mov rax, ptrq
+%endif
+ RET
+; Vector hit: rebuild the byte offset as 3 * lane + index of the vector (0, 1 or 2) that supplied the minimum.
+.found:
+ lea r3d, [r3d*3]                            ; lane index -> byte offset of that lane in the unshifted vector
+ mov r6d, r4d                                ; r4d/r5d are 0 or 1 here: setle overwrote the low byte of a count <= 32
+ xor r4d, r5d
+ and r4d, r6d                                ; r4d = r4 AND NOT r5
+ shl r5d, 1                                  ; r5d = 2 if the +2 vector won
+ or r4d, r5d                                 ; r4d = 2 if r5 was set, else 1 if r4 was set, else 0
+ add r3d, r4d
+ lea rax, [ptrq+r3q]                         ; return ptr + byte offset of the start code
+ RET
diff --git a/modules/packetizer/startcode_helper.h b/modules/packetizer/startcode_helper.h
index 4009e06a65..73425e9970 100644
--- a/modules/packetizer/startcode_helper.h
+++ b/modules/packetizer/startcode_helper.h
@@ -70,13 +70,17 @@ startcode_FindAnnexB_Bits(uint8_t const *p, uint8_t const *end)
#if defined(__i386__) || defined(__x86_64__)
uint8_t const *vlcpriv_startcode_FindAnnexB_sse2(uint8_t const *ptr,
uint8_t const *end);
+uint8_t const *vlcpriv_startcode_FindAnnexB_ssse3(uint8_t const *ptr,
+ uint8_t const *end);
#endif
static inline block_startcode_helper_t
startcode_FindAnnexB_helper(void)
{
#if defined(__i386__) || defined(__x86_64__)
- if (vlc_CPU_SSE2())
+ if (vlc_CPU_SSSE3())
+ return vlcpriv_startcode_FindAnnexB_ssse3;
+ else if (vlc_CPU_SSE2())
return vlcpriv_startcode_FindAnnexB_sse2;
else
#endif
diff --git a/test/modules/packetizer/helpers.c b/test/modules/packetizer/helpers.c
index 4ee3418ff3..64e982c2d5 100644
--- a/test/modules/packetizer/helpers.c
+++ b/test/modules/packetizer/helpers.c
@@ -86,6 +86,14 @@ static int run_annexb_sets( const uint8_t *p_set, const uint8_t *p_end,
if( i_ret != 0 )
return i_ret;
}
+ if (vlc_CPU_SSSE3())
+ {
+ printf("checking SSSE3 asm:\n");
+ i_ret = check_set( p_set, p_end, p_results, i_results, i_results_offset,
+ vlcpriv_startcode_FindAnnexB_ssse3 );
+ if( i_ret != 0 )
+ return i_ret;
+ }
#endif
return 0;
--
2.24.1
More information about the vlc-devel
mailing list