[vlc-devel] [PATCH 13/25] packetizer: add startcode_FindAnnexB SSSE3 asm

Victorien Le Couviour--Tuffet victorien.lecouviour.tuffet at gmail.com
Tue Apr 14 12:40:24 CEST 2020


---
 modules/packetizer/startcode.asm      | 82 ++++++++++++++++++++++++++-
 modules/packetizer/startcode_helper.h |  6 +-
 test/modules/packetizer/helpers.c     |  8 +++
 3 files changed, 93 insertions(+), 3 deletions(-)

diff --git a/modules/packetizer/startcode.asm b/modules/packetizer/startcode.asm
index 55f294fcb5..cac5eea116 100644
--- a/modules/packetizer/startcode.asm
+++ b/modules/packetizer/startcode.asm
@@ -22,7 +22,10 @@
 
 SECTION_RODATA 16
 
-pd_0x01000000: times 4 dd 0x01000000
+shuf_pad2dw: db 0x00, 0x01, 0x02, 0x80, 0x03, 0x04, 0x05, 0x80 ; pshufb control: source bytes 0..11, three per dword;
+             db 0x06, 0x07, 0x08, 0x80, 0x09, 0x0A, 0x0B, 0x80 ; each 0x80 index zeroes the top byte of its dword
+
+pd_0x00010000: times 4 dd 0x00010000 ; per dword, bytes 00 00 01 00 in memory: the 3-byte pattern plus a zero pad
 
 SECTION .text
 
@@ -33,8 +36,9 @@ cglobal startcode_FindAnnexB, 2, 7, 6, ptr, end, size
     cmp          sized, 16
     jl .end
 
-    LEA            r6q, pd_0x01000000
+    LEA            r6q, pd_0x00010000
     mova            m0, [r6q]
+    pslld           m0, 8
 .loop:
     movu            m1, [ptrq]
     psrldq          m2, m1, 1
@@ -93,3 +97,102 @@ DEFINE_ARGS ptr, _, size, tmp
 .found:
     lea            rax, [ptrq+r3q]
     RET
+
+;------------------------------------------------------------------------------
+; uint8_t const *vlcpriv_startcode_FindAnnexB_ssse3(uint8_t const *ptr,
+;                                                   uint8_t const *end)
+;
+; Returns the address of the first 00 00 01 byte sequence in [ptr, end), or
+; NULL when there is none. end - ptr is handled as a signed 32-bit value.
+;
+; Vector loop: each pass loads 16 bytes and tests the 12 positions ptr+0 ..
+; ptr+11. The load and its copies shifted right by 1 and 2 bytes are repacked
+; into dwords of three bytes plus a zero pad, so dword lane i of m2/m3/m4
+; stands for position 3*i+0 / 3*i+1 / 3*i+2.
+;
+; Bit NO_MATCH_LANE is forced on in every movmskps result before tzcnt, so the
+; tzcnt source is never zero. tzcnt decodes as bsf on CPUs without BMI1 (which
+; SSSE3 does not imply), and bsf leaves its destination undefined for a zero
+; source, so the "no match" value must not depend on tzcnt returning 32.
+;
+; NOTE(review): setle writes r4b/r5b; confirm both byte registers are encodable
+; on x86-32 with the register mapping in use.
+;------------------------------------------------------------------------------
+%define NO_MATCH_LANE 4
+
+INIT_XMM ssse3
+cglobal startcode_FindAnnexB, 2, 7, 5, ptr, end, size
+%define base r6q-shuf_pad2dw
+    mov          sizeq, endq
+    sub          sizeq, ptrq
+    cmp          sized, 16
+    jl .end                                     ; fewer than 16 bytes: scalar tail only
+
+    LEA            r6q, shuf_pad2dw
+    mova            m0, [base+shuf_pad2dw]      ; m0 = three-bytes-per-dword shuffle
+    mova            m1, [base+pd_0x00010000]    ; m1 = pattern to match in every dword
+.loop:
+    movu            m2, [ptrq]
+    psrldq          m3, m2, 1
+    psrldq          m4, m2, 2
+    pshufb          m2, m0                      ; lanes = positions 0, 3, 6, 9
+    pshufb          m3, m0                      ; lanes = positions 1, 4, 7, 10
+    pshufb          m4, m0                      ; lanes = positions 2, 5, 8, 11
+    pcmpeqd         m2, m1
+    pcmpeqd         m3, m1
+    pcmpeqd         m4, m1
+    movmskps       r3d, m2
+    movmskps       r4d, m3
+    movmskps       r5d, m4
+    or             r3d, 1 << NO_MATCH_LANE      ; sentinel bit: tzcnt source never zero
+    or             r4d, 1 << NO_MATCH_LANE
+    or             r5d, 1 << NO_MATCH_LANE
+    tzcnt          r3d, r3d                     ; first matching lane, NO_MATCH_LANE if none
+    tzcnt          r4d, r4d
+    tzcnt          r5d, r5d
+    cmp            r4d, r3d
+    cmovle         r3d, r4d                     ; r3d = lowest lane so far
+    setle          r4b                          ; r4d = 1 if the shift-by-1 lane was taken
+    cmp            r5d, r3d
+    cmovle         r3d, r5d
+    setle          r5b                          ; r5d = 1 if the shift-by-2 lane was taken
+    test           r3d, NO_MATCH_LANE           ; lanes 0..3 never have this bit set
+    jz .found
+    add           ptrq, 12                      ; 12 positions were checked this pass
+    sub          sized, 12
+    cmp          sized, 16
+    jge .loop
+
+DEFINE_ARGS ptr, _, size, tmp
+.end:
+    sub          sized, 3                       ; negative: no room left for a 3-byte pattern
+    jl .ret_null
+.end_loop:
+    xor           tmpd, tmpd
+    test   word [ptrq], 0xFFFF                  ; ZF = (ptr[0] == 0 and ptr[1] == 0)
+    cmovz         tmpw, [ptrq+1]                ; then tmpw = ptr[1] | ptr[2] << 8
+    xor           tmpd, 0x0100                  ; zero only if tmpw was 0x0100, i.e. ptr[2] == 1
+    jz .ret
+    inc           ptrq
+    dec          sized
+    jge .end_loop
+.ret_null:
+    xor           ptrq, ptrq
+.ret:
+%if ARCH_X86_32
+    mov            eax, ptrd
+%else
+    mov            rax, ptrq
+%endif
+    RET
+
+.found:
+    lea            r3d, [r3d*3]                 ; lane index -> byte offset of that lane
+    mov            r6d, r4d
+    xor            r4d, r5d
+    and            r4d, r6d                     ; r4d = 1 only if shift-1 taken and shift-2 not
+    shl            r5d, 1                       ; r5d = 2 if shift-2 taken
+    or             r4d, r5d                     ; r4d = offset inside the lane: 0, 1 or 2
+    add            r3d, r4d
+    lea            rax, [ptrq+r3q]
+    RET
diff --git a/modules/packetizer/startcode_helper.h b/modules/packetizer/startcode_helper.h
index 4009e06a65..73425e9970 100644
--- a/modules/packetizer/startcode_helper.h
+++ b/modules/packetizer/startcode_helper.h
@@ -70,13 +70,17 @@ startcode_FindAnnexB_Bits(uint8_t const *p, uint8_t const *end)
 #if defined(__i386__) || defined(__x86_64__)
 uint8_t const *vlcpriv_startcode_FindAnnexB_sse2(uint8_t const *ptr,
                                                  uint8_t const *end);
+uint8_t const *vlcpriv_startcode_FindAnnexB_ssse3(uint8_t const *ptr,
+                                                  uint8_t const *end);
 #endif
 
 static inline block_startcode_helper_t
 startcode_FindAnnexB_helper(void)
 {
 #if defined(__i386__) || defined(__x86_64__)
-    if (vlc_CPU_SSE2())
+    if (vlc_CPU_SSSE3())
+        return vlcpriv_startcode_FindAnnexB_ssse3;
+    else if (vlc_CPU_SSE2())
         return vlcpriv_startcode_FindAnnexB_sse2;
     else
 #endif
diff --git a/test/modules/packetizer/helpers.c b/test/modules/packetizer/helpers.c
index 4ee3418ff3..64e982c2d5 100644
--- a/test/modules/packetizer/helpers.c
+++ b/test/modules/packetizer/helpers.c
@@ -86,6 +86,14 @@ static int run_annexb_sets( const uint8_t *p_set, const uint8_t *p_end,
         if( i_ret != 0 )
             return i_ret;
     }
+    if (vlc_CPU_SSSE3())
+    {
+        printf("checking SSSE3 asm:\n");
+        i_ret = check_set( p_set, p_end, p_results, i_results, i_results_offset,
+                           vlcpriv_startcode_FindAnnexB_ssse3 );
+        if( i_ret != 0 )
+            return i_ret;
+    }
 #endif
 
     return 0;
-- 
2.24.1



More information about the vlc-devel mailing list