[x265] [PATCH 2 of 6] avx2:'integral12v' asm code-> 7.48x faster than 'C' version

vignesh at multicorewareinc.com vignesh at multicorewareinc.com
Tue May 9 06:46:19 CEST 2017


# HG changeset patch
# User Vignesh Vijayakumar
# Date 1494225152 -19800
#      Mon May 08 12:02:32 2017 +0530
# Node ID 128fdef63629b3fd60babcdc20536e278ec2324a
# Parent  26b97f77ec3f8c23903a08537acbcb21625df472
avx2:'integral12v' asm code-> 7.48x faster than 'C' version

    integral_init12v  7.48x    206.80          1545.53

diff -r 26b97f77ec3f -r 128fdef63629 source/common/x86/seaintegral.asm
--- a/source/common/x86/seaintegral.asm	Mon May 08 12:02:01 2017 +0530
+++ b/source/common/x86/seaintegral.asm	Mon May 08 12:02:32 2017 +0530
@@ -70,8 +70,22 @@
 ;void integral_init12v_c(uint32_t *sum12, intptr_t stride)
 ;-----------------------------------------------------------------------------
 INIT_YMM avx2
-cglobal integral12v, 2, 2, 0
- 
+cglobal integral12v, 2, 3, 2
+    ; r0 = sum12, r1 = stride in uint32 elements (assumed > 0), r2 = scratch.
+    ; In place: sum12[x] = sum12[x + 12 * stride] - sum12[x], 8 elements per pass.
+    ; r2 becomes the byte offset from sum12[x] to sum12[x + 12 * stride].
+    lea     r2, [r1 + r1 * 2]   ; r2 = 3 * stride
+    shl     r2, 4               ; r2 = 48 * stride: 12 * stride elements, 4 bytes each
+
+.loop:                          ; one ymm register = 8 dwords per iteration
+    movu    m0, [r0]            ; m0 = sum12[x .. x + 7]
+    movu    m1, [r0 + r2]       ; m1 = sum12[x + 12 * stride ..]
+    psubd   m1, m0
+    movu    [r0], m1            ; overwrite the elements that were just read
+    add     r0, 32              ; advance by 8 dwords
+    sub     r1, 8               ; sets the flags itself, so no cmp against 0 is needed
+    ; jg, not jnz: a stride that is not a multiple of 8 still ends the loop.
+    jg      .loop
     RET
 
 ;-----------------------------------------------------------------------------


More information about the x265-devel mailing list