[x265] [PATCH 1 of 6] avx2:'integral8v' asm code-> 7.32x faster than 'C' version
vignesh at multicorewareinc.com
vignesh at multicorewareinc.com
Tue May 9 06:46:18 CEST 2017
# HG changeset patch
# User Vignesh Vijayakumar
# Date 1494225121 -19800
# Mon May 08 12:02:01 2017 +0530
# Node ID 26b97f77ec3f8c23903a08537acbcb21625df472
# Parent 41611825c2f4661536500e1306db7d8c4bf7fd07
avx2:'integral8v' asm code-> 7.32x faster than 'C' version
integral_init8v 7.32x 201.03 1470.88 (test-bench columns: speedup, asm timing, C timing)
diff -r 41611825c2f4 -r 26b97f77ec3f source/common/x86/seaintegral.asm
--- a/source/common/x86/seaintegral.asm Thu May 04 19:13:48 2017 +0530
+++ b/source/common/x86/seaintegral.asm Mon May 08 12:02:01 2017 +0530
@@ -51,8 +51,19 @@
;void integral_init8v_c(uint32_t *sum8, intptr_t stride)
;-----------------------------------------------------------------------------
INIT_YMM avx2
-cglobal integral8v, 2, 2, 0
-
+cglobal integral8v, 2, 3, 2             ; r0 = sum8, r1 = stride (uint32_t elements); r2, m0, m1 = scratch
+    mov         r2, r1
+    shl         r2, 5                   ; r2 = 8 * stride * sizeof(uint32_t): byte offset of the row 8 below
+
+; Each pass does sum8[x] = sum8[x + 8 * stride] - sum8[x] for 8 dwords; a tail of stride % 8 is rounded up to 8.
+.loop:
+    movu        m0, [r0]                ; m0 = sum8[x .. x+7]
+    movu        m1, [r0 + r2]           ; m1 = sum8[x + 8*stride .. x + 8*stride + 7]
+    psubd       m1, m0                  ; m1 = lower row - current row
+    movu        [r0], m1
+    add         r0, 32                  ; advance by one ymm vector (8 dwords)
+    sub         r1, 8                   ; elements remaining; sets the flags jg tests
+    jg          .loop                   ; signed test: also exits when stride is not a multiple of 8
RET
;-----------------------------------------------------------------------------
More information about the x265-devel
mailing list