[x265] [PATCH] asm: improve the old avx2 code for sad[32x24]
sumalatha at multicorewareinc.com
sumalatha at multicorewareinc.com
Mon Apr 6 09:16:09 CEST 2015
# HG changeset patch
# User Sumalatha Polureddy
# Date 1428301075 -19800
# Mon Apr 06 11:47:55 2015 +0530
# Node ID 7fcb5dd81aa6b7dd20ce072e32b18e1ffa6bd5c1
# Parent ebe5e57c4b45b45338035a1009b64585f21d66d5
asm: improve the old avx2 code for sad[32x24]
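old (speedup over C, optimized cycles, C reference cycles):
sad[32x24] 14.26x 490.58 6995.66
new (speedup over C, optimized cycles, C reference cycles):
sad[32x24] 16.33x 428.35 6993.57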
diff -r ebe5e57c4b45 -r 7fcb5dd81aa6 source/common/x86/sad-a.asm
--- a/source/common/x86/sad-a.asm Sat Apr 04 15:11:39 2015 -0500
+++ b/source/common/x86/sad-a.asm Mon Apr 06 11:47:55 2015 +0530
@@ -4004,10 +4004,12 @@
RET
INIT_YMM avx2
-cglobal pixel_sad_32x24, 4,5,6
+cglobal pixel_sad_32x24, 4,7,6
xorps m0, m0
xorps m5, m5
mov r4d, 6
+ lea r5, [r1 * 3]
+ lea r6, [r3 * 3]
.loop
movu m1, [r0] ; row 0 of pix0
movu m2, [r2] ; row 0 of pix1
@@ -4019,21 +4021,18 @@
paddd m0, m1
paddd m5, m3
- lea r2, [r2 + 2 * r3]
- lea r0, [r0 + 2 * r1]
-
- movu m1, [r0] ; row 2 of pix0
- movu m2, [r2] ; row 2 of pix1
- movu m3, [r0 + r1] ; row 3 of pix0
- movu m4, [r2 + r3] ; row 3 of pix1
+ movu m1, [r0 + 2 * r1] ; row 2 of pix0
+ movu m2, [r2 + 2 * r3] ; row 2 of pix1
+ movu m3, [r0 + r5] ; row 3 of pix0
+ movu m4, [r2 + r6] ; row 3 of pix1
psadbw m1, m2
psadbw m3, m4
paddd m0, m1
paddd m5, m3
- lea r2, [r2 + 2 * r3]
- lea r0, [r0 + 2 * r1]
+ lea r2, [r2 + 4 * r3]
+ lea r0, [r0 + 4 * r1]
dec r4d
jnz .loop
More information about the x265-devel mailing list