[x265] [PATCH] asm: improve the old avx2 code for sad[32x24]

sumalatha at multicorewareinc.com sumalatha at multicorewareinc.com
Mon Apr 6 09:16:09 CEST 2015


# HG changeset patch
# User Sumalatha Polureddy
# Date 1428301075 -19800
#      Mon Apr 06 11:47:55 2015 +0530
# Node ID 7fcb5dd81aa6b7dd20ce072e32b18e1ffa6bd5c1
# Parent  ebe5e57c4b45b45338035a1009b64585f21d66d5
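asm: improve the old avx2 code for sad[32x24]

Precompute 3 * stride for both inputs (r5, r6) before the loop so that rows 2
and 3 can be loaded directly from the unchanged row pointers, and advance each
pointer once by 4 * stride per iteration instead of twice by 2 * stride.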

Testbench results (columns: speedup = reference / optimized, optimized timing,
reference timing):

old:
sad[32x24]  14.26x   490.58          6995.66
new:
sad[32x24]  16.33x   428.35          6993.57

diff -r ebe5e57c4b45 -r 7fcb5dd81aa6 source/common/x86/sad-a.asm
--- a/source/common/x86/sad-a.asm	Sat Apr 04 15:11:39 2015 -0500
+++ b/source/common/x86/sad-a.asm	Mon Apr 06 11:47:55 2015 +0530
@@ -4004,10 +4004,12 @@
     RET
 
 INIT_YMM avx2
-cglobal pixel_sad_32x24, 4,5,6
+cglobal pixel_sad_32x24, 4,7,6
     xorps           m0, m0
     xorps           m5, m5
     mov             r4d, 6
+    lea             r5, [r1 * 3]
+    lea             r6, [r3 * 3]
 .loop
     movu           m1, [r0]               ; row 0 of pix0
     movu           m2, [r2]               ; row 0 of pix1
@@ -4019,21 +4021,18 @@
     paddd          m0, m1
     paddd          m5, m3
 
-    lea     r2,     [r2 + 2 * r3]
-    lea     r0,     [r0 + 2 * r1]
-
-    movu           m1, [r0]               ; row 2 of pix0
-    movu           m2, [r2]               ; row 2 of pix1
-    movu           m3, [r0 + r1]          ; row 3 of pix0
-    movu           m4, [r2 + r3]          ; row 3 of pix1
+    movu           m1, [r0 + 2 * r1]      ; row 2 of pix0
+    movu           m2, [r2 + 2 * r3]      ; row 2 of pix1
+    movu           m3, [r0 + r5]          ; row 3 of pix0
+    movu           m4, [r2 + r6]          ; row 3 of pix1
 
     psadbw         m1, m2
     psadbw         m3, m4
     paddd          m0, m1
     paddd          m5, m3
 
-    lea     r2,     [r2 + 2 * r3]
-    lea     r0,     [r0 + 2 * r1]
+    lea     r2,     [r2 + 4 * r3]
+    lea     r0,     [r0 + 4 * r1]
 
     dec         r4d
     jnz         .loop


More information about the x265-devel mailing list