[x265] [PATCH] use macro HADDD to improve AMD performance
Min Chen
chenm003 at 163.com
Thu Jul 17 22:10:39 CEST 2014
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1405627833 25200
# Node ID 8eefb97e5101e2ae30a087759827003777ca7429
# Parent 6d0c8efbe3ed26cf80446cba6d56fbd9f5a2d957
use the HADDD macro for the final horizontal dword sum to improve AMD performance
diff -r 6d0c8efbe3ed -r 8eefb97e5101 source/common/x86/ssd-a.asm
--- a/source/common/x86/ssd-a.asm Thu Jul 17 02:38:35 2014 -0500
+++ b/source/common/x86/ssd-a.asm Thu Jul 17 13:10:33 2014 -0700
@@ -2412,14 +2412,10 @@
pmaddwd m0, m0
pmaddwd m1, m1
-
- ; calculate sum
- paddd m0, m1
- movhlps m1, m0
- paddd m0, m1
- pshufd m1, m0, 1
paddd m0, m1
+ ; calculate sum and return
+ HADDD m0, m1
movd eax, m0
RET
@@ -2454,14 +2450,10 @@
paddd m4, m1
paddd m2, m3
paddd m4, m2
+ paddd m0, m4
- ; calculate sum
- paddd m0, m4
- movhlps m1, m0
- paddd m0, m1
- pshufd m1, m0, 1
- paddd m0, m1
-
+ ; calculate sum and return
+ HADDD m0, m1
movd eax, m0
RET
@@ -2506,12 +2498,8 @@
dec r2d
jnz .loop
- ; calculate sum
- movhlps m1, m0
- paddd m0, m1
- pshufd m1, m0, 1
- paddd m0, m1
-
+ ; calculate sum and return
+ HADDD m0, m1
movd eax, m0
RET
@@ -2556,12 +2544,8 @@
dec r2d
jnz .loop
- ; calculate sum
- movhlps m1, m0
- paddd m0, m1
- pshufd m1, m0, 1
- paddd m0, m1
-
+ ; calculate sum and return
+ HADDD m0, m1
movd eax, m0
RET
@@ -2606,13 +2590,7 @@
dec r2d
jnz .loop
- ; calculate sum
- vextracti128 xm1, m0, 1
- paddd xm0, xm1
- movhlps xm1, xm0
- paddd xm0, xm1
- pshufd xm1, xm0, 1
- paddd xm0, xm1
-
+ ; calculate sum and return
+ HADDD m0, m1
movd eax, xm0
RET
More information about the x265-devel mailing list