[x265] [PATCH] Use the HADDD macro to improve AMD performance

Min Chen chenm003 at 163.com
Thu Jul 17 22:10:39 CEST 2014


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1405627833 25200
# Node ID 8eefb97e5101e2ae30a087759827003777ca7429
# Parent  6d0c8efbe3ed26cf80446cba6d56fbd9f5a2d957
use macro HADDD to improve AMD performance

diff -r 6d0c8efbe3ed -r 8eefb97e5101 source/common/x86/ssd-a.asm
--- a/source/common/x86/ssd-a.asm	Thu Jul 17 02:38:35 2014 -0500
+++ b/source/common/x86/ssd-a.asm	Thu Jul 17 13:10:33 2014 -0700
@@ -2412,14 +2412,10 @@
 
     pmaddwd m0, m0
     pmaddwd m1, m1
-
-    ; calculate sum
-    paddd   m0, m1
-    movhlps m1, m0
-    paddd   m0, m1
-    pshufd  m1, m0, 1
     paddd   m0, m1
 
+    ; calculate sum and return
+    HADDD   m0, m1
     movd    eax, m0
     RET
 
@@ -2454,14 +2450,10 @@
     paddd   m4, m1
     paddd   m2, m3
     paddd   m4, m2
+    paddd   m0, m4
 
-    ; calculate sum
-    paddd   m0, m4
-    movhlps m1, m0
-    paddd   m0, m1
-    pshufd  m1, m0, 1
-    paddd   m0, m1
-
+    ; calculate sum and return
+    HADDD   m0, m1
     movd    eax, m0
     RET
 
@@ -2506,12 +2498,8 @@
     dec     r2d
     jnz    .loop
 
-    ; calculate sum
-    movhlps m1, m0
-    paddd   m0, m1
-    pshufd  m1, m0, 1
-    paddd   m0, m1
-
+    ; calculate sum and return
+    HADDD   m0, m1
     movd    eax, m0
     RET
 
@@ -2556,12 +2544,8 @@
     dec     r2d
     jnz    .loop
 
-    ; calculate sum
-    movhlps m1, m0
-    paddd   m0, m1
-    pshufd  m1, m0, 1
-    paddd   m0, m1
-
+    ; calculate sum and return
+    HADDD   m0, m1
     movd    eax, m0
     RET
 
@@ -2606,13 +2590,7 @@
     dec     r2d
     jnz    .loop
 
-    ; calculate sum
-    vextracti128 xm1, m0, 1
-    paddd   xm0, xm1
-    movhlps xm1, xm0
-    paddd   xm0, xm1
-    pshufd  xm1, xm0, 1
-    paddd   xm0, xm1
-
+    ; calculate sum and return
+    HADDD   m0, m1
     movd    eax, xm0
     RET



More information about the x265-devel mailing list