[x264-devel] x86util: Support ymm registers in HADD macros

Henrik Gramner git at videolan.org
Tue Apr 23 23:37:08 CEST 2013


x264 | branch: master | Henrik Gramner <henrik at gramner.com> | Tue Apr 16 23:26:47 2013 +0200| [78b8af872f49aeaa3727ac4e0c8d3b53f0716f51] | committer: Jason Garrett-Glaser

x86util: Support ymm registers in HADD macros

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=78b8af872f49aeaa3727ac4e0c8d3b53f0716f51
---

 common/x86/x86util.asm |   18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/common/x86/x86util.asm b/common/x86/x86util.asm
index a22c89f..e3c9183 100644
--- a/common/x86/x86util.asm
+++ b/common/x86/x86util.asm
@@ -287,16 +287,24 @@
 %endmacro
 
 %macro HADDD 2 ; sum junk
+%if sizeof%1 == 32
+%define %2 xmm%2
+    vextracti128 %2, %1, 1
+%define %1 xmm%1
+    paddd   %1, %2
+%endif
 %if mmsize >= 16
     movhlps %2, %1
     paddd   %1, %2
 %endif
     PSHUFLW %2, %1, q0032
     paddd   %1, %2
+%undef %1
+%undef %2
 %endmacro
 
 %macro HADDW 2 ; reg, tmp
-%if cpuflag(xop) && mmsize >= 16
+%if cpuflag(xop) && sizeof%1 == 16
     vphaddwq  %1, %1
     movhlps   %2, %1
     paddd     %1, %2
@@ -307,7 +315,7 @@
 %endmacro
 
 %macro HADDUWD 2
-%if cpuflag(xop)
+%if cpuflag(xop) && sizeof%1 == 16
     vphadduwd %1, %1
 %else
     psrld %2, %1, 16
@@ -318,13 +326,13 @@
 %endmacro
 
 %macro HADDUW 2
-%if cpuflag(xop) && mmsize >= 16
+%if cpuflag(xop) && sizeof%1 == 16
     vphadduwq %1, %1
     movhlps   %2, %1
     paddd     %1, %2
 %else
-    HADDUWD %1, %2
-    HADDD %1, %2
+    HADDUWD   %1, %2
+    HADDD     %1, %2
 %endif
 %endmacro
 



More information about the x264-devel mailing list