[x264-devel] x86util: Support ymm registers in HADD macros
Henrik Gramner
git at videolan.org
Tue Apr 23 23:03:05 CEST 2013
x264 | branch: master | Henrik Gramner <henrik at gramner.com> | Tue Apr 16 23:26:47 2013 +0200| [e67a4965cb90a4ee987ce9c55a30f6fd9ba3006e] | committer: Jason Garrett-Glaser
x86util: Support ymm registers in HADD macros
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=e67a4965cb90a4ee987ce9c55a30f6fd9ba3006e
---
common/x86/x86util.asm | 18 +++++++++++++-----
1 file changed, 13 insertions(+), 5 deletions(-)
diff --git a/common/x86/x86util.asm b/common/x86/x86util.asm
index a22c89f..e3c9183 100644
--- a/common/x86/x86util.asm
+++ b/common/x86/x86util.asm
@@ -287,16 +287,24 @@
%endmacro
%macro HADDD 2 ; sum junk
+%if sizeof%1 == 32
+%define %2 xmm%2
+ vextracti128 %2, %1, 1
+%define %1 xmm%1
+ paddd %1, %2
+%endif
%if mmsize >= 16
movhlps %2, %1
paddd %1, %2
%endif
PSHUFLW %2, %1, q0032
paddd %1, %2
+%undef %1
+%undef %2
%endmacro
%macro HADDW 2 ; reg, tmp
-%if cpuflag(xop) && mmsize >= 16
+%if cpuflag(xop) && sizeof%1 == 16
vphaddwq %1, %1
movhlps %2, %1
paddd %1, %2
@@ -307,7 +315,7 @@
%endmacro
%macro HADDUWD 2
-%if cpuflag(xop)
+%if cpuflag(xop) && sizeof%1 == 16
vphadduwd %1, %1
%else
psrld %2, %1, 16
@@ -318,13 +326,13 @@
%endmacro
%macro HADDUW 2
-%if cpuflag(xop) && mmsize >= 16
+%if cpuflag(xop) && sizeof%1 == 16
vphadduwq %1, %1
movhlps %2, %1
paddd %1, %2
%else
- HADDUWD %1, %2
- HADDD %1, %2
+ HADDUWD %1, %2
+ HADDD %1, %2
%endif
%endmacro
More information about the x264-devel mailing list