[x264-devel] x86inc: automatically insert vzeroupper for YMM functions
Ronald S. Bultje
git at videolan.org
Wed Sep 5 21:07:20 CEST 2012
x264 | branch: master | Ronald S. Bultje <rsbultje at gmail.com> | Thu Jul 26 18:01:49 2012 -0700| [8f7644865010385efcb4cb5bd239b28edb4b49e2] | committer: Jason Garrett-Glaser
x86inc: automatically insert vzeroupper for YMM functions
Backported from libav.
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=8f7644865010385efcb4cb5bd239b28edb4b49e2
---
common/x86/mc-a2.asm | 3 +--
common/x86/x86inc.asm | 15 ++++++++++++---
2 files changed, 13 insertions(+), 5 deletions(-)
diff --git a/common/x86/mc-a2.asm b/common/x86/mc-a2.asm
index 0197941..53aba12 100644
--- a/common/x86/mc-a2.asm
+++ b/common/x86/mc-a2.asm
@@ -1786,5 +1786,4 @@ cglobal mbtree_propagate_cost, 7,7,8
vmovdqu [r0+r6*2], ymm1
add r6, 16
jl .loop
- vzeroupper
- RET
+ REP_RET
diff --git a/common/x86/x86inc.asm b/common/x86/x86inc.asm
index 3e2b8da..d7513be 100644
--- a/common/x86/x86inc.asm
+++ b/common/x86/x86inc.asm
@@ -368,11 +368,14 @@ DECLARE_REG 14, R15, 120
%assign xmm_regs_used 0
%endmacro
-%define has_epilogue regs_used > 7 || xmm_regs_used > 6
+%define has_epilogue regs_used > 7 || xmm_regs_used > 6 || mmsize == 32
%macro RET 0
WIN64_RESTORE_XMM_INTERNAL rsp
POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7
+%if mmsize == 32
+ vzeroupper
+%endif
ret
%endmacro
@@ -404,10 +407,13 @@ DECLARE_REG 14, R15, 72
DEFINE_ARGS %4
%endmacro
-%define has_epilogue regs_used > 9
+%define has_epilogue regs_used > 9 || mmsize == 32
%macro RET 0
POP_IF_USED 14, 13, 12, 11, 10, 9
+%if mmsize == 32
+ vzeroupper
+%endif
ret
%endmacro
@@ -444,10 +450,13 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
DEFINE_ARGS %4
%endmacro
-%define has_epilogue regs_used > 3
+%define has_epilogue regs_used > 3 || mmsize == 32
%macro RET 0
POP_IF_USED 6, 5, 4, 3
+%if mmsize == 32
+ vzeroupper
+%endif
ret
%endmacro
More information about the x264-devel mailing list