[x264-devel] x86inc: automatically insert vzeroupper for YMM functions

Ronald S. Bultje git at videolan.org
Wed Sep 5 21:07:20 CEST 2012


x264 | branch: master | Ronald S. Bultje <rsbultje at gmail.com> | Thu Jul 26 18:01:49 2012 -0700 | [8f7644865010385efcb4cb5bd239b28edb4b49e2] | committer: Jason Garrett-Glaser

x86inc: automatically insert vzeroupper for YMM functions
Backported from libav.

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=8f7644865010385efcb4cb5bd239b28edb4b49e2
---

 common/x86/mc-a2.asm  |    3 +--
 common/x86/x86inc.asm |   15 ++++++++++++---
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/common/x86/mc-a2.asm b/common/x86/mc-a2.asm
index 0197941..53aba12 100644
--- a/common/x86/mc-a2.asm
+++ b/common/x86/mc-a2.asm
@@ -1786,5 +1786,4 @@ cglobal mbtree_propagate_cost, 7,7,8
     vmovdqu [r0+r6*2], ymm1
     add            r6, 16
     jl .loop
-    vzeroupper
-    RET
+    REP_RET
diff --git a/common/x86/x86inc.asm b/common/x86/x86inc.asm
index 3e2b8da..d7513be 100644
--- a/common/x86/x86inc.asm
+++ b/common/x86/x86inc.asm
@@ -368,11 +368,14 @@ DECLARE_REG 14, R15, 120
     %assign xmm_regs_used 0
 %endmacro
 
-%define has_epilogue regs_used > 7 || xmm_regs_used > 6
+%define has_epilogue regs_used > 7 || xmm_regs_used > 6 || mmsize == 32
 
 %macro RET 0
     WIN64_RESTORE_XMM_INTERNAL rsp
     POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7
+%if mmsize == 32
+    vzeroupper
+%endif
     ret
 %endmacro
 
@@ -404,10 +407,13 @@ DECLARE_REG 14, R15, 72
     DEFINE_ARGS %4
 %endmacro
 
-%define has_epilogue regs_used > 9
+%define has_epilogue regs_used > 9 || mmsize == 32
 
 %macro RET 0
     POP_IF_USED 14, 13, 12, 11, 10, 9
+%if mmsize == 32
+    vzeroupper
+%endif
     ret
 %endmacro
 
@@ -444,10 +450,13 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
     DEFINE_ARGS %4
 %endmacro
 
-%define has_epilogue regs_used > 3
+%define has_epilogue regs_used > 3 || mmsize == 32
 
 %macro RET 0
     POP_IF_USED 6, 5, 4, 3
+%if mmsize == 32
+    vzeroupper
+%endif
     ret
 %endmacro
 



More information about the x264-devel mailing list