[x264-devel] x86: don't use the red zone on win64
Anton Mitrofanov
git at videolan.org
Wed Feb 27 00:18:02 CET 2013
x264 | branch: master | Anton Mitrofanov <BugMaster at narod.ru> | Mon Feb 25 19:28:19 2013 +0400| [736d69b5875587b61c03aa45438e19ddba1f7035] | committer: Jason Garrett-Glaser
x86: don't use the red zone on win64
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=736d69b5875587b61c03aa45438e19ddba1f7035
---
common/x86/cpu-a.asm | 3 +++
common/x86/deblock-a.asm | 6 +++++-
common/x86/mc-a.asm | 4 ++++
common/x86/pixel-a.asm | 15 ++++++++-------
4 files changed, 20 insertions(+), 8 deletions(-)
diff --git a/common/x86/cpu-a.asm b/common/x86/cpu-a.asm
index b90bc56..6e26b96 100644
--- a/common/x86/cpu-a.asm
+++ b/common/x86/cpu-a.asm
@@ -165,6 +165,9 @@ cglobal safe_intel_cpu_indicator_init
%endif
push rbp
mov rbp, rsp
+%if WIN64
+ sub rsp, 32 ; shadow space
+%endif
and rsp, ~15
call intel_cpu_indicator_init
leave
diff --git a/common/x86/deblock-a.asm b/common/x86/deblock-a.asm
index b1c9a88..0cc003f 100644
--- a/common/x86/deblock-a.asm
+++ b/common/x86/deblock-a.asm
@@ -1429,7 +1429,11 @@ DEBLOCK_LUMA v, 16
%define t5 m11
%define mask0 m12
%define mask1p m13
+%if WIN64
+ %define mask1q [rsp]
+%else
%define mask1q [rsp-24]
+%endif
%define mpb_0 m14
%define mpb_1 m15
%else
@@ -1448,7 +1452,7 @@ DEBLOCK_LUMA v, 16
;-----------------------------------------------------------------------------
; void deblock_v_luma_intra( uint8_t *pix, intptr_t stride, int alpha, int beta )
;-----------------------------------------------------------------------------
-cglobal deblock_%1_luma_intra, 4,6,16,ARCH_X86_64*0x50-0x50
+cglobal deblock_%1_luma_intra, 4,6,16,0-(1-ARCH_X86_64)*0x50-WIN64*0x10
lea r4, [r1*4]
lea r5, [r1*3] ; 3*stride
dec r2d ; alpha-1
diff --git a/common/x86/mc-a.asm b/common/x86/mc-a.asm
index 8e568cf..e6b2190 100644
--- a/common/x86/mc-a.asm
+++ b/common/x86/mc-a.asm
@@ -1514,7 +1514,11 @@ ALIGN 4
mov t0, r0
mov t1, r1
mov t2, r3
+%if WIN64
+ %define multy0 r4m
+%else
%define multy0 [rsp-8]
+%endif
mova multy0, m5
%else
mov r3m, r3
diff --git a/common/x86/pixel-a.asm b/common/x86/pixel-a.asm
index d5d3e90..77a1c30 100644
--- a/common/x86/pixel-a.asm
+++ b/common/x86/pixel-a.asm
@@ -1913,15 +1913,16 @@ cglobal hadamard_load
; void intra_satd_x3_4x4( uint8_t *fenc, uint8_t *fdec, int *res )
;-----------------------------------------------------------------------------
cglobal intra_satd_x3_4x4, 3,3
-%if ARCH_X86_64
+%if UNIX64
; stack is 16 byte aligned because abi says so
%define top_1d rsp-8 ; size 8
%define left_1d rsp-16 ; size 8
%else
- ; stack is 16 byte aligned at least in gcc, and we've pushed 3 regs + return address, so it's still aligned
- SUB esp, 16
- %define top_1d esp+8
- %define left_1d esp
+ ; WIN64: stack is 16 byte aligned because abi says so
+ ; X86_32: stack is 16 byte aligned at least in gcc, and we've pushed 3 regs + return address, so it's still aligned
+ SUB rsp, 16
+ %define top_1d rsp+8
+ %define left_1d rsp
%endif
call hadamard_load
@@ -1943,8 +1944,8 @@ cglobal intra_satd_x3_4x4, 3,3
movd [r2+0], m0 ; i4x4_v satd
movd [r2+4], m4 ; i4x4_h satd
movd [r2+8], m5 ; i4x4_dc satd
-%if ARCH_X86_64 == 0
- ADD esp, 16
+%if UNIX64 == 0
+ ADD rsp, 16
%endif
RET
More information about the x264-devel mailing list