[x264-devel] Windows x64 support
Loren Merritt
lorenm at u.washington.edu
Wed Jan 28 10:06:25 CET 2009
On Wed, 28 Jan 2009, BugMaster wrote:
> On Wed, 28 Jan 2009 08:22:20 +0000 (UTC), Loren Merritt wrote:
>> On Wed, 28 Jan 2009, BugMaster wrote:
>>>
>>> First of all, it doesn't compile because registers "r8dq" and "r9dq"
>>> don't exist. It would compile after replacing
>>> %define r%1mp %6q
>>> with
>>> %define r%1mp %2
>>> Also, I prefer the original variant because it is clearer and specifies
>>> the memory size (dword/qword), which is good for finding errors at
>>> compile time.
>>
>> Then include dword, which the original didn't. And remove dword from all
>> the uses of rNmd. And add a rNmb and rNmw since not all of them are dword.
>
> That is the other extreme, which I don't think is good. By the way, I would
> then also need to include "dqword" because there are a few SSE instructions
> that need it.
I don't see why type-checking qword is good but type-checking dword isn't.
But here's a version which should be functionally equivalent to yours and
still eliminates all that duplication.
--Loren Merritt
-------------- next part --------------
diff --git a/common/x86/x86inc.asm b/common/x86/x86inc.asm
index e420532..f2144d7 100644
--- a/common/x86/x86inc.asm
+++ b/common/x86/x86inc.asm
@@ -96,13 +96,25 @@
; Same, but if it doesn't pop anything it becomes a 2-byte ret, for athlons
; which are slow when a normal ret follows a branch.
-%macro DECLARE_REG 7
+; registers:
+; rN and rNq are the native-size register holding function argument N
+; rNd, rNw, rNb are dword, word, and byte size
+; rNm is the original location of arg N (a register or on the stack), dword
+; rNmp is native size
+
+%macro DECLARE_REG 6
%define r%1q %2
%define r%1d %3
%define r%1w %4
%define r%1b %5
%define r%1m %6
- %define r%1mp %7
+ %ifid %6 ; i.e. it's a register
+ %define r%1mp %2
+ %elifdef ARCH_X86_64 ; memory
+ %define r%1mp qword %6
+ %else
+ %define r%1mp dword %6
+ %endif
%define r%1 %2
%endmacro
@@ -227,13 +239,13 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7
%ifdef WIN64 ; Windows x64 ;==================================================
-DECLARE_REG 0, rcx, ecx, cx, cl, ecx, rcx
-DECLARE_REG 1, rdx, edx, dx, dl, edx, rdx
-DECLARE_REG 2, r8, r8d, r8w, r8b, r8d, r8
-DECLARE_REG 3, r9, r9d, r9w, r9b, r9d, r9
-DECLARE_REG 4, rdi, edi, di, dil, [rsp + stack_offset + 40], qword [rsp + stack_offset + 40]
-DECLARE_REG 5, rsi, esi, si, sil, [rsp + stack_offset + 48], qword [rsp + stack_offset + 48]
-DECLARE_REG 6, rax, eax, ax, al, [rsp + stack_offset + 56], qword [rsp + stack_offset + 56]
+DECLARE_REG 0, rcx, ecx, cx, cl, ecx
+DECLARE_REG 1, rdx, edx, dx, dl, edx
+DECLARE_REG 2, r8, r8d, r8w, r8b, r8d
+DECLARE_REG 3, r9, r9d, r9w, r9b, r9d
+DECLARE_REG 4, rdi, edi, di, dil, [rsp + stack_offset + 40]
+DECLARE_REG 5, rsi, esi, si, sil, [rsp + stack_offset + 48]
+DECLARE_REG 6, rax, eax, ax, al, [rsp + stack_offset + 56]
%define r7m [rsp + stack_offset + 64]
%define r8m [rsp + stack_offset + 72]
@@ -309,13 +321,13 @@ DECLARE_REG 6, rax, eax, ax, al, [rsp + stack_offset + 56], qword [rsp + stack
%elifdef ARCH_X86_64 ; *nix x64 ;=============================================
-DECLARE_REG 0, rdi, edi, di, dil, edi, rdi
-DECLARE_REG 1, rsi, esi, si, sil, esi, rsi
-DECLARE_REG 2, rdx, edx, dx, dl, edx, rdx
-DECLARE_REG 3, rcx, ecx, cx, cl, ecx, rcx
-DECLARE_REG 4, r8, r8d, r8w, r8b, r8d, r8
-DECLARE_REG 5, r9, r9d, r9w, r9b, r9d, r9
-DECLARE_REG 6, rax, eax, ax, al, [rsp + stack_offset + 8], qword [rsp + stack_offset + 8]
+DECLARE_REG 0, rdi, edi, di, dil, edi
+DECLARE_REG 1, rsi, esi, si, sil, esi
+DECLARE_REG 2, rdx, edx, dx, dl, edx
+DECLARE_REG 3, rcx, ecx, cx, cl, ecx
+DECLARE_REG 4, r8, r8d, r8w, r8b, r8d
+DECLARE_REG 5, r9, r9d, r9w, r9b, r9d
+DECLARE_REG 6, rax, eax, ax, al, [rsp + stack_offset + 8]
%define r7m [rsp + stack_offset + 16]
%define r8m [rsp + stack_offset + 24]
@@ -342,13 +354,13 @@ DECLARE_REG 6, rax, eax, ax, al, [rsp + stack_offset + 8], qword [rsp + stack_
%else ; X86_32 ;==============================================================
-DECLARE_REG 0, eax, eax, ax, al, [esp + stack_offset + 4], dword [esp + stack_offset + 4]
-DECLARE_REG 1, ecx, ecx, cx, cl, [esp + stack_offset + 8], dword [esp + stack_offset + 8]
-DECLARE_REG 2, edx, edx, dx, dl, [esp + stack_offset + 12], dword [esp + stack_offset + 12]
-DECLARE_REG 3, ebx, ebx, bx, bl, [esp + stack_offset + 16], dword [esp + stack_offset + 16]
-DECLARE_REG 4, esi, esi, si, null, [esp + stack_offset + 20], dword [esp + stack_offset + 20]
-DECLARE_REG 5, edi, edi, di, null, [esp + stack_offset + 24], dword [esp + stack_offset + 24]
-DECLARE_REG 6, ebp, ebp, bp, null, [esp + stack_offset + 28], dword [esp + stack_offset + 28]
+DECLARE_REG 0, eax, eax, ax, al, [esp + stack_offset + 4]
+DECLARE_REG 1, ecx, ecx, cx, cl, [esp + stack_offset + 8]
+DECLARE_REG 2, edx, edx, dx, dl, [esp + stack_offset + 12]
+DECLARE_REG 3, ebx, ebx, bx, bl, [esp + stack_offset + 16]
+DECLARE_REG 4, esi, esi, si, null, [esp + stack_offset + 20]
+DECLARE_REG 5, edi, edi, di, null, [esp + stack_offset + 24]
+DECLARE_REG 6, ebp, ebp, bp, null, [esp + stack_offset + 28]
%define r7m [esp + stack_offset + 32]
%define r8m [esp + stack_offset + 36]
%define rsp esp
More information about the x264-devel
mailing list