[x264-devel] Windows x64 support

Loren Merritt lorenm at u.washington.edu
Wed Jan 28 10:06:25 CET 2009


On Wed, 28 Jan 2009, BugMaster wrote:

> On Wed, 28 Jan 2009 08:22:20 +0000 (UTC), Loren Merritt wrote:
>> On Wed, 28 Jan 2009, BugMaster wrote:
>>>
>>> First of all, it doesn't compile because the registers "r8dq" and "r9dq"
>>> don't exist. It would compile after replacing
>>>        %define r%1mp %6q
>>> with
>>>        %define r%1mp %2
>>> Also I prefer original variant because it is clearer and specifies
>>> memory size (dword/qword) which is good for finding errors on
>>> compilation time.
>>
>> Then include dword, which the original didn't. And remove dword from all
>> the uses of rNmd. And add a rNmb and rNmw since not all of them are dword.
>
> That is another extreme, which I don't think is good. By the way, I would
> then also need to include "dqword" because there are a few SSE instructions
> that need it.

I don't see why type-checking qword is good but type-checking dword isn't.
But here's a version which should be functionally equivalent to yours and 
still eliminates all that duplication.

--Loren Merritt
-------------- next part --------------
diff --git a/common/x86/x86inc.asm b/common/x86/x86inc.asm
index e420532..f2144d7 100644
--- a/common/x86/x86inc.asm
+++ b/common/x86/x86inc.asm
@@ -96,13 +96,25 @@
 ; Same, but if it doesn't pop anything it becomes a 2-byte ret, for athlons
 ; which are slow when a normal ret follows a branch.
 
-%macro DECLARE_REG 7
+; registers:
+; rN and rNq are the native-size register holding function argument N
+; rNd, rNw, rNb are dword, word, and byte size
+; rNm is the original location of arg N (a register or on the stack), dword
+; rNmp is native size
+
+%macro DECLARE_REG 6
     %define r%1q %2
     %define r%1d %3
     %define r%1w %4
     %define r%1b %5
     %define r%1m %6
-    %define r%1mp %7
+    %ifid %6 ; i.e. it's a register
+        %define r%1mp %2
+    %elifdef ARCH_X86_64 ; memory
+        %define r%1mp qword %6
+    %else
+        %define r%1mp dword %6
+    %endif
     %define r%1  %2
 %endmacro
 
@@ -227,13 +239,13 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7
 
 %ifdef WIN64 ; Windows x64 ;==================================================
 
-DECLARE_REG 0, rcx, ecx, cx,  cl,  ecx, rcx
-DECLARE_REG 1, rdx, edx, dx,  dl,  edx, rdx
-DECLARE_REG 2, r8,  r8d, r8w, r8b, r8d, r8
-DECLARE_REG 3, r9,  r9d, r9w, r9b, r9d, r9
-DECLARE_REG 4, rdi, edi, di,  dil, [rsp + stack_offset + 40], qword [rsp + stack_offset + 40]
-DECLARE_REG 5, rsi, esi, si,  sil, [rsp + stack_offset + 48], qword [rsp + stack_offset + 48]
-DECLARE_REG 6, rax, eax, ax,  al,  [rsp + stack_offset + 56], qword [rsp + stack_offset + 56]
+DECLARE_REG 0, rcx, ecx, cx,  cl,  ecx
+DECLARE_REG 1, rdx, edx, dx,  dl,  edx
+DECLARE_REG 2, r8,  r8d, r8w, r8b, r8d
+DECLARE_REG 3, r9,  r9d, r9w, r9b, r9d
+DECLARE_REG 4, rdi, edi, di,  dil, [rsp + stack_offset + 40]
+DECLARE_REG 5, rsi, esi, si,  sil, [rsp + stack_offset + 48]
+DECLARE_REG 6, rax, eax, ax,  al,  [rsp + stack_offset + 56]
 %define r7m [rsp + stack_offset + 64]
 %define r8m [rsp + stack_offset + 72]
 
@@ -309,13 +321,13 @@ DECLARE_REG 6, rax, eax, ax,  al,  [rsp + stack_offset + 56], qword [rsp + stack
 
 %elifdef ARCH_X86_64 ; *nix x64 ;=============================================
 
-DECLARE_REG 0, rdi, edi, di,  dil, edi, rdi
-DECLARE_REG 1, rsi, esi, si,  sil, esi, rsi
-DECLARE_REG 2, rdx, edx, dx,  dl,  edx, rdx
-DECLARE_REG 3, rcx, ecx, cx,  cl,  ecx, rcx
-DECLARE_REG 4, r8,  r8d, r8w, r8b, r8d, r8
-DECLARE_REG 5, r9,  r9d, r9w, r9b, r9d, r9
-DECLARE_REG 6, rax, eax, ax,  al,  [rsp + stack_offset + 8], qword [rsp + stack_offset + 8]
+DECLARE_REG 0, rdi, edi, di,  dil, edi
+DECLARE_REG 1, rsi, esi, si,  sil, esi
+DECLARE_REG 2, rdx, edx, dx,  dl,  edx
+DECLARE_REG 3, rcx, ecx, cx,  cl,  ecx
+DECLARE_REG 4, r8,  r8d, r8w, r8b, r8d
+DECLARE_REG 5, r9,  r9d, r9w, r9b, r9d
+DECLARE_REG 6, rax, eax, ax,  al,  [rsp + stack_offset + 8]
 %define r7m [rsp + stack_offset + 16]
 %define r8m [rsp + stack_offset + 24]
 
@@ -342,13 +354,13 @@ DECLARE_REG 6, rax, eax, ax,  al,  [rsp + stack_offset + 8], qword [rsp + stack_
 
 %else ; X86_32 ;==============================================================
 
-DECLARE_REG 0, eax, eax, ax, al,   [esp + stack_offset + 4],  dword [esp + stack_offset + 4]
-DECLARE_REG 1, ecx, ecx, cx, cl,   [esp + stack_offset + 8],  dword [esp + stack_offset + 8]
-DECLARE_REG 2, edx, edx, dx, dl,   [esp + stack_offset + 12], dword [esp + stack_offset + 12]
-DECLARE_REG 3, ebx, ebx, bx, bl,   [esp + stack_offset + 16], dword [esp + stack_offset + 16]
-DECLARE_REG 4, esi, esi, si, null, [esp + stack_offset + 20], dword [esp + stack_offset + 20]
-DECLARE_REG 5, edi, edi, di, null, [esp + stack_offset + 24], dword [esp + stack_offset + 24]
-DECLARE_REG 6, ebp, ebp, bp, null, [esp + stack_offset + 28], dword [esp + stack_offset + 28]
+DECLARE_REG 0, eax, eax, ax, al,   [esp + stack_offset + 4]
+DECLARE_REG 1, ecx, ecx, cx, cl,   [esp + stack_offset + 8]
+DECLARE_REG 2, edx, edx, dx, dl,   [esp + stack_offset + 12]
+DECLARE_REG 3, ebx, ebx, bx, bl,   [esp + stack_offset + 16]
+DECLARE_REG 4, esi, esi, si, null, [esp + stack_offset + 20]
+DECLARE_REG 5, edi, edi, di, null, [esp + stack_offset + 24]
+DECLARE_REG 6, ebp, ebp, bp, null, [esp + stack_offset + 28]
 %define r7m [esp + stack_offset + 32]
 %define r8m [esp + stack_offset + 36]
 %define rsp esp


More information about the x264-devel mailing list