[x264-devel] x86: Experimental nasm support
Henrik Gramner
git at videolan.org
Sun Jul 26 22:26:32 CEST 2015
x264 | branch: master | Henrik Gramner <henrik at gramner.com> | Sat May 23 19:44:16 2015 +0200| [b568a256b9bc6c500d7b1ffe4b9c3311ee5ff337] | committer: Henrik Gramner
x86: Experimental nasm support
Enables the use of nasm as an alternative to yasm.
Note that nasm cannot assemble x264 with PIC enabled since it currently doesn't
support [symbol-$$] addressing which is used extensively by x264's PIC code.
This limitation affects all 64-bit Windows and 64-bit OS X builds, even non-shared ones.
For the above reason, nasm is currently intentionally not auto-detected; instead,
the assembler must be explicitly specified using "AS=nasm ./configure".
Also drop -O2 from ASFLAGS since it's simply ignored anyway.
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=b568a256b9bc6c500d7b1ffe4b9c3311ee5ff337
---
common/x86/x86inc.asm | 56 +++++++++++++++++++++++++++++++++++--------------
configure | 16 +++++++-------
tools/checkasm-a.asm | 36 +++++++++++++++----------------
3 files changed, 66 insertions(+), 42 deletions(-)
diff --git a/common/x86/x86inc.asm b/common/x86/x86inc.asm
index 9c5193d..c0fa723 100644
--- a/common/x86/x86inc.asm
+++ b/common/x86/x86inc.asm
@@ -64,6 +64,15 @@
%endif
%endif
+%define FORMAT_ELF 0
+%ifidn __OUTPUT_FORMAT__,elf
+ %define FORMAT_ELF 1
+%elifidn __OUTPUT_FORMAT__,elf32
+ %define FORMAT_ELF 1
+%elifidn __OUTPUT_FORMAT__,elf64
+ %define FORMAT_ELF 1
+%endif
+
%ifdef PREFIX
%define mangle(x) _ %+ x
%else
@@ -86,6 +95,10 @@
default rel
%endif
+%ifdef __NASM_VER__
+ %use smartalign
+%endif
+
; Macros to eliminate most code duplication between x86_32 and x86_64:
; Currently this works only for leaf functions which load all their arguments
; into registers at the start, and make no other use of the stack. Luckily that
@@ -671,7 +684,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
CAT_XDEFINE cglobaled_, %2, 1
%endif
%xdefine current_function %2
- %ifidn __OUTPUT_FORMAT__,elf
+ %if FORMAT_ELF
global %2:function %%VISIBILITY
%else
global %2
@@ -697,14 +710,16 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
; like cextern, but without the prefix
%macro cextern_naked 1
- %xdefine %1 mangle(%1)
+ %ifdef PREFIX
+ %xdefine %1 mangle(%1)
+ %endif
CAT_XDEFINE cglobaled_, %1, 1
extern %1
%endmacro
%macro const 1-2+
%xdefine %1 mangle(private_prefix %+ _ %+ %1)
- %ifidn __OUTPUT_FORMAT__,elf
+ %if FORMAT_ELF
global %1:data hidden
%else
global %1
@@ -712,9 +727,8 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
%1: %2
%endmacro
-; This is needed for ELF, otherwise the GNU linker assumes the stack is
-; executable by default.
-%ifidn __OUTPUT_FORMAT__,elf
+; This is needed for ELF, otherwise the GNU linker assumes the stack is executable by default.
+%if FORMAT_ELF
[SECTION .note.GNU-stack noalloc noexec nowrite progbits]
%endif
@@ -785,9 +799,17 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
%endif
%if ARCH_X86_64 || cpuflag(sse2)
- CPU amdnop
+ %ifdef __NASM_VER__
+ ALIGNMODE k8
+ %else
+ CPU amdnop
+ %endif
%else
- CPU basicnop
+ %ifdef __NASM_VER__
+ ALIGNMODE nop
+ %else
+ CPU basicnop
+ %endif
%endif
%endmacro
@@ -1467,12 +1489,14 @@ FMA4_INSTR fnmsubsd, fnmsub132sd, fnmsub213sd, fnmsub231sd
FMA4_INSTR fnmsubss, fnmsub132ss, fnmsub213ss, fnmsub231ss
; workaround: vpbroadcastq is broken in x86_32 due to a yasm bug (fixed in 1.3.0)
-%if __YASM_VERSION_ID__ < 0x01030000 && ARCH_X86_64 == 0
- %macro vpbroadcastq 2
- %if sizeof%1 == 16
- movddup %1, %2
- %else
- vbroadcastsd %1, %2
- %endif
- %endmacro
+%ifdef __YASM_VER__
+ %if __YASM_VERSION_ID__ < 0x01030000 && ARCH_X86_64 == 0
+ %macro vpbroadcastq 2
+ %if sizeof%1 == 16
+ movddup %1, %2
+ %else
+ vbroadcastsd %1, %2
+ %endif
+ %endmacro
+ %endif
%endif
diff --git a/configure b/configure
index db11659..2c6cfe8 100755
--- a/configure
+++ b/configure
@@ -649,9 +649,9 @@ stack_alignment=16
case $host_cpu in
i*86)
ARCH="X86"
- AS="yasm"
+ AS="${AS-yasm}"
AS_EXT=".asm"
- ASFLAGS="$ASFLAGS -O2 -DARCH_X86_64=0 -I\$(SRCPATH)/common/x86/"
+ ASFLAGS="$ASFLAGS -DARCH_X86_64=0 -I\$(SRCPATH)/common/x86/"
if [ $compiler = GNU ]; then
if [[ "$asm" == auto && "$CFLAGS" != *-march* ]]; then
CFLAGS="$CFLAGS -march=i686"
@@ -678,36 +678,36 @@ case $host_cpu in
stack_alignment=4
fi
if [ "$SYS" = MACOSX ]; then
- ASFLAGS="$ASFLAGS -f macho -DPREFIX"
+ ASFLAGS="$ASFLAGS -f macho32 -DPREFIX"
elif [ "$SYS" = WINDOWS -o "$SYS" = CYGWIN ]; then
ASFLAGS="$ASFLAGS -f win32 -DPREFIX"
LDFLAGS="$LDFLAGS -Wl,--large-address-aware"
[ $compiler = GNU ] && LDFLAGS="$LDFLAGS -Wl,--nxcompat -Wl,--dynamicbase"
[ $compiler = GNU ] && RCFLAGS="--target=pe-i386 $RCFLAGS"
else
- ASFLAGS="$ASFLAGS -f elf"
+ ASFLAGS="$ASFLAGS -f elf32"
fi
;;
x86_64)
ARCH="X86_64"
- AS="yasm"
+ AS="${AS-yasm}"
AS_EXT=".asm"
ASFLAGS="$ASFLAGS -DARCH_X86_64=1 -I\$(SRCPATH)/common/x86/"
[ $compiler = GNU ] && CFLAGS="-m64 $CFLAGS" && LDFLAGS="-m64 $LDFLAGS"
if [ "$SYS" = MACOSX ]; then
- ASFLAGS="$ASFLAGS -f macho64 -m amd64 -DPIC -DPREFIX"
+ ASFLAGS="$ASFLAGS -f macho64 -DPIC -DPREFIX"
if cc_check '' "-arch x86_64"; then
CFLAGS="$CFLAGS -arch x86_64"
LDFLAGS="$LDFLAGS -arch x86_64"
fi
elif [ "$SYS" = WINDOWS -o "$SYS" = CYGWIN ]; then
- ASFLAGS="$ASFLAGS -f win32 -m amd64"
+ ASFLAGS="$ASFLAGS -f win64"
# only the GNU toolchain is inconsistent in prefixing function names with _
[ $compiler = GNU ] && cc_check "" "-S" && grep -q "_main:" conftest && ASFLAGS="$ASFLAGS -DPREFIX"
[ $compiler = GNU ] && LDFLAGS="$LDFLAGS -Wl,--nxcompat -Wl,--dynamicbase"
[ $compiler = GNU ] && RCFLAGS="--target=pe-x86-64 $RCFLAGS"
else
- ASFLAGS="$ASFLAGS -f elf -m amd64"
+ ASFLAGS="$ASFLAGS -f elf64"
fi
;;
powerpc|powerpc64)
diff --git a/tools/checkasm-a.asm b/tools/checkasm-a.asm
index fbe1291..51f7ab5 100644
--- a/tools/checkasm-a.asm
+++ b/tools/checkasm-a.asm
@@ -33,24 +33,24 @@ error_message: db "failed to preserve register", 0
%if ARCH_X86_64
; just random numbers to reduce the chance of incidental match
ALIGN 16
-x6: ddq 0x79445c159ce790641a1b2550a612b48c
-x7: ddq 0x86b2536fcd8cf6362eed899d5a28ddcd
-x8: ddq 0x3f2bf84fc0fcca4eb0856806085e7943
-x9: ddq 0xd229e1f5b281303facbd382dcf5b8de2
-x10: ddq 0xab63e2e11fa38ed971aeaff20b095fd9
-x11: ddq 0x77d410d5c42c882d89b0c0765892729a
-x12: ddq 0x24b3c1d2a024048bc45ea11a955d8dd5
-x13: ddq 0xdd7b8919edd427862e8ec680de14b47c
-x14: ddq 0x11e53e2b2ac655ef135ce6888fa02cbf
-x15: ddq 0x6de8f4c914c334d5011ff554472a7a10
-n7: dq 0x21f86d66c8ca00ce
-n8: dq 0x75b6ba21077c48ad
-n9: dq 0xed56bb2dcb3c7736
-n10: dq 0x8bda43d3fd1a7e06
-n11: dq 0xb64a9c9e5d318408
-n12: dq 0xdf9a54b303f1d3a3
-n13: dq 0x4a75479abd64e097
-n14: dq 0x249214109d5d1c88
+x6: dq 0x1a1b2550a612b48c,0x79445c159ce79064
+x7: dq 0x2eed899d5a28ddcd,0x86b2536fcd8cf636
+x8: dq 0xb0856806085e7943,0x3f2bf84fc0fcca4e
+x9: dq 0xacbd382dcf5b8de2,0xd229e1f5b281303f
+x10: dq 0x71aeaff20b095fd9,0xab63e2e11fa38ed9
+x11: dq 0x89b0c0765892729a,0x77d410d5c42c882d
+x12: dq 0xc45ea11a955d8dd5,0x24b3c1d2a024048b
+x13: dq 0x2e8ec680de14b47c,0xdd7b8919edd42786
+x14: dq 0x135ce6888fa02cbf,0x11e53e2b2ac655ef
+x15: dq 0x011ff554472a7a10,0x6de8f4c914c334d5
+n7: dq 0x21f86d66c8ca00ce
+n8: dq 0x75b6ba21077c48ad
+n9: dq 0xed56bb2dcb3c7736
+n10: dq 0x8bda43d3fd1a7e06
+n11: dq 0xb64a9c9e5d318408
+n12: dq 0xdf9a54b303f1d3a3
+n13: dq 0x4a75479abd64e097
+n14: dq 0x249214109d5d1c88
%endif
SECTION .text
More information about the x264-devel
mailing list