[x264-devel] x86: Experimental nasm support

Henrik Gramner git at videolan.org
Sun Jul 26 22:26:32 CEST 2015


x264 | branch: master | Henrik Gramner <henrik at gramner.com> | Sat May 23 19:44:16 2015 +0200| [b568a256b9bc6c500d7b1ffe4b9c3311ee5ff337] | committer: Henrik Gramner

x86: Experimental nasm support

Enables the use of nasm as an alternative to yasm.

Note that nasm cannot assemble x264 with PIC enabled since it currently doesn't
support [symbol-$$] addressing, which is used extensively by x264's PIC code.
This limitation affects all 64-bit Windows and 64-bit OS X builds, even
non-shared ones.

For the above reason, nasm is currently intentionally not auto-detected;
instead, the assembler must be explicitly specified using "AS=nasm ./configure".

Also drop -O2 from ASFLAGS since it's simply ignored anyway.

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=b568a256b9bc6c500d7b1ffe4b9c3311ee5ff337
---

 common/x86/x86inc.asm |   56 +++++++++++++++++++++++++++++++++++--------------
 configure             |   16 +++++++-------
 tools/checkasm-a.asm  |   36 +++++++++++++++----------------
 3 files changed, 66 insertions(+), 42 deletions(-)

diff --git a/common/x86/x86inc.asm b/common/x86/x86inc.asm
index 9c5193d..c0fa723 100644
--- a/common/x86/x86inc.asm
+++ b/common/x86/x86inc.asm
@@ -64,6 +64,15 @@
     %endif
 %endif
 
+%define FORMAT_ELF 0
+%ifidn __OUTPUT_FORMAT__,elf
+    %define FORMAT_ELF 1
+%elifidn __OUTPUT_FORMAT__,elf32
+    %define FORMAT_ELF 1
+%elifidn __OUTPUT_FORMAT__,elf64
+    %define FORMAT_ELF 1
+%endif
+
 %ifdef PREFIX
     %define mangle(x) _ %+ x
 %else
@@ -86,6 +95,10 @@
     default rel
 %endif
 
+%ifdef __NASM_VER__
+    %use smartalign
+%endif
+
 ; Macros to eliminate most code duplication between x86_32 and x86_64:
 ; Currently this works only for leaf functions which load all their arguments
 ; into registers at the start, and make no other use of the stack. Luckily that
@@ -671,7 +684,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
         CAT_XDEFINE cglobaled_, %2, 1
     %endif
     %xdefine current_function %2
-    %ifidn __OUTPUT_FORMAT__,elf
+    %if FORMAT_ELF
         global %2:function %%VISIBILITY
     %else
         global %2
@@ -697,14 +710,16 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
 
 ; like cextern, but without the prefix
 %macro cextern_naked 1
-    %xdefine %1 mangle(%1)
+    %ifdef PREFIX
+        %xdefine %1 mangle(%1)
+    %endif
     CAT_XDEFINE cglobaled_, %1, 1
     extern %1
 %endmacro
 
 %macro const 1-2+
     %xdefine %1 mangle(private_prefix %+ _ %+ %1)
-    %ifidn __OUTPUT_FORMAT__,elf
+    %if FORMAT_ELF
         global %1:data hidden
     %else
         global %1
@@ -712,9 +727,8 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
     %1: %2
 %endmacro
 
-; This is needed for ELF, otherwise the GNU linker assumes the stack is
-; executable by default.
-%ifidn __OUTPUT_FORMAT__,elf
+; This is needed for ELF, otherwise the GNU linker assumes the stack is executable by default.
+%if FORMAT_ELF
     [SECTION .note.GNU-stack noalloc noexec nowrite progbits]
 %endif
 
@@ -785,9 +799,17 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
     %endif
 
     %if ARCH_X86_64 || cpuflag(sse2)
-        CPU amdnop
+        %ifdef __NASM_VER__
+            ALIGNMODE k8
+        %else
+            CPU amdnop
+        %endif
     %else
-        CPU basicnop
+        %ifdef __NASM_VER__
+            ALIGNMODE nop
+        %else
+            CPU basicnop
+        %endif
     %endif
 %endmacro
 
@@ -1467,12 +1489,14 @@ FMA4_INSTR fnmsubsd, fnmsub132sd, fnmsub213sd, fnmsub231sd
 FMA4_INSTR fnmsubss, fnmsub132ss, fnmsub213ss, fnmsub231ss
 
 ; workaround: vpbroadcastq is broken in x86_32 due to a yasm bug (fixed in 1.3.0)
-%if __YASM_VERSION_ID__ < 0x01030000 && ARCH_X86_64 == 0
-    %macro vpbroadcastq 2
-        %if sizeof%1 == 16
-            movddup %1, %2
-        %else
-            vbroadcastsd %1, %2
-        %endif
-    %endmacro
+%ifdef __YASM_VER__
+    %if __YASM_VERSION_ID__ < 0x01030000 && ARCH_X86_64 == 0
+        %macro vpbroadcastq 2
+            %if sizeof%1 == 16
+                movddup %1, %2
+            %else
+                vbroadcastsd %1, %2
+            %endif
+        %endmacro
+    %endif
 %endif
diff --git a/configure b/configure
index db11659..2c6cfe8 100755
--- a/configure
+++ b/configure
@@ -649,9 +649,9 @@ stack_alignment=16
 case $host_cpu in
     i*86)
         ARCH="X86"
-        AS="yasm"
+        AS="${AS-yasm}"
         AS_EXT=".asm"
-        ASFLAGS="$ASFLAGS -O2 -DARCH_X86_64=0 -I\$(SRCPATH)/common/x86/"
+        ASFLAGS="$ASFLAGS -DARCH_X86_64=0 -I\$(SRCPATH)/common/x86/"
         if [ $compiler = GNU ]; then
             if [[ "$asm" == auto && "$CFLAGS" != *-march* ]]; then
                 CFLAGS="$CFLAGS -march=i686"
@@ -678,36 +678,36 @@ case $host_cpu in
             stack_alignment=4
         fi
         if [ "$SYS" = MACOSX ]; then
-            ASFLAGS="$ASFLAGS -f macho -DPREFIX"
+            ASFLAGS="$ASFLAGS -f macho32 -DPREFIX"
         elif [ "$SYS" = WINDOWS -o "$SYS" = CYGWIN ]; then
             ASFLAGS="$ASFLAGS -f win32 -DPREFIX"
             LDFLAGS="$LDFLAGS -Wl,--large-address-aware"
             [ $compiler = GNU ] && LDFLAGS="$LDFLAGS -Wl,--nxcompat -Wl,--dynamicbase"
             [ $compiler = GNU ] && RCFLAGS="--target=pe-i386 $RCFLAGS"
         else
-            ASFLAGS="$ASFLAGS -f elf"
+            ASFLAGS="$ASFLAGS -f elf32"
         fi
         ;;
     x86_64)
         ARCH="X86_64"
-        AS="yasm"
+        AS="${AS-yasm}"
         AS_EXT=".asm"
         ASFLAGS="$ASFLAGS -DARCH_X86_64=1 -I\$(SRCPATH)/common/x86/"
         [ $compiler = GNU ] && CFLAGS="-m64 $CFLAGS" && LDFLAGS="-m64 $LDFLAGS"
         if [ "$SYS" = MACOSX ]; then
-            ASFLAGS="$ASFLAGS -f macho64 -m amd64 -DPIC -DPREFIX"
+            ASFLAGS="$ASFLAGS -f macho64 -DPIC -DPREFIX"
             if cc_check '' "-arch x86_64"; then
                 CFLAGS="$CFLAGS -arch x86_64"
                 LDFLAGS="$LDFLAGS -arch x86_64"
             fi
         elif [ "$SYS" = WINDOWS -o "$SYS" = CYGWIN ]; then
-            ASFLAGS="$ASFLAGS -f win32 -m amd64"
+            ASFLAGS="$ASFLAGS -f win64"
             # only the GNU toolchain is inconsistent in prefixing function names with _
             [ $compiler = GNU ] && cc_check "" "-S" && grep -q "_main:" conftest && ASFLAGS="$ASFLAGS -DPREFIX"
             [ $compiler = GNU ] && LDFLAGS="$LDFLAGS -Wl,--nxcompat -Wl,--dynamicbase"
             [ $compiler = GNU ] && RCFLAGS="--target=pe-x86-64 $RCFLAGS"
         else
-            ASFLAGS="$ASFLAGS -f elf -m amd64"
+            ASFLAGS="$ASFLAGS -f elf64"
         fi
         ;;
     powerpc|powerpc64)
diff --git a/tools/checkasm-a.asm b/tools/checkasm-a.asm
index fbe1291..51f7ab5 100644
--- a/tools/checkasm-a.asm
+++ b/tools/checkasm-a.asm
@@ -33,24 +33,24 @@ error_message: db "failed to preserve register", 0
 %if ARCH_X86_64
 ; just random numbers to reduce the chance of incidental match
 ALIGN 16
-x6:  ddq 0x79445c159ce790641a1b2550a612b48c
-x7:  ddq 0x86b2536fcd8cf6362eed899d5a28ddcd
-x8:  ddq 0x3f2bf84fc0fcca4eb0856806085e7943
-x9:  ddq 0xd229e1f5b281303facbd382dcf5b8de2
-x10: ddq 0xab63e2e11fa38ed971aeaff20b095fd9
-x11: ddq 0x77d410d5c42c882d89b0c0765892729a
-x12: ddq 0x24b3c1d2a024048bc45ea11a955d8dd5
-x13: ddq 0xdd7b8919edd427862e8ec680de14b47c
-x14: ddq 0x11e53e2b2ac655ef135ce6888fa02cbf
-x15: ddq 0x6de8f4c914c334d5011ff554472a7a10
-n7:   dq 0x21f86d66c8ca00ce
-n8:   dq 0x75b6ba21077c48ad
-n9:   dq 0xed56bb2dcb3c7736
-n10:  dq 0x8bda43d3fd1a7e06
-n11:  dq 0xb64a9c9e5d318408
-n12:  dq 0xdf9a54b303f1d3a3
-n13:  dq 0x4a75479abd64e097
-n14:  dq 0x249214109d5d1c88
+x6:  dq 0x1a1b2550a612b48c,0x79445c159ce79064
+x7:  dq 0x2eed899d5a28ddcd,0x86b2536fcd8cf636
+x8:  dq 0xb0856806085e7943,0x3f2bf84fc0fcca4e
+x9:  dq 0xacbd382dcf5b8de2,0xd229e1f5b281303f
+x10: dq 0x71aeaff20b095fd9,0xab63e2e11fa38ed9
+x11: dq 0x89b0c0765892729a,0x77d410d5c42c882d
+x12: dq 0xc45ea11a955d8dd5,0x24b3c1d2a024048b
+x13: dq 0x2e8ec680de14b47c,0xdd7b8919edd42786
+x14: dq 0x135ce6888fa02cbf,0x11e53e2b2ac655ef
+x15: dq 0x011ff554472a7a10,0x6de8f4c914c334d5
+n7:  dq 0x21f86d66c8ca00ce
+n8:  dq 0x75b6ba21077c48ad
+n9:  dq 0xed56bb2dcb3c7736
+n10: dq 0x8bda43d3fd1a7e06
+n11: dq 0xb64a9c9e5d318408
+n12: dq 0xdf9a54b303f1d3a3
+n13: dq 0x4a75479abd64e097
+n14: dq 0x249214109d5d1c88
 %endif
 
 SECTION .text



More information about the x264-devel mailing list