[x265] [PATCH 2 of 2] x86: Change assembler from YASM to NASM

chekib nouira chekib_nouira at yahoo.fr
Tue Nov 21 19:19:32 CET 2017


 Speaking of AVX-512, what speed-up does it offer compared to AVX2?
thx--Chekib
    Le mardi 21 novembre 2017 à 04:49:30 UTC−8, Andrey Semashev <andrey.semashev at gmail.com> a écrit :  
 
 On 11/21/17 15:25, Sean McGovern wrote:
> Hi,
> 
> Is this really necessary?
> 
> Ubuntu 16.04 Xenial Xerus only ships with nasm 2.11.08.

yasm does not support AVX-512 and has seen very little activity recently[1].
nasm has supported AVX-512 since 2.13[2]. Even if x265 does not use AVX-512
currently, this is the right way forward in the long run. It may be
possible to reduce the minimum required nasm version, though, until
AVX-512 support is added.

BTW, x264 and ffmpeg made a similar move recently.

[1]: https://github.com/yasm/yasm/commits/master
[2]: http://www.nasm.us/doc/nasmdocc.html

>    Original Message
> From: vignesh at multicorewareinc.com
> Sent: November 21, 2017 12:07 AM
> To: x265-devel at videolan.org
> Reply-to: x265-devel at videolan.org
> Subject: [x265] [PATCH 2 of 2] x86: Change assembler from YASM to NASM
> 
> # HG changeset patch
> # User Vignesh Vijayakumar<vignesh at multicorewareinc.com>
> # Date 1509595841 -19800
> #      Thu Nov 02 09:40:41 2017 +0530
> # Node ID 16ea92bf3627c6de43d583554df294dbbfd8fa8a
> # Parent  182bfd0d5af929a801a08b35ee863d79eadb2833
> x86: Change assembler from YASM to NASM
> 
> Supports NASM versions 2.13 and greater
> 
> diff -r 182bfd0d5af9 -r 16ea92bf3627 source/CMakeLists.txt
> --- a/source/CMakeLists.txt    Thu Nov 02 09:39:58 2017 +0530
> +++ b/source/CMakeLists.txt    Thu Nov 02 09:40:41 2017 +0530
> @@ -323,15 +323,15 @@
>       execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion OUTPUT_VARIABLE CC_VERSION)
> endif(GCC)
> 
> -find_package(Yasm)
> +find_package(Nasm)
> if(ARM OR CROSS_COMPILE_ARM)
>       option(ENABLE_ASSEMBLY "Enable use of assembly coded primitives" ON)
> -elseif(YASM_FOUND AND X86)
> -    if (YASM_VERSION_STRING VERSION_LESS "1.2.0")
> -        message(STATUS "Yasm version ${YASM_VERSION_STRING} is too old. 1.2.0 or later required")
> +elseif(NASM_FOUND AND X86)
> +    if (NASM_VERSION_STRING VERSION_LESS "2.13.0")
> +        message(STATUS "Nasm version ${NASM_VERSION_STRING} is too old. 2.13.0 or later required")
>           option(ENABLE_ASSEMBLY "Enable use of assembly coded primitives" OFF)
>       else()
> -        message(STATUS "Found Yasm ${YASM_VERSION_STRING} to build assembly primitives")
> +        message(STATUS "Found Nasm ${NASM_VERSION_STRING} to build assembly primitives")
>           option(ENABLE_ASSEMBLY "Enable use of assembly coded primitives" ON)
>       endif()
> else()
> @@ -517,18 +517,18 @@
>               list(APPEND ASM_OBJS ${ASM}.${SUFFIX})
>               add_custom_command(
>                   OUTPUT ${ASM}.${SUFFIX}
> -                COMMAND ${YASM_EXECUTABLE} ARGS ${YASM_FLAGS} ${ASM_SRC} -o ${ASM}.${SUFFIX}
> +                COMMAND ${NASM_EXECUTABLE} ARGS ${NASM_FLAGS} ${ASM_SRC} -o ${ASM}.${SUFFIX}
>                   DEPENDS ${ASM_SRC})
>           endforeach()
>       endif()
> endif()
> source_group(ASM FILES ${ASM_SRCS})
> if(ENABLE_HDR10_PLUS)
> -    add_library(x265-static STATIC $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> $<TARGET_OBJECTS:dynamicHDR10> ${ASM_OBJS} ${ASM_SRCS})
> +    add_library(x265-static STATIC $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> $<TARGET_OBJECTS:dynamicHDR10> ${ASM_OBJS})
>       add_library(hdr10plus-static STATIC $<TARGET_OBJECTS:dynamicHDR10>)
>       set_target_properties(hdr10plus-static PROPERTIES OUTPUT_NAME hdr10plus)
> else()
> -    add_library(x265-static STATIC $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> ${ASM_OBJS} ${ASM_SRCS})
> +    add_library(x265-static STATIC $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> ${ASM_OBJS})
> endif()
> if(NOT MSVC)
>       set_target_properties(x265-static PROPERTIES OUTPUT_NAME x265)
> @@ -686,11 +686,11 @@
>           if(ENABLE_HDR10_PLUS)
>           add_executable(cli ../COPYING ${InputFiles} ${OutputFiles} ${GETOPT}
>                           x265.cpp x265.h x265cli.h
> -                        $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> $<TARGET_OBJECTS:dynamicHDR10> ${ASM_OBJS} ${ASM_SRCS})
> +                        $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> $<TARGET_OBJECTS:dynamicHDR10> ${ASM_OBJS})
>           else()
>               add_executable(cli ../COPYING ${InputFiles} ${OutputFiles} ${GETOPT}
>                           x265.cpp x265.h x265cli.h
> -                        $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> ${ASM_OBJS} ${ASM_SRCS})
> +                        $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> ${ASM_OBJS})
>           endif()
>       else()
>           add_executable(cli ../COPYING ${InputFiles} ${OutputFiles} ${GETOPT} ${X265_RC_FILE}
> diff -r 182bfd0d5af9 -r 16ea92bf3627 source/cmake/CMakeASM_NASMInformation.cmake
> --- /dev/null    Thu Jan 01 00:00:00 1970 +0000
> +++ b/source/cmake/CMakeASM_NASMInformation.cmake    Thu Nov 02 09:40:41 2017 +0530
> @@ -0,0 +1,68 @@
> +set(ASM_DIALECT "_NASM")
> +set(CMAKE_ASM${ASM_DIALECT}_SOURCE_FILE_EXTENSIONS asm)
> +
> +if(X64)
> +    list(APPEND ASM_FLAGS -DARCH_X86_64=1 -I ${CMAKE_CURRENT_SOURCE_DIR}/../common/x86/)
> +    if(ENABLE_PIC)
> +        list(APPEND ASM_FLAGS -DPIC)
> +    endif()
> +    if(APPLE)
> +        set(ARGS -f macho64 -DPREFIX)
> +    elseif(UNIX AND NOT CYGWIN)
> +        set(ARGS -f elf64)
> +    else()
> +        set(ARGS -f win64)
> +    endif()
> +else()
> +    list(APPEND ASM_FLAGS -DARCH_X86_64=0 -I ${CMAKE_CURRENT_SOURCE_DIR}/../common/x86/)
> +    if(APPLE)
> +        set(ARGS -f macho32 -DPREFIX)
> +    elseif(UNIX AND NOT CYGWIN)
> +        set(ARGS -f elf32)
> +    else()
> +        set(ARGS -f win32 -DPREFIX)
> +    endif()
> +endif()
> +
> +if(GCC)
> +    list(APPEND ASM_FLAGS -DHAVE_ALIGNED_STACK=1)
> +else()
> +    list(APPEND ASM_FLAGS -DHAVE_ALIGNED_STACK=0)
> +endif()
> +
> +if(HIGH_BIT_DEPTH)
> +    if(MAIN12)
> +        list(APPEND ASM_FLAGS -DHIGH_BIT_DEPTH=1 -DBIT_DEPTH=12 -DX265_NS=${X265_NS})
> +    else()
> +        list(APPEND ASM_FLAGS -DHIGH_BIT_DEPTH=1 -DBIT_DEPTH=10 -DX265_NS=${X265_NS})
> +    endif()
> +else()
> +    list(APPEND ASM_FLAGS -DHIGH_BIT_DEPTH=0 -DBIT_DEPTH=8 -DX265_NS=${X265_NS})
> +endif()
> +
> +list(APPEND ASM_FLAGS "${CMAKE_ASM_NASM_FLAGS}")
> +
> +if(CMAKE_BUILD_TYPE MATCHES Release)
> +    list(APPEND ASM_FLAGS "${CMAKE_ASM_NASM_FLAGS_RELEASE}")
> +elseif(CMAKE_BUILD_TYPE MATCHES Debug)
> +    list(APPEND ASM_FLAGS "${CMAKE_ASM_NASM_FLAGS_DEBUG}")
> +elseif(CMAKE_BUILD_TYPE MATCHES MinSizeRel)
> +    list(APPEND ASM_FLAGS "${CMAKE_ASM_NASM_FLAGS_MINSIZEREL}")
> +elseif(CMAKE_BUILD_TYPE MATCHES RelWithDebInfo)
> +    list(APPEND ASM_FLAGS "${CMAKE_ASM_NASM_FLAGS_RELWITHDEBINFO}")
> +endif()
> +
> +set(NASM_FLAGS ${ARGS} ${ASM_FLAGS} PARENT_SCOPE)
> +string(REPLACE ";" " " CMAKE_ASM_NASM_COMPILER_ARG1 "${ARGS}")
> +
> +# This section exists to override the one in CMakeASMInformation.cmake
> +# (the default Information file). This removes the <FLAGS>
> +# thing so that your C compiler flags that have been set via
> +# set_target_properties don't get passed to nasm and confuse it.
> +if(NOT CMAKE_ASM${ASM_DIALECT}_COMPILE_OBJECT)
> +    string(REPLACE ";" " " STR_ASM_FLAGS "${ASM_FLAGS}")
> +    set(CMAKE_ASM${ASM_DIALECT}_COMPILE_OBJECT "<CMAKE_ASM${ASM_DIALECT}_COMPILER> ${STR_ASM_FLAGS} -o <OBJECT> <SOURCE>")
> +endif()
> +
> +include(CMakeASMInformation)
> +set(ASM_DIALECT)
> diff -r 182bfd0d5af9 -r 16ea92bf3627 source/cmake/CMakeASM_YASMInformation.cmake
> --- a/source/cmake/CMakeASM_YASMInformation.cmake    Thu Nov 02 09:39:58 2017 +0530
> +++ /dev/null    Thu Jan 01 00:00:00 1970 +0000
> @@ -1,68 +0,0 @@
> -set(ASM_DIALECT "_YASM")
> -set(CMAKE_ASM${ASM_DIALECT}_SOURCE_FILE_EXTENSIONS asm)
> -
> -if(X64)
> -    list(APPEND ASM_FLAGS -DARCH_X86_64=1)
> -    if(ENABLE_PIC)
> -        list(APPEND ASM_FLAGS -DPIC)
> -    endif()
> -    if(APPLE)
> -        set(ARGS -f macho64 -m amd64 -DPREFIX)
> -    elseif(UNIX AND NOT CYGWIN)
> -        set(ARGS -f elf64 -m amd64)
> -    else()
> -        set(ARGS -f win64 -m amd64)
> -    endif()
> -else()
> -    list(APPEND ASM_FLAGS -DARCH_X86_64=0)
> -    if(APPLE)
> -        set(ARGS -f macho -DPREFIX)
> -    elseif(UNIX AND NOT CYGWIN)
> -        set(ARGS -f elf32)
> -    else()
> -        set(ARGS -f win32 -DPREFIX)
> -    endif()
> -endif()
> -
> -if(GCC)
> -    list(APPEND ASM_FLAGS -DHAVE_ALIGNED_STACK=1)
> -else()
> -    list(APPEND ASM_FLAGS -DHAVE_ALIGNED_STACK=0)
> -endif()
> -
> -if(HIGH_BIT_DEPTH)
> -    if(MAIN12)
> -        list(APPEND ASM_FLAGS -DHIGH_BIT_DEPTH=1 -DBIT_DEPTH=12 -DX265_NS=${X265_NS})
> -    else()
> -        list(APPEND ASM_FLAGS -DHIGH_BIT_DEPTH=1 -DBIT_DEPTH=10 -DX265_NS=${X265_NS})
> -    endif()
> -else()
> -    list(APPEND ASM_FLAGS -DHIGH_BIT_DEPTH=0 -DBIT_DEPTH=8 -DX265_NS=${X265_NS})
> -endif()
> -
> -list(APPEND ASM_FLAGS "${CMAKE_ASM_YASM_FLAGS}")
> -
> -if(CMAKE_BUILD_TYPE MATCHES Release)
> -    list(APPEND ASM_FLAGS "${CMAKE_ASM_YASM_FLAGS_RELEASE}")
> -elseif(CMAKE_BUILD_TYPE MATCHES Debug)
> -    list(APPEND ASM_FLAGS "${CMAKE_ASM_YASM_FLAGS_DEBUG}")
> -elseif(CMAKE_BUILD_TYPE MATCHES MinSizeRel)
> -    list(APPEND ASM_FLAGS "${CMAKE_ASM_YASM_FLAGS_MINSIZEREL}")
> -elseif(CMAKE_BUILD_TYPE MATCHES RelWithDebInfo)
> -    list(APPEND ASM_FLAGS "${CMAKE_ASM_YASM_FLAGS_RELWITHDEBINFO}")
> -endif()
> -
> -set(YASM_FLAGS ${ARGS} ${ASM_FLAGS} PARENT_SCOPE)
> -string(REPLACE ";" " " CMAKE_ASM_YASM_COMPILER_ARG1 "${ARGS}")
> -
> -# This section exists to override the one in CMakeASMInformation.cmake
> -# (the default Information file). This removes the <FLAGS>
> -# thing so that your C compiler flags that have been set via
> -# set_target_properties don't get passed to yasm and confuse it.
> -if(NOT CMAKE_ASM${ASM_DIALECT}_COMPILE_OBJECT)
> -    string(REPLACE ";" " " STR_ASM_FLAGS "${ASM_FLAGS}")
> -    set(CMAKE_ASM${ASM_DIALECT}_COMPILE_OBJECT "<CMAKE_ASM${ASM_DIALECT}_COMPILER> ${STR_ASM_FLAGS} -o <OBJECT> <SOURCE>")
> -endif()
> -
> -include(CMakeASMInformation)
> -set(ASM_DIALECT)
> diff -r 182bfd0d5af9 -r 16ea92bf3627 source/cmake/CMakeDetermineASM_NASMCompiler.cmake
> --- /dev/null    Thu Jan 01 00:00:00 1970 +0000
> +++ b/source/cmake/CMakeDetermineASM_NASMCompiler.cmake    Thu Nov 02 09:40:41 2017 +0530
> @@ -0,0 +1,5 @@
> +set(ASM_DIALECT "_NASM")
> +set(CMAKE_ASM${ASM_DIALECT}_COMPILER ${NASM_EXECUTABLE})
> +set(CMAKE_ASM${ASM_DIALECT}_COMPILER_INIT ${_CMAKE_TOOLCHAIN_PREFIX}nasm)
> +include(CMakeDetermineASMCompiler)
> +set(ASM_DIALECT)
> diff -r 182bfd0d5af9 -r 16ea92bf3627 source/cmake/CMakeDetermineASM_YASMCompiler.cmake
> --- a/source/cmake/CMakeDetermineASM_YASMCompiler.cmake    Thu Nov 02 09:39:58 2017 +0530
> +++ /dev/null    Thu Jan 01 00:00:00 1970 +0000
> @@ -1,5 +0,0 @@
> -set(ASM_DIALECT "_YASM")
> -set(CMAKE_ASM${ASM_DIALECT}_COMPILER ${YASM_EXECUTABLE})
> -set(CMAKE_ASM${ASM_DIALECT}_COMPILER_INIT ${_CMAKE_TOOLCHAIN_PREFIX}yasm)
> -include(CMakeDetermineASMCompiler)
> -set(ASM_DIALECT)
> diff -r 182bfd0d5af9 -r 16ea92bf3627 source/cmake/CMakeTestASM_NASMCompiler.cmake
> --- /dev/null    Thu Jan 01 00:00:00 1970 +0000
> +++ b/source/cmake/CMakeTestASM_NASMCompiler.cmake    Thu Nov 02 09:40:41 2017 +0530
> @@ -0,0 +1,3 @@
> +set(ASM_DIALECT "_NASM")
> +include(CMakeTestASMCompiler)
> +set(ASM_DIALECT)
> diff -r 182bfd0d5af9 -r 16ea92bf3627 source/cmake/CMakeTestASM_YASMCompiler.cmake
> --- a/source/cmake/CMakeTestASM_YASMCompiler.cmake    Thu Nov 02 09:39:58 2017 +0530
> +++ /dev/null    Thu Jan 01 00:00:00 1970 +0000
> @@ -1,3 +0,0 @@
> -set(ASM_DIALECT "_YASM")
> -include(CMakeTestASMCompiler)
> -set(ASM_DIALECT)
> diff -r 182bfd0d5af9 -r 16ea92bf3627 source/cmake/FindNasm.cmake
> --- /dev/null    Thu Jan 01 00:00:00 1970 +0000
> +++ b/source/cmake/FindNasm.cmake    Thu Nov 02 09:40:41 2017 +0530
> @@ -0,0 +1,25 @@
> +include(FindPackageHandleStandardArgs)
> +
> +# Simple path search with NASM_ROOT environment variable override
> +find_program(NASM_EXECUTABLE
> + NAMES nasm nasm-2.13.0-win32 nasm-2.13.0-win64 nasm nasm-2.13.0-win32 nasm-2.13.0-win64
> + HINTS $ENV{NASM_ROOT} ${NASM_ROOT}
> + PATH_SUFFIXES bin
> +)
> +
> +if(NASM_EXECUTABLE)
> +        execute_process(COMMAND ${NASM_EXECUTABLE} -version
> +            OUTPUT_VARIABLE nasm_version
> +            ERROR_QUIET
> +            OUTPUT_STRIP_TRAILING_WHITESPACE
> +            )
> +    if(nasm_version MATCHES "^NASM version ([0-9\\.]*)")
> +        set(NASM_VERSION_STRING "${CMAKE_MATCH_1}")
> +    endif()
> +    unset(nasm_version)
> +endif()
> +
> +# Provide standardized success/failure messages
> +find_package_handle_standard_args(nasm
> +    REQUIRED_VARS NASM_EXECUTABLE
> +    VERSION_VAR NASM_VERSION_STRING)
> diff -r 182bfd0d5af9 -r 16ea92bf3627 source/cmake/FindYasm.cmake
> --- a/source/cmake/FindYasm.cmake    Thu Nov 02 09:39:58 2017 +0530
> +++ /dev/null    Thu Jan 01 00:00:00 1970 +0000
> @@ -1,25 +0,0 @@
> -include(FindPackageHandleStandardArgs)
> -
> -# Simple path search with YASM_ROOT environment variable override
> -find_program(YASM_EXECUTABLE
> - NAMES yasm yasm-1.2.0-win32 yasm-1.2.0-win64 yasm yasm-1.3.0-win32 yasm-1.3.0-win64
> - HINTS $ENV{YASM_ROOT} ${YASM_ROOT}
> - PATH_SUFFIXES bin
> -)
> -
> -if(YASM_EXECUTABLE)
> -    execute_process(COMMAND ${YASM_EXECUTABLE} --version
> -        OUTPUT_VARIABLE yasm_version
> -        ERROR_QUIET
> -        OUTPUT_STRIP_TRAILING_WHITESPACE
> -        )
> -    if(yasm_version MATCHES "^yasm ([0-9\\.]*)")
> -        set(YASM_VERSION_STRING "${CMAKE_MATCH_1}")
> -    endif()
> -    unset(yasm_version)
> -endif()
> -
> -# Provide standardized success/failure messages
> -find_package_handle_standard_args(yasm
> -    REQUIRED_VARS YASM_EXECUTABLE
> -    VERSION_VAR YASM_VERSION_STRING)
> diff -r 182bfd0d5af9 -r 16ea92bf3627 source/common/CMakeLists.txt
> --- a/source/common/CMakeLists.txt    Thu Nov 02 09:39:58 2017 +0530
> +++ b/source/common/CMakeLists.txt    Thu Nov 02 09:40:41 2017 +0530
> @@ -72,12 +72,12 @@
>       endif()
> 
>       if(MSVC_IDE OR XCODE)
> -        # MSVC requires custom build rules in the main cmake script for yasm
> -        set(MSVC_ASMS "${A_SRCS}" CACHE INTERNAL "yasm sources")
> +        # MSVC requires custom build rules in the main cmake script for nasm
> +        set(MSVC_ASMS "${A_SRCS}" CACHE INTERNAL "nasm sources")
>           set(A_SRCS)
>       endif()
> 
> -    enable_language(ASM_YASM)
> +    enable_language(ASM_NASM)
> 
>       foreach(SRC ${A_SRCS} ${C_SRCS})
>           set(ASM_PRIMITIVES ${ASM_PRIMITIVES} x86/${SRC})
> diff -r 182bfd0d5af9 -r 16ea92bf3627 source/common/x86/x86inc.asm
> --- a/source/common/x86/x86inc.asm    Thu Nov 02 09:39:58 2017 +0530
> +++ b/source/common/x86/x86inc.asm    Thu Nov 02 09:40:41 2017 +0530
> @@ -66,6 +66,15 @@
>       %endif
> %endif
> 
> +%define FORMAT_ELF 0
> +%ifidn __OUTPUT_FORMAT__,elf
> +    %define FORMAT_ELF 1
> +%elifidn __OUTPUT_FORMAT__,elf32
> +    %define FORMAT_ELF 1
> +%elifidn __OUTPUT_FORMAT__,elf64
> +    %define FORMAT_ELF 1
> +%endif
> +
> %ifdef PREFIX
>       %define mangle(x) _ %+ x
> %else
> @@ -88,6 +97,10 @@
>       default rel
> %endif
> 
> +%ifdef __NASM_VER__
> +    %use smartalign
> +%endif
> +
> ; Macros to eliminate most code duplication between x86_32 and x86_64:
> ; Currently this works only for leaf functions which load all their arguments
> ; into registers at the start, and make no other use of the stack. Luckily that
> @@ -685,7 +698,7 @@
>           CAT_XDEFINE cglobaled_, %2, 1
>       %endif
>       %xdefine current_function %2
> -    %ifidn __OUTPUT_FORMAT__,elf
> +    %if FORMAT_ELF
>           global %2:function %%VISIBILITY
>       %else
>           global %2
> @@ -711,14 +724,16 @@
> 
> ; like cextern, but without the prefix
> %macro cextern_naked 1
> -    %xdefine %1 mangle(%1)
> +    %ifdef PREFIX
> +        %xdefine %1 mangle(%1)
> +    %endif
>       CAT_XDEFINE cglobaled_, %1, 1
>       extern %1
> %endmacro
> 
> %macro const 1-2+
>       %xdefine %1 mangle(private_prefix %+ _ %+ %1)
> -    %ifidn __OUTPUT_FORMAT__,elf
> +    %if FORMAT_ELF
>           global %1:data hidden
>       %else
>           global %1
> @@ -727,9 +742,8 @@
>       %1: %2
> %endmacro
> 
> -; This is needed for ELF, otherwise the GNU linker assumes the stack is
> -; executable by default.
> -%ifidn __OUTPUT_FORMAT__,elf
> +; This is needed for ELF, otherwise the GNU linker assumes the stack is executable by default.
> +%if FORMAT_ELF
>       [SECTION .note.GNU-stack noalloc noexec nowrite progbits]
> %endif
> 
> @@ -801,9 +815,17 @@
>       %endif
> 
>       %if ARCH_X86_64 || cpuflag(sse2)
> -        CPU amdnop
> +        %ifdef __NASM_VER__
> +            ALIGNMODE p6
> +        %else
> +            CPU amdnop
> +        %endif
>       %else
> -        CPU basicnop
> +        %ifdef __NASM_VER__
> +            ALIGNMODE nop
> +        %else
> +            CPU basicnop
> +        %endif
>       %endif
> %endmacro
> 
> @@ -1467,7 +1489,7 @@
>                   v%5%6 %1, %2, %3, %4
>               %elifidn %1, %2
>                   ; If %3 or %4 is a memory operand it needs to be encoded as the last operand.
> -                %ifid %3
> +                %ifnum sizeof%3
>                       v%{5}213%6 %2, %3, %4
>                   %else
>                       v%{5}132%6 %2, %4, %3
> @@ -1491,14 +1513,3 @@
> FMA4_INSTR fmsubadd, pd, ps
> FMA4_INSTR fnmadd,   pd, ps, sd, ss
> FMA4_INSTR fnmsub,   pd, ps, sd, ss
> -
> -; workaround: vpbroadcastq is broken in x86_32 due to a yasm bug (fixed in 1.3.0)
> -%if __YASM_VERSION_ID__ < 0x01030000 && ARCH_X86_64 == 0
> -    %macro vpbroadcastq 2
> -        %if sizeof%1 == 16
> -            movddup %1, %2
> -        %else
> -            vbroadcastsd %1, %2
> -        %endif
> -    %endmacro
> -%endif
> diff -r 182bfd0d5af9 -r 16ea92bf3627 source/test/CMakeLists.txt
> --- a/source/test/CMakeLists.txt    Thu Nov 02 09:39:58 2017 +0530
> +++ b/source/test/CMakeLists.txt    Thu Nov 02 09:40:41 2017 +0530
> @@ -7,37 +7,37 @@
> 
> # add X86 assembly files
> if(X86)
> -enable_language(ASM_YASM)
> +enable_language(ASM_NASM)
> 
> if(MSVC_IDE)
> -    set(YASM_SRC checkasm-a.obj)
> +    set(NASM_SRC checkasm-a.obj)
>       add_custom_command(
>           OUTPUT checkasm-a.obj
> -        COMMAND ${YASM_EXECUTABLE}
> -        ARGS ${YASM_FLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/checkasm-a.asm -o checkasm-a.obj
> +        COMMAND ${NASM_EXECUTABLE}
> +        ARGS ${NASM_FLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/checkasm-a.asm -o checkasm-a.obj
>           DEPENDS checkasm-a.asm)
> else()
> -    set(YASM_SRC checkasm-a.asm)
> +    set(NASM_SRC checkasm-a.asm)
> endif()
> endif(X86)
> 
> # add ARM assembly files
> if(ARM OR CROSS_COMPILE_ARM)
>       enable_language(ASM)
> -    set(YASM_SRC checkasm-arm.S)
> +    set(NASM_SRC checkasm-arm.S)
>       add_custom_command(
>           OUTPUT checkasm-arm.obj
>           COMMAND ${CMAKE_CXX_COMPILER}
> -        ARGS ${YASM_FLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/checkasm-arm.S -o checkasm-arm.obj
> +        ARGS ${NASM_FLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/checkasm-arm.S -o checkasm-arm.obj
>           DEPENDS checkasm-arm.S)
> endif(ARM OR CROSS_COMPILE_ARM)
> 
> # add PowerPC assembly files
> if(POWER)
> -    set(YASM_SRC)
> +    set(NASM_SRC)
> endif(POWER)
> 
> -add_executable(TestBench ${YASM_SRC}
> +add_executable(TestBench ${NASM_SRC}
>       testbench.cpp testharness.h
>       pixelharness.cpp pixelharness.h
>       mbdstharness.cpp mbdstharness.h
> diff -r 182bfd0d5af9 -r 16ea92bf3627 source/test/checkasm-a.asm
> --- a/source/test/checkasm-a.asm    Thu Nov 02 09:39:58 2017 +0530
> +++ b/source/test/checkasm-a.asm    Thu Nov 02 09:40:41 2017 +0530
> @@ -26,7 +26,7 @@
> ;* For more information, contact us at license @ x265.com.
> ;*****************************************************************************
> 
> -%include "../common/x86/x86inc.asm"
> +%include "x86inc.asm"
> 
> SECTION_RODATA
> 
> @@ -35,24 +35,24 @@
> %if ARCH_X86_64
> ; just random numbers to reduce the chance of incidental match
> ALIGN 16
> -x6:  ddq 0x79445c159ce790641a1b2550a612b48c
> -x7:  ddq 0x86b2536fcd8cf6362eed899d5a28ddcd
> -x8:  ddq 0x3f2bf84fc0fcca4eb0856806085e7943
> -x9:  ddq 0xd229e1f5b281303facbd382dcf5b8de2
> -x10: ddq 0xab63e2e11fa38ed971aeaff20b095fd9
> -x11: ddq 0x77d410d5c42c882d89b0c0765892729a
> -x12: ddq 0x24b3c1d2a024048bc45ea11a955d8dd5
> -x13: ddq 0xdd7b8919edd427862e8ec680de14b47c
> -x14: ddq 0x11e53e2b2ac655ef135ce6888fa02cbf
> -x15: ddq 0x6de8f4c914c334d5011ff554472a7a10
> -n7:   dq 0x21f86d66c8ca00ce
> -n8:   dq 0x75b6ba21077c48ad
> -n9:   dq 0xed56bb2dcb3c7736
> -n10:  dq 0x8bda43d3fd1a7e06
> -n11:  dq 0xb64a9c9e5d318408
> -n12:  dq 0xdf9a54b303f1d3a3
> -n13:  dq 0x4a75479abd64e097
> -n14:  dq 0x249214109d5d1c88
> +x6:  dq 0x1a1b2550a612b48c,0x79445c159ce79064
> +x7:  dq 0x2eed899d5a28ddcd,0x86b2536fcd8cf636
> +x8:  dq 0xb0856806085e7943,0x3f2bf84fc0fcca4e
> +x9:  dq 0xacbd382dcf5b8de2,0xd229e1f5b281303f
> +x10: dq 0x71aeaff20b095fd9,0xab63e2e11fa38ed9
> +x11: dq 0x89b0c0765892729a,0x77d410d5c42c882d
> +x12: dq 0xc45ea11a955d8dd5,0x24b3c1d2a024048b
> +x13: dq 0x2e8ec680de14b47c,0xdd7b8919edd42786
> +x14: dq 0x135ce6888fa02cbf,0x11e53e2b2ac655ef
> +x15: dq 0x011ff554472a7a10,0x6de8f4c914c334d5
> +n7:  dq 0x21f86d66c8ca00ce
> +n8:  dq 0x75b6ba21077c48ad
> +n9:  dq 0xed56bb2dcb3c7736
> +n10: dq 0x8bda43d3fd1a7e06
> +n11: dq 0xb64a9c9e5d318408
> +n12: dq 0xdf9a54b303f1d3a3
> +n13: dq 0x4a75479abd64e097
> +n14: dq 0x249214109d5d1c88
> %endif
> 
> SECTION .text
> @@ -70,14 +70,14 @@
> ;-----------------------------------------------------------------------------
> cglobal checkasm_stack_clobber, 1,2
>       ; Clobber the stack with junk below the stack pointer
> -    %define size (max_args+6)*8
> -    SUB  rsp, size
> -    mov   r1, size-8
> +    %define argsize (max_args+6)*8
> +    SUB  rsp, argsize
> +    mov   r1, argsize-8
> .loop:
>       mov [rsp+r1], r0
>       sub   r1, 8
>       jge .loop
> -    ADD  rsp, size
> +    ADD  rsp, argsize
>       RET
> 
> %if WIN64
> @@ -156,7 +156,11 @@
>       mov  r9, rax
>       mov r10, rdx
>       lea  r0, [error_message]
> +%if FORMAT_ELF
> +    call puts wrt ..plt
> +%else
>       call puts
> +%endif
>       mov  r1, [rsp+max_args*8]
>       mov  dword [r1], 0
>       mov  rdx, r10
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
> 

_______________________________________________
x265-devel mailing list
x265-devel at videolan.org
https://mailman.videolan.org/listinfo/x265-devel
  
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20171121/0d9b921f/attachment-0001.html>


More information about the x265-devel mailing list