[x265] [PATCH 2 of 2] x86: Change assembler from YASM to NASM
chekib nouira
chekib_nouira at yahoo.fr
Tue Nov 21 19:19:32 CET 2017
Speaking of AVX-512, what speedup does it offer compared to AVX2?
Thanks -- Chekib
Le mardi 21 novembre 2017 à 04:49:30 UTC−8, Andrey Semashev <andrey.semashev at gmail.com> a écrit :
On 11/21/17 15:25, Sean McGovern wrote:
> Hi,
>
> Is this really necessary?
>
> Ubuntu 16.04 Xenial Xerus only ships with nasm 2.11.08.
yasm does not support AVX-512 and has had very little activity recently[1].
nasm has supported AVX-512 since 2.13[2]. Even if x265 does not use AVX-512
currently, this is the right way forward in the long run. It may be
possible to reduce the minimum required nasm version though, until
AVX-512 support is added.
BTW, x264 and ffmpeg made a similar move recently.
[1]: https://github.com/yasm/yasm/commits/master
[2]: http://www.nasm.us/doc/nasmdocc.html
> Original Message
> From: vignesh at multicorewareinc.com
> Sent: November 21, 2017 12:07 AM
> To: x265-devel at videolan.org
> Reply-to: x265-devel at videolan.org
> Subject: [x265] [PATCH 2 of 2] x86: Change assembler from YASM to NASM
>
> # HG changeset patch
> # User Vignesh Vijayakumar<vignesh at multicorewareinc.com>
> # Date 1509595841 -19800
> # Thu Nov 02 09:40:41 2017 +0530
> # Node ID 16ea92bf3627c6de43d583554df294dbbfd8fa8a
> # Parent 182bfd0d5af929a801a08b35ee863d79eadb2833
> x86: Change assembler from YASM to NASM
>
> Supports NASM versions 2.13 and greater
>
> diff -r 182bfd0d5af9 -r 16ea92bf3627 source/CMakeLists.txt
> --- a/source/CMakeLists.txt Thu Nov 02 09:39:58 2017 +0530
> +++ b/source/CMakeLists.txt Thu Nov 02 09:40:41 2017 +0530
> @@ -323,15 +323,15 @@
> execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion OUTPUT_VARIABLE CC_VERSION)
> endif(GCC)
>
> -find_package(Yasm)
> +find_package(Nasm)
> if(ARM OR CROSS_COMPILE_ARM)
> option(ENABLE_ASSEMBLY "Enable use of assembly coded primitives" ON)
> -elseif(YASM_FOUND AND X86)
> - if (YASM_VERSION_STRING VERSION_LESS "1.2.0")
> - message(STATUS "Yasm version ${YASM_VERSION_STRING} is too old. 1.2.0 or later required")
> +elseif(NASM_FOUND AND X86)
> + if (NASM_VERSION_STRING VERSION_LESS "2.13.0")
> + message(STATUS "Nasm version ${NASM_VERSION_STRING} is too old. 2.13.0 or later required")
> option(ENABLE_ASSEMBLY "Enable use of assembly coded primitives" OFF)
> else()
> - message(STATUS "Found Yasm ${YASM_VERSION_STRING} to build assembly primitives")
> + message(STATUS "Found Nasm ${NASM_VERSION_STRING} to build assembly primitives")
> option(ENABLE_ASSEMBLY "Enable use of assembly coded primitives" ON)
> endif()
> else()
> @@ -517,18 +517,18 @@
> list(APPEND ASM_OBJS ${ASM}.${SUFFIX})
> add_custom_command(
> OUTPUT ${ASM}.${SUFFIX}
> - COMMAND ${YASM_EXECUTABLE} ARGS ${YASM_FLAGS} ${ASM_SRC} -o ${ASM}.${SUFFIX}
> + COMMAND ${NASM_EXECUTABLE} ARGS ${NASM_FLAGS} ${ASM_SRC} -o ${ASM}.${SUFFIX}
> DEPENDS ${ASM_SRC})
> endforeach()
> endif()
> endif()
> source_group(ASM FILES ${ASM_SRCS})
> if(ENABLE_HDR10_PLUS)
> - add_library(x265-static STATIC $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> $<TARGET_OBJECTS:dynamicHDR10> ${ASM_OBJS} ${ASM_SRCS})
> + add_library(x265-static STATIC $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> $<TARGET_OBJECTS:dynamicHDR10> ${ASM_OBJS})
> add_library(hdr10plus-static STATIC $<TARGET_OBJECTS:dynamicHDR10>)
> set_target_properties(hdr10plus-static PROPERTIES OUTPUT_NAME hdr10plus)
> else()
> - add_library(x265-static STATIC $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> ${ASM_OBJS} ${ASM_SRCS})
> + add_library(x265-static STATIC $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> ${ASM_OBJS})
> endif()
> if(NOT MSVC)
> set_target_properties(x265-static PROPERTIES OUTPUT_NAME x265)
> @@ -686,11 +686,11 @@
> if(ENABLE_HDR10_PLUS)
> add_executable(cli ../COPYING ${InputFiles} ${OutputFiles} ${GETOPT}
> x265.cpp x265.h x265cli.h
> - $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> $<TARGET_OBJECTS:dynamicHDR10> ${ASM_OBJS} ${ASM_SRCS})
> + $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> $<TARGET_OBJECTS:dynamicHDR10> ${ASM_OBJS})
> else()
> add_executable(cli ../COPYING ${InputFiles} ${OutputFiles} ${GETOPT}
> x265.cpp x265.h x265cli.h
> - $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> ${ASM_OBJS} ${ASM_SRCS})
> + $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> ${ASM_OBJS})
> endif()
> else()
> add_executable(cli ../COPYING ${InputFiles} ${OutputFiles} ${GETOPT} ${X265_RC_FILE}
> diff -r 182bfd0d5af9 -r 16ea92bf3627 source/cmake/CMakeASM_NASMInformation.cmake
> --- /dev/null Thu Jan 01 00:00:00 1970 +0000
> +++ b/source/cmake/CMakeASM_NASMInformation.cmake Thu Nov 02 09:40:41 2017 +0530
> @@ -0,0 +1,68 @@
> +set(ASM_DIALECT "_NASM")
> +set(CMAKE_ASM${ASM_DIALECT}_SOURCE_FILE_EXTENSIONS asm)
> +
> +if(X64)
> + list(APPEND ASM_FLAGS -DARCH_X86_64=1 -I ${CMAKE_CURRENT_SOURCE_DIR}/../common/x86/)
> + if(ENABLE_PIC)
> + list(APPEND ASM_FLAGS -DPIC)
> + endif()
> + if(APPLE)
> + set(ARGS -f macho64 -DPREFIX)
> + elseif(UNIX AND NOT CYGWIN)
> + set(ARGS -f elf64)
> + else()
> + set(ARGS -f win64)
> + endif()
> +else()
> + list(APPEND ASM_FLAGS -DARCH_X86_64=0 -I ${CMAKE_CURRENT_SOURCE_DIR}/../common/x86/)
> + if(APPLE)
> + set(ARGS -f macho32 -DPREFIX)
> + elseif(UNIX AND NOT CYGWIN)
> + set(ARGS -f elf32)
> + else()
> + set(ARGS -f win32 -DPREFIX)
> + endif()
> +endif()
> +
> +if(GCC)
> + list(APPEND ASM_FLAGS -DHAVE_ALIGNED_STACK=1)
> +else()
> + list(APPEND ASM_FLAGS -DHAVE_ALIGNED_STACK=0)
> +endif()
> +
> +if(HIGH_BIT_DEPTH)
> + if(MAIN12)
> + list(APPEND ASM_FLAGS -DHIGH_BIT_DEPTH=1 -DBIT_DEPTH=12 -DX265_NS=${X265_NS})
> + else()
> + list(APPEND ASM_FLAGS -DHIGH_BIT_DEPTH=1 -DBIT_DEPTH=10 -DX265_NS=${X265_NS})
> + endif()
> +else()
> + list(APPEND ASM_FLAGS -DHIGH_BIT_DEPTH=0 -DBIT_DEPTH=8 -DX265_NS=${X265_NS})
> +endif()
> +
> +list(APPEND ASM_FLAGS "${CMAKE_ASM_NASM_FLAGS}")
> +
> +if(CMAKE_BUILD_TYPE MATCHES Release)
> + list(APPEND ASM_FLAGS "${CMAKE_ASM_NASM_FLAGS_RELEASE}")
> +elseif(CMAKE_BUILD_TYPE MATCHES Debug)
> + list(APPEND ASM_FLAGS "${CMAKE_ASM_NASM_FLAGS_DEBUG}")
> +elseif(CMAKE_BUILD_TYPE MATCHES MinSizeRel)
> + list(APPEND ASM_FLAGS "${CMAKE_ASM_NASM_FLAGS_MINSIZEREL}")
> +elseif(CMAKE_BUILD_TYPE MATCHES RelWithDebInfo)
> + list(APPEND ASM_FLAGS "${CMAKE_ASM_NASM_FLAGS_RELWITHDEBINFO}")
> +endif()
> +
> +set(NASM_FLAGS ${ARGS} ${ASM_FLAGS} PARENT_SCOPE)
> +string(REPLACE ";" " " CMAKE_ASM_NASM_COMPILER_ARG1 "${ARGS}")
> +
> +# This section exists to override the one in CMakeASMInformation.cmake
> +# (the default Information file). This removes the <FLAGS>
> +# thing so that your C compiler flags that have been set via
> +# set_target_properties don't get passed to nasm and confuse it.
> +if(NOT CMAKE_ASM${ASM_DIALECT}_COMPILE_OBJECT)
> + string(REPLACE ";" " " STR_ASM_FLAGS "${ASM_FLAGS}")
> + set(CMAKE_ASM${ASM_DIALECT}_COMPILE_OBJECT "<CMAKE_ASM${ASM_DIALECT}_COMPILER> ${STR_ASM_FLAGS} -o <OBJECT> <SOURCE>")
> +endif()
> +
> +include(CMakeASMInformation)
> +set(ASM_DIALECT)
> diff -r 182bfd0d5af9 -r 16ea92bf3627 source/cmake/CMakeASM_YASMInformation.cmake
> --- a/source/cmake/CMakeASM_YASMInformation.cmake Thu Nov 02 09:39:58 2017 +0530
> +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
> @@ -1,68 +0,0 @@
> -set(ASM_DIALECT "_YASM")
> -set(CMAKE_ASM${ASM_DIALECT}_SOURCE_FILE_EXTENSIONS asm)
> -
> -if(X64)
> - list(APPEND ASM_FLAGS -DARCH_X86_64=1)
> - if(ENABLE_PIC)
> - list(APPEND ASM_FLAGS -DPIC)
> - endif()
> - if(APPLE)
> - set(ARGS -f macho64 -m amd64 -DPREFIX)
> - elseif(UNIX AND NOT CYGWIN)
> - set(ARGS -f elf64 -m amd64)
> - else()
> - set(ARGS -f win64 -m amd64)
> - endif()
> -else()
> - list(APPEND ASM_FLAGS -DARCH_X86_64=0)
> - if(APPLE)
> - set(ARGS -f macho -DPREFIX)
> - elseif(UNIX AND NOT CYGWIN)
> - set(ARGS -f elf32)
> - else()
> - set(ARGS -f win32 -DPREFIX)
> - endif()
> -endif()
> -
> -if(GCC)
> - list(APPEND ASM_FLAGS -DHAVE_ALIGNED_STACK=1)
> -else()
> - list(APPEND ASM_FLAGS -DHAVE_ALIGNED_STACK=0)
> -endif()
> -
> -if(HIGH_BIT_DEPTH)
> - if(MAIN12)
> - list(APPEND ASM_FLAGS -DHIGH_BIT_DEPTH=1 -DBIT_DEPTH=12 -DX265_NS=${X265_NS})
> - else()
> - list(APPEND ASM_FLAGS -DHIGH_BIT_DEPTH=1 -DBIT_DEPTH=10 -DX265_NS=${X265_NS})
> - endif()
> -else()
> - list(APPEND ASM_FLAGS -DHIGH_BIT_DEPTH=0 -DBIT_DEPTH=8 -DX265_NS=${X265_NS})
> -endif()
> -
> -list(APPEND ASM_FLAGS "${CMAKE_ASM_YASM_FLAGS}")
> -
> -if(CMAKE_BUILD_TYPE MATCHES Release)
> - list(APPEND ASM_FLAGS "${CMAKE_ASM_YASM_FLAGS_RELEASE}")
> -elseif(CMAKE_BUILD_TYPE MATCHES Debug)
> - list(APPEND ASM_FLAGS "${CMAKE_ASM_YASM_FLAGS_DEBUG}")
> -elseif(CMAKE_BUILD_TYPE MATCHES MinSizeRel)
> - list(APPEND ASM_FLAGS "${CMAKE_ASM_YASM_FLAGS_MINSIZEREL}")
> -elseif(CMAKE_BUILD_TYPE MATCHES RelWithDebInfo)
> - list(APPEND ASM_FLAGS "${CMAKE_ASM_YASM_FLAGS_RELWITHDEBINFO}")
> -endif()
> -
> -set(YASM_FLAGS ${ARGS} ${ASM_FLAGS} PARENT_SCOPE)
> -string(REPLACE ";" " " CMAKE_ASM_YASM_COMPILER_ARG1 "${ARGS}")
> -
> -# This section exists to override the one in CMakeASMInformation.cmake
> -# (the default Information file). This removes the <FLAGS>
> -# thing so that your C compiler flags that have been set via
> -# set_target_properties don't get passed to yasm and confuse it.
> -if(NOT CMAKE_ASM${ASM_DIALECT}_COMPILE_OBJECT)
> - string(REPLACE ";" " " STR_ASM_FLAGS "${ASM_FLAGS}")
> - set(CMAKE_ASM${ASM_DIALECT}_COMPILE_OBJECT "<CMAKE_ASM${ASM_DIALECT}_COMPILER> ${STR_ASM_FLAGS} -o <OBJECT> <SOURCE>")
> -endif()
> -
> -include(CMakeASMInformation)
> -set(ASM_DIALECT)
> diff -r 182bfd0d5af9 -r 16ea92bf3627 source/cmake/CMakeDetermineASM_NASMCompiler.cmake
> --- /dev/null Thu Jan 01 00:00:00 1970 +0000
> +++ b/source/cmake/CMakeDetermineASM_NASMCompiler.cmake Thu Nov 02 09:40:41 2017 +0530
> @@ -0,0 +1,5 @@
> +set(ASM_DIALECT "_NASM")
> +set(CMAKE_ASM${ASM_DIALECT}_COMPILER ${NASM_EXECUTABLE})
> +set(CMAKE_ASM${ASM_DIALECT}_COMPILER_INIT ${_CMAKE_TOOLCHAIN_PREFIX}nasm)
> +include(CMakeDetermineASMCompiler)
> +set(ASM_DIALECT)
> diff -r 182bfd0d5af9 -r 16ea92bf3627 source/cmake/CMakeDetermineASM_YASMCompiler.cmake
> --- a/source/cmake/CMakeDetermineASM_YASMCompiler.cmake Thu Nov 02 09:39:58 2017 +0530
> +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
> @@ -1,5 +0,0 @@
> -set(ASM_DIALECT "_YASM")
> -set(CMAKE_ASM${ASM_DIALECT}_COMPILER ${YASM_EXECUTABLE})
> -set(CMAKE_ASM${ASM_DIALECT}_COMPILER_INIT ${_CMAKE_TOOLCHAIN_PREFIX}yasm)
> -include(CMakeDetermineASMCompiler)
> -set(ASM_DIALECT)
> diff -r 182bfd0d5af9 -r 16ea92bf3627 source/cmake/CMakeTestASM_NASMCompiler.cmake
> --- /dev/null Thu Jan 01 00:00:00 1970 +0000
> +++ b/source/cmake/CMakeTestASM_NASMCompiler.cmake Thu Nov 02 09:40:41 2017 +0530
> @@ -0,0 +1,3 @@
> +set(ASM_DIALECT "_NASM")
> +include(CMakeTestASMCompiler)
> +set(ASM_DIALECT)
> diff -r 182bfd0d5af9 -r 16ea92bf3627 source/cmake/CMakeTestASM_YASMCompiler.cmake
> --- a/source/cmake/CMakeTestASM_YASMCompiler.cmake Thu Nov 02 09:39:58 2017 +0530
> +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
> @@ -1,3 +0,0 @@
> -set(ASM_DIALECT "_YASM")
> -include(CMakeTestASMCompiler)
> -set(ASM_DIALECT)
> diff -r 182bfd0d5af9 -r 16ea92bf3627 source/cmake/FindNasm.cmake
> --- /dev/null Thu Jan 01 00:00:00 1970 +0000
> +++ b/source/cmake/FindNasm.cmake Thu Nov 02 09:40:41 2017 +0530
> @@ -0,0 +1,25 @@
> +include(FindPackageHandleStandardArgs)
> +
> +# Simple path search with NASM_ROOT environment variable override
> +find_program(NASM_EXECUTABLE
> + NAMES nasm nasm-2.13.0-win32 nasm-2.13.0-win64 nasm nasm-2.13.0-win32 nasm-2.13.0-win64
> + HINTS $ENV{NASM_ROOT} ${NASM_ROOT}
> + PATH_SUFFIXES bin
> +)
> +
> +if(NASM_EXECUTABLE)
> + execute_process(COMMAND ${NASM_EXECUTABLE} -version
> + OUTPUT_VARIABLE nasm_version
> + ERROR_QUIET
> + OUTPUT_STRIP_TRAILING_WHITESPACE
> + )
> + if(nasm_version MATCHES "^NASM version ([0-9\\.]*)")
> + set(NASM_VERSION_STRING "${CMAKE_MATCH_1}")
> + endif()
> + unset(nasm_version)
> +endif()
> +
> +# Provide standardized success/failure messages
> +find_package_handle_standard_args(nasm
> + REQUIRED_VARS NASM_EXECUTABLE
> + VERSION_VAR NASM_VERSION_STRING)
> diff -r 182bfd0d5af9 -r 16ea92bf3627 source/cmake/FindYasm.cmake
> --- a/source/cmake/FindYasm.cmake Thu Nov 02 09:39:58 2017 +0530
> +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
> @@ -1,25 +0,0 @@
> -include(FindPackageHandleStandardArgs)
> -
> -# Simple path search with YASM_ROOT environment variable override
> -find_program(YASM_EXECUTABLE
> - NAMES yasm yasm-1.2.0-win32 yasm-1.2.0-win64 yasm yasm-1.3.0-win32 yasm-1.3.0-win64
> - HINTS $ENV{YASM_ROOT} ${YASM_ROOT}
> - PATH_SUFFIXES bin
> -)
> -
> -if(YASM_EXECUTABLE)
> - execute_process(COMMAND ${YASM_EXECUTABLE} --version
> - OUTPUT_VARIABLE yasm_version
> - ERROR_QUIET
> - OUTPUT_STRIP_TRAILING_WHITESPACE
> - )
> - if(yasm_version MATCHES "^yasm ([0-9\\.]*)")
> - set(YASM_VERSION_STRING "${CMAKE_MATCH_1}")
> - endif()
> - unset(yasm_version)
> -endif()
> -
> -# Provide standardized success/failure messages
> -find_package_handle_standard_args(yasm
> - REQUIRED_VARS YASM_EXECUTABLE
> - VERSION_VAR YASM_VERSION_STRING)
> diff -r 182bfd0d5af9 -r 16ea92bf3627 source/common/CMakeLists.txt
> --- a/source/common/CMakeLists.txt Thu Nov 02 09:39:58 2017 +0530
> +++ b/source/common/CMakeLists.txt Thu Nov 02 09:40:41 2017 +0530
> @@ -72,12 +72,12 @@
> endif()
>
> if(MSVC_IDE OR XCODE)
> - # MSVC requires custom build rules in the main cmake script for yasm
> - set(MSVC_ASMS "${A_SRCS}" CACHE INTERNAL "yasm sources")
> + # MSVC requires custom build rules in the main cmake script for nasm
> + set(MSVC_ASMS "${A_SRCS}" CACHE INTERNAL "nasm sources")
> set(A_SRCS)
> endif()
>
> - enable_language(ASM_YASM)
> + enable_language(ASM_NASM)
>
> foreach(SRC ${A_SRCS} ${C_SRCS})
> set(ASM_PRIMITIVES ${ASM_PRIMITIVES} x86/${SRC})
> diff -r 182bfd0d5af9 -r 16ea92bf3627 source/common/x86/x86inc.asm
> --- a/source/common/x86/x86inc.asm Thu Nov 02 09:39:58 2017 +0530
> +++ b/source/common/x86/x86inc.asm Thu Nov 02 09:40:41 2017 +0530
> @@ -66,6 +66,15 @@
> %endif
> %endif
>
> +%define FORMAT_ELF 0
> +%ifidn __OUTPUT_FORMAT__,elf
> + %define FORMAT_ELF 1
> +%elifidn __OUTPUT_FORMAT__,elf32
> + %define FORMAT_ELF 1
> +%elifidn __OUTPUT_FORMAT__,elf64
> + %define FORMAT_ELF 1
> +%endif
> +
> %ifdef PREFIX
> %define mangle(x) _ %+ x
> %else
> @@ -88,6 +97,10 @@
> default rel
> %endif
>
> +%ifdef __NASM_VER__
> + %use smartalign
> +%endif
> +
> ; Macros to eliminate most code duplication between x86_32 and x86_64:
> ; Currently this works only for leaf functions which load all their arguments
> ; into registers at the start, and make no other use of the stack. Luckily that
> @@ -685,7 +698,7 @@
> CAT_XDEFINE cglobaled_, %2, 1
> %endif
> %xdefine current_function %2
> - %ifidn __OUTPUT_FORMAT__,elf
> + %if FORMAT_ELF
> global %2:function %%VISIBILITY
> %else
> global %2
> @@ -711,14 +724,16 @@
>
> ; like cextern, but without the prefix
> %macro cextern_naked 1
> - %xdefine %1 mangle(%1)
> + %ifdef PREFIX
> + %xdefine %1 mangle(%1)
> + %endif
> CAT_XDEFINE cglobaled_, %1, 1
> extern %1
> %endmacro
>
> %macro const 1-2+
> %xdefine %1 mangle(private_prefix %+ _ %+ %1)
> - %ifidn __OUTPUT_FORMAT__,elf
> + %if FORMAT_ELF
> global %1:data hidden
> %else
> global %1
> @@ -727,9 +742,8 @@
> %1: %2
> %endmacro
>
> -; This is needed for ELF, otherwise the GNU linker assumes the stack is
> -; executable by default.
> -%ifidn __OUTPUT_FORMAT__,elf
> +; This is needed for ELF, otherwise the GNU linker assumes the stack is executable by default.
> +%if FORMAT_ELF
> [SECTION .note.GNU-stack noalloc noexec nowrite progbits]
> %endif
>
> @@ -801,9 +815,17 @@
> %endif
>
> %if ARCH_X86_64 || cpuflag(sse2)
> - CPU amdnop
> + %ifdef __NASM_VER__
> + ALIGNMODE p6
> + %else
> + CPU amdnop
> + %endif
> %else
> - CPU basicnop
> + %ifdef __NASM_VER__
> + ALIGNMODE nop
> + %else
> + CPU basicnop
> + %endif
> %endif
> %endmacro
>
> @@ -1467,7 +1489,7 @@
> v%5%6 %1, %2, %3, %4
> %elifidn %1, %2
> ; If %3 or %4 is a memory operand it needs to be encoded as the last operand.
> - %ifid %3
> + %ifnum sizeof%3
> v%{5}213%6 %2, %3, %4
> %else
> v%{5}132%6 %2, %4, %3
> @@ -1491,14 +1513,3 @@
> FMA4_INSTR fmsubadd, pd, ps
> FMA4_INSTR fnmadd, pd, ps, sd, ss
> FMA4_INSTR fnmsub, pd, ps, sd, ss
> -
> -; workaround: vpbroadcastq is broken in x86_32 due to a yasm bug (fixed in 1.3.0)
> -%if __YASM_VERSION_ID__ < 0x01030000 && ARCH_X86_64 == 0
> - %macro vpbroadcastq 2
> - %if sizeof%1 == 16
> - movddup %1, %2
> - %else
> - vbroadcastsd %1, %2
> - %endif
> - %endmacro
> -%endif
> diff -r 182bfd0d5af9 -r 16ea92bf3627 source/test/CMakeLists.txt
> --- a/source/test/CMakeLists.txt Thu Nov 02 09:39:58 2017 +0530
> +++ b/source/test/CMakeLists.txt Thu Nov 02 09:40:41 2017 +0530
> @@ -7,37 +7,37 @@
>
> # add X86 assembly files
> if(X86)
> -enable_language(ASM_YASM)
> +enable_language(ASM_NASM)
>
> if(MSVC_IDE)
> - set(YASM_SRC checkasm-a.obj)
> + set(NASM_SRC checkasm-a.obj)
> add_custom_command(
> OUTPUT checkasm-a.obj
> - COMMAND ${YASM_EXECUTABLE}
> - ARGS ${YASM_FLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/checkasm-a.asm -o checkasm-a.obj
> + COMMAND ${NASM_EXECUTABLE}
> + ARGS ${NASM_FLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/checkasm-a.asm -o checkasm-a.obj
> DEPENDS checkasm-a.asm)
> else()
> - set(YASM_SRC checkasm-a.asm)
> + set(NASM_SRC checkasm-a.asm)
> endif()
> endif(X86)
>
> # add ARM assembly files
> if(ARM OR CROSS_COMPILE_ARM)
> enable_language(ASM)
> - set(YASM_SRC checkasm-arm.S)
> + set(NASM_SRC checkasm-arm.S)
> add_custom_command(
> OUTPUT checkasm-arm.obj
> COMMAND ${CMAKE_CXX_COMPILER}
> - ARGS ${YASM_FLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/checkasm-arm.S -o checkasm-arm.obj
> + ARGS ${NASM_FLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/checkasm-arm.S -o checkasm-arm.obj
> DEPENDS checkasm-arm.S)
> endif(ARM OR CROSS_COMPILE_ARM)
>
> # add PowerPC assembly files
> if(POWER)
> - set(YASM_SRC)
> + set(NASM_SRC)
> endif(POWER)
>
> -add_executable(TestBench ${YASM_SRC}
> +add_executable(TestBench ${NASM_SRC}
> testbench.cpp testharness.h
> pixelharness.cpp pixelharness.h
> mbdstharness.cpp mbdstharness.h
> diff -r 182bfd0d5af9 -r 16ea92bf3627 source/test/checkasm-a.asm
> --- a/source/test/checkasm-a.asm Thu Nov 02 09:39:58 2017 +0530
> +++ b/source/test/checkasm-a.asm Thu Nov 02 09:40:41 2017 +0530
> @@ -26,7 +26,7 @@
> ;* For more information, contact us at license @ x265.com.
> ;*****************************************************************************
>
> -%include "../common/x86/x86inc.asm"
> +%include "x86inc.asm"
>
> SECTION_RODATA
>
> @@ -35,24 +35,24 @@
> %if ARCH_X86_64
> ; just random numbers to reduce the chance of incidental match
> ALIGN 16
> -x6: ddq 0x79445c159ce790641a1b2550a612b48c
> -x7: ddq 0x86b2536fcd8cf6362eed899d5a28ddcd
> -x8: ddq 0x3f2bf84fc0fcca4eb0856806085e7943
> -x9: ddq 0xd229e1f5b281303facbd382dcf5b8de2
> -x10: ddq 0xab63e2e11fa38ed971aeaff20b095fd9
> -x11: ddq 0x77d410d5c42c882d89b0c0765892729a
> -x12: ddq 0x24b3c1d2a024048bc45ea11a955d8dd5
> -x13: ddq 0xdd7b8919edd427862e8ec680de14b47c
> -x14: ddq 0x11e53e2b2ac655ef135ce6888fa02cbf
> -x15: ddq 0x6de8f4c914c334d5011ff554472a7a10
> -n7: dq 0x21f86d66c8ca00ce
> -n8: dq 0x75b6ba21077c48ad
> -n9: dq 0xed56bb2dcb3c7736
> -n10: dq 0x8bda43d3fd1a7e06
> -n11: dq 0xb64a9c9e5d318408
> -n12: dq 0xdf9a54b303f1d3a3
> -n13: dq 0x4a75479abd64e097
> -n14: dq 0x249214109d5d1c88
> +x6: dq 0x1a1b2550a612b48c,0x79445c159ce79064
> +x7: dq 0x2eed899d5a28ddcd,0x86b2536fcd8cf636
> +x8: dq 0xb0856806085e7943,0x3f2bf84fc0fcca4e
> +x9: dq 0xacbd382dcf5b8de2,0xd229e1f5b281303f
> +x10: dq 0x71aeaff20b095fd9,0xab63e2e11fa38ed9
> +x11: dq 0x89b0c0765892729a,0x77d410d5c42c882d
> +x12: dq 0xc45ea11a955d8dd5,0x24b3c1d2a024048b
> +x13: dq 0x2e8ec680de14b47c,0xdd7b8919edd42786
> +x14: dq 0x135ce6888fa02cbf,0x11e53e2b2ac655ef
> +x15: dq 0x011ff554472a7a10,0x6de8f4c914c334d5
> +n7: dq 0x21f86d66c8ca00ce
> +n8: dq 0x75b6ba21077c48ad
> +n9: dq 0xed56bb2dcb3c7736
> +n10: dq 0x8bda43d3fd1a7e06
> +n11: dq 0xb64a9c9e5d318408
> +n12: dq 0xdf9a54b303f1d3a3
> +n13: dq 0x4a75479abd64e097
> +n14: dq 0x249214109d5d1c88
> %endif
>
> SECTION .text
> @@ -70,14 +70,14 @@
> ;-----------------------------------------------------------------------------
> cglobal checkasm_stack_clobber, 1,2
> ; Clobber the stack with junk below the stack pointer
> - %define size (max_args+6)*8
> - SUB rsp, size
> - mov r1, size-8
> + %define argsize (max_args+6)*8
> + SUB rsp, argsize
> + mov r1, argsize-8
> .loop:
> mov [rsp+r1], r0
> sub r1, 8
> jge .loop
> - ADD rsp, size
> + ADD rsp, argsize
> RET
>
> %if WIN64
> @@ -156,7 +156,11 @@
> mov r9, rax
> mov r10, rdx
> lea r0, [error_message]
> +%if FORMAT_ELF
> + call puts wrt ..plt
> +%else
> call puts
> +%endif
> mov r1, [rsp+max_args*8]
> mov dword [r1], 0
> mov rdx, r10
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
_______________________________________________
x265-devel mailing list
x265-devel at videolan.org
https://mailman.videolan.org/listinfo/x265-devel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20171121/0d9b921f/attachment-0001.html>
More information about the x265-devel
mailing list