[x265] [X265][PATCH] Revert "AArch64: Runtime CPU feature detection"
Karam Singh
karam.singh at multicorewareinc.com
Fri Oct 11 06:48:10 UTC 2024
This patch has been pushed to the master branch.
*__________________________*
*Karam Singh*
*Ph.D. IIT Guwahati*
Senior Software (Video Coding) Engineer
Mobile: +91 8011279030
Block 9A, 6th floor, DLF Cyber City
Manapakkam, Chennai 600 089
On Wed, Oct 9, 2024 at 3:56 PM Ponsanthini Arunachalam <
ponsanthini.arunachalam at multicorewareinc.com> wrote:
> From c2038d6d6825354a5137db9cdedabb2067d5b6d2 Mon Sep 17 00:00:00 2001
> From: Harshitha Suresh <harshitha at multicorewareinc.com>
> Date: Wed, 9 Oct 2024 11:58:03 +0530
> Subject: [PATCH] Revert "AArch64: Runtime CPU feature detection"
>
> This commit is being reverted because the feature-detection flag for the
> I8MM Arm optimization is not yet supported by Windows.
> ---
> .../msys/make-x86_64-w64-mingw32-Makefiles.sh | 8 --
> build/msys/toolchain-x86_64-w64-mingw32.cmake | 6 --
> source/CMakeLists.txt | 21 ++---
> source/common/CMakeLists.txt | 5 --
> source/common/cpu.cpp | 85 +------------------
> 5 files changed, 9 insertions(+), 116 deletions(-)
> delete mode 100644 build/msys/make-x86_64-w64-mingw32-Makefiles.sh
> delete mode 100644 build/msys/toolchain-x86_64-w64-mingw32.cmake
>
> diff --git a/build/msys/make-x86_64-w64-mingw32-Makefiles.sh
> b/build/msys/make-x86_64-w64-mingw32-Makefiles.sh
> deleted file mode 100644
> index d98eced87..000000000
> --- a/build/msys/make-x86_64-w64-mingw32-Makefiles.sh
> +++ /dev/null
> @@ -1,8 +0,0 @@
> -#!/bin/sh
> -
> -# This will generate a cross-compile environment, compiling an x86_64
> -# Win64 target from a 32bit MinGW32 host environment. If your MinGW
> -# install is 64bit, you can use the native compiler batch file:
> -# make-Makefiles.sh
> -
> -cmake -G "MSYS Makefiles"
> -DCMAKE_TOOLCHAIN_FILE=toolchain-x86_64-w64-mingw32.cmake ../../source &&
> cmake-gui ../../source
> diff --git a/build/msys/toolchain-x86_64-w64-mingw32.cmake
> b/build/msys/toolchain-x86_64-w64-mingw32.cmake
> deleted file mode 100644
> index a3f768b7e..000000000
> --- a/build/msys/toolchain-x86_64-w64-mingw32.cmake
> +++ /dev/null
> @@ -1,6 +0,0 @@
> -SET(CMAKE_SYSTEM_NAME Windows)
> -SET(CMAKE_C_COMPILER x86_64-w64-mingw32-gcc)
> -SET(CMAKE_CXX_COMPILER x86_64-w64-mingw32-g++)
> -SET(CMAKE_RC_COMPILER x86_64-w64-mingw32-windres)
> -SET(CMAKE_RANLIB x86_64-w64-mingw32-ranlib)
> -SET(CMAKE_ASM_YASM_COMPILER yasm)
> diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
> index cd19050c3..8a3128bb7 100755
> --- a/source/CMakeLists.txt
> +++ b/source/CMakeLists.txt
> @@ -303,12 +303,10 @@ if(GCC)
> endif()
> endif()
>
> - set(ARM64_ARCH_ARGS "-O3")
> if(CPU_HAS_NEON_DOTPROD)
> # Neon DotProd is mandatory from Armv8.4.
> message(STATUS "Found Neon DotProd")
> - set(ARM64_ARCH_ARGS -march=armv8.2-a+dotprod)
> - set(ARM_ARGS -O3)
> + set(ARM_ARGS -O3 -march=armv8.2-a+dotprod)
> add_definitions(-DHAVE_NEON_DOTPROD=1)
> endif()
> if(CPU_HAS_NEON_I8MM)
> @@ -318,8 +316,7 @@ if(GCC)
> if(NOT CPU_HAS_NEON_DOTPROD)
> message(FATAL_ERROR "Unsupported AArch64 feature
> combination (Neon I8MM without Neon DotProd)")
> endif()
> - set(ARM64_ARCH_ARGS -march=armv8.2-a+dotprod+i8mm)
> - set(ARM_ARGS -O3)
> + set(ARM_ARGS -O3 -march=armv8.2-a+dotprod+i8mm)
> add_definitions(-DHAVE_NEON_I8MM=1)
> endif()
> if(CPU_HAS_SVE)
> @@ -328,15 +325,13 @@ if(GCC)
> if(NOT CPU_HAS_NEON_I8MM)
> message(FATAL_ERROR "Unsupported AArch64 feature
> combination (SVE without Neon I8MM)")
> endif()
> - set(ARM64_ARCH_ARGS -march=armv8.2-a+dotprod+i8mm+sve)
> - set(ARM_ARGS -O3)
> + set(ARM_ARGS -O3 -march=armv8.2-a+dotprod+i8mm+sve)
> add_definitions(-DHAVE_SVE=1)
> endif()
> if(CPU_HAS_SVE2)
> message(STATUS "Found SVE2")
> # SVE2 is only available from Armv9.0, and armv9-a implies
> +dotprod
> - set(ARM64_ARCH_ARGS -march=armv9-a+i8mm+sve2)
> - set(ARM_ARGS -O3)
> + set(ARM_ARGS -O3 -march=armv8.2-a+dotprod+i8mm+sve)
> add_definitions(-DHAVE_SVE2=1)
> endif()
> set(ARM_ARGS ${ARM_ARGS} -fPIC)
> @@ -701,7 +696,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)
> add_custom_command(
> OUTPUT ${ASM}.${SUFFIX}
> COMMAND ${CMAKE_CXX_COMPILER}
> - ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c
> ${ASM_SRC} -o ${ASM}.${SUFFIX}
> + ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o
> ${ASM}.${SUFFIX}
> DEPENDS ${ASM_SRC})
> endforeach()
> if(CPU_HAS_SVE2)
> @@ -712,7 +707,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)
> add_custom_command(
> OUTPUT ${ASM}.${SUFFIX}
> COMMAND ${CMAKE_CXX_COMPILER}
> - ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c
> ${ASM_SRC} -o ${ASM}.${SUFFIX}
> + ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o
> ${ASM}.${SUFFIX}
> DEPENDS ${ASM_SRC})
> endforeach()
> endif()
> @@ -724,7 +719,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)
> add_custom_command(
> OUTPUT ${ASM}.${SUFFIX}
> COMMAND ${CMAKE_CXX_COMPILER}
> - ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c
> ${ASM_SRC} -o ${ASM}.${SUFFIX}
> + ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o
> ${ASM}.${SUFFIX}
> DEPENDS ${ASM_SRC})
> endforeach()
> endif()
> @@ -736,7 +731,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)
> add_custom_command(
> OUTPUT ${ASM}.${SUFFIX}
> COMMAND ${CMAKE_CXX_COMPILER}
> - ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c
> ${ASM_SRC} -o ${ASM}.${SUFFIX}
> + ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o
> ${ASM}.${SUFFIX}
> DEPENDS ${ASM_SRC})
> endforeach()
> endif()
> diff --git a/source/common/CMakeLists.txt b/source/common/CMakeLists.txt
> index 33025cada..dc4a74107 100644
> --- a/source/common/CMakeLists.txt
> +++ b/source/common/CMakeLists.txt
> @@ -123,34 +123,29 @@ if(ENABLE_ASSEMBLY AND (ARM64 OR
> CROSS_COMPILE_ARM64))
> set(ARM_ASMS_NEON_DOTPROD "${A_SRCS_NEON_DOTPROD}" CACHE INTERNAL
> "Arm Assembly Sources that use the Neon DotProd extension")
> foreach(SRC ${C_SRCS_NEON})
> set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})
> - set_source_files_properties( ${SRC} PROPERTIES COMPILE_FLAGS
> ${ARM64_ARCH_ARGS} )
> endforeach()
>
> if(CPU_HAS_NEON_I8MM)
> foreach(SRC ${C_SRCS_NEON_I8MM})
> set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})
> - set_source_files_properties( aarch64/${SRC} PROPERTIES
> COMPILE_FLAGS ${ARM64_ARCH_ARGS} )
> endforeach()
> endif()
>
> if(CPU_HAS_NEON_DOTPROD)
> foreach(SRC ${C_SRCS_NEON_DOTPROD})
> set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})
> - set_source_files_properties( aarch64/${SRC} PROPERTIES
> COMPILE_FLAGS ${ARM64_ARCH_ARGS} )
> endforeach()
> endif()
>
> if(CPU_HAS_SVE AND HAVE_SVE_BRIDGE)
> foreach(SRC ${C_SRCS_SVE})
> set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})
> - set_source_files_properties( aarch64/${SRC} PROPERTIES
> COMPILE_FLAGS ${ARM64_ARCH_ARGS} )
> endforeach()
> endif()
>
> if(CPU_HAS_SVE2 AND HAVE_SVE_BRIDGE)
> foreach(SRC ${C_SRCS_SVE2})
> set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})
> - set_source_files_properties( aarch64/${SRC} PROPERTIES
> COMPILE_FLAGS ${ARM64_ARCH_ARGS} )
> endforeach()
> endif()
>
> diff --git a/source/common/cpu.cpp b/source/common/cpu.cpp
> index c1fa928e4..d18aeb8d2 100644
> --- a/source/common/cpu.cpp
> +++ b/source/common/cpu.cpp
> @@ -391,8 +391,7 @@ uint32_t cpu_detect(bool benableavx512)
>
> #elif X265_ARCH_ARM64
>
> -#if defined(_MSC_VER) || defined(__APPLE__)
> -uint32_t cpu_detect(bool /*benableavx512*/)
> +uint32_t cpu_detect(bool benableavx512)
> {
> int flags = 0;
>
> @@ -417,88 +416,6 @@ uint32_t cpu_detect(bool /*benableavx512*/)
> return flags;
> }
>
> -// TODO: Remove isOryonCPU() once Windows defines PF_ flag for I8MM on
> supported ARM64 devices
> -#elif defined(__MINGW64__) // Windows+Aarch64
> -
> -#include <windows.h>
> -#include <processthreadsapi.h>
> -
> -bool isOryonCPU()
> -{
> -
> - char processorName[128];
> - DWORD bufferSize = 128;
> -
> - LONG result = RegGetValue(HKEY_LOCAL_MACHINE,
> "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0",
> "ProcessorNameString", RRF_RT_ANY, NULL, (PVOID)&processorName,
> &bufferSize);
> - if (strstr(processorName, "Oryon") != NULL)
> - {
> - return true;
> - }
> - else
> - {
> - return false;
> - }
> -}
> -uint32_t cpu_detect(bool /*benableavx512*/)
> -{
> -
> - int flags = 0;
> -
> -#ifdef ENABLE_ASSEMBLY
> - #if HAVE_NEON
> - flags |= X265_CPU_NEON; // All of ARM64 has NEON
> - #endif
> - #if HAVE_NEON_DOTPROD && defined(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)
> - flags |=
> IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) ?
> X265_CPU_NEON_DOTPROD : 0;
> - #endif
> - #if HAVE_NEON_I8MM
> - flags |= isOryonCPU() ? X265_CPU_NEON_I8MM : 0;
> - #endif
> - #if HAVE_SVE && defined(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE)
> - flags |=
> IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE) ? X265_CPU_SVE
> : 0;
> - #endif
> - #if HAVE_SVE2 && defined(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE)
> - flags |=
> IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE) ?
> X265_CPU_SVE2 : 0;
> - #endif
> -#endif
> -
> - return flags;
> -} // end of Windows+Aarch64
> -
> -#else // Linux+Aarch64
> -
> -#include <asm/hwcap.h>
> -#include <sys/auxv.h>
> -
> -uint32_t cpu_detect(bool /*benableavx512*/)
> -{
> - unsigned long hwcaps = getauxval(AT_HWCAP);
> - unsigned long hwcaps2 = getauxval(AT_HWCAP2);
> -
> - int flags = 0;
> -
> -#ifdef ENABLE_ASSEMBLY
> - #if HAVE_NEON
> - flags |= X265_CPU_NEON; // All of ARM64 has NEON
> - #endif
> - #if HAVE_NEON_DOTPROD
> - flags |= (hwcaps & HWCAP_ASIMDDP ? X265_CPU_NEON_DOTPROD : 0);
> - #endif
> - #if HAVE_NEON_I8MM
> - flags |= (hwcaps2 & HWCAP2_I8MM ? X265_CPU_NEON_I8MM : 0);
> - #endif
> - #if HAVE_SVE
> - flags |= (hwcaps & HWCAP_SVE ? X265_CPU_SVE : 0);
> - #endif
> - #if HAVE_SVE2
> - flags |= (hwcaps2 & HWCAP2_SVE2 ? X265_CPU_SVE2 : 0);
> - #endif
> -#endif
> -
> - return flags;
> -}
> -#endif // end of Linux+AArch64
> -
> #elif X265_ARCH_POWER8
>
> uint32_t cpu_detect(bool benableavx512)
> --
> 2.36.0.windows.1
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20241011/a3d2cfcc/attachment-0001.htm>
More information about the x265-devel mailing list