[x265] [X265][PATCH] Revert "AArch64: Runtime CPU feature detection"
Ponsanthini Arunachalam
ponsanthini.arunachalam at multicorewareinc.com
Wed Oct 9 10:25:56 UTC 2024
>From c2038d6d6825354a5137db9cdedabb2067d5b6d2 Mon Sep 17 00:00:00 2001
From: Harshitha Suresh <harshitha at multicorewareinc.com>
Date: Wed, 9 Oct 2024 11:58:03 +0530
Subject: [PATCH] Revert "AArch64: Runtime CPU feature detection"
This commit is being reverted because the feature detection flag for the
I8MM ARM optimization is not yet supported by Windows.
---
.../msys/make-x86_64-w64-mingw32-Makefiles.sh | 8 --
build/msys/toolchain-x86_64-w64-mingw32.cmake | 6 --
source/CMakeLists.txt | 21 ++---
source/common/CMakeLists.txt | 5 --
source/common/cpu.cpp | 85 +------------------
5 files changed, 9 insertions(+), 116 deletions(-)
delete mode 100644 build/msys/make-x86_64-w64-mingw32-Makefiles.sh
delete mode 100644 build/msys/toolchain-x86_64-w64-mingw32.cmake
diff --git a/build/msys/make-x86_64-w64-mingw32-Makefiles.sh b/build/msys/make-x86_64-w64-mingw32-Makefiles.sh
deleted file mode 100644
index d98eced87..000000000
--- a/build/msys/make-x86_64-w64-mingw32-Makefiles.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/bin/sh
-
-# This will generate a cross-compile environment, compiling an x86_64
-# Win64 target from a 32bit MinGW32 host environment. If your MinGW
-# install is 64bit, you can use the native compiler batch file:
-# make-Makefiles.sh
-
-cmake -G "MSYS Makefiles"
-DCMAKE_TOOLCHAIN_FILE=toolchain-x86_64-w64-mingw32.cmake ../../source &&
cmake-gui ../../source
diff --git a/build/msys/toolchain-x86_64-w64-mingw32.cmake b/build/msys/toolchain-x86_64-w64-mingw32.cmake
deleted file mode 100644
index a3f768b7e..000000000
--- a/build/msys/toolchain-x86_64-w64-mingw32.cmake
+++ /dev/null
@@ -1,6 +0,0 @@
-SET(CMAKE_SYSTEM_NAME Windows)
-SET(CMAKE_C_COMPILER x86_64-w64-mingw32-gcc)
-SET(CMAKE_CXX_COMPILER x86_64-w64-mingw32-g++)
-SET(CMAKE_RC_COMPILER x86_64-w64-mingw32-windres)
-SET(CMAKE_RANLIB x86_64-w64-mingw32-ranlib)
-SET(CMAKE_ASM_YASM_COMPILER yasm)
diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
index cd19050c3..8a3128bb7 100755
--- a/source/CMakeLists.txt
+++ b/source/CMakeLists.txt
@@ -303,12 +303,10 @@ if(GCC)
endif()
endif()
- set(ARM64_ARCH_ARGS "-O3")
if(CPU_HAS_NEON_DOTPROD)
# Neon DotProd is mandatory from Armv8.4.
message(STATUS "Found Neon DotProd")
- set(ARM64_ARCH_ARGS -march=armv8.2-a+dotprod)
- set(ARM_ARGS -O3)
+ set(ARM_ARGS -O3 -march=armv8.2-a+dotprod)
add_definitions(-DHAVE_NEON_DOTPROD=1)
endif()
if(CPU_HAS_NEON_I8MM)
@@ -318,8 +316,7 @@ if(GCC)
if(NOT CPU_HAS_NEON_DOTPROD)
message(FATAL_ERROR "Unsupported AArch64 feature
combination (Neon I8MM without Neon DotProd)")
endif()
- set(ARM64_ARCH_ARGS -march=armv8.2-a+dotprod+i8mm)
- set(ARM_ARGS -O3)
+ set(ARM_ARGS -O3 -march=armv8.2-a+dotprod+i8mm)
add_definitions(-DHAVE_NEON_I8MM=1)
endif()
if(CPU_HAS_SVE)
@@ -328,15 +325,13 @@ if(GCC)
if(NOT CPU_HAS_NEON_I8MM)
message(FATAL_ERROR "Unsupported AArch64 feature
combination (SVE without Neon I8MM)")
endif()
- set(ARM64_ARCH_ARGS -march=armv8.2-a+dotprod+i8mm+sve)
- set(ARM_ARGS -O3)
+ set(ARM_ARGS -O3 -march=armv8.2-a+dotprod+i8mm+sve)
add_definitions(-DHAVE_SVE=1)
endif()
if(CPU_HAS_SVE2)
message(STATUS "Found SVE2")
# SVE2 is only available from Armv9.0, and armv9-a implies +dotprod
- set(ARM64_ARCH_ARGS -march=armv9-a+i8mm+sve2)
- set(ARM_ARGS -O3)
+ set(ARM_ARGS -O3 -march=armv8.2-a+dotprod+i8mm+sve)
add_definitions(-DHAVE_SVE2=1)
endif()
set(ARM_ARGS ${ARM_ARGS} -fPIC)
@@ -701,7 +696,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)
add_custom_command(
OUTPUT ${ASM}.${SUFFIX}
COMMAND ${CMAKE_CXX_COMPILER}
- ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c
${ASM_SRC} -o ${ASM}.${SUFFIX}
+ ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o
${ASM}.${SUFFIX}
DEPENDS ${ASM_SRC})
endforeach()
if(CPU_HAS_SVE2)
@@ -712,7 +707,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)
add_custom_command(
OUTPUT ${ASM}.${SUFFIX}
COMMAND ${CMAKE_CXX_COMPILER}
- ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c
${ASM_SRC} -o ${ASM}.${SUFFIX}
+ ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o
${ASM}.${SUFFIX}
DEPENDS ${ASM_SRC})
endforeach()
endif()
@@ -724,7 +719,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)
add_custom_command(
OUTPUT ${ASM}.${SUFFIX}
COMMAND ${CMAKE_CXX_COMPILER}
- ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c
${ASM_SRC} -o ${ASM}.${SUFFIX}
+ ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o
${ASM}.${SUFFIX}
DEPENDS ${ASM_SRC})
endforeach()
endif()
@@ -736,7 +731,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)
add_custom_command(
OUTPUT ${ASM}.${SUFFIX}
COMMAND ${CMAKE_CXX_COMPILER}
- ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c
${ASM_SRC} -o ${ASM}.${SUFFIX}
+ ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o
${ASM}.${SUFFIX}
DEPENDS ${ASM_SRC})
endforeach()
endif()
diff --git a/source/common/CMakeLists.txt b/source/common/CMakeLists.txt
index 33025cada..dc4a74107 100644
--- a/source/common/CMakeLists.txt
+++ b/source/common/CMakeLists.txt
@@ -123,34 +123,29 @@ if(ENABLE_ASSEMBLY AND (ARM64 OR CROSS_COMPILE_ARM64))
set(ARM_ASMS_NEON_DOTPROD "${A_SRCS_NEON_DOTPROD}" CACHE INTERNAL "Arm
Assembly Sources that use the Neon DotProd extension")
foreach(SRC ${C_SRCS_NEON})
set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})
- set_source_files_properties( ${SRC} PROPERTIES COMPILE_FLAGS
${ARM64_ARCH_ARGS} )
endforeach()
if(CPU_HAS_NEON_I8MM)
foreach(SRC ${C_SRCS_NEON_I8MM})
set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})
- set_source_files_properties( aarch64/${SRC} PROPERTIES
COMPILE_FLAGS ${ARM64_ARCH_ARGS} )
endforeach()
endif()
if(CPU_HAS_NEON_DOTPROD)
foreach(SRC ${C_SRCS_NEON_DOTPROD})
set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})
- set_source_files_properties( aarch64/${SRC} PROPERTIES
COMPILE_FLAGS ${ARM64_ARCH_ARGS} )
endforeach()
endif()
if(CPU_HAS_SVE AND HAVE_SVE_BRIDGE)
foreach(SRC ${C_SRCS_SVE})
set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})
- set_source_files_properties( aarch64/${SRC} PROPERTIES
COMPILE_FLAGS ${ARM64_ARCH_ARGS} )
endforeach()
endif()
if(CPU_HAS_SVE2 AND HAVE_SVE_BRIDGE)
foreach(SRC ${C_SRCS_SVE2})
set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})
- set_source_files_properties( aarch64/${SRC} PROPERTIES
COMPILE_FLAGS ${ARM64_ARCH_ARGS} )
endforeach()
endif()
diff --git a/source/common/cpu.cpp b/source/common/cpu.cpp
index c1fa928e4..d18aeb8d2 100644
--- a/source/common/cpu.cpp
+++ b/source/common/cpu.cpp
@@ -391,8 +391,7 @@ uint32_t cpu_detect(bool benableavx512)
#elif X265_ARCH_ARM64
-#if defined(_MSC_VER) || defined(__APPLE__)
-uint32_t cpu_detect(bool /*benableavx512*/)
+uint32_t cpu_detect(bool benableavx512)
{
int flags = 0;
@@ -417,88 +416,6 @@ uint32_t cpu_detect(bool /*benableavx512*/)
return flags;
}
-// TODO: Remove isOryonCPU() once Windows defines PF_ flag for I8MM on
supported ARM64 devices
-#elif defined(__MINGW64__) // Windows+Aarch64
-
-#include <windows.h>
-#include <processthreadsapi.h>
-
-bool isOryonCPU()
-{
-
- char processorName[128];
- DWORD bufferSize = 128;
-
- LONG result = RegGetValue(HKEY_LOCAL_MACHINE,
"HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0",
"ProcessorNameString", RRF_RT_ANY, NULL, (PVOID)&processorName,
&bufferSize);
- if (strstr(processorName, "Oryon") != NULL)
- {
- return true;
- }
- else
- {
- return false;
- }
-}
-uint32_t cpu_detect(bool /*benableavx512*/)
-{
-
- int flags = 0;
-
-#ifdef ENABLE_ASSEMBLY
- #if HAVE_NEON
- flags |= X265_CPU_NEON; // All of ARM64 has NEON
- #endif
- #if HAVE_NEON_DOTPROD && defined(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)
- flags |=
IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) ?
X265_CPU_NEON_DOTPROD : 0;
- #endif
- #if HAVE_NEON_I8MM
- flags |= isOryonCPU() ? X265_CPU_NEON_I8MM : 0;
- #endif
- #if HAVE_SVE && defined(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE)
- flags |=
IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE) ? X265_CPU_SVE
: 0;
- #endif
- #if HAVE_SVE2 && defined(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE)
- flags |=
IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE) ?
X265_CPU_SVE2 : 0;
- #endif
-#endif
-
- return flags;
-} // end of Windows+Aarch64
-
-#else // Linux+Aarch64
-
-#include <asm/hwcap.h>
-#include <sys/auxv.h>
-
-uint32_t cpu_detect(bool /*benableavx512*/)
-{
- unsigned long hwcaps = getauxval(AT_HWCAP);
- unsigned long hwcaps2 = getauxval(AT_HWCAP2);
-
- int flags = 0;
-
-#ifdef ENABLE_ASSEMBLY
- #if HAVE_NEON
- flags |= X265_CPU_NEON; // All of ARM64 has NEON
- #endif
- #if HAVE_NEON_DOTPROD
- flags |= (hwcaps & HWCAP_ASIMDDP ? X265_CPU_NEON_DOTPROD : 0);
- #endif
- #if HAVE_NEON_I8MM
- flags |= (hwcaps2 & HWCAP2_I8MM ? X265_CPU_NEON_I8MM : 0);
- #endif
- #if HAVE_SVE
- flags |= (hwcaps & HWCAP_SVE ? X265_CPU_SVE : 0);
- #endif
- #if HAVE_SVE2
- flags |= (hwcaps2 & HWCAP2_SVE2 ? X265_CPU_SVE2 : 0);
- #endif
-#endif
-
- return flags;
-}
-#endif // end of Linux+AArch64
-
#elif X265_ARCH_POWER8
uint32_t cpu_detect(bool benableavx512)
--
2.36.0.windows.1
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20241009/d4ac3622/attachment-0001.htm>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-Revert-AArch64-Runtime-CPU-feature-detection.patch
Type: application/octet-stream
Size: 10365 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20241009/d4ac3622/attachment-0001.obj>
More information about the x265-devel mailing list