[x265] [PATCH 4/6] AArch64: Add Windows run-time CPU feature detection
Hari Limaye
hari.limaye at arm.com
Thu Oct 24 23:29:10 UTC 2024
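Add run-time CPU feature detection for AArch64 ISA extensions on Windows
platforms, using IsProcessorFeaturePresent() where the Windows SDK defines
the corresponding PF_ARM_* constant.

In CMake, allow AARCH64_RUNTIME_CPU_DETECT on Windows. Also move the
arm_neon_sve_bridge.h availability check out of the CPU_HAS_SVE block into a
new block that runs whenever ENABLE_SVE or ENABLE_SVE2 is set. The new block
additionally checks that the compiler can build SVE functions that require
backup/restore of SVE registers; if that check fails, SVE and SVE2 are
disabled.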
---
source/CMakeLists.txt | 71 ++++++++++++++++++++++++-------------
source/common/aarch64/cpu.h | 57 +++++++++++++++++++++++++++++
2 files changed, 104 insertions(+), 24 deletions(-)
diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
index cb513db8e..b311c350e 100755
--- a/source/CMakeLists.txt
+++ b/source/CMakeLists.txt
@@ -88,7 +88,7 @@ elseif(ARM64MATCH GREATER "-1")
option(AARCH64_WARNINGS_AS_ERRORS "Build with -Werror for AArch64 Intrinsics files" OFF)
option(AARCH64_RUNTIME_CPU_DETECT "Enable AArch64 run-time CPU feature detection" ON)
- if(NOT CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin")
+ if(NOT CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin|Windows")
set(AARCH64_RUNTIME_CPU_DETECT OFF CACHE BOOL "" FORCE)
message(STATUS "Run-time CPU feature detection unsupported on this platform")
endif()
@@ -306,6 +306,52 @@ if(GCC)
endif()
endif()
+ if(ENABLE_SVE OR ENABLE_SVE2)
+ set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
+ string(APPEND CMAKE_REQUIRED_FLAGS " ${AARCH64_SVE_FLAG}")
+ set(OLD_CMAKE_TRY_COMPILE_TARGET_TYPE ${CMAKE_TRY_COMPILE_TARGET_TYPE})
+ set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
+
+ # Check whether the compiler can compile SVE functions that require
+ # backup/restore of SVE registers according to AAPCS.
+ # https://github.com/llvm/llvm-project/issues/80009.
+ set(SVE_COMPILATION_TEST "
+#include <arm_sve.h>
+void other();
+svfloat32_t func(svfloat32_t a) {
+ other();
+ return a;
+}
+int main() { return 0; }")
+
+ check_c_source_compiles("${SVE_COMPILATION_TEST}" SVE_COMPILATION_C_TEST_COMPILED)
+ check_cxx_source_compiles("${SVE_COMPILATION_TEST}" SVE_COMPILATION_CXX_TEST_COMPILED)
+
+ # Check if arm_neon_sve_bridge.h is available.
+ set(SVE_HEADER_TEST "
+#ifndef __ARM_NEON_SVE_BRIDGE
+#error 1
+#endif
+#include <arm_sve.h>
+#include <arm_neon_sve_bridge.h>
+int main() { return 0; }")
+ check_c_source_compiles("${SVE_HEADER_TEST}" SVE_HEADER_C_TEST_COMPILED)
+ check_cxx_source_compiles("${SVE_HEADER_TEST}" SVE_HEADER_CXX_TEST_COMPILED)
+
+ set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS})
+ set(CMAKE_TRY_COMPILE_TARGET_TYPE ${OLD_CMAKE_TRY_COMPILE_TARGET_TYPE})
+ if (SVE_COMPILATION_C_TEST_COMPILED AND SVE_COMPILATION_CXX_TEST_COMPILED)
+ if (SVE_HEADER_C_TEST_COMPILED AND SVE_HEADER_CXX_TEST_COMPILED)
+ add_definitions(-DHAVE_SVE_BRIDGE=1)
+ set(HAVE_SVE_BRIDGE 1)
+ endif()
+ else()
+ set(ENABLE_SVE OFF CACHE BOOL "" FORCE)
+ set(ENABLE_SVE2 OFF CACHE BOOL "" FORCE)
+ message(STATUS "Disabling SVE and SVE2")
+ endif()
+ endif()
+
# Impose constraint that disabling one extension disables all 'higher order' ones.
if(NOT ENABLE_NEON)
message(STATUS "Disabling Neon")
@@ -359,29 +405,6 @@ if(GCC)
if(CC_HAS_FLAX_VEC_CONV_NONE)
set(ARM_ARGS ${ARM_ARGS} -flax-vector-conversions=none)
endif()
- if(CPU_HAS_SVE)
- set(SVE_HEADER_TEST "
-#ifndef __ARM_NEON_SVE_BRIDGE
-#error 1
-#endif
-#include <arm_sve.h>
-#include <arm_neon_sve_bridge.h>
-int main() { return 0; }")
- set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
- # CMAKE_REQUIRED_FLAGS requires a space-delimited string, whereas
- # ARM_ARGS is defined and used elsewhere as a ;-list.
- # Add `-march=...+sve` so the test functions correctly with Clang.
- foreach(ARM_ARG ${ARM_ARGS} ${AARCH64_SVE_FLAG})
- string(APPEND CMAKE_REQUIRED_FLAGS " ${ARM_ARG}")
- endforeach()
- check_c_source_compiles("${SVE_HEADER_TEST}" SVE_HEADER_C_TEST_COMPILED)
- check_cxx_source_compiles("${SVE_HEADER_TEST}" SVE_HEADER_CXX_TEST_COMPILED)
- set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS})
- if(SVE_HEADER_C_TEST_COMPILED AND SVE_HEADER_CXX_TEST_COMPILED)
- add_definitions(-DHAVE_SVE_BRIDGE=1)
- set(HAVE_SVE_BRIDGE 1)
- endif()
- endif()
endif()
if(ENABLE_PIC)
list(APPEND ARM_ARGS -DPIC)
diff --git a/source/common/aarch64/cpu.h b/source/common/aarch64/cpu.h
index 857ba980a..c61b86359 100644
--- a/source/common/aarch64/cpu.h
+++ b/source/common/aarch64/cpu.h
@@ -62,6 +62,63 @@ static inline int aarch64_get_cpu_flags()
return flags;
}
+#elif defined(_WIN32)
+
+#include <windows.h>
+
+static inline int aarch64_get_cpu_flags()
+{
+ int flags = 0;
+// IsProcessorFeaturePresent() parameter documentation:
+// https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent#parameters
+#if HAVE_NEON
+ flags |= X265_CPU_NEON;
+#endif // HAVE_NEON
+#if HAVE_NEON_DOTPROD
+// Support for PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE was added in Windows SDK
+// 20348, supported by Windows 11 and Windows Server 2022.
+#if defined(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)
+ if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE))
+ {
+ flags |= X265_CPU_NEON_DOTPROD;
+ }
+#endif // defined(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)
+#endif // HAVE_NEON_DOTPROD
+#if HAVE_NEON_I8MM
+// Support for PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE was added in Windows SDK
+// 26100.
+#if defined(PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE)
+ // There's no PF_* flag yet that indicates whether Neon I8MM is available
+ // or not. But if SVE_I8MM is available, that also implies that Neon I8MM
+ // is available.
+ if (IsProcessorFeaturePresent(PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE))
+ {
+ flags |= X265_CPU_NEON_I8MM;
+ }
+#endif // defined(PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE)
+#endif // HAVE_NEON_I8MM
+#if HAVE_SVE
+// Support for PF_ARM_SVE_INSTRUCTIONS_AVAILABLE was added in Windows SDK 26100.
+#if defined(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE)
+ if (IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE))
+ {
+ flags |= X265_CPU_SVE;
+ }
+#endif // defined(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE)
+#endif // HAVE_SVE
+#if HAVE_SVE2
+// Support for PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE was added in Windows SDK
+// 26100.
+#if defined(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE)
+ if (IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE))
+ {
+ flags |= X265_CPU_SVE2;
+ }
+#endif // defined(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE)
+#endif // HAVE_SVE2
+ return flags;
+}
+
#elif defined(__linux__)
#include <sys/auxv.h>
--
2.42.1
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0004-AArch64-Add-Windows-run-time-CPU-feature-detection.patch
Type: text/x-patch
Size: 7297 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20241025/e00627e6/attachment.bin>
More information about the x265-devel
mailing list