[x265] [PATCH 4/6] AArch64: Add Windows run-time CPU feature detection

Hari Limaye hari.limaye at arm.com
Thu Oct 24 23:29:10 UTC 2024


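Add run-time CPU feature detection for AArch64 ISA extensions on Windows
platforms, using IsProcessorFeaturePresent() for the features whose PF_*
constants are defined by the Windows SDK being built against.

This patch also reworks the SVE compiler checks in source/CMakeLists.txt:
the arm_neon_sve_bridge.h probe is moved ahead of the extension-ordering
constraints and is now gated on ENABLE_SVE/ENABLE_SVE2 rather than
CPU_HAS_SVE, and a new compile test disables SVE and SVE2 when the
compiler cannot build SVE functions that require backup/restore of SVE
registers (see https://github.com/llvm/llvm-project/issues/80009).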
---
 source/CMakeLists.txt       | 71 ++++++++++++++++++++++++-------------
 source/common/aarch64/cpu.h | 57 +++++++++++++++++++++++++++++
 2 files changed, 104 insertions(+), 24 deletions(-)

diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
index cb513db8e..b311c350e 100755
--- a/source/CMakeLists.txt
+++ b/source/CMakeLists.txt
@@ -88,7 +88,7 @@ elseif(ARM64MATCH GREATER "-1")
     option(AARCH64_WARNINGS_AS_ERRORS "Build with -Werror for AArch64 Intrinsics files" OFF)
 
     option(AARCH64_RUNTIME_CPU_DETECT "Enable AArch64 run-time CPU feature detection" ON)
-    if(NOT CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin")
+    if(NOT CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin|Windows")
         set(AARCH64_RUNTIME_CPU_DETECT OFF CACHE BOOL "" FORCE)
         message(STATUS "Run-time CPU feature detection unsupported on this platform")
     endif()
@@ -306,6 +306,52 @@ if(GCC)
             endif()
         endif()
 
+        if(ENABLE_SVE OR ENABLE_SVE2)
+            set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
+            string(APPEND CMAKE_REQUIRED_FLAGS " ${AARCH64_SVE_FLAG}")
+            set(OLD_CMAKE_TRY_COMPILE_TARGET_TYPE ${CMAKE_TRY_COMPILE_TARGET_TYPE})
+            set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
+
+            # Check whether the compiler can compile SVE functions that require
+            # backup/restore of SVE registers according to AAPCS.
+            # https://github.com/llvm/llvm-project/issues/80009.
+            set(SVE_COMPILATION_TEST "
+#include <arm_sve.h>
+void other();
+svfloat32_t func(svfloat32_t a) {
+  other();
+  return a;
+}
+int main() { return 0; }")
+
+            check_c_source_compiles("${SVE_COMPILATION_TEST}" SVE_COMPILATION_C_TEST_COMPILED)
+            check_cxx_source_compiles("${SVE_COMPILATION_TEST}" SVE_COMPILATION_CXX_TEST_COMPILED)
+
+            # Check if arm_neon_sve_bridge.h is available.
+            set(SVE_HEADER_TEST "
+#ifndef __ARM_NEON_SVE_BRIDGE
+#error 1
+#endif
+#include <arm_sve.h>
+#include <arm_neon_sve_bridge.h>
+int main() { return 0; }")
+            check_c_source_compiles("${SVE_HEADER_TEST}" SVE_HEADER_C_TEST_COMPILED)
+            check_cxx_source_compiles("${SVE_HEADER_TEST}" SVE_HEADER_CXX_TEST_COMPILED)
+
+            set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS})
+            set(CMAKE_TRY_COMPILE_TARGET_TYPE ${OLD_CMAKE_TRY_COMPILE_TARGET_TYPE})
+            if (SVE_COMPILATION_C_TEST_COMPILED AND SVE_COMPILATION_CXX_TEST_COMPILED)
+                if (SVE_HEADER_C_TEST_COMPILED AND SVE_HEADER_CXX_TEST_COMPILED)
+                    add_definitions(-DHAVE_SVE_BRIDGE=1)
+                    set(HAVE_SVE_BRIDGE 1)
+                endif()
+            else()
+                set(ENABLE_SVE OFF CACHE BOOL "" FORCE)
+                set(ENABLE_SVE2 OFF CACHE BOOL "" FORCE)
+                message(STATUS "Disabling SVE and SVE2")
+            endif()
+        endif()
+
         # Impose constraint that disabling one extension disables all 'higher order' ones.
         if(NOT ENABLE_NEON)
             message(STATUS "Disabling Neon")
@@ -359,29 +405,6 @@ if(GCC)
         if(CC_HAS_FLAX_VEC_CONV_NONE)
             set(ARM_ARGS ${ARM_ARGS} -flax-vector-conversions=none)
         endif()
-        if(CPU_HAS_SVE)
-            set(SVE_HEADER_TEST "
-#ifndef __ARM_NEON_SVE_BRIDGE
-#error 1
-#endif
-#include <arm_sve.h>
-#include <arm_neon_sve_bridge.h>
-int main() { return 0; }")
-            set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
-            # CMAKE_REQUIRED_FLAGS requires a space-delimited string, whereas
-            # ARM_ARGS is defined and used elsewhere as a ;-list.
-            # Add `-march=...+sve` so the test functions correctly with Clang.
-            foreach(ARM_ARG ${ARM_ARGS} ${AARCH64_SVE_FLAG})
-                string(APPEND CMAKE_REQUIRED_FLAGS " ${ARM_ARG}")
-            endforeach()
-            check_c_source_compiles("${SVE_HEADER_TEST}" SVE_HEADER_C_TEST_COMPILED)
-            check_cxx_source_compiles("${SVE_HEADER_TEST}" SVE_HEADER_CXX_TEST_COMPILED)
-            set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS})
-            if(SVE_HEADER_C_TEST_COMPILED AND SVE_HEADER_CXX_TEST_COMPILED)
-                add_definitions(-DHAVE_SVE_BRIDGE=1)
-                set(HAVE_SVE_BRIDGE 1)
-            endif()
-        endif()
     endif()
 	if(ENABLE_PIC)
 	list(APPEND ARM_ARGS -DPIC)
diff --git a/source/common/aarch64/cpu.h b/source/common/aarch64/cpu.h
index 857ba980a..c61b86359 100644
--- a/source/common/aarch64/cpu.h
+++ b/source/common/aarch64/cpu.h
@@ -62,6 +62,63 @@ static inline int aarch64_get_cpu_flags()
     return flags;
 }
 
+#elif defined(_WIN32)
+
+#include <windows.h>
+
+static inline int aarch64_get_cpu_flags()
+{
+    int flags = 0;
+// IsProcessorFeaturePresent() parameter documentation:
+// https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent#parameters
+#if HAVE_NEON
+    flags |= X265_CPU_NEON;
+#endif // HAVE_NEON
+#if HAVE_NEON_DOTPROD
+// Support for PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE was added in Windows SDK
+// 20348, supported by Windows 11 and Windows Server 2022.
+#if defined(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)
+    if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE))
+    {
+        flags |= X265_CPU_NEON_DOTPROD;
+    }
+#endif // defined(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)
+#endif // HAVE_NEON_DOTPROD
+#if HAVE_NEON_I8MM
+// Support for PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE was added in Windows SDK
+// 26100.
+#if defined(PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE)
+    // There's no PF_* flag yet that indicates whether Neon I8MM is available
+    // or not. But if SVE_I8MM is available, that also implies that Neon I8MM
+    // is available.
+    if (IsProcessorFeaturePresent(PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE))
+    {
+        flags |= X265_CPU_NEON_I8MM;
+    }
+#endif  // defined(PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE)
+#endif  // HAVE_NEON_I8MM
+#if HAVE_SVE
+// Support for PF_ARM_SVE_INSTRUCTIONS_AVAILABLE was added in Windows SDK 26100.
+#if defined(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE)
+    if (IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE))
+    {
+        flags |= X265_CPU_SVE;
+    }
+#endif  // defined(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE)
+#endif  // HAVE_SVE
+#if HAVE_SVE2
+// Support for PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE was added in Windows SDK
+// 26100.
+#if defined(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE)
+    if (IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE))
+    {
+        flags |= X265_CPU_SVE2;
+    }
+#endif  // defined(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE)
+#endif  // HAVE_SVE2
+    return flags;
+}
+
 #elif defined(__linux__)
 
 #include <sys/auxv.h>
-- 
2.42.1

-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0004-AArch64-Add-Windows-run-time-CPU-feature-detection.patch
Type: text/x-patch
Size: 7297 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20241025/e00627e6/attachment.bin>


More information about the x265-devel mailing list