[x265] [PATCH] AArch64: Runtime CPU feature detection

Hari Limaye Hari.Limaye at arm.com
Thu Sep 26 08:20:16 UTC 2024


Hi,

Thank you for putting up this patch.

In “source/common/cpu.cpp”, the test for Neon_I8MM is checking the wrong flag.

The line:
+         flags |= (hwcaps2 & HWCAP2_SVEI8MM ? X265_CPU_NEON_I8MM : 0);
Should be:

+         flags |= (hwcaps2 & HWCAP2_I8MM ? X265_CPU_NEON_I8MM : 0);

Many thanks,
Hari

From: x265-devel <x265-devel-bounces at videolan.org> on behalf of Dash Santosh <dash.sathyanarayanan at multicorewareinc.com>
Date: Thursday, 26 September 2024 at 07:13
To: x265-devel at videolan.org <x265-devel at videolan.org>
Subject: [x265] [PATCH] AArch64: Runtime CPU feature detection
From e9614d170f93f3ad4f01e95abfed0a260f218bd5 Mon Sep 17 00:00:00 2001
From: Min Chen <chenm003 at 163.com>
Date: Sat, 14 Sep 2024 14:25:28 -0700
Subject: [PATCH] AArch64: Runtime CPU feature detection

---
 source/CMakeLists.txt        | 20 ++++++++++++--------
 source/common/CMakeLists.txt |  5 +++++
 source/common/cpu.cpp        | 33 +++++++++++++++++++++++++++++++++
 3 files changed, 50 insertions(+), 8 deletions(-)

diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
index 37b83f959..32a99206f 100755
--- a/source/CMakeLists.txt
+++ b/source/CMakeLists.txt
@@ -306,7 +306,8 @@ if(GCC)
         if(CPU_HAS_NEON_DOTPROD)
             # Neon DotProd is mandatory from Armv8.4.
             message(STATUS "Found Neon DotProd")
-            set(ARM_ARGS -O3 -march=armv8.2-a+dotprod)
+            set(ARM64_ARCH_ARGS -march=armv8.2-a+dotprod)
+            set(ARM_ARGS -O3)
             add_definitions(-DHAVE_NEON_DOTPROD=1)
         endif()
         if(CPU_HAS_NEON_I8MM)
@@ -316,7 +317,8 @@ if(GCC)
             if(NOT CPU_HAS_NEON_DOTPROD)
                 message(FATAL_ERROR "Unsupported AArch64 feature combination (Neon I8MM without Neon DotProd)")
             endif()
-            set(ARM_ARGS -O3 -march=armv8.2-a+dotprod+i8mm)
+            set(ARM64_ARCH_ARGS -march=armv8.2-a+dotprod+i8mm)
+            set(ARM_ARGS -O3)
             add_definitions(-DHAVE_NEON_I8MM=1)
         endif()
         if(CPU_HAS_SVE)
@@ -325,13 +327,15 @@ if(GCC)
             if(NOT CPU_HAS_NEON_I8MM)
                 message(FATAL_ERROR "Unsupported AArch64 feature combination (SVE without Neon I8MM)")
             endif()
-            set(ARM_ARGS -O3 -march=armv8.2-a+dotprod+i8mm+sve)
+            set(ARM64_ARCH_ARGS -march=armv8.2-a+dotprod+i8mm+sve)
+            set(ARM_ARGS -O3)
             add_definitions(-DHAVE_SVE=1)
         endif()
         if(CPU_HAS_SVE2)
             message(STATUS "Found SVE2")
             # SVE2 is only available from Armv9.0, and armv9-a implies +dotprod
-            set(ARM_ARGS -O3 -march=armv9-a+i8mm+sve2)
+            set(ARM64_ARCH_ARGS -march=armv9-a+i8mm+sve2)
+            set(ARM_ARGS -O3)
             add_definitions(-DHAVE_SVE2=1)
         endif()
         set(ARM_ARGS ${ARM_ARGS} -fPIC)
@@ -692,7 +696,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)
             add_custom_command(
                 OUTPUT ${ASM}.${SUFFIX}
                 COMMAND ${CMAKE_CXX_COMPILER}
-                ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}
+                ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}
                 DEPENDS ${ASM_SRC})
         endforeach()
         if(CPU_HAS_SVE2)
@@ -703,7 +707,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)
                 add_custom_command(
                     OUTPUT ${ASM}.${SUFFIX}
                     COMMAND ${CMAKE_CXX_COMPILER}
-                    ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}
+                    ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}
                     DEPENDS ${ASM_SRC})
             endforeach()
         endif()
@@ -715,7 +719,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)
                 add_custom_command(
                     OUTPUT ${ASM}.${SUFFIX}
                     COMMAND ${CMAKE_CXX_COMPILER}
-                    ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}
+                    ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}
                     DEPENDS ${ASM_SRC})
             endforeach()
         endif()
@@ -727,7 +731,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)
                 add_custom_command(
                     OUTPUT ${ASM}.${SUFFIX}
                     COMMAND ${CMAKE_CXX_COMPILER}
-                    ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}
+                    ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}
                     DEPENDS ${ASM_SRC})
             endforeach()
         endif()
diff --git a/source/common/CMakeLists.txt b/source/common/CMakeLists.txt
index dc4a74107..33025cada 100644
--- a/source/common/CMakeLists.txt
+++ b/source/common/CMakeLists.txt
@@ -123,29 +123,34 @@ if(ENABLE_ASSEMBLY AND (ARM64 OR CROSS_COMPILE_ARM64))
     set(ARM_ASMS_NEON_DOTPROD "${A_SRCS_NEON_DOTPROD}" CACHE INTERNAL "Arm Assembly Sources that use the Neon DotProd extension")
     foreach(SRC ${C_SRCS_NEON})
         set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})
+        set_source_files_properties( ${SRC} PROPERTIES COMPILE_FLAGS "${ARM64_ARCH_ARGS}" ) # quoted: may be empty
     endforeach()

     if(CPU_HAS_NEON_I8MM)
         foreach(SRC ${C_SRCS_NEON_I8MM})
             set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})
+            set_source_files_properties( aarch64/${SRC} PROPERTIES COMPILE_FLAGS ${ARM64_ARCH_ARGS} )
         endforeach()
     endif()

     if(CPU_HAS_NEON_DOTPROD)
         foreach(SRC ${C_SRCS_NEON_DOTPROD})
             set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})
+            set_source_files_properties( aarch64/${SRC} PROPERTIES COMPILE_FLAGS ${ARM64_ARCH_ARGS} )
         endforeach()
     endif()

     if(CPU_HAS_SVE AND HAVE_SVE_BRIDGE)
         foreach(SRC ${C_SRCS_SVE})
             set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})
+            set_source_files_properties( aarch64/${SRC} PROPERTIES COMPILE_FLAGS ${ARM64_ARCH_ARGS} )
         endforeach()
     endif()

     if(CPU_HAS_SVE2 AND HAVE_SVE_BRIDGE)
         foreach(SRC ${C_SRCS_SVE2})
             set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})
+            set_source_files_properties( aarch64/${SRC} PROPERTIES COMPILE_FLAGS ${ARM64_ARCH_ARGS} )
         endforeach()
     endif()

diff --git a/source/common/cpu.cpp b/source/common/cpu.cpp
index 61cdaadfb..24c60ff0e 100644
--- a/source/common/cpu.cpp
+++ b/source/common/cpu.cpp
@@ -391,6 +391,8 @@ uint32_t cpu_detect(bool benableavx512)

 #elif X265_ARCH_ARM64

+// TODO: Runtime detection on non-Linux AArch64 (Windows, macOS, BSD); those use the compile-time flags below.
+#if !defined(__linux__)
 uint32_t cpu_detect(bool benableavx512)
 {
     int flags = 0;
@@ -413,6 +415,37 @@ uint32_t cpu_detect(bool benableavx512)

     return flags;
 }
+#else // Linux+Aarch64
+
+#include <asm/hwcap.h>
+#include <sys/auxv.h>
+
+uint32_t cpu_detect(bool benableavx512)
+{
+    unsigned long hwcaps = getauxval(AT_HWCAP);
+    unsigned long hwcaps2 = getauxval(AT_HWCAP2);
+    // A feature is reported only when it was compiled in (HAVE_*) and its hwcap bit is set.
+    int flags = 0;
+    // Note: benableavx512 is not read in this AArch64 variant.
+    #if HAVE_NEON
+         flags |= X265_CPU_NEON;    // All of ARM64 has NEON
+    #endif
+    #if HAVE_NEON_DOTPROD
+         flags |= (hwcaps & HWCAP_ASIMDDP ? X265_CPU_NEON_DOTPROD : 0);
+    #endif
+    #if HAVE_NEON_I8MM
+         flags |= (hwcaps2 & HWCAP2_I8MM ? X265_CPU_NEON_I8MM : 0);    // not HWCAP2_SVEI8MM (SVE variant)
+    #endif
+    #if HAVE_SVE
+         flags |= (hwcaps & HWCAP_SVE ? X265_CPU_SVE : 0);
+    #endif
+    #if HAVE_SVE2
+         flags |= (hwcaps2 & HWCAP2_SVE2 ? X265_CPU_SVE2 : 0);
+    #endif
+
+    return flags;
+}
+#endif // end of Linux+AArch64

 #elif X265_ARCH_POWER8

--
2.43.0.windows.1

--
[Image removed by sender.]<https://multicorewareinc.com/>
 [Image removed by sender.] <https://www.linkedin.com/company/multicoreware-inc/>   [Image removed by sender.] <https://twitter.com/MulticoreWare>   [Image removed by sender.] <https://www.facebook.com/multicoreware>   [Image removed by sender.] <https://www.youtube.com/channel/UCXZ1A1MzS5JwBqwBkNfsBBw?sub_confirmation=1>   [Image removed by sender.] <https://www.instagram.com/multicoreware.inc/>

Dash Santosh

Research Engineer, Video Engineering

Mobile: +91 78679 43737

IndiQube Echo Point, Avinashi Road

Coimbatore - 641 014
[Image removed by sender.]

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20240926/49c6f72f/attachment-0001.htm>


More information about the x265-devel mailing list