[x265] [PATCH 07/14] AArch64: Add Armv8.6 Neon I8MM feature detection

Hari Limaye hari.limaye at arm.com
Fri Sep 6 13:34:03 UTC 2024


Add compile-time feature detection for AArch64 Neon I8MM instructions,
which are mandatory from Armv8.6.

Impose the constraint that the presence of SVE or SVE2 implies the
presence of Neon I8MM - which is true for all known systems apart from
the HPC-focussed Fujitsu A64FX.
---
 build/README.txt                 | 23 +++++++++++++++++------
 source/CMakeLists.txt            | 32 ++++++++++++++++++++++++++------
 source/cmake/FindNEON_I8MM.cmake | 21 +++++++++++++++++++++
 source/common/cpu.cpp            | 18 ++++++++++++------
 source/test/testbench.cpp        |  1 +
 source/x265.h                    |  1 +
 6 files changed, 78 insertions(+), 18 deletions(-)
 create mode 100644 source/cmake/FindNEON_I8MM.cmake

diff --git a/build/README.txt b/build/README.txt
index 8e229c3bd..14d9ff8d0 100644
--- a/build/README.txt
+++ b/build/README.txt
@@ -106,7 +106,17 @@ running CMake to configure the project. For example:
 
 * cmake -DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++
 
-Moreover, if the target platform supports SVE or SVE2, CROSS_COMPILE_SVE or
+If the target platform supports Armv8.4 Neon DotProd instructions, the
+CROSS_COMPILE_NEON_DOTPROD CMake option should be set to ON:
+
+* cmake -DCROSS_COMPILE_NEON_DOTPROD=ON  <other configuration options...>
+
+If the target platform supports Armv8.6 Neon I8MM instructions, the
+CROSS_COMPILE_NEON_I8MM CMake option should be set to ON:
+
+* cmake -DCROSS_COMPILE_NEON_I8MM=ON  <other configuration options...>
+
+If the target platform supports SVE or SVE2, CROSS_COMPILE_SVE or
 CROSS_COMPILE_SVE2 CMake options should be set to ON, respectively.
 For example, when running CMake to configure the project:
 
@@ -114,11 +124,12 @@ For example, when running CMake to configure the project:
 2. cmake -DCROSS_COMPILE_SVE2=ON <other configuration options...>
 
 Note: when the CROSS_COMPILE_SVE option is set to ON the build configuration will
-compile for Neon DotProd, as we impose the constraint that SVE implies Neon DotProd.
+also compile for Neon DotProd and I8MM, as we impose the constraint that SVE implies
+both Neon DotProd and I8MM.
 
-If target platform supports Armv8.4 Neon DotProd instructions, the
-CROSS_COMPILE_NEON_DOTPROD CMake option should be set to ON:
-
-* cmake -DCROSS_COMPILE_NEON_DOTPROD=ON  <other configuration options...>
+Similarly, when the CROSS_COMPILE_SVE2 option is set to ON the build configuration
+will also compile for Neon I8MM, as we impose the constraint that SVE2 implies Neon
+I8MM. SVE2 already implies that Neon DotProd is implemented since SVE2 is an Armv9.0
+feature which implies Armv8.5, and Neon DotProd is mandatory from Armv8.4.
 
 Then, the normal build process can be followed.
diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
index 3c0c6d167..37b83f959 100755
--- a/source/CMakeLists.txt
+++ b/source/CMakeLists.txt
@@ -91,6 +91,7 @@ elseif(ARM64MATCH GREATER "-1")
     option(CROSS_COMPILE_SVE "Cross Compile for SVE Target" OFF)
     option(CROSS_COMPILE_SVE2 "Cross Compile for SVE2 Target" OFF)
     option(CROSS_COMPILE_NEON_DOTPROD "Cross Compile for Neon DotProd Target" OFF)
+    option(CROSS_COMPILE_NEON_I8MM "Cross Compile for Neon I8MM Target" OFF)
 else()
     message(STATUS "CMAKE_SYSTEM_PROCESSOR value `${CMAKE_SYSTEM_PROCESSOR}` is unknown")
     message(STATUS "Please add this value near ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE}")
@@ -272,20 +273,29 @@ if(GCC)
             if(CROSS_COMPILE_NEON_DOTPROD)
                 set(CPU_HAS_NEON_DOTPROD 1)
             endif()
+            if(CROSS_COMPILE_NEON_I8MM)
+                set(CPU_HAS_NEON_I8MM 1)
+                # Impose the constraint that Neon I8MM implies Neon DotProd.
+                set(CPU_HAS_NEON_DOTPROD 1)
+            endif()
             if(CROSS_COMPILE_SVE)
                 set(CPU_HAS_SVE 1)
-                # We impose the constraint that SVE implies Neon DotProd.
+                # Impose the constraint that SVE implies Neon DotProd and I8MM.
                 set(CPU_HAS_NEON_DOTPROD 1)
+                set(CPU_HAS_NEON_I8MM 1)
             endif()
             if(CROSS_COMPILE_SVE2)
                 set(CPU_HAS_SVE2 1)
                 # SVE2 implies SVE and Neon DotProd.
                 set(CPU_HAS_SVE 1)
                 set(CPU_HAS_NEON_DOTPROD 1)
+                # Impose the constraint that SVE2 implies Neon I8MM.
+                set(CPU_HAS_NEON_I8MM 1)
             endif()
         else()
             if(CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin")
                 find_package(NEON_DOTPROD)
+                find_package(NEON_I8MM)
                 find_package(SVE)
                 find_package(SVE2)
             else()
@@ -299,19 +309,29 @@ if(GCC)
             set(ARM_ARGS -O3 -march=armv8.2-a+dotprod)
             add_definitions(-DHAVE_NEON_DOTPROD=1)
         endif()
+        if(CPU_HAS_NEON_I8MM)
+            # Neon I8MM is mandatory from Armv8.6.
+            message(STATUS "Found Neon I8MM")
+            # Impose the constraint that Neon I8MM implies Neon DotProd.
+            if(NOT CPU_HAS_NEON_DOTPROD)
+                message(FATAL_ERROR "Unsupported AArch64 feature combination (Neon I8MM without Neon DotProd)")
+            endif()
+            set(ARM_ARGS -O3 -march=armv8.2-a+dotprod+i8mm)
+            add_definitions(-DHAVE_NEON_I8MM=1)
+        endif()
         if(CPU_HAS_SVE)
             message(STATUS "Found SVE")
-            # We impose the constraint that SVE implies Neon DotProd.
-            if(NOT CPU_HAS_NEON_DOTPROD)
-                message(FATAL_ERROR "Unsupported AArch64 feature combination (SVE without Neon DotProd)")
+            # Impose the constraint that SVE implies Neon I8MM.
+            if(NOT CPU_HAS_NEON_I8MM)
+                message(FATAL_ERROR "Unsupported AArch64 feature combination (SVE without Neon I8MM)")
             endif()
-            set(ARM_ARGS -O3 -march=armv8.2-a+dotprod+sve)
+            set(ARM_ARGS -O3 -march=armv8.2-a+dotprod+i8mm+sve)
             add_definitions(-DHAVE_SVE=1)
         endif()
         if(CPU_HAS_SVE2)
             message(STATUS "Found SVE2")
             # SVE2 is only available from Armv9.0, and armv9-a implies +dotprod
-            set(ARM_ARGS -O3 -march=armv9-a+sve2)
+            set(ARM_ARGS -O3 -march=armv9-a+i8mm+sve2)
             add_definitions(-DHAVE_SVE2=1)
         endif()
         set(ARM_ARGS ${ARM_ARGS} -fPIC)
diff --git a/source/cmake/FindNEON_I8MM.cmake b/source/cmake/FindNEON_I8MM.cmake
new file mode 100644
index 000000000..75c50bfe9
--- /dev/null
+++ b/source/cmake/FindNEON_I8MM.cmake
@@ -0,0 +1,21 @@
+include(FindPackageHandleStandardArgs)
+
+# Check if Armv8.6 Neon I8MM is supported by the host Arm CPU (native builds only)
+if(APPLE)
+    execute_process(COMMAND sysctl -a
+                    COMMAND grep "hw.optional.arm.FEAT_I8MM: 1"
+                    OUTPUT_VARIABLE has_i8mm
+                    ERROR_QUIET
+                    OUTPUT_STRIP_TRAILING_WHITESPACE)
+else()
+    execute_process(COMMAND cat /proc/cpuinfo
+                    COMMAND grep Features
+                    COMMAND grep i8mm
+                    OUTPUT_VARIABLE has_i8mm
+                    ERROR_QUIET
+                    OUTPUT_STRIP_TRAILING_WHITESPACE)
+endif()
+
+if(has_i8mm)
+    set(CPU_HAS_NEON_I8MM 1)
+endif()
diff --git a/source/common/cpu.cpp b/source/common/cpu.cpp
index 84a241876..61cdaadfb 100644
--- a/source/common/cpu.cpp
+++ b/source/common/cpu.cpp
@@ -118,6 +118,9 @@ const cpu_name_t cpu_names[] =
 #if defined(HAVE_NEON_DOTPROD)
     { "Neon_DotProd",    X265_CPU_NEON_DOTPROD },
 #endif
+#if defined(HAVE_NEON_I8MM)
+    { "Neon_I8MM",       X265_CPU_NEON_I8MM },
+#endif
 #elif X265_ARCH_POWER8
     { "Altivec",         X265_CPU_ALTIVEC },
 
@@ -392,18 +395,21 @@ uint32_t cpu_detect(bool benableavx512)
 {
     int flags = 0;
 
-    #if HAVE_SVE2
-         flags |= X265_CPU_SVE2;
-    #endif
-    #if HAVE_SVE
-         flags |= X265_CPU_SVE;
-    #endif
     #if HAVE_NEON
          flags |= X265_CPU_NEON;
     #endif
     #if HAVE_NEON_DOTPROD
          flags |= X265_CPU_NEON_DOTPROD;
     #endif
+    #if HAVE_NEON_I8MM
+         flags |= X265_CPU_NEON_I8MM;
+    #endif
+    #if HAVE_SVE
+         flags |= X265_CPU_SVE;
+    #endif
+    #if HAVE_SVE2
+         flags |= X265_CPU_SVE2;
+    #endif
 
     return flags;
 }
diff --git a/source/test/testbench.cpp b/source/test/testbench.cpp
index 8c0b13998..de5fef2c2 100644
--- a/source/test/testbench.cpp
+++ b/source/test/testbench.cpp
@@ -179,6 +179,7 @@ int main(int argc, char *argv[])
         { "SVE2", X265_CPU_SVE2 },
         { "SVE", X265_CPU_SVE },
         { "Neon_DotProd", X265_CPU_NEON_DOTPROD },
+        { "Neon_I8MM", X265_CPU_NEON_I8MM },
         { "FastNeonMRC", X265_CPU_FAST_NEON_MRC },
 #endif
         { "", 0 },
diff --git a/source/x265.h b/source/x265.h
index 80d1358cd..896a77825 100644
--- a/source/x265.h
+++ b/source/x265.h
@@ -550,6 +550,7 @@ typedef enum
 #define X265_CPU_SVE2            (1 << 3)   /* AArch64 SVE2 */
 #define X265_CPU_SVE             (1 << 4)   /* AArch64 SVE2 */
 #define X265_CPU_NEON_DOTPROD    (1 << 5)   /* AArch64 Neon DotProd */
+#define X265_CPU_NEON_I8MM       (1 << 6)   /* AArch64 Neon I8MM */
 
 /* IBM Power8 */
 #define X265_CPU_ALTIVEC         0x0000001
-- 
2.42.1

-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0007-AArch64-Add-Armv8.6-Neon-I8MM-feature-detection.patch
Type: text/x-patch
Size: 9741 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20240906/7ce126a0/attachment-0001.bin>


More information about the x265-devel mailing list