[x265] [PATCH 07/14] AArch64: Add Armv8.6 Neon I8MM feature detection
Hari Limaye
hari.limaye at arm.com
Fri Sep 6 13:34:03 UTC 2024
Add compile-time feature detection for AArch64 Neon I8MM instructions,
which are mandatory from Armv8.6.
Impose the constraint that the presence of SVE or SVE2 implies the
presence of Neon I8MM - which is true for all known systems apart from
the HPC-focussed Fujitsu A64FX.
---
build/README.txt | 23 +++++++++++++++++------
source/CMakeLists.txt | 32 ++++++++++++++++++++++++++------
source/cmake/FindNEON_I8MM.cmake | 21 +++++++++++++++++++++
source/common/cpu.cpp | 18 ++++++++++++------
source/test/testbench.cpp | 1 +
source/x265.h | 1 +
6 files changed, 78 insertions(+), 18 deletions(-)
create mode 100644 source/cmake/FindNEON_I8MM.cmake
diff --git a/build/README.txt b/build/README.txt
index 8e229c3bd..14d9ff8d0 100644
--- a/build/README.txt
+++ b/build/README.txt
@@ -106,7 +106,17 @@ running CMake to configure the project. For example:
* cmake -DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++
-Moreover, if the target platform supports SVE or SVE2, CROSS_COMPILE_SVE or
+If target platform supports Armv8.4 Neon DotProd instructions, the
+CROSS_COMPILE_NEON_DOTPROD CMake option should be set to ON:
+
+* cmake -DCROSS_COMPILE_NEON_DOTPROD=ON <other configuration options...>
+
+If target platform supports Armv8.6 Neon I8MM instructions, the
+CROSS_COMPILE_NEON_I8MM CMake option should be set to ON:
+
+* cmake -DCROSS_COMPILE_NEON_I8MM=ON <other configuration options...>
+
+If the target platform supports SVE or SVE2, CROSS_COMPILE_SVE or
CROSS_COMPILE_SVE2 CMake options should be set to ON, respectively.
For example, when running CMake to configure the project:
@@ -114,11 +124,12 @@ For example, when running CMake to configure the project:
2. cmake -DCROSS_COMPILE_SVE2=ON <other configuration options...>
Note: when the CROSS_COMPILE_SVE option is set to ON the build configuration will
-compile for Neon DotProd, as we impose the constraint that SVE implies Neon DotProd.
+also compile for Neon DotProd and I8MM, as we impose the constraint that SVE implies
+both Neon DotProd and I8MM.
-If target platform supports Armv8.4 Neon DotProd instructions, the
-CROSS_COMPILE_NEON_DOTPROD CMake option should be set to ON:
-
-* cmake -DCROSS_COMPILE_NEON_DOTPROD=ON <other configuration options...>
+Similarly when the CROSS_COMPILE_SVE2 option is set to ON the build configuration
+will also compile for Neon I8MM, as we impose the constraint that SVE2 implies Neon
+I8MM. SVE2 already implies that Neon DotProd is implemented since SVE2 is an Armv9.0
+feature which implies Armv8.5, and Neon DotProd is mandatory from Armv8.4.
Then, the normal build process can be followed.
diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
index 3c0c6d167..37b83f959 100755
--- a/source/CMakeLists.txt
+++ b/source/CMakeLists.txt
@@ -91,6 +91,7 @@ elseif(ARM64MATCH GREATER "-1")
option(CROSS_COMPILE_SVE "Cross Compile for SVE Target" OFF)
option(CROSS_COMPILE_SVE2 "Cross Compile for SVE2 Target" OFF)
option(CROSS_COMPILE_NEON_DOTPROD "Cross Compile for Neon DotProd Target" OFF)
+ option(CROSS_COMPILE_NEON_I8MM "Cross Compile for Neon I8MM Target" OFF)
else()
message(STATUS "CMAKE_SYSTEM_PROCESSOR value `${CMAKE_SYSTEM_PROCESSOR}` is unknown")
message(STATUS "Please add this value near ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE}")
@@ -272,20 +273,29 @@ if(GCC)
if(CROSS_COMPILE_NEON_DOTPROD)
set(CPU_HAS_NEON_DOTPROD 1)
endif()
+ if(CROSS_COMPILE_NEON_I8MM)
+ set(CPU_HAS_NEON_I8MM 1)
+ # Impose the constraint that Neon I8MM implies Neon DotProd.
+ set(CPU_HAS_NEON_DOTPROD 1)
+ endif()
if(CROSS_COMPILE_SVE)
set(CPU_HAS_SVE 1)
- # We impose the constraint that SVE implies Neon DotProd.
+ # Impose the constraint that SVE implies Neon DotProd and I8MM.
set(CPU_HAS_NEON_DOTPROD 1)
+ set(CPU_HAS_NEON_I8MM 1)
endif()
if(CROSS_COMPILE_SVE2)
set(CPU_HAS_SVE2 1)
# SVE2 implies SVE and Neon DotProd.
set(CPU_HAS_SVE 1)
set(CPU_HAS_NEON_DOTPROD 1)
+ # Impose the constraint that SVE2 implies Neon I8MM.
+ set(CPU_HAS_NEON_I8MM 1)
endif()
else()
if(CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin")
find_package(NEON_DOTPROD)
+ find_package(NEON_I8MM)
find_package(SVE)
find_package(SVE2)
else()
@@ -299,19 +309,29 @@ if(GCC)
set(ARM_ARGS -O3 -march=armv8.2-a+dotprod)
add_definitions(-DHAVE_NEON_DOTPROD=1)
endif()
+ if(CPU_HAS_NEON_I8MM)
+ # Neon I8MM is mandatory from Armv8.6.
+ message(STATUS "Found Neon I8MM")
+ # Impose the constraint that Neon I8MM implies Neon DotProd.
+ if(NOT CPU_HAS_NEON_DOTPROD)
+ message(FATAL_ERROR "Unsupported AArch64 feature combination (Neon I8MM without Neon DotProd)")
+ endif()
+ set(ARM_ARGS -O3 -march=armv8.2-a+dotprod+i8mm)
+ add_definitions(-DHAVE_NEON_I8MM=1)
+ endif()
if(CPU_HAS_SVE)
message(STATUS "Found SVE")
- # We impose the constraint that SVE implies Neon DotProd.
- if(NOT CPU_HAS_NEON_DOTPROD)
- message(FATAL_ERROR "Unsupported AArch64 feature combination (SVE without Neon DotProd)")
+ # Impose the constraint that SVE implies Neon I8MM.
+ if(NOT CPU_HAS_NEON_I8MM)
+ message(FATAL_ERROR "Unsupported AArch64 feature combination (SVE without Neon I8MM)")
endif()
- set(ARM_ARGS -O3 -march=armv8.2-a+dotprod+sve)
+ set(ARM_ARGS -O3 -march=armv8.2-a+dotprod+i8mm+sve)
add_definitions(-DHAVE_SVE=1)
endif()
if(CPU_HAS_SVE2)
message(STATUS "Found SVE2")
# SVE2 is only available from Armv9.0, and armv9-a implies +dotprod
- set(ARM_ARGS -O3 -march=armv9-a+sve2)
+ set(ARM_ARGS -O3 -march=armv9-a+i8mm+sve2)
add_definitions(-DHAVE_SVE2=1)
endif()
set(ARM_ARGS ${ARM_ARGS} -fPIC)
diff --git a/source/cmake/FindNEON_I8MM.cmake b/source/cmake/FindNEON_I8MM.cmake
new file mode 100644
index 000000000..75c50bfe9
--- /dev/null
+++ b/source/cmake/FindNEON_I8MM.cmake
@@ -0,0 +1,21 @@
+include(FindPackageHandleStandardArgs)
+
+# Check if Armv8.6 Neon I8MM is supported by the Arm CPU
+if(APPLE)
+ execute_process(COMMAND sysctl -a
+ COMMAND grep "hw.optional.arm.FEAT_I8MM: 1"
+ OUTPUT_VARIABLE has_i8mm
+ ERROR_QUIET
+ OUTPUT_STRIP_TRAILING_WHITESPACE)
+else()
+ execute_process(COMMAND cat /proc/cpuinfo
+ COMMAND grep Features
+ COMMAND grep i8mm
+ OUTPUT_VARIABLE has_i8mm
+ ERROR_QUIET
+ OUTPUT_STRIP_TRAILING_WHITESPACE)
+endif()
+
+if(has_i8mm)
+ set(CPU_HAS_NEON_I8MM 1)
+endif()
diff --git a/source/common/cpu.cpp b/source/common/cpu.cpp
index 84a241876..61cdaadfb 100644
--- a/source/common/cpu.cpp
+++ b/source/common/cpu.cpp
@@ -118,6 +118,9 @@ const cpu_name_t cpu_names[] =
#if defined(HAVE_NEON_DOTPROD)
{ "Neon_DotProd", X265_CPU_NEON_DOTPROD },
#endif
+#if defined(HAVE_NEON_I8MM)
+ { "Neon_I8MM", X265_CPU_NEON_I8MM },
+#endif
#elif X265_ARCH_POWER8
{ "Altivec", X265_CPU_ALTIVEC },
@@ -392,18 +395,21 @@ uint32_t cpu_detect(bool benableavx512)
{
int flags = 0;
- #if HAVE_SVE2
- flags |= X265_CPU_SVE2;
- #endif
- #if HAVE_SVE
- flags |= X265_CPU_SVE;
- #endif
#if HAVE_NEON
flags |= X265_CPU_NEON;
#endif
#if HAVE_NEON_DOTPROD
flags |= X265_CPU_NEON_DOTPROD;
#endif
+ #if HAVE_NEON_I8MM
+ flags |= X265_CPU_NEON_I8MM;
+ #endif
+ #if HAVE_SVE
+ flags |= X265_CPU_SVE;
+ #endif
+ #if HAVE_SVE2
+ flags |= X265_CPU_SVE2;
+ #endif
return flags;
}
diff --git a/source/test/testbench.cpp b/source/test/testbench.cpp
index 8c0b13998..de5fef2c2 100644
--- a/source/test/testbench.cpp
+++ b/source/test/testbench.cpp
@@ -179,6 +179,7 @@ int main(int argc, char *argv[])
{ "SVE2", X265_CPU_SVE2 },
{ "SVE", X265_CPU_SVE },
{ "Neon_DotProd", X265_CPU_NEON_DOTPROD },
+ { "Neon_I8MM", X265_CPU_NEON_I8MM },
{ "FastNeonMRC", X265_CPU_FAST_NEON_MRC },
#endif
{ "", 0 },
diff --git a/source/x265.h b/source/x265.h
index 80d1358cd..896a77825 100644
--- a/source/x265.h
+++ b/source/x265.h
@@ -550,6 +550,7 @@ typedef enum
#define X265_CPU_SVE2 (1 << 3) /* AArch64 SVE2 */
#define X265_CPU_SVE (1 << 4) /* AArch64 SVE2 */
#define X265_CPU_NEON_DOTPROD (1 << 5) /* AArch64 Neon DotProd */
+#define X265_CPU_NEON_I8MM (1 << 6) /* AArch64 Neon I8MM */
/* IBM Power8 */
#define X265_CPU_ALTIVEC 0x0000001
--
2.42.1
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0007-AArch64-Add-Armv8.6-Neon-I8MM-feature-detection.patch
Type: text/x-patch
Size: 9741 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20240906/7ce126a0/attachment-0001.bin>
More information about the x265-devel
mailing list