[x265] [PATCH] AArch64: Add SVE BitPerm enablement and feature detection
George Steed
george.steed at arm.com
Tue May 13 13:40:04 UTC 2025
This patch expands the existing CMake and runtime feature detection code
to include support for the AArch64 SVE Bit Permute extension
(FEAT_SVE_BitPerm). This is an optional extension to SVE2, introduced
from Armv9.0-A.
In particular, this extension provides support for the BEXT, BDEP, and
BGRP instructions, which can be useful when manipulating bit masks.
Kernels such as scanPosLast can benefit from these instructions; an
optimized implementation of scanPosLast making use of this extension
will follow in a later commit.
---
build/README.txt | 10 ++++++++++
source/CMakeLists.txt | 14 ++++++++++++++
source/cmake/FindSVE2_BITPERM.cmake | 14 ++++++++++++++
source/common/aarch64/cpu.h | 20 +++++++++++++++++++-
source/common/cpu.cpp | 3 +++
source/test/testbench.cpp | 1 +
source/x265.h | 3 ++-
7 files changed, 63 insertions(+), 2 deletions(-)
create mode 100644 source/cmake/FindSVE2_BITPERM.cmake
diff --git a/build/README.txt b/build/README.txt
index 4274951e5..057e9242c 100644
--- a/build/README.txt
+++ b/build/README.txt
@@ -112,6 +112,7 @@ The following AArch64 ISA features are turned on by default when cross-compiling
* Neon I8MM, mandatory from Armv8.6
* SVE, mandatory from Armv9.0
* SVE2, mandatory from Armv9.0
+* SVE2 BitPerm, optional from Armv9.0
If the target platform does not support Armv8.4 Neon DotProd instructions, the
ENABLE_NEON_DOTPROD CMake option should be set to OFF:
@@ -144,4 +145,13 @@ Note: when any of ENABLE_NEON_DOTPROD, ENABLE_NEON_I8MM, or ENABLE_SVE are set t
OFF, the build configuration will disable SVE2, as we impose the constraint that
SVE2 implies Neon I8MM, as well as Neon DotProd and SVE.
+If the target platform does not support SVE2 BitPerm instructions, the
+ENABLE_SVE2_BITPERM CMake option should be set to OFF:
+
+* cmake -DENABLE_SVE2_BITPERM=OFF <other configuration options...>
+
+Note: when any of ENABLE_NEON_DOTPROD, ENABLE_NEON_I8MM, ENABLE_SVE, or
+ENABLE_SVE2 are set to OFF, the build configuration will disable SVE2 BitPerm,
+as SVE2 BitPerm requires that SVE2 is also present and enabled.
+
Then, the normal build process can be followed.
diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
index 9d329e7c7..5bc08dd84 100755
--- a/source/CMakeLists.txt
+++ b/source/CMakeLists.txt
@@ -95,6 +95,7 @@ elseif(ARM64MATCH GREATER "-1")
option(ENABLE_NEON_I8MM "Enable Neon I8MM" ON)
option(ENABLE_SVE "Enable SVE" ON)
option(ENABLE_SVE2 "Enable SVE2" ON)
+ option(ENABLE_SVE2_BITPERM "Enable SVE2 BitPerm" ON)
# Compiler flags for AArch64 extensions.
set(AARCH64_NEON_FLAG "-march=armv8-a")
@@ -105,6 +106,8 @@ elseif(ARM64MATCH GREATER "-1")
set(AARCH64_SVE_FLAG "-march=armv8.2-a+dotprod+i8mm+sve")
# SVE2 is only available from Armv9.0, and armv9-a implies +dotprod and +sve.
set(AARCH64_SVE2_FLAG "-march=armv9-a+i8mm+sve2")
+ # SVE2 BitPerm implies +dotprod, +sve, and +sve2.
+ set(AARCH64_SVE2_BITPERM_FLAG "-march=armv9-a+i8mm+sve2-bitperm")
else()
message(STATUS "CMAKE_SYSTEM_PROCESSOR value `${CMAKE_SYSTEM_PROCESSOR}` is unknown")
message(STATUS "Please add this value near ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE}")
@@ -309,12 +312,14 @@ if(GCC)
set(CPU_HAS_NEON_I8MM 1)
set(CPU_HAS_SVE 1)
set(CPU_HAS_SVE2 1)
+ set(CPU_HAS_SVE2_BITPERM 1)
else()
if(CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin")
find_package(NEON_DOTPROD)
find_package(NEON_I8MM)
find_package(SVE)
find_package(SVE2)
+ find_package(SVE2_BITPERM)
else()
message(STATUS "Compile-time CPU feature detection unsupported on this platform")
endif()
@@ -396,6 +401,11 @@ int main() { return 0; }")
if(NOT ENABLE_SVE2)
message(STATUS "Disabling SVE2")
set(CPU_HAS_SVE2 0)
+ set(ENABLE_SVE2_BITPERM 0)
+ endif()
+ if(NOT ENABLE_SVE2_BITPERM)
+ message(STATUS "Disabling SVE2 BitPerm")
+ set(CPU_HAS_SVE2_BITPERM 0)
endif()
if(CPU_HAS_NEON)
@@ -418,6 +428,10 @@ int main() { return 0; }")
message(STATUS "Found SVE2")
add_definitions(-DHAVE_SVE2=1)
endif()
+ if(CPU_HAS_SVE2_BITPERM)
+ message(STATUS "Found SVE2 BitPerm")
+ add_definitions(-DHAVE_SVE2_BITPERM=1)
+ endif()
set(ARM_ARGS -O3)
# Do not allow implicit vector type conversions in Clang builds (this
# is already the default in GCC builds).
diff --git a/source/cmake/FindSVE2_BITPERM.cmake b/source/cmake/FindSVE2_BITPERM.cmake
new file mode 100644
index 000000000..0736eeecc
--- /dev/null
+++ b/source/cmake/FindSVE2_BITPERM.cmake
@@ -0,0 +1,14 @@
+include(FindPackageHandleStandardArgs)
+
+if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+ execute_process(COMMAND cat /proc/cpuinfo
+ COMMAND grep Features
+ COMMAND grep svebitperm
+ OUTPUT_VARIABLE sve2_bitperm_version
+ ERROR_QUIET
+ OUTPUT_STRIP_TRAILING_WHITESPACE)
+endif()
+
+if(sve2_bitperm_version)
+ set(CPU_HAS_SVE2_BITPERM 1)
+endif()
diff --git a/source/common/aarch64/cpu.h b/source/common/aarch64/cpu.h
index c61b86359..ea851301e 100644
--- a/source/common/aarch64/cpu.h
+++ b/source/common/aarch64/cpu.h
@@ -116,6 +116,14 @@ static inline int aarch64_get_cpu_flags()
}
#endif // defined(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE)
#endif // HAVE_SVE2
+#if HAVE_SVE2_BITPERM
+#if defined(PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE)
+ if (IsProcessorFeaturePresent(PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE))
+ {
+ flags |= X265_CPU_SVE2_BITPERM;
+ }
+#endif // defined(PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE)
+#endif // HAVE_SVE2_BITPERM
return flags;
}
@@ -126,6 +134,7 @@ static inline int aarch64_get_cpu_flags()
#define X265_AARCH64_HWCAP_ASIMDDP (1 << 20)
#define X265_AARCH64_HWCAP_SVE (1 << 22)
#define X265_AARCH64_HWCAP2_SVE2 (1 << 1)
+#define X265_AARCH64_HWCAP2_SVEBITPERM (1 << 4)
#define X265_AARCH64_HWCAP2_I8MM (1 << 13)
static inline int aarch64_get_cpu_flags()
@@ -135,7 +144,7 @@ static inline int aarch64_get_cpu_flags()
#if HAVE_NEON_DOTPROD || HAVE_SVE
unsigned long hwcap = getauxval(AT_HWCAP);
#endif
-#if HAVE_NEON_I8MM || HAVE_SVE2
+#if HAVE_NEON_I8MM || HAVE_SVE2 || HAVE_SVE2_BITPERM
unsigned long hwcap2 = getauxval(AT_HWCAP2);
#endif
@@ -154,6 +163,9 @@ static inline int aarch64_get_cpu_flags()
#if HAVE_SVE2
if (hwcap2 & X265_AARCH64_HWCAP2_SVE2) flags |= X265_CPU_SVE2;
#endif
+#if HAVE_SVE2_BITPERM
+ if (hwcap2 & X265_AARCH64_HWCAP2_SVEBITPERM) flags |= X265_CPU_SVE2_BITPERM;
+#endif
return flags;
}
@@ -179,6 +191,9 @@ static inline int aarch64_cpu_detect()
// Restrict flags: SVE2 assumes that FEAT_SVE is available.
if (!(flags & X265_CPU_SVE)) flags &= ~X265_CPU_SVE2;
+ // Restrict flags: SVE2_BitPerm assumes that FEAT_SVE2 is available.
+ if (!(flags & X265_CPU_SVE2)) flags &= ~X265_CPU_SVE2_BITPERM;
+
return flags;
}
@@ -202,6 +217,9 @@ static inline int aarch64_cpu_detect()
#endif
#if HAVE_SVE2
flags |= X265_CPU_SVE2;
+#endif
+#if HAVE_SVE2_BITPERM
+ flags |= X265_CPU_SVE2_BITPERM;
#endif
return flags;
}
diff --git a/source/common/cpu.cpp b/source/common/cpu.cpp
index d4a4e63c5..00defd837 100644
--- a/source/common/cpu.cpp
+++ b/source/common/cpu.cpp
@@ -123,6 +123,9 @@ const cpu_name_t cpu_names[] =
#if defined(HAVE_NEON_I8MM)
{ "Neon_I8MM", X265_CPU_NEON_I8MM },
#endif
+#if defined(HAVE_SVE2_BITPERM)
+ { "SVE2_BitPerm", X265_CPU_SVE2_BITPERM },
+#endif
#elif X265_ARCH_POWER8
{ "Altivec", X265_CPU_ALTIVEC },
diff --git a/source/test/testbench.cpp b/source/test/testbench.cpp
index f651dc51b..fb5b4252f 100644
--- a/source/test/testbench.cpp
+++ b/source/test/testbench.cpp
@@ -103,6 +103,7 @@ struct test_arch_t
{ "Neon_I8MM", X265_CPU_NEON_I8MM },
{ "SVE", X265_CPU_SVE },
{ "SVE2", X265_CPU_SVE2 },
+ { "SVE2_BitPerm", X265_CPU_SVE2_BITPERM },
{ "FastNeonMRC", X265_CPU_FAST_NEON_MRC },
#endif
{ "", 0 },
diff --git a/source/x265.h b/source/x265.h
index 7241beda2..8bc7bea2a 100644
--- a/source/x265.h
+++ b/source/x265.h
@@ -555,9 +555,10 @@ typedef enum
#define X265_CPU_NEON (1 << 1) /* ARM NEON */
#define X265_CPU_FAST_NEON_MRC (1 << 2) /* Transfer from NEON to ARM register is fast (Cortex-A9) */
#define X265_CPU_SVE2 (1 << 3) /* AArch64 SVE2 */
-#define X265_CPU_SVE (1 << 4) /* AArch64 SVE2 */
+#define X265_CPU_SVE (1 << 4) /* AArch64 SVE */
#define X265_CPU_NEON_DOTPROD (1 << 5) /* AArch64 Neon DotProd */
#define X265_CPU_NEON_I8MM (1 << 6) /* AArch64 Neon I8MM */
+#define X265_CPU_SVE2_BITPERM (1 << 7) /* AArch64 SVE2 BitPerm */
/* IBM Power8 */
#define X265_CPU_ALTIVEC 0x0000001
--
2.43.0
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-AArch64-Add-SVE-BitPerm-enablement-and-feature-detec.patch
Type: text/x-diff
Size: 9382 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20250513/b5d27cb4/attachment.patch>
More information about the x265-devel
mailing list