[x264-devel] [Git][videolan/x264][master] 2 commits: configure: Check for support for AArch64 SVE and SVE2

Martin Storsjö (@mstorsjo) gitlab at videolan.org
Tue Oct 24 10:25:40 UTC 2023



Martin Storsjö pushed to branch master at VideoLAN / x264


Commits:
db9bc75b by Martin Storsjö at 2023-10-18T11:23:47+03:00
configure: Check for support for AArch64 SVE and SVE2

We don't expect the user to build the whole x264 codebase with
SVE/SVE2 enabled, as we only enable this feature for the assembly
files that use it, in order to have binaries that are portable
and enable the SVE codepaths at runtime if supported.

- - - - -
9c3c7168 by Martin Storsjö at 2023-10-19T22:58:11+03:00
Add cpu flags and runtime detection of SVE and SVE2

We could also use HWCAP_SVE and HWCAP2_SVE2 for detecting this,
but these might not be available in all userland headers, while
HWCAP_CPUID is available much earlier.

The register ID_AA64ZFR0_EL1, which indicates if SVE2 is available,
can only be accessed if SVE is available. If not building all the
C code with SVE enabled (which could make it impossible to run
on HW without SVE), binutils refuses to assemble an instruction
reading ID_AA64ZFR0_EL1 - but if referring to it with the technical
name S3_0_C0_C4_4, it can be assembled even without any extra
extensions enabled.

- - - - -


4 changed files:

- common/cpu.c
- configure
- tools/checkasm.c
- x264.h


Changes:

=====================================
common/cpu.c
=====================================
@@ -96,6 +96,8 @@ const x264_cpu_name_t x264_cpu_names[] =
 #elif ARCH_AARCH64
     {"ARMv8",           X264_CPU_ARMV8},
     {"NEON",            X264_CPU_NEON},
+    {"SVE",             X264_CPU_SVE},
+    {"SVE2",            X264_CPU_SVE2},
 #elif ARCH_MIPS
     {"MSA",             X264_CPU_MSA},
 #elif ARCH_LOONGARCH
@@ -418,13 +420,62 @@ uint32_t x264_cpu_detect( void )
 
 #elif HAVE_AARCH64
 
+#ifdef __linux__
+#include <sys/auxv.h>
+
+#define get_cpu_feature_reg( reg, val ) \
+        __asm__( "mrs %0, " #reg : "=r" ( val ) )
+
+static uint32_t detect_flags( void )
+{
+    uint32_t flags = 0;
+
+#if defined( AT_HWCAP ) && defined( HWCAP_CPUID )
+    unsigned long hwcap = getauxval( AT_HWCAP );
+    if ( hwcap & HWCAP_CPUID ) {
+        // We could check for support directly with HWCAP_SVE and HWCAP2_SVE2,
+        // but those were added into headers much later. By using direct
+        // register access, we can detect these features even if compiled with
+        // slightly older userland headers.
+        // https://www.kernel.org/doc/html/latest/arm64/cpu-feature-registers.html
+        uint64_t tmp;
+        get_cpu_feature_reg( ID_AA64PFR0_EL1, tmp );
+        if ( ( ( tmp >> 32 ) & 0xf ) == 0x1 ) {
+            flags |= X264_CPU_SVE;
+
+            get_cpu_feature_reg( S3_0_C0_C4_4, tmp ); // ID_AA64ZFR0_EL1
+            if ( ( ( tmp >> 0 ) & 0xf ) == 0x1 )
+                flags |= X264_CPU_SVE2;
+        }
+    }
+#endif
+
+    return flags;
+}
+#endif
+
 uint32_t x264_cpu_detect( void )
 {
+    uint32_t flags = X264_CPU_ARMV8;
 #if HAVE_NEON
-    return X264_CPU_ARMV8 | X264_CPU_NEON;
-#else
-    return X264_CPU_ARMV8;
+    flags |= X264_CPU_NEON;
+#endif
+
+    // If these features are enabled unconditionally in the compiler, we can
+    // assume that they are available.
+#ifdef __ARM_FEATURE_SVE
+    flags |= X264_CPU_SVE;
 #endif
+#ifdef __ARM_FEATURE_SVE2
+    flags |= X264_CPU_SVE2;
+#endif
+
+    // Where possible, try to do runtime detection as well.
+#ifdef __linux__
+    flags |= detect_flags();
+#endif
+
+    return flags;
 }
 
 #elif HAVE_MSA


=====================================
configure
=====================================
@@ -411,7 +411,8 @@ NL="
 # list of all preprocessor HAVE values we can define
 CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON AARCH64 BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F SWSCALE \
              LAVF FFMS GPAC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL THP LSMASH X86_INLINE_ASM AS_FUNC INTEL_DISPATCHER \
-             MSA LSX MMAP WINRT VSX ARM_INLINE_ASM STRTOK_R CLOCK_GETTIME BITDEPTH8 BITDEPTH10"
+             MSA LSX MMAP WINRT VSX ARM_INLINE_ASM STRTOK_R CLOCK_GETTIME BITDEPTH8 BITDEPTH10 \
+             SVE SVE2"
 
 # parse options
 
@@ -1003,6 +1004,8 @@ if [ $asm = auto -a $ARCH = AARCH64 ] ; then
     elif cc_check '' '' '__asm__("cmeq v0.8h, v0.8h, #0");' ; then
         define HAVE_AARCH64
         define HAVE_NEON
+        cc_check '' '' '__asm__(".arch armv8.2-a+sve  \n ptrue p0.b, vl16");'         && define HAVE_SVE
+        cc_check '' '' '__asm__(".arch armv8.2-a+sve2 \n smlalb z10.s, z2.h, z1.h");' && define HAVE_SVE2
         ASFLAGS="$ASFLAGS -c"
     else
         echo "no NEON support, try adding -mfpu=neon to CFLAGS"


=====================================
tools/checkasm.c
=====================================
@@ -214,6 +214,8 @@ static void print_bench(void)
                     b->cpu&X264_CPU_NEON ? "neon" :
                     b->cpu&X264_CPU_ARMV6 ? "armv6" :
 #elif ARCH_AARCH64
+                    b->cpu&X264_CPU_SVE2 ? "sve2" :
+                    b->cpu&X264_CPU_SVE ? "sve" :
                     b->cpu&X264_CPU_NEON ? "neon" :
                     b->cpu&X264_CPU_ARMV8 ? "armv8" :
 #elif ARCH_MIPS
@@ -2979,6 +2981,10 @@ static int check_all_flags( void )
         ret |= add_flags( &cpu0, &cpu1, X264_CPU_ARMV8, "ARMv8" );
     if( cpu_detect & X264_CPU_NEON )
         ret |= add_flags( &cpu0, &cpu1, X264_CPU_NEON, "NEON" );
+    if( cpu_detect & X264_CPU_SVE )
+        ret |= add_flags( &cpu0, &cpu1, X264_CPU_SVE, "SVE" );
+    if( cpu_detect & X264_CPU_SVE2 )
+        ret |= add_flags( &cpu0, &cpu1, X264_CPU_SVE2, "SVE2" );
 #elif ARCH_MIPS
     if( cpu_detect & X264_CPU_MSA )
         ret |= add_flags( &cpu0, &cpu1, X264_CPU_MSA, "MSA" );


=====================================
x264.h
=====================================
@@ -177,6 +177,8 @@ typedef struct x264_nal_t
 #define X264_CPU_NEON            0x0000002U  /* ARM NEON */
 #define X264_CPU_FAST_NEON_MRC   0x0000004U  /* Transfer from NEON to ARM register is fast (Cortex-A9) */
 #define X264_CPU_ARMV8           0x0000008U
+#define X264_CPU_SVE             0x0000010U  /* AArch64 SVE */
+#define X264_CPU_SVE2            0x0000020U  /* AArch64 SVE2 */
 
 /* MIPS */
 #define X264_CPU_MSA             0x0000001U  /* MIPS MSA */



View it on GitLab: https://code.videolan.org/videolan/x264/-/compare/5f84d403fcaf15b717a5d08d07e4411f0dcb0013...9c3c71688226fbb23f4d36399fab08f018e760b0

-- 
View it on GitLab: https://code.videolan.org/videolan/x264/-/compare/5f84d403fcaf15b717a5d08d07e4411f0dcb0013...9c3c71688226fbb23f4d36399fab08f018e760b0
You're receiving this email because of your account on code.videolan.org.


VideoLAN code repository instance


More information about the x264-devel mailing list