[x265] [PATCH] Add run-time CPU feature detection for FreeBSD / OpenBSD

Brad Smith brad at comstyle.com
Mon Dec 2 02:37:45 UTC 2024


Add run-time CPU feature detection for FreeBSD / OpenBSD

Use elf_aux_info() for run-time CPU feature detection.

Based on the equivalent change I made for x264.
---
 source/CMakeLists.txt       | 12 +++++++++++-
 source/common/aarch64/cpu.h |  6 +++---
 source/common/cpu.cpp       | 31 ++++++++++++++++++++++++++++++-
 source/common/cpu.h         |  2 ++
 4 files changed, 46 insertions(+), 5 deletions(-)

diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
index 310683909..35735f009 100755
--- a/source/CMakeLists.txt
+++ b/source/CMakeLists.txt
@@ -88,7 +88,7 @@ elseif(ARM64MATCH GREATER "-1")
     option(AARCH64_WARNINGS_AS_ERRORS "Build with -Werror for AArch64 Intrinsics files" OFF)
 
     option(AARCH64_RUNTIME_CPU_DETECT "Enable AArch64 run-time CPU feature detection" ON)
-    if(NOT CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin|Windows")
+    if(NOT CMAKE_SYSTEM_NAME MATCHES "Linux|FreeBSD|OpenBSD|Darwin|Windows")
         set(AARCH64_RUNTIME_CPU_DETECT OFF CACHE BOOL "" FORCE)
         message(STATUS "Run-time CPU feature detection unsupported on this platform")
     endif()
@@ -524,6 +524,16 @@ if(ENABLE_ASSEMBLY)
    add_definitions(-DENABLE_ASSEMBLY)
 endif()
 
+check_symbol_exists(getauxval sys/auxv.h HAVE_GETAUXVAL)
+if(HAVE_GETAUXVAL)
+    add_definitions(-DHAVE_GETAUXVAL=1)
+endif()
+
+check_symbol_exists(elf_aux_info sys/auxv.h HAVE_ELF_AUX_INFO)
+if(HAVE_ELF_AUX_INFO)
+    add_definitions(-DHAVE_ELF_AUX_INFO=1)
+endif()
+
 option(CHECKED_BUILD "Enable run-time sanity checks (debugging)" OFF)
 if(CHECKED_BUILD)
     add_definitions(-DCHECKED_BUILD=1)
diff --git a/source/common/aarch64/cpu.h b/source/common/aarch64/cpu.h
index c61b86359..1468eb613 100644
--- a/source/common/aarch64/cpu.h
+++ b/source/common/aarch64/cpu.h
@@ -119,7 +119,7 @@ static inline int aarch64_get_cpu_flags()
     return flags;
 }
 
-#elif defined(__linux__)
+#elif HAVE_GETAUXVAL || HAVE_ELF_AUX_INFO
 
 #include <sys/auxv.h>
 
@@ -133,10 +133,10 @@ static inline int aarch64_get_cpu_flags()
     int flags = 0;
 
 #if HAVE_NEON_DOTPROD || HAVE_SVE
-    unsigned long hwcap = getauxval(AT_HWCAP);
+    unsigned long hwcap = x265_getauxval(AT_HWCAP);
 #endif
 #if HAVE_NEON_I8MM || HAVE_SVE2
-    unsigned long hwcap2 = getauxval(AT_HWCAP2);
+    unsigned long hwcap2 = x265_getauxval(AT_HWCAP2);
 #endif
 
 #if HAVE_NEON
diff --git a/source/common/cpu.cpp b/source/common/cpu.cpp
index ae0907890..1e074238c 100644
--- a/source/common/cpu.cpp
+++ b/source/common/cpu.cpp
@@ -31,6 +31,10 @@
 #include "cpu.h"
 #include "common.h"
 
+#include <errno.h>
+#if HAVE_GETAUXVAL || HAVE_ELF_AUX_INFO
+#include <sys/auxv.h>
+#endif
 #if MACOS || SYS_FREEBSD
 #include <sys/types.h>
 #include <sys/sysctl.h>
@@ -41,7 +45,7 @@
 #include <machine/cpu.h>
 #endif
 
-#if X265_ARCH_ARM && !defined(HAVE_NEON)
+#if X265_ARCH_ARM && !defined(HAVE_NEON) && !(HAVE_GETAUXVAL || HAVE_ELF_AUX_INFO)
 #include <signal.h>
 #include <setjmp.h>
 static sigjmp_buf jmpbuf;
@@ -128,6 +132,23 @@ const cpu_name_t cpu_names[] =
     { "", 0 },
 };
 
+// Read one auxv entry via getauxval()/elf_aux_info(); ENOSYS and 0 if neither exists.
+unsigned long x265_getauxval(unsigned long type)
+{
+#if HAVE_GETAUXVAL
+    return getauxval(type);
+#elif HAVE_ELF_AUX_INFO
+    unsigned long value = 0;
+    const int err = elf_aux_info(type, &value, sizeof(value));
+    if (err)
+        errno = err; // propagate the returned error code through errno
+    return value;
+#else
+    errno = ENOSYS;
+    return 0;
+#endif
+}
+
 #if X265_ARCH_X86
 
 extern "C" {
@@ -348,6 +369,8 @@ void PFX(cpu_neon_test)(void);
 int PFX(cpu_fast_neon_mrc_test)(void);
 }
 
+#define X265_ARM_HWCAP_NEON (1U << 12)
+
 uint32_t cpu_detect(bool benableavx512)
 {
     int flags = 0;
@@ -355,6 +378,11 @@ uint32_t cpu_detect(bool benableavx512)
 #if HAVE_ARMV6
     flags |= X265_CPU_ARMV6;
 
+#if HAVE_GETAUXVAL || HAVE_ELF_AUX_INFO
+    unsigned long hwcap = x265_getauxval(AT_HWCAP);
+
+    if (hwcap & X265_ARM_HWCAP_NEON) flags |= X265_CPU_NEON;
+#else
     // don't do this hack if compiled with -mfpu=neon
 #if !HAVE_NEON
     static void (* oldsig)(int);
@@ -372,6 +400,7 @@ uint32_t cpu_detect(bool benableavx512)
 #endif // if !HAVE_NEON
 
     flags |= X265_CPU_NEON;
+#endif
 
     // fast neon -> arm (Cortex-A9) detection relies on user access to the
     // cycle counter; this assumes ARMv7 performance counters.
diff --git a/source/common/cpu.h b/source/common/cpu.h
index 52a9e9cba..ec62eb312 100644
--- a/source/common/cpu.h
+++ b/source/common/cpu.h
@@ -48,6 +48,8 @@ extern "C" void PFX(safe_intel_cpu_indicator_init)(void);
 #define x265_emms() PFX(cpu_emms)()
 #endif
 
+unsigned long x265_getauxval(unsigned long);
+
 namespace X265_NS {
 uint32_t cpu_detect(bool);
 bool detect512();
-- 
2.47.0



More information about the x265-devel mailing list