[x265] [PATCH] Add run-time CPU feature detection for FreeBSD / OpenBSD
Brad Smith
brad at comstyle.com
Mon Dec 2 02:37:45 UTC 2024
Add run-time CPU feature detection for FreeBSD / OpenBSD
Use elf_aux_info() (or getauxval() where it is available) to do run-time CPU feature detection.
Based on the bits I did for x264.
---
source/CMakeLists.txt | 12 +++++++++++-
source/common/aarch64/cpu.h | 6 +++---
source/common/cpu.cpp | 31 ++++++++++++++++++++++++++++++-
source/common/cpu.h | 2 ++
4 files changed, 46 insertions(+), 5 deletions(-)
diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
index 310683909..35735f009 100755
--- a/source/CMakeLists.txt
+++ b/source/CMakeLists.txt
@@ -88,7 +88,7 @@ elseif(ARM64MATCH GREATER "-1")
option(AARCH64_WARNINGS_AS_ERRORS "Build with -Werror for AArch64 Intrinsics files" OFF)
option(AARCH64_RUNTIME_CPU_DETECT "Enable AArch64 run-time CPU feature detection" ON)
- if(NOT CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin|Windows")
+ if(NOT CMAKE_SYSTEM_NAME MATCHES "Linux|FreeBSD|OpenBSD|Darwin|Windows")
set(AARCH64_RUNTIME_CPU_DETECT OFF CACHE BOOL "" FORCE)
message(STATUS "Run-time CPU feature detection unsupported on this platform")
endif()
@@ -524,6 +524,16 @@ if(ENABLE_ASSEMBLY)
add_definitions(-DENABLE_ASSEMBLY)
endif()
+check_symbol_exists(getauxval sys/auxv.h HAVE_GETAUXVAL)
+if(HAVE_GETAUXVAL)
+ add_definitions(-DHAVE_GETAUXVAL=1)
+endif()
+
+check_symbol_exists(elf_aux_info sys/auxv.h HAVE_ELF_AUX_INFO)
+if(HAVE_ELF_AUX_INFO)
+ add_definitions(-DHAVE_ELF_AUX_INFO=1)
+endif()
+
option(CHECKED_BUILD "Enable run-time sanity checks (debugging)" OFF)
if(CHECKED_BUILD)
add_definitions(-DCHECKED_BUILD=1)
diff --git a/source/common/aarch64/cpu.h b/source/common/aarch64/cpu.h
index c61b86359..1468eb613 100644
--- a/source/common/aarch64/cpu.h
+++ b/source/common/aarch64/cpu.h
@@ -119,7 +119,7 @@ static inline int aarch64_get_cpu_flags()
return flags;
}
-#elif defined(__linux__)
+#elif HAVE_GETAUXVAL || HAVE_ELF_AUX_INFO
#include <sys/auxv.h>
@@ -133,10 +133,10 @@ static inline int aarch64_get_cpu_flags()
int flags = 0;
#if HAVE_NEON_DOTPROD || HAVE_SVE
- unsigned long hwcap = getauxval(AT_HWCAP);
+ unsigned long hwcap = x265_getauxval(AT_HWCAP);
#endif
#if HAVE_NEON_I8MM || HAVE_SVE2
- unsigned long hwcap2 = getauxval(AT_HWCAP2);
+ unsigned long hwcap2 = x265_getauxval(AT_HWCAP2);
#endif
#if HAVE_NEON
diff --git a/source/common/cpu.cpp b/source/common/cpu.cpp
index ae0907890..1e074238c 100644
--- a/source/common/cpu.cpp
+++ b/source/common/cpu.cpp
@@ -31,6 +31,10 @@
#include "cpu.h"
#include "common.h"
+#include <errno.h>
+#if HAVE_GETAUXVAL || HAVE_ELF_AUX_INFO
+#include <sys/auxv.h>
+#endif
#if MACOS || SYS_FREEBSD
#include <sys/types.h>
#include <sys/sysctl.h>
@@ -41,7 +45,7 @@
#include <machine/cpu.h>
#endif
-#if X265_ARCH_ARM && !defined(HAVE_NEON)
+#if X265_ARCH_ARM && !defined(HAVE_NEON) && !(HAVE_GETAUXVAL || HAVE_ELF_AUX_INFO)
#include <signal.h>
#include <setjmp.h>
static sigjmp_buf jmpbuf;
@@ -128,6 +132,23 @@ const cpu_name_t cpu_names[] =
{ "", 0 },
};
+unsigned long x265_getauxval(unsigned long type) // look up one auxiliary-vector entry via whichever API the build detected
+{
+#if HAVE_GETAUXVAL
+ return getauxval(type);
+#elif HAVE_ELF_AUX_INFO
+ unsigned long aux = 0; // stays 0 unless elf_aux_info() stores a value into it
+ int ret = elf_aux_info(type, &aux, sizeof(aux));
+ if (ret != 0) {
+ errno = ret; // NOTE(review): assumes a nonzero return is itself an errno code - confirm for each target OS
+ }
+ return aux;
+#else
+ errno = ENOSYS; // neither HAVE_GETAUXVAL nor HAVE_ELF_AUX_INFO is set: report "not implemented" and return 0
+ return 0;
+#endif
+}
+
#if X265_ARCH_X86
extern "C" {
@@ -348,6 +369,8 @@ void PFX(cpu_neon_test)(void);
int PFX(cpu_fast_neon_mrc_test)(void);
}
+#define X265_ARM_HWCAP_NEON (1U << 12)
+
uint32_t cpu_detect(bool benableavx512)
{
int flags = 0;
@@ -355,6 +378,11 @@ uint32_t cpu_detect(bool benableavx512)
#if HAVE_ARMV6
flags |= X265_CPU_ARMV6;
+#if HAVE_GETAUXVAL || HAVE_ELF_AUX_INFO
+ unsigned long hwcap = x265_getauxval(AT_HWCAP);
+
+ if (hwcap & X265_ARM_HWCAP_NEON) flags |= X265_CPU_NEON;
+#else
// don't do this hack if compiled with -mfpu=neon
#if !HAVE_NEON
static void (* oldsig)(int);
@@ -372,6 +400,7 @@ uint32_t cpu_detect(bool benableavx512)
#endif // if !HAVE_NEON
flags |= X265_CPU_NEON;
+#endif
// fast neon -> arm (Cortex-A9) detection relies on user access to the
// cycle counter; this assumes ARMv7 performance counters.
diff --git a/source/common/cpu.h b/source/common/cpu.h
index 52a9e9cba..ec62eb312 100644
--- a/source/common/cpu.h
+++ b/source/common/cpu.h
@@ -48,6 +48,8 @@ extern "C" void PFX(safe_intel_cpu_indicator_init)(void);
#define x265_emms() PFX(cpu_emms)()
#endif
+unsigned long x265_getauxval(unsigned long);
+
namespace X265_NS {
uint32_t cpu_detect(bool);
bool detect512();
--
2.47.0
More information about the x265-devel mailing list