[x265] [PATCH] AArch64: Runtime CPU feature detection

Dash Santosh dash.sathyanarayanan at multicorewareinc.com
Thu Oct 3 09:56:08 UTC 2024


Hi Hari,
Thanks for spotting this. I have also added support for Windows on ARM. Please
find the updated patch below:


On Thu, Sep 26, 2024 at 11:43 AM Dash Santosh <
dash.sathyanarayanan at multicorewareinc.com> wrote:

> From e9614d170f93f3ad4f01e95abfed0a260f218bd5 Mon Sep 17 00:00:00 2001
> From: Min Chen <chenm003 at 163.com>
> Date: Sat, 14 Sep 2024 14:25:28 -0700
> Subject: [PATCH] AArch64: Runtime CPU feature detection
>
> ---
>  source/CMakeLists.txt        | 20 ++++++++++++--------
>  source/common/CMakeLists.txt |  5 +++++
>  source/common/cpu.cpp        | 33 +++++++++++++++++++++++++++++++++
>  3 files changed, 50 insertions(+), 8 deletions(-)
>
> diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
> index 37b83f959..32a99206f 100755
> --- a/source/CMakeLists.txt
> +++ b/source/CMakeLists.txt
> @@ -306,7 +306,8 @@ if(GCC)
>          if(CPU_HAS_NEON_DOTPROD)
>              # Neon DotProd is mandatory from Armv8.4.
>              message(STATUS "Found Neon DotProd")
> -            set(ARM_ARGS -O3 -march=armv8.2-a+dotprod)
> +            set(ARM64_ARCH_ARGS -march=armv8.2-a+dotprod)
> +            set(ARM_ARGS -O3)
>              add_definitions(-DHAVE_NEON_DOTPROD=1)
>          endif()
>          if(CPU_HAS_NEON_I8MM)
> @@ -316,7 +317,8 @@ if(GCC)
>              if(NOT CPU_HAS_NEON_DOTPROD)
>                  message(FATAL_ERROR "Unsupported AArch64 feature combination (Neon I8MM without Neon DotProd)")
>              endif()
> -            set(ARM_ARGS -O3 -march=armv8.2-a+dotprod+i8mm)
> +            set(ARM64_ARCH_ARGS -march=armv8.2-a+dotprod+i8mm)
> +            set(ARM_ARGS -O3)
>              add_definitions(-DHAVE_NEON_I8MM=1)
>          endif()
>          if(CPU_HAS_SVE)
> @@ -325,13 +327,15 @@ if(GCC)
>              if(NOT CPU_HAS_NEON_I8MM)
>                  message(FATAL_ERROR "Unsupported AArch64 feature combination (SVE without Neon I8MM)")
>              endif()
> -            set(ARM_ARGS -O3 -march=armv8.2-a+dotprod+i8mm+sve)
> +            set(ARM64_ARCH_ARGS -march=armv8.2-a+dotprod+i8mm+sve)
> +            set(ARM_ARGS -O3)
>              add_definitions(-DHAVE_SVE=1)
>          endif()
>          if(CPU_HAS_SVE2)
>              message(STATUS "Found SVE2")
>              # SVE2 is only available from Armv9.0, and armv9-a implies +dotprod
> -            set(ARM_ARGS -O3 -march=armv9-a+i8mm+sve2)
> +            set(ARM64_ARCH_ARGS -march=armv9-a+i8mm+sve2)
> +            set(ARM_ARGS -O3)
>              add_definitions(-DHAVE_SVE2=1)
>          endif()
>          set(ARM_ARGS ${ARM_ARGS} -fPIC)
> @@ -692,7 +696,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)
>              add_custom_command(
>                  OUTPUT ${ASM}.${SUFFIX}
>                  COMMAND ${CMAKE_CXX_COMPILER}
> -                ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}
> +                ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}
>                  DEPENDS ${ASM_SRC})
>          endforeach()
>          if(CPU_HAS_SVE2)
> @@ -703,7 +707,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)
>                  add_custom_command(
>                      OUTPUT ${ASM}.${SUFFIX}
>                      COMMAND ${CMAKE_CXX_COMPILER}
> -                    ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}
> +                    ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}
>                      DEPENDS ${ASM_SRC})
>              endforeach()
>          endif()
> @@ -715,7 +719,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)
>                  add_custom_command(
>                      OUTPUT ${ASM}.${SUFFIX}
>                      COMMAND ${CMAKE_CXX_COMPILER}
> -                    ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}
> +                    ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}
>                      DEPENDS ${ASM_SRC})
>              endforeach()
>          endif()
> @@ -727,7 +731,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)
>                  add_custom_command(
>                      OUTPUT ${ASM}.${SUFFIX}
>                      COMMAND ${CMAKE_CXX_COMPILER}
> -                    ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}
> +                    ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}
>                      DEPENDS ${ASM_SRC})
>              endforeach()
>          endif()
> diff --git a/source/common/CMakeLists.txt b/source/common/CMakeLists.txt
> index dc4a74107..33025cada 100644
> --- a/source/common/CMakeLists.txt
> +++ b/source/common/CMakeLists.txt
> @@ -123,29 +123,34 @@ if(ENABLE_ASSEMBLY AND (ARM64 OR CROSS_COMPILE_ARM64))
>      set(ARM_ASMS_NEON_DOTPROD "${A_SRCS_NEON_DOTPROD}" CACHE INTERNAL "Arm Assembly Sources that use the Neon DotProd extension")
>      foreach(SRC ${C_SRCS_NEON})
>          set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})
> +        set_source_files_properties( ${SRC} PROPERTIES COMPILE_FLAGS ${ARM64_ARCH_ARGS} )
>      endforeach()
>
>      if(CPU_HAS_NEON_I8MM)
>          foreach(SRC ${C_SRCS_NEON_I8MM})
>              set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})
> +            set_source_files_properties( aarch64/${SRC} PROPERTIES COMPILE_FLAGS ${ARM64_ARCH_ARGS} )
>          endforeach()
>      endif()
>
>      if(CPU_HAS_NEON_DOTPROD)
>          foreach(SRC ${C_SRCS_NEON_DOTPROD})
>              set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})
> +            set_source_files_properties( aarch64/${SRC} PROPERTIES COMPILE_FLAGS ${ARM64_ARCH_ARGS} )
>          endforeach()
>      endif()
>
>      if(CPU_HAS_SVE AND HAVE_SVE_BRIDGE)
>          foreach(SRC ${C_SRCS_SVE})
>              set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})
> +            set_source_files_properties( aarch64/${SRC} PROPERTIES COMPILE_FLAGS ${ARM64_ARCH_ARGS} )
>          endforeach()
>      endif()
>
>      if(CPU_HAS_SVE2 AND HAVE_SVE_BRIDGE)
>          foreach(SRC ${C_SRCS_SVE2})
>              set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})
> +            set_source_files_properties( aarch64/${SRC} PROPERTIES COMPILE_FLAGS ${ARM64_ARCH_ARGS} )
>          endforeach()
>      endif()
>
> diff --git a/source/common/cpu.cpp b/source/common/cpu.cpp
> index 61cdaadfb..24c60ff0e 100644
> --- a/source/common/cpu.cpp
> +++ b/source/common/cpu.cpp
> @@ -391,6 +391,8 @@ uint32_t cpu_detect(bool benableavx512)
>
>  #elif X265_ARCH_ARM64
>
> +// TODO: Support ARM on Windows
> +#if _MSC_VER
>  uint32_t cpu_detect(bool benableavx512)
>  {
>      int flags = 0;
> @@ -413,6 +415,37 @@ uint32_t cpu_detect(bool benableavx512)
>
>      return flags;
>  }
> +#else // Linux+Aarch64
> +
> +#include <asm/hwcap.h>
> +#include <sys/auxv.h>
> +
> +uint32_t cpu_detect(bool benableavx512)
> +{
> +    unsigned long hwcaps = getauxval(AT_HWCAP);
> +    unsigned long hwcaps2 = getauxval(AT_HWCAP2);
> +
> +    int flags = 0;
> +
> +    #if HAVE_NEON
> +         flags |= X265_CPU_NEON;    // All of ARM64 has NEON
> +    #endif
> +    #if HAVE_NEON_DOTPROD
> +         flags |= (hwcaps & HWCAP_ASIMDDP ? X265_CPU_NEON_DOTPROD : 0);
> +    #endif
> +    #if HAVE_NEON_I8MM
> +         flags |= (hwcaps2 & HWCAP2_SVEI8MM ? X265_CPU_NEON_I8MM : 0);
> +    #endif
> +    #if HAVE_SVE
> +         flags |= (hwcaps & HWCAP_SVE ? X265_CPU_SVE : 0);
> +    #endif
> +    #if HAVE_SVE2
> +         flags |= (hwcaps2 & HWCAP2_SVE2 ? X265_CPU_SVE2 : 0);
> +    #endif
> +
> +    return flags;
> +}
> +#endif // end of Linux+AArch64
>
>  #elif X265_ARCH_POWER8
>
> --
> 2.43.0.windows.1
>
>
> --
>
> * <https://multicorewareinc.com/>*
>   <https://www.linkedin.com/company/multicoreware-inc/>
> <https://twitter.com/MulticoreWare>
> <https://www.facebook.com/multicoreware>
> <https://www.youtube.com/channel/UCXZ1A1MzS5JwBqwBkNfsBBw?sub_confirmation=1>
>    <https://www.instagram.com/multicoreware.inc/>
>
> *Dash Santosh*
>
> *Research Engineer, Video Engineering*
>
> Mobile: +91 78679 43737
>
> IndiQube Echo Point, Avinashi Road
>
> Coimbatore - 641 014
>
>
>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20241003/4eedddf3/attachment-0001.htm>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: v2-0001-Aarch64-runtime-cpu-detection-windows-ARM.patch
Type: application/octet-stream
Size: 10137 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20241003/4eedddf3/attachment-0001.obj>


More information about the x265-devel mailing list