[x265] [PATCH] AArch64: Runtime CPU feature detection

Dash Santosh dash.sathyanarayanan at multicorewareinc.com
Thu Oct 3 09:56:32 UTC 2024


From 7d2353aaf7509721461c141f2800962c15ff440c Mon Sep 17 00:00:00 2001
From: Logaprakash Ramajayam <logaprakash.ramajayam at multicorewareinc.com>
Date: Wed, 2 Oct 2024 21:59:59 -0700
Subject: [PATCH] AArch64: Runtime CPU feature detection

---
 .../make-aarch64-w64-mingw32-Makefiles.sh     |  8 ++
 .../msys/toolchain-aarch64-w64-mingw32.cmake  |  8 ++
 source/CMakeLists.txt                         | 21 +++--
 source/common/CMakeLists.txt                  |  5 ++
 source/common/cpu.cpp                         | 81 ++++++++++++++++++-
 5 files changed, 114 insertions(+), 9 deletions(-)
 create mode 100644 build/msys/make-aarch64-w64-mingw32-Makefiles.sh
 create mode 100644 build/msys/toolchain-aarch64-w64-mingw32.cmake

diff --git a/build/msys/make-aarch64-w64-mingw32-Makefiles.sh b/build/msys/make-aarch64-w64-mingw32-Makefiles.sh
new file mode 100644
index 000000000..eceffa4a9
--- /dev/null
+++ b/build/msys/make-aarch64-w64-mingw32-Makefiles.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+
+# This will generate a cross-compile environment, compiling an aarch64
+# Win64 target from a 32bit MinGW32 host environment.  If your MinGW
+# install is 64bit, you can use the native compiler batch file:
+# make-Makefiles.sh
+
+cmake -G "MSYS Makefiles" -DCMAKE_TOOLCHAIN_FILE=toolchain-aarch64-w64-mingw32.cmake ../../source && cmake-gui ../../source
diff --git a/build/msys/toolchain-aarch64-w64-mingw32.cmake b/build/msys/toolchain-aarch64-w64-mingw32.cmake
new file mode 100644
index 000000000..6607bdf64
--- /dev/null
+++ b/build/msys/toolchain-aarch64-w64-mingw32.cmake
@@ -0,0 +1,8 @@
+set(CMAKE_SYSTEM_NAME Windows)                      # target operating system
+set(CMAKE_SYSTEM_PROCESSOR aarch64)                 # target CPU architecture
+set(CMAKE_C_COMPILER aarch64-w64-mingw32-gcc)       # cross C compiler
+set(CMAKE_CXX_COMPILER aarch64-w64-mingw32-g++)     # cross C++ compiler
+set(CMAKE_RC_COMPILER aarch64-w64-mingw32-windres)  # Windows resource compiler
+set(CMAKE_RANLIB aarch64-w64-mingw32-ranlib)        # archive indexer
+set(CMAKE_ASM_YASM_COMPILER yasm)
+set(CROSS_COMPILE_ARM64 1)                          # tested by source/common/CMakeLists.txt to enable the AArch64 sources
diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
index 13bc8ccfe..d1fe38559 100755
--- a/source/CMakeLists.txt
+++ b/source/CMakeLists.txt
@@ -303,10 +303,12 @@ if(GCC)
             endif()
         endif()

+        set(ARM64_ARCH_ARGS "-O3")
         if(CPU_HAS_NEON_DOTPROD)
             # Neon DotProd is mandatory from Armv8.4.
             message(STATUS "Found Neon DotProd")
-            set(ARM_ARGS -O3 -march=armv8.2-a+dotprod)
+            set(ARM64_ARCH_ARGS -march=armv8.2-a+dotprod)
+            set(ARM_ARGS -O3)
             add_definitions(-DHAVE_NEON_DOTPROD=1)
         endif()
         if(CPU_HAS_NEON_I8MM)
@@ -316,7 +318,8 @@ if(GCC)
             if(NOT CPU_HAS_NEON_DOTPROD)
                 message(FATAL_ERROR "Unsupported AArch64 feature combination (Neon I8MM without Neon DotProd)")
             endif()
-            set(ARM_ARGS -O3 -march=armv8.2-a+dotprod+i8mm)
+            set(ARM64_ARCH_ARGS -march=armv8.2-a+dotprod+i8mm)
+            set(ARM_ARGS -O3)
             add_definitions(-DHAVE_NEON_I8MM=1)
         endif()
         if(CPU_HAS_SVE)
@@ -325,13 +328,15 @@ if(GCC)
             if(NOT CPU_HAS_NEON_I8MM)
                 message(FATAL_ERROR "Unsupported AArch64 feature combination (SVE without Neon I8MM)")
             endif()
-            set(ARM_ARGS -O3 -march=armv8.2-a+dotprod+i8mm+sve)
+            set(ARM64_ARCH_ARGS -march=armv8.2-a+dotprod+i8mm+sve)
+            set(ARM_ARGS -O3)
             add_definitions(-DHAVE_SVE=1)
         endif()
         if(CPU_HAS_SVE2)
             message(STATUS "Found SVE2")
             # SVE2 is only available from Armv9.0, and armv9-a implies +dotprod
-            set(ARM_ARGS -O3 -march=armv9-a+i8mm+sve2)
+            set(ARM64_ARCH_ARGS -march=armv9-a+i8mm+sve2)
+            set(ARM_ARGS -O3)
             add_definitions(-DHAVE_SVE2=1)
         endif()
         set(ARM_ARGS ${ARM_ARGS} -fPIC)
@@ -692,7 +697,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)
             add_custom_command(
                 OUTPUT ${ASM}.${SUFFIX}
                 COMMAND ${CMAKE_CXX_COMPILER}
-                ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}
+                ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}
                 DEPENDS ${ASM_SRC})
         endforeach()
         if(CPU_HAS_SVE2)
@@ -703,7 +708,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)
                 add_custom_command(
                     OUTPUT ${ASM}.${SUFFIX}
                     COMMAND ${CMAKE_CXX_COMPILER}
-                    ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}
+                    ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}
                     DEPENDS ${ASM_SRC})
             endforeach()
         endif()
@@ -715,7 +720,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)
                 add_custom_command(
                     OUTPUT ${ASM}.${SUFFIX}
                     COMMAND ${CMAKE_CXX_COMPILER}
-                    ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}
+                    ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}
                     DEPENDS ${ASM_SRC})
             endforeach()
         endif()
@@ -727,7 +732,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)
                 add_custom_command(
                     OUTPUT ${ASM}.${SUFFIX}
                     COMMAND ${CMAKE_CXX_COMPILER}
-                    ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}
+                    ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}
                     DEPENDS ${ASM_SRC})
             endforeach()
         endif()
diff --git a/source/common/CMakeLists.txt b/source/common/CMakeLists.txt
index dc4a74107..33025cada 100644
--- a/source/common/CMakeLists.txt
+++ b/source/common/CMakeLists.txt
@@ -123,29 +123,34 @@ if(ENABLE_ASSEMBLY AND (ARM64 OR CROSS_COMPILE_ARM64))
     set(ARM_ASMS_NEON_DOTPROD "${A_SRCS_NEON_DOTPROD}" CACHE INTERNAL "Arm Assembly Sources that use the Neon DotProd extension")
     foreach(SRC ${C_SRCS_NEON})
         set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})
+        set_source_files_properties( ${SRC} PROPERTIES COMPILE_FLAGS ${ARM64_ARCH_ARGS} )
     endforeach()

     if(CPU_HAS_NEON_I8MM)
         foreach(SRC ${C_SRCS_NEON_I8MM})
             set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})
+            set_source_files_properties( aarch64/${SRC} PROPERTIES COMPILE_FLAGS ${ARM64_ARCH_ARGS} )
         endforeach()
     endif()

     if(CPU_HAS_NEON_DOTPROD)
         foreach(SRC ${C_SRCS_NEON_DOTPROD})
             set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})
+            set_source_files_properties( aarch64/${SRC} PROPERTIES COMPILE_FLAGS ${ARM64_ARCH_ARGS} )
         endforeach()
     endif()

     if(CPU_HAS_SVE AND HAVE_SVE_BRIDGE)
         foreach(SRC ${C_SRCS_SVE})
             set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})
+            set_source_files_properties( aarch64/${SRC} PROPERTIES COMPILE_FLAGS ${ARM64_ARCH_ARGS} )
         endforeach()
     endif()

     if(CPU_HAS_SVE2 AND HAVE_SVE_BRIDGE)
         foreach(SRC ${C_SRCS_SVE2})
             set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})
+            set_source_files_properties( aarch64/${SRC} PROPERTIES COMPILE_FLAGS ${ARM64_ARCH_ARGS} )
         endforeach()
     endif()

diff --git a/source/common/cpu.cpp b/source/common/cpu.cpp
index 61cdaadfb..a2b0ac081 100644
--- a/source/common/cpu.cpp
+++ b/source/common/cpu.cpp
@@ -391,7 +391,8 @@ uint32_t cpu_detect(bool benableavx512)

 #elif X265_ARCH_ARM64

-uint32_t cpu_detect(bool benableavx512)
+#if defined(_MSC_VER) || defined(__APPLE__)
+uint32_t cpu_detect(bool /*benableavx512*/)
 {
     int flags = 0;

@@ -414,6 +415,84 @@ uint32_t cpu_detect(bool benableavx512)
     return flags;
 }

+// Windows on Arm with MinGW-w64. TODO: runtime detection for MSVC builds (branch above)
+#elif defined(__MINGW64__)
+
+#include <windows.h>
+#include <processthreadsapi.h>
+
+// Returns true when the registry name of processor 0 contains "Oryon"; false otherwise or on error.
+bool isOryonCPU()
+{
+    char processorName[128] = { 0 };
+    DWORD bufferSize = sizeof(processorName);
+
+    // The buffer is char, so call the ANSI entry point; RRF_RT_REG_SZ rejects non-string data.
+    LONG result = RegGetValueA(HKEY_LOCAL_MACHINE,
+                               "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0",
+                               "ProcessorNameString", RRF_RT_REG_SZ, NULL, processorName, &bufferSize);
+    // On failure the buffer holds no valid name, so report "not Oryon" without searching it.
+    if (result != ERROR_SUCCESS)
+        return false;
+
+    return strstr(processorName, "Oryon") != NULL;
+}
+uint32_t cpu_detect(bool /*benableavx512*/)
+{
+    // NEON is assumed; other features need build support (HAVE_*) plus a positive run-time check.
+    int flags = 0;
+
+#if HAVE_NEON
+    flags |= X265_CPU_NEON;    // All of ARM64 has NEON
+#endif
+#if HAVE_NEON_DOTPROD && defined(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)
+    if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)) flags |= X265_CPU_NEON_DOTPROD;
+#endif
+#if HAVE_NEON_I8MM
+    if (isOryonCPU()) flags |= X265_CPU_NEON_I8MM;    // decided by CPU name, not a PF_ARM_* query
+#endif
+#if HAVE_SVE && defined(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE)
+    if (IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE)) flags |= X265_CPU_SVE;
+#endif
+#if HAVE_SVE2 && defined(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE)
+    if (IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE)) flags |= X265_CPU_SVE2;
+#endif
+
+    return flags;
+}
+
+#else // Linux+Aarch64
+
+#include <asm/hwcap.h>
+#include <sys/auxv.h>
+
+uint32_t cpu_detect(bool /*benableavx512*/)
+{
+    // CPU capability bits read from the auxiliary vector via getauxval().
+    unsigned long hwcaps = getauxval(AT_HWCAP);
+    unsigned long hwcaps2 = getauxval(AT_HWCAP2);
+    int flags = 0;
+
+    #if HAVE_NEON
+         flags |= X265_CPU_NEON;    // All of ARM64 has NEON
+    #endif
+    #if HAVE_NEON_DOTPROD
+         flags |= (hwcaps & HWCAP_ASIMDDP ? X265_CPU_NEON_DOTPROD : 0);
+    #endif
+    #if HAVE_NEON_I8MM
+         flags |= (hwcaps2 & HWCAP2_I8MM ? X265_CPU_NEON_I8MM : 0);
+    #endif
+    #if HAVE_SVE
+         flags |= (hwcaps & HWCAP_SVE ? X265_CPU_SVE : 0);
+    #endif
+    #if HAVE_SVE2
+         flags |= (hwcaps2 & HWCAP2_SVE2 ? X265_CPU_SVE2 : 0);
+    #endif
+
+    return flags;
+}
+#endif // end of Linux+AArch64
+
 #elif X265_ARCH_POWER8

 uint32_t cpu_detect(bool benableavx512)
-- 
2.45.2


On Thu, Oct 3, 2024 at 3:26 PM Dash Santosh <
dash.sathyanarayanan at multicorewareinc.com> wrote:

> Hi Hari,
> Thanks for spotting this. Also added support for Windows on ARM. Please
> find below the updated patch:
>
>
> On Thu, Sep 26, 2024 at 11:43 AM Dash Santosh <
> dash.sathyanarayanan at multicorewareinc.com> wrote:
>
>> From e9614d170f93f3ad4f01e95abfed0a260f218bd5 Mon Sep 17 00:00:00 2001
>> From: Min Chen <chenm003 at 163.com>
>> Date: Sat, 14 Sep 2024 14:25:28 -0700
>> Subject: [PATCH] AArch64: Runtime CPU feature detection
>>
>> ---
>>  source/CMakeLists.txt        | 20 ++++++++++++--------
>>  source/common/CMakeLists.txt |  5 +++++
>>  source/common/cpu.cpp        | 33 +++++++++++++++++++++++++++++++++
>>  3 files changed, 50 insertions(+), 8 deletions(-)
>>
>> diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
>> index 37b83f959..32a99206f 100755
>> --- a/source/CMakeLists.txt
>> +++ b/source/CMakeLists.txt
>> @@ -306,7 +306,8 @@ if(GCC)
>>          if(CPU_HAS_NEON_DOTPROD)
>>              # Neon DotProd is mandatory from Armv8.4.
>>              message(STATUS "Found Neon DotProd")
>> -            set(ARM_ARGS -O3 -march=armv8.2-a+dotprod)
>> +            set(ARM64_ARCH_ARGS -march=armv8.2-a+dotprod)
>> +            set(ARM_ARGS -O3)
>>              add_definitions(-DHAVE_NEON_DOTPROD=1)
>>          endif()
>>          if(CPU_HAS_NEON_I8MM)
>> @@ -316,7 +317,8 @@ if(GCC)
>>              if(NOT CPU_HAS_NEON_DOTPROD)
>>                  message(FATAL_ERROR "Unsupported AArch64 feature
>> combination (Neon I8MM without Neon DotProd)")
>>              endif()
>> -            set(ARM_ARGS -O3 -march=armv8.2-a+dotprod+i8mm)
>> +            set(ARM64_ARCH_ARGS -march=armv8.2-a+dotprod+i8mm)
>> +            set(ARM_ARGS -O3)
>>              add_definitions(-DHAVE_NEON_I8MM=1)
>>          endif()
>>          if(CPU_HAS_SVE)
>> @@ -325,13 +327,15 @@ if(GCC)
>>              if(NOT CPU_HAS_NEON_I8MM)
>>                  message(FATAL_ERROR "Unsupported AArch64 feature
>> combination (SVE without Neon I8MM)")
>>              endif()
>> -            set(ARM_ARGS -O3 -march=armv8.2-a+dotprod+i8mm+sve)
>> +            set(ARM64_ARCH_ARGS -march=armv8.2-a+dotprod+i8mm+sve)
>> +            set(ARM_ARGS -O3)
>>              add_definitions(-DHAVE_SVE=1)
>>          endif()
>>          if(CPU_HAS_SVE2)
>>              message(STATUS "Found SVE2")
>>              # SVE2 is only available from Armv9.0, and armv9-a implies
>> +dotprod
>> -            set(ARM_ARGS -O3 -march=armv9-a+i8mm+sve2)
>> +            set(ARM64_ARCH_ARGS -march=armv9-a+i8mm+sve2)
>> +            set(ARM_ARGS -O3)
>>              add_definitions(-DHAVE_SVE2=1)
>>          endif()
>>          set(ARM_ARGS ${ARM_ARGS} -fPIC)
>> @@ -692,7 +696,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)
>>              add_custom_command(
>>                  OUTPUT ${ASM}.${SUFFIX}
>>                  COMMAND ${CMAKE_CXX_COMPILER}
>> -                ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o
>> ${ASM}.${SUFFIX}
>> +                ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c
>> ${ASM_SRC} -o ${ASM}.${SUFFIX}
>>                  DEPENDS ${ASM_SRC})
>>          endforeach()
>>          if(CPU_HAS_SVE2)
>> @@ -703,7 +707,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)
>>                  add_custom_command(
>>                      OUTPUT ${ASM}.${SUFFIX}
>>                      COMMAND ${CMAKE_CXX_COMPILER}
>> -                    ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o
>> ${ASM}.${SUFFIX}
>> +                    ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c
>> ${ASM_SRC} -o ${ASM}.${SUFFIX}
>>                      DEPENDS ${ASM_SRC})
>>              endforeach()
>>          endif()
>> @@ -715,7 +719,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)
>>                  add_custom_command(
>>                      OUTPUT ${ASM}.${SUFFIX}
>>                      COMMAND ${CMAKE_CXX_COMPILER}
>> -                    ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o
>> ${ASM}.${SUFFIX}
>> +                    ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c
>> ${ASM_SRC} -o ${ASM}.${SUFFIX}
>>                      DEPENDS ${ASM_SRC})
>>              endforeach()
>>          endif()
>> @@ -727,7 +731,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)
>>                  add_custom_command(
>>                      OUTPUT ${ASM}.${SUFFIX}
>>                      COMMAND ${CMAKE_CXX_COMPILER}
>> -                    ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o
>> ${ASM}.${SUFFIX}
>> +                    ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c
>> ${ASM_SRC} -o ${ASM}.${SUFFIX}
>>                      DEPENDS ${ASM_SRC})
>>              endforeach()
>>          endif()
>> diff --git a/source/common/CMakeLists.txt b/source/common/CMakeLists.txt
>> index dc4a74107..33025cada 100644
>> --- a/source/common/CMakeLists.txt
>> +++ b/source/common/CMakeLists.txt
>> @@ -123,29 +123,34 @@ if(ENABLE_ASSEMBLY AND (ARM64 OR
>> CROSS_COMPILE_ARM64))
>>      set(ARM_ASMS_NEON_DOTPROD "${A_SRCS_NEON_DOTPROD}" CACHE INTERNAL
>> "Arm Assembly Sources that use the Neon DotProd extension")
>>      foreach(SRC ${C_SRCS_NEON})
>>          set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})
>> +        set_source_files_properties( ${SRC} PROPERTIES COMPILE_FLAGS
>> ${ARM64_ARCH_ARGS} )
>>      endforeach()
>>
>>      if(CPU_HAS_NEON_I8MM)
>>          foreach(SRC ${C_SRCS_NEON_I8MM})
>>              set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})
>> +            set_source_files_properties( aarch64/${SRC} PROPERTIES
>> COMPILE_FLAGS ${ARM64_ARCH_ARGS} )
>>          endforeach()
>>      endif()
>>
>>      if(CPU_HAS_NEON_DOTPROD)
>>          foreach(SRC ${C_SRCS_NEON_DOTPROD})
>>              set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})
>> +            set_source_files_properties( aarch64/${SRC} PROPERTIES
>> COMPILE_FLAGS ${ARM64_ARCH_ARGS} )
>>          endforeach()
>>      endif()
>>
>>      if(CPU_HAS_SVE AND HAVE_SVE_BRIDGE)
>>          foreach(SRC ${C_SRCS_SVE})
>>              set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})
>> +            set_source_files_properties( aarch64/${SRC} PROPERTIES
>> COMPILE_FLAGS ${ARM64_ARCH_ARGS} )
>>          endforeach()
>>      endif()
>>
>>      if(CPU_HAS_SVE2 AND HAVE_SVE_BRIDGE)
>>          foreach(SRC ${C_SRCS_SVE2})
>>              set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})
>> +            set_source_files_properties( aarch64/${SRC} PROPERTIES
>> COMPILE_FLAGS ${ARM64_ARCH_ARGS} )
>>          endforeach()
>>      endif()
>>
>> diff --git a/source/common/cpu.cpp b/source/common/cpu.cpp
>> index 61cdaadfb..24c60ff0e 100644
>> --- a/source/common/cpu.cpp
>> +++ b/source/common/cpu.cpp
>> @@ -391,6 +391,8 @@ uint32_t cpu_detect(bool benableavx512)
>>
>>  #elif X265_ARCH_ARM64
>>
>> +// TODO: Support ARM on Windows
>> +#if _MSC_VER
>>  uint32_t cpu_detect(bool benableavx512)
>>  {
>>      int flags = 0;
>> @@ -413,6 +415,37 @@ uint32_t cpu_detect(bool benableavx512)
>>
>>      return flags;
>>  }
>> +#else // Linux+Aarch64
>> +
>> +#include <asm/hwcap.h>
>> +#include <sys/auxv.h>
>> +
>> +uint32_t cpu_detect(bool benableavx512)
>> +{
>> +    unsigned long hwcaps = getauxval(AT_HWCAP);
>> +    unsigned long hwcaps2 = getauxval(AT_HWCAP2);
>> +
>> +    int flags = 0;
>> +
>> +    #if HAVE_NEON
>> +         flags |= X265_CPU_NEON;    // All of ARM64 has NEON
>> +    #endif
>> +    #if HAVE_NEON_DOTPROD
>> +         flags |= (hwcaps & HWCAP_ASIMDDP ? X265_CPU_NEON_DOTPROD : 0);
>> +    #endif
>> +    #if HAVE_NEON_I8MM
>> +         flags |= (hwcaps2 & HWCAP2_SVEI8MM ? X265_CPU_NEON_I8MM : 0);
>> +    #endif
>> +    #if HAVE_SVE
>> +         flags |= (hwcaps & HWCAP_SVE ? X265_CPU_SVE : 0);
>> +    #endif
>> +    #if HAVE_SVE2
>> +         flags |= (hwcaps2 & HWCAP2_SVE2 ? X265_CPU_SVE2 : 0);
>> +    #endif
>> +
>> +    return flags;
>> +}
>> +#endif // end of Linux+AArch64
>>
>>  #elif X265_ARCH_POWER8
>>
>> --
>> 2.43.0.windows.1
>>
>>
>> --
>>
>> * <https://multicorewareinc.com/>*
>>   <https://www.linkedin.com/company/multicoreware-inc/>
>> <https://twitter.com/MulticoreWare>
>> <https://www.facebook.com/multicoreware>
>> <https://www.youtube.com/channel/UCXZ1A1MzS5JwBqwBkNfsBBw?sub_confirmation=1>
>>    <https://www.instagram.com/multicoreware.inc/>
>>
>> *Dash Santosh*
>>
>> *Research Engineer, Video Engineering*
>>
>> Mobile: +91 78679 43737
>>
>> IndiQube Echo Point, Avinashi Road
>>
>> Coimbatore - 641 014
>>
>>
>>
>>
>>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20241003/18ea5920/attachment-0001.htm>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: v2-0001-Aarch64-runtime-cpu-detection-windows-ARM.patch
Type: application/octet-stream
Size: 10137 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20241003/18ea5920/attachment-0001.obj>


More information about the x265-devel mailing list