<div dir="ltr">From c2038d6d6825354a5137db9cdedabb2067d5b6d2 Mon Sep 17 00:00:00 2001<br>From: Harshitha Suresh <<a href="mailto:harshitha@multicorewareinc.com">harshitha@multicorewareinc.com</a>><br>Date: Wed, 9 Oct 2024 11:58:03 +0530<br>Subject: [PATCH] Revert "AArch64: Runtime CPU feature detection"<br><br>This commit is being reverted because the feature-detection flag for the Arm I8MM optimization is not yet supported on Windows.<br>---<br> .../msys/make-x86_64-w64-mingw32-Makefiles.sh |  8 --<br> build/msys/toolchain-x86_64-w64-mingw32.cmake |  6 --<br> source/CMakeLists.txt                         | 21 ++---<br> source/common/CMakeLists.txt                  |  5 --<br> source/common/cpu.cpp                         | 85 +------------------<br> 5 files changed, 9 insertions(+), 116 deletions(-)<br> delete mode 100644 build/msys/make-x86_64-w64-mingw32-Makefiles.sh<br> delete mode 100644 build/msys/toolchain-x86_64-w64-mingw32.cmake<br><br>diff --git a/build/msys/make-x86_64-w64-mingw32-Makefiles.sh b/build/msys/make-x86_64-w64-mingw32-Makefiles.sh<br>deleted file mode 100644<br>index d98eced87..000000000<br>--- a/build/msys/make-x86_64-w64-mingw32-Makefiles.sh<br>+++ /dev/null<br>@@ -1,8 +0,0 @@<br>-#!/bin/sh<br>-<br>-# This will generate a cross-compile environment, compiling an x86_64<br>-# Win64 target from a 32bit MinGW32 host environment.  
If your MinGW<br>-# install is 64bit, you can use the native compiler batch file:<br>-# make-Makefiles.sh<br>-<br>-cmake -G "MSYS Makefiles" -DCMAKE_TOOLCHAIN_FILE=toolchain-x86_64-w64-mingw32.cmake ../../source && cmake-gui ../../source<br>diff --git a/build/msys/toolchain-x86_64-w64-mingw32.cmake b/build/msys/toolchain-x86_64-w64-mingw32.cmake<br>deleted file mode 100644<br>index a3f768b7e..000000000<br>--- a/build/msys/toolchain-x86_64-w64-mingw32.cmake<br>+++ /dev/null<br>@@ -1,6 +0,0 @@<br>-SET(CMAKE_SYSTEM_NAME Windows)<br>-SET(CMAKE_C_COMPILER x86_64-w64-mingw32-gcc)<br>-SET(CMAKE_CXX_COMPILER x86_64-w64-mingw32-g++)<br>-SET(CMAKE_RC_COMPILER x86_64-w64-mingw32-windres)<br>-SET(CMAKE_RANLIB x86_64-w64-mingw32-ranlib)<br>-SET(CMAKE_ASM_YASM_COMPILER yasm)<br>diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt<br>index cd19050c3..8a3128bb7 100755<br>--- a/source/CMakeLists.txt<br>+++ b/source/CMakeLists.txt<br>@@ -303,12 +303,10 @@ if(GCC)<br>             endif()<br>         endif()<br> <br>-        set(ARM64_ARCH_ARGS "-O3")<br>         if(CPU_HAS_NEON_DOTPROD)<br>             # Neon DotProd is mandatory from Armv8.4.<br>             message(STATUS "Found Neon DotProd")<br>-            set(ARM64_ARCH_ARGS -march=armv8.2-a+dotprod)<br>-            set(ARM_ARGS -O3)<br>+            set(ARM_ARGS -O3 -march=armv8.2-a+dotprod)<br>             add_definitions(-DHAVE_NEON_DOTPROD=1)<br>         endif()<br>         if(CPU_HAS_NEON_I8MM)<br>@@ -318,8 +316,7 @@ if(GCC)<br>             if(NOT CPU_HAS_NEON_DOTPROD)<br>                 message(FATAL_ERROR "Unsupported AArch64 feature combination (Neon I8MM without Neon DotProd)")<br>             endif()<br>-            set(ARM64_ARCH_ARGS -march=armv8.2-a+dotprod+i8mm)<br>-            set(ARM_ARGS -O3)<br>+            set(ARM_ARGS -O3 -march=armv8.2-a+dotprod+i8mm)<br>             add_definitions(-DHAVE_NEON_I8MM=1)<br>         endif()<br>         if(CPU_HAS_SVE)<br>@@ -328,15 +325,13 @@ if(GCC)<br>             
if(NOT CPU_HAS_NEON_I8MM)<br>                 message(FATAL_ERROR "Unsupported AArch64 feature combination (SVE without Neon I8MM)")<br>             endif()<br>-            set(ARM64_ARCH_ARGS -march=armv8.2-a+dotprod+i8mm+sve)<br>-            set(ARM_ARGS -O3)<br>+            set(ARM_ARGS -O3 -march=armv8.2-a+dotprod+i8mm+sve)<br>             add_definitions(-DHAVE_SVE=1)<br>         endif()<br>         if(CPU_HAS_SVE2)<br>             message(STATUS "Found SVE2")<br>             # SVE2 is only available from Armv9.0, and armv9-a implies +dotprod<br>-            set(ARM64_ARCH_ARGS -march=armv9-a+i8mm+sve2)<br>-            set(ARM_ARGS -O3)<br>+            set(ARM_ARGS -O3 -march=armv8.2-a+dotprod+i8mm+sve)<br>             add_definitions(-DHAVE_SVE2=1)<br>         endif()<br>         set(ARM_ARGS ${ARM_ARGS} -fPIC)<br>@@ -701,7 +696,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)<br>             add_custom_command(<br>                 OUTPUT ${ASM}.${SUFFIX}<br>                 COMMAND ${CMAKE_CXX_COMPILER}<br>-                ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}<br>+                ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}<br>                 DEPENDS ${ASM_SRC})<br>         endforeach()<br>         if(CPU_HAS_SVE2)<br>@@ -712,7 +707,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)<br>                 add_custom_command(<br>                     OUTPUT ${ASM}.${SUFFIX}<br>                     COMMAND ${CMAKE_CXX_COMPILER}<br>-                    ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}<br>+                    ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}<br>                     DEPENDS ${ASM_SRC})<br>             endforeach()<br>         endif()<br>@@ -724,7 +719,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)<br>                 add_custom_command(<br>                     OUTPUT ${ASM}.${SUFFIX}<br>                 
    COMMAND ${CMAKE_CXX_COMPILER}<br>-                    ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}<br>+                    ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}<br>                     DEPENDS ${ASM_SRC})<br>             endforeach()<br>         endif()<br>@@ -736,7 +731,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)<br>                 add_custom_command(<br>                     OUTPUT ${ASM}.${SUFFIX}<br>                     COMMAND ${CMAKE_CXX_COMPILER}<br>-                    ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}<br>+                    ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}<br>                     DEPENDS ${ASM_SRC})<br>             endforeach()<br>         endif()<br>diff --git a/source/common/CMakeLists.txt b/source/common/CMakeLists.txt<br>index 33025cada..dc4a74107 100644<br>--- a/source/common/CMakeLists.txt<br>+++ b/source/common/CMakeLists.txt<br>@@ -123,34 +123,29 @@ if(ENABLE_ASSEMBLY AND (ARM64 OR CROSS_COMPILE_ARM64))<br>     set(ARM_ASMS_NEON_DOTPROD "${A_SRCS_NEON_DOTPROD}" CACHE INTERNAL "Arm Assembly Sources that use the Neon DotProd extension")<br>     foreach(SRC ${C_SRCS_NEON})<br>         set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})<br>-        set_source_files_properties( ${SRC} PROPERTIES COMPILE_FLAGS ${ARM64_ARCH_ARGS} )<br>     endforeach()<br> <br>     if(CPU_HAS_NEON_I8MM)<br>         foreach(SRC ${C_SRCS_NEON_I8MM})<br>             set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})<br>-            set_source_files_properties( aarch64/${SRC} PROPERTIES COMPILE_FLAGS ${ARM64_ARCH_ARGS} )<br>         endforeach()<br>     endif()<br> <br>     if(CPU_HAS_NEON_DOTPROD)<br>         foreach(SRC ${C_SRCS_NEON_DOTPROD})<br>             set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})<br>-            set_source_files_properties( aarch64/${SRC} PROPERTIES COMPILE_FLAGS ${ARM64_ARCH_ARGS} 
)<br>         endforeach()<br>     endif()<br> <br>     if(CPU_HAS_SVE AND HAVE_SVE_BRIDGE)<br>         foreach(SRC ${C_SRCS_SVE})<br>             set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})<br>-            set_source_files_properties( aarch64/${SRC} PROPERTIES COMPILE_FLAGS ${ARM64_ARCH_ARGS} )<br>         endforeach()<br>     endif()<br> <br>     if(CPU_HAS_SVE2 AND HAVE_SVE_BRIDGE)<br>         foreach(SRC ${C_SRCS_SVE2})<br>             set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})<br>-            set_source_files_properties( aarch64/${SRC} PROPERTIES COMPILE_FLAGS ${ARM64_ARCH_ARGS} )<br>         endforeach()<br>     endif()<br> <br>diff --git a/source/common/cpu.cpp b/source/common/cpu.cpp<br>index c1fa928e4..d18aeb8d2 100644<br>--- a/source/common/cpu.cpp<br>+++ b/source/common/cpu.cpp<br>@@ -391,8 +391,7 @@ uint32_t cpu_detect(bool benableavx512)<br> <br> #elif X265_ARCH_ARM64<br> <br>-#if defined(_MSC_VER) || defined(__APPLE__)<br>-uint32_t cpu_detect(bool /*benableavx512*/)<br>+uint32_t cpu_detect(bool benableavx512)<br> {<br>     int flags = 0;<br> <br>@@ -417,88 +416,6 @@ uint32_t cpu_detect(bool /*benableavx512*/)<br>     return flags;<br> }<br> <br>-// TODO: Remove isOryonCPU() once Windows defines PF_ flag for I8MM on supported ARM64 devices<br>-#elif defined(__MINGW64__) // Windows+Aarch64<br>-<br>-#include <windows.h><br>-#include <processthreadsapi.h><br>-<br>-bool isOryonCPU()<br>-{<br>-<br>-    char processorName[128];<br>-    DWORD bufferSize = 128;<br>-<br>-    LONG result = RegGetValue(HKEY_LOCAL_MACHINE, "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", "ProcessorNameString", RRF_RT_ANY, NULL, (PVOID)&processorName, &bufferSize);<br>-    if (strstr(processorName, "Oryon") != NULL)<br>-    {<br>-        return true;<br>-    }<br>-    else<br>-    {<br>-        return false;<br>-    }<br>-}<br>-uint32_t cpu_detect(bool /*benableavx512*/)<br>-{<br>-<br>-    int flags = 0;<br>-<br>-#ifdef ENABLE_ASSEMBLY<br>-    #if 
HAVE_NEON<br>-         flags |= X265_CPU_NEON;    // All of ARM64 has NEON<br>-    #endif<br>-    #if HAVE_NEON_DOTPROD && defined(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)<br>-         flags |= IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) ? X265_CPU_NEON_DOTPROD : 0;<br>-    #endif<br>-    #if HAVE_NEON_I8MM<br>-         flags |= isOryonCPU() ? X265_CPU_NEON_I8MM : 0;<br>-    #endif<br>-    #if HAVE_SVE && defined(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE)<br>-         flags |= IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE) ? X265_CPU_SVE : 0;<br>-    #endif<br>-    #if HAVE_SVE2 && defined(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE)<br>-         flags |= IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE) ? X265_CPU_SVE2 : 0;<br>-    #endif<br>-#endif<br>-<br>-    return flags;<br>-} // end of Windows+Aarch64<br>-<br>-#else // Linux+Aarch64<br>-<br>-#include <asm/hwcap.h><br>-#include <sys/auxv.h><br>-<br>-uint32_t cpu_detect(bool /*benableavx512*/)<br>-{<br>-    unsigned long hwcaps = getauxval(AT_HWCAP);<br>-    unsigned long hwcaps2 = getauxval(AT_HWCAP2);<br>-<br>-    int flags = 0;<br>-<br>-#ifdef ENABLE_ASSEMBLY<br>-    #if HAVE_NEON<br>-         flags |= X265_CPU_NEON;    // All of ARM64 has NEON<br>-    #endif<br>-    #if HAVE_NEON_DOTPROD<br>-         flags |= (hwcaps & HWCAP_ASIMDDP ? X265_CPU_NEON_DOTPROD : 0);<br>-    #endif<br>-    #if HAVE_NEON_I8MM<br>-         flags |= (hwcaps2 & HWCAP2_I8MM ? X265_CPU_NEON_I8MM : 0);<br>-    #endif<br>-    #if HAVE_SVE<br>-         flags |= (hwcaps & HWCAP_SVE ? X265_CPU_SVE : 0);<br>-    #endif<br>-    #if HAVE_SVE2<br>-         flags |= (hwcaps2 & HWCAP2_SVE2 ? X265_CPU_SVE2 : 0);<br>-    #endif<br>-#endif<br>-<br>-    return flags;<br>-}<br>-#endif // end of Linux+AArch64<br>-<br> #elif X265_ARCH_POWER8<br> <br> uint32_t cpu_detect(bool benableavx512)<br>-- <br>2.36.0.windows.1<br><br></div>