<div dir="ltr">This patch has been pushed to the master branch.<br clear="all"><div><div dir="ltr" class="gmail_signature" data-smartmail="gmail_signature"><div dir="ltr"><div><b>__________________________</b></div><div><b>Karam Singh</b></div><div><b>Ph.D. IIT Guwahati</b></div><div><font size="1">Senior Software (Video Coding) Engineer </font></div><div><font size="1">Mobile: +91 8011279030</font></div><div><font size="1">Block 9A, 6th floor, DLF Cyber City</font></div><div><font size="1">Manapakkam, Chennai 600 089</font></div></div></div></div><br></div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Wed, Oct 9, 2024 at 3:56 PM Ponsanthini Arunachalam <<a href="mailto:ponsanthini.arunachalam@multicorewareinc.com">ponsanthini.arunachalam@multicorewareinc.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><div dir="ltr">From c2038d6d6825354a5137db9cdedabb2067d5b6d2 Mon Sep 17 00:00:00 2001<br>From: Harshitha Suresh <<a href="mailto:harshitha@multicorewareinc.com" target="_blank">harshitha@multicorewareinc.com</a>><br>Date: Wed, 9 Oct 2024 11:58:03 +0530<br>Subject: [PATCH] Revert "AArch64: Runtime CPU feature detection"<br><br>This commit is being reverted since I8MM ARM optimization feature detection flag is not yet supported by Windows.<br>---<br> .../msys/make-x86_64-w64-mingw32-Makefiles.sh | 8 --<br> build/msys/toolchain-x86_64-w64-mingw32.cmake | 6 --<br> source/CMakeLists.txt | 21 ++---<br> source/common/CMakeLists.txt | 5 --<br> source/common/cpu.cpp | 85 +------------------<br> 5 files changed, 9 insertions(+), 116 deletions(-)<br> delete mode 100644 build/msys/make-x86_64-w64-mingw32-Makefiles.sh<br> delete mode 100644 build/msys/toolchain-x86_64-w64-mingw32.cmake<br><br>diff --git a/build/msys/make-x86_64-w64-mingw32-Makefiles.sh b/build/msys/make-x86_64-w64-mingw32-Makefiles.sh<br>deleted file mode 100644<br>index 
d98eced87..000000000<br>--- a/build/msys/make-x86_64-w64-mingw32-Makefiles.sh<br>+++ /dev/null<br>@@ -1,8 +0,0 @@<br>-#!/bin/sh<br>-<br>-# This will generate a cross-compile environment, compiling an x86_64<br>-# Win64 target from a 32bit MinGW32 host environment. If your MinGW<br>-# install is 64bit, you can use the native compiler batch file:<br>-# make-Makefiles.sh<br>-<br>-cmake -G "MSYS Makefiles" -DCMAKE_TOOLCHAIN_FILE=toolchain-x86_64-w64-mingw32.cmake ../../source && cmake-gui ../../source<br>diff --git a/build/msys/toolchain-x86_64-w64-mingw32.cmake b/build/msys/toolchain-x86_64-w64-mingw32.cmake<br>deleted file mode 100644<br>index a3f768b7e..000000000<br>--- a/build/msys/toolchain-x86_64-w64-mingw32.cmake<br>+++ /dev/null<br>@@ -1,6 +0,0 @@<br>-SET(CMAKE_SYSTEM_NAME Windows)<br>-SET(CMAKE_C_COMPILER x86_64-w64-mingw32-gcc)<br>-SET(CMAKE_CXX_COMPILER x86_64-w64-mingw32-g++)<br>-SET(CMAKE_RC_COMPILER x86_64-w64-mingw32-windres)<br>-SET(CMAKE_RANLIB x86_64-w64-mingw32-ranlib)<br>-SET(CMAKE_ASM_YASM_COMPILER yasm)<br>diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt<br>index cd19050c3..8a3128bb7 100755<br>--- a/source/CMakeLists.txt<br>+++ b/source/CMakeLists.txt<br>@@ -303,12 +303,10 @@ if(GCC)<br> endif()<br> endif()<br> <br>- set(ARM64_ARCH_ARGS "-O3")<br> if(CPU_HAS_NEON_DOTPROD)<br> # Neon DotProd is mandatory from Armv8.4.<br> message(STATUS "Found Neon DotProd")<br>- set(ARM64_ARCH_ARGS -march=armv8.2-a+dotprod)<br>- set(ARM_ARGS -O3)<br>+ set(ARM_ARGS -O3 -march=armv8.2-a+dotprod)<br> add_definitions(-DHAVE_NEON_DOTPROD=1)<br> endif()<br> if(CPU_HAS_NEON_I8MM)<br>@@ -318,8 +316,7 @@ if(GCC)<br> if(NOT CPU_HAS_NEON_DOTPROD)<br> message(FATAL_ERROR "Unsupported AArch64 feature combination (Neon I8MM without Neon DotProd)")<br> endif()<br>- set(ARM64_ARCH_ARGS -march=armv8.2-a+dotprod+i8mm)<br>- set(ARM_ARGS -O3)<br>+ set(ARM_ARGS -O3 -march=armv8.2-a+dotprod+i8mm)<br> add_definitions(-DHAVE_NEON_I8MM=1)<br> endif()<br> if(CPU_HAS_SVE)<br>@@ 
-328,15 +325,13 @@ if(GCC)<br> if(NOT CPU_HAS_NEON_I8MM)<br> message(FATAL_ERROR "Unsupported AArch64 feature combination (SVE without Neon I8MM)")<br> endif()<br>- set(ARM64_ARCH_ARGS -march=armv8.2-a+dotprod+i8mm+sve)<br>- set(ARM_ARGS -O3)<br>+ set(ARM_ARGS -O3 -march=armv8.2-a+dotprod+i8mm+sve)<br> add_definitions(-DHAVE_SVE=1)<br> endif()<br> if(CPU_HAS_SVE2)<br> message(STATUS "Found SVE2")<br> # SVE2 is only available from Armv9.0, and armv9-a implies +dotprod<br>- set(ARM64_ARCH_ARGS -march=armv9-a+i8mm+sve2)<br>- set(ARM_ARGS -O3)<br>+ set(ARM_ARGS -O3 -march=armv8.2-a+dotprod+i8mm+sve)<br> add_definitions(-DHAVE_SVE2=1)<br> endif()<br> set(ARM_ARGS ${ARM_ARGS} -fPIC)<br>@@ -701,7 +696,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)<br> add_custom_command(<br> OUTPUT ${ASM}.${SUFFIX}<br> COMMAND ${CMAKE_CXX_COMPILER}<br>- ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}<br>+ ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}<br> DEPENDS ${ASM_SRC})<br> endforeach()<br> if(CPU_HAS_SVE2)<br>@@ -712,7 +707,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)<br> add_custom_command(<br> OUTPUT ${ASM}.${SUFFIX}<br> COMMAND ${CMAKE_CXX_COMPILER}<br>- ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}<br>+ ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}<br> DEPENDS ${ASM_SRC})<br> endforeach()<br> endif()<br>@@ -724,7 +719,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)<br> add_custom_command(<br> OUTPUT ${ASM}.${SUFFIX}<br> COMMAND ${CMAKE_CXX_COMPILER}<br>- ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}<br>+ ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}<br> DEPENDS ${ASM_SRC})<br> endforeach()<br> endif()<br>@@ -736,7 +731,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)<br> add_custom_command(<br> OUTPUT ${ASM}.${SUFFIX}<br> COMMAND ${CMAKE_CXX_COMPILER}<br>- ARGS ${ARM_ARGS} 
${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}<br>+ ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}<br> DEPENDS ${ASM_SRC})<br> endforeach()<br> endif()<br>diff --git a/source/common/CMakeLists.txt b/source/common/CMakeLists.txt<br>index 33025cada..dc4a74107 100644<br>--- a/source/common/CMakeLists.txt<br>+++ b/source/common/CMakeLists.txt<br>@@ -123,34 +123,29 @@ if(ENABLE_ASSEMBLY AND (ARM64 OR CROSS_COMPILE_ARM64))<br> set(ARM_ASMS_NEON_DOTPROD "${A_SRCS_NEON_DOTPROD}" CACHE INTERNAL "Arm Assembly Sources that use the Neon DotProd extension")<br> foreach(SRC ${C_SRCS_NEON})<br> set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})<br>- set_source_files_properties( ${SRC} PROPERTIES COMPILE_FLAGS ${ARM64_ARCH_ARGS} )<br> endforeach()<br> <br> if(CPU_HAS_NEON_I8MM)<br> foreach(SRC ${C_SRCS_NEON_I8MM})<br> set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})<br>- set_source_files_properties( aarch64/${SRC} PROPERTIES COMPILE_FLAGS ${ARM64_ARCH_ARGS} )<br> endforeach()<br> endif()<br> <br> if(CPU_HAS_NEON_DOTPROD)<br> foreach(SRC ${C_SRCS_NEON_DOTPROD})<br> set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})<br>- set_source_files_properties( aarch64/${SRC} PROPERTIES COMPILE_FLAGS ${ARM64_ARCH_ARGS} )<br> endforeach()<br> endif()<br> <br> if(CPU_HAS_SVE AND HAVE_SVE_BRIDGE)<br> foreach(SRC ${C_SRCS_SVE})<br> set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})<br>- set_source_files_properties( aarch64/${SRC} PROPERTIES COMPILE_FLAGS ${ARM64_ARCH_ARGS} )<br> endforeach()<br> endif()<br> <br> if(CPU_HAS_SVE2 AND HAVE_SVE_BRIDGE)<br> foreach(SRC ${C_SRCS_SVE2})<br> set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})<br>- set_source_files_properties( aarch64/${SRC} PROPERTIES COMPILE_FLAGS ${ARM64_ARCH_ARGS} )<br> endforeach()<br> endif()<br> <br>diff --git a/source/common/cpu.cpp b/source/common/cpu.cpp<br>index c1fa928e4..d18aeb8d2 100644<br>--- a/source/common/cpu.cpp<br>+++ b/source/common/cpu.cpp<br>@@ -391,8 +391,7 @@ uint32_t 
cpu_detect(bool benableavx512)<br> <br> #elif X265_ARCH_ARM64<br> <br>-#if defined(_MSC_VER) || defined(__APPLE__)<br>-uint32_t cpu_detect(bool /*benableavx512*/)<br>+uint32_t cpu_detect(bool benableavx512)<br> {<br> int flags = 0;<br> <br>@@ -417,88 +416,6 @@ uint32_t cpu_detect(bool /*benableavx512*/)<br> return flags;<br> }<br> <br>-// TODO: Remove isOryonCPU() once Windows defines PF_ flag for I8MM on supported ARM64 devices<br>-#elif defined(__MINGW64__) // Windows+Aarch64<br>-<br>-#include &lt;windows.h&gt;<br>-#include &lt;processthreadsapi.h&gt;<br>-<br>-bool isOryonCPU()<br>-{<br>-<br>- char processorName[128];<br>- DWORD bufferSize = 128;<br>-<br>- LONG result = RegGetValue(HKEY_LOCAL_MACHINE, "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", "ProcessorNameString", RRF_RT_ANY, NULL, (PVOID)&processorName, &bufferSize);<br>- if (strstr(processorName, "Oryon") != NULL)<br>- {<br>- return true;<br>- }<br>- else<br>- {<br>- return false;<br>- }<br>-}<br>-uint32_t cpu_detect(bool /*benableavx512*/)<br>-{<br>-<br>- int flags = 0;<br>-<br>-#ifdef ENABLE_ASSEMBLY<br>- #if HAVE_NEON<br>- flags |= X265_CPU_NEON; // All of ARM64 has NEON<br>- #endif<br>- #if HAVE_NEON_DOTPROD && defined(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)<br>- flags |= IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) ? X265_CPU_NEON_DOTPROD : 0;<br>- #endif<br>- #if HAVE_NEON_I8MM<br>- flags |= isOryonCPU() ? X265_CPU_NEON_I8MM : 0;<br>- #endif<br>- #if HAVE_SVE && defined(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE)<br>- flags |= IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE) ? X265_CPU_SVE : 0;<br>- #endif<br>- #if HAVE_SVE2 && defined(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE)<br>- flags |= IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE) ? 
X265_CPU_SVE2 : 0;<br>- #endif<br>-#endif<br>-<br>- return flags;<br>-} // end of Windows+Aarch64<br>-<br>-#else // Linux+Aarch64<br>-<br>-#include &lt;asm/hwcap.h&gt;<br>-#include &lt;sys/auxv.h&gt;<br>-<br>-uint32_t cpu_detect(bool /*benableavx512*/)<br>-{<br>- unsigned long hwcaps = getauxval(AT_HWCAP);<br>- unsigned long hwcaps2 = getauxval(AT_HWCAP2);<br>-<br>- int flags = 0;<br>-<br>-#ifdef ENABLE_ASSEMBLY<br>- #if HAVE_NEON<br>- flags |= X265_CPU_NEON; // All of ARM64 has NEON<br>- #endif<br>- #if HAVE_NEON_DOTPROD<br>- flags |= (hwcaps & HWCAP_ASIMDDP ? X265_CPU_NEON_DOTPROD : 0);<br>- #endif<br>- #if HAVE_NEON_I8MM<br>- flags |= (hwcaps2 & HWCAP2_I8MM ? X265_CPU_NEON_I8MM : 0);<br>- #endif<br>- #if HAVE_SVE<br>- flags |= (hwcaps & HWCAP_SVE ? X265_CPU_SVE : 0);<br>- #endif<br>- #if HAVE_SVE2<br>- flags |= (hwcaps2 & HWCAP2_SVE2 ? X265_CPU_SVE2 : 0);<br>- #endif<br>-#endif<br>-<br>- return flags;<br>-}<br>-#endif // end of Linux+AArch64<br>-<br> #elif X265_ARCH_POWER8<br> <br> uint32_t cpu_detect(bool benableavx512)<br>-- <br>2.36.0.windows.1<br><br></div>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org" target="_blank">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" rel="noreferrer" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</blockquote></div>