[x265] [X265][PATCH] Revert "AArch64: Runtime CPU feature detection"

Ponsanthini Arunachalam ponsanthini.arunachalam at multicorewareinc.com
Wed Oct 9 10:25:56 UTC 2024


>From c2038d6d6825354a5137db9cdedabb2067d5b6d2 Mon Sep 17 00:00:00 2001
From: Harshitha Suresh <harshitha at multicorewareinc.com>
Date: Wed, 9 Oct 2024 11:58:03 +0530
Subject: [PATCH] Revert "AArch64: Runtime CPU feature detection"

This reverts the AArch64 runtime CPU feature detection commit because Windows
does not yet define a processor-feature flag for detecting the Arm I8MM extension.
---
 .../msys/make-x86_64-w64-mingw32-Makefiles.sh |  8 --
 build/msys/toolchain-x86_64-w64-mingw32.cmake |  6 --
 source/CMakeLists.txt                         | 21 ++---
 source/common/CMakeLists.txt                  |  5 --
 source/common/cpu.cpp                         | 85 +------------------
 5 files changed, 9 insertions(+), 116 deletions(-)
 delete mode 100644 build/msys/make-x86_64-w64-mingw32-Makefiles.sh
 delete mode 100644 build/msys/toolchain-x86_64-w64-mingw32.cmake

diff --git a/build/msys/make-x86_64-w64-mingw32-Makefiles.sh b/build/msys/make-x86_64-w64-mingw32-Makefiles.sh
deleted file mode 100644
index d98eced87..000000000
--- a/build/msys/make-x86_64-w64-mingw32-Makefiles.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/bin/sh
-
-# This will generate a cross-compile environment, compiling an x86_64
-# Win64 target from a 32bit MinGW32 host environment.  If your MinGW
-# install is 64bit, you can use the native compiler batch file:
-# make-Makefiles.sh
-
-cmake -G "MSYS Makefiles" -DCMAKE_TOOLCHAIN_FILE=toolchain-x86_64-w64-mingw32.cmake ../../source && cmake-gui ../../source
diff --git a/build/msys/toolchain-x86_64-w64-mingw32.cmake b/build/msys/toolchain-x86_64-w64-mingw32.cmake
deleted file mode 100644
index a3f768b7e..000000000
--- a/build/msys/toolchain-x86_64-w64-mingw32.cmake
+++ /dev/null
@@ -1,6 +0,0 @@
-SET(CMAKE_SYSTEM_NAME Windows)
-SET(CMAKE_C_COMPILER x86_64-w64-mingw32-gcc)
-SET(CMAKE_CXX_COMPILER x86_64-w64-mingw32-g++)
-SET(CMAKE_RC_COMPILER x86_64-w64-mingw32-windres)
-SET(CMAKE_RANLIB x86_64-w64-mingw32-ranlib)
-SET(CMAKE_ASM_YASM_COMPILER yasm)
diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
index cd19050c3..8a3128bb7 100755
--- a/source/CMakeLists.txt
+++ b/source/CMakeLists.txt
@@ -303,12 +303,10 @@ if(GCC)
             endif()
         endif()

-        set(ARM64_ARCH_ARGS "-O3")
         if(CPU_HAS_NEON_DOTPROD)
             # Neon DotProd is mandatory from Armv8.4.
             message(STATUS "Found Neon DotProd")
-            set(ARM64_ARCH_ARGS -march=armv8.2-a+dotprod)
-            set(ARM_ARGS -O3)
+            set(ARM_ARGS -O3 -march=armv8.2-a+dotprod)
             add_definitions(-DHAVE_NEON_DOTPROD=1)
         endif()
         if(CPU_HAS_NEON_I8MM)
@@ -318,8 +316,7 @@ if(GCC)
             if(NOT CPU_HAS_NEON_DOTPROD)
                 message(FATAL_ERROR "Unsupported AArch64 feature
combination (Neon I8MM without Neon DotProd)")
             endif()
-            set(ARM64_ARCH_ARGS -march=armv8.2-a+dotprod+i8mm)
-            set(ARM_ARGS -O3)
+            set(ARM_ARGS -O3 -march=armv8.2-a+dotprod+i8mm)
             add_definitions(-DHAVE_NEON_I8MM=1)
         endif()
         if(CPU_HAS_SVE)
@@ -328,15 +325,13 @@ if(GCC)
             if(NOT CPU_HAS_NEON_I8MM)
                 message(FATAL_ERROR "Unsupported AArch64 feature
combination (SVE without Neon I8MM)")
             endif()
-            set(ARM64_ARCH_ARGS -march=armv8.2-a+dotprod+i8mm+sve)
-            set(ARM_ARGS -O3)
+            set(ARM_ARGS -O3 -march=armv8.2-a+dotprod+i8mm+sve)
             add_definitions(-DHAVE_SVE=1)
         endif()
         if(CPU_HAS_SVE2)
             message(STATUS "Found SVE2")
             # SVE2 is only available from Armv9.0, and armv9-a implies +dotprod
-            set(ARM64_ARCH_ARGS -march=armv9-a+i8mm+sve2)
-            set(ARM_ARGS -O3)
+            set(ARM_ARGS -O3 -march=armv8.2-a+dotprod+i8mm+sve)
             add_definitions(-DHAVE_SVE2=1)
         endif()
         set(ARM_ARGS ${ARM_ARGS} -fPIC)
@@ -701,7 +696,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)
             add_custom_command(
                 OUTPUT ${ASM}.${SUFFIX}
                 COMMAND ${CMAKE_CXX_COMPILER}
-                ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c
${ASM_SRC} -o ${ASM}.${SUFFIX}
+                ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o
${ASM}.${SUFFIX}
                 DEPENDS ${ASM_SRC})
         endforeach()
         if(CPU_HAS_SVE2)
@@ -712,7 +707,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)
                 add_custom_command(
                     OUTPUT ${ASM}.${SUFFIX}
                     COMMAND ${CMAKE_CXX_COMPILER}
-                    ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c
${ASM_SRC} -o ${ASM}.${SUFFIX}
+                    ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o
${ASM}.${SUFFIX}
                     DEPENDS ${ASM_SRC})
             endforeach()
         endif()
@@ -724,7 +719,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)
                 add_custom_command(
                     OUTPUT ${ASM}.${SUFFIX}
                     COMMAND ${CMAKE_CXX_COMPILER}
-                    ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c
${ASM_SRC} -o ${ASM}.${SUFFIX}
+                    ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o
${ASM}.${SUFFIX}
                     DEPENDS ${ASM_SRC})
             endforeach()
         endif()
@@ -736,7 +731,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)
                 add_custom_command(
                     OUTPUT ${ASM}.${SUFFIX}
                     COMMAND ${CMAKE_CXX_COMPILER}
-                    ARGS ${ARM_ARGS} ${ARM64_ARCH_ARGS} ${ASM_FLAGS} -c
${ASM_SRC} -o ${ASM}.${SUFFIX}
+                    ARGS ${ARM_ARGS} ${ASM_FLAGS} -c ${ASM_SRC} -o
${ASM}.${SUFFIX}
                     DEPENDS ${ASM_SRC})
             endforeach()
         endif()
diff --git a/source/common/CMakeLists.txt b/source/common/CMakeLists.txt
index 33025cada..dc4a74107 100644
--- a/source/common/CMakeLists.txt
+++ b/source/common/CMakeLists.txt
@@ -123,34 +123,29 @@ if(ENABLE_ASSEMBLY AND (ARM64 OR CROSS_COMPILE_ARM64))
     set(ARM_ASMS_NEON_DOTPROD "${A_SRCS_NEON_DOTPROD}" CACHE INTERNAL "Arm
Assembly Sources that use the Neon DotProd extension")
     foreach(SRC ${C_SRCS_NEON})
         set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})
-        set_source_files_properties( ${SRC} PROPERTIES COMPILE_FLAGS
${ARM64_ARCH_ARGS} )
     endforeach()

     if(CPU_HAS_NEON_I8MM)
         foreach(SRC ${C_SRCS_NEON_I8MM})
             set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})
-            set_source_files_properties( aarch64/${SRC} PROPERTIES
COMPILE_FLAGS ${ARM64_ARCH_ARGS} )
         endforeach()
     endif()

     if(CPU_HAS_NEON_DOTPROD)
         foreach(SRC ${C_SRCS_NEON_DOTPROD})
             set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})
-            set_source_files_properties( aarch64/${SRC} PROPERTIES
COMPILE_FLAGS ${ARM64_ARCH_ARGS} )
         endforeach()
     endif()

     if(CPU_HAS_SVE AND HAVE_SVE_BRIDGE)
         foreach(SRC ${C_SRCS_SVE})
             set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})
-            set_source_files_properties( aarch64/${SRC} PROPERTIES
COMPILE_FLAGS ${ARM64_ARCH_ARGS} )
         endforeach()
     endif()

     if(CPU_HAS_SVE2 AND HAVE_SVE_BRIDGE)
         foreach(SRC ${C_SRCS_SVE2})
             set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})
-            set_source_files_properties( aarch64/${SRC} PROPERTIES
COMPILE_FLAGS ${ARM64_ARCH_ARGS} )
         endforeach()
     endif()

diff --git a/source/common/cpu.cpp b/source/common/cpu.cpp
index c1fa928e4..d18aeb8d2 100644
--- a/source/common/cpu.cpp
+++ b/source/common/cpu.cpp
@@ -391,8 +391,7 @@ uint32_t cpu_detect(bool benableavx512)

 #elif X265_ARCH_ARM64

-#if defined(_MSC_VER) || defined(__APPLE__)
-uint32_t cpu_detect(bool /*benableavx512*/)
+uint32_t cpu_detect(bool benableavx512)
 {
     int flags = 0;

@@ -417,88 +416,6 @@ uint32_t cpu_detect(bool /*benableavx512*/)
     return flags;
 }

-// TODO: Remove isOryonCPU() once Windows defines PF_ flag for I8MM on
supported ARM64 devices
-#elif defined(__MINGW64__) // Windows+Aarch64
-
-#include <windows.h>
-#include <processthreadsapi.h>
-
-bool isOryonCPU()
-{
-
-    char processorName[128];
-    DWORD bufferSize = 128;
-
-    LONG result = RegGetValue(HKEY_LOCAL_MACHINE,
"HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0",
"ProcessorNameString", RRF_RT_ANY, NULL, (PVOID)&processorName,
&bufferSize);
-    if (strstr(processorName, "Oryon") != NULL)
-    {
-        return true;
-    }
-    else
-    {
-        return false;
-    }
-}
-uint32_t cpu_detect(bool /*benableavx512*/)
-{
-
-    int flags = 0;
-
-#ifdef ENABLE_ASSEMBLY
-    #if HAVE_NEON
-         flags |= X265_CPU_NEON;    // All of ARM64 has NEON
-    #endif
-    #if HAVE_NEON_DOTPROD && defined(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)
-         flags |=
IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) ?
X265_CPU_NEON_DOTPROD : 0;
-    #endif
-    #if HAVE_NEON_I8MM
-         flags |= isOryonCPU() ? X265_CPU_NEON_I8MM : 0;
-    #endif
-    #if HAVE_SVE && defined(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE)
-         flags |=
IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE) ? X265_CPU_SVE
: 0;
-    #endif
-    #if HAVE_SVE2 && defined(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE)
-         flags |=
IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE) ?
X265_CPU_SVE2 : 0;
-    #endif
-#endif
-
-    return flags;
-} // end of Windows+Aarch64
-
-#else // Linux+Aarch64
-
-#include <asm/hwcap.h>
-#include <sys/auxv.h>
-
-uint32_t cpu_detect(bool /*benableavx512*/)
-{
-    unsigned long hwcaps = getauxval(AT_HWCAP);
-    unsigned long hwcaps2 = getauxval(AT_HWCAP2);
-
-    int flags = 0;
-
-#ifdef ENABLE_ASSEMBLY
-    #if HAVE_NEON
-         flags |= X265_CPU_NEON;    // All of ARM64 has NEON
-    #endif
-    #if HAVE_NEON_DOTPROD
-         flags |= (hwcaps & HWCAP_ASIMDDP ? X265_CPU_NEON_DOTPROD : 0);
-    #endif
-    #if HAVE_NEON_I8MM
-         flags |= (hwcaps2 & HWCAP2_I8MM ? X265_CPU_NEON_I8MM : 0);
-    #endif
-    #if HAVE_SVE
-         flags |= (hwcaps & HWCAP_SVE ? X265_CPU_SVE : 0);
-    #endif
-    #if HAVE_SVE2
-         flags |= (hwcaps2 & HWCAP2_SVE2 ? X265_CPU_SVE2 : 0);
-    #endif
-#endif
-
-    return flags;
-}
-#endif // end of Linux+AArch64
-
 #elif X265_ARCH_POWER8

 uint32_t cpu_detect(bool benableavx512)
-- 
2.36.0.windows.1
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20241009/d4ac3622/attachment-0001.htm>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-Revert-AArch64-Runtime-CPU-feature-detection.patch
Type: application/octet-stream
Size: 10365 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20241009/d4ac3622/attachment-0001.obj>


More information about the x265-devel mailing list