[x264-devel] Fix x264 CPU detection with >=64 CPUs on Windows

Steven Walters git at videolan.org
Wed Jan 26 02:56:55 CET 2011


x264 | branch: master | Steven Walters <kemuri9 at gmail.com> | Sun Jan 23 15:19:11 2011 -0500| [b8e1d1b753c594b1b3fa3d7ecbf1e393200ae7b3] | committer: Jason Garrett-Glaser

Fix x264 CPU detection with >=64 CPUs on Windows
Note, however, that x264 still won't use more than one processor group's worth of CPUs (at most 64 logical processors).
This isn't a problem, as a single x264 instance can't effectively use a full 64 cores anyway.

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=b8e1d1b753c594b1b3fa3d7ecbf1e393200ae7b3
---

 common/win32thread.c |   43 ++++++++++++++++++++++++++++++++++---------
 1 files changed, 34 insertions(+), 9 deletions(-)

diff --git a/common/win32thread.c b/common/win32thread.c
index 1679c4e..7c93928 100644
--- a/common/win32thread.c
+++ b/common/win32thread.c
@@ -24,8 +24,11 @@
  * For more information, contact us at licensing at x264.com.
  *****************************************************************************/
 
-/* TODO: work with windows 7 x86_64's (and later systems) awkward
- *       way of handling systems with >64 logical processors */
+/* Microsoft's way of supporting systems with >64 logical cpus can be found at
+ * http://www.microsoft.com/whdc/system/Sysinternals/MoreThan64proc.mspx */
+
+/* Based on the agreed position that x264 does not need to utilize >64 logical cpus,
+ * this API neither detects nor utilizes more than 64 cpus on systems that have them. */
 
 #include "common.h"
 #include <process.h>
@@ -33,6 +36,14 @@
 /* number of times to spin a thread about to block on a locked mutex before retrying and sleeping if still locked */
 #define X264_SPIN_COUNT 0
 
+/* GROUP_AFFINITY struct */
+typedef struct
+{
+    ULONG_PTR mask; // KAFFINITY = ULONG_PTR
+    USHORT group;
+    USHORT reserved[3];
+} x264_group_affinity_t;
+
 typedef struct
 {
     /* global mutex for replacing MUTEX_INITIALIZER instances */
@@ -263,13 +274,27 @@ void x264_win32_threading_destroy( void )
 
 int x264_pthread_num_processors_np()
 {
-    DWORD_PTR process_cpus, system_cpus;
-    if( GetProcessAffinityMask( GetCurrentProcess(), &process_cpus, &system_cpus ) )
+    DWORD_PTR system_cpus, process_cpus = 0;
+    int cpus = 0;
+
+    /* GetProcessAffinityMask returns affinities of 0 when the process has threads in multiple processor groups.
+     * On platforms that support processor grouping, use GetThreadGroupAffinity to get the current thread's affinity instead. */
+#if ARCH_X86_64
+    /* find function pointers to API functions specific to x86_64 platforms, if they exist */
+    HANDLE kernel_dll = GetModuleHandle( TEXT( "kernel32.dll" ) );
+    BOOL (*get_thread_affinity)( HANDLE thread, x264_group_affinity_t *group_affinity ) = (void*)GetProcAddress( kernel_dll, "GetThreadGroupAffinity" );
+    if( get_thread_affinity )
     {
-        int cpus = 0;
-        for( DWORD_PTR bit = 1; bit; bit <<= 1 )
-            cpus += !!(process_cpus & bit);
-        return cpus;
+        /* running on a platform that supports >64 logical cpus */
+        x264_group_affinity_t thread_affinity;
+        if( get_thread_affinity( GetCurrentThread(), &thread_affinity ) )
+            process_cpus = thread_affinity.mask;
     }
-    return 1;
+#endif
+    if( !process_cpus )
+        GetProcessAffinityMask( GetCurrentProcess(), &process_cpus, &system_cpus );
+    for( DWORD_PTR bit = 1; bit; bit <<= 1 )
+        cpus += !!(process_cpus & bit);
+
+    return cpus ? cpus : 1;
 }



More information about the x264-devel mailing list