[x265] [PATCH] bug: Fixes with numa thread settings on windows/linux

Pradeep Ramachandran pradeep at multicorewareinc.com
Mon Sep 21 11:32:14 CEST 2015


# HG changeset patch
# User Pradeep Ramachandran <pradeep at multicorewareinc.com>
# Date 1442813339 -19800
#      Mon Sep 21 10:58:59 2015 +0530
# Node ID 2df699442f7fbafdbed5e155aa43bc1498ad0670
# Parent  57f027dfb3088eabef5f442be2a2e38fb0d4052f
bug: Fixes with numa thread settings on windows/linux

1) When multiple '+' entries were specified, each one overwrote the pool's node mask instead of adding to it; the mask is now accumulated with |=
2) Widened the node mask from 32 bits to 64 bits
3) Improved the log message printed for each thread pool created (lists the NUMA nodes used instead of a hex mask)

diff -r 57f027dfb308 -r 2df699442f7f source/common/threadpool.cpp
--- a/source/common/threadpool.cpp	Fri Sep 18 18:53:46 2015 -0500
+++ b/source/common/threadpool.cpp	Mon Sep 21 10:58:59 2015 +0530
@@ -227,7 +227,7 @@
     enum { MAX_NODE_NUM = 127 };
     int cpusPerNode[MAX_NODE_NUM + 1];
     int threadsPerPool[MAX_NODE_NUM + 2];
-    uint32_t nodeMaskPerPool[MAX_NODE_NUM +2];
+    uint64_t nodeMaskPerPool[MAX_NODE_NUM +2];
 
     memset(cpusPerNode, 0, sizeof(cpusPerNode));
     memset(threadsPerPool, 0, sizeof(threadsPerPool));
@@ -281,20 +281,20 @@
                 for (int j = i; j < numNumaNodes; j++)
                 {
                     threadsPerPool[numNumaNodes] += cpusPerNode[j];
-                    nodeMaskPerPool[numNumaNodes] |= (1U << j);
+                    nodeMaskPerPool[numNumaNodes] |= ((uint64_t)1 << j);
                 }
                 break;
             }
             else if (*nodeStr == '+')
             {
                 threadsPerPool[numNumaNodes] += cpusPerNode[i];
-                nodeMaskPerPool[numNumaNodes] = (1U << i);
+                nodeMaskPerPool[numNumaNodes] |= ((uint64_t)1 << i);
             }
             else
             {
                 int count = atoi(nodeStr);
                 threadsPerPool[i] = X265_MIN(count, cpusPerNode[i]);
-                nodeMaskPerPool[i] = (1U << i);
+                nodeMaskPerPool[i] = ((uint64_t)1 << i);
             }
 
             /* consume current node string, comma, and white-space */
@@ -309,7 +309,7 @@
         for (int i = 0; i < numNumaNodes; i++)
         {
             threadsPerPool[numNumaNodes]  += cpusPerNode[i];
-            nodeMaskPerPool[numNumaNodes] |= (1U << i);
+            nodeMaskPerPool[numNumaNodes] |= ((uint64_t)1 << i);
         }
     }
  
@@ -356,8 +356,15 @@
                 numPools = 0;
                 return NULL;
             }
-            if (numNumaNodes > 1)
-                x265_log(p, X265_LOG_INFO, "Thread pool %d using %d threads with NUMA node mask %lx\n", i, numThreads, nodeMaskPerPool[node]);
+            if (numNumaNodes > 1)
+            {
+                char *nodesstr = new char[64 * strlen(",63") + 1];
+                int len = 0;
+                for (int j = 0; j < 64; j++)
+                    if ((nodeMaskPerPool[node] >> j) & 1)
+                        len += sprintf(nodesstr + len, ",%d", j);
+                x265_log(p, X265_LOG_INFO, "Thread pool %d using %d threads on numa nodes %s\n", i, numThreads, nodesstr + 1);
+            }
             else
                 x265_log(p, X265_LOG_INFO, "Thread pool created using %d threads\n", numThreads);
             threadsPerPool[node] -= numThreads;
@@ -373,7 +380,7 @@
     memset(this, 0, sizeof(*this));
 }
 
-bool ThreadPool::create(int numThreads, int maxProviders, uint32_t nodeMask)
+bool ThreadPool::create(int numThreads, int maxProviders, uint64_t nodeMask)
 {
     X265_CHECK(numThreads <= MAX_POOL_THREADS, "a single thread pool cannot have more than MAX_POOL_THREADS threads\n");
 
@@ -473,7 +480,7 @@
 void ThreadPool::setThreadNodeAffinity(void *numaMask)
 {
 #if defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7 
-    if (SetThreadAffinityMask(GetCurrentThread(), (DWORD_PTR)(*((DWORD*)numaMask))))
+    if (SetThreadAffinityMask(GetCurrentThread(), *((DWORD_PTR*)numaMask)))
         return;
     else
         x265_log(NULL, X265_LOG_ERROR, "unable to set thread affinity for NUMA node mask\n");
diff -r 57f027dfb308 -r 2df699442f7f source/common/threadpool.h
--- a/source/common/threadpool.h	Fri Sep 18 18:53:46 2015 -0500
+++ b/source/common/threadpool.h	Mon Sep 21 10:58:59 2015 +0530
@@ -85,7 +85,7 @@
     int           m_numWorkers;
     void*         m_numaMask; // node mask in linux, cpu mask in windows
 #if defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7 
-    DWORD         m_winCpuMask;
+    DWORD_PTR     m_winCpuMask;
 #endif
     bool          m_isActive;
 
@@ -95,7 +95,7 @@
     ThreadPool();
     ~ThreadPool();
 
-    bool create(int numThreads, int maxProviders, uint32_t nodeMask);
+    bool create(int numThreads, int maxProviders, uint64_t nodeMask);
     bool start();
     void stopWorkers();
     void setCurrentThreadAffinity();


More information about the x265-devel mailing list