[x265] [PATCH] bug: Fixes with numa thread settings on windows/linux
Pradeep Ramachandran
pradeep at multicorewareinc.com
Mon Sep 21 11:32:14 CEST 2015
# HG changeset patch
# User Pradeep Ramachandran <pradeep at multicorewareinc.com>
# Date 1442813339 -19800
# Mon Sep 21 10:58:59 2015 +0530
# Node ID 2df699442f7fbafdbed5e155aa43bc1498ad0670
# Parent 57f027dfb3088eabef5f442be2a2e38fb0d4052f
bug: Fixes with numa thread settings on windows/linux
1) When multiple '+' entries were specified, the node mask was overwritten instead of accumulated; fixed now
2) Widened the node mask to 64 bits
3) Improved log messages for the thread pools that are created
diff -r 57f027dfb308 -r 2df699442f7f source/common/threadpool.cpp
--- a/source/common/threadpool.cpp Fri Sep 18 18:53:46 2015 -0500
+++ b/source/common/threadpool.cpp Mon Sep 21 10:58:59 2015 +0530
@@ -227,7 +227,7 @@
enum { MAX_NODE_NUM = 127 };
int cpusPerNode[MAX_NODE_NUM + 1];
int threadsPerPool[MAX_NODE_NUM + 2];
- uint32_t nodeMaskPerPool[MAX_NODE_NUM +2];
+ uint64_t nodeMaskPerPool[MAX_NODE_NUM +2];
memset(cpusPerNode, 0, sizeof(cpusPerNode));
memset(threadsPerPool, 0, sizeof(threadsPerPool));
@@ -281,20 +281,20 @@
for (int j = i; j < numNumaNodes; j++)
{
threadsPerPool[numNumaNodes] += cpusPerNode[j];
- nodeMaskPerPool[numNumaNodes] |= (1U << j);
+ nodeMaskPerPool[numNumaNodes] |= ((uint64_t)1 << j);
}
break;
}
else if (*nodeStr == '+')
{
threadsPerPool[numNumaNodes] += cpusPerNode[i];
- nodeMaskPerPool[numNumaNodes] = (1U << i);
+ nodeMaskPerPool[numNumaNodes] |= ((uint64_t)1 << i);
}
else
{
int count = atoi(nodeStr);
threadsPerPool[i] = X265_MIN(count, cpusPerNode[i]);
- nodeMaskPerPool[i] = (1U << i);
+ nodeMaskPerPool[i] = ((uint64_t)1 << i);
}
/* consume current node string, comma, and white-space */
@@ -309,7 +309,7 @@
for (int i = 0; i < numNumaNodes; i++)
{
threadsPerPool[numNumaNodes] += cpusPerNode[i];
- nodeMaskPerPool[numNumaNodes] |= (1U << i);
+ nodeMaskPerPool[numNumaNodes] |= ((uint64_t)1 << i);
}
}
@@ -356,8 +356,15 @@
numPools = 0;
return NULL;
}
- if (numNumaNodes > 1)
- x265_log(p, X265_LOG_INFO, "Thread pool %d using %d threads with NUMA node mask %lx\n", i, numThreads, nodeMaskPerPool[node]);
+ if (numNumaNodes > 1)
+ {
+ char *nodesstr = new char[64 * strlen(",63") + 1];
+ int len = 0;
+ for (int j = 0; j < 64; j++)
+ if ((nodeMaskPerPool[node] >> j) & 1)
+ len += sprintf(nodesstr + len, ",%d", j);
+ x265_log(p, X265_LOG_INFO, "Thread pool %d using %d threads on numa nodes %s\n", i, numThreads, nodesstr + 1);
+ }
else
x265_log(p, X265_LOG_INFO, "Thread pool created using %d threads\n", numThreads);
threadsPerPool[node] -= numThreads;
@@ -373,7 +380,7 @@
memset(this, 0, sizeof(*this));
}
-bool ThreadPool::create(int numThreads, int maxProviders, uint32_t nodeMask)
+bool ThreadPool::create(int numThreads, int maxProviders, uint64_t nodeMask)
{
X265_CHECK(numThreads <= MAX_POOL_THREADS, "a single thread pool cannot have more than MAX_POOL_THREADS threads\n");
@@ -473,7 +480,7 @@
void ThreadPool::setThreadNodeAffinity(void *numaMask)
{
#if defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7
- if (SetThreadAffinityMask(GetCurrentThread(), (DWORD_PTR)(*((DWORD*)numaMask))))
+ if (SetThreadAffinityMask(GetCurrentThread(), *((DWORD_PTR*)numaMask)))
return;
else
x265_log(NULL, X265_LOG_ERROR, "unable to set thread affinity for NUMA node mask\n");
diff -r 57f027dfb308 -r 2df699442f7f source/common/threadpool.h
--- a/source/common/threadpool.h Fri Sep 18 18:53:46 2015 -0500
+++ b/source/common/threadpool.h Mon Sep 21 10:58:59 2015 +0530
@@ -85,7 +85,7 @@
int m_numWorkers;
void* m_numaMask; // node mask in linux, cpu mask in windows
#if defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7
- DWORD m_winCpuMask;
+ DWORD_PTR m_winCpuMask;
#endif
bool m_isActive;
@@ -95,7 +95,7 @@
ThreadPool();
~ThreadPool();
- bool create(int numThreads, int maxProviders, uint32_t nodeMask);
+ bool create(int numThreads, int maxProviders, uint64_t nodeMask);
bool start();
void stopWorkers();
void setCurrentThreadAffinity();
More information about the x265-devel
mailing list