<div dir="ltr">Please ignore this patch - it has a bug that I've fixed in the subsequent patch that I sent out.</div><div class="gmail_extra"><br clear="all"><div><div class="gmail_signature"><div dir="ltr"><div><div dir="ltr"><div><div dir="ltr"><div><div dir="ltr">Pradeep Ramachandran, PhD<div>Solution Architect,</div><div>Multicoreware Inc.</div><div>Ph: +91 99627 82018</div></div></div></div></div></div></div></div></div></div>
<br><div class="gmail_quote">On Tue, Sep 8, 2015 at 11:50 AM, <span dir="ltr"><<a href="mailto:pradeep@multicorewareinc.com" target="_blank">pradeep@multicorewareinc.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Pradeep Ramachandran <<a href="mailto:pradeep@multicorewareinc.com">pradeep@multicorewareinc.com</a>><br>
# Date 1441692185 -19800<br>
# Tue Sep 08 11:33:05 2015 +0530<br>
# Node ID 48b00cc32bc5cd77970db4776637a127a6e5d848<br>
# Parent e1adac00dce8e5641cbe9aec3d50a72261c308d9<br>
Fix for threadpooling to use all available threads on Windows, and build fix<br>
to remove warnings on Windows machines older than Win7 and Linux machines<br>
without NUMA support<br>
<br>
diff -r e1adac00dce8 -r 48b00cc32bc5 source/common/threadpool.cpp<br>
--- a/source/common/threadpool.cpp Thu Sep 03 14:41:06 2015 +0530<br>
+++ b/source/common/threadpool.cpp Tue Sep 08 11:33:05 2015 +0530<br>
@@ -378,8 +378,15 @@<br>
X265_CHECK(numThreads <= MAX_POOL_THREADS, "a single thread pool cannot have more than MAX_POOL_THREADS threads\n");<br>
<br>
#if defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7<br>
- m_winNodemask = nodeMask & ~(0x1 << getNumaNodeCount());<br>
- m_numaNodeMask = &m_winNodemask;<br>
+ int numaNode = -1;<br>
+ GROUP_AFFINITY groupAffinity;<br>
+ for (int i = 0; i < getNumaNodeCount(); i++)<br>
+ {<br>
+ numaNode = (nodeMask & (0x1 << i)) - 1;<br>
+ if (GetNumaNodeProcessorMaskEx((USHORT)numaNode, &groupAffinity))<br>
+ m_winCpuMask |= groupAffinity.Mask;<br>
+ }<br>
+ m_numaMask = &m_winCpuMask;<br>
#elif HAVE_LIBNUMA<br>
if (numa_available() >= 0)<br>
{<br>
@@ -387,11 +394,13 @@<br>
if (nodemask)<br>
{<br>
*(nodemask->maskp) = nodeMask;<br>
- m_numaNodeMask = nodemask;<br>
+ m_numaMask = nodemask;<br>
}<br>
else<br>
x265_log(NULL, X265_LOG_ERROR, "unable to get NUMA node mask for %lx\n", nodeMask);<br>
}<br>
+#else<br>
+ (void)nodeMask;<br>
#endif<br>
<br>
m_numWorkers = numThreads;<br>
@@ -449,33 +458,35 @@<br>
X265_FREE(m_jpTable);<br>
<br>
#if HAVE_LIBNUMA<br>
- if(m_numaNodeMask)<br>
- numa_free_nodemask((struct bitmask*)m_numaNodeMask);<br>
+ if(m_numaMask)<br>
+ numa_free_nodemask((struct bitmask*)m_numaMask);<br>
#endif<br>
}<br>
<br>
void ThreadPool::setCurrentThreadAffinity()<br>
{<br>
- setThreadNodeAffinity(m_numaNodeMask);<br>
+ setThreadNodeAffinity(m_numaMask);<br>
}<br>
<br>
/* static */<br>
-void ThreadPool::setThreadNodeAffinity(void *numaNodeMask)<br>
+void ThreadPool::setThreadNodeAffinity(void *numaMask)<br>
{<br>
#if defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7<br>
- if (SetThreadAffinityMask(GetCurrentThread(), (DWORD_PTR)(*((DWORD*)numaNodeMask))))<br>
+ if (SetThreadAffinityMask(GetCurrentThread(), (DWORD_PTR)(*((DWORD*)numaMask))))<br>
return;<br>
else<br>
x265_log(NULL, X265_LOG_ERROR, "unable to set thread affinity for NUMA node mask\n");<br>
#elif HAVE_LIBNUMA<br>
if (numa_available() >= 0)<br>
{<br>
- numa_run_on_node_mask((struct bitmask*)numaNodeMask);<br>
- numa_set_interleave_mask((struct bitmask*)numaNodeMask);<br>
+ numa_run_on_node_mask((struct bitmask*)numaMask);<br>
+ numa_set_interleave_mask((struct bitmask*)numaMask);<br>
numa_set_localalloc();<br>
return;<br>
}<br>
x265_log(NULL, X265_LOG_ERROR, "unable to set thread affinity for NUMA node mask\n");<br>
+#else<br>
+ (void)numaMask;<br>
#endif<br>
return;<br>
}<br>
diff -r e1adac00dce8 -r 48b00cc32bc5 source/common/threadpool.h<br>
--- a/source/common/threadpool.h Thu Sep 03 14:41:06 2015 +0530<br>
+++ b/source/common/threadpool.h Tue Sep 08 11:33:05 2015 +0530<br>
@@ -83,9 +83,9 @@<br>
sleepbitmap_t m_sleepBitmap;<br>
int m_numProviders;<br>
int m_numWorkers;<br>
- void* m_numaNodeMask;<br>
+ void* m_numaMask; // node mask in linux, cpu mask in windows<br>
#if defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7<br>
- DWORD m_winNodemask;<br>
+ DWORD m_winCpuMask;<br>
#endif<br>
bool m_isActive;<br>
<br>
@@ -106,7 +106,7 @@<br>
<br>
static int getCpuCount();<br>
static int getNumaNodeCount();<br>
- static void setThreadNodeAffinity(void *numaNodeMask);<br>
+ static void setThreadNodeAffinity(void *numaMask);<br>
};<br>
<br>
/* Any worker thread may enlist the help of idle worker threads from the same<br>
</blockquote></div><br></div>