[x265] [PATCH] bug: Fixes with numa thread settings on windows/linux

Steve Borho steve at borho.org
Mon Sep 21 18:44:45 CEST 2015


On 09/21, Pradeep Ramachandran wrote:
> # HG changeset patch
> # User Pradeep Ramachandran <pradeep at multicorewareinc.com>
> # Date 1442813339 -19800
> #      Mon Sep 21 10:58:59 2015 +0530
> # Node ID 2df699442f7fbafdbed5e155aa43bc1498ad0670
> # Parent  57f027dfb3088eabef5f442be2a2e38fb0d4052f
> bug: Fixes with numa thread settings on windows/linux
> 
> 1) When multiple '+' entries were specified, the node mask was overwritten
>    instead of accumulated; fixed now
> 2) Widened the node mask to 64 bits
> 3) Better log messages for the thread pools created

LGTM

> diff -r 57f027dfb308 -r 2df699442f7f source/common/threadpool.cpp
> --- a/source/common/threadpool.cpp	Fri Sep 18 18:53:46 2015 -0500
> +++ b/source/common/threadpool.cpp	Mon Sep 21 10:58:59 2015 +0530
> @@ -227,7 +227,7 @@
>      enum { MAX_NODE_NUM = 127 };
>      int cpusPerNode[MAX_NODE_NUM + 1];
>      int threadsPerPool[MAX_NODE_NUM + 2];
> -    uint32_t nodeMaskPerPool[MAX_NODE_NUM +2];
> +    uint64_t nodeMaskPerPool[MAX_NODE_NUM +2];
>  
>      memset(cpusPerNode, 0, sizeof(cpusPerNode));
>      memset(threadsPerPool, 0, sizeof(threadsPerPool));
> @@ -281,20 +281,20 @@
>                  for (int j = i; j < numNumaNodes; j++)
>                  {
>                      threadsPerPool[numNumaNodes] += cpusPerNode[j];
> -                    nodeMaskPerPool[numNumaNodes] |= (1U << j);
> +                    nodeMaskPerPool[numNumaNodes] |= ((uint64_t)1 << j);
>                  }
>                  break;
>              }
>              else if (*nodeStr == '+')
>              {
>                  threadsPerPool[numNumaNodes] += cpusPerNode[i];
> -                nodeMaskPerPool[numNumaNodes] = (1U << i);
> +                nodeMaskPerPool[numNumaNodes] |= ((uint64_t)1 << i);
>              }
>              else
>              {
>                  int count = atoi(nodeStr);
>                  threadsPerPool[i] = X265_MIN(count, cpusPerNode[i]);
> -                nodeMaskPerPool[i] = (1U << i);
> +                nodeMaskPerPool[i] = ((uint64_t)1 << i);
>              }
>  
>              /* consume current node string, comma, and white-space */
> @@ -309,7 +309,7 @@
>          for (int i = 0; i < numNumaNodes; i++)
>          {
>              threadsPerPool[numNumaNodes]  += cpusPerNode[i];
> -            nodeMaskPerPool[numNumaNodes] |= (1U << i);
> +            nodeMaskPerPool[numNumaNodes] |= ((uint64_t)1 << i);
>          }
>      }
>   
> @@ -356,8 +356,15 @@
>                  numPools = 0;
>                  return NULL;
>              }
> -            if (numNumaNodes > 1)
> -                x265_log(p, X265_LOG_INFO, "Thread pool %d using %d threads with NUMA node mask %lx\n", i, numThreads, nodeMaskPerPool[node]);
> +            if (numNumaNodes > 1)
> +            {
> +                char *nodesstr = new char[64 * strlen(",63") + 1];
> +                int len = 0;
> +                for (int j = 0; j < 64; j++)
> +                    if ((nodeMaskPerPool[node] >> j) & 1)
> +                        len += sprintf(nodesstr + len, ",%d", j);
> +                x265_log(p, X265_LOG_INFO, "Thread pool %d using %d threads on numa nodes %s\n", i, numThreads, nodesstr + 1);
> +            }
>              else
>                  x265_log(p, X265_LOG_INFO, "Thread pool created using %d threads\n", numThreads);
>              threadsPerPool[node] -= numThreads;
> @@ -373,7 +380,7 @@
>      memset(this, 0, sizeof(*this));
>  }
>  
> -bool ThreadPool::create(int numThreads, int maxProviders, uint32_t nodeMask)
> +bool ThreadPool::create(int numThreads, int maxProviders, uint64_t nodeMask)
>  {
>      X265_CHECK(numThreads <= MAX_POOL_THREADS, "a single thread pool cannot have more than MAX_POOL_THREADS threads\n");
>  
> @@ -473,7 +480,7 @@
>  void ThreadPool::setThreadNodeAffinity(void *numaMask)
>  {
>  #if defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7 
> -    if (SetThreadAffinityMask(GetCurrentThread(), (DWORD_PTR)(*((DWORD*)numaMask))))
> +    if (SetThreadAffinityMask(GetCurrentThread(), *((DWORD_PTR*)numaMask)))
>          return;
>      else
>          x265_log(NULL, X265_LOG_ERROR, "unable to set thread affinity for NUMA node mask\n");
> diff -r 57f027dfb308 -r 2df699442f7f source/common/threadpool.h
> --- a/source/common/threadpool.h	Fri Sep 18 18:53:46 2015 -0500
> +++ b/source/common/threadpool.h	Mon Sep 21 10:58:59 2015 +0530
> @@ -85,7 +85,7 @@
>      int           m_numWorkers;
>      void*         m_numaMask; // node mask in linux, cpu mask in windows
>  #if defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7 
> -    DWORD         m_winCpuMask;
> +    DWORD_PTR     m_winCpuMask;
>  #endif
>      bool          m_isActive;
>  
> @@ -95,7 +95,7 @@
>      ThreadPool();
>      ~ThreadPool();
>  
> -    bool create(int numThreads, int maxProviders, uint32_t nodeMask);
> +    bool create(int numThreads, int maxProviders, uint64_t nodeMask);
>      bool start();
>      void stopWorkers();
>      void setCurrentThreadAffinity();
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel

-- 
Steve Borho


More information about the x265-devel mailing list