[x265] [PATCH] Allocate frame threads based on available pool threads

aruna at multicorewareinc.com aruna at multicorewareinc.com
Wed Jul 5 11:58:39 CEST 2017


# HG changeset patch
# User Aruna Matheswaran <aruna at multicorewareinc.com>
# Date 1498107303 -19800
#      Thu Jun 22 10:25:03 2017 +0530
# Node ID 006c75cf822e92e3865fc97d21c25b0fdc072b51
# Parent  58b4fa89c42da0e9ef229035ea02f29d3a02fffe
Allocate frame threads based on available pool threads

This patch decides #frame-threads based on the #pool-threads available. If pools
are not specified, #frame-threads is decided based on the detected #CPU-threads.

This patch also decreases the #frame-threads allocated for #pool-threads in the
intervals (15 - 31) and (>= 32), as there is high run-to-run variation in bitrate
and SSIM with higher frame threads. With this reduction in #frame-threads there
is a ~3-4 % drop in fps with a small SSIM improvement for #pool-threads (15 - 31)
and no significant change in performance for #pool-threads (>= 32).

diff -r 58b4fa89c42d -r 006c75cf822e source/common/threadpool.cpp
--- a/source/common/threadpool.cpp	Fri Jun 30 16:31:29 2017 +0530
+++ b/source/common/threadpool.cpp	Thu Jun 22 10:25:03 2017 +0530
@@ -253,6 +253,7 @@
     int cpusPerNode[MAX_NODE_NUM + 1];
     int threadsPerPool[MAX_NODE_NUM + 2];
     uint64_t nodeMaskPerPool[MAX_NODE_NUM + 2];
+    int totalNumThreads = 0;
 
     memset(cpusPerNode, 0, sizeof(cpusPerNode));
     memset(threadsPerPool, 0, sizeof(threadsPerPool));
@@ -388,9 +389,23 @@
         if (bNumaSupport)
             x265_log(p, X265_LOG_DEBUG, "NUMA node %d may use %d logical cores\n", i, cpusPerNode[i]);
         if (threadsPerPool[i])
+        {
             numPools += (threadsPerPool[i] + MAX_POOL_THREADS - 1) / MAX_POOL_THREADS;
+            totalNumThreads += threadsPerPool[i];
+        }
     }
+    if (!isThreadsReserved)
+    {
+        if (!numPools)
+        {
+            x265_log(p, X265_LOG_DEBUG, "No pool thread available. Deciding frame-threads based on detected CPU threads\n");
+            totalNumThreads = ThreadPool::getCpuCount(); // auto-detect frame threads
+        }
 
+        if (!p->frameNumThreads)
+            ThreadPool::getFrameThreadsCount(p, totalNumThreads);
+    }
+    
     if (!numPools)
         return NULL;
 
@@ -412,7 +427,7 @@
                 node++;
             int numThreads = X265_MIN(MAX_POOL_THREADS, threadsPerPool[node]);
             int origNumThreads = numThreads;
-            if (p->lookaheadThreads > numThreads / 2)
+            if (i == 0 && p->lookaheadThreads > numThreads / 2)
             {
                 p->lookaheadThreads = numThreads / 2;
                 x265_log(p, X265_LOG_DEBUG, "Setting lookahead threads to a maximum of half the total number of threads\n");
@@ -423,7 +438,7 @@
                 maxProviders = 1;
             }
 
-            else
+            else if (i == 0)
                 numThreads -= p->lookaheadThreads;
             if (!pools[i].create(numThreads, maxProviders, nodeMaskPerPool[node]))
             {
@@ -643,4 +658,21 @@
 #endif
 }
 
+void ThreadPool::getFrameThreadsCount(x265_param* p, int cpuCount)
+{
+    int rows = (p->sourceHeight + p->maxCUSize - 1) >> g_log2Size[p->maxCUSize];
+    if (!p->bEnableWavefront)
+        p->frameNumThreads = X265_MIN3(cpuCount, (rows + 1) / 2, X265_MAX_FRAME_THREADS);
+    else if (cpuCount >= 32)
+        p->frameNumThreads = (p->sourceHeight > 2000) ? 6 : 5; 
+    else if (cpuCount >= 16)
+        p->frameNumThreads = 4; 
+    else if (cpuCount >= 8)
+        p->frameNumThreads = 3;
+    else if (cpuCount >= 4)
+        p->frameNumThreads = 2;
+    else
+        p->frameNumThreads = 1;
+}
+
 } // end namespace X265_NS
diff -r 58b4fa89c42d -r 006c75cf822e source/common/threadpool.h
--- a/source/common/threadpool.h	Fri Jun 30 16:31:29 2017 +0530
+++ b/source/common/threadpool.h	Thu Jun 22 10:25:03 2017 +0530
@@ -105,6 +105,7 @@
     static ThreadPool* allocThreadPools(x265_param* p, int& numPools, bool isThreadsReserved);
     static int  getCpuCount();
     static int  getNumaNodeCount();
+    static void getFrameThreadsCount(x265_param* p,int cpuCount);
 };
 
 /* Any worker thread may enlist the help of idle worker threads from the same
diff -r 58b4fa89c42d -r 006c75cf822e source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Fri Jun 30 16:31:29 2017 +0530
+++ b/source/encoder/encoder.cpp	Thu Jun 22 10:25:03 2017 +0530
@@ -134,26 +134,19 @@
     if (!p->bEnableWavefront && !p->bDistributeModeAnalysis && !p->bDistributeMotionEstimation && !p->lookaheadSlices)
         allowPools = false;
 
-    if (!p->frameNumThreads)
-    {
-        // auto-detect frame threads
-        int cpuCount = ThreadPool::getCpuCount();
-        if (!p->bEnableWavefront)
-            p->frameNumThreads = X265_MIN3(cpuCount, (rows + 1) / 2, X265_MAX_FRAME_THREADS);
-        else if (cpuCount >= 32)
-            p->frameNumThreads = (p->sourceHeight > 2000) ? 8 : 6; // dual-socket 10-core IvyBridge or higher
-        else if (cpuCount >= 16)
-            p->frameNumThreads = 5; // 8 HT cores, or dual socket
-        else if (cpuCount >= 8)
-            p->frameNumThreads = 3; // 4 HT cores
-        else if (cpuCount >= 4)
-            p->frameNumThreads = 2; // Dual or Quad core
-        else
-            p->frameNumThreads = 1;
-    }
     m_numPools = 0;
     if (allowPools)
         m_threadPool = ThreadPool::allocThreadPools(p, m_numPools, 0);
+    else
+    {
+        if (!p->frameNumThreads)
+        {
+            // auto-detect frame threads
+            int cpuCount = ThreadPool::getCpuCount();
+            ThreadPool::getFrameThreadsCount(p, cpuCount);
+        }
+    }
+
     if (!m_numPools)
     {
         // issue warnings if any of these features were requested


More information about the x265-devel mailing list