[x265] [PATCH] Allocate frame threads based on available pool threads
aruna at multicorewareinc.com
aruna at multicorewareinc.com
Wed Jul 5 11:58:39 CEST 2017
# HG changeset patch
# User Aruna Matheswaran <aruna at multicorewareinc.com>
# Date 1498107303 -19800
# Thu Jun 22 10:25:03 2017 +0530
# Node ID 006c75cf822e92e3865fc97d21c25b0fdc072b51
# Parent 58b4fa89c42da0e9ef229035ea02f29d3a02fffe
Allocate frame threads based on available pool threads
This patch decides #frame-threads based on the #pool-threads available. If no pools
are specified, #frame-threads is decided based on the detected #CPU-threads.
This patch also decreases the #frame-threads allocated for #pool-threads in the
intervals (16 - 31) and (>= 32), as there is high run-to-run variation in bitrate
and SSIM with higher frame threads. With this reduction in #frame-threads there
is a ~3-4% drop in fps with a small SSIM improvement for #pool-threads (16 - 31),
and no significant change in performance for #pool-threads (>= 32).
diff -r 58b4fa89c42d -r 006c75cf822e source/common/threadpool.cpp
--- a/source/common/threadpool.cpp Fri Jun 30 16:31:29 2017 +0530
+++ b/source/common/threadpool.cpp Thu Jun 22 10:25:03 2017 +0530
@@ -253,6 +253,7 @@
int cpusPerNode[MAX_NODE_NUM + 1];
int threadsPerPool[MAX_NODE_NUM + 2];
uint64_t nodeMaskPerPool[MAX_NODE_NUM + 2];
+ int totalNumThreads = 0;
memset(cpusPerNode, 0, sizeof(cpusPerNode));
memset(threadsPerPool, 0, sizeof(threadsPerPool));
@@ -388,9 +389,23 @@
if (bNumaSupport)
x265_log(p, X265_LOG_DEBUG, "NUMA node %d may use %d logical cores\n", i, cpusPerNode[i]);
if (threadsPerPool[i])
+ {
numPools += (threadsPerPool[i] + MAX_POOL_THREADS - 1) / MAX_POOL_THREADS;
+ totalNumThreads += threadsPerPool[i];
+ }
}
+ if (!isThreadsReserved)
+ {
+ if (!numPools)
+ {
+ x265_log(p, X265_LOG_DEBUG, "No pool thread available. Deciding frame-threads based on detected CPU threads\n");
+ totalNumThreads = ThreadPool::getCpuCount(); // auto-detect frame threads
+ }
+ if (!p->frameNumThreads)
+ ThreadPool::getFrameThreadsCount(p, totalNumThreads);
+ }
+
if (!numPools)
return NULL;
@@ -412,7 +427,7 @@
node++;
int numThreads = X265_MIN(MAX_POOL_THREADS, threadsPerPool[node]);
int origNumThreads = numThreads;
- if (p->lookaheadThreads > numThreads / 2)
+ if (i == 0 && p->lookaheadThreads > numThreads / 2)
{
p->lookaheadThreads = numThreads / 2;
x265_log(p, X265_LOG_DEBUG, "Setting lookahead threads to a maximum of half the total number of threads\n");
@@ -423,7 +438,7 @@
maxProviders = 1;
}
- else
+ else if (i == 0)
numThreads -= p->lookaheadThreads;
if (!pools[i].create(numThreads, maxProviders, nodeMaskPerPool[node]))
{
@@ -643,4 +658,21 @@
#endif
}
+void ThreadPool::getFrameThreadsCount(x265_param* p, int cpuCount) // picks p->frameNumThreads from cpuCount; despite the "get" name it returns nothing and stores the result in p
+{
+ int rows = (p->sourceHeight + p->maxCUSize - 1) >> g_log2Size[p->maxCUSize]; // source height in CU rows, rounded up -- assumes g_log2Size[n] == log2(n), TODO confirm
+ if (!p->bEnableWavefront) // wavefront disabled: bound the count by threads, rows and the hard maximum
+ p->frameNumThreads = X265_MIN3(cpuCount, (rows + 1) / 2, X265_MAX_FRAME_THREADS); // (rows + 1) / 2 is half the row count, rounded up
+ else if (cpuCount >= 32) // wavefront enabled: fixed tiers keyed on cpuCount only; rows is not consulted below
+ p->frameNumThreads = (p->sourceHeight > 2000) ? 6 : 5; // sources taller than 2000 get one extra frame thread
+ else if (cpuCount >= 16)
+ p->frameNumThreads = 4;
+ else if (cpuCount >= 8)
+ p->frameNumThreads = 3;
+ else if (cpuCount >= 4)
+ p->frameNumThreads = 2;
+ else // fewer than 4 threads: single frame thread
+ p->frameNumThreads = 1;
+}
+
} // end namespace X265_NS
diff -r 58b4fa89c42d -r 006c75cf822e source/common/threadpool.h
--- a/source/common/threadpool.h Fri Jun 30 16:31:29 2017 +0530
+++ b/source/common/threadpool.h Thu Jun 22 10:25:03 2017 +0530
@@ -105,6 +105,7 @@
static ThreadPool* allocThreadPools(x265_param* p, int& numPools, bool isThreadsReserved);
static int getCpuCount();
static int getNumaNodeCount();
+ static void getFrameThreadsCount(x265_param* p,int cpuCount);
};
/* Any worker thread may enlist the help of idle worker threads from the same
diff -r 58b4fa89c42d -r 006c75cf822e source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Fri Jun 30 16:31:29 2017 +0530
+++ b/source/encoder/encoder.cpp Thu Jun 22 10:25:03 2017 +0530
@@ -134,26 +134,19 @@
if (!p->bEnableWavefront && !p->bDistributeModeAnalysis && !p->bDistributeMotionEstimation && !p->lookaheadSlices)
allowPools = false;
- if (!p->frameNumThreads)
- {
- // auto-detect frame threads
- int cpuCount = ThreadPool::getCpuCount();
- if (!p->bEnableWavefront)
- p->frameNumThreads = X265_MIN3(cpuCount, (rows + 1) / 2, X265_MAX_FRAME_THREADS);
- else if (cpuCount >= 32)
- p->frameNumThreads = (p->sourceHeight > 2000) ? 8 : 6; // dual-socket 10-core IvyBridge or higher
- else if (cpuCount >= 16)
- p->frameNumThreads = 5; // 8 HT cores, or dual socket
- else if (cpuCount >= 8)
- p->frameNumThreads = 3; // 4 HT cores
- else if (cpuCount >= 4)
- p->frameNumThreads = 2; // Dual or Quad core
- else
- p->frameNumThreads = 1;
- }
m_numPools = 0;
if (allowPools)
m_threadPool = ThreadPool::allocThreadPools(p, m_numPools, 0);
+ else
+ {
+ if (!p->frameNumThreads)
+ {
+ // auto-detect frame threads
+ int cpuCount = ThreadPool::getCpuCount();
+ ThreadPool::getFrameThreadsCount(p, cpuCount);
+ }
+ }
+
if (!m_numPools)
{
// issue warnings if any of these features were requested
More information about the x265-devel
mailing list