[x265] [PATCH] Allocate frame threads based on available pool threads
Pradeep Ramachandran
pradeep at multicorewareinc.com
Wed Jul 5 17:00:39 CEST 2017
On Wed, Jul 5, 2017 at 3:28 PM, <aruna at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Aruna Matheswaran <aruna at multicorewareinc.com>
> # Date 1498107303 -19800
> # Thu Jun 22 10:25:03 2017 +0530
> # Node ID 006c75cf822e92e3865fc97d21c25b0fdc072b51
> # Parent 58b4fa89c42da0e9ef229035ea02f29d3a02fffe
> Allocate frame threads based on available pool threads
>
> This patch decides #frame-threads based on the #pool-threads available. If
> pools are not specified, #frame-threads will be decided based on the
> detected #CPU-threads.
>
> This patch also decreases the #frame-threads allocated for #pool-threads in
> the intervals (15 - 31) and (>= 32), as there is high run-to-run variation
> in bitrate and SSIM with higher frame threads. With this reduction in
> #frame-threads there is a ~3-4 % drop in fps with a small SSIM improvement
> for #pool-threads (15 - 31), and no significant change in performance for
> #pool-threads (>= 32).
>
Thanks. The improvements in quality seem to justify the change.
This has been pushed to the default branch of x265.
> diff -r 58b4fa89c42d -r 006c75cf822e source/common/threadpool.cpp
> --- a/source/common/threadpool.cpp Fri Jun 30 16:31:29 2017 +0530
> +++ b/source/common/threadpool.cpp Thu Jun 22 10:25:03 2017 +0530
> @@ -253,6 +253,7 @@
> int cpusPerNode[MAX_NODE_NUM + 1];
> int threadsPerPool[MAX_NODE_NUM + 2];
> uint64_t nodeMaskPerPool[MAX_NODE_NUM + 2];
> + int totalNumThreads = 0;
>
> memset(cpusPerNode, 0, sizeof(cpusPerNode));
> memset(threadsPerPool, 0, sizeof(threadsPerPool));
> @@ -388,9 +389,23 @@
> if (bNumaSupport)
> x265_log(p, X265_LOG_DEBUG, "NUMA node %d may use %d logical
> cores\n", i, cpusPerNode[i]);
> if (threadsPerPool[i])
> + {
> numPools += (threadsPerPool[i] + MAX_POOL_THREADS - 1) /
> MAX_POOL_THREADS;
> + totalNumThreads += threadsPerPool[i];
> + }
> }
> + if (!isThreadsReserved)
> + {
> + if (!numPools)
> + {
> + x265_log(p, X265_LOG_DEBUG, "No pool thread available.
> Deciding frame-threads based on detected CPU threads\n");
> + totalNumThreads = ThreadPool::getCpuCount(); // auto-detect
> frame threads
> + }
>
> + if (!p->frameNumThreads)
> + ThreadPool::getFrameThreadsCount(p, totalNumThreads);
> + }
> +
> if (!numPools)
> return NULL;
>
> @@ -412,7 +427,7 @@
> node++;
> int numThreads = X265_MIN(MAX_POOL_THREADS,
> threadsPerPool[node]);
> int origNumThreads = numThreads;
> - if (p->lookaheadThreads > numThreads / 2)
> + if (i == 0 && p->lookaheadThreads > numThreads / 2)
> {
> p->lookaheadThreads = numThreads / 2;
> x265_log(p, X265_LOG_DEBUG, "Setting lookahead threads to
> a maximum of half the total number of threads\n");
> @@ -423,7 +438,7 @@
> maxProviders = 1;
> }
>
> - else
> + else if (i == 0)
> numThreads -= p->lookaheadThreads;
> if (!pools[i].create(numThreads, maxProviders,
> nodeMaskPerPool[node]))
> {
> @@ -643,4 +658,21 @@
> #endif
> }
>
> +void ThreadPool::getFrameThreadsCount(x265_param* p, int cpuCount)
> +{
> + int rows = (p->sourceHeight + p->maxCUSize - 1) >>
> g_log2Size[p->maxCUSize];
> + if (!p->bEnableWavefront)
> + p->frameNumThreads = X265_MIN3(cpuCount, (rows + 1) / 2,
> X265_MAX_FRAME_THREADS);
> + else if (cpuCount >= 32)
> + p->frameNumThreads = (p->sourceHeight > 2000) ? 6 : 5;
> + else if (cpuCount >= 16)
> + p->frameNumThreads = 4;
> + else if (cpuCount >= 8)
> + p->frameNumThreads = 3;
> + else if (cpuCount >= 4)
> + p->frameNumThreads = 2;
> + else
> + p->frameNumThreads = 1;
> +}
> +
> } // end namespace X265_NS
> diff -r 58b4fa89c42d -r 006c75cf822e source/common/threadpool.h
> --- a/source/common/threadpool.h Fri Jun 30 16:31:29 2017 +0530
> +++ b/source/common/threadpool.h Thu Jun 22 10:25:03 2017 +0530
> @@ -105,6 +105,7 @@
> static ThreadPool* allocThreadPools(x265_param* p, int& numPools,
> bool isThreadsReserved);
> static int getCpuCount();
> static int getNumaNodeCount();
> + static void getFrameThreadsCount(x265_param* p,int cpuCount);
> };
>
> /* Any worker thread may enlist the help of idle worker threads from the
> same
> diff -r 58b4fa89c42d -r 006c75cf822e source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp Fri Jun 30 16:31:29 2017 +0530
> +++ b/source/encoder/encoder.cpp Thu Jun 22 10:25:03 2017 +0530
> @@ -134,26 +134,19 @@
> if (!p->bEnableWavefront && !p->bDistributeModeAnalysis && !p->bDistributeMotionEstimation
> && !p->lookaheadSlices)
> allowPools = false;
>
> - if (!p->frameNumThreads)
> - {
> - // auto-detect frame threads
> - int cpuCount = ThreadPool::getCpuCount();
> - if (!p->bEnableWavefront)
> - p->frameNumThreads = X265_MIN3(cpuCount, (rows + 1) / 2,
> X265_MAX_FRAME_THREADS);
> - else if (cpuCount >= 32)
> - p->frameNumThreads = (p->sourceHeight > 2000) ? 8 : 6; //
> dual-socket 10-core IvyBridge or higher
> - else if (cpuCount >= 16)
> - p->frameNumThreads = 5; // 8 HT cores, or dual socket
> - else if (cpuCount >= 8)
> - p->frameNumThreads = 3; // 4 HT cores
> - else if (cpuCount >= 4)
> - p->frameNumThreads = 2; // Dual or Quad core
> - else
> - p->frameNumThreads = 1;
> - }
> m_numPools = 0;
> if (allowPools)
> m_threadPool = ThreadPool::allocThreadPools(p, m_numPools, 0);
> + else
> + {
> + if (!p->frameNumThreads)
> + {
> + // auto-detect frame threads
> + int cpuCount = ThreadPool::getCpuCount();
> + ThreadPool::getFrameThreadsCount(p, cpuCount);
> + }
> + }
> +
> if (!m_numPools)
> {
> // issue warnings if any of these features were requested
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20170705/381c34a3/attachment.html>
More information about the x265-devel
mailing list