[x265] [PATCH] tme: add warning for low threads
Shashank Pathipati
shashank.pathipati at multicorewareinc.com
Thu Mar 12 12:23:34 UTC 2026
>From 537158b33b35c731fadaf964fb5abc676db30f53 Mon Sep 17 00:00:00 2001
From: Shashank Pathipati <shashank.pathipati at multicorewareinc.com>
Date: Thu, 12 Mar 2026 17:51:41 +0530
Subject: [PATCH] tme: add warning for low threads
---
source/common/common.h | 1 +
source/common/threadpool.cpp | 245 +++++++++++++++++++----------------
2 files changed, 134 insertions(+), 112 deletions(-)
diff --git a/source/common/common.h b/source/common/common.h
index 92af90426..ff60a85fe 100644
--- a/source/common/common.h
+++ b/source/common/common.h
@@ -354,6 +354,7 @@ typedef int16_t coeff_t; // transform coefficient
#define MAX_NUM_PUS_PER_CTU 593 // Maximum number of PUs in a 64x64 CTU
#define MAX_NUM_PU_SIZES 24 // Number of distinct PU sizes in a 64x64 CTU
+#define MIN_TME_THREADS 32 // Minimum thread count for ThreadedME; below this it is disabled with a warning
namespace X265_NS {
diff --git a/source/common/threadpool.cpp b/source/common/threadpool.cpp
index 43b1a7312..79075425a 100644
--- a/source/common/threadpool.cpp
+++ b/source/common/threadpool.cpp
@@ -248,6 +248,136 @@ int ThreadPool::tryBondPeers(int maxPeers, sleepbitmap_t peerBitmap, BondedTaskG
return bondCount;
}
+
+/* Distributes totalNumThreads between ThreadedME and FrameEncoder pools.
+ * Modifies threadsPerPool[], nodeMaskPerPool[], numNumaNodes, and numPools in-place.
+ * The thread count reserved for frame encoding is written to threadsFrameEnc.
+ * Below MIN_TME_THREADS it warns, clears p->bThreadedME and changes nothing else. */
+static void distributeThreadsForTme(
+    x265_param* p,
+    int totalNumThreads,
+    int& numNumaNodes,
+    bool bNumaSupport,
+    int* threadsPerPool,
+    uint64_t* nodeMaskPerPool,
+    int& numPools,
+    int& threadsFrameEnc)
+{
+    if (totalNumThreads < MIN_TME_THREADS)
+    {
+        x265_log(p, X265_LOG_WARNING, "Low thread count detected, disabling --threaded-me."
+                 " Minimum recommended is %d cores / threads\n", MIN_TME_THREADS);
+        p->bThreadedME = 0;
+        return;
+    }
+
+    int targetTME = ThreadPool::configureTmeThreadCount(p, totalNumThreads);
+    targetTME = (targetTME < 1) ? 1 : targetTME;
+
+    threadsFrameEnc = totalNumThreads - targetTME;
+    int defaultNumFT = ThreadPool::getFrameThreadsCount(p, totalNumThreads);
+    if (threadsFrameEnc < defaultNumFT)
+    {
+        threadsFrameEnc = defaultNumFT;
+        targetTME = totalNumThreads - threadsFrameEnc;
+    }
+
+#if defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7 || HAVE_LIBNUMA
+    if (bNumaSupport && numNumaNodes > 1)
+    {
+        // Threads on the last node touched by ThreadedME that it did not need
+        int leftover = 0;
+
+        // First thread pool belongs to ThreadedME
+        std::vector<int> threads(1, 0);
+        std::vector<uint64_t> nodeMasks(1, 0);
+        int poolIndex = 0;
+
+        /* Greedily assign whole NUMA nodes to TME until reaching or exceeding the target */
+        for (int i = 0; i < numNumaNodes + 1; i++)
+        {
+            if (!threadsPerPool[i] && !nodeMaskPerPool[i])
+                continue;
+
+            int toTake = X265_MIN(threadsPerPool[i], targetTME - threads[0]);
+            if (toTake > 0)
+            {
+                threads[poolIndex] += toTake;
+                nodeMasks[poolIndex] |= nodeMaskPerPool[i];
+
+                if (threads[0] == targetTME)
+                    poolIndex++;
+
+                if (toTake < threadsPerPool[i])
+                    leftover = threadsPerPool[i] - toTake;
+            }
+            else
+            {
+                threads.push_back(threadsPerPool[i]);
+                nodeMasks.push_back(nodeMaskPerPool[i]);
+                poolIndex++;
+            }
+        }
+
+        // Distribute leftover threads among FrameEncoders
+        if (leftover)
+        {
+            // Case 1: There are 1 or more threadpools for FrameEncoder(s) by now
+            if (threads.size() > 1)
+            {
+                // Index only pools that exist: when TME spans several nodes there are fewer
+                // FrameEncoder pools than numNumaNodes - 1. The remainder goes to the first pools.
+                const int fePools = static_cast<int>(threads.size()) - 1;
+                const int split = leftover / fePools;
+                const int extra = leftover % fePools;
+                for (int pool = 1; pool <= fePools; pool++)
+                    threads[pool] += split + (pool <= extra ? 1 : 0);
+            }
+
+            // Case 2: FrameEncoder(s) haven't received threads yet
+            if (threads.size() == 1)
+            {
+                threads.push_back(leftover);
+                // Give the same node mask as the last node of ThreadedME
+                uint64_t msb = 1;
+                uint64_t tmeNodeMask = nodeMasks[0];
+                while (tmeNodeMask > 1)
+                {
+                    tmeNodeMask >>= 1;
+                    msb <<= 1;
+                }
+                nodeMasks.push_back(msb);
+            }
+        }
+
+        // Apply calculated threadpool assignment
+        // TODO: Make sure this doesn't cause a problem later on
+        memset(threadsPerPool, 0, sizeof(int) * (numNumaNodes + 2));
+        memset(nodeMaskPerPool, 0, sizeof(uint64_t) * (numNumaNodes + 2));
+
+        numPools = numNumaNodes = static_cast<int>(threads.size());
+        for (int pool = 0; pool < numPools; pool++)
+        {
+            threadsPerPool[pool] = threads[pool];
+            nodeMaskPerPool[pool] = nodeMasks[pool];
+        }
+    }
+    else
+#endif
+    {
+        memset(threadsPerPool, 0, sizeof(int) * (numNumaNodes + 2));
+        memset(nodeMaskPerPool, 0, sizeof(uint64_t) * (numNumaNodes + 2));
+
+        threadsPerPool[0] = targetTME;
+        nodeMaskPerPool[0] = 1;
+
+        threadsPerPool[1] = threadsFrameEnc;
+        nodeMaskPerPool[1] = 1;
+
+        numPools = 2;
+    }
+}
+
ThreadPool* ThreadPool::allocThreadPools(x265_param* p, int& numPools, bool isThreadsReserved)
{
enum { MAX_NODE_NUM = 127 };
@@ -384,120 +514,11 @@ ThreadPool* ThreadPool::allocThreadPools(x265_param* p, int& numPools, bool isTh
if (!totalNumThreads)
totalNumThreads = ThreadPool::getCpuCount();
- int threadsFrameEnc = 0;
-
+ int threadsFrameEnc = totalNumThreads;
if (p->bThreadedME)
{
- int targetTME = configureTmeThreadCount(p, totalNumThreads);
- targetTME = (targetTME < 1) ? 1 : targetTME;
-
- threadsFrameEnc = totalNumThreads - targetTME;
- int defaultNumFT = getFrameThreadsCount(p, totalNumThreads);
- if (threadsFrameEnc < defaultNumFT)
- {
- threadsFrameEnc = defaultNumFT;
- targetTME = totalNumThreads - threadsFrameEnc;
- }
-
-#if defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7 || HAVE_LIBNUMA
- if (bNumaSupport && numNumaNodes > 1)
- {
- int tmeNumaNodes = 0;
- int leftover = 0;
-
- // First thread pool belongs to ThreadedME
- std::vector<int> threads(1, 0);
- std::vector<uint64_t> nodeMasks(1, 0);
- int poolIndex = 0;
-
- /* Greedily assign whole NUMA nodes to TME until reaching or exceeding the target */
- for (int i = 0; i < numNumaNodes + 1; i++)
- {
- if (!threadsPerPool[i] && !nodeMaskPerPool[i])
- continue;
-
- int toTake = X265_MIN(threadsPerPool[i], targetTME - threads[0]);
- if (toTake > 0)
- {
- threads[poolIndex] += toTake;
- nodeMasks[poolIndex] |= nodeMaskPerPool[i];
- tmeNumaNodes++;
-
- if (threads[0] == targetTME)
- poolIndex++;
-
- if (toTake < threadsPerPool[i])
- leftover = threadsPerPool[i] - toTake;
- }
- else
- {
- threads.push_back(threadsPerPool[i]);
- nodeMasks.push_back(nodeMaskPerPool[i]);
- poolIndex++;
- }
- }
-
- // Distribute leftover threads among FrameEncoders
- if (leftover)
- {
- // Case 1: There are 1 or more threadpools for FrameEncoder(s) by now
- if (threads.size() > 1)
- {
- int split = static_cast<int>(static_cast<double>(leftover) / (numNumaNodes - 1));
- for (int pool = 1; pool < numNumaNodes; pool++)
- {
- int give = X265_MIN(split, leftover);
- threads[pool] += give;
- leftover -= give;
- }
- }
-
- // Case 2: FrameEncoder(s) haven't received threads yet
- if (threads.size() == 1)
- {
- threads.push_back(leftover);
- // Give the same node mask as the last node of ThreadedME
- uint64_t msb = 1;
- uint64_t tmeNodeMask = nodeMasks[0];
- while (tmeNodeMask > 1)
- {
- tmeNodeMask >>= 1;
- msb <<= 1;
- }
- nodeMasks.push_back(msb);
- }
- }
-
- // Apply calculated threadpool assignment
- // TODO: Make sure this doesn't cause a problem later on
- memset(threadsPerPool, 0, sizeof(threadsPerPool));
- memset(nodeMaskPerPool, 0, sizeof(nodeMaskPerPool));
-
- numPools = numNumaNodes = static_cast<int>(threads.size());
- for (int pool = 0; pool < numPools; pool++)
- {
- threadsPerPool[pool] = threads[pool];
- nodeMaskPerPool[pool] = nodeMasks[pool];
- }
- }
- else
-#endif
- {
- memset(threadsPerPool, 0, sizeof(threadsPerPool));
- memset(nodeMaskPerPool, 0, sizeof(nodeMaskPerPool));
-
- threadsPerPool[0] = targetTME;
- nodeMaskPerPool[0] = 1;
-
- threadsPerPool[1] = threadsFrameEnc;
- nodeMaskPerPool[1] = 1;
-
- numPools = 2;
- }
- }
- else
- {
- threadsFrameEnc = totalNumThreads;
+ distributeThreadsForTme(p, totalNumThreads, numNumaNodes, bNumaSupport, threadsPerPool,
+ nodeMaskPerPool, numPools, threadsFrameEnc);
}
// If the last pool size is > MAX_POOL_THREADS, clip it to spawn thread pools only of size >= 1/2 max (heuristic)
--
2.43.0
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20260312/ca9406af/attachment-0001.htm>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-tme-add-warning-for-low-threads.patch
Type: application/octet-stream
Size: 10533 bytes
Desc: 0001-tme-add-warning-for-low-threads.patch
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20260312/ca9406af/attachment-0001.obj>
More information about the x265-devel
mailing list