[x265] [PATCH] tme: add warning for low threads

Shashank Pathipati shashank.pathipati at multicorewareinc.com
Thu Mar 12 12:23:34 UTC 2026


>From 537158b33b35c731fadaf964fb5abc676db30f53 Mon Sep 17 00:00:00 2001
From: Shashank Pathipati <shashank.pathipati at multicorewareinc.com>
Date: Thu, 12 Mar 2026 17:51:41 +0530
Subject: [PATCH] tme: add warning for low threads

---
 source/common/common.h       |   1 +
 source/common/threadpool.cpp | 245 +++++++++++++++++++----------------
 2 files changed, 134 insertions(+), 112 deletions(-)

diff --git a/source/common/common.h b/source/common/common.h
index 92af90426..ff60a85fe 100644
--- a/source/common/common.h
+++ b/source/common/common.h
@@ -354,6 +354,7 @@ typedef int16_t  coeff_t;      // transform coefficient

 #define MAX_NUM_PUS_PER_CTU      593   // Maximum number of PUs in a 64x64 CTU
 #define MAX_NUM_PU_SIZES         24    // Number of distinct PU sizes in a 64x64 CTU
+#define MIN_TME_THREADS          32    // Minimum thread count for ThreadedME; it is disabled below this

 namespace X265_NS {

diff --git a/source/common/threadpool.cpp b/source/common/threadpool.cpp
index 43b1a7312..79075425a 100644
--- a/source/common/threadpool.cpp
+++ b/source/common/threadpool.cpp
@@ -248,6 +248,136 @@ int ThreadPool::tryBondPeers(int maxPeers, sleepbitmap_t peerBitmap, BondedTaskG

     return bondCount;
 }
+
+/* Distributes totalNumThreads between ThreadedME (pool 0) and FrameEncoder pools, updating threadsPerPool[],
+ * nodeMaskPerPool[], numNumaNodes and numPools in-place; threadsFrameEnc receives the frame-encoding thread count.
+ * Below MIN_TME_THREADS it only logs a warning and clears p->bThreadedME, leaving every other argument untouched. */
+static void distributeThreadsForTme(
+    x265_param* p,
+    int totalNumThreads,
+    int& numNumaNodes,
+    bool bNumaSupport,
+    int* threadsPerPool,
+    uint64_t* nodeMaskPerPool,
+    int& numPools,
+    int& threadsFrameEnc)
+{
+    if (totalNumThreads < MIN_TME_THREADS)
+    {
+        x265_log(p, X265_LOG_WARNING, "Low thread count detected, disabling --threaded-me."
+            " Minimum recommended is %d cores / threads\n", MIN_TME_THREADS);
+        p->bThreadedME = 0;
+        return;
+    }
+
+    int targetTME = ThreadPool::configureTmeThreadCount(p, totalNumThreads);
+    targetTME = (targetTME < 1) ? 1 : targetTME;
+
+    threadsFrameEnc = totalNumThreads - targetTME;
+    int defaultNumFT = ThreadPool::getFrameThreadsCount(p, totalNumThreads);
+    if (threadsFrameEnc < defaultNumFT)
+    {
+        threadsFrameEnc = defaultNumFT;
+        targetTME = totalNumThreads - threadsFrameEnc;
+    }
+
+#if defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7 || HAVE_LIBNUMA
+    if (bNumaSupport && numNumaNodes > 1)
+    {
+        int tmeNumaNodes = 0;
+        int leftover = 0;
+
+        // First thread pool belongs to ThreadedME
+        std::vector<int> threads(1, 0);
+        std::vector<uint64_t> nodeMasks(1, 0);
+        int poolIndex = 0;
+
+        /* Greedily assign NUMA nodes to TME until the target is reached; every later node becomes its own pool */
+        for (int i = 0; i < numNumaNodes + 1; i++)
+        {
+            if (!threadsPerPool[i] && !nodeMaskPerPool[i])
+                continue;
+
+            int toTake = X265_MIN(threadsPerPool[i], targetTME - threads[0]);
+            if (toTake > 0)
+            {
+                threads[poolIndex] += toTake;
+                nodeMasks[poolIndex] |= nodeMaskPerPool[i];
+                tmeNumaNodes++;
+
+                if (threads[0] == targetTME)
+                    poolIndex++;
+
+                if (toTake < threadsPerPool[i])
+                    leftover = threadsPerPool[i] - toTake;
+            }
+            else
+            {
+                threads.push_back(threadsPerPool[i]);
+                nodeMasks.push_back(nodeMaskPerPool[i]);
+                poolIndex++;
+            }
+        }
+
+        // Distribute leftover threads (the part of the last TME node that TME did not need) among FrameEncoders
+        if (leftover)
+        {
+            // Case 1: FrameEncoder pool(s) exist; bound by threads.size(), which is < numNumaNodes once TME spans several nodes
+            if (threads.size() > 1)
+            {
+                int split = leftover / (static_cast<int>(threads.size()) - 1);
+                for (int pool = 1; pool < static_cast<int>(threads.size()); pool++)
+                {
+                    int give = X265_MIN(split, leftover);
+                    threads[pool] += give;
+                    leftover -= give;
+                }
+            }
+
+            // Case 2: FrameEncoder(s) haven't received threads yet
+            if (threads.size() == 1)
+            {
+                threads.push_back(leftover);
+                // Give the same node mask as the last node of ThreadedME (highest set bit of the TME mask)
+                uint64_t msb = 1;
+                uint64_t tmeNodeMask = nodeMasks[0];
+                while (tmeNodeMask > 1)
+                {
+                    tmeNodeMask >>= 1;
+                    msb <<= 1;
+                }
+                nodeMasks.push_back(msb);
+            }
+        }
+
+        // Apply calculated threadpool assignment
+        // TODO: Make sure this doesn't cause a problem later on
+        memset(threadsPerPool, 0, sizeof(int) * (numNumaNodes + 2));
+        memset(nodeMaskPerPool, 0, sizeof(uint64_t) * (numNumaNodes + 2));
+
+        numPools = numNumaNodes = static_cast<int>(threads.size());
+        for (int pool = 0; pool < numPools; pool++)
+        {
+            threadsPerPool[pool] = threads[pool];
+            nodeMaskPerPool[pool] = nodeMasks[pool];
+        }
+    }
+    else
+#endif
+    {
+        memset(threadsPerPool, 0, sizeof(int) * (numNumaNodes + 2));
+        memset(nodeMaskPerPool, 0, sizeof(uint64_t) * (numNumaNodes + 2));
+
+        threadsPerPool[0] = targetTME;
+        nodeMaskPerPool[0] = 1;
+
+        threadsPerPool[1] = threadsFrameEnc;
+        nodeMaskPerPool[1] = 1;
+
+        numPools = 2;
+    }
+}
+
 ThreadPool* ThreadPool::allocThreadPools(x265_param* p, int& numPools, bool isThreadsReserved)
 {
     enum { MAX_NODE_NUM = 127 };
@@ -384,120 +514,11 @@ ThreadPool* ThreadPool::allocThreadPools(x265_param* p, int& numPools, bool isTh
     if (!totalNumThreads)
         totalNumThreads = ThreadPool::getCpuCount();

-    int threadsFrameEnc = 0;
-
+    int threadsFrameEnc = totalNumThreads;
     if (p->bThreadedME)
     {
-        int targetTME = configureTmeThreadCount(p, totalNumThreads);
-        targetTME = (targetTME < 1) ? 1 : targetTME;
-
-        threadsFrameEnc = totalNumThreads - targetTME;
-        int defaultNumFT = getFrameThreadsCount(p, totalNumThreads);
-        if (threadsFrameEnc < defaultNumFT)
-        {
-            threadsFrameEnc = defaultNumFT;
-            targetTME = totalNumThreads - threadsFrameEnc;
-        }
-
-#if defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7 || HAVE_LIBNUMA
-        if (bNumaSupport && numNumaNodes > 1)
-        {
-            int tmeNumaNodes = 0;
-            int leftover = 0;
-
-            // First thread pool belongs to ThreadedME
-            std::vector<int> threads(1, 0);
-            std::vector<uint64_t> nodeMasks(1, 0);
-            int poolIndex = 0;
-
-            /* Greedily assign whole NUMA nodes to TME until reaching or exceeding the target */
-            for (int i = 0; i < numNumaNodes + 1; i++)
-            {
-                if (!threadsPerPool[i] && !nodeMaskPerPool[i])
-                    continue;
-
-                int toTake = X265_MIN(threadsPerPool[i], targetTME - threads[0]);
-                if (toTake > 0)
-                {
-                    threads[poolIndex] += toTake;
-                    nodeMasks[poolIndex] |= nodeMaskPerPool[i];
-                    tmeNumaNodes++;
-
-                    if (threads[0] == targetTME)
-                        poolIndex++;
-
-                    if (toTake < threadsPerPool[i])
-                        leftover = threadsPerPool[i] - toTake;
-                }
-                else
-                {
-                    threads.push_back(threadsPerPool[i]);
-                    nodeMasks.push_back(nodeMaskPerPool[i]);
-                    poolIndex++;
-                }
-            }
-
-            // Distribute leftover threads among FrameEncoders
-            if (leftover)
-            {
-                // Case 1: There are 1 or more threadpools for FrameEncoder(s) by now
-                if (threads.size() > 1)
-                {
-                    int split = static_cast<int>(static_cast<double>(leftover) / (numNumaNodes - 1));
-                    for (int pool = 1; pool < numNumaNodes; pool++)
-                    {
-                        int give = X265_MIN(split, leftover);
-                        threads[pool] += give;
-                        leftover -= give;
-                    }
-                }
-
-                // Case 2: FrameEncoder(s) haven't received threads yet
-                if (threads.size() == 1)
-                {
-                    threads.push_back(leftover);
-                    // Give the same node mask as the last node of ThreadedME
-                    uint64_t msb = 1;
-                    uint64_t tmeNodeMask = nodeMasks[0];
-                    while (tmeNodeMask > 1)
-                    {
-                        tmeNodeMask >>= 1;
-                        msb <<= 1;
-                    }
-                    nodeMasks.push_back(msb);
-                }
-            }
-
-            // Apply calculated threadpool assignment
-            // TODO: Make sure this doesn't cause a problem later on
-            memset(threadsPerPool, 0, sizeof(threadsPerPool));
-            memset(nodeMaskPerPool, 0, sizeof(nodeMaskPerPool));
-
-            numPools = numNumaNodes = static_cast<int>(threads.size());
-            for (int pool = 0; pool < numPools; pool++)
-            {
-                threadsPerPool[pool] = threads[pool];
-                nodeMaskPerPool[pool] = nodeMasks[pool];
-            }
-        }
-        else
-#endif
-        {
-            memset(threadsPerPool, 0, sizeof(threadsPerPool));
-            memset(nodeMaskPerPool, 0, sizeof(nodeMaskPerPool));
-
-            threadsPerPool[0] = targetTME;
-            nodeMaskPerPool[0] = 1;
-
-            threadsPerPool[1] = threadsFrameEnc;
-            nodeMaskPerPool[1] = 1;
-
-            numPools = 2;
-        }
-    }
-    else
-    {
-        threadsFrameEnc = totalNumThreads;
+        distributeThreadsForTme(p, totalNumThreads, numNumaNodes, bNumaSupport, threadsPerPool,
+                                nodeMaskPerPool, numPools, threadsFrameEnc);
     }

     // If the last pool size is > MAX_POOL_THREADS, clip it to spawn thread pools only of size >= 1/2 max (heuristic)
--
2.43.0


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20260312/ca9406af/attachment-0001.htm>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-tme-add-warning-for-low-threads.patch
Type: application/octet-stream
Size: 10533 bytes
Desc: 0001-tme-add-warning-for-low-threads.patch
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20260312/ca9406af/attachment-0001.obj>


More information about the x265-devel mailing list