[x265] [PATCH] threaded-me: optimizations and fixes
Shashank Pathipati
shashank.pathipati at multicorewareinc.com
Tue Apr 7 17:48:12 UTC 2026
From 7779ac6714434649ba89561247a58f539bffc955 Mon Sep 17 00:00:00 2001
From: Shashank Pathipati <shashank.pathipati at multicorewareinc.com>
Date: Thu, 26 Mar 2026 10:41:55 +0530
Subject: [PATCH] tme: - update doc and mark --threaded-me as experimental -
minor optimizations for --threaded-me - add CPU freq adaptive threadpool
split - add CLIs for smoke and regression - add support for --no-wpp and
--me=sea
Co-Authored-By: Syed Majid <syed.majid at multicorewareinc.com>
---
doc/reST/cli.rst | 19 ++++---
source/common/threadpool.cpp | 85 +++++++++++++++++++++++++++-
source/common/threadpool.h | 14 +++++
source/encoder/frameencoder.cpp | 50 +++++++++-------
source/encoder/motion.cpp | 12 ++++
source/encoder/search.cpp | 97 +++++++++++++++++++++++++++-----
source/encoder/threadedme.cpp | 45 ++++++++++++++-
source/encoder/threadedme.h | 28 ++++-----
source/test/regression-tests.txt | 8 +++
source/test/smoke-tests.txt | 7 +++
10 files changed, 301 insertions(+), 64 deletions(-)
diff --git a/doc/reST/cli.rst b/doc/reST/cli.rst
index f602e1307..43c488b14 100755
--- a/doc/reST/cli.rst
+++ b/doc/reST/cli.rst
@@ -408,16 +408,21 @@ Performance Options
can reduce compression efficiency. Recommended on many-core CPUs when
encode speed is prioritized over compression efficiency.
- If VBV options are enabled, Threaded ME is automatically disabled and a
- warning is emitted.
+ This feature is automatically disabled in the following conditions:
- This feature is implicitly disabled when no thread pool is present.
+ - When no thread pool is present.
+ - When the detected CPU core count is less than 32.
+ - If VBV options are enabled, due to incompatibility with re-encoding triggers.
- --threaded-me provides speedups on many-core CPUs, accompanied by a
- compression efficiency loss.
+ Default disabled. **Experimental Feature**
+
+ .. note::
+ :option:`--threaded-me` currently provides encoding speedups only on
+ many-core machines running at low clock frequencies (at or below
+ approximately 1.5 GHz). On high-frequency systems or machines with
+ fewer cores, the overhead of the additional motion estimation work
+ may outweigh the parallelism gains.
- Default disabled.
-
.. option:: --preset, -p <integer|string>
Sets parameters to preselected values, trading off compression efficiency against
diff --git a/source/common/threadpool.cpp b/source/common/threadpool.cpp
index 79075425a..b3e29bea4 100644
--- a/source/common/threadpool.cpp
+++ b/source/common/threadpool.cpp
@@ -33,6 +33,15 @@
#include <winnt.h>
#endif
+#if defined(__APPLE__)
+#include <sys/sysctl.h>
+#elif !defined(_WIN32)
+#include <fstream>
+#include <string>
+#include <cstdio>
+#include <cstdlib>
+#endif
+
#if X86_64
#ifdef __GNUC__
@@ -351,7 +360,6 @@ static void distributeThreadsForTme(
}
// Apply calculated threadpool assignment
- // TODO: Make sure this doesn't cause a problem later on
memset(threadsPerPool, 0, sizeof(int) * (numNumaNodes + 2));
memset(nodeMaskPerPool, 0, sizeof(uint64_t) * (numNumaNodes + 2));
@@ -905,16 +913,87 @@ int ThreadPool::configureTmeThreadCount(x265_param* param, int cpuCount)
}
}
+ bool isHighFreq = (getCPUFrequencyMHz() > 1500.0);
+
if (selectedRule >= 0)
{
const TmeRuleConfig& cfg = s_tmeRuleConfig[selectedRule];
param->tmeTaskBlockSize = cfg.widthBasedTaskBlockSize ? ((param->sourceWidth + 480 - 1) / 480) : cfg.taskBlockSize[resClass];
param->tmeNumBufferRows = cfg.numBufferRows[resClass];
- return (cpuCount * cfg.threadPercent[resClass]) / 100;
+ return (!isHighFreq) ? (cpuCount * cfg.threadPercent[resClass]) / 100 : cpuCount / 2;
}
static const int s_defaultThreadPercent[TME_RES_COUNT] = { 80, 80, 70 };
- return (cpuCount * s_defaultThreadPercent[resClass]) / 100;
+ return (!isHighFreq) ? (cpuCount * s_defaultThreadPercent[resClass]) / 100 : cpuCount / 2;
+}
+
+double getCPUFrequencyMHz()
+{
+#if defined(_WIN32)
+ HKEY hKey;
+ DWORD mhz = 0;
+ DWORD size = sizeof(mhz);
+ if (RegOpenKeyExA(HKEY_LOCAL_MACHINE,
+ "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0",
+ 0, KEY_READ, &hKey) == ERROR_SUCCESS)
+ {
+ RegQueryValueExA(hKey, "~MHz", NULL, NULL, (LPBYTE)&mhz, &size);
+ RegCloseKey(hKey);
+ }
+ return (double)mhz;
+
+#elif defined(__APPLE__)
+ uint64_t freq = 0;
+ size_t size = sizeof(freq);
+ if (sysctlbyname("hw.cpufrequency", &freq, &size, NULL, 0) == 0)
+ return (double)freq / 1.0e6;
+ return 0.0;
+
+#else /* Linux */
+ /* scaling_cur_freq reflects the live frequency chosen by the governor
+ * and EPP hint. Iterate over all cpuN entries and return the highest observed value.
+ */
+ {
+ uint64_t maxKhz = 0;
+ char path[64];
+ for (int cpu = 0; ; ++cpu)
+ {
+ snprintf(path, sizeof(path),
+ "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_cur_freq", cpu);
+ std::ifstream f(path);
+ if (!f.is_open())
+ break;
+ uint64_t khz = 0;
+ f >> khz;
+ if (khz > maxKhz)
+ maxKhz = khz;
+ }
+ if (maxKhz > 0)
+ return (double)maxKhz / 1000.0;
+ }
+ /* Fall back to /proc/cpuinfo — collect the max "cpu MHz" across all entries. */
+ {
+ std::ifstream f("/proc/cpuinfo");
+ std::string line;
+ double maxMhz = 0.0;
+ while (std::getline(f, line))
+ {
+ if (line.find("cpu MHz") != std::string::npos)
+ {
+ size_t colon = line.find(':');
+ if (colon != std::string::npos)
+ {
+ double mhz = strtod(line.c_str() + colon + 1, NULL);
+ if (mhz > maxMhz)
+ maxMhz = mhz;
+ }
+ }
+ }
+ if (maxMhz > 0.0)
+ return maxMhz;
+ }
+ return 0.0;
+#endif
}
} // end namespace X265_NS
diff --git a/source/common/threadpool.h b/source/common/threadpool.h
index f223fd010..04e35528e 100644
--- a/source/common/threadpool.h
+++ b/source/common/threadpool.h
@@ -171,6 +171,20 @@ public:
virtual void processTasks(int workerThreadId) = 0;
};
+/**
+ * @brief Return the highest current CPU frequency in MHz across all cores, or 0.0 if unavailable.
+ *
+ * The value reflects the live frequency as reported by the cpufreq subsystem,
+ * which accounts for the active scaling governor and EPP hint.
+ *
+ * Platform support:
+ * Linux – iterates /sys/devices/system/cpu/cpuN/cpufreq/scaling_cur_freq (kHz)
+ * for all cores and returns the maximum; falls back to /proc/cpuinfo
+ * macOS – sysctl hw.cpufrequency (Hz)
+ * Windows – registry ~MHz under CentralProcessor\0
+ */
+double getCPUFrequencyMHz();
+
} // end namespace X265_NS
#endif // ifndef X265_THREADPOOL_H
diff --git a/source/encoder/frameencoder.cpp b/source/encoder/frameencoder.cpp
index af73626af..c8bf12508 100644
--- a/source/encoder/frameencoder.cpp
+++ b/source/encoder/frameencoder.cpp
@@ -1036,6 +1036,13 @@ void FrameEncoder::compressFrame(int layer)
}
}
+ if (m_top->m_threadedME && !slice->isIntra())
+ {
+ ScopedLock lock(m_tmeDepLock);
+ m_tmeDeps[i].external = true;
+ m_top->m_threadedME->enqueueReadyRows(i, layer, this);
+ }
+
if (!i)
m_row0WaitTime[layer] = x265_mdate();
else if (i == m_numRows - 1)
@@ -1636,6 +1643,29 @@ void FrameEncoder::processRowEncoder(int intRow, ThreadLocalData& tld, int layer
const uint32_t cuAddr = lineStartCUAddr + col;
CUData* ctu = curEncData.getPicCTU(cuAddr);
const uint32_t bLastCuInSlice = (bLastRowInSlice & (col == numCols - 1)) ? 1 : 0;
+
+ /* Must wait for TME to finish before initCTU because both threads
+ * operate on the same CUData — the encoder's initCTU would corrupt
+ * data that deriveMVsForCTU is still reading. */
+ if (m_top->m_threadedME && slice->m_sliceType != I_SLICE)
+ {
+ int64_t waitStart = x265_mdate();
+ bool waited = false;
+
+ while (m_frame[layer]->m_ctuMEFlags[cuAddr].get() == 0)
+ {
+#ifdef DETAILED_CU_STATS
+ tld.analysis.m_stats[m_jpId].countTmeBlockedCTUs++;
+#endif
+ m_frame[layer]->m_ctuMEFlags[cuAddr].waitForChange(0);
+ waited = true;
+ }
+
+ int64_t waitEnd = x265_mdate();
+ if (waited)
+ ATOMIC_ADD(&m_totalThreadedMEWait[layer], waitEnd - waitStart);
+ }
+
ctu->initCTU(*m_frame[layer], cuAddr, slice->m_sliceQp, bFirstRowInSlice, bLastRowInSlice, bLastCuInSlice);
if (!layer && bIsVbv)
@@ -1692,26 +1722,6 @@ void FrameEncoder::processRowEncoder(int intRow, ThreadLocalData& tld, int layer
if (m_param->dynamicRd && (int32_t)(m_rce.qpaRc - m_rce.qpNoVbv) > 0)
ctu->m_vbvAffected = true;
- if (m_top->m_threadedME && slice->m_sliceType != I_SLICE)
- {
- int64_t waitStart = x265_mdate();
- bool waited = false;
-
- // Wait for threadedME to complete ME upto this CTU
- while (m_frame[layer]->m_ctuMEFlags[cuAddr].get() == 0)
- {
-#ifdef DETAILED_CU_STATS
- tld.analysis.m_stats[m_jpId].countTmeBlockedCTUs++;
-#endif
- m_frame[layer]->m_ctuMEFlags[cuAddr].waitForChange(0);
- waited = true;
- }
-
- int64_t waitEnd = x265_mdate();
- if (waited)
- ATOMIC_ADD(&m_totalThreadedMEWait[layer], waitEnd - waitStart);
- }
-
// Does all the CU analysis, returns best top level mode decision
Mode& best = tld.analysis.compressCTU(*ctu, *m_frame[layer], m_cuGeoms[m_ctuGeomMap[cuAddr]], rowCoder);
diff --git a/source/encoder/motion.cpp b/source/encoder/motion.cpp
index 1a8cf6371..230037e2a 100644
--- a/source/encoder/motion.cpp
+++ b/source/encoder/motion.cpp
@@ -642,6 +642,7 @@ int MotionEstimate::diamondSearch(ReferencePlanes* ref, const MV& mvmin, const M
for (int16_t dist = 1; dist <= 4; dist <<= 1)
{
+ const MV bmv0 = bmv;
const int32_t top = omv.y - dist;
const int32_t bottom = omv.y + dist;
const int32_t left = omv.x - dist;
@@ -697,10 +698,13 @@ int MotionEstimate::diamondSearch(ReferencePlanes* ref, const MV& mvmin, const M
COST_MV(omv.x, bottom);
}
}
+ if (bmv == bmv0)
+ break;
}
for (int16_t dist = 8; dist <= 64; dist += 8)
{
+ const MV bmv0 = bmv;
const int32_t top = omv.y - dist;
const int32_t bottom = omv.y + dist;
const int32_t left = omv.x - dist;
@@ -772,6 +776,8 @@ int MotionEstimate::diamondSearch(ReferencePlanes* ref, const MV& mvmin, const M
}
}
}
+ if (bmv == bmv0)
+ break;
}
outMV = bmv;
return bcost;
@@ -996,6 +1002,12 @@ int MotionEstimate::motionEstimate(ReferencePlanes *ref,
pmv = pmv.roundToFPel();
MV omv = bmv; // current search origin or starting point
+ if (bcost == 0)
+ {
+ outQMv = bmv.toQPel();
+ return mvcost(bmv << 2); // return just the MV cost (no residual)
+ }
+
int search = ref->isHMELowres ? (hme ? searchMethodL0 : searchMethodL1) : searchMethod;
switch (search)
{
diff --git a/source/encoder/search.cpp b/source/encoder/search.cpp
index 304911f96..ebf914912 100644
--- a/source/encoder/search.cpp
+++ b/source/encoder/search.cpp
@@ -348,6 +348,12 @@ void Search::puMotionEstimation(const Slice* slice, const CUGeom& cuGeom, CUData
PicYuv* recon = slice->m_mref[list][ref].reconPic;
int offset = recon->getLumaAddr(cu.m_cuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) - recon->getLumaAddr(0);
+ if (m_param->searchMethod == X265_SEA)
+ {
+ for (int planes = 0; planes < INTEGRAL_PLANE_NUM; planes++)
+ m_me.integral[planes] = slice->m_refFrameList[list][ref]->m_encData->m_meIntegral[planes] + offset;
+ }
+
m_me.setSourcePU(fencPic->m_picOrg[0], fencPic->m_stride, offset, pu.width, pu.height, m_param->searchMethod, m_param->subpelRefine);
setSearchRange(cu, mvp, searchRange, mvmin, mvmax);
@@ -359,24 +365,89 @@ void Search::puMotionEstimation(const Slice* slice, const CUGeom& cuGeom, CUData
else
{
m_vertRestriction = slice->m_refPOCList[list][ref] == slice->m_poc;
- satdCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvp, numMvc, mvc, m_param->searchRange, outmv, m_param->maxSlices, m_vertRestriction,
- m_param->bSourceReferenceEstimation ? m_slice->m_refFrameList[list][ref]->m_fencPic->getLumaAddr(0) : 0);
-
- if (bLowresMVP && mvp_lowres.notZero() && mvp_lowres != mvp)
+ pixel* srcRef = m_param->bSourceReferenceEstimation ?
+ m_slice->m_refFrameList[list][ref]->m_fencPic->getLumaAddr(0) : 0;
+
+ MV bestMvp = mvp;
+ bool usedLowresMvp = false;
+
+ /* Only do SAD comparison when:
+ * 1. srcRef is null (not source reference estimation mode)
+ * 2. lowres MVP is valid and different from spatial MVP
+ * 3. fencPUYuv is initialised */
+ if (!srcRef &&
+ bLowresMVP && mvp_lowres.notZero() && mvp_lowres != mvp &&
+ m_me.fencPUYuv.m_buf[0] != NULL)
{
- MV outmv_lowres;
- bLowresMVP = false;
- setSearchRange(cu, mvp_lowres, m_param->searchRange, mvmin, mvmax);
- int lowresMvCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvp_lowres, numMvc, mvc, m_param->searchRange,outmv_lowres, m_param->maxSlices,
- m_vertRestriction, m_param->bSourceReferenceEstimation ? m_slice->m_refFrameList[list][ref]->m_fencPic->getLumaAddr(0): 0);
+ intptr_t stride = slice->m_mref[list][ref].lumaStride;
+ PicYuv* refPic = slice->m_mref[list][ref].reconPic;
- if (lowresMvCost < satdCost)
+ /* Only proceed if strides match */
+ if (refPic->m_stride == stride)
{
- outmv = outmv_lowres;
- satdCost = lowresMvCost;
- bLowresMVP = true;
+ intptr_t bOffset = refPic->getLumaAddr(cu.m_cuAddr,
+ pu.cuAbsPartIdx + pu.puAbsPartIdx)
+ - refPic->getLumaAddr(0);
+
+ pixel* fenc = m_me.fencPUYuv.m_buf[0];
+ pixel* frefBase = slice->m_mref[list][ref].fpelPlane[0]
+ + bOffset;
+
+ MV mvp_fp = mvp.clipped(
+ MV(mvmin.x << 2, mvmin.y << 2),
+ MV(mvmax.x << 2, mvmax.y << 2)).roundToFPel();
+
+ MV lowres_fp = mvp_lowres.clipped(
+ MV(mvmin.x << 2, mvmin.y << 2),
+ MV(mvmax.x << 2, mvmax.y << 2)).roundToFPel();
+
+ /* Picture boundary check for 4K safety */
+ int picW = refPic->m_picWidth;
+ int picH = refPic->m_picHeight;
+
+ bool mvpValid = (mvp_fp.x >= mvmin.x &&
+ mvp_fp.x <= mvmax.x &&
+ mvp_fp.y >= mvmin.y &&
+ mvp_fp.y <= mvmax.y &&
+ mvp_fp.x + pu.width <= picW &&
+ mvp_fp.y + pu.height <= picH);
+
+ bool lowresValid = (lowres_fp.x >= mvmin.x &&
+ lowres_fp.x <= mvmax.x &&
+ lowres_fp.y >= mvmin.y &&
+ lowres_fp.y <= mvmax.y &&
+ lowres_fp.x + pu.width <= picW &&
+ lowres_fp.y + pu.height <= picH);
+
+ if (mvpValid && lowresValid && mvp_fp != lowres_fp)
+ {
+ pixelcmp_t sadFunc = primitives.pu[m_me.partEnum].sad;
+
+ int sadMvp = sadFunc(fenc, FENC_STRIDE,
+ frefBase + mvp_fp.x + mvp_fp.y * stride,
+ stride);
+ int sadLowres = sadFunc(fenc, FENC_STRIDE,
+ frefBase + lowres_fp.x + lowres_fp.y * stride,
+ stride);
+
+ if (sadLowres < sadMvp)
+ {
+ bestMvp = mvp_lowres;
+ mvp = mvp_lowres; /* fix mvcost basis */
+ usedLowresMvp = true;
+ }
+ }
}
}
+
+ satdCost = m_me.motionEstimate(&slice->m_mref[list][ref],
+ mvmin, mvmax,
+ bestMvp,
+ numMvc, mvc,
+ m_param->searchRange, outmv,
+ m_param->maxSlices, m_vertRestriction, srcRef);
+
+ bLowresMVP = usedLowresMvp;
}
bits += m_me.bitcost(outmv);
diff --git a/source/encoder/threadedme.cpp b/source/encoder/threadedme.cpp
index 1028beb92..3c27835f3 100644
--- a/source/encoder/threadedme.cpp
+++ b/source/encoder/threadedme.cpp
@@ -144,7 +144,7 @@ void ThreadedME::threadMain()
frameEnc->m_tmeTasks.pop();
m_taskQueueLock.acquire();
- m_taskQueue.push(task);
+ m_taskQueue.push_back(task);
m_taskQueueLock.release();
newCTUsPushed++;
@@ -177,8 +177,41 @@ void ThreadedME::findJob(int workerThreadId)
m_tld[workerThreadId].analysis.m_stats[m_jpId].countTmeTasks++;
#endif
- CTUTask task = m_taskQueue.top();
- m_taskQueue.pop();
+ /* Scan for the most urgent task based on live WPP progress.
+ * Primary key: urgency — distance from WPP frontier (smaller / negative = more urgent)
+ * Secondary key: diagonal (row + col) — prefer earlier WPP wavefront position
+ * Tertiary key: enqueue sequence — preserve FIFO among equal candidates */
+ int bestIdx = 0;
+ int bestUrgency = ctuTaskUrgency(m_taskQueue[0]);
+ int bestDiag = m_taskQueue[0].row + m_taskQueue[0].col;
+ uint64_t bestSeq = m_taskQueue[0].seq;
+
+ for (int k = 1; k < (int)m_taskQueue.size(); k++)
+ {
+ const CTUTask& t = m_taskQueue[k];
+ int urgency = ctuTaskUrgency(t);
+ int diag = t.row + t.col;
+
+ bool isBetter = false;
+ if (urgency < bestUrgency)
+ isBetter = true;
+ else if (urgency == bestUrgency && diag < bestDiag)
+ isBetter = true;
+ else if (urgency == bestUrgency && diag == bestDiag && t.seq < bestSeq)
+ isBetter = true;
+
+ if (isBetter)
+ {
+ bestIdx = k;
+ bestUrgency = urgency;
+ bestDiag = diag;
+ bestSeq = t.seq;
+ }
+ }
+
+ CTUTask task = m_taskQueue[bestIdx];
+ m_taskQueue[bestIdx] = m_taskQueue.back();
+ m_taskQueue.pop_back();
m_taskQueueLock.release();
int numCols = (m_param->sourceWidth + m_param->maxCUSize - 1) / m_param->maxCUSize;
@@ -257,4 +290,10 @@ void initCTU(CUData& ctu, int row, int col, CTUTask& task)
ctu.initCTU(frame, ctuAddr, slice->m_sliceQp, bFirstRowInSlice, bLastRowInSlice, bLastCuInSlice);
}
+int ctuTaskUrgency(const CTUTask& task)
+{
+ int wppProgress = (int)task.frameEnc->m_rows[task.row].completed;
+ return task.col - wppProgress;
+}
+
}
\ No newline at end of file
diff --git a/source/encoder/threadedme.h b/source/encoder/threadedme.h
index 5e5fc4878..71a9279c2 100644
--- a/source/encoder/threadedme.h
+++ b/source/encoder/threadedme.h
@@ -34,7 +34,6 @@
#include "analysis.h"
#include "mv.h"
-#include <queue>
#include <vector>
#include <fstream>
@@ -134,22 +133,6 @@ struct CTUTask
};
-struct CompareCTUTask {
- bool operator()(const CTUTask& a, const CTUTask& b) const {
- if (a.frame->m_poc == b.frame->m_poc)
- {
- int a_pos = a.row + a.col;
- int b_pos = b.row + b.col;
- if (a_pos != b_pos) return a_pos > b_pos;
- }
-
- /* Compare by sequence number to preserve FIFO enqueue order.
- * priority_queue in C++ is a max-heap, so return true when a.seq > b.seq
- * to make smaller seq (earlier enqueue) the top() element. */
- return a.seq > b.seq;
- }
-};
-
/**
* @brief Threaded motion-estimation module that schedules CTU blocks across worker threads.
*
@@ -163,7 +146,7 @@ public:
x265_param* m_param;
Encoder& m_enc;
- std::priority_queue<CTUTask, std::vector<CTUTask>, CompareCTUTask> m_taskQueue;
+ std::vector<CTUTask> m_taskQueue;
Lock m_taskQueueLock;
Event m_taskEvent;
@@ -244,6 +227,15 @@ public:
*/
void initCTU(CUData& ctu, int row, int col, CTUTask& task);
+/**
+ * @brief Compute scheduling urgency for a CTU task based on WPP progress.
+ *
+ * Returns how many CTUs WPP must process in this row before it reaches the
+ * task's first CTU. Negative values mean WPP has already passed the task's
+ * column and is likely stalling on its result right now.
+ */
+int ctuTaskUrgency(const CTUTask& task);
+
};
#endif
diff --git a/source/test/regression-tests.txt b/source/test/regression-tests.txt
index 081013cca..7551dbb7b 100644
--- a/source/test/regression-tests.txt
+++ b/source/test/regression-tests.txt
@@ -201,3 +201,11 @@ ParkScene_1920x1080_24.y4m, --crf 24 --mcstf --preset slow --bframes 5
BasketballDrive_1920x1080_50.y4m, --crf 26 --preset slow --sbrc --no-open-gop --keyint 60 --min-keyint 60 --vbv-bufsize 6000 --vbv-maxrate 5000 --temporal-layers 4 --b-adapt 0 --no-cutree
crowd_run_1080p50.y4m, --crf 22 --preset superfast --sbrc --no-open-gop --me sea --vbv-maxrate 9000 --vbv-bufsize 7500 --keyint 100 --min-keyint 100
# vim: tw=200
+
+# Threaded ME tests
+BasketballDrive_1920x1080_50.y4m,--preset ultrafast --threaded-me --constrained-intra --signhide --qp 26
+CrowdRun_1920x1080_50_10bit_422.yuv,--preset medium --threaded-me --no-wpp --no-cutree --rdoq-level 1 --limit-refs 1
+DucksAndLegs_1920x1080_60_10bit_444.yuv,--preset slow --threaded-me --temporal-layers 2 --no-psy-rd --qg-size 32
+old_town_cross_444_720p50.y4m,--preset faster --threaded-me --me sea --weightp --limit-modes
+big_buck_bunny_360p24.y4m,--preset medium --threaded-me --no-wpp --aq-mode 3 --aq-strength 1.5 --weightb
+
diff --git a/source/test/smoke-tests.txt b/source/test/smoke-tests.txt
index 89f8ce452..4e6acfa61 100644
--- a/source/test/smoke-tests.txt
+++ b/source/test/smoke-tests.txt
@@ -25,3 +25,10 @@ CrowdRun_1920x1080_50_10bit_444.yuv,--preset=superfast --bitrate 7000 --sao --li
# CrowdRun_1920x1080_50_10bit_444.yuv,--preset=ultrafast --weightp --keyint -1 --film-grain "CrowdRun_1920x1080_50_10bit_444.bin"
# DucksAndLegs_1920x1080_60_10bit_422.yuv,--preset=veryfast --min-cu 16 --film-grain "DucksAndLegs_1920x1080_60_10bit_422.bin"
# NebutaFestival_2560x1600_60_10bit_crop.yuv,--preset=superfast --bitrate 10000 --sao --limit-sao --cll --max-cll "1000,400" --film-grain "NebutaFestival_2560x1600_60_10bit_crop.bin"
+
+# Threaded ME tests
+BasketballDrive_1920x1080_50.y4m,--preset=superfast --threaded-me --qp 28
+CrowdRun_1920x1080_50_10bit_422.yuv,--preset=medium --threaded-me --no-wpp --crf 22
+ducks_take_off_444_720p50.y4m,--preset=slower --threaded-me --me sea --limit-refs 2
+RaceHorses_416x240_30_10bit.yuv,--preset=faster --threaded-me --aq-mode 3 --rdoq-level 1
+News-4k.y4m,--preset=veryfast --threaded-me --no-cutree --tune psnr
--
2.52.0.windows.1
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20260407/14546c06/attachment-0001.htm>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-tme.patch
Type: application/octet-stream
Size: 25315 bytes
Desc: 0001-tme.patch
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20260407/14546c06/attachment-0001.obj>
More information about the x265-devel
mailing list