[x265] [PATCH 5/6] fix(tme): fix compatibility with --no-wpp and --me=sea

Shashank Pathipati shashank.pathipati at multicorewareinc.com
Fri Apr 10 09:22:12 UTC 2026


From b0a682b13b1395ae10afdda61071bf3c93ca6b3c Mon Sep 17 00:00:00 2001
From: Shashank Pathipati <shashank.pathipati at multicorewareinc.com>
Date: Fri, 10 Apr 2026 14:35:36 +0530
Subject: [PATCH 5/6] fix(tme): fix compatibility with --no-wpp and --me=sea

---
 source/encoder/frameencoder.cpp | 50 ++++++++++++++++++++-------------
 source/encoder/search.cpp       |  6 ++++
 2 files changed, 36 insertions(+), 20 deletions(-)

diff --git a/source/encoder/frameencoder.cpp b/source/encoder/frameencoder.cpp
index af73626af..c8bf12508 100644
--- a/source/encoder/frameencoder.cpp
+++ b/source/encoder/frameencoder.cpp
@@ -1036,6 +1036,13 @@ void FrameEncoder::compressFrame(int layer)
                     }
                 }

+                if (m_top->m_threadedME && !slice->isIntra())
+                {
+                    ScopedLock lock(m_tmeDepLock);
+                    m_tmeDeps[i].external = true;
+                    m_top->m_threadedME->enqueueReadyRows(i, layer, this);
+                }
+
                 if (!i)
                     m_row0WaitTime[layer] = x265_mdate();
                 else if (i == m_numRows - 1)
@@ -1636,6 +1643,29 @@ void FrameEncoder::processRowEncoder(int intRow, ThreadLocalData& tld, int layer
         const uint32_t cuAddr = lineStartCUAddr + col;
         CUData* ctu = curEncData.getPicCTU(cuAddr);
         const uint32_t bLastCuInSlice = (bLastRowInSlice & (col == numCols - 1)) ? 1 : 0;
+
+        /* Must wait for TME to finish before initCTU because both threads
+         * operate on the same CUData — the encoder's initCTU would corrupt
+         * data that deriveMVsForCTU is still reading. */
+        if (m_top->m_threadedME && slice->m_sliceType != I_SLICE)
+        {
+            int64_t waitStart = x265_mdate();
+            bool waited = false;
+
+            while (m_frame[layer]->m_ctuMEFlags[cuAddr].get() == 0)
+            {
+#ifdef DETAILED_CU_STATS
+                tld.analysis.m_stats[m_jpId].countTmeBlockedCTUs++;
+#endif
+                m_frame[layer]->m_ctuMEFlags[cuAddr].waitForChange(0);
+                waited = true;
+            }
+
+            int64_t waitEnd = x265_mdate();
+            if (waited)
+                ATOMIC_ADD(&m_totalThreadedMEWait[layer], waitEnd - waitStart);
+        }
+
         ctu->initCTU(*m_frame[layer], cuAddr, slice->m_sliceQp, bFirstRowInSlice, bLastRowInSlice, bLastCuInSlice);

         if (!layer && bIsVbv)
@@ -1692,26 +1722,6 @@ void FrameEncoder::processRowEncoder(int intRow, ThreadLocalData& tld, int layer
         if (m_param->dynamicRd && (int32_t)(m_rce.qpaRc - m_rce.qpNoVbv) > 0)
             ctu->m_vbvAffected = true;

-        if (m_top->m_threadedME && slice->m_sliceType != I_SLICE)
-        {
-            int64_t waitStart = x265_mdate();
-            bool waited = false;
-
-            // Wait for threadedME to complete ME upto this CTU
-            while (m_frame[layer]->m_ctuMEFlags[cuAddr].get() == 0)
-            {
-#ifdef DETAILED_CU_STATS
-                tld.analysis.m_stats[m_jpId].countTmeBlockedCTUs++;
-#endif
-                m_frame[layer]->m_ctuMEFlags[cuAddr].waitForChange(0);
-                waited = true;
-            }
-
-            int64_t waitEnd = x265_mdate();
-            if (waited)
-                ATOMIC_ADD(&m_totalThreadedMEWait[layer], waitEnd - waitStart);
-        }
-
         // Does all the CU analysis, returns best top level mode decision
         Mode& best = tld.analysis.compressCTU(*ctu, *m_frame[layer], m_cuGeoms[m_ctuGeomMap[cuAddr]], rowCoder);

diff --git a/source/encoder/search.cpp b/source/encoder/search.cpp
index 238bf63ff..ebf914912 100644
--- a/source/encoder/search.cpp
+++ b/source/encoder/search.cpp
@@ -348,6 +348,12 @@ void Search::puMotionEstimation(const Slice* slice, const CUGeom& cuGeom, CUData
                 PicYuv* recon = slice->m_mref[list][ref].reconPic;
                 int offset = recon->getLumaAddr(cu.m_cuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) - recon->getLumaAddr(0);

+                if (m_param->searchMethod == X265_SEA)
+                {
+                    for (int planes = 0; planes < INTEGRAL_PLANE_NUM; planes++)
+                        m_me.integral[planes] = slice->m_refFrameList[list][ref]->m_encData->m_meIntegral[planes] + offset;
+                }
+
                 m_me.setSourcePU(fencPic->m_picOrg[0], fencPic->m_stride, offset, pu.width, pu.height, m_param->searchMethod, m_param->subpelRefine);
                 setSearchRange(cu, mvp, searchRange, mvmin, mvmax);

--
2.52.0.windows.1



-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20260410/dcfe9be0/attachment-0001.htm>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0005-fix-tme-fix-compatibility-with-no-wpp-and-me-sea.patch
Type: application/octet-stream
Size: 4571 bytes
Desc: 0005-fix-tme-fix-compatibility-with-no-wpp-and-me-sea.patch
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20260410/dcfe9be0/attachment-0001.obj>


More information about the x265-devel mailing list