[x265] [PATCH 3/6] perf(tme): early-exit diamond search and replace dual lowres-MVP ME with SAD prefilter

Shashank Pathipati shashank.pathipati at multicorewareinc.com
Fri Apr 10 09:17:14 UTC 2026


From 1b350e56bb12c5f1e411d7e0ceb0a041289d3abf Mon Sep 17 00:00:00 2001
From: Syed Majid <syed.majid at multicorewareinc.com>
Date: Fri, 10 Apr 2026 14:30:08 +0530
Subject: [PATCH 3/6] perf(tme): early-exit diamond search and replace dual
 lowres-MVP ME with SAD prefilter

---
 source/encoder/motion.cpp | 12 ++++++
 source/encoder/search.cpp | 91 +++++++++++++++++++++++++++++++++------
 2 files changed, 90 insertions(+), 13 deletions(-)

diff --git a/source/encoder/motion.cpp b/source/encoder/motion.cpp
index 1a8cf6371..9700a692b 100644
--- a/source/encoder/motion.cpp
+++ b/source/encoder/motion.cpp
@@ -642,6 +642,7 @@ int MotionEstimate::diamondSearch(ReferencePlanes* ref, const MV& mvmin, const M

     for (int16_t dist = 1; dist <= 4; dist <<= 1)
     {
+        const MV bmv0 = bmv;
         const int32_t top = omv.y - dist;
         const int32_t bottom = omv.y + dist;
         const int32_t left = omv.x - dist;
@@ -697,10 +698,13 @@ int MotionEstimate::diamondSearch(ReferencePlanes* ref, const MV& mvmin, const M
                 COST_MV(omv.x, bottom);
             }
         }
+        if (bmv == bmv0)
+            break;
     }

     for (int16_t dist = 8; dist <= 64; dist += 8)
     {
+        const MV bmv0 = bmv;
         const int32_t top = omv.y - dist;
         const int32_t bottom = omv.y + dist;
         const int32_t left = omv.x - dist;
@@ -772,6 +776,8 @@ int MotionEstimate::diamondSearch(ReferencePlanes* ref, const MV& mvmin, const M
                 }
             }
         }
+        if (bmv == bmv0)
+            break;
     }
     outMV = bmv;
     return bcost;
@@ -996,6 +1002,12 @@ int MotionEstimate::motionEstimate(ReferencePlanes *ref,
     pmv = pmv.roundToFPel();
     MV omv = bmv;  // current search origin or starting point

+    if (bcost == 0)
+    {
+        outQMv = bmv.toQPel();
+        return mvcost(bmv << 2); // return just the MV cost (no residual)
+    }
+
     int search = ref->isHMELowres ? (hme ? searchMethodL0 : searchMethodL1) : searchMethod;
     switch (search)
     {
diff --git a/source/encoder/search.cpp b/source/encoder/search.cpp
index 304911f96..238bf63ff 100644
--- a/source/encoder/search.cpp
+++ b/source/encoder/search.cpp
@@ -359,24 +359,89 @@ void Search::puMotionEstimation(const Slice* slice, const CUGeom& cuGeom, CUData
                 else
                 {
                     m_vertRestriction = slice->m_refPOCList[list][ref] == slice->m_poc;
-                    satdCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvp, numMvc, mvc, m_param->searchRange, outmv, m_param->maxSlices, m_vertRestriction,
-                        m_param->bSourceReferenceEstimation ? m_slice->m_refFrameList[list][ref]->m_fencPic->getLumaAddr(0) : 0);
-
-                    if (bLowresMVP && mvp_lowres.notZero() && mvp_lowres != mvp)
+                    pixel* srcRef = m_param->bSourceReferenceEstimation ?
+                        m_slice->m_refFrameList[list][ref]->m_fencPic->getLumaAddr(0) : 0;
+
+                    MV bestMvp = mvp;
+                    bool usedLowresMvp = false;
+
+                    /* Only do SAD comparison when:
+                     * 1. srcRef is null (not source reference estimation mode)
+                     * 2. lowres MVP is valid and different from spatial MVP
+                     * 3. fencPUYuv is initialised */
+                    if (!srcRef &&
+                        bLowresMVP && mvp_lowres.notZero() && mvp_lowres != mvp &&
+                        m_me.fencPUYuv.m_buf[0] != NULL)
                     {
-                        MV outmv_lowres;
-                        bLowresMVP = false;
-                        setSearchRange(cu, mvp_lowres, m_param->searchRange, mvmin, mvmax);
-                        int lowresMvCost = m_me.motionEstimate(&slice->m_mref[list][ref],  mvmin, mvmax, mvp_lowres, numMvc, mvc, m_param->searchRange,outmv_lowres, m_param->maxSlices,
-                            m_vertRestriction, m_param->bSourceReferenceEstimation ? m_slice->m_refFrameList[list][ref]->m_fencPic->getLumaAddr(0): 0);
+                        intptr_t stride  = slice->m_mref[list][ref].lumaStride;
+                        PicYuv*  refPic  = slice->m_mref[list][ref].reconPic;

-                        if (lowresMvCost < satdCost)
+                        /* Only proceed if strides match */
+                        if (refPic->m_stride == stride)
                         {
-                            outmv = outmv_lowres;
-                            satdCost = lowresMvCost;
-                            bLowresMVP = true;
+                            intptr_t bOffset = refPic->getLumaAddr(cu.m_cuAddr,
+                                                   pu.cuAbsPartIdx + pu.puAbsPartIdx)
+                                             - refPic->getLumaAddr(0);
+
+                            pixel* fenc     = m_me.fencPUYuv.m_buf[0];
+                            pixel* frefBase = slice->m_mref[list][ref].fpelPlane[0]
+                                             + bOffset;
+
+                            MV mvp_fp = mvp.clipped(
+                                MV(mvmin.x << 2, mvmin.y << 2),
+                                MV(mvmax.x << 2, mvmax.y << 2)).roundToFPel();
+
+                            MV lowres_fp = mvp_lowres.clipped(
+                                MV(mvmin.x << 2, mvmin.y << 2),
+                                MV(mvmax.x << 2, mvmax.y << 2)).roundToFPel();
+
+                            /* Picture boundary check for 4K safety */
+                            int picW = refPic->m_picWidth;
+                            int picH = refPic->m_picHeight;
+
+                            bool mvpValid = (mvp_fp.x    >= mvmin.x &&
+                                             mvp_fp.x    <= mvmax.x &&
+                                             mvp_fp.y    >= mvmin.y &&
+                                             mvp_fp.y    <= mvmax.y &&
+                                             mvp_fp.x + pu.width  <= picW &&
+                                             mvp_fp.y + pu.height <= picH);
+
+                            bool lowresValid = (lowres_fp.x >= mvmin.x &&
+                                                lowres_fp.x <= mvmax.x &&
+                                                lowres_fp.y >= mvmin.y &&
+                                                lowres_fp.y <= mvmax.y &&
+                                                lowres_fp.x + pu.width  <= picW &&
+                                                lowres_fp.y + pu.height <= picH);
+
+                            if (mvpValid && lowresValid && mvp_fp != lowres_fp)
+                            {
+                                pixelcmp_t sadFunc = primitives.pu[m_me.partEnum].sad;
+
+                                int sadMvp = sadFunc(fenc, FENC_STRIDE,
+                                    frefBase + mvp_fp.x    + mvp_fp.y    * stride,
+                                    stride);
+                                int sadLowres = sadFunc(fenc, FENC_STRIDE,
+                                    frefBase + lowres_fp.x + lowres_fp.y * stride,
+                                    stride);
+
+                                if (sadLowres < sadMvp)
+                                {
+                                    bestMvp       = mvp_lowres;
+                                    mvp           = mvp_lowres; /* fix mvcost basis */
+                                    usedLowresMvp = true;
+                                }
+                            }
                         }
                     }
+
+                    satdCost = m_me.motionEstimate(&slice->m_mref[list][ref],
+                        mvmin, mvmax,
+                        bestMvp,
+                        numMvc, mvc,
+                        m_param->searchRange, outmv,
+                        m_param->maxSlices, m_vertRestriction, srcRef);
+
+                    bLowresMVP = usedLowresMvp;
                 }

                 bits += m_me.bitcost(outmv);
--
2.52.0.windows.1



-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20260410/bb0f68e0/attachment-0001.htm>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0003-perf-tme-early-exit-diamond-search-and-replace-dual-.patch
Type: application/octet-stream
Size: 8071 bytes
Desc: 0003-perf-tme-early-exit-diamond-search-and-replace-dual-.patch
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20260410/bb0f68e0/attachment-0001.obj>


More information about the x265-devel mailing list