[x265] [PATCH 3/6] perf(tme): early-exit diamond search and replace dual lowres-MVP ME with SAD prefilter
Shashank Pathipati
shashank.pathipati at multicorewareinc.com
Fri Apr 10 09:17:14 UTC 2026
From 1b350e56bb12c5f1e411d7e0ceb0a041289d3abf Mon Sep 17 00:00:00 2001
From: Syed Majid <syed.majid at multicorewareinc.com>
Date: Fri, 10 Apr 2026 14:30:08 +0530
Subject: [PATCH 3/6] perf(tme): early-exit diamond search and replace dual
lowres-MVP ME with SAD prefilter
---
source/encoder/motion.cpp | 12 ++++++
source/encoder/search.cpp | 91 +++++++++++++++++++++++++++++++++------
2 files changed, 90 insertions(+), 13 deletions(-)
diff --git a/source/encoder/motion.cpp b/source/encoder/motion.cpp
index 1a8cf6371..9700a692b 100644
--- a/source/encoder/motion.cpp
+++ b/source/encoder/motion.cpp
@@ -642,6 +642,7 @@ int MotionEstimate::diamondSearch(ReferencePlanes* ref, const MV& mvmin, const M
for (int16_t dist = 1; dist <= 4; dist <<= 1)
{
+ const MV bmv0 = bmv;
const int32_t top = omv.y - dist;
const int32_t bottom = omv.y + dist;
const int32_t left = omv.x - dist;
@@ -697,10 +698,13 @@ int MotionEstimate::diamondSearch(ReferencePlanes* ref, const MV& mvmin, const M
COST_MV(omv.x, bottom);
}
}
+ if (bmv == bmv0)
+ break;
}
for (int16_t dist = 8; dist <= 64; dist += 8)
{
+ const MV bmv0 = bmv;
const int32_t top = omv.y - dist;
const int32_t bottom = omv.y + dist;
const int32_t left = omv.x - dist;
@@ -772,6 +776,8 @@ int MotionEstimate::diamondSearch(ReferencePlanes* ref, const MV& mvmin, const M
}
}
}
+ if (bmv == bmv0)
+ break;
}
outMV = bmv;
return bcost;
@@ -996,6 +1002,12 @@ int MotionEstimate::motionEstimate(ReferencePlanes *ref,
pmv = pmv.roundToFPel();
MV omv = bmv; // current search origin or starting point
+ if (bcost == 0)
+ {
+ outQMv = bmv.toQPel();
+ return mvcost(bmv << 2); // return just the MV cost (no residual)
+ }
+
int search = ref->isHMELowres ? (hme ? searchMethodL0 : searchMethodL1) : searchMethod;
switch (search)
{
diff --git a/source/encoder/search.cpp b/source/encoder/search.cpp
index 304911f96..238bf63ff 100644
--- a/source/encoder/search.cpp
+++ b/source/encoder/search.cpp
@@ -359,24 +359,89 @@ void Search::puMotionEstimation(const Slice* slice, const CUGeom& cuGeom, CUData
else
{
m_vertRestriction = slice->m_refPOCList[list][ref] == slice->m_poc;
- satdCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvp, numMvc, mvc, m_param->searchRange, outmv, m_param->maxSlices, m_vertRestriction,
- m_param->bSourceReferenceEstimation ? m_slice->m_refFrameList[list][ref]->m_fencPic->getLumaAddr(0) : 0);
-
- if (bLowresMVP && mvp_lowres.notZero() && mvp_lowres != mvp)
+ pixel* srcRef = m_param->bSourceReferenceEstimation ?
+ m_slice->m_refFrameList[list][ref]->m_fencPic->getLumaAddr(0) : 0;
+
+ MV bestMvp = mvp;
+ bool usedLowresMvp = false;
+
+ /* Only do SAD comparison when:
+ * 1. srcRef is null (not source reference estimation mode)
+ * 2. lowres MVP is valid and different from spatial MVP
+ * 3. fencPUYuv is initialised */
+ if (!srcRef &&
+ bLowresMVP && mvp_lowres.notZero() && mvp_lowres != mvp &&
+ m_me.fencPUYuv.m_buf[0] != NULL)
{
- MV outmv_lowres;
- bLowresMVP = false;
- setSearchRange(cu, mvp_lowres, m_param->searchRange, mvmin, mvmax);
- int lowresMvCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvp_lowres, numMvc, mvc, m_param->searchRange,outmv_lowres, m_param->maxSlices,
- m_vertRestriction, m_param->bSourceReferenceEstimation ? m_slice->m_refFrameList[list][ref]->m_fencPic->getLumaAddr(0): 0);
+ intptr_t stride = slice->m_mref[list][ref].lumaStride;
+ PicYuv* refPic = slice->m_mref[list][ref].reconPic;
- if (lowresMvCost < satdCost)
+ /* Only proceed if strides match */
+ if (refPic->m_stride == stride)
{
- outmv = outmv_lowres;
- satdCost = lowresMvCost;
- bLowresMVP = true;
+ intptr_t bOffset = refPic->getLumaAddr(cu.m_cuAddr,
+ pu.cuAbsPartIdx + pu.puAbsPartIdx)
+ - refPic->getLumaAddr(0);
+
+ pixel* fenc = m_me.fencPUYuv.m_buf[0];
+ pixel* frefBase = slice->m_mref[list][ref].fpelPlane[0]
+ + bOffset;
+
+ MV mvp_fp = mvp.clipped(
+ MV(mvmin.x << 2, mvmin.y << 2),
+ MV(mvmax.x << 2, mvmax.y << 2)).roundToFPel();
+
+ MV lowres_fp = mvp_lowres.clipped(
+ MV(mvmin.x << 2, mvmin.y << 2),
+ MV(mvmax.x << 2, mvmax.y << 2)).roundToFPel();
+
+ /* Reject candidates whose full-pel MV plus the PU size exceeds the picture dimensions */
+ int picW = refPic->m_picWidth;
+ int picH = refPic->m_picHeight;
+
+ bool mvpValid = (mvp_fp.x >= mvmin.x &&
+ mvp_fp.x <= mvmax.x &&
+ mvp_fp.y >= mvmin.y &&
+ mvp_fp.y <= mvmax.y &&
+ mvp_fp.x + pu.width <= picW &&
+ mvp_fp.y + pu.height <= picH);
+
+ bool lowresValid = (lowres_fp.x >= mvmin.x &&
+ lowres_fp.x <= mvmax.x &&
+ lowres_fp.y >= mvmin.y &&
+ lowres_fp.y <= mvmax.y &&
+ lowres_fp.x + pu.width <= picW &&
+ lowres_fp.y + pu.height <= picH);
+
+ if (mvpValid && lowresValid && mvp_fp != lowres_fp)
+ {
+ pixelcmp_t sadFunc = primitives.pu[m_me.partEnum].sad;
+
+ int sadMvp = sadFunc(fenc, FENC_STRIDE,
+ frefBase + mvp_fp.x + mvp_fp.y * stride,
+ stride);
+ int sadLowres = sadFunc(fenc, FENC_STRIDE,
+ frefBase + lowres_fp.x + lowres_fp.y * stride,
+ stride);
+
+ if (sadLowres < sadMvp)
+ {
+ bestMvp = mvp_lowres;
+ mvp = mvp_lowres; /* fix mvcost basis */
+ usedLowresMvp = true;
+ }
+ }
}
}
+
+ satdCost = m_me.motionEstimate(&slice->m_mref[list][ref],
+ mvmin, mvmax,
+ bestMvp,
+ numMvc, mvc,
+ m_param->searchRange, outmv,
+ m_param->maxSlices, m_vertRestriction, srcRef);
+
+ bLowresMVP = usedLowresMvp;
}
bits += m_me.bitcost(outmv);
--
2.52.0.windows.1
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20260410/bb0f68e0/attachment-0001.htm>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0003-perf-tme-early-exit-diamond-search-and-replace-dual-.patch
Type: application/octet-stream
Size: 8071 bytes
Desc: 0003-perf-tme-early-exit-diamond-search-and-replace-dual-.patch
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20260410/bb0f68e0/attachment-0001.obj>
More information about the x265-devel mailing list