[x265] [PATCH 10/10] Added batch-motion-search for all planes in Lookahead
Anusuya Kumarasamy
anusuya.kumarasamy at multicorewareinc.com
Mon Nov 11 14:25:38 UTC 2024
From 79dd07b35ff128ec13f0a6902b8fad1f7d419a04 Mon Sep 17 00:00:00 2001
From: AnusuyaKumarasamy <anusuya.kumarasamy at multicorewareinc.com>
Date: Mon, 11 Nov 2024 10:50:11 +0530
Subject: [PATCH 10/10] Added batch-motion-search for all planes in Lookahead
---
source/common/lowres.cpp | 15 +++++++
source/common/lowres.h | 1 +
source/common/temporalfilter.cpp | 64 ++++++++++++++--------------
source/common/temporalfilter.h | 69 +++++++++++++++++--------------
source/encoder/slicetype.cpp | 71 +++++++++++++++++++++++---------
source/encoder/slicetype.h | 4 +-
6 files changed, 139 insertions(+), 85 deletions(-)
diff --git a/source/common/lowres.cpp b/source/common/lowres.cpp
index 17c071c2c..1596f79da 100644
--- a/source/common/lowres.cpp
+++ b/source/common/lowres.cpp
@@ -194,6 +194,11 @@ bool Lowres::create(x265_param* param, PicYuv *origPic, uint32_t qgSize)
}
}
+ for (int i = 0; i < 4; i++)
+ {
+ CHECKED_MALLOC(lowresMcstfMvs[0][i], MV, cuCount);
+ }
+
for (int i = 0; i < bframes + 2; i++)
{
CHECKED_MALLOC(lowresMvs[0][i], MV, cuCount);
@@ -281,6 +286,11 @@ void Lowres::destroy(x265_param* param)
X265_FREE(lowerResMvCosts[1][i]);
}
}
+
+ for (int i = 0; i < 4; i++)
+ {
+ X265_FREE(lowresMcstfMvs[0][i]);
+ }
X265_FREE(qpAqOffset);
X265_FREE(invQscaleFactor);
X265_FREE(qpCuTreeOffset);
@@ -358,6 +368,11 @@ void Lowres::init(PicYuv *origPic, int poc)
lowresMvs[1][i][0].x = 0x7FFF;
}
+ for (int i = 0; i < 4; i++)
+ {
+ lowresMcstfMvs[0][i][0].x = 0x7FFF;
+ }
+
for (int i = 0; i < bframes + 2; i++)
intraMbs[i] = 0;
if (origPic->m_param->rc.vbvBufferSize)
diff --git a/source/common/lowres.h b/source/common/lowres.h
index 7e6baa844..2bf39c3b5 100644
--- a/source/common/lowres.h
+++ b/source/common/lowres.h
@@ -191,6 +191,7 @@ struct Lowres : public ReferencePlanes
uint16_t* lowresCosts[X265_BFRAME_MAX + 2][X265_BFRAME_MAX + 2];
int32_t* lowresMvCosts[2][X265_BFRAME_MAX + 2];
MV* lowresMvs[2][X265_BFRAME_MAX + 2];
+ MV* lowresMcstfMvs[2][4];
uint32_t maxBlocksInRow;
uint32_t maxBlocksInCol;
uint32_t maxBlocksInRowFullRes;
diff --git a/source/common/temporalfilter.cpp b/source/common/temporalfilter.cpp
index db58a0c15..aa50c2246 100644
--- a/source/common/temporalfilter.cpp
+++ b/source/common/temporalfilter.cpp
@@ -144,13 +144,11 @@ TemporalFilter::TemporalFilter()
m_QP = 0;
m_sliceTypeConfig = 3;
m_numRef = 0;
- m_useSADinME = 1;
m_range = 2;
m_chromaFactor = 0.55;
m_sigmaMultiplier = 9.0;
m_sigmaZeroPoint = 10.0;
- m_motionVectorFactor = 16;
}
void TemporalFilter::init(const x265_param* param)
@@ -163,8 +161,6 @@ void TemporalFilter::init(const x265_param* param)
m_numComponents = (m_internalCsp != X265_CSP_I400) ? MAX_NUM_COMPONENT
: 1;
m_metld = new MotionEstimatorTLD;
-
- predPUYuv.create(FENC_STRIDE, X265_CSP_I400);
}
int TemporalFilter::createRefPicInfo(TemporalFilterRefPicInfo* refFrame,
x265_param* param)
@@ -191,7 +187,7 @@ fail:
return 0;
}
-int TemporalFilter::motionErrorLumaSAD(
+int MotionEstimatorTLD::motionErrorLumaSAD(MotionEstimatorTLD& m_metld,
pixel* src,
int stride,
pixel* buf,
@@ -233,7 +229,7 @@ int TemporalFilter::motionErrorLumaSAD(
/* copy PU block into cache */
primitives.pu[partEnum].copy_pp(predPUYuv.m_buf[0], FENC_STRIDE,
bufferRowStart, buffStride);
- error = m_metld->me.bufSAD(predPUYuv.m_buf[0], FENC_STRIDE);
+ error = m_metld.me.bufSAD(predPUYuv.m_buf[0], FENC_STRIDE);
#endif
if (error > besterror)
{
@@ -296,7 +292,7 @@ int TemporalFilter::motionErrorLumaSAD(
return error;
}
-int TemporalFilter::motionErrorLumaSSD(
+int MotionEstimatorTLD::motionErrorLumaSSD(MotionEstimatorTLD& m_metld,
pixel* src,
int stride,
pixel* buf,
@@ -338,7 +334,7 @@ int TemporalFilter::motionErrorLumaSSD(
/* copy PU block into cache */
primitives.pu[partEnum].copy_pp(predPUYuv.m_buf[0], FENC_STRIDE,
bufferRowStart, buffStride);
- error =
(int)primitives.cu[partEnum].sse_pp(m_metld->me.fencPUYuv.m_buf[0],
FENC_STRIDE, predPUYuv.m_buf[0], FENC_STRIDE);
+ error =
(int)primitives.cu[partEnum].sse_pp(m_metld.me.fencPUYuv.m_buf[0],
FENC_STRIDE, predPUYuv.m_buf[0], FENC_STRIDE);
#endif
if (error > besterror)
@@ -648,7 +644,7 @@ void TemporalFilter::bilateralFilter(Frame* frame,
}
}
-void TemporalFilter::motionEstimationLuma(MV *mvs, uint32_t mvStride,
pixel* src,int stride, int height, int width, pixel* buf, int blockSize,
+void MotionEstimatorTLD::motionEstimationLuma(MotionEstimatorTLD& m_metld,
MV *mvs, uint32_t mvStride, pixel* src,int stride, int height, int width,
pixel* buf, int blockSize,
int sRange, MV* previous, uint32_t prevMvStride, int factor)
{
@@ -667,7 +663,7 @@ void TemporalFilter::motionEstimationLuma(MV *mvs, uint32_t mvStride, pixel* src
for (int blockX = 0; blockX + blockSize <= origWidth; blockX +=
stepSize)
{
const intptr_t pelOffset = blockY * stride + blockX;
- m_metld->me.setSourcePU(src, stride, pelOffset, blockSize,
blockSize, X265_HEX_SEARCH, 1);
+ m_metld.me.setSourcePU(src, stride, pelOffset, blockSize,
blockSize, X265_HEX_SEARCH, 1);
MV best(0, 0);
@@ -694,9 +690,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs, uint32_t mvStride, pixel* src
MV old = previous[mvIdx];
if (m_useSADinME)
- error = motionErrorLumaSAD(src, stride,
buf, blockX, blockY, old.x * factor, old.y * factor, blockSize, leastError);
+ error = motionErrorLumaSAD(m_metld, src,
stride, buf, blockX, blockY, old.x * factor, old.y * factor, blockSize,
leastError);
else
- error = motionErrorLumaSSD(src, stride,
buf, blockX, blockY, old.x * factor, old.y * factor, blockSize, leastError);
+ error = motionErrorLumaSSD(m_metld, src,
stride, buf, blockX, blockY, old.x * factor, old.y * factor, blockSize,
leastError);
if (error < leastError)
{
@@ -708,9 +704,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs, uint32_t mvStride, pixel* src
}
if (m_useSADinME)
- error = motionErrorLumaSAD(src, stride, buf, blockX,
blockY, 0, 0, blockSize, leastError);
+ error = motionErrorLumaSAD(m_metld, src, stride, buf,
blockX, blockY, 0, 0, blockSize, leastError);
else
- error = motionErrorLumaSSD(src, stride, buf, blockX,
blockY, 0, 0, blockSize, leastError);
+ error = motionErrorLumaSSD(m_metld, src, stride, buf,
blockX, blockY, 0, 0, blockSize, leastError);
if (error < leastError)
{
@@ -726,9 +722,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs, uint32_t mvStride, pixel* src
for (int x2 = prevBest.x / m_motionVectorFactor - range;
x2 <= prevBest.x / m_motionVectorFactor + range; x2++)
{
if (m_useSADinME)
- error = motionErrorLumaSAD(src, stride, buf,
blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor,
blockSize, leastError);
+ error = motionErrorLumaSAD(m_metld, src, stride,
buf, blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor,
blockSize, leastError);
else
- error = motionErrorLumaSSD(src, stride, buf,
blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor,
blockSize, leastError);
+ error = motionErrorLumaSSD(m_metld, src, stride,
buf, blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor,
blockSize, leastError);
if (error < leastError)
{
best.set(x2 * m_motionVectorFactor, y2 *
m_motionVectorFactor);
@@ -743,9 +739,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs, uint32_t mvStride, pixel* src
MV aboveMV = mvs[idx];
if (m_useSADinME)
- error = motionErrorLumaSAD(src, stride, buf, blockX,
blockY, aboveMV.x, aboveMV.y, blockSize, leastError);
+ error = motionErrorLumaSAD(m_metld, src, stride, buf,
blockX, blockY, aboveMV.x, aboveMV.y, blockSize, leastError);
else
- error = motionErrorLumaSSD(src, stride, buf, blockX,
blockY, aboveMV.x, aboveMV.y, blockSize, leastError);
+ error = motionErrorLumaSSD(m_metld, src, stride, buf,
blockX, blockY, aboveMV.x, aboveMV.y, blockSize, leastError);
if (error < leastError)
{
@@ -760,9 +756,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs, uint32_t mvStride, pixel* src
MV leftMV = mvs[idx];
if (m_useSADinME)
- error = motionErrorLumaSAD(src, stride, buf, blockX,
blockY, leftMV.x, leftMV.y, blockSize, leastError);
+ error = motionErrorLumaSAD(m_metld, src, stride, buf,
blockX, blockY, leftMV.x, leftMV.y, blockSize, leastError);
else
- error = motionErrorLumaSSD(src, stride, buf, blockX,
blockY, leftMV.x, leftMV.y, blockSize, leastError);
+ error = motionErrorLumaSSD(m_metld, src, stride, buf,
blockX, blockY, leftMV.x, leftMV.y, blockSize, leastError);
if (error < leastError)
{
@@ -802,7 +798,7 @@ void TemporalFilter::motionEstimationLuma(MV *mvs, uint32_t mvStride, pixel* src
}
-void TemporalFilter::motionEstimationLumaDoubleRes(MV *mvs, uint32_t
mvStride, PicYuv *orig, PicYuv *buffer, int blockSize,
+void MotionEstimatorTLD::motionEstimationLumaDoubleRes(MotionEstimatorTLD&
m_metld, MV *mvs, uint32_t mvStride, PicYuv *orig, PicYuv *buffer, int
blockSize,
MV *previous, uint32_t prevMvStride, int factor, int* minError)
{
@@ -822,7 +818,7 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride, P
{
const intptr_t pelOffset = blockY * orig->m_stride + blockX;
- m_metld->me.setSourcePU(orig->m_picOrg[0], orig->m_stride,
pelOffset, blockSize, blockSize, X265_HEX_SEARCH, 1);
+ m_metld.me.setSourcePU(orig->m_picOrg[0], orig->m_stride,
pelOffset, blockSize, blockSize, X265_HEX_SEARCH, 1);
MV best(0, 0);
int leastError = INT_MAX;
@@ -848,9 +844,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride, P
MV old = previous[mvIdx];
if (m_useSADinME)
- error =
motionErrorLumaSAD(orig->m_picOrg[0], orig->m_stride, buffer->m_picOrg[0],
blockX, blockY, old.x * factor, old.y * factor, blockSize, leastError);
+ error = motionErrorLumaSAD(m_metld,
orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
blockY, old.x * factor, old.y * factor, blockSize, leastError);
else
- error =
motionErrorLumaSSD(orig->m_picOrg[0], orig->m_stride, buffer->m_picOrg[0],
blockX, blockY, old.x * factor, old.y * factor, blockSize, leastError);
+ error = motionErrorLumaSSD(m_metld,
orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
blockY, old.x * factor, old.y * factor, blockSize, leastError);
if (error < leastError)
{
@@ -862,9 +858,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride, P
}
if (m_useSADinME)
- error = motionErrorLumaSAD(orig->m_picOrg[0],
orig->m_stride, buffer->m_picOrg[0], blockX, blockY, 0, 0, blockSize,
leastError);
+ error = motionErrorLumaSAD(m_metld, orig->m_picOrg[0],
(int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, 0, 0, blockSize,
leastError);
else
- error = motionErrorLumaSSD(orig->m_picOrg[0],
orig->m_stride, buffer->m_picOrg[0], blockX, blockY, 0, 0, blockSize,
leastError);
+ error = motionErrorLumaSSD(m_metld, orig->m_picOrg[0],
(int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, 0, 0, blockSize,
leastError);
if (error < leastError)
{
@@ -880,9 +876,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride, P
for (int x2 = prevBest.x / m_motionVectorFactor - range;
x2 <= prevBest.x / m_motionVectorFactor + range; x2++)
{
if (m_useSADinME)
- error = motionErrorLumaSAD(orig->m_picOrg[0],
orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2 *
m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize, leastError);
+ error = motionErrorLumaSAD(m_metld,
orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize,
leastError);
else
- error = motionErrorLumaSSD(orig->m_picOrg[0],
orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2 *
m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize, leastError);
+ error = motionErrorLumaSSD(m_metld,
orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize,
leastError);
if (error < leastError)
{
@@ -899,9 +895,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride, P
for (int x2 = prevBest.x - doubleRange; x2 <= prevBest.x +
doubleRange; x2++)
{
if (m_useSADinME)
- error = motionErrorLumaSAD(orig->m_picOrg[0],
orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2, y2, blockSize,
leastError);
+ error = motionErrorLumaSAD(m_metld,
orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
blockY, x2, y2, blockSize, leastError);
else
- error = motionErrorLumaSSD(orig->m_picOrg[0],
orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2, y2, blockSize,
leastError);
+ error = motionErrorLumaSSD(m_metld,
orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
blockY, x2, y2, blockSize, leastError);
if (error < leastError)
{
@@ -918,9 +914,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride, P
MV aboveMV = mvs[idx];
if (m_useSADinME)
- error = motionErrorLumaSAD(orig->m_picOrg[0],
orig->m_stride, buffer->m_picOrg[0], blockX, blockY, aboveMV.x, aboveMV.y,
blockSize, leastError);
+ error = motionErrorLumaSAD(m_metld, orig->m_picOrg[0],
(int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, aboveMV.x,
aboveMV.y, blockSize, leastError);
else
- error = motionErrorLumaSSD(orig->m_picOrg[0],
orig->m_stride, buffer->m_picOrg[0], blockX, blockY, aboveMV.x, aboveMV.y,
blockSize, leastError);
+ error = motionErrorLumaSSD(m_metld, orig->m_picOrg[0],
(int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, aboveMV.x,
aboveMV.y, blockSize, leastError);
if (error < leastError)
{
@@ -935,9 +931,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride, P
MV leftMV = mvs[idx];
if (m_useSADinME)
- error = motionErrorLumaSAD(orig->m_picOrg[0],
orig->m_stride, buffer->m_picOrg[0], blockX, blockY, leftMV.x, leftMV.y,
blockSize, leastError);
+ error = motionErrorLumaSAD(m_metld, orig->m_picOrg[0],
(int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, leftMV.x,
leftMV.y, blockSize, leastError);
else
- error = motionErrorLumaSSD(orig->m_picOrg[0],
orig->m_stride, buffer->m_picOrg[0], blockX, blockY, leftMV.x, leftMV.y,
blockSize, leastError);
+ error = motionErrorLumaSSD(m_metld, orig->m_picOrg[0],
(int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, leftMV.x,
leftMV.y, blockSize, leastError);
if (error < leastError)
{
diff --git a/source/common/temporalfilter.h b/source/common/temporalfilter.h
index 3e03d7737..c4316aca6 100644
--- a/source/common/temporalfilter.h
+++ b/source/common/temporalfilter.h
@@ -84,9 +84,47 @@ namespace X265_NS {
{
me.init(X265_CSP_I400);
me.setQP(X265_LOOKAHEAD_QP);
+ predPUYuv.create(FENC_STRIDE, X265_CSP_I400);
+ m_useSADinME = 1;
+ m_motionVectorFactor = 16;
}
- ~MotionEstimatorTLD() {}
+ Yuv predPUYuv;
+ int m_useSADinME;
+ int m_motionVectorFactor;
+ int32_t m_bitDepth;
+
+ void init(const x265_param* param);
+
+ void motionEstimationLuma(MotionEstimatorTLD& m_tld, MV* mvs,
uint32_t mvStride, pixel* src, int stride, int height, int width, pixel*
buf, int bs, int sRange,
+ MV* previous = 0, uint32_t prevmvStride = 0, int factor = 1);
+
+ void motionEstimationLumaDoubleRes(MotionEstimatorTLD& m_tld, MV*
mvs, uint32_t mvStride, PicYuv* orig, PicYuv* buffer, int blockSize,
+ MV* previous, uint32_t prevMvStride, int factor, int*
minError);
+
+ int motionErrorLumaSSD(MotionEstimatorTLD& m_tld, pixel* src,
+ int stride,
+ pixel* buf,
+ int x,
+ int y,
+ int dx,
+ int dy,
+ int bs,
+ int besterror = 8 * 8 * 1024 * 1024);
+
+ int motionErrorLumaSAD(MotionEstimatorTLD& m_tld, pixel* src,
+ int stride,
+ pixel* buf,
+ int x,
+ int y,
+ int dx,
+ int dy,
+ int bs,
+ int besterror = 8 * 8 * 1024 * 1024);
+
+ ~MotionEstimatorTLD() {
+ predPUYuv.destroy();
+ }
};
struct TemporalFilterRefPicInfo
@@ -134,7 +172,6 @@ namespace X265_NS {
double m_chromaFactor;
double m_sigmaMultiplier;
double m_sigmaZeroPoint;
- int m_motionVectorFactor;
int m_padding;
// Private member variables
@@ -148,39 +185,11 @@ namespace X265_NS {
uint8_t m_sliceTypeConfig;
MotionEstimatorTLD* m_metld;
- Yuv predPUYuv;
- int m_useSADinME;
int createRefPicInfo(TemporalFilterRefPicInfo* refFrame,
x265_param* param);
void bilateralFilter(Frame* frame, TemporalFilterRefPicInfo*
mctfRefList, double overallStrength);
- void motionEstimationLuma(MV *mvs, uint32_t mvStride, pixel* src,
int stride, int height, int width, pixel* buf, int bs, int sRange,
- MV *previous = 0, uint32_t prevmvStride = 0, int factor = 1);
-
- void motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride,
PicYuv *orig, PicYuv *buffer, int blockSize,
- MV *previous, uint32_t prevMvStride, int factor, int*
minError);
-
- int motionErrorLumaSSD(pixel* src,
- int stride,
- pixel* buf,
- int x,
- int y,
- int dx,
- int dy,
- int bs,
- int besterror = 8 * 8 * 1024 * 1024);
-
- int motionErrorLumaSAD(pixel* src,
- int stride,
- pixel* buf,
- int x,
- int y,
- int dx,
- int dy,
- int bs,
- int besterror = 8 * 8 * 1024 * 1024);
-
void destroyRefPicInfo(TemporalFilterRefPicInfo* curFrame);
void applyMotion(MV *mvs, uint32_t mvsStride, PicYuv *input,
PicYuv *output);
diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp
index 5fd885227..abc687ef4 100644
--- a/source/encoder/slicetype.cpp
+++ b/source/encoder/slicetype.cpp
@@ -1128,7 +1128,10 @@ bool Lookahead::create()
m_scratch = X265_MALLOC(int, m_tld[0].widthInCU);
if (m_param->bEnableTemporalFilter)
+ {
+ m_metld = new MotionEstimatorTLD[numTLD];
m_origPicBuf = new OrigPicBuffer();
+ }
return m_tld && m_scratch;
}
@@ -1170,7 +1173,10 @@ void Lookahead::destroy()
}
if (m_param->bEnableTemporalFilter)
+ {
delete m_origPicBuf;
+ delete[] m_metld;
+ }
X265_FREE(m_scratch);
delete [] m_tld;
@@ -1794,19 +1800,17 @@ void Lookahead::compCostBref(Lowres **frames, int start, int end, int num)
}
}
-void Lookahead::estimatelowresmotion(Frame* curframe)
+void CostEstimateGroup::estimatelowresmotion(MotionEstimatorTLD& m_metld,
Frame* curframe, int refId)
{
+ m_metld.m_bitDepth = curframe->m_param->internalBitDepth;
+ TemporalFilterRefPicInfo* ref = &curframe->m_mcstfRefList[refId];
- for (int i = 1; i <= curframe->m_mcstf->m_numRef; i++)
- {
- TemporalFilterRefPicInfo * ref = &curframe->m_mcstfRefList[i - 1];
-
- curframe->m_mcstf->motionEstimationLuma(ref->mvs0,
ref->mvsStride0, curframe->m_lowres.lowerResPlane[0],
(curframe->m_lowres.lumaStride / 2), (curframe->m_lowres.lines / 2),
(curframe->m_lowres.width / 2), ref->lowerRes, 16,
m_param->searchRangeForLayer2);
- curframe->m_mcstf->motionEstimationLuma(ref->mvs1,
ref->mvsStride1, curframe->m_lowres.lowresPlane[0],
(curframe->m_lowres.lumaStride), (curframe->m_lowres.lines),
(curframe->m_lowres.width), ref->lowres, 16, m_param->searchRangeForLayer1,
ref->mvs0, ref->mvsStride0, 2);
- curframe->m_mcstf->motionEstimationLuma(ref->mvs2,
ref->mvsStride2, curframe->m_fencPic->m_picOrg[0],
curframe->m_fencPic->m_stride, curframe->m_fencPic->m_picHeight,
curframe->m_fencPic->m_picWidth, ref->picBuffer->m_picOrg[0], 16,
m_param->searchRangeForLayer0, ref->mvs1, ref->mvsStride1, 2);
- curframe->m_mcstf->motionEstimationLumaDoubleRes(ref->mvs,
ref->mvsStride, curframe->m_fencPic, ref->picBuffer, 8, ref->mvs2,
ref->mvsStride2, 1, ref->error);
- }
+ m_metld.motionEstimationLuma(m_metld, ref->mvs0, ref->mvsStride0,
curframe->m_lowres.lowerResPlane[0], (int)(curframe->m_lowres.lumaStride /
2), (curframe->m_lowres.lines / 2), (curframe->m_lowres.width / 2),
ref->lowerRes, 16, curframe->m_param->searchRangeForLayer2);
+ m_metld.motionEstimationLuma(m_metld, ref->mvs1, ref->mvsStride1,
curframe->m_lowres.lowresPlane[0], (int)(curframe->m_lowres.lumaStride),
(curframe->m_lowres.lines), (curframe->m_lowres.width), ref->lowres, 16,
curframe->m_param->searchRangeForLayer1, ref->mvs0, ref->mvsStride0, 2);
+ m_metld.motionEstimationLuma(m_metld, ref->mvs2, ref->mvsStride2,
curframe->m_fencPic->m_picOrg[0], (int)curframe->m_fencPic->m_stride,
curframe->m_fencPic->m_picHeight, curframe->m_fencPic->m_picWidth,
ref->picBuffer->m_picOrg[0], 16, curframe->m_param->searchRangeForLayer0,
ref->mvs1, ref->mvsStride1, 2);
+ m_metld.motionEstimationLumaDoubleRes(m_metld, ref->mvs,
ref->mvsStride, curframe->m_fencPic, ref->picBuffer, 8, ref->mvs2,
ref->mvsStride2, 1, ref->error);
+ curframe->m_lowres.lowresMcstfMvs[0][refId][0].x = 1;
}
inline int enqueueRefFrame(Frame* iterFrame, Frame* curFrame, bool
isPreFiltered, int16_t i)
@@ -2156,20 +2160,39 @@ void Lookahead::slicetypeDecide()
}
}
- Frame* frameEnc = m_inputQueue.first();
- for (int i = 0; i < m_inputQueue.size(); i++)
+ if (m_bBatchMotionSearch && m_param->bEnableTemporalFilter)
{
- if (m_param->bEnableTemporalFilter &&
isFilterThisframe(frameEnc->m_mcstf->m_sliceTypeConfig,
frameEnc->m_lowres.sliceType))
+ /* pre-calculate all motion searches, using many worker threads */
+ CostEstimateGroup estGroup(*this, frames);
+ Frame* frameEnc = m_inputQueue.first();
+ for (int b = 0; b < m_inputQueue.size(); b++)
{
- if (!generatemcstf(frameEnc, m_origPicBuf->m_mcstfPicList,
m_inputQueue.last()->m_poc))
+ if (m_param->bEnableTemporalFilter &&
isFilterThisframe(frameEnc->m_mcstf->m_sliceTypeConfig,
frameEnc->m_lowres.sliceType))
{
- x265_log(m_param, X265_LOG_ERROR, "Failed to initialize
MCSTFReferencePicInfo at POC %d\n", frameEnc->m_poc);
- fflush(stderr);
- }
+ if (!generatemcstf(frameEnc, m_origPicBuf->m_mcstfPicList,
m_inputQueue.last()->m_poc))
+ {
+ x265_log(m_param, X265_LOG_ERROR, "Failed to
initialize MCSTFReferencePicInfo at POC %d\n", frameEnc->m_poc);
+ fflush(stderr);
+ }
+
+ for (int j = 1; j <= frameEnc->m_mcstf->m_numRef; j++)
+ {
+ TemporalFilterRefPicInfo* ref =
&frameEnc->m_mcstfRefList[j - 1];
+ int i = ref->poc;
+
+ /* Skip search if already done */
+ if (frames[b + 1]->lowresMcstfMvs[0][j - 1][0].x !=
0x7FFF)
+ continue;
- estimatelowresmotion(frameEnc);
+ estGroup.add(j - 1, i, frameEnc->m_poc);
+ }
+ }
+ frameEnc = frameEnc->m_next;
}
- frameEnc = frameEnc->m_next;
+
+ /* auto-disable after the first batch if pool is small */
+ m_bBatchMotionSearch &= m_pool->m_numWorkers >= 4;
+ estGroup.finishBatch();
}
if (m_param->bEnableTemporalSubLayers > 2)
@@ -4029,6 +4052,7 @@ void CostEstimateGroup::processTasks(int workerThreadID)
if (workerThreadID < 0)
id = pool ? pool->m_numWorkers : 0;
LookaheadTLD& tld = m_lookahead.m_tld[id];
+ MotionEstimatorTLD& m_metld = m_lookahead.m_metld[id];
m_lock.acquire();
while (m_jobAcquired < m_jobTotal)
@@ -4042,7 +4066,14 @@ void CostEstimateGroup::processTasks(int workerThreadID)
ProfileScopeEvent(estCostSingle);
Estimate& e = m_estimates[i];
- estimateFrameCost(tld, e.p0, e.p1, e.b, false);
+ Frame* curFrame = m_lookahead.m_inputQueue.getPOC(e.b);
+
+ if (m_lookahead.m_param->bEnableTemporalFilter && curFrame &&
(curFrame->m_lowres.sliceType == X265_TYPE_IDR ||
curFrame->m_lowres.sliceType == X265_TYPE_I || curFrame->m_lowres.sliceType
== X265_TYPE_P))
+ {
+ estimatelowresmotion(m_metld, curFrame, e.p0);
+ }
+ else
+ estimateFrameCost(tld, e.p0, e.p1, e.b, false);
}
else
{
diff --git a/source/encoder/slicetype.h b/source/encoder/slicetype.h
index 214e295b7..be6ac8112 100644
--- a/source/encoder/slicetype.h
+++ b/source/encoder/slicetype.h
@@ -204,6 +204,7 @@ public:
int8_t m_gopId;
OrigPicBuffer* m_origPicBuf;
+ MotionEstimatorTLD* m_metld;
Lookahead(x265_param *param, ThreadPool *pool);
#if DETAILED_CU_STATS
@@ -227,7 +228,6 @@ public:
void getEstimatedPictureCost(Frame *pic);
void setLookaheadQueue();
int findSliceType(int poc);
- void estimatelowresmotion(Frame* frame);
bool generatemcstf(Frame * frame, PicList refPic, int poclast);
bool isFilterThisframe(uint8_t sliceTypeConfig, int curSliceType);
@@ -327,6 +327,8 @@ protected:
int64_t estimateFrameCost(LookaheadTLD& tld, int p0, int p1, int b,
bool intraPenalty);
void estimateCUCost(LookaheadTLD& tld, int cux, int cuy, int p0,
int p1, int b, bool bDoSearch[2], bool lastRow, int slice, bool hme);
+ void estimatelowresmotion(MotionEstimatorTLD& m_metld, Frame*
curframe, int refId);
+
CostEstimateGroup& operator=(const CostEstimateGroup&);
};
--
2.36.0.windows.1
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20241111/2fafdc56/attachment-0001.htm>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0010-Added-batch-motion-search-for-all-planes-in-Lookahea.patch
Type: application/octet-stream
Size: 28374 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20241111/2fafdc56/attachment-0001.obj>
More information about the x265-devel mailing list