[x265] [PATCH 10/10] Added batch-motion-search for all planes in Lookahead
Mahesh Pittala
mahesh at multicorewareinc.com
Tue Nov 12 08:59:44 UTC 2024
Pushed all 10 patches to the master and release_4.1 branches.
On Mon, Nov 11, 2024 at 7:56 PM Anusuya Kumarasamy <anusuya.kumarasamy at multicorewareinc.com> wrote:
> From 79dd07b35ff128ec13f0a6902b8fad1f7d419a04 Mon Sep 17 00:00:00 2001
> From: AnusuyaKumarasamy <anusuya.kumarasamy at multicorewareinc.com>
> Date: Mon, 11 Nov 2024 10:50:11 +0530
> Subject: [PATCH 10/10] Added batch-motion-search for all planes in
> Lookahead
>
> ---
> source/common/lowres.cpp | 15 +++++++
> source/common/lowres.h | 1 +
> source/common/temporalfilter.cpp | 64 ++++++++++++++--------------
> source/common/temporalfilter.h | 69 +++++++++++++++++--------------
> source/encoder/slicetype.cpp | 71 +++++++++++++++++++++++---------
> source/encoder/slicetype.h | 4 +-
> 6 files changed, 139 insertions(+), 85 deletions(-)
>
> diff --git a/source/common/lowres.cpp b/source/common/lowres.cpp
> index 17c071c2c..1596f79da 100644
> --- a/source/common/lowres.cpp
> +++ b/source/common/lowres.cpp
> @@ -194,6 +194,11 @@ bool Lowres::create(x265_param* param, PicYuv
> *origPic, uint32_t qgSize)
> }
> }
>
> + for (int i = 0; i < 4; i++)
> + {
> + CHECKED_MALLOC(lowresMcstfMvs[0][i], MV, cuCount);
> + }
> +
> for (int i = 0; i < bframes + 2; i++)
> {
> CHECKED_MALLOC(lowresMvs[0][i], MV, cuCount);
> @@ -281,6 +286,11 @@ void Lowres::destroy(x265_param* param)
> X265_FREE(lowerResMvCosts[1][i]);
> }
> }
> +
> + for (int i = 0; i < 4; i++)
> + {
> + X265_FREE(lowresMcstfMvs[0][i]);
> + }
> X265_FREE(qpAqOffset);
> X265_FREE(invQscaleFactor);
> X265_FREE(qpCuTreeOffset);
> @@ -358,6 +368,11 @@ void Lowres::init(PicYuv *origPic, int poc)
> lowresMvs[1][i][0].x = 0x7FFF;
> }
>
> + for (int i = 0; i < 4; i++)
> + {
> + lowresMcstfMvs[0][i][0].x = 0x7FFF;
> + }
> +
> for (int i = 0; i < bframes + 2; i++)
> intraMbs[i] = 0;
> if (origPic->m_param->rc.vbvBufferSize)
> diff --git a/source/common/lowres.h b/source/common/lowres.h
> index 7e6baa844..2bf39c3b5 100644
> --- a/source/common/lowres.h
> +++ b/source/common/lowres.h
> @@ -191,6 +191,7 @@ struct Lowres : public ReferencePlanes
> uint16_t* lowresCosts[X265_BFRAME_MAX + 2][X265_BFRAME_MAX + 2];
> int32_t* lowresMvCosts[2][X265_BFRAME_MAX + 2];
> MV* lowresMvs[2][X265_BFRAME_MAX + 2];
> + MV* lowresMcstfMvs[2][4];
> uint32_t maxBlocksInRow;
> uint32_t maxBlocksInCol;
> uint32_t maxBlocksInRowFullRes;
> diff --git a/source/common/temporalfilter.cpp
> b/source/common/temporalfilter.cpp
> index db58a0c15..aa50c2246 100644
> --- a/source/common/temporalfilter.cpp
> +++ b/source/common/temporalfilter.cpp
> @@ -144,13 +144,11 @@ TemporalFilter::TemporalFilter()
> m_QP = 0;
> m_sliceTypeConfig = 3;
> m_numRef = 0;
> - m_useSADinME = 1;
>
> m_range = 2;
> m_chromaFactor = 0.55;
> m_sigmaMultiplier = 9.0;
> m_sigmaZeroPoint = 10.0;
> - m_motionVectorFactor = 16;
> }
>
> void TemporalFilter::init(const x265_param* param)
> @@ -163,8 +161,6 @@ void TemporalFilter::init(const x265_param* param)
> m_numComponents = (m_internalCsp != X265_CSP_I400) ?
> MAX_NUM_COMPONENT : 1;
>
> m_metld = new MotionEstimatorTLD;
> -
> - predPUYuv.create(FENC_STRIDE, X265_CSP_I400);
> }
>
> int TemporalFilter::createRefPicInfo(TemporalFilterRefPicInfo* refFrame,
> x265_param* param)
> @@ -191,7 +187,7 @@ fail:
> return 0;
> }
>
> -int TemporalFilter::motionErrorLumaSAD(
> +int MotionEstimatorTLD::motionErrorLumaSAD(MotionEstimatorTLD& m_metld,
> pixel* src,
> int stride,
> pixel* buf,
> @@ -233,7 +229,7 @@ int TemporalFilter::motionErrorLumaSAD(
> /* copy PU block into cache */
> primitives.pu[partEnum].copy_pp(predPUYuv.m_buf[0], FENC_STRIDE,
> bufferRowStart, buffStride);
>
> - error = m_metld->me.bufSAD(predPUYuv.m_buf[0], FENC_STRIDE);
> + error = m_metld.me.bufSAD(predPUYuv.m_buf[0], FENC_STRIDE);
> #endif
> if (error > besterror)
> {
> @@ -296,7 +292,7 @@ int TemporalFilter::motionErrorLumaSAD(
> return error;
> }
>
> -int TemporalFilter::motionErrorLumaSSD(
> +int MotionEstimatorTLD::motionErrorLumaSSD(MotionEstimatorTLD& m_metld,
> pixel* src,
> int stride,
> pixel* buf,
> @@ -338,7 +334,7 @@ int TemporalFilter::motionErrorLumaSSD(
> /* copy PU block into cache */
> primitives.pu[partEnum].copy_pp(predPUYuv.m_buf[0], FENC_STRIDE,
> bufferRowStart, buffStride);
>
> - error = (int)primitives.cu[partEnum].sse_pp(m_metld->me.fencPUYuv.m_buf[0],
> FENC_STRIDE, predPUYuv.m_buf[0], FENC_STRIDE);
> + error = (int)primitives.cu[partEnum].sse_pp(m_metld.me.fencPUYuv.m_buf[0],
> FENC_STRIDE, predPUYuv.m_buf[0], FENC_STRIDE);
>
> #endif
> if (error > besterror)
> @@ -648,7 +644,7 @@ void TemporalFilter::bilateralFilter(Frame* frame,
> }
> }
>
> -void TemporalFilter::motionEstimationLuma(MV *mvs, uint32_t mvStride,
> pixel* src,int stride, int height, int width, pixel* buf, int blockSize,
> +void MotionEstimatorTLD::motionEstimationLuma(MotionEstimatorTLD&
> m_metld, MV *mvs, uint32_t mvStride, pixel* src,int stride, int height, int
> width, pixel* buf, int blockSize,
> int sRange, MV* previous, uint32_t prevMvStride, int factor)
> {
>
> @@ -667,7 +663,7 @@ void TemporalFilter::motionEstimationLuma(MV *mvs,
> uint32_t mvStride, pixel* src
> for (int blockX = 0; blockX + blockSize <= origWidth; blockX +=
> stepSize)
> {
> const intptr_t pelOffset = blockY * stride + blockX;
> - m_metld->me.setSourcePU(src, stride, pelOffset, blockSize,
> blockSize, X265_HEX_SEARCH, 1);
> + m_metld.me.setSourcePU(src, stride, pelOffset, blockSize,
> blockSize, X265_HEX_SEARCH, 1);
>
>
> MV best(0, 0);
> @@ -694,9 +690,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs,
> uint32_t mvStride, pixel* src
> MV old = previous[mvIdx];
>
> if (m_useSADinME)
> - error = motionErrorLumaSAD(src, stride,
> buf, blockX, blockY, old.x * factor, old.y * factor, blockSize, leastError);
> + error = motionErrorLumaSAD(m_metld, src,
> stride, buf, blockX, blockY, old.x * factor, old.y * factor, blockSize,
> leastError);
> else
> - error = motionErrorLumaSSD(src, stride,
> buf, blockX, blockY, old.x * factor, old.y * factor, blockSize, leastError);
> + error = motionErrorLumaSSD(m_metld, src,
> stride, buf, blockX, blockY, old.x * factor, old.y * factor, blockSize,
> leastError);
>
> if (error < leastError)
> {
> @@ -708,9 +704,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs,
> uint32_t mvStride, pixel* src
> }
>
> if (m_useSADinME)
> - error = motionErrorLumaSAD(src, stride, buf, blockX,
> blockY, 0, 0, blockSize, leastError);
> + error = motionErrorLumaSAD(m_metld, src, stride, buf,
> blockX, blockY, 0, 0, blockSize, leastError);
> else
> - error = motionErrorLumaSSD(src, stride, buf, blockX,
> blockY, 0, 0, blockSize, leastError);
> + error = motionErrorLumaSSD(m_metld, src, stride, buf,
> blockX, blockY, 0, 0, blockSize, leastError);
>
> if (error < leastError)
> {
> @@ -726,9 +722,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs,
> uint32_t mvStride, pixel* src
> for (int x2 = prevBest.x / m_motionVectorFactor - range;
> x2 <= prevBest.x / m_motionVectorFactor + range; x2++)
> {
> if (m_useSADinME)
> - error = motionErrorLumaSAD(src, stride, buf,
> blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor,
> blockSize, leastError);
> + error = motionErrorLumaSAD(m_metld, src, stride,
> buf, blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor,
> blockSize, leastError);
> else
> - error = motionErrorLumaSSD(src, stride, buf,
> blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor,
> blockSize, leastError);
> + error = motionErrorLumaSSD(m_metld, src, stride,
> buf, blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor,
> blockSize, leastError);
> if (error < leastError)
> {
> best.set(x2 * m_motionVectorFactor, y2 *
> m_motionVectorFactor);
> @@ -743,9 +739,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs,
> uint32_t mvStride, pixel* src
> MV aboveMV = mvs[idx];
>
> if (m_useSADinME)
> - error = motionErrorLumaSAD(src, stride, buf, blockX,
> blockY, aboveMV.x, aboveMV.y, blockSize, leastError);
> + error = motionErrorLumaSAD(m_metld, src, stride, buf,
> blockX, blockY, aboveMV.x, aboveMV.y, blockSize, leastError);
> else
> - error = motionErrorLumaSSD(src, stride, buf, blockX,
> blockY, aboveMV.x, aboveMV.y, blockSize, leastError);
> + error = motionErrorLumaSSD(m_metld, src, stride, buf,
> blockX, blockY, aboveMV.x, aboveMV.y, blockSize, leastError);
>
> if (error < leastError)
> {
> @@ -760,9 +756,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs,
> uint32_t mvStride, pixel* src
> MV leftMV = mvs[idx];
>
> if (m_useSADinME)
> - error = motionErrorLumaSAD(src, stride, buf, blockX,
> blockY, leftMV.x, leftMV.y, blockSize, leastError);
> + error = motionErrorLumaSAD(m_metld, src, stride, buf,
> blockX, blockY, leftMV.x, leftMV.y, blockSize, leastError);
> else
> - error = motionErrorLumaSSD(src, stride, buf, blockX,
> blockY, leftMV.x, leftMV.y, blockSize, leastError);
> + error = motionErrorLumaSSD(m_metld, src, stride, buf,
> blockX, blockY, leftMV.x, leftMV.y, blockSize, leastError);
>
> if (error < leastError)
> {
> @@ -802,7 +798,7 @@ void TemporalFilter::motionEstimationLuma(MV *mvs,
> uint32_t mvStride, pixel* src
> }
>
>
> -void TemporalFilter::motionEstimationLumaDoubleRes(MV *mvs, uint32_t
> mvStride, PicYuv *orig, PicYuv *buffer, int blockSize,
> +void
> MotionEstimatorTLD::motionEstimationLumaDoubleRes(MotionEstimatorTLD&
> m_metld, MV *mvs, uint32_t mvStride, PicYuv *orig, PicYuv *buffer, int
> blockSize,
> MV *previous, uint32_t prevMvStride, int factor, int* minError)
> {
>
> @@ -822,7 +818,7 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV
> *mvs, uint32_t mvStride, P
> {
>
> const intptr_t pelOffset = blockY * orig->m_stride + blockX;
> - m_metld->me.setSourcePU(orig->m_picOrg[0], orig->m_stride,
> pelOffset, blockSize, blockSize, X265_HEX_SEARCH, 1);
> + m_metld.me.setSourcePU(orig->m_picOrg[0], orig->m_stride,
> pelOffset, blockSize, blockSize, X265_HEX_SEARCH, 1);
>
> MV best(0, 0);
> int leastError = INT_MAX;
> @@ -848,9 +844,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV
> *mvs, uint32_t mvStride, P
> MV old = previous[mvIdx];
>
> if (m_useSADinME)
> - error =
> motionErrorLumaSAD(orig->m_picOrg[0], orig->m_stride, buffer->m_picOrg[0],
> blockX, blockY, old.x * factor, old.y * factor, blockSize, leastError);
> + error = motionErrorLumaSAD(m_metld,
> orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
> blockY, old.x * factor, old.y * factor, blockSize, leastError);
> else
> - error =
> motionErrorLumaSSD(orig->m_picOrg[0], orig->m_stride, buffer->m_picOrg[0],
> blockX, blockY, old.x * factor, old.y * factor, blockSize, leastError);
> + error = motionErrorLumaSSD(m_metld,
> orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
> blockY, old.x * factor, old.y * factor, blockSize, leastError);
>
> if (error < leastError)
> {
> @@ -862,9 +858,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV
> *mvs, uint32_t mvStride, P
> }
>
> if (m_useSADinME)
> - error = motionErrorLumaSAD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, 0, 0, blockSize,
> leastError);
> + error = motionErrorLumaSAD(m_metld,
> orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
> blockY, 0, 0, blockSize, leastError);
> else
> - error = motionErrorLumaSSD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, 0, 0, blockSize,
> leastError);
> + error = motionErrorLumaSSD(m_metld,
> orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
> blockY, 0, 0, blockSize, leastError);
>
> if (error < leastError)
> {
> @@ -880,9 +876,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV
> *mvs, uint32_t mvStride, P
> for (int x2 = prevBest.x / m_motionVectorFactor - range;
> x2 <= prevBest.x / m_motionVectorFactor + range; x2++)
> {
> if (m_useSADinME)
> - error = motionErrorLumaSAD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2 *
> m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize, leastError);
> + error = motionErrorLumaSAD(m_metld,
> orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
> blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize,
> leastError);
> else
> - error = motionErrorLumaSSD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2 *
> m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize, leastError);
> + error = motionErrorLumaSSD(m_metld,
> orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
> blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize,
> leastError);
>
> if (error < leastError)
> {
> @@ -899,9 +895,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV
> *mvs, uint32_t mvStride, P
> for (int x2 = prevBest.x - doubleRange; x2 <= prevBest.x
> + doubleRange; x2++)
> {
> if (m_useSADinME)
> - error = motionErrorLumaSAD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2, y2, blockSize,
> leastError);
> + error = motionErrorLumaSAD(m_metld,
> orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
> blockY, x2, y2, blockSize, leastError);
> else
> - error = motionErrorLumaSSD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2, y2, blockSize,
> leastError);
> + error = motionErrorLumaSSD(m_metld,
> orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
> blockY, x2, y2, blockSize, leastError);
>
> if (error < leastError)
> {
> @@ -918,9 +914,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV
> *mvs, uint32_t mvStride, P
> MV aboveMV = mvs[idx];
>
> if (m_useSADinME)
> - error = motionErrorLumaSAD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, aboveMV.x, aboveMV.y,
> blockSize, leastError);
> + error = motionErrorLumaSAD(m_metld,
> orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
> blockY, aboveMV.x, aboveMV.y, blockSize, leastError);
> else
> - error = motionErrorLumaSSD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, aboveMV.x, aboveMV.y,
> blockSize, leastError);
> + error = motionErrorLumaSSD(m_metld,
> orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
> blockY, aboveMV.x, aboveMV.y, blockSize, leastError);
>
> if (error < leastError)
> {
> @@ -935,9 +931,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV
> *mvs, uint32_t mvStride, P
> MV leftMV = mvs[idx];
>
> if (m_useSADinME)
> - error = motionErrorLumaSAD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, leftMV.x, leftMV.y,
> blockSize, leastError);
> + error = motionErrorLumaSAD(m_metld,
> orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
> blockY, leftMV.x, leftMV.y, blockSize, leastError);
> else
> - error = motionErrorLumaSSD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, leftMV.x, leftMV.y,
> blockSize, leastError);
> + error = motionErrorLumaSSD(m_metld,
> orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
> blockY, leftMV.x, leftMV.y, blockSize, leastError);
>
> if (error < leastError)
> {
> diff --git a/source/common/temporalfilter.h
> b/source/common/temporalfilter.h
> index 3e03d7737..c4316aca6 100644
> --- a/source/common/temporalfilter.h
> +++ b/source/common/temporalfilter.h
> @@ -84,9 +84,47 @@ namespace X265_NS {
> {
> me.init(X265_CSP_I400);
> me.setQP(X265_LOOKAHEAD_QP);
> + predPUYuv.create(FENC_STRIDE, X265_CSP_I400);
> + m_useSADinME = 1;
> + m_motionVectorFactor = 16;
> }
>
> - ~MotionEstimatorTLD() {}
> + Yuv predPUYuv;
> + int m_useSADinME;
> + int m_motionVectorFactor;
> + int32_t m_bitDepth;
> +
> + void init(const x265_param* param);
> +
> + void motionEstimationLuma(MotionEstimatorTLD& m_tld, MV* mvs,
> uint32_t mvStride, pixel* src, int stride, int height, int width, pixel*
> buf, int bs, int sRange,
> + MV* previous = 0, uint32_t prevmvStride = 0, int factor = 1);
> +
> + void motionEstimationLumaDoubleRes(MotionEstimatorTLD& m_tld, MV*
> mvs, uint32_t mvStride, PicYuv* orig, PicYuv* buffer, int blockSize,
> + MV* previous, uint32_t prevMvStride, int factor, int*
> minError);
> +
> + int motionErrorLumaSSD(MotionEstimatorTLD& m_tld, pixel* src,
> + int stride,
> + pixel* buf,
> + int x,
> + int y,
> + int dx,
> + int dy,
> + int bs,
> + int besterror = 8 * 8 * 1024 * 1024);
> +
> + int motionErrorLumaSAD(MotionEstimatorTLD& m_tld, pixel* src,
> + int stride,
> + pixel* buf,
> + int x,
> + int y,
> + int dx,
> + int dy,
> + int bs,
> + int besterror = 8 * 8 * 1024 * 1024);
> +
> + ~MotionEstimatorTLD() {
> + predPUYuv.destroy();
> + }
> };
>
> struct TemporalFilterRefPicInfo
> @@ -134,7 +172,6 @@ namespace X265_NS {
> double m_chromaFactor;
> double m_sigmaMultiplier;
> double m_sigmaZeroPoint;
> - int m_motionVectorFactor;
> int m_padding;
>
> // Private member variables
> @@ -148,39 +185,11 @@ namespace X265_NS {
> uint8_t m_sliceTypeConfig;
>
> MotionEstimatorTLD* m_metld;
> - Yuv predPUYuv;
> - int m_useSADinME;
>
> int createRefPicInfo(TemporalFilterRefPicInfo* refFrame,
> x265_param* param);
>
> void bilateralFilter(Frame* frame, TemporalFilterRefPicInfo*
> mctfRefList, double overallStrength);
>
> - void motionEstimationLuma(MV *mvs, uint32_t mvStride, pixel* src,
> int stride, int height, int width, pixel* buf, int bs, int sRange,
> - MV *previous = 0, uint32_t prevmvStride = 0, int factor = 1);
> -
> - void motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride,
> PicYuv *orig, PicYuv *buffer, int blockSize,
> - MV *previous, uint32_t prevMvStride, int factor, int*
> minError);
> -
> - int motionErrorLumaSSD(pixel* src,
> - int stride,
> - pixel* buf,
> - int x,
> - int y,
> - int dx,
> - int dy,
> - int bs,
> - int besterror = 8 * 8 * 1024 * 1024);
> -
> - int motionErrorLumaSAD(pixel* src,
> - int stride,
> - pixel* buf,
> - int x,
> - int y,
> - int dx,
> - int dy,
> - int bs,
> - int besterror = 8 * 8 * 1024 * 1024);
> -
> void destroyRefPicInfo(TemporalFilterRefPicInfo* curFrame);
>
> void applyMotion(MV *mvs, uint32_t mvsStride, PicYuv *input,
> PicYuv *output);
> diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp
> index 5fd885227..abc687ef4 100644
> --- a/source/encoder/slicetype.cpp
> +++ b/source/encoder/slicetype.cpp
> @@ -1128,7 +1128,10 @@ bool Lookahead::create()
> m_scratch = X265_MALLOC(int, m_tld[0].widthInCU);
>
> if (m_param->bEnableTemporalFilter)
> + {
> + m_metld = new MotionEstimatorTLD[numTLD];
> m_origPicBuf = new OrigPicBuffer();
> + }
>
> return m_tld && m_scratch;
> }
> @@ -1170,7 +1173,10 @@ void Lookahead::destroy()
> }
>
> if (m_param->bEnableTemporalFilter)
> + {
> delete m_origPicBuf;
> + delete[] m_metld;
> + }
>
> X265_FREE(m_scratch);
> delete [] m_tld;
> @@ -1794,19 +1800,17 @@ void Lookahead::compCostBref(Lowres **frames, int
> start, int end, int num)
> }
> }
>
> -void Lookahead::estimatelowresmotion(Frame* curframe)
> +void CostEstimateGroup::estimatelowresmotion(MotionEstimatorTLD& m_metld,
> Frame* curframe, int refId)
> {
> + m_metld.m_bitDepth = curframe->m_param->internalBitDepth;
> + TemporalFilterRefPicInfo* ref = &curframe->m_mcstfRefList[refId];
>
> - for (int i = 1; i <= curframe->m_mcstf->m_numRef; i++)
> - {
> - TemporalFilterRefPicInfo * ref = &curframe->m_mcstfRefList[i - 1];
> -
> - curframe->m_mcstf->motionEstimationLuma(ref->mvs0,
> ref->mvsStride0, curframe->m_lowres.lowerResPlane[0],
> (curframe->m_lowres.lumaStride / 2), (curframe->m_lowres.lines / 2),
> (curframe->m_lowres.width / 2), ref->lowerRes, 16,
> m_param->searchRangeForLayer2);
> - curframe->m_mcstf->motionEstimationLuma(ref->mvs1,
> ref->mvsStride1, curframe->m_lowres.lowresPlane[0],
> (curframe->m_lowres.lumaStride), (curframe->m_lowres.lines),
> (curframe->m_lowres.width), ref->lowres, 16, m_param->searchRangeForLayer1,
> ref->mvs0, ref->mvsStride0, 2);
> - curframe->m_mcstf->motionEstimationLuma(ref->mvs2,
> ref->mvsStride2, curframe->m_fencPic->m_picOrg[0],
> curframe->m_fencPic->m_stride, curframe->m_fencPic->m_picHeight,
> curframe->m_fencPic->m_picWidth, ref->picBuffer->m_picOrg[0], 16,
> m_param->searchRangeForLayer0, ref->mvs1, ref->mvsStride1, 2);
> - curframe->m_mcstf->motionEstimationLumaDoubleRes(ref->mvs,
> ref->mvsStride, curframe->m_fencPic, ref->picBuffer, 8, ref->mvs2,
> ref->mvsStride2, 1, ref->error);
> - }
> + m_metld.motionEstimationLuma(m_metld, ref->mvs0, ref->mvsStride0,
> curframe->m_lowres.lowerResPlane[0], (int)(curframe->m_lowres.lumaStride /
> 2), (curframe->m_lowres.lines / 2), (curframe->m_lowres.width / 2),
> ref->lowerRes, 16, curframe->m_param->searchRangeForLayer2);
> + m_metld.motionEstimationLuma(m_metld, ref->mvs1, ref->mvsStride1,
> curframe->m_lowres.lowresPlane[0], (int)(curframe->m_lowres.lumaStride),
> (curframe->m_lowres.lines), (curframe->m_lowres.width), ref->lowres, 16,
> curframe->m_param->searchRangeForLayer1, ref->mvs0, ref->mvsStride0, 2);
> + m_metld.motionEstimationLuma(m_metld, ref->mvs2, ref->mvsStride2,
> curframe->m_fencPic->m_picOrg[0], (int)curframe->m_fencPic->m_stride,
> curframe->m_fencPic->m_picHeight, curframe->m_fencPic->m_picWidth,
> ref->picBuffer->m_picOrg[0], 16, curframe->m_param->searchRangeForLayer0,
> ref->mvs1, ref->mvsStride1, 2);
> + m_metld.motionEstimationLumaDoubleRes(m_metld, ref->mvs,
> ref->mvsStride, curframe->m_fencPic, ref->picBuffer, 8, ref->mvs2,
> ref->mvsStride2, 1, ref->error);
>
> + curframe->m_lowres.lowresMcstfMvs[0][refId][0].x = 1;
> }
>
> inline int enqueueRefFrame(Frame* iterFrame, Frame* curFrame, bool
> isPreFiltered, int16_t i)
> @@ -2156,20 +2160,39 @@ void Lookahead::slicetypeDecide()
> }
> }
>
> - Frame* frameEnc = m_inputQueue.first();
> - for (int i = 0; i < m_inputQueue.size(); i++)
> + if (m_bBatchMotionSearch && m_param->bEnableTemporalFilter)
> {
> - if (m_param->bEnableTemporalFilter &&
> isFilterThisframe(frameEnc->m_mcstf->m_sliceTypeConfig,
> frameEnc->m_lowres.sliceType))
> + /* pre-calculate all motion searches, using many worker threads */
> + CostEstimateGroup estGroup(*this, frames);
> + Frame* frameEnc = m_inputQueue.first();
> + for (int b = 0; b < m_inputQueue.size(); b++)
> {
> - if (!generatemcstf(frameEnc, m_origPicBuf->m_mcstfPicList,
> m_inputQueue.last()->m_poc))
> + if (m_param->bEnableTemporalFilter &&
> isFilterThisframe(frameEnc->m_mcstf->m_sliceTypeConfig,
> frameEnc->m_lowres.sliceType))
> {
> - x265_log(m_param, X265_LOG_ERROR, "Failed to initialize
> MCSTFReferencePicInfo at POC %d\n", frameEnc->m_poc);
> - fflush(stderr);
> - }
> + if (!generatemcstf(frameEnc,
> m_origPicBuf->m_mcstfPicList, m_inputQueue.last()->m_poc))
> + {
> + x265_log(m_param, X265_LOG_ERROR, "Failed to
> initialize MCSTFReferencePicInfo at POC %d\n", frameEnc->m_poc);
> + fflush(stderr);
> + }
> +
> + for (int j = 1; j <= frameEnc->m_mcstf->m_numRef; j++)
> + {
> + TemporalFilterRefPicInfo* ref =
> &frameEnc->m_mcstfRefList[j - 1];
> + int i = ref->poc;
> +
> + /* Skip search if already done */
> + if (frames[b + 1]->lowresMcstfMvs[0][j - 1][0].x !=
> 0x7FFF)
> + continue;
>
> - estimatelowresmotion(frameEnc);
> + estGroup.add(j - 1, i, frameEnc->m_poc);
> + }
> + }
> + frameEnc = frameEnc->m_next;
> }
> - frameEnc = frameEnc->m_next;
> +
> + /* auto-disable after the first batch if pool is small */
> + m_bBatchMotionSearch &= m_pool->m_numWorkers >= 4;
> + estGroup.finishBatch();
> }
>
> if (m_param->bEnableTemporalSubLayers > 2)
> @@ -4029,6 +4052,7 @@ void CostEstimateGroup::processTasks(int
> workerThreadID)
> if (workerThreadID < 0)
> id = pool ? pool->m_numWorkers : 0;
> LookaheadTLD& tld = m_lookahead.m_tld[id];
> + MotionEstimatorTLD& m_metld = m_lookahead.m_metld[id];
>
> m_lock.acquire();
> while (m_jobAcquired < m_jobTotal)
> @@ -4042,7 +4066,14 @@ void CostEstimateGroup::processTasks(int
> workerThreadID)
> ProfileScopeEvent(estCostSingle);
>
> Estimate& e = m_estimates[i];
> - estimateFrameCost(tld, e.p0, e.p1, e.b, false);
> + Frame* curFrame = m_lookahead.m_inputQueue.getPOC(e.b);
> +
> + if (m_lookahead.m_param->bEnableTemporalFilter && curFrame &&
> (curFrame->m_lowres.sliceType == X265_TYPE_IDR ||
> curFrame->m_lowres.sliceType == X265_TYPE_I || curFrame->m_lowres.sliceType
> == X265_TYPE_P))
> + {
> + estimatelowresmotion(m_metld, curFrame, e.p0);
> + }
> + else
> + estimateFrameCost(tld, e.p0, e.p1, e.b, false);
> }
> else
> {
> diff --git a/source/encoder/slicetype.h b/source/encoder/slicetype.h
> index 214e295b7..be6ac8112 100644
> --- a/source/encoder/slicetype.h
> +++ b/source/encoder/slicetype.h
> @@ -204,6 +204,7 @@ public:
> int8_t m_gopId;
>
> OrigPicBuffer* m_origPicBuf;
> + MotionEstimatorTLD* m_metld;
>
> Lookahead(x265_param *param, ThreadPool *pool);
> #if DETAILED_CU_STATS
> @@ -227,7 +228,6 @@ public:
> void getEstimatedPictureCost(Frame *pic);
> void setLookaheadQueue();
> int findSliceType(int poc);
> - void estimatelowresmotion(Frame* frame);
> bool generatemcstf(Frame * frame, PicList refPic, int poclast);
> bool isFilterThisframe(uint8_t sliceTypeConfig, int curSliceType);
>
> @@ -327,6 +327,8 @@ protected:
> int64_t estimateFrameCost(LookaheadTLD& tld, int p0, int p1, int b,
> bool intraPenalty);
> void estimateCUCost(LookaheadTLD& tld, int cux, int cuy, int p0,
> int p1, int b, bool bDoSearch[2], bool lastRow, int slice, bool hme);
>
> + void estimatelowresmotion(MotionEstimatorTLD& m_metld, Frame*
> curframe, int refId);
> +
> CostEstimateGroup& operator=(const CostEstimateGroup&);
> };
>
> --
> 2.36.0.windows.1
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20241112/e34c5961/attachment-0001.htm>
More information about the x265-devel
mailing list