[x265] x265-devel Digest, Vol 136, Issue 18
Ashok Kumar Mishra
ashok at multicorewareinc.com
Tue Nov 12 06:28:52 UTC 2024
This mcstf patch series can be pushed.
On Mon, Nov 11, 2024 at 7:56 PM <x265-devel-request at videolan.org> wrote:
> Send x265-devel mailing list submissions to
> x265-devel at videolan.org
>
> To subscribe or unsubscribe via the World Wide Web, visit
> https://mailman.videolan.org/listinfo/x265-devel
> or, via email, send a message with subject or body 'help' to
> x265-devel-request at videolan.org
>
> You can reach the person managing the list at
> x265-devel-owner at videolan.org
>
> When replying, please edit your Subject line so it is more specific
> than "Re: Contents of x265-devel digest..."
>
>
> Today's Topics:
>
> 1. [PATCH 09/10] Remove frameencoder instance for mcstf and fix
> memory leak (Anusuya Kumarasamy)
> 2. [PATCH 10/10] Added batch-motion-search for all planes in
> Lookahead (Anusuya Kumarasamy)
>
>
> ----------------------------------------------------------------------
>
> Message: 1
> Date: Mon, 11 Nov 2024 19:54:00 +0530
> From: Anusuya Kumarasamy <anusuya.kumarasamy at multicorewareinc.com>
> To: x265-devel at videolan.org
> Subject: [x265] [PATCH 09/10] Remove frameencoder instance for mcstf
> and fix memory leak
> Message-ID:
> <
> CAE+pfGzRSPLnUWkG7La3k5yVF+5DAwpWE-YFYzrhcYyej5QPPA at mail.gmail.com>
> Content-Type: text/plain; charset="utf-8"
>
> From 69560b753aadfc06f1b315aea2354f3c01536028 Mon Sep 17 00:00:00 2001
> From: AnusuyaKumarasamy <anusuya.kumarasamy at multicorewareinc.com>
> Date: Mon, 28 Oct 2024 11:30:49 +0530
> Subject: [PATCH 09/10] Remove frameencoder instance for mcstf and fix
> memory
> leak
>
> ---
> source/common/frame.cpp | 1 +
> source/common/lowres.cpp | 2 +-
> source/encoder/frameencoder.cpp | 40 +++++++++------------------------
> source/encoder/frameencoder.h | 4 ----
> 4 files changed, 12 insertions(+), 35 deletions(-)
>
> diff --git a/source/common/frame.cpp b/source/common/frame.cpp
> index e5f5b0327..fbecfb4e9 100644
> --- a/source/common/frame.cpp
> +++ b/source/common/frame.cpp
> @@ -316,6 +316,7 @@ void Frame::destroy()
> m_fencPicSubsampled4 = NULL;
> }
>
> + delete m_mcstf->m_metld;
> for (int i = 0; i < (m_mcstf->m_range << 1); i++)
> m_mcstf->destroyRefPicInfo(&m_mcstfRefList[i]);
>
> diff --git a/source/common/lowres.cpp b/source/common/lowres.cpp
> index 14d1e03f4..17c071c2c 100644
> --- a/source/common/lowres.cpp
> +++ b/source/common/lowres.cpp
> @@ -253,7 +253,7 @@ fail:
> void Lowres::destroy(x265_param* param)
> {
> X265_FREE(buffer[0]);
> - if(bEnableHME)
> + if(bEnableHME || param->bEnableTemporalFilter)
> X265_FREE(lowerResBuffer[0]);
> X265_FREE(intraCost);
> X265_FREE(intraMode);
> diff --git a/source/encoder/frameencoder.cpp
> b/source/encoder/frameencoder.cpp
> index 772810f3d..f0ced45e1 100644
> --- a/source/encoder/frameencoder.cpp
> +++ b/source/encoder/frameencoder.cpp
> @@ -106,16 +106,6 @@ void FrameEncoder::destroy()
> delete m_rce.picTimingSEI;
> delete m_rce.hrdTiming;
> }
> -
> - if (m_param->bEnableTemporalFilter)
> - {
> - delete m_frameEncTF->m_metld;
> -
> - for (int i = 0; i < (m_frameEncTF->m_range << 1); i++)
> - m_frameEncTF->destroyRefPicInfo(&m_mcstfRefList[i]);
> -
> - delete m_frameEncTF;
> - }
> }
>
> bool FrameEncoder::init(Encoder *top, int numRows, int numCols)
> @@ -210,16 +200,6 @@ bool FrameEncoder::init(Encoder *top, int numRows, int
> numCols)
> m_sliceAddrBits = (uint16_t)(tmp + 1);
> }
>
> - if (m_param->bEnableTemporalFilter)
> - {
> - m_frameEncTF = new TemporalFilter();
> - if (m_frameEncTF)
> - m_frameEncTF->init(m_param);
> -
> - for (int i = 0; i < (m_frameEncTF->m_range << 1); i++)
> - ok &= !!m_frameEncTF->createRefPicInfo(&m_mcstfRefList[i],
> m_param);
> - }
> -
> m_retFrameBuffer = X265_MALLOC(Frame*, m_param->numLayers);
> for (int layer = 0; layer < m_param->numLayers; layer++)
> m_retFrameBuffer[layer] = NULL;
> @@ -676,8 +656,8 @@ void FrameEncoder::compressFrame(int layer)
> }
> if (m_param->bEnableTemporalFilter)
> {
> - m_frameEncTF->m_QP = qp;
> - m_frameEncTF->bilateralFilter(m_frame[layer],
> m_frame[layer]->m_mcstfRefList, m_param->temporalFilterStrength);
> + m_frame[layer]->m_mcstf->m_QP = qp;
> + m_frame[layer]->m_mcstf->bilateralFilter(m_frame[layer],
> m_frame[layer]->m_mcstfRefList, m_param->temporalFilterStrength);
> }
>
> if (m_nr)
> @@ -1071,14 +1051,14 @@ void FrameEncoder::compressFrame(int layer)
> if (m_param->bEnableTemporalFilter &&
> m_top->isFilterThisframe(m_frame[layer]->m_mcstf->m_sliceTypeConfig,
> m_frame[layer]->m_lowres.sliceType))
> {
> //Reset the MCSTF context in Frame Encoder and Frame
> - for (int i = 0; i < (m_frameEncTF->m_range << 1); i++)
> - {
> - memset(m_mcstfRefList[i].mvs0, 0, sizeof(MV) *
> ((m_param->sourceWidth / 16) * (m_param->sourceHeight / 16)));
> - memset(m_mcstfRefList[i].mvs1, 0, sizeof(MV) *
> ((m_param->sourceWidth / 16) * (m_param->sourceHeight / 16)));
> - memset(m_mcstfRefList[i].mvs2, 0, sizeof(MV) *
> ((m_param->sourceWidth / 16) * (m_param->sourceHeight / 16)));
> - memset(m_mcstfRefList[i].mvs, 0, sizeof(MV) *
> ((m_param->sourceWidth / 4) * (m_param->sourceHeight / 4)));
> - memset(m_mcstfRefList[i].noise, 0, sizeof(int) *
> ((m_param->sourceWidth / 4) * (m_param->sourceHeight / 4)));
> - memset(m_mcstfRefList[i].error, 0, sizeof(int) *
> ((m_param->sourceWidth / 4) * (m_param->sourceHeight / 4)));
> + for (int i = 0; i < (m_frame[layer]->m_mcstf->m_range << 1); i++)
> + {
> + memset(m_frame[layer]->m_mcstfRefList[i].mvs0, 0, sizeof(MV) *
> ((m_param->sourceWidth / 16) * (m_param->sourceHeight / 16)));
> + memset(m_frame[layer]->m_mcstfRefList[i].mvs1, 0, sizeof(MV) *
> ((m_param->sourceWidth / 16) * (m_param->sourceHeight / 16)));
> + memset(m_frame[layer]->m_mcstfRefList[i].mvs2, 0, sizeof(MV) *
> ((m_param->sourceWidth / 16) * (m_param->sourceHeight / 16)));
> + memset(m_frame[layer]->m_mcstfRefList[i].mvs, 0, sizeof(MV) *
> ((m_param->sourceWidth / 4) * (m_param->sourceHeight / 4)));
> + memset(m_frame[layer]->m_mcstfRefList[i].noise, 0, sizeof(int)
> * ((m_param->sourceWidth / 4) * (m_param->sourceHeight / 4)));
> + memset(m_frame[layer]->m_mcstfRefList[i].error, 0, sizeof(int)
> * ((m_param->sourceWidth / 4) * (m_param->sourceHeight / 4)));
>
> m_frame[layer]->m_mcstf->m_numRef = 0;
> }
> diff --git a/source/encoder/frameencoder.h b/source/encoder/frameencoder.h
> index 21d05c2f2..c31762402 100644
> --- a/source/encoder/frameencoder.h
> +++ b/source/encoder/frameencoder.h
> @@ -265,10 +265,6 @@ public:
> FrameFilter m_frameFilter;
> NALList m_nalList;
>
> - // initialization for mcstf
> - TemporalFilter* m_frameEncTF;
> - TemporalFilterRefPicInfo
> m_mcstfRefList[MAX_MCSTF_TEMPORAL_WINDOW_LENGTH];
> -
> int m_sLayerId;
>
> class WeightAnalysis : public BondedTaskGroup
> --
> 2.36.0.windows.1
> -------------- next part --------------
> An HTML attachment was scrubbed...
> URL: <
> http://mailman.videolan.org/pipermail/x265-devel/attachments/20241111/286e16dc/attachment-0001.htm
> >
> -------------- next part --------------
> A non-text attachment was scrubbed...
> Name: 0009-Remove-frameencoder-instance-for-mcstf-and-fix-memor.patch
> Type: application/octet-stream
> Size: 5749 bytes
> Desc: not available
> URL: <
> http://mailman.videolan.org/pipermail/x265-devel/attachments/20241111/286e16dc/attachment-0001.obj
> >
>
> ------------------------------
>
> Message: 2
> Date: Mon, 11 Nov 2024 19:55:38 +0530
> From: Anusuya Kumarasamy <anusuya.kumarasamy at multicorewareinc.com>
> To: x265-devel at videolan.org
> Subject: [x265] [PATCH 10/10] Added batch-motion-search for all planes
> in Lookahead
> Message-ID:
> <
> CAE+pfGzyzMGujAxYF_5uS5SHcoG3O7Sn58qvtLn1vxOSV-GDvg at mail.gmail.com>
> Content-Type: text/plain; charset="utf-8"
>
> From 79dd07b35ff128ec13f0a6902b8fad1f7d419a04 Mon Sep 17 00:00:00 2001
> From: AnusuyaKumarasamy <anusuya.kumarasamy at multicorewareinc.com>
> Date: Mon, 11 Nov 2024 10:50:11 +0530
> Subject: [PATCH 10/10] Added batch-motion-search for all planes in
> Lookahead
>
> ---
> source/common/lowres.cpp | 15 +++++++
> source/common/lowres.h | 1 +
> source/common/temporalfilter.cpp | 64 ++++++++++++++--------------
> source/common/temporalfilter.h | 69 +++++++++++++++++--------------
> source/encoder/slicetype.cpp | 71 +++++++++++++++++++++++---------
> source/encoder/slicetype.h | 4 +-
> 6 files changed, 139 insertions(+), 85 deletions(-)
>
> diff --git a/source/common/lowres.cpp b/source/common/lowres.cpp
> index 17c071c2c..1596f79da 100644
> --- a/source/common/lowres.cpp
> +++ b/source/common/lowres.cpp
> @@ -194,6 +194,11 @@ bool Lowres::create(x265_param* param, PicYuv
> *origPic, uint32_t qgSize)
> }
> }
>
> + for (int i = 0; i < 4; i++)
> + {
> + CHECKED_MALLOC(lowresMcstfMvs[0][i], MV, cuCount);
> + }
> +
> for (int i = 0; i < bframes + 2; i++)
> {
> CHECKED_MALLOC(lowresMvs[0][i], MV, cuCount);
> @@ -281,6 +286,11 @@ void Lowres::destroy(x265_param* param)
> X265_FREE(lowerResMvCosts[1][i]);
> }
> }
> +
> + for (int i = 0; i < 4; i++)
> + {
> + X265_FREE(lowresMcstfMvs[0][i]);
> + }
> X265_FREE(qpAqOffset);
> X265_FREE(invQscaleFactor);
> X265_FREE(qpCuTreeOffset);
> @@ -358,6 +368,11 @@ void Lowres::init(PicYuv *origPic, int poc)
> lowresMvs[1][i][0].x = 0x7FFF;
> }
>
> + for (int i = 0; i < 4; i++)
> + {
> + lowresMcstfMvs[0][i][0].x = 0x7FFF;
> + }
> +
> for (int i = 0; i < bframes + 2; i++)
> intraMbs[i] = 0;
> if (origPic->m_param->rc.vbvBufferSize)
> diff --git a/source/common/lowres.h b/source/common/lowres.h
> index 7e6baa844..2bf39c3b5 100644
> --- a/source/common/lowres.h
> +++ b/source/common/lowres.h
> @@ -191,6 +191,7 @@ struct Lowres : public ReferencePlanes
> uint16_t* lowresCosts[X265_BFRAME_MAX + 2][X265_BFRAME_MAX + 2];
> int32_t* lowresMvCosts[2][X265_BFRAME_MAX + 2];
> MV* lowresMvs[2][X265_BFRAME_MAX + 2];
> + MV* lowresMcstfMvs[2][4];
> uint32_t maxBlocksInRow;
> uint32_t maxBlocksInCol;
> uint32_t maxBlocksInRowFullRes;
> diff --git a/source/common/temporalfilter.cpp
> b/source/common/temporalfilter.cpp
> index db58a0c15..aa50c2246 100644
> --- a/source/common/temporalfilter.cpp
> +++ b/source/common/temporalfilter.cpp
> @@ -144,13 +144,11 @@ TemporalFilter::TemporalFilter()
> m_QP = 0;
> m_sliceTypeConfig = 3;
> m_numRef = 0;
> - m_useSADinME = 1;
>
> m_range = 2;
> m_chromaFactor = 0.55;
> m_sigmaMultiplier = 9.0;
> m_sigmaZeroPoint = 10.0;
> - m_motionVectorFactor = 16;
> }
>
> void TemporalFilter::init(const x265_param* param)
> @@ -163,8 +161,6 @@ void TemporalFilter::init(const x265_param* param)
> m_numComponents = (m_internalCsp != X265_CSP_I400) ? MAX_NUM_COMPONENT
> : 1;
>
> m_metld = new MotionEstimatorTLD;
> -
> - predPUYuv.create(FENC_STRIDE, X265_CSP_I400);
> }
>
> int TemporalFilter::createRefPicInfo(TemporalFilterRefPicInfo* refFrame,
> x265_param* param)
> @@ -191,7 +187,7 @@ fail:
> return 0;
> }
>
> -int TemporalFilter::motionErrorLumaSAD(
> +int MotionEstimatorTLD::motionErrorLumaSAD(MotionEstimatorTLD& m_metld,
> pixel* src,
> int stride,
> pixel* buf,
> @@ -233,7 +229,7 @@ int TemporalFilter::motionErrorLumaSAD(
> /* copy PU block into cache */
> primitives.pu[partEnum].copy_pp(predPUYuv.m_buf[0], FENC_STRIDE,
> bufferRowStart, buffStride);
>
> - error = m_metld->me.bufSAD(predPUYuv.m_buf[0], FENC_STRIDE);
> + error = m_metld.me.bufSAD(predPUYuv.m_buf[0], FENC_STRIDE);
> #endif
> if (error > besterror)
> {
> @@ -296,7 +292,7 @@ int TemporalFilter::motionErrorLumaSAD(
> return error;
> }
>
> -int TemporalFilter::motionErrorLumaSSD(
> +int MotionEstimatorTLD::motionErrorLumaSSD(MotionEstimatorTLD& m_metld,
> pixel* src,
> int stride,
> pixel* buf,
> @@ -338,7 +334,7 @@ int TemporalFilter::motionErrorLumaSSD(
> /* copy PU block into cache */
> primitives.pu[partEnum].copy_pp(predPUYuv.m_buf[0], FENC_STRIDE,
> bufferRowStart, buffStride);
>
> - error =
> (int)primitives.cu[partEnum].sse_pp(m_metld->me.fencPUYuv.m_buf[0],
> FENC_STRIDE, predPUYuv.m_buf[0], FENC_STRIDE);
> + error =
> (int)primitives.cu[partEnum].sse_pp(m_metld.me.fencPUYuv.m_buf[0],
> FENC_STRIDE, predPUYuv.m_buf[0], FENC_STRIDE);
>
> #endif
> if (error > besterror)
> @@ -648,7 +644,7 @@ void TemporalFilter::bilateralFilter(Frame* frame,
> }
> }
>
> -void TemporalFilter::motionEstimationLuma(MV *mvs, uint32_t mvStride,
> pixel* src,int stride, int height, int width, pixel* buf, int blockSize,
> +void MotionEstimatorTLD::motionEstimationLuma(MotionEstimatorTLD& m_metld,
> MV *mvs, uint32_t mvStride, pixel* src,int stride, int height, int width,
> pixel* buf, int blockSize,
> int sRange, MV* previous, uint32_t prevMvStride, int factor)
> {
>
> @@ -667,7 +663,7 @@ void TemporalFilter::motionEstimationLuma(MV *mvs,
> uint32_t mvStride, pixel* src
> for (int blockX = 0; blockX + blockSize <= origWidth; blockX +=
> stepSize)
> {
> const intptr_t pelOffset = blockY * stride + blockX;
> - m_metld->me.setSourcePU(src, stride, pelOffset, blockSize,
> blockSize, X265_HEX_SEARCH, 1);
> + m_metld.me.setSourcePU(src, stride, pelOffset, blockSize,
> blockSize, X265_HEX_SEARCH, 1);
>
>
> MV best(0, 0);
> @@ -694,9 +690,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs,
> uint32_t mvStride, pixel* src
> MV old = previous[mvIdx];
>
> if (m_useSADinME)
> - error = motionErrorLumaSAD(src, stride,
> buf, blockX, blockY, old.x * factor, old.y * factor, blockSize,
> leastError);
> + error = motionErrorLumaSAD(m_metld, src,
> stride, buf, blockX, blockY, old.x * factor, old.y * factor, blockSize,
> leastError);
> else
> - error = motionErrorLumaSSD(src, stride,
> buf, blockX, blockY, old.x * factor, old.y * factor, blockSize,
> leastError);
> + error = motionErrorLumaSSD(m_metld, src,
> stride, buf, blockX, blockY, old.x * factor, old.y * factor, blockSize,
> leastError);
>
> if (error < leastError)
> {
> @@ -708,9 +704,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs,
> uint32_t mvStride, pixel* src
> }
>
> if (m_useSADinME)
> - error = motionErrorLumaSAD(src, stride, buf, blockX,
> blockY, 0, 0, blockSize, leastError);
> + error = motionErrorLumaSAD(m_metld, src, stride, buf,
> blockX, blockY, 0, 0, blockSize, leastError);
> else
> - error = motionErrorLumaSSD(src, stride, buf, blockX,
> blockY, 0, 0, blockSize, leastError);
> + error = motionErrorLumaSSD(m_metld, src, stride, buf,
> blockX, blockY, 0, 0, blockSize, leastError);
>
> if (error < leastError)
> {
> @@ -726,9 +722,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs,
> uint32_t mvStride, pixel* src
> for (int x2 = prevBest.x / m_motionVectorFactor - range;
> x2 <= prevBest.x / m_motionVectorFactor + range; x2++)
> {
> if (m_useSADinME)
> - error = motionErrorLumaSAD(src, stride, buf,
> blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor,
> blockSize, leastError);
> + error = motionErrorLumaSAD(m_metld, src, stride,
> buf, blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor,
> blockSize, leastError);
> else
> - error = motionErrorLumaSSD(src, stride, buf,
> blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor,
> blockSize, leastError);
> + error = motionErrorLumaSSD(m_metld, src, stride,
> buf, blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor,
> blockSize, leastError);
> if (error < leastError)
> {
> best.set(x2 * m_motionVectorFactor, y2 *
> m_motionVectorFactor);
> @@ -743,9 +739,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs,
> uint32_t mvStride, pixel* src
> MV aboveMV = mvs[idx];
>
> if (m_useSADinME)
> - error = motionErrorLumaSAD(src, stride, buf, blockX,
> blockY, aboveMV.x, aboveMV.y, blockSize, leastError);
> + error = motionErrorLumaSAD(m_metld, src, stride, buf,
> blockX, blockY, aboveMV.x, aboveMV.y, blockSize, leastError);
> else
> - error = motionErrorLumaSSD(src, stride, buf, blockX,
> blockY, aboveMV.x, aboveMV.y, blockSize, leastError);
> + error = motionErrorLumaSSD(m_metld, src, stride, buf,
> blockX, blockY, aboveMV.x, aboveMV.y, blockSize, leastError);
>
> if (error < leastError)
> {
> @@ -760,9 +756,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs,
> uint32_t mvStride, pixel* src
> MV leftMV = mvs[idx];
>
> if (m_useSADinME)
> - error = motionErrorLumaSAD(src, stride, buf, blockX,
> blockY, leftMV.x, leftMV.y, blockSize, leastError);
> + error = motionErrorLumaSAD(m_metld, src, stride, buf,
> blockX, blockY, leftMV.x, leftMV.y, blockSize, leastError);
> else
> - error = motionErrorLumaSSD(src, stride, buf, blockX,
> blockY, leftMV.x, leftMV.y, blockSize, leastError);
> + error = motionErrorLumaSSD(m_metld, src, stride, buf,
> blockX, blockY, leftMV.x, leftMV.y, blockSize, leastError);
>
> if (error < leastError)
> {
> @@ -802,7 +798,7 @@ void TemporalFilter::motionEstimationLuma(MV *mvs,
> uint32_t mvStride, pixel* src
> }
>
>
> -void TemporalFilter::motionEstimationLumaDoubleRes(MV *mvs, uint32_t
> mvStride, PicYuv *orig, PicYuv *buffer, int blockSize,
> +void MotionEstimatorTLD::motionEstimationLumaDoubleRes(MotionEstimatorTLD&
> m_metld, MV *mvs, uint32_t mvStride, PicYuv *orig, PicYuv *buffer, int
> blockSize,
> MV *previous, uint32_t prevMvStride, int factor, int* minError)
> {
>
> @@ -822,7 +818,7 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV
> *mvs, uint32_t mvStride, P
> {
>
> const intptr_t pelOffset = blockY * orig->m_stride + blockX;
> - m_metld->me.setSourcePU(orig->m_picOrg[0], orig->m_stride,
> pelOffset, blockSize, blockSize, X265_HEX_SEARCH, 1);
> + m_metld.me.setSourcePU(orig->m_picOrg[0], orig->m_stride,
> pelOffset, blockSize, blockSize, X265_HEX_SEARCH, 1);
>
> MV best(0, 0);
> int leastError = INT_MAX;
> @@ -848,9 +844,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV
> *mvs, uint32_t mvStride, P
> MV old = previous[mvIdx];
>
> if (m_useSADinME)
> - error =
> motionErrorLumaSAD(orig->m_picOrg[0], orig->m_stride, buffer->m_picOrg[0],
> blockX, blockY, old.x * factor, old.y * factor, blockSize, leastError);
> + error = motionErrorLumaSAD(m_metld,
> orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
> blockY, old.x * factor, old.y * factor, blockSize, leastError);
> else
> - error =
> motionErrorLumaSSD(orig->m_picOrg[0], orig->m_stride, buffer->m_picOrg[0],
> blockX, blockY, old.x * factor, old.y * factor, blockSize, leastError);
> + error = motionErrorLumaSSD(m_metld,
> orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
> blockY, old.x * factor, old.y * factor, blockSize, leastError);
>
> if (error < leastError)
> {
> @@ -862,9 +858,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV
> *mvs, uint32_t mvStride, P
> }
>
> if (m_useSADinME)
> - error = motionErrorLumaSAD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, 0, 0, blockSize,
> leastError);
> + error = motionErrorLumaSAD(m_metld, orig->m_picOrg[0],
> (int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, 0, 0, blockSize,
> leastError);
> else
> - error = motionErrorLumaSSD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, 0, 0, blockSize,
> leastError);
> + error = motionErrorLumaSSD(m_metld, orig->m_picOrg[0],
> (int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, 0, 0, blockSize,
> leastError);
>
> if (error < leastError)
> {
> @@ -880,9 +876,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV
> *mvs, uint32_t mvStride, P
> for (int x2 = prevBest.x / m_motionVectorFactor - range;
> x2 <= prevBest.x / m_motionVectorFactor + range; x2++)
> {
> if (m_useSADinME)
> - error = motionErrorLumaSAD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2 *
> m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize, leastError);
> + error = motionErrorLumaSAD(m_metld,
> orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
> blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize,
> leastError);
> else
> - error = motionErrorLumaSSD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2 *
> m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize, leastError);
> + error = motionErrorLumaSSD(m_metld,
> orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
> blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize,
> leastError);
>
> if (error < leastError)
> {
> @@ -899,9 +895,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV
> *mvs, uint32_t mvStride, P
> for (int x2 = prevBest.x - doubleRange; x2 <= prevBest.x +
> doubleRange; x2++)
> {
> if (m_useSADinME)
> - error = motionErrorLumaSAD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2, y2, blockSize,
> leastError);
> + error = motionErrorLumaSAD(m_metld,
> orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
> blockY, x2, y2, blockSize, leastError);
> else
> - error = motionErrorLumaSSD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2, y2, blockSize,
> leastError);
> + error = motionErrorLumaSSD(m_metld,
> orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
> blockY, x2, y2, blockSize, leastError);
>
> if (error < leastError)
> {
> @@ -918,9 +914,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV
> *mvs, uint32_t mvStride, P
> MV aboveMV = mvs[idx];
>
> if (m_useSADinME)
> - error = motionErrorLumaSAD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, aboveMV.x, aboveMV.y,
> blockSize, leastError);
> + error = motionErrorLumaSAD(m_metld, orig->m_picOrg[0],
> (int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, aboveMV.x,
> aboveMV.y, blockSize, leastError);
> else
> - error = motionErrorLumaSSD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, aboveMV.x, aboveMV.y,
> blockSize, leastError);
> + error = motionErrorLumaSSD(m_metld, orig->m_picOrg[0],
> (int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, aboveMV.x,
> aboveMV.y, blockSize, leastError);
>
> if (error < leastError)
> {
> @@ -935,9 +931,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV
> *mvs, uint32_t mvStride, P
> MV leftMV = mvs[idx];
>
> if (m_useSADinME)
> - error = motionErrorLumaSAD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, leftMV.x, leftMV.y,
> blockSize, leastError);
> + error = motionErrorLumaSAD(m_metld, orig->m_picOrg[0],
> (int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, leftMV.x,
> leftMV.y, blockSize, leastError);
> else
> - error = motionErrorLumaSSD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, leftMV.x, leftMV.y,
> blockSize, leastError);
> + error = motionErrorLumaSSD(m_metld, orig->m_picOrg[0],
> (int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, leftMV.x,
> leftMV.y, blockSize, leastError);
>
> if (error < leastError)
> {
> diff --git a/source/common/temporalfilter.h
> b/source/common/temporalfilter.h
> index 3e03d7737..c4316aca6 100644
> --- a/source/common/temporalfilter.h
> +++ b/source/common/temporalfilter.h
> @@ -84,9 +84,47 @@ namespace X265_NS {
> {
> me.init(X265_CSP_I400);
> me.setQP(X265_LOOKAHEAD_QP);
> + predPUYuv.create(FENC_STRIDE, X265_CSP_I400);
> + m_useSADinME = 1;
> + m_motionVectorFactor = 16;
> }
>
> - ~MotionEstimatorTLD() {}
> + Yuv predPUYuv;
> + int m_useSADinME;
> + int m_motionVectorFactor;
> + int32_t m_bitDepth;
> +
> + void init(const x265_param* param);
> +
> + void motionEstimationLuma(MotionEstimatorTLD& m_tld, MV* mvs,
> uint32_t mvStride, pixel* src, int stride, int height, int width, pixel*
> buf, int bs, int sRange,
> + MV* previous = 0, uint32_t prevmvStride = 0, int factor = 1);
> +
> + void motionEstimationLumaDoubleRes(MotionEstimatorTLD& m_tld, MV*
> mvs, uint32_t mvStride, PicYuv* orig, PicYuv* buffer, int blockSize,
> + MV* previous, uint32_t prevMvStride, int factor, int*
> minError);
> +
> + int motionErrorLumaSSD(MotionEstimatorTLD& m_tld, pixel* src,
> + int stride,
> + pixel* buf,
> + int x,
> + int y,
> + int dx,
> + int dy,
> + int bs,
> + int besterror = 8 * 8 * 1024 * 1024);
> +
> + int motionErrorLumaSAD(MotionEstimatorTLD& m_tld, pixel* src,
> + int stride,
> + pixel* buf,
> + int x,
> + int y,
> + int dx,
> + int dy,
> + int bs,
> + int besterror = 8 * 8 * 1024 * 1024);
> +
> + ~MotionEstimatorTLD() {
> + predPUYuv.destroy();
> + }
> };
>
> struct TemporalFilterRefPicInfo
> @@ -134,7 +172,6 @@ namespace X265_NS {
> double m_chromaFactor;
> double m_sigmaMultiplier;
> double m_sigmaZeroPoint;
> - int m_motionVectorFactor;
> int m_padding;
>
> // Private member variables
> @@ -148,39 +185,11 @@ namespace X265_NS {
> uint8_t m_sliceTypeConfig;
>
> MotionEstimatorTLD* m_metld;
> - Yuv predPUYuv;
> - int m_useSADinME;
>
> int createRefPicInfo(TemporalFilterRefPicInfo* refFrame,
> x265_param* param);
>
> void bilateralFilter(Frame* frame, TemporalFilterRefPicInfo*
> mctfRefList, double overallStrength);
>
> - void motionEstimationLuma(MV *mvs, uint32_t mvStride, pixel* src,
> int stride, int height, int width, pixel* buf, int bs, int sRange,
> - MV *previous = 0, uint32_t prevmvStride = 0, int factor = 1);
> -
> - void motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride,
> PicYuv *orig, PicYuv *buffer, int blockSize,
> - MV *previous, uint32_t prevMvStride, int factor, int*
> minError);
> -
> - int motionErrorLumaSSD(pixel* src,
> - int stride,
> - pixel* buf,
> - int x,
> - int y,
> - int dx,
> - int dy,
> - int bs,
> - int besterror = 8 * 8 * 1024 * 1024);
> -
> - int motionErrorLumaSAD(pixel* src,
> - int stride,
> - pixel* buf,
> - int x,
> - int y,
> - int dx,
> - int dy,
> - int bs,
> - int besterror = 8 * 8 * 1024 * 1024);
> -
> void destroyRefPicInfo(TemporalFilterRefPicInfo* curFrame);
>
> void applyMotion(MV *mvs, uint32_t mvsStride, PicYuv *input,
> PicYuv *output);
> diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp
> index 5fd885227..abc687ef4 100644
> --- a/source/encoder/slicetype.cpp
> +++ b/source/encoder/slicetype.cpp
> @@ -1128,7 +1128,10 @@ bool Lookahead::create()
> m_scratch = X265_MALLOC(int, m_tld[0].widthInCU);
>
> if (m_param->bEnableTemporalFilter)
> + {
> + m_metld = new MotionEstimatorTLD[numTLD];
> m_origPicBuf = new OrigPicBuffer();
> + }
>
> return m_tld && m_scratch;
> }
> @@ -1170,7 +1173,10 @@ void Lookahead::destroy()
> }
>
> if (m_param->bEnableTemporalFilter)
> + {
> delete m_origPicBuf;
> + delete[] m_metld;
> + }
>
> X265_FREE(m_scratch);
> delete [] m_tld;
> @@ -1794,19 +1800,17 @@ void Lookahead::compCostBref(Lowres **frames, int
> start, int end, int num)
> }
> }
>
> -void Lookahead::estimatelowresmotion(Frame* curframe)
> +void CostEstimateGroup::estimatelowresmotion(MotionEstimatorTLD& m_metld,
> Frame* curframe, int refId)
> {
> + m_metld.m_bitDepth = curframe->m_param->internalBitDepth;
> + TemporalFilterRefPicInfo* ref = &curframe->m_mcstfRefList[refId];
>
> - for (int i = 1; i <= curframe->m_mcstf->m_numRef; i++)
> - {
> - TemporalFilterRefPicInfo * ref = &curframe->m_mcstfRefList[i - 1];
> -
> - curframe->m_mcstf->motionEstimationLuma(ref->mvs0,
> ref->mvsStride0, curframe->m_lowres.lowerResPlane[0],
> (curframe->m_lowres.lumaStride / 2), (curframe->m_lowres.lines / 2),
> (curframe->m_lowres.width / 2), ref->lowerRes, 16,
> m_param->searchRangeForLayer2);
> - curframe->m_mcstf->motionEstimationLuma(ref->mvs1,
> ref->mvsStride1, curframe->m_lowres.lowresPlane[0],
> (curframe->m_lowres.lumaStride), (curframe->m_lowres.lines),
> (curframe->m_lowres.width), ref->lowres, 16, m_param->searchRangeForLayer1,
> ref->mvs0, ref->mvsStride0, 2);
> - curframe->m_mcstf->motionEstimationLuma(ref->mvs2,
> ref->mvsStride2, curframe->m_fencPic->m_picOrg[0],
> curframe->m_fencPic->m_stride, curframe->m_fencPic->m_picHeight,
> curframe->m_fencPic->m_picWidth, ref->picBuffer->m_picOrg[0], 16,
> m_param->searchRangeForLayer0, ref->mvs1, ref->mvsStride1, 2);
> - curframe->m_mcstf->motionEstimationLumaDoubleRes(ref->mvs,
> ref->mvsStride, curframe->m_fencPic, ref->picBuffer, 8, ref->mvs2,
> ref->mvsStride2, 1, ref->error);
> - }
> + m_metld.motionEstimationLuma(m_metld, ref->mvs0, ref->mvsStride0,
> curframe->m_lowres.lowerResPlane[0], (int)(curframe->m_lowres.lumaStride /
> 2), (curframe->m_lowres.lines / 2), (curframe->m_lowres.width / 2),
> ref->lowerRes, 16, curframe->m_param->searchRangeForLayer2);
> + m_metld.motionEstimationLuma(m_metld, ref->mvs1, ref->mvsStride1,
> curframe->m_lowres.lowresPlane[0], (int)(curframe->m_lowres.lumaStride),
> (curframe->m_lowres.lines), (curframe->m_lowres.width), ref->lowres, 16,
> curframe->m_param->searchRangeForLayer1, ref->mvs0, ref->mvsStride0, 2);
> + m_metld.motionEstimationLuma(m_metld, ref->mvs2, ref->mvsStride2,
> curframe->m_fencPic->m_picOrg[0], (int)curframe->m_fencPic->m_stride,
> curframe->m_fencPic->m_picHeight, curframe->m_fencPic->m_picWidth,
> ref->picBuffer->m_picOrg[0], 16, curframe->m_param->searchRangeForLayer0,
> ref->mvs1, ref->mvsStride1, 2);
> + m_metld.motionEstimationLumaDoubleRes(m_metld, ref->mvs,
> ref->mvsStride, curframe->m_fencPic, ref->picBuffer, 8, ref->mvs2,
> ref->mvsStride2, 1, ref->error);
>
> + curframe->m_lowres.lowresMcstfMvs[0][refId][0].x = 1;
> }
>
> inline int enqueueRefFrame(Frame* iterFrame, Frame* curFrame, bool
> isPreFiltered, int16_t i)
> @@ -2156,20 +2160,39 @@ void Lookahead::slicetypeDecide()
> }
> }
>
> - Frame* frameEnc = m_inputQueue.first();
> - for (int i = 0; i < m_inputQueue.size(); i++)
> + if (m_bBatchMotionSearch && m_param->bEnableTemporalFilter)
> {
> - if (m_param->bEnableTemporalFilter &&
> isFilterThisframe(frameEnc->m_mcstf->m_sliceTypeConfig,
> frameEnc->m_lowres.sliceType))
> + /* pre-calculate all motion searches, using many worker threads */
> + CostEstimateGroup estGroup(*this, frames);
> + Frame* frameEnc = m_inputQueue.first();
> + for (int b = 0; b < m_inputQueue.size(); b++)
> {
> - if (!generatemcstf(frameEnc, m_origPicBuf->m_mcstfPicList,
> m_inputQueue.last()->m_poc))
> + if (m_param->bEnableTemporalFilter &&
> isFilterThisframe(frameEnc->m_mcstf->m_sliceTypeConfig,
> frameEnc->m_lowres.sliceType))
> {
> - x265_log(m_param, X265_LOG_ERROR, "Failed to initialize
> MCSTFReferencePicInfo at POC %d\n", frameEnc->m_poc);
> - fflush(stderr);
> - }
> + if (!generatemcstf(frameEnc, m_origPicBuf->m_mcstfPicList,
> m_inputQueue.last()->m_poc))
> + {
> + x265_log(m_param, X265_LOG_ERROR, "Failed to
> initialize MCSTFReferencePicInfo at POC %d\n", frameEnc->m_poc);
> + fflush(stderr);
> + }
> +
> + for (int j = 1; j <= frameEnc->m_mcstf->m_numRef; j++)
> + {
> + TemporalFilterRefPicInfo* ref =
> &frameEnc->m_mcstfRefList[j - 1];
> + int i = ref->poc;
> +
> + /* Skip search if already done */
> + if (frames[b + 1]->lowresMcstfMvs[0][j - 1][0].x !=
> 0x7FFF)
> + continue;
>
> - estimatelowresmotion(frameEnc);
> + estGroup.add(j - 1, i, frameEnc->m_poc);
> + }
> + }
> + frameEnc = frameEnc->m_next;
> }
> - frameEnc = frameEnc->m_next;
> +
> + /* auto-disable after the first batch if pool is small */
> + m_bBatchMotionSearch &= m_pool->m_numWorkers >= 4;
> + estGroup.finishBatch();
> }
>
> if (m_param->bEnableTemporalSubLayers > 2)
> @@ -4029,6 +4052,7 @@ void CostEstimateGroup::processTasks(int
> workerThreadID)
> if (workerThreadID < 0)
> id = pool ? pool->m_numWorkers : 0;
> LookaheadTLD& tld = m_lookahead.m_tld[id];
> + MotionEstimatorTLD& m_metld = m_lookahead.m_metld[id];
>
> m_lock.acquire();
> while (m_jobAcquired < m_jobTotal)
> @@ -4042,7 +4066,14 @@ void CostEstimateGroup::processTasks(int
> workerThreadID)
> ProfileScopeEvent(estCostSingle);
>
> Estimate& e = m_estimates[i];
> - estimateFrameCost(tld, e.p0, e.p1, e.b, false);
> + Frame* curFrame = m_lookahead.m_inputQueue.getPOC(e.b);
> +
> + if (m_lookahead.m_param->bEnableTemporalFilter && curFrame &&
> (curFrame->m_lowres.sliceType == X265_TYPE_IDR ||
> curFrame->m_lowres.sliceType == X265_TYPE_I || curFrame->m_lowres.sliceType
> == X265_TYPE_P))
> + {
> + estimatelowresmotion(m_metld, curFrame, e.p0);
> + }
> + else
> + estimateFrameCost(tld, e.p0, e.p1, e.b, false);
> }
> else
> {
> diff --git a/source/encoder/slicetype.h b/source/encoder/slicetype.h
> index 214e295b7..be6ac8112 100644
> --- a/source/encoder/slicetype.h
> +++ b/source/encoder/slicetype.h
> @@ -204,6 +204,7 @@ public:
> int8_t m_gopId;
>
> OrigPicBuffer* m_origPicBuf;
> + MotionEstimatorTLD* m_metld;
>
> Lookahead(x265_param *param, ThreadPool *pool);
> #if DETAILED_CU_STATS
> @@ -227,7 +228,6 @@ public:
> void getEstimatedPictureCost(Frame *pic);
> void setLookaheadQueue();
> int findSliceType(int poc);
> - void estimatelowresmotion(Frame* frame);
> bool generatemcstf(Frame * frame, PicList refPic, int poclast);
> bool isFilterThisframe(uint8_t sliceTypeConfig, int curSliceType);
>
> @@ -327,6 +327,8 @@ protected:
> int64_t estimateFrameCost(LookaheadTLD& tld, int p0, int p1, int b,
> bool intraPenalty);
> void estimateCUCost(LookaheadTLD& tld, int cux, int cuy, int p0,
> int p1, int b, bool bDoSearch[2], bool lastRow, int slice, bool hme);
>
> + void estimatelowresmotion(MotionEstimatorTLD& m_metld, Frame*
> curframe, int refId);
> +
> CostEstimateGroup& operator=(const CostEstimateGroup&);
> };
>
> --
> 2.36.0.windows.1
> -------------- next part --------------
> An HTML attachment was scrubbed...
> URL: <
> http://mailman.videolan.org/pipermail/x265-devel/attachments/20241111/2fafdc56/attachment.htm
> >
> -------------- next part --------------
> A non-text attachment was scrubbed...
> Name: 0010-Added-batch-motion-search-for-all-planes-in-Lookahea.patch
> Type: application/octet-stream
> Size: 28374 bytes
> Desc: not available
> URL: <
> http://mailman.videolan.org/pipermail/x265-devel/attachments/20241111/2fafdc56/attachment.obj
> >
>
> ------------------------------
>
> Subject: Digest Footer
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>
> ------------------------------
>
> End of x265-devel Digest, Vol 136, Issue 18
> *******************************************
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20241112/7eaff3d9/attachment-0001.htm>
More information about the x265-devel mailing list