<div dir="ltr">Pushed all 10 patches to the master and release_4.1 branches</div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Mon, Nov 11, 2024 at 7:56 PM Anusuya Kumarasamy <<a href="mailto:anusuya.kumarasamy@multicorewareinc.com">anusuya.kumarasamy@multicorewareinc.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><div dir="ltr">From 79dd07b35ff128ec13f0a6902b8fad1f7d419a04 Mon Sep 17 00:00:00 2001<br>From: AnusuyaKumarasamy <<a href="mailto:anusuya.kumarasamy@multicorewareinc.com" target="_blank">anusuya.kumarasamy@multicorewareinc.com</a>><br>Date: Mon, 11 Nov 2024 10:50:11 +0530<br>Subject: [PATCH 10/10] Added batch-motion-search for all planes in Lookahead<br><br>---<br> source/common/lowres.cpp | 15 +++++++<br> source/common/lowres.h | 1 +<br> source/common/temporalfilter.cpp | 64 ++++++++++++++--------------<br> source/common/temporalfilter.h | 69 +++++++++++++++++--------------<br> source/encoder/slicetype.cpp | 71 +++++++++++++++++++++++---------<br> source/encoder/slicetype.h | 4 +-<br> 6 files changed, 139 insertions(+), 85 deletions(-)<br><br>diff --git a/source/common/lowres.cpp b/source/common/lowres.cpp<br>index 17c071c2c..1596f79da 100644<br>--- a/source/common/lowres.cpp<br>+++ b/source/common/lowres.cpp<br>@@ -194,6 +194,11 @@ bool Lowres::create(x265_param* param, PicYuv *origPic, uint32_t qgSize)<br> }<br> }<br> <br>+ for (int i = 0; i < 4; i++)<br>+ {<br>+ CHECKED_MALLOC(lowresMcstfMvs[0][i], MV, cuCount);<br>+ }<br>+<br> for (int i = 0; i < bframes + 2; i++)<br> {<br> CHECKED_MALLOC(lowresMvs[0][i], MV, cuCount);<br>@@ -281,6 +286,11 @@ void Lowres::destroy(x265_param* param)<br> X265_FREE(lowerResMvCosts[1][i]);<br> }<br> }<br>+<br>+ for (int i = 0; i < 4; i++)<br>+ {<br>+ X265_FREE(lowresMcstfMvs[0][i]);<br>+ }<br> X265_FREE(qpAqOffset);<br> X265_FREE(invQscaleFactor);<br> X265_FREE(qpCuTreeOffset);<br>@@ -358,6 +368,11 @@ void Lowres::init(PicYuv *origPic, int poc)<br> lowresMvs[1][i][0].x = 0x7FFF;<br> }<br> <br>+ for (int i = 0; i < 4; i++)<br>+ {<br>+ lowresMcstfMvs[0][i][0].x = 0x7FFF;<br>+ }<br>+<br> for (int i = 0; i < bframes + 2; i++)<br> intraMbs[i] = 0;<br> if (origPic->m_param->rc.vbvBufferSize)<br>diff --git a/source/common/lowres.h b/source/common/lowres.h<br>index 7e6baa844..2bf39c3b5 100644<br>--- a/source/common/lowres.h<br>+++ b/source/common/lowres.h<br>@@ -191,6 +191,7 @@ struct Lowres : public ReferencePlanes<br> uint16_t* lowresCosts[X265_BFRAME_MAX + 2][X265_BFRAME_MAX + 2];<br> int32_t* lowresMvCosts[2][X265_BFRAME_MAX + 2];<br> MV* lowresMvs[2][X265_BFRAME_MAX + 2];<br>+ MV* lowresMcstfMvs[2][4];<br> uint32_t maxBlocksInRow;<br> uint32_t maxBlocksInCol;<br> uint32_t maxBlocksInRowFullRes;<br>diff --git a/source/common/temporalfilter.cpp b/source/common/temporalfilter.cpp<br>index db58a0c15..aa50c2246 100644<br>--- a/source/common/temporalfilter.cpp<br>+++ b/source/common/temporalfilter.cpp<br>@@ -144,13 +144,11 @@ TemporalFilter::TemporalFilter()<br> m_QP = 0;<br> m_sliceTypeConfig = 3;<br> m_numRef = 0;<br>- m_useSADinME = 1;<br> <br> m_range = 2;<br> m_chromaFactor = 0.55;<br> m_sigmaMultiplier = 9.0;<br> m_sigmaZeroPoint = 10.0;<br>- m_motionVectorFactor = 16;<br> }<br> <br> void TemporalFilter::init(const x265_param* param)<br>@@ -163,8 +161,6 @@ void TemporalFilter::init(const x265_param* param)<br> m_numComponents = (m_internalCsp != X265_CSP_I400) ? MAX_NUM_COMPONENT : 1;<br> <br> m_metld = new MotionEstimatorTLD;<br>-<br>- predPUYuv.create(FENC_STRIDE, X265_CSP_I400);<br> }<br> <br> int TemporalFilter::createRefPicInfo(TemporalFilterRefPicInfo* refFrame, x265_param* param)<br>@@ -191,7 +187,7 @@ fail:<br> return 0;<br> }<br> <br>-int TemporalFilter::motionErrorLumaSAD(<br>+int MotionEstimatorTLD::motionErrorLumaSAD(MotionEstimatorTLD& m_metld,<br> pixel* src,<br> int stride,<br> pixel* buf,<br>@@ -233,7 +229,7 @@ int TemporalFilter::motionErrorLumaSAD(<br> /* copy PU block into cache */<br> primitives.pu[partEnum].copy_pp(predPUYuv.m_buf[0], FENC_STRIDE, bufferRowStart, buffStride);<br> <br>- error = m_metld->me.bufSAD(predPUYuv.m_buf[0], FENC_STRIDE);<br>+ error = m_metld.me.bufSAD(predPUYuv.m_buf[0], FENC_STRIDE);<br> #endif<br> if (error > besterror)<br> {<br>@@ -296,7 +292,7 @@ int TemporalFilter::motionErrorLumaSAD(<br> return error;<br> }<br> <br>-int TemporalFilter::motionErrorLumaSSD(<br>+int MotionEstimatorTLD::motionErrorLumaSSD(MotionEstimatorTLD& m_metld,<br> pixel* src,<br> int stride,<br> pixel* buf,<br>@@ -338,7 +334,7 @@ int TemporalFilter::motionErrorLumaSSD(<br> /* copy PU block into cache */<br> primitives.pu[partEnum].copy_pp(predPUYuv.m_buf[0], FENC_STRIDE, bufferRowStart, buffStride);<br> <br>- error = (int)<a href="http://primitives.cu" target="_blank">primitives.cu</a>[partEnum].sse_pp(m_metld->me.fencPUYuv.m_buf[0], FENC_STRIDE, predPUYuv.m_buf[0], FENC_STRIDE);<br>+ error = (int)<a href="http://primitives.cu" target="_blank">primitives.cu</a>[partEnum].sse_pp(m_metld.me.fencPUYuv.m_buf[0], FENC_STRIDE, predPUYuv.m_buf[0], FENC_STRIDE);<br> <br> #endif<br> if (error > besterror)<br>@@ -648,7 +644,7 @@ void TemporalFilter::bilateralFilter(Frame* frame,<br> }<br> }<br> <br>-void TemporalFilter::motionEstimationLuma(MV *mvs, uint32_t mvStride, pixel* src,int stride, int height, int width, pixel* buf, int blockSize,<br>+void MotionEstimatorTLD::motionEstimationLuma(MotionEstimatorTLD& m_metld, MV *mvs, uint32_t mvStride, pixel* src,int stride, int height, int width, pixel* buf, int blockSize,<br> int sRange, MV* previous, uint32_t prevMvStride, int factor)<br> {<br> <br>@@ -667,7 +663,7 @@ void TemporalFilter::motionEstimationLuma(MV *mvs, uint32_t mvStride, pixel* src<br> for (int blockX = 0; blockX + blockSize <= origWidth; blockX += stepSize)<br> {<br> const intptr_t pelOffset = blockY * stride + blockX;<br>- m_metld->me.setSourcePU(src, stride, pelOffset, blockSize, blockSize, X265_HEX_SEARCH, 1);<br>+ m_metld.me.setSourcePU(src, stride, pelOffset, blockSize, blockSize, X265_HEX_SEARCH, 1);<br> <br> <br> MV best(0, 0);<br>@@ -694,9 +690,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs, uint32_t mvStride, pixel* src<br> MV old = previous[mvIdx];<br> <br> if (m_useSADinME)<br>- error = motionErrorLumaSAD(src, stride, buf, blockX, blockY, old.x * factor, old.y * factor, blockSize, leastError);<br>+ error = motionErrorLumaSAD(m_metld, src, stride, buf, blockX, blockY, old.x * factor, old.y * factor, blockSize, leastError);<br> else<br>- error = motionErrorLumaSSD(src, stride, buf, blockX, blockY, old.x * factor, old.y * factor, blockSize, leastError);<br>+ error = motionErrorLumaSSD(m_metld, src, stride, buf, blockX, blockY, old.x * factor, old.y * factor, blockSize, leastError);<br> <br> if (error < leastError)<br> {<br>@@ -708,9 +704,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs, uint32_t mvStride, pixel* src<br> }<br> <br> if (m_useSADinME)<br>- error = motionErrorLumaSAD(src, stride, buf, blockX, blockY, 0, 0, blockSize, leastError);<br>+ error = motionErrorLumaSAD(m_metld, src, stride, buf, blockX, blockY, 0, 0, blockSize, leastError);<br> else<br>- error = motionErrorLumaSSD(src, stride, buf, blockX, blockY, 0, 0, blockSize, leastError);<br>+ error = motionErrorLumaSSD(m_metld, src, stride, buf, blockX, blockY, 0, 0, blockSize, leastError);<br> <br> if (error < leastError)<br> {<br>@@ -726,9 +722,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs, uint32_t mvStride, pixel* src<br> for (int x2 = prevBest.x / m_motionVectorFactor - range; x2 <= prevBest.x / m_motionVectorFactor + range; x2++)<br> {<br> if (m_useSADinME)<br>- error = motionErrorLumaSAD(src, stride, buf, blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize, leastError);<br>+ error = motionErrorLumaSAD(m_metld, src, stride, buf, blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize, leastError);<br> else<br>- error = motionErrorLumaSSD(src, stride, buf, blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize, leastError);<br>+ error = motionErrorLumaSSD(m_metld, src, stride, buf, blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize, leastError);<br> if (error < leastError)<br> {<br> best.set(x2 * m_motionVectorFactor, y2 * m_motionVectorFactor);<br>@@ -743,9 +739,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs, uint32_t mvStride, pixel* src<br> MV aboveMV = mvs[idx];<br> <br> if (m_useSADinME)<br>- error = motionErrorLumaSAD(src, stride, buf, blockX, blockY, aboveMV.x, aboveMV.y, blockSize, leastError);<br>+ error = motionErrorLumaSAD(m_metld, src, stride, buf, blockX, blockY, aboveMV.x, aboveMV.y, blockSize, leastError);<br> else<br>- error = motionErrorLumaSSD(src, stride, buf, blockX, blockY, aboveMV.x, aboveMV.y, blockSize, leastError);<br>+ error = motionErrorLumaSSD(m_metld, src, stride, buf, blockX, blockY, aboveMV.x, aboveMV.y, blockSize, leastError);<br> <br> if (error < leastError)<br> {<br>@@ -760,9 +756,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs, uint32_t mvStride, pixel* src<br> MV leftMV = mvs[idx];<br> <br> if (m_useSADinME)<br>- error = motionErrorLumaSAD(src, stride, buf, blockX, blockY, leftMV.x, leftMV.y, blockSize, leastError);<br>+ error = motionErrorLumaSAD(m_metld, src, stride, buf, blockX, blockY, leftMV.x, leftMV.y, blockSize, leastError);<br> else<br>- error = motionErrorLumaSSD(src, stride, buf, blockX, blockY, leftMV.x, leftMV.y, blockSize, leastError);<br>+ error = motionErrorLumaSSD(m_metld, src, stride, buf, blockX, blockY, leftMV.x, leftMV.y, blockSize, leastError);<br> <br> if (error < leastError)<br> {<br>@@ -802,7 +798,7 @@ void TemporalFilter::motionEstimationLuma(MV *mvs, uint32_t mvStride, pixel* src<br> }<br> <br> <br>-void TemporalFilter::motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride, PicYuv *orig, PicYuv *buffer, int blockSize,<br>+void MotionEstimatorTLD::motionEstimationLumaDoubleRes(MotionEstimatorTLD& m_metld, MV *mvs, uint32_t mvStride, PicYuv *orig, PicYuv *buffer, int blockSize,<br> MV *previous, uint32_t prevMvStride, int factor, int* minError)<br> {<br> <br>@@ -822,7 +818,7 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride, P<br> {<br> <br> const intptr_t pelOffset = blockY * orig->m_stride + blockX;<br>- m_metld->me.setSourcePU(orig->m_picOrg[0], orig->m_stride, pelOffset, blockSize, blockSize, X265_HEX_SEARCH, 1);<br>+ m_metld.me.setSourcePU(orig->m_picOrg[0], orig->m_stride, pelOffset, blockSize, blockSize, X265_HEX_SEARCH, 1);<br> <br> MV best(0, 0);<br> int leastError = INT_MAX;<br>@@ -848,9 +844,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride, P<br> MV old = previous[mvIdx];<br> <br> if (m_useSADinME)<br>- error = motionErrorLumaSAD(orig->m_picOrg[0], orig->m_stride, buffer->m_picOrg[0], blockX, blockY, old.x * factor, old.y * factor, blockSize, leastError);<br>+ error = motionErrorLumaSAD(m_metld, orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, old.x * factor, old.y * factor, blockSize, leastError);<br> else<br>- error = motionErrorLumaSSD(orig->m_picOrg[0], orig->m_stride, buffer->m_picOrg[0], blockX, blockY, old.x * factor, old.y * factor, blockSize, leastError);<br>+ error = motionErrorLumaSSD(m_metld, orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, old.x * factor, old.y * factor, blockSize, leastError);<br> <br> if (error < leastError)<br> {<br>@@ -862,9 +858,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride, P<br> }<br> <br> if (m_useSADinME)<br>- error = motionErrorLumaSAD(orig->m_picOrg[0], orig->m_stride, buffer->m_picOrg[0], blockX, blockY, 0, 0, blockSize, leastError);<br>+ error = motionErrorLumaSAD(m_metld, orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, 0, 0, blockSize, leastError);<br> else<br>- error = motionErrorLumaSSD(orig->m_picOrg[0], orig->m_stride, buffer->m_picOrg[0], blockX, blockY, 0, 0, blockSize, leastError);<br>+ error = motionErrorLumaSSD(m_metld, orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, 0, 0, blockSize, leastError);<br> <br> if (error < leastError)<br> {<br>@@ -880,9 +876,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride, P<br> for (int x2 = prevBest.x / m_motionVectorFactor - range; x2 <= prevBest.x / m_motionVectorFactor + range; x2++)<br> {<br> if (m_useSADinME)<br>- error = motionErrorLumaSAD(orig->m_picOrg[0], orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize, leastError);<br>+ error = motionErrorLumaSAD(m_metld, orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize, leastError);<br> else<br>- error = motionErrorLumaSSD(orig->m_picOrg[0], orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize, leastError);<br>+ error = motionErrorLumaSSD(m_metld, orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize, leastError);<br> <br> if (error < leastError)<br> {<br>@@ -899,9 +895,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride, P<br> for (int x2 = prevBest.x - doubleRange; x2 <= prevBest.x + doubleRange; x2++)<br> {<br> if (m_useSADinME)<br>- error = motionErrorLumaSAD(orig->m_picOrg[0], orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2, y2, blockSize, leastError);<br>+ error = motionErrorLumaSAD(m_metld, orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2, y2, blockSize, leastError);<br> else<br>- error = motionErrorLumaSSD(orig->m_picOrg[0], orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2, y2, blockSize, leastError);<br>+ error = motionErrorLumaSSD(m_metld, orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2, y2, blockSize, leastError);<br> <br> if (error < leastError)<br> {<br>@@ -918,9 +914,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride, P<br> MV aboveMV = mvs[idx];<br> <br> if (m_useSADinME)<br>- error = motionErrorLumaSAD(orig->m_picOrg[0], orig->m_stride, buffer->m_picOrg[0], blockX, blockY, aboveMV.x, aboveMV.y, blockSize, leastError);<br>+ error = motionErrorLumaSAD(m_metld, orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, aboveMV.x, aboveMV.y, blockSize, leastError);<br> else<br>- error = motionErrorLumaSSD(orig->m_picOrg[0], orig->m_stride, buffer->m_picOrg[0], blockX, blockY, aboveMV.x, aboveMV.y, blockSize, leastError);<br>+ error = motionErrorLumaSSD(m_metld, orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, aboveMV.x, aboveMV.y, blockSize, leastError);<br> <br> if (error < leastError)<br> {<br>@@ -935,9 +931,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride, P<br> MV leftMV = mvs[idx];<br> <br> if (m_useSADinME)<br>- error = motionErrorLumaSAD(orig->m_picOrg[0], orig->m_stride, buffer->m_picOrg[0], blockX, blockY, leftMV.x, leftMV.y, blockSize, leastError);<br>+ error = motionErrorLumaSAD(m_metld, orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, leftMV.x, leftMV.y, blockSize, leastError);<br> else<br>- error = motionErrorLumaSSD(orig->m_picOrg[0], orig->m_stride, buffer->m_picOrg[0], blockX, blockY, leftMV.x, leftMV.y, blockSize, leastError);<br>+ error = motionErrorLumaSSD(m_metld, orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, leftMV.x, leftMV.y, blockSize, leastError);<br> <br> if (error < leastError)<br> {<br>diff --git a/source/common/temporalfilter.h b/source/common/temporalfilter.h<br>index 3e03d7737..c4316aca6 100644<br>--- a/source/common/temporalfilter.h<br>+++ b/source/common/temporalfilter.h<br>@@ -84,9 +84,47 @@ namespace X265_NS {<br> {<br> me.init(X265_CSP_I400);<br> me.setQP(X265_LOOKAHEAD_QP);<br>+ predPUYuv.create(FENC_STRIDE, X265_CSP_I400);<br>+ m_useSADinME = 1;<br>+ m_motionVectorFactor = 16;<br> }<br> <br>- ~MotionEstimatorTLD() {}<br>+ Yuv predPUYuv;<br>+ int m_useSADinME;<br>+ int m_motionVectorFactor;<br>+ int32_t m_bitDepth;<br>+<br>+ void init(const x265_param* param);<br>+<br>+ void motionEstimationLuma(MotionEstimatorTLD& m_tld, MV* mvs, uint32_t mvStride, pixel* src, int stride, int height, int width, pixel* buf, int bs, int sRange,<br>+ MV* previous = 0, uint32_t prevmvStride = 0, int factor = 1);<br>+<br>+ void motionEstimationLumaDoubleRes(MotionEstimatorTLD& m_tld, MV* mvs, uint32_t mvStride, PicYuv* orig, PicYuv* buffer, int blockSize,<br>+ MV* previous, uint32_t prevMvStride, int factor, int* minError);<br>+<br>+ int motionErrorLumaSSD(MotionEstimatorTLD& m_tld, pixel* src,<br>+ int stride,<br>+ pixel* buf,<br>+ int x,<br>+ int y,<br>+ int dx,<br>+ int dy,<br>+ int bs,<br>+ int besterror = 8 * 8 * 1024 * 1024);<br>+<br>+ int motionErrorLumaSAD(MotionEstimatorTLD& m_tld, pixel* src,<br>+ int stride,<br>+ pixel* buf,<br>+ int x,<br>+ int y,<br>+ int dx,<br>+ int dy,<br>+ int bs,<br>+ int besterror = 8 * 8 * 1024 * 1024);<br>+<br>+ ~MotionEstimatorTLD() {<br>+ predPUYuv.destroy();<br>+ }<br> };<br> <br> struct TemporalFilterRefPicInfo<br>@@ -134,7 +172,6 @@ namespace X265_NS {<br> double m_chromaFactor;<br> double m_sigmaMultiplier;<br> double m_sigmaZeroPoint;<br>- int m_motionVectorFactor;<br> int m_padding;<br> <br> // Private member variables<br>@@ -148,39 +185,11 @@ namespace X265_NS {<br> uint8_t m_sliceTypeConfig;<br> <br> MotionEstimatorTLD* m_metld;<br>- Yuv predPUYuv;<br>- int m_useSADinME;<br> <br> int createRefPicInfo(TemporalFilterRefPicInfo* refFrame, x265_param* param);<br> <br> void bilateralFilter(Frame* frame, TemporalFilterRefPicInfo* mctfRefList, double overallStrength);<br> <br>- void motionEstimationLuma(MV *mvs, uint32_t mvStride, pixel* src, int stride, int height, int width, pixel* buf, int bs, int sRange,<br>- MV *previous = 0, uint32_t prevmvStride = 0, int factor = 1);<br>-<br>- void motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride, PicYuv *orig, PicYuv *buffer, int blockSize,<br>- MV *previous, uint32_t prevMvStride, int factor, int* minError);<br>-<br>- int motionErrorLumaSSD(pixel* src,<br>- int stride,<br>- pixel* buf,<br>- int x,<br>- int y,<br>- int dx,<br>- int dy,<br>- int bs,<br>- int besterror = 8 * 8 * 1024 * 1024);<br>-<br>- int motionErrorLumaSAD(pixel* src,<br>- int stride,<br>- pixel* buf,<br>- int x,<br>- int y,<br>- int dx,<br>- int dy,<br>- int bs,<br>- int besterror = 8 * 8 * 1024 * 1024);<br>-<br> void destroyRefPicInfo(TemporalFilterRefPicInfo* curFrame);<br> <br> void applyMotion(MV *mvs, uint32_t mvsStride, PicYuv *input, PicYuv *output);<br>diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp<br>index 5fd885227..abc687ef4 100644<br>--- a/source/encoder/slicetype.cpp<br>+++ b/source/encoder/slicetype.cpp<br>@@ -1128,7 +1128,10 @@ bool Lookahead::create()<br> m_scratch = X265_MALLOC(int, m_tld[0].widthInCU);<br> <br> if (m_param->bEnableTemporalFilter)<br>+ {<br>+ m_metld = new MotionEstimatorTLD[numTLD];<br> m_origPicBuf = new OrigPicBuffer();<br>+ }<br> <br> return m_tld && m_scratch;<br> }<br>@@ -1170,7 +1173,10 @@ void Lookahead::destroy()<br> }<br> <br> if (m_param->bEnableTemporalFilter)<br>+ {<br> delete m_origPicBuf;<br>+ delete[] m_metld;<br>+ }<br> <br> X265_FREE(m_scratch);<br> delete [] m_tld;<br>@@ -1794,19 +1800,17 @@ void Lookahead::compCostBref(Lowres **frames, int start, int end, int num)<br> }<br> }<br> <br>-void Lookahead::estimatelowresmotion(Frame* curframe)<br>+void CostEstimateGroup::estimatelowresmotion(MotionEstimatorTLD& m_metld, Frame* curframe, int refId)<br> {<br>+ m_metld.m_bitDepth = curframe->m_param->internalBitDepth;<br>+ TemporalFilterRefPicInfo* ref = &curframe->m_mcstfRefList[refId];<br> <br>- for (int i = 1; i <= curframe->m_mcstf->m_numRef; i++)<br>- {<br>- TemporalFilterRefPicInfo * ref = &curframe->m_mcstfRefList[i - 1];<br>-<br>- curframe->m_mcstf->motionEstimationLuma(ref->mvs0, ref->mvsStride0, curframe->m_lowres.lowerResPlane[0], (curframe->m_lowres.lumaStride / 2), (curframe->m_lowres.lines / 2), (curframe->m_lowres.width / 2), ref->lowerRes, 16, m_param->searchRangeForLayer2);<br>- curframe->m_mcstf->motionEstimationLuma(ref->mvs1, ref->mvsStride1, curframe->m_lowres.lowresPlane[0], (curframe->m_lowres.lumaStride), (curframe->m_lowres.lines), (curframe->m_lowres.width), ref->lowres, 16, m_param->searchRangeForLayer1, ref->mvs0, ref->mvsStride0, 2);<br>- curframe->m_mcstf->motionEstimationLuma(ref->mvs2, ref->mvsStride2, curframe->m_fencPic->m_picOrg[0], curframe->m_fencPic->m_stride, curframe->m_fencPic->m_picHeight, curframe->m_fencPic->m_picWidth, ref->picBuffer->m_picOrg[0], 16, m_param->searchRangeForLayer0, ref->mvs1, ref->mvsStride1, 2);<br>- curframe->m_mcstf->motionEstimationLumaDoubleRes(ref->mvs, ref->mvsStride, curframe->m_fencPic, ref->picBuffer, 8, ref->mvs2, ref->mvsStride2, 1, ref->error);<br>- }<br>+ m_metld.motionEstimationLuma(m_metld, ref->mvs0, ref->mvsStride0, curframe->m_lowres.lowerResPlane[0], (int)(curframe->m_lowres.lumaStride / 2), (curframe->m_lowres.lines / 2), (curframe->m_lowres.width / 2), ref->lowerRes, 16, curframe->m_param->searchRangeForLayer2);<br>+ m_metld.motionEstimationLuma(m_metld, ref->mvs1, ref->mvsStride1, curframe->m_lowres.lowresPlane[0], (int)(curframe->m_lowres.lumaStride), (curframe->m_lowres.lines), (curframe->m_lowres.width), ref->lowres, 16, curframe->m_param->searchRangeForLayer1, ref->mvs0, ref->mvsStride0, 2);<br>+ m_metld.motionEstimationLuma(m_metld, ref->mvs2, ref->mvsStride2, curframe->m_fencPic->m_picOrg[0], (int)curframe->m_fencPic->m_stride, curframe->m_fencPic->m_picHeight, curframe->m_fencPic->m_picWidth, ref->picBuffer->m_picOrg[0], 16, curframe->m_param->searchRangeForLayer0, ref->mvs1, ref->mvsStride1, 2);<br>+ m_metld.motionEstimationLumaDoubleRes(m_metld, ref->mvs, ref->mvsStride, curframe->m_fencPic, ref->picBuffer, 8, ref->mvs2, ref->mvsStride2, 1, ref->error);<br> <br>+ curframe->m_lowres.lowresMcstfMvs[0][refId][0].x = 1;<br> }<br> <br> inline int enqueueRefFrame(Frame* iterFrame, Frame* curFrame, bool isPreFiltered, int16_t i)<br>@@ -2156,20 +2160,39 @@ void Lookahead::slicetypeDecide()<br> }<br> }<br> <br>- Frame* frameEnc = m_inputQueue.first();<br>- for (int i = 0; i < m_inputQueue.size(); i++)<br>+ if (m_bBatchMotionSearch && m_param->bEnableTemporalFilter)<br> {<br>- if (m_param->bEnableTemporalFilter && isFilterThisframe(frameEnc->m_mcstf->m_sliceTypeConfig, frameEnc->m_lowres.sliceType))<br>+ /* pre-calculate all motion searches, using many worker threads */<br>+ CostEstimateGroup estGroup(*this, frames);<br>+ Frame* frameEnc = m_inputQueue.first();<br>+ for (int b = 0; b < m_inputQueue.size(); b++)<br> {<br>- if (!generatemcstf(frameEnc, m_origPicBuf->m_mcstfPicList, m_inputQueue.last()->m_poc))<br>+ if (m_param->bEnableTemporalFilter && isFilterThisframe(frameEnc->m_mcstf->m_sliceTypeConfig, frameEnc->m_lowres.sliceType))<br> {<br>- x265_log(m_param, X265_LOG_ERROR, "Failed to initialize MCSTFReferencePicInfo at POC %d\n", frameEnc->m_poc);<br>- fflush(stderr);<br>- }<br>+ if (!generatemcstf(frameEnc, m_origPicBuf->m_mcstfPicList, m_inputQueue.last()->m_poc))<br>+ {<br>+ x265_log(m_param, X265_LOG_ERROR, "Failed to initialize MCSTFReferencePicInfo at POC %d\n", frameEnc->m_poc);<br>+ fflush(stderr);<br>+ }<br>+<br>+ for (int j = 1; j <= frameEnc->m_mcstf->m_numRef; j++)<br>+ {<br>+ TemporalFilterRefPicInfo* ref = &frameEnc->m_mcstfRefList[j - 1];<br>+ int i = ref->poc;<br>+<br>+ /* Skip search if already done */<br>+ if (frames[b + 1]->lowresMcstfMvs[0][j - 1][0].x != 0x7FFF)<br>+ continue;<br> <br>- estimatelowresmotion(frameEnc);<br>+ estGroup.add(j - 1, i, frameEnc->m_poc);<br>+ }<br>+ }<br>+ frameEnc = frameEnc->m_next;<br> }<br>- frameEnc = frameEnc->m_next;<br>+<br>+ /* auto-disable after the first batch if pool is small */<br>+ m_bBatchMotionSearch &= m_pool->m_numWorkers >= 4;<br>+ estGroup.finishBatch();<br> }<br> <br> if (m_param->bEnableTemporalSubLayers > 2)<br>@@ -4029,6 +4052,7 @@ void CostEstimateGroup::processTasks(int workerThreadID)<br> if (workerThreadID < 0)<br> id = pool ? pool->m_numWorkers : 0;<br> LookaheadTLD& tld = m_lookahead.m_tld[id];<br>+ MotionEstimatorTLD& m_metld = m_lookahead.m_metld[id];<br> <br> m_lock.acquire();<br> while (m_jobAcquired < m_jobTotal)<br>@@ -4042,7 +4066,14 @@ void CostEstimateGroup::processTasks(int workerThreadID)<br> ProfileScopeEvent(estCostSingle);<br> <br> Estimate& e = m_estimates[i];<br>- estimateFrameCost(tld, e.p0, e.p1, e.b, false);<br>+ Frame* curFrame = m_lookahead.m_inputQueue.getPOC(e.b);<br>+<br>+ if (m_lookahead.m_param->bEnableTemporalFilter && curFrame && (curFrame->m_lowres.sliceType == X265_TYPE_IDR || curFrame->m_lowres.sliceType == X265_TYPE_I || curFrame->m_lowres.sliceType == X265_TYPE_P))<br>+ {<br>+ estimatelowresmotion(m_metld, curFrame, e.p0);<br>+ }<br>+ else<br>+ estimateFrameCost(tld, e.p0, e.p1, e.b, false);<br> }<br> else<br> {<br>diff --git a/source/encoder/slicetype.h b/source/encoder/slicetype.h<br>index 214e295b7..be6ac8112 100644<br>--- a/source/encoder/slicetype.h<br>+++ b/source/encoder/slicetype.h<br>@@ -204,6 +204,7 @@ public:<br> int8_t m_gopId;<br> <br> OrigPicBuffer* m_origPicBuf;<br>+ MotionEstimatorTLD* m_metld;<br> <br> Lookahead(x265_param *param, ThreadPool *pool);<br> #if DETAILED_CU_STATS<br>@@ -227,7 +228,6 @@ public:<br> void getEstimatedPictureCost(Frame *pic);<br> void setLookaheadQueue();<br> int findSliceType(int poc);<br>- void estimatelowresmotion(Frame* frame);<br> bool generatemcstf(Frame * frame, PicList refPic, int poclast);<br> bool isFilterThisframe(uint8_t sliceTypeConfig, int curSliceType);<br> <br>@@ -327,6 +327,8 @@ protected:<br> int64_t estimateFrameCost(LookaheadTLD& tld, int p0, int p1, int b, bool intraPenalty);<br> void estimateCUCost(LookaheadTLD& tld, int cux, int cuy, int p0, int p1, int b, bool bDoSearch[2], bool lastRow, int slice, bool hme);<br> <br>+ void estimatelowresmotion(MotionEstimatorTLD& m_metld, Frame* curframe, int refId);<br>+<br> CostEstimateGroup& operator=(const CostEstimateGroup&);<br> };<br> <br>-- <br>2.36.0.windows.1<br><br></div>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org" target="_blank">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" rel="noreferrer" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</blockquote></div>