<div dir="ltr">From 79dd07b35ff128ec13f0a6902b8fad1f7d419a04 Mon Sep 17 00:00:00 2001<br>From: AnusuyaKumarasamy <<a href="mailto:anusuya.kumarasamy@multicorewareinc.com">anusuya.kumarasamy@multicorewareinc.com</a>><br>Date: Mon, 11 Nov 2024 10:50:11 +0530<br>Subject: [PATCH 10/10] Added batch-motion-search for all planes in Lookahead<br><br>---<br> source/common/lowres.cpp         | 15 +++++++<br> source/common/lowres.h           |  1 +<br> source/common/temporalfilter.cpp | 64 ++++++++++++++--------------<br> source/common/temporalfilter.h   | 69 +++++++++++++++++--------------<br> source/encoder/slicetype.cpp     | 71 +++++++++++++++++++++++---------<br> source/encoder/slicetype.h       |  4 +-<br> 6 files changed, 139 insertions(+), 85 deletions(-)<br><br>diff --git a/source/common/lowres.cpp b/source/common/lowres.cpp<br>index 17c071c2c..1596f79da 100644<br>--- a/source/common/lowres.cpp<br>+++ b/source/common/lowres.cpp<br>@@ -194,6 +194,11 @@ bool Lowres::create(x265_param* param, PicYuv *origPic, uint32_t qgSize)<br>         }<br>     }<br> <br>+    for (int i = 0; i < 4; i++)<br>+    {<br>+        CHECKED_MALLOC(lowresMcstfMvs[0][i], MV, cuCount);<br>+    }<br>+<br>     for (int i = 0; i < bframes + 2; i++)<br>     {<br>         CHECKED_MALLOC(lowresMvs[0][i], MV, cuCount);<br>@@ -281,6 +286,11 @@ void Lowres::destroy(x265_param* param)<br>             X265_FREE(lowerResMvCosts[1][i]);<br>         }<br>     }<br>+<br>+    for (int i = 0; i < 4; i++)<br>+    {<br>+        X265_FREE(lowresMcstfMvs[0][i]);<br>+    }<br>     X265_FREE(qpAqOffset);<br>     X265_FREE(invQscaleFactor);<br>     X265_FREE(qpCuTreeOffset);<br>@@ -358,6 +368,11 @@ void Lowres::init(PicYuv *origPic, int poc)<br>         lowresMvs[1][i][0].x = 0x7FFF;<br>     }<br> <br>+    for (int i = 0; i < 4; i++)<br>+    {<br>+        lowresMcstfMvs[0][i][0].x = 0x7FFF;<br>+    }<br>+<br>     for (int i = 0; i < bframes + 2; i++)<br>         intraMbs[i] = 0;<br>     if (origPic->m_param->rc.vbvBufferSize)<br>diff --git a/source/common/lowres.h b/source/common/lowres.h<br>index 7e6baa844..2bf39c3b5 100644<br>--- a/source/common/lowres.h<br>+++ b/source/common/lowres.h<br>@@ -191,6 +191,7 @@ struct Lowres : public ReferencePlanes<br>     uint16_t* lowresCosts[X265_BFRAME_MAX + 2][X265_BFRAME_MAX + 2];<br>     int32_t*  lowresMvCosts[2][X265_BFRAME_MAX + 2];<br>     MV*       lowresMvs[2][X265_BFRAME_MAX + 2];<br>+    MV*       lowresMcstfMvs[2][4];<br>     uint32_t  maxBlocksInRow;<br>     uint32_t  maxBlocksInCol;<br>     uint32_t  maxBlocksInRowFullRes;<br>diff --git a/source/common/temporalfilter.cpp b/source/common/temporalfilter.cpp<br>index db58a0c15..aa50c2246 100644<br>--- a/source/common/temporalfilter.cpp<br>+++ b/source/common/temporalfilter.cpp<br>@@ -144,13 +144,11 @@ TemporalFilter::TemporalFilter()<br>     m_QP = 0;<br>     m_sliceTypeConfig = 3;<br>     m_numRef = 0;<br>-    m_useSADinME = 1;<br> <br>     m_range = 2;<br>     m_chromaFactor = 0.55;<br>     m_sigmaMultiplier = 9.0;<br>     m_sigmaZeroPoint = 10.0;<br>-    m_motionVectorFactor = 16;<br> }<br> <br> void TemporalFilter::init(const x265_param* param)<br>@@ -163,8 +161,6 @@ void TemporalFilter::init(const x265_param* param)<br>     m_numComponents = (m_internalCsp != X265_CSP_I400) ? MAX_NUM_COMPONENT : 1;<br> <br>     m_metld = new MotionEstimatorTLD;<br>-<br>-    predPUYuv.create(FENC_STRIDE, X265_CSP_I400);<br> }<br> <br> int TemporalFilter::createRefPicInfo(TemporalFilterRefPicInfo* refFrame, x265_param* param)<br>@@ -191,7 +187,7 @@ fail:<br>     return 0;<br> }<br> <br>-int TemporalFilter::motionErrorLumaSAD(<br>+int MotionEstimatorTLD::motionErrorLumaSAD(MotionEstimatorTLD& m_metld,<br>     pixel* src,<br>     int stride,<br>     pixel* buf,<br>@@ -233,7 +229,7 @@ int TemporalFilter::motionErrorLumaSAD(<br>         /* copy PU block into cache */<br>         primitives.pu[partEnum].copy_pp(predPUYuv.m_buf[0], FENC_STRIDE, bufferRowStart, buffStride);<br> <br>-        error = m_metld->me.bufSAD(predPUYuv.m_buf[0], FENC_STRIDE);<br>+        error = m_metld.me.bufSAD(predPUYuv.m_buf[0], FENC_STRIDE);<br> #endif<br>         if (error > besterror)<br>         {<br>@@ -296,7 +292,7 @@ int TemporalFilter::motionErrorLumaSAD(<br>     return error;<br> }<br> <br>-int TemporalFilter::motionErrorLumaSSD(<br>+int MotionEstimatorTLD::motionErrorLumaSSD(MotionEstimatorTLD& m_metld,<br>     pixel* src,<br>     int stride,<br>     pixel* buf,<br>@@ -338,7 +334,7 @@ int TemporalFilter::motionErrorLumaSSD(<br>         /* copy PU block into cache */<br>         primitives.pu[partEnum].copy_pp(predPUYuv.m_buf[0], FENC_STRIDE, bufferRowStart, buffStride);<br> <br>-        error = (int)<a href="http://primitives.cu">primitives.cu</a>[partEnum].sse_pp(m_metld->me.fencPUYuv.m_buf[0], FENC_STRIDE, predPUYuv.m_buf[0], FENC_STRIDE);<br>+        error = (int)<a href="http://primitives.cu">primitives.cu</a>[partEnum].sse_pp(m_metld.me.fencPUYuv.m_buf[0], FENC_STRIDE, predPUYuv.m_buf[0], FENC_STRIDE);<br> <br> #endif<br>         if (error > besterror)<br>@@ -648,7 +644,7 @@ void TemporalFilter::bilateralFilter(Frame* frame,<br>     }<br> }<br> <br>-void TemporalFilter::motionEstimationLuma(MV *mvs, uint32_t mvStride, pixel* src,int stride, int height, int width, pixel* buf, int blockSize,<br>+void MotionEstimatorTLD::motionEstimationLuma(MotionEstimatorTLD& m_metld, MV *mvs, uint32_t mvStride, pixel* src,int stride, int height, int width, pixel* buf, int blockSize,<br>     int sRange, MV* previous, uint32_t prevMvStride, int factor)<br> {<br> <br>@@ -667,7 +663,7 @@ void TemporalFilter::motionEstimationLuma(MV *mvs, uint32_t mvStride, pixel* src<br>         for (int blockX = 0; blockX + blockSize <= origWidth; blockX += stepSize)<br>         {<br>             const intptr_t pelOffset = blockY * stride + blockX;<br>-            m_metld->me.setSourcePU(src, stride, pelOffset, blockSize, blockSize, X265_HEX_SEARCH, 1);<br>+            m_metld.me.setSourcePU(src, stride, pelOffset, blockSize, blockSize, X265_HEX_SEARCH, 1);<br> <br> <br>             MV best(0, 0);<br>@@ -694,9 +690,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs, uint32_t mvStride, pixel* src<br>                             MV old = previous[mvIdx];<br> <br>                             if (m_useSADinME)<br>-                                error = motionErrorLumaSAD(src, stride, buf, blockX, blockY, old.x * factor, old.y * factor, blockSize, leastError);<br>+                                error = motionErrorLumaSAD(m_metld, src, stride, buf, blockX, blockY, old.x * factor, old.y * factor, blockSize, leastError);<br>                             else<br>-                                error = motionErrorLumaSSD(src, stride, buf, blockX, blockY, old.x * factor, old.y * factor, blockSize, leastError);<br>+                                error = motionErrorLumaSSD(m_metld, src, stride, buf, blockX, blockY, old.x * factor, old.y * factor, blockSize, leastError);<br> <br>                             if (error < leastError)<br>                             {<br>@@ -708,9 +704,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs, uint32_t mvStride, pixel* src<br>                 }<br> <br>                 if (m_useSADinME)<br>-                    error = motionErrorLumaSAD(src, stride, buf, blockX, blockY, 0, 0, blockSize, leastError);<br>+                    error = motionErrorLumaSAD(m_metld, src, stride, buf, blockX, blockY, 0, 0, blockSize, leastError);<br>                 else<br>-                    error = motionErrorLumaSSD(src, stride, buf, blockX, blockY, 0, 0, blockSize, leastError);<br>+                    error = motionErrorLumaSSD(m_metld, src, stride, buf, blockX, blockY, 0, 0, blockSize, leastError);<br> <br>                 if (error < leastError)<br>                 {<br>@@ -726,9 +722,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs, uint32_t mvStride, pixel* src<br>                 for (int x2 = prevBest.x / m_motionVectorFactor - range; x2 <= prevBest.x / m_motionVectorFactor + range; x2++)<br>                 {<br>                     if (m_useSADinME)<br>-                        error = motionErrorLumaSAD(src, stride, buf, blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize, leastError);<br>+                        error = motionErrorLumaSAD(m_metld, src, stride, buf, blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize, leastError);<br>                     else<br>-                        error = motionErrorLumaSSD(src, stride, buf, blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize, leastError);<br>+                        error = motionErrorLumaSSD(m_metld, src, stride, buf, blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize, leastError);<br>                     if (error < leastError)<br>                     {<br>                         best.set(x2 * m_motionVectorFactor, y2 * m_motionVectorFactor);<br>@@ -743,9 +739,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs, uint32_t mvStride, pixel* src<br>                 MV aboveMV = mvs[idx];<br> <br>                 if (m_useSADinME)<br>-                    error = motionErrorLumaSAD(src, stride, buf, blockX, blockY, aboveMV.x, aboveMV.y, blockSize, leastError);<br>+                    error = motionErrorLumaSAD(m_metld, src, stride, buf, blockX, blockY, aboveMV.x, aboveMV.y, blockSize, leastError);<br>                 else<br>-                    error = motionErrorLumaSSD(src, stride, buf, blockX, blockY, aboveMV.x, aboveMV.y, blockSize, leastError);<br>+                    error = motionErrorLumaSSD(m_metld, src, stride, buf, blockX, blockY, aboveMV.x, aboveMV.y, blockSize, leastError);<br> <br>                 if (error < leastError)<br>                 {<br>@@ -760,9 +756,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs, uint32_t mvStride, pixel* src<br>                 MV leftMV = mvs[idx];<br> <br>                 if (m_useSADinME)<br>-                    error = motionErrorLumaSAD(src, stride, buf, blockX, blockY, leftMV.x, leftMV.y, blockSize, leastError);<br>+                    error = motionErrorLumaSAD(m_metld, src, stride, buf, blockX, blockY, leftMV.x, leftMV.y, blockSize, leastError);<br>                 else<br>-                    error = motionErrorLumaSSD(src, stride, buf, blockX, blockY, leftMV.x, leftMV.y, blockSize, leastError);<br>+                    error = motionErrorLumaSSD(m_metld, src, stride, buf, blockX, blockY, leftMV.x, leftMV.y, blockSize, leastError);<br> <br>                 if (error < leastError)<br>                 {<br>@@ -802,7 +798,7 @@ void TemporalFilter::motionEstimationLuma(MV *mvs, uint32_t mvStride, pixel* src<br> }<br> <br> <br>-void TemporalFilter::motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride, PicYuv *orig, PicYuv *buffer, int blockSize,<br>+void MotionEstimatorTLD::motionEstimationLumaDoubleRes(MotionEstimatorTLD& m_metld, MV *mvs, uint32_t mvStride, PicYuv *orig, PicYuv *buffer, int blockSize,<br>     MV *previous, uint32_t prevMvStride, int factor, int* minError)<br> {<br> <br>@@ -822,7 +818,7 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride, P<br>         {<br> <br>             const intptr_t pelOffset = blockY * orig->m_stride + blockX;<br>-            m_metld->me.setSourcePU(orig->m_picOrg[0], orig->m_stride, pelOffset, blockSize, blockSize, X265_HEX_SEARCH, 1);<br>+            m_metld.me.setSourcePU(orig->m_picOrg[0], orig->m_stride, pelOffset, blockSize, blockSize, X265_HEX_SEARCH, 1);<br> <br>             MV best(0, 0);<br>             int leastError = INT_MAX;<br>@@ -848,9 +844,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride, P<br>                             MV old = previous[mvIdx];<br> <br>                             if (m_useSADinME)<br>-                                error = motionErrorLumaSAD(orig->m_picOrg[0], orig->m_stride, buffer->m_picOrg[0], blockX, blockY, old.x * factor, old.y * factor, blockSize, leastError);<br>+                                error = motionErrorLumaSAD(m_metld, orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, old.x * factor, old.y * factor, blockSize, leastError);<br>                             else<br>-                                error = motionErrorLumaSSD(orig->m_picOrg[0], orig->m_stride, buffer->m_picOrg[0], blockX, blockY, old.x * factor, old.y * factor, blockSize, leastError);<br>+                                error = motionErrorLumaSSD(m_metld, orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, old.x * factor, old.y * factor, blockSize, leastError);<br> <br>                             if (error < leastError)<br>                             {<br>@@ -862,9 +858,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride, P<br>                 }<br> <br>                 if (m_useSADinME)<br>-                    error = motionErrorLumaSAD(orig->m_picOrg[0], orig->m_stride, buffer->m_picOrg[0], blockX, blockY, 0, 0, blockSize, leastError);<br>+                    error = motionErrorLumaSAD(m_metld, orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, 0, 0, blockSize, leastError);<br>                 else<br>-                    error = motionErrorLumaSSD(orig->m_picOrg[0], orig->m_stride, buffer->m_picOrg[0], blockX, blockY, 0, 0, blockSize, leastError);<br>+                    error = motionErrorLumaSSD(m_metld, orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, 0, 0, blockSize, leastError);<br> <br>                 if (error < leastError)<br>                 {<br>@@ -880,9 +876,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride, P<br>                 for (int x2 = prevBest.x / m_motionVectorFactor - range; x2 <= prevBest.x / m_motionVectorFactor + range; x2++)<br>                 {<br>                     if (m_useSADinME)<br>-                        error = motionErrorLumaSAD(orig->m_picOrg[0], orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize, leastError);<br>+                        error = motionErrorLumaSAD(m_metld, orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize, leastError);<br>                     else<br>-                        error = motionErrorLumaSSD(orig->m_picOrg[0], orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize, leastError);<br>+                        error = motionErrorLumaSSD(m_metld, orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize, leastError);<br> <br>                     if (error < leastError)<br>                     {<br>@@ -899,9 +895,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride, P<br>                 for (int x2 = prevBest.x - doubleRange; x2 <= prevBest.x + doubleRange; x2++)<br>                 {<br>                     if (m_useSADinME)<br>-                        error = motionErrorLumaSAD(orig->m_picOrg[0], orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2, y2, blockSize, leastError);<br>+                        error = motionErrorLumaSAD(m_metld, orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2, y2, blockSize, leastError);<br>                     else<br>-                        error = motionErrorLumaSSD(orig->m_picOrg[0], orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2, y2, blockSize, leastError);<br>+                        error = motionErrorLumaSSD(m_metld, orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2, y2, blockSize, leastError);<br> <br>                     if (error < leastError)<br>                     {<br>@@ -918,9 +914,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride, P<br>                 MV aboveMV = mvs[idx];<br> <br>                 if (m_useSADinME)<br>-                    error = motionErrorLumaSAD(orig->m_picOrg[0], orig->m_stride, buffer->m_picOrg[0], blockX, blockY, aboveMV.x, aboveMV.y, blockSize, leastError);<br>+                    error = motionErrorLumaSAD(m_metld, orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, aboveMV.x, aboveMV.y, blockSize, leastError);<br>                 else<br>-                    error = motionErrorLumaSSD(orig->m_picOrg[0], orig->m_stride, buffer->m_picOrg[0], blockX, blockY, aboveMV.x, aboveMV.y, blockSize, leastError);<br>+                    error = motionErrorLumaSSD(m_metld, orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, aboveMV.x, aboveMV.y, blockSize, leastError);<br> <br>                 if (error < leastError)<br>                 {<br>@@ -935,9 +931,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride, P<br>                 MV leftMV = mvs[idx];<br> <br>                 if (m_useSADinME)<br>-                    error = motionErrorLumaSAD(orig->m_picOrg[0], orig->m_stride, buffer->m_picOrg[0], blockX, blockY, leftMV.x, leftMV.y, blockSize, leastError);<br>+                    error = motionErrorLumaSAD(m_metld, orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, leftMV.x, leftMV.y, blockSize, leastError);<br>                 else<br>-                    error = motionErrorLumaSSD(orig->m_picOrg[0], orig->m_stride, buffer->m_picOrg[0], blockX, blockY, leftMV.x, leftMV.y, blockSize, leastError);<br>+                    error = motionErrorLumaSSD(m_metld, orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, leftMV.x, leftMV.y, blockSize, leastError);<br> <br>                 if (error < leastError)<br>                 {<br>diff --git a/source/common/temporalfilter.h b/source/common/temporalfilter.h<br>index 3e03d7737..c4316aca6 100644<br>--- a/source/common/temporalfilter.h<br>+++ b/source/common/temporalfilter.h<br>@@ -84,9 +84,47 @@ namespace X265_NS {<br>         {<br>             me.init(X265_CSP_I400);<br>             me.setQP(X265_LOOKAHEAD_QP);<br>+            predPUYuv.create(FENC_STRIDE, X265_CSP_I400);<br>+            m_useSADinME = 1;<br>+            m_motionVectorFactor = 16;<br>         }<br> <br>-        ~MotionEstimatorTLD() {}<br>+        Yuv  predPUYuv;<br>+        int m_useSADinME;<br>+        int m_motionVectorFactor;<br>+        int32_t  m_bitDepth;<br>+<br>+        void init(const x265_param* param);<br>+<br>+        void motionEstimationLuma(MotionEstimatorTLD& m_tld, MV* mvs, uint32_t mvStride, pixel* src, int stride, int height, int width, pixel* buf, int bs, int sRange,<br>+            MV* previous = 0, uint32_t prevmvStride = 0, int factor = 1);<br>+<br>+        void motionEstimationLumaDoubleRes(MotionEstimatorTLD& m_tld, MV* mvs, uint32_t mvStride, PicYuv* orig, PicYuv* buffer, int blockSize,<br>+            MV* previous, uint32_t prevMvStride, int factor, int* minError);<br>+<br>+        int motionErrorLumaSSD(MotionEstimatorTLD& m_tld, pixel* src,<br>+            int stride,<br>+            pixel* buf,<br>+            int x,<br>+            int y,<br>+            int dx,<br>+            int dy,<br>+            int bs,<br>+            int besterror = 8 * 8 * 1024 * 1024);<br>+<br>+        int motionErrorLumaSAD(MotionEstimatorTLD& m_tld, pixel* src,<br>+            int stride,<br>+            pixel* buf,<br>+            int x,<br>+            int y,<br>+            int dx,<br>+            int dy,<br>+            int bs,<br>+            int besterror = 8 * 8 * 1024 * 1024);<br>+<br>+        ~MotionEstimatorTLD() {<br>+            predPUYuv.destroy();<br>+        }<br>     };<br> <br>     struct TemporalFilterRefPicInfo<br>@@ -134,7 +172,6 @@ namespace X265_NS {<br>         double m_chromaFactor;<br>         double m_sigmaMultiplier;<br>         double m_sigmaZeroPoint;<br>-        int m_motionVectorFactor;<br>         int m_padding;<br> <br>         // Private member variables<br>@@ -148,39 +185,11 @@ namespace X265_NS {<br>         uint8_t m_sliceTypeConfig;<br> <br>         MotionEstimatorTLD* m_metld;<br>-        Yuv  predPUYuv;<br>-        int m_useSADinME;<br> <br>         int createRefPicInfo(TemporalFilterRefPicInfo* refFrame, x265_param* param);<br> <br>         void bilateralFilter(Frame* frame, TemporalFilterRefPicInfo* mctfRefList, double overallStrength);<br> <br>-        void motionEstimationLuma(MV *mvs, uint32_t mvStride, pixel* src, int stride, int height, int width, pixel* buf, int bs, int sRange,<br>-            MV *previous = 0, uint32_t prevmvStride = 0, int factor = 1);<br>-<br>-        void motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride, PicYuv *orig, PicYuv *buffer, int blockSize,<br>-            MV *previous, uint32_t prevMvStride, int factor, int* minError);<br>-<br>-        int motionErrorLumaSSD(pixel* src,<br>-            int stride,<br>-            pixel* buf,<br>-            int x,<br>-            int y,<br>-            int dx,<br>-            int dy,<br>-            int bs,<br>-            int besterror = 8 * 8 * 1024 * 1024);<br>-<br>-        int motionErrorLumaSAD(pixel* src,<br>-            int stride,<br>-            pixel* buf,<br>-            int x,<br>-            int y,<br>-            int dx,<br>-            int dy,<br>-            int bs,<br>-            int besterror = 8 * 8 * 1024 * 1024);<br>-<br>         void destroyRefPicInfo(TemporalFilterRefPicInfo* curFrame);<br> <br>         void applyMotion(MV *mvs, uint32_t mvsStride, PicYuv *input, PicYuv *output);<br>diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp<br>index 5fd885227..abc687ef4 100644<br>--- a/source/encoder/slicetype.cpp<br>+++ b/source/encoder/slicetype.cpp<br>@@ -1128,7 +1128,10 @@ bool Lookahead::create()<br>     m_scratch = X265_MALLOC(int, m_tld[0].widthInCU);<br> <br>     if (m_param->bEnableTemporalFilter)<br>+    {<br>+        m_metld = new MotionEstimatorTLD[numTLD];<br>         m_origPicBuf = new OrigPicBuffer();<br>+    }<br> <br>     return m_tld && m_scratch;<br> }<br>@@ -1170,7 +1173,10 @@ void Lookahead::destroy()<br>     }<br> <br>     if (m_param->bEnableTemporalFilter)<br>+    {<br>         delete m_origPicBuf;<br>+        delete[] m_metld;<br>+    }<br> <br>     X265_FREE(m_scratch);<br>     delete [] m_tld;<br>@@ -1794,19 +1800,17 @@ void Lookahead::compCostBref(Lowres **frames, int start, int end, int num)<br>     }<br> }<br> <br>-void Lookahead::estimatelowresmotion(Frame* curframe)<br>+void CostEstimateGroup::estimatelowresmotion(MotionEstimatorTLD& m_metld, Frame* curframe, int refId)<br> {<br>+    m_metld.m_bitDepth = curframe->m_param->internalBitDepth;<br>+    TemporalFilterRefPicInfo* ref = &curframe->m_mcstfRefList[refId];<br> <br>-    for (int i = 1; i <= curframe->m_mcstf->m_numRef; i++)<br>-    {<br>-        TemporalFilterRefPicInfo * ref = &curframe->m_mcstfRefList[i - 1];<br>-<br>-        curframe->m_mcstf->motionEstimationLuma(ref->mvs0, ref->mvsStride0, curframe->m_lowres.lowerResPlane[0], (curframe->m_lowres.lumaStride / 2), (curframe->m_lowres.lines / 2), (curframe->m_lowres.width / 2), ref->lowerRes, 16, m_param->searchRangeForLayer2);<br>-        curframe->m_mcstf->motionEstimationLuma(ref->mvs1, ref->mvsStride1, curframe->m_lowres.lowresPlane[0], (curframe->m_lowres.lumaStride), (curframe->m_lowres.lines), (curframe->m_lowres.width), ref->lowres, 16, m_param->searchRangeForLayer1, ref->mvs0, ref->mvsStride0, 2);<br>-        curframe->m_mcstf->motionEstimationLuma(ref->mvs2, ref->mvsStride2, curframe->m_fencPic->m_picOrg[0], curframe->m_fencPic->m_stride, curframe->m_fencPic->m_picHeight, curframe->m_fencPic->m_picWidth, ref->picBuffer->m_picOrg[0], 16, m_param->searchRangeForLayer0, ref->mvs1, ref->mvsStride1, 2);<br>-        curframe->m_mcstf->motionEstimationLumaDoubleRes(ref->mvs, ref->mvsStride, curframe->m_fencPic, ref->picBuffer, 8, ref->mvs2, ref->mvsStride2, 1, ref->error);<br>-    }<br>+    m_metld.motionEstimationLuma(m_metld, ref->mvs0, ref->mvsStride0, curframe->m_lowres.lowerResPlane[0], (int)(curframe->m_lowres.lumaStride / 2), (curframe->m_lowres.lines / 2), (curframe->m_lowres.width / 2), ref->lowerRes, 16, curframe->m_param->searchRangeForLayer2);<br>+    m_metld.motionEstimationLuma(m_metld, ref->mvs1, ref->mvsStride1, curframe->m_lowres.lowresPlane[0], (int)(curframe->m_lowres.lumaStride), (curframe->m_lowres.lines), (curframe->m_lowres.width), ref->lowres, 16, curframe->m_param->searchRangeForLayer1, ref->mvs0, ref->mvsStride0, 2);<br>+    m_metld.motionEstimationLuma(m_metld, ref->mvs2, ref->mvsStride2, curframe->m_fencPic->m_picOrg[0], (int)curframe->m_fencPic->m_stride, curframe->m_fencPic->m_picHeight, curframe->m_fencPic->m_picWidth, ref->picBuffer->m_picOrg[0], 16, curframe->m_param->searchRangeForLayer0, ref->mvs1, ref->mvsStride1, 2);<br>+    m_metld.motionEstimationLumaDoubleRes(m_metld, ref->mvs, ref->mvsStride, curframe->m_fencPic, ref->picBuffer, 8, ref->mvs2, ref->mvsStride2, 1, ref->error);<br> <br>+    curframe->m_lowres.lowresMcstfMvs[0][refId][0].x = 1;<br> }<br> <br> inline int enqueueRefFrame(Frame* iterFrame, Frame* curFrame, bool isPreFiltered, int16_t i)<br>@@ -2156,20 +2160,39 @@ void Lookahead::slicetypeDecide()<br>         }<br>     }<br> <br>-    Frame* frameEnc = m_inputQueue.first();<br>-    for (int i = 0; i < m_inputQueue.size(); i++)<br>+    if (m_bBatchMotionSearch && m_param->bEnableTemporalFilter)<br>     {<br>-        if (m_param->bEnableTemporalFilter && isFilterThisframe(frameEnc->m_mcstf->m_sliceTypeConfig, frameEnc->m_lowres.sliceType))<br>+        /* pre-calculate all motion searches, using many worker threads */<br>+        CostEstimateGroup estGroup(*this, frames);<br>+        Frame* frameEnc = m_inputQueue.first();<br>+        for (int b = 0; b < m_inputQueue.size(); b++)<br>         {<br>-            if (!generatemcstf(frameEnc, m_origPicBuf->m_mcstfPicList, m_inputQueue.last()->m_poc))<br>+            if (m_param->bEnableTemporalFilter && isFilterThisframe(frameEnc->m_mcstf->m_sliceTypeConfig, frameEnc->m_lowres.sliceType))<br>             {<br>-                x265_log(m_param, X265_LOG_ERROR, "Failed to initialize MCSTFReferencePicInfo at POC %d\n", frameEnc->m_poc);<br>-                fflush(stderr);<br>-            }<br>+                if (!generatemcstf(frameEnc, m_origPicBuf->m_mcstfPicList, m_inputQueue.last()->m_poc))<br>+                {<br>+                    x265_log(m_param, X265_LOG_ERROR, "Failed to initialize MCSTFReferencePicInfo at POC %d\n", frameEnc->m_poc);<br>+                    fflush(stderr);<br>+                }<br>+<br>+                for (int j = 1; j <= frameEnc->m_mcstf->m_numRef; j++)<br>+                {<br>+                    TemporalFilterRefPicInfo* ref = &frameEnc->m_mcstfRefList[j - 1];<br>+                    int i = ref->poc;<br>+<br>+                    /* Skip search if already done */<br>+                    if (frames[b + 1]->lowresMcstfMvs[0][j - 1][0].x != 0x7FFF)<br>+                        continue;<br> <br>-            estimatelowresmotion(frameEnc);<br>+                    estGroup.add(j - 1, i, frameEnc->m_poc);<br>+                }<br>+            }<br>+            frameEnc = frameEnc->m_next;<br>         }<br>-         frameEnc = frameEnc->m_next;<br>+<br>+        /* auto-disable after the first batch if pool is small */<br>+        m_bBatchMotionSearch &= m_pool->m_numWorkers >= 4;<br>+        estGroup.finishBatch();<br>     }<br> <br>     if (m_param->bEnableTemporalSubLayers > 2)<br>@@ -4029,6 +4052,7 @@ void CostEstimateGroup::processTasks(int workerThreadID)<br>     if (workerThreadID < 0)<br>         id = pool ? pool->m_numWorkers : 0;<br>     LookaheadTLD& tld = m_lookahead.m_tld[id];<br>+    MotionEstimatorTLD& m_metld = m_lookahead.m_metld[id];<br> <br>     m_lock.acquire();<br>     while (m_jobAcquired < m_jobTotal)<br>@@ -4042,7 +4066,14 @@ void CostEstimateGroup::processTasks(int workerThreadID)<br>             ProfileScopeEvent(estCostSingle);<br> <br>             Estimate& e = m_estimates[i];<br>-            estimateFrameCost(tld, e.p0, e.p1, e.b, false);<br>+            Frame* curFrame = m_lookahead.m_inputQueue.getPOC(e.b);<br>+<br>+            if (m_lookahead.m_param->bEnableTemporalFilter && curFrame && (curFrame->m_lowres.sliceType == X265_TYPE_IDR || curFrame->m_lowres.sliceType == X265_TYPE_I || curFrame->m_lowres.sliceType == X265_TYPE_P))<br>+            {<br>+                estimatelowresmotion(m_metld, curFrame, e.p0);<br>+            }<br>+            else<br>+                estimateFrameCost(tld, e.p0, e.p1, e.b, false);<br>         }<br>         else<br>         {<br>diff --git a/source/encoder/slicetype.h b/source/encoder/slicetype.h<br>index 214e295b7..be6ac8112 100644<br>--- a/source/encoder/slicetype.h<br>+++ b/source/encoder/slicetype.h<br>@@ -204,6 +204,7 @@ public:<br>     int8_t                  m_gopId;<br> <br>     OrigPicBuffer*          m_origPicBuf;<br>+    MotionEstimatorTLD*     m_metld;<br> <br>     Lookahead(x265_param *param, ThreadPool *pool);<br> #if DETAILED_CU_STATS<br>@@ -227,7 +228,6 @@ public:<br>     void    getEstimatedPictureCost(Frame *pic);<br>     void    setLookaheadQueue();<br>     int     findSliceType(int poc);<br>-    void    estimatelowresmotion(Frame* frame);<br>     bool    generatemcstf(Frame * frame, PicList refPic, int poclast);<br>     bool    isFilterThisframe(uint8_t sliceTypeConfig, int curSliceType);<br> <br>@@ -327,6 +327,8 @@ protected:<br>     int64_t estimateFrameCost(LookaheadTLD& tld, int p0, int p1, int b, bool intraPenalty);<br>     void    estimateCUCost(LookaheadTLD& tld, int cux, int cuy, int p0, int p1, int b, bool bDoSearch[2], bool lastRow, int slice, bool hme);<br> <br>+    void    estimatelowresmotion(MotionEstimatorTLD& m_metld, Frame* curframe, int refId);<br>+<br>     CostEstimateGroup& operator=(const CostEstimateGroup&);<br> };<br> <br>-- <br>2.36.0.windows.1<br><br></div>