[x265] [PATCH 10/10] Added batch-motion-search for all planes in Lookahead

Anusuya Kumarasamy anusuya.kumarasamy at multicorewareinc.com
Mon Nov 11 14:25:38 UTC 2024


From 79dd07b35ff128ec13f0a6902b8fad1f7d419a04 Mon Sep 17 00:00:00 2001
From: AnusuyaKumarasamy <anusuya.kumarasamy at multicorewareinc.com>
Date: Mon, 11 Nov 2024 10:50:11 +0530
Subject: [PATCH 10/10] Added batch-motion-search for all planes in Lookahead

---
 source/common/lowres.cpp         | 15 +++++++
 source/common/lowres.h           |  1 +
 source/common/temporalfilter.cpp | 64 ++++++++++++++--------------
 source/common/temporalfilter.h   | 69 +++++++++++++++++--------------
 source/encoder/slicetype.cpp     | 71 +++++++++++++++++++++++---------
 source/encoder/slicetype.h       |  4 +-
 6 files changed, 139 insertions(+), 85 deletions(-)

diff --git a/source/common/lowres.cpp b/source/common/lowres.cpp
index 17c071c2c..1596f79da 100644
--- a/source/common/lowres.cpp
+++ b/source/common/lowres.cpp
@@ -194,6 +194,11 @@ bool Lowres::create(x265_param* param, PicYuv *origPic, uint32_t qgSize)
         }
     }

+    for (int i = 0; i < 4; i++)
+    {
+        CHECKED_MALLOC(lowresMcstfMvs[0][i], MV, cuCount);
+    }
+
     for (int i = 0; i < bframes + 2; i++)
     {
         CHECKED_MALLOC(lowresMvs[0][i], MV, cuCount);
@@ -281,6 +286,11 @@ void Lowres::destroy(x265_param* param)
             X265_FREE(lowerResMvCosts[1][i]);
         }
     }
+
+    for (int i = 0; i < 4; i++)
+    {
+        X265_FREE(lowresMcstfMvs[0][i]);
+    }
     X265_FREE(qpAqOffset);
     X265_FREE(invQscaleFactor);
     X265_FREE(qpCuTreeOffset);
@@ -358,6 +368,11 @@ void Lowres::init(PicYuv *origPic, int poc)
         lowresMvs[1][i][0].x = 0x7FFF;
     }

+    for (int i = 0; i < 4; i++)
+    {
+        lowresMcstfMvs[0][i][0].x = 0x7FFF;
+    }
+
     for (int i = 0; i < bframes + 2; i++)
         intraMbs[i] = 0;
     if (origPic->m_param->rc.vbvBufferSize)
diff --git a/source/common/lowres.h b/source/common/lowres.h
index 7e6baa844..2bf39c3b5 100644
--- a/source/common/lowres.h
+++ b/source/common/lowres.h
@@ -191,6 +191,7 @@ struct Lowres : public ReferencePlanes
     uint16_t* lowresCosts[X265_BFRAME_MAX + 2][X265_BFRAME_MAX + 2];
     int32_t*  lowresMvCosts[2][X265_BFRAME_MAX + 2];
     MV*       lowresMvs[2][X265_BFRAME_MAX + 2];
+    MV*       lowresMcstfMvs[2][4];
     uint32_t  maxBlocksInRow;
     uint32_t  maxBlocksInCol;
     uint32_t  maxBlocksInRowFullRes;
diff --git a/source/common/temporalfilter.cpp b/source/common/temporalfilter.cpp
index db58a0c15..aa50c2246 100644
--- a/source/common/temporalfilter.cpp
+++ b/source/common/temporalfilter.cpp
@@ -144,13 +144,11 @@ TemporalFilter::TemporalFilter()
     m_QP = 0;
     m_sliceTypeConfig = 3;
     m_numRef = 0;
-    m_useSADinME = 1;

     m_range = 2;
     m_chromaFactor = 0.55;
     m_sigmaMultiplier = 9.0;
     m_sigmaZeroPoint = 10.0;
-    m_motionVectorFactor = 16;
 }

 void TemporalFilter::init(const x265_param* param)
@@ -163,8 +161,6 @@ void TemporalFilter::init(const x265_param* param)
     m_numComponents = (m_internalCsp != X265_CSP_I400) ? MAX_NUM_COMPONENT
: 1;

     m_metld = new MotionEstimatorTLD;
-
-    predPUYuv.create(FENC_STRIDE, X265_CSP_I400);
 }

 int TemporalFilter::createRefPicInfo(TemporalFilterRefPicInfo* refFrame,
x265_param* param)
@@ -191,7 +187,7 @@ fail:
     return 0;
 }

-int TemporalFilter::motionErrorLumaSAD(
+int MotionEstimatorTLD::motionErrorLumaSAD(MotionEstimatorTLD& m_metld,
     pixel* src,
     int stride,
     pixel* buf,
@@ -233,7 +229,7 @@ int TemporalFilter::motionErrorLumaSAD(
         /* copy PU block into cache */
         primitives.pu[partEnum].copy_pp(predPUYuv.m_buf[0], FENC_STRIDE,
bufferRowStart, buffStride);

-        error = m_metld->me.bufSAD(predPUYuv.m_buf[0], FENC_STRIDE);
+        error = m_metld.me.bufSAD(predPUYuv.m_buf[0], FENC_STRIDE);
 #endif
         if (error > besterror)
         {
@@ -296,7 +292,7 @@ int TemporalFilter::motionErrorLumaSAD(
     return error;
 }

-int TemporalFilter::motionErrorLumaSSD(
+int MotionEstimatorTLD::motionErrorLumaSSD(MotionEstimatorTLD& m_metld,
     pixel* src,
     int stride,
     pixel* buf,
@@ -338,7 +334,7 @@ int TemporalFilter::motionErrorLumaSSD(
         /* copy PU block into cache */
         primitives.pu[partEnum].copy_pp(predPUYuv.m_buf[0], FENC_STRIDE,
bufferRowStart, buffStride);

-        error =
(int)primitives.cu[partEnum].sse_pp(m_metld->me.fencPUYuv.m_buf[0],
FENC_STRIDE, predPUYuv.m_buf[0], FENC_STRIDE);
+        error =
(int)primitives.cu[partEnum].sse_pp(m_metld.me.fencPUYuv.m_buf[0],
FENC_STRIDE, predPUYuv.m_buf[0], FENC_STRIDE);

 #endif
         if (error > besterror)
@@ -648,7 +644,7 @@ void TemporalFilter::bilateralFilter(Frame* frame,
     }
 }

-void TemporalFilter::motionEstimationLuma(MV *mvs, uint32_t mvStride,
pixel* src,int stride, int height, int width, pixel* buf, int blockSize,
+void MotionEstimatorTLD::motionEstimationLuma(MotionEstimatorTLD& m_metld,
MV *mvs, uint32_t mvStride, pixel* src,int stride, int height, int width,
pixel* buf, int blockSize,
     int sRange, MV* previous, uint32_t prevMvStride, int factor)
 {

@@ -667,7 +663,7 @@ void TemporalFilter::motionEstimationLuma(MV *mvs, uint32_t mvStride, pixel* src
         for (int blockX = 0; blockX + blockSize <= origWidth; blockX +=
stepSize)
         {
             const intptr_t pelOffset = blockY * stride + blockX;
-            m_metld->me.setSourcePU(src, stride, pelOffset, blockSize,
blockSize, X265_HEX_SEARCH, 1);
+            m_metld.me.setSourcePU(src, stride, pelOffset, blockSize,
blockSize, X265_HEX_SEARCH, 1);


             MV best(0, 0);
@@ -694,9 +690,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs, uint32_t mvStride, pixel* src
                             MV old = previous[mvIdx];

                             if (m_useSADinME)
-                                error = motionErrorLumaSAD(src, stride,
buf, blockX, blockY, old.x * factor, old.y * factor, blockSize, leastError);
+                                error = motionErrorLumaSAD(m_metld, src,
stride, buf, blockX, blockY, old.x * factor, old.y * factor, blockSize,
leastError);
                             else
-                                error = motionErrorLumaSSD(src, stride,
buf, blockX, blockY, old.x * factor, old.y * factor, blockSize, leastError);
+                                error = motionErrorLumaSSD(m_metld, src,
stride, buf, blockX, blockY, old.x * factor, old.y * factor, blockSize,
leastError);

                             if (error < leastError)
                             {
@@ -708,9 +704,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs, uint32_t mvStride, pixel* src
                 }

                 if (m_useSADinME)
-                    error = motionErrorLumaSAD(src, stride, buf, blockX,
blockY, 0, 0, blockSize, leastError);
+                    error = motionErrorLumaSAD(m_metld, src, stride, buf,
blockX, blockY, 0, 0, blockSize, leastError);
                 else
-                    error = motionErrorLumaSSD(src, stride, buf, blockX,
blockY, 0, 0, blockSize, leastError);
+                    error = motionErrorLumaSSD(m_metld, src, stride, buf,
blockX, blockY, 0, 0, blockSize, leastError);

                 if (error < leastError)
                 {
@@ -726,9 +722,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs, uint32_t mvStride, pixel* src
                 for (int x2 = prevBest.x / m_motionVectorFactor - range;
x2 <= prevBest.x / m_motionVectorFactor + range; x2++)
                 {
                     if (m_useSADinME)
-                        error = motionErrorLumaSAD(src, stride, buf,
blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor,
blockSize, leastError);
+                        error = motionErrorLumaSAD(m_metld, src, stride,
buf, blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor,
blockSize, leastError);
                     else
-                        error = motionErrorLumaSSD(src, stride, buf,
blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor,
blockSize, leastError);
+                        error = motionErrorLumaSSD(m_metld, src, stride,
buf, blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor,
blockSize, leastError);
                     if (error < leastError)
                     {
                         best.set(x2 * m_motionVectorFactor, y2 *
m_motionVectorFactor);
@@ -743,9 +739,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs, uint32_t mvStride, pixel* src
                 MV aboveMV = mvs[idx];

                 if (m_useSADinME)
-                    error = motionErrorLumaSAD(src, stride, buf, blockX,
blockY, aboveMV.x, aboveMV.y, blockSize, leastError);
+                    error = motionErrorLumaSAD(m_metld, src, stride, buf,
blockX, blockY, aboveMV.x, aboveMV.y, blockSize, leastError);
                 else
-                    error = motionErrorLumaSSD(src, stride, buf, blockX,
blockY, aboveMV.x, aboveMV.y, blockSize, leastError);
+                    error = motionErrorLumaSSD(m_metld, src, stride, buf,
blockX, blockY, aboveMV.x, aboveMV.y, blockSize, leastError);

                 if (error < leastError)
                 {
@@ -760,9 +756,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs, uint32_t mvStride, pixel* src
                 MV leftMV = mvs[idx];

                 if (m_useSADinME)
-                    error = motionErrorLumaSAD(src, stride, buf, blockX,
blockY, leftMV.x, leftMV.y, blockSize, leastError);
+                    error = motionErrorLumaSAD(m_metld, src, stride, buf,
blockX, blockY, leftMV.x, leftMV.y, blockSize, leastError);
                 else
-                    error = motionErrorLumaSSD(src, stride, buf, blockX,
blockY, leftMV.x, leftMV.y, blockSize, leastError);
+                    error = motionErrorLumaSSD(m_metld, src, stride, buf,
blockX, blockY, leftMV.x, leftMV.y, blockSize, leastError);

                 if (error < leastError)
                 {
@@ -802,7 +798,7 @@ void TemporalFilter::motionEstimationLuma(MV *mvs, uint32_t mvStride, pixel* src
 }


-void TemporalFilter::motionEstimationLumaDoubleRes(MV *mvs, uint32_t
mvStride, PicYuv *orig, PicYuv *buffer, int blockSize,
+void MotionEstimatorTLD::motionEstimationLumaDoubleRes(MotionEstimatorTLD&
m_metld, MV *mvs, uint32_t mvStride, PicYuv *orig, PicYuv *buffer, int
blockSize,
     MV *previous, uint32_t prevMvStride, int factor, int* minError)
 {

@@ -822,7 +818,7 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride, P
         {

             const intptr_t pelOffset = blockY * orig->m_stride + blockX;
-            m_metld->me.setSourcePU(orig->m_picOrg[0], orig->m_stride,
pelOffset, blockSize, blockSize, X265_HEX_SEARCH, 1);
+            m_metld.me.setSourcePU(orig->m_picOrg[0], orig->m_stride,
pelOffset, blockSize, blockSize, X265_HEX_SEARCH, 1);

             MV best(0, 0);
             int leastError = INT_MAX;
@@ -848,9 +844,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride, P
                             MV old = previous[mvIdx];

                             if (m_useSADinME)
-                                error =
motionErrorLumaSAD(orig->m_picOrg[0], orig->m_stride, buffer->m_picOrg[0],
blockX, blockY, old.x * factor, old.y * factor, blockSize, leastError);
+                                error = motionErrorLumaSAD(m_metld,
orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
blockY, old.x * factor, old.y * factor, blockSize, leastError);
                             else
-                                error =
motionErrorLumaSSD(orig->m_picOrg[0], orig->m_stride, buffer->m_picOrg[0],
blockX, blockY, old.x * factor, old.y * factor, blockSize, leastError);
+                                error = motionErrorLumaSSD(m_metld,
orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
blockY, old.x * factor, old.y * factor, blockSize, leastError);

                             if (error < leastError)
                             {
@@ -862,9 +858,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride, P
                 }

                 if (m_useSADinME)
-                    error = motionErrorLumaSAD(orig->m_picOrg[0],
orig->m_stride, buffer->m_picOrg[0], blockX, blockY, 0, 0, blockSize,
leastError);
+                    error = motionErrorLumaSAD(m_metld, orig->m_picOrg[0],
(int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, 0, 0, blockSize,
leastError);
                 else
-                    error = motionErrorLumaSSD(orig->m_picOrg[0],
orig->m_stride, buffer->m_picOrg[0], blockX, blockY, 0, 0, blockSize,
leastError);
+                    error = motionErrorLumaSSD(m_metld, orig->m_picOrg[0],
(int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, 0, 0, blockSize,
leastError);

                 if (error < leastError)
                 {
@@ -880,9 +876,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride, P
                 for (int x2 = prevBest.x / m_motionVectorFactor - range;
x2 <= prevBest.x / m_motionVectorFactor + range; x2++)
                 {
                     if (m_useSADinME)
-                        error = motionErrorLumaSAD(orig->m_picOrg[0],
orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2 *
m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize, leastError);
+                        error = motionErrorLumaSAD(m_metld,
orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize,
leastError);
                     else
-                        error = motionErrorLumaSSD(orig->m_picOrg[0],
orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2 *
m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize, leastError);
+                        error = motionErrorLumaSSD(m_metld,
orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize,
leastError);

                     if (error < leastError)
                     {
@@ -899,9 +895,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride, P
                 for (int x2 = prevBest.x - doubleRange; x2 <= prevBest.x +
doubleRange; x2++)
                 {
                     if (m_useSADinME)
-                        error = motionErrorLumaSAD(orig->m_picOrg[0],
orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2, y2, blockSize,
leastError);
+                        error = motionErrorLumaSAD(m_metld,
orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
blockY, x2, y2, blockSize, leastError);
                     else
-                        error = motionErrorLumaSSD(orig->m_picOrg[0],
orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2, y2, blockSize,
leastError);
+                        error = motionErrorLumaSSD(m_metld,
orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
blockY, x2, y2, blockSize, leastError);

                     if (error < leastError)
                     {
@@ -918,9 +914,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride, P
                 MV aboveMV = mvs[idx];

                 if (m_useSADinME)
-                    error = motionErrorLumaSAD(orig->m_picOrg[0],
orig->m_stride, buffer->m_picOrg[0], blockX, blockY, aboveMV.x, aboveMV.y,
blockSize, leastError);
+                    error = motionErrorLumaSAD(m_metld, orig->m_picOrg[0],
(int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, aboveMV.x,
aboveMV.y, blockSize, leastError);
                 else
-                    error = motionErrorLumaSSD(orig->m_picOrg[0],
orig->m_stride, buffer->m_picOrg[0], blockX, blockY, aboveMV.x, aboveMV.y,
blockSize, leastError);
+                    error = motionErrorLumaSSD(m_metld, orig->m_picOrg[0],
(int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, aboveMV.x,
aboveMV.y, blockSize, leastError);

                 if (error < leastError)
                 {
@@ -935,9 +931,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride, P
                 MV leftMV = mvs[idx];

                 if (m_useSADinME)
-                    error = motionErrorLumaSAD(orig->m_picOrg[0],
orig->m_stride, buffer->m_picOrg[0], blockX, blockY, leftMV.x, leftMV.y,
blockSize, leastError);
+                    error = motionErrorLumaSAD(m_metld, orig->m_picOrg[0],
(int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, leftMV.x,
leftMV.y, blockSize, leastError);
                 else
-                    error = motionErrorLumaSSD(orig->m_picOrg[0],
orig->m_stride, buffer->m_picOrg[0], blockX, blockY, leftMV.x, leftMV.y,
blockSize, leastError);
+                    error = motionErrorLumaSSD(m_metld, orig->m_picOrg[0],
(int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, leftMV.x,
leftMV.y, blockSize, leastError);

                 if (error < leastError)
                 {
diff --git a/source/common/temporalfilter.h b/source/common/temporalfilter.h
index 3e03d7737..c4316aca6 100644
--- a/source/common/temporalfilter.h
+++ b/source/common/temporalfilter.h
@@ -84,9 +84,47 @@ namespace X265_NS {
         {
             me.init(X265_CSP_I400);
             me.setQP(X265_LOOKAHEAD_QP);
+            predPUYuv.create(FENC_STRIDE, X265_CSP_I400);
+            m_useSADinME = 1;
+            m_motionVectorFactor = 16;
         }

-        ~MotionEstimatorTLD() {}
+        Yuv  predPUYuv;
+        int m_useSADinME;
+        int m_motionVectorFactor;
+        int32_t  m_bitDepth;
+
+        void init(const x265_param* param);
+
+        void motionEstimationLuma(MotionEstimatorTLD& m_tld, MV* mvs,
uint32_t mvStride, pixel* src, int stride, int height, int width, pixel*
buf, int bs, int sRange,
+            MV* previous = 0, uint32_t prevmvStride = 0, int factor = 1);
+
+        void motionEstimationLumaDoubleRes(MotionEstimatorTLD& m_tld, MV*
mvs, uint32_t mvStride, PicYuv* orig, PicYuv* buffer, int blockSize,
+            MV* previous, uint32_t prevMvStride, int factor, int*
minError);
+
+        int motionErrorLumaSSD(MotionEstimatorTLD& m_tld, pixel* src,
+            int stride,
+            pixel* buf,
+            int x,
+            int y,
+            int dx,
+            int dy,
+            int bs,
+            int besterror = 8 * 8 * 1024 * 1024);
+
+        int motionErrorLumaSAD(MotionEstimatorTLD& m_tld, pixel* src,
+            int stride,
+            pixel* buf,
+            int x,
+            int y,
+            int dx,
+            int dy,
+            int bs,
+            int besterror = 8 * 8 * 1024 * 1024);
+
+        ~MotionEstimatorTLD() {
+            predPUYuv.destroy();
+        }
     };

     struct TemporalFilterRefPicInfo
@@ -134,7 +172,6 @@ namespace X265_NS {
         double m_chromaFactor;
         double m_sigmaMultiplier;
         double m_sigmaZeroPoint;
-        int m_motionVectorFactor;
         int m_padding;

         // Private member variables
@@ -148,39 +185,11 @@ namespace X265_NS {
         uint8_t m_sliceTypeConfig;

         MotionEstimatorTLD* m_metld;
-        Yuv  predPUYuv;
-        int m_useSADinME;

         int createRefPicInfo(TemporalFilterRefPicInfo* refFrame,
x265_param* param);

         void bilateralFilter(Frame* frame, TemporalFilterRefPicInfo*
mctfRefList, double overallStrength);

-        void motionEstimationLuma(MV *mvs, uint32_t mvStride, pixel* src,
int stride, int height, int width, pixel* buf, int bs, int sRange,
-            MV *previous = 0, uint32_t prevmvStride = 0, int factor = 1);
-
-        void motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride,
PicYuv *orig, PicYuv *buffer, int blockSize,
-            MV *previous, uint32_t prevMvStride, int factor, int*
minError);
-
-        int motionErrorLumaSSD(pixel* src,
-            int stride,
-            pixel* buf,
-            int x,
-            int y,
-            int dx,
-            int dy,
-            int bs,
-            int besterror = 8 * 8 * 1024 * 1024);
-
-        int motionErrorLumaSAD(pixel* src,
-            int stride,
-            pixel* buf,
-            int x,
-            int y,
-            int dx,
-            int dy,
-            int bs,
-            int besterror = 8 * 8 * 1024 * 1024);
-
         void destroyRefPicInfo(TemporalFilterRefPicInfo* curFrame);

         void applyMotion(MV *mvs, uint32_t mvsStride, PicYuv *input,
PicYuv *output);
diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp
index 5fd885227..abc687ef4 100644
--- a/source/encoder/slicetype.cpp
+++ b/source/encoder/slicetype.cpp
@@ -1128,7 +1128,10 @@ bool Lookahead::create()
     m_scratch = X265_MALLOC(int, m_tld[0].widthInCU);

     if (m_param->bEnableTemporalFilter)
+    {
+        m_metld = new MotionEstimatorTLD[numTLD];
         m_origPicBuf = new OrigPicBuffer();
+    }

     return m_tld && m_scratch;
 }
@@ -1170,7 +1173,10 @@ void Lookahead::destroy()
     }

     if (m_param->bEnableTemporalFilter)
+    {
         delete m_origPicBuf;
+        delete[] m_metld;
+    }

     X265_FREE(m_scratch);
     delete [] m_tld;
@@ -1794,19 +1800,17 @@ void Lookahead::compCostBref(Lowres **frames, int start, int end, int num)
     }
 }

-void Lookahead::estimatelowresmotion(Frame* curframe)
+void CostEstimateGroup::estimatelowresmotion(MotionEstimatorTLD& m_metld,
Frame* curframe, int refId)
 {
+    m_metld.m_bitDepth = curframe->m_param->internalBitDepth;
+    TemporalFilterRefPicInfo* ref = &curframe->m_mcstfRefList[refId];

-    for (int i = 1; i <= curframe->m_mcstf->m_numRef; i++)
-    {
-        TemporalFilterRefPicInfo * ref = &curframe->m_mcstfRefList[i - 1];
-
-        curframe->m_mcstf->motionEstimationLuma(ref->mvs0,
ref->mvsStride0, curframe->m_lowres.lowerResPlane[0],
(curframe->m_lowres.lumaStride / 2), (curframe->m_lowres.lines / 2),
(curframe->m_lowres.width / 2), ref->lowerRes, 16,
m_param->searchRangeForLayer2);
-        curframe->m_mcstf->motionEstimationLuma(ref->mvs1,
ref->mvsStride1, curframe->m_lowres.lowresPlane[0],
(curframe->m_lowres.lumaStride), (curframe->m_lowres.lines),
(curframe->m_lowres.width), ref->lowres, 16, m_param->searchRangeForLayer1,
ref->mvs0, ref->mvsStride0, 2);
-        curframe->m_mcstf->motionEstimationLuma(ref->mvs2,
ref->mvsStride2, curframe->m_fencPic->m_picOrg[0],
curframe->m_fencPic->m_stride, curframe->m_fencPic->m_picHeight,
curframe->m_fencPic->m_picWidth, ref->picBuffer->m_picOrg[0], 16,
m_param->searchRangeForLayer0, ref->mvs1, ref->mvsStride1, 2);
-        curframe->m_mcstf->motionEstimationLumaDoubleRes(ref->mvs,
ref->mvsStride, curframe->m_fencPic, ref->picBuffer, 8, ref->mvs2,
ref->mvsStride2, 1, ref->error);
-    }
+    m_metld.motionEstimationLuma(m_metld, ref->mvs0, ref->mvsStride0,
curframe->m_lowres.lowerResPlane[0], (int)(curframe->m_lowres.lumaStride /
2), (curframe->m_lowres.lines / 2), (curframe->m_lowres.width / 2),
ref->lowerRes, 16, curframe->m_param->searchRangeForLayer2);
+    m_metld.motionEstimationLuma(m_metld, ref->mvs1, ref->mvsStride1,
curframe->m_lowres.lowresPlane[0], (int)(curframe->m_lowres.lumaStride),
(curframe->m_lowres.lines), (curframe->m_lowres.width), ref->lowres, 16,
curframe->m_param->searchRangeForLayer1, ref->mvs0, ref->mvsStride0, 2);
+    m_metld.motionEstimationLuma(m_metld, ref->mvs2, ref->mvsStride2,
curframe->m_fencPic->m_picOrg[0], (int)curframe->m_fencPic->m_stride,
curframe->m_fencPic->m_picHeight, curframe->m_fencPic->m_picWidth,
ref->picBuffer->m_picOrg[0], 16, curframe->m_param->searchRangeForLayer0,
ref->mvs1, ref->mvsStride1, 2);
+    m_metld.motionEstimationLumaDoubleRes(m_metld, ref->mvs,
ref->mvsStride, curframe->m_fencPic, ref->picBuffer, 8, ref->mvs2,
ref->mvsStride2, 1, ref->error);

+    curframe->m_lowres.lowresMcstfMvs[0][refId][0].x = 1;
 }

 inline int enqueueRefFrame(Frame* iterFrame, Frame* curFrame, bool
isPreFiltered, int16_t i)
@@ -2156,20 +2160,39 @@ void Lookahead::slicetypeDecide()
         }
     }

-    Frame* frameEnc = m_inputQueue.first();
-    for (int i = 0; i < m_inputQueue.size(); i++)
+    if (m_bBatchMotionSearch && m_param->bEnableTemporalFilter)
     {
-        if (m_param->bEnableTemporalFilter &&
isFilterThisframe(frameEnc->m_mcstf->m_sliceTypeConfig,
frameEnc->m_lowres.sliceType))
+        /* pre-calculate all motion searches, using many worker threads */
+        CostEstimateGroup estGroup(*this, frames);
+        Frame* frameEnc = m_inputQueue.first();
+        for (int b = 0; b < m_inputQueue.size(); b++)
         {
-            if (!generatemcstf(frameEnc, m_origPicBuf->m_mcstfPicList,
m_inputQueue.last()->m_poc))
+            if (m_param->bEnableTemporalFilter &&
isFilterThisframe(frameEnc->m_mcstf->m_sliceTypeConfig,
frameEnc->m_lowres.sliceType))
             {
-                x265_log(m_param, X265_LOG_ERROR, "Failed to initialize
MCSTFReferencePicInfo at POC %d\n", frameEnc->m_poc);
-                fflush(stderr);
-            }
+                if (!generatemcstf(frameEnc, m_origPicBuf->m_mcstfPicList,
m_inputQueue.last()->m_poc))
+                {
+                    x265_log(m_param, X265_LOG_ERROR, "Failed to
initialize MCSTFReferencePicInfo at POC %d\n", frameEnc->m_poc);
+                    fflush(stderr);
+                }
+
+                for (int j = 1; j <= frameEnc->m_mcstf->m_numRef; j++)
+                {
+                    TemporalFilterRefPicInfo* ref =
&frameEnc->m_mcstfRefList[j - 1];
+                    int i = ref->poc;
+
+                    /* Skip search if already done */
+                    if (frames[b + 1]->lowresMcstfMvs[0][j - 1][0].x !=
0x7FFF)
+                        continue;

-            estimatelowresmotion(frameEnc);
+                    estGroup.add(j - 1, i, frameEnc->m_poc);
+                }
+            }
+            frameEnc = frameEnc->m_next;
         }
-         frameEnc = frameEnc->m_next;
+
+        /* auto-disable after the first batch if pool is small */
+        m_bBatchMotionSearch &= m_pool->m_numWorkers >= 4;
+        estGroup.finishBatch();
     }

     if (m_param->bEnableTemporalSubLayers > 2)
@@ -4029,6 +4052,7 @@ void CostEstimateGroup::processTasks(int workerThreadID)
     if (workerThreadID < 0)
         id = pool ? pool->m_numWorkers : 0;
     LookaheadTLD& tld = m_lookahead.m_tld[id];
+    MotionEstimatorTLD& m_metld = m_lookahead.m_metld[id];

     m_lock.acquire();
     while (m_jobAcquired < m_jobTotal)
@@ -4042,7 +4066,14 @@ void CostEstimateGroup::processTasks(int
workerThreadID)
             ProfileScopeEvent(estCostSingle);

             Estimate& e = m_estimates[i];
-            estimateFrameCost(tld, e.p0, e.p1, e.b, false);
+            Frame* curFrame = m_lookahead.m_inputQueue.getPOC(e.b);
+
+            if (m_lookahead.m_param->bEnableTemporalFilter && curFrame &&
(curFrame->m_lowres.sliceType == X265_TYPE_IDR ||
curFrame->m_lowres.sliceType == X265_TYPE_I || curFrame->m_lowres.sliceType
== X265_TYPE_P))
+            {
+                estimatelowresmotion(m_metld, curFrame, e.p0);
+            }
+            else
+                estimateFrameCost(tld, e.p0, e.p1, e.b, false);
         }
         else
         {
diff --git a/source/encoder/slicetype.h b/source/encoder/slicetype.h
index 214e295b7..be6ac8112 100644
--- a/source/encoder/slicetype.h
+++ b/source/encoder/slicetype.h
@@ -204,6 +204,7 @@ public:
     int8_t                  m_gopId;

     OrigPicBuffer*          m_origPicBuf;
+    MotionEstimatorTLD*     m_metld;

     Lookahead(x265_param *param, ThreadPool *pool);
 #if DETAILED_CU_STATS
@@ -227,7 +228,6 @@ public:
     void    getEstimatedPictureCost(Frame *pic);
     void    setLookaheadQueue();
     int     findSliceType(int poc);
-    void    estimatelowresmotion(Frame* frame);
     bool    generatemcstf(Frame * frame, PicList refPic, int poclast);
     bool    isFilterThisframe(uint8_t sliceTypeConfig, int curSliceType);

@@ -327,6 +327,8 @@ protected:
     int64_t estimateFrameCost(LookaheadTLD& tld, int p0, int p1, int b,
bool intraPenalty);
     void    estimateCUCost(LookaheadTLD& tld, int cux, int cuy, int p0,
int p1, int b, bool bDoSearch[2], bool lastRow, int slice, bool hme);

+    void    estimatelowresmotion(MotionEstimatorTLD& m_metld, Frame*
curframe, int refId);
+
     CostEstimateGroup& operator=(const CostEstimateGroup&);
 };

-- 
2.36.0.windows.1
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20241111/2fafdc56/attachment-0001.htm>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0010-Added-batch-motion-search-for-all-planes-in-Lookahea.patch
Type: application/octet-stream
Size: 28374 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20241111/2fafdc56/attachment-0001.obj>


More information about the x265-devel mailing list