[x265] [PATCH 10/10] Added batch-motion-search for all planes in Lookahead

Mahesh Pittala mahesh at multicorewareinc.com
Tue Nov 12 08:59:44 UTC 2024


Pushed all 10 patches in this series to the master and release_4.1 branches.

On Mon, Nov 11, 2024 at 7:56 PM Anusuya Kumarasamy <
anusuya.kumarasamy at multicorewareinc.com> wrote:

> From 79dd07b35ff128ec13f0a6902b8fad1f7d419a04 Mon Sep 17 00:00:00 2001
> From: AnusuyaKumarasamy <anusuya.kumarasamy at multicorewareinc.com>
> Date: Mon, 11 Nov 2024 10:50:11 +0530
> Subject: [PATCH 10/10] Added batch-motion-search for all planes in
> Lookahead
>
> ---
>  source/common/lowres.cpp         | 15 +++++++
>  source/common/lowres.h           |  1 +
>  source/common/temporalfilter.cpp | 64 ++++++++++++++--------------
>  source/common/temporalfilter.h   | 69 +++++++++++++++++--------------
>  source/encoder/slicetype.cpp     | 71 +++++++++++++++++++++++---------
>  source/encoder/slicetype.h       |  4 +-
>  6 files changed, 139 insertions(+), 85 deletions(-)
>
> diff --git a/source/common/lowres.cpp b/source/common/lowres.cpp
> index 17c071c2c..1596f79da 100644
> --- a/source/common/lowres.cpp
> +++ b/source/common/lowres.cpp
> @@ -194,6 +194,11 @@ bool Lowres::create(x265_param* param, PicYuv
> *origPic, uint32_t qgSize)
>          }
>      }
>
> +    for (int i = 0; i < 4; i++)
> +    {
> +        CHECKED_MALLOC(lowresMcstfMvs[0][i], MV, cuCount);
> +    }
> +
>      for (int i = 0; i < bframes + 2; i++)
>      {
>          CHECKED_MALLOC(lowresMvs[0][i], MV, cuCount);
> @@ -281,6 +286,11 @@ void Lowres::destroy(x265_param* param)
>              X265_FREE(lowerResMvCosts[1][i]);
>          }
>      }
> +
> +    for (int i = 0; i < 4; i++)
> +    {
> +        X265_FREE(lowresMcstfMvs[0][i]);
> +    }
>      X265_FREE(qpAqOffset);
>      X265_FREE(invQscaleFactor);
>      X265_FREE(qpCuTreeOffset);
> @@ -358,6 +368,11 @@ void Lowres::init(PicYuv *origPic, int poc)
>          lowresMvs[1][i][0].x = 0x7FFF;
>      }
>
> +    for (int i = 0; i < 4; i++)
> +    {
> +        lowresMcstfMvs[0][i][0].x = 0x7FFF;
> +    }
> +
>      for (int i = 0; i < bframes + 2; i++)
>          intraMbs[i] = 0;
>      if (origPic->m_param->rc.vbvBufferSize)
> diff --git a/source/common/lowres.h b/source/common/lowres.h
> index 7e6baa844..2bf39c3b5 100644
> --- a/source/common/lowres.h
> +++ b/source/common/lowres.h
> @@ -191,6 +191,7 @@ struct Lowres : public ReferencePlanes
>      uint16_t* lowresCosts[X265_BFRAME_MAX + 2][X265_BFRAME_MAX + 2];
>      int32_t*  lowresMvCosts[2][X265_BFRAME_MAX + 2];
>      MV*       lowresMvs[2][X265_BFRAME_MAX + 2];
> +    MV*       lowresMcstfMvs[2][4];
>      uint32_t  maxBlocksInRow;
>      uint32_t  maxBlocksInCol;
>      uint32_t  maxBlocksInRowFullRes;
> diff --git a/source/common/temporalfilter.cpp
> b/source/common/temporalfilter.cpp
> index db58a0c15..aa50c2246 100644
> --- a/source/common/temporalfilter.cpp
> +++ b/source/common/temporalfilter.cpp
> @@ -144,13 +144,11 @@ TemporalFilter::TemporalFilter()
>      m_QP = 0;
>      m_sliceTypeConfig = 3;
>      m_numRef = 0;
> -    m_useSADinME = 1;
>
>      m_range = 2;
>      m_chromaFactor = 0.55;
>      m_sigmaMultiplier = 9.0;
>      m_sigmaZeroPoint = 10.0;
> -    m_motionVectorFactor = 16;
>  }
>
>  void TemporalFilter::init(const x265_param* param)
> @@ -163,8 +161,6 @@ void TemporalFilter::init(const x265_param* param)
>      m_numComponents = (m_internalCsp != X265_CSP_I400) ?
> MAX_NUM_COMPONENT : 1;
>
>      m_metld = new MotionEstimatorTLD;
> -
> -    predPUYuv.create(FENC_STRIDE, X265_CSP_I400);
>  }
>
>  int TemporalFilter::createRefPicInfo(TemporalFilterRefPicInfo* refFrame,
> x265_param* param)
> @@ -191,7 +187,7 @@ fail:
>      return 0;
>  }
>
> -int TemporalFilter::motionErrorLumaSAD(
> +int MotionEstimatorTLD::motionErrorLumaSAD(MotionEstimatorTLD& m_metld,
>      pixel* src,
>      int stride,
>      pixel* buf,
> @@ -233,7 +229,7 @@ int TemporalFilter::motionErrorLumaSAD(
>          /* copy PU block into cache */
>          primitives.pu[partEnum].copy_pp(predPUYuv.m_buf[0], FENC_STRIDE,
> bufferRowStart, buffStride);
>
> -        error = m_metld->me.bufSAD(predPUYuv.m_buf[0], FENC_STRIDE);
> +        error = m_metld.me.bufSAD(predPUYuv.m_buf[0], FENC_STRIDE);
>  #endif
>          if (error > besterror)
>          {
> @@ -296,7 +292,7 @@ int TemporalFilter::motionErrorLumaSAD(
>      return error;
>  }
>
> -int TemporalFilter::motionErrorLumaSSD(
> +int MotionEstimatorTLD::motionErrorLumaSSD(MotionEstimatorTLD& m_metld,
>      pixel* src,
>      int stride,
>      pixel* buf,
> @@ -338,7 +334,7 @@ int TemporalFilter::motionErrorLumaSSD(
>          /* copy PU block into cache */
>          primitives.pu[partEnum].copy_pp(predPUYuv.m_buf[0], FENC_STRIDE,
> bufferRowStart, buffStride);
>
> -        error = (int)primitives.cu[partEnum].sse_pp(m_metld->me.fencPUYuv.m_buf[0],
> FENC_STRIDE, predPUYuv.m_buf[0], FENC_STRIDE);
> +        error = (int)primitives.cu[partEnum].sse_pp(m_metld.me.fencPUYuv.m_buf[0],
> FENC_STRIDE, predPUYuv.m_buf[0], FENC_STRIDE);
>
>  #endif
>          if (error > besterror)
> @@ -648,7 +644,7 @@ void TemporalFilter::bilateralFilter(Frame* frame,
>      }
>  }
>
> -void TemporalFilter::motionEstimationLuma(MV *mvs, uint32_t mvStride,
> pixel* src,int stride, int height, int width, pixel* buf, int blockSize,
> +void MotionEstimatorTLD::motionEstimationLuma(MotionEstimatorTLD&
> m_metld, MV *mvs, uint32_t mvStride, pixel* src,int stride, int height, int
> width, pixel* buf, int blockSize,
>      int sRange, MV* previous, uint32_t prevMvStride, int factor)
>  {
>
> @@ -667,7 +663,7 @@ void TemporalFilter::motionEstimationLuma(MV *mvs,
> uint32_t mvStride, pixel* src
>          for (int blockX = 0; blockX + blockSize <= origWidth; blockX +=
> stepSize)
>          {
>              const intptr_t pelOffset = blockY * stride + blockX;
> -            m_metld->me.setSourcePU(src, stride, pelOffset, blockSize,
> blockSize, X265_HEX_SEARCH, 1);
> +            m_metld.me.setSourcePU(src, stride, pelOffset, blockSize,
> blockSize, X265_HEX_SEARCH, 1);
>
>
>              MV best(0, 0);
> @@ -694,9 +690,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs,
> uint32_t mvStride, pixel* src
>                              MV old = previous[mvIdx];
>
>                              if (m_useSADinME)
> -                                error = motionErrorLumaSAD(src, stride,
> buf, blockX, blockY, old.x * factor, old.y * factor, blockSize, leastError);
> +                                error = motionErrorLumaSAD(m_metld, src,
> stride, buf, blockX, blockY, old.x * factor, old.y * factor, blockSize,
> leastError);
>                              else
> -                                error = motionErrorLumaSSD(src, stride,
> buf, blockX, blockY, old.x * factor, old.y * factor, blockSize, leastError);
> +                                error = motionErrorLumaSSD(m_metld, src,
> stride, buf, blockX, blockY, old.x * factor, old.y * factor, blockSize,
> leastError);
>
>                              if (error < leastError)
>                              {
> @@ -708,9 +704,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs,
> uint32_t mvStride, pixel* src
>                  }
>
>                  if (m_useSADinME)
> -                    error = motionErrorLumaSAD(src, stride, buf, blockX,
> blockY, 0, 0, blockSize, leastError);
> +                    error = motionErrorLumaSAD(m_metld, src, stride, buf,
> blockX, blockY, 0, 0, blockSize, leastError);
>                  else
> -                    error = motionErrorLumaSSD(src, stride, buf, blockX,
> blockY, 0, 0, blockSize, leastError);
> +                    error = motionErrorLumaSSD(m_metld, src, stride, buf,
> blockX, blockY, 0, 0, blockSize, leastError);
>
>                  if (error < leastError)
>                  {
> @@ -726,9 +722,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs,
> uint32_t mvStride, pixel* src
>                  for (int x2 = prevBest.x / m_motionVectorFactor - range;
> x2 <= prevBest.x / m_motionVectorFactor + range; x2++)
>                  {
>                      if (m_useSADinME)
> -                        error = motionErrorLumaSAD(src, stride, buf,
> blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor,
> blockSize, leastError);
> +                        error = motionErrorLumaSAD(m_metld, src, stride,
> buf, blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor,
> blockSize, leastError);
>                      else
> -                        error = motionErrorLumaSSD(src, stride, buf,
> blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor,
> blockSize, leastError);
> +                        error = motionErrorLumaSSD(m_metld, src, stride,
> buf, blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor,
> blockSize, leastError);
>                      if (error < leastError)
>                      {
>                          best.set(x2 * m_motionVectorFactor, y2 *
> m_motionVectorFactor);
> @@ -743,9 +739,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs,
> uint32_t mvStride, pixel* src
>                  MV aboveMV = mvs[idx];
>
>                  if (m_useSADinME)
> -                    error = motionErrorLumaSAD(src, stride, buf, blockX,
> blockY, aboveMV.x, aboveMV.y, blockSize, leastError);
> +                    error = motionErrorLumaSAD(m_metld, src, stride, buf,
> blockX, blockY, aboveMV.x, aboveMV.y, blockSize, leastError);
>                  else
> -                    error = motionErrorLumaSSD(src, stride, buf, blockX,
> blockY, aboveMV.x, aboveMV.y, blockSize, leastError);
> +                    error = motionErrorLumaSSD(m_metld, src, stride, buf,
> blockX, blockY, aboveMV.x, aboveMV.y, blockSize, leastError);
>
>                  if (error < leastError)
>                  {
> @@ -760,9 +756,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs,
> uint32_t mvStride, pixel* src
>                  MV leftMV = mvs[idx];
>
>                  if (m_useSADinME)
> -                    error = motionErrorLumaSAD(src, stride, buf, blockX,
> blockY, leftMV.x, leftMV.y, blockSize, leastError);
> +                    error = motionErrorLumaSAD(m_metld, src, stride, buf,
> blockX, blockY, leftMV.x, leftMV.y, blockSize, leastError);
>                  else
> -                    error = motionErrorLumaSSD(src, stride, buf, blockX,
> blockY, leftMV.x, leftMV.y, blockSize, leastError);
> +                    error = motionErrorLumaSSD(m_metld, src, stride, buf,
> blockX, blockY, leftMV.x, leftMV.y, blockSize, leastError);
>
>                  if (error < leastError)
>                  {
> @@ -802,7 +798,7 @@ void TemporalFilter::motionEstimationLuma(MV *mvs,
> uint32_t mvStride, pixel* src
>  }
>
>
> -void TemporalFilter::motionEstimationLumaDoubleRes(MV *mvs, uint32_t
> mvStride, PicYuv *orig, PicYuv *buffer, int blockSize,
> +void
> MotionEstimatorTLD::motionEstimationLumaDoubleRes(MotionEstimatorTLD&
> m_metld, MV *mvs, uint32_t mvStride, PicYuv *orig, PicYuv *buffer, int
> blockSize,
>      MV *previous, uint32_t prevMvStride, int factor, int* minError)
>  {
>
> @@ -822,7 +818,7 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV
> *mvs, uint32_t mvStride, P
>          {
>
>              const intptr_t pelOffset = blockY * orig->m_stride + blockX;
> -            m_metld->me.setSourcePU(orig->m_picOrg[0], orig->m_stride,
> pelOffset, blockSize, blockSize, X265_HEX_SEARCH, 1);
> +            m_metld.me.setSourcePU(orig->m_picOrg[0], orig->m_stride,
> pelOffset, blockSize, blockSize, X265_HEX_SEARCH, 1);
>
>              MV best(0, 0);
>              int leastError = INT_MAX;
> @@ -848,9 +844,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV
> *mvs, uint32_t mvStride, P
>                              MV old = previous[mvIdx];
>
>                              if (m_useSADinME)
> -                                error =
> motionErrorLumaSAD(orig->m_picOrg[0], orig->m_stride, buffer->m_picOrg[0],
> blockX, blockY, old.x * factor, old.y * factor, blockSize, leastError);
> +                                error = motionErrorLumaSAD(m_metld,
> orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
> blockY, old.x * factor, old.y * factor, blockSize, leastError);
>                              else
> -                                error =
> motionErrorLumaSSD(orig->m_picOrg[0], orig->m_stride, buffer->m_picOrg[0],
> blockX, blockY, old.x * factor, old.y * factor, blockSize, leastError);
> +                                error = motionErrorLumaSSD(m_metld,
> orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
> blockY, old.x * factor, old.y * factor, blockSize, leastError);
>
>                              if (error < leastError)
>                              {
> @@ -862,9 +858,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV
> *mvs, uint32_t mvStride, P
>                  }
>
>                  if (m_useSADinME)
> -                    error = motionErrorLumaSAD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, 0, 0, blockSize,
> leastError);
> +                    error = motionErrorLumaSAD(m_metld,
> orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
> blockY, 0, 0, blockSize, leastError);
>                  else
> -                    error = motionErrorLumaSSD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, 0, 0, blockSize,
> leastError);
> +                    error = motionErrorLumaSSD(m_metld,
> orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
> blockY, 0, 0, blockSize, leastError);
>
>                  if (error < leastError)
>                  {
> @@ -880,9 +876,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV
> *mvs, uint32_t mvStride, P
>                  for (int x2 = prevBest.x / m_motionVectorFactor - range;
> x2 <= prevBest.x / m_motionVectorFactor + range; x2++)
>                  {
>                      if (m_useSADinME)
> -                        error = motionErrorLumaSAD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2 *
> m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize, leastError);
> +                        error = motionErrorLumaSAD(m_metld,
> orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
> blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize,
> leastError);
>                      else
> -                        error = motionErrorLumaSSD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2 *
> m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize, leastError);
> +                        error = motionErrorLumaSSD(m_metld,
> orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
> blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize,
> leastError);
>
>                      if (error < leastError)
>                      {
> @@ -899,9 +895,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV
> *mvs, uint32_t mvStride, P
>                  for (int x2 = prevBest.x - doubleRange; x2 <= prevBest.x
> + doubleRange; x2++)
>                  {
>                      if (m_useSADinME)
> -                        error = motionErrorLumaSAD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2, y2, blockSize,
> leastError);
> +                        error = motionErrorLumaSAD(m_metld,
> orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
> blockY, x2, y2, blockSize, leastError);
>                      else
> -                        error = motionErrorLumaSSD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2, y2, blockSize,
> leastError);
> +                        error = motionErrorLumaSSD(m_metld,
> orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
> blockY, x2, y2, blockSize, leastError);
>
>                      if (error < leastError)
>                      {
> @@ -918,9 +914,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV
> *mvs, uint32_t mvStride, P
>                  MV aboveMV = mvs[idx];
>
>                  if (m_useSADinME)
> -                    error = motionErrorLumaSAD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, aboveMV.x, aboveMV.y,
> blockSize, leastError);
> +                    error = motionErrorLumaSAD(m_metld,
> orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
> blockY, aboveMV.x, aboveMV.y, blockSize, leastError);
>                  else
> -                    error = motionErrorLumaSSD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, aboveMV.x, aboveMV.y,
> blockSize, leastError);
> +                    error = motionErrorLumaSSD(m_metld,
> orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
> blockY, aboveMV.x, aboveMV.y, blockSize, leastError);
>
>                  if (error < leastError)
>                  {
> @@ -935,9 +931,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV
> *mvs, uint32_t mvStride, P
>                  MV leftMV = mvs[idx];
>
>                  if (m_useSADinME)
> -                    error = motionErrorLumaSAD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, leftMV.x, leftMV.y,
> blockSize, leastError);
> +                    error = motionErrorLumaSAD(m_metld,
> orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
> blockY, leftMV.x, leftMV.y, blockSize, leastError);
>                  else
> -                    error = motionErrorLumaSSD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, leftMV.x, leftMV.y,
> blockSize, leastError);
> +                    error = motionErrorLumaSSD(m_metld,
> orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
> blockY, leftMV.x, leftMV.y, blockSize, leastError);
>
>                  if (error < leastError)
>                  {
> diff --git a/source/common/temporalfilter.h
> b/source/common/temporalfilter.h
> index 3e03d7737..c4316aca6 100644
> --- a/source/common/temporalfilter.h
> +++ b/source/common/temporalfilter.h
> @@ -84,9 +84,47 @@ namespace X265_NS {
>          {
>              me.init(X265_CSP_I400);
>              me.setQP(X265_LOOKAHEAD_QP);
> +            predPUYuv.create(FENC_STRIDE, X265_CSP_I400);
> +            m_useSADinME = 1;
> +            m_motionVectorFactor = 16;
>          }
>
> -        ~MotionEstimatorTLD() {}
> +        Yuv  predPUYuv;
> +        int m_useSADinME;
> +        int m_motionVectorFactor;
> +        int32_t  m_bitDepth;
> +
> +        void init(const x265_param* param);
> +
> +        void motionEstimationLuma(MotionEstimatorTLD& m_tld, MV* mvs,
> uint32_t mvStride, pixel* src, int stride, int height, int width, pixel*
> buf, int bs, int sRange,
> +            MV* previous = 0, uint32_t prevmvStride = 0, int factor = 1);
> +
> +        void motionEstimationLumaDoubleRes(MotionEstimatorTLD& m_tld, MV*
> mvs, uint32_t mvStride, PicYuv* orig, PicYuv* buffer, int blockSize,
> +            MV* previous, uint32_t prevMvStride, int factor, int*
> minError);
> +
> +        int motionErrorLumaSSD(MotionEstimatorTLD& m_tld, pixel* src,
> +            int stride,
> +            pixel* buf,
> +            int x,
> +            int y,
> +            int dx,
> +            int dy,
> +            int bs,
> +            int besterror = 8 * 8 * 1024 * 1024);
> +
> +        int motionErrorLumaSAD(MotionEstimatorTLD& m_tld, pixel* src,
> +            int stride,
> +            pixel* buf,
> +            int x,
> +            int y,
> +            int dx,
> +            int dy,
> +            int bs,
> +            int besterror = 8 * 8 * 1024 * 1024);
> +
> +        ~MotionEstimatorTLD() {
> +            predPUYuv.destroy();
> +        }
>      };
>
>      struct TemporalFilterRefPicInfo
> @@ -134,7 +172,6 @@ namespace X265_NS {
>          double m_chromaFactor;
>          double m_sigmaMultiplier;
>          double m_sigmaZeroPoint;
> -        int m_motionVectorFactor;
>          int m_padding;
>
>          // Private member variables
> @@ -148,39 +185,11 @@ namespace X265_NS {
>          uint8_t m_sliceTypeConfig;
>
>          MotionEstimatorTLD* m_metld;
> -        Yuv  predPUYuv;
> -        int m_useSADinME;
>
>          int createRefPicInfo(TemporalFilterRefPicInfo* refFrame,
> x265_param* param);
>
>          void bilateralFilter(Frame* frame, TemporalFilterRefPicInfo*
> mctfRefList, double overallStrength);
>
> -        void motionEstimationLuma(MV *mvs, uint32_t mvStride, pixel* src,
> int stride, int height, int width, pixel* buf, int bs, int sRange,
> -            MV *previous = 0, uint32_t prevmvStride = 0, int factor = 1);
> -
> -        void motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride,
> PicYuv *orig, PicYuv *buffer, int blockSize,
> -            MV *previous, uint32_t prevMvStride, int factor, int*
> minError);
> -
> -        int motionErrorLumaSSD(pixel* src,
> -            int stride,
> -            pixel* buf,
> -            int x,
> -            int y,
> -            int dx,
> -            int dy,
> -            int bs,
> -            int besterror = 8 * 8 * 1024 * 1024);
> -
> -        int motionErrorLumaSAD(pixel* src,
> -            int stride,
> -            pixel* buf,
> -            int x,
> -            int y,
> -            int dx,
> -            int dy,
> -            int bs,
> -            int besterror = 8 * 8 * 1024 * 1024);
> -
>          void destroyRefPicInfo(TemporalFilterRefPicInfo* curFrame);
>
>          void applyMotion(MV *mvs, uint32_t mvsStride, PicYuv *input,
> PicYuv *output);
> diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp
> index 5fd885227..abc687ef4 100644
> --- a/source/encoder/slicetype.cpp
> +++ b/source/encoder/slicetype.cpp
> @@ -1128,7 +1128,10 @@ bool Lookahead::create()
>      m_scratch = X265_MALLOC(int, m_tld[0].widthInCU);
>
>      if (m_param->bEnableTemporalFilter)
> +    {
> +        m_metld = new MotionEstimatorTLD[numTLD];
>          m_origPicBuf = new OrigPicBuffer();
> +    }
>
>      return m_tld && m_scratch;
>  }
> @@ -1170,7 +1173,10 @@ void Lookahead::destroy()
>      }
>
>      if (m_param->bEnableTemporalFilter)
> +    {
>          delete m_origPicBuf;
> +        delete[] m_metld;
> +    }
>
>      X265_FREE(m_scratch);
>      delete [] m_tld;
> @@ -1794,19 +1800,17 @@ void Lookahead::compCostBref(Lowres **frames, int
> start, int end, int num)
>      }
>  }
>
> -void Lookahead::estimatelowresmotion(Frame* curframe)
> +void CostEstimateGroup::estimatelowresmotion(MotionEstimatorTLD& m_metld,
> Frame* curframe, int refId)
>  {
> +    m_metld.m_bitDepth = curframe->m_param->internalBitDepth;
> +    TemporalFilterRefPicInfo* ref = &curframe->m_mcstfRefList[refId];
>
> -    for (int i = 1; i <= curframe->m_mcstf->m_numRef; i++)
> -    {
> -        TemporalFilterRefPicInfo * ref = &curframe->m_mcstfRefList[i - 1];
> -
> -        curframe->m_mcstf->motionEstimationLuma(ref->mvs0,
> ref->mvsStride0, curframe->m_lowres.lowerResPlane[0],
> (curframe->m_lowres.lumaStride / 2), (curframe->m_lowres.lines / 2),
> (curframe->m_lowres.width / 2), ref->lowerRes, 16,
> m_param->searchRangeForLayer2);
> -        curframe->m_mcstf->motionEstimationLuma(ref->mvs1,
> ref->mvsStride1, curframe->m_lowres.lowresPlane[0],
> (curframe->m_lowres.lumaStride), (curframe->m_lowres.lines),
> (curframe->m_lowres.width), ref->lowres, 16, m_param->searchRangeForLayer1,
> ref->mvs0, ref->mvsStride0, 2);
> -        curframe->m_mcstf->motionEstimationLuma(ref->mvs2,
> ref->mvsStride2, curframe->m_fencPic->m_picOrg[0],
> curframe->m_fencPic->m_stride, curframe->m_fencPic->m_picHeight,
> curframe->m_fencPic->m_picWidth, ref->picBuffer->m_picOrg[0], 16,
> m_param->searchRangeForLayer0, ref->mvs1, ref->mvsStride1, 2);
> -        curframe->m_mcstf->motionEstimationLumaDoubleRes(ref->mvs,
> ref->mvsStride, curframe->m_fencPic, ref->picBuffer, 8, ref->mvs2,
> ref->mvsStride2, 1, ref->error);
> -    }
> +    m_metld.motionEstimationLuma(m_metld, ref->mvs0, ref->mvsStride0,
> curframe->m_lowres.lowerResPlane[0], (int)(curframe->m_lowres.lumaStride /
> 2), (curframe->m_lowres.lines / 2), (curframe->m_lowres.width / 2),
> ref->lowerRes, 16, curframe->m_param->searchRangeForLayer2);
> +    m_metld.motionEstimationLuma(m_metld, ref->mvs1, ref->mvsStride1,
> curframe->m_lowres.lowresPlane[0], (int)(curframe->m_lowres.lumaStride),
> (curframe->m_lowres.lines), (curframe->m_lowres.width), ref->lowres, 16,
> curframe->m_param->searchRangeForLayer1, ref->mvs0, ref->mvsStride0, 2);
> +    m_metld.motionEstimationLuma(m_metld, ref->mvs2, ref->mvsStride2,
> curframe->m_fencPic->m_picOrg[0], (int)curframe->m_fencPic->m_stride,
> curframe->m_fencPic->m_picHeight, curframe->m_fencPic->m_picWidth,
> ref->picBuffer->m_picOrg[0], 16, curframe->m_param->searchRangeForLayer0,
> ref->mvs1, ref->mvsStride1, 2);
> +    m_metld.motionEstimationLumaDoubleRes(m_metld, ref->mvs,
> ref->mvsStride, curframe->m_fencPic, ref->picBuffer, 8, ref->mvs2,
> ref->mvsStride2, 1, ref->error);
>
> +    curframe->m_lowres.lowresMcstfMvs[0][refId][0].x = 1;
>  }
>
>  inline int enqueueRefFrame(Frame* iterFrame, Frame* curFrame, bool
> isPreFiltered, int16_t i)
> @@ -2156,20 +2160,39 @@ void Lookahead::slicetypeDecide()
>          }
>      }
>
> -    Frame* frameEnc = m_inputQueue.first();
> -    for (int i = 0; i < m_inputQueue.size(); i++)
> +    if (m_bBatchMotionSearch && m_param->bEnableTemporalFilter)
>      {
> -        if (m_param->bEnableTemporalFilter &&
> isFilterThisframe(frameEnc->m_mcstf->m_sliceTypeConfig,
> frameEnc->m_lowres.sliceType))
> +        /* pre-calculate all motion searches, using many worker threads */
> +        CostEstimateGroup estGroup(*this, frames);
> +        Frame* frameEnc = m_inputQueue.first();
> +        for (int b = 0; b < m_inputQueue.size(); b++)
>          {
> -            if (!generatemcstf(frameEnc, m_origPicBuf->m_mcstfPicList,
> m_inputQueue.last()->m_poc))
> +            if (m_param->bEnableTemporalFilter &&
> isFilterThisframe(frameEnc->m_mcstf->m_sliceTypeConfig,
> frameEnc->m_lowres.sliceType))
>              {
> -                x265_log(m_param, X265_LOG_ERROR, "Failed to initialize
> MCSTFReferencePicInfo at POC %d\n", frameEnc->m_poc);
> -                fflush(stderr);
> -            }
> +                if (!generatemcstf(frameEnc,
> m_origPicBuf->m_mcstfPicList, m_inputQueue.last()->m_poc))
> +                {
> +                    x265_log(m_param, X265_LOG_ERROR, "Failed to
> initialize MCSTFReferencePicInfo at POC %d\n", frameEnc->m_poc);
> +                    fflush(stderr);
> +                }
> +
> +                for (int j = 1; j <= frameEnc->m_mcstf->m_numRef; j++)
> +                {
> +                    TemporalFilterRefPicInfo* ref =
> &frameEnc->m_mcstfRefList[j - 1];
> +                    int i = ref->poc;
> +
> +                    /* Skip search if already done */
> +                    if (frames[b + 1]->lowresMcstfMvs[0][j - 1][0].x !=
> 0x7FFF)
> +                        continue;
>
> -            estimatelowresmotion(frameEnc);
> +                    estGroup.add(j - 1, i, frameEnc->m_poc);
> +                }
> +            }
> +            frameEnc = frameEnc->m_next;
>          }
> -         frameEnc = frameEnc->m_next;
> +
> +        /* auto-disable after the first batch if pool is small */
> +        m_bBatchMotionSearch &= m_pool->m_numWorkers >= 4;
> +        estGroup.finishBatch();
>      }
>
>      if (m_param->bEnableTemporalSubLayers > 2)
> @@ -4029,6 +4052,7 @@ void CostEstimateGroup::processTasks(int
> workerThreadID)
>      if (workerThreadID < 0)
>          id = pool ? pool->m_numWorkers : 0;
>      LookaheadTLD& tld = m_lookahead.m_tld[id];
> +    MotionEstimatorTLD& m_metld = m_lookahead.m_metld[id];
>
>      m_lock.acquire();
>      while (m_jobAcquired < m_jobTotal)
> @@ -4042,7 +4066,14 @@ void CostEstimateGroup::processTasks(int
> workerThreadID)
>              ProfileScopeEvent(estCostSingle);
>
>              Estimate& e = m_estimates[i];
> -            estimateFrameCost(tld, e.p0, e.p1, e.b, false);
> +            Frame* curFrame = m_lookahead.m_inputQueue.getPOC(e.b);
> +
> +            if (m_lookahead.m_param->bEnableTemporalFilter && curFrame &&
> (curFrame->m_lowres.sliceType == X265_TYPE_IDR ||
> curFrame->m_lowres.sliceType == X265_TYPE_I || curFrame->m_lowres.sliceType
> == X265_TYPE_P))
> +            {
> +                estimatelowresmotion(m_metld, curFrame, e.p0);
> +            }
> +            else
> +                estimateFrameCost(tld, e.p0, e.p1, e.b, false);
>          }
>          else
>          {
> diff --git a/source/encoder/slicetype.h b/source/encoder/slicetype.h
> index 214e295b7..be6ac8112 100644
> --- a/source/encoder/slicetype.h
> +++ b/source/encoder/slicetype.h
> @@ -204,6 +204,7 @@ public:
>      int8_t                  m_gopId;
>
>      OrigPicBuffer*          m_origPicBuf;
> +    MotionEstimatorTLD*     m_metld;
>
>      Lookahead(x265_param *param, ThreadPool *pool);
>  #if DETAILED_CU_STATS
> @@ -227,7 +228,6 @@ public:
>      void    getEstimatedPictureCost(Frame *pic);
>      void    setLookaheadQueue();
>      int     findSliceType(int poc);
> -    void    estimatelowresmotion(Frame* frame);
>      bool    generatemcstf(Frame * frame, PicList refPic, int poclast);
>      bool    isFilterThisframe(uint8_t sliceTypeConfig, int curSliceType);
>
> @@ -327,6 +327,8 @@ protected:
>      int64_t estimateFrameCost(LookaheadTLD& tld, int p0, int p1, int b,
> bool intraPenalty);
>      void    estimateCUCost(LookaheadTLD& tld, int cux, int cuy, int p0,
> int p1, int b, bool bDoSearch[2], bool lastRow, int slice, bool hme);
>
> +    void    estimatelowresmotion(MotionEstimatorTLD& m_metld, Frame*
> curframe, int refId);
> +
>      CostEstimateGroup& operator=(const CostEstimateGroup&);
>  };
>
> --
> 2.36.0.windows.1
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20241112/e34c5961/attachment-0001.htm>


More information about the x265-devel mailing list