[x265] x265-devel Digest, Vol 136, Issue 18

Ashok Kumar Mishra ashok at multicorewareinc.com
Tue Nov 12 06:28:52 UTC 2024


This mcstf patch series can be pushed.

On Mon, Nov 11, 2024 at 7:56 PM <x265-devel-request at videolan.org> wrote:

> Send x265-devel mailing list submissions to
>         x265-devel at videolan.org
>
> To subscribe or unsubscribe via the World Wide Web, visit
>         https://mailman.videolan.org/listinfo/x265-devel
> or, via email, send a message with subject or body 'help' to
>         x265-devel-request at videolan.org
>
> You can reach the person managing the list at
>         x265-devel-owner at videolan.org
>
> When replying, please edit your Subject line so it is more specific
> than "Re: Contents of x265-devel digest..."
>
>
> Today's Topics:
>
>    1. [PATCH 09/10] Remove frameencoder instance for mcstf and fix
>       memory leak (Anusuya Kumarasamy)
>    2. [PATCH 10/10] Added batch-motion-search for all planes in
>       Lookahead (Anusuya Kumarasamy)
>
>
> ----------------------------------------------------------------------
>
> Message: 1
> Date: Mon, 11 Nov 2024 19:54:00 +0530
> From: Anusuya Kumarasamy <anusuya.kumarasamy at multicorewareinc.com>
> To: x265-devel at videolan.org
> Subject: [x265] [PATCH 09/10] Remove frameencoder instance for mcstf
>         and fix memory leak
> Message-ID: <CAE+pfGzRSPLnUWkG7La3k5yVF+5DAwpWE-YFYzrhcYyej5QPPA at mail.gmail.com>
> Content-Type: text/plain; charset="utf-8"
>
> From 69560b753aadfc06f1b315aea2354f3c01536028 Mon Sep 17 00:00:00 2001
> From: AnusuyaKumarasamy <anusuya.kumarasamy at multicorewareinc.com>
> Date: Mon, 28 Oct 2024 11:30:49 +0530
> Subject: [PATCH 09/10] Remove frameencoder instance for mcstf and fix memory leak
>
> ---
>  source/common/frame.cpp         |  1 +
>  source/common/lowres.cpp        |  2 +-
>  source/encoder/frameencoder.cpp | 40 +++++++++------------------------
>  source/encoder/frameencoder.h   |  4 ----
>  4 files changed, 12 insertions(+), 35 deletions(-)
>
> diff --git a/source/common/frame.cpp b/source/common/frame.cpp
> index e5f5b0327..fbecfb4e9 100644
> --- a/source/common/frame.cpp
> +++ b/source/common/frame.cpp
> @@ -316,6 +316,7 @@ void Frame::destroy()
>              m_fencPicSubsampled4 = NULL;
>          }
>
> +        delete m_mcstf->m_metld;
>          for (int i = 0; i < (m_mcstf->m_range << 1); i++)
>              m_mcstf->destroyRefPicInfo(&m_mcstfRefList[i]);
>
> diff --git a/source/common/lowres.cpp b/source/common/lowres.cpp
> index 14d1e03f4..17c071c2c 100644
> --- a/source/common/lowres.cpp
> +++ b/source/common/lowres.cpp
> @@ -253,7 +253,7 @@ fail:
>  void Lowres::destroy(x265_param* param)
>  {
>      X265_FREE(buffer[0]);
> -    if(bEnableHME)
> +    if(bEnableHME || param->bEnableTemporalFilter)
>          X265_FREE(lowerResBuffer[0]);
>      X265_FREE(intraCost);
>      X265_FREE(intraMode);
> diff --git a/source/encoder/frameencoder.cpp
> b/source/encoder/frameencoder.cpp
> index 772810f3d..f0ced45e1 100644
> --- a/source/encoder/frameencoder.cpp
> +++ b/source/encoder/frameencoder.cpp
> @@ -106,16 +106,6 @@ void FrameEncoder::destroy()
>          delete m_rce.picTimingSEI;
>          delete m_rce.hrdTiming;
>      }
> -
> -    if (m_param->bEnableTemporalFilter)
> -    {
> -        delete m_frameEncTF->m_metld;
> -
> -        for (int i = 0; i < (m_frameEncTF->m_range << 1); i++)
> -            m_frameEncTF->destroyRefPicInfo(&m_mcstfRefList[i]);
> -
> -        delete m_frameEncTF;
> -    }
>  }
>
>  bool FrameEncoder::init(Encoder *top, int numRows, int numCols)
> @@ -210,16 +200,6 @@ bool FrameEncoder::init(Encoder *top, int numRows, int
> numCols)
>          m_sliceAddrBits = (uint16_t)(tmp + 1);
>      }
>
> -    if (m_param->bEnableTemporalFilter)
> -    {
> -        m_frameEncTF = new TemporalFilter();
> -        if (m_frameEncTF)
> -            m_frameEncTF->init(m_param);
> -
> -        for (int i = 0; i < (m_frameEncTF->m_range << 1); i++)
> -            ok &= !!m_frameEncTF->createRefPicInfo(&m_mcstfRefList[i],
> m_param);
> -    }
> -
>      m_retFrameBuffer = X265_MALLOC(Frame*, m_param->numLayers);
>      for (int layer = 0; layer < m_param->numLayers; layer++)
>          m_retFrameBuffer[layer] = NULL;
> @@ -676,8 +656,8 @@ void FrameEncoder::compressFrame(int layer)
>      }
>      if (m_param->bEnableTemporalFilter)
>      {
> -        m_frameEncTF->m_QP = qp;
> -        m_frameEncTF->bilateralFilter(m_frame[layer],
> m_frame[layer]->m_mcstfRefList, m_param->temporalFilterStrength);
> +        m_frame[layer]->m_mcstf->m_QP = qp;
> +        m_frame[layer]->m_mcstf->bilateralFilter(m_frame[layer],
> m_frame[layer]->m_mcstfRefList, m_param->temporalFilterStrength);
>      }
>
>      if (m_nr)
> @@ -1071,14 +1051,14 @@ void FrameEncoder::compressFrame(int layer)
>      if (m_param->bEnableTemporalFilter &&
> m_top->isFilterThisframe(m_frame[layer]->m_mcstf->m_sliceTypeConfig,
> m_frame[layer]->m_lowres.sliceType))
>      {
>          //Reset the MCSTF context in Frame Encoder and Frame
> -        for (int i = 0; i < (m_frameEncTF->m_range << 1); i++)
> -        {
> -            memset(m_mcstfRefList[i].mvs0, 0, sizeof(MV) *
> ((m_param->sourceWidth / 16) * (m_param->sourceHeight / 16)));
> -            memset(m_mcstfRefList[i].mvs1, 0, sizeof(MV) *
> ((m_param->sourceWidth / 16) * (m_param->sourceHeight / 16)));
> -            memset(m_mcstfRefList[i].mvs2, 0, sizeof(MV) *
> ((m_param->sourceWidth / 16) * (m_param->sourceHeight / 16)));
> -            memset(m_mcstfRefList[i].mvs,  0, sizeof(MV) *
> ((m_param->sourceWidth / 4) * (m_param->sourceHeight / 4)));
> -            memset(m_mcstfRefList[i].noise, 0, sizeof(int) *
> ((m_param->sourceWidth / 4) * (m_param->sourceHeight / 4)));
> -            memset(m_mcstfRefList[i].error, 0, sizeof(int) *
> ((m_param->sourceWidth / 4) * (m_param->sourceHeight / 4)));
> +        for (int i = 0; i < (m_frame[layer]->m_mcstf->m_range << 1); i++)
> +        {
> +            memset(m_frame[layer]->m_mcstfRefList[i].mvs0, 0, sizeof(MV) *
> ((m_param->sourceWidth / 16) * (m_param->sourceHeight / 16)));
> +            memset(m_frame[layer]->m_mcstfRefList[i].mvs1, 0, sizeof(MV) *
> ((m_param->sourceWidth / 16) * (m_param->sourceHeight / 16)));
> +            memset(m_frame[layer]->m_mcstfRefList[i].mvs2, 0, sizeof(MV) *
> ((m_param->sourceWidth / 16) * (m_param->sourceHeight / 16)));
> +            memset(m_frame[layer]->m_mcstfRefList[i].mvs,  0, sizeof(MV) *
> ((m_param->sourceWidth / 4) * (m_param->sourceHeight / 4)));
> +            memset(m_frame[layer]->m_mcstfRefList[i].noise, 0, sizeof(int)
> * ((m_param->sourceWidth / 4) * (m_param->sourceHeight / 4)));
> +            memset(m_frame[layer]->m_mcstfRefList[i].error, 0, sizeof(int)
> * ((m_param->sourceWidth / 4) * (m_param->sourceHeight / 4)));
>
>              m_frame[layer]->m_mcstf->m_numRef = 0;
>          }
> diff --git a/source/encoder/frameencoder.h b/source/encoder/frameencoder.h
> index 21d05c2f2..c31762402 100644
> --- a/source/encoder/frameencoder.h
> +++ b/source/encoder/frameencoder.h
> @@ -265,10 +265,6 @@ public:
>      FrameFilter              m_frameFilter;
>      NALList                  m_nalList;
>
> -    // initialization for mcstf
> -    TemporalFilter*          m_frameEncTF;
> -    TemporalFilterRefPicInfo
> m_mcstfRefList[MAX_MCSTF_TEMPORAL_WINDOW_LENGTH];
> -
>      int                      m_sLayerId;
>
>      class WeightAnalysis : public BondedTaskGroup
> --
> 2.36.0.windows.1
> -------------- next part --------------
> An HTML attachment was scrubbed...
> URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20241111/286e16dc/attachment-0001.htm>
> -------------- next part --------------
> A non-text attachment was scrubbed...
> Name: 0009-Remove-frameencoder-instance-for-mcstf-and-fix-memor.patch
> Type: application/octet-stream
> Size: 5749 bytes
> Desc: not available
> URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20241111/286e16dc/attachment-0001.obj>
>
> ------------------------------
>
> Message: 2
> Date: Mon, 11 Nov 2024 19:55:38 +0530
> From: Anusuya Kumarasamy <anusuya.kumarasamy at multicorewareinc.com>
> To: x265-devel at videolan.org
> Subject: [x265] [PATCH 10/10] Added batch-motion-search for all planes
>         in Lookahead
> Message-ID: <CAE+pfGzyzMGujAxYF_5uS5SHcoG3O7Sn58qvtLn1vxOSV-GDvg at mail.gmail.com>
> Content-Type: text/plain; charset="utf-8"
>
> From 79dd07b35ff128ec13f0a6902b8fad1f7d419a04 Mon Sep 17 00:00:00 2001
> From: AnusuyaKumarasamy <anusuya.kumarasamy at multicorewareinc.com>
> Date: Mon, 11 Nov 2024 10:50:11 +0530
> Subject: [PATCH 10/10] Added batch-motion-search for all planes in Lookahead
>
> ---
>  source/common/lowres.cpp         | 15 +++++++
>  source/common/lowres.h           |  1 +
>  source/common/temporalfilter.cpp | 64 ++++++++++++++--------------
>  source/common/temporalfilter.h   | 69 +++++++++++++++++--------------
>  source/encoder/slicetype.cpp     | 71 +++++++++++++++++++++++---------
>  source/encoder/slicetype.h       |  4 +-
>  6 files changed, 139 insertions(+), 85 deletions(-)
>
> diff --git a/source/common/lowres.cpp b/source/common/lowres.cpp
> index 17c071c2c..1596f79da 100644
> --- a/source/common/lowres.cpp
> +++ b/source/common/lowres.cpp
> @@ -194,6 +194,11 @@ bool Lowres::create(x265_param* param, PicYuv
> *origPic, uint32_t qgSize)
>          }
>      }
>
> +    for (int i = 0; i < 4; i++)
> +    {
> +        CHECKED_MALLOC(lowresMcstfMvs[0][i], MV, cuCount);
> +    }
> +
>      for (int i = 0; i < bframes + 2; i++)
>      {
>          CHECKED_MALLOC(lowresMvs[0][i], MV, cuCount);
> @@ -281,6 +286,11 @@ void Lowres::destroy(x265_param* param)
>              X265_FREE(lowerResMvCosts[1][i]);
>          }
>      }
> +
> +    for (int i = 0; i < 4; i++)
> +    {
> +        X265_FREE(lowresMcstfMvs[0][i]);
> +    }
>      X265_FREE(qpAqOffset);
>      X265_FREE(invQscaleFactor);
>      X265_FREE(qpCuTreeOffset);
> @@ -358,6 +368,11 @@ void Lowres::init(PicYuv *origPic, int poc)
>          lowresMvs[1][i][0].x = 0x7FFF;
>      }
>
> +    for (int i = 0; i < 4; i++)
> +    {
> +        lowresMcstfMvs[0][i][0].x = 0x7FFF;
> +    }
> +
>      for (int i = 0; i < bframes + 2; i++)
>          intraMbs[i] = 0;
>      if (origPic->m_param->rc.vbvBufferSize)
> diff --git a/source/common/lowres.h b/source/common/lowres.h
> index 7e6baa844..2bf39c3b5 100644
> --- a/source/common/lowres.h
> +++ b/source/common/lowres.h
> @@ -191,6 +191,7 @@ struct Lowres : public ReferencePlanes
>      uint16_t* lowresCosts[X265_BFRAME_MAX + 2][X265_BFRAME_MAX + 2];
>      int32_t*  lowresMvCosts[2][X265_BFRAME_MAX + 2];
>      MV*       lowresMvs[2][X265_BFRAME_MAX + 2];
> +    MV*       lowresMcstfMvs[2][4];
>      uint32_t  maxBlocksInRow;
>      uint32_t  maxBlocksInCol;
>      uint32_t  maxBlocksInRowFullRes;
> diff --git a/source/common/temporalfilter.cpp
> b/source/common/temporalfilter.cpp
> index db58a0c15..aa50c2246 100644
> --- a/source/common/temporalfilter.cpp
> +++ b/source/common/temporalfilter.cpp
> @@ -144,13 +144,11 @@ TemporalFilter::TemporalFilter()
>      m_QP = 0;
>      m_sliceTypeConfig = 3;
>      m_numRef = 0;
> -    m_useSADinME = 1;
>
>      m_range = 2;
>      m_chromaFactor = 0.55;
>      m_sigmaMultiplier = 9.0;
>      m_sigmaZeroPoint = 10.0;
> -    m_motionVectorFactor = 16;
>  }
>
>  void TemporalFilter::init(const x265_param* param)
> @@ -163,8 +161,6 @@ void TemporalFilter::init(const x265_param* param)
>      m_numComponents = (m_internalCsp != X265_CSP_I400) ? MAX_NUM_COMPONENT
> : 1;
>
>      m_metld = new MotionEstimatorTLD;
> -
> -    predPUYuv.create(FENC_STRIDE, X265_CSP_I400);
>  }
>
>  int TemporalFilter::createRefPicInfo(TemporalFilterRefPicInfo* refFrame,
> x265_param* param)
> @@ -191,7 +187,7 @@ fail:
>      return 0;
>  }
>
> -int TemporalFilter::motionErrorLumaSAD(
> +int MotionEstimatorTLD::motionErrorLumaSAD(MotionEstimatorTLD& m_metld,
>      pixel* src,
>      int stride,
>      pixel* buf,
> @@ -233,7 +229,7 @@ int TemporalFilter::motionErrorLumaSAD(
>          /* copy PU block into cache */
>          primitives.pu[partEnum].copy_pp(predPUYuv.m_buf[0], FENC_STRIDE,
> bufferRowStart, buffStride);
>
> -        error = m_metld->me.bufSAD(predPUYuv.m_buf[0], FENC_STRIDE);
> +        error = m_metld.me.bufSAD(predPUYuv.m_buf[0], FENC_STRIDE);
>  #endif
>          if (error > besterror)
>          {
> @@ -296,7 +292,7 @@ int TemporalFilter::motionErrorLumaSAD(
>      return error;
>  }
>
> -int TemporalFilter::motionErrorLumaSSD(
> +int MotionEstimatorTLD::motionErrorLumaSSD(MotionEstimatorTLD& m_metld,
>      pixel* src,
>      int stride,
>      pixel* buf,
> @@ -338,7 +334,7 @@ int TemporalFilter::motionErrorLumaSSD(
>          /* copy PU block into cache */
>          primitives.pu[partEnum].copy_pp(predPUYuv.m_buf[0], FENC_STRIDE,
> bufferRowStart, buffStride);
>
> -        error =
> (int)primitives.cu[partEnum].sse_pp(m_metld->me.fencPUYuv.m_buf[0],
> FENC_STRIDE, predPUYuv.m_buf[0], FENC_STRIDE);
> +        error =
> (int)primitives.cu[partEnum].sse_pp(m_metld.me.fencPUYuv.m_buf[0],
> FENC_STRIDE, predPUYuv.m_buf[0], FENC_STRIDE);
>
>  #endif
>          if (error > besterror)
> @@ -648,7 +644,7 @@ void TemporalFilter::bilateralFilter(Frame* frame,
>      }
>  }
>
> -void TemporalFilter::motionEstimationLuma(MV *mvs, uint32_t mvStride,
> pixel* src,int stride, int height, int width, pixel* buf, int blockSize,
> +void MotionEstimatorTLD::motionEstimationLuma(MotionEstimatorTLD& m_metld,
> MV *mvs, uint32_t mvStride, pixel* src,int stride, int height, int width,
> pixel* buf, int blockSize,
>      int sRange, MV* previous, uint32_t prevMvStride, int factor)
>  {
>
> @@ -667,7 +663,7 @@ void TemporalFilter::motionEstimationLuma(MV *mvs,
> uint32_t mvStride, pixel* src
>          for (int blockX = 0; blockX + blockSize <= origWidth; blockX +=
> stepSize)
>          {
>              const intptr_t pelOffset = blockY * stride + blockX;
> -            m_metld->me.setSourcePU(src, stride, pelOffset, blockSize,
> blockSize, X265_HEX_SEARCH, 1);
> +            m_metld.me.setSourcePU(src, stride, pelOffset, blockSize,
> blockSize, X265_HEX_SEARCH, 1);
>
>
>              MV best(0, 0);
> @@ -694,9 +690,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs,
> uint32_t mvStride, pixel* src
>                              MV old = previous[mvIdx];
>
>                              if (m_useSADinME)
> -                                error = motionErrorLumaSAD(src, stride,
> buf, blockX, blockY, old.x * factor, old.y * factor, blockSize,
> leastError);
> +                                error = motionErrorLumaSAD(m_metld, src,
> stride, buf, blockX, blockY, old.x * factor, old.y * factor, blockSize,
> leastError);
>                              else
> -                                error = motionErrorLumaSSD(src, stride,
> buf, blockX, blockY, old.x * factor, old.y * factor, blockSize,
> leastError);
> +                                error = motionErrorLumaSSD(m_metld, src,
> stride, buf, blockX, blockY, old.x * factor, old.y * factor, blockSize,
> leastError);
>
>                              if (error < leastError)
>                              {
> @@ -708,9 +704,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs,
> uint32_t mvStride, pixel* src
>                  }
>
>                  if (m_useSADinME)
> -                    error = motionErrorLumaSAD(src, stride, buf, blockX,
> blockY, 0, 0, blockSize, leastError);
> +                    error = motionErrorLumaSAD(m_metld, src, stride, buf,
> blockX, blockY, 0, 0, blockSize, leastError);
>                  else
> -                    error = motionErrorLumaSSD(src, stride, buf, blockX,
> blockY, 0, 0, blockSize, leastError);
> +                    error = motionErrorLumaSSD(m_metld, src, stride, buf,
> blockX, blockY, 0, 0, blockSize, leastError);
>
>                  if (error < leastError)
>                  {
> @@ -726,9 +722,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs,
> uint32_t mvStride, pixel* src
>                  for (int x2 = prevBest.x / m_motionVectorFactor - range;
> x2 <= prevBest.x / m_motionVectorFactor + range; x2++)
>                  {
>                      if (m_useSADinME)
> -                        error = motionErrorLumaSAD(src, stride, buf,
> blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor,
> blockSize, leastError);
> +                        error = motionErrorLumaSAD(m_metld, src, stride,
> buf, blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor,
> blockSize, leastError);
>                      else
> -                        error = motionErrorLumaSSD(src, stride, buf,
> blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor,
> blockSize, leastError);
> +                        error = motionErrorLumaSSD(m_metld, src, stride,
> buf, blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor,
> blockSize, leastError);
>                      if (error < leastError)
>                      {
>                          best.set(x2 * m_motionVectorFactor, y2 *
> m_motionVectorFactor);
> @@ -743,9 +739,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs,
> uint32_t mvStride, pixel* src
>                  MV aboveMV = mvs[idx];
>
>                  if (m_useSADinME)
> -                    error = motionErrorLumaSAD(src, stride, buf, blockX,
> blockY, aboveMV.x, aboveMV.y, blockSize, leastError);
> +                    error = motionErrorLumaSAD(m_metld, src, stride, buf,
> blockX, blockY, aboveMV.x, aboveMV.y, blockSize, leastError);
>                  else
> -                    error = motionErrorLumaSSD(src, stride, buf, blockX,
> blockY, aboveMV.x, aboveMV.y, blockSize, leastError);
> +                    error = motionErrorLumaSSD(m_metld, src, stride, buf,
> blockX, blockY, aboveMV.x, aboveMV.y, blockSize, leastError);
>
>                  if (error < leastError)
>                  {
> @@ -760,9 +756,9 @@ void TemporalFilter::motionEstimationLuma(MV *mvs,
> uint32_t mvStride, pixel* src
>                  MV leftMV = mvs[idx];
>
>                  if (m_useSADinME)
> -                    error = motionErrorLumaSAD(src, stride, buf, blockX,
> blockY, leftMV.x, leftMV.y, blockSize, leastError);
> +                    error = motionErrorLumaSAD(m_metld, src, stride, buf,
> blockX, blockY, leftMV.x, leftMV.y, blockSize, leastError);
>                  else
> -                    error = motionErrorLumaSSD(src, stride, buf, blockX,
> blockY, leftMV.x, leftMV.y, blockSize, leastError);
> +                    error = motionErrorLumaSSD(m_metld, src, stride, buf,
> blockX, blockY, leftMV.x, leftMV.y, blockSize, leastError);
>
>                  if (error < leastError)
>                  {
> @@ -802,7 +798,7 @@ void TemporalFilter::motionEstimationLuma(MV *mvs,
> uint32_t mvStride, pixel* src
>  }
>
>
> -void TemporalFilter::motionEstimationLumaDoubleRes(MV *mvs, uint32_t
> mvStride, PicYuv *orig, PicYuv *buffer, int blockSize,
> +void MotionEstimatorTLD::motionEstimationLumaDoubleRes(MotionEstimatorTLD&
> m_metld, MV *mvs, uint32_t mvStride, PicYuv *orig, PicYuv *buffer, int
> blockSize,
>      MV *previous, uint32_t prevMvStride, int factor, int* minError)
>  {
>
> @@ -822,7 +818,7 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV
> *mvs, uint32_t mvStride, P
>          {
>
>              const intptr_t pelOffset = blockY * orig->m_stride + blockX;
> -            m_metld->me.setSourcePU(orig->m_picOrg[0], orig->m_stride,
> pelOffset, blockSize, blockSize, X265_HEX_SEARCH, 1);
> +            m_metld.me.setSourcePU(orig->m_picOrg[0], orig->m_stride,
> pelOffset, blockSize, blockSize, X265_HEX_SEARCH, 1);
>
>              MV best(0, 0);
>              int leastError = INT_MAX;
> @@ -848,9 +844,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV
> *mvs, uint32_t mvStride, P
>                              MV old = previous[mvIdx];
>
>                              if (m_useSADinME)
> -                                error =
> motionErrorLumaSAD(orig->m_picOrg[0], orig->m_stride, buffer->m_picOrg[0],
> blockX, blockY, old.x * factor, old.y * factor, blockSize, leastError);
> +                                error = motionErrorLumaSAD(m_metld,
> orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
> blockY, old.x * factor, old.y * factor, blockSize, leastError);
>                              else
> -                                error =
> motionErrorLumaSSD(orig->m_picOrg[0], orig->m_stride, buffer->m_picOrg[0],
> blockX, blockY, old.x * factor, old.y * factor, blockSize, leastError);
> +                                error = motionErrorLumaSSD(m_metld,
> orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
> blockY, old.x * factor, old.y * factor, blockSize, leastError);
>
>                              if (error < leastError)
>                              {
> @@ -862,9 +858,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV
> *mvs, uint32_t mvStride, P
>                  }
>
>                  if (m_useSADinME)
> -                    error = motionErrorLumaSAD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, 0, 0, blockSize,
> leastError);
> +                    error = motionErrorLumaSAD(m_metld, orig->m_picOrg[0],
> (int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, 0, 0, blockSize,
> leastError);
>                  else
> -                    error = motionErrorLumaSSD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, 0, 0, blockSize,
> leastError);
> +                    error = motionErrorLumaSSD(m_metld, orig->m_picOrg[0],
> (int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, 0, 0, blockSize,
> leastError);
>
>                  if (error < leastError)
>                  {
> @@ -880,9 +876,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV
> *mvs, uint32_t mvStride, P
>                  for (int x2 = prevBest.x / m_motionVectorFactor - range;
> x2 <= prevBest.x / m_motionVectorFactor + range; x2++)
>                  {
>                      if (m_useSADinME)
> -                        error = motionErrorLumaSAD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2 *
> m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize, leastError);
> +                        error = motionErrorLumaSAD(m_metld,
> orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
> blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize,
> leastError);
>                      else
> -                        error = motionErrorLumaSSD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2 *
> m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize, leastError);
> +                        error = motionErrorLumaSSD(m_metld,
> orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
> blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize,
> leastError);
>
>                      if (error < leastError)
>                      {
> @@ -899,9 +895,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV
> *mvs, uint32_t mvStride, P
>                  for (int x2 = prevBest.x - doubleRange; x2 <= prevBest.x +
> doubleRange; x2++)
>                  {
>                      if (m_useSADinME)
> -                        error = motionErrorLumaSAD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2, y2, blockSize,
> leastError);
> +                        error = motionErrorLumaSAD(m_metld,
> orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
> blockY, x2, y2, blockSize, leastError);
>                      else
> -                        error = motionErrorLumaSSD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, x2, y2, blockSize,
> leastError);
> +                        error = motionErrorLumaSSD(m_metld,
> orig->m_picOrg[0], (int)orig->m_stride, buffer->m_picOrg[0], blockX,
> blockY, x2, y2, blockSize, leastError);
>
>                      if (error < leastError)
>                      {
> @@ -918,9 +914,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV
> *mvs, uint32_t mvStride, P
>                  MV aboveMV = mvs[idx];
>
>                  if (m_useSADinME)
> -                    error = motionErrorLumaSAD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, aboveMV.x, aboveMV.y,
> blockSize, leastError);
> +                    error = motionErrorLumaSAD(m_metld, orig->m_picOrg[0],
> (int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, aboveMV.x,
> aboveMV.y, blockSize, leastError);
>                  else
> -                    error = motionErrorLumaSSD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, aboveMV.x, aboveMV.y,
> blockSize, leastError);
> +                    error = motionErrorLumaSSD(m_metld, orig->m_picOrg[0],
> (int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, aboveMV.x,
> aboveMV.y, blockSize, leastError);
>
>                  if (error < leastError)
>                  {
> @@ -935,9 +931,9 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV
> *mvs, uint32_t mvStride, P
>                  MV leftMV = mvs[idx];
>
>                  if (m_useSADinME)
> -                    error = motionErrorLumaSAD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, leftMV.x, leftMV.y,
> blockSize, leastError);
> +                    error = motionErrorLumaSAD(m_metld, orig->m_picOrg[0],
> (int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, leftMV.x,
> leftMV.y, blockSize, leastError);
>                  else
> -                    error = motionErrorLumaSSD(orig->m_picOrg[0],
> orig->m_stride, buffer->m_picOrg[0], blockX, blockY, leftMV.x, leftMV.y,
> blockSize, leastError);
> +                    error = motionErrorLumaSSD(m_metld, orig->m_picOrg[0],
> (int)orig->m_stride, buffer->m_picOrg[0], blockX, blockY, leftMV.x,
> leftMV.y, blockSize, leastError);
>
>                  if (error < leastError)
>                  {
> diff --git a/source/common/temporalfilter.h
> b/source/common/temporalfilter.h
> index 3e03d7737..c4316aca6 100644
> --- a/source/common/temporalfilter.h
> +++ b/source/common/temporalfilter.h
> @@ -84,9 +84,47 @@ namespace X265_NS {
>          {
>              me.init(X265_CSP_I400);
>              me.setQP(X265_LOOKAHEAD_QP);
> +            predPUYuv.create(FENC_STRIDE, X265_CSP_I400);
> +            m_useSADinME = 1;
> +            m_motionVectorFactor = 16;
>          }
>
> -        ~MotionEstimatorTLD() {}
> +        Yuv  predPUYuv;
> +        int m_useSADinME;
> +        int m_motionVectorFactor;
> +        int32_t  m_bitDepth;
> +
> +        void init(const x265_param* param);
> +
> +        void motionEstimationLuma(MotionEstimatorTLD& m_tld, MV* mvs,
> uint32_t mvStride, pixel* src, int stride, int height, int width, pixel*
> buf, int bs, int sRange,
> +            MV* previous = 0, uint32_t prevmvStride = 0, int factor = 1);
> +
> +        void motionEstimationLumaDoubleRes(MotionEstimatorTLD& m_tld, MV*
> mvs, uint32_t mvStride, PicYuv* orig, PicYuv* buffer, int blockSize,
> +            MV* previous, uint32_t prevMvStride, int factor, int*
> minError);
> +
> +        int motionErrorLumaSSD(MotionEstimatorTLD& m_tld, pixel* src,
> +            int stride,
> +            pixel* buf,
> +            int x,
> +            int y,
> +            int dx,
> +            int dy,
> +            int bs,
> +            int besterror = 8 * 8 * 1024 * 1024);
> +
> +        int motionErrorLumaSAD(MotionEstimatorTLD& m_tld, pixel* src,
> +            int stride,
> +            pixel* buf,
> +            int x,
> +            int y,
> +            int dx,
> +            int dy,
> +            int bs,
> +            int besterror = 8 * 8 * 1024 * 1024);
> +
> +        ~MotionEstimatorTLD() {
> +            predPUYuv.destroy();
> +        }
>      };
>
>      struct TemporalFilterRefPicInfo
> @@ -134,7 +172,6 @@ namespace X265_NS {
>          double m_chromaFactor;
>          double m_sigmaMultiplier;
>          double m_sigmaZeroPoint;
> -        int m_motionVectorFactor;
>          int m_padding;
>
>          // Private member variables
> @@ -148,39 +185,11 @@ namespace X265_NS {
>          uint8_t m_sliceTypeConfig;
>
>          MotionEstimatorTLD* m_metld;
> -        Yuv  predPUYuv;
> -        int m_useSADinME;
>
>          int createRefPicInfo(TemporalFilterRefPicInfo* refFrame,
> x265_param* param);
>
>          void bilateralFilter(Frame* frame, TemporalFilterRefPicInfo*
> mctfRefList, double overallStrength);
>
> -        void motionEstimationLuma(MV *mvs, uint32_t mvStride, pixel* src,
> int stride, int height, int width, pixel* buf, int bs, int sRange,
> -            MV *previous = 0, uint32_t prevmvStride = 0, int factor = 1);
> -
> -        void motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride,
> PicYuv *orig, PicYuv *buffer, int blockSize,
> -            MV *previous, uint32_t prevMvStride, int factor, int*
> minError);
> -
> -        int motionErrorLumaSSD(pixel* src,
> -            int stride,
> -            pixel* buf,
> -            int x,
> -            int y,
> -            int dx,
> -            int dy,
> -            int bs,
> -            int besterror = 8 * 8 * 1024 * 1024);
> -
> -        int motionErrorLumaSAD(pixel* src,
> -            int stride,
> -            pixel* buf,
> -            int x,
> -            int y,
> -            int dx,
> -            int dy,
> -            int bs,
> -            int besterror = 8 * 8 * 1024 * 1024);
> -
>          void destroyRefPicInfo(TemporalFilterRefPicInfo* curFrame);
>
>          void applyMotion(MV *mvs, uint32_t mvsStride, PicYuv *input,
> PicYuv *output);
> diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp
> index 5fd885227..abc687ef4 100644
> --- a/source/encoder/slicetype.cpp
> +++ b/source/encoder/slicetype.cpp
> @@ -1128,7 +1128,10 @@ bool Lookahead::create()
>      m_scratch = X265_MALLOC(int, m_tld[0].widthInCU);
>
>      if (m_param->bEnableTemporalFilter)
> +    {
> +        m_metld = new MotionEstimatorTLD[numTLD];
>          m_origPicBuf = new OrigPicBuffer();
> +    }
>
>      return m_tld && m_scratch;
>  }
> @@ -1170,7 +1173,10 @@ void Lookahead::destroy()
>      }
>
>      if (m_param->bEnableTemporalFilter)
> +    {
>          delete m_origPicBuf;
> +        delete[] m_metld;
> +    }
>
>      X265_FREE(m_scratch);
>      delete [] m_tld;
> @@ -1794,19 +1800,17 @@ void Lookahead::compCostBref(Lowres **frames, int
> start, int end, int num)
>      }
>  }
>
> -void Lookahead::estimatelowresmotion(Frame* curframe)
> +void CostEstimateGroup::estimatelowresmotion(MotionEstimatorTLD& m_metld,
> Frame* curframe, int refId)
>  {
> +    m_metld.m_bitDepth = curframe->m_param->internalBitDepth;
> +    TemporalFilterRefPicInfo* ref = &curframe->m_mcstfRefList[refId];
>
> -    for (int i = 1; i <= curframe->m_mcstf->m_numRef; i++)
> -    {
> -        TemporalFilterRefPicInfo * ref = &curframe->m_mcstfRefList[i - 1];
> -
> -        curframe->m_mcstf->motionEstimationLuma(ref->mvs0, ref->mvsStride0, curframe->m_lowres.lowerResPlane[0], (curframe->m_lowres.lumaStride / 2), (curframe->m_lowres.lines / 2), (curframe->m_lowres.width / 2), ref->lowerRes, 16, m_param->searchRangeForLayer2);
> -        curframe->m_mcstf->motionEstimationLuma(ref->mvs1, ref->mvsStride1, curframe->m_lowres.lowresPlane[0], (curframe->m_lowres.lumaStride), (curframe->m_lowres.lines), (curframe->m_lowres.width), ref->lowres, 16, m_param->searchRangeForLayer1, ref->mvs0, ref->mvsStride0, 2);
> -        curframe->m_mcstf->motionEstimationLuma(ref->mvs2, ref->mvsStride2, curframe->m_fencPic->m_picOrg[0], curframe->m_fencPic->m_stride, curframe->m_fencPic->m_picHeight, curframe->m_fencPic->m_picWidth, ref->picBuffer->m_picOrg[0], 16, m_param->searchRangeForLayer0, ref->mvs1, ref->mvsStride1, 2);
> -        curframe->m_mcstf->motionEstimationLumaDoubleRes(ref->mvs, ref->mvsStride, curframe->m_fencPic, ref->picBuffer, 8, ref->mvs2, ref->mvsStride2, 1, ref->error);
> -    }
> +    m_metld.motionEstimationLuma(m_metld, ref->mvs0, ref->mvsStride0, curframe->m_lowres.lowerResPlane[0], (int)(curframe->m_lowres.lumaStride / 2), (curframe->m_lowres.lines / 2), (curframe->m_lowres.width / 2), ref->lowerRes, 16, curframe->m_param->searchRangeForLayer2);
> +    m_metld.motionEstimationLuma(m_metld, ref->mvs1, ref->mvsStride1, curframe->m_lowres.lowresPlane[0], (int)(curframe->m_lowres.lumaStride), (curframe->m_lowres.lines), (curframe->m_lowres.width), ref->lowres, 16, curframe->m_param->searchRangeForLayer1, ref->mvs0, ref->mvsStride0, 2);
> +    m_metld.motionEstimationLuma(m_metld, ref->mvs2, ref->mvsStride2, curframe->m_fencPic->m_picOrg[0], (int)curframe->m_fencPic->m_stride, curframe->m_fencPic->m_picHeight, curframe->m_fencPic->m_picWidth, ref->picBuffer->m_picOrg[0], 16, curframe->m_param->searchRangeForLayer0, ref->mvs1, ref->mvsStride1, 2);
> +    m_metld.motionEstimationLumaDoubleRes(m_metld, ref->mvs, ref->mvsStride, curframe->m_fencPic, ref->picBuffer, 8, ref->mvs2, ref->mvsStride2, 1, ref->error);
>
> +    curframe->m_lowres.lowresMcstfMvs[0][refId][0].x = 1;
>  }
>
>  inline int enqueueRefFrame(Frame* iterFrame, Frame* curFrame, bool isPreFiltered, int16_t i)
> @@ -2156,20 +2160,39 @@ void Lookahead::slicetypeDecide()
>          }
>      }
>
> -    Frame* frameEnc = m_inputQueue.first();
> -    for (int i = 0; i < m_inputQueue.size(); i++)
> +    if (m_bBatchMotionSearch && m_param->bEnableTemporalFilter)
>      {
> -        if (m_param->bEnableTemporalFilter && isFilterThisframe(frameEnc->m_mcstf->m_sliceTypeConfig, frameEnc->m_lowres.sliceType))
> +        /* pre-calculate all motion searches, using many worker threads */
> +        CostEstimateGroup estGroup(*this, frames);
> +        Frame* frameEnc = m_inputQueue.first();
> +        for (int b = 0; b < m_inputQueue.size(); b++)
>          {
> -            if (!generatemcstf(frameEnc, m_origPicBuf->m_mcstfPicList, m_inputQueue.last()->m_poc))
> +            if (m_param->bEnableTemporalFilter && isFilterThisframe(frameEnc->m_mcstf->m_sliceTypeConfig, frameEnc->m_lowres.sliceType))
>              {
> -                x265_log(m_param, X265_LOG_ERROR, "Failed to initialize MCSTFReferencePicInfo at POC %d\n", frameEnc->m_poc);
> -                fflush(stderr);
> -            }
> +                if (!generatemcstf(frameEnc, m_origPicBuf->m_mcstfPicList, m_inputQueue.last()->m_poc))
> +                {
> +                    x265_log(m_param, X265_LOG_ERROR, "Failed to initialize MCSTFReferencePicInfo at POC %d\n", frameEnc->m_poc);
> +                    fflush(stderr);
> +                }
> +
> +                for (int j = 1; j <= frameEnc->m_mcstf->m_numRef; j++)
> +                {
> +                    TemporalFilterRefPicInfo* ref = &frameEnc->m_mcstfRefList[j - 1];
> +                    int i = ref->poc;
> +
> +                    /* Skip search if already done */
> +                    if (frames[b + 1]->lowresMcstfMvs[0][j - 1][0].x != 0x7FFF)
> +                        continue;
>
> -            estimatelowresmotion(frameEnc);
> +                    estGroup.add(j - 1, i, frameEnc->m_poc);
> +                }
> +            }
> +            frameEnc = frameEnc->m_next;
>          }
> -         frameEnc = frameEnc->m_next;
> +
> +        /* auto-disable after the first batch if pool is small */
> +        m_bBatchMotionSearch &= m_pool->m_numWorkers >= 4;
> +        estGroup.finishBatch();
>      }
>
>      if (m_param->bEnableTemporalSubLayers > 2)
> @@ -4029,6 +4052,7 @@ void CostEstimateGroup::processTasks(int workerThreadID)
>      if (workerThreadID < 0)
>          id = pool ? pool->m_numWorkers : 0;
>      LookaheadTLD& tld = m_lookahead.m_tld[id];
> +    MotionEstimatorTLD& m_metld = m_lookahead.m_metld[id];
>
>      m_lock.acquire();
>      while (m_jobAcquired < m_jobTotal)
> @@ -4042,7 +4066,14 @@ void CostEstimateGroup::processTasks(int workerThreadID)
>              ProfileScopeEvent(estCostSingle);
>
>              Estimate& e = m_estimates[i];
> -            estimateFrameCost(tld, e.p0, e.p1, e.b, false);
> +            Frame* curFrame = m_lookahead.m_inputQueue.getPOC(e.b);
> +
> +            if (m_lookahead.m_param->bEnableTemporalFilter && curFrame && (curFrame->m_lowres.sliceType == X265_TYPE_IDR || curFrame->m_lowres.sliceType == X265_TYPE_I || curFrame->m_lowres.sliceType == X265_TYPE_P))
> +            {
> +                estimatelowresmotion(m_metld, curFrame, e.p0);
> +            }
> +            else
> +                estimateFrameCost(tld, e.p0, e.p1, e.b, false);
>          }
>          else
>          {
> diff --git a/source/encoder/slicetype.h b/source/encoder/slicetype.h
> index 214e295b7..be6ac8112 100644
> --- a/source/encoder/slicetype.h
> +++ b/source/encoder/slicetype.h
> @@ -204,6 +204,7 @@ public:
>      int8_t                  m_gopId;
>
>      OrigPicBuffer*          m_origPicBuf;
> +    MotionEstimatorTLD*     m_metld;
>
>      Lookahead(x265_param *param, ThreadPool *pool);
>  #if DETAILED_CU_STATS
> @@ -227,7 +228,6 @@ public:
>      void    getEstimatedPictureCost(Frame *pic);
>      void    setLookaheadQueue();
>      int     findSliceType(int poc);
> -    void    estimatelowresmotion(Frame* frame);
>      bool    generatemcstf(Frame * frame, PicList refPic, int poclast);
>      bool    isFilterThisframe(uint8_t sliceTypeConfig, int curSliceType);
>
> @@ -327,6 +327,8 @@ protected:
>      int64_t estimateFrameCost(LookaheadTLD& tld, int p0, int p1, int b, bool intraPenalty);
>      void    estimateCUCost(LookaheadTLD& tld, int cux, int cuy, int p0, int p1, int b, bool bDoSearch[2], bool lastRow, int slice, bool hme);
>
> +    void    estimatelowresmotion(MotionEstimatorTLD& m_metld, Frame* curframe, int refId);
> +
>      CostEstimateGroup& operator=(const CostEstimateGroup&);
>  };
>
> --
> 2.36.0.windows.1
> -------------- next part --------------
> An HTML attachment was scrubbed...
> URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20241111/2fafdc56/attachment.htm>
> -------------- next part --------------
> A non-text attachment was scrubbed...
> Name: 0010-Added-batch-motion-search-for-all-planes-in-Lookahea.patch
> Type: application/octet-stream
> Size: 28374 bytes
> Desc: not available
> URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20241111/2fafdc56/attachment.obj>
>
> ------------------------------
>
> Subject: Digest Footer
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>
> ------------------------------
>
> End of x265-devel Digest, Vol 136, Issue 18
> *******************************************
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20241112/7eaff3d9/attachment-0001.htm>


More information about the x265-devel mailing list