[x265] [PATCH 1 of 2] Improvements to hist-based scenecut algorithm.

Pooja Venkatesan pooja at multicorewareinc.com
Mon Jun 29 12:41:57 CEST 2020


Hi,

I am working on the review comments for this patch series and will send
the updated patches soon. Stay tuned!

Regards,
*Pooja Venkatesan*,
Video Codec Engineer,
Media & AI analytics BU



On Thu, Jun 25, 2020 at 9:00 PM Pooja Venkatesan <pooja at multicorewareinc.com>
wrote:

> From 2777c2e3389eaf556f3420bc0717171bbcf97e52 Mon Sep 17 00:00:00 2001
> From: Pooja Venkatesan <pooja at multicorewareinc.com>
> Date: Thu, 25 Jun 2020 20:42:50 +0530
> Subject: [PATCH] Improvements to hist-based scenecut algorithm.
>
> This patch does the following:
> 1. Add min and max threshold intervals to detect scenecuts.
> 2. For frames that fall between the min and max thresholds, compare the
>    colour and edge histograms along with the inter and intra SATD costs
>    to detect scenecuts.
> 3. Handle scene transitions.
> 4. Change default value of hist-threshold to 0.03
> ---
>  doc/reST/cli.rst             |  7 +--
>  source/common/lowres.cpp     |  2 +
>  source/common/lowres.h       |  5 ++
>  source/common/param.cpp      |  2 +-
>  source/encoder/encoder.cpp   | 25 ++++++++--
>  source/encoder/encoder.h     |  2 +-
>  source/encoder/slicetype.cpp | 88 +++++++++++++++++++++++++++---------
>  source/x265.h                |  2 +-
>  8 files changed, 101 insertions(+), 32 deletions(-)
>
> diff --git a/doc/reST/cli.rst b/doc/reST/cli.rst
> index b9d795ace..23b74c3d8 100644
> --- a/doc/reST/cli.rst
> +++ b/doc/reST/cli.rst
> @@ -1468,9 +1468,10 @@ Slice decision options
>  .. option:: --hist-threshold <0.0..1.0>
>
>   This value represents the threshold for normalized SAD of edge
> histograms used in scenecut detection.
> - This requires :option:`--hist-scenecut` to be enabled. For example, a
> value of 0.2 indicates that a frame with normalized SAD value
> - greater than 0.2 against the previous frame as scenecut.
> - Default 0.01.
> + This requires :option:`--hist-scenecut` to be enabled. For example, a
> value of 0.2 indicates that a frame with normalized SAD value
> + greater than 0.2 against the previous frame as scenecut.
> + Increasing the threshold reduces the number of scenecuts detected.
> + Default 0.03.
>
>  .. option:: --radl <integer>
>
> diff --git a/source/common/lowres.cpp b/source/common/lowres.cpp
> index e8dd991bc..8e19ac17c 100644
> --- a/source/common/lowres.cpp
> +++ b/source/common/lowres.cpp
> @@ -266,6 +266,8 @@ void Lowres::init(PicYuv *origPic, int poc)
>      indB = 0;
>      memset(costEst, -1, sizeof(costEst));
>      memset(weightedCostDelta, 0, sizeof(weightedCostDelta));
> +    interPCostPercDiff = 0.0;
> +    intraCostPercDiff = 0.0;
>
>      if (qpAqOffset && invQscaleFactor)
>          memset(costEstAq, -1, sizeof(costEstAq));
> diff --git a/source/common/lowres.h b/source/common/lowres.h
> index 5c50fad67..200b1f032 100644
> --- a/source/common/lowres.h
> +++ b/source/common/lowres.h
> @@ -234,6 +234,11 @@ struct Lowres : public ReferencePlanes
>      uint16_t* propagateCost;
>      double    weightedCostDelta[X265_BFRAME_MAX + 2];
>      ReferencePlanes weightedRef[X265_BFRAME_MAX + 2];
> +    /* For hist-based scenecut */
> +    bool   m_bIsMaxThres;
> +    double interPCostPercDiff;
> +    double intraCostPercDiff;
> +
>      bool create(x265_param* param, PicYuv *origPic, uint32_t qgSize);
>      void destroy();
>      void init(PicYuv *origPic, int poc);
> diff --git a/source/common/param.cpp b/source/common/param.cpp
> index 925f0c460..8c0498efc 100644
> --- a/source/common/param.cpp
> +++ b/source/common/param.cpp
> @@ -168,7 +168,7 @@ void x265_param_default(x265_param* param)
>      param->bFrameAdaptive = X265_B_ADAPT_TRELLIS;
>      param->bBPyramid = 1;
>      param->scenecutThreshold = 40; /* Magic number pulled in from x264 */
> -    param->edgeTransitionThreshold = 0.01;
> +    param->edgeTransitionThreshold = 0.03;
>      param->bHistBasedSceneCut = 0;
>      param->lookaheadSlices = 8;
>      param->lookaheadThreads = 0;
> diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp
> index f6bc5408d..bec7ff5c0 100644
> --- a/source/encoder/encoder.cpp
> +++ b/source/encoder/encoder.cpp
> @@ -1528,8 +1528,12 @@ double Encoder::normalizeRange(int32_t value,
> int32_t minValue, int32_t maxValue
>      return (double)(value - minValue) * (rangeEnd - rangeStart) /
> (maxValue - minValue) + rangeStart;
>  }
>
> -void Encoder::findSceneCuts(x265_picture *pic, bool& bDup, double
> maxUVSad, double edgeSad)
> +void Encoder::findSceneCuts(x265_picture *pic, bool& isMax, bool& bDup,
> double maxUVSad, double edgeSad)
>  {
> +    double minEdgeT = m_edgeHistThreshold * 0.5;
> +    double minChromaT = minEdgeT * 10.0;
> +    double maxEdgeT = m_edgeHistThreshold * 1.5;
> +    double maxChromaT = maxEdgeT * 10.0;
>      pic->frameData.bScenecut = false;
>
>      if (pic->poc == 0)
> @@ -1544,11 +1548,20 @@ void Encoder::findSceneCuts(x265_picture *pic,
> bool& bDup, double maxUVSad, doub
>          {
>              bDup = true;
>          }
> -        else if (edgeSad > m_scaledEdgeThreshold || maxUVSad >=
> m_scaledChromaThreshold || (edgeSad > m_edgeHistThreshold && maxUVSad >=
> m_chromaHistThreshold))
> +        else if (edgeSad < minEdgeT && maxUVSad < minChromaT)
> +        {
> +            pic->frameData.bScenecut = false;
> +        }
> +        else if (edgeSad > maxEdgeT && maxUVSad > maxChromaT)
> +        {
> +            pic->frameData.bScenecut = true;
> +            isMax = true;
> +        }
> +        else if (edgeSad > m_scaledEdgeThreshold || maxUVSad >=
> m_scaledChromaThreshold
> +                 || (edgeSad > m_edgeHistThreshold && maxUVSad >=
> m_chromaHistThreshold))
>          {
>              pic->frameData.bScenecut = true;
>              bDup = false;
> -            x265_log(m_param, X265_LOG_DEBUG, "scene cut at %d \n",
> pic->poc);
>          }
>      }
>  }
> @@ -1581,6 +1594,7 @@ int Encoder::encode(const x265_picture* pic_in,
> x265_picture* pic_out)
>      bool dontRead = false;
>      bool bdropFrame = false;
>      bool dropflag = false;
> +    bool isMaxThreshold = false;
>
>      if (m_exportedPic)
>      {
> @@ -1607,7 +1621,7 @@ int Encoder::encode(const x265_picture* pic_in,
> x265_picture* pic_out)
>              {
>                  double maxUVSad = 0.0, edgeSad = 0.0;
>                  computeHistogramSAD(&maxUVSad, &edgeSad, pic_in->poc);
> -                findSceneCuts(pic, bdropFrame, maxUVSad, edgeSad);
> +                findSceneCuts(pic, isMaxThreshold, bdropFrame, maxUVSad,
> edgeSad);
>              }
>          }
>
> @@ -1786,6 +1800,7 @@ int Encoder::encode(const x265_picture* pic_in,
> x265_picture* pic_out)
>          if (m_param->bHistBasedSceneCut)
>          {
>              inFrame->m_lowres.bScenecut = (inputPic->frameData.bScenecut
> == 1) ? true : false;
> +            inFrame->m_lowres.m_bIsMaxThres = isMaxThreshold;
>          }
>          if (m_param->bHistBasedSceneCut && m_param->analysisSave)
>          {
> @@ -4261,7 +4276,7 @@ void Encoder::configure(x265_param *p)
>
>     if (p->bHistBasedSceneCut && !p->edgeTransitionThreshold)
>     {
> -       p->edgeTransitionThreshold = 0.01;
> +       p->edgeTransitionThreshold = 0.03;
>         x265_log(p, X265_LOG_WARNING, "using  default threshold %.2lf for
> scene cut detection\n", p->edgeTransitionThreshold);
>     }
>
> diff --git a/source/encoder/encoder.h b/source/encoder/encoder.h
> index fd6b3e72c..1d4fe2476 100644
> --- a/source/encoder/encoder.h
> +++ b/source/encoder/encoder.h
> @@ -373,7 +373,7 @@ public:
>      bool computeHistograms(x265_picture *pic);
>      void computeHistogramSAD(double *maxUVNormalizedSAD, double
> *edgeNormalizedSAD, int curPoc);
>      double normalizeRange(int32_t value, int32_t minValue, int32_t
> maxValue, double rangeStart, double rangeEnd);
> -    void findSceneCuts(x265_picture *pic, bool& bDup, double
> m_maxUVSADVal, double m_edgeSADVal);
> +    void findSceneCuts(x265_picture *pic, bool& isMax, bool& bDup, double
> m_maxUVSADVal, double m_edgeSADVal);
>
>      void initRefIdx();
>      void analyseRefIdx(int *numRefIdx);
> diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp
> index 0a95e77d2..27052ca4e 100644
> --- a/source/encoder/slicetype.cpp
> +++ b/source/encoder/slicetype.cpp
> @@ -2001,10 +2001,40 @@ void Lookahead::slicetypeAnalyse(Lowres **frames,
> bool bKeyframe)
>      int numAnalyzed = numFrames;
>      bool isScenecut = false;
>
> -    /* When scenecut threshold is set, use scenecut detection for I frame
> placements */
>      if (m_param->bHistBasedSceneCut)
> -        isScenecut = frames[1]->bScenecut;
> -    else
> +    {
> +        for (int i = numFrames - 1; i > 0; i--)
> +        {
> +            if (frames[i]->interPCostPercDiff > 0.0)
> +                continue;
> +            int64_t interCost = frames[i]->costEst[1][0];
> +            int64_t intraCost = frames[i]->costEst[0][0];
> +            if (interCost < 0 || intraCost < 0)
> +                continue;
> +            int times = 0;
> +            double averageP = 0.0, averageI = 0.0;
> +            for (int j = i - 1; j >= 0 && times < 5; j--, times++)
> +            {
> +                if (frames[j]->costEst[0][0] > 0 &&
> frames[j]->costEst[1][0] > 0)
> +                {
> +                    averageI += frames[j]->costEst[0][0];
> +                    averageP += frames[j]->costEst[1][0];
> +                }
> +                else
> +                    times--;
> +            }
> +            if (times)
> +            {
> +                averageI = averageI / times;
> +                averageP = averageP / times;
> +                frames[i]->interPCostPercDiff = abs(interCost - averageP)
> / X265_MIN(interCost, averageP) * 100;
> +                frames[i]->intraCostPercDiff = abs(intraCost - averageI)
> / X265_MIN(intraCost, averageI) * 100;
> +            }
> +        }
> +    }
> +
> +    /* When scenecut threshold is set, use scenecut detection for I frame
> placements */
> +    if (!m_param->bHistBasedSceneCut || (m_param->bHistBasedSceneCut &&
> frames[1]->bScenecut))
>          isScenecut = scenecut(frames, 0, 1, true, origNumFrames);
>
>      if (isScenecut && (m_param->bHistBasedSceneCut ||
> m_param->scenecutThreshold))
> @@ -2018,17 +2048,16 @@ void Lookahead::slicetypeAnalyse(Lowres **frames,
> bool bKeyframe)
>          m_extendGopBoundary = false;
>          for (int i = m_param->bframes + 1; i < origNumFrames; i +=
> m_param->bframes + 1)
>          {
> -            if (!m_param->bHistBasedSceneCut)
> +            if (!m_param->bHistBasedSceneCut ||
> (m_param->bHistBasedSceneCut && frames[i + 1]->bScenecut))
>                  scenecut(frames, i, i + 1, true, origNumFrames);
>
>              for (int j = i + 1; j <= X265_MIN(i + m_param->bframes + 1,
> origNumFrames); j++)
>              {
> -                if ((!m_param->bHistBasedSceneCut && frames[j]->bScenecut
> && scenecutInternal(frames, j - 1, j, true)) ||
> -                    (m_param->bHistBasedSceneCut && frames[j]->bScenecut))
> -                    {
> -                        m_extendGopBoundary = true;
> -                        break;
> -                    }
> +                if (frames[j]->bScenecut && scenecutInternal(frames, j -
> 1, j, true))
> +                {
> +                    m_extendGopBoundary = true;
> +                    break;
> +                }
>              }
>              if (m_extendGopBoundary)
>                  break;
> @@ -2133,14 +2162,15 @@ void Lookahead::slicetypeAnalyse(Lowres **frames,
> bool bKeyframe)
>          {
>              for (int j = 1; j < numBFrames + 1; j++)
>              {
> -                if ((!m_param->bHistBasedSceneCut && scenecut(frames, j,
> j + 1, false, origNumFrames)) ||
> -                    (m_param->bHistBasedSceneCut && frames[j +
> 1]->bScenecut) ||
> -                    (bForceRADL && (frames[j]->frameNum == preRADL)))
> -                    {
> -                        frames[j]->sliceType = X265_TYPE_P;
> -                        numAnalyzed = j;
> -                        break;
> -                    }
> +                bool isNextScenecut = false;
> +                if (!m_param->bHistBasedSceneCut ||
> (m_param->bHistBasedSceneCut && frames[j + 1]->bScenecut))
> +                    isNextScenecut = scenecut(frames, j, j + 1, false,
> origNumFrames);
> +                if (isNextScenecut || (bForceRADL && frames[j]->frameNum
> == preRADL))
> +                {
> +                    frames[j]->sliceType = X265_TYPE_P;
> +                    numAnalyzed = j;
> +                    break;
> +                }
>              }
>          }
>          resetStart = bKeyframe ? 1 : X265_MIN(numBFrames + 2, numAnalyzed
> + 1);
> @@ -2203,7 +2233,7 @@ bool Lookahead::scenecut(Lowres **frames, int p0,
> int p1, bool bRealScenecut, in
>           * and not considered a scenecut. */
>          for (int cp1 = p1; cp1 <= maxp1; cp1++)
>          {
> -            if (!scenecutInternal(frames, p0, cp1, false))
> +            if (!m_param->bHistBasedSceneCut && !scenecutInternal(frames,
> p0, cp1, false))
>              {
>                  /* Any frame in between p0 and cur_p1 cannot be a real
> scenecut. */
>                  for (int i = cp1; i > p0; i--)
> @@ -2212,7 +2242,7 @@ bool Lookahead::scenecut(Lowres **frames, int p0,
> int p1, bool bRealScenecut, in
>                      noScenecuts = false;
>                  }
>              }
> -            else if (scenecutInternal(frames, cp1 - 1, cp1, false))
> +            else if ((m_param->bHistBasedSceneCut &&
> frames[cp1]->m_bIsMaxThres) || scenecutInternal(frames, cp1 - 1, cp1,
> false))
>              {
>                  /* If current frame is a Scenecut from p0 frame as well
> as Scenecut from
>                   * preceeding frame, mark it as a Scenecut */
> @@ -2273,6 +2303,10 @@ bool Lookahead::scenecut(Lowres **frames, int p0,
> int p1, bool bRealScenecut, in
>
>      if (!frames[p1]->bScenecut)
>          return false;
> +    /* Check only scene transitions if max threshold */
> +    if (m_param->bHistBasedSceneCut && frames[p1]->m_bIsMaxThres)
> +        return frames[p1]->bScenecut;
> +
>      return scenecutInternal(frames, p0, p1, bRealScenecut);
>  }
>
> @@ -2289,7 +2323,19 @@ bool Lookahead::scenecutInternal(Lowres **frames,
> int p0, int p1, bool bRealScen
>      /* magic numbers pulled out of thin air */
>      float threshMin = (float)(threshMax * 0.25);
>      double bias = m_param->scenecutBias;
> -    if (bRealScenecut)
> +    if (m_param->bHistBasedSceneCut)
> +    {
> +        double minT = 50.0 * (1 + m_param->edgeTransitionThreshold);
> +        if (frame->interPCostPercDiff > minT || frame->intraCostPercDiff
> > minT)
> +        {
> +            if (bRealScenecut && frame->bScenecut)
> +                x265_log(m_param, X265_LOG_DEBUG, "scene cut at %d \n",
> frame->frameNum);
> +            return frame->bScenecut;
> +        }
> +        else
> +            return false;
> +    }
> +    else if (bRealScenecut)
>      {
>          if (m_param->keyframeMin == m_param->keyframeMax)
>              threshMin = threshMax;
> diff --git a/source/x265.h b/source/x265.h
> index 1e6f9ece6..32feb2bca 100644
> --- a/source/x265.h
> +++ b/source/x265.h
> @@ -1860,7 +1860,7 @@ typedef struct x265_param
>      /* A genuine threshold used for histogram based scene cut detection.
>       * This threshold determines whether a frame is a scenecut or not
>       * when compared against the edge and chroma histogram sad values.
> -     * Default 0.01. Range: Real number in the interval (0,2). */
> +     * Default 0.03. Range: Real number in the interval (0,1). */
>      double    edgeTransitionThreshold;
>
>      /* Enables histogram based scenecut detection algorithm to detect
> scenecuts. Default disabled */
> --
> 2.24.0.windows.2
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20200629/d559072d/attachment-0001.html>


More information about the x265-devel mailing list