[x265] [PATCH 1 of 2] Improvements to hist-based scenecut algorithm.

Aruna Matheswaran aruna at multicorewareinc.com
Mon Jun 29 16:42:31 CEST 2020


Pushed into master.

On Mon, Jun 29, 2020 at 6:43 PM Pooja Venkatesan <pooja at multicorewareinc.com>
wrote:

> From 671506e40fbbb0aed825013e434c7c39cec2bb93 Mon Sep 17 00:00:00 2001
> From: Pooja Venkatesan <pooja at multicorewareinc.com>
> Date: Thu, 25 Jun 2020 20:42:50 +0530
> Subject: [PATCH] Improvements to hist-based scenecut algorithm.
>
> This patch does the following:
> 1. Strengthens scenecut detection using threshold intervals, spatial and
> temporal properties.
> 2. Changes the default value of hist-threshold to 0.03.
> ---
>  doc/reST/cli.rst             |  7 +--
>  source/common/lowres.cpp     |  3 ++
>  source/common/lowres.h       |  5 ++
>  source/common/param.cpp      |  2 +-
>  source/encoder/encoder.cpp   | 25 ++++++++--
>  source/encoder/encoder.h     |  5 +-
>  source/encoder/slicetype.cpp | 89 +++++++++++++++++++++++++++---------
>  source/encoder/slicetype.h   |  1 +
>  source/x265.h                |  2 +-
>  9 files changed, 107 insertions(+), 32 deletions(-)
>
> diff --git a/doc/reST/cli.rst b/doc/reST/cli.rst
> index b9d795ace..23b74c3d8 100644
> --- a/doc/reST/cli.rst
> +++ b/doc/reST/cli.rst
> @@ -1468,9 +1468,10 @@ Slice decision options
>  .. option:: --hist-threshold <0.0..1.0>
>
>   This value represents the threshold for normalized SAD of edge
> histograms used in scenecut detection.
> - This requires :option:`--hist-scenecut` to be enabled. For example, a
> value of 0.2 indicates that a frame with normalized SAD value
> - greater than 0.2 against the previous frame as scenecut.
> - Default 0.01.
> + This requires :option:`--hist-scenecut` to be enabled. For example, a
> value of 0.2 indicates that a frame with normalized SAD value
> + greater than 0.2 against the previous frame is treated as a scenecut.
> + Increasing the threshold reduces the number of scenecuts detected.
> + Default 0.03.
>
>  .. option:: --radl <integer>
>
> diff --git a/source/common/lowres.cpp b/source/common/lowres.cpp
> index e8dd991bc..db1c2d159 100644
> --- a/source/common/lowres.cpp
> +++ b/source/common/lowres.cpp
> @@ -266,6 +266,9 @@ void Lowres::init(PicYuv *origPic, int poc)
>      indB = 0;
>      memset(costEst, -1, sizeof(costEst));
>      memset(weightedCostDelta, 0, sizeof(weightedCostDelta));
> +    interPCostPercDiff = 0.0;
> +    intraCostPercDiff = 0.0;
> +    m_bIsMaxThres = false;
>
>      if (qpAqOffset && invQscaleFactor)
>          memset(costEstAq, -1, sizeof(costEstAq));
> diff --git a/source/common/lowres.h b/source/common/lowres.h
> index 5c50fad67..200b1f032 100644
> --- a/source/common/lowres.h
> +++ b/source/common/lowres.h
> @@ -234,6 +234,11 @@ struct Lowres : public ReferencePlanes
>      uint16_t* propagateCost;
>      double    weightedCostDelta[X265_BFRAME_MAX + 2];
>      ReferencePlanes weightedRef[X265_BFRAME_MAX + 2];
> +    /* For hist-based scenecut */
> +    bool   m_bIsMaxThres;
> +    double interPCostPercDiff;
> +    double intraCostPercDiff;
> +
>      bool create(x265_param* param, PicYuv *origPic, uint32_t qgSize);
>      void destroy();
>      void init(PicYuv *origPic, int poc);
> diff --git a/source/common/param.cpp b/source/common/param.cpp
> index 925f0c460..8c0498efc 100644
> --- a/source/common/param.cpp
> +++ b/source/common/param.cpp
> @@ -168,7 +168,7 @@ void x265_param_default(x265_param* param)
>      param->bFrameAdaptive = X265_B_ADAPT_TRELLIS;
>      param->bBPyramid = 1;
>      param->scenecutThreshold = 40; /* Magic number pulled in from x264 */
> -    param->edgeTransitionThreshold = 0.01;
> +    param->edgeTransitionThreshold = 0.03;
>      param->bHistBasedSceneCut = 0;
>      param->lookaheadSlices = 8;
>      param->lookaheadThreads = 0;
> diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp
> index f6bc5408d..0c6fd80bf 100644
> --- a/source/encoder/encoder.cpp
> +++ b/source/encoder/encoder.cpp
> @@ -1528,8 +1528,12 @@ double Encoder::normalizeRange(int32_t value,
> int32_t minValue, int32_t maxValue
>      return (double)(value - minValue) * (rangeEnd - rangeStart) /
> (maxValue - minValue) + rangeStart;
>  }
>
> -void Encoder::findSceneCuts(x265_picture *pic, bool& bDup, double
> maxUVSad, double edgeSad)
> +void Encoder::findSceneCuts(x265_picture *pic, bool& bDup, double
> maxUVSad, double edgeSad, bool& isMaxThres)
>  {
> +    double minEdgeT = m_edgeHistThreshold * MIN_EDGE_FACTOR;
> +    double minChromaT = minEdgeT * SCENECUT_CHROMA_FACTOR;
> +    double maxEdgeT = m_edgeHistThreshold * MAX_EDGE_FACTOR;
> +    double maxChromaT = maxEdgeT * SCENECUT_CHROMA_FACTOR;
>      pic->frameData.bScenecut = false;
>
>      if (pic->poc == 0)
> @@ -1544,11 +1548,20 @@ void Encoder::findSceneCuts(x265_picture *pic,
> bool& bDup, double maxUVSad, doub
>          {
>              bDup = true;
>          }
> -        else if (edgeSad > m_scaledEdgeThreshold || maxUVSad >=
> m_scaledChromaThreshold || (edgeSad > m_edgeHistThreshold && maxUVSad >=
> m_chromaHistThreshold))
> +        else if (edgeSad < minEdgeT && maxUVSad < minChromaT)
> +        {
> +            pic->frameData.bScenecut = false;
> +        }
> +        else if (edgeSad > maxEdgeT && maxUVSad > maxChromaT)
> +        {
> +            pic->frameData.bScenecut = true;
> +            isMaxThres = true;
> +        }
> +        else if (edgeSad > m_scaledEdgeThreshold || maxUVSad >=
> m_scaledChromaThreshold
> +                 || (edgeSad > m_edgeHistThreshold && maxUVSad >=
> m_chromaHistThreshold))
>          {
>              pic->frameData.bScenecut = true;
>              bDup = false;
> -            x265_log(m_param, X265_LOG_DEBUG, "scene cut at %d \n",
> pic->poc);
>          }
>      }
>  }
> @@ -1581,6 +1594,7 @@ int Encoder::encode(const x265_picture* pic_in,
> x265_picture* pic_out)
>      bool dontRead = false;
>      bool bdropFrame = false;
>      bool dropflag = false;
> +    bool isMaxThres = false;
>
>      if (m_exportedPic)
>      {
> @@ -1607,7 +1621,7 @@ int Encoder::encode(const x265_picture* pic_in,
> x265_picture* pic_out)
>              {
>                  double maxUVSad = 0.0, edgeSad = 0.0;
>                  computeHistogramSAD(&maxUVSad, &edgeSad, pic_in->poc);
> -                findSceneCuts(pic, bdropFrame, maxUVSad, edgeSad);
> +                findSceneCuts(pic, bdropFrame, maxUVSad, edgeSad,
> isMaxThres);
>              }
>          }
>
> @@ -1786,6 +1800,7 @@ int Encoder::encode(const x265_picture* pic_in,
> x265_picture* pic_out)
>          if (m_param->bHistBasedSceneCut)
>          {
>              inFrame->m_lowres.bScenecut = (inputPic->frameData.bScenecut
> == 1) ? true : false;
> +            inFrame->m_lowres.m_bIsMaxThres = isMaxThres;
>          }
>          if (m_param->bHistBasedSceneCut && m_param->analysisSave)
>          {
> @@ -4261,7 +4276,7 @@ void Encoder::configure(x265_param *p)
>
>     if (p->bHistBasedSceneCut && !p->edgeTransitionThreshold)
>     {
> -       p->edgeTransitionThreshold = 0.01;
> +       p->edgeTransitionThreshold = 0.03;
>         x265_log(p, X265_LOG_WARNING, "using  default threshold %.2lf for
> scene cut detection\n", p->edgeTransitionThreshold);
>     }
>
> diff --git a/source/encoder/encoder.h b/source/encoder/encoder.h
> index fd6b3e72c..507c42d5e 100644
> --- a/source/encoder/encoder.h
> +++ b/source/encoder/encoder.h
> @@ -165,6 +165,9 @@ class FrameData;
>
>  #define MAX_SCENECUT_THRESHOLD 1.0
>  #define SCENECUT_STRENGTH_FACTOR 2.0
> +#define MIN_EDGE_FACTOR 0.5
> +#define MAX_EDGE_FACTOR 1.5
> +#define SCENECUT_CHROMA_FACTOR 10.0
>
>  class Encoder : public x265_encoder
>  {
> @@ -373,7 +376,7 @@ public:
>      bool computeHistograms(x265_picture *pic);
>      void computeHistogramSAD(double *maxUVNormalizedSAD, double
> *edgeNormalizedSAD, int curPoc);
>      double normalizeRange(int32_t value, int32_t minValue, int32_t
> maxValue, double rangeStart, double rangeEnd);
> -    void findSceneCuts(x265_picture *pic, bool& bDup, double
> m_maxUVSADVal, double m_edgeSADVal);
> +    void findSceneCuts(x265_picture *pic, bool& bDup, double
> m_maxUVSADVal, double m_edgeSADVal, bool& isMaxThres);
>
>      void initRefIdx();
>      void analyseRefIdx(int *numRefIdx);
> diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp
> index 0a95e77d2..d3783cfe1 100644
> --- a/source/encoder/slicetype.cpp
> +++ b/source/encoder/slicetype.cpp
> @@ -2001,10 +2001,41 @@ void Lookahead::slicetypeAnalyse(Lowres **frames,
> bool bKeyframe)
>      int numAnalyzed = numFrames;
>      bool isScenecut = false;
>
> -    /* When scenecut threshold is set, use scenecut detection for I frame
> placements */
> +    /* Temporal computations for scenecut detection */
>      if (m_param->bHistBasedSceneCut)
> -        isScenecut = frames[1]->bScenecut;
> -    else
> +    {
> +        for (int i = numFrames - 1; i > 0; i--)
> +        {
> +            if (frames[i]->interPCostPercDiff > 0.0)
> +                continue;
> +            int64_t interCost = frames[i]->costEst[1][0];
> +            int64_t intraCost = frames[i]->costEst[0][0];
> +            if (interCost < 0 || intraCost < 0)
> +                continue;
> +            int times = 0;
> +            double averagePcost = 0.0, averageIcost = 0.0;
> +            for (int j = i - 1; j >= 0 && times < 5; j--, times++)
> +            {
> +                if (frames[j]->costEst[0][0] > 0 &&
> frames[j]->costEst[1][0] > 0)
> +                {
> +                    averageIcost += frames[j]->costEst[0][0];
> +                    averagePcost += frames[j]->costEst[1][0];
> +                }
> +                else
> +                    times--;
> +            }
> +            if (times)
> +            {
> +                averageIcost = averageIcost / times;
> +                averagePcost = averagePcost / times;
> +                frames[i]->interPCostPercDiff = abs(interCost -
> averagePcost) / X265_MIN(interCost, averagePcost) * 100;
> +                frames[i]->intraCostPercDiff = abs(intraCost -
> averageIcost) / X265_MIN(intraCost, averageIcost) * 100;
> +            }
> +        }
> +    }
> +
> +    /* When scenecut threshold is set, use scenecut detection for I frame
> placements */
> +    if (!m_param->bHistBasedSceneCut || (m_param->bHistBasedSceneCut &&
> frames[1]->bScenecut))
>          isScenecut = scenecut(frames, 0, 1, true, origNumFrames);
>
>      if (isScenecut && (m_param->bHistBasedSceneCut ||
> m_param->scenecutThreshold))
> @@ -2018,17 +2049,16 @@ void Lookahead::slicetypeAnalyse(Lowres **frames,
> bool bKeyframe)
>          m_extendGopBoundary = false;
>          for (int i = m_param->bframes + 1; i < origNumFrames; i +=
> m_param->bframes + 1)
>          {
> -            if (!m_param->bHistBasedSceneCut)
> +            if (!m_param->bHistBasedSceneCut ||
> (m_param->bHistBasedSceneCut && frames[i + 1]->bScenecut))
>                  scenecut(frames, i, i + 1, true, origNumFrames);
>
>              for (int j = i + 1; j <= X265_MIN(i + m_param->bframes + 1,
> origNumFrames); j++)
>              {
> -                if ((!m_param->bHistBasedSceneCut && frames[j]->bScenecut
> && scenecutInternal(frames, j - 1, j, true)) ||
> -                    (m_param->bHistBasedSceneCut && frames[j]->bScenecut))
> -                    {
> -                        m_extendGopBoundary = true;
> -                        break;
> -                    }
> +                if (frames[j]->bScenecut && scenecutInternal(frames, j -
> 1, j, true))
> +                {
> +                    m_extendGopBoundary = true;
> +                    break;
> +                }
>              }
>              if (m_extendGopBoundary)
>                  break;
> @@ -2133,14 +2163,15 @@ void Lookahead::slicetypeAnalyse(Lowres **frames,
> bool bKeyframe)
>          {
>              for (int j = 1; j < numBFrames + 1; j++)
>              {
> -                if ((!m_param->bHistBasedSceneCut && scenecut(frames, j,
> j + 1, false, origNumFrames)) ||
> -                    (m_param->bHistBasedSceneCut && frames[j +
> 1]->bScenecut) ||
> -                    (bForceRADL && (frames[j]->frameNum == preRADL)))
> -                    {
> -                        frames[j]->sliceType = X265_TYPE_P;
> -                        numAnalyzed = j;
> -                        break;
> -                    }
> +                bool isNextScenecut = false;
> +                if (!m_param->bHistBasedSceneCut ||
> (m_param->bHistBasedSceneCut && frames[j + 1]->bScenecut))
> +                    isNextScenecut = scenecut(frames, j, j + 1, false,
> origNumFrames);
> +                if (isNextScenecut || (bForceRADL && frames[j]->frameNum
> == preRADL))
> +                {
> +                    frames[j]->sliceType = X265_TYPE_P;
> +                    numAnalyzed = j;
> +                    break;
> +                }
>              }
>          }
>          resetStart = bKeyframe ? 1 : X265_MIN(numBFrames + 2, numAnalyzed
> + 1);
> @@ -2203,7 +2234,7 @@ bool Lookahead::scenecut(Lowres **frames, int p0,
> int p1, bool bRealScenecut, in
>           * and not considered a scenecut. */
>          for (int cp1 = p1; cp1 <= maxp1; cp1++)
>          {
> -            if (!scenecutInternal(frames, p0, cp1, false))
> +            if (!m_param->bHistBasedSceneCut && !scenecutInternal(frames,
> p0, cp1, false))
>              {
>                  /* Any frame in between p0 and cur_p1 cannot be a real
> scenecut. */
>                  for (int i = cp1; i > p0; i--)
> @@ -2212,7 +2243,7 @@ bool Lookahead::scenecut(Lowres **frames, int p0,
> int p1, bool bRealScenecut, in
>                      noScenecuts = false;
>                  }
>              }
> -            else if (scenecutInternal(frames, cp1 - 1, cp1, false))
> +            else if ((m_param->bHistBasedSceneCut &&
> frames[cp1]->m_bIsMaxThres) || scenecutInternal(frames, cp1 - 1, cp1,
> false))
>              {
>                  /* If current frame is a Scenecut from p0 frame as well
> as Scenecut from
>                   * preceeding frame, mark it as a Scenecut */
> @@ -2273,6 +2304,10 @@ bool Lookahead::scenecut(Lowres **frames, int p0,
> int p1, bool bRealScenecut, in
>
>      if (!frames[p1]->bScenecut)
>          return false;
> +    /* Check only scene transitions if max threshold */
> +    if (m_param->bHistBasedSceneCut && frames[p1]->m_bIsMaxThres)
> +        return frames[p1]->bScenecut;
> +
>      return scenecutInternal(frames, p0, p1, bRealScenecut);
>  }
>
> @@ -2289,7 +2324,19 @@ bool Lookahead::scenecutInternal(Lowres **frames,
> int p0, int p1, bool bRealScen
>      /* magic numbers pulled out of thin air */
>      float threshMin = (float)(threshMax * 0.25);
>      double bias = m_param->scenecutBias;
> -    if (bRealScenecut)
> +    if (m_param->bHistBasedSceneCut)
> +    {
> +        double minT = TEMPORAL_SCENECUT_THRESHOLD * (1 +
> m_param->edgeTransitionThreshold);
> +        if (frame->interPCostPercDiff > minT || frame->intraCostPercDiff
> > minT)
> +        {
> +            if (bRealScenecut && frame->bScenecut)
> +                x265_log(m_param, X265_LOG_DEBUG, "scene cut at %d \n",
> frame->frameNum);
> +            return frame->bScenecut;
> +        }
> +        else
> +            return false;
> +    }
> +    else if (bRealScenecut)
>      {
>          if (m_param->keyframeMin == m_param->keyframeMax)
>              threshMin = threshMax;
> diff --git a/source/encoder/slicetype.h b/source/encoder/slicetype.h
> index 31cce972b..6484ad8a0 100644
> --- a/source/encoder/slicetype.h
> +++ b/source/encoder/slicetype.h
> @@ -42,6 +42,7 @@ class Lookahead;
>  #define LOWRES_COST_SHIFT 14
>  #define AQ_EDGE_BIAS 0.5
>  #define EDGE_INCLINATION 45
> +#define TEMPORAL_SCENECUT_THRESHOLD 50
>
>  #if HIGH_BIT_DEPTH
>  #define EDGE_THRESHOLD 1023.0
> diff --git a/source/x265.h b/source/x265.h
> index 1e6f9ece6..32feb2bca 100644
> --- a/source/x265.h
> +++ b/source/x265.h
> @@ -1860,7 +1860,7 @@ typedef struct x265_param
>      /* A genuine threshold used for histogram based scene cut detection.
>       * This threshold determines whether a frame is a scenecut or not
>       * when compared against the edge and chroma histogram sad values.
> -     * Default 0.01. Range: Real number in the interval (0,2). */
> +     * Default 0.03. Range: Real number in the interval (0,1). */
>      double    edgeTransitionThreshold;
>
>      /* Enables histogram based scenecut detection algorithm to detect
> scenecuts. Default disabled */
> --
> 2.24.0.windows.2
>
> Regards,
> *Pooja Venkatesan*,
> Video Codec Engineer,
> Media & AI analytics BU
>
>
>
> On Mon, Jun 29, 2020 at 4:11 PM Pooja Venkatesan <
> pooja at multicorewareinc.com> wrote:
>
>> Hi,
>>
>> I am working on the review comments on this patch series. Will be sending
>> the updated patches soon. Stay tuned!
>>
>> Regards,
>> *Pooja Venkatesan*,
>> Video Codec Engineer,
>> Media & AI analytics BU
>>
>>
>>
>> On Thu, Jun 25, 2020 at 9:00 PM Pooja Venkatesan <
>> pooja at multicorewareinc.com> wrote:
>>
>>> From 2777c2e3389eaf556f3420bc0717171bbcf97e52 Mon Sep 17 00:00:00 2001
>>> From: Pooja Venkatesan <pooja at multicorewareinc.com>
>>> Date: Thu, 25 Jun 2020 20:42:50 +0530
>>> Subject: [PATCH] Improvements to hist-based scenecut algorithm.
>>>
>>> This patch does the following:
>>> 1. Add min and max threshold intervals to detect scenecuts.
>>> 2. For frames that fall within that range,
>>>     compare colour and edge histograms along with inter and intra
>>> SATD costs to detect scenecuts.
>>> 3. Handle scene transitions.
>>> 4. Change default value of hist-threshold to 0.03
>>> ---
>>>  doc/reST/cli.rst             |  7 +--
>>>  source/common/lowres.cpp     |  2 +
>>>  source/common/lowres.h       |  5 ++
>>>  source/common/param.cpp      |  2 +-
>>>  source/encoder/encoder.cpp   | 25 ++++++++--
>>>  source/encoder/encoder.h     |  2 +-
>>>  source/encoder/slicetype.cpp | 88 +++++++++++++++++++++++++++---------
>>>  source/x265.h                |  2 +-
>>>  8 files changed, 101 insertions(+), 32 deletions(-)
>>>
>>> diff --git a/doc/reST/cli.rst b/doc/reST/cli.rst
>>> index b9d795ace..23b74c3d8 100644
>>> --- a/doc/reST/cli.rst
>>> +++ b/doc/reST/cli.rst
>>> @@ -1468,9 +1468,10 @@ Slice decision options
>>>  .. option:: --hist-threshold <0.0..1.0>
>>>
>>>   This value represents the threshold for normalized SAD of edge
>>> histograms used in scenecut detection.
>>> - This requires :option:`--hist-scenecut` to be enabled. For example, a
>>> value of 0.2 indicates that a frame with normalized SAD value
>>> - greater than 0.2 against the previous frame as scenecut.
>>> - Default 0.01.
>>> + This requires :option:`--hist-scenecut` to be enabled. For example, a
>>> value of 0.2 indicates that a frame with normalized SAD value
>>> + greater than 0.2 against the previous frame is treated as a scenecut.
>>> + Increasing the threshold reduces the number of scenecuts detected.
>>> + Default 0.03.
>>>
>>>  .. option:: --radl <integer>
>>>
>>> diff --git a/source/common/lowres.cpp b/source/common/lowres.cpp
>>> index e8dd991bc..8e19ac17c 100644
>>> --- a/source/common/lowres.cpp
>>> +++ b/source/common/lowres.cpp
>>> @@ -266,6 +266,8 @@ void Lowres::init(PicYuv *origPic, int poc)
>>>      indB = 0;
>>>      memset(costEst, -1, sizeof(costEst));
>>>      memset(weightedCostDelta, 0, sizeof(weightedCostDelta));
>>> +    interPCostPercDiff = 0.0;
>>> +    intraCostPercDiff = 0.0;
>>>
>>>      if (qpAqOffset && invQscaleFactor)
>>>          memset(costEstAq, -1, sizeof(costEstAq));
>>> diff --git a/source/common/lowres.h b/source/common/lowres.h
>>> index 5c50fad67..200b1f032 100644
>>> --- a/source/common/lowres.h
>>> +++ b/source/common/lowres.h
>>> @@ -234,6 +234,11 @@ struct Lowres : public ReferencePlanes
>>>      uint16_t* propagateCost;
>>>      double    weightedCostDelta[X265_BFRAME_MAX + 2];
>>>      ReferencePlanes weightedRef[X265_BFRAME_MAX + 2];
>>> +    /* For hist-based scenecut */
>>> +    bool   m_bIsMaxThres;
>>> +    double interPCostPercDiff;
>>> +    double intraCostPercDiff;
>>> +
>>>      bool create(x265_param* param, PicYuv *origPic, uint32_t qgSize);
>>>      void destroy();
>>>      void init(PicYuv *origPic, int poc);
>>> diff --git a/source/common/param.cpp b/source/common/param.cpp
>>> index 925f0c460..8c0498efc 100644
>>> --- a/source/common/param.cpp
>>> +++ b/source/common/param.cpp
>>> @@ -168,7 +168,7 @@ void x265_param_default(x265_param* param)
>>>      param->bFrameAdaptive = X265_B_ADAPT_TRELLIS;
>>>      param->bBPyramid = 1;
>>>      param->scenecutThreshold = 40; /* Magic number pulled in from x264
>>> */
>>> -    param->edgeTransitionThreshold = 0.01;
>>> +    param->edgeTransitionThreshold = 0.03;
>>>      param->bHistBasedSceneCut = 0;
>>>      param->lookaheadSlices = 8;
>>>      param->lookaheadThreads = 0;
>>> diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp
>>> index f6bc5408d..bec7ff5c0 100644
>>> --- a/source/encoder/encoder.cpp
>>> +++ b/source/encoder/encoder.cpp
>>> @@ -1528,8 +1528,12 @@ double Encoder::normalizeRange(int32_t value,
>>> int32_t minValue, int32_t maxValue
>>>      return (double)(value - minValue) * (rangeEnd - rangeStart) /
>>> (maxValue - minValue) + rangeStart;
>>>  }
>>>
>>> -void Encoder::findSceneCuts(x265_picture *pic, bool& bDup, double
>>> maxUVSad, double edgeSad)
>>> +void Encoder::findSceneCuts(x265_picture *pic, bool& isMax, bool& bDup,
>>> double maxUVSad, double edgeSad)
>>>  {
>>> +    double minEdgeT = m_edgeHistThreshold * 0.5;
>>> +    double minChromaT = minEdgeT * 10.0;
>>> +    double maxEdgeT = m_edgeHistThreshold * 1.5;
>>> +    double maxChromaT = maxEdgeT * 10.0;
>>>      pic->frameData.bScenecut = false;
>>>
>>>      if (pic->poc == 0)
>>> @@ -1544,11 +1548,20 @@ void Encoder::findSceneCuts(x265_picture *pic,
>>> bool& bDup, double maxUVSad, doub
>>>          {
>>>              bDup = true;
>>>          }
>>> -        else if (edgeSad > m_scaledEdgeThreshold || maxUVSad >=
>>> m_scaledChromaThreshold || (edgeSad > m_edgeHistThreshold && maxUVSad >=
>>> m_chromaHistThreshold))
>>> +        else if (edgeSad < minEdgeT && maxUVSad < minChromaT)
>>> +        {
>>> +            pic->frameData.bScenecut = false;
>>> +        }
>>> +        else if (edgeSad > maxEdgeT && maxUVSad > maxChromaT)
>>> +        {
>>> +            pic->frameData.bScenecut = true;
>>> +            isMax = true;
>>> +        }
>>> +        else if (edgeSad > m_scaledEdgeThreshold || maxUVSad >=
>>> m_scaledChromaThreshold
>>> +                 || (edgeSad > m_edgeHistThreshold && maxUVSad >=
>>> m_chromaHistThreshold))
>>>          {
>>>              pic->frameData.bScenecut = true;
>>>              bDup = false;
>>> -            x265_log(m_param, X265_LOG_DEBUG, "scene cut at %d \n",
>>> pic->poc);
>>>          }
>>>      }
>>>  }
>>> @@ -1581,6 +1594,7 @@ int Encoder::encode(const x265_picture* pic_in,
>>> x265_picture* pic_out)
>>>      bool dontRead = false;
>>>      bool bdropFrame = false;
>>>      bool dropflag = false;
>>> +    bool isMaxThreshold = false;
>>>
>>>      if (m_exportedPic)
>>>      {
>>> @@ -1607,7 +1621,7 @@ int Encoder::encode(const x265_picture* pic_in,
>>> x265_picture* pic_out)
>>>              {
>>>                  double maxUVSad = 0.0, edgeSad = 0.0;
>>>                  computeHistogramSAD(&maxUVSad, &edgeSad, pic_in->poc);
>>> -                findSceneCuts(pic, bdropFrame, maxUVSad, edgeSad);
>>> +                findSceneCuts(pic, isMaxThreshold, bdropFrame,
>>> maxUVSad, edgeSad);
>>>              }
>>>          }
>>>
>>> @@ -1786,6 +1800,7 @@ int Encoder::encode(const x265_picture* pic_in,
>>> x265_picture* pic_out)
>>>          if (m_param->bHistBasedSceneCut)
>>>          {
>>>              inFrame->m_lowres.bScenecut =
>>> (inputPic->frameData.bScenecut == 1) ? true : false;
>>> +            inFrame->m_lowres.m_bIsMaxThres = isMaxThreshold;
>>>          }
>>>          if (m_param->bHistBasedSceneCut && m_param->analysisSave)
>>>          {
>>> @@ -4261,7 +4276,7 @@ void Encoder::configure(x265_param *p)
>>>
>>>     if (p->bHistBasedSceneCut && !p->edgeTransitionThreshold)
>>>     {
>>> -       p->edgeTransitionThreshold = 0.01;
>>> +       p->edgeTransitionThreshold = 0.03;
>>>         x265_log(p, X265_LOG_WARNING, "using  default threshold %.2lf
>>> for scene cut detection\n", p->edgeTransitionThreshold);
>>>     }
>>>
>>> diff --git a/source/encoder/encoder.h b/source/encoder/encoder.h
>>> index fd6b3e72c..1d4fe2476 100644
>>> --- a/source/encoder/encoder.h
>>> +++ b/source/encoder/encoder.h
>>> @@ -373,7 +373,7 @@ public:
>>>      bool computeHistograms(x265_picture *pic);
>>>      void computeHistogramSAD(double *maxUVNormalizedSAD, double
>>> *edgeNormalizedSAD, int curPoc);
>>>      double normalizeRange(int32_t value, int32_t minValue, int32_t
>>> maxValue, double rangeStart, double rangeEnd);
>>> -    void findSceneCuts(x265_picture *pic, bool& bDup, double
>>> m_maxUVSADVal, double m_edgeSADVal);
>>> +    void findSceneCuts(x265_picture *pic, bool& isMax, bool& bDup,
>>> double m_maxUVSADVal, double m_edgeSADVal);
>>>
>>>      void initRefIdx();
>>>      void analyseRefIdx(int *numRefIdx);
>>> diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp
>>> index 0a95e77d2..27052ca4e 100644
>>> --- a/source/encoder/slicetype.cpp
>>> +++ b/source/encoder/slicetype.cpp
>>> @@ -2001,10 +2001,40 @@ void Lookahead::slicetypeAnalyse(Lowres
>>> **frames, bool bKeyframe)
>>>      int numAnalyzed = numFrames;
>>>      bool isScenecut = false;
>>>
>>> -    /* When scenecut threshold is set, use scenecut detection for I
>>> frame placements */
>>>      if (m_param->bHistBasedSceneCut)
>>> -        isScenecut = frames[1]->bScenecut;
>>> -    else
>>> +    {
>>> +        for (int i = numFrames - 1; i > 0; i--)
>>> +        {
>>> +            if (frames[i]->interPCostPercDiff > 0.0)
>>> +                continue;
>>> +            int64_t interCost = frames[i]->costEst[1][0];
>>> +            int64_t intraCost = frames[i]->costEst[0][0];
>>> +            if (interCost < 0 || intraCost < 0)
>>> +                continue;
>>> +            int times = 0;
>>> +            double averageP = 0.0, averageI = 0.0;
>>> +            for (int j = i - 1; j >= 0 && times < 5; j--, times++)
>>> +            {
>>> +                if (frames[j]->costEst[0][0] > 0 &&
>>> frames[j]->costEst[1][0] > 0)
>>> +                {
>>> +                    averageI += frames[j]->costEst[0][0];
>>> +                    averageP += frames[j]->costEst[1][0];
>>> +                }
>>> +                else
>>> +                    times--;
>>> +            }
>>> +            if (times)
>>> +            {
>>> +                averageI = averageI / times;
>>> +                averageP = averageP / times;
>>> +                frames[i]->interPCostPercDiff = abs(interCost -
>>> averageP) / X265_MIN(interCost, averageP) * 100;
>>> +                frames[i]->intraCostPercDiff = abs(intraCost -
>>> averageI) / X265_MIN(intraCost, averageI) * 100;
>>> +            }
>>> +        }
>>> +    }
>>> +
>>> +    /* When scenecut threshold is set, use scenecut detection for I
>>> frame placements */
>>> +    if (!m_param->bHistBasedSceneCut || (m_param->bHistBasedSceneCut &&
>>> frames[1]->bScenecut))
>>>          isScenecut = scenecut(frames, 0, 1, true, origNumFrames);
>>>
>>>      if (isScenecut && (m_param->bHistBasedSceneCut ||
>>> m_param->scenecutThreshold))
>>> @@ -2018,17 +2048,16 @@ void Lookahead::slicetypeAnalyse(Lowres
>>> **frames, bool bKeyframe)
>>>          m_extendGopBoundary = false;
>>>          for (int i = m_param->bframes + 1; i < origNumFrames; i +=
>>> m_param->bframes + 1)
>>>          {
>>> -            if (!m_param->bHistBasedSceneCut)
>>> +            if (!m_param->bHistBasedSceneCut ||
>>> (m_param->bHistBasedSceneCut && frames[i + 1]->bScenecut))
>>>                  scenecut(frames, i, i + 1, true, origNumFrames);
>>>
>>>              for (int j = i + 1; j <= X265_MIN(i + m_param->bframes + 1,
>>> origNumFrames); j++)
>>>              {
>>> -                if ((!m_param->bHistBasedSceneCut &&
>>> frames[j]->bScenecut && scenecutInternal(frames, j - 1, j, true)) ||
>>> -                    (m_param->bHistBasedSceneCut &&
>>> frames[j]->bScenecut))
>>> -                    {
>>> -                        m_extendGopBoundary = true;
>>> -                        break;
>>> -                    }
>>> +                if (frames[j]->bScenecut && scenecutInternal(frames, j
>>> - 1, j, true))
>>> +                {
>>> +                    m_extendGopBoundary = true;
>>> +                    break;
>>> +                }
>>>              }
>>>              if (m_extendGopBoundary)
>>>                  break;
>>> @@ -2133,14 +2162,15 @@ void Lookahead::slicetypeAnalyse(Lowres
>>> **frames, bool bKeyframe)
>>>          {
>>>              for (int j = 1; j < numBFrames + 1; j++)
>>>              {
>>> -                if ((!m_param->bHistBasedSceneCut && scenecut(frames,
>>> j, j + 1, false, origNumFrames)) ||
>>> -                    (m_param->bHistBasedSceneCut && frames[j +
>>> 1]->bScenecut) ||
>>> -                    (bForceRADL && (frames[j]->frameNum == preRADL)))
>>> -                    {
>>> -                        frames[j]->sliceType = X265_TYPE_P;
>>> -                        numAnalyzed = j;
>>> -                        break;
>>> -                    }
>>> +                bool isNextScenecut = false;
>>> +                if (!m_param->bHistBasedSceneCut ||
>>> (m_param->bHistBasedSceneCut && frames[j + 1]->bScenecut))
>>> +                    isNextScenecut = scenecut(frames, j, j + 1, false,
>>> origNumFrames);
>>> +                if (isNextScenecut || (bForceRADL &&
>>> frames[j]->frameNum == preRADL))
>>> +                {
>>> +                    frames[j]->sliceType = X265_TYPE_P;
>>> +                    numAnalyzed = j;
>>> +                    break;
>>> +                }
>>>              }
>>>          }
>>>          resetStart = bKeyframe ? 1 : X265_MIN(numBFrames + 2,
>>> numAnalyzed + 1);
>>> @@ -2203,7 +2233,7 @@ bool Lookahead::scenecut(Lowres **frames, int p0,
>>> int p1, bool bRealScenecut, in
>>>           * and not considered a scenecut. */
>>>          for (int cp1 = p1; cp1 <= maxp1; cp1++)
>>>          {
>>> -            if (!scenecutInternal(frames, p0, cp1, false))
>>> +            if (!m_param->bHistBasedSceneCut &&
>>> !scenecutInternal(frames, p0, cp1, false))
>>>              {
>>>                  /* Any frame in between p0 and cur_p1 cannot be a real
>>> scenecut. */
>>>                  for (int i = cp1; i > p0; i--)
>>> @@ -2212,7 +2242,7 @@ bool Lookahead::scenecut(Lowres **frames, int p0,
>>> int p1, bool bRealScenecut, in
>>>                      noScenecuts = false;
>>>                  }
>>>              }
>>> -            else if (scenecutInternal(frames, cp1 - 1, cp1, false))
>>> +            else if ((m_param->bHistBasedSceneCut &&
>>> frames[cp1]->m_bIsMaxThres) || scenecutInternal(frames, cp1 - 1, cp1,
>>> false))
>>>              {
>>>                  /* If current frame is a Scenecut from p0 frame as well
>>> as Scenecut from
>>>                   * preceeding frame, mark it as a Scenecut */
>>> @@ -2273,6 +2303,10 @@ bool Lookahead::scenecut(Lowres **frames, int p0,
>>> int p1, bool bRealScenecut, in
>>>
>>>      if (!frames[p1]->bScenecut)
>>>          return false;
>>> +    /* Check only scene transitions if max threshold */
>>> +    if (m_param->bHistBasedSceneCut && frames[p1]->m_bIsMaxThres)
>>> +        return frames[p1]->bScenecut;
>>> +
>>>      return scenecutInternal(frames, p0, p1, bRealScenecut);
>>>  }
>>>
>>> @@ -2289,7 +2323,19 @@ bool Lookahead::scenecutInternal(Lowres **frames,
>>> int p0, int p1, bool bRealScen
>>>      /* magic numbers pulled out of thin air */
>>>      float threshMin = (float)(threshMax * 0.25);
>>>      double bias = m_param->scenecutBias;
>>> -    if (bRealScenecut)
>>> +    if (m_param->bHistBasedSceneCut)
>>> +    {
>>> +        double minT = 50.0 * (1 + m_param->edgeTransitionThreshold);
>>> +        if (frame->interPCostPercDiff > minT ||
>>> frame->intraCostPercDiff > minT)
>>> +        {
>>> +            if (bRealScenecut && frame->bScenecut)
>>> +                x265_log(m_param, X265_LOG_DEBUG, "scene cut at %d \n",
>>> frame->frameNum);
>>> +            return frame->bScenecut;
>>> +        }
>>> +        else
>>> +            return false;
>>> +    }
>>> +    else if (bRealScenecut)
>>>      {
>>>          if (m_param->keyframeMin == m_param->keyframeMax)
>>>              threshMin = threshMax;
>>> diff --git a/source/x265.h b/source/x265.h
>>> index 1e6f9ece6..32feb2bca 100644
>>> --- a/source/x265.h
>>> +++ b/source/x265.h
>>> @@ -1860,7 +1860,7 @@ typedef struct x265_param
>>>      /* A genuine threshold used for histogram based scene cut detection.
>>>       * This threshold determines whether a frame is a scenecut or not
>>>       * when compared against the edge and chroma histogram sad values.
>>> -     * Default 0.01. Range: Real number in the interval (0,2). */
>>> +     * Default 0.03. Range: Real number in the interval (0,1). */
>>>      double    edgeTransitionThreshold;
>>>
>>>      /* Enables histogram based scenecut detection algorithm to detect
>>> scenecuts. Default disabled */
>>> --
>>> 2.24.0.windows.2
>>>
>>>

-- 
Regards,
*Aruna Matheswaran,*
Video Codec Engineer,
Media & AI analytics BU,
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20200629/9f755b64/attachment-0001.html>


More information about the x265-devel mailing list