[x265] [PATCH 1 of 2] slicetype: Modify Scenecut algorithm to detect scene transition points

Steve Borho steve at borho.org
Mon Aug 17 08:14:32 CEST 2015


On 08/17, aarthi at multicorewareinc.com wrote:
> # HG changeset patch
> # User Aarthi Thirumalai
> # Date 1437505166 -19800
> #      Wed Jul 22 00:29:26 2015 +0530
> # Node ID 51320561d41f32faa75dd3d04d88ea68500ce995
> # Parent  d56b2466c04459205287e1581d8a36eebf372ba6
> slicetype: Modify Scenecut algorithm to detect scene transition points
> to improve Rate Control (refs #160).
> 
> identify scene transitions, fade-ins, fade-outs, sceneCuts and signal the flag bSceneCut
> in Lowres structure. This flag will be used by RateControl to adjust the wps during scene cuts.

'wps' looks like a typo in the commit message above.

Are these two patches OK for the stable branch?

> diff -r d56b2466c044 -r 51320561d41f source/common/lowres.cpp
> --- a/source/common/lowres.cpp	Wed Aug 12 18:12:20 2015 +0530
> +++ b/source/common/lowres.cpp	Wed Jul 22 00:29:26 2015 +0530
> @@ -126,7 +126,7 @@
>  void Lowres::init(PicYuv *origPic, int poc)
>  {
>      bLastMiniGopBFrame = false;
> -    bScenecut = true;  // could be a scene-cut, until ruled out by flash detection
> +    bScenecut = false;  // could be a scene-cut, until ruled out by flash detection
>      bKeyframe = false; // Not a keyframe unless identified by lookahead
>      frameNum = poc;
>      leadingBframes = 0;
> diff -r d56b2466c044 -r 51320561d41f source/encoder/slicetype.cpp
> --- a/source/encoder/slicetype.cpp	Wed Aug 12 18:12:20 2015 +0530
> +++ b/source/encoder/slicetype.cpp	Wed Jul 22 00:29:26 2015 +0530
> @@ -483,6 +483,7 @@
>      m_pool  = pool;
>  
>      m_lastNonB = NULL;
> +    m_isSceneTransition = false;
>      m_scratch  = NULL;
>      m_tld      = NULL;
>      m_filled   = false;
> @@ -1267,10 +1268,16 @@
>  
>      int numBFrames = 0;
>      int numAnalyzed = numFrames;
> -    if (m_param->scenecutThreshold && scenecut(frames, 0, 1, true, origNumFrames, maxSearch))
> +
> +    if (m_param->bFrameAdaptive)
>      {
> -        frames[1]->sliceType = X265_TYPE_I;
> -        return;
> +        bool isScenecut = scenecut(frames, 0, 1, true, origNumFrames);
> +        /* When scenecut threshold is set, use scenecut detection for I frame placements */
> +        if (!m_param->scenecutThreshold && isScenecut)
> +        {
> +            frames[1]->sliceType = X265_TYPE_I;
> +            return;
> +        }
>      }
>  
>      if (m_param->bframes)
> @@ -1357,14 +1364,13 @@
>          /* Check scenecut on the first minigop. */
>          for (int j = 1; j < numBFrames + 1; j++)
>          {
> -            if (m_param->scenecutThreshold && scenecut(frames, j, j + 1, false, origNumFrames, maxSearch))
> +            if (m_param->scenecutThreshold && scenecut(frames, j, j + 1, false, origNumFrames))
>              {
>                  frames[j]->sliceType = X265_TYPE_P;
>                  numAnalyzed = j;
>                  break;
>              }
>          }
> -
>          resetStart = bKeyframe ? 1 : X265_MIN(numBFrames + 2, numAnalyzed + 1);
>      }
>      else
> @@ -1388,46 +1394,97 @@
>      if (bIsVbvLookahead)
>          vbvLookahead(frames, numFrames, bKeyframe);
>  
> +     int maxp1 = X265_MIN(m_param->bframes + 1, origNumFrames);
>      /* Restore frame types for all frames that haven't actually been decided yet. */
>      for (int j = resetStart; j <= numFrames; j++)
> +    {
>          frames[j]->sliceType = X265_TYPE_AUTO;
> +        /* If any frame marked as scenecut is being restarted for sliceDecision, 
> +         * undo scene Transition flag */
> +        if (j <= maxp1 && frames[j]->bScenecut && m_isSceneTransition)
> +            m_isSceneTransition = false;
> +    }
>  }
>  
> -bool Lookahead::scenecut(Lowres **frames, int p0, int p1, bool bRealScenecut, int numFrames, int maxSearch)
> +bool Lookahead::scenecut(Lowres **frames, int p0, int p1, bool bRealScenecut, int numFrames)
>  {
>      /* Only do analysis during a normal scenecut check. */
>      if (bRealScenecut && m_param->bframes)
>      {
>          int origmaxp1 = p0 + 1;
>          /* Look ahead to avoid coding short flashes as scenecuts. */
> -        if (m_param->bFrameAdaptive == X265_B_ADAPT_TRELLIS)
> -            /* Don't analyse any more frames than the trellis would have covered. */
> -            origmaxp1 += m_param->bframes;
> -        else
> -            origmaxp1++;
> +        origmaxp1 += m_param->bframes;
>          int maxp1 = X265_MIN(origmaxp1, numFrames);
> -
> +        bool fluctuate = false;
> +        bool noScenecuts = false;
> +        int64_t avgSatdCost = 0;
> +        if (frames[0]->costEst[1][0] > -1)
> +            avgSatdCost = frames[0]->costEst[1][0];
> +        int cnt = 1;
>          /* Where A and B are scenes: AAAAAABBBAAAAAA
>           * If BBB is shorter than (maxp1-p0), it is detected as a flash
>           * and not considered a scenecut. */
>          for (int cp1 = p1; cp1 <= maxp1; cp1++)
>          {
>              if (!scenecutInternal(frames, p0, cp1, false))
> +            {
>                  /* Any frame in between p0 and cur_p1 cannot be a real scenecut. */
>                  for (int i = cp1; i > p0; i--)
> +                {
>                      frames[i]->bScenecut = false;
> +                    noScenecuts = false;
> +                }
> +            }
> +            else if (scenecutInternal(frames, cp1 - 1, cp1, false))
> +            {
> +                /* If current frame is a Scenecut from p0 frame as well as Scenecut from
> +                 * preceeding frame, mark it as a Scenecut */
> +                frames[cp1]->bScenecut = true;
> +                noScenecuts = true;
> +            }
> +
> +            /* compute average satdcost of all the frames in the mini-gop to confirm 
> +             * whether there is any great fluctuation among them to rule out false positives */
> +            X265_CHECK(frames[cp1]->costEst[cp1 - p0][0]!= -1, "costEst is not done \n");
> +            avgSatdCost += frames[cp1]->costEst[cp1 - p0][0];
> +            cnt++;
>          }
> +        /* Identify possible scene fluctuations by comparing the satd cost of the frames.
> +         * This could denote the beginning or ending of scene transitions.
> +         * During a scene transition(fade in/fade outs), if fluctuate remains false,
> +         * then the scene had completed its transition or stabilized. 
> +         */
>  
> -        /* Where A-F are scenes: AAAAABBCCDDEEFFFFFF
> -         * If each of BB ... EE are shorter than (maxp1-p0), they are
> -         * detected as flashes and not considered scenecuts.
> -         * Instead, the first F frame becomes a scenecut.
> -         * If the video ends before F, no frame becomes a scenecut. */
> -        for (int cp0 = p0; cp0 <= maxp1; cp0++)
> +        if (noScenecuts)
>          {
> -            if (origmaxp1 > maxSearch || (cp0 < maxp1 && scenecutInternal(frames, cp0, maxp1, false)))
> -                /* If cur_p0 is the p0 of a scenecut, it cannot be the p1 of a scenecut. */
> -                frames[cp0]->bScenecut = false;
> +            fluctuate = false;
> +            avgSatdCost /= cnt;
> +            for (int i= p1 ; i <= maxp1; i++)
> +            {
> +                if (abs(frames[i]->costEst[i - p0][0] - avgSatdCost)  > 0.1 * avgSatdCost)
> +                {
> +                    fluctuate = true;
> +                    if (!m_isSceneTransition && frames[i]->bScenecut)
> +                    {
> +                        m_isSceneTransition = true;
> +                        /* just mark the first scenechange in the scene transition as a scenecut. */
> +                        for (int j = i + 1; j <= maxp1; j++)
> +                        {
> +                            frames[j]->bScenecut = false;

Braces aren't needed around a single-statement body, here or below.

> +                        }
> +                        break;
> +                    }
> +                }
> +                if (frames[i]->bScenecut)
> +                {
> +                    frames[i]->bScenecut = false;
> +                }

This assignment can be done unconditionally; the check only guards setting bScenecut to false, which is harmless when it is already false.

> +            }
> +        }
> +        if (!fluctuate && !noScenecuts)
> +        {
> +            /* Signal end of scene transitioning */
> +            m_isSceneTransition = false;
>          }
>      }
>  
> @@ -1451,22 +1508,23 @@
>  
>      /* magic numbers pulled out of thin air */
>      float threshMin = (float)(threshMax * 0.25);
> -    float bias;
> -
> -    if (m_param->keyframeMin == m_param->keyframeMax)
> -        threshMin = threshMax;
> -    if (gopSize <= m_param->keyframeMin / 4)
> -        bias = threshMin / 4;
> -    else if (gopSize <= m_param->keyframeMin)
> -        bias = threshMin * gopSize / m_param->keyframeMin;
> -    else
> +    double bias = 0.05;
> +    if (bRealScenecut)
>      {
> -        bias = threshMin
> -            + (threshMax - threshMin)
> -            * (gopSize - m_param->keyframeMin)
> -            / (m_param->keyframeMax - m_param->keyframeMin);
> +        if (m_param->keyframeMin == m_param->keyframeMax)
> +            threshMin = threshMax;
> +        if (gopSize <= m_param->keyframeMin / 4)
> +            bias = threshMin / 4;
> +        else if (gopSize <= m_param->keyframeMin)
> +            bias = threshMin * gopSize / m_param->keyframeMin;
> +        else
> +        {
> +            bias = threshMin
> +                + (threshMax - threshMin)
> +                * (gopSize - m_param->keyframeMin)
> +                / (m_param->keyframeMax - m_param->keyframeMin);
> +        }
>      }
> -
>      bool res = pcost >= (1.0 - bias) * icost;
>      if (res && bRealScenecut)
>      {
> diff -r d56b2466c044 -r 51320561d41f source/encoder/slicetype.h
> --- a/source/encoder/slicetype.h	Wed Aug 12 18:12:20 2015 +0530
> +++ b/source/encoder/slicetype.h	Wed Jul 22 00:29:26 2015 +0530
> @@ -127,7 +127,7 @@
>      int           m_numCoopSlices;
>      int           m_numRowsPerSlice;
>      bool          m_filled;
> -
> +    bool          m_isSceneTransition;
>      Lookahead(x265_param *param, ThreadPool *pool);
>  
>  #if DETAILED_CU_STATS
> @@ -156,7 +156,7 @@
>      void    slicetypeAnalyse(Lowres **frames, bool bKeyframe);
>  
>      /* called by slicetypeAnalyse() to make slice decisions */
> -    bool    scenecut(Lowres **frames, int p0, int p1, bool bRealScenecut, int numFrames, int maxSearch);
> +    bool    scenecut(Lowres **frames, int p0, int p1, bool bRealScenecut, int numFrames);
>      bool    scenecutInternal(Lowres **frames, int p0, int p1, bool bRealScenecut);
>      void    slicetypePath(Lowres **frames, int length, char(*best_paths)[X265_LOOKAHEAD_MAX + 1]);
>      int64_t slicetypePathCost(Lowres **frames, char *path, int64_t threshold);
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel

-- 
Steve Borho


More information about the x265-devel mailing list