[x265] [PATCH 1 of 2] slicetype: Modify Scenecut algorithm to detect scene transition points

Aarthi Priya Thirumalai aarthi at multicorewareinc.com
Mon Aug 17 08:21:24 CEST 2015


# HG changeset patch
# User Aarthi Thirumalai
# Date 1437505166 -19800
#      Wed Jul 22 00:29:26 2015 +0530
# Node ID cea0ce5bf57d5decc14743aa69234a1dfe84996b
# Parent  d56b2466c04459205287e1581d8a36eebf372ba6
slicetype: Modify Scenecut algorithm to detect scene transition points
to improve Rate Control (refs #160).

Identify scene transitions, fade-ins, fade-outs and scenecuts, and signal the
flag bScenecut
in the Lowres structure. This flag will be used by RateControl to adjust the qp
during scene cuts.

diff -r d56b2466c044 -r cea0ce5bf57d source/common/lowres.cpp
--- a/source/common/lowres.cpp Wed Aug 12 18:12:20 2015 +0530
+++ b/source/common/lowres.cpp Wed Jul 22 00:29:26 2015 +0530
@@ -126,7 +126,7 @@
 void Lowres::init(PicYuv *origPic, int poc)
 {
     bLastMiniGopBFrame = false;
-    bScenecut = true;  // could be a scene-cut, until ruled out by flash
detection
+    bScenecut = false;  // could be a scene-cut, until ruled out by flash
detection
     bKeyframe = false; // Not a keyframe unless identified by lookahead
     frameNum = poc;
     leadingBframes = 0;
diff -r d56b2466c044 -r cea0ce5bf57d source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp Wed Aug 12 18:12:20 2015 +0530
+++ b/source/encoder/slicetype.cpp Wed Jul 22 00:29:26 2015 +0530
@@ -483,6 +483,7 @@
     m_pool  = pool;

     m_lastNonB = NULL;
+    m_isSceneTransition = false;
     m_scratch  = NULL;
     m_tld      = NULL;
     m_filled   = false;
@@ -1267,10 +1268,16 @@

     int numBFrames = 0;
     int numAnalyzed = numFrames;
-    if (m_param->scenecutThreshold && scenecut(frames, 0, 1, true,
origNumFrames, maxSearch))
+
+    if (m_param->bFrameAdaptive)
     {
-        frames[1]->sliceType = X265_TYPE_I;
-        return;
+        bool isScenecut = scenecut(frames, 0, 1, true, origNumFrames);
+        /* When scenecut threshold is set, use scenecut detection for I
frame placements */
+        if (!m_param->scenecutThreshold && isScenecut)
+        {
+            frames[1]->sliceType = X265_TYPE_I;
+            return;
+        }
     }

     if (m_param->bframes)
@@ -1357,14 +1364,13 @@
         /* Check scenecut on the first minigop. */
         for (int j = 1; j < numBFrames + 1; j++)
         {
-            if (m_param->scenecutThreshold && scenecut(frames, j, j + 1,
false, origNumFrames, maxSearch))
+            if (m_param->scenecutThreshold && scenecut(frames, j, j + 1,
false, origNumFrames))
             {
                 frames[j]->sliceType = X265_TYPE_P;
                 numAnalyzed = j;
                 break;
             }
         }
-
         resetStart = bKeyframe ? 1 : X265_MIN(numBFrames + 2, numAnalyzed
+ 1);
     }
     else
@@ -1388,46 +1394,94 @@
     if (bIsVbvLookahead)
         vbvLookahead(frames, numFrames, bKeyframe);

+     int maxp1 = X265_MIN(m_param->bframes + 1, origNumFrames);
     /* Restore frame types for all frames that haven't actually been
decided yet. */
     for (int j = resetStart; j <= numFrames; j++)
+    {
         frames[j]->sliceType = X265_TYPE_AUTO;
+        /* If any frame marked as scenecut is being restarted for
sliceDecision,
+         * undo scene Transition flag */
+        if (j <= maxp1 && frames[j]->bScenecut && m_isSceneTransition)
+            m_isSceneTransition = false;
+    }
 }

-bool Lookahead::scenecut(Lowres **frames, int p0, int p1, bool
bRealScenecut, int numFrames, int maxSearch)
+bool Lookahead::scenecut(Lowres **frames, int p0, int p1, bool
bRealScenecut, int numFrames)
 {
     /* Only do analysis during a normal scenecut check. */
     if (bRealScenecut && m_param->bframes)
     {
         int origmaxp1 = p0 + 1;
         /* Look ahead to avoid coding short flashes as scenecuts. */
-        if (m_param->bFrameAdaptive == X265_B_ADAPT_TRELLIS)
-            /* Don't analyse any more frames than the trellis would have
covered. */
-            origmaxp1 += m_param->bframes;
-        else
-            origmaxp1++;
+        origmaxp1 += m_param->bframes;
         int maxp1 = X265_MIN(origmaxp1, numFrames);
-
+        bool fluctuate = false;
+        bool noScenecuts = false;
+        int64_t avgSatdCost = 0;
+        if (frames[0]->costEst[1][0] > -1)
+            avgSatdCost = frames[0]->costEst[1][0];
+        int cnt = 1;
         /* Where A and B are scenes: AAAAAABBBAAAAAA
          * If BBB is shorter than (maxp1-p0), it is detected as a flash
          * and not considered a scenecut. */
         for (int cp1 = p1; cp1 <= maxp1; cp1++)
         {
             if (!scenecutInternal(frames, p0, cp1, false))
+            {
                 /* Any frame in between p0 and cur_p1 cannot be a real
scenecut. */
                 for (int i = cp1; i > p0; i--)
+                {
                     frames[i]->bScenecut = false;
+                    noScenecuts = false;
+                }
+            }
+            else if (scenecutInternal(frames, cp1 - 1, cp1, false))
+            {
+                /* If current frame is a Scenecut from p0 frame as well as
Scenecut from
+                 * preceding frame, mark it as a Scenecut */
+                frames[cp1]->bScenecut = true;
+                noScenecuts = true;
+            }
+
+            /* compute average satdcost of all the frames in the mini-gop
to confirm
+             * whether there is any great fluctuation among them to rule
out false positives */
+            X265_CHECK(frames[cp1]->costEst[cp1 - p0][0]!= -1, "costEst is
not done \n");
+            avgSatdCost += frames[cp1]->costEst[cp1 - p0][0];
+            cnt++;
         }
+        /* Identify possible scene fluctuations by comparing the satd cost
of the frames.
+         * This could denote the beginning or ending of scene transitions.
+         * During a scene transition(fade in/fade outs), if fluctuate
remains false,
+         * then the scene had completed its transition or stabilized.
+         */

-        /* Where A-F are scenes: AAAAABBCCDDEEFFFFFF
-         * If each of BB ... EE are shorter than (maxp1-p0), they are
-         * detected as flashes and not considered scenecuts.
-         * Instead, the first F frame becomes a scenecut.
-         * If the video ends before F, no frame becomes a scenecut. */
-        for (int cp0 = p0; cp0 <= maxp1; cp0++)
+        if (noScenecuts)
         {
-            if (origmaxp1 > maxSearch || (cp0 < maxp1 &&
scenecutInternal(frames, cp0, maxp1, false)))
-                /* If cur_p0 is the p0 of a scenecut, it cannot be the p1
of a scenecut. */
-                frames[cp0]->bScenecut = false;
+            fluctuate = false;
+            avgSatdCost /= cnt;
+            for (int i= p1 ; i <= maxp1; i++)
+            {
+                if (abs(frames[i]->costEst[i - p0][0] - avgSatdCost)  >
0.1 * avgSatdCost)
+                {
+                    fluctuate = true;
+                    if (!m_isSceneTransition && frames[i]->bScenecut)
+                    {
+                        m_isSceneTransition = true;
+                        /* just mark the first scenechange in the scene
transition as a scenecut. */
+                        for (int j = i + 1; j <= maxp1; j++)
+                        {
+                            frames[j]->bScenecut = false;
+                        }
+                        break;
+                    }
+                }
+                    frames[i]->bScenecut = false;
+            }
+        }
+        if (!fluctuate && !noScenecuts)
+        {
+            /* Signal end of scene transitioning */
+            m_isSceneTransition = false;
         }
     }

@@ -1451,22 +1505,23 @@

     /* magic numbers pulled out of thin air */
     float threshMin = (float)(threshMax * 0.25);
-    float bias;
-
-    if (m_param->keyframeMin == m_param->keyframeMax)
-        threshMin = threshMax;
-    if (gopSize <= m_param->keyframeMin / 4)
-        bias = threshMin / 4;
-    else if (gopSize <= m_param->keyframeMin)
-        bias = threshMin * gopSize / m_param->keyframeMin;
-    else
+    double bias = 0.05;
+    if (bRealScenecut)
     {
-        bias = threshMin
-            + (threshMax - threshMin)
-            * (gopSize - m_param->keyframeMin)
-            / (m_param->keyframeMax - m_param->keyframeMin);
+        if (m_param->keyframeMin == m_param->keyframeMax)
+            threshMin = threshMax;
+        if (gopSize <= m_param->keyframeMin / 4)
+            bias = threshMin / 4;
+        else if (gopSize <= m_param->keyframeMin)
+            bias = threshMin * gopSize / m_param->keyframeMin;
+        else
+        {
+            bias = threshMin
+                + (threshMax - threshMin)
+                * (gopSize - m_param->keyframeMin)
+                / (m_param->keyframeMax - m_param->keyframeMin);
+        }
     }
-
     bool res = pcost >= (1.0 - bias) * icost;
     if (res && bRealScenecut)
     {
diff -r d56b2466c044 -r cea0ce5bf57d source/encoder/slicetype.h
--- a/source/encoder/slicetype.h Wed Aug 12 18:12:20 2015 +0530
+++ b/source/encoder/slicetype.h Wed Jul 22 00:29:26 2015 +0530
@@ -127,7 +127,7 @@
     int           m_numCoopSlices;
     int           m_numRowsPerSlice;
     bool          m_filled;
-
+    bool          m_isSceneTransition;
     Lookahead(x265_param *param, ThreadPool *pool);

 #if DETAILED_CU_STATS
@@ -156,7 +156,7 @@
     void    slicetypeAnalyse(Lowres **frames, bool bKeyframe);

     /* called by slicetypeAnalyse() to make slice decisions */
-    bool    scenecut(Lowres **frames, int p0, int p1, bool bRealScenecut,
int numFrames, int maxSearch);
+    bool    scenecut(Lowres **frames, int p0, int p1, bool bRealScenecut,
int numFrames);
     bool    scenecutInternal(Lowres **frames, int p0, int p1, bool
bRealScenecut);
     void    slicetypePath(Lowres **frames, int length,
char(*best_paths)[X265_LOOKAHEAD_MAX + 1]);
     int64_t slicetypePathCost(Lowres **frames, char *path, int64_t
threshold);


On Mon, Aug 17, 2015 at 11:44 AM, Steve Borho <steve at borho.org> wrote:

> On 08/17, aarthi at multicorewareinc.com wrote:
> > # HG changeset patch
> > # User Aarthi Thirumalai
> > # Date 1437505166 -19800
> > #      Wed Jul 22 00:29:26 2015 +0530
> > # Node ID 51320561d41f32faa75dd3d04d88ea68500ce995
> > # Parent  d56b2466c04459205287e1581d8a36eebf372ba6
> > slicetype: Modify Scenecut algorithm to detect scene transition points
> > to improve Rate Control (refs #160).
> >
> > identify scene trasitions, fade-ins, fadeouts, sceneCuts and signal the
> flag bSceneCut
> > in Lowres structure. This flag will be used by RateControl to adjust the
> wps during scene cuts.
>
> 'wps' typo
>
> are these two patches ok for the stable branch?
>
> > diff -r d56b2466c044 -r 51320561d41f source/common/lowres.cpp
> > --- a/source/common/lowres.cpp        Wed Aug 12 18:12:20 2015 +0530
> > +++ b/source/common/lowres.cpp        Wed Jul 22 00:29:26 2015 +0530
> > @@ -126,7 +126,7 @@
> >  void Lowres::init(PicYuv *origPic, int poc)
> >  {
> >      bLastMiniGopBFrame = false;
> > -    bScenecut = true;  // could be a scene-cut, until ruled out by
> flash detection
> > +    bScenecut = false;  // could be a scene-cut, until ruled out by
> flash detection
> >      bKeyframe = false; // Not a keyframe unless identified by lookahead
> >      frameNum = poc;
> >      leadingBframes = 0;
> > diff -r d56b2466c044 -r 51320561d41f source/encoder/slicetype.cpp
> > --- a/source/encoder/slicetype.cpp    Wed Aug 12 18:12:20 2015 +0530
> > +++ b/source/encoder/slicetype.cpp    Wed Jul 22 00:29:26 2015 +0530
> > @@ -483,6 +483,7 @@
> >      m_pool  = pool;
> >
> >      m_lastNonB = NULL;
> > +    m_isSceneTransition = false;
> >      m_scratch  = NULL;
> >      m_tld      = NULL;
> >      m_filled   = false;
> > @@ -1267,10 +1268,16 @@
> >
> >      int numBFrames = 0;
> >      int numAnalyzed = numFrames;
> > -    if (m_param->scenecutThreshold && scenecut(frames, 0, 1, true,
> origNumFrames, maxSearch))
> > +
> > +    if (m_param->bFrameAdaptive)
> >      {
> > -        frames[1]->sliceType = X265_TYPE_I;
> > -        return;
> > +        bool isScenecut = scenecut(frames, 0, 1, true, origNumFrames);
> > +        /* When scenecut threshold is set, use scenecut detection for I
> frame placements */
> > +        if (!m_param->scenecutThreshold && isScenecut)
> > +        {
> > +            frames[1]->sliceType = X265_TYPE_I;
> > +            return;
> > +        }
> >      }
> >
> >      if (m_param->bframes)
> > @@ -1357,14 +1364,13 @@
> >          /* Check scenecut on the first minigop. */
> >          for (int j = 1; j < numBFrames + 1; j++)
> >          {
> > -            if (m_param->scenecutThreshold && scenecut(frames, j, j +
> 1, false, origNumFrames, maxSearch))
> > +            if (m_param->scenecutThreshold && scenecut(frames, j, j +
> 1, false, origNumFrames))
> >              {
> >                  frames[j]->sliceType = X265_TYPE_P;
> >                  numAnalyzed = j;
> >                  break;
> >              }
> >          }
> > -
> >          resetStart = bKeyframe ? 1 : X265_MIN(numBFrames + 2,
> numAnalyzed + 1);
> >      }
> >      else
> > @@ -1388,46 +1394,97 @@
> >      if (bIsVbvLookahead)
> >          vbvLookahead(frames, numFrames, bKeyframe);
> >
> > +     int maxp1 = X265_MIN(m_param->bframes + 1, origNumFrames);
> >      /* Restore frame types for all frames that haven't actually been
> decided yet. */
> >      for (int j = resetStart; j <= numFrames; j++)
> > +    {
> >          frames[j]->sliceType = X265_TYPE_AUTO;
> > +        /* If any frame marked as scenecut is being restarted for
> sliceDecision,
> > +         * undo scene Transition flag */
> > +        if (j <= maxp1 && frames[j]->bScenecut && m_isSceneTransition)
> > +            m_isSceneTransition = false;
> > +    }
> >  }
> >
> > -bool Lookahead::scenecut(Lowres **frames, int p0, int p1, bool
> bRealScenecut, int numFrames, int maxSearch)
> > +bool Lookahead::scenecut(Lowres **frames, int p0, int p1, bool
> bRealScenecut, int numFrames)
> >  {
> >      /* Only do analysis during a normal scenecut check. */
> >      if (bRealScenecut && m_param->bframes)
> >      {
> >          int origmaxp1 = p0 + 1;
> >          /* Look ahead to avoid coding short flashes as scenecuts. */
> > -        if (m_param->bFrameAdaptive == X265_B_ADAPT_TRELLIS)
> > -            /* Don't analyse any more frames than the trellis would
> have covered. */
> > -            origmaxp1 += m_param->bframes;
> > -        else
> > -            origmaxp1++;
> > +        origmaxp1 += m_param->bframes;
> >          int maxp1 = X265_MIN(origmaxp1, numFrames);
> > -
> > +        bool fluctuate = false;
> > +        bool noScenecuts = false;
> > +        int64_t avgSatdCost = 0;
> > +        if (frames[0]->costEst[1][0] > -1)
> > +            avgSatdCost = frames[0]->costEst[1][0];
> > +        int cnt = 1;
> >          /* Where A and B are scenes: AAAAAABBBAAAAAA
> >           * If BBB is shorter than (maxp1-p0), it is detected as a flash
> >           * and not considered a scenecut. */
> >          for (int cp1 = p1; cp1 <= maxp1; cp1++)
> >          {
> >              if (!scenecutInternal(frames, p0, cp1, false))
> > +            {
> >                  /* Any frame in between p0 and cur_p1 cannot be a real
> scenecut. */
> >                  for (int i = cp1; i > p0; i--)
> > +                {
> >                      frames[i]->bScenecut = false;
> > +                    noScenecuts = false;
> > +                }
> > +            }
> > +            else if (scenecutInternal(frames, cp1 - 1, cp1, false))
> > +            {
> > +                /* If current frame is a Scenecut from p0 frame as well
> as Scenecut from
> > +                 * preceeding frame, mark it as a Scenecut */
> > +                frames[cp1]->bScenecut = true;
> > +                noScenecuts = true;
> > +            }
> > +
> > +            /* compute average satdcost of all the frames in the
> mini-gop to confirm
> > +             * whether there is any great fluctuation among them to
> rule out false positives */
> > +            X265_CHECK(frames[cp1]->costEst[cp1 - p0][0]!= -1, "costEst
> is not done \n");
> > +            avgSatdCost += frames[cp1]->costEst[cp1 - p0][0];
> > +            cnt++;
> >          }
> > +        /* Identify possible scene fluctuations by comparing the satd
> cost of the frames.
> > +         * This could denote the beginning or ending of scene
> transitions.
> > +         * During a scene transition(fade in/fade outs), if fluctuate
> remains false,
> > +         * then the scene had completed its transition or stabilized.
> > +         */
> >
> > -        /* Where A-F are scenes: AAAAABBCCDDEEFFFFFF
> > -         * If each of BB ... EE are shorter than (maxp1-p0), they are
> > -         * detected as flashes and not considered scenecuts.
> > -         * Instead, the first F frame becomes a scenecut.
> > -         * If the video ends before F, no frame becomes a scenecut. */
> > -        for (int cp0 = p0; cp0 <= maxp1; cp0++)
> > +        if (noScenecuts)
> >          {
> > -            if (origmaxp1 > maxSearch || (cp0 < maxp1 &&
> scenecutInternal(frames, cp0, maxp1, false)))
> > -                /* If cur_p0 is the p0 of a scenecut, it cannot be the
> p1 of a scenecut. */
> > -                frames[cp0]->bScenecut = false;
> > +            fluctuate = false;
> > +            avgSatdCost /= cnt;
> > +            for (int i= p1 ; i <= maxp1; i++)
> > +            {
> > +                if (abs(frames[i]->costEst[i - p0][0] - avgSatdCost)  >
> 0.1 * avgSatdCost)
> > +                {
> > +                    fluctuate = true;
> > +                    if (!m_isSceneTransition && frames[i]->bScenecut)
> > +                    {
> > +                        m_isSceneTransition = true;
> > +                        /* just mark the first scenechange in the scene
> transition as a scenecut. */
> > +                        for (int j = i + 1; j <= maxp1; j++)
> > +                        {
> > +                            frames[j]->bScenecut = false;
>
> don't need braces here or below
>
> > +                        }
> > +                        break;
> > +                    }
> > +                }
> > +                if (frames[i]->bScenecut)
> > +                {
> > +                    frames[i]->bScenecut = false;
> > +                }
>
> this assignment can be done unconditionally
>
> > +            }
> > +        }
> > +        if (!fluctuate && !noScenecuts)
> > +        {
> > +            /* Signal end of scene transitioning */
> > +            m_isSceneTransition = false;
> >          }
> >      }
> >
> > @@ -1451,22 +1508,23 @@
> >
> >      /* magic numbers pulled out of thin air */
> >      float threshMin = (float)(threshMax * 0.25);
> > -    float bias;
> > -
> > -    if (m_param->keyframeMin == m_param->keyframeMax)
> > -        threshMin = threshMax;
> > -    if (gopSize <= m_param->keyframeMin / 4)
> > -        bias = threshMin / 4;
> > -    else if (gopSize <= m_param->keyframeMin)
> > -        bias = threshMin * gopSize / m_param->keyframeMin;
> > -    else
> > +    double bias = 0.05;
> > +    if (bRealScenecut)
> >      {
> > -        bias = threshMin
> > -            + (threshMax - threshMin)
> > -            * (gopSize - m_param->keyframeMin)
> > -            / (m_param->keyframeMax - m_param->keyframeMin);
> > +        if (m_param->keyframeMin == m_param->keyframeMax)
> > +            threshMin = threshMax;
> > +        if (gopSize <= m_param->keyframeMin / 4)
> > +            bias = threshMin / 4;
> > +        else if (gopSize <= m_param->keyframeMin)
> > +            bias = threshMin * gopSize / m_param->keyframeMin;
> > +        else
> > +        {
> > +            bias = threshMin
> > +                + (threshMax - threshMin)
> > +                * (gopSize - m_param->keyframeMin)
> > +                / (m_param->keyframeMax - m_param->keyframeMin);
> > +        }
> >      }
> > -
> >      bool res = pcost >= (1.0 - bias) * icost;
> >      if (res && bRealScenecut)
> >      {
> > diff -r d56b2466c044 -r 51320561d41f source/encoder/slicetype.h
> > --- a/source/encoder/slicetype.h      Wed Aug 12 18:12:20 2015 +0530
> > +++ b/source/encoder/slicetype.h      Wed Jul 22 00:29:26 2015 +0530
> > @@ -127,7 +127,7 @@
> >      int           m_numCoopSlices;
> >      int           m_numRowsPerSlice;
> >      bool          m_filled;
> > -
> > +    bool          m_isSceneTransition;
> >      Lookahead(x265_param *param, ThreadPool *pool);
> >
> >  #if DETAILED_CU_STATS
> > @@ -156,7 +156,7 @@
> >      void    slicetypeAnalyse(Lowres **frames, bool bKeyframe);
> >
> >      /* called by slicetypeAnalyse() to make slice decisions */
> > -    bool    scenecut(Lowres **frames, int p0, int p1, bool
> bRealScenecut, int numFrames, int maxSearch);
> > +    bool    scenecut(Lowres **frames, int p0, int p1, bool
> bRealScenecut, int numFrames);
> >      bool    scenecutInternal(Lowres **frames, int p0, int p1, bool
> bRealScenecut);
> >      void    slicetypePath(Lowres **frames, int length,
> char(*best_paths)[X265_LOOKAHEAD_MAX + 1]);
> >      int64_t slicetypePathCost(Lowres **frames, char *path, int64_t
> threshold);
> > _______________________________________________
> > x265-devel mailing list
> > x265-devel at videolan.org
> > https://mailman.videolan.org/listinfo/x265-devel
>
> --
> Steve Borho
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150817/37f91684/attachment-0001.html>


More information about the x265-devel mailing list