[x265] [PATCH 1 of 2] Improvements to hist-based scenecut algorithm.

Pooja Venkatesan pooja at multicorewareinc.com
Thu Jun 25 17:30:23 CEST 2020


>From 2777c2e3389eaf556f3420bc0717171bbcf97e52 Mon Sep 17 00:00:00 2001
From: Pooja Venkatesan <pooja at multicorewareinc.com>
Date: Thu, 25 Jun 2020 20:42:50 +0530
Subject: [PATCH] Improvements to hist-based scenecut algorithm.

This patch does the following:
1. Add min and max threshold intervals to detect scenecuts.
2. For frames that fall between the min and max thresholds,
    compare the colour and edge histograms along with the inter and intra
SATD costs to detect scenecuts.
3. Handle scene transitions.
4. Change default value of hist-threshold from 0.01 to 0.03
---
 doc/reST/cli.rst             |  7 +--
 source/common/lowres.cpp     |  2 +
 source/common/lowres.h       |  5 ++
 source/common/param.cpp      |  2 +-
 source/encoder/encoder.cpp   | 25 ++++++++--
 source/encoder/encoder.h     |  2 +-
 source/encoder/slicetype.cpp | 88 +++++++++++++++++++++++++++---------
 source/x265.h                |  2 +-
 8 files changed, 101 insertions(+), 32 deletions(-)

diff --git a/doc/reST/cli.rst b/doc/reST/cli.rst
index b9d795ace..23b74c3d8 100644
--- a/doc/reST/cli.rst
+++ b/doc/reST/cli.rst
@@ -1468,9 +1468,10 @@ Slice decision options
 .. option:: --hist-threshold <0.0..1.0>

  This value represents the threshold for normalized SAD of edge histograms
used in scenecut detection.
- This requires :option:`--hist-scenecut` to be enabled. For example, a
value of 0.2 indicates that a frame with normalized SAD value
- greater than 0.2 against the previous frame as scenecut.
- Default 0.01.
+ This requires :option:`--hist-scenecut` to be enabled. For example, a
value of 0.2 indicates that a frame with normalized SAD value
+ greater than 0.2 against the previous frame is detected as a scenecut.
+ Increasing the threshold reduces the number of scenecuts detected.
+ Default 0.03.

 .. option:: --radl <integer>

diff --git a/source/common/lowres.cpp b/source/common/lowres.cpp
index e8dd991bc..8e19ac17c 100644
--- a/source/common/lowres.cpp
+++ b/source/common/lowres.cpp
@@ -266,6 +266,8 @@ void Lowres::init(PicYuv *origPic, int poc)
     indB = 0;
     memset(costEst, -1, sizeof(costEst));
     memset(weightedCostDelta, 0, sizeof(weightedCostDelta));
+    interPCostPercDiff = 0.0;
+    intraCostPercDiff = 0.0;

     if (qpAqOffset && invQscaleFactor)
         memset(costEstAq, -1, sizeof(costEstAq));
diff --git a/source/common/lowres.h b/source/common/lowres.h
index 5c50fad67..200b1f032 100644
--- a/source/common/lowres.h
+++ b/source/common/lowres.h
@@ -234,6 +234,11 @@ struct Lowres : public ReferencePlanes
     uint16_t* propagateCost;
     double    weightedCostDelta[X265_BFRAME_MAX + 2];
     ReferencePlanes weightedRef[X265_BFRAME_MAX + 2];
+    /* For hist-based scenecut */
+    bool   m_bIsMaxThres;
+    double interPCostPercDiff;
+    double intraCostPercDiff;
+
     bool create(x265_param* param, PicYuv *origPic, uint32_t qgSize);
     void destroy();
     void init(PicYuv *origPic, int poc);
diff --git a/source/common/param.cpp b/source/common/param.cpp
index 925f0c460..8c0498efc 100644
--- a/source/common/param.cpp
+++ b/source/common/param.cpp
@@ -168,7 +168,7 @@ void x265_param_default(x265_param* param)
     param->bFrameAdaptive = X265_B_ADAPT_TRELLIS;
     param->bBPyramid = 1;
     param->scenecutThreshold = 40; /* Magic number pulled in from x264 */
-    param->edgeTransitionThreshold = 0.01;
+    param->edgeTransitionThreshold = 0.03;
     param->bHistBasedSceneCut = 0;
     param->lookaheadSlices = 8;
     param->lookaheadThreads = 0;
diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp
index f6bc5408d..bec7ff5c0 100644
--- a/source/encoder/encoder.cpp
+++ b/source/encoder/encoder.cpp
@@ -1528,8 +1528,12 @@ double Encoder::normalizeRange(int32_t value,
int32_t minValue, int32_t maxValue
     return (double)(value - minValue) * (rangeEnd - rangeStart) /
(maxValue - minValue) + rangeStart;
 }

-void Encoder::findSceneCuts(x265_picture *pic, bool& bDup, double
maxUVSad, double edgeSad)
+void Encoder::findSceneCuts(x265_picture *pic, bool& isMax, bool& bDup,
double maxUVSad, double edgeSad)
 {
+    double minEdgeT = m_edgeHistThreshold * 0.5;
+    double minChromaT = minEdgeT * 10.0;
+    double maxEdgeT = m_edgeHistThreshold * 1.5;
+    double maxChromaT = maxEdgeT * 10.0;
     pic->frameData.bScenecut = false;

     if (pic->poc == 0)
@@ -1544,11 +1548,20 @@ void Encoder::findSceneCuts(x265_picture *pic,
bool& bDup, double maxUVSad, doub
         {
             bDup = true;
         }
-        else if (edgeSad > m_scaledEdgeThreshold || maxUVSad >=
m_scaledChromaThreshold || (edgeSad > m_edgeHistThreshold && maxUVSad >=
m_chromaHistThreshold))
+        else if (edgeSad < minEdgeT && maxUVSad < minChromaT)
+        {
+            pic->frameData.bScenecut = false;
+        }
+        else if (edgeSad > maxEdgeT && maxUVSad > maxChromaT)
+        {
+            pic->frameData.bScenecut = true;
+            isMax = true;
+        }
+        else if (edgeSad > m_scaledEdgeThreshold || maxUVSad >=
m_scaledChromaThreshold
+                 || (edgeSad > m_edgeHistThreshold && maxUVSad >=
m_chromaHistThreshold))
         {
             pic->frameData.bScenecut = true;
             bDup = false;
-            x265_log(m_param, X265_LOG_DEBUG, "scene cut at %d \n",
pic->poc);
         }
     }
 }
@@ -1581,6 +1594,7 @@ int Encoder::encode(const x265_picture* pic_in,
x265_picture* pic_out)
     bool dontRead = false;
     bool bdropFrame = false;
     bool dropflag = false;
+    bool isMaxThreshold = false;

     if (m_exportedPic)
     {
@@ -1607,7 +1621,7 @@ int Encoder::encode(const x265_picture* pic_in,
x265_picture* pic_out)
             {
                 double maxUVSad = 0.0, edgeSad = 0.0;
                 computeHistogramSAD(&maxUVSad, &edgeSad, pic_in->poc);
-                findSceneCuts(pic, bdropFrame, maxUVSad, edgeSad);
+                findSceneCuts(pic, isMaxThreshold, bdropFrame, maxUVSad,
edgeSad);
             }
         }

@@ -1786,6 +1800,7 @@ int Encoder::encode(const x265_picture* pic_in,
x265_picture* pic_out)
         if (m_param->bHistBasedSceneCut)
         {
             inFrame->m_lowres.bScenecut = (inputPic->frameData.bScenecut
== 1) ? true : false;
+            inFrame->m_lowres.m_bIsMaxThres = isMaxThreshold;
         }
         if (m_param->bHistBasedSceneCut && m_param->analysisSave)
         {
@@ -4261,7 +4276,7 @@ void Encoder::configure(x265_param *p)

    if (p->bHistBasedSceneCut && !p->edgeTransitionThreshold)
    {
-       p->edgeTransitionThreshold = 0.01;
+       p->edgeTransitionThreshold = 0.03;
        x265_log(p, X265_LOG_WARNING, "using  default threshold %.2lf for
scene cut detection\n", p->edgeTransitionThreshold);
    }

diff --git a/source/encoder/encoder.h b/source/encoder/encoder.h
index fd6b3e72c..1d4fe2476 100644
--- a/source/encoder/encoder.h
+++ b/source/encoder/encoder.h
@@ -373,7 +373,7 @@ public:
     bool computeHistograms(x265_picture *pic);
     void computeHistogramSAD(double *maxUVNormalizedSAD, double
*edgeNormalizedSAD, int curPoc);
     double normalizeRange(int32_t value, int32_t minValue, int32_t
maxValue, double rangeStart, double rangeEnd);
-    void findSceneCuts(x265_picture *pic, bool& bDup, double
m_maxUVSADVal, double m_edgeSADVal);
+    void findSceneCuts(x265_picture *pic, bool& isMax, bool& bDup, double
m_maxUVSADVal, double m_edgeSADVal);

     void initRefIdx();
     void analyseRefIdx(int *numRefIdx);
diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp
index 0a95e77d2..27052ca4e 100644
--- a/source/encoder/slicetype.cpp
+++ b/source/encoder/slicetype.cpp
@@ -2001,10 +2001,40 @@ void Lookahead::slicetypeAnalyse(Lowres **frames,
bool bKeyframe)
     int numAnalyzed = numFrames;
     bool isScenecut = false;

-    /* When scenecut threshold is set, use scenecut detection for I frame
placements */
     if (m_param->bHistBasedSceneCut)
-        isScenecut = frames[1]->bScenecut;
-    else
+    {
+        for (int i = numFrames - 1; i > 0; i--)
+        {
+            if (frames[i]->interPCostPercDiff > 0.0)
+                continue;
+            int64_t interCost = frames[i]->costEst[1][0];
+            int64_t intraCost = frames[i]->costEst[0][0];
+            if (interCost < 0 || intraCost < 0)
+                continue;
+            int times = 0;
+            double averageP = 0.0, averageI = 0.0;
+            for (int j = i - 1; j >= 0 && times < 5; j--, times++)
+            {
+                if (frames[j]->costEst[0][0] > 0 &&
frames[j]->costEst[1][0] > 0)
+                {
+                    averageI += frames[j]->costEst[0][0];
+                    averageP += frames[j]->costEst[1][0];
+                }
+                else
+                    times--;
+            }
+            if (times)
+            {
+                averageI = averageI / times;
+                averageP = averageP / times;
+                frames[i]->interPCostPercDiff = abs(interCost - averageP)
/ X265_MIN(interCost, averageP) * 100;
+                frames[i]->intraCostPercDiff = abs(intraCost - averageI) /
X265_MIN(intraCost, averageI) * 100;
+            }
+        }
+    }
+
+    /* When scenecut threshold is set, use scenecut detection for I frame
placements */
+    if (!m_param->bHistBasedSceneCut || (m_param->bHistBasedSceneCut &&
frames[1]->bScenecut))
         isScenecut = scenecut(frames, 0, 1, true, origNumFrames);

     if (isScenecut && (m_param->bHistBasedSceneCut ||
m_param->scenecutThreshold))
@@ -2018,17 +2048,16 @@ void Lookahead::slicetypeAnalyse(Lowres **frames,
bool bKeyframe)
         m_extendGopBoundary = false;
         for (int i = m_param->bframes + 1; i < origNumFrames; i +=
m_param->bframes + 1)
         {
-            if (!m_param->bHistBasedSceneCut)
+            if (!m_param->bHistBasedSceneCut ||
(m_param->bHistBasedSceneCut && frames[i + 1]->bScenecut))
                 scenecut(frames, i, i + 1, true, origNumFrames);

             for (int j = i + 1; j <= X265_MIN(i + m_param->bframes + 1,
origNumFrames); j++)
             {
-                if ((!m_param->bHistBasedSceneCut && frames[j]->bScenecut
&& scenecutInternal(frames, j - 1, j, true)) ||
-                    (m_param->bHistBasedSceneCut && frames[j]->bScenecut))
-                    {
-                        m_extendGopBoundary = true;
-                        break;
-                    }
+                if (frames[j]->bScenecut && scenecutInternal(frames, j -
1, j, true))
+                {
+                    m_extendGopBoundary = true;
+                    break;
+                }
             }
             if (m_extendGopBoundary)
                 break;
@@ -2133,14 +2162,15 @@ void Lookahead::slicetypeAnalyse(Lowres **frames,
bool bKeyframe)
         {
             for (int j = 1; j < numBFrames + 1; j++)
             {
-                if ((!m_param->bHistBasedSceneCut && scenecut(frames, j, j
+ 1, false, origNumFrames)) ||
-                    (m_param->bHistBasedSceneCut && frames[j +
1]->bScenecut) ||
-                    (bForceRADL && (frames[j]->frameNum == preRADL)))
-                    {
-                        frames[j]->sliceType = X265_TYPE_P;
-                        numAnalyzed = j;
-                        break;
-                    }
+                bool isNextScenecut = false;
+                if (!m_param->bHistBasedSceneCut ||
(m_param->bHistBasedSceneCut && frames[j + 1]->bScenecut))
+                    isNextScenecut = scenecut(frames, j, j + 1, false,
origNumFrames);
+                if (isNextScenecut || (bForceRADL && frames[j]->frameNum
== preRADL))
+                {
+                    frames[j]->sliceType = X265_TYPE_P;
+                    numAnalyzed = j;
+                    break;
+                }
             }
         }
         resetStart = bKeyframe ? 1 : X265_MIN(numBFrames + 2, numAnalyzed
+ 1);
@@ -2203,7 +2233,7 @@ bool Lookahead::scenecut(Lowres **frames, int p0, int
p1, bool bRealScenecut, in
          * and not considered a scenecut. */
         for (int cp1 = p1; cp1 <= maxp1; cp1++)
         {
-            if (!scenecutInternal(frames, p0, cp1, false))
+            if (!m_param->bHistBasedSceneCut && !scenecutInternal(frames,
p0, cp1, false))
             {
                 /* Any frame in between p0 and cur_p1 cannot be a real
scenecut. */
                 for (int i = cp1; i > p0; i--)
@@ -2212,7 +2242,7 @@ bool Lookahead::scenecut(Lowres **frames, int p0, int
p1, bool bRealScenecut, in
                     noScenecuts = false;
                 }
             }
-            else if (scenecutInternal(frames, cp1 - 1, cp1, false))
+            else if ((m_param->bHistBasedSceneCut &&
frames[cp1]->m_bIsMaxThres) || scenecutInternal(frames, cp1 - 1, cp1,
false))
             {
                 /* If current frame is a Scenecut from p0 frame as well as
Scenecut from
                  * preceeding frame, mark it as a Scenecut */
@@ -2273,6 +2303,10 @@ bool Lookahead::scenecut(Lowres **frames, int p0,
int p1, bool bRealScenecut, in

     if (!frames[p1]->bScenecut)
         return false;
+    /* Check only scene transitions if max threshold */
+    if (m_param->bHistBasedSceneCut && frames[p1]->m_bIsMaxThres)
+        return frames[p1]->bScenecut;
+
     return scenecutInternal(frames, p0, p1, bRealScenecut);
 }

@@ -2289,7 +2323,19 @@ bool Lookahead::scenecutInternal(Lowres **frames,
int p0, int p1, bool bRealScen
     /* magic numbers pulled out of thin air */
     float threshMin = (float)(threshMax * 0.25);
     double bias = m_param->scenecutBias;
-    if (bRealScenecut)
+    if (m_param->bHistBasedSceneCut)
+    {
+        double minT = 50.0 * (1 + m_param->edgeTransitionThreshold);
+        if (frame->interPCostPercDiff > minT || frame->intraCostPercDiff >
minT)
+        {
+            if (bRealScenecut && frame->bScenecut)
+                x265_log(m_param, X265_LOG_DEBUG, "scene cut at %d \n",
frame->frameNum);
+            return frame->bScenecut;
+        }
+        else
+            return false;
+    }
+    else if (bRealScenecut)
     {
         if (m_param->keyframeMin == m_param->keyframeMax)
             threshMin = threshMax;
diff --git a/source/x265.h b/source/x265.h
index 1e6f9ece6..32feb2bca 100644
--- a/source/x265.h
+++ b/source/x265.h
@@ -1860,7 +1860,7 @@ typedef struct x265_param
     /* A genuine threshold used for histogram based scene cut detection.
      * This threshold determines whether a frame is a scenecut or not
      * when compared against the edge and chroma histogram sad values.
-     * Default 0.01. Range: Real number in the interval (0,2). */
+     * Default 0.03. Range: Real number in the interval (0,1). */
     double    edgeTransitionThreshold;

     /* Enables histogram based scenecut detection algorithm to detect
scenecuts. Default disabled */
-- 
2.24.0.windows.2
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20200625/06c628d9/attachment-0001.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: SCD1.diff
Type: application/octet-stream
Size: 14810 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20200625/06c628d9/attachment-0001.obj>


More information about the x265-devel mailing list