<div dir="ltr">From 2777c2e3389eaf556f3420bc0717171bbcf97e52 Mon Sep 17 00:00:00 2001<br>From: Pooja Venkatesan <<a href="mailto:pooja@multicorewareinc.com">pooja@multicorewareinc.com</a>><br>Date: Thu, 25 Jun 2020 20:42:50 +0530<br>Subject: [PATCH] Improvements to hist-based scenecut algorithm.<br><br>This patch does the following:<br>1. Add min and max threshold intervals to detect scenecuts.<br>2. For frames falling between the two thresholds,<br>    compare colour and edge histograms along with inter and intra SATD costs to detect scenecuts.<br>3. Handle scene transitions.<br>4. Change default value of hist-threshold to 0.03.<br>---<br> doc/reST/cli.rst             |  7 +--<br> source/common/lowres.cpp     |  2 +<br> source/common/lowres.h       |  5 ++<br> source/common/param.cpp      |  2 +-<br> source/encoder/encoder.cpp   | 25 ++++++++--<br> source/encoder/encoder.h     |  2 +-<br> source/encoder/slicetype.cpp | 88 +++++++++++++++++++++++++++---------<br> source/x265.h                |  2 +-<br> 8 files changed, 101 insertions(+), 32 deletions(-)<br><br>diff --git a/doc/reST/cli.rst b/doc/reST/cli.rst<br>index b9d795ace..23b74c3d8 100644<br>--- a/doc/reST/cli.rst<br>+++ b/doc/reST/cli.rst<br>@@ -1468,9 +1468,10 @@ Slice decision options<br> .. option:: --hist-threshold <0.0..1.0><br> <br>    This value represents the threshold for normalized SAD of edge histograms used in scenecut detection.<br>-        This requires :option:`--hist-scenecut` to be enabled. For example, a value of 0.2 indicates that a frame with normalized SAD value<br>-  greater than 0.2 against the previous frame as scenecut.<br>-     Default 0.01.<br>+        This requires :option:`--hist-scenecut` to be enabled. For example, a value of 0.2 indicates that a frame with normalized SAD value <br>+ greater than 0.2 against the previous frame as scenecut. <br>+    Increasing the threshold reduces the number of scenecuts detected.<br>+   Default 0.03.<br>        <br> .. 
option:: --radl <integer><br>       <br>diff --git a/source/common/lowres.cpp b/source/common/lowres.cpp<br>index e8dd991bc..8e19ac17c 100644<br>--- a/source/common/lowres.cpp<br>+++ b/source/common/lowres.cpp<br>@@ -266,6 +266,8 @@ void Lowres::init(PicYuv *origPic, int poc)<br>     indB = 0;<br>     memset(costEst, -1, sizeof(costEst));<br>     memset(weightedCostDelta, 0, sizeof(weightedCostDelta));<br>+    interPCostPercDiff = 0.0;<br>+    intraCostPercDiff = 0.0;<br> <br>     if (qpAqOffset && invQscaleFactor)<br>         memset(costEstAq, -1, sizeof(costEstAq));<br>diff --git a/source/common/lowres.h b/source/common/lowres.h<br>index 5c50fad67..200b1f032 100644<br>--- a/source/common/lowres.h<br>+++ b/source/common/lowres.h<br>@@ -234,6 +234,11 @@ struct Lowres : public ReferencePlanes<br>     uint16_t* propagateCost;<br>     double    weightedCostDelta[X265_BFRAME_MAX + 2];<br>     ReferencePlanes weightedRef[X265_BFRAME_MAX + 2];<br>+    /* For hist-based scenecut */<br>+    bool   m_bIsMaxThres;<br>+    double interPCostPercDiff;<br>+    double intraCostPercDiff;<br>+<br>     bool create(x265_param* param, PicYuv *origPic, uint32_t qgSize);<br>     void destroy();<br>     void init(PicYuv *origPic, int poc);<br>diff --git a/source/common/param.cpp b/source/common/param.cpp<br>index 925f0c460..8c0498efc 100644<br>--- a/source/common/param.cpp<br>+++ b/source/common/param.cpp<br>@@ -168,7 +168,7 @@ void x265_param_default(x265_param* param)<br>     param->bFrameAdaptive = X265_B_ADAPT_TRELLIS;<br>     param->bBPyramid = 1;<br>     param->scenecutThreshold = 40; /* Magic number pulled in from x264 */<br>-    param->edgeTransitionThreshold = 0.01;<br>+    param->edgeTransitionThreshold = 0.03;<br>     param->bHistBasedSceneCut = 0;<br>     param->lookaheadSlices = 8;<br>     param->lookaheadThreads = 0;<br>diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp<br>index f6bc5408d..bec7ff5c0 100644<br>--- a/source/encoder/encoder.cpp<br>+++ 
b/source/encoder/encoder.cpp<br>@@ -1528,8 +1528,12 @@ double Encoder::normalizeRange(int32_t value, int32_t minValue, int32_t maxValue<br>     return (double)(value - minValue) * (rangeEnd - rangeStart) / (maxValue - minValue) + rangeStart;<br> }<br> <br>-void Encoder::findSceneCuts(x265_picture *pic, bool& bDup, double maxUVSad, double edgeSad)<br>+void Encoder::findSceneCuts(x265_picture *pic, bool& isMax, bool& bDup, double maxUVSad, double edgeSad)<br> {<br>+    double minEdgeT = m_edgeHistThreshold * 0.5;<br>+    double minChromaT = minEdgeT * 10.0;<br>+    double maxEdgeT = m_edgeHistThreshold * 1.5;<br>+    double maxChromaT = maxEdgeT * 10.0;<br>     pic->frameData.bScenecut = false;<br> <br>     if (pic->poc == 0)<br>@@ -1544,11 +1548,20 @@ void Encoder::findSceneCuts(x265_picture *pic, bool& bDup, double maxUVSad, doub<br>         {<br>             bDup = true;<br>         }<br>-        else if (edgeSad > m_scaledEdgeThreshold || maxUVSad >= m_scaledChromaThreshold || (edgeSad > m_edgeHistThreshold && maxUVSad >= m_chromaHistThreshold))<br>+        else if (edgeSad < minEdgeT && maxUVSad < minChromaT)<br>+        {<br>+            pic->frameData.bScenecut = false;<br>+        }<br>+        else if (edgeSad > maxEdgeT && maxUVSad > maxChromaT)<br>+        {<br>+            pic->frameData.bScenecut = true;<br>+            isMax = true;<br>+        }<br>+        else if (edgeSad > m_scaledEdgeThreshold || maxUVSad >= m_scaledChromaThreshold<br>+                 || (edgeSad > m_edgeHistThreshold && maxUVSad >= m_chromaHistThreshold))<br>         {<br>             pic->frameData.bScenecut = true;<br>             bDup = false;<br>-            x265_log(m_param, X265_LOG_DEBUG, "scene cut at %d \n", pic->poc);<br>         }<br>     }<br> }<br>@@ -1581,6 +1594,7 @@ int Encoder::encode(const x265_picture* pic_in, x265_picture* pic_out)<br>     bool dontRead = false;<br>     bool bdropFrame = false;<br>     bool dropflag = false;<br>+    bool isMaxThreshold = 
false;<br> <br>     if (m_exportedPic)<br>     {<br>@@ -1607,7 +1621,7 @@ int Encoder::encode(const x265_picture* pic_in, x265_picture* pic_out)<br>             {<br>                 double maxUVSad = 0.0, edgeSad = 0.0;<br>                 computeHistogramSAD(&maxUVSad, &edgeSad, pic_in->poc);<br>-                findSceneCuts(pic, bdropFrame, maxUVSad, edgeSad);<br>+                findSceneCuts(pic, isMaxThreshold, bdropFrame, maxUVSad, edgeSad);<br>             }<br>         }<br> <br>@@ -1786,6 +1800,7 @@ int Encoder::encode(const x265_picture* pic_in, x265_picture* pic_out)<br>         if (m_param->bHistBasedSceneCut)<br>         {<br>             inFrame->m_lowres.bScenecut = (inputPic->frameData.bScenecut == 1) ? true : false;<br>+            inFrame->m_lowres.m_bIsMaxThres = isMaxThreshold;<br>         }<br>         if (m_param->bHistBasedSceneCut && m_param->analysisSave)<br>         {<br>@@ -4261,7 +4276,7 @@ void Encoder::configure(x265_param *p)<br> <br>    if (p->bHistBasedSceneCut && !p->edgeTransitionThreshold)<br>    {<br>-       p->edgeTransitionThreshold = 0.01;<br>+       p->edgeTransitionThreshold = 0.03;<br>        x265_log(p, X265_LOG_WARNING, "using  default threshold %.2lf for scene cut detection\n", p->edgeTransitionThreshold);<br>    }<br> <br>diff --git a/source/encoder/encoder.h b/source/encoder/encoder.h<br>index fd6b3e72c..1d4fe2476 100644<br>--- a/source/encoder/encoder.h<br>+++ b/source/encoder/encoder.h<br>@@ -373,7 +373,7 @@ public:<br>     bool computeHistograms(x265_picture *pic);<br>     void computeHistogramSAD(double *maxUVNormalizedSAD, double *edgeNormalizedSAD, int curPoc);<br>     double normalizeRange(int32_t value, int32_t minValue, int32_t maxValue, double rangeStart, double rangeEnd);<br>-    void findSceneCuts(x265_picture *pic, bool& bDup, double m_maxUVSADVal, double m_edgeSADVal);<br>+    void findSceneCuts(x265_picture *pic, bool& isMax, bool& bDup, double m_maxUVSADVal, double m_edgeSADVal);<br> <br>     void 
initRefIdx();<br>     void analyseRefIdx(int *numRefIdx);<br>diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp<br>index 0a95e77d2..27052ca4e 100644<br>--- a/source/encoder/slicetype.cpp<br>+++ b/source/encoder/slicetype.cpp<br>@@ -2001,10 +2001,40 @@ void Lookahead::slicetypeAnalyse(Lowres **frames, bool bKeyframe)<br>     int numAnalyzed = numFrames;<br>     bool isScenecut = false;<br> <br>-    /* When scenecut threshold is set, use scenecut detection for I frame placements */<br>     if (m_param->bHistBasedSceneCut)<br>-        isScenecut = frames[1]->bScenecut;<br>-    else<br>+    {<br>+        for (int i = numFrames - 1; i > 0; i--)<br>+        {<br>+            if (frames[i]->interPCostPercDiff > 0.0)<br>+                continue;<br>+            int64_t interCost = frames[i]->costEst[1][0];<br>+            int64_t intraCost = frames[i]->costEst[0][0];<br>+            if (interCost < 0 || intraCost < 0)<br>+                continue;<br>+            int times = 0;<br>+            double averageP = 0.0, averageI = 0.0;<br>+            for (int j = i - 1; j >= 0 && times < 5; j--, times++)<br>+            {<br>+                if (frames[j]->costEst[0][0] > 0 && frames[j]->costEst[1][0] > 0)<br>+                {<br>+                    averageI += frames[j]->costEst[0][0];<br>+                    averageP += frames[j]->costEst[1][0];<br>+                }<br>+                else<br>+                    times--;<br>+            }<br>+            if (times)<br>+            {<br>+                averageI = averageI / times;<br>+                averageP = averageP / times;<br>+                frames[i]->interPCostPercDiff = abs(interCost - averageP) / X265_MIN(interCost, averageP) * 100;<br>+                frames[i]->intraCostPercDiff = abs(intraCost - averageI) / X265_MIN(intraCost, averageI) * 100;<br>+            }<br>+        }<br>+    }<br>+<br>+    /* When scenecut threshold is set, use scenecut detection for I frame placements 
*/<br>+    if (!m_param->bHistBasedSceneCut || (m_param->bHistBasedSceneCut && frames[1]->bScenecut))<br>         isScenecut = scenecut(frames, 0, 1, true, origNumFrames);<br> <br>     if (isScenecut && (m_param->bHistBasedSceneCut || m_param->scenecutThreshold))<br>@@ -2018,17 +2048,16 @@ void Lookahead::slicetypeAnalyse(Lowres **frames, bool bKeyframe)<br>         m_extendGopBoundary = false;<br>         for (int i = m_param->bframes + 1; i < origNumFrames; i += m_param->bframes + 1)<br>         {<br>-            if (!m_param->bHistBasedSceneCut)<br>+            if (!m_param->bHistBasedSceneCut || (m_param->bHistBasedSceneCut && frames[i + 1]->bScenecut))<br>                 scenecut(frames, i, i + 1, true, origNumFrames);<br> <br>             for (int j = i + 1; j <= X265_MIN(i + m_param->bframes + 1, origNumFrames); j++)<br>             {<br>-                if ((!m_param->bHistBasedSceneCut && frames[j]->bScenecut && scenecutInternal(frames, j - 1, j, true)) || <br>-                    (m_param->bHistBasedSceneCut && frames[j]->bScenecut))<br>-                    {<br>-                        m_extendGopBoundary = true;<br>-                        break;<br>-                    }<br>+                if (frames[j]->bScenecut && scenecutInternal(frames, j - 1, j, true))<br>+                {<br>+                    m_extendGopBoundary = true;<br>+                    break;<br>+                }<br>             }<br>             if (m_extendGopBoundary)<br>                 break;<br>@@ -2133,14 +2162,15 @@ void Lookahead::slicetypeAnalyse(Lowres **frames, bool bKeyframe)<br>         {<br>             for (int j = 1; j < numBFrames + 1; j++)<br>             {<br>-                if ((!m_param->bHistBasedSceneCut && scenecut(frames, j, j + 1, false, origNumFrames)) ||<br>-                    (m_param->bHistBasedSceneCut && frames[j + 1]->bScenecut) ||<br>-                    (bForceRADL && (frames[j]->frameNum == preRADL)))<br>-                    {<br>-            
            frames[j]->sliceType = X265_TYPE_P;<br>-                        numAnalyzed = j;<br>-                        break;<br>-                    }<br>+                bool isNextScenecut = false;<br>+                if (!m_param->bHistBasedSceneCut || (m_param->bHistBasedSceneCut && frames[j + 1]->bScenecut))<br>+                    isNextScenecut = scenecut(frames, j, j + 1, false, origNumFrames);<br>+                if (isNextScenecut || (bForceRADL && frames[j]->frameNum == preRADL))<br>+                {<br>+                    frames[j]->sliceType = X265_TYPE_P;<br>+                    numAnalyzed = j;<br>+                    break;<br>+                }<br>             }<br>         }<br>         resetStart = bKeyframe ? 1 : X265_MIN(numBFrames + 2, numAnalyzed + 1);<br>@@ -2203,7 +2233,7 @@ bool Lookahead::scenecut(Lowres **frames, int p0, int p1, bool bRealScenecut, in<br>          * and not considered a scenecut. */<br>         for (int cp1 = p1; cp1 <= maxp1; cp1++)<br>         {<br>-            if (!scenecutInternal(frames, p0, cp1, false))<br>+            if (!m_param->bHistBasedSceneCut && !scenecutInternal(frames, p0, cp1, false))<br>             {<br>                 /* Any frame in between p0 and cur_p1 cannot be a real scenecut. 
*/<br>                 for (int i = cp1; i > p0; i--)<br>@@ -2212,7 +2242,7 @@ bool Lookahead::scenecut(Lowres **frames, int p0, int p1, bool bRealScenecut, in<br>                     noScenecuts = false;<br>                 }<br>             }<br>-            else if (scenecutInternal(frames, cp1 - 1, cp1, false))<br>+            else if ((m_param->bHistBasedSceneCut && frames[cp1]->m_bIsMaxThres) || scenecutInternal(frames, cp1 - 1, cp1, false))<br>             {<br>                 /* If current frame is a Scenecut from p0 frame as well as Scenecut from<br>                  * preceeding frame, mark it as a Scenecut */<br>@@ -2273,6 +2303,10 @@ bool Lookahead::scenecut(Lowres **frames, int p0, int p1, bool bRealScenecut, in<br> <br>     if (!frames[p1]->bScenecut)<br>         return false;<br>+    /* Check only scene transitions if max threshold */<br>+    if (m_param->bHistBasedSceneCut && frames[p1]->m_bIsMaxThres)<br>+        return frames[p1]->bScenecut;<br>+<br>     return scenecutInternal(frames, p0, p1, bRealScenecut);<br> }<br> <br>@@ -2289,7 +2323,19 @@ bool Lookahead::scenecutInternal(Lowres **frames, int p0, int p1, bool bRealScen<br>     /* magic numbers pulled out of thin air */<br>     float threshMin = (float)(threshMax * 0.25);<br>     double bias = m_param->scenecutBias;<br>-    if (bRealScenecut)<br>+    if (m_param->bHistBasedSceneCut)<br>+    {<br>+        double minT = 50.0 * (1 + m_param->edgeTransitionThreshold);<br>+        if (frame->interPCostPercDiff > minT || frame->intraCostPercDiff > minT)<br>+        {<br>+            if (bRealScenecut && frame->bScenecut)<br>+                x265_log(m_param, X265_LOG_DEBUG, "scene cut at %d \n", frame->frameNum);<br>+            return frame->bScenecut;<br>+        }<br>+        else<br>+            return false;<br>+    }<br>+    else if (bRealScenecut)<br>     {<br>         if (m_param->keyframeMin == m_param->keyframeMax)<br>             threshMin = threshMax;<br>diff --git a/source/x265.h 
b/source/x265.h<br>index 1e6f9ece6..32feb2bca 100644<br>--- a/source/x265.h<br>+++ b/source/x265.h<br>@@ -1860,7 +1860,7 @@ typedef struct x265_param<br>     /* A genuine threshold used for histogram based scene cut detection.<br>      * This threshold determines whether a frame is a scenecut or not<br>      * when compared against the edge and chroma histogram sad values.<br>-     * Default 0.01. Range: Real number in the interval (0,2). */<br>+     * Default 0.03. Range: Real number in the interval (0,1). */<br>     double    edgeTransitionThreshold;<br> <br>     /* Enables histogram based scenecut detection algorithm to detect scenecuts. Default disabled */<br>-- <br>2.24.0.windows.2<br><br></div>