[x265] [PATCH]Fixed the --hist-scenecut feature to consider the max variation in chroma histograms along with luma edge histograms. Also fixed the formula for normalizing both the SAD values from 0.0 to 1.0. This would alleviate the false positive scene-cuts observed with this feature.

Praveen Kumar Karadugattu praveenkumar at multicorewareinc.com
Tue Jun 9 17:04:31 CEST 2020


>From 0bc864dbc48624902e5a8314d9ec49ce19a84146 Mon Sep 17 00:00:00 2001
From: Praveen Kumar Karadugattu <praveenkumar at multicorewareinc.com>
Date: Tue, 9 Jun 2020 20:27:48 +0530
Subject: [PATCH] Fixed the --hist-scenecut feature to consider the max
 variation in chroma histograms along with luma edge histograms. Also fixed
 the formula for normalizing both the SAD values from 0.0 to 1.0. This would
 alleviate the false positive scene-cuts observed with this feature.
---
 doc/reST/cli.rst           |  8 ++---
 source/common/param.cpp    |  4 +--
 source/encoder/encoder.cpp | 75
++++++++++++++++++++++------------------------
 source/encoder/encoder.h   |  6 ++--
 source/x265cli.cpp         |  4 +--
 5 files changed, 46 insertions(+), 51 deletions(-)
 mode change 100644 => 100755 doc/reST/cli.rst
 mode change 100644 => 100755 source/common/param.cpp
 mode change 100644 => 100755 source/encoder/encoder.cpp
 mode change 100644 => 100755 source/encoder/encoder.h
 mode change 100644 => 100755 source/x265cli.cpp
diff --git a/doc/reST/cli.rst b/doc/reST/cli.rst
old mode 100644
new mode 100755
index eceec40..c9e288e
--- a/doc/reST/cli.rst
+++ b/doc/reST/cli.rst
@@ -1462,13 +1462,13 @@ Slice decision options
 .. option:: --hist-scenecut, --no-hist-scenecut

  Indicates that scenecuts need to be detected using luma edge and chroma
histograms.
- :option: `--hist-scenecut` enables scenecut detection using the
histograms and disables the default scene cut algorithm.
- :option: `--no-hist-scenecut` disables histogram based scenecut algorithm.
+ :option:`--hist-scenecut` enables scenecut detection using the histograms
and disables the default scene cut algorithm.
+ :option:`--no-hist-scenecut` disables histogram based scenecut algorithm.

-.. option:: --hist-threshold <0.0..2.0>
+.. option:: --hist-threshold <0.0..1.0>

  This value represents the threshold for normalized SAD of edge histograms
used in scenecut detection.
- This requires :option: `--hist-scenecut` to be enabled. For example, a
value of 0.2 indicates that a frame with normalized SAD value
+ This requires :option:`--hist-scenecut` to be enabled. For example, a
value of 0.2 indicates that a frame with normalized SAD value
  greater than 0.2 against the previous frame as scenecut.
  Default 0.01.

diff --git a/source/common/param.cpp b/source/common/param.cpp
old mode 100644
new mode 100755
index fb7244e..925f0c4
--- a/source/common/param.cpp
+++ b/source/common/param.cpp
@@ -1688,8 +1688,8 @@ int x265_check_params(x265_param* param)
           "scenecutThreshold must be greater than 0");
     CHECK(param->scenecutBias < 0 || 100 < param->scenecutBias,
             "scenecut-bias must be between 0 and 100");
-    CHECK(param->edgeTransitionThreshold < 0.0 || 2.0 <
param->edgeTransitionThreshold,
-            "hist-threshold must be between 0.0 and 2.0");
+    CHECK(param->edgeTransitionThreshold < 0.0 || 1.0 <
param->edgeTransitionThreshold,
+            "hist-threshold must be between 0.0 and 1.0");
     CHECK(param->radl < 0 || param->radl > param->bframes,
           "radl must be between 0 and bframes");
     CHECK(param->rdPenalty < 0 || param->rdPenalty > 2,
diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp
old mode 100644
new mode 100755
index 752e5b2..f6bc540
--- a/source/encoder/encoder.cpp
+++ b/source/encoder/encoder.cpp
@@ -222,12 +222,9 @@ void Encoder::create()
         uint32_t pixelbytes = m_param->internalBitDepth > 8 ? 2 : 1;
         m_edgePic = X265_MALLOC(pixel, m_planeSizes[0] * pixelbytes);
         m_edgeHistThreshold = m_param->edgeTransitionThreshold;
-        m_chromaHistThreshold = m_edgeHistThreshold * 10.0;
-        m_chromaHistThreshold = x265_min(m_chromaHistThreshold,
MAX_SCENECUT_THRESHOLD);
-        m_scaledEdgeThreshold = m_edgeHistThreshold *
SCENECUT_STRENGTH_FACTOR;
-        m_scaledEdgeThreshold = x265_min(m_scaledEdgeThreshold,
MAX_SCENECUT_THRESHOLD);
-        m_scaledChromaThreshold = m_chromaHistThreshold *
SCENECUT_STRENGTH_FACTOR;
-        m_scaledChromaThreshold = x265_min(m_scaledChromaThreshold,
MAX_SCENECUT_THRESHOLD);
+        m_chromaHistThreshold = x265_min(m_edgeHistThreshold * 10.0,
MAX_SCENECUT_THRESHOLD);
+        m_scaledEdgeThreshold = x265_min(m_edgeHistThreshold *
SCENECUT_STRENGTH_FACTOR, MAX_SCENECUT_THRESHOLD);
+        m_scaledChromaThreshold = x265_min(m_chromaHistThreshold *
SCENECUT_STRENGTH_FACTOR, MAX_SCENECUT_THRESHOLD);
         if (m_param->sourceBitDepth != m_param->internalBitDepth)
         {
             int size = m_param->sourceWidth * m_param->sourceHeight;
@@ -1450,13 +1447,14 @@ bool Encoder::computeHistograms(x265_picture *pic)
     memset(edgeHist, 0, EDGE_BINS * sizeof(int32_t));
     for (uint32_t i = 0; i < m_planeSizes[0]; i++)
     {
-        if (!m_edgePic[i])
-           edgeHist[0]++;
+        if (m_edgePic[i])
+            edgeHist[1]++;
         else
-           edgeHist[1]++;
+            edgeHist[0]++;
     }
+
     /* Y Histogram Calculation */
-    int32_t* yHist = m_curYUVHist[0];
+    int32_t *yHist = m_curYUVHist[0];
     memset(yHist, 0, HISTOGRAM_BINS * sizeof(int32_t));
     for (uint32_t i = 0; i < m_planeSizes[0]; i++)
     {
@@ -1468,7 +1466,7 @@ bool Encoder::computeHistograms(x265_picture *pic)
     {
         /* U Histogram Calculation */
         int32_t *uHist = m_curYUVHist[1];
-        memset(uHist, 0, HISTOGRAM_BINS * sizeof(int32_t));
+        memset(uHist, 0, sizeof(m_curYUVHist[1]));
         for (uint32_t i = 0; i < m_planeSizes[1]; i++)
         {
             pixelVal = planeU[i];
@@ -1478,52 +1476,56 @@ bool Encoder::computeHistograms(x265_picture *pic)
         /* V Histogram Calculation */
         pixelVal = 0;
         int32_t *vHist = m_curYUVHist[2];
-        memset(vHist, 0, HISTOGRAM_BINS * sizeof(int32_t));
+        memset(vHist, 0, sizeof(m_curYUVHist[2]));
         for (uint32_t i = 0; i < m_planeSizes[2]; i++)
         {
             pixelVal = planeV[i];
             vHist[pixelVal]++;
         }
-        for (int i = 0; i < HISTOGRAM_BINS; i++)
-        {
-            m_curMaxUVHist[i] = x265_max(uHist[i], vHist[i]);
-        }
     }
     return true;
 }

-void Encoder::computeHistogramSAD(double *maxUVNormalizedSad, double
*edgeNormalizedSad, int curPoc)
+void Encoder::computeHistogramSAD(double *normalizedMaxUVSad, double
*normalizedEdgeSad, int curPoc)
 {

     if (curPoc == 0)
     {   /* first frame is scenecut by default no sad computation for the
same. */
-        *maxUVNormalizedSad = 0.0;
-        *edgeNormalizedSad  = 0.0;
+        *normalizedMaxUVSad = 0.0;
+        *normalizedEdgeSad = 0.0;
     }
     else
     {
-        /* compute sum of absolute difference of normalized histogram bins
for maxUV and edge histograms. */
-        int32_t edgefreqDiff = 0;
-        int32_t maxUVfreqDiff = 0;
-        double  edgeProbabilityDiff = 0;
+        /* compute sum of absolute differences of histogram bins of chroma
and luma edge response between the current and prev pictures. */
+        int32_t edgeHistSad = 0;
+        int32_t uHistSad = 0;
+        int32_t vHistSad = 0;
+        double normalizedUSad = 0.0;
+        double normalizedVSad = 0.0;

         for (int j = 0; j < HISTOGRAM_BINS; j++)
         {
             if (j < 2)
             {
-                edgefreqDiff = abs(m_curEdgeHist[j] - m_prevEdgeHist[j]);
-                edgeProbabilityDiff = (double) edgefreqDiff /
m_planeSizes[0];
-                *edgeNormalizedSad += edgeProbabilityDiff;
+                edgeHistSad += abs(m_curEdgeHist[j] - m_prevEdgeHist[j]);
             }
-            maxUVfreqDiff = abs(m_curMaxUVHist[j] - m_prevMaxUVHist[j]);
-            *maxUVNormalizedSad += (double)maxUVfreqDiff / m_planeSizes[2];
+            uHistSad += abs(m_curYUVHist[1][j] - m_prevYUVHist[1][j]);
+            vHistSad += abs(m_curYUVHist[2][j] - m_prevYUVHist[2][j]);
         }
+        *normalizedEdgeSad = normalizeRange(edgeHistSad, 0, 2 *
m_planeSizes[0], 0.0, 1.0);
+        normalizedUSad = normalizeRange(uHistSad, 0, 2 * m_planeSizes[1],
0.0, 1.0);
+        normalizedVSad = normalizeRange(vHistSad, 0, 2 * m_planeSizes[2],
0.0, 1.0);
+        *normalizedMaxUVSad = x265_max(normalizedUSad, normalizedVSad);
     }

     /* store histograms of previous frame for reference */
-    size_t bufsize = HISTOGRAM_BINS * sizeof(int32_t);
-    memcpy(m_prevMaxUVHist, m_curMaxUVHist, bufsize);
-    memcpy(m_prevEdgeHist, m_curEdgeHist, 2 * sizeof(int32_t));
+    memcpy(m_prevEdgeHist, m_curEdgeHist, sizeof(m_curEdgeHist));
+    memcpy(m_prevYUVHist, m_curYUVHist, sizeof(m_curYUVHist));
+}
+
+double Encoder::normalizeRange(int32_t value, int32_t minValue, int32_t
maxValue, double rangeStart, double rangeEnd)
+{
+    return (double)(value - minValue) * (rangeEnd - rangeStart) /
(maxValue - minValue) + rangeStart;
 }

 void Encoder::findSceneCuts(x265_picture *pic, bool& bDup, double
maxUVSad, double edgeSad)
@@ -1542,20 +1544,13 @@ void Encoder::findSceneCuts(x265_picture *pic,
bool& bDup, double maxUVSad, doub
         {
             bDup = true;
         }
-        else if (edgeSad > m_edgeHistThreshold && maxUVSad >=
m_chromaHistThreshold)
-        {
-            pic->frameData.bScenecut = true;
-            bDup = false;
-        }
-        else if (edgeSad > m_scaledEdgeThreshold || maxUVSad >=
m_scaledChromaThreshold)
+        else if (edgeSad > m_scaledEdgeThreshold || maxUVSad >=
m_scaledChromaThreshold || (edgeSad > m_edgeHistThreshold && maxUVSad >=
m_chromaHistThreshold))
         {
             pic->frameData.bScenecut = true;
             bDup = false;
+            x265_log(m_param, X265_LOG_DEBUG, "scene cut at %d \n",
pic->poc);
         }
     }
-
-    if (pic->frameData.bScenecut)
-       x265_log(m_param, X265_LOG_DEBUG, "scene cut at %d \n", pic->poc);
 }

 /**
diff --git a/source/encoder/encoder.h b/source/encoder/encoder.h
old mode 100644
new mode 100755
index 2c45886..fd6b3e7
--- a/source/encoder/encoder.h
+++ b/source/encoder/encoder.h
@@ -163,7 +163,7 @@ class RateControl;
 class ThreadPool;
 class FrameData;

-#define MAX_SCENECUT_THRESHOLD 2.0
+#define MAX_SCENECUT_THRESHOLD 1.0
 #define SCENECUT_STRENGTH_FACTOR 2.0

 class Encoder : public x265_encoder
@@ -257,8 +257,7 @@ public:
     pixel*             m_edgePic;
     pixel*             m_inputPic[3];
     int32_t            m_curYUVHist[3][HISTOGRAM_BINS];
-    int32_t            m_curMaxUVHist[HISTOGRAM_BINS];
-    int32_t            m_prevMaxUVHist[HISTOGRAM_BINS];
+    int32_t            m_prevYUVHist[3][HISTOGRAM_BINS];
     int32_t            m_curEdgeHist[2];
     int32_t            m_prevEdgeHist[2];
     uint32_t           m_planeSizes[3];
@@ -373,6 +372,7 @@ public:

     bool computeHistograms(x265_picture *pic);
     void computeHistogramSAD(double *maxUVNormalizedSAD, double
*edgeNormalizedSAD, int curPoc);
+    double normalizeRange(int32_t value, int32_t minValue, int32_t
maxValue, double rangeStart, double rangeEnd);
     void findSceneCuts(x265_picture *pic, bool& bDup, double
m_maxUVSADVal, double m_edgeSADVal);

     void initRefIdx();
diff --git a/source/x265cli.cpp b/source/x265cli.cpp
old mode 100644
new mode 100755
index 5b95698..fa0036b
--- a/source/x265cli.cpp
+++ b/source/x265cli.cpp
@@ -175,7 +175,7 @@ namespace X265_NS {
         H1("   --scenecut-bias <0..100.0>    Bias for scenecut detection.
Default %.2f\n", param->scenecutBias);
         H0("   --hist-scenecut               Enables histogram based
scene-cut detection using histogram based algorithm.\n");
         H0("   --no-hist-scenecut            Disables histogram based
scene-cut detection using histogram based algorithm.\n");
-        H1("   --hist-threshold <0.0..2.0>   Luma Edge histogram's
Normalized SAD threshold for histogram based scenecut detection Default
%.2f\n", param->edgeTransitionThreshold);
+        H1("   --hist-threshold <0.0..1.0>   Luma Edge histogram's
Normalized SAD threshold for histogram based scenecut detection Default
%.2f\n", param->edgeTransitionThreshold);
         H0("   --[no-]fades                  Enable detection and handling
of fade-in regions. Default %s\n", OPT(param->bEnableFades));
         H1("   --[no-]scenecut-aware-qp      Enable increasing QP for
frames inside the scenecut window after scenecut. Default %s\n",
OPT(param->bEnableSceneCutAwareQp));
         H1("   --scenecut-window <0..1000>   QP incremental duration(in
milliseconds) when scenecut-aware-qp is enabled. Default %d\n",
param->scenecutWindow);
@@ -1059,4 +1059,4 @@ namespace X265_NS {

 #ifdef __cplusplus
 }
-#endif
\ No newline at end of file
+#endif
-- 
1.8.3.1
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20200609/3b181875/attachment-0001.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-Fixed-the-hist-scenecut-feature-to-consider-the-max-.patch
Type: application/octet-stream
Size: 12653 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20200609/3b181875/attachment-0001.obj>


More information about the x265-devel mailing list