[x265] [x265 Patch] Histogram Based Scene Cut Detection

Srikanth Kurapati srikanth.kurapati at multicorewareinc.com
Mon Nov 18 07:49:51 CET 2019


# HG changeset patch
# User Srikanth Kurapati <srikanth.kurapati at multicorewareinc.com>
# Date 1573649311 -19800
#      Wed Nov 13 18:18:31 2019 +0530
# Node ID 40beab295ca274bf62cb2fd2e732da722d10eea3
# Parent  04db2bfee5d628d931d1407355b909ac8ff1c898
Histogram based scenecut detection

This patch does the following:
1. Identifies scenecuts by thresholding against the SAD of edge and chroma
histograms.
2. Adds option "--hist-scenecut" to enable the histogram based scenecut method.
3. Adds option "--hist-threshold" to provide the threshold for determining
scene-cuts.
4. Optimizes frame duplication by reusing the SAD for marking duplicate
frames.

diff -r 04db2bfee5d6 -r 40beab295ca2 doc/reST/cli.rst
--- a/doc/reST/cli.rst Thu Oct 31 16:23:27 2019 +0530
+++ b/doc/reST/cli.rst Wed Nov 13 18:18:31 2019 +0530
@@ -1426,7 +1426,23 @@
  This value represents the percentage difference between the inter cost and
  intra cost of a frame used in scenecut detection. For example, a value of
5 indicates,
  if the inter cost of a frame is greater than or equal to 95 percent of
the intra cost of the frame,
- then detect this frame as scenecut. Values between 5 and 15 are
recommended. Default 5.
+ then detect this frame as scenecut. Values between 5 and 15 are
recommended.
+ This value is evaluated only when --scenecut is enabled; otherwise it is
ignored. Default 5.
+
+.. option:: --hist-scenecut, --no-hist-scenecut
+
+ Indicates that scenecuts need to be detected using luma edge and chroma
histograms.
+ `--hist-scenecut` enables scenecut detection using the histograms
and disables the default scenecut algorithm.
+ `--no-hist-scenecut` disables the histogram based scenecut algorithm.
+
+ Note that if --hist-scenecut and --scenecut are enabled together, the
option the user specified first is used and the other is ignored.
+
+.. option:: --hist-threshold <0.0..2.0>
+
+ This value represents the threshold for normalized SAD of edge histograms
used in scenecut detection.
+ This requires hist-scenecut to be enabled. For example, a value of 0.2
indicates that a frame with normalized SAD value
+ greater than 0.2 against the previous frame is detected as a scenecut.
+ Default 0.01.

 .. option:: --radl <integer>

diff -r 04db2bfee5d6 -r 40beab295ca2 source/CMakeLists.txt
--- a/source/CMakeLists.txt Thu Oct 31 16:23:27 2019 +0530
+++ b/source/CMakeLists.txt Wed Nov 13 18:18:31 2019 +0530
@@ -29,7 +29,7 @@
 option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF)
 mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
 # X265_BUILD must be incremented each time the public API is changed
-set(X265_BUILD 182)
+set(X265_BUILD 183)
 configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
                "${PROJECT_BINARY_DIR}/x265.def")
 configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
diff -r 04db2bfee5d6 -r 40beab295ca2 source/common/common.h
--- a/source/common/common.h Thu Oct 31 16:23:27 2019 +0530
+++ b/source/common/common.h Wed Nov 13 18:18:31 2019 +0530
@@ -129,12 +129,16 @@
 typedef uint64_t sum2_t;
 typedef uint64_t pixel4;
 typedef int64_t  ssum2_t;
+#define HISTOGRAM_BINS 1024
+#define SHIFT 1
 #else
 typedef uint8_t  pixel;
 typedef uint16_t sum_t;
 typedef uint32_t sum2_t;
 typedef uint32_t pixel4;
 typedef int32_t  ssum2_t; // Signed sum
+#define HISTOGRAM_BINS 256
+#define SHIFT 0
 #endif // if HIGH_BIT_DEPTH

 #if X265_DEPTH < 10
diff -r 04db2bfee5d6 -r 40beab295ca2 source/common/param.cpp
--- a/source/common/param.cpp Thu Oct 31 16:23:27 2019 +0530
+++ b/source/common/param.cpp Wed Nov 13 18:18:31 2019 +0530
@@ -167,6 +167,8 @@
     param->bFrameAdaptive = X265_B_ADAPT_TRELLIS;
     param->bBPyramid = 1;
     param->scenecutThreshold = 40; /* Magic number pulled in from x264 */
+    param->edgeTransitionThreshold = 0.01;
+    param->bHistBasedSceneCut = false;
     param->lookaheadSlices = 8;
     param->lookaheadThreads = 0;
     param->scenecutBias = 5.0;
@@ -572,6 +574,7 @@
             param->bframes = 0;
             param->lookaheadDepth = 0;
             param->scenecutThreshold = 0;
+            param->bHistBasedSceneCut = false;
             param->rc.cuTree = 0;
             param->frameNumThreads = 1;
         }
@@ -614,7 +617,7 @@
     return 0;
 }

-static int x265_atobool(const char* str, bool& bError)
+static bool x265_atobool(const char* str, bool& bError)
 {
     if (!strcmp(str, "1") ||
         !strcmp(str, "true") ||
@@ -764,6 +767,7 @@
     bool bNameWasBool = false;
     bool bValueWasNull = !value;
     bool bExtraParams = false;
+    static int scenecutChoice = -1;
     char nameBuf[64];
     static int count;

@@ -920,11 +924,16 @@
     OPT("lookahead-slices") p->lookaheadSlices = atoi(value);
     OPT("scenecut")
     {
-        p->scenecutThreshold = atobool(value);
-        if (bError || p->scenecutThreshold)
+        if (scenecutChoice == -1)
         {
-            bError = false;
-            p->scenecutThreshold = atoi(value);
+            p->scenecutThreshold = atobool(value);
+            if (bError || p->scenecutThreshold)
+            {
+                bError = false;
+                p->scenecutThreshold = atoi(value);
+                p->bHistBasedSceneCut = false;
+                scenecutChoice = 0;
+            }
         }
     }
     OPT("temporal-layers") p->bEnableTemporalSubLayers = atobool(value);
@@ -1191,6 +1200,46 @@
         OPT("opt-ref-list-length-pps") p->bOptRefListLengthPPS =
atobool(value);
         OPT("multi-pass-opt-rps") p->bMultiPassOptRPS = atobool(value);
         OPT("scenecut-bias") p->scenecutBias = atof(value);
+        OPT("hist-scenecut")
+        {
+            if (scenecutChoice == -1)
+            {
+                p->bHistBasedSceneCut = atobool(value);
+                if (bError)
+                {
+                    bError = false;
+                    p->bHistBasedSceneCut = false;
+                }
+                if (p->bHistBasedSceneCut)
+                {
+                    bError = false;
+                    p->scenecutThreshold = 0;
+                    scenecutChoice = 1;
+                }
+            }
+            else
+            {
+                p->bHistBasedSceneCut = atobool(value);
+                p->bHistBasedSceneCut = false;
+            }
+        }
+        OPT("hist-threshold")
+        {
+            if (p->bHistBasedSceneCut)
+            {
+                p->edgeTransitionThreshold = atof(value);
+                if (bError)
+                {
+                    bError = false;
+                    p->edgeTransitionThreshold = 0.01;
+                    x265_log(p, X265_LOG_INFO, "Using  default threshold
%.2lf for scene cut detection\n", p->edgeTransitionThreshold);
+                }
+            }
+            else
+            {
+                x265_log(p, X265_LOG_WARNING, "Histogram based scene cut
detection not enabled\n", p->edgeTransitionThreshold);
+            }
+        }
         OPT("lookahead-threads") p->lookaheadThreads = atoi(value);
         OPT("opt-cu-delta-qp") p->bOptCUDeltaQP = atobool(value);
         OPT("multi-pass-opt-analysis") p->analysisMultiPassRefine =
atobool(value);
@@ -1631,8 +1680,16 @@
           "Valid Logging level -1:none 0:error 1:warning 2:info 3:debug
4:full");
     CHECK(param->scenecutThreshold < 0,
           "scenecutThreshold must be greater than 0");
-    CHECK(param->scenecutBias < 0 || 100 < param->scenecutBias,
-           "scenecut-bias must be between 0 and 100");
+    if (param->scenecutThreshold)
+    {
+        CHECK(param->scenecutBias < 0 || 100 < param->scenecutBias,
+            "scenecut-bias must be between 0 and 100");
+    }
+    else if (param->bHistBasedSceneCut)
+    {
+        CHECK(param->edgeTransitionThreshold < 0.0 || 2.0 <
param->edgeTransitionThreshold,
+            "hist-threshold must be between 0.0 and 2.0");
+    }
     CHECK(param->radl < 0 || param->radl > param->bframes,
           "radl must be between 0 and bframes");
     CHECK(param->rdPenalty < 0 || param->rdPenalty > 2,
@@ -1792,9 +1849,13 @@
         x265_log(param, X265_LOG_INFO, "ME / range / subpel / merge
  : %s / %d / %d / %d\n",
             x265_motion_est_names[param->searchMethod],
param->searchRange, param->subpelRefine, param->maxNumMergeCand);

-    if (param->keyframeMax != INT_MAX || param->scenecutThreshold)
-        x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut /
bias: %d / %d / %d / %.2lf\n", param->keyframeMin, param->keyframeMax,
param->scenecutThreshold, param->scenecutBias * 100);
-    else
+    if (param->scenecutThreshold && param->keyframeMax != INT_MAX)
+        x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut /
bias  : %d / %d / %d / %.2lf \n",
+                 param->keyframeMin, param->keyframeMax,
param->scenecutThreshold, param->scenecutBias * 100);
+    else if (param->bHistBasedSceneCut && param->keyframeMax != INT_MAX)
+        x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut /
edge threshold  : %d / %d / %d / %.2lf\n",
+                 param->keyframeMin, param->keyframeMax,
param->bHistBasedSceneCut, param->edgeTransitionThreshold);
+    else if (param->keyframeMax == INT_MAX)
         x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut
  : disabled\n");

     if (param->cbQpOffset || param->crQpOffset)
@@ -1961,6 +2022,8 @@
     s += sprintf(s, " rc-lookahead=%d", p->lookaheadDepth);
     s += sprintf(s, " lookahead-slices=%d", p->lookaheadSlices);
     s += sprintf(s, " scenecut=%d", p->scenecutThreshold);
+    s += sprintf(s, " hist-scenecut=%d", p->bHistBasedSceneCut);
+    s += sprintf(s, " hist-threshold=%.2f", p->edgeTransitionThreshold);
     s += sprintf(s, " radl=%d", p->radl);
     BOOL(p->bEnableHRDConcatFlag, "splice");
     BOOL(p->bIntraRefresh, "intra-refresh");
@@ -2108,6 +2171,8 @@
     BOOL(p->bOptRefListLengthPPS, "opt-ref-list-length-pps");
     BOOL(p->bMultiPassOptRPS, "multi-pass-opt-rps");
     s += sprintf(s, " scenecut-bias=%.2f", p->scenecutBias);
+    s += sprintf(s, " hist-threshold=%.2f", p->edgeTransitionThreshold);
+
     BOOL(p->bOptCUDeltaQP, "opt-cu-delta-qp");
     BOOL(p->bAQMotion, "aq-motion");
     BOOL(p->bEmitHDRSEI, "hdr");
@@ -2261,6 +2326,7 @@
     dst->lookaheadSlices = src->lookaheadSlices;
     dst->lookaheadThreads = src->lookaheadThreads;
     dst->scenecutThreshold = src->scenecutThreshold;
+    dst->bHistBasedSceneCut = src->bHistBasedSceneCut;
     dst->bIntraRefresh = src->bIntraRefresh;
     dst->maxCUSize = src->maxCUSize;
     dst->minCUSize = src->minCUSize;
@@ -2420,6 +2486,7 @@
     dst->bOptRefListLengthPPS = src->bOptRefListLengthPPS;
     dst->bMultiPassOptRPS = src->bMultiPassOptRPS;
     dst->scenecutBias = src->scenecutBias;
+    dst->edgeTransitionThreshold = src->edgeTransitionThreshold;
     dst->gopLookahead = src->lookaheadDepth;
     dst->bOptCUDeltaQP = src->bOptCUDeltaQP;
     dst->analysisMultiPassDistortion = src->analysisMultiPassDistortion;
diff -r 04db2bfee5d6 -r 40beab295ca2 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Thu Oct 31 16:23:27 2019 +0530
+++ b/source/encoder/encoder.cpp Wed Nov 13 18:18:31 2019 +0530
@@ -130,12 +130,17 @@
 #if SVT_HEVC
     m_svtAppData = NULL;
 #endif
-
     m_prevTonemapPayload.payload = NULL;
     m_startPoint = 0;
     m_saveCTUSize = 0;
+    m_edgePic = NULL;
+    m_edgeHistThreshold = 0;
+    m_chromaHistThreshold = 0.0;
+    m_scaledEdgeThreshold = 0.0;
+    m_scaledChromaThreshold = 0.0;
     m_zoneIndex = 0;
 }
+
 inline char *strcatFilename(const char *input, const char *suffix)
 {
     char *output = X265_MALLOC(char, strlen(input) + strlen(suffix) + 1);
@@ -210,6 +215,24 @@
         }
     }

+    if (m_param->bHistBasedSceneCut)
+    {
+        for (int i = 0; i < x265_cli_csps[m_param->internalCsp].planes;
i++)
+        {
+            m_planeSizes[i] = m_param->sourceWidth * m_param->sourceHeight
>> x265_cli_csps[m_param->internalCsp].height[i];
+        }
+        uint32_t pixelbytes = m_param->sourceBitDepth > 8 ? 2 : 1;
+        m_edgePic = X265_MALLOC(pixel, m_planeSizes[0]*pixelbytes);
+        double strengthFactor = 2.0;
+        m_edgeHistThreshold = m_param->edgeTransitionThreshold;
+        m_chromaHistThreshold = m_edgeHistThreshold * 10.0;
+        m_chromaHistThreshold = x265_min(m_chromaHistThreshold,
MAX_SCENECUT_THRESHOLD);
+        m_scaledEdgeThreshold = m_edgeHistThreshold * strengthFactor;
+        m_scaledEdgeThreshold = x265_min(m_scaledEdgeThreshold,
MAX_SCENECUT_THRESHOLD);
+        m_scaledChromaThreshold = m_chromaHistThreshold * strengthFactor;
+        m_scaledChromaThreshold = x265_min(m_scaledChromaThreshold,
MAX_SCENECUT_THRESHOLD);
+    }
+
     // Do not allow WPP if only one row or fewer than 3 columns, it is
pointless and unstable
     if (rows == 1 || cols < 3)
     {
@@ -854,6 +877,12 @@
         }
     }

+    if (m_param->bHistBasedSceneCut)
+    {
+        if(m_edgePic != NULL)
+           X265_FREE_ZERO(m_edgePic);
+    }
+
     for (int i = 0; i < m_param->frameNumThreads; i++)
     {
         if (m_frameEncoder[i])
@@ -1313,6 +1342,141 @@
     dest->planes[2] = (char*)dest->planes[1] + src->stride[1] *
(src->height >> x265_cli_csps[src->colorSpace].height[1]);
 }

+bool Encoder::computeHistograms(x265_picture *pic)
+{
+    pixel *src = (pixel*)pic->planes[0];
+    size_t bufSize = sizeof(pixel) * m_planeSizes[0];
+    int32_t planeCount = x265_cli_csps[m_param->internalCsp].planes;
+    int32_t numBytes = m_param->sourceBitDepth > 8 ? 2 : 1;
+    memset(m_edgePic, 0, bufSize*numBytes);
+
+    if (!computeEdge(m_edgePic, src, NULL, pic->width, pic->height,
pic->width, false))
+    {
+       x265_log(m_param, X265_LOG_ERROR, "Failed edge computation!");
+       return false;
+    }
+
+    pixel pixelVal;
+    int64_t size = pic->height * (pic->stride[0] >> SHIFT);
+    int32_t *edgeHist = m_curEdgeHist;
+    memset(edgeHist, 0, 2 * sizeof(int32_t));
+    for (int64_t i = 0; i < size; i++)
+    {
+        if (!m_edgePic[i])
+           edgeHist[0]++;
+        else
+           edgeHist[1]++;
+    }
+
+    /*U Histogram Calculation*/
+    int32_t HeightL = (pic->height >>
x265_cli_csps[pic->colorSpace].height[1]);
+    size = HeightL * (pic->stride[1] >> SHIFT);
+    int32_t *uHist = m_curUVHist[0];
+    pixel *chromaPlane = (pixel *)pic->planes[1];
+
+    memset(uHist, 0, HISTOGRAM_BINS * sizeof(int32_t));
+
+    for (int64_t i = 0; i < size; i++)
+    {
+        pixelVal = chromaPlane[i];
+        uHist[pixelVal]++;
+    }
+
+    /*V Histogram Calculation */
+    if (planeCount == 3)
+    {
+        pixelVal = 0;
+        int32_t heightV = (pic->height >>
x265_cli_csps[pic->colorSpace].height[2]);
+        size = heightV * (pic->stride[2] >> SHIFT);
+        int32_t *vHist = m_curUVHist[1];
+        chromaPlane = (pixel *)pic->planes[2];
+
+        memset(vHist, 0, HISTOGRAM_BINS * sizeof(int32_t));
+        for (int64_t i = 0; i < size; i++)
+        {
+            pixelVal = chromaPlane[i];
+            vHist[pixelVal]++;
+        }
+        for (int i = 0; i < HISTOGRAM_BINS; i++)
+        {
+            m_curMaxUVHist[i] = x265_max(uHist[i],vHist[i]);
+        }
+    }
+    else
+    {   /* in case of bi planar color space */
+
 memcpy(m_curMaxUVHist,m_curUVHist[0],HISTOGRAM_BINS*sizeof(int32_t));
+    }
+
+    return true;
+}
+
+void Encoder::computeHistogramSAD(double *maxUVNormalizedSad, double
*edgeNormalizedSad, int curPoc)
+{
+
+    if (curPoc == 0)
+    {  /* first frame is scenecut by default no sad computation for the
same. */
+       *maxUVNormalizedSad = 0.0;
+       *edgeNormalizedSad  = 0.0;
+    }
+    else
+    {
+        /* compute sum of absolute difference of normalized histogram bins
for maxUV and edge histograms. */
+        int32_t edgefreqDiff = 0;
+        int32_t maxUVfreqDiff = 0;
+        double  edgeProbabilityDiff = 0;
+
+        for (int j = 0; j < HISTOGRAM_BINS; j++)
+        {
+            if (j < 2 )
+            {
+               edgefreqDiff = abs(m_curEdgeHist[j] - m_prevEdgeHist[j]);
+               edgeProbabilityDiff = (double) edgefreqDiff /
m_planeSizes[0];
+               *edgeNormalizedSad += edgeProbabilityDiff;
+            }
+            maxUVfreqDiff = abs(m_curMaxUVHist[j] - m_prevMaxUVHist[j]);
+            *maxUVNormalizedSad += (double)maxUVfreqDiff / m_planeSizes[2];
+        }
+    }
+
+    /* store histograms of previous frame for reference */
+    size_t bufsize = HISTOGRAM_BINS * sizeof(int32_t);
+    memcpy(m_prevMaxUVHist, m_curMaxUVHist, bufsize);
+    memcpy(m_prevEdgeHist, m_curEdgeHist, 2*sizeof(int32_t));
+
+}
+
+void Encoder::findSceneCuts(x265_picture * pic, bool& bDup, double
maxUVSad, double edgeSad)
+{
+    pic->frameData.bScenecut = false;
+
+    if (pic->poc == 0)
+    {
+       /* for first frame */
+       pic->frameData.bScenecut = false;
+       bDup = false;
+    }
+    else
+    {
+        if (edgeSad == 0.0 && maxUVSad == 0.0)
+        {
+           bDup = true;
+        }
+        else if (edgeSad > m_edgeHistThreshold && maxUVSad >=
m_chromaHistThreshold)
+        {
+             pic->frameData.bScenecut = true;
+             bDup = false;
+        }
+        else if (edgeSad > m_scaledEdgeThreshold || maxUVSad >=
m_scaledChromaThreshold)
+        {
+             pic->frameData.bScenecut = true;
+             bDup = false;
+        }
+    }
+
+    if (pic->frameData.bScenecut)
+       x265_log(m_param, X265_LOG_DEBUG, "scene cut at %d \n",pic->poc);
+}
+
 /**
  * Feed one new input frame into the encoder, get one frame out. If pic_in
is
  * NULL, a flush condition is implied and pic_in must be NULL for all
subsequent
@@ -1339,6 +1503,8 @@
     const x265_picture* inputPic = NULL;
     static int written = 0, read = 0;
     bool dontRead = false;
+    bool bdropFrame = false;
+    bool dropflag = false;

     if (m_exportedPic)
     {
@@ -1350,6 +1516,17 @@
     }
     if ((pic_in && (!m_param->chunkEnd || (m_encodedFrameNum <
m_param->chunkEnd))) || (m_param->bEnableFrameDuplication && !pic_in &&
(read < written)))
     {
+        if (m_param->bHistBasedSceneCut && pic_in)
+        {
+            x265_picture *pic = (x265_picture *) pic_in;
+            if (computeHistograms(pic))
+            {
+               double  maxUVSad = 0.0, edgeSad = 0.0;
+               computeHistogramSAD(&maxUVSad, &edgeSad,pic_in->poc);
+               findSceneCuts(pic, bdropFrame, maxUVSad, edgeSad);
+            }
+        }
+
         if ((m_param->bEnableFrameDuplication && !pic_in && (read <
written)))
             dontRead = true;
         else
@@ -1393,9 +1570,27 @@
                     written++;
                 }

-                psnrWeight = ComputePSNR(m_dupBuffer[0]->dupPic,
m_dupBuffer[1]->dupPic, m_param);
-
-                if (psnrWeight >= m_param->dupThreshold)
+                if (m_param->bEnableFrameDuplication &&
m_param->bHistBasedSceneCut)
+                {
+                    if (!bdropFrame &&
m_dupBuffer[1]->dupPic->frameData.bScenecut == false)
+                    {
+                       psnrWeight = ComputePSNR(m_dupBuffer[0]->dupPic,
m_dupBuffer[1]->dupPic, m_param);
+                       if (psnrWeight >= m_param->dupThreshold)
+                          dropflag = true;
+                    }
+                    else
+                    {
+                       dropflag = true;
+                    }
+                }
+                else if (m_param->bEnableFrameDuplication)
+                {
+                    psnrWeight = ComputePSNR(m_dupBuffer[0]->dupPic,
m_dupBuffer[1]->dupPic, m_param);
+                    if (psnrWeight >= m_param->dupThreshold)
+                       dropflag = true;
+                }
+
+                if (dropflag)
                 {
                     if (m_dupBuffer[0]->bDup)
                     {
@@ -1498,6 +1693,10 @@
         inFrame->m_poc       = ++m_pocLast;
         inFrame->m_userData  = inputPic->userData;
         inFrame->m_pts       = inputPic->pts;
+        if (m_param->bHistBasedSceneCut)
+        {
+           inFrame->m_lowres.bScenecut = inputPic->frameData.bScenecut;
+        }
         inFrame->m_forceqp   = inputPic->forceqp;
         inFrame->m_param     = (m_reconfigure || m_reconfigureRc) ?
m_latestParam : m_param;
         inFrame->m_picStruct = inputPic->picStruct;
@@ -3209,6 +3408,7 @@
          * adaptive I frame placement */
         p->keyframeMax = INT_MAX;
         p->scenecutThreshold = 0;
+        p->bHistBasedSceneCut = 0;
     }
     else if (p->keyframeMax <= 1)
     {
@@ -3222,6 +3422,7 @@
         p->lookaheadDepth = 0;
         p->bframes = 0;
         p->scenecutThreshold = 0;
+        p->bHistBasedSceneCut = 0;
         p->bFrameAdaptive = 0;
         p->rc.cuTree = 0;
         p->bEnableWeightedPred = 0;
@@ -3881,6 +4082,17 @@
             m_param->searchMethod = m_param->hmeSearchMethod[2];
         }
     }
+
+    if (p->scenecutThreshold && p->edgeTransitionThreshold != 0.01)
+    {
+       x265_log(p, X265_LOG_WARNING, "using default scenecut-bias %.2lf
for scene cut detection\n",p->scenecutBias);
+    }
+    else if (p->bHistBasedSceneCut && p->edgeTransitionThreshold == 0.0)
+    {
+         p->edgeTransitionThreshold = 0.01;
+         x265_log(p, X265_LOG_WARNING, "using  default threshold %.2lf for
scene cut detection\n", p->edgeTransitionThreshold);
+    }
+
 }

 void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc,
const x265_picture* picIn, int paramBytes)
diff -r 04db2bfee5d6 -r 40beab295ca2 source/encoder/encoder.h
--- a/source/encoder/encoder.h Thu Oct 31 16:23:27 2019 +0530
+++ b/source/encoder/encoder.h Wed Nov 13 18:18:31 2019 +0530
@@ -156,7 +156,6 @@
     bool bDup;
 };

-
 class FrameEncoder;
 class DPB;
 class Lookahead;
@@ -164,6 +163,8 @@
 class ThreadPool;
 class FrameData;

+#define MAX_SCENECUT_THRESHOLD 2.0
+
 class Encoder : public x265_encoder
 {
 public:
@@ -228,7 +229,7 @@
     bool               m_reconfigureRc;
     bool               m_reconfigureZone;

-    int               m_saveCtuDistortionLevel;
+    int                m_saveCtuDistortionLevel;

     /* Begin intra refresh when one not in progress or else begin one as
soon as the current
      * one is done. Requires bIntraRefresh to be set.*/
@@ -245,11 +246,24 @@
     Lock               m_rpsInSpsLock;
     int                m_rpsInSpsCount;
     /* For HDR*/
-    double                m_cB;
-    double                m_cR;
+    double             m_cB;
+    double             m_cR;
+
+    int                m_bToneMap; // Enables tone-mapping
+    int                m_enableNal;

-    int                     m_bToneMap; // Enables tone-mapping
-    int                     m_enableNal;
+    /* For histogram based scene-cut detection */
+    pixel*             m_edgePic;
+    int32_t            m_curUVHist[2][HISTOGRAM_BINS];
+    int32_t            m_curMaxUVHist[HISTOGRAM_BINS];
+    int32_t            m_prevMaxUVHist[HISTOGRAM_BINS];
+    int32_t            m_curEdgeHist[2];
+    int32_t            m_prevEdgeHist[2];
+    uint32_t           m_planeSizes[3];
+    double             m_edgeHistThreshold;
+    double             m_chromaHistThreshold;
+    double             m_scaledEdgeThreshold;
+    double             m_scaledChromaThreshold;

 #ifdef ENABLE_HDR10_PLUS
     const hdr10plus_api     *m_hdr10plus_api;
@@ -355,6 +369,10 @@

     void copyPicture(x265_picture *dest, const x265_picture *src);

+    bool computeHistograms(x265_picture *pic);
+    void computeHistogramSAD(double *maxUVNormalizedSAD, double
*edgeNormalizedSAD, int curPoc);
+    void findSceneCuts(x265_picture * pic, bool& bDup, double
m_maxUVSADVal, double m_edgeSADVal);
+
     void initRefIdx();
     void analyseRefIdx(int *numRefIdx);
     void updateRefIdx();
diff -r 04db2bfee5d6 -r 40beab295ca2 source/encoder/ratecontrol.cpp
--- a/source/encoder/ratecontrol.cpp Thu Oct 31 16:23:27 2019 +0530
+++ b/source/encoder/ratecontrol.cpp Wed Nov 13 18:18:31 2019 +0530
@@ -508,6 +508,7 @@
                 CMP_OPT_FIRST_PASS("open-gop", m_param->bOpenGOP);
                 CMP_OPT_FIRST_PASS(" keyint", m_param->keyframeMax);
                 CMP_OPT_FIRST_PASS("scenecut", m_param->scenecutThreshold);
+                CMP_OPT_FIRST_PASS("hist-threshold",
m_param->edgeTransitionThreshold);
                 CMP_OPT_FIRST_PASS("intra-refresh",
m_param->bIntraRefresh);
                 if (m_param->bMultiPassOptRPS)
                 {
@@ -1200,6 +1201,7 @@
             m_param->rc.bStatRead = 0;
             m_param->bFrameAdaptive = 0;
             m_param->scenecutThreshold = 0;
+            m_param->bHistBasedSceneCut = false;
             m_param->rc.cuTree = 0;
             if (m_param->bframes > 1)
                 m_param->bframes = 1;
@@ -2284,7 +2286,7 @@
     if (m_isVbv && m_currentSatd > 0 && curFrame)
     {
         if (m_param->lookaheadDepth || m_param->rc.cuTree ||
-            m_param->scenecutThreshold ||
+            (m_param->scenecutThreshold || m_param->bHistBasedSceneCut) ||
             (m_param->bFrameAdaptive && m_param->bframes))
         {
            /* Lookahead VBV: If lookahead is done, raise the quantizer as
necessary
diff -r 04db2bfee5d6 -r 40beab295ca2 source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp Thu Oct 31 16:23:27 2019 +0530
+++ b/source/encoder/slicetype.cpp Wed Nov 13 18:18:31 2019 +0530
@@ -85,6 +85,69 @@

 } // end anonymous namespace

+namespace X265_NS {
+
+bool computeEdge(pixel *edgePic, pixel *refPic, pixel *edgeTheta, intptr_t
stride, int height, int width, bool bcalcTheta)
+{
+    intptr_t rowOne = 0, rowTwo = 0, rowThree = 0, colOne = 0, colTwo = 0,
colThree = 0;
+    intptr_t middle = 0, topLeft = 0, topRight = 0, bottomLeft = 0,
bottomRight = 0;
+
+    const int startIndex = 1;
+
+    if (!edgePic || !refPic || (!edgeTheta && bcalcTheta))
+    {
+        return false;
+    }
+    else
+    {
+        float gradientH = 0, gradientV = 0, radians = 0, theta = 0;
+        float gradientMagnitude = 0;
+        pixel blackPixel = 0;
+
+        //Applying Sobel filter except for border pixels
+        height = height - startIndex;
+        width = width - startIndex;
+        for (int rowNum = startIndex; rowNum < height; rowNum++)
+        {
+            rowTwo = rowNum * stride;
+            rowOne = rowTwo - stride;
+            rowThree = rowTwo + stride;
+
+            for (int colNum = startIndex; colNum < width; colNum++)
+            {
+
+                /* Horizontal and vertical gradients
+                [ -3   0   3 ]        [-3   -10  -3 ]
+            gH =[ -10  0   10]   gV = [ 0    0    0 ]
+                [ -3   0   3 ]        [ 3    10   3 ] */
+
+                colTwo = colNum;
+                colOne = colTwo - startIndex;
+                colThree = colTwo + startIndex;
+                middle = rowTwo + colTwo;
+                topLeft = rowOne + colOne;
+                topRight = rowOne + colThree;
+                bottomLeft = rowThree + colOne;
+                bottomRight = rowThree + colThree;
+                gradientH = (float)(-3 * refPic[topLeft] + 3 *
refPic[topRight] - 10 * refPic[rowTwo + colOne] + 10 * refPic[rowTwo +
colThree] - 3 * refPic[bottomLeft] + 3 * refPic[bottomRight]);
+                gradientV = (float)(-3 * refPic[topLeft] - 10 *
refPic[rowOne + colTwo] - 3 * refPic[topRight] + 3 * refPic[bottomLeft] +
10 * refPic[rowThree + colTwo] + 3 * refPic[bottomRight]);
+                gradientMagnitude = sqrtf(gradientH * gradientH +
gradientV * gradientV);
+                if(bcalcTheta)
+                  {
+                    edgeTheta[middle] = 0;
+                    radians = atan2(gradientV, gradientH);
+                    theta = (float)((radians * 180) / PI);
+                    if (theta < 0)
+                       theta = 180 + theta;
+                    edgeTheta[middle] = (pixel)theta;
+                  }
+                edgePic[middle] = (pixel)(gradientMagnitude >=
edgeThreshold ? edgeThreshold : blackPixel);
+            }
+        }
+        return true;
+    }
+}
+
 void edgeFilter(Frame *curFrame, x265_param* param)
 {
     int height = curFrame->m_fencPic->m_picHeight;
@@ -114,6 +177,7 @@
     //Applying Gaussian filter on the picture
     src = (pixel*)curFrame->m_fencPic->m_picOrg[0];
     refPic = curFrame->m_gaussianPic + curFrame->m_fencPic->m_lumaMarginY
* stride + curFrame->m_fencPic->m_lumaMarginX;
+    edgePic = curFrame->m_edgePic + curFrame->m_fencPic->m_lumaMarginY *
stride + curFrame->m_fencPic->m_lumaMarginX;
     pixel pixelValue = 0;

     for (int rowNum = 0; rowNum < height; rowNum++)
@@ -146,51 +210,8 @@
         }
     }

-#if HIGH_BIT_DEPTH //10-bit build
-    float threshold = 1023;
-    pixel whitePixel = 1023;
-#else
-    float threshold = 255;
-    pixel whitePixel = 255;
-#endif
-#define PI 3.14159265
-
-    float gradientH = 0, gradientV = 0, radians = 0, theta = 0;
-    float gradientMagnitude = 0;
-    pixel blackPixel = 0;
-    edgePic = curFrame->m_edgePic + curFrame->m_fencPic->m_lumaMarginY *
stride + curFrame->m_fencPic->m_lumaMarginX;
-    //Applying Sobel filter on the gaussian filtered picture
-    for (int rowNum = 0; rowNum < height; rowNum++)
-    {
-        for (int colNum = 0; colNum < width; colNum++)
-        {
-            edgeTheta[(rowNum*stride) + colNum] = 0;
-            if ((rowNum != 0) && (colNum != 0) && (rowNum != height - 1)
&& (colNum != width - 1)) //Ignoring the border pixels of the picture
-            {
-                /*Horizontal and vertical gradients
-                       [ -3   0   3 ]        [-3   -10  -3 ]
-                  gH = [ -10  0   10]   gV = [ 0    0    0 ]
-                       [ -3   0   3 ]        [ 3    10   3 ]*/
-
-                const intptr_t rowOne = (rowNum - 1)*stride, colOne =
colNum -1;
-                const intptr_t rowTwo = rowNum * stride, colTwo = colNum;
-                const intptr_t rowThree = (rowNum + 1)*stride, colThree =
colNum + 1;
-                const intptr_t index = (rowNum*stride) + colNum;
-
-                gradientH = (float)(-3 * refPic[rowOne + colOne] + 3 *
refPic[rowOne + colThree] - 10 * refPic[rowTwo + colOne] + 10 *
refPic[rowTwo + colThree] - 3 * refPic[rowThree + colOne] + 3 *
refPic[rowThree + colThree]);
-                gradientV = (float)(-3 * refPic[rowOne + colOne] - 10 *
refPic[rowOne + colTwo] - 3 * refPic[rowOne + colThree] + 3 *
refPic[rowThree + colOne] + 10 * refPic[rowThree + colTwo] + 3 *
refPic[rowThree + colThree]);
-
-                gradientMagnitude = sqrtf(gradientH * gradientH +
gradientV * gradientV);
-                radians = atan2(gradientV, gradientH);
-                theta = (float)((radians * 180) / PI);
-                if (theta < 0)
-                    theta = 180 + theta;
-                edgeTheta[(rowNum*stride) + colNum] = (pixel)theta;
-
-                edgePic[index] = gradientMagnitude >= threshold ?
whitePixel : blackPixel;
-            }
-        }
-    }
+    if(!computeEdge(edgePic, refPic, edgeTheta, stride, height, width,
true))
+        x265_log(NULL, X265_LOG_ERROR, "Failed edge computation!");
 }

 //Find the angle of a block by averaging the pixel angles
@@ -1471,7 +1492,7 @@

     if (m_lastNonB && !m_param->rc.bStatRead &&
         ((m_param->bFrameAdaptive && m_param->bframes) ||
-         m_param->rc.cuTree || m_param->scenecutThreshold ||
+         m_param->rc.cuTree || m_param->scenecutThreshold ||
m_param->bHistBasedSceneCut ||
          (m_param->lookaheadDepth && m_param->rc.vbvBufferSize)))
     {
         slicetypeAnalyse(frames, false);
@@ -1971,10 +1992,15 @@

     int numBFrames = 0;
     int numAnalyzed = numFrames;
-    bool isScenecut = scenecut(frames, 0, 1, true, origNumFrames);
+    bool isScenecut = false;

     /* When scenecut threshold is set, use scenecut detection for I frame
placements */
-    if (m_param->scenecutThreshold && isScenecut)
+    if (m_param->scenecutThreshold)
+        isScenecut = scenecut(frames, 0, 1, true, origNumFrames);
+    else if (m_param->bHistBasedSceneCut)
+        isScenecut = frames[1]->bScenecut;
+
+    if (isScenecut)
     {
         frames[1]->sliceType = X265_TYPE_I;
         return;
@@ -1985,14 +2011,17 @@
         m_extendGopBoundary = false;
         for (int i = m_param->bframes + 1; i < origNumFrames; i +=
m_param->bframes + 1)
         {
-            scenecut(frames, i, i + 1, true, origNumFrames);
+            if (m_param->scenecutThreshold)
+               scenecut(frames, i, i + 1, true, origNumFrames);
+
             for (int j = i + 1; j <= X265_MIN(i + m_param->bframes + 1,
origNumFrames); j++)
             {
-                if (frames[j]->bScenecut && scenecutInternal(frames, j -
1, j, true) )
-                {
-                    m_extendGopBoundary = true;
-                    break;
-                }
+                if (( m_param->scenecutThreshold && frames[j]->bScenecut
&& scenecutInternal(frames, j - 1, j, true)) ||
+                    (m_param->bHistBasedSceneCut && frames[j]->bScenecut))
+                    {
+                        m_extendGopBoundary = true;
+                        break;
+                    }
             }
             if (m_extendGopBoundary)
                 break;
@@ -2097,13 +2126,14 @@
         {
             for (int j = 1; j < numBFrames + 1; j++)
             {
-                if (scenecut(frames, j, j + 1, false, origNumFrames) ||
+                if ((m_param->scenecutThreshold && scenecut(frames, j, j +
1, false, origNumFrames)) ||
+                    (m_param->bHistBasedSceneCut && frames[j +
1]->bScenecut) ||
                     (bForceRADL && (frames[j]->frameNum == preRADL)))
-                {
-                    frames[j]->sliceType = X265_TYPE_P;
-                    numAnalyzed = j;
-                    break;
-                }
+                    {
+                        frames[j]->sliceType = X265_TYPE_P;
+                        numAnalyzed = j;
+                        break;
+                    }
             }
         }
         resetStart = bKeyframe ? 1 : X265_MIN(numBFrames + 2, numAnalyzed
+ 1);
@@ -3289,3 +3319,5 @@
     fenc->rowSatds[b - p0][p1 - b][cuY] += bcostAq;
     fenc->lowresCosts[b - p0][p1 - b][cuXY] = (uint16_t)(X265_MIN(bcost,
LOWRES_COST_MASK) | (listused << LOWRES_COST_SHIFT));
 }
+
+}
diff -r 04db2bfee5d6 -r 40beab295ca2 source/encoder/slicetype.h
--- a/source/encoder/slicetype.h Thu Oct 31 16:23:27 2019 +0530
+++ b/source/encoder/slicetype.h Wed Nov 13 18:18:31 2019 +0530
@@ -43,6 +43,13 @@
 #define AQ_EDGE_BIAS 0.5
 #define EDGE_INCLINATION 45

+#ifdef HIGH_BIT_DEPTH
+#define edgeThreshold 1023.0
+#else
+#define edgeThreshold 255.0
+#endif
+#define PI 3.14159265
+
 /* Thread local data for lookahead tasks */
 struct LookaheadTLD
 {
@@ -258,6 +265,7 @@
     CostEstimateGroup& operator=(const CostEstimateGroup&);
 };

-}
+bool computeEdge(pixel *edgePic, pixel *refPic, pixel *edgeTheta, intptr_t
stride, int height, int width, bool bcalcTheta);

+}
 #endif // ifndef X265_SLICETYPE_H
diff -r 04db2bfee5d6 -r 40beab295ca2 source/test/regression-tests.txt
--- a/source/test/regression-tests.txt Thu Oct 31 16:23:27 2019 +0530
+++ b/source/test/regression-tests.txt Wed Nov 13 18:18:31 2019 +0530
@@ -159,6 +159,7 @@
 Traffic_4096x2048_30p.y4m, --preset medium --frame-dup --dup-threshold 60
--hrd --bitrate 10000 --vbv-bufsize 15000 --vbv-maxrate 12000
 Kimono1_1920x1080_24_400.yuv,--preset superfast --qp 28 --zones 0,139,q=32
 Island_960x540_420p_8bit_24fps.yuv,--no-cutree --aq-mode 0 --bitrate 6000
--scenecut-aware-qp
+sintel_trailer_2k_1920x1080_24.yuv, --preset medium --hist-scenecut
--hist-threshold 0.02 --frame-dup --dup-threshold 60 --hrd --bitrate 10000
--vbv-bufsize 15000 --vbv-maxrate 12000

 # Main12 intraCost overflow bug test
 720p50_parkrun_ter.y4m,--preset medium
diff -r 04db2bfee5d6 -r 40beab295ca2 source/x265.h
--- a/source/x265.h Thu Oct 31 16:23:27 2019 +0530
+++ b/source/x265.h Wed Nov 13 18:18:31 2019 +0530
@@ -211,7 +211,7 @@
     uint32_t                          numCUsInFrame;
     uint32_t                          numPartitions;
     uint32_t                          depthBytes;
-    int                               bScenecut;
+    bool                              bScenecut;
     x265_weight_param*                wt;
     x265_analysis_inter_data*         interData;
     x265_analysis_intra_data*         intraData;
@@ -294,7 +294,7 @@
     double           avgChromaVLevel;

     char             sliceType;
-    int              bScenecut;
+    bool             bScenecut;
     double           ipCostRatio;
     int              frameLatency;
     x265_cu_stats    cuStats;
@@ -1024,7 +1024,8 @@
     int       lookaheadSlices;

     /* An arbitrary threshold which determines how aggressively the
lookahead
-     * should detect scene cuts. The default (40) is recommended. */
+     * should detect scene cuts for cost based scenecut detection.
+     * The default (40) is recommended. */
     int       scenecutThreshold;

     /* Replace keyframes by using a column of intra blocks that move
across the video
@@ -1846,6 +1847,16 @@
     /* The offset by which QP is incremented for inter-frames when
bEnableSceneCutAwareQp is set.
      * Default is +5. */
     int       maxQpDelta;
+
+    /* A threshold used for histogram based scenecut detection.
+      * This threshold determines whether a frame is a scenecut or not
+      * when compared against the edge and chroma histogram SAD values.
+      * Default 0.01. Range: Real number in the interval (0,2). */
+      double    edgeTransitionThreshold;
+
+    /* Enables histogram based scenecut detection algorithm to detect
scenecuts. */
+      bool      bHistBasedSceneCut;
+
 } x265_param;
 /* x265_param_alloc:
  *  Allocates an x265_param instance. The returned param structure is not
diff -r 04db2bfee5d6 -r 40beab295ca2 source/x265cli.h
--- a/source/x265cli.h Thu Oct 31 16:23:27 2019 +0530
+++ b/source/x265cli.h Wed Nov 13 18:18:31 2019 +0530
@@ -129,6 +129,9 @@
     { "scenecut",       required_argument, NULL, 0 },
     { "no-scenecut",          no_argument, NULL, 0 },
     { "scenecut-bias",  required_argument, NULL, 0 },
+    { "hist-scenecut",        no_argument, NULL, 0},
+    { "no-hist-scenecut",     no_argument, NULL, 0},
+    { "hist-threshold", required_argument, NULL, 0},
     { "fades",                no_argument, NULL, 0 },
     { "no-fades",             no_argument, NULL, 0 },
     { "scenecut-aware-qp",    no_argument, NULL, 0 },
@@ -489,7 +492,10 @@
     H0("   --gop-lookahead <integer>     Extends gop boundary if a
scenecut is found within this from keyint boundary. Default 0\n");
     H0("   --no-scenecut                 Disable adaptive I-frame
decision\n");
     H0("   --scenecut <integer>          How aggressively to insert extra
I-frames. Default %d\n", param->scenecutThreshold);
-    H1("   --scenecut-bias <0..100.0>    Bias for scenecut detection.
Default %.2f\n", param->scenecutBias);
+    H1("   --scenecut-bias <0..100.0>    Bias for scenecut detection.
Default %.2f\n", param->scenecutBias);
+    H0("   --hist-scenecut               Enables histogram based scene-cut
detection using histogram based algorithm.\n");
+    H0("   --no-hist-scenecut            Disables histogram based
scene-cut detection using histogram based algorithm.\n");
+    H1("   --hist-threshold <0.0..2.0>   Luma Edge histogram's Normalized
SAD threshold for histogram based scenecut detection Default %.2f\n",
param->edgeTransitionThreshold);
     H0("   --[no-]fades                  Enable detection and handling of
fade-in regions. Default %s\n", OPT(param->bEnableFades));
     H1("   --[no-]scenecut-aware-qp      Enable increasing QP for frames
inside the scenecut window after scenecut. Default %s\n",
OPT(param->bEnableSceneCutAwareQp));
     H1("   --scenecut-window <0..1000>   QP incremental duration(in
milliseconds) when scenecut-aware-qp is enabled. Default %d\n",
param->scenecutWindow);

-- 
*With Regards,*
*Srikanth Kurapati.*
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20191118/96a42f34/attachment-0001.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: x265_SCD.patch
Type: application/octet-stream
Size: 40136 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20191118/96a42f34/attachment-0001.obj>


More information about the x265-devel mailing list