[x265] [PATCH 1 of 2] Improvements to hist-based scenecut algorithm.
Pooja Venkatesan
pooja at multicorewareinc.com
Mon Jun 29 12:41:57 CEST 2020
Hi,
I am working on the review comments on this patch series and will send
the updated patches soon. Stay tuned!
Regards,
*Pooja Venkatesan*,
Video Codec Engineer,
Media & AI analytics BU
On Thu, Jun 25, 2020 at 9:00 PM Pooja Venkatesan <pooja at multicorewareinc.com>
wrote:
> From 2777c2e3389eaf556f3420bc0717171bbcf97e52 Mon Sep 17 00:00:00 2001
> From: Pooja Venkatesan <pooja at multicorewareinc.com>
> Date: Thu, 25 Jun 2020 20:42:50 +0530
> Subject: [PATCH] Improvements to hist-based scenecut algorithm.
>
> This patch does the following:
> 1. Add min and max threshold intervals to detect scenecuts.
> 2. For frames that fall within that range,
> compare colour and edge histograms along with inter and intra SATD costs
> to detect scenecuts.
> 3. Handle scene transitions.
> 4. Change default value of hist-threshold to 0.03
> ---
> doc/reST/cli.rst | 7 +--
> source/common/lowres.cpp | 2 +
> source/common/lowres.h | 5 ++
> source/common/param.cpp | 2 +-
> source/encoder/encoder.cpp | 25 ++++++++--
> source/encoder/encoder.h | 2 +-
> source/encoder/slicetype.cpp | 88 +++++++++++++++++++++++++++---------
> source/x265.h | 2 +-
> 8 files changed, 101 insertions(+), 32 deletions(-)
>
> diff --git a/doc/reST/cli.rst b/doc/reST/cli.rst
> index b9d795ace..23b74c3d8 100644
> --- a/doc/reST/cli.rst
> +++ b/doc/reST/cli.rst
> @@ -1468,9 +1468,10 @@ Slice decision options
> .. option:: --hist-threshold <0.0..1.0>
>
> This value represents the threshold for normalized SAD of edge
> histograms used in scenecut detection.
> - This requires :option:`--hist-scenecut` to be enabled. For example, a
> value of 0.2 indicates that a frame with normalized SAD value
> - greater than 0.2 against the previous frame as scenecut.
> - Default 0.01.
> + This requires :option:`--hist-scenecut` to be enabled. For example, a
> value of 0.2 indicates that a frame with normalized SAD value
> + greater than 0.2 against the previous frame is detected as a scenecut.
> + Increasing the threshold reduces the number of scenecuts detected.
> + Default 0.03.
>
> .. option:: --radl <integer>
>
> diff --git a/source/common/lowres.cpp b/source/common/lowres.cpp
> index e8dd991bc..8e19ac17c 100644
> --- a/source/common/lowres.cpp
> +++ b/source/common/lowres.cpp
> @@ -266,6 +266,8 @@ void Lowres::init(PicYuv *origPic, int poc)
> indB = 0;
> memset(costEst, -1, sizeof(costEst));
> memset(weightedCostDelta, 0, sizeof(weightedCostDelta));
> + interPCostPercDiff = 0.0;
> + intraCostPercDiff = 0.0;
>
> if (qpAqOffset && invQscaleFactor)
> memset(costEstAq, -1, sizeof(costEstAq));
> diff --git a/source/common/lowres.h b/source/common/lowres.h
> index 5c50fad67..200b1f032 100644
> --- a/source/common/lowres.h
> +++ b/source/common/lowres.h
> @@ -234,6 +234,11 @@ struct Lowres : public ReferencePlanes
> uint16_t* propagateCost;
> double weightedCostDelta[X265_BFRAME_MAX + 2];
> ReferencePlanes weightedRef[X265_BFRAME_MAX + 2];
> + /* For hist-based scenecut */
> + bool m_bIsMaxThres;
> + double interPCostPercDiff;
> + double intraCostPercDiff;
> +
> bool create(x265_param* param, PicYuv *origPic, uint32_t qgSize);
> void destroy();
> void init(PicYuv *origPic, int poc);
> diff --git a/source/common/param.cpp b/source/common/param.cpp
> index 925f0c460..8c0498efc 100644
> --- a/source/common/param.cpp
> +++ b/source/common/param.cpp
> @@ -168,7 +168,7 @@ void x265_param_default(x265_param* param)
> param->bFrameAdaptive = X265_B_ADAPT_TRELLIS;
> param->bBPyramid = 1;
> param->scenecutThreshold = 40; /* Magic number pulled in from x264 */
> - param->edgeTransitionThreshold = 0.01;
> + param->edgeTransitionThreshold = 0.03;
> param->bHistBasedSceneCut = 0;
> param->lookaheadSlices = 8;
> param->lookaheadThreads = 0;
> diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp
> index f6bc5408d..bec7ff5c0 100644
> --- a/source/encoder/encoder.cpp
> +++ b/source/encoder/encoder.cpp
> @@ -1528,8 +1528,12 @@ double Encoder::normalizeRange(int32_t value,
> int32_t minValue, int32_t maxValue
> return (double)(value - minValue) * (rangeEnd - rangeStart) /
> (maxValue - minValue) + rangeStart;
> }
>
> -void Encoder::findSceneCuts(x265_picture *pic, bool& bDup, double
> maxUVSad, double edgeSad)
> +void Encoder::findSceneCuts(x265_picture *pic, bool& isMax, bool& bDup,
> double maxUVSad, double edgeSad)
> {
> + double minEdgeT = m_edgeHistThreshold * 0.5;
> + double minChromaT = minEdgeT * 10.0;
> + double maxEdgeT = m_edgeHistThreshold * 1.5;
> + double maxChromaT = maxEdgeT * 10.0;
> pic->frameData.bScenecut = false;
>
> if (pic->poc == 0)
> @@ -1544,11 +1548,20 @@ void Encoder::findSceneCuts(x265_picture *pic,
> bool& bDup, double maxUVSad, doub
> {
> bDup = true;
> }
> - else if (edgeSad > m_scaledEdgeThreshold || maxUVSad >=
> m_scaledChromaThreshold || (edgeSad > m_edgeHistThreshold && maxUVSad >=
> m_chromaHistThreshold))
> + else if (edgeSad < minEdgeT && maxUVSad < minChromaT)
> + {
> + pic->frameData.bScenecut = false;
> + }
> + else if (edgeSad > maxEdgeT && maxUVSad > maxChromaT)
> + {
> + pic->frameData.bScenecut = true;
> + isMax = true;
> + }
> + else if (edgeSad > m_scaledEdgeThreshold || maxUVSad >=
> m_scaledChromaThreshold
> + || (edgeSad > m_edgeHistThreshold && maxUVSad >=
> m_chromaHistThreshold))
> {
> pic->frameData.bScenecut = true;
> bDup = false;
> - x265_log(m_param, X265_LOG_DEBUG, "scene cut at %d \n",
> pic->poc);
> }
> }
> }
> @@ -1581,6 +1594,7 @@ int Encoder::encode(const x265_picture* pic_in,
> x265_picture* pic_out)
> bool dontRead = false;
> bool bdropFrame = false;
> bool dropflag = false;
> + bool isMaxThreshold = false;
>
> if (m_exportedPic)
> {
> @@ -1607,7 +1621,7 @@ int Encoder::encode(const x265_picture* pic_in,
> x265_picture* pic_out)
> {
> double maxUVSad = 0.0, edgeSad = 0.0;
> computeHistogramSAD(&maxUVSad, &edgeSad, pic_in->poc);
> - findSceneCuts(pic, bdropFrame, maxUVSad, edgeSad);
> + findSceneCuts(pic, isMaxThreshold, bdropFrame, maxUVSad,
> edgeSad);
> }
> }
>
> @@ -1786,6 +1800,7 @@ int Encoder::encode(const x265_picture* pic_in,
> x265_picture* pic_out)
> if (m_param->bHistBasedSceneCut)
> {
> inFrame->m_lowres.bScenecut = (inputPic->frameData.bScenecut
> == 1) ? true : false;
> + inFrame->m_lowres.m_bIsMaxThres = isMaxThreshold;
> }
> if (m_param->bHistBasedSceneCut && m_param->analysisSave)
> {
> @@ -4261,7 +4276,7 @@ void Encoder::configure(x265_param *p)
>
> if (p->bHistBasedSceneCut && !p->edgeTransitionThreshold)
> {
> - p->edgeTransitionThreshold = 0.01;
> + p->edgeTransitionThreshold = 0.03;
> x265_log(p, X265_LOG_WARNING, "using default threshold %.2lf for
> scene cut detection\n", p->edgeTransitionThreshold);
> }
>
> diff --git a/source/encoder/encoder.h b/source/encoder/encoder.h
> index fd6b3e72c..1d4fe2476 100644
> --- a/source/encoder/encoder.h
> +++ b/source/encoder/encoder.h
> @@ -373,7 +373,7 @@ public:
> bool computeHistograms(x265_picture *pic);
> void computeHistogramSAD(double *maxUVNormalizedSAD, double
> *edgeNormalizedSAD, int curPoc);
> double normalizeRange(int32_t value, int32_t minValue, int32_t
> maxValue, double rangeStart, double rangeEnd);
> - void findSceneCuts(x265_picture *pic, bool& bDup, double
> m_maxUVSADVal, double m_edgeSADVal);
> + void findSceneCuts(x265_picture *pic, bool& isMax, bool& bDup, double
> m_maxUVSADVal, double m_edgeSADVal);
>
> void initRefIdx();
> void analyseRefIdx(int *numRefIdx);
> diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp
> index 0a95e77d2..27052ca4e 100644
> --- a/source/encoder/slicetype.cpp
> +++ b/source/encoder/slicetype.cpp
> @@ -2001,10 +2001,40 @@ void Lookahead::slicetypeAnalyse(Lowres **frames,
> bool bKeyframe)
> int numAnalyzed = numFrames;
> bool isScenecut = false;
>
> - /* When scenecut threshold is set, use scenecut detection for I frame
> placements */
> if (m_param->bHistBasedSceneCut)
> - isScenecut = frames[1]->bScenecut;
> - else
> + {
> + for (int i = numFrames - 1; i > 0; i--)
> + {
> + if (frames[i]->interPCostPercDiff > 0.0)
> + continue;
> + int64_t interCost = frames[i]->costEst[1][0];
> + int64_t intraCost = frames[i]->costEst[0][0];
> + if (interCost < 0 || intraCost < 0)
> + continue;
> + int times = 0;
> + double averageP = 0.0, averageI = 0.0;
> + for (int j = i - 1; j >= 0 && times < 5; j--, times++)
> + {
> + if (frames[j]->costEst[0][0] > 0 &&
> frames[j]->costEst[1][0] > 0)
> + {
> + averageI += frames[j]->costEst[0][0];
> + averageP += frames[j]->costEst[1][0];
> + }
> + else
> + times--;
> + }
> + if (times)
> + {
> + averageI = averageI / times;
> + averageP = averageP / times;
> + frames[i]->interPCostPercDiff = abs(interCost - averageP)
> / X265_MIN(interCost, averageP) * 100;
> + frames[i]->intraCostPercDiff = abs(intraCost - averageI)
> / X265_MIN(intraCost, averageI) * 100;
> + }
> + }
> + }
> +
> + /* When scenecut threshold is set, use scenecut detection for I frame
> placements */
> + if (!m_param->bHistBasedSceneCut || (m_param->bHistBasedSceneCut &&
> frames[1]->bScenecut))
> isScenecut = scenecut(frames, 0, 1, true, origNumFrames);
>
> if (isScenecut && (m_param->bHistBasedSceneCut ||
> m_param->scenecutThreshold))
> @@ -2018,17 +2048,16 @@ void Lookahead::slicetypeAnalyse(Lowres **frames,
> bool bKeyframe)
> m_extendGopBoundary = false;
> for (int i = m_param->bframes + 1; i < origNumFrames; i +=
> m_param->bframes + 1)
> {
> - if (!m_param->bHistBasedSceneCut)
> + if (!m_param->bHistBasedSceneCut ||
> (m_param->bHistBasedSceneCut && frames[i + 1]->bScenecut))
> scenecut(frames, i, i + 1, true, origNumFrames);
>
> for (int j = i + 1; j <= X265_MIN(i + m_param->bframes + 1,
> origNumFrames); j++)
> {
> - if ((!m_param->bHistBasedSceneCut && frames[j]->bScenecut
> && scenecutInternal(frames, j - 1, j, true)) ||
> - (m_param->bHistBasedSceneCut && frames[j]->bScenecut))
> - {
> - m_extendGopBoundary = true;
> - break;
> - }
> + if (frames[j]->bScenecut && scenecutInternal(frames, j -
> 1, j, true))
> + {
> + m_extendGopBoundary = true;
> + break;
> + }
> }
> if (m_extendGopBoundary)
> break;
> @@ -2133,14 +2162,15 @@ void Lookahead::slicetypeAnalyse(Lowres **frames,
> bool bKeyframe)
> {
> for (int j = 1; j < numBFrames + 1; j++)
> {
> - if ((!m_param->bHistBasedSceneCut && scenecut(frames, j,
> j + 1, false, origNumFrames)) ||
> - (m_param->bHistBasedSceneCut && frames[j +
> 1]->bScenecut) ||
> - (bForceRADL && (frames[j]->frameNum == preRADL)))
> - {
> - frames[j]->sliceType = X265_TYPE_P;
> - numAnalyzed = j;
> - break;
> - }
> + bool isNextScenecut = false;
> + if (!m_param->bHistBasedSceneCut ||
> (m_param->bHistBasedSceneCut && frames[j + 1]->bScenecut))
> + isNextScenecut = scenecut(frames, j, j + 1, false,
> origNumFrames);
> + if (isNextScenecut || (bForceRADL && frames[j]->frameNum
> == preRADL))
> + {
> + frames[j]->sliceType = X265_TYPE_P;
> + numAnalyzed = j;
> + break;
> + }
> }
> }
> resetStart = bKeyframe ? 1 : X265_MIN(numBFrames + 2, numAnalyzed
> + 1);
> @@ -2203,7 +2233,7 @@ bool Lookahead::scenecut(Lowres **frames, int p0,
> int p1, bool bRealScenecut, in
> * and not considered a scenecut. */
> for (int cp1 = p1; cp1 <= maxp1; cp1++)
> {
> - if (!scenecutInternal(frames, p0, cp1, false))
> + if (!m_param->bHistBasedSceneCut && !scenecutInternal(frames,
> p0, cp1, false))
> {
> /* Any frame in between p0 and cur_p1 cannot be a real
> scenecut. */
> for (int i = cp1; i > p0; i--)
> @@ -2212,7 +2242,7 @@ bool Lookahead::scenecut(Lowres **frames, int p0,
> int p1, bool bRealScenecut, in
> noScenecuts = false;
> }
> }
> - else if (scenecutInternal(frames, cp1 - 1, cp1, false))
> + else if ((m_param->bHistBasedSceneCut &&
> frames[cp1]->m_bIsMaxThres) || scenecutInternal(frames, cp1 - 1, cp1,
> false))
> {
> /* If current frame is a Scenecut from p0 frame as well
> as Scenecut from
> * preceeding frame, mark it as a Scenecut */
> @@ -2273,6 +2303,10 @@ bool Lookahead::scenecut(Lowres **frames, int p0,
> int p1, bool bRealScenecut, in
>
> if (!frames[p1]->bScenecut)
> return false;
> + /* Check only scene transitions if max threshold */
> + if (m_param->bHistBasedSceneCut && frames[p1]->m_bIsMaxThres)
> + return frames[p1]->bScenecut;
> +
> return scenecutInternal(frames, p0, p1, bRealScenecut);
> }
>
> @@ -2289,7 +2323,19 @@ bool Lookahead::scenecutInternal(Lowres **frames,
> int p0, int p1, bool bRealScen
> /* magic numbers pulled out of thin air */
> float threshMin = (float)(threshMax * 0.25);
> double bias = m_param->scenecutBias;
> - if (bRealScenecut)
> + if (m_param->bHistBasedSceneCut)
> + {
> + double minT = 50.0 * (1 + m_param->edgeTransitionThreshold);
> + if (frame->interPCostPercDiff > minT || frame->intraCostPercDiff
> > minT)
> + {
> + if (bRealScenecut && frame->bScenecut)
> + x265_log(m_param, X265_LOG_DEBUG, "scene cut at %d \n",
> frame->frameNum);
> + return frame->bScenecut;
> + }
> + else
> + return false;
> + }
> + else if (bRealScenecut)
> {
> if (m_param->keyframeMin == m_param->keyframeMax)
> threshMin = threshMax;
> diff --git a/source/x265.h b/source/x265.h
> index 1e6f9ece6..32feb2bca 100644
> --- a/source/x265.h
> +++ b/source/x265.h
> @@ -1860,7 +1860,7 @@ typedef struct x265_param
> /* A genuine threshold used for histogram based scene cut detection.
> * This threshold determines whether a frame is a scenecut or not
> * when compared against the edge and chroma histogram sad values.
> - * Default 0.01. Range: Real number in the interval (0,2). */
> + * Default 0.03. Range: Real number in the interval (0,1). */
> double edgeTransitionThreshold;
>
> /* Enables histogram based scenecut detection algorithm to detect
> scenecuts. Default disabled */
> --
> 2.24.0.windows.2
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20200629/d559072d/attachment-0001.html>
More information about the x265-devel
mailing list