[x265] [PATCH 1 of 2] Improvements to hist-based scenecut algorithm.
Aruna Matheswaran
aruna at multicorewareinc.com
Mon Jun 29 16:42:31 CEST 2020
Pushed into master.
On Mon, Jun 29, 2020 at 6:43 PM Pooja Venkatesan <pooja at multicorewareinc.com>
wrote:
> From 671506e40fbbb0aed825013e434c7c39cec2bb93 Mon Sep 17 00:00:00 2001
> From: Pooja Venkatesan <pooja at multicorewareinc.com>
> Date: Thu, 25 Jun 2020 20:42:50 +0530
> Subject: [PATCH] Improvements to hist-based scenecut algorithm.
>
> This patch does the following:
> 1. Strengthens scenecut detection using threshold intervals, spatial and
> temporal properties.
> 2. Changes the default value of hist-threshold to 0.03
> ---
> doc/reST/cli.rst | 7 +--
> source/common/lowres.cpp | 3 ++
> source/common/lowres.h | 5 ++
> source/common/param.cpp | 2 +-
> source/encoder/encoder.cpp | 25 ++++++++--
> source/encoder/encoder.h | 5 +-
> source/encoder/slicetype.cpp | 89 +++++++++++++++++++++++++++---------
> source/encoder/slicetype.h | 1 +
> source/x265.h | 2 +-
> 9 files changed, 107 insertions(+), 32 deletions(-)
>
> diff --git a/doc/reST/cli.rst b/doc/reST/cli.rst
> index b9d795ace..23b74c3d8 100644
> --- a/doc/reST/cli.rst
> +++ b/doc/reST/cli.rst
> @@ -1468,9 +1468,10 @@ Slice decision options
> .. option:: --hist-threshold <0.0..1.0>
>
> This value represents the threshold for normalized SAD of edge
> histograms used in scenecut detection.
> - This requires :option:`--hist-scenecut` to be enabled. For example, a
> value of 0.2 indicates that a frame with normalized SAD value
> - greater than 0.2 against the previous frame as scenecut.
> - Default 0.01.
> + This requires :option:`--hist-scenecut` to be enabled. For example, a
> value of 0.2 indicates that a frame with normalized SAD value
> + greater than 0.2 against the previous frame is treated as a scenecut.
> + Increasing the threshold reduces the number of scenecuts detected.
> + Default 0.03.
>
> .. option:: --radl <integer>
>
> diff --git a/source/common/lowres.cpp b/source/common/lowres.cpp
> index e8dd991bc..db1c2d159 100644
> --- a/source/common/lowres.cpp
> +++ b/source/common/lowres.cpp
> @@ -266,6 +266,9 @@ void Lowres::init(PicYuv *origPic, int poc)
> indB = 0;
> memset(costEst, -1, sizeof(costEst));
> memset(weightedCostDelta, 0, sizeof(weightedCostDelta));
> + interPCostPercDiff = 0.0;
> + intraCostPercDiff = 0.0;
> + m_bIsMaxThres = false;
>
> if (qpAqOffset && invQscaleFactor)
> memset(costEstAq, -1, sizeof(costEstAq));
> diff --git a/source/common/lowres.h b/source/common/lowres.h
> index 5c50fad67..200b1f032 100644
> --- a/source/common/lowres.h
> +++ b/source/common/lowres.h
> @@ -234,6 +234,11 @@ struct Lowres : public ReferencePlanes
> uint16_t* propagateCost;
> double weightedCostDelta[X265_BFRAME_MAX + 2];
> ReferencePlanes weightedRef[X265_BFRAME_MAX + 2];
> + /* For hist-based scenecut */
> + bool m_bIsMaxThres;
> + double interPCostPercDiff;
> + double intraCostPercDiff;
> +
> bool create(x265_param* param, PicYuv *origPic, uint32_t qgSize);
> void destroy();
> void init(PicYuv *origPic, int poc);
> diff --git a/source/common/param.cpp b/source/common/param.cpp
> index 925f0c460..8c0498efc 100644
> --- a/source/common/param.cpp
> +++ b/source/common/param.cpp
> @@ -168,7 +168,7 @@ void x265_param_default(x265_param* param)
> param->bFrameAdaptive = X265_B_ADAPT_TRELLIS;
> param->bBPyramid = 1;
> param->scenecutThreshold = 40; /* Magic number pulled in from x264 */
> - param->edgeTransitionThreshold = 0.01;
> + param->edgeTransitionThreshold = 0.03;
> param->bHistBasedSceneCut = 0;
> param->lookaheadSlices = 8;
> param->lookaheadThreads = 0;
> diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp
> index f6bc5408d..0c6fd80bf 100644
> --- a/source/encoder/encoder.cpp
> +++ b/source/encoder/encoder.cpp
> @@ -1528,8 +1528,12 @@ double Encoder::normalizeRange(int32_t value,
> int32_t minValue, int32_t maxValue
> return (double)(value - minValue) * (rangeEnd - rangeStart) /
> (maxValue - minValue) + rangeStart;
> }
>
> -void Encoder::findSceneCuts(x265_picture *pic, bool& bDup, double
> maxUVSad, double edgeSad)
> +void Encoder::findSceneCuts(x265_picture *pic, bool& bDup, double
> maxUVSad, double edgeSad, bool& isMaxThres)
> {
> + double minEdgeT = m_edgeHistThreshold * MIN_EDGE_FACTOR;
> + double minChromaT = minEdgeT * SCENECUT_CHROMA_FACTOR;
> + double maxEdgeT = m_edgeHistThreshold * MAX_EDGE_FACTOR;
> + double maxChromaT = maxEdgeT * SCENECUT_CHROMA_FACTOR;
> pic->frameData.bScenecut = false;
>
> if (pic->poc == 0)
> @@ -1544,11 +1548,20 @@ void Encoder::findSceneCuts(x265_picture *pic,
> bool& bDup, double maxUVSad, doub
> {
> bDup = true;
> }
> - else if (edgeSad > m_scaledEdgeThreshold || maxUVSad >=
> m_scaledChromaThreshold || (edgeSad > m_edgeHistThreshold && maxUVSad >=
> m_chromaHistThreshold))
> + else if (edgeSad < minEdgeT && maxUVSad < minChromaT)
> + {
> + pic->frameData.bScenecut = false;
> + }
> + else if (edgeSad > maxEdgeT && maxUVSad > maxChromaT)
> + {
> + pic->frameData.bScenecut = true;
> + isMaxThres = true;
> + }
> + else if (edgeSad > m_scaledEdgeThreshold || maxUVSad >=
> m_scaledChromaThreshold
> + || (edgeSad > m_edgeHistThreshold && maxUVSad >=
> m_chromaHistThreshold))
> {
> pic->frameData.bScenecut = true;
> bDup = false;
> - x265_log(m_param, X265_LOG_DEBUG, "scene cut at %d \n",
> pic->poc);
> }
> }
> }
> @@ -1581,6 +1594,7 @@ int Encoder::encode(const x265_picture* pic_in,
> x265_picture* pic_out)
> bool dontRead = false;
> bool bdropFrame = false;
> bool dropflag = false;
> + bool isMaxThres = false;
>
> if (m_exportedPic)
> {
> @@ -1607,7 +1621,7 @@ int Encoder::encode(const x265_picture* pic_in,
> x265_picture* pic_out)
> {
> double maxUVSad = 0.0, edgeSad = 0.0;
> computeHistogramSAD(&maxUVSad, &edgeSad, pic_in->poc);
> - findSceneCuts(pic, bdropFrame, maxUVSad, edgeSad);
> + findSceneCuts(pic, bdropFrame, maxUVSad, edgeSad,
> isMaxThres);
> }
> }
>
> @@ -1786,6 +1800,7 @@ int Encoder::encode(const x265_picture* pic_in,
> x265_picture* pic_out)
> if (m_param->bHistBasedSceneCut)
> {
> inFrame->m_lowres.bScenecut = (inputPic->frameData.bScenecut
> == 1) ? true : false;
> + inFrame->m_lowres.m_bIsMaxThres = isMaxThres;
> }
> if (m_param->bHistBasedSceneCut && m_param->analysisSave)
> {
> @@ -4261,7 +4276,7 @@ void Encoder::configure(x265_param *p)
>
> if (p->bHistBasedSceneCut && !p->edgeTransitionThreshold)
> {
> - p->edgeTransitionThreshold = 0.01;
> + p->edgeTransitionThreshold = 0.03;
> x265_log(p, X265_LOG_WARNING, "using default threshold %.2lf for
> scene cut detection\n", p->edgeTransitionThreshold);
> }
>
> diff --git a/source/encoder/encoder.h b/source/encoder/encoder.h
> index fd6b3e72c..507c42d5e 100644
> --- a/source/encoder/encoder.h
> +++ b/source/encoder/encoder.h
> @@ -165,6 +165,9 @@ class FrameData;
>
> #define MAX_SCENECUT_THRESHOLD 1.0
> #define SCENECUT_STRENGTH_FACTOR 2.0
> +#define MIN_EDGE_FACTOR 0.5
> +#define MAX_EDGE_FACTOR 1.5
> +#define SCENECUT_CHROMA_FACTOR 10.0
>
> class Encoder : public x265_encoder
> {
> @@ -373,7 +376,7 @@ public:
> bool computeHistograms(x265_picture *pic);
> void computeHistogramSAD(double *maxUVNormalizedSAD, double
> *edgeNormalizedSAD, int curPoc);
> double normalizeRange(int32_t value, int32_t minValue, int32_t
> maxValue, double rangeStart, double rangeEnd);
> - void findSceneCuts(x265_picture *pic, bool& bDup, double
> m_maxUVSADVal, double m_edgeSADVal);
> + void findSceneCuts(x265_picture *pic, bool& bDup, double
> m_maxUVSADVal, double m_edgeSADVal, bool& isMaxThres);
>
> void initRefIdx();
> void analyseRefIdx(int *numRefIdx);
> diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp
> index 0a95e77d2..d3783cfe1 100644
> --- a/source/encoder/slicetype.cpp
> +++ b/source/encoder/slicetype.cpp
> @@ -2001,10 +2001,41 @@ void Lookahead::slicetypeAnalyse(Lowres **frames,
> bool bKeyframe)
> int numAnalyzed = numFrames;
> bool isScenecut = false;
>
> - /* When scenecut threshold is set, use scenecut detection for I frame
> placements */
> + /* Temporal computations for scenecut detection */
> if (m_param->bHistBasedSceneCut)
> - isScenecut = frames[1]->bScenecut;
> - else
> + {
> + for (int i = numFrames - 1; i > 0; i--)
> + {
> + if (frames[i]->interPCostPercDiff > 0.0)
> + continue;
> + int64_t interCost = frames[i]->costEst[1][0];
> + int64_t intraCost = frames[i]->costEst[0][0];
> + if (interCost < 0 || intraCost < 0)
> + continue;
> + int times = 0;
> + double averagePcost = 0.0, averageIcost = 0.0;
> + for (int j = i - 1; j >= 0 && times < 5; j--, times++)
> + {
> + if (frames[j]->costEst[0][0] > 0 &&
> frames[j]->costEst[1][0] > 0)
> + {
> + averageIcost += frames[j]->costEst[0][0];
> + averagePcost += frames[j]->costEst[1][0];
> + }
> + else
> + times--;
> + }
> + if (times)
> + {
> + averageIcost = averageIcost / times;
> + averagePcost = averagePcost / times;
> + frames[i]->interPCostPercDiff = abs(interCost -
> averagePcost) / X265_MIN(interCost, averagePcost) * 100;
> + frames[i]->intraCostPercDiff = abs(intraCost -
> averageIcost) / X265_MIN(intraCost, averageIcost) * 100;
> + }
> + }
> + }
> +
> + /* When scenecut threshold is set, use scenecut detection for I frame
> placements */
> + if (!m_param->bHistBasedSceneCut || (m_param->bHistBasedSceneCut &&
> frames[1]->bScenecut))
> isScenecut = scenecut(frames, 0, 1, true, origNumFrames);
>
> if (isScenecut && (m_param->bHistBasedSceneCut ||
> m_param->scenecutThreshold))
> @@ -2018,17 +2049,16 @@ void Lookahead::slicetypeAnalyse(Lowres **frames,
> bool bKeyframe)
> m_extendGopBoundary = false;
> for (int i = m_param->bframes + 1; i < origNumFrames; i +=
> m_param->bframes + 1)
> {
> - if (!m_param->bHistBasedSceneCut)
> + if (!m_param->bHistBasedSceneCut ||
> (m_param->bHistBasedSceneCut && frames[i + 1]->bScenecut))
> scenecut(frames, i, i + 1, true, origNumFrames);
>
> for (int j = i + 1; j <= X265_MIN(i + m_param->bframes + 1,
> origNumFrames); j++)
> {
> - if ((!m_param->bHistBasedSceneCut && frames[j]->bScenecut
> && scenecutInternal(frames, j - 1, j, true)) ||
> - (m_param->bHistBasedSceneCut && frames[j]->bScenecut))
> - {
> - m_extendGopBoundary = true;
> - break;
> - }
> + if (frames[j]->bScenecut && scenecutInternal(frames, j -
> 1, j, true))
> + {
> + m_extendGopBoundary = true;
> + break;
> + }
> }
> if (m_extendGopBoundary)
> break;
> @@ -2133,14 +2163,15 @@ void Lookahead::slicetypeAnalyse(Lowres **frames,
> bool bKeyframe)
> {
> for (int j = 1; j < numBFrames + 1; j++)
> {
> - if ((!m_param->bHistBasedSceneCut && scenecut(frames, j,
> j + 1, false, origNumFrames)) ||
> - (m_param->bHistBasedSceneCut && frames[j +
> 1]->bScenecut) ||
> - (bForceRADL && (frames[j]->frameNum == preRADL)))
> - {
> - frames[j]->sliceType = X265_TYPE_P;
> - numAnalyzed = j;
> - break;
> - }
> + bool isNextScenecut = false;
> + if (!m_param->bHistBasedSceneCut ||
> (m_param->bHistBasedSceneCut && frames[j + 1]->bScenecut))
> + isNextScenecut = scenecut(frames, j, j + 1, false,
> origNumFrames);
> + if (isNextScenecut || (bForceRADL && frames[j]->frameNum
> == preRADL))
> + {
> + frames[j]->sliceType = X265_TYPE_P;
> + numAnalyzed = j;
> + break;
> + }
> }
> }
> resetStart = bKeyframe ? 1 : X265_MIN(numBFrames + 2, numAnalyzed
> + 1);
> @@ -2203,7 +2234,7 @@ bool Lookahead::scenecut(Lowres **frames, int p0,
> int p1, bool bRealScenecut, in
> * and not considered a scenecut. */
> for (int cp1 = p1; cp1 <= maxp1; cp1++)
> {
> - if (!scenecutInternal(frames, p0, cp1, false))
> + if (!m_param->bHistBasedSceneCut && !scenecutInternal(frames,
> p0, cp1, false))
> {
> /* Any frame in between p0 and cur_p1 cannot be a real
> scenecut. */
> for (int i = cp1; i > p0; i--)
> @@ -2212,7 +2243,7 @@ bool Lookahead::scenecut(Lowres **frames, int p0,
> int p1, bool bRealScenecut, in
> noScenecuts = false;
> }
> }
> - else if (scenecutInternal(frames, cp1 - 1, cp1, false))
> + else if ((m_param->bHistBasedSceneCut &&
> frames[cp1]->m_bIsMaxThres) || scenecutInternal(frames, cp1 - 1, cp1,
> false))
> {
> /* If current frame is a Scenecut from p0 frame as well
> as Scenecut from
> * preceeding frame, mark it as a Scenecut */
> @@ -2273,6 +2304,10 @@ bool Lookahead::scenecut(Lowres **frames, int p0,
> int p1, bool bRealScenecut, in
>
> if (!frames[p1]->bScenecut)
> return false;
> + /* Check only scene transitions if max threshold */
> + if (m_param->bHistBasedSceneCut && frames[p1]->m_bIsMaxThres)
> + return frames[p1]->bScenecut;
> +
> return scenecutInternal(frames, p0, p1, bRealScenecut);
> }
>
> @@ -2289,7 +2324,19 @@ bool Lookahead::scenecutInternal(Lowres **frames,
> int p0, int p1, bool bRealScen
> /* magic numbers pulled out of thin air */
> float threshMin = (float)(threshMax * 0.25);
> double bias = m_param->scenecutBias;
> - if (bRealScenecut)
> + if (m_param->bHistBasedSceneCut)
> + {
> + double minT = TEMPORAL_SCENECUT_THRESHOLD * (1 +
> m_param->edgeTransitionThreshold);
> + if (frame->interPCostPercDiff > minT || frame->intraCostPercDiff
> > minT)
> + {
> + if (bRealScenecut && frame->bScenecut)
> + x265_log(m_param, X265_LOG_DEBUG, "scene cut at %d \n",
> frame->frameNum);
> + return frame->bScenecut;
> + }
> + else
> + return false;
> + }
> + else if (bRealScenecut)
> {
> if (m_param->keyframeMin == m_param->keyframeMax)
> threshMin = threshMax;
> diff --git a/source/encoder/slicetype.h b/source/encoder/slicetype.h
> index 31cce972b..6484ad8a0 100644
> --- a/source/encoder/slicetype.h
> +++ b/source/encoder/slicetype.h
> @@ -42,6 +42,7 @@ class Lookahead;
> #define LOWRES_COST_SHIFT 14
> #define AQ_EDGE_BIAS 0.5
> #define EDGE_INCLINATION 45
> +#define TEMPORAL_SCENECUT_THRESHOLD 50
>
> #if HIGH_BIT_DEPTH
> #define EDGE_THRESHOLD 1023.0
> diff --git a/source/x265.h b/source/x265.h
> index 1e6f9ece6..32feb2bca 100644
> --- a/source/x265.h
> +++ b/source/x265.h
> @@ -1860,7 +1860,7 @@ typedef struct x265_param
> /* A genuine threshold used for histogram based scene cut detection.
> * This threshold determines whether a frame is a scenecut or not
> * when compared against the edge and chroma histogram sad values.
> - * Default 0.01. Range: Real number in the interval (0,2). */
> + * Default 0.03. Range: Real number in the interval (0,1). */
> double edgeTransitionThreshold;
>
> /* Enables histogram based scenecut detection algorithm to detect
> scenecuts. Default disabled */
> --
> 2.24.0.windows.2
>
> Regards,
> *Pooja Venkatesan*,
> Video Codec Engineer,
> Media & AI analytics BU
>
>
>
> On Mon, Jun 29, 2020 at 4:11 PM Pooja Venkatesan <
> pooja at multicorewareinc.com> wrote:
>
>> Hi,
>>
>> I am working on the review comments on this patch series. Will be sending
>> the updated patches soon. Stay tuned!
>>
>> Regards,
>> *Pooja Venkatesan*,
>> Video Codec Engineer,
>> Media & AI analytics BU
>>
>>
>>
>> On Thu, Jun 25, 2020 at 9:00 PM Pooja Venkatesan <
>> pooja at multicorewareinc.com> wrote:
>>
>>> From 2777c2e3389eaf556f3420bc0717171bbcf97e52 Mon Sep 17 00:00:00 2001
>>> From: Pooja Venkatesan <pooja at multicorewareinc.com>
>>> Date: Thu, 25 Jun 2020 20:42:50 +0530
>>> Subject: [PATCH] Improvements to hist-based scenecut algorithm.
>>>
>>> This patch does the following:
>>> 1. Add min and max threshold intervals to detect scenecuts.
>>> 2. For frames within the range,
>>> compare colour and edge histograms along with inter and intra
>>> SATD costs to detect scenecuts.
>>> 3. Handle scene transitions.
>>> 4. Change default value of hist-threshold to 0.03
>>> ---
>>> doc/reST/cli.rst | 7 +--
>>> source/common/lowres.cpp | 2 +
>>> source/common/lowres.h | 5 ++
>>> source/common/param.cpp | 2 +-
>>> source/encoder/encoder.cpp | 25 ++++++++--
>>> source/encoder/encoder.h | 2 +-
>>> source/encoder/slicetype.cpp | 88 +++++++++++++++++++++++++++---------
>>> source/x265.h | 2 +-
>>> 8 files changed, 101 insertions(+), 32 deletions(-)
>>>
>>> diff --git a/doc/reST/cli.rst b/doc/reST/cli.rst
>>> index b9d795ace..23b74c3d8 100644
>>> --- a/doc/reST/cli.rst
>>> +++ b/doc/reST/cli.rst
>>> @@ -1468,9 +1468,10 @@ Slice decision options
>>> .. option:: --hist-threshold <0.0..1.0>
>>>
>>> This value represents the threshold for normalized SAD of edge
>>> histograms used in scenecut detection.
>>> - This requires :option:`--hist-scenecut` to be enabled. For example, a
>>> value of 0.2 indicates that a frame with normalized SAD value
>>> - greater than 0.2 against the previous frame as scenecut.
>>> - Default 0.01.
>>> + This requires :option:`--hist-scenecut` to be enabled. For example, a
>>> value of 0.2 indicates that a frame with normalized SAD value
>>> + greater than 0.2 against the previous frame is treated as a scenecut.
>>> + Increasing the threshold reduces the number of scenecuts detected.
>>> + Default 0.03.
>>>
>>> .. option:: --radl <integer>
>>>
>>> diff --git a/source/common/lowres.cpp b/source/common/lowres.cpp
>>> index e8dd991bc..8e19ac17c 100644
>>> --- a/source/common/lowres.cpp
>>> +++ b/source/common/lowres.cpp
>>> @@ -266,6 +266,8 @@ void Lowres::init(PicYuv *origPic, int poc)
>>> indB = 0;
>>> memset(costEst, -1, sizeof(costEst));
>>> memset(weightedCostDelta, 0, sizeof(weightedCostDelta));
>>> + interPCostPercDiff = 0.0;
>>> + intraCostPercDiff = 0.0;
>>>
>>> if (qpAqOffset && invQscaleFactor)
>>> memset(costEstAq, -1, sizeof(costEstAq));
>>> diff --git a/source/common/lowres.h b/source/common/lowres.h
>>> index 5c50fad67..200b1f032 100644
>>> --- a/source/common/lowres.h
>>> +++ b/source/common/lowres.h
>>> @@ -234,6 +234,11 @@ struct Lowres : public ReferencePlanes
>>> uint16_t* propagateCost;
>>> double weightedCostDelta[X265_BFRAME_MAX + 2];
>>> ReferencePlanes weightedRef[X265_BFRAME_MAX + 2];
>>> + /* For hist-based scenecut */
>>> + bool m_bIsMaxThres;
>>> + double interPCostPercDiff;
>>> + double intraCostPercDiff;
>>> +
>>> bool create(x265_param* param, PicYuv *origPic, uint32_t qgSize);
>>> void destroy();
>>> void init(PicYuv *origPic, int poc);
>>> diff --git a/source/common/param.cpp b/source/common/param.cpp
>>> index 925f0c460..8c0498efc 100644
>>> --- a/source/common/param.cpp
>>> +++ b/source/common/param.cpp
>>> @@ -168,7 +168,7 @@ void x265_param_default(x265_param* param)
>>> param->bFrameAdaptive = X265_B_ADAPT_TRELLIS;
>>> param->bBPyramid = 1;
>>> param->scenecutThreshold = 40; /* Magic number pulled in from x264
>>> */
>>> - param->edgeTransitionThreshold = 0.01;
>>> + param->edgeTransitionThreshold = 0.03;
>>> param->bHistBasedSceneCut = 0;
>>> param->lookaheadSlices = 8;
>>> param->lookaheadThreads = 0;
>>> diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp
>>> index f6bc5408d..bec7ff5c0 100644
>>> --- a/source/encoder/encoder.cpp
>>> +++ b/source/encoder/encoder.cpp
>>> @@ -1528,8 +1528,12 @@ double Encoder::normalizeRange(int32_t value,
>>> int32_t minValue, int32_t maxValue
>>> return (double)(value - minValue) * (rangeEnd - rangeStart) /
>>> (maxValue - minValue) + rangeStart;
>>> }
>>>
>>> -void Encoder::findSceneCuts(x265_picture *pic, bool& bDup, double
>>> maxUVSad, double edgeSad)
>>> +void Encoder::findSceneCuts(x265_picture *pic, bool& isMax, bool& bDup,
>>> double maxUVSad, double edgeSad)
>>> {
>>> + double minEdgeT = m_edgeHistThreshold * 0.5;
>>> + double minChromaT = minEdgeT * 10.0;
>>> + double maxEdgeT = m_edgeHistThreshold * 1.5;
>>> + double maxChromaT = maxEdgeT * 10.0;
>>> pic->frameData.bScenecut = false;
>>>
>>> if (pic->poc == 0)
>>> @@ -1544,11 +1548,20 @@ void Encoder::findSceneCuts(x265_picture *pic,
>>> bool& bDup, double maxUVSad, doub
>>> {
>>> bDup = true;
>>> }
>>> - else if (edgeSad > m_scaledEdgeThreshold || maxUVSad >=
>>> m_scaledChromaThreshold || (edgeSad > m_edgeHistThreshold && maxUVSad >=
>>> m_chromaHistThreshold))
>>> + else if (edgeSad < minEdgeT && maxUVSad < minChromaT)
>>> + {
>>> + pic->frameData.bScenecut = false;
>>> + }
>>> + else if (edgeSad > maxEdgeT && maxUVSad > maxChromaT)
>>> + {
>>> + pic->frameData.bScenecut = true;
>>> + isMax = true;
>>> + }
>>> + else if (edgeSad > m_scaledEdgeThreshold || maxUVSad >=
>>> m_scaledChromaThreshold
>>> + || (edgeSad > m_edgeHistThreshold && maxUVSad >=
>>> m_chromaHistThreshold))
>>> {
>>> pic->frameData.bScenecut = true;
>>> bDup = false;
>>> - x265_log(m_param, X265_LOG_DEBUG, "scene cut at %d \n",
>>> pic->poc);
>>> }
>>> }
>>> }
>>> @@ -1581,6 +1594,7 @@ int Encoder::encode(const x265_picture* pic_in,
>>> x265_picture* pic_out)
>>> bool dontRead = false;
>>> bool bdropFrame = false;
>>> bool dropflag = false;
>>> + bool isMaxThreshold = false;
>>>
>>> if (m_exportedPic)
>>> {
>>> @@ -1607,7 +1621,7 @@ int Encoder::encode(const x265_picture* pic_in,
>>> x265_picture* pic_out)
>>> {
>>> double maxUVSad = 0.0, edgeSad = 0.0;
>>> computeHistogramSAD(&maxUVSad, &edgeSad, pic_in->poc);
>>> - findSceneCuts(pic, bdropFrame, maxUVSad, edgeSad);
>>> + findSceneCuts(pic, isMaxThreshold, bdropFrame,
>>> maxUVSad, edgeSad);
>>> }
>>> }
>>>
>>> @@ -1786,6 +1800,7 @@ int Encoder::encode(const x265_picture* pic_in,
>>> x265_picture* pic_out)
>>> if (m_param->bHistBasedSceneCut)
>>> {
>>> inFrame->m_lowres.bScenecut =
>>> (inputPic->frameData.bScenecut == 1) ? true : false;
>>> + inFrame->m_lowres.m_bIsMaxThres = isMaxThreshold;
>>> }
>>> if (m_param->bHistBasedSceneCut && m_param->analysisSave)
>>> {
>>> @@ -4261,7 +4276,7 @@ void Encoder::configure(x265_param *p)
>>>
>>> if (p->bHistBasedSceneCut && !p->edgeTransitionThreshold)
>>> {
>>> - p->edgeTransitionThreshold = 0.01;
>>> + p->edgeTransitionThreshold = 0.03;
>>> x265_log(p, X265_LOG_WARNING, "using default threshold %.2lf
>>> for scene cut detection\n", p->edgeTransitionThreshold);
>>> }
>>>
>>> diff --git a/source/encoder/encoder.h b/source/encoder/encoder.h
>>> index fd6b3e72c..1d4fe2476 100644
>>> --- a/source/encoder/encoder.h
>>> +++ b/source/encoder/encoder.h
>>> @@ -373,7 +373,7 @@ public:
>>> bool computeHistograms(x265_picture *pic);
>>> void computeHistogramSAD(double *maxUVNormalizedSAD, double
>>> *edgeNormalizedSAD, int curPoc);
>>> double normalizeRange(int32_t value, int32_t minValue, int32_t
>>> maxValue, double rangeStart, double rangeEnd);
>>> - void findSceneCuts(x265_picture *pic, bool& bDup, double
>>> m_maxUVSADVal, double m_edgeSADVal);
>>> + void findSceneCuts(x265_picture *pic, bool& isMax, bool& bDup,
>>> double m_maxUVSADVal, double m_edgeSADVal);
>>>
>>> void initRefIdx();
>>> void analyseRefIdx(int *numRefIdx);
>>> diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp
>>> index 0a95e77d2..27052ca4e 100644
>>> --- a/source/encoder/slicetype.cpp
>>> +++ b/source/encoder/slicetype.cpp
>>> @@ -2001,10 +2001,40 @@ void Lookahead::slicetypeAnalyse(Lowres
>>> **frames, bool bKeyframe)
>>> int numAnalyzed = numFrames;
>>> bool isScenecut = false;
>>>
>>> - /* When scenecut threshold is set, use scenecut detection for I
>>> frame placements */
>>> if (m_param->bHistBasedSceneCut)
>>> - isScenecut = frames[1]->bScenecut;
>>> - else
>>> + {
>>> + for (int i = numFrames - 1; i > 0; i--)
>>> + {
>>> + if (frames[i]->interPCostPercDiff > 0.0)
>>> + continue;
>>> + int64_t interCost = frames[i]->costEst[1][0];
>>> + int64_t intraCost = frames[i]->costEst[0][0];
>>> + if (interCost < 0 || intraCost < 0)
>>> + continue;
>>> + int times = 0;
>>> + double averageP = 0.0, averageI = 0.0;
>>> + for (int j = i - 1; j >= 0 && times < 5; j--, times++)
>>> + {
>>> + if (frames[j]->costEst[0][0] > 0 &&
>>> frames[j]->costEst[1][0] > 0)
>>> + {
>>> + averageI += frames[j]->costEst[0][0];
>>> + averageP += frames[j]->costEst[1][0];
>>> + }
>>> + else
>>> + times--;
>>> + }
>>> + if (times)
>>> + {
>>> + averageI = averageI / times;
>>> + averageP = averageP / times;
>>> + frames[i]->interPCostPercDiff = abs(interCost -
>>> averageP) / X265_MIN(interCost, averageP) * 100;
>>> + frames[i]->intraCostPercDiff = abs(intraCost -
>>> averageI) / X265_MIN(intraCost, averageI) * 100;
>>> + }
>>> + }
>>> + }
>>> +
>>> + /* When scenecut threshold is set, use scenecut detection for I
>>> frame placements */
>>> + if (!m_param->bHistBasedSceneCut || (m_param->bHistBasedSceneCut &&
>>> frames[1]->bScenecut))
>>> isScenecut = scenecut(frames, 0, 1, true, origNumFrames);
>>>
>>> if (isScenecut && (m_param->bHistBasedSceneCut ||
>>> m_param->scenecutThreshold))
>>> @@ -2018,17 +2048,16 @@ void Lookahead::slicetypeAnalyse(Lowres
>>> **frames, bool bKeyframe)
>>> m_extendGopBoundary = false;
>>> for (int i = m_param->bframes + 1; i < origNumFrames; i +=
>>> m_param->bframes + 1)
>>> {
>>> - if (!m_param->bHistBasedSceneCut)
>>> + if (!m_param->bHistBasedSceneCut ||
>>> (m_param->bHistBasedSceneCut && frames[i + 1]->bScenecut))
>>> scenecut(frames, i, i + 1, true, origNumFrames);
>>>
>>> for (int j = i + 1; j <= X265_MIN(i + m_param->bframes + 1,
>>> origNumFrames); j++)
>>> {
>>> - if ((!m_param->bHistBasedSceneCut &&
>>> frames[j]->bScenecut && scenecutInternal(frames, j - 1, j, true)) ||
>>> - (m_param->bHistBasedSceneCut &&
>>> frames[j]->bScenecut))
>>> - {
>>> - m_extendGopBoundary = true;
>>> - break;
>>> - }
>>> + if (frames[j]->bScenecut && scenecutInternal(frames, j
>>> - 1, j, true))
>>> + {
>>> + m_extendGopBoundary = true;
>>> + break;
>>> + }
>>> }
>>> if (m_extendGopBoundary)
>>> break;
>>> @@ -2133,14 +2162,15 @@ void Lookahead::slicetypeAnalyse(Lowres
>>> **frames, bool bKeyframe)
>>> {
>>> for (int j = 1; j < numBFrames + 1; j++)
>>> {
>>> - if ((!m_param->bHistBasedSceneCut && scenecut(frames,
>>> j, j + 1, false, origNumFrames)) ||
>>> - (m_param->bHistBasedSceneCut && frames[j +
>>> 1]->bScenecut) ||
>>> - (bForceRADL && (frames[j]->frameNum == preRADL)))
>>> - {
>>> - frames[j]->sliceType = X265_TYPE_P;
>>> - numAnalyzed = j;
>>> - break;
>>> - }
>>> + bool isNextScenecut = false;
>>> + if (!m_param->bHistBasedSceneCut ||
>>> (m_param->bHistBasedSceneCut && frames[j + 1]->bScenecut))
>>> + isNextScenecut = scenecut(frames, j, j + 1, false,
>>> origNumFrames);
>>> + if (isNextScenecut || (bForceRADL &&
>>> frames[j]->frameNum == preRADL))
>>> + {
>>> + frames[j]->sliceType = X265_TYPE_P;
>>> + numAnalyzed = j;
>>> + break;
>>> + }
>>> }
>>> }
>>> resetStart = bKeyframe ? 1 : X265_MIN(numBFrames + 2,
>>> numAnalyzed + 1);
>>> @@ -2203,7 +2233,7 @@ bool Lookahead::scenecut(Lowres **frames, int p0,
>>> int p1, bool bRealScenecut, in
>>> * and not considered a scenecut. */
>>> for (int cp1 = p1; cp1 <= maxp1; cp1++)
>>> {
>>> - if (!scenecutInternal(frames, p0, cp1, false))
>>> + if (!m_param->bHistBasedSceneCut &&
>>> !scenecutInternal(frames, p0, cp1, false))
>>> {
>>> /* Any frame in between p0 and cur_p1 cannot be a real
>>> scenecut. */
>>> for (int i = cp1; i > p0; i--)
>>> @@ -2212,7 +2242,7 @@ bool Lookahead::scenecut(Lowres **frames, int p0,
>>> int p1, bool bRealScenecut, in
>>> noScenecuts = false;
>>> }
>>> }
>>> - else if (scenecutInternal(frames, cp1 - 1, cp1, false))
>>> + else if ((m_param->bHistBasedSceneCut &&
>>> frames[cp1]->m_bIsMaxThres) || scenecutInternal(frames, cp1 - 1, cp1,
>>> false))
>>> {
>>> /* If current frame is a Scenecut from p0 frame as well
>>> as Scenecut from
>>> * preceeding frame, mark it as a Scenecut */
>>> @@ -2273,6 +2303,10 @@ bool Lookahead::scenecut(Lowres **frames, int p0,
>>> int p1, bool bRealScenecut, in
>>>
>>> if (!frames[p1]->bScenecut)
>>> return false;
>>> + /* Check only scene transitions if max threshold */
>>> + if (m_param->bHistBasedSceneCut && frames[p1]->m_bIsMaxThres)
>>> + return frames[p1]->bScenecut;
>>> +
>>> return scenecutInternal(frames, p0, p1, bRealScenecut);
>>> }
>>>
>>> @@ -2289,7 +2323,19 @@ bool Lookahead::scenecutInternal(Lowres **frames,
>>> int p0, int p1, bool bRealScen
>>> /* magic numbers pulled out of thin air */
>>> float threshMin = (float)(threshMax * 0.25);
>>> double bias = m_param->scenecutBias;
>>> - if (bRealScenecut)
>>> + if (m_param->bHistBasedSceneCut)
>>> + {
>>> + double minT = 50.0 * (1 + m_param->edgeTransitionThreshold);
>>> + if (frame->interPCostPercDiff > minT ||
>>> frame->intraCostPercDiff > minT)
>>> + {
>>> + if (bRealScenecut && frame->bScenecut)
>>> + x265_log(m_param, X265_LOG_DEBUG, "scene cut at %d \n",
>>> frame->frameNum);
>>> + return frame->bScenecut;
>>> + }
>>> + else
>>> + return false;
>>> + }
>>> + else if (bRealScenecut)
>>> {
>>> if (m_param->keyframeMin == m_param->keyframeMax)
>>> threshMin = threshMax;
>>> diff --git a/source/x265.h b/source/x265.h
>>> index 1e6f9ece6..32feb2bca 100644
>>> --- a/source/x265.h
>>> +++ b/source/x265.h
>>> @@ -1860,7 +1860,7 @@ typedef struct x265_param
>>> /* A genuine threshold used for histogram based scene cut detection.
>>> * This threshold determines whether a frame is a scenecut or not
>>> * when compared against the edge and chroma histogram sad values.
>>> - * Default 0.01. Range: Real number in the interval (0,2). */
>>> + * Default 0.03. Range: Real number in the interval (0,1). */
>>> double edgeTransitionThreshold;
>>>
>>> /* Enables histogram based scenecut detection algorithm to detect
>>> scenecuts. Default disabled */
>>> --
>>> 2.24.0.windows.2
>>>
>>>
--
Regards,
*Aruna Matheswaran,*
Video Codec Engineer,
Media & AI analytics BU,
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20200629/9f755b64/attachment-0001.html>
More information about the x265-devel
mailing list