[x265] [PATCH] Fixed the normalization formula to output 0 to 1 and considered max chroma histogram SAD in histogram based scene cut detection
Praveen Kumar Karadugattu
praveenkumar at multicorewareinc.com
Mon Jun 22 08:08:00 CEST 2020
>From 44704e10a60ae314ecd13dfb84c0c4f82d6c1a55 Mon Sep 17 00:00:00 2001
From: Praveen Kumar Karadugattu <praveenkumar at multicorewareinc.com>
Date: Wed, 17 Jun 2020 19:28:06 +0530
Subject: [PATCH] Fixed the normalization formula to output 0 to 1 and
considered max chroma histogram SAD in histogram based scene cut detection
---
doc/reST/cli.rst | 8 ++---
source/common/param.cpp | 4 +--
source/encoder/encoder.cpp | 75
++++++++++++++++++++++------------------------
source/encoder/encoder.h | 6 ++--
source/x265cli.cpp | 2 +-
5 files changed, 45 insertions(+), 50 deletions(-)
diff --git a/doc/reST/cli.rst b/doc/reST/cli.rst
index eceec40..c9e288e 100644
--- a/doc/reST/cli.rst
+++ b/doc/reST/cli.rst
@@ -1462,13 +1462,13 @@ Slice decision options
.. option:: --hist-scenecut, --no-hist-scenecut
Indicates that scenecuts need to be detected using luma edge and chroma
histograms.
- :option: `--hist-scenecut` enables scenecut detection using the
histograms and disables the default scene cut algorithm.
- :option: `--no-hist-scenecut` disables histogram based scenecut algorithm.
+ :option:`--hist-scenecut` enables scenecut detection using the histograms
and disables the default scene cut algorithm.
+ :option:`--no-hist-scenecut` disables histogram based scenecut algorithm.
-.. option:: --hist-threshold <0.0..2.0>
+.. option:: --hist-threshold <0.0..1.0>
This value represents the threshold for normalized SAD of edge histograms
used in scenecut detection.
- This requires :option: `--hist-scenecut` to be enabled. For example, a
value of 0.2 indicates that a frame with normalized SAD value
+ This requires :option:`--hist-scenecut` to be enabled. For example, a
value of 0.2 indicates that a frame with normalized SAD value
greater than 0.2 against the previous frame as scenecut.
Default 0.01.
diff --git a/source/common/param.cpp b/source/common/param.cpp
index fb7244e..925f0c4 100644
--- a/source/common/param.cpp
+++ b/source/common/param.cpp
@@ -1688,8 +1688,8 @@ int x265_check_params(x265_param* param)
"scenecutThreshold must be greater than 0");
CHECK(param->scenecutBias < 0 || 100 < param->scenecutBias,
"scenecut-bias must be between 0 and 100");
- CHECK(param->edgeTransitionThreshold < 0.0 || 2.0 <
param->edgeTransitionThreshold,
- "hist-threshold must be between 0.0 and 2.0");
+ CHECK(param->edgeTransitionThreshold < 0.0 || 1.0 <
param->edgeTransitionThreshold,
+ "hist-threshold must be between 0.0 and 1.0");
CHECK(param->radl < 0 || param->radl > param->bframes,
"radl must be between 0 and bframes");
CHECK(param->rdPenalty < 0 || param->rdPenalty > 2,
diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp
index 752e5b2..f6bc540 100644
--- a/source/encoder/encoder.cpp
+++ b/source/encoder/encoder.cpp
@@ -222,12 +222,9 @@ void Encoder::create()
uint32_t pixelbytes = m_param->internalBitDepth > 8 ? 2 : 1;
m_edgePic = X265_MALLOC(pixel, m_planeSizes[0] * pixelbytes);
m_edgeHistThreshold = m_param->edgeTransitionThreshold;
- m_chromaHistThreshold = m_edgeHistThreshold * 10.0;
- m_chromaHistThreshold = x265_min(m_chromaHistThreshold,
MAX_SCENECUT_THRESHOLD);
- m_scaledEdgeThreshold = m_edgeHistThreshold *
SCENECUT_STRENGTH_FACTOR;
- m_scaledEdgeThreshold = x265_min(m_scaledEdgeThreshold,
MAX_SCENECUT_THRESHOLD);
- m_scaledChromaThreshold = m_chromaHistThreshold *
SCENECUT_STRENGTH_FACTOR;
- m_scaledChromaThreshold = x265_min(m_scaledChromaThreshold,
MAX_SCENECUT_THRESHOLD);
+ m_chromaHistThreshold = x265_min(m_edgeHistThreshold * 10.0,
MAX_SCENECUT_THRESHOLD);
+ m_scaledEdgeThreshold = x265_min(m_edgeHistThreshold *
SCENECUT_STRENGTH_FACTOR, MAX_SCENECUT_THRESHOLD);
+ m_scaledChromaThreshold = x265_min(m_chromaHistThreshold *
SCENECUT_STRENGTH_FACTOR, MAX_SCENECUT_THRESHOLD);
if (m_param->sourceBitDepth != m_param->internalBitDepth)
{
int size = m_param->sourceWidth * m_param->sourceHeight;
@@ -1450,13 +1447,14 @@ bool Encoder::computeHistograms(x265_picture *pic)
memset(edgeHist, 0, EDGE_BINS * sizeof(int32_t));
for (uint32_t i = 0; i < m_planeSizes[0]; i++)
{
- if (!m_edgePic[i])
- edgeHist[0]++;
+ if (m_edgePic[i])
+ edgeHist[1]++;
else
- edgeHist[1]++;
+ edgeHist[0]++;
}
+
/* Y Histogram Calculation */
- int32_t* yHist = m_curYUVHist[0];
+ int32_t *yHist = m_curYUVHist[0];
memset(yHist, 0, HISTOGRAM_BINS * sizeof(int32_t));
for (uint32_t i = 0; i < m_planeSizes[0]; i++)
{
@@ -1468,7 +1466,7 @@ bool Encoder::computeHistograms(x265_picture *pic)
{
/* U Histogram Calculation */
int32_t *uHist = m_curYUVHist[1];
- memset(uHist, 0, HISTOGRAM_BINS * sizeof(int32_t));
+ memset(uHist, 0, sizeof(m_curYUVHist[1]));
for (uint32_t i = 0; i < m_planeSizes[1]; i++)
{
pixelVal = planeU[i];
@@ -1478,52 +1476,56 @@ bool Encoder::computeHistograms(x265_picture *pic)
/* V Histogram Calculation */
pixelVal = 0;
int32_t *vHist = m_curYUVHist[2];
- memset(vHist, 0, HISTOGRAM_BINS * sizeof(int32_t));
+ memset(vHist, 0, sizeof(m_curYUVHist[2]));
for (uint32_t i = 0; i < m_planeSizes[2]; i++)
{
pixelVal = planeV[i];
vHist[pixelVal]++;
}
- for (int i = 0; i < HISTOGRAM_BINS; i++)
- {
- m_curMaxUVHist[i] = x265_max(uHist[i], vHist[i]);
- }
}
return true;
}
-void Encoder::computeHistogramSAD(double *maxUVNormalizedSad, double
*edgeNormalizedSad, int curPoc)
+void Encoder::computeHistogramSAD(double *normalizedMaxUVSad, double
*normalizedEdgeSad, int curPoc)
{
if (curPoc == 0)
{ /* first frame is scenecut by default no sad computation for the
same. */
- *maxUVNormalizedSad = 0.0;
- *edgeNormalizedSad = 0.0;
+ *normalizedMaxUVSad = 0.0;
+ *normalizedEdgeSad = 0.0;
}
else
{
- /* compute sum of absolute difference of normalized histogram bins
for maxUV and edge histograms. */
- int32_t edgefreqDiff = 0;
- int32_t maxUVfreqDiff = 0;
- double edgeProbabilityDiff = 0;
+ /* compute sum of absolute differences of histogram bins of chroma
and luma edge response between the current and prev pictures. */
+ int32_t edgeHistSad = 0;
+ int32_t uHistSad = 0;
+ int32_t vHistSad = 0;
+ double normalizedUSad = 0.0;
+ double normalizedVSad = 0.0;
for (int j = 0; j < HISTOGRAM_BINS; j++)
{
if (j < 2)
{
- edgefreqDiff = abs(m_curEdgeHist[j] - m_prevEdgeHist[j]);
- edgeProbabilityDiff = (double) edgefreqDiff /
m_planeSizes[0];
- *edgeNormalizedSad += edgeProbabilityDiff;
+ edgeHistSad += abs(m_curEdgeHist[j] - m_prevEdgeHist[j]);
}
- maxUVfreqDiff = abs(m_curMaxUVHist[j] - m_prevMaxUVHist[j]);
- *maxUVNormalizedSad += (double)maxUVfreqDiff / m_planeSizes[2];
+ uHistSad += abs(m_curYUVHist[1][j] - m_prevYUVHist[1][j]);
+ vHistSad += abs(m_curYUVHist[2][j] - m_prevYUVHist[2][j]);
}
+ *normalizedEdgeSad = normalizeRange(edgeHistSad, 0, 2 *
m_planeSizes[0], 0.0, 1.0);
+ normalizedUSad = normalizeRange(uHistSad, 0, 2 * m_planeSizes[1],
0.0, 1.0);
+ normalizedVSad = normalizeRange(vHistSad, 0, 2 * m_planeSizes[2],
0.0, 1.0);
+ *normalizedMaxUVSad = x265_max(normalizedUSad, normalizedVSad);
}
/* store histograms of previous frame for reference */
- size_t bufsize = HISTOGRAM_BINS * sizeof(int32_t);
- memcpy(m_prevMaxUVHist, m_curMaxUVHist, bufsize);
- memcpy(m_prevEdgeHist, m_curEdgeHist, 2 * sizeof(int32_t));
+ memcpy(m_prevEdgeHist, m_curEdgeHist, sizeof(m_curEdgeHist));
+ memcpy(m_prevYUVHist, m_curYUVHist, sizeof(m_curYUVHist));
+}
+
+double Encoder::normalizeRange(int32_t value, int32_t minValue, int32_t
maxValue, double rangeStart, double rangeEnd)
+{
+ return (double)(value - minValue) * (rangeEnd - rangeStart) /
(maxValue - minValue) + rangeStart;
}
void Encoder::findSceneCuts(x265_picture *pic, bool& bDup, double
maxUVSad, double edgeSad)
@@ -1542,20 +1544,13 @@ void Encoder::findSceneCuts(x265_picture *pic,
bool& bDup, double maxUVSad, doub
{
bDup = true;
}
- else if (edgeSad > m_edgeHistThreshold && maxUVSad >=
m_chromaHistThreshold)
- {
- pic->frameData.bScenecut = true;
- bDup = false;
- }
- else if (edgeSad > m_scaledEdgeThreshold || maxUVSad >=
m_scaledChromaThreshold)
+ else if (edgeSad > m_scaledEdgeThreshold || maxUVSad >=
m_scaledChromaThreshold || (edgeSad > m_edgeHistThreshold && maxUVSad >=
m_chromaHistThreshold))
{
pic->frameData.bScenecut = true;
bDup = false;
+ x265_log(m_param, X265_LOG_DEBUG, "scene cut at %d \n",
pic->poc);
}
}
-
- if (pic->frameData.bScenecut)
- x265_log(m_param, X265_LOG_DEBUG, "scene cut at %d \n", pic->poc);
}
/**
diff --git a/source/encoder/encoder.h b/source/encoder/encoder.h
index 2c45886..fd6b3e7 100644
--- a/source/encoder/encoder.h
+++ b/source/encoder/encoder.h
@@ -163,7 +163,7 @@ class RateControl;
class ThreadPool;
class FrameData;
-#define MAX_SCENECUT_THRESHOLD 2.0
+#define MAX_SCENECUT_THRESHOLD 1.0
#define SCENECUT_STRENGTH_FACTOR 2.0
class Encoder : public x265_encoder
@@ -257,8 +257,7 @@ public:
pixel* m_edgePic;
pixel* m_inputPic[3];
int32_t m_curYUVHist[3][HISTOGRAM_BINS];
- int32_t m_curMaxUVHist[HISTOGRAM_BINS];
- int32_t m_prevMaxUVHist[HISTOGRAM_BINS];
+ int32_t m_prevYUVHist[3][HISTOGRAM_BINS];
int32_t m_curEdgeHist[2];
int32_t m_prevEdgeHist[2];
uint32_t m_planeSizes[3];
@@ -373,6 +372,7 @@ public:
bool computeHistograms(x265_picture *pic);
void computeHistogramSAD(double *maxUVNormalizedSAD, double
*edgeNormalizedSAD, int curPoc);
+ double normalizeRange(int32_t value, int32_t minValue, int32_t
maxValue, double rangeStart, double rangeEnd);
void findSceneCuts(x265_picture *pic, bool& bDup, double
m_maxUVSADVal, double m_edgeSADVal);
void initRefIdx();
diff --git a/source/x265cli.cpp b/source/x265cli.cpp
index 5b95698..b53dc2b 100644
--- a/source/x265cli.cpp
+++ b/source/x265cli.cpp
@@ -175,7 +175,7 @@ namespace X265_NS {
H1(" --scenecut-bias <0..100.0> Bias for scenecut detection.
Default %.2f\n", param->scenecutBias);
H0(" --hist-scenecut Enables histogram based
scene-cut detection using histogram based algorithm.\n");
H0(" --no-hist-scenecut Disables histogram based
scene-cut detection using histogram based algorithm.\n");
- H1(" --hist-threshold <0.0..2.0> Luma Edge histogram's
Normalized SAD threshold for histogram based scenecut detection Default
%.2f\n", param->edgeTransitionThreshold);
+ H1(" --hist-threshold <0.0..1.0> Luma Edge histogram's
Normalized SAD threshold for histogram based scenecut detection Default
%.2f\n", param->edgeTransitionThreshold);
H0(" --[no-]fades Enable detection and handling
of fade-in regions. Default %s\n", OPT(param->bEnableFades));
H1(" --[no-]scenecut-aware-qp Enable increasing QP for
frames inside the scenecut window after scenecut. Default %s\n",
OPT(param->bEnableSceneCutAwareQp));
H1(" --scenecut-window <0..1000> QP incremental duration(in
milliseconds) when scenecut-aware-qp is enabled. Default %d\n",
param->scenecutWindow);
--
1.8.3.1
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20200622/18c83d6e/attachment-0001.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-Fixed-the-normalization-formula-to-ouput-0-to-1-and-.patch
Type: application/octet-stream
Size: 12007 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20200622/18c83d6e/attachment-0001.obj>
More information about the x265-devel
mailing list