[x265] [x265 Patch] Histogram Based Scene Cut Detection
Srikanth Kurapati
srikanth.kurapati at multicorewareinc.com
Mon Nov 18 07:49:51 CET 2019
# HG changeset patch
# User Srikanth Kurapati <srikanth.kurapati at multicorewareinc.com>
# Date 1573649311 -19800
# Wed Nov 13 18:18:31 2019 +0530
# Node ID 40beab295ca274bf62cb2fd2e732da722d10eea3
# Parent 04db2bfee5d628d931d1407355b909ac8ff1c898
Histogram based scenecut detection
This patch does the following.
1.Identifies scenecuts by thresholding against sad of edge and chroma
histograms.
2.Add option "--hist-scenecut" to enable histogram based scenecut method.
3.Add option "--hist-threshold" to provide threshold for determining
scene-cuts.
4.Optimizes frame duplication through reuse of sad for marking duplicate
frames.
diff -r 04db2bfee5d6 -r 40beab295ca2 doc/reST/cli.rst
--- a/doc/reST/cli.rst Thu Oct 31 16:23:27 2019 +0530
+++ b/doc/reST/cli.rst Wed Nov 13 18:18:31 2019 +0530
@@ -1426,7 +1426,23 @@
This value represents the percentage difference between the inter cost and
intra cost of a frame used in scenecut detection. For example, a value of
5 indicates,
if the inter cost of a frame is greater than or equal to 95 percent of
the intra cost of the frame,
- then detect this frame as scenecut. Values between 5 and 15 are
recommended. Default 5.
+ then detect this frame as scenecut. Values between 5 and 15 are
recommended.
+ This value is evaluated only when --scenecut is enabled else it is
ignored. Default 5.
+
+.. option:: --hist-scenecut, --no-hist-scenecut
+
+ indicates that scenecuts need to be detected using luma edge and chroma
histograms.
+ option: `--hist-scenecut` enables scenecut detection using the histograms
and disables the default scene cut algorithm.
+ option: `--no-hist-scenecut` disables histogram based scenecut algorithm.
+
+ Note that if --hist-scenecut and --scenecut are both enabled, whichever
option the user specified first is used.
+
+.. option:: --hist-threshold <0.0..2.0>
+
+ This value represents the threshold for normalized SAD of edge histograms
used in scenecut detection.
+ This requires hist-scenecut to be enabled. For example, a value of 0.2
indicates that a frame with normalized SAD value
+ greater than 0.2 against the previous frame is detected as a scenecut.
+ Default 0.01.
.. option:: --radl <integer>
diff -r 04db2bfee5d6 -r 40beab295ca2 source/CMakeLists.txt
--- a/source/CMakeLists.txt Thu Oct 31 16:23:27 2019 +0530
+++ b/source/CMakeLists.txt Wed Nov 13 18:18:31 2019 +0530
@@ -29,7 +29,7 @@
option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF)
mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
# X265_BUILD must be incremented each time the public API is changed
-set(X265_BUILD 182)
+set(X265_BUILD 183)
configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
"${PROJECT_BINARY_DIR}/x265.def")
configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
diff -r 04db2bfee5d6 -r 40beab295ca2 source/common/common.h
--- a/source/common/common.h Thu Oct 31 16:23:27 2019 +0530
+++ b/source/common/common.h Wed Nov 13 18:18:31 2019 +0530
@@ -129,12 +129,16 @@
typedef uint64_t sum2_t;
typedef uint64_t pixel4;
typedef int64_t ssum2_t;
+#define HISTOGRAM_BINS 1024
+#define SHIFT 1
#else
typedef uint8_t pixel;
typedef uint16_t sum_t;
typedef uint32_t sum2_t;
typedef uint32_t pixel4;
typedef int32_t ssum2_t; // Signed sum
+#define HISTOGRAM_BINS 256
+#define SHIFT 0
#endif // if HIGH_BIT_DEPTH
#if X265_DEPTH < 10
diff -r 04db2bfee5d6 -r 40beab295ca2 source/common/param.cpp
--- a/source/common/param.cpp Thu Oct 31 16:23:27 2019 +0530
+++ b/source/common/param.cpp Wed Nov 13 18:18:31 2019 +0530
@@ -167,6 +167,8 @@
param->bFrameAdaptive = X265_B_ADAPT_TRELLIS;
param->bBPyramid = 1;
param->scenecutThreshold = 40; /* Magic number pulled in from x264 */
+ param->edgeTransitionThreshold = 0.01;
+ param->bHistBasedSceneCut = false;
param->lookaheadSlices = 8;
param->lookaheadThreads = 0;
param->scenecutBias = 5.0;
@@ -572,6 +574,7 @@
param->bframes = 0;
param->lookaheadDepth = 0;
param->scenecutThreshold = 0;
+ param->bHistBasedSceneCut = false;
param->rc.cuTree = 0;
param->frameNumThreads = 1;
}
@@ -614,7 +617,7 @@
return 0;
}
-static int x265_atobool(const char* str, bool& bError)
+static bool x265_atobool(const char* str, bool& bError)
{
if (!strcmp(str, "1") ||
!strcmp(str, "true") ||
@@ -764,6 +767,7 @@
bool bNameWasBool = false;
bool bValueWasNull = !value;
bool bExtraParams = false;
+ static int scenecutChoice = -1;
char nameBuf[64];
static int count;
@@ -920,11 +924,16 @@
OPT("lookahead-slices") p->lookaheadSlices = atoi(value);
OPT("scenecut")
{
- p->scenecutThreshold = atobool(value);
- if (bError || p->scenecutThreshold)
+ if (scenecutChoice == -1)
{
- bError = false;
- p->scenecutThreshold = atoi(value);
+ p->scenecutThreshold = atobool(value);
+ if (bError || p->scenecutThreshold)
+ {
+ bError = false;
+ p->scenecutThreshold = atoi(value);
+ p->bHistBasedSceneCut = false;
+ scenecutChoice = 0;
+ }
}
}
OPT("temporal-layers") p->bEnableTemporalSubLayers = atobool(value);
@@ -1191,6 +1200,46 @@
OPT("opt-ref-list-length-pps") p->bOptRefListLengthPPS =
atobool(value);
OPT("multi-pass-opt-rps") p->bMultiPassOptRPS = atobool(value);
OPT("scenecut-bias") p->scenecutBias = atof(value);
+ OPT("hist-scenecut")
+ {
+ if (scenecutChoice == -1)
+ {
+ p->bHistBasedSceneCut = atobool(value);
+ if (bError)
+ {
+ bError = false;
+ p->bHistBasedSceneCut = false;
+ }
+ if (p->bHistBasedSceneCut)
+ {
+ bError = false;
+ p->scenecutThreshold = 0;
+ scenecutChoice = 1;
+ }
+ }
+ else
+ {
+ p->bHistBasedSceneCut = atobool(value);
+ p->bHistBasedSceneCut = false;
+ }
+ }
+ OPT("hist-threshold")
+ {
+ if (p->bHistBasedSceneCut)
+ {
+ p->edgeTransitionThreshold = atof(value);
+ if (bError)
+ {
+ bError = false;
+ p->edgeTransitionThreshold = 0.01;
+ x265_log(p, X265_LOG_INFO, "Using default threshold
%.2lf for scene cut detection\n", p->edgeTransitionThreshold);
+ }
+ }
+ else
+ {
+ x265_log(p, X265_LOG_WARNING, "Histogram based scene cut
detection not enabled\n", p->edgeTransitionThreshold);
+ }
+ }
OPT("lookahead-threads") p->lookaheadThreads = atoi(value);
OPT("opt-cu-delta-qp") p->bOptCUDeltaQP = atobool(value);
OPT("multi-pass-opt-analysis") p->analysisMultiPassRefine =
atobool(value);
@@ -1631,8 +1680,16 @@
"Valid Logging level -1:none 0:error 1:warning 2:info 3:debug
4:full");
CHECK(param->scenecutThreshold < 0,
"scenecutThreshold must be greater than 0");
- CHECK(param->scenecutBias < 0 || 100 < param->scenecutBias,
- "scenecut-bias must be between 0 and 100");
+ if (param->scenecutThreshold)
+ {
+ CHECK(param->scenecutBias < 0 || 100 < param->scenecutBias,
+ "scenecut-bias must be between 0 and 100");
+ }
+ else if (param->bHistBasedSceneCut)
+ {
+ CHECK(param->edgeTransitionThreshold < 0.0 || 2.0 <
param->edgeTransitionThreshold,
+ "hist-threshold must be between 0.0 and 2.0");
+ }
CHECK(param->radl < 0 || param->radl > param->bframes,
"radl must be between 0 and bframes");
CHECK(param->rdPenalty < 0 || param->rdPenalty > 2,
@@ -1792,9 +1849,13 @@
x265_log(param, X265_LOG_INFO, "ME / range / subpel / merge
: %s / %d / %d / %d\n",
x265_motion_est_names[param->searchMethod],
param->searchRange, param->subpelRefine, param->maxNumMergeCand);
- if (param->keyframeMax != INT_MAX || param->scenecutThreshold)
- x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut /
bias: %d / %d / %d / %.2lf\n", param->keyframeMin, param->keyframeMax,
param->scenecutThreshold, param->scenecutBias * 100);
- else
+ if (param->scenecutThreshold && param->keyframeMax != INT_MAX)
+ x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut /
bias : %d / %d / %d / %.2lf \n",
+ param->keyframeMin, param->keyframeMax,
param->scenecutThreshold, param->scenecutBias * 100);
+ else if (param->bHistBasedSceneCut && param->keyframeMax != INT_MAX)
+ x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut /
edge threshold : %d / %d / %d / %.2lf\n",
+ param->keyframeMin, param->keyframeMax,
param->bHistBasedSceneCut, param->edgeTransitionThreshold);
+ else if (param->keyframeMax == INT_MAX)
x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut
: disabled\n");
if (param->cbQpOffset || param->crQpOffset)
@@ -1961,6 +2022,8 @@
s += sprintf(s, " rc-lookahead=%d", p->lookaheadDepth);
s += sprintf(s, " lookahead-slices=%d", p->lookaheadSlices);
s += sprintf(s, " scenecut=%d", p->scenecutThreshold);
+ s += sprintf(s, " hist-scenecut=%d", p->bHistBasedSceneCut);
+ s += sprintf(s, " hist-threshold=%.2f", p->edgeTransitionThreshold);
s += sprintf(s, " radl=%d", p->radl);
BOOL(p->bEnableHRDConcatFlag, "splice");
BOOL(p->bIntraRefresh, "intra-refresh");
@@ -2108,6 +2171,8 @@
BOOL(p->bOptRefListLengthPPS, "opt-ref-list-length-pps");
BOOL(p->bMultiPassOptRPS, "multi-pass-opt-rps");
s += sprintf(s, " scenecut-bias=%.2f", p->scenecutBias);
+ s += sprintf(s, " hist-threshold=%.2f", p->edgeTransitionThreshold);
+
BOOL(p->bOptCUDeltaQP, "opt-cu-delta-qp");
BOOL(p->bAQMotion, "aq-motion");
BOOL(p->bEmitHDRSEI, "hdr");
@@ -2261,6 +2326,7 @@
dst->lookaheadSlices = src->lookaheadSlices;
dst->lookaheadThreads = src->lookaheadThreads;
dst->scenecutThreshold = src->scenecutThreshold;
+ dst->bHistBasedSceneCut = src->bHistBasedSceneCut;
dst->bIntraRefresh = src->bIntraRefresh;
dst->maxCUSize = src->maxCUSize;
dst->minCUSize = src->minCUSize;
@@ -2420,6 +2486,7 @@
dst->bOptRefListLengthPPS = src->bOptRefListLengthPPS;
dst->bMultiPassOptRPS = src->bMultiPassOptRPS;
dst->scenecutBias = src->scenecutBias;
+ dst->edgeTransitionThreshold = src->edgeTransitionThreshold;
dst->gopLookahead = src->lookaheadDepth;
dst->bOptCUDeltaQP = src->bOptCUDeltaQP;
dst->analysisMultiPassDistortion = src->analysisMultiPassDistortion;
diff -r 04db2bfee5d6 -r 40beab295ca2 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Thu Oct 31 16:23:27 2019 +0530
+++ b/source/encoder/encoder.cpp Wed Nov 13 18:18:31 2019 +0530
@@ -130,12 +130,17 @@
#if SVT_HEVC
m_svtAppData = NULL;
#endif
-
m_prevTonemapPayload.payload = NULL;
m_startPoint = 0;
m_saveCTUSize = 0;
+ m_edgePic = NULL;
+ m_edgeHistThreshold = 0;
+ m_chromaHistThreshold = 0.0;
+ m_scaledEdgeThreshold = 0.0;
+ m_scaledChromaThreshold = 0.0;
m_zoneIndex = 0;
}
+
inline char *strcatFilename(const char *input, const char *suffix)
{
char *output = X265_MALLOC(char, strlen(input) + strlen(suffix) + 1);
@@ -210,6 +215,24 @@
}
}
+ if (m_param->bHistBasedSceneCut)
+ {
+ for (int i = 0; i < x265_cli_csps[m_param->internalCsp].planes;
i++)
+ {
+ m_planeSizes[i] = m_param->sourceWidth * m_param->sourceHeight
>> x265_cli_csps[m_param->internalCsp].height[i];
+ }
+ uint32_t pixelbytes = m_param->sourceBitDepth > 8 ? 2 : 1;
+ m_edgePic = X265_MALLOC(pixel, m_planeSizes[0]*pixelbytes);
+ double strengthFactor = 2.0;
+ m_edgeHistThreshold = m_param->edgeTransitionThreshold;
+ m_chromaHistThreshold = m_edgeHistThreshold * 10.0;
+ m_chromaHistThreshold = x265_min(m_chromaHistThreshold,
MAX_SCENECUT_THRESHOLD);
+ m_scaledEdgeThreshold = m_edgeHistThreshold * strengthFactor;
+ m_scaledEdgeThreshold = x265_min(m_scaledEdgeThreshold,
MAX_SCENECUT_THRESHOLD);
+ m_scaledChromaThreshold = m_chromaHistThreshold * strengthFactor;
+ m_scaledChromaThreshold = x265_min(m_scaledChromaThreshold,
MAX_SCENECUT_THRESHOLD);
+ }
+
// Do not allow WPP if only one row or fewer than 3 columns, it is
pointless and unstable
if (rows == 1 || cols < 3)
{
@@ -854,6 +877,12 @@
}
}
+ if (m_param->bHistBasedSceneCut)
+ {
+ if(m_edgePic != NULL)
+ X265_FREE_ZERO(m_edgePic);
+ }
+
for (int i = 0; i < m_param->frameNumThreads; i++)
{
if (m_frameEncoder[i])
@@ -1313,6 +1342,141 @@
dest->planes[2] = (char*)dest->planes[1] + src->stride[1] *
(src->height >> x265_cli_csps[src->colorSpace].height[1]);
}
+bool Encoder::computeHistograms(x265_picture *pic)
+{
+ pixel *src = (pixel*)pic->planes[0];
+ size_t bufSize = sizeof(pixel) * m_planeSizes[0];
+ int32_t planeCount = x265_cli_csps[m_param->internalCsp].planes;
+ int32_t numBytes = m_param->sourceBitDepth > 8 ? 2 : 1;
+ memset(m_edgePic, 0, bufSize*numBytes);
+
+ if (!computeEdge(m_edgePic, src, NULL, pic->width, pic->height,
pic->width, false))
+ {
+ x265_log(m_param, X265_LOG_ERROR, "Failed edge computation!");
+ return false;
+ }
+
+ pixel pixelVal;
+ int64_t size = pic->height * (pic->stride[0] >> SHIFT);
+ int32_t *edgeHist = m_curEdgeHist;
+ memset(edgeHist, 0, 2 * sizeof(int32_t));
+ for (int64_t i = 0; i < size; i++)
+ {
+ if (!m_edgePic[i])
+ edgeHist[0]++;
+ else
+ edgeHist[1]++;
+ }
+
+ /*U Histogram Calculation*/
+ int32_t HeightL = (pic->height >>
x265_cli_csps[pic->colorSpace].height[1]);
+ size = HeightL * (pic->stride[1] >> SHIFT);
+ int32_t *uHist = m_curUVHist[0];
+ pixel *chromaPlane = (pixel *)pic->planes[1];
+
+ memset(uHist, 0, HISTOGRAM_BINS * sizeof(int32_t));
+
+ for (int64_t i = 0; i < size; i++)
+ {
+ pixelVal = chromaPlane[i];
+ uHist[pixelVal]++;
+ }
+
+ /*V Histogram Calculation */
+ if (planeCount == 3)
+ {
+ pixelVal = 0;
+ int32_t heightV = (pic->height >>
x265_cli_csps[pic->colorSpace].height[2]);
+ size = heightV * (pic->stride[2] >> SHIFT);
+ int32_t *vHist = m_curUVHist[1];
+ chromaPlane = (pixel *)pic->planes[2];
+
+ memset(vHist, 0, HISTOGRAM_BINS * sizeof(int32_t));
+ for (int64_t i = 0; i < size; i++)
+ {
+ pixelVal = chromaPlane[i];
+ vHist[pixelVal]++;
+ }
+ for (int i = 0; i < HISTOGRAM_BINS; i++)
+ {
+ m_curMaxUVHist[i] = x265_max(uHist[i],vHist[i]);
+ }
+ }
+ else
+ { /* in case of bi planar color space */
+
memcpy(m_curMaxUVHist,m_curUVHist[0],HISTOGRAM_BINS*sizeof(int32_t));
+ }
+
+ return true;
+}
+
+void Encoder::computeHistogramSAD(double *maxUVNormalizedSad, double
*edgeNormalizedSad, int curPoc)
+{
+
+ if (curPoc == 0)
+ { /* first frame is scenecut by default no sad computation for the
same. */
+ *maxUVNormalizedSad = 0.0;
+ *edgeNormalizedSad = 0.0;
+ }
+ else
+ {
+ /* compute sum of absolute difference of normalized histogram bins
for maxUV and edge histograms. */
+ int32_t edgefreqDiff = 0;
+ int32_t maxUVfreqDiff = 0;
+ double edgeProbabilityDiff = 0;
+
+ for (int j = 0; j < HISTOGRAM_BINS; j++)
+ {
+ if (j < 2 )
+ {
+ edgefreqDiff = abs(m_curEdgeHist[j] - m_prevEdgeHist[j]);
+ edgeProbabilityDiff = (double) edgefreqDiff /
m_planeSizes[0];
+ *edgeNormalizedSad += edgeProbabilityDiff;
+ }
+ maxUVfreqDiff = abs(m_curMaxUVHist[j] - m_prevMaxUVHist[j]);
+ *maxUVNormalizedSad += (double)maxUVfreqDiff / m_planeSizes[2];
+ }
+ }
+
+ /* store histograms of previous frame for reference */
+ size_t bufsize = HISTOGRAM_BINS * sizeof(int32_t);
+ memcpy(m_prevMaxUVHist, m_curMaxUVHist, bufsize);
+ memcpy(m_prevEdgeHist, m_curEdgeHist, 2*sizeof(int32_t));
+
+}
+
+void Encoder::findSceneCuts(x265_picture * pic, bool& bDup, double
maxUVSad, double edgeSad)
+{
+ pic->frameData.bScenecut = false;
+
+ if (pic->poc == 0)
+ {
+ /* for first frame */
+ pic->frameData.bScenecut = false;
+ bDup = false;
+ }
+ else
+ {
+ if (edgeSad == 0.0 && maxUVSad == 0.0)
+ {
+ bDup = true;
+ }
+ else if (edgeSad > m_edgeHistThreshold && maxUVSad >=
m_chromaHistThreshold)
+ {
+ pic->frameData.bScenecut = true;
+ bDup = false;
+ }
+ else if (edgeSad > m_scaledEdgeThreshold || maxUVSad >=
m_scaledChromaThreshold)
+ {
+ pic->frameData.bScenecut = true;
+ bDup = false;
+ }
+ }
+
+ if (pic->frameData.bScenecut)
+ x265_log(m_param, X265_LOG_DEBUG, "scene cut at %d \n",pic->poc);
+}
+
/**
* Feed one new input frame into the encoder, get one frame out. If pic_in
is
* NULL, a flush condition is implied and pic_in must be NULL for all
subsequent
@@ -1339,6 +1503,8 @@
const x265_picture* inputPic = NULL;
static int written = 0, read = 0;
bool dontRead = false;
+ bool bdropFrame = false;
+ bool dropflag = false;
if (m_exportedPic)
{
@@ -1350,6 +1516,17 @@
}
if ((pic_in && (!m_param->chunkEnd || (m_encodedFrameNum <
m_param->chunkEnd))) || (m_param->bEnableFrameDuplication && !pic_in &&
(read < written)))
{
+ if (m_param->bHistBasedSceneCut && pic_in)
+ {
+ x265_picture *pic = (x265_picture *) pic_in;
+ if (computeHistograms(pic))
+ {
+ double maxUVSad = 0.0, edgeSad = 0.0;
+ computeHistogramSAD(&maxUVSad, &edgeSad,pic_in->poc);
+ findSceneCuts(pic, bdropFrame, maxUVSad, edgeSad);
+ }
+ }
+
if ((m_param->bEnableFrameDuplication && !pic_in && (read <
written)))
dontRead = true;
else
@@ -1393,9 +1570,27 @@
written++;
}
- psnrWeight = ComputePSNR(m_dupBuffer[0]->dupPic,
m_dupBuffer[1]->dupPic, m_param);
-
- if (psnrWeight >= m_param->dupThreshold)
+ if (m_param->bEnableFrameDuplication &&
m_param->bHistBasedSceneCut)
+ {
+ if (!bdropFrame &&
m_dupBuffer[1]->dupPic->frameData.bScenecut == false)
+ {
+ psnrWeight = ComputePSNR(m_dupBuffer[0]->dupPic,
m_dupBuffer[1]->dupPic, m_param);
+ if (psnrWeight >= m_param->dupThreshold)
+ dropflag = true;
+ }
+ else
+ {
+ dropflag = true;
+ }
+ }
+ else if (m_param->bEnableFrameDuplication)
+ {
+ psnrWeight = ComputePSNR(m_dupBuffer[0]->dupPic,
m_dupBuffer[1]->dupPic, m_param);
+ if (psnrWeight >= m_param->dupThreshold)
+ dropflag = true;
+ }
+
+ if (dropflag)
{
if (m_dupBuffer[0]->bDup)
{
@@ -1498,6 +1693,10 @@
inFrame->m_poc = ++m_pocLast;
inFrame->m_userData = inputPic->userData;
inFrame->m_pts = inputPic->pts;
+ if (m_param->bHistBasedSceneCut)
+ {
+ inFrame->m_lowres.bScenecut = inputPic->frameData.bScenecut;
+ }
inFrame->m_forceqp = inputPic->forceqp;
inFrame->m_param = (m_reconfigure || m_reconfigureRc) ?
m_latestParam : m_param;
inFrame->m_picStruct = inputPic->picStruct;
@@ -3209,6 +3408,7 @@
* adaptive I frame placement */
p->keyframeMax = INT_MAX;
p->scenecutThreshold = 0;
+ p->bHistBasedSceneCut = 0;
}
else if (p->keyframeMax <= 1)
{
@@ -3222,6 +3422,7 @@
p->lookaheadDepth = 0;
p->bframes = 0;
p->scenecutThreshold = 0;
+ p->bHistBasedSceneCut = 0;
p->bFrameAdaptive = 0;
p->rc.cuTree = 0;
p->bEnableWeightedPred = 0;
@@ -3881,6 +4082,17 @@
m_param->searchMethod = m_param->hmeSearchMethod[2];
}
}
+
+ if (p->scenecutThreshold && p->edgeTransitionThreshold != 0.01)
+ {
+ x265_log(p, X265_LOG_WARNING, "using default scenecut-bias %.2lf
for scene cut detection\n",p->scenecutBias);
+ }
+ else if (p->bHistBasedSceneCut && p->edgeTransitionThreshold == 0.0)
+ {
+ p->edgeTransitionThreshold = 0.01;
+ x265_log(p, X265_LOG_WARNING, "using default threshold %.2lf for
scene cut detection\n", p->edgeTransitionThreshold);
+ }
+
}
void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc,
const x265_picture* picIn, int paramBytes)
diff -r 04db2bfee5d6 -r 40beab295ca2 source/encoder/encoder.h
--- a/source/encoder/encoder.h Thu Oct 31 16:23:27 2019 +0530
+++ b/source/encoder/encoder.h Wed Nov 13 18:18:31 2019 +0530
@@ -156,7 +156,6 @@
bool bDup;
};
-
class FrameEncoder;
class DPB;
class Lookahead;
@@ -164,6 +163,8 @@
class ThreadPool;
class FrameData;
+#define MAX_SCENECUT_THRESHOLD 2.0
+
class Encoder : public x265_encoder
{
public:
@@ -228,7 +229,7 @@
bool m_reconfigureRc;
bool m_reconfigureZone;
- int m_saveCtuDistortionLevel;
+ int m_saveCtuDistortionLevel;
/* Begin intra refresh when one not in progress or else begin one as
soon as the current
* one is done. Requires bIntraRefresh to be set.*/
@@ -245,11 +246,24 @@
Lock m_rpsInSpsLock;
int m_rpsInSpsCount;
/* For HDR*/
- double m_cB;
- double m_cR;
+ double m_cB;
+ double m_cR;
+
+ int m_bToneMap; // Enables tone-mapping
+ int m_enableNal;
- int m_bToneMap; // Enables tone-mapping
- int m_enableNal;
+ /* For histogram based scene-cut detection */
+ pixel* m_edgePic;
+ int32_t m_curUVHist[2][HISTOGRAM_BINS];
+ int32_t m_curMaxUVHist[HISTOGRAM_BINS];
+ int32_t m_prevMaxUVHist[HISTOGRAM_BINS];
+ int32_t m_curEdgeHist[2];
+ int32_t m_prevEdgeHist[2];
+ uint32_t m_planeSizes[3];
+ double m_edgeHistThreshold;
+ double m_chromaHistThreshold;
+ double m_scaledEdgeThreshold;
+ double m_scaledChromaThreshold;
#ifdef ENABLE_HDR10_PLUS
const hdr10plus_api *m_hdr10plus_api;
@@ -355,6 +369,10 @@
void copyPicture(x265_picture *dest, const x265_picture *src);
+ bool computeHistograms(x265_picture *pic);
+ void computeHistogramSAD(double *maxUVNormalizedSAD, double
*edgeNormalizedSAD, int curPoc);
+ void findSceneCuts(x265_picture * pic, bool& bDup, double
m_maxUVSADVal, double m_edgeSADVal);
+
void initRefIdx();
void analyseRefIdx(int *numRefIdx);
void updateRefIdx();
diff -r 04db2bfee5d6 -r 40beab295ca2 source/encoder/ratecontrol.cpp
--- a/source/encoder/ratecontrol.cpp Thu Oct 31 16:23:27 2019 +0530
+++ b/source/encoder/ratecontrol.cpp Wed Nov 13 18:18:31 2019 +0530
@@ -508,6 +508,7 @@
CMP_OPT_FIRST_PASS("open-gop", m_param->bOpenGOP);
CMP_OPT_FIRST_PASS(" keyint", m_param->keyframeMax);
CMP_OPT_FIRST_PASS("scenecut", m_param->scenecutThreshold);
+ CMP_OPT_FIRST_PASS("hist-threshold",
m_param->edgeTransitionThreshold);
CMP_OPT_FIRST_PASS("intra-refresh",
m_param->bIntraRefresh);
if (m_param->bMultiPassOptRPS)
{
@@ -1200,6 +1201,7 @@
m_param->rc.bStatRead = 0;
m_param->bFrameAdaptive = 0;
m_param->scenecutThreshold = 0;
+ m_param->bHistBasedSceneCut = false;
m_param->rc.cuTree = 0;
if (m_param->bframes > 1)
m_param->bframes = 1;
@@ -2284,7 +2286,7 @@
if (m_isVbv && m_currentSatd > 0 && curFrame)
{
if (m_param->lookaheadDepth || m_param->rc.cuTree ||
- m_param->scenecutThreshold ||
+ (m_param->scenecutThreshold || m_param->bHistBasedSceneCut) ||
(m_param->bFrameAdaptive && m_param->bframes))
{
/* Lookahead VBV: If lookahead is done, raise the quantizer as
necessary
diff -r 04db2bfee5d6 -r 40beab295ca2 source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp Thu Oct 31 16:23:27 2019 +0530
+++ b/source/encoder/slicetype.cpp Wed Nov 13 18:18:31 2019 +0530
@@ -85,6 +85,69 @@
} // end anonymous namespace
+namespace X265_NS {
+
+bool computeEdge(pixel *edgePic, pixel *refPic, pixel *edgeTheta, intptr_t
stride, int height, int width, bool bcalcTheta)
+{
+ intptr_t rowOne = 0, rowTwo = 0, rowThree = 0, colOne = 0, colTwo = 0,
colThree = 0;
+ intptr_t middle = 0, topLeft = 0, topRight = 0, bottomLeft = 0,
bottomRight = 0;
+
+ const int startIndex = 1;
+
+ if (!edgePic || !refPic || (!edgeTheta && bcalcTheta))
+ {
+ return false;
+ }
+ else
+ {
+ float gradientH = 0, gradientV = 0, radians = 0, theta = 0;
+ float gradientMagnitude = 0;
+ pixel blackPixel = 0;
+
+ //Applying Sobel filter expect for border pixels
+ height = height - startIndex;
+ width = width - startIndex;
+ for (int rowNum = startIndex; rowNum < height; rowNum++)
+ {
+ rowTwo = rowNum * stride;
+ rowOne = rowTwo - stride;
+ rowThree = rowTwo + stride;
+
+ for (int colNum = startIndex; colNum < width; colNum++)
+ {
+
+ /* Horizontal and vertical gradients
+ [ -3 0 3 ] [-3 -10 -3 ]
+ gH =[ -10 0 10] gV = [ 0 0 0 ]
+ [ -3 0 3 ] [ 3 10 3 ] */
+
+ colTwo = colNum;
+ colOne = colTwo - startIndex;
+ colThree = colTwo + startIndex;
+ middle = rowTwo + colTwo;
+ topLeft = rowOne + colOne;
+ topRight = rowOne + colThree;
+ bottomLeft = rowThree + colOne;
+ bottomRight = rowThree + colThree;
+ gradientH = (float)(-3 * refPic[topLeft] + 3 *
refPic[topRight] - 10 * refPic[rowTwo + colOne] + 10 * refPic[rowTwo +
colThree] - 3 * refPic[bottomLeft] + 3 * refPic[bottomRight]);
+ gradientV = (float)(-3 * refPic[topLeft] - 10 *
refPic[rowOne + colTwo] - 3 * refPic[topRight] + 3 * refPic[bottomLeft] +
10 * refPic[rowThree + colTwo] + 3 * refPic[bottomRight]);
+ gradientMagnitude = sqrtf(gradientH * gradientH +
gradientV * gradientV);
+ if(bcalcTheta)
+ {
+ edgeTheta[middle] = 0;
+ radians = atan2(gradientV, gradientH);
+ theta = (float)((radians * 180) / PI);
+ if (theta < 0)
+ theta = 180 + theta;
+ edgeTheta[middle] = (pixel)theta;
+ }
+ edgePic[middle] = (pixel)(gradientMagnitude >=
edgeThreshold ? edgeThreshold : blackPixel);
+ }
+ }
+ return true;
+ }
+}
+
void edgeFilter(Frame *curFrame, x265_param* param)
{
int height = curFrame->m_fencPic->m_picHeight;
@@ -114,6 +177,7 @@
//Applying Gaussian filter on the picture
src = (pixel*)curFrame->m_fencPic->m_picOrg[0];
refPic = curFrame->m_gaussianPic + curFrame->m_fencPic->m_lumaMarginY
* stride + curFrame->m_fencPic->m_lumaMarginX;
+ edgePic = curFrame->m_edgePic + curFrame->m_fencPic->m_lumaMarginY *
stride + curFrame->m_fencPic->m_lumaMarginX;
pixel pixelValue = 0;
for (int rowNum = 0; rowNum < height; rowNum++)
@@ -146,51 +210,8 @@
}
}
-#if HIGH_BIT_DEPTH //10-bit build
- float threshold = 1023;
- pixel whitePixel = 1023;
-#else
- float threshold = 255;
- pixel whitePixel = 255;
-#endif
-#define PI 3.14159265
-
- float gradientH = 0, gradientV = 0, radians = 0, theta = 0;
- float gradientMagnitude = 0;
- pixel blackPixel = 0;
- edgePic = curFrame->m_edgePic + curFrame->m_fencPic->m_lumaMarginY *
stride + curFrame->m_fencPic->m_lumaMarginX;
- //Applying Sobel filter on the gaussian filtered picture
- for (int rowNum = 0; rowNum < height; rowNum++)
- {
- for (int colNum = 0; colNum < width; colNum++)
- {
- edgeTheta[(rowNum*stride) + colNum] = 0;
- if ((rowNum != 0) && (colNum != 0) && (rowNum != height - 1)
&& (colNum != width - 1)) //Ignoring the border pixels of the picture
- {
- /*Horizontal and vertical gradients
- [ -3 0 3 ] [-3 -10 -3 ]
- gH = [ -10 0 10] gV = [ 0 0 0 ]
- [ -3 0 3 ] [ 3 10 3 ]*/
-
- const intptr_t rowOne = (rowNum - 1)*stride, colOne =
colNum -1;
- const intptr_t rowTwo = rowNum * stride, colTwo = colNum;
- const intptr_t rowThree = (rowNum + 1)*stride, colThree =
colNum + 1;
- const intptr_t index = (rowNum*stride) + colNum;
-
- gradientH = (float)(-3 * refPic[rowOne + colOne] + 3 *
refPic[rowOne + colThree] - 10 * refPic[rowTwo + colOne] + 10 *
refPic[rowTwo + colThree] - 3 * refPic[rowThree + colOne] + 3 *
refPic[rowThree + colThree]);
- gradientV = (float)(-3 * refPic[rowOne + colOne] - 10 *
refPic[rowOne + colTwo] - 3 * refPic[rowOne + colThree] + 3 *
refPic[rowThree + colOne] + 10 * refPic[rowThree + colTwo] + 3 *
refPic[rowThree + colThree]);
-
- gradientMagnitude = sqrtf(gradientH * gradientH +
gradientV * gradientV);
- radians = atan2(gradientV, gradientH);
- theta = (float)((radians * 180) / PI);
- if (theta < 0)
- theta = 180 + theta;
- edgeTheta[(rowNum*stride) + colNum] = (pixel)theta;
-
- edgePic[index] = gradientMagnitude >= threshold ?
whitePixel : blackPixel;
- }
- }
- }
+ if(!computeEdge(edgePic, refPic, edgeTheta, stride, height, width,
true))
+ x265_log(NULL, X265_LOG_ERROR, "Failed edge computation!");
}
//Find the angle of a block by averaging the pixel angles
@@ -1471,7 +1492,7 @@
if (m_lastNonB && !m_param->rc.bStatRead &&
((m_param->bFrameAdaptive && m_param->bframes) ||
- m_param->rc.cuTree || m_param->scenecutThreshold ||
+ m_param->rc.cuTree || m_param->scenecutThreshold ||
m_param->bHistBasedSceneCut ||
(m_param->lookaheadDepth && m_param->rc.vbvBufferSize)))
{
slicetypeAnalyse(frames, false);
@@ -1971,10 +1992,15 @@
int numBFrames = 0;
int numAnalyzed = numFrames;
- bool isScenecut = scenecut(frames, 0, 1, true, origNumFrames);
+ bool isScenecut = false;
/* When scenecut threshold is set, use scenecut detection for I frame
placements */
- if (m_param->scenecutThreshold && isScenecut)
+ if (m_param->scenecutThreshold)
+ isScenecut = scenecut(frames, 0, 1, true, origNumFrames);
+ else if (m_param->bHistBasedSceneCut)
+ isScenecut = frames[1]->bScenecut;
+
+ if (isScenecut)
{
frames[1]->sliceType = X265_TYPE_I;
return;
@@ -1985,14 +2011,17 @@
m_extendGopBoundary = false;
for (int i = m_param->bframes + 1; i < origNumFrames; i +=
m_param->bframes + 1)
{
- scenecut(frames, i, i + 1, true, origNumFrames);
+ if (m_param->scenecutThreshold)
+ scenecut(frames, i, i + 1, true, origNumFrames);
+
for (int j = i + 1; j <= X265_MIN(i + m_param->bframes + 1,
origNumFrames); j++)
{
- if (frames[j]->bScenecut && scenecutInternal(frames, j -
1, j, true) )
- {
- m_extendGopBoundary = true;
- break;
- }
+ if (( m_param->scenecutThreshold && frames[j]->bScenecut
&& scenecutInternal(frames, j - 1, j, true)) ||
+ (m_param->bHistBasedSceneCut && frames[j]->bScenecut))
+ {
+ m_extendGopBoundary = true;
+ break;
+ }
}
if (m_extendGopBoundary)
break;
@@ -2097,13 +2126,14 @@
{
for (int j = 1; j < numBFrames + 1; j++)
{
- if (scenecut(frames, j, j + 1, false, origNumFrames) ||
+ if ((m_param->scenecutThreshold && scenecut(frames, j, j +
1, false, origNumFrames)) ||
+ (m_param->bHistBasedSceneCut && frames[j +
1]->bScenecut) ||
(bForceRADL && (frames[j]->frameNum == preRADL)))
- {
- frames[j]->sliceType = X265_TYPE_P;
- numAnalyzed = j;
- break;
- }
+ {
+ frames[j]->sliceType = X265_TYPE_P;
+ numAnalyzed = j;
+ break;
+ }
}
}
resetStart = bKeyframe ? 1 : X265_MIN(numBFrames + 2, numAnalyzed
+ 1);
@@ -3289,3 +3319,5 @@
fenc->rowSatds[b - p0][p1 - b][cuY] += bcostAq;
fenc->lowresCosts[b - p0][p1 - b][cuXY] = (uint16_t)(X265_MIN(bcost,
LOWRES_COST_MASK) | (listused << LOWRES_COST_SHIFT));
}
+
+}
diff -r 04db2bfee5d6 -r 40beab295ca2 source/encoder/slicetype.h
--- a/source/encoder/slicetype.h Thu Oct 31 16:23:27 2019 +0530
+++ b/source/encoder/slicetype.h Wed Nov 13 18:18:31 2019 +0530
@@ -43,6 +43,13 @@
#define AQ_EDGE_BIAS 0.5
#define EDGE_INCLINATION 45
+#ifdef HIGH_BIT_DEPTH
+#define edgeThreshold 1023.0
+#else
+#define edgeThreshold 255.0
+#endif
+#define PI 3.14159265
+
/* Thread local data for lookahead tasks */
struct LookaheadTLD
{
@@ -258,6 +265,7 @@
CostEstimateGroup& operator=(const CostEstimateGroup&);
};
-}
+bool computeEdge(pixel *edgePic, pixel *refPic, pixel *edgeTheta, intptr_t
stride, int height, int width, bool bcalcTheta);
+}
#endif // ifndef X265_SLICETYPE_H
diff -r 04db2bfee5d6 -r 40beab295ca2 source/test/regression-tests.txt
--- a/source/test/regression-tests.txt Thu Oct 31 16:23:27 2019 +0530
+++ b/source/test/regression-tests.txt Wed Nov 13 18:18:31 2019 +0530
@@ -159,6 +159,7 @@
Traffic_4096x2048_30p.y4m, --preset medium --frame-dup --dup-threshold 60
--hrd --bitrate 10000 --vbv-bufsize 15000 --vbv-maxrate 12000
Kimono1_1920x1080_24_400.yuv,--preset superfast --qp 28 --zones 0,139,q=32
Island_960x540_420p_8bit_24fps.yuv,--no-cutree --aq-mode 0 --bitrate 6000
--scenecut-aware-qp
+sintel_trailer_2k_1920x1080_24.yuv, --preset medium --hist-scenecut
--hist-threshold 0.02 --frame-dup --dup-threshold 60 --hrd --bitrate 10000
--vbv-bufsize 15000 --vbv-maxrate 12000
# Main12 intraCost overflow bug test
720p50_parkrun_ter.y4m,--preset medium
diff -r 04db2bfee5d6 -r 40beab295ca2 source/x265.h
--- a/source/x265.h Thu Oct 31 16:23:27 2019 +0530
+++ b/source/x265.h Wed Nov 13 18:18:31 2019 +0530
@@ -211,7 +211,7 @@
uint32_t numCUsInFrame;
uint32_t numPartitions;
uint32_t depthBytes;
- int bScenecut;
+ bool bScenecut;
x265_weight_param* wt;
x265_analysis_inter_data* interData;
x265_analysis_intra_data* intraData;
@@ -294,7 +294,7 @@
double avgChromaVLevel;
char sliceType;
- int bScenecut;
+ bool bScenecut;
double ipCostRatio;
int frameLatency;
x265_cu_stats cuStats;
@@ -1024,7 +1024,8 @@
int lookaheadSlices;
/* An arbitrary threshold which determines how aggressively the
lookahead
- * should detect scene cuts. The default (40) is recommended. */
+ * should detect scene cuts for cost based scenecut detection.
+ * The default (40) is recommended. */
int scenecutThreshold;
/* Replace keyframes by using a column of intra blocks that move
across the video
@@ -1846,6 +1847,16 @@
/* The offset by which QP is incremented for inter-frames when
bEnableSceneCutAwareQp is set.
* Default is +5. */
int maxQpDelta;
+
+ /* A genuine threshold used for histogram based scene cut detection.
+ * This threshold determines whether a frame is a scenecut or not
+ * when compared against the edge and chroma histogram sad values.
+ * Default 0.01. Range: Real number in the interval (0,2). */
+ double edgeTransitionThreshold;
+
+ /* Enables histogram based scenecut detection algorithm to detect
scenecuts. */
+ bool bHistBasedSceneCut;
+
} x265_param;
/* x265_param_alloc:
* Allocates an x265_param instance. The returned param structure is not
diff -r 04db2bfee5d6 -r 40beab295ca2 source/x265cli.h
--- a/source/x265cli.h Thu Oct 31 16:23:27 2019 +0530
+++ b/source/x265cli.h Wed Nov 13 18:18:31 2019 +0530
@@ -129,6 +129,9 @@
{ "scenecut", required_argument, NULL, 0 },
{ "no-scenecut", no_argument, NULL, 0 },
{ "scenecut-bias", required_argument, NULL, 0 },
+ { "hist-scenecut", no_argument, NULL, 0},
+ { "no-hist-scenecut", no_argument, NULL, 0},
+ { "hist-threshold", required_argument, NULL, 0},
{ "fades", no_argument, NULL, 0 },
{ "no-fades", no_argument, NULL, 0 },
{ "scenecut-aware-qp", no_argument, NULL, 0 },
@@ -489,7 +492,10 @@
H0(" --gop-lookahead <integer> Extends gop boundary if a
scenecut is found within this from keyint boundary. Default 0\n");
H0(" --no-scenecut Disable adaptive I-frame
decision\n");
H0(" --scenecut <integer> How aggressively to insert extra
I-frames. Default %d\n", param->scenecutThreshold);
- H1(" --scenecut-bias <0..100.0> Bias for scenecut detection.
Default %.2f\n", param->scenecutBias);
+ H1(" --scenecut-bias <0..100.0> Bias for scenecut detection.
Default %.2f\n", param->scenecutBias);
+ H0(" --hist-scenecut Enables histogram based scene-cut
detection using histogram based algorithm.\n");
+ H0(" --no-hist-scenecut Disables histogram based
scene-cut detection using histogram based algorithm.\n");
+ H1(" --hist-threshold <0.0..2.0> Luma Edge histogram's Normalized
SAD threshold for histogram based scenecut detection Default %.2f\n",
param->edgeTransitionThreshold);
H0(" --[no-]fades Enable detection and handling of
fade-in regions. Default %s\n", OPT(param->bEnableFades));
H1(" --[no-]scenecut-aware-qp Enable increasing QP for frames
inside the scenecut window after scenecut. Default %s\n",
OPT(param->bEnableSceneCutAwareQp));
H1(" --scenecut-window <0..1000> QP incremental duration(in
milliseconds) when scenecut-aware-qp is enabled. Default %d\n",
param->scenecutWindow);
--
*With Regards,*
*Srikanth Kurapati.*
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20191118/96a42f34/attachment-0001.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: x265_SCD.patch
Type: application/octet-stream
Size: 40136 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20191118/96a42f34/attachment-0001.obj>
More information about the x265-devel
mailing list