[x265] [x265 Patch 2 of 2] Integration of histogram based scenecut detection with frame duplication
Srikanth Kurapati
srikanth.kurapati at multicorewareinc.com
Thu Oct 17 16:11:05 CEST 2019
# HG changeset patch
# User Srikanth Kurapati <srikanth.kurapati at multicorewareinc.com>
# Date 1571319729 -19800
# Thu Oct 17 19:12:09 2019 +0530
# Node ID c6d2c44753634202f399033021cf064bdd9efbb7
# Parent 978a57943c8f622de41ddb1931504d6df4ebafc1
Integration of Histogram based scenecut detection and frame duplication
features in encoder.
1. Add options "--hist-scenecut" and "--hist-threshold" to enable an improved
scenecut method for slice type decisions and rate control, and to set the
threshold for determining scene-cuts.
2. Identifies scenecuts in the encoder by thresholding the SAD of edge and
chroma histograms.
3. Removes duplicate edge filter code and uses a global definition shared by
scene cut detection and AQ in Lookahead.
diff -r 978a57943c8f -r c6d2c4475363 doc/reST/cli.rst
--- a/doc/reST/cli.rst Thu Oct 17 18:43:11 2019 +0530
+++ b/doc/reST/cli.rst Thu Oct 17 19:12:09 2019 +0530
@@ -1426,7 +1426,20 @@
This value represents the percentage difference between the inter cost and
intra cost of a frame used in scenecut detection. For example, a value of
5 indicates,
if the inter cost of a frame is greater than or equal to 95 percent of
the intra cost of the frame,
- then detect this frame as scenecut. Values between 5 and 15 are
recommended. Default 5.
+ then detect this frame as scenecut. Values between 5 and 15 are
recommended.
+ This value is evaluated only when --scenecut is enabled else it is
ignored. Default 5.
+
+.. option:: --hist-scenecut, --no-hist-scenecut
+
+ indicates that I-frames need to be inserted using edge and color
histogram based scenecut algorithm.
+ option: `--hist-scenecut` enables adaptive I frame placement using this
method and disables the default scene cut algorithm.
+ option: `--no-hist-scenecut` disables histogram based adaptive I frame placement.
+
+.. option:: --hist-threshold <0.0..2.0>
+
+ This value represents the threshold for SAD of edge histograms used in
scenecut detection. This requires hist-scenecut to be enabled.
+ For example, a value of 0.2 indicates that a frame whose SAD value against
the previous frame is greater than 0.2 is detected as a scenecut.
+ Values between 0.0 and 2.0 are recommended. This value is evaluated only
when --hist-scenecut is enabled. Default 0.01.
.. option:: --radl <integer>
diff -r 978a57943c8f -r c6d2c4475363 source/CMakeLists.txt
--- a/source/CMakeLists.txt Thu Oct 17 18:43:11 2019 +0530
+++ b/source/CMakeLists.txt Thu Oct 17 19:12:09 2019 +0530
@@ -29,7 +29,7 @@
option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF)
mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
# X265_BUILD must be incremented each time the public API is changed
-set(X265_BUILD 180)
+set(X265_BUILD 181)
configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
"${PROJECT_BINARY_DIR}/x265.def")
configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
diff -r 978a57943c8f -r c6d2c4475363 source/common/param.cpp
--- a/source/common/param.cpp Thu Oct 17 18:43:11 2019 +0530
+++ b/source/common/param.cpp Thu Oct 17 19:12:09 2019 +0530
@@ -167,6 +167,8 @@
param->bFrameAdaptive = X265_B_ADAPT_TRELLIS;
param->bBPyramid = 1;
param->scenecutThreshold = 40; /* Magic number pulled in from x264 */
+ param->edgeTransitionThreshold = 0.01;
+ param->bHistBasedSceneCut = false;
param->lookaheadSlices = 8;
param->lookaheadThreads = 0;
param->scenecutBias = 5.0;
@@ -567,6 +569,7 @@
param->bframes = 0;
param->lookaheadDepth = 0;
param->scenecutThreshold = 0;
+ param->bHistBasedSceneCut = false;
param->rc.cuTree = 0;
param->frameNumThreads = 1;
}
@@ -609,7 +612,7 @@
return 0;
}
-static int x265_atobool(const char* str, bool& bError)
+static bool x265_atobool(const char* str, bool& bError)
{
if (!strcmp(str, "1") ||
!strcmp(str, "true") ||
@@ -920,6 +923,7 @@
{
bError = false;
p->scenecutThreshold = atoi(value);
+ p->bHistBasedSceneCut = false;
}
}
OPT("temporal-layers") p->bEnableTemporalSubLayers = atobool(value);
@@ -1186,6 +1190,32 @@
OPT("opt-ref-list-length-pps") p->bOptRefListLengthPPS =
atobool(value);
OPT("multi-pass-opt-rps") p->bMultiPassOptRPS = atobool(value);
OPT("scenecut-bias") p->scenecutBias = atof(value);
+ OPT("hist-scenecut")
+ {
+ p->bHistBasedSceneCut = atobool(value);
+
+ if (bError)
+ {
+ bError = false;
+ p->bHistBasedSceneCut = false;
+ }
+
+ if (p->bHistBasedSceneCut)
+ {
+ bError = false;
+ p->scenecutThreshold = 0;
+ }
+
+ }
+ OPT("hist-threshold") {
+ p->edgeTransitionThreshold = atof(value);
+ if (bError)
+ {
+ bError = false;
+ p->edgeTransitionThreshold = 0.01;
+ x265_log(p, X265_LOG_INFO, "using default threshold %.2lf
for scene cut detection\n", p->edgeTransitionThreshold);
+ }
+ }
OPT("lookahead-threads") p->lookaheadThreads = atoi(value);
OPT("opt-cu-delta-qp") p->bOptCUDeltaQP = atobool(value);
OPT("multi-pass-opt-analysis") p->analysisMultiPassRefine =
atobool(value);
@@ -1623,8 +1653,16 @@
"Valid Logging level -1:none 0:error 1:warning 2:info 3:debug
4:full");
CHECK(param->scenecutThreshold < 0,
"scenecutThreshold must be greater than 0");
- CHECK(param->scenecutBias < 0 || 100 < param->scenecutBias,
- "scenecut-bias must be between 0 and 100");
+ if (param->scenecutThreshold)
+ {
+ CHECK(param->scenecutBias < 0 || 100 < param->scenecutBias,
+ "scenecut-bias must be between 0 and 100");
+ }
+ else if (param->bHistBasedSceneCut)
+ {
+ CHECK(param->edgeTransitionThreshold < 0.0 || 2.0 <
param->edgeTransitionThreshold,
+ "hist-threshold must be between 0.0 and 2.0");
+ }
CHECK(param->radl < 0 || param->radl > param->bframes,
"radl must be between 0 and bframes");
CHECK(param->rdPenalty < 0 || param->rdPenalty > 2,
@@ -1780,10 +1818,21 @@
x265_log(param, X265_LOG_INFO, "ME / range / subpel / merge
: %s / %d / %d / %d\n",
x265_motion_est_names[param->searchMethod],
param->searchRange, param->subpelRefine, param->maxNumMergeCand);
- if (param->keyframeMax != INT_MAX || param->scenecutThreshold)
- x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut /
bias: %d / %d / %d / %.2lf\n", param->keyframeMin, param->keyframeMax,
param->scenecutThreshold, param->scenecutBias * 100);
+ if (param->scenecutThreshold && param->keyframeMax != INT_MAX)
+ param->edgeTransitionThreshold = 0.0;
+ else if (param->bHistBasedSceneCut && param->keyframeMax != INT_MAX)
+ param->scenecutBias = 0.0;
+ else if (param->keyframeMax != INT_MAX)
+ {
+ param->edgeTransitionThreshold = 0.0;
+ param->scenecutBias = 0.0;
+ }
+
+ if (param->keyframeMax == INT_MAX)
+ x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut
: disabled\n");
else
- x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut
: disabled\n");
+ x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut /
bias / threshold : %d / %d / %d / %.2lf / %.2lf\n",
+ param->keyframeMin, param->keyframeMax, (
param->bHistBasedSceneCut || param->scenecutThreshold ),
param->scenecutBias * 100, param->edgeTransitionThreshold);
if (param->cbQpOffset || param->crQpOffset)
x265_log(param, X265_LOG_INFO, "Cb/Cr QP Offset
: %d / %d\n", param->cbQpOffset, param->crQpOffset);
@@ -1949,6 +1998,8 @@
s += sprintf(s, " rc-lookahead=%d", p->lookaheadDepth);
s += sprintf(s, " lookahead-slices=%d", p->lookaheadSlices);
s += sprintf(s, " scenecut=%d", p->scenecutThreshold);
+ s += sprintf(s, " hist-scenecut=%d", p->bHistBasedSceneCut);
+ s += sprintf(s, " hist-threshold=%.2f", p->edgeTransitionThreshold);
s += sprintf(s, " radl=%d", p->radl);
BOOL(p->bEnableHRDConcatFlag, "splice");
BOOL(p->bIntraRefresh, "intra-refresh");
@@ -2096,6 +2147,8 @@
BOOL(p->bOptRefListLengthPPS, "opt-ref-list-length-pps");
BOOL(p->bMultiPassOptRPS, "multi-pass-opt-rps");
s += sprintf(s, " scenecut-bias=%.2f", p->scenecutBias);
+ s += sprintf(s, " hist-threshold=%.2f", p->edgeTransitionThreshold);
+
BOOL(p->bOptCUDeltaQP, "opt-cu-delta-qp");
BOOL(p->bAQMotion, "aq-motion");
BOOL(p->bEmitHDRSEI, "hdr");
@@ -2246,6 +2299,7 @@
dst->lookaheadSlices = src->lookaheadSlices;
dst->lookaheadThreads = src->lookaheadThreads;
dst->scenecutThreshold = src->scenecutThreshold;
+ dst->bHistBasedSceneCut = src->bHistBasedSceneCut;
dst->bIntraRefresh = src->bIntraRefresh;
dst->maxCUSize = src->maxCUSize;
dst->minCUSize = src->minCUSize;
@@ -2403,6 +2457,7 @@
dst->bOptRefListLengthPPS = src->bOptRefListLengthPPS;
dst->bMultiPassOptRPS = src->bMultiPassOptRPS;
dst->scenecutBias = src->scenecutBias;
+ dst->edgeTransitionThreshold = src->edgeTransitionThreshold;
dst->gopLookahead = src->lookaheadDepth;
dst->bOptCUDeltaQP = src->bOptCUDeltaQP;
dst->analysisMultiPassDistortion = src->analysisMultiPassDistortion;
diff -r 978a57943c8f -r c6d2c4475363 source/encoder/api.cpp
--- a/source/encoder/api.cpp Thu Oct 17 18:43:11 2019 +0530
+++ b/source/encoder/api.cpp Thu Oct 17 19:12:09 2019 +0530
@@ -31,6 +31,7 @@
#include "nal.h"
#include "bitcost.h"
#include "svt.h"
+#include "histscenecut.h"
#if ENABLE_LIBVMAF
#include "libvmaf.h"
@@ -118,7 +119,10 @@
x265_log(param, X265_LOG_INFO, "build info %s\n", PFX(build_info_str));
encoder = new Encoder;
-
+ encoder->m_sadStats = new
sad_stats(x265_cli_csps[p->internalCsp].planes,param->edgeTransitionThreshold);
+ encoder->m_histogramsOfAdjFrames = new YuvHistogram[2];
+ encoder->m_histogramsOfAdjFrames[0].initHistograms(p);
+ encoder->m_histogramsOfAdjFrames[1].initHistograms(p);
#ifdef SVT_HEVC
if (param->bEnableSvtHevc)
@@ -810,6 +814,7 @@
CHECKED_MALLOC_ZERO(interData->ref, int32_t,
analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir);
}
analysis->interData = interData;
+ analysis->bScenecut = false;
return;
@@ -925,6 +930,7 @@
pic->rpu.payloadSize = 0;
pic->rpu.payload = NULL;
pic->picStruct = 0;
+ pic->bufUpdated = false;
if ((param->analysisSave || param->analysisLoad) ||
(param->bAnalysisType == AVC_INFO))
{
@@ -934,7 +940,9 @@
uint32_t numCUsInFrame = widthInCU * heightInCU;
pic->analysisData.numCUsInFrame = numCUsInFrame;
pic->analysisData.numPartitions = param->num4x4Partitions;
+ pic->analysisData.bScenecut = false;
}
+
}
void x265_picture_free(x265_picture *p)
@@ -956,7 +964,8 @@
{
if (param && param->rc.zonefileCount) {
for (int i = 0; i < param->rc.zonefileCount; i++)
- x265_free(param->rc.zones[i].zoneParam);
+ if(param->rc.zones[i].zoneParam)
+ x265_free(param->rc.zones[i].zoneParam);
}
if (param && (param->rc.zoneCount || param->rc.zonefileCount))
x265_free(param->rc.zones);
diff -r 978a57943c8f -r c6d2c4475363 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Thu Oct 17 18:43:11 2019 +0530
+++ b/source/encoder/encoder.cpp Thu Oct 17 19:12:09 2019 +0530
@@ -119,6 +119,9 @@
m_frameEncoder[i] = NULL;
for (uint32_t i = 0; i < DUP_BUFFER; i++)
m_dupBuffer[i] = NULL;
+
+ m_histogramsOfAdjFrames = NULL;
+ m_sadStats = NULL;
MotionEstimate::initScales();
#if ENABLE_HDR10_PLUS
@@ -162,7 +165,9 @@
int rows = (p->sourceHeight + p->maxCUSize - 1) >>
g_log2Size[p->maxCUSize];
int cols = (p->sourceWidth + p->maxCUSize - 1) >>
g_log2Size[p->maxCUSize];
- if (m_param->bEnableFrameDuplication)
+
+
+ if (m_param->bEnableFrameDuplication || m_param->bHistBasedSceneCut)
{
size_t framesize = 0;
int pixelbytes = p->sourceBitDepth > 8 ? 2 : 1;
@@ -184,6 +189,7 @@
m_dupBuffer[i]->dupPlane = NULL;
m_dupBuffer[i]->dupPlane = X265_MALLOC(char, framesize);
m_dupBuffer[i]->dupPic->planes[0] = m_dupBuffer[i]->dupPlane;
+ m_dupBuffer[i]->bufUpdated = false;
m_dupBuffer[i]->bOccupied = false;
m_dupBuffer[i]->bDup = false;
}
@@ -820,7 +826,7 @@
m_exportedPic = NULL;
}
- if (m_param->bEnableFrameDuplication)
+ if (m_param->bEnableFrameDuplication || m_param->bHistBasedSceneCut)
{
for (uint32_t i = 0; i < DUP_BUFFER; i++)
{
@@ -1280,6 +1286,37 @@
return psnrWeight = (psnrY * 6 + psnrU + psnrV) / 8;
}
+void Encoder::updateSceneCutAndFrameDuplicateFlags()
+{
+ /* SCD computation and drop flag*/
+ for (int i = 0; i < DUP_BUFFER; i++)
+ {
+ if (m_dupBuffer[i]->bufUpdated)
+ {
+ m_histogramsOfAdjFrames[i].setUpdateFlag(true);
+ m_histogramsOfAdjFrames[i].edgeFilter(m_dupBuffer[i]->dupPic);
+
m_histogramsOfAdjFrames[i].computeHistograms(*m_dupBuffer[i]->dupPic);
+ m_sadStats->computeSadValue(m_histogramsOfAdjFrames,
m_histogramsOfAdjFrames->m_planeSizes);
+ m_sadStats->findSceneCuts(m_dupBuffer[i]->dupPic,
m_dupBuffer[i]->bDup);
+
+ if (m_dupBuffer[i]->dupPic->analysisData.bScenecut)
+ {
+ x265_log(m_param, X265_LOG_DEBUG, "scene cut at %d edge
hist sad: %0.4lf maxuv hist sad: %0.4lf\n",
+
m_dupBuffer[i]->dupPic->poc,m_dupBuffer[i]->dupPic->analysisData.edgeSadValue,m_dupBuffer[i]->dupPic->analysisData.chromaSadValue);
+ }
+
+ if (m_dupBuffer[1]->bufUpdated)
+ m_histogramsOfAdjFrames[0] = m_histogramsOfAdjFrames[1];
+ }
+ }
+
+ }
+
+/* TBD
+- to be updated for missing parameters in case of re-use elsewhere and
reworked into a copy constructor / assignment operator of the x265 picture data
structure.
+- benefit: avoids a separate function and uses language features appropriately.
+*/
+
void Encoder::copyPicture(x265_picture *dest, const x265_picture *src)
{
dest->poc = src->poc;
@@ -1299,6 +1336,29 @@
memcpy(dest->planes[0], src->planes[0], src->framesize * sizeof(char));
dest->planes[1] = (char*)dest->planes[0] + src->stride[0] *
src->height;
dest->planes[2] = (char*)dest->planes[1] + src->stride[1] *
(src->height >> x265_cli_csps[src->colorSpace].height[1]);
+ memcpy(&dest->analysisData, &src->analysisData,
sizeof(src->analysisData));
+
+}
+
+void Encoder::setPictureFlags(int idx)
+{
+ m_dupBuffer[idx]->bOccupied = true;
+ m_dupBuffer[idx]->bufUpdated = true;
+ m_dupBuffer[idx]->bDup = false;
+}
+
+void Encoder::unsetPictureFlags(int idx)
+{
+ if (idx == 1)
+ {
+ m_dupBuffer[idx]->bOccupied = false;
+ m_dupBuffer[idx]->bufUpdated = false;
+ m_dupBuffer[idx]->bDup = false;
+ }
+ else if (idx == 0)
+ {
+ m_dupBuffer[idx]->bufUpdated = false;
+ }
}
/**
@@ -1327,7 +1387,9 @@
const x265_picture* inputPic = NULL;
static int written = 0, read = 0;
bool dontRead = false;
-
+ bool isScenecutEnabled = m_param->bHistBasedSceneCut;
+ bool dropflag = false;
+
if (m_exportedPic)
{
if (!m_param->bUseAnalysisFile && m_param->analysisSave)
@@ -1336,9 +1398,9 @@
m_exportedPic = NULL;
m_dpb->recycleUnreferenced();
}
- if ((pic_in && (!m_param->chunkEnd || (m_encodedFrameNum <
m_param->chunkEnd))) || (m_param->bEnableFrameDuplication && !pic_in &&
(read < written)))
- {
- if ((m_param->bEnableFrameDuplication && !pic_in && (read <
written)))
+ if ((pic_in && (!m_param->chunkEnd || (m_encodedFrameNum <
m_param->chunkEnd))) || (m_param->bEnableFrameDuplication && !pic_in &&
(read < written)) || (isScenecutEnabled && !pic_in && (read < written)))
+ {
+ if ((m_param->bEnableFrameDuplication && !pic_in && (read <
written)) || (isScenecutEnabled && !pic_in && (read < written)))
dontRead = true;
else
{
@@ -1361,7 +1423,7 @@
}
}
- if (m_param->bEnableFrameDuplication)
+ if (m_param->bEnableFrameDuplication || isScenecutEnabled )
{
double psnrWeight = 0;
@@ -1372,6 +1434,12 @@
copyPicture(m_dupBuffer[0]->dupPic, pic_in);
m_dupBuffer[0]->bOccupied = true;
written++;
+ if (m_param->bHistBasedSceneCut)
+ {
+ setPictureFlags(0);
+ updateSceneCutAndFrameDuplicateFlags();
+ unsetPictureFlags(0);
+ }
return 0;
}
else if (!m_dupBuffer[1]->bOccupied)
@@ -1379,31 +1447,58 @@
copyPicture(m_dupBuffer[1]->dupPic, pic_in);
m_dupBuffer[1]->bOccupied = true;
written++;
+ if (m_param->bHistBasedSceneCut)
+ {
+ setPictureFlags(1);
+ updateSceneCutAndFrameDuplicateFlags();
+ unsetPictureFlags(1);
+ }
}
- psnrWeight = ComputePSNR(m_dupBuffer[0]->dupPic,
m_dupBuffer[1]->dupPic, m_param);
-
- if (psnrWeight >= m_param->dupThreshold)
+ if (m_param->bEnableFrameDuplication &&
m_param->bHistBasedSceneCut)
{
- if (m_dupBuffer[0]->bDup)
+ if (m_dupBuffer[1]->bDup == false &&
m_dupBuffer[1]->dupPic->analysisData.bScenecut == false)
{
- m_dupBuffer[0]->dupPic->picStruct = tripling;
- m_dupBuffer[0]->bDup = false;
- read++;
+ psnrWeight = ComputePSNR(m_dupBuffer[0]->dupPic,
m_dupBuffer[1]->dupPic, m_param);
+ if (psnrWeight >= m_param->dupThreshold)
+ dropflag = true;
}
else
{
- m_dupBuffer[0]->dupPic->picStruct = doubling;
- m_dupBuffer[0]->bDup = true;
- m_dupBuffer[1]->bOccupied = false;
- read++;
- return 0;
+ dropflag = true;
}
}
- else if (m_dupBuffer[0]->bDup)
+ else if (m_param->bEnableFrameDuplication)
+ {
+ psnrWeight = ComputePSNR(m_dupBuffer[0]->dupPic,
m_dupBuffer[1]->dupPic, m_param);
+ if (psnrWeight >= m_param->dupThreshold)
+ dropflag = true;
+ }
+
+ if (m_param->bEnableFrameDuplication)
+ {
+ if (dropflag)
+ {
+ if (m_dupBuffer[0]->bDup)
+ {
+ m_dupBuffer[0]->dupPic->picStruct = tripling;
+ m_dupBuffer[0]->bDup = false;
+ read++;
+ }
+ else
+ {
+ m_dupBuffer[0]->dupPic->picStruct = doubling;
+ m_dupBuffer[0]->bDup = true;
+ m_dupBuffer[1]->bOccupied = false;
+ read++;
+ return 0;
+ }
+ }
+ else if (m_dupBuffer[0]->bDup)
m_dupBuffer[0]->bDup = false;
- else
- m_dupBuffer[0]->dupPic->picStruct = 0;
+ else
+ m_dupBuffer[0]->dupPic->picStruct = 0;
+ }
}
if (read < written)
@@ -1485,7 +1580,11 @@
inFrame->m_poc = ++m_pocLast;
inFrame->m_userData = inputPic->userData;
- inFrame->m_pts = inputPic->pts;
+ inFrame->m_pts = inputPic->pts;
+ if (m_param->bHistBasedSceneCut)
+ {
+ inFrame->m_lowres.bScenecut = inputPic->analysisData.bScenecut;
+ }
inFrame->m_forceqp = inputPic->forceqp;
inFrame->m_param = (m_reconfigure || m_reconfigureRc) ?
m_latestParam : m_param;
inFrame->m_picStruct = inputPic->picStruct;
@@ -1613,7 +1712,7 @@
m_param->bUseRcStats = 0;
}
- if (m_param->bEnableFrameDuplication && ((read < written) ||
(m_dupBuffer[0]->dupPic->picStruct == tripling && (read <= written))))
+ if ( (m_param->bEnableFrameDuplication || isScenecutEnabled) &&
((read < written) || (m_dupBuffer[0]->dupPic->picStruct == tripling &&
(read <= written))))
{
if (m_dupBuffer[0]->dupPic->picStruct == tripling)
m_dupBuffer[0]->bOccupied = m_dupBuffer[1]->bOccupied =
false;
@@ -3162,6 +3261,7 @@
* adaptive I frame placement */
p->keyframeMax = INT_MAX;
p->scenecutThreshold = 0;
+ p->bHistBasedSceneCut = 0;
}
else if (p->keyframeMax <= 1)
{
@@ -3175,6 +3275,7 @@
p->lookaheadDepth = 0;
p->bframes = 0;
p->scenecutThreshold = 0;
+ p->bHistBasedSceneCut = 0;
p->bFrameAdaptive = 0;
p->rc.cuTree = 0;
p->bEnableWeightedPred = 0;
@@ -3828,6 +3929,20 @@
m_param->searchMethod = m_param->hmeSearchMethod[2];
}
}
+
+ if (p->bHistBasedSceneCut && p->scenecutThreshold) {
+ p->scenecutThreshold = 0;
+ p->bHistBasedSceneCut = false;
+ x265_log(p, X265_LOG_WARNING, "Amibigious choice. disabling scene
cut detection \n");
+ }
+ else if (p->scenecutThreshold && p->edgeTransitionThreshold != 0.01) {
+ x265_log(p, X265_LOG_WARNING, "using scenecut-bias %.2lf for
scene cut detection\n",p->scenecutBias);
+ }
+ else if (p->bHistBasedSceneCut && p->edgeTransitionThreshold == 0.0) {
+ p->edgeTransitionThreshold = 0.01;
+ x265_log(p, X265_LOG_INFO, "using default threshold %.2lf for
scene cut detection\n", p->edgeTransitionThreshold);
+ }
+
}
void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc,
const x265_picture* picIn, int paramBytes)
diff -r 978a57943c8f -r c6d2c4475363 source/encoder/encoder.h
--- a/source/encoder/encoder.h Thu Oct 17 18:43:11 2019 +0530
+++ b/source/encoder/encoder.h Thu Oct 17 19:12:09 2019 +0530
@@ -32,6 +32,8 @@
#include "nal.h"
#include "framedata.h"
#include "svt.h"
+#include "histscenecut.h"
+
#ifdef ENABLE_HDR10_PLUS
#include "dynamicHDR10/hdr10plus.h"
#endif
@@ -154,6 +156,9 @@
//Flag to check whether the picture has duplicated.
bool bDup;
+
+ bool bufUpdated;
+
};
@@ -195,6 +200,9 @@
ThreadPool* m_threadPool;
FrameEncoder* m_frameEncoder[X265_MAX_FRAME_THREADS];
+
+ YuvHistogram* m_histogramsOfAdjFrames;
+ sad_stats* m_sadStats;
DPB* m_dpb;
Frame* m_exportedPic;
FILE* m_analysisFileIn;
@@ -279,6 +287,10 @@
if (m_prevTonemapPayload.payload != NULL)
X265_FREE(m_prevTonemapPayload.payload);
#endif
+ delete m_sadStats;
+ m_sadStats = NULL;
+ delete[] m_histogramsOfAdjFrames;
+ m_histogramsOfAdjFrames = NULL;
};
void create();
@@ -349,6 +361,12 @@
void copyPicture(x265_picture *dest, const x265_picture *src);
+ void unsetPictureFlags(int index);
+
+ void setPictureFlags(int index);
+
+ void updateSceneCutAndFrameDuplicateFlags();
+
void initRefIdx();
void analyseRefIdx(int *numRefIdx);
void updateRefIdx();
@@ -364,6 +382,7 @@
void initSPS(SPS *sps);
void initPPS(PPS *pps);
};
+
}
#endif // ifndef X265_ENCODER_H
diff -r 978a57943c8f -r c6d2c4475363 source/encoder/ratecontrol.cpp
--- a/source/encoder/ratecontrol.cpp Thu Oct 17 18:43:11 2019 +0530
+++ b/source/encoder/ratecontrol.cpp Thu Oct 17 19:12:09 2019 +0530
@@ -493,6 +493,7 @@
CMP_OPT_FIRST_PASS("open-gop", m_param->bOpenGOP);
CMP_OPT_FIRST_PASS(" keyint", m_param->keyframeMax);
CMP_OPT_FIRST_PASS("scenecut", m_param->scenecutThreshold);
+ CMP_OPT_FIRST_PASS("hist-threshold",
m_param->edgeTransitionThreshold);
CMP_OPT_FIRST_PASS("intra-refresh",
m_param->bIntraRefresh);
if (m_param->bMultiPassOptRPS)
{
@@ -1183,6 +1184,7 @@
m_param->rc.bStatRead = 0;
m_param->bFrameAdaptive = 0;
m_param->scenecutThreshold = 0;
+ m_param->bHistBasedSceneCut = false;
m_param->rc.cuTree = 0;
if (m_param->bframes > 1)
m_param->bframes = 1;
@@ -2173,7 +2175,7 @@
if (m_isVbv && m_currentSatd > 0 && curFrame)
{
if (m_param->lookaheadDepth || m_param->rc.cuTree ||
- m_param->scenecutThreshold ||
+ (m_param->scenecutThreshold || m_param->bHistBasedSceneCut) ||
(m_param->bFrameAdaptive && m_param->bframes))
{
/* Lookahead VBV: If lookahead is done, raise the quantizer as
necessary
diff -r 978a57943c8f -r c6d2c4475363 source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp Thu Oct 17 18:43:11 2019 +0530
+++ b/source/encoder/slicetype.cpp Thu Oct 17 19:12:09 2019 +0530
@@ -30,6 +30,7 @@
#include "primitives.h"
#include "lowres.h"
#include "mv.h"
+#include "histscenecut.h"
#include "slicetype.h"
#include "motion.h"
@@ -114,8 +115,8 @@
//Applying Gaussian filter on the picture
src = (pixel*)curFrame->m_fencPic->m_picOrg[0];
refPic = curFrame->m_gaussianPic + curFrame->m_fencPic->m_lumaMarginY
* stride + curFrame->m_fencPic->m_lumaMarginX;
+// edgePic = curFrame->m_edgePic + curFrame->m_fencPic->m_lumaMarginY *
stride + curFrame->m_fencPic->m_lumaMarginX;
pixel pixelValue = 0;
-
for (int rowNum = 0; rowNum < height; rowNum++)
{
for (int colNum = 0; colNum < width; colNum++)
@@ -127,7 +128,7 @@
1 [4 9 12 9 4]
--- [5 12 15 12 5]
159 [4 9 12 9 4]
- [2 4 5 4 2]*/
+ [2 4 5 4 2] */
const intptr_t rowOne = (rowNum - 2)*stride, colOne =
colNum - 2;
const intptr_t rowTwo = (rowNum - 1)*stride, colTwo =
colNum - 1;
@@ -145,52 +146,8 @@
}
}
}
-
-#if HIGH_BIT_DEPTH //10-bit build
- float threshold = 1023;
- pixel whitePixel = 1023;
-#else
- float threshold = 255;
- pixel whitePixel = 255;
-#endif
-#define PI 3.14159265
-
- float gradientH = 0, gradientV = 0, radians = 0, theta = 0;
- float gradientMagnitude = 0;
- pixel blackPixel = 0;
- edgePic = curFrame->m_edgePic + curFrame->m_fencPic->m_lumaMarginY *
stride + curFrame->m_fencPic->m_lumaMarginX;
- //Applying Sobel filter on the gaussian filtered picture
- for (int rowNum = 0; rowNum < height; rowNum++)
- {
- for (int colNum = 0; colNum < width; colNum++)
- {
- edgeTheta[(rowNum*stride) + colNum] = 0;
- if ((rowNum != 0) && (colNum != 0) && (rowNum != height - 1)
&& (colNum != width - 1)) //Ignoring the border pixels of the picture
- {
- /*Horizontal and vertical gradients
- [ -3 0 3 ] [-3 -10 -3 ]
- gH = [ -10 0 10] gV = [ 0 0 0 ]
- [ -3 0 3 ] [ 3 10 3 ]*/
-
- const intptr_t rowOne = (rowNum - 1)*stride, colOne =
colNum -1;
- const intptr_t rowTwo = rowNum * stride, colTwo = colNum;
- const intptr_t rowThree = (rowNum + 1)*stride, colThree =
colNum + 1;
- const intptr_t index = (rowNum*stride) + colNum;
-
- gradientH = (float)(-3 * refPic[rowOne + colOne] + 3 *
refPic[rowOne + colThree] - 10 * refPic[rowTwo + colOne] + 10 *
refPic[rowTwo + colThree] - 3 * refPic[rowThree + colOne] + 3 *
refPic[rowThree + colThree]);
- gradientV = (float)(-3 * refPic[rowOne + colOne] - 10 *
refPic[rowOne + colTwo] - 3 * refPic[rowOne + colThree] + 3 *
refPic[rowThree + colOne] + 10 * refPic[rowThree + colTwo] + 3 *
refPic[rowThree + colThree]);
-
- gradientMagnitude = sqrtf(gradientH * gradientH +
gradientV * gradientV);
- radians = atan2(gradientV, gradientH);
- theta = (float)((radians * 180) / PI);
- if (theta < 0)
- theta = 180 + theta;
- edgeTheta[(rowNum*stride) + colNum] = (pixel)theta;
-
- edgePic[index] = gradientMagnitude >= threshold ?
whitePixel : blackPixel;
- }
- }
- }
+ if(!computeEdge(edgePic, refPic, edgeTheta, stride, height, width))
+ x265_log(NULL, X265_LOG_ERROR, "Failed edge computation!");
}
//Find the angle of a block by averaging the pixel angles
@@ -1471,7 +1428,7 @@
if (m_lastNonB && !m_param->rc.bStatRead &&
((m_param->bFrameAdaptive && m_param->bframes) ||
- m_param->rc.cuTree || m_param->scenecutThreshold ||
+ m_param->rc.cuTree || m_param->scenecutThreshold ||
m_param->bHistBasedSceneCut ||
(m_param->lookaheadDepth && m_param->rc.vbvBufferSize)))
{
slicetypeAnalyse(frames, false);
@@ -1962,10 +1919,15 @@
int numBFrames = 0;
int numAnalyzed = numFrames;
- bool isScenecut = scenecut(frames, 0, 1, true, origNumFrames);
+ bool isScenecut = false;
/* When scenecut threshold is set, use scenecut detection for I frame
placements */
- if (m_param->scenecutThreshold && isScenecut)
+ if (m_param->scenecutThreshold)
+ isScenecut = scenecut(frames, 0, 1, true, origNumFrames);
+ else if (m_param->bHistBasedSceneCut)
+ isScenecut = frames[1]->bScenecut;
+
+ if (isScenecut)
{
frames[1]->sliceType = X265_TYPE_I;
return;
@@ -1976,14 +1938,24 @@
m_extendGopBoundary = false;
for (int i = m_param->bframes + 1; i < origNumFrames; i +=
m_param->bframes + 1)
{
- scenecut(frames, i, i + 1, true, origNumFrames);
+ if (m_param->scenecutThreshold)
+ scenecut(frames, i, i + 1, true, origNumFrames);
+
for (int j = i + 1; j <= X265_MIN(i + m_param->bframes + 1,
origNumFrames); j++)
{
- if (frames[j]->bScenecut && scenecutInternal(frames, j -
1, j, true) )
- {
- m_extendGopBoundary = true;
- break;
- }
+ if (m_param->scenecutThreshold)
+ {
+ if (frames[j]->bScenecut &&
scenecutInternal(frames, j - 1, j, true))
+ {
+ m_extendGopBoundary = true;
+ break;
+ }
+ }
+ else if(m_param->bHistBasedSceneCut &&
frames[j]->bScenecut)
+ {
+ m_extendGopBoundary = true;
+ break;
+ }
}
if (m_extendGopBoundary)
break;
@@ -2088,13 +2060,25 @@
{
for (int j = 1; j < numBFrames + 1; j++)
{
- if (scenecut(frames, j, j + 1, false, origNumFrames) ||
- (bForceRADL && (frames[j]->frameNum == preRADL)))
+ if (m_param->bHistBasedSceneCut)
{
- frames[j]->sliceType = X265_TYPE_P;
- numAnalyzed = j;
- break;
+ if (frames[j]->bScenecut || (bForceRADL &&
(frames[j]->frameNum == preRADL)))
+ {
+ frames[j]->sliceType = X265_TYPE_P;
+ numAnalyzed = j;
+ break;
+ }
}
+ else if (m_param->scenecutThreshold)
+ {
+ if ( scenecut(frames, j, j + 1, false, origNumFrames)
|| (bForceRADL && (frames[j]->frameNum == preRADL)) )
+ {
+ frames[j]->sliceType = X265_TYPE_P;
+ numAnalyzed = j;
+ break;
+ }
+ }
+
}
}
resetStart = bKeyframe ? 1 : X265_MIN(numBFrames + 2, numAnalyzed
+ 1);
diff -r 978a57943c8f -r c6d2c4475363 source/test/regression-tests.txt
--- a/source/test/regression-tests.txt Thu Oct 17 18:43:11 2019 +0530
+++ b/source/test/regression-tests.txt Thu Oct 17 19:12:09 2019 +0530
@@ -158,6 +158,9 @@
ducks_take_off_420_1_720p50.y4m,--preset medium --selective-sao 4 --sao
--crf 20
Traffic_4096x2048_30p.y4m, --preset medium --frame-dup --dup-threshold 60
--hrd --bitrate 10000 --vbv-bufsize 15000 --vbv-maxrate 12000
Kimono1_1920x1080_24_400.yuv,--preset superfast --qp 28 --zones 0,139,q=32
+sintel_trailer_2k_1920x1080_24.yuv, --preset medium --hist-scenecut
--hist-threshold 0.01
+Traffic_4096x2048_30p.y4m, --preset medium --frame-dup --dup-threshold 60
--hrd --bitrate 10000 --vbv-bufsize 15000 --vbv-maxrate 12000
--hist-scenecut --hist-threshold 0.01
+sintel_trailer_2k_1920x1080_24.yuv, --preset medium --scenecut 40
--scenecut-bias 20
# Main12 intraCost overflow bug test
720p50_parkrun_ter.y4m,--preset medium
diff -r 978a57943c8f -r c6d2c4475363 source/x265cli.h
--- a/source/x265cli.h Thu Oct 17 18:43:11 2019 +0530
+++ b/source/x265cli.h Thu Oct 17 19:12:09 2019 +0530
@@ -129,6 +129,9 @@
{ "scenecut", required_argument, NULL, 0 },
{ "no-scenecut", no_argument, NULL, 0 },
{ "scenecut-bias", required_argument, NULL, 0 },
+ { "hist-scenecut", no_argument, NULL, 0},
+ { "no-hist-scenecut", no_argument, NULL, 0},
+ { "hist-threshold", required_argument, NULL, 0},
{ "fades", no_argument, NULL, 0 },
{ "no-fades", no_argument, NULL, 0 },
{ "radl", required_argument, NULL, 0 },
@@ -485,7 +488,10 @@
H0(" --gop-lookahead <integer> Extends gop boundary if a
scenecut is found within this from keyint boundary. Default 0\n");
H0(" --no-scenecut Disable adaptive I-frame
decision\n");
H0(" --scenecut <integer> How aggressively to insert extra
I-frames. Default %d\n", param->scenecutThreshold);
- H1(" --scenecut-bias <0..100.0> Bias for scenecut detection.
Default %.2f\n", param->scenecutBias);
+ H0(" --hist-scenecut ..... Enables improved scene-cut
detection using histogram based algorithm.");
+ H0(" --no-hist-scenecut Disables improved scene-cut
detection using histogram based algorithm. ");
+ H0(" --scenecut-bias <0..100.0> Bias for scenecut detection.
Default %.2f\n", param->scenecutBias);
+ H0(" --hist-threshold <0.0..2.0> Threshold for histogram based
scenecut detection Default %.2f\n", param->edgeTransitionThreshold);
H0(" --[no-]fades Enable detection and handling of
fade-in regions. Default %s\n", OPT(param->bEnableFades));
H0(" --radl <integer> Number of RADL pictures allowed
in front of IDR. Default %d\n", param->radl);
H0(" --intra-refresh Use Periodic Intra Refresh
instead of IDR frames\n");
--
*With Regards,*
*Srikanth Kurapati.*
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20191017/da1bb3ac/attachment-0001.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 2-HistSceneCut.patch
Type: application/octet-stream
Size: 36090 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20191017/da1bb3ac/attachment-0001.obj>
More information about the x265-devel
mailing list