[x265] [x265 Patch] Histogram Based Scene Cut Detection
Srikanth Kurapati
srikanth.kurapati at multicorewareinc.com
Mon Nov 18 09:27:34 CET 2019
Please ignore this older version.
On Wed, Nov 13, 2019 at 7:19 PM Srikanth Kurapati <
srikanth.kurapati at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Srikanth Kurapati <srikanth.kurapati at multicorewareinc.com>
> # Date 1573649311 -19800
> # Wed Nov 13 18:18:31 2019 +0530
> # Node ID 36d20a880ddc3df6089bb83ff4fb494f1113a03a
> # Parent 04db2bfee5d628d931d1407355b909ac8ff1c898
> Histogram based scenecut detection
>
> This patch does the following.
> 1.Identifies scenecuts by thresholding against sad of edge and chroma
> histograms.
> 2.Add option "--hist-scenecut" to enable histogram based scenecut method.
> 3.Add option "--hist-threshold" to provide threshold for determining
> scene-cuts.
> 4.Optimizes frame duplication through reuse of sad for marking duplicate
> frames.
>
> diff -r 04db2bfee5d6 -r 36d20a880ddc doc/reST/cli.rst
> --- a/doc/reST/cli.rst Thu Oct 31 16:23:27 2019 +0530
> +++ b/doc/reST/cli.rst Wed Nov 13 18:18:31 2019 +0530
> @@ -1426,7 +1426,23 @@
> This value represents the percentage difference between the inter cost
> and
> intra cost of a frame used in scenecut detection. For example, a value
> of 5 indicates,
> if the inter cost of a frame is greater than or equal to 95 percent of
> the intra cost of the frame,
> - then detect this frame as scenecut. Values between 5 and 15 are
> recommended. Default 5.
> + then detect this frame as scenecut. Values between 5 and 15 are
> recommended.
> + This value is evaluated only when --scenecut is enabled else it is
> ignored. Default 5.
> +
> +.. option:: --hist-scenecut, --no-hist-scenecut
> +
> + Indicates that scenecuts are to be detected using luma edge and chroma
> histograms.
> + option: `--hist-scenecut` enables scenecut detection using the
> histograms and disables the default scene cut algorithm.
> + option: `--no-hist-scenecut` disables histogram based scenecut algorithm.
> +
> + Note that if --hist-scenecut and --scenecut are enabled together, the
> option specified first by the user takes effect and the other is ignored.
> +
> +.. option:: --hist-threshold <0.0..2.0>
> +
> + This value represents the threshold for normalized SAD of edge
> histograms used in scenecut detection.
> + This requires hist-scenecut to be enabled. For example, a value of 0.2
> indicates that a frame with normalized SAD value
> + greater than 0.2 against the previous frame is detected as a scenecut.
> + Default 0.01.
>
> .. option:: --radl <integer>
>
> diff -r 04db2bfee5d6 -r 36d20a880ddc source/CMakeLists.txt
> --- a/source/CMakeLists.txt Thu Oct 31 16:23:27 2019 +0530
> +++ b/source/CMakeLists.txt Wed Nov 13 18:18:31 2019 +0530
> @@ -29,7 +29,7 @@
> option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF)
> mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
> # X265_BUILD must be incremented each time the public API is changed
> -set(X265_BUILD 182)
> +set(X265_BUILD 183)
> configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
> "${PROJECT_BINARY_DIR}/x265.def")
> configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
> diff -r 04db2bfee5d6 -r 36d20a880ddc source/common/common.h
> --- a/source/common/common.h Thu Oct 31 16:23:27 2019 +0530
> +++ b/source/common/common.h Wed Nov 13 18:18:31 2019 +0530
> @@ -129,12 +129,16 @@
> typedef uint64_t sum2_t;
> typedef uint64_t pixel4;
> typedef int64_t ssum2_t;
> +#define HISTOGRAM_BINS 1024
> +#define SHIFT 1
> #else
> typedef uint8_t pixel;
> typedef uint16_t sum_t;
> typedef uint32_t sum2_t;
> typedef uint32_t pixel4;
> typedef int32_t ssum2_t; // Signed sum
> +#define HISTOGRAM_BINS 256
> +#define SHIFT 0
> #endif // if HIGH_BIT_DEPTH
>
> #if X265_DEPTH < 10
> diff -r 04db2bfee5d6 -r 36d20a880ddc source/common/param.cpp
> --- a/source/common/param.cpp Thu Oct 31 16:23:27 2019 +0530
> +++ b/source/common/param.cpp Wed Nov 13 18:18:31 2019 +0530
> @@ -167,6 +167,8 @@
> param->bFrameAdaptive = X265_B_ADAPT_TRELLIS;
> param->bBPyramid = 1;
> param->scenecutThreshold = 40; /* Magic number pulled in from x264 */
> + param->edgeTransitionThreshold = 0.01;
> + param->bHistBasedSceneCut = false;
> param->lookaheadSlices = 8;
> param->lookaheadThreads = 0;
> param->scenecutBias = 5.0;
> @@ -572,6 +574,7 @@
> param->bframes = 0;
> param->lookaheadDepth = 0;
> param->scenecutThreshold = 0;
> + param->bHistBasedSceneCut = false;
> param->rc.cuTree = 0;
> param->frameNumThreads = 1;
> }
> @@ -614,7 +617,7 @@
> return 0;
> }
>
> -static int x265_atobool(const char* str, bool& bError)
> +static bool x265_atobool(const char* str, bool& bError)
> {
> if (!strcmp(str, "1") ||
> !strcmp(str, "true") ||
> @@ -764,6 +767,7 @@
> bool bNameWasBool = false;
> bool bValueWasNull = !value;
> bool bExtraParams = false;
> + static int scenecutChoice = -1;
> char nameBuf[64];
> static int count;
>
> @@ -920,11 +924,17 @@
> OPT("lookahead-slices") p->lookaheadSlices = atoi(value);
> OPT("scenecut")
> {
> - p->scenecutThreshold = atobool(value);
> - if (bError || p->scenecutThreshold)
> + if (scenecutChoice == -1)
> {
> - bError = false;
> - p->scenecutThreshold = atoi(value);
> + p->scenecutThreshold = atobool(value);
> + if (bError || p->scenecutThreshold)
> + {
> + bError = false;
> + p->scenecutThreshold = atoi(value);
> + p->bHistBasedSceneCut = false;
> + x265_log(p, X265_LOG_INFO, "I/P cost based scenecut
> method enabled\n");
> + scenecutChoice = 0;
> + }
> }
> }
> OPT("temporal-layers") p->bEnableTemporalSubLayers = atobool(value);
> @@ -1191,6 +1201,47 @@
> OPT("opt-ref-list-length-pps") p->bOptRefListLengthPPS =
> atobool(value);
> OPT("multi-pass-opt-rps") p->bMultiPassOptRPS = atobool(value);
> OPT("scenecut-bias") p->scenecutBias = atof(value);
> + OPT("hist-scenecut")
> + {
> + if (scenecutChoice == -1)
> + {
> + p->bHistBasedSceneCut = atobool(value);
> + if (bError)
> + {
> + bError = false;
> + p->bHistBasedSceneCut = false;
> + }
> + if (p->bHistBasedSceneCut)
> + {
> + bError = false;
> + p->scenecutThreshold = 0;
> + scenecutChoice = 1;
> + x265_log(p, X265_LOG_INFO, "Histogram based scenecut
> method enabled \n");
> + }
> + }
> + else
> + {
> + p->bHistBasedSceneCut = atobool(value);
> + p->bHistBasedSceneCut = false;
> + }
> + }
> + OPT("hist-threshold")
> + {
> + if (p->bHistBasedSceneCut)
> + {
> + p->edgeTransitionThreshold = atof(value);
> + if (bError)
> + {
> + bError = false;
> + p->edgeTransitionThreshold = 0.01;
> + x265_log(p, X265_LOG_INFO, "Using default threshold
> %.2lf for scene cut detection\n", p->edgeTransitionThreshold);
> + }
> + }
> + else
> + {
> + x265_log(p, X265_LOG_WARNING, "Histogram based scene cut
> detection not enabled\n", p->edgeTransitionThreshold);
> + }
> + }
> OPT("lookahead-threads") p->lookaheadThreads = atoi(value);
> OPT("opt-cu-delta-qp") p->bOptCUDeltaQP = atobool(value);
> OPT("multi-pass-opt-analysis") p->analysisMultiPassRefine =
> atobool(value);
> @@ -1631,8 +1682,16 @@
> "Valid Logging level -1:none 0:error 1:warning 2:info 3:debug
> 4:full");
> CHECK(param->scenecutThreshold < 0,
> "scenecutThreshold must be greater than 0");
> - CHECK(param->scenecutBias < 0 || 100 < param->scenecutBias,
> - "scenecut-bias must be between 0 and 100");
> + if (param->scenecutThreshold)
> + {
> + CHECK(param->scenecutBias < 0 || 100 < param->scenecutBias,
> + "scenecut-bias must be between 0 and 100");
> + }
> + else if (param->bHistBasedSceneCut)
> + {
> + CHECK(param->edgeTransitionThreshold < 0.0 || 2.0 <
> param->edgeTransitionThreshold,
> + "hist-threshold must be between 0.0 and 2.0");
> + }
> CHECK(param->radl < 0 || param->radl > param->bframes,
> "radl must be between 0 and bframes");
> CHECK(param->rdPenalty < 0 || param->rdPenalty > 2,
> @@ -1792,9 +1851,13 @@
> x265_log(param, X265_LOG_INFO, "ME / range / subpel / merge
> : %s / %d / %d / %d\n",
> x265_motion_est_names[param->searchMethod],
> param->searchRange, param->subpelRefine, param->maxNumMergeCand);
>
> - if (param->keyframeMax != INT_MAX || param->scenecutThreshold)
> - x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut /
> bias: %d / %d / %d / %.2lf\n", param->keyframeMin, param->keyframeMax,
> param->scenecutThreshold, param->scenecutBias * 100);
> - else
> + if (param->scenecutThreshold && param->keyframeMax != INT_MAX)
> + x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut /
> bias : %d / %d / %d / %.2lf \n",
> + param->keyframeMin, param->keyframeMax,
> param->scenecutThreshold, param->scenecutBias * 100);
> + else if (param->bHistBasedSceneCut && param->keyframeMax != INT_MAX)
> + x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut /
> edge threshold : %d / %d / %d / %.2lf\n",
> + param->keyframeMin, param->keyframeMax,
> param->bHistBasedSceneCut, param->edgeTransitionThreshold);
> + else if (param->keyframeMax == INT_MAX)
> x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut
> : disabled\n");
>
> if (param->cbQpOffset || param->crQpOffset)
> @@ -1961,6 +2024,8 @@
> s += sprintf(s, " rc-lookahead=%d", p->lookaheadDepth);
> s += sprintf(s, " lookahead-slices=%d", p->lookaheadSlices);
> s += sprintf(s, " scenecut=%d", p->scenecutThreshold);
> + s += sprintf(s, " hist-scenecut=%d", p->bHistBasedSceneCut);
> + s += sprintf(s, " hist-threshold=%.2f", p->edgeTransitionThreshold);
> s += sprintf(s, " radl=%d", p->radl);
> BOOL(p->bEnableHRDConcatFlag, "splice");
> BOOL(p->bIntraRefresh, "intra-refresh");
> @@ -2108,6 +2173,8 @@
> BOOL(p->bOptRefListLengthPPS, "opt-ref-list-length-pps");
> BOOL(p->bMultiPassOptRPS, "multi-pass-opt-rps");
> s += sprintf(s, " scenecut-bias=%.2f", p->scenecutBias);
> + s += sprintf(s, " hist-threshold=%.2f", p->edgeTransitionThreshold);
> +
> BOOL(p->bOptCUDeltaQP, "opt-cu-delta-qp");
> BOOL(p->bAQMotion, "aq-motion");
> BOOL(p->bEmitHDRSEI, "hdr");
> @@ -2261,6 +2328,7 @@
> dst->lookaheadSlices = src->lookaheadSlices;
> dst->lookaheadThreads = src->lookaheadThreads;
> dst->scenecutThreshold = src->scenecutThreshold;
> + dst->bHistBasedSceneCut = src->bHistBasedSceneCut;
> dst->bIntraRefresh = src->bIntraRefresh;
> dst->maxCUSize = src->maxCUSize;
> dst->minCUSize = src->minCUSize;
> @@ -2420,6 +2488,7 @@
> dst->bOptRefListLengthPPS = src->bOptRefListLengthPPS;
> dst->bMultiPassOptRPS = src->bMultiPassOptRPS;
> dst->scenecutBias = src->scenecutBias;
> + dst->edgeTransitionThreshold = src->edgeTransitionThreshold;
> dst->gopLookahead = src->lookaheadDepth;
> dst->bOptCUDeltaQP = src->bOptCUDeltaQP;
> dst->analysisMultiPassDistortion = src->analysisMultiPassDistortion;
> diff -r 04db2bfee5d6 -r 36d20a880ddc source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp Thu Oct 31 16:23:27 2019 +0530
> +++ b/source/encoder/encoder.cpp Wed Nov 13 18:18:31 2019 +0530
> @@ -130,12 +130,17 @@
> #if SVT_HEVC
> m_svtAppData = NULL;
> #endif
> -
> m_prevTonemapPayload.payload = NULL;
> m_startPoint = 0;
> m_saveCTUSize = 0;
> + m_edgePic = NULL;
> + m_edgeHistThreshold = 0;
> + m_chromaHistThreshold = 0.0;
> + m_scaledEdgeThreshold = 0.0;
> + m_scaledChromaThreshold = 0.0;
> m_zoneIndex = 0;
> }
> +
> inline char *strcatFilename(const char *input, const char *suffix)
> {
> char *output = X265_MALLOC(char, strlen(input) + strlen(suffix) + 1);
> @@ -210,6 +215,24 @@
> }
> }
>
> + if (m_param->bHistBasedSceneCut)
> + {
> + for (int i = 0; i < x265_cli_csps[m_param->internalCsp].planes;
> i++)
> + {
> + m_planeSizes[i] = m_param->sourceWidth * m_param->sourceHeight
> >> x265_cli_csps[m_param->internalCsp].height[i];
> + }
> + uint32_t pixelbytes = m_param->sourceBitDepth > 8 ? 2 : 1;
> + m_edgePic = X265_MALLOC(pixel, m_planeSizes[0]*pixelbytes);
> + double strengthFactor = 2.0;
> + m_edgeHistThreshold = m_param->edgeTransitionThreshold;
> + m_chromaHistThreshold = m_edgeHistThreshold * 10.0;
> + m_chromaHistThreshold = x265_min(m_chromaHistThreshold,
> MAX_SCENECUT_THRESHOLD);
> + m_scaledEdgeThreshold = m_edgeHistThreshold * strengthFactor;
> + m_scaledEdgeThreshold = x265_min(m_scaledEdgeThreshold,
> MAX_SCENECUT_THRESHOLD);
> + m_scaledChromaThreshold = m_chromaHistThreshold * strengthFactor;
> + m_scaledChromaThreshold = x265_min(m_scaledChromaThreshold,
> MAX_SCENECUT_THRESHOLD);
> + }
> +
> // Do not allow WPP if only one row or fewer than 3 columns, it is
> pointless and unstable
> if (rows == 1 || cols < 3)
> {
> @@ -854,6 +877,12 @@
> }
> }
>
> + if (m_param->bHistBasedSceneCut)
> + {
> + if(m_edgePic != NULL)
> + X265_FREE_ZERO(m_edgePic);
> + }
> +
> for (int i = 0; i < m_param->frameNumThreads; i++)
> {
> if (m_frameEncoder[i])
> @@ -1313,6 +1342,141 @@
> dest->planes[2] = (char*)dest->planes[1] + src->stride[1] *
> (src->height >> x265_cli_csps[src->colorSpace].height[1]);
> }
>
> +bool Encoder::computeHistograms(x265_picture *pic)
> +{
> + pixel *src = (pixel*)pic->planes[0];
> + size_t bufSize = sizeof(pixel) * m_planeSizes[0];
> + int32_t planeCount = x265_cli_csps[m_param->internalCsp].planes;
> + int32_t numBytes = m_param->sourceBitDepth > 8 ? 2 : 1;
> + memset(m_edgePic, 0, bufSize*numBytes);
> +
> + if (!computeEdge(m_edgePic, src, NULL, pic->width, pic->height,
> pic->width, false))
> + {
> + x265_log(m_param, X265_LOG_ERROR, "Failed edge computation!");
> + return false;
> + }
> +
> + pixel pixelVal;
> + int64_t size = pic->height*(pic->stride[0] >> SHIFT);
> + int32_t * edgeHist = m_curEdgeHist;
> + memset(edgeHist, 0, 2 * sizeof(int32_t));
> + for (int64_t i = 0; i < size; i++)
> + {
> + if (!m_edgePic[i])
> + edgeHist[0]++;
> + else
> + edgeHist[1]++;
> + }
> +
> + /*U Histogram Calculation*/
> + int32_t HeightL = (pic->height >>
> x265_cli_csps[pic->colorSpace].height[1]);
> + size = HeightL * (pic->stride[1] >> SHIFT);
> + int32_t * uHist = m_curUVHist[0];
> + pixel * chromaPlane = (pixel *)pic->planes[1];
> +
> + memset(uHist, 0, HISTOGRAM_BINS * sizeof(int32_t));
> +
> + for (int64_t i = 0; i < size; i++)
> + {
> + pixelVal = chromaPlane[i];
> + uHist[pixelVal]++;
> + }
> +
> + /*V Histogram Calculation */
> + if (planeCount == 3)
> + {
> + pixelVal = 0;
> + int32_t heightV = (pic->height >>
> x265_cli_csps[pic->colorSpace].height[2]);
> + size = heightV * (pic->stride[2] >> SHIFT);
> + int32_t * vHist = m_curUVHist[1];
> + chromaPlane = (pixel *)pic->planes[2];
> +
> + memset(vHist, 0, HISTOGRAM_BINS * sizeof(int32_t));
> + for (int64_t i = 0; i < size; i++)
> + {
> + pixelVal = chromaPlane[i];
> + vHist[pixelVal]++;
> + }
> + for (int i = 0; i < HISTOGRAM_BINS; i++)
> + {
> + m_curMaxUVHist[i] = x265_max(uHist[i],vHist[i]);
> + }
> + }
> + else
> + { /* in case of bi planar color space */
> +
> memcpy(m_curMaxUVHist,m_curUVHist[0],HISTOGRAM_BINS*sizeof(int32_t));
> + }
> +
> + return true;
> +}
> +
> +void Encoder::computeHistogramSAD(double *maxUVNormalizedSad, double
> *edgeNormalizedSad, int curPoc)
> +{
> +
> + if (curPoc == 0)
> + { /* first frame is scenecut by default no sad computation for the
> same. */
> + *maxUVNormalizedSad = 0.0;
> + *edgeNormalizedSad = 0.0;
> + }
> + else
> + {
> + /* compute sum of absolute difference of normalized histogram
> bins for maxUV and edge histograms. */
> + int32_t edgefreqDiff = 0;
> + int32_t maxUVfreqDiff = 0;
> + double edgeProbabilityDiff = 0;
> +
> + for (int j = 0; j < HISTOGRAM_BINS; j++)
> + {
> + if (j < 2 )
> + {
> + edgefreqDiff = abs(m_curEdgeHist[j] - m_prevEdgeHist[j]);
> + edgeProbabilityDiff = (double) edgefreqDiff /
> m_planeSizes[0];
> + *edgeNormalizedSad += edgeProbabilityDiff;
> + }
> + maxUVfreqDiff = abs(m_curMaxUVHist[j] - m_prevMaxUVHist[j]);
> + *maxUVNormalizedSad += (double)maxUVfreqDiff /
> m_planeSizes[2];
> + }
> + }
> +
> + /* store histograms of previous frame for reference */
> + size_t bufsize = HISTOGRAM_BINS * sizeof(int32_t);
> + memcpy(m_prevMaxUVHist, m_curMaxUVHist, bufsize);
> + memcpy(m_prevEdgeHist, m_curEdgeHist, 2*sizeof(int32_t));
> +
> +}
> +
> +void Encoder::findSceneCuts(x265_picture * pic, bool& bDup, double
> maxUVSad, double edgeSad)
> +{
> + pic->frameData.bScenecut = false;
> +
> + if (pic->poc == 0)
> + {
> + //for first frame
> + pic->frameData.bScenecut = false;
> + bDup = false;
> + }
> + else
> + {
> + if (edgeSad == 0.0 && maxUVSad == 0.0)
> + {
> + bDup = true;
> + }
> + else if (edgeSad > m_edgeHistThreshold && maxUVSad >=
> m_chromaHistThreshold)
> + {
> + pic->frameData.bScenecut = true;
> + bDup = false;
> + }
> + else if (edgeSad > m_scaledEdgeThreshold || maxUVSad >=
> m_scaledChromaThreshold)
> + {
> + pic->frameData.bScenecut = true;
> + bDup = false;
> + }
> + }
> +
> + if (pic->frameData.bScenecut)
> + x265_log(m_param, X265_LOG_DEBUG, "scene cut at %d \n",pic->poc);
> +}
> +
> /**
> * Feed one new input frame into the encoder, get one frame out. If
> pic_in is
> * NULL, a flush condition is implied and pic_in must be NULL for all
> subsequent
> @@ -1339,6 +1503,8 @@
> const x265_picture* inputPic = NULL;
> static int written = 0, read = 0;
> bool dontRead = false;
> + bool bdropFrame = false;
> + bool dropflag = false;
>
> if (m_exportedPic)
> {
> @@ -1350,6 +1516,17 @@
> }
> if ((pic_in && (!m_param->chunkEnd || (m_encodedFrameNum <
> m_param->chunkEnd))) || (m_param->bEnableFrameDuplication && !pic_in &&
> (read < written)))
> {
> + if (m_param->bHistBasedSceneCut && pic_in)
> + {
> + x265_picture *pic = (x265_picture *) pic_in;
> + if (computeHistograms(pic))
> + {
> + double maxUVSad = 0.0, edgeSad = 0.0;
> + computeHistogramSAD(&maxUVSad, &edgeSad,pic_in->poc);
> + findSceneCuts(pic, bdropFrame, maxUVSad, edgeSad);
> + }
> + }
> +
> if ((m_param->bEnableFrameDuplication && !pic_in && (read <
> written)))
> dontRead = true;
> else
> @@ -1393,9 +1570,27 @@
> written++;
> }
>
> - psnrWeight = ComputePSNR(m_dupBuffer[0]->dupPic,
> m_dupBuffer[1]->dupPic, m_param);
> -
> - if (psnrWeight >= m_param->dupThreshold)
> + if (m_param->bEnableFrameDuplication &&
> m_param->bHistBasedSceneCut)
> + {
> + if (!bdropFrame &&
> m_dupBuffer[1]->dupPic->frameData.bScenecut == false)
> + {
> + psnrWeight = ComputePSNR(m_dupBuffer[0]->dupPic,
> m_dupBuffer[1]->dupPic, m_param);
> + if (psnrWeight >= m_param->dupThreshold)
> + dropflag = true;
> + }
> + else
> + {
> + dropflag = true;
> + }
> + }
> + else if (m_param->bEnableFrameDuplication)
> + {
> + psnrWeight = ComputePSNR(m_dupBuffer[0]->dupPic,
> m_dupBuffer[1]->dupPic, m_param);
> + if (psnrWeight >= m_param->dupThreshold)
> + dropflag = true;
> + }
> +
> + if (dropflag)
> {
> if (m_dupBuffer[0]->bDup)
> {
> @@ -1498,6 +1693,10 @@
> inFrame->m_poc = ++m_pocLast;
> inFrame->m_userData = inputPic->userData;
> inFrame->m_pts = inputPic->pts;
> + if (m_param->bHistBasedSceneCut)
> + {
> + inFrame->m_lowres.bScenecut = inputPic->frameData.bScenecut;
> + }
> inFrame->m_forceqp = inputPic->forceqp;
> inFrame->m_param = (m_reconfigure || m_reconfigureRc) ?
> m_latestParam : m_param;
> inFrame->m_picStruct = inputPic->picStruct;
> @@ -3209,6 +3408,7 @@
> * adaptive I frame placement */
> p->keyframeMax = INT_MAX;
> p->scenecutThreshold = 0;
> + p->bHistBasedSceneCut = 0;
> }
> else if (p->keyframeMax <= 1)
> {
> @@ -3222,6 +3422,7 @@
> p->lookaheadDepth = 0;
> p->bframes = 0;
> p->scenecutThreshold = 0;
> + p->bHistBasedSceneCut = 0;
> p->bFrameAdaptive = 0;
> p->rc.cuTree = 0;
> p->bEnableWeightedPred = 0;
> @@ -3881,6 +4082,17 @@
> m_param->searchMethod = m_param->hmeSearchMethod[2];
> }
> }
> +
> + if (p->scenecutThreshold && p->edgeTransitionThreshold != 0.01)
> + {
> + x265_log(p, X265_LOG_WARNING, "using default scenecut-bias %.2lf
> for scene cut detection\n",p->scenecutBias);
> + }
> + else if (p->bHistBasedSceneCut && p->edgeTransitionThreshold == 0.0)
> + {
> + p->edgeTransitionThreshold = 0.01;
> + x265_log(p, X265_LOG_WARNING, "using default threshold %.2lf for
> scene cut detection\n", p->edgeTransitionThreshold);
> + }
> +
> }
>
> void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc,
> const x265_picture* picIn, int paramBytes)
> diff -r 04db2bfee5d6 -r 36d20a880ddc source/encoder/encoder.h
> --- a/source/encoder/encoder.h Thu Oct 31 16:23:27 2019 +0530
> +++ b/source/encoder/encoder.h Wed Nov 13 18:18:31 2019 +0530
> @@ -156,7 +156,6 @@
> bool bDup;
> };
>
> -
> class FrameEncoder;
> class DPB;
> class Lookahead;
> @@ -164,6 +163,8 @@
> class ThreadPool;
> class FrameData;
>
> +#define MAX_SCENECUT_THRESHOLD 2.0
> +
> class Encoder : public x265_encoder
> {
> public:
> @@ -228,7 +229,7 @@
> bool m_reconfigureRc;
> bool m_reconfigureZone;
>
> - int m_saveCtuDistortionLevel;
> + int m_saveCtuDistortionLevel;
>
> /* Begin intra refresh when one not in progress or else begin one as
> soon as the current
> * one is done. Requires bIntraRefresh to be set.*/
> @@ -245,11 +246,24 @@
> Lock m_rpsInSpsLock;
> int m_rpsInSpsCount;
> /* For HDR*/
> - double m_cB;
> - double m_cR;
> + double m_cB;
> + double m_cR;
> +
> + int m_bToneMap; // Enables tone-mapping
> + int m_enableNal;
>
> - int m_bToneMap; // Enables tone-mapping
> - int m_enableNal;
> + /* For histogram based scene-cut detection */
> + pixel* m_edgePic;
> + int32_t m_curUVHist[2][HISTOGRAM_BINS];
> + int32_t m_curMaxUVHist[HISTOGRAM_BINS];
> + int32_t m_prevMaxUVHist[HISTOGRAM_BINS];
> + int32_t m_curEdgeHist[2];
> + int32_t m_prevEdgeHist[2];
> + uint32_t m_planeSizes[3];
> + double m_edgeHistThreshold;
> + double m_chromaHistThreshold;
> + double m_scaledEdgeThreshold;
> + double m_scaledChromaThreshold;
>
> #ifdef ENABLE_HDR10_PLUS
> const hdr10plus_api *m_hdr10plus_api;
> @@ -355,6 +369,10 @@
>
> void copyPicture(x265_picture *dest, const x265_picture *src);
>
> + bool computeHistograms(x265_picture *pic);
> + void computeHistogramSAD(double *maxUVNormalizedSAD, double
> *edgeNormalizedSAD, int curPoc);
> + void findSceneCuts(x265_picture * pic, bool& bDup, double
> m_maxUVSADVal, double m_edgeSADVal);
> +
> void initRefIdx();
> void analyseRefIdx(int *numRefIdx);
> void updateRefIdx();
> diff -r 04db2bfee5d6 -r 36d20a880ddc source/encoder/ratecontrol.cpp
> --- a/source/encoder/ratecontrol.cpp Thu Oct 31 16:23:27 2019 +0530
> +++ b/source/encoder/ratecontrol.cpp Wed Nov 13 18:18:31 2019 +0530
> @@ -508,6 +508,7 @@
> CMP_OPT_FIRST_PASS("open-gop", m_param->bOpenGOP);
> CMP_OPT_FIRST_PASS(" keyint", m_param->keyframeMax);
> CMP_OPT_FIRST_PASS("scenecut",
> m_param->scenecutThreshold);
> + CMP_OPT_FIRST_PASS("hist-threshold",
> m_param->edgeTransitionThreshold);
> CMP_OPT_FIRST_PASS("intra-refresh",
> m_param->bIntraRefresh);
> if (m_param->bMultiPassOptRPS)
> {
> @@ -1200,6 +1201,7 @@
> m_param->rc.bStatRead = 0;
> m_param->bFrameAdaptive = 0;
> m_param->scenecutThreshold = 0;
> + m_param->bHistBasedSceneCut = false;
> m_param->rc.cuTree = 0;
> if (m_param->bframes > 1)
> m_param->bframes = 1;
> @@ -2284,7 +2286,7 @@
> if (m_isVbv && m_currentSatd > 0 && curFrame)
> {
> if (m_param->lookaheadDepth || m_param->rc.cuTree ||
> - m_param->scenecutThreshold ||
> + (m_param->scenecutThreshold || m_param->bHistBasedSceneCut) ||
> (m_param->bFrameAdaptive && m_param->bframes))
> {
> /* Lookahead VBV: If lookahead is done, raise the quantizer as
> necessary
> diff -r 04db2bfee5d6 -r 36d20a880ddc source/encoder/slicetype.cpp
> --- a/source/encoder/slicetype.cpp Thu Oct 31 16:23:27 2019 +0530
> +++ b/source/encoder/slicetype.cpp Wed Nov 13 18:18:31 2019 +0530
> @@ -85,6 +85,69 @@
>
> } // end anonymous namespace
>
> +namespace X265_NS {
> +
> +bool computeEdge(pixel *edgePic, pixel *refPic, pixel *edgeTheta,
> intptr_t stride, int height, int width, bool bcalcTheta)
> +{
> + intptr_t rowOne = 0, rowTwo = 0, rowThree = 0, colOne = 0, colTwo =
> 0, colThree = 0;
> + intptr_t middle = 0, topLeft = 0, topRight = 0, bottomLeft = 0,
> bottomRight = 0;
> +
> + const int startIndex = 1;
> +
> + if (!edgePic || !refPic || (!edgeTheta && bcalcTheta))
> + {
> + return false;
> + }
> + else
> + {
> + float gradientH = 0, gradientV = 0, radians = 0, theta = 0;
> + float gradientMagnitude = 0;
> + pixel blackPixel = 0;
> +
> + //Applying Sobel filter except for border pixels
> + height = height - startIndex;
> + width = width - startIndex;
> + for (int rowNum = startIndex; rowNum < height; rowNum++)
> + {
> + rowTwo = rowNum * stride;
> + rowOne = rowTwo - stride;
> + rowThree = rowTwo + stride;
> +
> + for (int colNum = startIndex; colNum < width; colNum++)
> + {
> +
> + /* Horizontal and vertical gradients
> + [ -3 0 3 ] [-3 -10 -3 ]
> + gH =[ -10 0 10] gV = [ 0 0 0 ]
> + [ -3 0 3 ] [ 3 10 3 ] */
> +
> + colTwo = colNum;
> + colOne = colTwo - startIndex;
> + colThree = colTwo + startIndex;
> + middle = rowTwo + colTwo;
> + topLeft = rowOne + colOne;
> + topRight = rowOne + colThree;
> + bottomLeft = rowThree + colOne;
> + bottomRight = rowThree + colThree;
> + gradientH = (float)(-3 * refPic[topLeft] + 3 *
> refPic[topRight] - 10 * refPic[rowTwo + colOne] + 10 * refPic[rowTwo +
> colThree] - 3 * refPic[bottomLeft] + 3 * refPic[bottomRight]);
> + gradientV = (float)(-3 * refPic[topLeft] - 10 *
> refPic[rowOne + colTwo] - 3 * refPic[topRight] + 3 * refPic[bottomLeft] +
> 10 * refPic[rowThree + colTwo] + 3 * refPic[bottomRight]);
> + gradientMagnitude = sqrtf(gradientH * gradientH +
> gradientV * gradientV);
> + if(bcalcTheta)
> + {
> + edgeTheta[middle] = 0;
> + radians = atan2(gradientV, gradientH);
> + theta = (float)((radians * 180) / PI);
> + if (theta < 0)
> + theta = 180 + theta;
> + edgeTheta[middle] = (pixel)theta;
> + }
> + edgePic[middle] = (pixel)(gradientMagnitude >=
> edgeThreshold ? edgeThreshold : blackPixel);
> + }
> + }
> + return true;
> + }
> +}
> +
> void edgeFilter(Frame *curFrame, x265_param* param)
> {
> int height = curFrame->m_fencPic->m_picHeight;
> @@ -114,6 +177,7 @@
> //Applying Gaussian filter on the picture
> src = (pixel*)curFrame->m_fencPic->m_picOrg[0];
> refPic = curFrame->m_gaussianPic + curFrame->m_fencPic->m_lumaMarginY
> * stride + curFrame->m_fencPic->m_lumaMarginX;
> + edgePic = curFrame->m_edgePic + curFrame->m_fencPic->m_lumaMarginY *
> stride + curFrame->m_fencPic->m_lumaMarginX;
> pixel pixelValue = 0;
>
> for (int rowNum = 0; rowNum < height; rowNum++)
> @@ -146,51 +210,8 @@
> }
> }
>
> -#if HIGH_BIT_DEPTH //10-bit build
> - float threshold = 1023;
> - pixel whitePixel = 1023;
> -#else
> - float threshold = 255;
> - pixel whitePixel = 255;
> -#endif
> -#define PI 3.14159265
> -
> - float gradientH = 0, gradientV = 0, radians = 0, theta = 0;
> - float gradientMagnitude = 0;
> - pixel blackPixel = 0;
> - edgePic = curFrame->m_edgePic + curFrame->m_fencPic->m_lumaMarginY *
> stride + curFrame->m_fencPic->m_lumaMarginX;
> - //Applying Sobel filter on the gaussian filtered picture
> - for (int rowNum = 0; rowNum < height; rowNum++)
> - {
> - for (int colNum = 0; colNum < width; colNum++)
> - {
> - edgeTheta[(rowNum*stride) + colNum] = 0;
> - if ((rowNum != 0) && (colNum != 0) && (rowNum != height - 1)
> && (colNum != width - 1)) //Ignoring the border pixels of the picture
> - {
> - /*Horizontal and vertical gradients
> - [ -3 0 3 ] [-3 -10 -3 ]
> - gH = [ -10 0 10] gV = [ 0 0 0 ]
> - [ -3 0 3 ] [ 3 10 3 ]*/
> -
> - const intptr_t rowOne = (rowNum - 1)*stride, colOne =
> colNum -1;
> - const intptr_t rowTwo = rowNum * stride, colTwo = colNum;
> - const intptr_t rowThree = (rowNum + 1)*stride, colThree =
> colNum + 1;
> - const intptr_t index = (rowNum*stride) + colNum;
> -
> - gradientH = (float)(-3 * refPic[rowOne + colOne] + 3 *
> refPic[rowOne + colThree] - 10 * refPic[rowTwo + colOne] + 10 *
> refPic[rowTwo + colThree] - 3 * refPic[rowThree + colOne] + 3 *
> refPic[rowThree + colThree]);
> - gradientV = (float)(-3 * refPic[rowOne + colOne] - 10 *
> refPic[rowOne + colTwo] - 3 * refPic[rowOne + colThree] + 3 *
> refPic[rowThree + colOne] + 10 * refPic[rowThree + colTwo] + 3 *
> refPic[rowThree + colThree]);
> -
> - gradientMagnitude = sqrtf(gradientH * gradientH +
> gradientV * gradientV);
> - radians = atan2(gradientV, gradientH);
> - theta = (float)((radians * 180) / PI);
> - if (theta < 0)
> - theta = 180 + theta;
> - edgeTheta[(rowNum*stride) + colNum] = (pixel)theta;
> -
> - edgePic[index] = gradientMagnitude >= threshold ?
> whitePixel : blackPixel;
> - }
> - }
> - }
> + if(!computeEdge(edgePic, refPic, edgeTheta, stride, height, width,
> true))
> + x265_log(NULL, X265_LOG_ERROR, "Failed edge computation!");
> }
>
> //Find the angle of a block by averaging the pixel angles
> @@ -1471,7 +1492,7 @@
>
> if (m_lastNonB && !m_param->rc.bStatRead &&
> ((m_param->bFrameAdaptive && m_param->bframes) ||
> - m_param->rc.cuTree || m_param->scenecutThreshold ||
> + m_param->rc.cuTree || m_param->scenecutThreshold ||
> m_param->bHistBasedSceneCut ||
> (m_param->lookaheadDepth && m_param->rc.vbvBufferSize)))
> {
> slicetypeAnalyse(frames, false);
> @@ -1971,10 +1992,15 @@
>
> int numBFrames = 0;
> int numAnalyzed = numFrames;
> - bool isScenecut = scenecut(frames, 0, 1, true, origNumFrames);
> + bool isScenecut = false;
>
> /* When scenecut threshold is set, use scenecut detection for I frame
> placements */
> - if (m_param->scenecutThreshold && isScenecut)
> + if (m_param->scenecutThreshold)
> + isScenecut = scenecut(frames, 0, 1, true, origNumFrames);
> + else if (m_param->bHistBasedSceneCut)
> + isScenecut = frames[1]->bScenecut;
> +
> + if (isScenecut)
> {
> frames[1]->sliceType = X265_TYPE_I;
> return;
> @@ -1985,13 +2011,16 @@
> m_extendGopBoundary = false;
> for (int i = m_param->bframes + 1; i < origNumFrames; i +=
> m_param->bframes + 1)
> {
> - scenecut(frames, i, i + 1, true, origNumFrames);
> + if (m_param->scenecutThreshold)
> + scenecut(frames, i, i + 1, true, origNumFrames);
> +
> for (int j = i + 1; j <= X265_MIN(i + m_param->bframes + 1,
> origNumFrames); j++)
> {
> - if (frames[j]->bScenecut && scenecutInternal(frames, j -
> 1, j, true) )
> + if (( m_param->scenecutThreshold && frames[j]->bScenecut
> && scenecutInternal(frames, j - 1, j, true)) ||
> + (m_param->bHistBasedSceneCut && frames[j]->bScenecut))
> {
> - m_extendGopBoundary = true;
> - break;
> + m_extendGopBoundary = true;
> + break;
> }
> }
> if (m_extendGopBoundary)
> @@ -2097,12 +2126,14 @@
> {
> for (int j = 1; j < numBFrames + 1; j++)
> {
> - if (scenecut(frames, j, j + 1, false, origNumFrames) ||
> + if ((m_param->scenecutThreshold && scenecut(frames, j, j
> + 1, false, origNumFrames)) ||
> + (m_param->bHistBasedSceneCut && frames[j +
> 1]->bScenecut) ||
> (bForceRADL && (frames[j]->frameNum == preRADL)))
> {
> - frames[j]->sliceType = X265_TYPE_P;
> - numAnalyzed = j;
> - break;
> + frames[j]->sliceType = X265_TYPE_P;
> + numAnalyzed = j;
> + break;
> +
> }
> }
> }
> @@ -3289,3 +3320,5 @@
> fenc->rowSatds[b - p0][p1 - b][cuY] += bcostAq;
> fenc->lowresCosts[b - p0][p1 - b][cuXY] = (uint16_t)(X265_MIN(bcost,
> LOWRES_COST_MASK) | (listused << LOWRES_COST_SHIFT));
> }
> +
> +}
> diff -r 04db2bfee5d6 -r 36d20a880ddc source/encoder/slicetype.h
> --- a/source/encoder/slicetype.h Thu Oct 31 16:23:27 2019 +0530
> +++ b/source/encoder/slicetype.h Wed Nov 13 18:18:31 2019 +0530
> @@ -43,6 +43,13 @@
> #define AQ_EDGE_BIAS 0.5
> #define EDGE_INCLINATION 45
>
> +#ifdef HIGH_BIT_DEPTH
> +#define edgeThreshold 1023.0
> +#else
> +#define edgeThreshold 255.0
> +#endif
> +#define PI 3.14159265
> +
> /* Thread local data for lookahead tasks */
> struct LookaheadTLD
> {
> @@ -258,6 +265,7 @@
> CostEstimateGroup& operator=(const CostEstimateGroup&);
> };
>
> -}
> +bool computeEdge(pixel *edgePic, pixel *refPic, pixel *edgeTheta,
> intptr_t stride, int height, int width, bool bcalcTheta);
>
> +}
> #endif // ifndef X265_SLICETYPE_H
> diff -r 04db2bfee5d6 -r 36d20a880ddc source/test/regression-tests.txt
> --- a/source/test/regression-tests.txt Thu Oct 31 16:23:27 2019 +0530
> +++ b/source/test/regression-tests.txt Wed Nov 13 18:18:31 2019 +0530
> @@ -159,6 +159,7 @@
> Traffic_4096x2048_30p.y4m, --preset medium --frame-dup --dup-threshold 60
> --hrd --bitrate 10000 --vbv-bufsize 15000 --vbv-maxrate 12000
> Kimono1_1920x1080_24_400.yuv,--preset superfast --qp 28 --zones 0,139,q=32
> Island_960x540_420p_8bit_24fps.yuv,--no-cutree --aq-mode 0 --bitrate 6000
> --scenecut-aware-qp
> +sintel_trailer_2k_1920x1080_24.yuv, --preset medium --hist-scenecut
> --hist-threshold 0.02 --frame-dup --dup-threshold 60 --hrd --bitrate 10000
> --vbv-bufsize 15000 --vbv-maxrate 12000
>
> # Main12 intraCost overflow bug test
> 720p50_parkrun_ter.y4m,--preset medium
> diff -r 04db2bfee5d6 -r 36d20a880ddc source/x265.h
> --- a/source/x265.h Thu Oct 31 16:23:27 2019 +0530
> +++ b/source/x265.h Wed Nov 13 18:18:31 2019 +0530
> @@ -211,7 +211,7 @@
> uint32_t numCUsInFrame;
> uint32_t numPartitions;
> uint32_t depthBytes;
> - int bScenecut;
> + bool bScenecut;
> x265_weight_param* wt;
> x265_analysis_inter_data* interData;
> x265_analysis_intra_data* intraData;
> @@ -294,7 +294,7 @@
> double avgChromaVLevel;
>
> char sliceType;
> - int bScenecut;
> + bool bScenecut;
> double ipCostRatio;
> int frameLatency;
> x265_cu_stats cuStats;
> @@ -1024,7 +1024,8 @@
> int lookaheadSlices;
>
> /* An arbitrary threshold which determines how aggressively the
> lookahead
> - * should detect scene cuts. The default (40) is recommended. */
> + * should detect scene cuts for cost based scenecut detection.
> + * The default (40) is recommended. */
> int scenecutThreshold;
>
> /* Replace keyframes by using a column of intra blocks that move
> across the video
> @@ -1846,6 +1847,16 @@
> /* The offset by which QP is incremented for inter-frames when
> bEnableSceneCutAwareQp is set.
> * Default is +5. */
> int maxQpDelta;
> +
> + /* Threshold used for histogram based scene cut detection.
> + * A frame is classified as a scenecut or not by comparing the edge
> + * and chroma histogram SAD values against this threshold.
> + * Default 0.01. Range: Real number in the interval (0,2). */
> + double edgeTransitionThreshold;
> +
> + /* Enables the histogram based algorithm for detecting scenecuts. */
> + bool bHistBasedSceneCut;
> +
> } x265_param;
> /* x265_param_alloc:
> * Allocates an x265_param instance. The returned param structure is not
> diff -r 04db2bfee5d6 -r 36d20a880ddc source/x265cli.h
> --- a/source/x265cli.h Thu Oct 31 16:23:27 2019 +0530
> +++ b/source/x265cli.h Wed Nov 13 18:18:31 2019 +0530
> @@ -129,6 +129,9 @@
> { "scenecut", required_argument, NULL, 0 },
> { "no-scenecut", no_argument, NULL, 0 },
> { "scenecut-bias", required_argument, NULL, 0 },
> + { "hist-scenecut", no_argument, NULL, 0},
> + { "no-hist-scenecut", no_argument, NULL, 0},
> + { "hist-threshold", required_argument, NULL, 0},
> { "fades", no_argument, NULL, 0 },
> { "no-fades", no_argument, NULL, 0 },
> { "scenecut-aware-qp", no_argument, NULL, 0 },
> @@ -489,7 +492,10 @@
> H0(" --gop-lookahead <integer> Extends gop boundary if a
> scenecut is found within this from keyint boundary. Default 0\n");
> H0(" --no-scenecut Disable adaptive I-frame
> decision\n");
> H0(" --scenecut <integer> How aggressively to insert extra
> I-frames. Default %d\n", param->scenecutThreshold);
> - H1(" --scenecut-bias <0..100.0> Bias for scenecut detection.
> Default %.2f\n", param->scenecutBias);
> + H1(" --scenecut-bias <0..100.0> Bias for scenecut detection.
> Default %.2f\n", param->scenecutBias);
> + H0(" --hist-scenecut Enables histogram based
> scene-cut detection using histogram based algorithm.\n");
> + H0(" --no-hist-scenecut Disables histogram based
> scene-cut detection using histogram based algorithm.\n");
> + H1(" --hist-threshold <0.0..2.0> Luma Edge histogram's Normalized
> SAD threshold for histogram based scenecut detection Default %.2f\n",
> param->edgeTransitionThreshold);
> H0(" --[no-]fades Enable detection and handling of
> fade-in regions. Default %s\n", OPT(param->bEnableFades));
> H1(" --[no-]scenecut-aware-qp Enable increasing QP for frames
> inside the scenecut window after scenecut. Default %s\n",
> OPT(param->bEnableSceneCutAwareQp));
> H1(" --scenecut-window <0..1000> QP incremental duration(in
> milliseconds) when scenecut-aware-qp is enabled. Default %d\n",
> param->scenecutWindow);
>
> --
> *With Regards,*
> *Srikanth Kurapati.*
>
--
*With Regards,*
*Srikanth Kurapati.*
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20191118/9454ceee/attachment-0001.html>
More information about the x265-devel mailing list