[x265] [PATCH 1 of 1] Feature: Histogram Based Scene Cut Detection
Aruna Matheswaran
aruna at multicorewareinc.com
Mon Nov 25 17:44:25 CET 2019
Pushed to default.
On Mon, Nov 25, 2019 at 7:01 PM Praveen Kumar Karadugattu <
praveenkumar at multicorewareinc.com> wrote:
> This patch has been reviewed and looks good to me.
>
> Regards,
> Praveen
>
> On Mon, Nov 25, 2019 at 6:53 PM Srikanth Kurapati <
> srikanth.kurapati at multicorewareinc.com> wrote:
>
>> # HG changeset patch
>> # User Srikanth Kurapati <srikanth.kurapati at multicorewareinc.com>
>> # Date 1573649311 -19800
>> # Wed Nov 13 18:18:31 2019 +0530
>> # Node ID 97a9eca413d83cd03ae0fa95957160bdf70c170b
>> # Parent 04db2bfee5d628d931d1407355b909ac8ff1c898
>> Histogram Based Scene Cut Detection.
>>
>> This patch does the following.
>> 1. Finds scene cuts by thresholding normalized SAD of edge and chroma
>> histograms.
>> 2. Adds option "--hist-scenecut" to enable histogram based scene cut
>> detection.
>> 3. Adds option "--hist-threshold" to provide threshold for determining
>> scene cuts.
>> 4. Optimizes frame duplication by reusing normalized SAD to mark duplicate
>> frames.
>>
>> diff -r 04db2bfee5d6 -r 97a9eca413d8 doc/reST/cli.rst
>> --- a/doc/reST/cli.rst Thu Oct 31 16:23:27 2019 +0530
>> +++ b/doc/reST/cli.rst Wed Nov 13 18:18:31 2019 +0530
>> @@ -1426,7 +1426,20 @@
>> This value represents the percentage difference between the inter cost
>> and
>> intra cost of a frame used in scenecut detection. For example, a value
>> of 5 indicates,
>> if the inter cost of a frame is greater than or equal to 95 percent of
>> the intra cost of the frame,
>> - then detect this frame as scenecut. Values between 5 and 15 are
>> recommended. Default 5.
>> + then detect this frame as scenecut. Values between 5 and 15 are
>> recommended. Default 5.
>> +
>> +.. option:: --hist-scenecut, --no-hist-scenecut
>> +
>> + Indicates that scenecuts need to be detected using luma edge and chroma
>> histograms.
>> + option: `--hist-scenecut` enables scenecut detection using the
>> histograms and disables the default scene cut algorithm.
>> + option: `--no-hist-scenecut` disables histogram based scenecut
>> algorithm.
>> +
>> +.. option:: --hist-threshold <0.0..2.0>
>> +
>> + This value represents the threshold for normalized SAD of edge
>> histograms used in scenecut detection.
>> + This requires option: `--hist-scenecut` to be enabled. For example, a
>> value of 0.2 indicates that a frame with normalized SAD value
>> + greater than 0.2 against the previous frame is detected as a scenecut.
>> + Default 0.01.
>>
>> .. option:: --radl <integer>
>>
>> diff -r 04db2bfee5d6 -r 97a9eca413d8 source/CMakeLists.txt
>> --- a/source/CMakeLists.txt Thu Oct 31 16:23:27 2019 +0530
>> +++ b/source/CMakeLists.txt Wed Nov 13 18:18:31 2019 +0530
>> @@ -29,7 +29,7 @@
>> option(STATIC_LINK_CRT "Statically link C runtime for release builds"
>> OFF)
>> mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
>> # X265_BUILD must be incremented each time the public API is changed
>> -set(X265_BUILD 182)
>> +set(X265_BUILD 183)
>> configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
>> "${PROJECT_BINARY_DIR}/x265.def")
>> configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
>> diff -r 04db2bfee5d6 -r 97a9eca413d8 source/common/common.h
>> --- a/source/common/common.h Thu Oct 31 16:23:27 2019 +0530
>> +++ b/source/common/common.h Wed Nov 13 18:18:31 2019 +0530
>> @@ -129,12 +129,16 @@
>> typedef uint64_t sum2_t;
>> typedef uint64_t pixel4;
>> typedef int64_t ssum2_t;
>> +#define HISTOGRAM_BINS 1024
>> +#define SHIFT 1
>> #else
>> typedef uint8_t pixel;
>> typedef uint16_t sum_t;
>> typedef uint32_t sum2_t;
>> typedef uint32_t pixel4;
>> typedef int32_t ssum2_t; // Signed sum
>> +#define HISTOGRAM_BINS 256
>> +#define SHIFT 0
>> #endif // if HIGH_BIT_DEPTH
>>
>> #if X265_DEPTH < 10
>> diff -r 04db2bfee5d6 -r 97a9eca413d8 source/common/param.cpp
>> --- a/source/common/param.cpp Thu Oct 31 16:23:27 2019 +0530
>> +++ b/source/common/param.cpp Wed Nov 13 18:18:31 2019 +0530
>> @@ -167,6 +167,8 @@
>> param->bFrameAdaptive = X265_B_ADAPT_TRELLIS;
>> param->bBPyramid = 1;
>> param->scenecutThreshold = 40; /* Magic number pulled in from x264 */
>> + param->edgeTransitionThreshold = 0.01;
>> + param->bHistBasedSceneCut = 0;
>> param->lookaheadSlices = 8;
>> param->lookaheadThreads = 0;
>> param->scenecutBias = 5.0;
>> @@ -572,6 +574,7 @@
>> param->bframes = 0;
>> param->lookaheadDepth = 0;
>> param->scenecutThreshold = 0;
>> + param->bHistBasedSceneCut = 0;
>> param->rc.cuTree = 0;
>> param->frameNumThreads = 1;
>> }
>> @@ -920,12 +923,13 @@
>> OPT("lookahead-slices") p->lookaheadSlices = atoi(value);
>> OPT("scenecut")
>> {
>> - p->scenecutThreshold = atobool(value);
>> - if (bError || p->scenecutThreshold)
>> - {
>> - bError = false;
>> - p->scenecutThreshold = atoi(value);
>> - }
>> + p->scenecutThreshold = atobool(value);
>> + if (bError || p->scenecutThreshold)
>> + {
>> + bError = false;
>> + p->scenecutThreshold = atoi(value);
>> + p->bHistBasedSceneCut = 0;
>> + }
>> }
>> OPT("temporal-layers") p->bEnableTemporalSubLayers = atobool(value);
>> OPT("keyint") p->keyframeMax = atoi(value);
>> @@ -1191,6 +1195,21 @@
>> OPT("opt-ref-list-length-pps") p->bOptRefListLengthPPS =
>> atobool(value);
>> OPT("multi-pass-opt-rps") p->bMultiPassOptRPS = atobool(value);
>> OPT("scenecut-bias") p->scenecutBias = atof(value);
>> + OPT("hist-scenecut")
>> + {
>> + p->bHistBasedSceneCut = atobool(value);
>> + if (bError)
>> + {
>> + bError = false;
>> + p->bHistBasedSceneCut = 0;
>> + }
>> + if (p->bHistBasedSceneCut)
>> + {
>> + bError = false;
>> + p->scenecutThreshold = 0;
>> + }
>> + }
>> + OPT("hist-threshold") p->edgeTransitionThreshold = atof(value);
>> OPT("lookahead-threads") p->lookaheadThreads = atoi(value);
>> OPT("opt-cu-delta-qp") p->bOptCUDeltaQP = atobool(value);
>> OPT("multi-pass-opt-analysis") p->analysisMultiPassRefine =
>> atobool(value);
>> @@ -1632,7 +1651,9 @@
>> CHECK(param->scenecutThreshold < 0,
>> "scenecutThreshold must be greater than 0");
>> CHECK(param->scenecutBias < 0 || 100 < param->scenecutBias,
>> - "scenecut-bias must be between 0 and 100");
>> + "scenecut-bias must be between 0 and 100");
>> + CHECK(param->edgeTransitionThreshold < 0.0 || 2.0 <
>> param->edgeTransitionThreshold,
>> + "hist-threshold must be between 0.0 and 2.0");
>> CHECK(param->radl < 0 || param->radl > param->bframes,
>> "radl must be between 0 and bframes");
>> CHECK(param->rdPenalty < 0 || param->rdPenalty > 2,
>> @@ -1792,9 +1813,13 @@
>> x265_log(param, X265_LOG_INFO, "ME / range / subpel / merge
>> : %s / %d / %d / %d\n",
>> x265_motion_est_names[param->searchMethod],
>> param->searchRange, param->subpelRefine, param->maxNumMergeCand);
>>
>> - if (param->keyframeMax != INT_MAX || param->scenecutThreshold)
>> - x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut /
>> bias: %d / %d / %d / %.2lf\n", param->keyframeMin, param->keyframeMax,
>> param->scenecutThreshold, param->scenecutBias * 100);
>> - else
>> + if (param->scenecutThreshold && param->keyframeMax != INT_MAX)
>> + x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut /
>> bias : %d / %d / %d / %.2lf \n",
>> + param->keyframeMin, param->keyframeMax,
>> param->scenecutThreshold, param->scenecutBias * 100);
>> + else if (param->bHistBasedSceneCut && param->keyframeMax != INT_MAX)
>> + x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut /
>> edge threshold : %d / %d / %d / %.2lf\n",
>> + param->keyframeMin, param->keyframeMax,
>> param->bHistBasedSceneCut, param->edgeTransitionThreshold);
>> + else if (param->keyframeMax == INT_MAX)
>> x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut
>> : disabled\n");
>>
>> if (param->cbQpOffset || param->crQpOffset)
>> @@ -1961,6 +1986,7 @@
>> s += sprintf(s, " rc-lookahead=%d", p->lookaheadDepth);
>> s += sprintf(s, " lookahead-slices=%d", p->lookaheadSlices);
>> s += sprintf(s, " scenecut=%d", p->scenecutThreshold);
>> + s += sprintf(s, " hist-scenecut=%d", p->bHistBasedSceneCut);
>> s += sprintf(s, " radl=%d", p->radl);
>> BOOL(p->bEnableHRDConcatFlag, "splice");
>> BOOL(p->bIntraRefresh, "intra-refresh");
>> @@ -2108,6 +2134,7 @@
>> BOOL(p->bOptRefListLengthPPS, "opt-ref-list-length-pps");
>> BOOL(p->bMultiPassOptRPS, "multi-pass-opt-rps");
>> s += sprintf(s, " scenecut-bias=%.2f", p->scenecutBias);
>> + s += sprintf(s, " hist-threshold=%.2f", p->edgeTransitionThreshold);
>> BOOL(p->bOptCUDeltaQP, "opt-cu-delta-qp");
>> BOOL(p->bAQMotion, "aq-motion");
>> BOOL(p->bEmitHDRSEI, "hdr");
>> @@ -2261,6 +2288,7 @@
>> dst->lookaheadSlices = src->lookaheadSlices;
>> dst->lookaheadThreads = src->lookaheadThreads;
>> dst->scenecutThreshold = src->scenecutThreshold;
>> + dst->bHistBasedSceneCut = src->bHistBasedSceneCut;
>> dst->bIntraRefresh = src->bIntraRefresh;
>> dst->maxCUSize = src->maxCUSize;
>> dst->minCUSize = src->minCUSize;
>> @@ -2420,6 +2448,7 @@
>> dst->bOptRefListLengthPPS = src->bOptRefListLengthPPS;
>> dst->bMultiPassOptRPS = src->bMultiPassOptRPS;
>> dst->scenecutBias = src->scenecutBias;
>> + dst->edgeTransitionThreshold = src->edgeTransitionThreshold;
>> dst->gopLookahead = src->lookaheadDepth;
>> dst->bOptCUDeltaQP = src->bOptCUDeltaQP;
>> dst->analysisMultiPassDistortion = src->analysisMultiPassDistortion;
>> diff -r 04db2bfee5d6 -r 97a9eca413d8 source/encoder/encoder.cpp
>> --- a/source/encoder/encoder.cpp Thu Oct 31 16:23:27 2019 +0530
>> +++ b/source/encoder/encoder.cpp Wed Nov 13 18:18:31 2019 +0530
>> @@ -130,12 +130,17 @@
>> #if SVT_HEVC
>> m_svtAppData = NULL;
>> #endif
>> -
>> m_prevTonemapPayload.payload = NULL;
>> m_startPoint = 0;
>> m_saveCTUSize = 0;
>> + m_edgePic = NULL;
>> + m_edgeHistThreshold = 0;
>> + m_chromaHistThreshold = 0.0;
>> + m_scaledEdgeThreshold = 0.0;
>> + m_scaledChromaThreshold = 0.0;
>> m_zoneIndex = 0;
>> }
>> +
>> inline char *strcatFilename(const char *input, const char *suffix)
>> {
>> char *output = X265_MALLOC(char, strlen(input) + strlen(suffix) + 1);
>> @@ -210,6 +215,23 @@
>> }
>> }
>>
>> + if (m_param->bHistBasedSceneCut)
>> + {
>> + for (int i = 0; i < x265_cli_csps[m_param->internalCsp].planes;
>> i++)
>> + {
>> + m_planeSizes[i] = m_param->sourceWidth *
>> m_param->sourceHeight >> x265_cli_csps[m_param->internalCsp].height[i];
>> + }
>> + uint32_t pixelbytes = m_param->sourceBitDepth > 8 ? 2 : 1;
>> + m_edgePic = X265_MALLOC(pixel, m_planeSizes[0] * pixelbytes);
>> + m_edgeHistThreshold = m_param->edgeTransitionThreshold;
>> + m_chromaHistThreshold = m_edgeHistThreshold * 10.0;
>> + m_chromaHistThreshold = x265_min(m_chromaHistThreshold,
>> MAX_SCENECUT_THRESHOLD);
>> + m_scaledEdgeThreshold = m_edgeHistThreshold *
>> SCENECUT_STRENGTH_FACTOR;
>> + m_scaledEdgeThreshold = x265_min(m_scaledEdgeThreshold,
>> MAX_SCENECUT_THRESHOLD);
>> + m_scaledChromaThreshold = m_chromaHistThreshold *
>> SCENECUT_STRENGTH_FACTOR;
>> + m_scaledChromaThreshold = x265_min(m_scaledChromaThreshold,
>> MAX_SCENECUT_THRESHOLD);
>> + }
>> +
>> // Do not allow WPP if only one row or fewer than 3 columns, it is
>> pointless and unstable
>> if (rows == 1 || cols < 3)
>> {
>> @@ -854,6 +876,12 @@
>> }
>> }
>>
>> + if (m_param->bHistBasedSceneCut)
>> + {
>> + if(m_edgePic != NULL)
>> + X265_FREE_ZERO(m_edgePic);
>> + }
>> +
>> for (int i = 0; i < m_param->frameNumThreads; i++)
>> {
>> if (m_frameEncoder[i])
>> @@ -1313,6 +1341,142 @@
>> dest->planes[2] = (char*)dest->planes[1] + src->stride[1] *
>> (src->height >> x265_cli_csps[src->colorSpace].height[1]);
>> }
>>
>> +bool Encoder::computeHistograms(x265_picture *pic)
>> +{
>> + pixel *src = (pixel *) pic->planes[0];
>> + size_t bufSize = sizeof(pixel) * m_planeSizes[0];
>> + int32_t planeCount = x265_cli_csps[m_param->internalCsp].planes;
>> + int32_t numBytes = m_param->sourceBitDepth > 8 ? 2 : 1;
>> + memset(m_edgePic, 0, bufSize * numBytes);
>> +
>> + if (!computeEdge(m_edgePic, src, NULL, pic->width, pic->height,
>> pic->width, false))
>> + {
>> + x265_log(m_param, X265_LOG_ERROR, "Failed edge computation!");
>> + return false;
>> + }
>> +
>> + pixel pixelVal;
>> + int64_t size = pic->height * (pic->stride[0] >> SHIFT);
>> + int32_t *edgeHist = m_curEdgeHist;
>> + memset(edgeHist, 0, 2 * sizeof(int32_t));
>> + for (int64_t i = 0; i < size; i++)
>> + {
>> + if (!m_edgePic[i])
>> + edgeHist[0]++;
>> + else
>> + edgeHist[1]++;
>> + }
>> +
>> + if (pic->colorSpace != X265_CSP_I400)
>> + {
>> + /* U Histogram Calculation */
>> + int32_t HeightL = (pic->height >>
>> x265_cli_csps[pic->colorSpace].height[1]);
>> + size = HeightL * (pic->stride[1] >> SHIFT);
>> + int32_t *uHist = m_curUVHist[0];
>> + pixel *chromaPlane = (pixel *) pic->planes[1];
>> +
>> + memset(uHist, 0, HISTOGRAM_BINS * sizeof(int32_t));
>> +
>> + for (int64_t i = 0; i < size; i++)
>> + {
>> + pixelVal = chromaPlane[i];
>> + uHist[pixelVal]++;
>> + }
>> +
>> + /* V Histogram Calculation */
>> + if (planeCount == 3)
>> + {
>> + pixelVal = 0;
>> + int32_t heightV = (pic->height >>
>> x265_cli_csps[pic->colorSpace].height[2]);
>> + size = heightV * (pic->stride[2] >> SHIFT);
>> + int32_t *vHist = m_curUVHist[1];
>> + chromaPlane = (pixel *) pic->planes[2];
>> +
>> + memset(vHist, 0, HISTOGRAM_BINS * sizeof(int32_t));
>> + for (int64_t i = 0; i < size; i++)
>> + {
>> + pixelVal = chromaPlane[i];
>> + vHist[pixelVal]++;
>> + }
>> + for (int i = 0; i < HISTOGRAM_BINS; i++)
>> + {
>> + m_curMaxUVHist[i] = x265_max(uHist[i], vHist[i]);
>> + }
>> + }
>> + else
>> + { /* in case of bi planar color space */
>> + memcpy(m_curMaxUVHist, m_curUVHist[0], HISTOGRAM_BINS *
>> sizeof(int32_t));
>> + }
>> + }
>> + return true;
>> +}
>> +
>> +void Encoder::computeHistogramSAD(double *maxUVNormalizedSad, double
>> *edgeNormalizedSad, int curPoc)
>> +{
>> +
>> + if (curPoc == 0)
>> + { /* first frame is a scenecut by default; no SAD computation is
>> needed for it. */
>> + *maxUVNormalizedSad = 0.0;
>> + *edgeNormalizedSad = 0.0;
>> + }
>> + else
>> + {
>> + /* compute sum of absolute difference of normalized histogram
>> bins for maxUV and edge histograms. */
>> + int32_t edgefreqDiff = 0;
>> + int32_t maxUVfreqDiff = 0;
>> + double edgeProbabilityDiff = 0;
>> +
>> + for (int j = 0; j < HISTOGRAM_BINS; j++)
>> + {
>> + if (j < 2)
>> + {
>> + edgefreqDiff = abs(m_curEdgeHist[j] - m_prevEdgeHist[j]);
>> + edgeProbabilityDiff = (double) edgefreqDiff /
>> m_planeSizes[0];
>> + *edgeNormalizedSad += edgeProbabilityDiff;
>> + }
>> + maxUVfreqDiff = abs(m_curMaxUVHist[j] - m_prevMaxUVHist[j]);
>> + *maxUVNormalizedSad += (double)maxUVfreqDiff /
>> m_planeSizes[2];
>> + }
>> + }
>> +
>> + /* store histograms of previous frame for reference */
>> + size_t bufsize = HISTOGRAM_BINS * sizeof(int32_t);
>> + memcpy(m_prevMaxUVHist, m_curMaxUVHist, bufsize);
>> + memcpy(m_prevEdgeHist, m_curEdgeHist, 2 * sizeof(int32_t));
>> +}
>> +
>> +void Encoder::findSceneCuts(x265_picture *pic, bool& bDup, double
>> maxUVSad, double edgeSad)
>> +{
>> + pic->frameData.bScenecut = false;
>> +
>> + if (pic->poc == 0)
>> + {
>> + /* for first frame */
>> + pic->frameData.bScenecut = false;
>> + bDup = false;
>> + }
>> + else
>> + {
>> + if (edgeSad == 0.0 && maxUVSad == 0.0)
>> + {
>> + bDup = true;
>> + }
>> + else if (edgeSad > m_edgeHistThreshold && maxUVSad >=
>> m_chromaHistThreshold)
>> + {
>> + pic->frameData.bScenecut = true;
>> + bDup = false;
>> + }
>> + else if (edgeSad > m_scaledEdgeThreshold || maxUVSad >=
>> m_scaledChromaThreshold)
>> + {
>> + pic->frameData.bScenecut = true;
>> + bDup = false;
>> + }
>> + }
>> +
>> + if (pic->frameData.bScenecut)
>> + x265_log(m_param, X265_LOG_DEBUG, "scene cut at %d \n", pic->poc);
>> +}
>> +
>> /**
>> * Feed one new input frame into the encoder, get one frame out. If
>> pic_in is
>> * NULL, a flush condition is implied and pic_in must be NULL for all
>> subsequent
>> @@ -1339,6 +1503,8 @@
>> const x265_picture* inputPic = NULL;
>> static int written = 0, read = 0;
>> bool dontRead = false;
>> + bool bdropFrame = false;
>> + bool dropflag = false;
>>
>> if (m_exportedPic)
>> {
>> @@ -1350,6 +1516,17 @@
>> }
>> if ((pic_in && (!m_param->chunkEnd || (m_encodedFrameNum <
>> m_param->chunkEnd))) || (m_param->bEnableFrameDuplication && !pic_in &&
>> (read < written)))
>> {
>> + if (m_param->bHistBasedSceneCut && pic_in)
>> + {
>> + x265_picture *pic = (x265_picture *) pic_in;
>> + if (computeHistograms(pic))
>> + {
>> + double maxUVSad = 0.0, edgeSad = 0.0;
>> + computeHistogramSAD(&maxUVSad, &edgeSad, pic_in->poc);
>> + findSceneCuts(pic, bdropFrame, maxUVSad, edgeSad);
>> + }
>> + }
>> +
>> if ((m_param->bEnableFrameDuplication && !pic_in && (read <
>> written)))
>> dontRead = true;
>> else
>> @@ -1368,7 +1545,7 @@
>> if (pic_in->bitDepth < 8 || pic_in->bitDepth > 16)
>> {
>> x265_log(m_param, X265_LOG_ERROR, "Input bit depth (%d)
>> must be between 8 and 16\n",
>> - pic_in->bitDepth);
>> + pic_in->bitDepth);
>> return -1;
>> }
>> }
>> @@ -1393,9 +1570,27 @@
>> written++;
>> }
>>
>> - psnrWeight = ComputePSNR(m_dupBuffer[0]->dupPic,
>> m_dupBuffer[1]->dupPic, m_param);
>> -
>> - if (psnrWeight >= m_param->dupThreshold)
>> + if (m_param->bEnableFrameDuplication &&
>> m_param->bHistBasedSceneCut)
>> + {
>> + if (!bdropFrame &&
>> m_dupBuffer[1]->dupPic->frameData.bScenecut == false)
>> + {
>> + psnrWeight = ComputePSNR(m_dupBuffer[0]->dupPic,
>> m_dupBuffer[1]->dupPic, m_param);
>> + if (psnrWeight >= m_param->dupThreshold)
>> + dropflag = true;
>> + }
>> + else
>> + {
>> + dropflag = true;
>> + }
>> + }
>> + else if (m_param->bEnableFrameDuplication)
>> + {
>> + psnrWeight = ComputePSNR(m_dupBuffer[0]->dupPic,
>> m_dupBuffer[1]->dupPic, m_param);
>> + if (psnrWeight >= m_param->dupThreshold)
>> + dropflag = true;
>> + }
>> +
>> + if (dropflag)
>> {
>> if (m_dupBuffer[0]->bDup)
>> {
>> @@ -1428,7 +1623,7 @@
>> inputPic = pic_in;
>>
>> Frame *inFrame;
>> - x265_param* p = (m_reconfigure || m_reconfigureRc) ?
>> m_latestParam : m_param;
>> + x265_param *p = (m_reconfigure || m_reconfigureRc) ?
>> m_latestParam : m_param;
>> if (m_dpb->m_freeList.empty())
>> {
>> inFrame = new Frame;
>> @@ -1498,6 +1693,10 @@
>> inFrame->m_poc = ++m_pocLast;
>> inFrame->m_userData = inputPic->userData;
>> inFrame->m_pts = inputPic->pts;
>> + if (m_param->bHistBasedSceneCut)
>> + {
>> + inFrame->m_lowres.bScenecut = (inputPic->frameData.bScenecut
>> == 1) ? true : false;
>> + }
>> inFrame->m_forceqp = inputPic->forceqp;
>> inFrame->m_param = (m_reconfigure || m_reconfigureRc) ?
>> m_latestParam : m_param;
>> inFrame->m_picStruct = inputPic->picStruct;
>> @@ -3209,6 +3408,7 @@
>> * adaptive I frame placement */
>> p->keyframeMax = INT_MAX;
>> p->scenecutThreshold = 0;
>> + p->bHistBasedSceneCut = 0;
>> }
>> else if (p->keyframeMax <= 1)
>> {
>> @@ -3222,6 +3422,7 @@
>> p->lookaheadDepth = 0;
>> p->bframes = 0;
>> p->scenecutThreshold = 0;
>> + p->bHistBasedSceneCut = 0;
>> p->bFrameAdaptive = 0;
>> p->rc.cuTree = 0;
>> p->bEnableWeightedPred = 0;
>> @@ -3881,6 +4082,13 @@
>> m_param->searchMethod = m_param->hmeSearchMethod[2];
>> }
>> }
>> +
>> + if (p->bHistBasedSceneCut && !p->edgeTransitionThreshold)
>> + {
>> + p->edgeTransitionThreshold = 0.01;
>> + x265_log(p, X265_LOG_WARNING, "using default threshold %.2lf for
>> scene cut detection\n", p->edgeTransitionThreshold);
>> + }
>> +
>> }
>>
>> void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc,
>> const x265_picture* picIn, int paramBytes)
>> diff -r 04db2bfee5d6 -r 97a9eca413d8 source/encoder/encoder.h
>> --- a/source/encoder/encoder.h Thu Oct 31 16:23:27 2019 +0530
>> +++ b/source/encoder/encoder.h Wed Nov 13 18:18:31 2019 +0530
>> @@ -156,7 +156,6 @@
>> bool bDup;
>> };
>>
>> -
>> class FrameEncoder;
>> class DPB;
>> class Lookahead;
>> @@ -164,6 +163,9 @@
>> class ThreadPool;
>> class FrameData;
>>
>> +#define MAX_SCENECUT_THRESHOLD 2.0
>> +#define SCENECUT_STRENGTH_FACTOR 2.0
>> +
>> class Encoder : public x265_encoder
>> {
>> public:
>> @@ -228,7 +230,7 @@
>> bool m_reconfigureRc;
>> bool m_reconfigureZone;
>>
>> - int m_saveCtuDistortionLevel;
>> + int m_saveCtuDistortionLevel;
>>
>> /* Begin intra refresh when one not in progress or else begin one as
>> soon as the current
>> * one is done. Requires bIntraRefresh to be set.*/
>> @@ -245,11 +247,24 @@
>> Lock m_rpsInSpsLock;
>> int m_rpsInSpsCount;
>> /* For HDR*/
>> - double m_cB;
>> - double m_cR;
>> + double m_cB;
>> + double m_cR;
>> +
>> + int m_bToneMap; // Enables tone-mapping
>> + int m_enableNal;
>>
>> - int m_bToneMap; // Enables tone-mapping
>> - int m_enableNal;
>> + /* For histogram based scene-cut detection */
>> + pixel* m_edgePic;
>> + int32_t m_curUVHist[2][HISTOGRAM_BINS];
>> + int32_t m_curMaxUVHist[HISTOGRAM_BINS];
>> + int32_t m_prevMaxUVHist[HISTOGRAM_BINS];
>> + int32_t m_curEdgeHist[2];
>> + int32_t m_prevEdgeHist[2];
>> + uint32_t m_planeSizes[3];
>> + double m_edgeHistThreshold;
>> + double m_chromaHistThreshold;
>> + double m_scaledEdgeThreshold;
>> + double m_scaledChromaThreshold;
>>
>> #ifdef ENABLE_HDR10_PLUS
>> const hdr10plus_api *m_hdr10plus_api;
>> @@ -355,6 +370,10 @@
>>
>> void copyPicture(x265_picture *dest, const x265_picture *src);
>>
>> + bool computeHistograms(x265_picture *pic);
>> + void computeHistogramSAD(double *maxUVNormalizedSAD, double
>> *edgeNormalizedSAD, int curPoc);
>> + void findSceneCuts(x265_picture *pic, bool& bDup, double
>> m_maxUVSADVal, double m_edgeSADVal);
>> +
>> void initRefIdx();
>> void analyseRefIdx(int *numRefIdx);
>> void updateRefIdx();
>> diff -r 04db2bfee5d6 -r 97a9eca413d8 source/encoder/ratecontrol.cpp
>> --- a/source/encoder/ratecontrol.cpp Thu Oct 31 16:23:27 2019 +0530
>> +++ b/source/encoder/ratecontrol.cpp Wed Nov 13 18:18:31 2019 +0530
>> @@ -1200,6 +1200,7 @@
>> m_param->rc.bStatRead = 0;
>> m_param->bFrameAdaptive = 0;
>> m_param->scenecutThreshold = 0;
>> + m_param->bHistBasedSceneCut = 0;
>> m_param->rc.cuTree = 0;
>> if (m_param->bframes > 1)
>> m_param->bframes = 1;
>> @@ -2284,7 +2285,7 @@
>> if (m_isVbv && m_currentSatd > 0 && curFrame)
>> {
>> if (m_param->lookaheadDepth || m_param->rc.cuTree ||
>> - m_param->scenecutThreshold ||
>> + (m_param->scenecutThreshold || m_param->bHistBasedSceneCut)
>> ||
>> (m_param->bFrameAdaptive && m_param->bframes))
>> {
>> /* Lookahead VBV: If lookahead is done, raise the quantizer
>> as necessary
>> diff -r 04db2bfee5d6 -r 97a9eca413d8 source/encoder/slicetype.cpp
>> --- a/source/encoder/slicetype.cpp Thu Oct 31 16:23:27 2019 +0530
>> +++ b/source/encoder/slicetype.cpp Wed Nov 13 18:18:31 2019 +0530
>> @@ -85,6 +85,69 @@
>>
>> } // end anonymous namespace
>>
>> +namespace X265_NS {
>> +
>> +bool computeEdge(pixel *edgePic, pixel *refPic, pixel *edgeTheta,
>> intptr_t stride, int height, int width, bool bcalcTheta)
>> +{
>> + intptr_t rowOne = 0, rowTwo = 0, rowThree = 0, colOne = 0, colTwo =
>> 0, colThree = 0;
>> + intptr_t middle = 0, topLeft = 0, topRight = 0, bottomLeft = 0,
>> bottomRight = 0;
>> +
>> + const int startIndex = 1;
>> +
>> + if (!edgePic || !refPic || (!edgeTheta && bcalcTheta))
>> + {
>> + return false;
>> + }
>> + else
>> + {
>> + float gradientH = 0, gradientV = 0, radians = 0, theta = 0;
>> + float gradientMagnitude = 0;
>> + pixel blackPixel = 0;
>> +
>> + //Applying Sobel filter except for border pixels
>> + height = height - startIndex;
>> + width = width - startIndex;
>> + for (int rowNum = startIndex; rowNum < height; rowNum++)
>> + {
>> + rowTwo = rowNum * stride;
>> + rowOne = rowTwo - stride;
>> + rowThree = rowTwo + stride;
>> +
>> + for (int colNum = startIndex; colNum < width; colNum++)
>> + {
>> +
>> + /* Horizontal and vertical gradients
>> + [ -3 0 3 ] [-3 -10 -3 ]
>> + gH =[ -10 0 10] gV = [ 0 0 0 ]
>> + [ -3 0 3 ] [ 3 10 3 ] */
>> +
>> + colTwo = colNum;
>> + colOne = colTwo - startIndex;
>> + colThree = colTwo + startIndex;
>> + middle = rowTwo + colTwo;
>> + topLeft = rowOne + colOne;
>> + topRight = rowOne + colThree;
>> + bottomLeft = rowThree + colOne;
>> + bottomRight = rowThree + colThree;
>> + gradientH = (float)(-3 * refPic[topLeft] + 3 *
>> refPic[topRight] - 10 * refPic[rowTwo + colOne] + 10 * refPic[rowTwo +
>> colThree] - 3 * refPic[bottomLeft] + 3 * refPic[bottomRight]);
>> + gradientV = (float)(-3 * refPic[topLeft] - 10 *
>> refPic[rowOne + colTwo] - 3 * refPic[topRight] + 3 * refPic[bottomLeft] +
>> 10 * refPic[rowThree + colTwo] + 3 * refPic[bottomRight]);
>> + gradientMagnitude = sqrtf(gradientH * gradientH +
>> gradientV * gradientV);
>> + if(bcalcTheta)
>> + {
>> + edgeTheta[middle] = 0;
>> + radians = atan2(gradientV, gradientH);
>> + theta = (float)((radians * 180) / PI);
>> + if (theta < 0)
>> + theta = 180 + theta;
>> + edgeTheta[middle] = (pixel)theta;
>> + }
>> + edgePic[middle] = (pixel)(gradientMagnitude >=
>> edgeThreshold ? edgeThreshold : blackPixel);
>> + }
>> + }
>> + return true;
>> + }
>> +}
>> +
>> void edgeFilter(Frame *curFrame, x265_param* param)
>> {
>> int height = curFrame->m_fencPic->m_picHeight;
>> @@ -114,6 +177,7 @@
>> //Applying Gaussian filter on the picture
>> src = (pixel*)curFrame->m_fencPic->m_picOrg[0];
>> refPic = curFrame->m_gaussianPic +
>> curFrame->m_fencPic->m_lumaMarginY * stride +
>> curFrame->m_fencPic->m_lumaMarginX;
>> + edgePic = curFrame->m_edgePic + curFrame->m_fencPic->m_lumaMarginY *
>> stride + curFrame->m_fencPic->m_lumaMarginX;
>> pixel pixelValue = 0;
>>
>> for (int rowNum = 0; rowNum < height; rowNum++)
>> @@ -146,51 +210,8 @@
>> }
>> }
>>
>> -#if HIGH_BIT_DEPTH //10-bit build
>> - float threshold = 1023;
>> - pixel whitePixel = 1023;
>> -#else
>> - float threshold = 255;
>> - pixel whitePixel = 255;
>> -#endif
>> -#define PI 3.14159265
>> -
>> - float gradientH = 0, gradientV = 0, radians = 0, theta = 0;
>> - float gradientMagnitude = 0;
>> - pixel blackPixel = 0;
>> - edgePic = curFrame->m_edgePic + curFrame->m_fencPic->m_lumaMarginY *
>> stride + curFrame->m_fencPic->m_lumaMarginX;
>> - //Applying Sobel filter on the gaussian filtered picture
>> - for (int rowNum = 0; rowNum < height; rowNum++)
>> - {
>> - for (int colNum = 0; colNum < width; colNum++)
>> - {
>> - edgeTheta[(rowNum*stride) + colNum] = 0;
>> - if ((rowNum != 0) && (colNum != 0) && (rowNum != height - 1)
>> && (colNum != width - 1)) //Ignoring the border pixels of the picture
>> - {
>> - /*Horizontal and vertical gradients
>> - [ -3 0 3 ] [-3 -10 -3 ]
>> - gH = [ -10 0 10] gV = [ 0 0 0 ]
>> - [ -3 0 3 ] [ 3 10 3 ]*/
>> -
>> - const intptr_t rowOne = (rowNum - 1)*stride, colOne =
>> colNum -1;
>> - const intptr_t rowTwo = rowNum * stride, colTwo = colNum;
>> - const intptr_t rowThree = (rowNum + 1)*stride, colThree
>> = colNum + 1;
>> - const intptr_t index = (rowNum*stride) + colNum;
>> -
>> - gradientH = (float)(-3 * refPic[rowOne + colOne] + 3 *
>> refPic[rowOne + colThree] - 10 * refPic[rowTwo + colOne] + 10 *
>> refPic[rowTwo + colThree] - 3 * refPic[rowThree + colOne] + 3 *
>> refPic[rowThree + colThree]);
>> - gradientV = (float)(-3 * refPic[rowOne + colOne] - 10 *
>> refPic[rowOne + colTwo] - 3 * refPic[rowOne + colThree] + 3 *
>> refPic[rowThree + colOne] + 10 * refPic[rowThree + colTwo] + 3 *
>> refPic[rowThree + colThree]);
>> -
>> - gradientMagnitude = sqrtf(gradientH * gradientH +
>> gradientV * gradientV);
>> - radians = atan2(gradientV, gradientH);
>> - theta = (float)((radians * 180) / PI);
>> - if (theta < 0)
>> - theta = 180 + theta;
>> - edgeTheta[(rowNum*stride) + colNum] = (pixel)theta;
>> -
>> - edgePic[index] = gradientMagnitude >= threshold ?
>> whitePixel : blackPixel;
>> - }
>> - }
>> - }
>> + if(!computeEdge(edgePic, refPic, edgeTheta, stride, height, width,
>> true))
>> + x265_log(NULL, X265_LOG_ERROR, "Failed edge computation!");
>> }
>>
>> //Find the angle of a block by averaging the pixel angles
>> @@ -1471,7 +1492,7 @@
>>
>> if (m_lastNonB && !m_param->rc.bStatRead &&
>> ((m_param->bFrameAdaptive && m_param->bframes) ||
>> - m_param->rc.cuTree || m_param->scenecutThreshold ||
>> + m_param->rc.cuTree || m_param->scenecutThreshold ||
>> m_param->bHistBasedSceneCut ||
>> (m_param->lookaheadDepth && m_param->rc.vbvBufferSize)))
>> {
>> slicetypeAnalyse(frames, false);
>> @@ -1971,10 +1992,15 @@
>>
>> int numBFrames = 0;
>> int numAnalyzed = numFrames;
>> - bool isScenecut = scenecut(frames, 0, 1, true, origNumFrames);
>> + bool isScenecut = false;
>>
>> /* When scenecut threshold is set, use scenecut detection for I
>> frame placements */
>> - if (m_param->scenecutThreshold && isScenecut)
>> + if (m_param->scenecutThreshold)
>> + isScenecut = scenecut(frames, 0, 1, true, origNumFrames);
>> + else if (m_param->bHistBasedSceneCut)
>> + isScenecut = frames[1]->bScenecut;
>> +
>> + if (isScenecut)
>> {
>> frames[1]->sliceType = X265_TYPE_I;
>> return;
>> @@ -1985,14 +2011,17 @@
>> m_extendGopBoundary = false;
>> for (int i = m_param->bframes + 1; i < origNumFrames; i +=
>> m_param->bframes + 1)
>> {
>> - scenecut(frames, i, i + 1, true, origNumFrames);
>> + if (m_param->scenecutThreshold)
>> + scenecut(frames, i, i + 1, true, origNumFrames);
>> +
>> for (int j = i + 1; j <= X265_MIN(i + m_param->bframes + 1,
>> origNumFrames); j++)
>> {
>> - if (frames[j]->bScenecut && scenecutInternal(frames, j -
>> 1, j, true) )
>> - {
>> - m_extendGopBoundary = true;
>> - break;
>> - }
>> + if (( m_param->scenecutThreshold && frames[j]->bScenecut
>> && scenecutInternal(frames, j - 1, j, true)) ||
>> + (m_param->bHistBasedSceneCut &&
>> frames[j]->bScenecut))
>> + {
>> + m_extendGopBoundary = true;
>> + break;
>> + }
>> }
>> if (m_extendGopBoundary)
>> break;
>> @@ -2097,13 +2126,14 @@
>> {
>> for (int j = 1; j < numBFrames + 1; j++)
>> {
>> - if (scenecut(frames, j, j + 1, false, origNumFrames) ||
>> + if ((m_param->scenecutThreshold && scenecut(frames, j, j
>> + 1, false, origNumFrames)) ||
>> + (m_param->bHistBasedSceneCut && frames[j +
>> 1]->bScenecut) ||
>> (bForceRADL && (frames[j]->frameNum == preRADL)))
>> - {
>> - frames[j]->sliceType = X265_TYPE_P;
>> - numAnalyzed = j;
>> - break;
>> - }
>> + {
>> + frames[j]->sliceType = X265_TYPE_P;
>> + numAnalyzed = j;
>> + break;
>> + }
>> }
>> }
>> resetStart = bKeyframe ? 1 : X265_MIN(numBFrames + 2,
>> numAnalyzed + 1);
>> @@ -3289,3 +3319,5 @@
>> fenc->rowSatds[b - p0][p1 - b][cuY] += bcostAq;
>> fenc->lowresCosts[b - p0][p1 - b][cuXY] = (uint16_t)(X265_MIN(bcost,
>> LOWRES_COST_MASK) | (listused << LOWRES_COST_SHIFT));
>> }
>> +
>> +}
>> diff -r 04db2bfee5d6 -r 97a9eca413d8 source/encoder/slicetype.h
>> --- a/source/encoder/slicetype.h Thu Oct 31 16:23:27 2019 +0530
>> +++ b/source/encoder/slicetype.h Wed Nov 13 18:18:31 2019 +0530
>> @@ -43,6 +43,13 @@
>> #define AQ_EDGE_BIAS 0.5
>> #define EDGE_INCLINATION 45
>>
>> +#ifdef HIGH_BIT_DEPTH
>> +#define edgeThreshold 1023.0
>> +#else
>> +#define edgeThreshold 255.0
>> +#endif
>> +#define PI 3.14159265
>> +
>> /* Thread local data for lookahead tasks */
>> struct LookaheadTLD
>> {
>> @@ -258,6 +265,7 @@
>> CostEstimateGroup& operator=(const CostEstimateGroup&);
>> };
>>
>> -}
>> +bool computeEdge(pixel *edgePic, pixel *refPic, pixel *edgeTheta,
>> intptr_t stride, int height, int width, bool bcalcTheta);
>>
>> +}
>> #endif // ifndef X265_SLICETYPE_H
>> diff -r 04db2bfee5d6 -r 97a9eca413d8 source/test/regression-tests.txt
>> --- a/source/test/regression-tests.txt Thu Oct 31 16:23:27 2019 +0530
>> +++ b/source/test/regression-tests.txt Wed Nov 13 18:18:31 2019 +0530
>> @@ -159,6 +159,8 @@
>> Traffic_4096x2048_30p.y4m, --preset medium --frame-dup --dup-threshold
>> 60 --hrd --bitrate 10000 --vbv-bufsize 15000 --vbv-maxrate 12000
>> Kimono1_1920x1080_24_400.yuv,--preset superfast --qp 28 --zones
>> 0,139,q=32
>> Island_960x540_420p_8bit_24fps.yuv,--no-cutree --aq-mode 0 --bitrate
>> 6000 --scenecut-aware-qp
>> +sintel_trailer_2k_1920x1080_24.yuv, --preset medium --hist-scenecut
>> --hist-threshold 0.02 --frame-dup --dup-threshold 60 --hrd --bitrate 10000
>> --vbv-bufsize 15000 --vbv-maxrate 12000
>> +sintel_trailer_2k_1920x1080_24.yuv, --preset medium --hist-scenecut
>> --hist-threshold 0.02
>>
>> # Main12 intraCost overflow bug test
>> 720p50_parkrun_ter.y4m,--preset medium
>> diff -r 04db2bfee5d6 -r 97a9eca413d8 source/x265.h
>> --- a/source/x265.h Thu Oct 31 16:23:27 2019 +0530
>> +++ b/source/x265.h Wed Nov 13 18:18:31 2019 +0530
>> @@ -1024,7 +1024,8 @@
>> int lookaheadSlices;
>>
>> /* An arbitrary threshold which determines how aggressively the
>> lookahead
>> - * should detect scene cuts. The default (40) is recommended. */
>> + * should detect scene cuts for cost based scenecut detection.
>> + * The default (40) is recommended. */
>> int scenecutThreshold;
>>
>> /* Replace keyframes by using a column of intra blocks that move
>> across the video
>> @@ -1839,14 +1840,24 @@
>> * Default is disabled. */
>> int bEnableSceneCutAwareQp;
>>
>> - /*The duration(in milliseconds) for which there is a reduction in
>> the bits spent on the inter-frames after a scenecut
>> + /* The duration(in milliseconds) for which there is a reduction in
>> the bits spent on the inter-frames after a scenecut
>> * by increasing their QP, when bEnableSceneCutAwareQp is set.
>> Default is 500ms.*/
>> int scenecutWindow;
>>
>> /* The offset by which QP is incremented for inter-frames when
>> bEnableSceneCutAwareQp is set.
>> * Default is +5. */
>> int maxQpDelta;
>> +
>> + /* The threshold used for histogram-based scene cut detection.
>> + * A frame is classified as a scenecut or not by comparing this
>> + * threshold against the edge and chroma histogram SAD values.
>> + * Default 0.01. Range: Real number in the interval (0,2). */
>> + double edgeTransitionThreshold;
>> +
>> + /* Enables the histogram-based scenecut detection algorithm.
>> Default disabled. */
>> + int bHistBasedSceneCut;
>> } x265_param;
>> +
>> /* x265_param_alloc:
>> * Allocates an x265_param instance. The returned param structure is not
>> * special in any way, but using this method together with
>> x265_param_free()
>> diff -r 04db2bfee5d6 -r 97a9eca413d8 source/x265cli.h
>> --- a/source/x265cli.h Thu Oct 31 16:23:27 2019 +0530
>> +++ b/source/x265cli.h Wed Nov 13 18:18:31 2019 +0530
>> @@ -129,6 +129,9 @@
>> { "scenecut", required_argument, NULL, 0 },
>> { "no-scenecut", no_argument, NULL, 0 },
>> { "scenecut-bias", required_argument, NULL, 0 },
>> + { "hist-scenecut", no_argument, NULL, 0},
>> + { "no-hist-scenecut", no_argument, NULL, 0},
>> + { "hist-threshold", required_argument, NULL, 0},
>> { "fades", no_argument, NULL, 0 },
>> { "no-fades", no_argument, NULL, 0 },
>> { "scenecut-aware-qp", no_argument, NULL, 0 },
>> @@ -489,7 +492,10 @@
>> H0(" --gop-lookahead <integer> Extends gop boundary if a
>> scenecut is found within this from keyint boundary. Default 0\n");
>> H0(" --no-scenecut Disable adaptive I-frame
>> decision\n");
>> H0(" --scenecut <integer> How aggressively to insert
>> extra I-frames. Default %d\n", param->scenecutThreshold);
>> - H1(" --scenecut-bias <0..100.0> Bias for scenecut detection.
>> Default %.2f\n", param->scenecutBias);
>> + H1(" --scenecut-bias <0..100.0> Bias for scenecut detection.
>> Default %.2f\n", param->scenecutBias);
>> + H0(" --hist-scenecut Enables histogram based
>> scene-cut detection using histogram based algorithm.\n");
>> + H0(" --no-hist-scenecut Disables histogram based
>> scene-cut detection using histogram based algorithm.\n");
>> + H1(" --hist-threshold <0.0..2.0> Luma Edge histogram's
>> Normalized SAD threshold for histogram based scenecut detection Default
>> %.2f\n", param->edgeTransitionThreshold);
>> H0(" --[no-]fades Enable detection and handling
>> of fade-in regions. Default %s\n", OPT(param->bEnableFades));
>> H1(" --[no-]scenecut-aware-qp Enable increasing QP for frames
>> inside the scenecut window after scenecut. Default %s\n",
>> OPT(param->bEnableSceneCutAwareQp));
>> H1(" --scenecut-window <0..1000> QP incremental duration(in
>> milliseconds) when scenecut-aware-qp is enabled. Default %d\n",
>> param->scenecutWindow);
>>
>> --
>> *With Regards,*
>> *Srikanth Kurapati.*
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
--
Regards,
*Aruna Matheswaran,*
Video Codec Engineer,
Media & AI analytics BU,
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20191125/5966d078/attachment-0001.html>
More information about the x265-devel
mailing list