[x265] [PATCH] Add emergency denoising when frame qp > QP_MAX_SPEC
Steve Borho
steve at borho.org
Thu Aug 20 05:17:35 CEST 2015
On 08/19, sagar at multicorewareinc.com wrote:
> # HG changeset patch
> # User Sagar Kotecha <sagar at multicorewareinc.com>
> # Date 1439978360 -19800
> # Node ID 8878f03570b2a8d0207f5e96bae3900de5653ec0
> # Parent 2980141a744a569ad6f60dbebdece76a4eababfd
> Add emergency denoising when frame qp > QP_MAX_SPEC
>
> This feature is ported from x264, and is turned on for VBV encodes
This behavior needs to be documented somewhere in the reST docs, since our
emergency denoise works differently than the one in x264.
> diff -r 2980141a744a -r 8878f03570b2 source/common/common.h
> --- a/source/common/common.h Tue Aug 18 12:45:52 2015 +0530
> +++ b/source/common/common.h Wed Aug 19 15:29:20 2015 +0530
> @@ -317,6 +317,9 @@
> #define CHROMA_V_SHIFT(x) (x == X265_CSP_I420)
> #define X265_MAX_PRED_MODE_PER_CTU 85 * 2 * 8
>
> +#define MAX_NUM_TR_COEFFS MAX_TR_SIZE * MAX_TR_SIZE // Maximum number of transform coefficients, for a 32x32 transform
> +#define MAX_NUM_TR_CATEGORIES 16 // 32, 16, 8, 4 transform categories each for luma and chroma
> +
> namespace X265_NS {
>
> enum { SAO_NUM_OFFSET = 4 };
> diff -r 2980141a744a -r 8878f03570b2 source/common/quant.cpp
> --- a/source/common/quant.cpp Tue Aug 18 12:45:52 2015 +0530
> +++ b/source/common/quant.cpp Wed Aug 19 15:29:20 2015 +0530
> @@ -444,12 +444,12 @@
> primitives.cu[sizeIdx].dct(m_fencShortBuf, m_fencDctCoeff, trSize);
> }
>
> - if (m_nr)
> + if (m_nr && m_nr->offset)
> {
> /* denoise is not applied to intra residual, so DST can be ignored */
> int cat = sizeIdx + 4 * !isLuma + 8 * !isIntra;
> int numCoeff = 1 << (log2TrSize * 2);
> - primitives.denoiseDct(m_resiDctCoeff, m_nr->residualSum[cat], m_nr->offsetDenoise[cat], numCoeff);
> + primitives.denoiseDct(m_resiDctCoeff, m_nr->residualSum[cat], m_nr->offset[cat], numCoeff);
> m_nr->count[cat]++;
> }
> }
> diff -r 2980141a744a -r 8878f03570b2 source/common/quant.h
> --- a/source/common/quant.h Tue Aug 18 12:45:52 2015 +0530
> +++ b/source/common/quant.h Wed Aug 19 15:29:20 2015 +0530
> @@ -59,18 +59,19 @@
> }
> };
>
> -#define MAX_NUM_TR_COEFFS MAX_TR_SIZE * MAX_TR_SIZE /* Maximum number of transform coefficients, for a 32x32 transform */
> -#define MAX_NUM_TR_CATEGORIES 16 /* 32, 16, 8, 4 transform categories each for luma and chroma */
> -
> // NOTE: MUST be 16-byte aligned for asm code
> struct NoiseReduction
> {
> /* 0 = luma 4x4, 1 = luma 8x8, 2 = luma 16x16, 3 = luma 32x32
> * 4 = chroma 4x4, 5 = chroma 8x8, 6 = chroma 16x16, 7 = chroma 32x32
> * Intra 0..7 - Inter 8..15 */
> - uint16_t offsetDenoise[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];
> - uint32_t residualSum[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];
> - uint32_t count[MAX_NUM_TR_CATEGORIES];
> + uint16_t nrOffsetDenoise[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];
> + uint32_t nrResidualSum[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];
> + uint32_t nrCount[MAX_NUM_TR_CATEGORIES];
> +
> + ALIGN_VAR_16(uint16_t, (*offset)[MAX_NUM_TR_COEFFS]);
> + uint32_t(*residualSum)[MAX_NUM_TR_COEFFS];
> + uint32_t *count;
> };
>
> class Quant
> diff -r 2980141a744a -r 8878f03570b2 source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp Tue Aug 18 12:45:52 2015 +0530
> +++ b/source/encoder/encoder.cpp Wed Aug 19 15:29:20 2015 +0530
> @@ -68,6 +68,7 @@
> m_latestParam = NULL;
> m_threadPool = NULL;
> m_analysisFile = NULL;
> + m_offsetEmergency = NULL;
> for (int i = 0; i < X265_MAX_FRAME_THREADS; i++)
> m_frameEncoder[i] = NULL;
>
> @@ -187,6 +188,7 @@
> {
> x265_log(m_param, X265_LOG_ERROR, "Unable to allocate scaling list arrays\n");
> m_aborted = true;
> + return;
> }
> else if (!m_param->scalingLists || !strcmp(m_param->scalingLists, "off"))
> m_scalingList.m_bEnabled = false;
> @@ -194,7 +196,6 @@
> m_scalingList.setDefaultScalingList();
> else if (m_scalingList.parseScalingList(m_param->scalingLists))
> m_aborted = true;
> - m_scalingList.setupQuantMatrices();
>
> m_lookahead = new Lookahead(m_param, m_threadPool);
> if (m_numPools)
> @@ -221,6 +222,83 @@
> }
> }
>
> + if (m_param->rc.vbvBufferSize)
> + {
> + m_offsetEmergency = (uint16_t(*)[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS])X265_MALLOC(uint16_t, MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS * (QP_MAX_MAX - QP_MAX_SPEC));
> + if (!m_offsetEmergency)
> + {
> + x265_log(m_param, X265_LOG_ERROR, "Unable to allocate memory\n");
> + m_aborted = true;
> + return;
> + }
> +
> + bool scalingEnabled = m_scalingList.m_bEnabled;
> + if (!scalingEnabled)
> + {
> + m_scalingList.setDefaultScalingList();
> + m_scalingList.setupQuantMatrices();
> + }
> + else
> + m_scalingList.setupQuantMatrices();
> +
> + for (int q = 0; q < QP_MAX_MAX - QP_MAX_SPEC; q++)
> + {
> + for (int cat = 0; cat < MAX_NUM_TR_CATEGORIES; cat++)
> + {
> + uint16_t *nrOffset = m_offsetEmergency[q][cat];
> +
> + int trSize = cat & 3;
> +
> + int coefCount = 1 << ((trSize + 2) * 2);
> +
> + /* Denoise chroma first then luma, then DC. */
> + int dcThreshold = (QP_MAX_MAX - QP_MAX_SPEC) * 2 / 3;
> + int lumaThreshold = (QP_MAX_MAX - QP_MAX_SPEC) * 2 / 3;
> + int chromaThreshold = 0;
> +
> + int thresh = (cat < 4 || (cat >= 8 && cat < 12)) ? lumaThreshold : chromaThreshold;
> +
> + double quantF = (double)(1ULL << (q / 6 + 16 + 8));
> +
> + for (int i = 0; i < coefCount; i++)
> + {
> + uint16_t max = (1 << (7 + X265_DEPTH)) - 1;
> + /* True "emergency mode": remove all DCT coefficients */
> + if (q == QP_MAX_MAX - QP_MAX_SPEC - 1)
> + {
> + nrOffset[i] = max;
> + continue;
> + }
> +
> + int iThresh = i == 0 ? dcThreshold : thresh;
> + if (q < iThresh)
> + {
> + nrOffset[i] = 0;
> + continue;
> + }
> +
> + int numList = (cat >= 8) * 3 + ((int)!iThresh);
> +
> + double pos = (double)(q - iThresh + 1) / (QP_MAX_MAX - QP_MAX_SPEC - iThresh);
> + double start = quantF / (m_scalingList.m_quantCoef[trSize][numList][QP_MAX_SPEC % 6][i]);
> +
> + // Formula chosen as an exponential scale to vaguely mimic the effects of a higher quantizer.
> + double bias = (pow(2, pos * (QP_MAX_MAX - QP_MAX_SPEC)) * 0.003 - 0.003) * start;
> + nrOffset[i] = (uint16_t)X265_MIN(bias + 0.5, max);
> + }
> + }
> + }
> +
> + if (!scalingEnabled)
> + {
> + m_scalingList.m_bEnabled = false;
> + m_scalingList.m_bDataPresent = false;
> + m_scalingList.setupQuantMatrices();
> + }
> + }
> + else
> + m_scalingList.setupQuantMatrices();
> +
> for (int i = 0; i < m_param->frameNumThreads; i++)
> {
> m_frameEncoder[i]->start();
> @@ -314,6 +392,8 @@
> delete m_rateControl;
> }
>
> + X265_FREE(m_offsetEmergency);
> +
> if (m_analysisFile)
> fclose(m_analysisFile);
>
> diff -r 2980141a744a -r 8878f03570b2 source/encoder/encoder.h
> --- a/source/encoder/encoder.h Tue Aug 18 12:45:52 2015 +0530
> +++ b/source/encoder/encoder.h Wed Aug 19 15:29:20 2015 +0530
> @@ -130,6 +130,10 @@
> bool m_aborted; // fatal error detected
> bool m_reconfigured; // reconfigure of encoder detected
>
> + uint16_t (*m_offsetEmergency)[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];
> + ALIGN_VAR_32(uint32_t, m_residualSumEmergency[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS]);
> + uint32_t m_countEmergency[MAX_NUM_TR_CATEGORIES];
> +
> Encoder();
> ~Encoder() {}
>
> diff -r 2980141a744a -r 8878f03570b2 source/encoder/frameencoder.cpp
> --- a/source/encoder/frameencoder.cpp Tue Aug 18 12:45:52 2015 +0530
> +++ b/source/encoder/frameencoder.cpp Wed Aug 19 15:29:20 2015 +0530
> @@ -135,7 +135,7 @@
> ok &= m_rce.picTimingSEI && m_rce.hrdTiming;
> }
>
> - if (m_param->noiseReductionIntra || m_param->noiseReductionInter)
> + if (m_param->noiseReductionIntra || m_param->noiseReductionInter || m_param->rc.vbvBufferSize)
> m_nr = X265_MALLOC(NoiseReduction, 1);
> if (m_nr)
> memset(m_nr, 0, sizeof(NoiseReduction));
> @@ -362,11 +362,47 @@
> }
> }
>
> + int numTLD;
> + if (m_pool)
> + numTLD = m_param->bEnableWavefront ? m_pool->m_numWorkers : m_pool->m_numWorkers + m_pool->m_numProviders;
> + else
> + numTLD = 1;
> +
> /* Get the QP for this frame from rate control. This call may block until
> * frames ahead of it in encode order have called rateControlEnd() */
> int qp = m_top->m_rateControl->rateControlStart(m_frame, &m_rce, m_top);
> m_rce.newQp = qp;
>
> + if (m_nr)
> + {
> + if (qp > QP_MAX_SPEC && m_frame->m_param->rc.vbvBufferSize)
> + {
> + for (int i = 0; i < numTLD; i++)
> + {
> + m_tld[i].analysis.m_quant.m_frameNr[m_jpId].offset = m_top->m_offsetEmergency[qp - QP_MAX_SPEC - 1];
> + m_tld[i].analysis.m_quant.m_frameNr[m_jpId].residualSum = m_top->m_residualSumEmergency;
> + m_tld[i].analysis.m_quant.m_frameNr[m_jpId].count = m_top->m_countEmergency;
> + }
> + }
> + else
> + {
> + if (m_param->noiseReductionIntra || m_param->noiseReductionInter)
> + {
> + for (int i = 0; i < numTLD; i++)
> + {
> + m_tld[i].analysis.m_quant.m_frameNr[m_jpId].offset = m_tld[i].analysis.m_quant.m_frameNr[m_jpId].nrOffsetDenoise;
> + m_tld[i].analysis.m_quant.m_frameNr[m_jpId].residualSum = m_tld[i].analysis.m_quant.m_frameNr[m_jpId].nrResidualSum;
> + m_tld[i].analysis.m_quant.m_frameNr[m_jpId].count = m_tld[i].analysis.m_quant.m_frameNr[m_jpId].nrCount;
> + }
> + }
> + else
> + {
> + for (int i = 0; i < numTLD; i++)
> + m_tld[i].analysis.m_quant.m_frameNr[m_jpId].offset = NULL;
> + }
> + }
> + }
> +
> /* Clip slice QP to 0-51 spec range before encoding */
> slice->m_sliceQp = x265_clip3(-QP_BD_OFFSET, QP_MAX_SPEC, qp);
>
> @@ -699,37 +735,36 @@
> }
> }
>
> - int numTLD;
> - if (m_pool)
> - numTLD = m_param->bEnableWavefront ? m_pool->m_numWorkers : m_pool->m_numWorkers + m_pool->m_numProviders;
> - else
> - numTLD = 1;
> -
> if (m_nr)
> {
> - /* Accumulate NR statistics from all worker threads */
> - for (int i = 0; i < numTLD; i++)
> - {
> - NoiseReduction* nr = &m_tld[i].analysis.m_quant.m_frameNr[m_jpId];
> - for (int cat = 0; cat < MAX_NUM_TR_CATEGORIES; cat++)
> - {
> - for (int coeff = 0; coeff < MAX_NUM_TR_COEFFS; coeff++)
> - m_nr->residualSum[cat][coeff] += nr->residualSum[cat][coeff];
> -
> - m_nr->count[cat] += nr->count[cat];
> - }
> - }
> -
> - noiseReductionUpdate();
> -
> - /* Copy updated NR coefficients back to all worker threads */
> - for (int i = 0; i < numTLD; i++)
> - {
> - NoiseReduction* nr = &m_tld[i].analysis.m_quant.m_frameNr[m_jpId];
> - memcpy(nr->offsetDenoise, m_nr->offsetDenoise, sizeof(uint16_t) * MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);
> - memset(nr->count, 0, sizeof(uint32_t) * MAX_NUM_TR_CATEGORIES);
> - memset(nr->residualSum, 0, sizeof(uint32_t) * MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);
> - }
> + bool nrEnabled = (m_rce.newQp < QP_MAX_SPEC || !m_param->rc.vbvBufferSize) && (m_param->noiseReductionIntra || m_param->noiseReductionInter);
> +
> + if (nrEnabled)
> + {
> + /* Accumulate NR statistics from all worker threads */
> + for (int i = 0; i < numTLD; i++)
> + {
> + NoiseReduction* nr = &m_tld[i].analysis.m_quant.m_frameNr[m_jpId];
> + for (int cat = 0; cat < MAX_NUM_TR_CATEGORIES; cat++)
> + {
> + for (int coeff = 0; coeff < MAX_NUM_TR_COEFFS; coeff++)
> + m_nr->nrResidualSum[cat][coeff] += nr->nrResidualSum[cat][coeff];
> +
> + m_nr->nrCount[cat] += nr->nrCount[cat];
> + }
> + }
> +
> + noiseReductionUpdate();
> +
> + /* Copy updated NR coefficients back to all worker threads */
> + for (int i = 0; i < numTLD; i++)
> + {
> + NoiseReduction* nr = &m_tld[i].analysis.m_quant.m_frameNr[m_jpId];
> + memcpy(nr->nrOffsetDenoise, m_nr->nrOffsetDenoise, sizeof(uint16_t)* MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);
> + memset(nr->nrCount, 0, sizeof(uint32_t)* MAX_NUM_TR_CATEGORIES);
> + memset(nr->nrResidualSum, 0, sizeof(uint32_t)* MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);
> + }
> + }
> }
>
> #if DETAILED_CU_STATS
> @@ -1253,25 +1288,25 @@
> int trSize = cat & 3;
> int coefCount = 1 << ((trSize + 2) * 2);
>
> - if (m_nr->count[cat] > maxBlocksPerTrSize[trSize])
> + if (m_nr->nrCount[cat] > maxBlocksPerTrSize[trSize])
> {
> for (int i = 0; i < coefCount; i++)
> - m_nr->residualSum[cat][i] >>= 1;
> - m_nr->count[cat] >>= 1;
> + m_nr->nrResidualSum[cat][i] >>= 1;
> + m_nr->nrCount[cat] >>= 1;
> }
>
> int nrStrength = cat < 8 ? m_param->noiseReductionIntra : m_param->noiseReductionInter;
> - uint64_t scaledCount = (uint64_t)nrStrength * m_nr->count[cat];
> + uint64_t scaledCount = (uint64_t)nrStrength * m_nr->nrCount[cat];
>
> for (int i = 0; i < coefCount; i++)
> {
> - uint64_t value = scaledCount + m_nr->residualSum[cat][i] / 2;
> - uint64_t denom = m_nr->residualSum[cat][i] + 1;
> - m_nr->offsetDenoise[cat][i] = (uint16_t)(value / denom);
> + uint64_t value = scaledCount + m_nr->nrResidualSum[cat][i] / 2;
> + uint64_t denom = m_nr->nrResidualSum[cat][i] + 1;
> + m_nr->nrOffsetDenoise[cat][i] = (uint16_t)(value / denom);
> }
>
> // Don't denoise DC coefficients
> - m_nr->offsetDenoise[cat][0] = 0;
> + m_nr->nrOffsetDenoise[cat][0] = 0;
> }
> }
>
> diff -r 2980141a744a -r 8878f03570b2 source/encoder/search.cpp
> --- a/source/encoder/search.cpp Tue Aug 18 12:45:52 2015 +0530
> +++ b/source/encoder/search.cpp Wed Aug 19 15:29:20 2015 +0530
> @@ -80,7 +80,7 @@
> m_me.init(param.searchMethod, param.subpelRefine, param.internalCsp);
>
> bool ok = m_quant.init(param.rdoqLevel, param.psyRdoq, scalingList, m_entropyCoder);
> - if (m_param->noiseReductionIntra || m_param->noiseReductionInter)
> + if (m_param->noiseReductionIntra || m_param->noiseReductionInter || m_param->rc.vbvBufferSize)
> ok &= m_quant.allocNoiseReduction(param);
>
> ok &= Predict::allocBuffers(param.internalCsp); /* sets m_hChromaShift & m_vChromaShift */
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
--
Steve Borho
More information about the x265-devel mailing list