<div dir="ltr"><div class="gmail_default" style="font-family:georgia,serif;font-size:small;color:#000000"><br></div><div class="gmail_extra"><br><div class="gmail_quote">On Thu, Aug 20, 2015 at 8:47 AM, Steve Borho <span dir="ltr"><<a href="mailto:steve@borho.org" target="_blank">steve@borho.org</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><span class="">On 08/19, <a href="mailto:sagar@multicorewareinc.com">sagar@multicorewareinc.com</a> wrote:<br>
> # HG changeset patch<br>
> # User Sagar Kotecha <<a href="mailto:sagar@multicorewareinc.com">sagar@multicorewareinc.com</a>><br>
> # Date 1439978360 -19800<br>
> # Node ID 8878f03570b2a8d0207f5e96bae3900de5653ec0<br>
> # Parent 2980141a744a569ad6f60dbebdece76a4eababfd<br>
> Add emergency denoising when frame qp > QP_MAX_SPEC<br>
><br>
> This feature is ported from x264, and is turned on for VBV encodes<br>
<br>
</span>This needs to be documented in the reST docs somewhere, since our<br>
emergency denoise works differently than in x264.<br>
<div><div class="h5"><br></div></div></blockquote><div><br></div><div><div class="gmail_default" style="font-family:georgia,serif;font-size:small;color:rgb(0,0,0);display:inline">OK, I will send it in a separate patch.</div></div><div><div class="gmail_default" style="font-family:georgia,serif;font-size:small;color:rgb(0,0,0);display:inline"></div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div><div class="h5">
> diff -r 2980141a744a -r 8878f03570b2 source/common/common.h<br>
> --- a/source/common/common.h Tue Aug 18 12:45:52 2015 +0530<br>
> +++ b/source/common/common.h Wed Aug 19 15:29:20 2015 +0530<br>
> @@ -317,6 +317,9 @@<br>
> #define CHROMA_V_SHIFT(x) (x == X265_CSP_I420)<br>
> #define X265_MAX_PRED_MODE_PER_CTU 85 * 2 * 8<br>
><br>
> +#define MAX_NUM_TR_COEFFS MAX_TR_SIZE * MAX_TR_SIZE // Maximum number of transform coefficients, for a 32x32 transform<br>
> +#define MAX_NUM_TR_CATEGORIES 16 // 32, 16, 8, 4 transform categories each for luma and chroma<br>
> +<br>
> namespace X265_NS {<br>
><br>
> enum { SAO_NUM_OFFSET = 4 };<br>
> diff -r 2980141a744a -r 8878f03570b2 source/common/quant.cpp<br>
> --- a/source/common/quant.cpp Tue Aug 18 12:45:52 2015 +0530<br>
> +++ b/source/common/quant.cpp Wed Aug 19 15:29:20 2015 +0530<br>
> @@ -444,12 +444,12 @@<br>
> <a href="http://primitives.cu" rel="noreferrer" target="_blank">primitives.cu</a>[sizeIdx].dct(m_fencShortBuf, m_fencDctCoeff, trSize);<br>
> }<br>
><br>
> - if (m_nr)<br>
> + if (m_nr && m_nr->offset)<br>
> {<br>
> /* denoise is not applied to intra residual, so DST can be ignored */<br>
> int cat = sizeIdx + 4 * !isLuma + 8 * !isIntra;<br>
> int numCoeff = 1 << (log2TrSize * 2);<br>
> - primitives.denoiseDct(m_resiDctCoeff, m_nr->residualSum[cat], m_nr->offsetDenoise[cat], numCoeff);<br>
> + primitives.denoiseDct(m_resiDctCoeff, m_nr->residualSum[cat], m_nr->offset[cat], numCoeff);<br>
> m_nr->count[cat]++;<br>
> }<br>
> }<br>
> diff -r 2980141a744a -r 8878f03570b2 source/common/quant.h<br>
> --- a/source/common/quant.h Tue Aug 18 12:45:52 2015 +0530<br>
> +++ b/source/common/quant.h Wed Aug 19 15:29:20 2015 +0530<br>
> @@ -59,18 +59,19 @@<br>
> }<br>
> };<br>
><br>
> -#define MAX_NUM_TR_COEFFS MAX_TR_SIZE * MAX_TR_SIZE /* Maximum number of transform coefficients, for a 32x32 transform */<br>
> -#define MAX_NUM_TR_CATEGORIES 16 /* 32, 16, 8, 4 transform categories each for luma and chroma */<br>
> -<br>
> // NOTE: MUST be 16-byte aligned for asm code<br>
> struct NoiseReduction<br>
> {<br>
> /* 0 = luma 4x4, 1 = luma 8x8, 2 = luma 16x16, 3 = luma 32x32<br>
> * 4 = chroma 4x4, 5 = chroma 8x8, 6 = chroma 16x16, 7 = chroma 32x32<br>
> * Intra 0..7 - Inter 8..15 */<br>
> - uint16_t offsetDenoise[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];<br>
> - uint32_t residualSum[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];<br>
> - uint32_t count[MAX_NUM_TR_CATEGORIES];<br>
> + uint16_t nrOffsetDenoise[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];<br>
> + uint32_t nrResidualSum[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];<br>
> + uint32_t nrCount[MAX_NUM_TR_CATEGORIES];<br>
> +<br>
> + ALIGN_VAR_16(uint16_t, (*offset)[MAX_NUM_TR_COEFFS]);<br>
> + uint32_t(*residualSum)[MAX_NUM_TR_COEFFS];<br>
> + uint32_t *count;<br>
> };<br>
><br>
> class Quant<br>
> diff -r 2980141a744a -r 8878f03570b2 source/encoder/encoder.cpp<br>
> --- a/source/encoder/encoder.cpp Tue Aug 18 12:45:52 2015 +0530<br>
> +++ b/source/encoder/encoder.cpp Wed Aug 19 15:29:20 2015 +0530<br>
> @@ -68,6 +68,7 @@<br>
> m_latestParam = NULL;<br>
> m_threadPool = NULL;<br>
> m_analysisFile = NULL;<br>
> + m_offsetEmergency = NULL;<br>
> for (int i = 0; i < X265_MAX_FRAME_THREADS; i++)<br>
> m_frameEncoder[i] = NULL;<br>
><br>
> @@ -187,6 +188,7 @@<br>
> {<br>
> x265_log(m_param, X265_LOG_ERROR, "Unable to allocate scaling list arrays\n");<br>
> m_aborted = true;<br>
> + return;<br>
> }<br>
> else if (!m_param->scalingLists || !strcmp(m_param->scalingLists, "off"))<br>
> m_scalingList.m_bEnabled = false;<br>
> @@ -194,7 +196,6 @@<br>
> m_scalingList.setDefaultScalingList();<br>
> else if (m_scalingList.parseScalingList(m_param->scalingLists))<br>
> m_aborted = true;<br>
> - m_scalingList.setupQuantMatrices();<br>
><br>
> m_lookahead = new Lookahead(m_param, m_threadPool);<br>
> if (m_numPools)<br>
> @@ -221,6 +222,83 @@<br>
> }<br>
> }<br>
><br>
> + if (m_param->rc.vbvBufferSize)<br>
> + {<br>
> + m_offsetEmergency = (uint16_t(*)[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS])X265_MALLOC(uint16_t, MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS * (QP_MAX_MAX - QP_MAX_SPEC));<br>
> + if (!m_offsetEmergency)<br>
> + {<br>
> + x265_log(m_param, X265_LOG_ERROR, "Unable to allocate memory\n");<br>
> + m_aborted = true;<br>
> + return;<br>
> + }<br>
> +<br>
> + bool scalingEnabled = m_scalingList.m_bEnabled;<br>
> + if (!scalingEnabled)<br>
> + {<br>
> + m_scalingList.setDefaultScalingList();<br>
> + m_scalingList.setupQuantMatrices();<br>
> + }<br>
> + else<br>
> + m_scalingList.setupQuantMatrices();<br>
> +<br>
> + for (int q = 0; q < QP_MAX_MAX - QP_MAX_SPEC; q++)<br>
> + {<br>
> + for (int cat = 0; cat < MAX_NUM_TR_CATEGORIES; cat++)<br>
> + {<br>
> + uint16_t *nrOffset = m_offsetEmergency[q][cat];<br>
> +<br>
> + int trSize = cat & 3;<br>
> +<br>
> + int coefCount = 1 << ((trSize + 2) * 2);<br>
> +<br>
> + /* Denoise chroma first then luma, then DC. */<br>
> + int dcThreshold = (QP_MAX_MAX - QP_MAX_SPEC) * 2 / 3;<br>
> + int lumaThreshold = (QP_MAX_MAX - QP_MAX_SPEC) * 2 / 3;<br>
> + int chromaThreshold = 0;<br>
> +<br>
> + int thresh = (cat < 4 || (cat >= 8 && cat < 12)) ? lumaThreshold : chromaThreshold;<br>
> +<br>
> + double quantF = (double)(1ULL << (q / 6 + 16 + 8));<br>
> +<br>
> + for (int i = 0; i < coefCount; i++)<br>
> + {<br>
> + uint16_t max = (1 << (7 + X265_DEPTH)) - 1;<br>
> + /* True "emergency mode": remove all DCT coefficients */<br>
> + if (q == QP_MAX_MAX - QP_MAX_SPEC - 1)<br>
> + {<br>
> + nrOffset[i] = max;<br>
> + continue;<br>
> + }<br>
> +<br>
> + int iThresh = i == 0 ? dcThreshold : thresh;<br>
> + if (q < iThresh)<br>
> + {<br>
> + nrOffset[i] = 0;<br>
> + continue;<br>
> + }<br>
> +<br>
> + int numList = (cat >= 8) * 3 + ((int)!iThresh);<br>
> +<br>
> + double pos = (double)(q - iThresh + 1) / (QP_MAX_MAX - QP_MAX_SPEC - iThresh);<br>
> + double start = quantF / (m_scalingList.m_quantCoef[trSize][numList][QP_MAX_SPEC % 6][i]);<br>
> +<br>
> + // Formula chosen as an exponential scale to vaguely mimic the effects of a higher quantizer.<br>
> + double bias = (pow(2, pos * (QP_MAX_MAX - QP_MAX_SPEC)) * 0.003 - 0.003) * start;<br>
> + nrOffset[i] = (uint16_t)X265_MIN(bias + 0.5, max);<br>
> + }<br>
> + }<br>
> + }<br>
> +<br>
> + if (!scalingEnabled)<br>
> + {<br>
> + m_scalingList.m_bEnabled = false;<br>
> + m_scalingList.m_bDataPresent = false;<br>
> + m_scalingList.setupQuantMatrices();<br>
> + }<br>
> + }<br>
> + else<br>
> + m_scalingList.setupQuantMatrices();<br>
> +<br>
> for (int i = 0; i < m_param->frameNumThreads; i++)<br>
> {<br>
> m_frameEncoder[i]->start();<br>
> @@ -314,6 +392,8 @@<br>
> delete m_rateControl;<br>
> }<br>
><br>
> + X265_FREE(m_offsetEmergency);<br>
> +<br>
> if (m_analysisFile)<br>
> fclose(m_analysisFile);<br>
><br>
> diff -r 2980141a744a -r 8878f03570b2 source/encoder/encoder.h<br>
> --- a/source/encoder/encoder.h Tue Aug 18 12:45:52 2015 +0530<br>
> +++ b/source/encoder/encoder.h Wed Aug 19 15:29:20 2015 +0530<br>
> @@ -130,6 +130,10 @@<br>
> bool m_aborted; // fatal error detected<br>
> bool m_reconfigured; // reconfigure of encoder detected<br>
><br>
> + uint16_t (*m_offsetEmergency)[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];<br>
> + ALIGN_VAR_32(uint32_t, m_residualSumEmergency[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS]);<br>
> + uint32_t m_countEmergency[MAX_NUM_TR_CATEGORIES];<br>
> +<br>
> Encoder();<br>
> ~Encoder() {}<br>
><br>
> diff -r 2980141a744a -r 8878f03570b2 source/encoder/frameencoder.cpp<br>
> --- a/source/encoder/frameencoder.cpp Tue Aug 18 12:45:52 2015 +0530<br>
> +++ b/source/encoder/frameencoder.cpp Wed Aug 19 15:29:20 2015 +0530<br>
> @@ -135,7 +135,7 @@<br>
> ok &= m_rce.picTimingSEI && m_rce.hrdTiming;<br>
> }<br>
><br>
> - if (m_param->noiseReductionIntra || m_param->noiseReductionInter)<br>
> + if (m_param->noiseReductionIntra || m_param->noiseReductionInter || m_param->rc.vbvBufferSize)<br>
> m_nr = X265_MALLOC(NoiseReduction, 1);<br>
> if (m_nr)<br>
> memset(m_nr, 0, sizeof(NoiseReduction));<br>
> @@ -362,11 +362,47 @@<br>
> }<br>
> }<br>
><br>
> + int numTLD;<br>
> + if (m_pool)<br>
> + numTLD = m_param->bEnableWavefront ? m_pool->m_numWorkers : m_pool->m_numWorkers + m_pool->m_numProviders;<br>
> + else<br>
> + numTLD = 1;<br>
> +<br>
> /* Get the QP for this frame from rate control. This call may block until<br>
> * frames ahead of it in encode order have called rateControlEnd() */<br>
> int qp = m_top->m_rateControl->rateControlStart(m_frame, &m_rce, m_top);<br>
> m_rce.newQp = qp;<br>
><br>
> + if (m_nr)<br>
> + {<br>
> + if (qp > QP_MAX_SPEC && m_frame->m_param->rc.vbvBufferSize)<br>
> + {<br>
> + for (int i = 0; i < numTLD; i++)<br>
> + {<br>
> + m_tld[i].analysis.m_quant.m_frameNr[m_jpId].offset = m_top->m_offsetEmergency[qp - QP_MAX_SPEC - 1];<br>
> + m_tld[i].analysis.m_quant.m_frameNr[m_jpId].residualSum = m_top->m_residualSumEmergency;<br>
> + m_tld[i].analysis.m_quant.m_frameNr[m_jpId].count = m_top->m_countEmergency;<br>
> + }<br>
> + }<br>
> + else<br>
> + {<br>
> + if (m_param->noiseReductionIntra || m_param->noiseReductionInter)<br>
> + {<br>
> + for (int i = 0; i < numTLD; i++)<br>
> + {<br>
> + m_tld[i].analysis.m_quant.m_frameNr[m_jpId].offset = m_tld[i].analysis.m_quant.m_frameNr[m_jpId].nrOffsetDenoise;<br>
> + m_tld[i].analysis.m_quant.m_frameNr[m_jpId].residualSum = m_tld[i].analysis.m_quant.m_frameNr[m_jpId].nrResidualSum;<br>
> + m_tld[i].analysis.m_quant.m_frameNr[m_jpId].count = m_tld[i].analysis.m_quant.m_frameNr[m_jpId].nrCount;<br>
> + }<br>
> + }<br>
> + else<br>
> + {<br>
> + for (int i = 0; i < numTLD; i++)<br>
> + m_tld[i].analysis.m_quant.m_frameNr[m_jpId].offset = NULL;<br>
> + }<br>
> + }<br>
> + }<br>
> +<br>
> /* Clip slice QP to 0-51 spec range before encoding */<br>
> slice->m_sliceQp = x265_clip3(-QP_BD_OFFSET, QP_MAX_SPEC, qp);<br>
><br>
> @@ -699,37 +735,36 @@<br>
> }<br>
> }<br>
><br>
> - int numTLD;<br>
> - if (m_pool)<br>
> - numTLD = m_param->bEnableWavefront ? m_pool->m_numWorkers : m_pool->m_numWorkers + m_pool->m_numProviders;<br>
> - else<br>
> - numTLD = 1;<br>
> -<br>
> if (m_nr)<br>
> {<br>
> - /* Accumulate NR statistics from all worker threads */<br>
> - for (int i = 0; i < numTLD; i++)<br>
> - {<br>
> - NoiseReduction* nr = &m_tld[i].analysis.m_quant.m_frameNr[m_jpId];<br>
> - for (int cat = 0; cat < MAX_NUM_TR_CATEGORIES; cat++)<br>
> - {<br>
> - for (int coeff = 0; coeff < MAX_NUM_TR_COEFFS; coeff++)<br>
> - m_nr->residualSum[cat][coeff] += nr->residualSum[cat][coeff];<br>
> -<br>
> - m_nr->count[cat] += nr->count[cat];<br>
> - }<br>
> - }<br>
> -<br>
> - noiseReductionUpdate();<br>
> -<br>
> - /* Copy updated NR coefficients back to all worker threads */<br>
> - for (int i = 0; i < numTLD; i++)<br>
> - {<br>
> - NoiseReduction* nr = &m_tld[i].analysis.m_quant.m_frameNr[m_jpId];<br>
> - memcpy(nr->offsetDenoise, m_nr->offsetDenoise, sizeof(uint16_t) * MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);<br>
> - memset(nr->count, 0, sizeof(uint32_t) * MAX_NUM_TR_CATEGORIES);<br>
> - memset(nr->residualSum, 0, sizeof(uint32_t) * MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);<br>
> - }<br>
> + bool nrEnabled = (m_rce.newQp < QP_MAX_SPEC || !m_param->rc.vbvBufferSize) && (m_param->noiseReductionIntra || m_param->noiseReductionInter);<br>
> +<br>
> + if (nrEnabled)<br>
> + {<br>
> + /* Accumulate NR statistics from all worker threads */<br>
> + for (int i = 0; i < numTLD; i++)<br>
> + {<br>
> + NoiseReduction* nr = &m_tld[i].analysis.m_quant.m_frameNr[m_jpId];<br>
> + for (int cat = 0; cat < MAX_NUM_TR_CATEGORIES; cat++)<br>
> + {<br>
> + for (int coeff = 0; coeff < MAX_NUM_TR_COEFFS; coeff++)<br>
> + m_nr->nrResidualSum[cat][coeff] += nr->nrResidualSum[cat][coeff];<br>
> +<br>
> + m_nr->nrCount[cat] += nr->nrCount[cat];<br>
> + }<br>
> + }<br>
> +<br>
> + noiseReductionUpdate();<br>
> +<br>
> + /* Copy updated NR coefficients back to all worker threads */<br>
> + for (int i = 0; i < numTLD; i++)<br>
> + {<br>
> + NoiseReduction* nr = &m_tld[i].analysis.m_quant.m_frameNr[m_jpId];<br>
> + memcpy(nr->nrOffsetDenoise, m_nr->nrOffsetDenoise, sizeof(uint16_t)* MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);<br>
> + memset(nr->nrCount, 0, sizeof(uint32_t)* MAX_NUM_TR_CATEGORIES);<br>
> + memset(nr->nrResidualSum, 0, sizeof(uint32_t)* MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);<br>
> + }<br>
> + }<br>
> }<br>
><br>
> #if DETAILED_CU_STATS<br>
> @@ -1253,25 +1288,25 @@<br>
> int trSize = cat & 3;<br>
> int coefCount = 1 << ((trSize + 2) * 2);<br>
><br>
> - if (m_nr->count[cat] > maxBlocksPerTrSize[trSize])<br>
> + if (m_nr->nrCount[cat] > maxBlocksPerTrSize[trSize])<br>
> {<br>
> for (int i = 0; i < coefCount; i++)<br>
> - m_nr->residualSum[cat][i] >>= 1;<br>
> - m_nr->count[cat] >>= 1;<br>
> + m_nr->nrResidualSum[cat][i] >>= 1;<br>
> + m_nr->nrCount[cat] >>= 1;<br>
> }<br>
><br>
> int nrStrength = cat < 8 ? m_param->noiseReductionIntra : m_param->noiseReductionInter;<br>
> - uint64_t scaledCount = (uint64_t)nrStrength * m_nr->count[cat];<br>
> + uint64_t scaledCount = (uint64_t)nrStrength * m_nr->nrCount[cat];<br>
><br>
> for (int i = 0; i < coefCount; i++)<br>
> {<br>
> - uint64_t value = scaledCount + m_nr->residualSum[cat][i] / 2;<br>
> - uint64_t denom = m_nr->residualSum[cat][i] + 1;<br>
> - m_nr->offsetDenoise[cat][i] = (uint16_t)(value / denom);<br>
> + uint64_t value = scaledCount + m_nr->nrResidualSum[cat][i] / 2;<br>
> + uint64_t denom = m_nr->nrResidualSum[cat][i] + 1;<br>
> + m_nr->nrOffsetDenoise[cat][i] = (uint16_t)(value / denom);<br>
> }<br>
><br>
> // Don't denoise DC coefficients<br>
> - m_nr->offsetDenoise[cat][0] = 0;<br>
> + m_nr->nrOffsetDenoise[cat][0] = 0;<br>
> }<br>
> }<br>
><br>
> diff -r 2980141a744a -r 8878f03570b2 source/encoder/search.cpp<br>
> --- a/source/encoder/search.cpp Tue Aug 18 12:45:52 2015 +0530<br>
> +++ b/source/encoder/search.cpp Wed Aug 19 15:29:20 2015 +0530<br>
> @@ -80,7 +80,7 @@<br>
> m_me.init(param.searchMethod, param.subpelRefine, param.internalCsp);<br>
><br>
> bool ok = m_quant.init(param.rdoqLevel, param.psyRdoq, scalingList, m_entropyCoder);<br>
> - if (m_param->noiseReductionIntra || m_param->noiseReductionInter)<br>
> + if (m_param->noiseReductionIntra || m_param->noiseReductionInter || m_param->rc.vbvBufferSize)<br>
> ok &= m_quant.allocNoiseReduction(param);<br>
><br>
> ok &= Predict::allocBuffers(param.internalCsp); /* sets m_hChromaShift & m_vChromaShift */<br>
</div></div>> _______________________________________________<br>
> x265-devel mailing list<br>
> <a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
> <a href="https://mailman.videolan.org/listinfo/x265-devel" rel="noreferrer" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
<span class="HOEnZb"><font color="#888888"><br>
--<br>
Steve Borho<br>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" rel="noreferrer" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</font></span></blockquote></div><br></div></div>