[x265] [PATCH] noiseReduction: make NoiseReduction deterministic
Steve Borho
steve at borho.org
Fri Oct 10 20:36:30 CEST 2014
On 10/10, deepthi at multicorewareinc.com wrote:
> # HG changeset patch
> # User Praveen Tiwari
> # Date 1412916802 -19800
> # Fri Oct 10 10:23:22 2014 +0530
> # Node ID 6c8981f48732eab1d5691a279517e13aff9bba2e
> # Parent 4495af3b30bb7a361c3252a8800f8dfa9f191145
> noiseReduction: make NoiseReduction deterministic.
>
> NoiseReduction is now deterministic for a given NR strength and
> frame encoder count.
>
> diff -r 4495af3b30bb -r 6c8981f48732 source/common/quant.cpp
> --- a/source/common/quant.cpp Thu Oct 09 14:28:14 2014 +0530
> +++ b/source/common/quant.cpp Fri Oct 10 10:23:22 2014 +0530
> @@ -158,7 +158,7 @@
> m_fencShortBuf = NULL;
> }
>
> -bool Quant::init(bool useRDOQ, double psyScale, const ScalingList& scalingList, Entropy& entropy)
> +bool Quant::init(bool useRDOQ, double psyScale, const ScalingList& scalingList, Entropy& entropy, int noiseReduction)
> {
> m_entropyCoder = &entropy;
> m_useRDOQ = useRDOQ;
> @@ -167,7 +167,7 @@
> m_resiDctCoeff = X265_MALLOC(int32_t, MAX_TR_SIZE * MAX_TR_SIZE * 2);
> m_fencDctCoeff = m_resiDctCoeff + (MAX_TR_SIZE * MAX_TR_SIZE);
> m_fencShortBuf = X265_MALLOC(int16_t, MAX_TR_SIZE * MAX_TR_SIZE);
> -
> + m_nr = noiseReduction > 0 ? X265_MALLOC(NoiseReduction, 1) : NULL;
> return m_resiDctCoeff && m_fencShortBuf;
> }
>
> @@ -175,6 +175,7 @@
> {
> X265_FREE(m_resiDctCoeff);
> X265_FREE(m_fencShortBuf);
> + X265_FREE(m_nr);
> }
>
> void Quant::setQPforQuant(const TComDataCU* cu)
> diff -r 4495af3b30bb -r 6c8981f48732 source/common/quant.h
> --- a/source/common/quant.h Thu Oct 09 14:28:14 2014 +0530
> +++ b/source/common/quant.h Fri Oct 10 10:23:22 2014 +0530
> @@ -84,7 +84,7 @@
> ~Quant();
>
> /* one-time setup */
> - bool init(bool useRDOQ, double psyScale, const ScalingList& scalingList, Entropy& entropy);
> + bool init(bool useRDOQ, double psyScale, const ScalingList& scalingList, Entropy& entropy, int NoiseReduction);
>
> /* CU setup */
> void setQPforQuant(const TComDataCU* cu);
> diff -r 4495af3b30bb -r 6c8981f48732 source/encoder/analysis.h
> --- a/source/encoder/analysis.h Thu Oct 09 14:28:14 2014 +0530
> +++ b/source/encoder/analysis.h Fri Oct 10 10:23:22 2014 +0530
> @@ -173,7 +173,7 @@
> struct ThreadLocalData
> {
> Analysis analysis;
> -
> +
> ~ThreadLocalData() { analysis.destroy(); }
> };
>
> diff -r 4495af3b30bb -r 6c8981f48732 source/encoder/frameencoder.cpp
> --- a/source/encoder/frameencoder.cpp Thu Oct 09 14:28:14 2014 +0530
> +++ b/source/encoder/frameencoder.cpp Fri Oct 10 10:23:22 2014 +0530
> @@ -325,6 +325,20 @@
> m_frameStats.percentSkip = (double)totalSkip / totalCuCount;
> }
>
> + if (m_nr)
> + {
> + for(int row = 0; row < m_numRows; row++)
> + {
> + /* Accumulate count and residualSum from each row CU encode into FrameEncoder::m_nr */
> + for (int i = 0; i < 8; i++)
> + {
> + for (int j = 0; j < 1024; j++)
> + m_nr->residualSum[i][j] += m_rows[row].noiseReduction.residualSum[i][j];
> + m_nr->count[i] += m_rows[row].noiseReduction.count[i];
> + }
> + }
> + }
Lots of white-space nits in this hunk.
8 and 1024 are somewhat magic numbers; named constants would be clearer.
> m_bs.resetBits();
> m_entropyCoder.load(m_initSliceContext);
> m_entropyCoder.setBitstream(&m_bs);
> @@ -616,7 +630,7 @@
> // setup thread-local data
> Slice *slice = m_frame->m_picSym->m_slice;
> TComPicYuv* fenc = m_frame->getPicYuvOrg();
> - tld.analysis.m_quant.m_nr = m_nr;
> +
> tld.analysis.m_me.setSourcePlane(fenc->getLumaAddr(), fenc->getStride());
> tld.analysis.m_log = &tld.analysis.m_sliceTypeLog[m_frame->m_picSym->m_slice->m_sliceType];
> tld.analysis.setQP(slice, slice->m_sliceQp);
> @@ -668,6 +682,14 @@
> rowCoder.loadContexts(m_rows[row - 1].bufferedEntropy);
> }
>
> + if (m_nr)
> + {
> + NoiseReduction* nr = tld.analysis.m_quant.m_nr;
> + memcpy(nr->offsetDenoise, m_nr->offsetDenoise, sizeof(uint32_t) * 1024 * 8);
> + memset(nr->count, 0, sizeof(uint32_t) * 8);
> + memset(nr->residualSum, 0, sizeof(uint32_t) * 1024 * 8);
> + }
> +
> cu->loadCTUData(m_param->maxCUSize);
> tld.analysis.m_quant.setQPforQuant(cu);
> tld.analysis.compressCTU(cu, rowCoder); // Does all the CU analysis
> @@ -683,6 +705,18 @@
> // Completed CU processing
> curRow.completed++;
>
> + if (m_nr)
> + {
> + NoiseReduction* nr = tld.analysis.m_quant.m_nr;
> + /* Accumulate count and residualSum from each CU encode into CTURow */
> + for (int i = 0; i < 8; i++)
> + {
> + for (int j = 0; j < 1024; j++)
> + curRow.noiseReduction.residualSum[i][j] += nr->residualSum[i][j];
> + curRow.noiseReduction.count[i] += nr->count[i];
> + }
> + }
This won't work with --pmode: multiple worker threads could be doing
quant for this CTU at the same time. This is why I suggest an array in
ThreadLocalData indexed by FrameEncoder. Each TLD will be doing at most
one quant() at a time, and we only need to accumulate NR stats per frame
encoder. That approach also doesn't require memcpys or accumulations per
CTU like this (only per frame).
> // copy no. of intra, inter Cu cnt per row into frame stats for 2 pass
> if (m_param->rc.bStatWrite)
> {
> diff -r 4495af3b30bb -r 6c8981f48732 source/encoder/frameencoder.h
> --- a/source/encoder/frameencoder.h Thu Oct 09 14:28:14 2014 +0530
> +++ b/source/encoder/frameencoder.h Fri Oct 10 10:23:22 2014 +0530
> @@ -53,6 +53,8 @@
> Entropy rowGoOnCoder; /* store context between CTUs, code bitstream if !SAO */
>
> FrameStats rowStats;
> +
> + NoiseReduction noiseReduction; /* accumulates count and residualSum from each CU encode */
>
> /* Threading variables */
>
> @@ -80,6 +82,8 @@
> busy = false;
> completed = 0;
> memset(&rowStats, 0, sizeof(rowStats));
> + memset(noiseReduction.count, 0, sizeof(uint32_t) * 8);
> + memset(noiseReduction.residualSum, 0, sizeof(uint32_t) * 1024 * 8);
> rowGoOnCoder.load(initContext);
> }
> };
> diff -r 4495af3b30bb -r 6c8981f48732 source/encoder/search.cpp
> --- a/source/encoder/search.cpp Thu Oct 09 14:28:14 2014 +0530
> +++ b/source/encoder/search.cpp Fri Oct 10 10:23:22 2014 +0530
> @@ -77,7 +77,7 @@
> m_me.setSearchMethod(param->searchMethod);
> m_me.setSubpelRefine(param->subpelRefine);
>
> - bool ok = m_quant.init(m_bEnableRDOQ, param->psyRdoq, scalingList, m_entropyCoder);
> + bool ok = m_quant.init(m_bEnableRDOQ, param->psyRdoq, scalingList, m_entropyCoder, param->noiseReduction);
> ok &= Predict::allocBuffers(param->internalCsp);
> ok &= m_predTempYuv.create(MAX_CU_SIZE, MAX_CU_SIZE, param->internalCsp);
> ok &= m_bidirPredYuv[0].create(MAX_CU_SIZE, MAX_CU_SIZE, m_param->internalCsp);
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
--
Steve Borho
More information about the x265-devel
mailing list