[x265] [PATCH] noiseReduction: make noiseReduction deterministic for a given number of frameEncoders
Steve Borho
steve at borho.org
Tue Oct 14 21:11:04 CEST 2014
On 10/14, deepthi at multicorewareinc.com wrote:
> # HG changeset patch
> # User Deepthi Nandakumar <deepthi at multicorewareinc.com>
> # Date 1413278604 -19800
> # Tue Oct 14 14:53:24 2014 +0530
> # Node ID c6e786dbbfaa39822799d17e6c32d49c6141a7fb
> # Parent 38b5733cc629dd16db770e6a93b4f994e13336f3
> noiseReduction: make noiseReduction deterministic for a given number of frameEncoders.
>
> diff -r 38b5733cc629 -r c6e786dbbfaa source/common/frame.cpp
> --- a/source/common/frame.cpp Tue Oct 14 14:35:30 2014 +0530
> +++ b/source/common/frame.cpp Tue Oct 14 14:53:24 2014 +0530
> @@ -43,6 +43,7 @@
> m_picSym = NULL;
> m_reconRowCount.set(0);
> m_countRefEncoders = 0;
> + m_frameEncoderID = 0;
> memset(&m_lowres, 0, sizeof(m_lowres));
> m_next = NULL;
> m_prev = NULL;
> diff -r 38b5733cc629 -r c6e786dbbfaa source/common/frame.h
> --- a/source/common/frame.h Tue Oct 14 14:35:30 2014 +0530
> +++ b/source/common/frame.h Tue Oct 14 14:53:24 2014 +0530
> @@ -50,6 +50,7 @@
> TComPicSym* m_picSym;
> TComPicYuv* m_reconPicYuv;
> int m_POC;
> + int m_frameEncoderID; // To identify the ID of the frameEncoder processing this frame
>
> //** Frame Parallelism - notification between FrameEncoders of available motion reference rows **
> ThreadSafeInteger m_reconRowCount; // count of CTU rows completely reconstructed and extended for motion reference
> diff -r 38b5733cc629 -r c6e786dbbfaa source/common/quant.cpp
> --- a/source/common/quant.cpp Tue Oct 14 14:35:30 2014 +0530
> +++ b/source/common/quant.cpp Tue Oct 14 14:53:24 2014 +0530
> @@ -156,6 +156,7 @@
> m_resiDctCoeff = NULL;
> m_fencDctCoeff = NULL;
> m_fencShortBuf = NULL;
> + m_nr = NULL;
> }
>
> bool Quant::init(bool useRDOQ, double psyScale, const ScalingList& scalingList, Entropy& entropy)
> diff -r 38b5733cc629 -r c6e786dbbfaa source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp Tue Oct 14 14:35:30 2014 +0530
> +++ b/source/encoder/analysis.cpp Tue Oct 14 14:53:24 2014 +0530
> @@ -292,7 +292,11 @@
> if (!jobId || m_param->rdLevel > 4)
> {
> slave->m_quant.setQPforQuant(cu);
> - slave->m_quant.m_nr = m_quant.m_nr;
> + if(m_param->noiseReduction)
w-s
> + {
> + int frameEncoderID = cu->m_pic->m_frameEncoderID;
> + slave->m_quant.m_nr = &m_tld[threadId].m_nr[frameEncoderID];
> + }
> slave->m_rdContexts[depth].cur.load(m_rdContexts[depth].cur);
> }
> }
> diff -r 38b5733cc629 -r c6e786dbbfaa source/encoder/analysis.h
> --- a/source/encoder/analysis.h Tue Oct 14 14:35:30 2014 +0530
> +++ b/source/encoder/analysis.h Tue Oct 14 14:53:24 2014 +0530
> @@ -172,7 +172,9 @@
> struct ThreadLocalData
> {
> Analysis analysis;
> -
> + NoiseReduction *m_nr;
> +
> + ThreadLocalData() { m_nr = NULL; }
> ~ThreadLocalData() { analysis.destroy(); }
ThreadLocalData is a struct, so members don't get m_ prefixes.
The destructor should be removed and make a destroy() function which
releases the noise reduction array and calls analysis.destroy();
> };
>
> diff -r 38b5733cc629 -r c6e786dbbfaa source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp Tue Oct 14 14:35:30 2014 +0530
> +++ b/source/encoder/encoder.cpp Tue Oct 14 14:53:24 2014 +0530
> @@ -74,6 +74,7 @@
> m_csvfpt = NULL;
> m_param = NULL;
> m_threadPool = 0;
> + m_numThreadLocalData = 0;
> }
>
> void Encoder::create()
> @@ -162,15 +163,17 @@
>
> /* Allocate thread local data, one for each thread pool worker and
> * if --no-wpp, one for each frame encoder */
> - int numLocalData = poolThreadCount;
> + m_numThreadLocalData = poolThreadCount;
> if (!m_param->bEnableWavefront)
> - numLocalData += m_param->frameNumThreads;
> - m_threadLocalData = new ThreadLocalData[numLocalData];
> - for (int i = 0; i < numLocalData; i++)
> + m_numThreadLocalData += m_param->frameNumThreads;
> + m_threadLocalData = new ThreadLocalData[m_numThreadLocalData];
> + for (int i = 0; i < m_numThreadLocalData; i++)
> {
> m_threadLocalData[i].analysis.setThreadPool(m_threadPool);
> m_threadLocalData[i].analysis.initSearch(m_param, m_scalingList);
> m_threadLocalData[i].analysis.create(g_maxCUDepth + 1, g_maxCUSize, m_threadLocalData);
> + if(m_param->noiseReduction)
> + m_threadLocalData[i].m_nr = new NoiseReduction[m_param->frameNumThreads];
w-s, and this should be X265_MALLOC(NoiseReduction, m_param->frameNumThreads);
> }
>
> if (!m_param->bEnableWavefront)
> @@ -240,6 +243,12 @@
> delete [] m_frameEncoder;
> }
>
> + if (m_param->noiseReduction)
> + {
> + for (int i = 0; i < m_numThreadLocalData; i++)
> + delete [] m_threadLocalData[i].m_nr;
use destroy() for this
> + }
> +
> delete [] m_threadLocalData;
>
> if (m_lookahead)
> @@ -400,6 +409,7 @@
> m_lookahead->flush();
>
> FrameEncoder *curEncoder = &m_frameEncoder[m_curEncoder];
> + curEncoder->m_frameEncoderID = m_curEncoder;
this is too opaque. better to pass the ID to the frame encoder init()
function
> m_curEncoder = (m_curEncoder + 1) % m_param->frameNumThreads;
> int ret = 0;
>
> diff -r 38b5733cc629 -r c6e786dbbfaa source/encoder/encoder.h
> --- a/source/encoder/encoder.h Tue Oct 14 14:35:30 2014 +0530
> +++ b/source/encoder/encoder.h Tue Oct 14 14:53:24 2014 +0530
> @@ -116,6 +116,7 @@
> PPS m_pps;
> NALList m_nalList;
> ScalingList m_scalingList; // quantization matrix information
> + int m_numThreadLocalData;
>
> int m_lastBPSEI;
> uint32_t m_numDelayedPic;
> diff -r 38b5733cc629 -r c6e786dbbfaa source/encoder/frameencoder.cpp
> --- a/source/encoder/frameencoder.cpp Tue Oct 14 14:35:30 2014 +0530
> +++ b/source/encoder/frameencoder.cpp Tue Oct 14 14:53:24 2014 +0530
> @@ -45,6 +45,7 @@
> , m_frame(NULL)
> {
> m_totalTime = 0;
> + m_frameEncoderID = 0;
> m_bAllRowsStop = false;
> m_vbvResetTriggerRow = -1;
> m_outStreams = NULL;
> @@ -156,6 +157,7 @@
> PPAScopeEvent(FrameEncoder_compressFrame);
> int64_t startCompressTime = x265_mdate();
> Slice* slice = m_frame->m_picSym->m_slice;
> + m_frame->m_frameEncoderID = m_frameEncoderID; // Each Frame knows the ID of the FrameEncoder encoding it
>
> /* Emit access unit delimiter unless this is the first frame and the user is
> * not repeating headers (since AUD is supposed to be the first NAL in the access
> @@ -393,8 +395,36 @@
> if (m_top->m_rateControl->rateControlEnd(m_frame, m_accessUnitBits, &m_rce, &m_frameStats) < 0)
> m_top->m_aborted = true;
>
> + /* Accumulate NR statistics from all worker threads */
> + if (m_nr)
> + {
> + for (int i = 0; i < m_top->m_numThreadLocalData; i++)
> + {
> + NoiseReduction* nr = &m_top->m_threadLocalData[i].m_nr[m_frameEncoderID];
> + for (int cat = 0; cat < MAX_NUM_TR_CATEGORIES; cat++)
> + {
> + for(int coeff = 0; coeff < MAX_NUM_TR_COEFFS; coeff++)
> + m_nr->residualSum[cat][coeff] += nr->residualSum[cat][coeff];
> +
> + m_nr->count[cat] += nr->count[cat];
> + }
> + }
> + }
> +
> noiseReductionUpdate();
>
> + /* Copy updated NR coefficients back to all worker threads */
> + if (m_nr)
> + {
> + for (int i = 0; i < m_top->m_numThreadLocalData; i++)
> + {
> + NoiseReduction* nr = &m_top->m_threadLocalData[i].m_nr[m_frameEncoderID];
> + memcpy(nr->offsetDenoise, m_nr->offsetDenoise, sizeof(uint32_t) * MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);
> + memset(nr->count, 0, sizeof(uint32_t) * MAX_NUM_TR_CATEGORIES);
> + memset(nr->residualSum, 0, sizeof(uint32_t) * MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);
> + }
> + }
someone needs to initialize the NR structs in ThreadLocalData when they
are allocated. that's probably where the non-determinism is coming from
> // Decrement referenced frame reference counts, allow them to be recycled
> for (int l = 0; l < numPredDir; l++)
> {
> @@ -616,7 +646,8 @@
> // setup thread-local data
> Slice *slice = m_frame->m_picSym->m_slice;
> TComPicYuv* fenc = m_frame->getPicYuvOrg();
> - tld.analysis.m_quant.m_nr = m_nr;
> + if(m_param->noiseReduction)
w-s
> + tld.analysis.m_quant.m_nr = &tld.m_nr[m_frameEncoderID];
> tld.analysis.m_me.setSourcePlane(fenc->getLumaAddr(), fenc->getStride());
> tld.analysis.m_log = &tld.analysis.m_sliceTypeLog[m_frame->m_picSym->m_slice->m_sliceType];
> tld.analysis.setQP(slice, slice->m_sliceQp);
> diff -r 38b5733cc629 -r c6e786dbbfaa source/encoder/frameencoder.h
> --- a/source/encoder/frameencoder.h Tue Oct 14 14:35:30 2014 +0530
> +++ b/source/encoder/frameencoder.h Tue Oct 14 14:53:24 2014 +0530
> @@ -148,7 +148,7 @@
> int m_filterRowDelayCus;
> Event m_completionEvent;
> int64_t m_totalTime;
> -
> + int m_frameEncoderID;
> protected:
w-s, try to keep the blank line before protected:
>
> /* analyze / compress frame, can be run in parallel within reference constraints */
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
--
Steve Borho
More information about the x265-devel
mailing list