[x265] [PATCH] noiseReduction: make noiseReduction deterministic for a given number of frameEncoders
Deepthi Nandakumar
deepthi at multicorewareinc.com
Wed Oct 15 02:39:50 CEST 2014
Any reason why threadLocalData->nr needs to use malloc and not new?
On Wed, Oct 15, 2014 at 5:54 AM, Deepthi Nandakumar <
deepthi at multicorewareinc.com> wrote:
> Aah, you' re right. In my older patch, the encoder was initialising
> threadLocalData.nr . Somehow, I missed that while making the new patch.
>
> On Wed, Oct 15, 2014 at 12:41 AM, Steve Borho <steve at borho.org> wrote:
>
>> On 10/14, deepthi at multicorewareinc.com wrote:
>> > # HG changeset patch
>> > # User Deepthi Nandakumar <deepthi at multicorewareinc.com>
>> > # Date 1413278604 -19800
>> > # Tue Oct 14 14:53:24 2014 +0530
>> > # Node ID c6e786dbbfaa39822799d17e6c32d49c6141a7fb
>> > # Parent 38b5733cc629dd16db770e6a93b4f994e13336f3
>> > noiseReduction: make noiseReduction deterministic for a given number of
>> frameEncoders.
>> >
>> > diff -r 38b5733cc629 -r c6e786dbbfaa source/common/frame.cpp
>> > --- a/source/common/frame.cpp Tue Oct 14 14:35:30 2014 +0530
>> > +++ b/source/common/frame.cpp Tue Oct 14 14:53:24 2014 +0530
>> > @@ -43,6 +43,7 @@
>> > m_picSym = NULL;
>> > m_reconRowCount.set(0);
>> > m_countRefEncoders = 0;
>> > + m_frameEncoderID = 0;
>> > memset(&m_lowres, 0, sizeof(m_lowres));
>> > m_next = NULL;
>> > m_prev = NULL;
>> > diff -r 38b5733cc629 -r c6e786dbbfaa source/common/frame.h
>> > --- a/source/common/frame.h Tue Oct 14 14:35:30 2014 +0530
>> > +++ b/source/common/frame.h Tue Oct 14 14:53:24 2014 +0530
>> > @@ -50,6 +50,7 @@
>> > TComPicSym* m_picSym;
>> > TComPicYuv* m_reconPicYuv;
>> > int m_POC;
>> > + int m_frameEncoderID; // To identify the ID of
>> the frameEncoder processing this frame
>> >
>> > //** Frame Parallelism - notification between FrameEncoders of
>> available motion reference rows **
>> > ThreadSafeInteger m_reconRowCount; // count of CTU rows
>> completely reconstructed and extended for motion reference
>> > diff -r 38b5733cc629 -r c6e786dbbfaa source/common/quant.cpp
>> > --- a/source/common/quant.cpp Tue Oct 14 14:35:30 2014 +0530
>> > +++ b/source/common/quant.cpp Tue Oct 14 14:53:24 2014 +0530
>> > @@ -156,6 +156,7 @@
>> > m_resiDctCoeff = NULL;
>> > m_fencDctCoeff = NULL;
>> > m_fencShortBuf = NULL;
>> > + m_nr = NULL;
>> > }
>> >
>> > bool Quant::init(bool useRDOQ, double psyScale, const ScalingList&
>> scalingList, Entropy& entropy)
>> > diff -r 38b5733cc629 -r c6e786dbbfaa source/encoder/analysis.cpp
>> > --- a/source/encoder/analysis.cpp Tue Oct 14 14:35:30 2014 +0530
>> > +++ b/source/encoder/analysis.cpp Tue Oct 14 14:53:24 2014 +0530
>> > @@ -292,7 +292,11 @@
>> > if (!jobId || m_param->rdLevel > 4)
>> > {
>> > slave->m_quant.setQPforQuant(cu);
>> > - slave->m_quant.m_nr = m_quant.m_nr;
>> > + if(m_param->noiseReduction)
>>
>> w-s
>>
>> > + {
>> > + int frameEncoderID = cu->m_pic->m_frameEncoderID;
>> > + slave->m_quant.m_nr =
>> &m_tld[threadId].m_nr[frameEncoderID];
>> > + }
>> >
>> slave->m_rdContexts[depth].cur.load(m_rdContexts[depth].cur);
>> > }
>> > }
>> > diff -r 38b5733cc629 -r c6e786dbbfaa source/encoder/analysis.h
>> > --- a/source/encoder/analysis.h Tue Oct 14 14:35:30 2014 +0530
>> > +++ b/source/encoder/analysis.h Tue Oct 14 14:53:24 2014 +0530
>> > @@ -172,7 +172,9 @@
>> > struct ThreadLocalData
>> > {
>> > Analysis analysis;
>> > -
>> > + NoiseReduction *m_nr;
>> > +
>> > + ThreadLocalData() { m_nr = NULL; }
>> > ~ThreadLocalData() { analysis.destroy(); }
>>
>> ThreadLocalData is a struct, so members don't get m_ prefixes.
>>
>> The destructor should be removed and make a destroy() function which
>> releases the noise reduction array and calls analysis.destroy();
>>
>> > };
>> >
>> > diff -r 38b5733cc629 -r c6e786dbbfaa source/encoder/encoder.cpp
>> > --- a/source/encoder/encoder.cpp Tue Oct 14 14:35:30 2014 +0530
>> > +++ b/source/encoder/encoder.cpp Tue Oct 14 14:53:24 2014 +0530
>> > @@ -74,6 +74,7 @@
>> > m_csvfpt = NULL;
>> > m_param = NULL;
>> > m_threadPool = 0;
>> > + m_numThreadLocalData = 0;
>> > }
>> >
>> > void Encoder::create()
>> > @@ -162,15 +163,17 @@
>> >
>> > /* Allocate thread local data, one for each thread pool worker and
>> > * if --no-wpp, one for each frame encoder */
>> > - int numLocalData = poolThreadCount;
>> > + m_numThreadLocalData = poolThreadCount;
>> > if (!m_param->bEnableWavefront)
>> > - numLocalData += m_param->frameNumThreads;
>> > - m_threadLocalData = new ThreadLocalData[numLocalData];
>> > - for (int i = 0; i < numLocalData; i++)
>> > + m_numThreadLocalData += m_param->frameNumThreads;
>> > + m_threadLocalData = new ThreadLocalData[m_numThreadLocalData];
>> > + for (int i = 0; i < m_numThreadLocalData; i++)
>> > {
>> > m_threadLocalData[i].analysis.setThreadPool(m_threadPool);
>> > m_threadLocalData[i].analysis.initSearch(m_param,
>> m_scalingList);
>> > m_threadLocalData[i].analysis.create(g_maxCUDepth + 1,
>> g_maxCUSize, m_threadLocalData);
>> > + if(m_param->noiseReduction)
>> > + m_threadLocalData[i].m_nr = new
>> NoiseReduction[m_param->frameNumThreads];
>>
>> w-s, and this should be X265_MALLOC(NoiseReduction,
>> m_param->frameNumThreads);
>>
>> > }
>> >
>> > if (!m_param->bEnableWavefront)
>> > @@ -240,6 +243,12 @@
>> > delete [] m_frameEncoder;
>> > }
>> >
>> > + if (m_param->noiseReduction)
>> > + {
>> > + for (int i = 0; i < m_numThreadLocalData; i++)
>> > + delete [] m_threadLocalData[i].m_nr;
>>
>> use destroy() for this
>>
>> > + }
>> > +
>> > delete [] m_threadLocalData;
>> >
>> > if (m_lookahead)
>> > @@ -400,6 +409,7 @@
>> > m_lookahead->flush();
>> >
>> > FrameEncoder *curEncoder = &m_frameEncoder[m_curEncoder];
>> > + curEncoder->m_frameEncoderID = m_curEncoder;
>>
>> this is too opaque. better to pass the ID to the frame encoder init()
>> function
>>
>> > m_curEncoder = (m_curEncoder + 1) % m_param->frameNumThreads;
>> > int ret = 0;
>> >
>> > diff -r 38b5733cc629 -r c6e786dbbfaa source/encoder/encoder.h
>> > --- a/source/encoder/encoder.h Tue Oct 14 14:35:30 2014 +0530
>> > +++ b/source/encoder/encoder.h Tue Oct 14 14:53:24 2014 +0530
>> > @@ -116,6 +116,7 @@
>> > PPS m_pps;
>> > NALList m_nalList;
>> > ScalingList m_scalingList; // quantization matrix
>> information
>> > + int m_numThreadLocalData;
>> >
>> > int m_lastBPSEI;
>> > uint32_t m_numDelayedPic;
>> > diff -r 38b5733cc629 -r c6e786dbbfaa source/encoder/frameencoder.cpp
>> > --- a/source/encoder/frameencoder.cpp Tue Oct 14 14:35:30 2014 +0530
>> > +++ b/source/encoder/frameencoder.cpp Tue Oct 14 14:53:24 2014 +0530
>> > @@ -45,6 +45,7 @@
>> > , m_frame(NULL)
>> > {
>> > m_totalTime = 0;
>> > + m_frameEncoderID = 0;
>> > m_bAllRowsStop = false;
>> > m_vbvResetTriggerRow = -1;
>> > m_outStreams = NULL;
>> > @@ -156,6 +157,7 @@
>> > PPAScopeEvent(FrameEncoder_compressFrame);
>> > int64_t startCompressTime = x265_mdate();
>> > Slice* slice = m_frame->m_picSym->m_slice;
>> > + m_frame->m_frameEncoderID = m_frameEncoderID; // Each Frame knows
>> the ID of the FrameEncoder encoding it
>> >
>> > /* Emit access unit delimiter unless this is the first frame and
>> the user is
>> > * not repeating headers (since AUD is supposed to be the first
>> NAL in the access
>> > @@ -393,8 +395,36 @@
>> > if (m_top->m_rateControl->rateControlEnd(m_frame,
>> m_accessUnitBits, &m_rce, &m_frameStats) < 0)
>> > m_top->m_aborted = true;
>> >
>> > + /* Accumulate NR statistics from all worker threads */
>> > + if (m_nr)
>> > + {
>> > + for (int i = 0; i < m_top->m_numThreadLocalData; i++)
>> > + {
>> > + NoiseReduction* nr =
>> &m_top->m_threadLocalData[i].m_nr[m_frameEncoderID];
>> > + for (int cat = 0; cat < MAX_NUM_TR_CATEGORIES; cat++)
>> > + {
>> > + for(int coeff = 0; coeff < MAX_NUM_TR_COEFFS; coeff++)
>> > + m_nr->residualSum[cat][coeff] +=
>> nr->residualSum[cat][coeff];
>> > +
>> > + m_nr->count[cat] += nr->count[cat];
>> > + }
>> > + }
>> > + }
>> > +
>> > noiseReductionUpdate();
>> >
>> > + /* Copy updated NR coefficients back to all worker threads */
>> > + if (m_nr)
>> > + {
>> > + for (int i = 0; i < m_top->m_numThreadLocalData; i++)
>> > + {
>> > + NoiseReduction* nr =
>> &m_top->m_threadLocalData[i].m_nr[m_frameEncoderID];
>> > + memcpy(nr->offsetDenoise, m_nr->offsetDenoise,
>> sizeof(uint32_t) * MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);
>> > + memset(nr->count, 0, sizeof(uint32_t) *
>> MAX_NUM_TR_CATEGORIES);
>> > + memset(nr->residualSum, 0, sizeof(uint32_t) *
>> MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);
>> > + }
>> > + }
>>
>> someone needs to initialize the NR structs in ThreadLocalData when they
>> are allocated. that's probably where the non-determinism is coming from
>>
>> > // Decrement referenced frame reference counts, allow them to be
>> recycled
>> > for (int l = 0; l < numPredDir; l++)
>> > {
>> > @@ -616,7 +646,8 @@
>> > // setup thread-local data
>> > Slice *slice = m_frame->m_picSym->m_slice;
>> > TComPicYuv* fenc = m_frame->getPicYuvOrg();
>> > - tld.analysis.m_quant.m_nr = m_nr;
>> > + if(m_param->noiseReduction)
>>
>> w-s
>>
>> > + tld.analysis.m_quant.m_nr = &tld.m_nr[m_frameEncoderID];
>> > tld.analysis.m_me.setSourcePlane(fenc->getLumaAddr(),
>> fenc->getStride());
>> > tld.analysis.m_log =
>> &tld.analysis.m_sliceTypeLog[m_frame->m_picSym->m_slice->m_sliceType];
>> > tld.analysis.setQP(slice, slice->m_sliceQp);
>> > diff -r 38b5733cc629 -r c6e786dbbfaa source/encoder/frameencoder.h
>> > --- a/source/encoder/frameencoder.h Tue Oct 14 14:35:30 2014 +0530
>> > +++ b/source/encoder/frameencoder.h Tue Oct 14 14:53:24 2014 +0530
>> > @@ -148,7 +148,7 @@
>> > int m_filterRowDelayCus;
>> > Event m_completionEvent;
>> > int64_t m_totalTime;
>> > -
>> > + int m_frameEncoderID;
>> > protected:
>>
>> w-s, try to keep the blank line before protected:
>>
>> >
>> > /* analyze / compress frame, can be run in parallel within
>> reference constraints */
>> > _______________________________________________
>> > x265-devel mailing list
>> > x265-devel at videolan.org
>> > https://mailman.videolan.org/listinfo/x265-devel
>>
>> --
>> Steve Borho
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20141015/57198be1/attachment-0001.html>
More information about the x265-devel
mailing list