[x265] [PATCH] noiseReduction: make noiseReduction deterministic for a given number of frameEncoders

Deepthi Nandakumar deepthi at multicorewareinc.com
Wed Oct 15 02:24:28 CEST 2014


Aah, you' re right. In my older patch, the encoder was initialising
threadLocalData.nr . Somehow, I missed that while making the new patch.

On Wed, Oct 15, 2014 at 12:41 AM, Steve Borho <steve at borho.org> wrote:

> On 10/14, deepthi at multicorewareinc.com wrote:
> > # HG changeset patch
> > # User Deepthi Nandakumar <deepthi at multicorewareinc.com>
> > # Date 1413278604 -19800
> > #      Tue Oct 14 14:53:24 2014 +0530
> > # Node ID c6e786dbbfaa39822799d17e6c32d49c6141a7fb
> > # Parent  38b5733cc629dd16db770e6a93b4f994e13336f3
> > noiseReduction: make noiseReduction deterministic for a given number of
> frameEncoders.
> >
> > diff -r 38b5733cc629 -r c6e786dbbfaa source/common/frame.cpp
> > --- a/source/common/frame.cpp Tue Oct 14 14:35:30 2014 +0530
> > +++ b/source/common/frame.cpp Tue Oct 14 14:53:24 2014 +0530
> > @@ -43,6 +43,7 @@
> >      m_picSym = NULL;
> >      m_reconRowCount.set(0);
> >      m_countRefEncoders = 0;
> > +    m_frameEncoderID = 0;
> >      memset(&m_lowres, 0, sizeof(m_lowres));
> >      m_next = NULL;
> >      m_prev = NULL;
> > diff -r 38b5733cc629 -r c6e786dbbfaa source/common/frame.h
> > --- a/source/common/frame.h   Tue Oct 14 14:35:30 2014 +0530
> > +++ b/source/common/frame.h   Tue Oct 14 14:53:24 2014 +0530
> > @@ -50,6 +50,7 @@
> >      TComPicSym*       m_picSym;
> >      TComPicYuv*       m_reconPicYuv;
> >      int               m_POC;
> > +    int               m_frameEncoderID;     // To identify the ID of
> the frameEncoder processing this frame
> >
> >      //** Frame Parallelism - notification between FrameEncoders of
> available motion reference rows **
> >      ThreadSafeInteger m_reconRowCount;      // count of CTU rows
> completely reconstructed and extended for motion reference
> > diff -r 38b5733cc629 -r c6e786dbbfaa source/common/quant.cpp
> > --- a/source/common/quant.cpp Tue Oct 14 14:35:30 2014 +0530
> > +++ b/source/common/quant.cpp Tue Oct 14 14:53:24 2014 +0530
> > @@ -156,6 +156,7 @@
> >      m_resiDctCoeff = NULL;
> >      m_fencDctCoeff = NULL;
> >      m_fencShortBuf = NULL;
> > +    m_nr           = NULL;
> >  }
> >
> >  bool Quant::init(bool useRDOQ, double psyScale, const ScalingList&
> scalingList, Entropy& entropy)
> > diff -r 38b5733cc629 -r c6e786dbbfaa source/encoder/analysis.cpp
> > --- a/source/encoder/analysis.cpp     Tue Oct 14 14:35:30 2014 +0530
> > +++ b/source/encoder/analysis.cpp     Tue Oct 14 14:53:24 2014 +0530
> > @@ -292,7 +292,11 @@
> >          if (!jobId || m_param->rdLevel > 4)
> >          {
> >              slave->m_quant.setQPforQuant(cu);
> > -            slave->m_quant.m_nr = m_quant.m_nr;
> > +            if(m_param->noiseReduction)
>
> w-s
>
> > +            {
> > +                int frameEncoderID = cu->m_pic->m_frameEncoderID;
> > +                slave->m_quant.m_nr =
> &m_tld[threadId].m_nr[frameEncoderID];
> > +            }
> >
> slave->m_rdContexts[depth].cur.load(m_rdContexts[depth].cur);
> >          }
> >      }
> > diff -r 38b5733cc629 -r c6e786dbbfaa source/encoder/analysis.h
> > --- a/source/encoder/analysis.h       Tue Oct 14 14:35:30 2014 +0530
> > +++ b/source/encoder/analysis.h       Tue Oct 14 14:53:24 2014 +0530
> > @@ -172,7 +172,9 @@
> >  struct ThreadLocalData
> >  {
> >      Analysis analysis;
> > -
> > +    NoiseReduction *m_nr;
> > +
> > +    ThreadLocalData() { m_nr = NULL; }
> >      ~ThreadLocalData() { analysis.destroy(); }
>
> ThreadLocalData is a struct, so members don't get m_ prefixes.
>
> The destructor should be removed and make a destroy() function which
> releases the noise reduction array and calls analysis.destroy();
>
> >  };
> >
> > diff -r 38b5733cc629 -r c6e786dbbfaa source/encoder/encoder.cpp
> > --- a/source/encoder/encoder.cpp      Tue Oct 14 14:35:30 2014 +0530
> > +++ b/source/encoder/encoder.cpp      Tue Oct 14 14:53:24 2014 +0530
> > @@ -74,6 +74,7 @@
> >      m_csvfpt = NULL;
> >      m_param = NULL;
> >      m_threadPool = 0;
> > +    m_numThreadLocalData = 0;
> >  }
> >
> >  void Encoder::create()
> > @@ -162,15 +163,17 @@
> >
> >      /* Allocate thread local data, one for each thread pool worker and
> >       * if --no-wpp, one for each frame encoder */
> > -    int numLocalData = poolThreadCount;
> > +    m_numThreadLocalData = poolThreadCount;
> >      if (!m_param->bEnableWavefront)
> > -        numLocalData += m_param->frameNumThreads;
> > -    m_threadLocalData = new ThreadLocalData[numLocalData];
> > -    for (int i = 0; i < numLocalData; i++)
> > +        m_numThreadLocalData += m_param->frameNumThreads;
> > +    m_threadLocalData = new ThreadLocalData[m_numThreadLocalData];
> > +    for (int i = 0; i < m_numThreadLocalData; i++)
> >      {
> >          m_threadLocalData[i].analysis.setThreadPool(m_threadPool);
> >          m_threadLocalData[i].analysis.initSearch(m_param,
> m_scalingList);
> >          m_threadLocalData[i].analysis.create(g_maxCUDepth + 1,
> g_maxCUSize, m_threadLocalData);
> > +        if(m_param->noiseReduction)
> > +            m_threadLocalData[i].m_nr = new
> NoiseReduction[m_param->frameNumThreads];
>
> w-s, and this should be X265_MALLOC(NoiseReduction,
> m_param->frameNumThreads);
>
> >      }
> >
> >      if (!m_param->bEnableWavefront)
> > @@ -240,6 +243,12 @@
> >          delete [] m_frameEncoder;
> >      }
> >
> > +    if (m_param->noiseReduction)
> > +    {
> > +        for (int i = 0; i < m_numThreadLocalData; i++)
> > +            delete [] m_threadLocalData[i].m_nr;
>
> use destroy() for this
>
> > +    }
> > +
> >      delete [] m_threadLocalData;
> >
> >      if (m_lookahead)
> > @@ -400,6 +409,7 @@
> >          m_lookahead->flush();
> >
> >      FrameEncoder *curEncoder = &m_frameEncoder[m_curEncoder];
> > +    curEncoder->m_frameEncoderID = m_curEncoder;
>
> this is too opaque. better to pass the ID to the frame encoder init()
> function
>
> >      m_curEncoder = (m_curEncoder + 1) % m_param->frameNumThreads;
> >      int ret = 0;
> >
> > diff -r 38b5733cc629 -r c6e786dbbfaa source/encoder/encoder.h
> > --- a/source/encoder/encoder.h        Tue Oct 14 14:35:30 2014 +0530
> > +++ b/source/encoder/encoder.h        Tue Oct 14 14:53:24 2014 +0530
> > @@ -116,6 +116,7 @@
> >      PPS                m_pps;
> >      NALList            m_nalList;
> >      ScalingList        m_scalingList;      // quantization matrix
> information
> > +    int                m_numThreadLocalData;
> >
> >      int                m_lastBPSEI;
> >      uint32_t           m_numDelayedPic;
> > diff -r 38b5733cc629 -r c6e786dbbfaa source/encoder/frameencoder.cpp
> > --- a/source/encoder/frameencoder.cpp Tue Oct 14 14:35:30 2014 +0530
> > +++ b/source/encoder/frameencoder.cpp Tue Oct 14 14:53:24 2014 +0530
> > @@ -45,6 +45,7 @@
> >      , m_frame(NULL)
> >  {
> >      m_totalTime = 0;
> > +    m_frameEncoderID = 0;
> >      m_bAllRowsStop = false;
> >      m_vbvResetTriggerRow = -1;
> >      m_outStreams = NULL;
> > @@ -156,6 +157,7 @@
> >      PPAScopeEvent(FrameEncoder_compressFrame);
> >      int64_t startCompressTime = x265_mdate();
> >      Slice* slice = m_frame->m_picSym->m_slice;
> > +    m_frame->m_frameEncoderID = m_frameEncoderID; // Each Frame knows
> the ID of the FrameEncoder encoding it
> >
> >      /* Emit access unit delimiter unless this is the first frame and
> the user is
> >       * not repeating headers (since AUD is supposed to be the first NAL
> in the access
> > @@ -393,8 +395,36 @@
> >      if (m_top->m_rateControl->rateControlEnd(m_frame, m_accessUnitBits,
> &m_rce, &m_frameStats) < 0)
> >          m_top->m_aborted = true;
> >
> > +    /* Accumulate NR statistics from all worker threads */
> > +    if (m_nr)
> > +    {
> > +        for (int i = 0; i < m_top->m_numThreadLocalData; i++)
> > +        {
> > +            NoiseReduction* nr =
> &m_top->m_threadLocalData[i].m_nr[m_frameEncoderID];
> > +            for (int cat = 0; cat < MAX_NUM_TR_CATEGORIES; cat++)
> > +            {
> > +                for(int coeff = 0; coeff < MAX_NUM_TR_COEFFS; coeff++)
> > +                    m_nr->residualSum[cat][coeff] +=
> nr->residualSum[cat][coeff];
> > +
> > +                m_nr->count[cat] += nr->count[cat];
> > +            }
> > +        }
> > +    }
> > +
> >      noiseReductionUpdate();
> >
> > +    /* Copy updated NR coefficients back to all worker threads */
> > +    if (m_nr)
> > +    {
> > +        for (int i = 0; i < m_top->m_numThreadLocalData; i++)
> > +        {
> > +            NoiseReduction* nr =
> &m_top->m_threadLocalData[i].m_nr[m_frameEncoderID];
> > +            memcpy(nr->offsetDenoise, m_nr->offsetDenoise,
> sizeof(uint32_t) * MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);
> > +            memset(nr->count, 0, sizeof(uint32_t) *
> MAX_NUM_TR_CATEGORIES);
> > +            memset(nr->residualSum, 0, sizeof(uint32_t) *
> MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);
> > +        }
> > +    }
>
> someone needs to initialize the NR structs in ThreadLocalData when they
> are allocated. that's probably where the non-determinism is coming from
>
> >      // Decrement referenced frame reference counts, allow them to be
> recycled
> >      for (int l = 0; l < numPredDir; l++)
> >      {
> > @@ -616,7 +646,8 @@
> >      // setup thread-local data
> >      Slice *slice = m_frame->m_picSym->m_slice;
> >      TComPicYuv* fenc = m_frame->getPicYuvOrg();
> > -    tld.analysis.m_quant.m_nr = m_nr;
> > +    if(m_param->noiseReduction)
>
> w-s
>
> > +        tld.analysis.m_quant.m_nr = &tld.m_nr[m_frameEncoderID];
> >      tld.analysis.m_me.setSourcePlane(fenc->getLumaAddr(),
> fenc->getStride());
> >      tld.analysis.m_log =
> &tld.analysis.m_sliceTypeLog[m_frame->m_picSym->m_slice->m_sliceType];
> >      tld.analysis.setQP(slice, slice->m_sliceQp);
> > diff -r 38b5733cc629 -r c6e786dbbfaa source/encoder/frameencoder.h
> > --- a/source/encoder/frameencoder.h   Tue Oct 14 14:35:30 2014 +0530
> > +++ b/source/encoder/frameencoder.h   Tue Oct 14 14:53:24 2014 +0530
> > @@ -148,7 +148,7 @@
> >      int                      m_filterRowDelayCus;
> >      Event                    m_completionEvent;
> >      int64_t                  m_totalTime;
> > -
> > +    int                      m_frameEncoderID;
> >  protected:
>
> w-s, try to keep the blank line before protected:
>
> >
> >      /* analyze / compress frame, can be run in parallel within
> reference constraints */
> > _______________________________________________
> > x265-devel mailing list
> > x265-devel at videolan.org
> > https://mailman.videolan.org/listinfo/x265-devel
>
> --
> Steve Borho
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20141015/3a9dba05/attachment-0001.html>


More information about the x265-devel mailing list