[x265] [PATCH] noiseReduction: make NoiseReduction deterministic

Fri Oct 10 20:36:30 CEST 2014

On 10/10, deepthi at multicorewareinc.com wrote:
> # HG changeset patch
> # User Praveen Tiwari
> # Date 1412916802 -19800
> #      Fri Oct 10 10:23:22 2014 +0530
> # Node ID 6c8981f48732eab1d5691a279517e13aff9bba2e
> # Parent  4495af3b30bb7a361c3252a8800f8dfa9f191145
> noiseReduction: make NoiseReduction deterministic.
> 
> NoiseReduction is now deterministic for a given NR strength and
> frame encoder count.
> 
> diff -r 4495af3b30bb -r 6c8981f48732 source/common/quant.cpp
> --- a/source/common/quant.cpp	Thu Oct 09 14:28:14 2014 +0530
> +++ b/source/common/quant.cpp	Fri Oct 10 10:23:22 2014 +0530
> @@ -158,7 +158,7 @@
>      m_fencShortBuf = NULL;
>  }
>  
> -bool Quant::init(bool useRDOQ, double psyScale, const ScalingList& scalingList, Entropy& entropy)
> +bool Quant::init(bool useRDOQ, double psyScale, const ScalingList& scalingList, Entropy& entropy, int noiseReduction)
>  {
>      m_entropyCoder = &entropy;
>      m_useRDOQ = useRDOQ;
> @@ -167,7 +167,7 @@
>      m_resiDctCoeff = X265_MALLOC(int32_t, MAX_TR_SIZE * MAX_TR_SIZE * 2);
>      m_fencDctCoeff = m_resiDctCoeff + (MAX_TR_SIZE * MAX_TR_SIZE);
>      m_fencShortBuf = X265_MALLOC(int16_t, MAX_TR_SIZE * MAX_TR_SIZE);
> -    
> +    m_nr = noiseReduction > 0 ? X265_MALLOC(NoiseReduction, 1) : NULL;
>      return m_resiDctCoeff && m_fencShortBuf;
>  }
>  
> @@ -175,6 +175,7 @@
>  {
>      X265_FREE(m_resiDctCoeff);
>      X265_FREE(m_fencShortBuf);
> +    X265_FREE(m_nr);
>  }
>  
>  void Quant::setQPforQuant(const TComDataCU* cu)
> diff -r 4495af3b30bb -r 6c8981f48732 source/common/quant.h
> --- a/source/common/quant.h	Thu Oct 09 14:28:14 2014 +0530
> +++ b/source/common/quant.h	Fri Oct 10 10:23:22 2014 +0530
> @@ -84,7 +84,7 @@
>      ~Quant();
>  
>      /* one-time setup */
> -    bool init(bool useRDOQ, double psyScale, const ScalingList& scalingList, Entropy& entropy);
> +    bool init(bool useRDOQ, double psyScale, const ScalingList& scalingList, Entropy& entropy, int NoiseReduction);
>  
>      /* CU setup */
>      void setQPforQuant(const TComDataCU* cu);
> diff -r 4495af3b30bb -r 6c8981f48732 source/encoder/analysis.h
> --- a/source/encoder/analysis.h	Thu Oct 09 14:28:14 2014 +0530
> +++ b/source/encoder/analysis.h	Fri Oct 10 10:23:22 2014 +0530
> @@ -173,7 +173,7 @@
>  struct ThreadLocalData
>  {
>      Analysis analysis;
> -
> +	
>      ~ThreadLocalData() { analysis.destroy(); }
>  };
>  
> diff -r 4495af3b30bb -r 6c8981f48732 source/encoder/frameencoder.cpp
> --- a/source/encoder/frameencoder.cpp	Thu Oct 09 14:28:14 2014 +0530
> +++ b/source/encoder/frameencoder.cpp	Fri Oct 10 10:23:22 2014 +0530
> @@ -325,6 +325,20 @@
>          m_frameStats.percentSkip  = (double)totalSkip / totalCuCount;
>      }
>  
> +    if (m_nr)
> +    {
> +        for(int row = 0; row < m_numRows; row++)
> +        {
> +            /* Accumulate count and residualSum from each row CU encode into FrameEncoder::m_nr */
> +            for (int i = 0; i < 8; i++)
> +            {
> +                for (int j = 0; j < 1024; j++)
> +	                m_nr->residualSum[i][j] += m_rows[row].noiseReduction.residualSum[i][j];
> +                m_nr->count[i] += m_rows[row].noiseReduction.count[i];
> +            }
> +        }
> +    }

Lots of white-space nits in this patch (tabs, trailing spaces, and a missing space after `for`).

8 and 1024 are somewhat magic numbers; named constants would be clearer.

>      m_bs.resetBits();
>      m_entropyCoder.load(m_initSliceContext);
>      m_entropyCoder.setBitstream(&m_bs);
> @@ -616,7 +630,7 @@
>      // setup thread-local data
>      Slice *slice = m_frame->m_picSym->m_slice;
>      TComPicYuv* fenc = m_frame->getPicYuvOrg();
> -    tld.analysis.m_quant.m_nr = m_nr;
> +    
>      tld.analysis.m_me.setSourcePlane(fenc->getLumaAddr(), fenc->getStride());
>      tld.analysis.m_log = &tld.analysis.m_sliceTypeLog[m_frame->m_picSym->m_slice->m_sliceType];
>      tld.analysis.setQP(slice, slice->m_sliceQp);
> @@ -668,6 +682,14 @@
>              rowCoder.loadContexts(m_rows[row - 1].bufferedEntropy);
>          }
>  
> +        if (m_nr)
> +        {
> +            NoiseReduction* nr = tld.analysis.m_quant.m_nr;
> +            memcpy(nr->offsetDenoise, m_nr->offsetDenoise, sizeof(uint32_t) * 1024 * 8);
> +            memset(nr->count, 0, sizeof(uint32_t) * 8);
> +            memset(nr->residualSum, 0, sizeof(uint32_t) * 1024 * 8);
> +        }
> +
>          cu->loadCTUData(m_param->maxCUSize);
>          tld.analysis.m_quant.setQPforQuant(cu);
>          tld.analysis.compressCTU(cu, rowCoder); // Does all the CU analysis
> @@ -683,6 +705,18 @@
>          // Completed CU processing
>          curRow.completed++;
>  
> +        if (m_nr)
> +        {
> +            NoiseReduction* nr = tld.analysis.m_quant.m_nr;
> +            /* Accumulate count and residualSum from each CU encode into CTURow */
> +            for (int i = 0; i < 8; i++)
> +            {
> +                for (int j = 0; j < 1024; j++)
> +                    curRow.noiseReduction.residualSum[i][j] += nr->residualSum[i][j];
> +                curRow.noiseReduction.count[i] += nr->count[i];
> +            }
> +        }

This won't work with --pmode: multiple worker threads could be doing
quant for this CTU at the same time. This is why I suggest an array in
ThreadLocalData indexed by FrameEncoder. Each TLD will be doing at most
one quant() at a time, and we only need to accumulate NR stats per frame
encoder. That approach also doesn't require memcpy calls or
accumulations per CTU like this (only per frame).

>          // copy no. of intra, inter Cu cnt per row into frame stats for 2 pass
>          if (m_param->rc.bStatWrite)
>          {
> diff -r 4495af3b30bb -r 6c8981f48732 source/encoder/frameencoder.h
> --- a/source/encoder/frameencoder.h	Thu Oct 09 14:28:14 2014 +0530
> +++ b/source/encoder/frameencoder.h	Fri Oct 10 10:23:22 2014 +0530
> @@ -53,6 +53,8 @@
>      Entropy           rowGoOnCoder;     /* store context between CTUs, code bitstream if !SAO */
>  
>      FrameStats        rowStats;
> +    
> +    NoiseReduction    noiseReduction; /* accumulates count and residualSum from each CU encode */
>  
>      /* Threading variables */
>  
> @@ -80,6 +82,8 @@
>          busy = false;
>          completed = 0;
>          memset(&rowStats, 0, sizeof(rowStats));
> +        memset(noiseReduction.count, 0, sizeof(uint32_t) * 8);
> +        memset(noiseReduction.residualSum, 0, sizeof(uint32_t) * 1024 * 8);
>          rowGoOnCoder.load(initContext);
>      }
>  };
> diff -r 4495af3b30bb -r 6c8981f48732 source/encoder/search.cpp
> --- a/source/encoder/search.cpp	Thu Oct 09 14:28:14 2014 +0530
> +++ b/source/encoder/search.cpp	Fri Oct 10 10:23:22 2014 +0530
> @@ -77,7 +77,7 @@
>      m_me.setSearchMethod(param->searchMethod);
>      m_me.setSubpelRefine(param->subpelRefine);
>  
> -    bool ok = m_quant.init(m_bEnableRDOQ, param->psyRdoq, scalingList, m_entropyCoder);
> +    bool ok = m_quant.init(m_bEnableRDOQ, param->psyRdoq, scalingList, m_entropyCoder, param->noiseReduction);
>      ok &= Predict::allocBuffers(param->internalCsp);
>      ok &= m_predTempYuv.create(MAX_CU_SIZE, MAX_CU_SIZE, param->internalCsp);
>      ok &= m_bidirPredYuv[0].create(MAX_CU_SIZE, MAX_CU_SIZE, m_param->internalCsp);
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel

-- 
Steve Borho