[x265] [PATCH] noiseReduction: make noiseReduction deterministic for a given number of frameEncoders

deepthi at multicorewareinc.com deepthi at multicorewareinc.com
Tue Oct 14 13:24:53 CEST 2014


# HG changeset patch
# User Deepthi Nandakumar <deepthi at multicorewareinc.com>
# Date 1413278604 -19800
#      Tue Oct 14 14:53:24 2014 +0530
# Node ID c6e786dbbfaa39822799d17e6c32d49c6141a7fb
# Parent  38b5733cc629dd16db770e6a93b4f994e13336f3
noiseReduction: make noiseReduction deterministic for a given number of frameEncoders.

diff -r 38b5733cc629 -r c6e786dbbfaa source/common/frame.cpp
--- a/source/common/frame.cpp	Tue Oct 14 14:35:30 2014 +0530
+++ b/source/common/frame.cpp	Tue Oct 14 14:53:24 2014 +0530
@@ -43,6 +43,7 @@
     m_picSym = NULL;
     m_reconRowCount.set(0);
     m_countRefEncoders = 0;
+    m_frameEncoderID = 0;
     memset(&m_lowres, 0, sizeof(m_lowres));
     m_next = NULL;
     m_prev = NULL;
diff -r 38b5733cc629 -r c6e786dbbfaa source/common/frame.h
--- a/source/common/frame.h	Tue Oct 14 14:35:30 2014 +0530
+++ b/source/common/frame.h	Tue Oct 14 14:53:24 2014 +0530
@@ -50,6 +50,7 @@
     TComPicSym*       m_picSym;
     TComPicYuv*       m_reconPicYuv;
     int               m_POC;
+    int               m_frameEncoderID;     // To identify the ID of the frameEncoder processing this frame
 
     //** Frame Parallelism - notification between FrameEncoders of available motion reference rows **
     ThreadSafeInteger m_reconRowCount;      // count of CTU rows completely reconstructed and extended for motion reference
diff -r 38b5733cc629 -r c6e786dbbfaa source/common/quant.cpp
--- a/source/common/quant.cpp	Tue Oct 14 14:35:30 2014 +0530
+++ b/source/common/quant.cpp	Tue Oct 14 14:53:24 2014 +0530
@@ -156,6 +156,7 @@
     m_resiDctCoeff = NULL;
     m_fencDctCoeff = NULL;
     m_fencShortBuf = NULL;
+    m_nr           = NULL;
 }
 
 bool Quant::init(bool useRDOQ, double psyScale, const ScalingList& scalingList, Entropy& entropy)
diff -r 38b5733cc629 -r c6e786dbbfaa source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Tue Oct 14 14:35:30 2014 +0530
+++ b/source/encoder/analysis.cpp	Tue Oct 14 14:53:24 2014 +0530
@@ -292,7 +292,11 @@
         if (!jobId || m_param->rdLevel > 4)
         {
             slave->m_quant.setQPforQuant(cu);
-            slave->m_quant.m_nr = m_quant.m_nr;
+            if(m_param->noiseReduction)
+            {
+                int frameEncoderID = cu->m_pic->m_frameEncoderID;
+                slave->m_quant.m_nr = &m_tld[threadId].m_nr[frameEncoderID];
+            }
             slave->m_rdContexts[depth].cur.load(m_rdContexts[depth].cur);
         }
     }
diff -r 38b5733cc629 -r c6e786dbbfaa source/encoder/analysis.h
--- a/source/encoder/analysis.h	Tue Oct 14 14:35:30 2014 +0530
+++ b/source/encoder/analysis.h	Tue Oct 14 14:53:24 2014 +0530
@@ -172,7 +172,9 @@
 struct ThreadLocalData
 {
     Analysis analysis;
-
+    NoiseReduction *m_nr;
+    
+    ThreadLocalData() { m_nr = NULL; }
     ~ThreadLocalData() { analysis.destroy(); }
 };
 
diff -r 38b5733cc629 -r c6e786dbbfaa source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Tue Oct 14 14:35:30 2014 +0530
+++ b/source/encoder/encoder.cpp	Tue Oct 14 14:53:24 2014 +0530
@@ -74,6 +74,7 @@
     m_csvfpt = NULL;
     m_param = NULL;
     m_threadPool = 0;
+    m_numThreadLocalData = 0;
 }
 
 void Encoder::create()
@@ -162,15 +163,17 @@
 
     /* Allocate thread local data, one for each thread pool worker and
      * if --no-wpp, one for each frame encoder */
-    int numLocalData = poolThreadCount;
+    m_numThreadLocalData = poolThreadCount;
     if (!m_param->bEnableWavefront)
-        numLocalData += m_param->frameNumThreads;
-    m_threadLocalData = new ThreadLocalData[numLocalData];
-    for (int i = 0; i < numLocalData; i++)
+        m_numThreadLocalData += m_param->frameNumThreads;
+    m_threadLocalData = new ThreadLocalData[m_numThreadLocalData];
+    for (int i = 0; i < m_numThreadLocalData; i++)
     {
         m_threadLocalData[i].analysis.setThreadPool(m_threadPool);
         m_threadLocalData[i].analysis.initSearch(m_param, m_scalingList);
         m_threadLocalData[i].analysis.create(g_maxCUDepth + 1, g_maxCUSize, m_threadLocalData);
+        if(m_param->noiseReduction)
+            m_threadLocalData[i].m_nr = new NoiseReduction[m_param->frameNumThreads];
     }
 
     if (!m_param->bEnableWavefront)
@@ -240,6 +243,12 @@
         delete [] m_frameEncoder;
     }
 
+    if (m_param->noiseReduction)
+    {
+        for (int i = 0; i < m_numThreadLocalData; i++)        
+            delete [] m_threadLocalData[i].m_nr;
+    }
+
     delete [] m_threadLocalData;
 
     if (m_lookahead)
@@ -400,6 +409,7 @@
         m_lookahead->flush();
 
     FrameEncoder *curEncoder = &m_frameEncoder[m_curEncoder];
+    curEncoder->m_frameEncoderID = m_curEncoder;
     m_curEncoder = (m_curEncoder + 1) % m_param->frameNumThreads;
     int ret = 0;
 
diff -r 38b5733cc629 -r c6e786dbbfaa source/encoder/encoder.h
--- a/source/encoder/encoder.h	Tue Oct 14 14:35:30 2014 +0530
+++ b/source/encoder/encoder.h	Tue Oct 14 14:53:24 2014 +0530
@@ -116,6 +116,7 @@
     PPS                m_pps;
     NALList            m_nalList;
     ScalingList        m_scalingList;      // quantization matrix information
+    int                m_numThreadLocalData;
 
     int                m_lastBPSEI;
     uint32_t           m_numDelayedPic;
diff -r 38b5733cc629 -r c6e786dbbfaa source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Tue Oct 14 14:35:30 2014 +0530
+++ b/source/encoder/frameencoder.cpp	Tue Oct 14 14:53:24 2014 +0530
@@ -45,6 +45,7 @@
     , m_frame(NULL)
 {
     m_totalTime = 0;
+    m_frameEncoderID = 0;
     m_bAllRowsStop = false;
     m_vbvResetTriggerRow = -1;
     m_outStreams = NULL;
@@ -156,6 +157,7 @@
     PPAScopeEvent(FrameEncoder_compressFrame);
     int64_t startCompressTime = x265_mdate();
     Slice* slice = m_frame->m_picSym->m_slice;
+    m_frame->m_frameEncoderID = m_frameEncoderID; // Each Frame knows the ID of the FrameEncoder encoding it
 
     /* Emit access unit delimiter unless this is the first frame and the user is
      * not repeating headers (since AUD is supposed to be the first NAL in the access
@@ -393,8 +395,36 @@
     if (m_top->m_rateControl->rateControlEnd(m_frame, m_accessUnitBits, &m_rce, &m_frameStats) < 0)
         m_top->m_aborted = true;
 
+    /* Accumulate NR statistics from all worker threads */
+    if (m_nr)
+    {
+        for (int i = 0; i < m_top->m_numThreadLocalData; i++)
+        {
+            NoiseReduction* nr = &m_top->m_threadLocalData[i].m_nr[m_frameEncoderID];
+            for (int cat = 0; cat < MAX_NUM_TR_CATEGORIES; cat++)
+            {
+                for(int coeff = 0; coeff < MAX_NUM_TR_COEFFS; coeff++)
+                    m_nr->residualSum[cat][coeff] += nr->residualSum[cat][coeff];
+            
+                m_nr->count[cat] += nr->count[cat];
+            }
+        }
+    }
+
     noiseReductionUpdate();
 
+    /* Copy updated NR coefficients back to all worker threads */
+    if (m_nr)
+    {
+        for (int i = 0; i < m_top->m_numThreadLocalData; i++)
+        {
+            NoiseReduction* nr = &m_top->m_threadLocalData[i].m_nr[m_frameEncoderID];
+            memcpy(nr->offsetDenoise, m_nr->offsetDenoise, sizeof(uint32_t) * MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);
+            memset(nr->count, 0, sizeof(uint32_t) * MAX_NUM_TR_CATEGORIES);
+            memset(nr->residualSum, 0, sizeof(uint32_t) * MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);
+        }
+    }
+
     // Decrement referenced frame reference counts, allow them to be recycled
     for (int l = 0; l < numPredDir; l++)
     {
@@ -616,7 +646,8 @@
     // setup thread-local data
     Slice *slice = m_frame->m_picSym->m_slice;
     TComPicYuv* fenc = m_frame->getPicYuvOrg();
-    tld.analysis.m_quant.m_nr = m_nr;
+    if(m_param->noiseReduction)
+        tld.analysis.m_quant.m_nr = &tld.m_nr[m_frameEncoderID];
     tld.analysis.m_me.setSourcePlane(fenc->getLumaAddr(), fenc->getStride());
     tld.analysis.m_log = &tld.analysis.m_sliceTypeLog[m_frame->m_picSym->m_slice->m_sliceType];
     tld.analysis.setQP(slice, slice->m_sliceQp);
diff -r 38b5733cc629 -r c6e786dbbfaa source/encoder/frameencoder.h
--- a/source/encoder/frameencoder.h	Tue Oct 14 14:35:30 2014 +0530
+++ b/source/encoder/frameencoder.h	Tue Oct 14 14:53:24 2014 +0530
@@ -148,7 +148,7 @@
     int                      m_filterRowDelayCus;
     Event                    m_completionEvent;
     int64_t                  m_totalTime;
-
+    int                      m_frameEncoderID;
 protected:
 
     /* analyze / compress frame, can be run in parallel within reference constraints */


More information about the x265-devel mailing list