[x265] [PATCH] frameencoder: use a bonded worker thread to perform weight analysis, add stat

Steve Borho steve at borho.org
Fri Mar 6 02:11:48 CET 2015


# HG changeset patch
# User Steve Borho <steve at borho.org>
# Date 1425584683 21600
#      Thu Mar 05 13:44:43 2015 -0600
# Node ID 820dcc3216a55965b4f763dcc4ed4cf2244d4de7
# Parent  e6b519dfbf812f0ba392a70ce651bf589d2ab82a
frameencoder: use a bonded worker thread to perform weight analysis, add stat

Weight analysis can take a substantial amount of time. It is best to use a
worker thread so the frame encoder thread can stay blocked during all of this
processing (we want worker threads to use the cores, not the frame encoders).

Weight analysis can account for 1% of the total elapsed encoder time.

diff -r e6b519dfbf81 -r 820dcc3216a5 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Thu Mar 05 16:06:04 2015 +0530
+++ b/source/encoder/encoder.cpp	Thu Mar 05 13:44:43 2015 -0600
@@ -848,7 +848,8 @@
     int64_t lookaheadWorkerTime = m_lookahead->m_slicetypeDecideElapsedTime + m_lookahead->m_preLookaheadElapsedTime +
                                   batchElapsedTime + coopSliceElapsedTime;
 
-    int64_t totalWorkerTime = cuStats.totalCTUTime + cuStats.loopFilterElapsedTime + cuStats.pmodeTime + cuStats.pmeTime + lookaheadWorkerTime;
+    int64_t totalWorkerTime = cuStats.totalCTUTime + cuStats.loopFilterElapsedTime + cuStats.pmodeTime +
+                              cuStats.pmeTime + lookaheadWorkerTime + cuStats.weightAnalyzeTime;
     int64_t elapsedEncodeTime = x265_mdate() - m_encodeStartTime;
 
     int64_t interRDOTotalTime = 0, intraRDOTotalTime = 0;
@@ -898,6 +899,12 @@
     x265_log(m_param, X265_LOG_INFO, "CU: %%%05.2lf time spent in loop filters, average %.3lf ms per call\n",
              100.0 * cuStats.loopFilterElapsedTime / totalWorkerTime,
              ELAPSED_MSEC(cuStats.loopFilterElapsedTime) / cuStats.countLoopFilter);
+    if (cuStats.countWeightAnalyze && cuStats.weightAnalyzeTime)
+    {
+        x265_log(m_param, X265_LOG_INFO, "CU: %%%05.2lf time spent in weight analysis, average %.3lf ms per call\n",
+                 100.0 * cuStats.weightAnalyzeTime / totalWorkerTime,
+                 ELAPSED_MSEC(cuStats.weightAnalyzeTime) / cuStats.countWeightAnalyze);
+    }
     if (m_param->bDistributeModeAnalysis && cuStats.countPModeMasters)
     {
         x265_log(m_param, X265_LOG_INFO, "CU: %.3lf PMODE masters per CTU, each blocked an average of %.3lf ns\n",
diff -r e6b519dfbf81 -r 820dcc3216a5 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Thu Mar 05 16:06:04 2015 +0530
+++ b/source/encoder/frameencoder.cpp	Thu Mar 05 13:44:43 2015 -0600
@@ -284,6 +284,12 @@
     }
 }
 
+void FrameEncoder::WeightAnalysis::processTasks(int /* workerThreadId */)
+{
+    Frame* frame = master.m_frame;
+    weightAnalyse(*frame->m_encData->m_slice, *frame, *master.m_param);
+}
+
 void FrameEncoder::compressFrame()
 {
     ProfileScopeEvent(frameThread);
@@ -316,7 +322,18 @@
     bool bUseWeightP = slice->m_sliceType == P_SLICE && slice->m_pps->bUseWeightPred;
     bool bUseWeightB = slice->m_sliceType == B_SLICE && slice->m_pps->bUseWeightedBiPred;
     if (bUseWeightP || bUseWeightB)
-        weightAnalyse(*slice, *m_frame, *m_param);
+    {
+#if DETAILED_CU_STATS
+        m_cuStats.countWeightAnalyze++;
+        ScopedElapsedTime time(m_cuStats.weightAnalyzeTime);
+#endif
+        WeightAnalysis wa(*this);
+        if (m_pool && wa.tryBondPeers(*this, 1))
+            /* use an idle worker for weight analysis */
+            wa.waitForExit();
+        else
+            weightAnalyse(*slice, *m_frame, *m_param);
+    }
     else
         slice->disableWeights();
 
diff -r e6b519dfbf81 -r 820dcc3216a5 source/encoder/frameencoder.h
--- a/source/encoder/frameencoder.h	Thu Mar 05 16:06:04 2015 +0530
+++ b/source/encoder/frameencoder.h	Thu Mar 05 13:44:43 2015 -0600
@@ -200,6 +200,21 @@
     FrameFilter              m_frameFilter;
     NALList                  m_nalList;
 
+    class WeightAnalysis : public BondedTaskGroup
+    {
+    public:
+
+        FrameEncoder& master;
+
+        WeightAnalysis(FrameEncoder& fe) : master(fe) {}
+
+        void processTasks(int workerThreadId);
+
+    protected:
+
+        WeightAnalysis operator=(const WeightAnalysis&);
+    };
+
 protected:
 
     bool initializeGeoms();
diff -r e6b519dfbf81 -r 820dcc3216a5 source/encoder/search.h
--- a/source/encoder/search.h	Thu Mar 05 16:06:04 2015 +0530
+++ b/source/encoder/search.h	Thu Mar 05 13:44:43 2015 -0600
@@ -152,6 +152,7 @@
     int64_t  pmeBlockTime;                      // elapsed worker time blocked for pme batch completion
     int64_t  pmodeTime;                         // elapsed worker time processing pmode slave jobs
     int64_t  pmodeBlockTime;                    // elapsed worker time blocked for pmode batch completion
+    int64_t  weightAnalyzeTime;                 // elapsed worker time analyzing reference weights
     int64_t  totalCTUTime;                      // elapsed worker time in compressCTU (includes pmode master)
 
     uint64_t countIntraRDO[NUM_CU_DEPTH];
@@ -163,6 +164,7 @@
     uint64_t countPMEMasters;
     uint64_t countPModeTasks;
     uint64_t countPModeMasters;
+    uint64_t countWeightAnalyze;
     uint64_t totalCTUs;
 
     CUStats() { clear(); }
@@ -189,6 +191,7 @@
         pmeBlockTime += other.pmeBlockTime;
         pmodeTime += other.pmodeTime;
         pmodeBlockTime += other.pmodeBlockTime;
+        weightAnalyzeTime += other.weightAnalyzeTime;
         totalCTUTime += other.totalCTUTime;
 
         countIntraAnalysis += other.countIntraAnalysis;
@@ -198,6 +201,7 @@
         countPMEMasters += other.countPMEMasters;
         countPModeTasks += other.countPModeTasks;
         countPModeMasters += other.countPModeMasters;
+        countWeightAnalyze += other.countWeightAnalyze;
         totalCTUs += other.totalCTUs;
 
         other.clear();


More information about the x265-devel mailing list