[x265] [PATCH] frameencoder: use a bonded worker thread to perform weight analysis, add stat
Steve Borho
steve at borho.org
Fri Mar 6 02:11:48 CET 2015
# HG changeset patch
# User Steve Borho <steve at borho.org>
# Date 1425584683 21600
# Thu Mar 05 13:44:43 2015 -0600
# Node ID 820dcc3216a55965b4f763dcc4ed4cf2244d4de7
# Parent e6b519dfbf812f0ba392a70ce651bf589d2ab82a
frameencoder: use a bonded worker thread to perform weight analysis, add stat
Weight analysis can take a substantial amount of time. It is best to use a
worker thread so the frame encoder thread can stay blocked during all of this
processing (we want worker threads to use the cores, not the frame encoders).
Weight analysis can account for 1% of the total elapsed encoder time.
diff -r e6b519dfbf81 -r 820dcc3216a5 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Thu Mar 05 16:06:04 2015 +0530
+++ b/source/encoder/encoder.cpp Thu Mar 05 13:44:43 2015 -0600
@@ -848,7 +848,8 @@
int64_t lookaheadWorkerTime = m_lookahead->m_slicetypeDecideElapsedTime + m_lookahead->m_preLookaheadElapsedTime +
batchElapsedTime + coopSliceElapsedTime;
- int64_t totalWorkerTime = cuStats.totalCTUTime + cuStats.loopFilterElapsedTime + cuStats.pmodeTime + cuStats.pmeTime + lookaheadWorkerTime;
+ int64_t totalWorkerTime = cuStats.totalCTUTime + cuStats.loopFilterElapsedTime + cuStats.pmodeTime +
+ cuStats.pmeTime + lookaheadWorkerTime + cuStats.weightAnalyzeTime;
int64_t elapsedEncodeTime = x265_mdate() - m_encodeStartTime;
int64_t interRDOTotalTime = 0, intraRDOTotalTime = 0;
@@ -898,6 +899,12 @@
x265_log(m_param, X265_LOG_INFO, "CU: %%%05.2lf time spent in loop filters, average %.3lf ms per call\n",
100.0 * cuStats.loopFilterElapsedTime / totalWorkerTime,
ELAPSED_MSEC(cuStats.loopFilterElapsedTime) / cuStats.countLoopFilter);
+ if (cuStats.countWeightAnalyze && cuStats.weightAnalyzeTime)
+ {
+ x265_log(m_param, X265_LOG_INFO, "CU: %%%05.2lf time spent in weight analysis, average %.3lf ms per call\n",
+ 100.0 * cuStats.weightAnalyzeTime / totalWorkerTime,
+ ELAPSED_MSEC(cuStats.weightAnalyzeTime) / cuStats.countWeightAnalyze);
+ }
if (m_param->bDistributeModeAnalysis && cuStats.countPModeMasters)
{
x265_log(m_param, X265_LOG_INFO, "CU: %.3lf PMODE masters per CTU, each blocked an average of %.3lf ns\n",
diff -r e6b519dfbf81 -r 820dcc3216a5 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp Thu Mar 05 16:06:04 2015 +0530
+++ b/source/encoder/frameencoder.cpp Thu Mar 05 13:44:43 2015 -0600
@@ -284,6 +284,12 @@
}
}
+void FrameEncoder::WeightAnalysis::processTasks(int /* workerThreadId */) // task body for the WeightAnalysis group; the worker thread id parameter is unnamed and unused
+{
+ Frame* frame = master.m_frame; // the frame currently referenced by the master's m_frame
+ weightAnalyse(*frame->m_encData->m_slice, *frame, *master.m_param); // analyse weights for that frame's slice using the master's m_param
+}
+
void FrameEncoder::compressFrame()
{
ProfileScopeEvent(frameThread);
@@ -316,7 +322,18 @@
bool bUseWeightP = slice->m_sliceType == P_SLICE && slice->m_pps->bUseWeightPred;
bool bUseWeightB = slice->m_sliceType == B_SLICE && slice->m_pps->bUseWeightedBiPred;
if (bUseWeightP || bUseWeightB)
- weightAnalyse(*slice, *m_frame, *m_param);
+ {
+#if DETAILED_CU_STATS
+ m_cuStats.countWeightAnalyze++;
+ ScopedElapsedTime time(m_cuStats.weightAnalyzeTime);
+#endif
+ WeightAnalysis wa(*this);
+ if (m_pool && wa.tryBondPeers(*this, 1))
+ /* use an idle worker for weight analysis */
+ wa.waitForExit();
+ else
+ weightAnalyse(*slice, *m_frame, *m_param);
+ }
else
slice->disableWeights();
diff -r e6b519dfbf81 -r 820dcc3216a5 source/encoder/frameencoder.h
--- a/source/encoder/frameencoder.h Thu Mar 05 16:06:04 2015 +0530
+++ b/source/encoder/frameencoder.h Thu Mar 05 13:44:43 2015 -0600
@@ -200,6 +200,21 @@
FrameFilter m_frameFilter;
NALList m_nalList;
+ class WeightAnalysis : public BondedTaskGroup // task group whose processTasks() runs weight analysis for a FrameEncoder
+ {
+ public:
+
+ FrameEncoder& master; // frame encoder whose m_frame and m_param processTasks() reads
+
+ WeightAnalysis(FrameEncoder& fe) : master(fe) {} // binds this task group to the given frame encoder
+
+ void processTasks(int workerThreadId); // defined out of line in frameencoder.cpp
+
+ protected:
+
+ WeightAnalysis operator=(const WeightAnalysis&); // no definition appears in this patch; NOTE(review): returns by value, whereas copy assignment conventionally returns WeightAnalysis& -- confirm intent
+ };
+
protected:
bool initializeGeoms();
diff -r e6b519dfbf81 -r 820dcc3216a5 source/encoder/search.h
--- a/source/encoder/search.h Thu Mar 05 16:06:04 2015 +0530
+++ b/source/encoder/search.h Thu Mar 05 13:44:43 2015 -0600
@@ -152,6 +152,7 @@
int64_t pmeBlockTime; // elapsed worker time blocked for pme batch completion
int64_t pmodeTime; // elapsed worker time processing pmode slave jobs
int64_t pmodeBlockTime; // elapsed worker time blocked for pmode batch completion
+ int64_t weightAnalyzeTime; // elapsed worker time analyzing reference weights
int64_t totalCTUTime; // elapsed worker time in compressCTU (includes pmode master)
uint64_t countIntraRDO[NUM_CU_DEPTH];
@@ -163,6 +164,7 @@
uint64_t countPMEMasters;
uint64_t countPModeTasks;
uint64_t countPModeMasters;
+ uint64_t countWeightAnalyze;
uint64_t totalCTUs;
CUStats() { clear(); }
@@ -189,6 +191,7 @@
pmeBlockTime += other.pmeBlockTime;
pmodeTime += other.pmodeTime;
pmodeBlockTime += other.pmodeBlockTime;
+ weightAnalyzeTime += other.weightAnalyzeTime;
totalCTUTime += other.totalCTUTime;
countIntraAnalysis += other.countIntraAnalysis;
@@ -198,6 +201,7 @@
countPMEMasters += other.countPMEMasters;
countPModeTasks += other.countPModeTasks;
countPModeMasters += other.countPModeMasters;
+ countWeightAnalyze += other.countWeightAnalyze;
totalCTUs += other.totalCTUs;
other.clear();
More information about the x265-devel
mailing list