[x265-commits] [x265] slicetype: use access macros for alloc and free
Steve Borho
steve at borho.org
Fri Feb 6 19:21:00 CET 2015
details: http://hg.videolan.org/x265/rev/a69806a7ab7d
branches:
changeset: 9320:a69806a7ab7d
user: Steve Borho <steve at borho.org>
date: Fri Feb 06 11:26:04 2015 -0600
description:
slicetype: use access macros for alloc and free
Subject: [x265] stats: include time spent in slicetype decisions
details: http://hg.videolan.org/x265/rev/5389e6d11567
branches:
changeset: 9321:5389e6d11567
user: Steve Borho <steve at borho.org>
date: Fri Feb 06 11:25:38 2015 -0600
description:
stats: include time spent in slicetype decisions
The lookahead may decide not to use worker threads, and when this happens we
often see more than 100% total utilization, which means the API thread is
ejecting a worker thread from the CPU for a time in order to run slicetypeDecide.
diffstat:
source/encoder/encoder.cpp | 11 ++++++++++-
source/encoder/slicetype.cpp | 33 +++++++++++++++++++++++++++++----
source/encoder/slicetype.h | 17 ++++++++++++++++-
3 files changed, 55 insertions(+), 6 deletions(-)
diffs (168 lines):
diff -r e51b19ab2319 -r 5389e6d11567 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Fri Feb 06 10:29:58 2015 -0600
+++ b/source/encoder/encoder.cpp Fri Feb 06 11:25:38 2015 -0600
@@ -822,7 +822,11 @@ void Encoder::printSummary()
#define ELAPSED_SEC(val) ((double)(val) / 1000000)
#define ELAPSED_MSEC(val) ((double)(val) / 1000)
- int64_t totalWorkerTime = cuStats.totalCTUTime + cuStats.loopFilterElapsedTime + cuStats.pmodeTime + cuStats.pmeTime;
+ int64_t lookaheadWorkerTime = m_lookahead->m_slicetypeDecideElapsedTime;
+ if (m_lookahead->usingWorkerThreads())
+ lookaheadWorkerTime += m_lookahead->m_est.m_processRowElapsedTime;
+
+ int64_t totalWorkerTime = cuStats.totalCTUTime + cuStats.loopFilterElapsedTime + cuStats.pmodeTime + cuStats.pmeTime + lookaheadWorkerTime;
int64_t elapsedEncodeTime = x265_mdate() - m_encodeStartTime;
int64_t interRDOTotalTime = 0, intraRDOTotalTime = 0;
@@ -878,6 +882,11 @@ void Encoder::printSummary()
ELAPSED_MSEC(cuStats.pmodeTime) / cuStats.countPModeTasks);
}
+ x265_log(m_param, X265_LOG_INFO, "CU: %%%05.2lf time spent in slicetypeDecide (avg %.3lfms) and lookahead row cost (avg %.3lfns)\n",
+ 100.0 * lookaheadWorkerTime / totalWorkerTime,
+ ELAPSED_MSEC(m_lookahead->m_slicetypeDecideElapsedTime) / m_lookahead->m_countSlicetypeDecide,
+ (double)m_lookahead->m_est.m_processRowElapsedTime / m_lookahead->m_est.m_countProcessRow);
+
x265_log(m_param, X265_LOG_INFO, "CU: %%%05.2lf time spent in other tasks\n",
100.0 * unaccounted / totalWorkerTime);
diff -r e51b19ab2319 -r 5389e6d11567 source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp Fri Feb 06 10:29:58 2015 -0600
+++ b/source/encoder/slicetype.cpp Fri Feb 06 11:25:38 2015 -0600
@@ -67,6 +67,12 @@ Lookahead::Lookahead(x265_param *param,
m_bFilled = false;
m_bFlushed = false;
m_bFlush = false;
+
+#if DETAILED_CU_STATS
+ m_slicetypeDecideElapsedTime = 0;
+ m_countSlicetypeDecide = 0;
+#endif
+
m_widthInCU = ((m_param->sourceWidth / 2) + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
m_heightInCU = ((m_param->sourceHeight / 2) + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
m_scratch = (int*)x265_malloc(m_widthInCU * sizeof(int));
@@ -334,6 +340,10 @@ void Lookahead::getEstimatedPictureCost(
void Lookahead::slicetypeDecide()
{
ProfileScopeEvent(slicetypeDecideEV);
+#if DETAILED_CU_STATS
+ ScopedElapsedTime filterPerfScope(m_slicetypeDecideElapsedTime);
+ m_countSlicetypeDecide++;
+#endif
Lowres *frames[X265_LOOKAHEAD_MAX];
Frame *list[X265_LOOKAHEAD_MAX];
@@ -1265,9 +1275,7 @@ CostEstimate::CostEstimate(ThreadPool *p
CostEstimate::~CostEstimate()
{
for (int i = 0; i < 4; i++)
- {
- x265_free(m_wbuffer[i]);
- }
+ X265_FREE(m_wbuffer[i]);
delete[] m_rows;
}
@@ -1278,6 +1286,11 @@ void CostEstimate::init(x265_param *_par
m_widthInCU = ((m_param->sourceWidth / 2) + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
m_heightInCU = ((m_param->sourceHeight / 2) + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
+#if DETAILED_CU_STATS
+ m_processRowElapsedTime = 0;
+ m_countProcessRow = 0;
+#endif
+
m_rows = new EstimateRow[m_heightInCU];
for (int i = 0; i < m_heightInCU; i++)
{
@@ -1300,7 +1313,7 @@ void CostEstimate::init(x265_param *_par
/* allocate weighted lowres buffers */
for (int i = 0; i < 4; i++)
{
- m_wbuffer[i] = (pixel*)x265_malloc(sizeof(pixel) * (curFrame->m_lowres.lumaStride * m_paddedLines));
+ m_wbuffer[i] = X265_MALLOC(pixel, curFrame->m_lowres.lumaStride * m_paddedLines);
m_weightedRef.lowresPlane[i] = m_wbuffer[i] + padoffset;
}
@@ -1348,6 +1361,10 @@ int64_t CostEstimate::estimateFrameCost(
if (!fenc->bIntraCalculated)
fenc->rowSatds[0][0][i] = 0;
fenc->rowSatds[b - p0][p1 - b][i] = 0;
+#if DETAILED_CU_STATS
+ m_rows[i].m_processRowElapsedTime = 0;
+ m_rows[i].m_countProcessRow = 0;
+#endif
}
m_bFrameCompleted = false;
@@ -1374,6 +1391,10 @@ int64_t CostEstimate::estimateFrameCost(
// Accumulate cost from each row
for (int row = 0; row < m_heightInCU; row++)
{
+#if DETAILED_CU_STATS
+ m_processRowElapsedTime += m_rows[row].m_processRowElapsedTime;
+ m_countProcessRow += m_rows[row].m_countProcessRow;
+#endif
score += m_rows[row].m_costEst;
fenc->costEst[0][0] += m_rows[row].m_costIntra;
if (m_param->rc.aqMode)
@@ -1524,6 +1545,10 @@ void CostEstimate::weightsAnalyse(Lowres
void CostEstimate::processRow(int row, int /*threadId*/)
{
ProfileScopeEvent(costEstimateRow);
+#if DETAILED_CU_STATS
+ ScopedElapsedTime filterPerfScope(m_processRowElapsedTime);
+ m_countProcessRow++;
+#endif
int realrow = m_heightInCU - 1 - row;
Lowres **frames = m_curframes;
diff -r e51b19ab2319 -r 5389e6d11567 source/encoder/slicetype.h
--- a/source/encoder/slicetype.h Fri Feb 06 10:29:58 2015 -0600
+++ b/source/encoder/slicetype.h Fri Feb 06 11:25:38 2015 -0600
@@ -69,6 +69,11 @@ public:
int m_widthInCU;
int m_heightInCU;
+#if DETAILED_CU_STATS
+ int64_t m_processRowElapsedTime;
+ uint64_t m_countProcessRow;
+#endif
+
EstimateRow()
{
m_me.setQP(X265_LOOKAHEAD_QP);
@@ -107,6 +112,11 @@ public:
volatile bool m_bFrameCompleted;
int m_curb, m_curp0, m_curp1;
+#if DETAILED_CU_STATS
+ int64_t m_processRowElapsedTime;
+ uint64_t m_countProcessRow;
+#endif
+
void processRow(int row, int threadId);
int64_t estimateFrameCost(Lowres **frames, int p0, int p1, int b, bool bIntraPenalty);
@@ -138,6 +148,12 @@ public:
int m_lastKeyframe;
int m_histogram[X265_BFRAME_MAX + 1];
+#if DETAILED_CU_STATS
+ int64_t m_slicetypeDecideElapsedTime;
+ uint64_t m_countSlicetypeDecide;
+ bool usingWorkerThreads() const { return !!m_pool; }
+#endif
+
void addPicture(Frame*, int sliceType);
void flush();
void stop();
@@ -147,7 +163,6 @@ public:
protected:
-
Lock m_inputQueueLock;
Lock m_outputQueueLock;
Event m_outputAvailable;
More information about the x265-commits mailing list