[x265-commits] [x265] api: fix spelling empirically
Steve Borho
steve at borho.org
Mon Feb 2 23:29:35 CET 2015
details: http://hg.videolan.org/x265/rev/dec1702fa50b
branches:
changeset: 9265:dec1702fa50b
user: Steve Borho <steve at borho.org>
date: Mon Feb 02 12:19:48 2015 -0600
description:
api: fix spelling empirically
Subject: [x265] remove unneeded intermediate variable in coeff scan loop
details: http://hg.videolan.org/x265/rev/ae6bcdc942fa
branches:
changeset: 9266:ae6bcdc942fa
user: Min Chen <chenm003 at 163.com>
date: Mon Feb 02 20:56:15 2015 +0800
description:
remove unneeded intermediate variable in coeff scan loop
Subject: [x265] encoder: do not report stats if no CU time was recorded
details: http://hg.videolan.org/x265/rev/8eaf7cb37ad4
branches:
changeset: 9267:8eaf7cb37ad4
user: Steve Borho <steve at borho.org>
date: Mon Feb 02 15:49:12 2015 -0600
description:
encoder: do not report stats if no CU time was recorded
If the user CTRL+C's the encode right away, don't print NANs
Subject: [x265] encoder: initialize m_threadPool pointer to NULL
details: http://hg.videolan.org/x265/rev/bd4ef159b8fc
branches:
changeset: 9268:bd4ef159b8fc
user: Steve Borho <steve at borho.org>
date: Mon Feb 02 16:00:14 2015 -0600
description:
encoder: initialize m_threadPool pointer to NULL
Subject: [x265] encoder: show unaccounted time and add CPU utilization numbers
details: http://hg.videolan.org/x265/rev/704aa7690e3d
branches:
changeset: 9269:704aa7690e3d
user: Steve Borho <steve at borho.org>
date: Mon Feb 02 16:07:44 2015 -0600
description:
encoder: show unaccounted time and add CPU utilization numbers
diffstat:
source/encoder/encoder.cpp | 27 ++++++++++++++++++++++-----
source/encoder/entropy.cpp | 15 +++------------
source/x265.h | 2 +-
3 files changed, 26 insertions(+), 18 deletions(-)
diffs (105 lines):
diff -r 269cc414f218 -r 704aa7690e3d source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Mon Feb 02 11:51:03 2015 -0600
+++ b/source/encoder/encoder.cpp Mon Feb 02 16:07:44 2015 -0600
@@ -79,7 +79,7 @@ Encoder::Encoder()
m_cuOffsetC = NULL;
m_buOffsetY = NULL;
m_buOffsetC = NULL;
- m_threadPool = 0;
+ m_threadPool = NULL;
m_numThreadLocalData = 0;
m_analysisFile = NULL;
}
@@ -817,6 +817,9 @@ void Encoder::printSummary()
CUStats cuStats;
for (int i = 0; i < m_param->frameNumThreads; i++)
cuStats.accumulate(m_frameEncoder[i].m_cuStats);
+
+ if (!cuStats.totalCTUTime)
+ return;
#define ELAPSED_SEC(val) ((double)(val) / 1000000)
#define ELAPSED_MSEC(val) ((double)(val) / 1000)
@@ -827,10 +830,6 @@ void Encoder::printSummary()
if (m_param->bDistributeMotionEstimation && cuStats.countPMEMasters)
totalWorkerTime += cuStats.pmeTime;
- x265_log(m_param, X265_LOG_INFO, "CU: Worker threads compressed " X265_LL " %dX%d CTUs in %.3lf worker seconds, %.3lf CTUs per second\n",
- cuStats.totalCTUs, g_maxCUSize, g_maxCUSize,
- ELAPSED_SEC(totalWorkerTime),
- cuStats.totalCTUs / ELAPSED_SEC(totalWorkerTime));
if (m_param->bDistributeMotionEstimation && cuStats.countPMEMasters)
{
x265_log(m_param, X265_LOG_INFO, "CU: %%%05.2lf time spent in motion estimation, averaging %.3lf CU inter modes per CTU\n",
@@ -868,6 +867,24 @@ void Encoder::printSummary()
ELAPSED_MSEC(cuStats.pmodeTime) / cuStats.countPModeTasks);
}
+ int64_t elapsedEncodeTime = x265_mdate() - m_encodeStartTime;
+ int64_t unaccounted = totalWorkerTime -
+ cuStats.intraAnalysisElapsedTime - cuStats.motionEstimationElapsedTime -
+ cuStats.interRDOElapsedTime - cuStats.intraRDOElapsedTime -
+ cuStats.pmeTime - cuStats.pmodeTime;
+
+ x265_log(m_param, X265_LOG_INFO, "CU: %%%05.2lf time spent in other tasks\n",
+ 100.0 * unaccounted / totalWorkerTime);
+
+ x265_log(m_param, X265_LOG_INFO, "CU: " X265_LL " %dX%d CTUs compressed in %.3lf worker seconds, %.3lf CTUs per worker-second\n",
+ cuStats.totalCTUs, g_maxCUSize, g_maxCUSize,
+ ELAPSED_SEC(totalWorkerTime),
+ cuStats.totalCTUs / ELAPSED_SEC(totalWorkerTime));
+
+ x265_log(m_param, X265_LOG_INFO, "CU: %.3lf average worker occupancy, %%%05.2lf of theoretical maximum occupancy\n",
+ (double)totalWorkerTime / elapsedEncodeTime,
+ 100.0 * totalWorkerTime / (elapsedEncodeTime * m_threadPool->getThreadCount()));
+
#undef ELAPSED_SEC
#undef ELAPSED_MSEC
#endif
diff -r 269cc414f218 -r 704aa7690e3d source/encoder/entropy.cpp
--- a/source/encoder/entropy.cpp Mon Feb 02 11:51:03 2015 -0600
+++ b/source/encoder/entropy.cpp Mon Feb 02 16:07:44 2015 -0600
@@ -1444,11 +1444,9 @@ void Entropy::codeCoeffNxN(const CUData&
//const uint32_t maskPosXY = ((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1;
X265_CHECK((uint32_t)((1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1) == (((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1), "maskPosXY fault\n");
- uint32_t cgBlkNum = 0;
do
{
- const uint32_t cgBlkIdx = scanPosLast & (MLS_CG_BLK_SIZE - 1);
- const uint32_t cgIdx = scanPosLast >> MLS_CG_SIZE;
+ const uint32_t cgIdx = (uint32_t)scanPosLast >> MLS_CG_SIZE;
posLast = codingParameters.scan[scanPosLast++];
@@ -1464,16 +1462,9 @@ void Entropy::codeCoeffNxN(const CUData&
numSig -= isNZCoeff;
// TODO: optimize by instruction BTS
- coeffSign[cgIdx] += (uint16_t)(((uint32_t)curCoeff >> 31) << cgBlkNum);
+ coeffSign[cgIdx] += (uint16_t)(((uint32_t)curCoeff >> 31) << coeffNum[cgIdx]);
coeffFlag[cgIdx] = (coeffFlag[cgIdx] << 1) + (uint16_t)isNZCoeff;
- cgBlkNum += isNZCoeff;
- // TODO: reduce memory store operator, but avoid conditional branch
- coeffNum[cgIdx] = (uint8_t)cgBlkNum;
-
- if (cgBlkIdx == (MLS_CG_BLK_SIZE - 1))
- {
- cgBlkNum = 0;
- }
+ coeffNum[cgIdx] += (uint8_t)isNZCoeff;
}
while (numSig > 0);
scanPosLast--;
diff -r 269cc414f218 -r 704aa7690e3d source/x265.h
--- a/source/x265.h Mon Feb 02 11:51:03 2015 -0600
+++ b/source/x265.h Mon Feb 02 16:07:44 2015 -0600
@@ -368,7 +368,7 @@ typedef struct x265_param
/* Number of concurrently encoded frames between 1 and X265_MAX_FRAME_THREADS
* or 0 for auto-detection. By default x265 will use a number of frame
- * threads emperically determined to be optimal for your CPU core count,
+ * threads empirically determined to be optimal for your CPU core count,
* between 2 and 6. Using more than one frame thread causes motion search
* in the down direction to be clamped but otherwise encode behavior is
* unaffected. With CQP rate control the output bitstream is deterministic
More information about the x265-commits
mailing list