[x265-commits] [x265] api: fix spelling empirically

Steve Borho steve at borho.org
Mon Feb 2 23:29:35 CET 2015


details:   http://hg.videolan.org/x265/rev/dec1702fa50b
branches:  
changeset: 9265:dec1702fa50b
user:      Steve Borho <steve at borho.org>
date:      Mon Feb 02 12:19:48 2015 -0600
description:
api: fix spelling empirically
Subject: [x265] remove unneeded intermediate variable in coeff scan loop

details:   http://hg.videolan.org/x265/rev/ae6bcdc942fa
branches:  
changeset: 9266:ae6bcdc942fa
user:      Min Chen <chenm003 at 163.com>
date:      Mon Feb 02 20:56:15 2015 +0800
description:
remove unneeded intermediate variable in coeff scan loop
Subject: [x265] encoder: do not report stats if no CU time was recorded

details:   http://hg.videolan.org/x265/rev/8eaf7cb37ad4
branches:  
changeset: 9267:8eaf7cb37ad4
user:      Steve Borho <steve at borho.org>
date:      Mon Feb 02 15:49:12 2015 -0600
description:
encoder: do not report stats if no CU time was recorded

If the user presses CTRL+C right after the encode starts, no CU time has been recorded yet; skip the stats so that NaNs are not printed
Subject: [x265] encoder: initialize m_threadPool pointer to NULL

details:   http://hg.videolan.org/x265/rev/bd4ef159b8fc
branches:  
changeset: 9268:bd4ef159b8fc
user:      Steve Borho <steve at borho.org>
date:      Mon Feb 02 16:00:14 2015 -0600
description:
encoder: initialize m_threadPool pointer to NULL
Subject: [x265] encoder: show unaccounted time and add CPU utilization numbers

details:   http://hg.videolan.org/x265/rev/704aa7690e3d
branches:  
changeset: 9269:704aa7690e3d
user:      Steve Borho <steve at borho.org>
date:      Mon Feb 02 16:07:44 2015 -0600
description:
encoder: show unaccounted time and add CPU utilization numbers

diffstat:

 source/encoder/encoder.cpp |  27 ++++++++++++++++++++++-----
 source/encoder/entropy.cpp |  15 +++------------
 source/x265.h              |   2 +-
 3 files changed, 26 insertions(+), 18 deletions(-)

diffs (105 lines):

diff -r 269cc414f218 -r 704aa7690e3d source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Mon Feb 02 11:51:03 2015 -0600
+++ b/source/encoder/encoder.cpp	Mon Feb 02 16:07:44 2015 -0600
@@ -79,7 +79,7 @@ Encoder::Encoder()
     m_cuOffsetC = NULL;
     m_buOffsetY = NULL;
     m_buOffsetC = NULL;
-    m_threadPool = 0;
+    m_threadPool = NULL;
     m_numThreadLocalData = 0;
     m_analysisFile = NULL;
 }
@@ -817,6 +817,9 @@ void Encoder::printSummary()
     CUStats cuStats;
     for (int i = 0; i < m_param->frameNumThreads; i++)
         cuStats.accumulate(m_frameEncoder[i].m_cuStats);
+    
+    if (!cuStats.totalCTUTime)
+        return;
 
 #define ELAPSED_SEC(val)  ((double)(val) / 1000000)
 #define ELAPSED_MSEC(val) ((double)(val) / 1000)
@@ -827,10 +830,6 @@ void Encoder::printSummary()
     if (m_param->bDistributeMotionEstimation && cuStats.countPMEMasters)
         totalWorkerTime += cuStats.pmeTime;
 
-    x265_log(m_param, X265_LOG_INFO, "CU: Worker threads compressed " X265_LL " %dX%d CTUs in %.3lf worker seconds, %.3lf CTUs per second\n",
-            cuStats.totalCTUs, g_maxCUSize, g_maxCUSize,
-            ELAPSED_SEC(totalWorkerTime),
-            cuStats.totalCTUs / ELAPSED_SEC(totalWorkerTime));
     if (m_param->bDistributeMotionEstimation && cuStats.countPMEMasters)
     {
         x265_log(m_param, X265_LOG_INFO, "CU: %%%05.2lf time spent in motion estimation, averaging %.3lf CU inter modes per CTU\n",
@@ -868,6 +867,24 @@ void Encoder::printSummary()
                 ELAPSED_MSEC(cuStats.pmodeTime) / cuStats.countPModeTasks);
     }
 
+    int64_t elapsedEncodeTime = x265_mdate() - m_encodeStartTime;
+    int64_t unaccounted = totalWorkerTime - 
+                          cuStats.intraAnalysisElapsedTime - cuStats.motionEstimationElapsedTime -
+                          cuStats.interRDOElapsedTime - cuStats.intraRDOElapsedTime -
+                          cuStats.pmeTime - cuStats.pmodeTime;
+
+    x265_log(m_param, X265_LOG_INFO, "CU: %%%05.2lf time spent in other tasks\n",
+            100.0 * unaccounted / totalWorkerTime);
+
+    x265_log(m_param, X265_LOG_INFO, "CU: " X265_LL " %dX%d CTUs compressed in %.3lf worker seconds, %.3lf CTUs per worker-second\n",
+            cuStats.totalCTUs, g_maxCUSize, g_maxCUSize,
+            ELAPSED_SEC(totalWorkerTime),
+            cuStats.totalCTUs / ELAPSED_SEC(totalWorkerTime));
+
+    x265_log(m_param, X265_LOG_INFO, "CU: %.3lf average worker occupancy, %%%05.2lf of theoretical maximum occupancy\n",
+            (double)totalWorkerTime / elapsedEncodeTime,
+            100.0 * totalWorkerTime / (elapsedEncodeTime * m_threadPool->getThreadCount()));
+
 #undef ELAPSED_SEC
 #undef ELAPSED_MSEC
 #endif
diff -r 269cc414f218 -r 704aa7690e3d source/encoder/entropy.cpp
--- a/source/encoder/entropy.cpp	Mon Feb 02 11:51:03 2015 -0600
+++ b/source/encoder/entropy.cpp	Mon Feb 02 16:07:44 2015 -0600
@@ -1444,11 +1444,9 @@ void Entropy::codeCoeffNxN(const CUData&
     //const uint32_t maskPosXY = ((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1;
     X265_CHECK((uint32_t)((1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1) == (((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1), "maskPosXY fault\n");
 
-    uint32_t cgBlkNum = 0;
     do
     {
-        const uint32_t cgBlkIdx = scanPosLast & (MLS_CG_BLK_SIZE - 1);
-        const uint32_t cgIdx = scanPosLast >> MLS_CG_SIZE;
+        const uint32_t cgIdx = (uint32_t)scanPosLast >> MLS_CG_SIZE;
 
         posLast = codingParameters.scan[scanPosLast++];
 
@@ -1464,16 +1462,9 @@ void Entropy::codeCoeffNxN(const CUData&
         numSig -= isNZCoeff;
 
         // TODO: optimize by instruction BTS
-        coeffSign[cgIdx] += (uint16_t)(((uint32_t)curCoeff >> 31) << cgBlkNum);
+        coeffSign[cgIdx] += (uint16_t)(((uint32_t)curCoeff >> 31) << coeffNum[cgIdx]);
         coeffFlag[cgIdx] = (coeffFlag[cgIdx] << 1) + (uint16_t)isNZCoeff;
-        cgBlkNum += isNZCoeff;
-        // TODO: reduce memory store operator, but avoid conditional branch
-        coeffNum[cgIdx] = (uint8_t)cgBlkNum;
-
-        if (cgBlkIdx == (MLS_CG_BLK_SIZE - 1))
-        {
-            cgBlkNum = 0;
-        }
+        coeffNum[cgIdx] += (uint8_t)isNZCoeff;
     }
     while (numSig > 0);
     scanPosLast--;
diff -r 269cc414f218 -r 704aa7690e3d source/x265.h
--- a/source/x265.h	Mon Feb 02 11:51:03 2015 -0600
+++ b/source/x265.h	Mon Feb 02 16:07:44 2015 -0600
@@ -368,7 +368,7 @@ typedef struct x265_param
 
     /* Number of concurrently encoded frames between 1 and X265_MAX_FRAME_THREADS
      * or 0 for auto-detection. By default x265 will use a number of frame
-     * threads emperically determined to be optimal for your CPU core count,
+     * threads empirically determined to be optimal for your CPU core count,
      * between 2 and 6.  Using more than one frame thread causes motion search
      * in the down direction to be clamped but otherwise encode behavior is
      * unaffected. With CQP rate control the output bitstream is deterministic


More information about the x265-commits mailing list