[x265] [PATCH Alpha 09/10] Add logs for both layers & fix issue while aborting in check params

Anusuya Kumarasamy anusuya.kumarasamy at multicorewareinc.com
Mon Aug 5 11:10:27 UTC 2024


From 6dbf96b3a18404a01154c091a49f5ee7c206d107 Mon Sep 17 00:00:00 2001
From: AnusuyaKumarasamy <anusuya.kumarasamy at multicorewareinc.com>
Date: Wed, 3 Jul 2024 10:22:44 +0530
Subject: [PATCH] Add logs for both layers & fix issue while aborting in check params

---
 source/abrEncApp.cpp            |   3 +-
 source/common/slice.h           |   2 +-
 source/encoder/api.cpp          |  18 +-
 source/encoder/encoder.cpp      | 507 ++++++++++++++++----------------
 source/encoder/encoder.h        |  12 +-
 source/encoder/frameencoder.cpp |  55 ++--
 source/encoder/frameencoder.h   |  32 +-
 source/encoder/framefilter.cpp  |  10 +-
 source/x265.h                   |   2 +
 9 files changed, 330 insertions(+), 311 deletions(-)

diff --git a/source/abrEncApp.cpp b/source/abrEncApp.cpp
index eabfb7d2b..fc4262369 100644
--- a/source/abrEncApp.cpp
+++ b/source/abrEncApp.cpp
@@ -206,6 +206,7 @@ namespace X265_NS {
         {
             x265_log(NULL, X265_LOG_ERROR, "x265_encoder_open() failed for
Enc, \n");
             m_ret = 2;
+            m_reader = NULL;
             return -1;
         }

@@ -867,7 +868,7 @@ ret:
             m_reader->stop();
             delete m_reader;
         }
-        else
+        else if (m_scaler != NULL)
         {
             m_scaler->stop();
             m_scaler->destroy();
diff --git a/source/common/slice.h b/source/common/slice.h
index c85cf0972..2a78c9338 100644
--- a/source/common/slice.h
+++ b/source/common/slice.h
@@ -161,6 +161,7 @@ struct VPS
     uint32_t         numReorderPics[MAX_T_LAYERS];
     uint32_t         maxDecPicBuffering[MAX_T_LAYERS];
     uint32_t         maxLatencyIncrease[MAX_T_LAYERS];
+    int              m_numLayers;

 #if ENABLE_ALPHA
     bool             splitting_flag;
@@ -172,7 +173,6 @@ struct VPS
     uint8_t          m_layerIdInNuh[MAX_VPS_LAYER_ID_PLUS1];
     uint8_t          m_layerIdInVps[MAX_VPS_LAYER_ID_PLUS1];
     int              m_viewIdLen;
-    int              m_numLayers;
     int              m_vpsNumLayerSetsMinus1;
     bool             vps_extension_flag;
 #endif
diff --git a/source/encoder/api.cpp b/source/encoder/api.cpp
index e21f541d9..88bc25550 100644
--- a/source/encoder/api.cpp
+++ b/source/encoder/api.cpp
@@ -602,7 +602,10 @@ fail:
         *pi_nal = 0;

     if (numEncoded && encoder->m_param->csvLogLevel &&
encoder->m_outputCount >= encoder->m_latestParam->chunkStart)
-        x265_csvlog_frame(encoder->m_param, pic_out[0]);
+    {
+        for (int layer = 0; layer < encoder->m_param->numScalableLayers;
layer++)
+            x265_csvlog_frame(encoder->m_param, pic_out[layer]);
+    }

     if (numEncoded < 0)
         encoder->m_aborted = true;
@@ -653,11 +656,14 @@ void x265_encoder_log(x265_encoder* enc, int argc,
char **argv)
     if (enc)
     {
         Encoder *encoder = static_cast<Encoder*>(enc);
-        x265_stats stats;
-        encoder->fetchStats(&stats, sizeof(stats));
+        x265_stats stats[MAX_SCALABLE_LAYERS];
         int padx = encoder->m_sps.conformanceWindow.rightOffset;
         int pady = encoder->m_sps.conformanceWindow.bottomOffset;
-        x265_csvlog_encode(encoder->m_param, &stats, padx, pady, argc,
argv);
+        for (int layer = 0; layer < encoder->m_param->numScalableLayers;
layer++)
+        {
+            encoder->fetchStats(stats, sizeof(stats[layer]), layer);
+            x265_csvlog_encode(encoder->m_param, &stats[0], padx, pady,
argc, argv);
+        }
     }
 }

@@ -1295,7 +1301,7 @@ FILE* x265_csvlog_open(const x265_param* param)
         {
             if (param->csvLogLevel)
             {
-                fprintf(csvfp, "Encode Order, Type, POC, QP, Bits,
Scenecut, ");
+                fprintf(csvfp, "Layer , Encode Order, Type, POC, QP, Bits,
Scenecut, ");
                 if (!!param->bEnableTemporalSubLayers)
                     fprintf(csvfp, "Temporal Sub Layer ID, ");
                 if (param->csvLogLevel >= 2)
@@ -1409,7 +1415,7 @@ void x265_csvlog_frame(const x265_param* param, const
x265_picture* pic)
         return;

     const x265_frame_stats* frameStats = &pic->frameData;
-    fprintf(param->csvfpt, "%d, %c-SLICE, %4d, %2.2lf, %10d, %d,",
frameStats->encoderOrder, frameStats->sliceType, frameStats->poc,
+    fprintf(param->csvfpt, "%d, %d, %c-SLICE, %4d, %2.2lf, %10d, %d,",
pic->layerID, frameStats->encoderOrder, frameStats->sliceType,
frameStats->poc,

frameStats->qp, (int)frameStats->bits, frameStats->bScenecut);
     if (!!param->bEnableTemporalSubLayers)
         fprintf(param->csvfpt, "%d,", frameStats->tLayer);
diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp
index 5697ac5e5..4844a158c 100644
--- a/source/encoder/encoder.cpp
+++ b/source/encoder/encoder.cpp
@@ -1879,7 +1879,10 @@ int Encoder::encode(const x265_picture* pic_in,
x265_picture** pic_out)

         m_lookahead->addPicture(*inFrame[0], sliceType);

-        m_dpb->m_picList.pushBack(*inFrame[1]); /* Add enhancement layer
to DPB to be used later in frameencoder*/
+#if ENABLE_ALPHA
+        if(m_param->numScalableLayers > 1)
+            m_dpb->m_picList.pushBack(*inFrame[1]); /* Add enhancement
layer to DPB to be used later in frameencoder*/
+#endif
         m_numDelayedPic++;
     }
     else if (m_latestParam->forceFlush == 2)
@@ -1924,6 +1927,7 @@ int Encoder::encode(const x265_picture* pic_in,
x265_picture** pic_out)
                     pic_out[sLayer]->userData = outFrame->m_userData;
                     pic_out[sLayer]->colorSpace = m_param->internalCsp;
                     pic_out[sLayer]->frameData.tLayer =
outFrame->m_tempLayer;
+                    pic_out[sLayer]->layerID = sLayer;
                     frameData = &(pic_out[sLayer]->frameData);

                     pic_out[sLayer]->pts = outFrame->m_pts;
@@ -2059,8 +2063,8 @@ int Encoder::encode(const x265_picture* pic_in,
x265_picture** pic_out)
                 if (m_aborted)
                     return -1;

-                if ((m_outputCount + 1) >= m_param->chunkStart && !sLayer)
-                    finishFrameStats(outFrame, curEncoder, frameData,
m_pocLast);
+                if ((m_outputCount + 1) >= m_param->chunkStart)
+                    finishFrameStats(outFrame, curEncoder, frameData,
m_pocLast, sLayer);
                 if (m_param->analysisSave)
                 {
                     pic_out[sLayer]->analysisData.frameBits =
frameData->bits;
@@ -2730,230 +2734,233 @@ void Encoder::printSummary()
     if (m_param->logLevel < X265_LOG_INFO)
         return;

-    char buffer[200];
-    if (m_analyzeI.m_numPics)
-        x265_log(m_param, X265_LOG_INFO, "frame I: %s\n",
statsString(m_analyzeI, buffer));
-    if (m_analyzeP.m_numPics)
-        x265_log(m_param, X265_LOG_INFO, "frame P: %s\n",
statsString(m_analyzeP, buffer));
-    if (m_analyzeB.m_numPics)
-        x265_log(m_param, X265_LOG_INFO, "frame B: %s\n",
statsString(m_analyzeB, buffer));
-    if (m_param->bEnableWeightedPred && m_analyzeP.m_numPics)
-    {
-        x265_log(m_param, X265_LOG_INFO, "Weighted P-Frames: Y:%.1f%%
UV:%.1f%%\n",
-            (float)100.0 * m_numLumaWPFrames / m_analyzeP.m_numPics,
-            (float)100.0 * m_numChromaWPFrames / m_analyzeP.m_numPics);
-    }
-    if (m_param->bEnableWeightedBiPred && m_analyzeB.m_numPics)
+    for (int layer = 0; layer < m_param->numScalableLayers; layer++)
     {
-        x265_log(m_param, X265_LOG_INFO, "Weighted B-Frames: Y:%.1f%%
UV:%.1f%%\n",
-            (float)100.0 * m_numLumaWPBiFrames / m_analyzeB.m_numPics,
-            (float)100.0 * m_numChromaWPBiFrames / m_analyzeB.m_numPics);
-    }
+        char buffer[200];
+        if (m_analyzeI[layer].m_numPics)
+            x265_log(m_param, X265_LOG_INFO, "frame I: %s\n",
statsString(m_analyzeI[layer], buffer));
+        if (m_analyzeP[layer].m_numPics)
+            x265_log(m_param, X265_LOG_INFO, "frame P: %s\n",
statsString(m_analyzeP[layer], buffer));
+        if (m_analyzeB[layer].m_numPics)
+            x265_log(m_param, X265_LOG_INFO, "frame B: %s\n",
statsString(m_analyzeB[layer], buffer));
+        if (m_param->bEnableWeightedPred && m_analyzeP[layer].m_numPics)
+        {
+            x265_log(m_param, X265_LOG_INFO, "Weighted P-Frames: Y:%.1f%%
UV:%.1f%%\n",
+                (float)100.0 * m_numLumaWPFrames /
m_analyzeP[layer].m_numPics,
+                (float)100.0 * m_numChromaWPFrames /
m_analyzeP[layer].m_numPics);
+        }
+        if (m_param->bEnableWeightedBiPred && m_analyzeB[layer].m_numPics)
+        {
+            x265_log(m_param, X265_LOG_INFO, "Weighted B-Frames: Y:%.1f%%
UV:%.1f%%\n",
+                (float)100.0 * m_numLumaWPBiFrames /
m_analyzeB[layer].m_numPics,
+                (float)100.0 * m_numChromaWPBiFrames /
m_analyzeB[layer].m_numPics);
+        }

-    if (m_param->bLossless)
-    {
-        float frameSize = (float)(m_param->sourceWidth -
m_sps.conformanceWindow.rightOffset) *
-                                 (m_param->sourceHeight -
m_sps.conformanceWindow.bottomOffset);
-        float uncompressed = frameSize * X265_DEPTH *
m_analyzeAll.m_numPics;
+        if (m_param->bLossless)
+        {
+            float frameSize = (float)(m_param->sourceWidth -
m_sps.conformanceWindow.rightOffset) *
+                (m_param->sourceHeight -
m_sps.conformanceWindow.bottomOffset);
+            float uncompressed = frameSize * X265_DEPTH *
m_analyzeAll[layer].m_numPics;

-        x265_log(m_param, X265_LOG_INFO, "lossless compression ratio
%.2f::1\n", uncompressed / m_analyzeAll.m_accBits);
-    }
-    if (m_param->bMultiPassOptRPS && m_param->rc.bStatRead)
-    {
-        x265_log(m_param, X265_LOG_INFO, "RPS in SPS: %d frames (%.2f%%),
RPS not in SPS: %d frames (%.2f%%)\n",
-            m_rpsInSpsCount, (float)100.0 * m_rpsInSpsCount /
m_rateControl->m_numEntries,
-            m_rateControl->m_numEntries - m_rpsInSpsCount,
-            (float)100.0 * (m_rateControl->m_numEntries - m_rpsInSpsCount)
/ m_rateControl->m_numEntries);
-    }
+            x265_log(m_param, X265_LOG_INFO, "lossless compression ratio
%.2f::1\n", uncompressed / m_analyzeAll[layer].m_accBits);
+        }
+        if (m_param->bMultiPassOptRPS && m_param->rc.bStatRead)
+        {
+            x265_log(m_param, X265_LOG_INFO, "RPS in SPS: %d frames
(%.2f%%), RPS not in SPS: %d frames (%.2f%%)\n",
+                m_rpsInSpsCount, (float)100.0 * m_rpsInSpsCount /
m_rateControl->m_numEntries,
+                m_rateControl->m_numEntries - m_rpsInSpsCount,
+                (float)100.0 * (m_rateControl->m_numEntries -
m_rpsInSpsCount) / m_rateControl->m_numEntries);
+        }

-    if (m_analyzeAll.m_numPics)
-    {
-        int p = 0;
-        double elapsedEncodeTime = (double)(x265_mdate() -
m_encodeStartTime) / 1000000;
-        double elapsedVideoTime = (double)m_analyzeAll.m_numPics *
m_param->fpsDenom / m_param->fpsNum;
-        double bitrate = (0.001f * m_analyzeAll.m_accBits) /
elapsedVideoTime;
+        if (m_analyzeAll[layer].m_numPics)
+        {
+            int p = 0;
+            double elapsedEncodeTime = (double)(x265_mdate() -
m_encodeStartTime) / 1000000;
+            double elapsedVideoTime =
(double)m_analyzeAll[layer].m_numPics * m_param->fpsDenom / m_param->fpsNum;
+            double bitrate = (0.001f * m_analyzeAll[layer].m_accBits) /
elapsedVideoTime;

-        p += sprintf(buffer + p, "\nencoded %d frames in %.2fs (%.2f fps),
%.2f kb/s, Avg QP:%2.2lf", m_analyzeAll.m_numPics,
-                     elapsedEncodeTime, m_analyzeAll.m_numPics /
elapsedEncodeTime, bitrate, m_analyzeAll.m_totalQp /
(double)m_analyzeAll.m_numPics);
+            p += sprintf(buffer + p, "\nencoded %d frames in %.2fs (%.2f
fps), %.2f kb/s, Avg QP:%2.2lf", m_analyzeAll[layer].m_numPics,
+                elapsedEncodeTime, m_analyzeAll[layer].m_numPics /
elapsedEncodeTime, bitrate, m_analyzeAll[layer].m_totalQp /
(double)m_analyzeAll[layer].m_numPics);

-        if (m_param->bEnablePsnr)
-        {
-            double globalPsnr = (m_analyzeAll.m_psnrSumY * 6 +
m_analyzeAll.m_psnrSumU + m_analyzeAll.m_psnrSumV) / (8 *
m_analyzeAll.m_numPics);
-            p += sprintf(buffer + p, ", Global PSNR: %.3f", globalPsnr);
-        }
+            if (m_param->bEnablePsnr)
+            {
+                double globalPsnr = (m_analyzeAll[layer].m_psnrSumY * 6 +
m_analyzeAll[layer].m_psnrSumU + m_analyzeAll[layer].m_psnrSumV) / (8 *
m_analyzeAll[layer].m_numPics);
+                p += sprintf(buffer + p, ", Global PSNR: %.3f",
globalPsnr);
+            }

-        if (m_param->bEnableSsim)
-            p += sprintf(buffer + p, ", SSIM Mean Y: %.7f (%6.3f dB)",
m_analyzeAll.m_globalSsim / m_analyzeAll.m_numPics,
x265_ssim2dB(m_analyzeAll.m_globalSsim / m_analyzeAll.m_numPics));
+            if (m_param->bEnableSsim)
+                p += sprintf(buffer + p, ", SSIM Mean Y: %.7f (%6.3f dB)",
m_analyzeAll[layer].m_globalSsim / m_analyzeAll[layer].m_numPics,
x265_ssim2dB(m_analyzeAll[layer].m_globalSsim /
m_analyzeAll[layer].m_numPics));

-        sprintf(buffer + p, "\n");
-        general_log(m_param, NULL, X265_LOG_INFO, buffer);
-    }
-    else
-        general_log(m_param, NULL, X265_LOG_INFO, "\nencoded 0 frames\n");
+            sprintf(buffer + p, "\n");
+            general_log(m_param, NULL, X265_LOG_INFO, buffer);
+        }
+        else
+            general_log(m_param, NULL, X265_LOG_INFO, "\nencoded 0
frames\n");

 #if DETAILED_CU_STATS
-    /* Summarize stats from all frame encoders */
-    CUStats cuStats;
-    for (int i = 0; i < m_param->frameNumThreads; i++)
-        cuStats.accumulate(m_frameEncoder[i]->m_cuStats, *m_param);
+        /* Summarize stats from all frame encoders */
+        CUStats cuStats;
+        for (int i = 0; i < m_param->frameNumThreads; i++)
+            cuStats.accumulate(m_frameEncoder[i]->m_cuStats, *m_param);

-    if (!cuStats.totalCTUTime)
-        return;
+        if (!cuStats.totalCTUTime)
+            return;

-    int totalWorkerCount = 0;
-    for (int i = 0; i < m_numPools; i++)
-        totalWorkerCount += m_threadPool[i].m_numWorkers;
+        int totalWorkerCount = 0;
+        for (int i = 0; i < m_numPools; i++)
+            totalWorkerCount += m_threadPool[i].m_numWorkers;

-    int64_t  batchElapsedTime, coopSliceElapsedTime;
-    uint64_t batchCount, coopSliceCount;
-    m_lookahead->getWorkerStats(batchElapsedTime, batchCount,
coopSliceElapsedTime, coopSliceCount);
-    int64_t lookaheadWorkerTime =
m_lookahead->m_slicetypeDecideElapsedTime +
m_lookahead->m_preLookaheadElapsedTime +
-                                  batchElapsedTime + coopSliceElapsedTime;
+        int64_t  batchElapsedTime, coopSliceElapsedTime;
+        uint64_t batchCount, coopSliceCount;
+        m_lookahead->getWorkerStats(batchElapsedTime, batchCount,
coopSliceElapsedTime, coopSliceCount);
+        int64_t lookaheadWorkerTime =
m_lookahead->m_slicetypeDecideElapsedTime +
m_lookahead->m_preLookaheadElapsedTime +
+            batchElapsedTime + coopSliceElapsedTime;

-    int64_t totalWorkerTime = cuStats.totalCTUTime +
cuStats.loopFilterElapsedTime + cuStats.pmodeTime +
-                              cuStats.pmeTime + lookaheadWorkerTime +
cuStats.weightAnalyzeTime;
-    int64_t elapsedEncodeTime = x265_mdate() - m_encodeStartTime;
+        int64_t totalWorkerTime = cuStats.totalCTUTime +
cuStats.loopFilterElapsedTime + cuStats.pmodeTime +
+            cuStats.pmeTime + lookaheadWorkerTime +
cuStats.weightAnalyzeTime;
+        int64_t elapsedEncodeTime = x265_mdate() - m_encodeStartTime;

-    int64_t interRDOTotalTime = 0, intraRDOTotalTime = 0;
-    uint64_t interRDOTotalCount = 0, intraRDOTotalCount = 0;
-    for (uint32_t i = 0; i <= m_param->maxCUDepth; i++)
-    {
-        interRDOTotalTime += cuStats.interRDOElapsedTime[i];
-        intraRDOTotalTime += cuStats.intraRDOElapsedTime[i];
-        interRDOTotalCount += cuStats.countInterRDO[i];
-        intraRDOTotalCount += cuStats.countIntraRDO[i];
-    }
+        int64_t interRDOTotalTime = 0, intraRDOTotalTime = 0;
+        uint64_t interRDOTotalCount = 0, intraRDOTotalCount = 0;
+        for (uint32_t i = 0; i <= m_param->maxCUDepth; i++)
+        {
+            interRDOTotalTime += cuStats.interRDOElapsedTime[i];
+            intraRDOTotalTime += cuStats.intraRDOElapsedTime[i];
+            interRDOTotalCount += cuStats.countInterRDO[i];
+            intraRDOTotalCount += cuStats.countIntraRDO[i];
+        }

-    /* Time within compressCTU() and pmode tasks not captured by ME, Intra
mode selection, or RDO (2Nx2N merge, 2Nx2N bidir, etc) */
-    int64_t unaccounted = (cuStats.totalCTUTime + cuStats.pmodeTime) -
-                          (cuStats.intraAnalysisElapsedTime +
cuStats.motionEstimationElapsedTime + interRDOTotalTime +
intraRDOTotalTime);
+        /* Time within compressCTU() and pmode tasks not captured by ME,
Intra mode selection, or RDO (2Nx2N merge, 2Nx2N bidir, etc) */
+        int64_t unaccounted = (cuStats.totalCTUTime + cuStats.pmodeTime) -
+            (cuStats.intraAnalysisElapsedTime +
cuStats.motionEstimationElapsedTime + interRDOTotalTime +
intraRDOTotalTime);

 #define ELAPSED_SEC(val)  ((double)(val) / 1000000)
 #define ELAPSED_MSEC(val) ((double)(val) / 1000)

-    if (m_param->bDistributeMotionEstimation && cuStats.countPMEMasters)
-    {
-        x265_log(m_param, X265_LOG_INFO, "CU: %%%05.2lf time spent in
motion estimation, averaging %.3lf CU inter modes per CTU\n",
-                 100.0 * (cuStats.motionEstimationElapsedTime +
cuStats.pmeTime) / totalWorkerTime,
-                 (double)cuStats.countMotionEstimate / cuStats.totalCTUs);
-        x265_log(m_param, X265_LOG_INFO, "CU: %.3lf PME masters per inter
CU, each blocked an average of %.3lf ns\n",
-                 (double)cuStats.countPMEMasters /
cuStats.countMotionEstimate,
-                 (double)cuStats.pmeBlockTime / cuStats.countPMEMasters);
-        x265_log(m_param, X265_LOG_INFO, "CU:       %.3lf slaves per PME
master, each took an average of %.3lf ms\n",
-                 (double)cuStats.countPMETasks / cuStats.countPMEMasters,
-                 ELAPSED_MSEC(cuStats.pmeTime) / cuStats.countPMETasks);
-    }
-    else
-    {
-        x265_log(m_param, X265_LOG_INFO, "CU: %%%05.2lf time spent in
motion estimation, averaging %.3lf CU inter modes per CTU\n",
-                 100.0 * cuStats.motionEstimationElapsedTime /
totalWorkerTime,
-                 (double)cuStats.countMotionEstimate / cuStats.totalCTUs);
-
-        if (cuStats.skippedMotionReferences[0] ||
cuStats.skippedMotionReferences[1] || cuStats.skippedMotionReferences[2])
-            x265_log(m_param, X265_LOG_INFO, "CU: Skipped motion searches
per depth %%%.2lf %%%.2lf %%%.2lf %%%.2lf\n",
-                     100.0 * cuStats.skippedMotionReferences[0] /
cuStats.totalMotionReferences[0],
-                     100.0 * cuStats.skippedMotionReferences[1] /
cuStats.totalMotionReferences[1],
-                     100.0 * cuStats.skippedMotionReferences[2] /
cuStats.totalMotionReferences[2],
-                     100.0 * cuStats.skippedMotionReferences[3] /
cuStats.totalMotionReferences[3]);
-    }
-    x265_log(m_param, X265_LOG_INFO, "CU: %%%05.2lf time spent in intra
analysis, averaging %.3lf Intra PUs per CTU\n",
-             100.0 * cuStats.intraAnalysisElapsedTime / totalWorkerTime,
-             (double)cuStats.countIntraAnalysis / cuStats.totalCTUs);
-    if (cuStats.skippedIntraCU[0] || cuStats.skippedIntraCU[1] ||
cuStats.skippedIntraCU[2])
-        x265_log(m_param, X265_LOG_INFO, "CU: Skipped intra CUs at depth
%%%.2lf %%%.2lf %%%.2lf\n",
-                 100.0 * cuStats.skippedIntraCU[0] /
cuStats.totalIntraCU[0],
-                 100.0 * cuStats.skippedIntraCU[1] /
cuStats.totalIntraCU[1],
-                 100.0 * cuStats.skippedIntraCU[2] /
cuStats.totalIntraCU[2]);
-    x265_log(m_param, X265_LOG_INFO, "CU: %%%05.2lf time spent in inter
RDO, measuring %.3lf inter/merge predictions per CTU\n",
-             100.0 * interRDOTotalTime / totalWorkerTime,
-             (double)interRDOTotalCount / cuStats.totalCTUs);
-    x265_log(m_param, X265_LOG_INFO, "CU: %%%05.2lf time spent in intra
RDO, measuring %.3lf intra predictions per CTU\n",
-             100.0 * intraRDOTotalTime / totalWorkerTime,
-             (double)intraRDOTotalCount / cuStats.totalCTUs);
-    x265_log(m_param, X265_LOG_INFO, "CU: %%%05.2lf time spent in loop
filters, average %.3lf ms per call\n",
-             100.0 * cuStats.loopFilterElapsedTime / totalWorkerTime,
-             ELAPSED_MSEC(cuStats.loopFilterElapsedTime) /
cuStats.countLoopFilter);
-    if (cuStats.countWeightAnalyze && cuStats.weightAnalyzeTime)
-    {
-        x265_log(m_param, X265_LOG_INFO, "CU: %%%05.2lf time spent in
weight analysis, average %.3lf ms per call\n",
-                 100.0 * cuStats.weightAnalyzeTime / totalWorkerTime,
-                 ELAPSED_MSEC(cuStats.weightAnalyzeTime) /
cuStats.countWeightAnalyze);
-    }
-    if (m_param->bDistributeModeAnalysis && cuStats.countPModeMasters)
-    {
-        x265_log(m_param, X265_LOG_INFO, "CU: %.3lf PMODE masters per CTU,
each blocked an average of %.3lf ns\n",
-                 (double)cuStats.countPModeMasters / cuStats.totalCTUs,
-                 (double)cuStats.pmodeBlockTime /
cuStats.countPModeMasters);
-        x265_log(m_param, X265_LOG_INFO, "CU:       %.3lf slaves per PMODE
master, each took average of %.3lf ms\n",
-                 (double)cuStats.countPModeTasks /
cuStats.countPModeMasters,
-                 ELAPSED_MSEC(cuStats.pmodeTime) /
cuStats.countPModeTasks);
-    }
-
-    x265_log(m_param, X265_LOG_INFO, "CU: %%%05.2lf time spent in
slicetypeDecide (avg %.3lfms) and prelookahead (avg %.3lfms)\n",
-             100.0 * lookaheadWorkerTime / totalWorkerTime,
-             ELAPSED_MSEC(m_lookahead->m_slicetypeDecideElapsedTime) /
m_lookahead->m_countSlicetypeDecide,
-             ELAPSED_MSEC(m_lookahead->m_preLookaheadElapsedTime) /
m_lookahead->m_countPreLookahead);
-
-    x265_log(m_param, X265_LOG_INFO, "CU: %%%05.2lf time spent in other
tasks\n",
-             100.0 * unaccounted / totalWorkerTime);
-
-    if (intraRDOTotalTime && intraRDOTotalCount)
-    {
-        x265_log(m_param, X265_LOG_INFO, "CU: Intra RDO time  per depth
%%%05.2lf %%%05.2lf %%%05.2lf %%%05.2lf\n",
-                 100.0 * cuStats.intraRDOElapsedTime[0] /
intraRDOTotalTime,  // 64
-                 100.0 * cuStats.intraRDOElapsedTime[1] /
intraRDOTotalTime,  // 32
-                 100.0 * cuStats.intraRDOElapsedTime[2] /
intraRDOTotalTime,  // 16
-                 100.0 * cuStats.intraRDOElapsedTime[3] /
intraRDOTotalTime); // 8
-        x265_log(m_param, X265_LOG_INFO, "CU: Intra RDO calls per depth
%%%05.2lf %%%05.2lf %%%05.2lf %%%05.2lf\n",
-                 100.0 * cuStats.countIntraRDO[0] / intraRDOTotalCount,
 // 64
-                 100.0 * cuStats.countIntraRDO[1] / intraRDOTotalCount,
 // 32
-                 100.0 * cuStats.countIntraRDO[2] / intraRDOTotalCount,
 // 16
-                 100.0 * cuStats.countIntraRDO[3] / intraRDOTotalCount);
// 8
-    }
-
-    if (interRDOTotalTime && interRDOTotalCount)
-    {
-        x265_log(m_param, X265_LOG_INFO, "CU: Inter RDO time  per depth
%%%05.2lf %%%05.2lf %%%05.2lf %%%05.2lf\n",
-                 100.0 * cuStats.interRDOElapsedTime[0] /
interRDOTotalTime,  // 64
-                 100.0 * cuStats.interRDOElapsedTime[1] /
interRDOTotalTime,  // 32
-                 100.0 * cuStats.interRDOElapsedTime[2] /
interRDOTotalTime,  // 16
-                 100.0 * cuStats.interRDOElapsedTime[3] /
interRDOTotalTime); // 8
-        x265_log(m_param, X265_LOG_INFO, "CU: Inter RDO calls per depth
%%%05.2lf %%%05.2lf %%%05.2lf %%%05.2lf\n",
-                 100.0 * cuStats.countInterRDO[0] / interRDOTotalCount,
 // 64
-                 100.0 * cuStats.countInterRDO[1] / interRDOTotalCount,
 // 32
-                 100.0 * cuStats.countInterRDO[2] / interRDOTotalCount,
 // 16
-                 100.0 * cuStats.countInterRDO[3] / interRDOTotalCount);
// 8
-    }
-
-    x265_log(m_param, X265_LOG_INFO, "CU: " X265_LL " %dX%d CTUs
compressed in %.3lf seconds, %.3lf CTUs per worker-second\n",
-             cuStats.totalCTUs, m_param->maxCUSize, m_param->maxCUSize,
-             ELAPSED_SEC(totalWorkerTime),
-             cuStats.totalCTUs / ELAPSED_SEC(totalWorkerTime));
-
-    if (m_threadPool)
-        x265_log(m_param, X265_LOG_INFO, "CU: %.3lf average worker
utilization, %%%05.2lf of theoretical maximum utilization\n",
-                 (double)totalWorkerTime / elapsedEncodeTime,
-                 100.0 * totalWorkerTime / (elapsedEncodeTime *
totalWorkerCount));
+        if (m_param->bDistributeMotionEstimation &&
cuStats.countPMEMasters)
+        {
+            x265_log(m_param, X265_LOG_INFO, "CU: %%%05.2lf time spent in
motion estimation, averaging %.3lf CU inter modes per CTU\n",
+                100.0 * (cuStats.motionEstimationElapsedTime +
cuStats.pmeTime) / totalWorkerTime,
+                (double)cuStats.countMotionEstimate / cuStats.totalCTUs);
+            x265_log(m_param, X265_LOG_INFO, "CU: %.3lf PME masters per
inter CU, each blocked an average of %.3lf ns\n",
+                (double)cuStats.countPMEMasters /
cuStats.countMotionEstimate,
+                (double)cuStats.pmeBlockTime / cuStats.countPMEMasters);
+            x265_log(m_param, X265_LOG_INFO, "CU:       %.3lf slaves per
PME master, each took an average of %.3lf ms\n",
+                (double)cuStats.countPMETasks / cuStats.countPMEMasters,
+                ELAPSED_MSEC(cuStats.pmeTime) / cuStats.countPMETasks);
+        }
+        else
+        {
+            x265_log(m_param, X265_LOG_INFO, "CU: %%%05.2lf time spent in
motion estimation, averaging %.3lf CU inter modes per CTU\n",
+                100.0 * cuStats.motionEstimationElapsedTime /
totalWorkerTime,
+                (double)cuStats.countMotionEstimate / cuStats.totalCTUs);
+
+            if (cuStats.skippedMotionReferences[0] ||
cuStats.skippedMotionReferences[1] || cuStats.skippedMotionReferences[2])
+                x265_log(m_param, X265_LOG_INFO, "CU: Skipped motion
searches per depth %%%.2lf %%%.2lf %%%.2lf %%%.2lf\n",
+                    100.0 * cuStats.skippedMotionReferences[0] /
cuStats.totalMotionReferences[0],
+                    100.0 * cuStats.skippedMotionReferences[1] /
cuStats.totalMotionReferences[1],
+                    100.0 * cuStats.skippedMotionReferences[2] /
cuStats.totalMotionReferences[2],
+                    100.0 * cuStats.skippedMotionReferences[3] /
cuStats.totalMotionReferences[3]);
+        }
+        x265_log(m_param, X265_LOG_INFO, "CU: %%%05.2lf time spent in
intra analysis, averaging %.3lf Intra PUs per CTU\n",
+            100.0 * cuStats.intraAnalysisElapsedTime / totalWorkerTime,
+            (double)cuStats.countIntraAnalysis / cuStats.totalCTUs);
+        if (cuStats.skippedIntraCU[0] || cuStats.skippedIntraCU[1] ||
cuStats.skippedIntraCU[2])
+            x265_log(m_param, X265_LOG_INFO, "CU: Skipped intra CUs at
depth %%%.2lf %%%.2lf %%%.2lf\n",
+                100.0 * cuStats.skippedIntraCU[0] /
cuStats.totalIntraCU[0],
+                100.0 * cuStats.skippedIntraCU[1] /
cuStats.totalIntraCU[1],
+                100.0 * cuStats.skippedIntraCU[2] /
cuStats.totalIntraCU[2]);
+        x265_log(m_param, X265_LOG_INFO, "CU: %%%05.2lf time spent in
inter RDO, measuring %.3lf inter/merge predictions per CTU\n",
+            100.0 * interRDOTotalTime / totalWorkerTime,
+            (double)interRDOTotalCount / cuStats.totalCTUs);
+        x265_log(m_param, X265_LOG_INFO, "CU: %%%05.2lf time spent in
intra RDO, measuring %.3lf intra predictions per CTU\n",
+            100.0 * intraRDOTotalTime / totalWorkerTime,
+            (double)intraRDOTotalCount / cuStats.totalCTUs);
+        x265_log(m_param, X265_LOG_INFO, "CU: %%%05.2lf time spent in loop
filters, average %.3lf ms per call\n",
+            100.0 * cuStats.loopFilterElapsedTime / totalWorkerTime,
+            ELAPSED_MSEC(cuStats.loopFilterElapsedTime) /
cuStats.countLoopFilter);
+        if (cuStats.countWeightAnalyze && cuStats.weightAnalyzeTime)
+        {
+            x265_log(m_param, X265_LOG_INFO, "CU: %%%05.2lf time spent in
weight analysis, average %.3lf ms per call\n",
+                100.0 * cuStats.weightAnalyzeTime / totalWorkerTime,
+                ELAPSED_MSEC(cuStats.weightAnalyzeTime) /
cuStats.countWeightAnalyze);
+        }
+        if (m_param->bDistributeModeAnalysis && cuStats.countPModeMasters)
+        {
+            x265_log(m_param, X265_LOG_INFO, "CU: %.3lf PMODE masters per
CTU, each blocked an average of %.3lf ns\n",
+                (double)cuStats.countPModeMasters / cuStats.totalCTUs,
+                (double)cuStats.pmodeBlockTime /
cuStats.countPModeMasters);
+            x265_log(m_param, X265_LOG_INFO, "CU:       %.3lf slaves per
PMODE master, each took average of %.3lf ms\n",
+                (double)cuStats.countPModeTasks /
cuStats.countPModeMasters,
+                ELAPSED_MSEC(cuStats.pmodeTime) / cuStats.countPModeTasks);
+        }
+
+        x265_log(m_param, X265_LOG_INFO, "CU: %%%05.2lf time spent in
slicetypeDecide (avg %.3lfms) and prelookahead (avg %.3lfms)\n",
+            100.0 * lookaheadWorkerTime / totalWorkerTime,
+            ELAPSED_MSEC(m_lookahead->m_slicetypeDecideElapsedTime) /
m_lookahead->m_countSlicetypeDecide,
+            ELAPSED_MSEC(m_lookahead->m_preLookaheadElapsedTime) /
m_lookahead->m_countPreLookahead);
+
+        x265_log(m_param, X265_LOG_INFO, "CU: %%%05.2lf time spent in
other tasks\n",
+            100.0 * unaccounted / totalWorkerTime);
+
+        if (intraRDOTotalTime && intraRDOTotalCount)
+        {
+            x265_log(m_param, X265_LOG_INFO, "CU: Intra RDO time  per
depth %%%05.2lf %%%05.2lf %%%05.2lf %%%05.2lf\n",
+                100.0 * cuStats.intraRDOElapsedTime[0] /
intraRDOTotalTime,  // 64
+                100.0 * cuStats.intraRDOElapsedTime[1] /
intraRDOTotalTime,  // 32
+                100.0 * cuStats.intraRDOElapsedTime[2] /
intraRDOTotalTime,  // 16
+                100.0 * cuStats.intraRDOElapsedTime[3] /
intraRDOTotalTime); // 8
+            x265_log(m_param, X265_LOG_INFO, "CU: Intra RDO calls per
depth %%%05.2lf %%%05.2lf %%%05.2lf %%%05.2lf\n",
+                100.0 * cuStats.countIntraRDO[0] / intraRDOTotalCount,  //
64
+                100.0 * cuStats.countIntraRDO[1] / intraRDOTotalCount,  //
32
+                100.0 * cuStats.countIntraRDO[2] / intraRDOTotalCount,  //
16
+                100.0 * cuStats.countIntraRDO[3] / intraRDOTotalCount); //
8
+        }
+
+        if (interRDOTotalTime && interRDOTotalCount)
+        {
+            x265_log(m_param, X265_LOG_INFO, "CU: Inter RDO time  per
depth %%%05.2lf %%%05.2lf %%%05.2lf %%%05.2lf\n",
+                100.0 * cuStats.interRDOElapsedTime[0] /
interRDOTotalTime,  // 64
+                100.0 * cuStats.interRDOElapsedTime[1] /
interRDOTotalTime,  // 32
+                100.0 * cuStats.interRDOElapsedTime[2] /
interRDOTotalTime,  // 16
+                100.0 * cuStats.interRDOElapsedTime[3] /
interRDOTotalTime); // 8
+            x265_log(m_param, X265_LOG_INFO, "CU: Inter RDO calls per
depth %%%05.2lf %%%05.2lf %%%05.2lf %%%05.2lf\n",
+                100.0 * cuStats.countInterRDO[0] / interRDOTotalCount,  //
64
+                100.0 * cuStats.countInterRDO[1] / interRDOTotalCount,  //
32
+                100.0 * cuStats.countInterRDO[2] / interRDOTotalCount,  //
16
+                100.0 * cuStats.countInterRDO[3] / interRDOTotalCount); //
8
+        }
+
+        x265_log(m_param, X265_LOG_INFO, "CU: " X265_LL " %dX%d CTUs
compressed in %.3lf seconds, %.3lf CTUs per worker-second\n",
+            cuStats.totalCTUs, m_param->maxCUSize, m_param->maxCUSize,
+            ELAPSED_SEC(totalWorkerTime),
+            cuStats.totalCTUs / ELAPSED_SEC(totalWorkerTime));
+
+        if (m_threadPool)
+            x265_log(m_param, X265_LOG_INFO, "CU: %.3lf average worker
utilization, %%%05.2lf of theoretical maximum utilization\n",
+                (double)totalWorkerTime / elapsedEncodeTime,
+                100.0 * totalWorkerTime / (elapsedEncodeTime *
totalWorkerCount));

 #undef ELAPSED_SEC
 #undef ELAPSED_MSEC
 #endif
+    }
 }

-void Encoder::fetchStats(x265_stats *stats, size_t statsSizeBytes)
+void Encoder::fetchStats(x265_stats *stats, size_t statsSizeBytes, int
layer)
 {
     if (statsSizeBytes >= sizeof(stats))
     {
-        stats->globalPsnrY = m_analyzeAll.m_psnrSumY;
-        stats->globalPsnrU = m_analyzeAll.m_psnrSumU;
-        stats->globalPsnrV = m_analyzeAll.m_psnrSumV;
-        stats->encodedPictureCount = m_analyzeAll.m_numPics;
+        stats->globalPsnrY = m_analyzeAll[layer].m_psnrSumY;
+        stats->globalPsnrU = m_analyzeAll[layer].m_psnrSumU;
+        stats->globalPsnrV = m_analyzeAll[layer].m_psnrSumV;
+        stats->encodedPictureCount = m_analyzeAll[layer].m_numPics;
         stats->totalWPFrames = m_numLumaWPFrames;
-        stats->accBits = m_analyzeAll.m_accBits;
+        stats->accBits = m_analyzeAll[layer].m_accBits;
         stats->elapsedEncodeTime = (double)(x265_mdate() -
m_encodeStartTime) / 1000000;
         if (stats->encodedPictureCount > 0)
         {
-            stats->globalSsim = m_analyzeAll.m_globalSsim /
stats->encodedPictureCount;
+            stats->globalSsim = m_analyzeAll[layer].m_globalSsim /
stats->encodedPictureCount;
             stats->globalPsnr = (stats->globalPsnrY * 6 +
stats->globalPsnrU + stats->globalPsnrV) / (8 * stats->encodedPictureCount);
             stats->elapsedVideoTime = (double)stats->encodedPictureCount *
m_param->fpsDenom / m_param->fpsNum;
             stats->bitrate = (0.001f * stats->accBits) /
stats->elapsedVideoTime;
@@ -2969,33 +2976,33 @@ void Encoder::fetchStats(x265_stats *stats, size_t
statsSizeBytes)
         double fps = (double)m_param->fpsNum / m_param->fpsDenom;
         double scale = fps / 1000;

-        stats->statsI.numPics = m_analyzeI.m_numPics;
-        stats->statsI.avgQp   = m_analyzeI.m_totalQp /
(double)m_analyzeI.m_numPics;
-        stats->statsI.bitrate = m_analyzeI.m_accBits * scale /
(double)m_analyzeI.m_numPics;
-        stats->statsI.psnrY   = m_analyzeI.m_psnrSumY /
(double)m_analyzeI.m_numPics;
-        stats->statsI.psnrU   = m_analyzeI.m_psnrSumU /
(double)m_analyzeI.m_numPics;
-        stats->statsI.psnrV   = m_analyzeI.m_psnrSumV /
(double)m_analyzeI.m_numPics;
-        stats->statsI.ssim    = x265_ssim2dB(m_analyzeI.m_globalSsim /
(double)m_analyzeI.m_numPics);
-
-        stats->statsP.numPics = m_analyzeP.m_numPics;
-        stats->statsP.avgQp   = m_analyzeP.m_totalQp /
(double)m_analyzeP.m_numPics;
-        stats->statsP.bitrate = m_analyzeP.m_accBits * scale /
(double)m_analyzeP.m_numPics;
-        stats->statsP.psnrY   = m_analyzeP.m_psnrSumY /
(double)m_analyzeP.m_numPics;
-        stats->statsP.psnrU   = m_analyzeP.m_psnrSumU /
(double)m_analyzeP.m_numPics;
-        stats->statsP.psnrV   = m_analyzeP.m_psnrSumV /
(double)m_analyzeP.m_numPics;
-        stats->statsP.ssim    = x265_ssim2dB(m_analyzeP.m_globalSsim /
(double)m_analyzeP.m_numPics);
-
-        stats->statsB.numPics = m_analyzeB.m_numPics;
-        stats->statsB.avgQp   = m_analyzeB.m_totalQp /
(double)m_analyzeB.m_numPics;
-        stats->statsB.bitrate = m_analyzeB.m_accBits * scale /
(double)m_analyzeB.m_numPics;
-        stats->statsB.psnrY   = m_analyzeB.m_psnrSumY /
(double)m_analyzeB.m_numPics;
-        stats->statsB.psnrU   = m_analyzeB.m_psnrSumU /
(double)m_analyzeB.m_numPics;
-        stats->statsB.psnrV   = m_analyzeB.m_psnrSumV /
(double)m_analyzeB.m_numPics;
-        stats->statsB.ssim    = x265_ssim2dB(m_analyzeB.m_globalSsim /
(double)m_analyzeB.m_numPics);
+        stats->statsI.numPics = m_analyzeI[layer].m_numPics;
+        stats->statsI.avgQp   = m_analyzeI[layer].m_totalQp /
(double)m_analyzeI[layer].m_numPics;
+        stats->statsI.bitrate = m_analyzeI[layer].m_accBits * scale /
(double)m_analyzeI[layer].m_numPics;
+        stats->statsI.psnrY   = m_analyzeI[layer].m_psnrSumY /
(double)m_analyzeI[layer].m_numPics;
+        stats->statsI.psnrU   = m_analyzeI[layer].m_psnrSumU /
(double)m_analyzeI[layer].m_numPics;
+        stats->statsI.psnrV   = m_analyzeI[layer].m_psnrSumV /
(double)m_analyzeI[layer].m_numPics;
+        stats->statsI.ssim    =
x265_ssim2dB(m_analyzeI[layer].m_globalSsim /
(double)m_analyzeI[layer].m_numPics);
+
+        stats->statsP.numPics = m_analyzeP[layer].m_numPics;
+        stats->statsP.avgQp   = m_analyzeP[layer].m_totalQp /
(double)m_analyzeP[layer].m_numPics;
+        stats->statsP.bitrate = m_analyzeP[layer].m_accBits * scale /
(double)m_analyzeP[layer].m_numPics;
+        stats->statsP.psnrY   = m_analyzeP[layer].m_psnrSumY /
(double)m_analyzeP[layer].m_numPics;
+        stats->statsP.psnrU   = m_analyzeP[layer].m_psnrSumU /
(double)m_analyzeP[layer].m_numPics;
+        stats->statsP.psnrV   = m_analyzeP[layer].m_psnrSumV /
(double)m_analyzeP[layer].m_numPics;
+        stats->statsP.ssim    =
x265_ssim2dB(m_analyzeP[layer].m_globalSsim /
(double)m_analyzeP[layer].m_numPics);
+
+        stats->statsB.numPics = m_analyzeB[layer].m_numPics;
+        stats->statsB.avgQp   = m_analyzeB[layer].m_totalQp /
(double)m_analyzeB[layer].m_numPics;
+        stats->statsB.bitrate = m_analyzeB[layer].m_accBits * scale /
(double)m_analyzeB[layer].m_numPics;
+        stats->statsB.psnrY   = m_analyzeB[layer].m_psnrSumY /
(double)m_analyzeB[layer].m_numPics;
+        stats->statsB.psnrU   = m_analyzeB[layer].m_psnrSumU /
(double)m_analyzeB[layer].m_numPics;
+        stats->statsB.psnrV   = m_analyzeB[layer].m_psnrSumV /
(double)m_analyzeB[layer].m_numPics;
+        stats->statsB.ssim    =
x265_ssim2dB(m_analyzeB[layer].m_globalSsim /
(double)m_analyzeB[layer].m_numPics);
         if (m_param->csvLogLevel >= 2 || m_param->maxCLL ||
m_param->maxFALL)
         {
-            stats->maxCLL = m_analyzeAll.m_maxCLL;
-            stats->maxFALL = (uint16_t)(m_analyzeAll.m_maxFALL /
m_analyzeAll.m_numPics);
+            stats->maxCLL = m_analyzeAll[layer].m_maxCLL;
+            stats->maxFALL = (uint16_t)(m_analyzeAll[layer].m_maxFALL /
m_analyzeAll[layer].m_numPics);
         }
     }
     /* If new statistics are added to x265_stats, we must check here
whether the
@@ -3003,10 +3010,10 @@ void Encoder::fetchStats(x265_stats *stats, size_t
statsSizeBytes)
      * future safety) */
 }

-void Encoder::finishFrameStats(Frame* curFrame, FrameEncoder *curEncoder,
x265_frame_stats* frameStats, int inPoc)
+void Encoder::finishFrameStats(Frame* curFrame, FrameEncoder *curEncoder,
x265_frame_stats* frameStats, int inPoc, int layer)
 {
     PicYuv* reconPic = curFrame->m_reconPic;
-    uint64_t bits = curEncoder->m_accessUnitBits;
+    uint64_t bits = curEncoder->m_accessUnitBits[layer];

     //===== calculate PSNR =====
     int width  = reconPic->m_picWidth -
m_sps.conformanceWindow.rightOffset;
@@ -3019,9 +3026,9 @@ void Encoder::finishFrameStats(Frame* curFrame,
FrameEncoder *curEncoder, x265_f
     double refValueC = (double)maxvalC * maxvalC * size / 4.0;
     uint64_t ssdY, ssdU, ssdV;

-    ssdY = curEncoder->m_SSDY;
-    ssdU = curEncoder->m_SSDU;
-    ssdV = curEncoder->m_SSDV;
+    ssdY = curEncoder->m_SSDY[layer];
+    ssdU = curEncoder->m_SSDU[layer];
+    ssdV = curEncoder->m_SSDV[layer];
     double psnrY = (ssdY ? 10.0 * log10(refValueY / (double)ssdY) : 99.99);
     double psnrU = (ssdU ? 10.0 * log10(refValueC / (double)ssdU) : 99.99);
     double psnrV = (ssdV ? 10.0 * log10(refValueC / (double)ssdV) : 99.99);
@@ -3030,49 +3037,49 @@ void Encoder::finishFrameStats(Frame* curFrame,
FrameEncoder *curEncoder, x265_f
     Slice* slice = curEncData.m_slice;

     //===== add bits, psnr and ssim =====
-    m_analyzeAll.addBits(bits);
-    m_analyzeAll.addQP(curEncData.m_avgQpAq);
+    m_analyzeAll[layer].addBits(bits);
+    m_analyzeAll[layer].addQP(curEncData.m_avgQpAq);

     if (m_param->bEnablePsnr)
-        m_analyzeAll.addPsnr(psnrY, psnrU, psnrV);
+        m_analyzeAll[layer].addPsnr(psnrY, psnrU, psnrV);

     double ssim = 0.0;
     if (m_param->bEnableSsim && curEncoder->m_ssimCnt)
     {
-        ssim = curEncoder->m_ssim / curEncoder->m_ssimCnt;
-        m_analyzeAll.addSsim(ssim);
+        ssim = curEncoder->m_ssim[layer] / curEncoder->m_ssimCnt[layer];
+        m_analyzeAll[layer].addSsim(ssim);
     }
     if (slice->isIntra())
     {
-        m_analyzeI.addBits(bits);
-        m_analyzeI.addQP(curEncData.m_avgQpAq);
+        m_analyzeI[layer].addBits(bits);
+        m_analyzeI[layer].addQP(curEncData.m_avgQpAq);
         if (m_param->bEnablePsnr)
-            m_analyzeI.addPsnr(psnrY, psnrU, psnrV);
+            m_analyzeI[layer].addPsnr(psnrY, psnrU, psnrV);
         if (m_param->bEnableSsim)
-            m_analyzeI.addSsim(ssim);
+            m_analyzeI[layer].addSsim(ssim);
     }
     else if (slice->isInterP())
     {
-        m_analyzeP.addBits(bits);
-        m_analyzeP.addQP(curEncData.m_avgQpAq);
+        m_analyzeP[layer].addBits(bits);
+        m_analyzeP[layer].addQP(curEncData.m_avgQpAq);
         if (m_param->bEnablePsnr)
-            m_analyzeP.addPsnr(psnrY, psnrU, psnrV);
+            m_analyzeP[layer].addPsnr(psnrY, psnrU, psnrV);
         if (m_param->bEnableSsim)
-            m_analyzeP.addSsim(ssim);
+            m_analyzeP[layer].addSsim(ssim);
     }
     else if (slice->isInterB())
     {
-        m_analyzeB.addBits(bits);
-        m_analyzeB.addQP(curEncData.m_avgQpAq);
+        m_analyzeB[layer].addBits(bits);
+        m_analyzeB[layer].addQP(curEncData.m_avgQpAq);
         if (m_param->bEnablePsnr)
-            m_analyzeB.addPsnr(psnrY, psnrU, psnrV);
+            m_analyzeB[layer].addPsnr(psnrY, psnrU, psnrV);
         if (m_param->bEnableSsim)
-            m_analyzeB.addSsim(ssim);
+            m_analyzeB[layer].addSsim(ssim);
     }
     if (m_param->csvLogLevel >= 2 || m_param->maxCLL || m_param->maxFALL)
     {
-        m_analyzeAll.m_maxFALL += curFrame->m_fencPic->m_avgLumaLevel;
-        m_analyzeAll.m_maxCLL = X265_MAX(m_analyzeAll.m_maxCLL,
curFrame->m_fencPic->m_maxLumaLevel);
+        m_analyzeAll[layer].m_maxFALL +=
curFrame->m_fencPic->m_avgLumaLevel;
+        m_analyzeAll[layer].m_maxCLL =
X265_MAX(m_analyzeAll[layer].m_maxCLL, curFrame->m_fencPic->m_maxLumaLevel);
     }
     char c = (slice->isIntra() ? (curFrame->m_lowres.sliceType ==
X265_TYPE_IDR ? 'I' : 'i') : slice->isInterP() ? 'P' : 'B');
     int poc = slice->m_poc;
@@ -3121,12 +3128,12 @@ void Encoder::finishFrameStats(Frame* curFrame,
FrameEncoder *curEncoder, x265_f
 #if ENABLE_LIBVMAF
             frameStats->vmafFrameScore = curFrame->m_fencPic->m_vmafScore;
 #endif
-            frameStats->decideWaitTime = ELAPSED_MSEC(0,
curEncoder->m_slicetypeWaitTime);
-            frameStats->row0WaitTime =
ELAPSED_MSEC(curEncoder->m_startCompressTime, curEncoder->m_row0WaitTime);
-            frameStats->wallTime =
ELAPSED_MSEC(curEncoder->m_row0WaitTime, curEncoder->m_endCompressTime);
-            frameStats->refWaitWallTime =
ELAPSED_MSEC(curEncoder->m_row0WaitTime,
curEncoder->m_allRowsAvailableTime);
-            frameStats->totalCTUTime = ELAPSED_MSEC(0,
curEncoder->m_totalWorkerElapsedTime);
-            frameStats->stallTime = ELAPSED_MSEC(0,
curEncoder->m_totalNoWorkerTime);
+            frameStats->decideWaitTime = ELAPSED_MSEC(0,
curEncoder->m_slicetypeWaitTime[layer]);
+            frameStats->row0WaitTime =
ELAPSED_MSEC(curEncoder->m_startCompressTime[layer],
curEncoder->m_row0WaitTime[layer]);
+            frameStats->wallTime =
ELAPSED_MSEC(curEncoder->m_row0WaitTime[layer],
curEncoder->m_endCompressTime[layer]);
+            frameStats->refWaitWallTime =
ELAPSED_MSEC(curEncoder->m_row0WaitTime[layer],
curEncoder->m_allRowsAvailableTime[layer]);
+            frameStats->totalCTUTime = ELAPSED_MSEC(0,
curEncoder->m_totalWorkerElapsedTime[layer]);
+            frameStats->stallTime = ELAPSED_MSEC(0,
curEncoder->m_totalNoWorkerTime[layer]);
             frameStats->totalFrameTime =
ELAPSED_MSEC(curFrame->m_encodeStartTime, x265_mdate());
             if (curEncoder->m_totalActiveWorkerCount)
                 frameStats->avgWPP =
(double)curEncoder->m_totalActiveWorkerCount /
curEncoder->m_activeWorkerCountSamples;
@@ -3385,6 +3392,7 @@ void Encoder::initVPS(VPS *vps)
     vps->ptl.interlacedSourceFlag = !!m_param->interlaceMode;
     vps->ptl.nonPackedConstraintFlag = false;
     vps->ptl.frameOnlyConstraintFlag = !m_param->interlaceMode;
+    vps->m_numLayers = m_param->numScalableLayers;

 #if ENABLE_ALPHA
     vps->vps_extension_flag = false;
@@ -3430,7 +3438,6 @@ void Encoder::initVPS(VPS *vps)
         vps->m_nuhLayerIdPresentFlag = 1;
         vps->m_viewIdLen = 0;
         vps->m_vpsNumLayerSetsMinus1 = 1;
-        vps->m_numLayers = m_param->numScalableLayers;
     }
 #endif
 }
diff --git a/source/encoder/encoder.h b/source/encoder/encoder.h
index 22976b180..58709e92e 100644
--- a/source/encoder/encoder.h
+++ b/source/encoder/encoder.h
@@ -217,10 +217,10 @@ public:

     bool               m_externalFlush;
     /* Collect statistics globally */
-    EncStats           m_analyzeAll;
-    EncStats           m_analyzeI;
-    EncStats           m_analyzeP;
-    EncStats           m_analyzeB;
+    EncStats           m_analyzeAll[MAX_SCALABLE_LAYERS];
+    EncStats           m_analyzeI[MAX_SCALABLE_LAYERS];
+    EncStats           m_analyzeP[MAX_SCALABLE_LAYERS];
+    EncStats           m_analyzeB[MAX_SCALABLE_LAYERS];
     VPS                m_vps;
     SPS                m_sps;
     PPS                m_pps;
@@ -320,7 +320,7 @@ public:

     void getEndNalUnits(NALList& list, Bitstream& bs);

-    void fetchStats(x265_stats* stats, size_t statsSizeBytes);
+    void fetchStats(x265_stats* stats, size_t statsSizeBytes, int layer =
0);

     void printSummary();

@@ -352,7 +352,7 @@ public:

     void copyDistortionData(x265_analysis_data* analysis, FrameData
&curEncData);

-    void finishFrameStats(Frame* pic, FrameEncoder *curEncoder,
x265_frame_stats* frameStats, int inPoc);
+    void finishFrameStats(Frame* pic, FrameEncoder *curEncoder,
x265_frame_stats* frameStats, int inPoc, int layer);

     int validateAnalysisData(x265_analysis_validate* param, int
readWriteFlag);

diff --git a/source/encoder/frameencoder.cpp
b/source/encoder/frameencoder.cpp
index 8d72adcf4..9263a4658 100644
--- a/source/encoder/frameencoder.cpp
+++ b/source/encoder/frameencoder.cpp
@@ -41,11 +41,9 @@ void weightAnalyse(Slice& slice, Frame& frame,
x265_param& param);

 FrameEncoder::FrameEncoder()
 {
-    m_prevOutputTime = x265_mdate();
     m_reconfigure = false;
     m_isFrameEncoder = true;
     m_threadActive = true;
-    m_slicetypeWaitTime = 0;
     m_activeWorkerCount = 0;
     m_completionCount = 0;
     m_outStreams = NULL;
@@ -60,9 +58,12 @@ FrameEncoder::FrameEncoder()
     m_ctuGeomMap = NULL;
     m_localTldIdx = 0;
     memset(&m_rce, 0, sizeof(RateControlEntry));
-    for(int layer = 0; layer < MAX_SCALABLE_LAYERS; layer++)
+    for (int layer = 0; layer < MAX_SCALABLE_LAYERS; layer++)
+    {
+        m_prevOutputTime[layer] = x265_mdate();
+        m_slicetypeWaitTime[layer] = 0;
         m_frame[layer] = NULL;
-    m_retFrameBuffer = { NULL };
+    }
 }

 void FrameEncoder::destroy()
@@ -290,9 +291,9 @@ bool FrameEncoder::initializeGeoms()

 bool FrameEncoder::startCompressFrame(Frame* curFrame[MAX_SCALABLE_LAYERS])
 {
-    m_slicetypeWaitTime = x265_mdate() - m_prevOutputTime;
     for (int layer = 0; layer < m_param->numScalableLayers; layer++)
     {
+        m_slicetypeWaitTime[layer] = x265_mdate() -
m_prevOutputTime[layer];
         m_frame[layer] = curFrame[layer];
         curFrame[layer]->m_encData->m_frameEncoderID = m_jpId;
         curFrame[layer]->m_encData->m_jobProvider = this;
@@ -462,14 +463,14 @@ void FrameEncoder::compressFrame(int layer)
 {
     ProfileScopeEvent(frameThread);

-    m_startCompressTime = x265_mdate();
+    m_startCompressTime[layer] = x265_mdate();
     m_totalActiveWorkerCount = 0;
     m_activeWorkerCountSamples = 0;
-    m_totalWorkerElapsedTime = 0;
-    m_totalNoWorkerTime = 0;
+    m_totalWorkerElapsedTime[layer] = 0;
+    m_totalNoWorkerTime[layer] = 0;
     m_countRowBlocks = 0;
-    m_allRowsAvailableTime = 0;
-    m_stallStartTime = 0;
+    m_allRowsAvailableTime[layer] = 0;
+    m_stallStartTime[layer] = 0;

     m_completionCount = 0;
     memset((void*)m_bAllRowsStop, 0, sizeof(bool) * m_param->maxSlices);
@@ -477,9 +478,9 @@ void FrameEncoder::compressFrame(int layer)
     m_rowSliceTotalBits[0] = 0;
     m_rowSliceTotalBits[1] = 0;

-    m_SSDY = m_SSDU = m_SSDV = 0;
-    m_ssim = 0;
-    m_ssimCnt = 0;
+    m_SSDY[layer] = m_SSDU[layer] = m_SSDV[layer] = 0;
+    m_ssim[layer] = 0;
+    m_ssimCnt[layer] = 0;
     memset(&(m_frame[layer]->m_encData->m_frameStats), 0,
sizeof(m_frame[layer]->m_encData->m_frameStats));
     m_sLayerId = layer;

@@ -924,14 +925,14 @@ void FrameEncoder::compressFrame(int layer)
                 enableRowEncoder(m_row_to_idx[row]); /* clear external
dependency for this row */
                 if (!rowInSlice)
                 {
-                    m_row0WaitTime = x265_mdate();
+                    m_row0WaitTime[layer] = x265_mdate();
                     enqueueRowEncoder(m_row_to_idx[row]); /* clear
internal dependency, start wavefront */
                 }
                 tryWakeOne();
             } // end of loop rowInSlice
         } // end of loop sliceId

-        m_allRowsAvailableTime = x265_mdate();
+        m_allRowsAvailableTime[layer] = x265_mdate();
         tryWakeOne(); /* ensure one thread is active or help-wanted flag
is set prior to blocking */
         static const int block_ms = 250;
         while (m_completionEvent.timedWait(block_ms))
@@ -962,9 +963,9 @@ void FrameEncoder::compressFrame(int layer)
                 }

                 if (!i)
-                    m_row0WaitTime = x265_mdate();
+                    m_row0WaitTime[layer] = x265_mdate();
                 else if (i == m_numRows - 1)
-                    m_allRowsAvailableTime = x265_mdate();
+                    m_allRowsAvailableTime[layer] = x265_mdate();
                 processRowEncoder(i, m_tld[m_localTldIdx], layer);
             }

@@ -1152,12 +1153,14 @@ void FrameEncoder::compressFrame(int layer)
             bytes -= (!i || type == NAL_UNIT_SPS || type == NAL_UNIT_PPS)
? 4 : 3;
         }
     }
-    m_accessUnitBits = bytes << 3;
+    m_accessUnitBits[layer] = (layer) ? (bytes - (m_accessUnitBits[0] >>
3)) << 3 : bytes << 3;

     int filler = 0;
     /* rateControlEnd may also block for earlier frames to call
rateControlUpdateStats */
-    if (!layer && m_top->m_rateControl->rateControlEnd(m_frame[layer],
m_accessUnitBits, &m_rce, &filler) < 0)
+    if (!layer && m_top->m_rateControl->rateControlEnd(m_frame[layer],
m_accessUnitBits[layer], &m_rce, &filler) < 0)
         m_top->m_aborted = true;
+    if (layer)
+        m_frame[layer]->m_encData->m_avgQpAq =
m_frame[0]->m_encData->m_avgQpAq;

     if (filler > 0)
     {
@@ -1172,7 +1175,7 @@ void FrameEncoder::compressFrame(int layer)
         m_nalList.serialize(NAL_UNIT_FILLER_DATA, m_bs);
         bytes += m_nalList.m_nal[m_nalList.m_numNal - 1].sizeBytes;
         bytes -= 3; //exclude start code prefix
-        m_accessUnitBits = bytes << 3;
+        m_accessUnitBits[layer] = bytes << 3;
     }

     if (m_frame[layer]->m_rpu.payloadSize)
@@ -1183,7 +1186,7 @@ void FrameEncoder::compressFrame(int layer)
         m_nalList.serialize(NAL_UNIT_UNSPECIFIED, m_bs);
     }

-    m_endCompressTime = x265_mdate();
+    m_endCompressTime[layer] = x265_mdate();

     /* Decrement referenced frame reference counts, allow them to be
recycled */
     for (int l = 0; l < numPredDir; l++)
@@ -1234,7 +1237,7 @@ void FrameEncoder::compressFrame(int layer)
         m_cuStats.accumulate(m_tld[i].analysis.m_stats[m_jpId], *m_param);
 #endif

-    m_endFrameTime = x265_mdate();
+    m_endFrameTime[layer] = x265_mdate();
 }

 void FrameEncoder::initDecodedPictureHashSEI(int row, int cuAddr, int
height, int layer)
@@ -1387,7 +1390,7 @@ void FrameEncoder::processRow(int row, int threadId,
int layer)
 {
     int64_t startTime = x265_mdate();
     if (ATOMIC_INC(&m_activeWorkerCount) == 1 && m_stallStartTime)
-        m_totalNoWorkerTime += x265_mdate() - m_stallStartTime;
+        m_totalNoWorkerTime[layer] += x265_mdate() -
m_stallStartTime[layer];

     const uint32_t realRow = m_idx_to_row[row >> 1];
     const uint32_t typeNum = m_idx_to_row[row & 1];
@@ -1404,9 +1407,9 @@ void FrameEncoder::processRow(int row, int threadId,
int layer)
     }

     if (ATOMIC_DEC(&m_activeWorkerCount) == 0)
-        m_stallStartTime = x265_mdate();
+        m_stallStartTime[layer] = x265_mdate();

-    m_totalWorkerElapsedTime += x265_mdate() - startTime; // not thread
safe, but good enough
+    m_totalWorkerElapsedTime[layer] += x265_mdate() - startTime; // not
thread safe, but good enough
 }

 // Called by worker threads
@@ -2283,9 +2286,9 @@ Frame** FrameEncoder::getEncodedPicture(NALList&
output)
         {
             m_retFrameBuffer[i] = m_frame[i];
             m_frame[i] = NULL;
+            m_prevOutputTime[i] = x265_mdate();
         }
         output.takeContents(m_nalList);
-        m_prevOutputTime = x265_mdate();
         return m_retFrameBuffer;
     }

diff --git a/source/encoder/frameencoder.h b/source/encoder/frameencoder.h
index 9fcd2dcf5..6253cbd84 100644
--- a/source/encoder/frameencoder.h
+++ b/source/encoder/frameencoder.h
@@ -190,27 +190,27 @@ public:
     RateControlEntry         m_rce;
     SEIDecodedPictureHash    m_seiReconPictureDigest;

-    uint64_t                 m_SSDY;
-    uint64_t                 m_SSDU;
-    uint64_t                 m_SSDV;
-    double                   m_ssim;
-    uint64_t                 m_accessUnitBits;
-    uint32_t                 m_ssimCnt;
+    uint64_t                 m_SSDY[MAX_SCALABLE_LAYERS];
+    uint64_t                 m_SSDU[MAX_SCALABLE_LAYERS];
+    uint64_t                 m_SSDV[MAX_SCALABLE_LAYERS];
+    double                   m_ssim[MAX_SCALABLE_LAYERS];
+    uint64_t                 m_accessUnitBits[MAX_SCALABLE_LAYERS];
+    uint32_t                 m_ssimCnt[MAX_SCALABLE_LAYERS];

     volatile int             m_activeWorkerCount;        // count of workers currently encoding or filtering CTUs
     volatile int             m_totalActiveWorkerCount;   // sum of m_activeWorkerCount sampled at end of each CTU
     volatile int             m_activeWorkerCountSamples; // count of times m_activeWorkerCount was sampled (think vbv restarts)
     volatile int             m_countRowBlocks;           // count of workers forced to abandon a row because of top dependency
-    int64_t                  m_startCompressTime;        // timestamp when frame encoder is given a frame
-    int64_t                  m_row0WaitTime;             // timestamp when row 0 is allowed to start
-    int64_t                  m_allRowsAvailableTime;     // timestamp when all reference dependencies are resolved
-    int64_t                  m_endCompressTime;          // timestamp after all CTUs are compressed
-    int64_t                  m_endFrameTime;             // timestamp after RCEnd, NR updates, etc
-    int64_t                  m_stallStartTime;           // timestamp when worker count becomes 0
-    int64_t                  m_prevOutputTime;           // timestamp when prev frame was retrieved by API thread
-    int64_t                  m_slicetypeWaitTime;        // total elapsed time waiting for decided frame
-    int64_t                  m_totalWorkerElapsedTime;   // total elapsed time spent by worker threads processing CTUs
-    int64_t                  m_totalNoWorkerTime;        // total elapsed time without any active worker threads
+    int64_t                  m_startCompressTime[MAX_SCALABLE_LAYERS];   // timestamp when frame encoder is given a frame
+    int64_t                  m_row0WaitTime[MAX_SCALABLE_LAYERS];    // timestamp when row 0 is allowed to start
+    int64_t                  m_allRowsAvailableTime[MAX_SCALABLE_LAYERS];    // timestamp when all reference dependencies are resolved
+    int64_t                  m_endCompressTime[MAX_SCALABLE_LAYERS];   // timestamp after all CTUs are compressed
+    int64_t                  m_endFrameTime[MAX_SCALABLE_LAYERS];    // timestamp after RCEnd, NR updates, etc
+    int64_t                  m_stallStartTime[MAX_SCALABLE_LAYERS];    // timestamp when worker count becomes 0
+    int64_t                  m_prevOutputTime[MAX_SCALABLE_LAYERS];    // timestamp when prev frame was retrieved by API thread
+    int64_t                  m_slicetypeWaitTime[MAX_SCALABLE_LAYERS];   // total elapsed time waiting for decided frame
+    int64_t                  m_totalWorkerElapsedTime[MAX_SCALABLE_LAYERS];   // total elapsed time spent by worker threads processing CTUs
+    int64_t                  m_totalNoWorkerTime[MAX_SCALABLE_LAYERS];   // total elapsed time without any active worker threads
 #if DETAILED_CU_STATS
     CUStats                  m_cuStats;
 #endif
diff --git a/source/encoder/framefilter.cpp b/source/encoder/framefilter.cpp
index 6b8ef03ac..da01d0ceb 100644
--- a/source/encoder/framefilter.cpp
+++ b/source/encoder/framefilter.cpp
@@ -673,7 +673,7 @@ void FrameFilter::processPostRow(int row, int layer)
         uint32_t height = m_parallelFilter[row].getCUHeight();

         uint64_t ssdY =
m_frameEncoder->m_top->computeSSD(fencPic->getLumaAddr(cuAddr),
reconPic->getLumaAddr(cuAddr), stride, width, height, m_param);
-        m_frameEncoder->m_SSDY += ssdY;
+        m_frameEncoder->m_SSDY[layer] += ssdY;

         if (m_param->internalCsp != X265_CSP_I400)
         {
@@ -684,8 +684,8 @@ void FrameFilter::processPostRow(int row, int layer)
             uint64_t ssdU =
m_frameEncoder->m_top->computeSSD(fencPic->getCbAddr(cuAddr),
reconPic->getCbAddr(cuAddr), stride, width, height, m_param);
             uint64_t ssdV =
m_frameEncoder->m_top->computeSSD(fencPic->getCrAddr(cuAddr),
reconPic->getCrAddr(cuAddr), stride, width, height, m_param);

-            m_frameEncoder->m_SSDU += ssdU;
-            m_frameEncoder->m_SSDV += ssdV;
+            m_frameEncoder->m_SSDU[layer] += ssdU;
+            m_frameEncoder->m_SSDV[layer] += ssdV;
         }
     }

@@ -705,9 +705,9 @@ void FrameFilter::processPostRow(int row, int layer)
         /* SSIM is done for each row in blocks of 4x4 . The First blocks
are offset by 2 pixels to the right
         * to avoid alignment of ssim blocks with DCT blocks. */
         minPixY += bStart ? 2 : -6;
-        m_frameEncoder->m_ssim += calculateSSIM(rec + 2 + minPixY *
stride1, stride1, fenc + 2 + minPixY * stride2, stride2,
+        m_frameEncoder->m_ssim[layer] += calculateSSIM(rec + 2 + minPixY *
stride1, stride1, fenc + 2 + minPixY * stride2, stride2,
                                                 m_param->sourceWidth - 2,
maxPixY - minPixY, m_ssimBuf, ssim_cnt);
-        m_frameEncoder->m_ssimCnt += ssim_cnt;
+        m_frameEncoder->m_ssimCnt[layer] += ssim_cnt;
     }

     if (m_param->maxSlices == 1)
diff --git a/source/x265.h b/source/x265.h
index 238b1358a..084f7cd64 100644
--- a/source/x265.h
+++ b/source/x265.h
@@ -488,6 +488,8 @@ typedef struct x265_picture
     uint32_t picStruct;

     int    width;
+
+    int   layerID;
 } x265_picture;

 typedef enum
-- 
2.36.0.windows.1
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20240805/b3129ff1/attachment-0001.htm>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0009-Add-logs-for-both-layers-fix-issue-while-aborting-in.patch
Type: application/octet-stream
Size: 59755 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20240805/b3129ff1/attachment-0001.obj>


More information about the x265-devel mailing list