[x265] [PATCH] stats: report counts of each CU partition size/mode per frame
Steve Borho
steve at borho.org
Tue May 26 14:45:04 CEST 2015
On 05/25, Divya Manivannan wrote:
> # HG changeset patch
> # User Divya Manivannan <divya at multicorewareinc.com>
> # Date 1432535853 -19800
> # Mon May 25 12:07:33 2015 +0530
> # Node ID 05579078c8faf0f4c3dbe10805d1b83048db1e96
> # Parent a7bf7a150a705489cb63d0454c59ec599bad8c93
> stats: report counts of each CU partition size/mode per frame
>
> diff -r a7bf7a150a70 -r 05579078c8fa source/common/param.cpp
> --- a/source/common/param.cpp Fri May 22 14:29:35 2015 +0530
> +++ b/source/common/param.cpp Mon May 25 12:07:33 2015 +0530
> @@ -104,6 +104,7 @@
> param->csvfn = NULL;
> param->rc.lambdaFileName = NULL;
> param->bLogCuStats = 0;
> + param->bLogFrameStats = 0;
> param->decodedPictureHashSEI = 0;
>
> /* Quality Measurement Metrics */
> @@ -582,6 +583,7 @@
> }
> }
> OPT("cu-stats") p->bLogCuStats = atobool(value);
> + OPT("frame-stats") p->bLogFrameStats = atobool(value);
> OPT("annexb") p->bAnnexB = atobool(value);
> OPT("repeat-headers") p->bRepeatHeaders = atobool(value);
> OPT("wpp") p->bEnableWavefront = atobool(value);
> diff -r a7bf7a150a70 -r 05579078c8fa source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp Fri May 22 14:29:35 2015 +0530
> +++ b/source/encoder/encoder.cpp Mon May 25 12:07:33 2015 +0530
> @@ -73,6 +73,7 @@
> m_numDelayedPic = 0;
> m_outputCount = 0;
> m_csvfpt = NULL;
> + m_csvfptFrame = NULL;
> m_param = NULL;
> m_latestParam = NULL;
> m_cuOffsetY = NULL;
> @@ -254,6 +255,20 @@
> }
> }
>
> + if (m_param->bLogFrameStats)
> + {
> + m_csvfptFrame = fopen("FrameLog.csv", "wb");
> + if (m_csvfptFrame)
> + {
> + fprintf(m_csvfptFrame, "POC, SLICE TYPE, 64x64 CU%% , 32x32 CU%%, 16x16 CU%%, 8x8 CU%%, 4x4 prediction%%, Intra CU%%, Inter CU%%, Skip CU%%\n");
w/s
> + }
> + else
> + {
> + x265_log(m_param, X265_LOG_ERROR, "Unable to open CSV log file FrameLog.csv, aborting\n");
> + m_aborted = true;
> + }
> + }
> +
> int numRows = (m_param->sourceHeight + g_maxCUSize - 1) / g_maxCUSize;
> int numCols = (m_param->sourceWidth + g_maxCUSize - 1) / g_maxCUSize;
> for (int i = 0; i < m_param->frameNumThreads; i++)
> @@ -367,6 +382,8 @@
> fclose(m_analysisFile);
> if (m_csvfpt)
> fclose(m_csvfpt);
> + if (m_csvfptFrame)
> + fclose(m_csvfptFrame);
>
> if (m_param)
> {
> @@ -1465,6 +1482,16 @@
> fprintf(m_csvfpt, "\n");
> fflush(stderr);
> }
> +
> + if (m_param->bLogFrameStats && m_csvfptFrame)
> + {
> + fprintf(m_csvfptFrame, "%d, %c-SLICE,", poc, c);
> + fprintf(m_csvfptFrame, " %.2lf, %.2lf, %.2lf,", curEncoder->m_frameStats..percentCuDepth[0], curEncoder->m_frameStats.cuStat.percentCuDepth[1], curEncoder->m_frameStats.cuStat.percentCuDepth[2]);
> + fprintf(m_csvfptFrame, " %.2lf, %.2lf, %.2lf,", curEncoder->m_frameStats.cuStat.percentCuDepth[3], curEncoder->m_frameStats.cuStat.percentIntraNxN, curEncoder->m_frameStats.cuStat.percentIntraCu);
> + fprintf(m_csvfptFrame, " %.2lf, %.2lf", curEncoder->m_frameStats.cuStat.percentInterCu, curEncoder->m_frameStats.cuStat.percentSkipCu);
> + fprintf(m_csvfptFrame, "\n");
> + fflush(stderr);
> + }
> }
>
> #if defined(_MSC_VER)
> diff -r a7bf7a150a70 -r 05579078c8fa source/encoder/encoder.h
> --- a/source/encoder/encoder.h Fri May 22 14:29:35 2015 +0530
> +++ b/source/encoder/encoder.h Mon May 25 12:07:33 2015 +0530
> @@ -106,6 +106,7 @@
> EncStats m_analyzeP;
> EncStats m_analyzeB;
> FILE* m_csvfpt;
> + FILE* m_csvfptFrame;
> int64_t m_encodeStartTime;
>
> // weighted prediction
> diff -r a7bf7a150a70 -r 05579078c8fa source/encoder/frameencoder.cpp
> --- a/source/encoder/frameencoder.cpp Fri May 22 14:29:35 2015 +0530
> +++ b/source/encoder/frameencoder.cpp Mon May 25 12:07:33 2015 +0530
> @@ -552,7 +552,7 @@
> }
> }
>
> - if (m_param->rc.bStatWrite)
> + if (m_param->rc.bStatWrite || m_param->bLogFrameStats)
> {
> int totalI = 0, totalP = 0, totalSkip = 0;
>
> @@ -562,14 +562,30 @@
> m_frameStats.mvBits += m_rows[i].rowStats.mvBits;
> m_frameStats.coeffBits += m_rows[i].rowStats.coeffBits;
> m_frameStats.miscBits += m_rows[i].rowStats.miscBits;
> + m_frameStats.cuStat.cntInterCu += m_rows[i].rowStats.cuStat.cntInterCu;
> + m_frameStats.cuStat.cntIntraCu += m_rows[i].rowStats.cuStat.cntIntraCu;
> + m_frameStats.cuStat.cntIntraNxN += m_rows[i].rowStats.cuStat.cntIntraNxN;
> + m_frameStats.cuStat.cntSkipCu += m_rows[i].rowStats.cuStat.cntSkipCu;
> + m_frameStats.cuStat.totalCu += m_rows[i].rowStats.cuStat.totalCu;
> totalI += m_rows[i].rowStats.iCuCnt;
> totalP += m_rows[i].rowStats.pCuCnt;
> totalSkip += m_rows[i].rowStats.skipCuCnt;
> +
> + for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
> + {
> + m_frameStats.cuStat.cntCuDepth[depth] += m_rows[i].rowStats.cuStat.cntCuDepth[depth];
> + }
we don't use braces around single line expressions, generally
> }
> int totalCuCount = totalI + totalP + totalSkip;
> m_frameStats.percentIntra = (double)totalI / totalCuCount;
> m_frameStats.percentInter = (double)totalP / totalCuCount;
> m_frameStats.percentSkip = (double)totalSkip / totalCuCount;
> + m_frameStats.cuStat.percentIntraNxN = (double)(m_frameStats.cuStat.cntIntraNxN * 100) / m_frameStats.cuStat.totalCu;
> + m_frameStats.cuStat.percentIntraCu = (double)(m_frameStats.cuStat.cntIntraCu * 100) / m_frameStats.cuStat.totalCu;
> + m_frameStats.cuStat.percentInterCu = (double)(m_frameStats.cuStat.cntInterCu * 100) / m_frameStats.cuStat.totalCu;
> + m_frameStats.cuStat.percentSkipCu = (double)(m_frameStats.cuStat.cntSkipCu * 100) / m_frameStats.cuStat.totalCu;
> + for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
> + m_frameStats.cuStat.percentCuDepth[depth] = (double)(m_frameStats.cuStat.cntCuDepth[depth] * 100) / m_frameStats.cuStat.totalCu;
> }
>
> m_bs.resetBits();
> @@ -904,17 +920,22 @@
> // Completed CU processing
> curRow.completed++;
>
> - if (m_param->bLogCuStats || m_param->rc.bStatWrite)
> + if (m_param->bLogCuStats || m_param->rc.bStatWrite || m_param->bLogFrameStats)
> curEncData.m_rowStat[row].sumQpAq += collectCTUStatistics(*ctu, qTreeInterCnt, qTreeIntraCnt, qTreeSkipCnt);
> else if (m_param->rc.aqMode)
> curEncData.m_rowStat[row].sumQpAq += calcCTUQP(*ctu);
>
> // copy no. of intra, inter Cu cnt per row into frame stats for 2 pass
> - if (m_param->rc.bStatWrite)
> + if (m_param->rc.bStatWrite || m_param->bLogFrameStats)
> {
> curRow.rowStats.mvBits += best.mvBits;
> curRow.rowStats.coeffBits += best.coeffBits;
> curRow.rowStats.miscBits += best.totalBits - (best.mvBits + best.coeffBits);
> + curRow.rowStats.cuStat.cntInterCu += m_cuStatistic.cntInterCu;
> + curRow.rowStats.cuStat.cntIntraCu += m_cuStatistic.cntIntraCu;
> + curRow.rowStats.cuStat.cntIntraNxN += m_cuStatistic.cntIntraNxN;
> + curRow.rowStats.cuStat.cntSkipCu += m_cuStatistic.cntSkipCu;
> + curRow.rowStats.cuStat.totalCu += m_cuStatistic.totalCu;
>
> for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
> {
> @@ -923,10 +944,12 @@
> curRow.rowStats.iCuCnt += qTreeIntraCnt[depth] << shift;
> curRow.rowStats.pCuCnt += qTreeInterCnt[depth] << shift;
> curRow.rowStats.skipCuCnt += qTreeSkipCnt[depth] << shift;
> + curRow.rowStats.cuStat.cntCuDepth[depth] += m_cuStatistic.cntCuDepth[depth];
>
> // clear the row cu data from thread local object
> qTreeIntraCnt[depth] = qTreeInterCnt[depth] = qTreeSkipCnt[depth] = 0;
> }
> + memset(&m_cuStatistic, 0, sizeof(CuStatistic));
> }
>
> curEncData.m_cuStat[cuAddr].totalBits = best.totalBits;
> @@ -1106,6 +1129,7 @@
> int FrameEncoder::collectCTUStatistics(const CUData& ctu, uint32_t* qtreeInterCnt, uint32_t* qtreeIntraCnt, uint32_t* qtreeSkipCnt)
> {
> StatisticLog* log = &m_sliceTypeLog[ctu.m_slice->m_sliceType];
> + CuStatistic* cuLog = &m_cuStatistic;
> int totQP = 0;
>
> if (ctu.m_slice->m_sliceType == I_SLICE)
> @@ -1117,6 +1141,9 @@
>
> log->totalCu++;
> log->cntIntra[depth]++;
> + cuLog->cntCuDepth[depth]++;
> + cuLog->cntIntraCu++;
> + cuLog->totalCu++;
This all looks terribly redundant, and this is introducing new data
races. Perhaps this function can increment only m_cuStatistic, and at
the end of the frame those stats are all added to
m_sliceTypeLog[m_slice->m_sliceType];
> qtreeIntraCnt[depth]++;
> totQP += ctu.m_qp[absPartIdx] * (ctu.m_numPartitions >> (depth * 2));
>
> @@ -1124,6 +1151,9 @@
> {
> log->totalCu--;
> log->cntIntra[depth]--;
> + cuLog->cntCuDepth[depth]--;
> + cuLog->cntIntraCu--;
> + cuLog->totalCu--;
> qtreeIntraCnt[depth]--;
> }
> else if (ctu.m_partSize[absPartIdx] != SIZE_2Nx2N)
> @@ -1132,6 +1162,8 @@
> X265_CHECK(ctu.m_log2CUSize[absPartIdx] == 3 && ctu.m_slice->m_sps->quadtreeTULog2MinSize < 3, "Intra NxN found at improbable depth\n");
> log->cntIntraNxN++;
> log->cntIntra[depth]--;
> + cuLog->cntIntraNxN++;
> + cuLog->cntCuDepth[depth]--;
> }
> else if (ctu.m_lumaIntraDir[absPartIdx] > 1)
> log->cuIntraDistribution[depth][ANGULAR_MODE_ID]++;
> @@ -1148,22 +1180,28 @@
>
> log->totalCu++;
> log->cntTotalCu[depth]++;
> + cuLog->cntCuDepth[depth]++;
> + cuLog->totalCu++;
> totQP += ctu.m_qp[absPartIdx] * (ctu.m_numPartitions >> (depth * 2));
>
> if (ctu.m_predMode[absPartIdx] == MODE_NONE)
> {
> log->totalCu--;
> log->cntTotalCu[depth]--;
> + cuLog->cntCuDepth[depth]--;
> + cuLog->totalCu--;
> }
> else if (ctu.isSkipped(absPartIdx))
> {
> log->totalCu--;
> log->cntSkipCu[depth]++;
> + cuLog->cntSkipCu++;
> qtreeSkipCnt[depth]++;
> }
> else if (ctu.isInter(absPartIdx))
> {
> log->cntInter[depth]++;
> + cuLog->cntInterCu++;
> qtreeInterCnt[depth]++;
>
> if (ctu.m_partSize[absPartIdx] < AMP_ID)
> @@ -1174,6 +1212,7 @@
> else if (ctu.isIntra(absPartIdx))
> {
> log->cntIntra[depth]++;
> + cuLog->cntIntraCu++;
> qtreeIntraCnt[depth]++;
>
> if (ctu.m_partSize[absPartIdx] != SIZE_2Nx2N)
> @@ -1181,6 +1220,8 @@
> X265_CHECK(ctu.m_log2CUSize[absPartIdx] == 3 && ctu.m_slice->m_sps->quadtreeTULog2MinSize < 3, "Intra NxN found at improbable depth\n");
> log->cntIntraNxN++;
> log->cntIntra[depth]--;
> + cuLog->cntIntraNxN++;
> + cuLog->cntCuDepth[depth]--;
> /* TODO: log intra modes at absPartIdx +0 to +3 */
> }
> else if (ctu.m_lumaIntraDir[absPartIdx] > 1)
> diff -r a7bf7a150a70 -r 05579078c8fa source/encoder/frameencoder.h
> --- a/source/encoder/frameencoder.h Fri May 22 14:29:35 2015 +0530
> +++ b/source/encoder/frameencoder.h Mon May 25 12:07:33 2015 +0530
> @@ -157,6 +157,7 @@
> uint32_t m_crc[3];
> uint32_t m_checksum[3];
> StatisticLog m_sliceTypeLog[3]; // per-slice type CU statistics
> + CuStatistic m_cuStatistic; // stats of current CU
> FrameStats m_frameStats; // stats of current frame for multi-pass encodes
>
> volatile int m_activeWorkerCount; // count of workers currently encoding or filtering CTUs
> diff -r a7bf7a150a70 -r 05579078c8fa source/encoder/ratecontrol.h
> --- a/source/encoder/ratecontrol.h Fri May 22 14:29:35 2015 +0530
> +++ b/source/encoder/ratecontrol.h Mon May 25 12:07:33 2015 +0530
> @@ -46,6 +46,27 @@
> #define MIN_AMORTIZE_FRACTION 0.2
> #define CLIP_DURATION(f) x265_clip3(MIN_FRAME_DURATION, MAX_FRAME_DURATION, f)
>
> +struct CuStatistic
> +{
> + uint64_t cntCuDepth[4];
> + uint64_t cntIntraNxN;
> + uint64_t cntSkipCu;
> + uint64_t cntIntraCu;
> + uint64_t cntInterCu;
> + uint64_t totalCu;
> +
> + double percentCuDepth[4];
> + double percentIntraNxN;
> + double percentSkipCu;
> + double percentIntraCu;
> + double percentInterCu;
> +
> + CuStatistic()
> + {
> + memset(this, 0, sizeof(CuStatistic));
> + }
> +};
> +
> /* Current frame stats for 2 pass */
> struct FrameStats
> {
> @@ -61,6 +82,8 @@
> double percentIntra;
> double percentInter;
> double percentSkip;
> +
> + CuStatistic cuStat;
> };
this data structure nesting doesn't make a lot of sense.
> struct Predictor
> diff -r a7bf7a150a70 -r 05579078c8fa source/x265.h
> --- a/source/x265.h Fri May 22 14:29:35 2015 +0530
> +++ b/source/x265.h Mon May 25 12:07:33 2015 +0530
> @@ -443,6 +443,9 @@
> * modes during mode decision. Default disabled */
> int bLogCuStats;
>
> + /* Enable logging of frame level data. Default is disabled*/
w/s
> + int bLogFrameStats;
> +
> /* Enable the measurement and reporting of PSNR. Default is enabled */
> int bEnablePsnr;
new params would require reST docs and an API version bump
> diff -r a7bf7a150a70 -r 05579078c8fa source/x265cli.h
> --- a/source/x265cli.h Fri May 22 14:29:35 2015 +0530
> +++ b/source/x265cli.h Mon May 25 12:07:33 2015 +0530
> @@ -56,6 +56,8 @@
> { "csv", required_argument, NULL, 0 },
> { "no-cu-stats", no_argument, NULL, 0 },
> { "cu-stats", no_argument, NULL, 0 },
> + { "no-frame-stats", no_argument, NULL, 0 },
> + { "frame-stats", no_argument, NULL, 0 },
> { "y4m", no_argument, NULL, 0 },
> { "no-progress", no_argument, NULL, 0 },
> { "output", required_argument, NULL, 'o' },
> @@ -248,6 +250,7 @@
> H0(" --log-level <string> Logging level: none error warning info debug full. Default %s\n", x265::logLevelNames[param->logLevel + 1]);
> H0(" --no-progress Disable CLI progress reports\n");
> H0(" --[no-]cu-stats Enable logging stats about distribution of cu across all modes. Default %s\n",OPT(param->bLogCuStats));
> + H0(" --[no-]frame-stats Enable logging stats about frames. Default %s\n", OPT(param->bLogFrameStats));
--cu-stats toggles logging of stats to x265_log(), --frame-stats toggles
the writing of a new CSV file (with a hard coded filename). This is
non-intuitive. This really behaves like it should be named --frame-csv
and its param should be a const char*, which begs a number of other questions.
I don't much like how the libx265 code is writing so many files, when
much of this data could be quite useful for any code that monitors the
progress of an encode. I can understand exceptions being made for the
2-pass stats files and the analysis re-use files being read and written
directly by the encoder (since they must work via ffmpeg and similar
apps), but I believe the CSV files really should be getting written by
our CLI.
Everything that is getting written into our CSV files today should be
discoverable from the C API, and the CLI should be using those
interfaces to write the CSV files itself.
We should define a new x265_frame_stats struct in x265.h and add a
'x265_frame_stats* frame_stats' to the end of x265_picture, and have
x265_encoder_encode() fill it in if pic_out->frame_stats is not NULL.
And this struct should contain all of the data that is written into
per-frame CSV files today, plus these new per-frame coding stats.
Then all of the CSV writing can be hoisted out of the libx265 library
and into x265.cpp
--
Steve Borho
More information about the x265-devel
mailing list