[x265] [PATCH] stats: report counts of each CU partition size/mode per frame

Steve Borho steve at borho.org
Tue May 26 14:45:04 CEST 2015


On 05/25, Divya Manivannan wrote:
> # HG changeset patch
> # User Divya Manivannan <divya at multicorewareinc.com>
> # Date 1432535853 -19800
> #      Mon May 25 12:07:33 2015 +0530
> # Node ID 05579078c8faf0f4c3dbe10805d1b83048db1e96
> # Parent  a7bf7a150a705489cb63d0454c59ec599bad8c93
> stats: report counts of each CU partition size/mode per frame
> 
> diff -r a7bf7a150a70 -r 05579078c8fa source/common/param.cpp
> --- a/source/common/param.cpp	Fri May 22 14:29:35 2015 +0530
> +++ b/source/common/param.cpp	Mon May 25 12:07:33 2015 +0530
> @@ -104,6 +104,7 @@
>      param->csvfn = NULL;
>      param->rc.lambdaFileName = NULL;
>      param->bLogCuStats = 0;
> +    param->bLogFrameStats = 0;
>      param->decodedPictureHashSEI = 0;
>  
>      /* Quality Measurement Metrics */
> @@ -582,6 +583,7 @@
>          }
>      }
>      OPT("cu-stats") p->bLogCuStats = atobool(value);
> +    OPT("frame-stats") p->bLogFrameStats = atobool(value);
>      OPT("annexb") p->bAnnexB = atobool(value);
>      OPT("repeat-headers") p->bRepeatHeaders = atobool(value);
>      OPT("wpp") p->bEnableWavefront = atobool(value);
> diff -r a7bf7a150a70 -r 05579078c8fa source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp	Fri May 22 14:29:35 2015 +0530
> +++ b/source/encoder/encoder.cpp	Mon May 25 12:07:33 2015 +0530
> @@ -73,6 +73,7 @@
>      m_numDelayedPic = 0;
>      m_outputCount = 0;
>      m_csvfpt = NULL;
> +    m_csvfptFrame = NULL;
>      m_param = NULL;
>      m_latestParam = NULL;
>      m_cuOffsetY = NULL;
> @@ -254,6 +255,20 @@
>          }
>      }
>  
> +    if (m_param->bLogFrameStats)
> +    {
> +        m_csvfptFrame = fopen("FrameLog.csv", "wb");
> +        if (m_csvfptFrame)
> +        {
> +            fprintf(m_csvfptFrame, "POC, SLICE TYPE, 64x64 CU%% , 32x32 CU%%, 16x16 CU%%, 8x8 CU%%, 4x4 prediction%%, Intra CU%%, Inter CU%%, Skip CU%%\n");

w/s

> +        }
> +        else
> +        {
> +            x265_log(m_param, X265_LOG_ERROR, "Unable to open CSV log file FrameLog.csv, aborting\n");
> +            m_aborted = true;
> +        }
> +    }
> +
>      int numRows = (m_param->sourceHeight + g_maxCUSize - 1) / g_maxCUSize;
>      int numCols = (m_param->sourceWidth  + g_maxCUSize - 1) / g_maxCUSize;
>      for (int i = 0; i < m_param->frameNumThreads; i++)
> @@ -367,6 +382,8 @@
>          fclose(m_analysisFile);
>      if (m_csvfpt)
>          fclose(m_csvfpt);
> +    if (m_csvfptFrame)
> +        fclose(m_csvfptFrame);
>  
>      if (m_param)
>      {
> @@ -1465,6 +1482,16 @@
>          fprintf(m_csvfpt, "\n");
>          fflush(stderr);
>      }
> +
> +    if (m_param->bLogFrameStats && m_csvfptFrame)
> +    {
> +        fprintf(m_csvfptFrame, "%d, %c-SLICE,", poc, c);
> +        fprintf(m_csvfptFrame, " %.2lf, %.2lf, %.2lf,", curEncoder->m_frameStats..percentCuDepth[0], curEncoder->m_frameStats.cuStat.percentCuDepth[1], curEncoder->m_frameStats.cuStat.percentCuDepth[2]);
> +        fprintf(m_csvfptFrame, " %.2lf, %.2lf, %.2lf,", curEncoder->m_frameStats.cuStat.percentCuDepth[3], curEncoder->m_frameStats.cuStat.percentIntraNxN, curEncoder->m_frameStats.cuStat.percentIntraCu);
> +        fprintf(m_csvfptFrame, " %.2lf, %.2lf", curEncoder->m_frameStats.cuStat.percentInterCu, curEncoder->m_frameStats.cuStat.percentSkipCu);
> +        fprintf(m_csvfptFrame, "\n");
> +        fflush(stderr);
> +    }
>  }
>  
>  #if defined(_MSC_VER)
> diff -r a7bf7a150a70 -r 05579078c8fa source/encoder/encoder.h
> --- a/source/encoder/encoder.h	Fri May 22 14:29:35 2015 +0530
> +++ b/source/encoder/encoder.h	Mon May 25 12:07:33 2015 +0530
> @@ -106,6 +106,7 @@
>      EncStats           m_analyzeP;
>      EncStats           m_analyzeB;
>      FILE*              m_csvfpt;
> +    FILE*              m_csvfptFrame;
>      int64_t            m_encodeStartTime;
>  
>      // weighted prediction
> diff -r a7bf7a150a70 -r 05579078c8fa source/encoder/frameencoder.cpp
> --- a/source/encoder/frameencoder.cpp	Fri May 22 14:29:35 2015 +0530
> +++ b/source/encoder/frameencoder.cpp	Mon May 25 12:07:33 2015 +0530
> @@ -552,7 +552,7 @@
>          }
>      }
>  
> -    if (m_param->rc.bStatWrite)
> +    if (m_param->rc.bStatWrite || m_param->bLogFrameStats)
>      {
>          int totalI = 0, totalP = 0, totalSkip = 0;
>  
> @@ -562,14 +562,30 @@
>              m_frameStats.mvBits    += m_rows[i].rowStats.mvBits;
>              m_frameStats.coeffBits += m_rows[i].rowStats.coeffBits;
>              m_frameStats.miscBits  += m_rows[i].rowStats.miscBits;
> +            m_frameStats.cuStat.cntInterCu += m_rows[i].rowStats.cuStat.cntInterCu;
> +            m_frameStats.cuStat.cntIntraCu += m_rows[i].rowStats.cuStat.cntIntraCu;
> +            m_frameStats.cuStat.cntIntraNxN += m_rows[i].rowStats.cuStat.cntIntraNxN;
> +            m_frameStats.cuStat.cntSkipCu += m_rows[i].rowStats.cuStat.cntSkipCu;
> +            m_frameStats.cuStat.totalCu += m_rows[i].rowStats.cuStat.totalCu;
>              totalI                 += m_rows[i].rowStats.iCuCnt;
>              totalP                 += m_rows[i].rowStats.pCuCnt;
>              totalSkip              += m_rows[i].rowStats.skipCuCnt;
> +
> +            for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
> +            {
> +                m_frameStats.cuStat.cntCuDepth[depth] += m_rows[i].rowStats.cuStat.cntCuDepth[depth];
> +            }

we don't use braces around single line expressions, generally

>          }
>          int totalCuCount = totalI + totalP + totalSkip;
>          m_frameStats.percentIntra = (double)totalI / totalCuCount;
>          m_frameStats.percentInter = (double)totalP / totalCuCount;
>          m_frameStats.percentSkip  = (double)totalSkip / totalCuCount;
> +        m_frameStats.cuStat.percentIntraNxN = (double)(m_frameStats.cuStat.cntIntraNxN * 100) / m_frameStats.cuStat.totalCu;
> +        m_frameStats.cuStat.percentIntraCu = (double)(m_frameStats.cuStat.cntIntraCu * 100) / m_frameStats.cuStat.totalCu;
> +        m_frameStats.cuStat.percentInterCu = (double)(m_frameStats.cuStat.cntInterCu * 100) / m_frameStats.cuStat.totalCu;
> +        m_frameStats.cuStat.percentSkipCu = (double)(m_frameStats.cuStat.cntSkipCu * 100) / m_frameStats.cuStat.totalCu;
> +        for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
> +            m_frameStats.cuStat.percentCuDepth[depth] = (double)(m_frameStats.cuStat.cntCuDepth[depth] * 100) / m_frameStats.cuStat.totalCu;
>      }
>  
>      m_bs.resetBits();
> @@ -904,17 +920,22 @@
>          // Completed CU processing
>          curRow.completed++;
>  
> -        if (m_param->bLogCuStats || m_param->rc.bStatWrite)
> +        if (m_param->bLogCuStats || m_param->rc.bStatWrite || m_param->bLogFrameStats)
>              curEncData.m_rowStat[row].sumQpAq += collectCTUStatistics(*ctu, qTreeInterCnt, qTreeIntraCnt, qTreeSkipCnt);
>          else if (m_param->rc.aqMode)
>              curEncData.m_rowStat[row].sumQpAq += calcCTUQP(*ctu);
>  
>          // copy no. of intra, inter Cu cnt per row into frame stats for 2 pass
> -        if (m_param->rc.bStatWrite)
> +        if (m_param->rc.bStatWrite || m_param->bLogFrameStats)
>          {
>              curRow.rowStats.mvBits += best.mvBits;
>              curRow.rowStats.coeffBits += best.coeffBits;
>              curRow.rowStats.miscBits += best.totalBits - (best.mvBits + best.coeffBits);
> +            curRow.rowStats.cuStat.cntInterCu += m_cuStatistic.cntInterCu;
> +            curRow.rowStats.cuStat.cntIntraCu += m_cuStatistic.cntIntraCu;
> +            curRow.rowStats.cuStat.cntIntraNxN += m_cuStatistic.cntIntraNxN;
> +            curRow.rowStats.cuStat.cntSkipCu += m_cuStatistic.cntSkipCu;
> +            curRow.rowStats.cuStat.totalCu += m_cuStatistic.totalCu;
>  
>              for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
>              {
> @@ -923,10 +944,12 @@
>                  curRow.rowStats.iCuCnt += qTreeIntraCnt[depth] << shift;
>                  curRow.rowStats.pCuCnt += qTreeInterCnt[depth] << shift;
>                  curRow.rowStats.skipCuCnt += qTreeSkipCnt[depth] << shift;
> +                curRow.rowStats.cuStat.cntCuDepth[depth] += m_cuStatistic.cntCuDepth[depth];
>  
>                  // clear the row cu data from thread local object
>                  qTreeIntraCnt[depth] = qTreeInterCnt[depth] = qTreeSkipCnt[depth] = 0;
>              }
> +            memset(&m_cuStatistic, 0, sizeof(CuStatistic));
>          }
>  
>          curEncData.m_cuStat[cuAddr].totalBits = best.totalBits;
> @@ -1106,6 +1129,7 @@
>  int FrameEncoder::collectCTUStatistics(const CUData& ctu, uint32_t* qtreeInterCnt, uint32_t* qtreeIntraCnt, uint32_t* qtreeSkipCnt)
>  {
>      StatisticLog* log = &m_sliceTypeLog[ctu.m_slice->m_sliceType];
> +    CuStatistic* cuLog = &m_cuStatistic;
>      int totQP = 0;
>  
>      if (ctu.m_slice->m_sliceType == I_SLICE)
> @@ -1117,6 +1141,9 @@
>  
>              log->totalCu++;
>              log->cntIntra[depth]++;
> +            cuLog->cntCuDepth[depth]++;
> +            cuLog->cntIntraCu++;
> +            cuLog->totalCu++;

This all looks terribly redundant, and this is introducing new data
races. Perhaps this function can increment only m_cuStatistic, and at
the end of the frame those stats are all added to
m_sliceTypeLog[m_slice->m_sliceType];

>              qtreeIntraCnt[depth]++;
>              totQP += ctu.m_qp[absPartIdx] * (ctu.m_numPartitions >> (depth * 2));
>  
> @@ -1124,6 +1151,9 @@
>              {
>                  log->totalCu--;
>                  log->cntIntra[depth]--;
> +                cuLog->cntCuDepth[depth]--;
> +                cuLog->cntIntraCu--;
> +                cuLog->totalCu--;
>                  qtreeIntraCnt[depth]--;
>              }
>              else if (ctu.m_partSize[absPartIdx] != SIZE_2Nx2N)
> @@ -1132,6 +1162,8 @@
>                  X265_CHECK(ctu.m_log2CUSize[absPartIdx] == 3 && ctu.m_slice->m_sps->quadtreeTULog2MinSize < 3, "Intra NxN found at improbable depth\n");
>                  log->cntIntraNxN++;
>                  log->cntIntra[depth]--;
> +                cuLog->cntIntraNxN++;
> +                cuLog->cntCuDepth[depth]--;
>              }
>              else if (ctu.m_lumaIntraDir[absPartIdx] > 1)
>                  log->cuIntraDistribution[depth][ANGULAR_MODE_ID]++;
> @@ -1148,22 +1180,28 @@
>  
>              log->totalCu++;
>              log->cntTotalCu[depth]++;
> +            cuLog->cntCuDepth[depth]++;
> +            cuLog->totalCu++;
>              totQP += ctu.m_qp[absPartIdx] * (ctu.m_numPartitions >> (depth * 2));
>  
>              if (ctu.m_predMode[absPartIdx] == MODE_NONE)
>              {
>                  log->totalCu--;
>                  log->cntTotalCu[depth]--;
> +                cuLog->cntCuDepth[depth]--;
> +                cuLog->totalCu--;
>              }
>              else if (ctu.isSkipped(absPartIdx))
>              {
>                  log->totalCu--;
>                  log->cntSkipCu[depth]++;
> +                cuLog->cntSkipCu++;
>                  qtreeSkipCnt[depth]++;
>              }
>              else if (ctu.isInter(absPartIdx))
>              {
>                  log->cntInter[depth]++;
> +                cuLog->cntInterCu++;
>                  qtreeInterCnt[depth]++;
>  
>                  if (ctu.m_partSize[absPartIdx] < AMP_ID)
> @@ -1174,6 +1212,7 @@
>              else if (ctu.isIntra(absPartIdx))
>              {
>                  log->cntIntra[depth]++;
> +                cuLog->cntIntraCu++;
>                  qtreeIntraCnt[depth]++;
>  
>                  if (ctu.m_partSize[absPartIdx] != SIZE_2Nx2N)
> @@ -1181,6 +1220,8 @@
>                      X265_CHECK(ctu.m_log2CUSize[absPartIdx] == 3 && ctu.m_slice->m_sps->quadtreeTULog2MinSize < 3, "Intra NxN found at improbable depth\n");
>                      log->cntIntraNxN++;
>                      log->cntIntra[depth]--;
> +                    cuLog->cntIntraNxN++;
> +                    cuLog->cntCuDepth[depth]--;
>                      /* TODO: log intra modes at absPartIdx +0 to +3 */
>                  }
>                  else if (ctu.m_lumaIntraDir[absPartIdx] > 1)
> diff -r a7bf7a150a70 -r 05579078c8fa source/encoder/frameencoder.h
> --- a/source/encoder/frameencoder.h	Fri May 22 14:29:35 2015 +0530
> +++ b/source/encoder/frameencoder.h	Mon May 25 12:07:33 2015 +0530
> @@ -157,6 +157,7 @@
>      uint32_t                 m_crc[3];
>      uint32_t                 m_checksum[3];
>      StatisticLog             m_sliceTypeLog[3];     // per-slice type CU statistics
> +    CuStatistic              m_cuStatistic;         // stats of current CU
>      FrameStats               m_frameStats;          // stats of current frame for multi-pass encodes
>  
>      volatile int             m_activeWorkerCount;        // count of workers currently encoding or filtering CTUs
> diff -r a7bf7a150a70 -r 05579078c8fa source/encoder/ratecontrol.h
> --- a/source/encoder/ratecontrol.h	Fri May 22 14:29:35 2015 +0530
> +++ b/source/encoder/ratecontrol.h	Mon May 25 12:07:33 2015 +0530
> @@ -46,6 +46,27 @@
>  #define MIN_AMORTIZE_FRACTION 0.2
>  #define CLIP_DURATION(f) x265_clip3(MIN_FRAME_DURATION, MAX_FRAME_DURATION, f)
>  
> +struct CuStatistic
> +{
> +    uint64_t cntCuDepth[4];
> +    uint64_t cntIntraNxN;
> +    uint64_t cntSkipCu;
> +    uint64_t cntIntraCu;
> +    uint64_t cntInterCu;
> +    uint64_t totalCu;
> +
> +    double   percentCuDepth[4];
> +    double   percentIntraNxN;
> +    double   percentSkipCu;
> +    double   percentIntraCu;
> +    double   percentInterCu;
> +
> +    CuStatistic()
> +    {
> +        memset(this, 0, sizeof(CuStatistic));
> +    }
> +};
> +
>  /* Current frame stats for 2 pass */
>  struct FrameStats
>  {
> @@ -61,6 +82,8 @@
>      double      percentIntra;
>      double      percentInter;
>      double      percentSkip;
> +
> +    CuStatistic cuStat;
>  };

this data structure nesting doesn't make a lot of sense.

>  struct Predictor
> diff -r a7bf7a150a70 -r 05579078c8fa source/x265.h
> --- a/source/x265.h	Fri May 22 14:29:35 2015 +0530
> +++ b/source/x265.h	Mon May 25 12:07:33 2015 +0530
> @@ -443,6 +443,9 @@
>       * modes during mode decision. Default disabled */
>      int       bLogCuStats;
>  
> +    /* Enable logging of frame level data. Default is disabled*/
w/s
> +    int       bLogFrameStats;
> +
>      /* Enable the measurement and reporting of PSNR. Default is enabled */
>      int       bEnablePsnr;

new params would require reST docs and an API version bump

> diff -r a7bf7a150a70 -r 05579078c8fa source/x265cli.h
> --- a/source/x265cli.h	Fri May 22 14:29:35 2015 +0530
> +++ b/source/x265cli.h	Mon May 25 12:07:33 2015 +0530
> @@ -56,6 +56,8 @@
>      { "csv",            required_argument, NULL, 0 },
>      { "no-cu-stats",          no_argument, NULL, 0 },
>      { "cu-stats",             no_argument, NULL, 0 },
> +    { "no-frame-stats",       no_argument, NULL, 0 },
> +    { "frame-stats",          no_argument, NULL, 0 },
>      { "y4m",                  no_argument, NULL, 0 },
>      { "no-progress",          no_argument, NULL, 0 },
>      { "output",         required_argument, NULL, 'o' },
> @@ -248,6 +250,7 @@
>      H0("   --log-level <string>          Logging level: none error warning info debug full. Default %s\n", x265::logLevelNames[param->logLevel + 1]);
>      H0("   --no-progress                 Disable CLI progress reports\n");
>      H0("   --[no-]cu-stats               Enable logging stats about distribution of cu across all modes. Default %s\n",OPT(param->bLogCuStats));
> +    H0("   --[no-]frame-stats            Enable logging stats about frames. Default %s\n", OPT(param->bLogFrameStats));

--cu-stats toggles logging of stats to x265_log(), --frame-stats toggles
the writing of a new CSV file (with a hard coded filename). This is
non-intuitive. This really behaves like it should be named --frame-csv
and its param should be a const char*, which begs a number of other questions.

I don't much like how the libx265 code is writing so many files, when
much of this data could be quite useful for any code that monitors the
progress of an encode.  I can understand exceptions being made for the
2-pass stats files and the analysis re-use files being read and written
directly by the encoder (since they must work via ffmpeg and similar
apps), but I believe the CSV files really should be getting written by
our CLI.

Everything that is getting written into our CSV files today should be
discoverable from the C API, and the CLI should be using those
interfaces to write the CSV files itself.

We should define a new x265_frame_stats struct in x265.h and add a
'x265_frame_stats* frame_stats' to the end of x265_picture, and have
x265_encoder_encode() fill it in if pic_out->frame_stats is not NULL.
And this struct should contain all of the data that is written into
per-frame CSV files today, plus these new per-frame coding stats.

Then all of the CSV writing can be hoisted out of the libx265 library
and into x265.cpp

-- 
Steve Borho


More information about the x265-devel mailing list