[x265] [PATCH] stats: count of each CU partition per frame
Divya Manivannan
divya at multicorewareinc.com
Mon Jul 6 07:29:18 CEST 2015
# HG changeset patch
# User Divya Manivannan <divya at multicorewareinc.com>
# Date 1435741578 -19800
# Wed Jul 01 14:36:18 2015 +0530
# Node ID fab7c493b7ce3845565f9fbe07ebc183c22e928c
# Parent 1162fb0b99f82d32529f396e3afb6966cc38ec02
stats: count of each CU partition per frame
diff -r 1162fb0b99f8 -r fab7c493b7ce doc/reST/api.rst
--- a/doc/reST/api.rst Fri Jul 03 13:43:47 2015 -0500
+++ b/doc/reST/api.rst Wed Jul 01 14:36:18 2015 +0530
@@ -338,10 +338,6 @@
Cleanup
=======
- /* x265_encoder_log:
- * This function is now deprecated */
- void x265_encoder_log(x265_encoder *encoder, int argc, char **argv);
-
Finally, the encoder must be closed in order to free all of its
resources. An encoder that has been flushed cannot be restarted and
reused. Once **x265_encoder_close()** has been called, the encoder
diff -r 1162fb0b99f8 -r fab7c493b7ce source/CMakeLists.txt
--- a/source/CMakeLists.txt Fri Jul 03 13:43:47 2015 -0500
+++ b/source/CMakeLists.txt Wed Jul 01 14:36:18 2015 +0530
@@ -30,7 +30,7 @@
mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
# X265_BUILD must be incremented each time the public API is changed
-set(X265_BUILD 63)
+set(X265_BUILD 64)
configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
"${PROJECT_BINARY_DIR}/x265.def")
configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
diff -r 1162fb0b99f8 -r fab7c493b7ce source/common/framedata.h
--- a/source/common/framedata.h Fri Jul 03 13:43:47 2015 -0500
+++ b/source/common/framedata.h Wed Jul 01 14:36:18 2015 +0530
@@ -34,6 +34,9 @@
class PicYuv;
class JobProvider;
+#define INTER_MODES 4 // 2Nx2N, 2NxN, Nx2N, AMP modes
+#define INTRA_MODES 3 // DC, Planar, Angular modes
+
/* Current frame stats for 2 pass */
struct FrameStats
{
@@ -49,6 +52,25 @@
double percent8x8Intra;
double percent8x8Inter;
double percent8x8Skip;
+ double percentIntraNxN;
+ double percentSkipCu[NUM_CU_DEPTH];
+ double percentMergeCu[NUM_CU_DEPTH];
+ double percentIntraDistribution[NUM_CU_DEPTH][INTRA_MODES];
+ double percentInterDistribution[NUM_CU_DEPTH][3]; // 2Nx2N, RECT, AMP modes percentage
+
+ uint64_t cntIntraNxN;
+ uint64_t totalCu;
+ uint64_t cntSkipCu[NUM_CU_DEPTH];
+ uint64_t cntMergeCu[NUM_CU_DEPTH];
+ uint64_t cntInter[NUM_CU_DEPTH];
+ uint64_t cntIntra[NUM_CU_DEPTH];
+ uint64_t cuInterDistribution[NUM_CU_DEPTH][INTER_MODES];
+ uint64_t cuIntraDistribution[NUM_CU_DEPTH][INTRA_MODES];
+
+ FrameStats()
+ {
+ memset(this, 0, sizeof(FrameStats)); // zero-fill the whole object so every counter and percentage field starts at 0 before anything is accumulated into it
+ }
};
/* Per-frame data that is used during encodes and referenced while the picture
diff -r 1162fb0b99f8 -r fab7c493b7ce source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Fri Jul 03 13:43:47 2015 -0500
+++ b/source/encoder/encoder.cpp Wed Jul 01 14:36:18 2015 +0530
@@ -1163,6 +1163,18 @@
else
frameStats->avgWPP = 1;
frameStats->countRowBlocks = curEncoder->m_countRowBlocks;
+
+ frameStats->cuStats.percentIntraNxN = curFrame->m_encData->m_frameStats.percentIntraNxN;
+ for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+ {
+ frameStats->cuStats.percentSkipCu[depth] = curFrame->m_encData->m_frameStats.percentSkipCu[depth];
+ frameStats->cuStats.percentMergeCu[depth] = curFrame->m_encData->m_frameStats.percentMergeCu[depth];
+ frameStats->cuStats.percentInterDistribution[depth][0] = curFrame->m_encData->m_frameStats.percentInterDistribution[depth][0];
+ frameStats->cuStats.percentInterDistribution[depth][1] = curFrame->m_encData->m_frameStats.percentInterDistribution[depth][1];
+ frameStats->cuStats.percentInterDistribution[depth][2] = curFrame->m_encData->m_frameStats.percentInterDistribution[depth][2];
+ for (int n = 0; n < INTRA_MODES; n++)
+ frameStats->cuStats.percentIntraDistribution[depth][n] = curFrame->m_encData->m_frameStats.percentIntraDistribution[depth][n];
+ }
}
}
diff -r 1162fb0b99f8 -r fab7c493b7ce source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp Fri Jul 03 13:43:47 2015 -0500
+++ b/source/encoder/frameencoder.cpp Wed Jul 01 14:36:18 2015 +0530
@@ -583,6 +583,33 @@
m_frame->m_encData->m_frameStats.percent8x8Inter = (double)totalP / totalCuCount;
m_frame->m_encData->m_frameStats.percent8x8Skip = (double)totalSkip / totalCuCount;
}
+ for (uint32_t i = 0; i < m_numRows; i++)
+ {
+ m_frame->m_encData->m_frameStats.cntIntraNxN += m_rows[i].rowStats.cntIntraNxN;
+ m_frame->m_encData->m_frameStats.totalCu += m_rows[i].rowStats.totalCu;
+ for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+ {
+ m_frame->m_encData->m_frameStats.cntSkipCu[depth] += m_rows[i].rowStats.cntSkipCu[depth];
+ m_frame->m_encData->m_frameStats.cntMergeCu[depth] += m_rows[i].rowStats.cntMergeCu[depth];
+ for (int m = 0; m < INTER_MODES; m++)
+ m_frame->m_encData->m_frameStats.cuInterDistribution[depth][m] += m_rows[i].rowStats.cuInterDistribution[depth][m];
+ for (int n = 0; n < INTRA_MODES; n++)
+ m_frame->m_encData->m_frameStats.cuIntraDistribution[depth][n] += m_rows[i].rowStats.cuIntraDistribution[depth][n];
+ }
+ }
+ m_frame->m_encData->m_frameStats.percentIntraNxN = (double)(m_frame->m_encData->m_frameStats.cntIntraNxN * 100) / m_frame->m_encData->m_frameStats.totalCu;
+ for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+ {
+ m_frame->m_encData->m_frameStats.percentSkipCu[depth] = (double)(m_frame->m_encData->m_frameStats.cntSkipCu[depth] * 100) / m_frame->m_encData->m_frameStats.totalCu;
+ m_frame->m_encData->m_frameStats.percentMergeCu[depth] = (double)(m_frame->m_encData->m_frameStats.cntMergeCu[depth] * 100) / m_frame->m_encData->m_frameStats.totalCu;
+ for (int n = 0; n < INTRA_MODES; n++)
+ m_frame->m_encData->m_frameStats.percentIntraDistribution[depth][n] = (double)(m_frame->m_encData->m_frameStats.cuIntraDistribution[depth][n] * 100) / m_frame->m_encData->m_frameStats.totalCu;
+ uint64_t cuInterRectCnt = 0; // sum of Nx2N, 2NxN counts
+ cuInterRectCnt += m_frame->m_encData->m_frameStats.cuInterDistribution[depth][1] + m_frame->m_encData->m_frameStats.cuInterDistribution[depth][2];
+ m_frame->m_encData->m_frameStats.percentInterDistribution[depth][0] = (double)(m_frame->m_encData->m_frameStats.cuInterDistribution[depth][0] * 100) / m_frame->m_encData->m_frameStats.totalCu;
+ m_frame->m_encData->m_frameStats.percentInterDistribution[depth][1] = (double)(cuInterRectCnt * 100) / m_frame->m_encData->m_frameStats.totalCu;
+ m_frame->m_encData->m_frameStats.percentInterDistribution[depth][2] = (double)(m_frame->m_encData->m_frameStats.cuInterDistribution[depth][3] * 100) / m_frame->m_encData->m_frameStats.totalCu;
+ }
m_bs.resetBits();
m_entropyCoder.load(m_initSliceContext);
@@ -838,13 +865,6 @@
const uint32_t lineStartCUAddr = row * numCols;
bool bIsVbv = m_param->rc.vbvBufferSize > 0 && m_param->rc.vbvMaxBitrate > 0;
- /* These store the count of inter, intra and skip cus within quad tree structure of each CTU */
- uint32_t qTreeInterCnt[NUM_CU_DEPTH];
- uint32_t qTreeIntraCnt[NUM_CU_DEPTH];
- uint32_t qTreeSkipCnt[NUM_CU_DEPTH];
- for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
- qTreeIntraCnt[depth] = qTreeInterCnt[depth] = qTreeSkipCnt[depth] = 0;
-
while (curRow.completed < numCols)
{
ProfileScopeEvent(encodeCTU);
@@ -916,30 +936,42 @@
// Completed CU processing
curRow.completed++;
- if (m_param->rc.bStatWrite)
- curEncData.m_rowStat[row].sumQpAq += collectCTUStatistics(*ctu, qTreeInterCnt, qTreeIntraCnt, qTreeSkipCnt);
- else if (m_param->rc.aqMode)
- curEncData.m_rowStat[row].sumQpAq += calcCTUQP(*ctu);
+ FrameStats frameLog;
+ curEncData.m_rowStat[row].sumQpAq += collectCTUStatistics(*ctu, &frameLog);
// copy no. of intra, inter Cu cnt per row into frame stats for 2 pass
if (m_param->rc.bStatWrite)
{
- curRow.rowStats.mvBits += best.mvBits;
+ curRow.rowStats.mvBits += best.mvBits;
curRow.rowStats.coeffBits += best.coeffBits;
- curRow.rowStats.miscBits += best.totalBits - (best.mvBits + best.coeffBits);
+ curRow.rowStats.miscBits += best.totalBits - (best.mvBits + best.coeffBits);
for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
{
/* 1 << shift == number of 8x8 blocks at current depth */
int shift = 2 * (g_maxCUDepth - depth);
- curRow.rowStats.intra8x8Cnt += qTreeIntraCnt[depth] << shift;
- curRow.rowStats.inter8x8Cnt += qTreeInterCnt[depth] << shift;
- curRow.rowStats.skip8x8Cnt += qTreeSkipCnt[depth] << shift;
+ int cuSize = g_maxCUSize >> depth;
- // clear the row cu data from thread local object
- qTreeIntraCnt[depth] = qTreeInterCnt[depth] = qTreeSkipCnt[depth] = 0;
+ if (cuSize == 8)
+ curRow.rowStats.intra8x8Cnt += (int)(frameLog.cntIntra[depth] + frameLog.cntIntraNxN);
+ else
+ curRow.rowStats.intra8x8Cnt += (int)(frameLog.cntIntra[depth] << shift);
+
+ curRow.rowStats.inter8x8Cnt += (int)(frameLog.cntInter[depth] << shift);
+ curRow.rowStats.skip8x8Cnt += (int)(frameLog.cntSkipCu[depth] << shift);
}
}
+ curRow.rowStats.cntIntraNxN += frameLog.cntIntraNxN;
+ curRow.rowStats.totalCu += frameLog.totalCu;
+ for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+ {
+ curRow.rowStats.cntSkipCu[depth] += frameLog.cntSkipCu[depth];
+ curRow.rowStats.cntMergeCu[depth] += frameLog.cntMergeCu[depth];
+ for (int m = 0; m < INTER_MODES; m++)
+ curRow.rowStats.cuInterDistribution[depth][m] += frameLog.cuInterDistribution[depth][m];
+ for (int n = 0; n < INTRA_MODES; n++)
+ curRow.rowStats.cuIntraDistribution[depth][n] += frameLog.cuIntraDistribution[depth][n];
+ }
curEncData.m_cuStat[cuAddr].totalBits = best.totalBits;
x265_emms();
@@ -1115,11 +1147,9 @@
}
/* collect statistics about CU coding decisions, return total QP */
-int FrameEncoder::collectCTUStatistics(const CUData& ctu, uint32_t* qtreeInterCnt, uint32_t* qtreeIntraCnt, uint32_t* qtreeSkipCnt)
+int FrameEncoder::collectCTUStatistics(const CUData& ctu, FrameStats* log)
{
- StatisticLog* log = &m_sliceTypeLog[ctu.m_slice->m_sliceType];
int totQP = 0;
-
if (ctu.m_slice->m_sliceType == I_SLICE)
{
uint32_t depth = 0;
@@ -1129,14 +1159,12 @@
log->totalCu++;
log->cntIntra[depth]++;
- qtreeIntraCnt[depth]++;
totQP += ctu.m_qp[absPartIdx] * (ctu.m_numPartitions >> (depth * 2));
if (ctu.m_predMode[absPartIdx] == MODE_NONE)
{
log->totalCu--;
log->cntIntra[depth]--;
- qtreeIntraCnt[depth]--;
}
else if (ctu.m_partSize[absPartIdx] != SIZE_2Nx2N)
{
@@ -1159,24 +1187,20 @@
depth = ctu.m_cuDepth[absPartIdx];
log->totalCu++;
- log->cntTotalCu[depth]++;
totQP += ctu.m_qp[absPartIdx] * (ctu.m_numPartitions >> (depth * 2));
if (ctu.m_predMode[absPartIdx] == MODE_NONE)
- {
log->totalCu--;
- log->cntTotalCu[depth]--;
- }
else if (ctu.isSkipped(absPartIdx))
{
- log->totalCu--;
- log->cntSkipCu[depth]++;
- qtreeSkipCnt[depth]++;
+ if (ctu.m_mergeFlag[0])
+ log->cntMergeCu[depth]++;
+ else
+ log->cntSkipCu[depth]++;
}
else if (ctu.isInter(absPartIdx))
{
log->cntInter[depth]++;
- qtreeInterCnt[depth]++;
if (ctu.m_partSize[absPartIdx] < AMP_ID)
log->cuInterDistribution[depth][ctu.m_partSize[absPartIdx]]++;
@@ -1186,7 +1210,6 @@
else if (ctu.isIntra(absPartIdx))
{
log->cntIntra[depth]++;
- qtreeIntraCnt[depth]++;
if (ctu.m_partSize[absPartIdx] != SIZE_2Nx2N)
{
@@ -1206,21 +1229,6 @@
return totQP;
}
-/* iterate over coded CUs and determine total QP */
-int FrameEncoder::calcCTUQP(const CUData& ctu)
-{
- int totQP = 0;
- uint32_t depth = 0, numParts = ctu.m_numPartitions;
-
- for (uint32_t absPartIdx = 0; absPartIdx < ctu.m_numPartitions; absPartIdx += numParts)
- {
- depth = ctu.m_cuDepth[absPartIdx];
- numParts = ctu.m_numPartitions >> (depth * 2);
- totQP += ctu.m_qp[absPartIdx] * numParts;
- }
- return totQP;
-}
-
/* DCT-domain noise reduction / adaptive deadzone from libavcodec */
void FrameEncoder::noiseReductionUpdate()
{
diff -r 1162fb0b99f8 -r fab7c493b7ce source/encoder/frameencoder.h
--- a/source/encoder/frameencoder.h Fri Jul 03 13:43:47 2015 -0500
+++ b/source/encoder/frameencoder.h Wed Jul 01 14:36:18 2015 +0530
@@ -49,8 +49,6 @@
#define ANGULAR_MODE_ID 2
#define AMP_ID 3
-#define INTER_MODES 4
-#define INTRA_MODES 3
struct StatisticLog
{
@@ -156,7 +154,6 @@
MD5Context m_state[3];
uint32_t m_crc[3];
uint32_t m_checksum[3];
- StatisticLog m_sliceTypeLog[3]; // per-slice type CU statistics
volatile int m_activeWorkerCount; // count of workers currently encoding or filtering CTUs
volatile int m_totalActiveWorkerCount; // sum of m_activeWorkerCount sampled at end of each CTU
@@ -220,8 +217,7 @@
void encodeSlice();
void threadMain();
- int collectCTUStatistics(const CUData& ctu, uint32_t* qtreeInterCnt, uint32_t* qtreeIntraCnt, uint32_t* qtreeSkipCnt);
- int calcCTUQP(const CUData& ctu);
+ int collectCTUStatistics(const CUData& ctu, FrameStats* frameLog);
void noiseReductionUpdate();
/* Called by WaveFront::findJob() */
diff -r 1162fb0b99f8 -r fab7c493b7ce source/x265.cpp
--- a/source/x265.cpp Fri Jul 03 13:43:47 2015 -0500
+++ b/source/x265.cpp Wed Jul 01 14:36:18 2015 +0530
@@ -171,7 +171,49 @@
fprintf(csvfpt, "RateFactor, ");
fprintf(csvfpt, "Y PSNR, U PSNR, V PSNR, YUV PSNR, SSIM, SSIM (dB), List 0, List 1");
/* detailed performance statistics */
- fprintf(csvfpt, ", DecideWait (ms), Row0Wait (ms), Wall time (ms), Ref Wait Wall (ms), Total CTU time (ms), Stall Time (ms), Avg WPP, Row Blocks\n");
+ fprintf(csvfpt, ", DecideWait (ms), Row0Wait (ms), Wall time (ms), Ref Wait Wall (ms), Total CTU time (ms), Stall Time (ms), Avg WPP, Row Blocks");
+ if (csvLogLevel >= 2)
+ {
+ uint32_t size = param->maxCUSize;
+ for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+ {
+ fprintf(csvfpt, ", Intra %dx%d DC, Intra %dx%d Planar, Intra %dx%d Ang", size, size, size, size, size, size);
+ size /= 2;
+ }
+ fprintf(csvfpt, ", 4x4");
+ size = param->maxCUSize;
+ if (param->bEnableRectInter)
+ {
+ for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+ {
+ fprintf(csvfpt, ", Inter %dx%d, Inter %dx%d (Rect)", size, size, size, size);
+ if (param->bEnableAMP)
+ fprintf(csvfpt, ", Inter %dx%d (Amp)", size, size);
+ size /= 2;
+ }
+ }
+ else
+ {
+ for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+ {
+ fprintf(csvfpt, ", Inter %dx%d", size, size);
+ size /= 2;
+ }
+ }
+ size = param->maxCUSize;
+ for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+ {
+ fprintf(csvfpt, ", Skip %dx%d", size, size);
+ size /= 2;
+ }
+ size = param->maxCUSize;
+ for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+ {
+ fprintf(csvfpt, ", Merge %dx%d", size, size);
+ size /= 2;
+ }
+ }
+ fprintf(csvfpt, "\n");
}
else
fputs(summaryCSVHeader, csvfpt);
@@ -312,6 +354,30 @@
}
fprintf(csvfpt, " %.1lf, %.1lf, %.1lf, %.1lf, %.1lf, %.1lf,", frameStats->decideWaitTime, frameStats->row0WaitTime, frameStats->wallTime, frameStats->refWaitWallTime, frameStats->totalCTUTime, frameStats->stallTime);
fprintf(csvfpt, " %.3lf, %d", frameStats->avgWPP, frameStats->countRowBlocks);
+ if (csvLogLevel >= 2)
+ {
+ for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+ fprintf(csvfpt, ", %5.2lf%%, %5.2lf%%, %5.2lf%%", frameStats->cuStats.percentIntraDistribution[depth][0], frameStats->cuStats.percentIntraDistribution[depth][1], frameStats->cuStats.percentIntraDistribution[depth][2]);
+ fprintf(csvfpt, ", %5.2lf%%", frameStats->cuStats.percentIntraNxN);
+ if (param->bEnableRectInter)
+ {
+ for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+ {
+ fprintf(csvfpt, ", %5.2lf%%, %5.2lf%%", frameStats->cuStats.percentInterDistribution[depth][0], frameStats->cuStats.percentInterDistribution[depth][1]);
+ if (param->bEnableAMP)
+ fprintf(csvfpt, ", %5.2lf%%", frameStats->cuStats.percentInterDistribution[depth][2]);
+ }
+ }
+ else
+ {
+ for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+ fprintf(csvfpt, ", %5.2lf%%", frameStats->cuStats.percentInterDistribution[depth][0]);
+ }
+ for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+ fprintf(csvfpt, ", %5.2lf%%", frameStats->cuStats.percentSkipCu[depth]);
+ for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+ fprintf(csvfpt, ", %5.2lf%%", frameStats->cuStats.percentMergeCu[depth]);
+ }
fprintf(csvfpt, "\n");
fflush(stderr);
}
@@ -703,17 +769,6 @@
if (cliopt.reconPlayCmd)
reconPlay = new ReconPlay(cliopt.reconPlayCmd, *param);
- if (cliopt.csvfn)
- {
- if (cliopt.parseCSVFile())
- {
- cliopt.destroy();
- if (cliopt.api)
- cliopt.api->param_free(cliopt.param);
- exit(5);
- }
- }
-
/* note: we could try to acquire a different libx265 API here based on
* the profile found during option parsing, but it must be done before
* opening an encoder */
@@ -731,6 +786,17 @@
/* get the encoder parameters post-initialization */
api->encoder_parameters(encoder, param);
+ if (cliopt.csvfn)
+ {
+ if (cliopt.parseCSVFile())
+ {
+ cliopt.destroy();
+ if (cliopt.api)
+ cliopt.api->param_free(cliopt.param);
+ exit(5);
+ }
+ }
+
/* Control-C handler */
if (signal(SIGINT, sigint_handler) == SIG_ERR)
x265_log(param, X265_LOG_ERROR, "Unable to register CTRL+C handler: %s\n", strerror(errno));
diff -r 1162fb0b99f8 -r fab7c493b7ce source/x265.h
--- a/source/x265.h Fri Jul 03 13:43:47 2015 -0500
+++ b/source/x265.h Wed Jul 01 14:36:18 2015 +0530
@@ -100,6 +100,18 @@
uint32_t numPartitions;
} x265_analysis_data;
+/* Per-frame CU statistics, reported as percentages of the CUs counted in the frame */
+typedef struct x265_cu_stats
+{
+ double percentSkipCu[4]; // Percentage of skip CUs, indexed by CU depth
+ double percentMergeCu[4]; // Percentage of merge CUs, indexed by CU depth
+ double percentIntraDistribution[4][3]; // [depth][mode]: percentage of DC, Planar, Angular intra CUs (in that order)
+ double percentInterDistribution[4][3]; // [depth][mode]: percentage of 2Nx2N, rectangular and AMP inter CUs (in that order)
+ double percentIntraNxN; // Percentage of intra NxN (4x4) CUs
+
+ /* All the above values will add up to 100%. */
+} x265_cu_stats;
+
/* Frame level statistics */
typedef struct x265_frame_stats
{
@@ -124,6 +136,7 @@
int list0POC[16];
int list1POC[16];
char sliceType;
+ x265_cu_stats cuStats;
} x265_frame_stats;
/* Used to pass pictures into the encoder, and to get picture data back out of
More information about the x265-devel
mailing list