[x265] [PATCH 3 of 7] add maxCUDepth to param to replace global g_maxCUDepth
kavitha at multicorewareinc.com
kavitha at multicorewareinc.com
Wed Jun 21 08:44:53 CEST 2017
# HG changeset patch
# User Kavitha Sampath <kavitha at multicorewareinc.com>
# Date 1496121350 -19800
# Tue May 30 10:45:50 2017 +0530
# Node ID 3b0883b24cd21a2aea3fc4bf82467df717c2e39c
# Parent 624374a3b21cba624bee8002bcbdb4fa0917a6fe
add maxCUDepth to param to replace global g_maxCUDepth
diff -r 624374a3b21c -r 3b0883b24cd2 source/CMakeLists.txt
--- a/source/CMakeLists.txt Mon Jun 12 17:21:57 2017 +0530
+++ b/source/CMakeLists.txt Tue May 30 10:45:50 2017 +0530
@@ -29,7 +29,7 @@
option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF)
mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
# X265_BUILD must be incremented each time the public API is changed
-set(X265_BUILD 123)
+set(X265_BUILD 124)
configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
"${PROJECT_BINARY_DIR}/x265.def")
configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
diff -r 624374a3b21c -r 3b0883b24cd2 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Mon Jun 12 17:21:57 2017 +0530
+++ b/source/encoder/analysis.cpp Tue May 30 10:45:50 2017 +0530
@@ -93,7 +93,7 @@
uint32_t cuSize = m_param->maxCUSize;
bool ok = true;
- for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++, cuSize >>= 1)
+ for (uint32_t depth = 0; depth <= m_param->maxCUDepth; depth++, cuSize >>= 1)
{
ModeDepth &md = m_modeDepth[depth];
@@ -116,7 +116,7 @@
void Analysis::destroy()
{
- for (uint32_t i = 0; i <= g_maxCUDepth; i++)
+ for (uint32_t i = 0; i <= m_param->maxCUDepth; i++)
{
m_modeDepth[i].cuMemPool.destroy();
m_modeDepth[i].fencYuv.destroy();
@@ -296,7 +296,7 @@
depth = ctu.m_cuDepth[absPartIdx];
partSize = ctu.m_partSize[absPartIdx];
uint32_t numPU = nbPartsTable[(int)partSize];
- int shift = 2 * (g_maxCUDepth + 1 - depth);
+ int shift = 2 * (m_param->maxCUDepth + 1 - depth);
for (uint32_t puIdx = 0; puIdx < numPU; puIdx++)
{
PredictionUnit pu(ctu, cuGeom, puIdx);
@@ -2937,7 +2937,7 @@
void Analysis::encodeResidue(const CUData& ctu, const CUGeom& cuGeom)
{
- if (cuGeom.depth < ctu.m_cuDepth[cuGeom.absPartIdx] && cuGeom.depth < g_maxCUDepth)
+ if (cuGeom.depth < ctu.m_cuDepth[cuGeom.absPartIdx] && cuGeom.depth < ctu.m_encData->m_param->maxCUDepth)
{
for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
{
diff -r 624374a3b21c -r 3b0883b24cd2 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Mon Jun 12 17:21:57 2017 +0530
+++ b/source/encoder/encoder.cpp Tue May 30 10:45:50 2017 +0530
@@ -1424,7 +1424,7 @@
/* Summarize stats from all frame encoders */
CUStats cuStats;
for (int i = 0; i < m_param->frameNumThreads; i++)
- cuStats.accumulate(m_frameEncoder[i]->m_cuStats);
+ cuStats.accumulate(m_frameEncoder[i]->m_cuStats, m_param);
if (!cuStats.totalCTUTime)
return;
@@ -1445,7 +1445,7 @@
int64_t interRDOTotalTime = 0, intraRDOTotalTime = 0;
uint64_t interRDOTotalCount = 0, intraRDOTotalCount = 0;
- for (uint32_t i = 0; i <= g_maxCUDepth; i++)
+ for (uint32_t i = 0; i <= m_param->maxCUDepth; i++)
{
interRDOTotalTime += cuStats.interRDOElapsedTime[i];
intraRDOTotalTime += cuStats.intraRDOElapsedTime[i];
@@ -1771,7 +1771,7 @@
frameStats->minChromaVLevel = curFrame->m_fencPic->m_minChromaVLevel;
frameStats->avgChromaVLevel = curFrame->m_fencPic->m_avgChromaVLevel;
- for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+ for (uint32_t depth = 0; depth <= m_param->maxCUDepth; depth++)
{
frameStats->cuStats.percentSkipCu[depth] = curFrame->m_encData->m_frameStats.percentSkipCu[depth];
frameStats->cuStats.percentMergeCu[depth] = curFrame->m_encData->m_frameStats.percentMergeCu[depth];
@@ -1786,7 +1786,7 @@
frameStats->puStats.percentNxN = 0;
else
frameStats->puStats.percentNxN = (double)(curFrame->m_encData->m_frameStats.cnt4x4 / (double)curFrame->m_encData->m_frameStats.totalPu[4]) * 100;
- for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+ for (uint32_t depth = 0; depth <= m_param->maxCUDepth; depth++)
{
if (curFrame->m_encData->m_frameStats.totalPu[depth] == 0)
{
@@ -1988,8 +1988,8 @@
sps->numPartitions = NUM_4x4_PARTITIONS;
sps->numPartInCUSize = 1 << g_unitSizeDepth;
- sps->log2MinCodingBlockSize = m_param->maxLog2CUSize - g_maxCUDepth;
- sps->log2DiffMaxMinCodingBlockSize = g_maxCUDepth;
+ sps->log2MinCodingBlockSize = m_param->maxLog2CUSize - m_param->maxCUDepth;
+ sps->log2DiffMaxMinCodingBlockSize = m_param->maxCUDepth;
uint32_t maxLog2TUSize = (uint32_t)g_log2Size[m_param->maxTUSize];
sps->quadtreeTULog2MaxSize = X265_MIN((uint32_t)m_param->maxLog2CUSize, maxLog2TUSize);
sps->quadtreeTULog2MinSize = 2;
@@ -1999,7 +1999,7 @@
sps->bUseSAO = m_param->bEnableSAO;
sps->bUseAMP = m_param->bEnableAMP;
- sps->maxAMPDepth = m_param->bEnableAMP ? g_maxCUDepth : 0;
+ sps->maxAMPDepth = m_param->bEnableAMP ? m_param->maxCUDepth : 0;
sps->maxTempSubLayers = m_param->bEnableTemporalSubLayers ? 2 : 1;
sps->maxDecPicBuffering = m_vps.maxDecPicBuffering;
@@ -2643,6 +2643,7 @@
}
}
p->maxLog2CUSize = g_log2Size[p->maxCUSize];
+ p->maxCUDepth = p->maxLog2CUSize - g_log2Size[p->minCUSize];
}
void Encoder::allocAnalysis(x265_analysis_data* analysis)
diff -r 624374a3b21c -r 3b0883b24cd2 source/encoder/entropy.cpp
--- a/source/encoder/entropy.cpp Mon Jun 12 17:21:57 2017 +0530
+++ b/source/encoder/entropy.cpp Tue May 30 10:45:50 2017 +0530
@@ -783,7 +783,7 @@
if (cuSplitFlag)
codeSplitFlag(ctu, absPartIdx, depth);
- if (depth < ctu.m_cuDepth[absPartIdx] && depth < g_maxCUDepth)
+ if (depth < ctu.m_cuDepth[absPartIdx] && depth < ctu.m_encData->m_param->maxCUDepth)
{
uint32_t qNumParts = cuGeom.numPartitions >> 2;
if (depth == slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
@@ -863,7 +863,7 @@
case SIZE_nRx2N:
bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 1]);
- if (depth == g_maxCUDepth && !(cu.m_log2CUSize[absPartIdx] == 3))
+ if (depth == cu.m_encData->m_param->maxCUDepth && !(cu.m_log2CUSize[absPartIdx] == 3))
bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 2]);
if (cu.m_slice->m_sps->maxAMPDepth > depth)
{
@@ -1512,7 +1512,7 @@
if (cu.isIntra(absPartIdx))
{
- if (depth == g_maxCUDepth)
+ if (depth == cu.m_encData->m_param->maxCUDepth)
encodeBin(partSize == SIZE_2Nx2N ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX]);
return;
}
@@ -1541,7 +1541,7 @@
case SIZE_nRx2N:
encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 1]);
- if (depth == g_maxCUDepth && !(cu.m_log2CUSize[absPartIdx] == 3))
+ if (depth == cu.m_encData->m_param->maxCUDepth && !(cu.m_log2CUSize[absPartIdx] == 3))
encodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 2]);
if (cu.m_slice->m_sps->maxAMPDepth > depth)
{
diff -r 624374a3b21c -r 3b0883b24cd2 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp Mon Jun 12 17:21:57 2017 +0530
+++ b/source/encoder/frameencoder.cpp Tue May 30 10:45:50 2017 +0530
@@ -887,7 +887,7 @@
m_frame->m_encData->m_frameStats.psyEnergy += m_rows[i].rowStats.psyEnergy;
m_frame->m_encData->m_frameStats.ssimEnergy += m_rows[i].rowStats.ssimEnergy;
m_frame->m_encData->m_frameStats.resEnergy += m_rows[i].rowStats.resEnergy;
- for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+ for (uint32_t depth = 0; depth <= m_param->maxCUDepth; depth++)
{
m_frame->m_encData->m_frameStats.cntSkipCu[depth] += m_rows[i].rowStats.cntSkipCu[depth];
m_frame->m_encData->m_frameStats.cntMergeCu[depth] += m_rows[i].rowStats.cntMergeCu[depth];
@@ -903,7 +903,7 @@
m_frame->m_encData->m_frameStats.avgSsimEnergy = (double)(m_frame->m_encData->m_frameStats.ssimEnergy) / m_frame->m_encData->m_frameStats.totalCtu;
m_frame->m_encData->m_frameStats.avgResEnergy = (double)(m_frame->m_encData->m_frameStats.resEnergy) / m_frame->m_encData->m_frameStats.totalCtu;
m_frame->m_encData->m_frameStats.percentIntraNxN = (double)(m_frame->m_encData->m_frameStats.cntIntraNxN * 100) / m_frame->m_encData->m_frameStats.totalCu;
- for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+ for (uint32_t depth = 0; depth <= m_param->maxCUDepth; depth++)
{
m_frame->m_encData->m_frameStats.percentSkipCu[depth] = (double)(m_frame->m_encData->m_frameStats.cntSkipCu[depth] * 100) / m_frame->m_encData->m_frameStats.totalCu;
m_frame->m_encData->m_frameStats.percentMergeCu[depth] = (double)(m_frame->m_encData->m_frameStats.cntMergeCu[depth] * 100) / m_frame->m_encData->m_frameStats.totalCu;
@@ -1101,7 +1101,7 @@
/* Accumulate CU statistics from each worker thread, we could report
* per-frame stats here, but currently we do not. */
for (int i = 0; i < numTLD; i++)
- m_cuStats.accumulate(m_tld[i].analysis.m_stats[m_jpId]);
+ m_cuStats.accumulate(m_tld[i].analysis.m_stats[m_jpId], m_param);
#endif
m_endFrameTime = x265_mdate();
@@ -1213,7 +1213,6 @@
const uint32_t row = (uint32_t)intRow;
CTURow& curRow = m_rows[row];
- tld.analysis.m_param = m_param;
if (m_param->bEnableWavefront)
{
ScopedLock self(curRow.lock);
@@ -1478,10 +1477,10 @@
curRow.rowStats.coeffBits += best.coeffBits;
curRow.rowStats.miscBits += best.totalBits - (best.mvBits + best.coeffBits);
- for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+ for (uint32_t depth = 0; depth <= m_param->maxCUDepth; depth++)
{
/* 1 << shift == number of 8x8 blocks at current depth */
- int shift = 2 * (g_maxCUDepth - depth);
+ int shift = 2 * (m_param->maxCUDepth - depth);
int cuSize = m_param->maxCUSize >> depth;
if (cuSize == 8)
@@ -1501,7 +1500,7 @@
curRow.rowStats.resEnergy += best.resEnergy;
curRow.rowStats.cntIntraNxN += frameLog.cntIntraNxN;
curRow.rowStats.totalCu += frameLog.totalCu;
- for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+ for (uint32_t depth = 0; depth <= m_param->maxCUDepth; depth++)
{
curRow.rowStats.cntSkipCu[depth] += frameLog.cntSkipCu[depth];
curRow.rowStats.cntMergeCu[depth] += frameLog.cntMergeCu[depth];
@@ -1734,7 +1733,6 @@
}
}
- tld.analysis.m_param = NULL;
curRow.busy = false;
// CHECK_ME: Does it always FALSE condition?
diff -r 624374a3b21c -r 3b0883b24cd2 source/encoder/search.cpp
--- a/source/encoder/search.cpp Mon Jun 12 17:21:57 2017 +0530
+++ b/source/encoder/search.cpp Tue May 30 10:45:50 2017 +0530
@@ -136,7 +136,7 @@
}
/* the rest of these buffers are indexed per-depth */
- for (uint32_t i = 0; i <= g_maxCUDepth; i++)
+ for (uint32_t i = 0; i <= m_param->maxCUDepth; i++)
{
int cuSize = param.maxCUSize >> i;
ok &= m_rqt[i].tmpResiYuv.create(cuSize, param.internalCsp);
@@ -186,7 +186,7 @@
m_rqt[i].resiQtYuv.destroy();
}
- for (uint32_t i = 0; i <= g_maxCUDepth; i++)
+ for (uint32_t i = 0; i <= m_param->maxCUDepth; i++)
{
m_rqt[i].tmpResiYuv.destroy();
m_rqt[i].tmpPredYuv.destroy();
diff -r 624374a3b21c -r 3b0883b24cd2 source/encoder/search.h
--- a/source/encoder/search.h Mon Jun 12 17:21:57 2017 +0530
+++ b/source/encoder/search.h Tue May 30 10:45:50 2017 +0530
@@ -204,9 +204,9 @@
memset(this, 0, sizeof(*this));
}
- void accumulate(CUStats& other)
+ void accumulate(CUStats& other, x265_param& param)
{
- for (uint32_t i = 0; i <= g_maxCUDepth; i++)
+ for (uint32_t i = 0; i <= param.maxCUDepth; i++)
{
intraRDOElapsedTime[i] += other.intraRDOElapsedTime[i];
interRDOElapsedTime[i] += other.interRDOElapsedTime[i];
diff -r 624374a3b21c -r 3b0883b24cd2 source/x265-extras.cpp
--- a/source/x265-extras.cpp Mon Jun 12 17:21:57 2017 +0530
+++ b/source/x265-extras.cpp Tue May 30 10:45:50 2017 +0530
@@ -69,7 +69,7 @@
fprintf(csvfp, "Latency, ");
fprintf(csvfp, "List 0, List 1");
uint32_t size = param.maxCUSize;
- for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+ for (uint32_t depth = 0; depth <= param.maxCUDepth; depth++)
{
fprintf(csvfp, ", Intra %dx%d DC, Intra %dx%d Planar, Intra %dx%d Ang", size, size, size, size, size, size);
size /= 2;
@@ -78,7 +78,7 @@
size = param.maxCUSize;
if (param.bEnableRectInter)
{
- for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+ for (uint32_t depth = 0; depth <= param.maxCUDepth; depth++)
{
fprintf(csvfp, ", Inter %dx%d, Inter %dx%d (Rect)", size, size, size, size);
if (param.bEnableAMP)
@@ -88,20 +88,20 @@
}
else
{
- for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+ for (uint32_t depth = 0; depth <= param.maxCUDepth; depth++)
{
fprintf(csvfp, ", Inter %dx%d", size, size);
size /= 2;
}
}
size = param.maxCUSize;
- for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+ for (uint32_t depth = 0; depth <= param.maxCUDepth; depth++)
{
fprintf(csvfp, ", Skip %dx%d", size, size);
size /= 2;
}
size = param.maxCUSize;
- for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+ for (uint32_t depth = 0; depth <= param.maxCUDepth; depth++)
{
fprintf(csvfp, ", Merge %dx%d", size, size);
size /= 2;
@@ -184,14 +184,14 @@
else
fputs(" -,", csvfp);
}
- for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+ for (uint32_t depth = 0; depth <= param.maxCUDepth; depth++)
fprintf(csvfp, "%5.2lf%%, %5.2lf%%, %5.2lf%%,", frameStats->cuStats.percentIntraDistribution[depth][0],
frameStats->cuStats.percentIntraDistribution[depth][1],
frameStats->cuStats.percentIntraDistribution[depth][2]);
fprintf(csvfp, "%5.2lf%%", frameStats->cuStats.percentIntraNxN);
if (param.bEnableRectInter)
{
- for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+ for (uint32_t depth = 0; depth <= param.maxCUDepth; depth++)
{
fprintf(csvfp, ", %5.2lf%%, %5.2lf%%", frameStats->cuStats.percentInterDistribution[depth][0],
frameStats->cuStats.percentInterDistribution[depth][1]);
@@ -201,12 +201,12 @@
}
else
{
- for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+ for (uint32_t depth = 0; depth <= param.maxCUDepth; depth++)
fprintf(csvfp, ", %5.2lf%%", frameStats->cuStats.percentInterDistribution[depth][0]);
}
- for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+ for (uint32_t depth = 0; depth <= param.maxCUDepth; depth++)
fprintf(csvfp, ", %5.2lf%%", frameStats->cuStats.percentSkipCu[depth]);
- for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+ for (uint32_t depth = 0; depth <= param.maxCUDepth; depth++)
fprintf(csvfp, ", %5.2lf%%", frameStats->cuStats.percentMergeCu[depth]);
fprintf(csvfp, ", %.2lf, %.2lf, %.2lf, %.2lf ", frameStats->avgLumaDistortion,
diff -r 624374a3b21c -r 3b0883b24cd2 source/x265.h
--- a/source/x265.h Mon Jun 12 17:21:57 2017 +0530
+++ b/source/x265.h Tue May 30 10:45:50 2017 +0530
@@ -1461,6 +1461,9 @@
/* Log of maximum CTU size */
uint32_t maxLog2CUSize;
+
+ /* Maximum CU depth, derived from the configured sizes as log2(maxCUSize) - log2(minCUSize) */
+ uint32_t maxCUDepth;
} x265_param;
/* x265_param_alloc:
More information about the x265-devel
mailing list