[x265] [PATCH] log: make qTreeCnt as stack arrays to avoid non determinism in 2 pass
Steve Borho
steve at borho.org
Tue Apr 28 17:53:51 CEST 2015
On 04/28, aarthi at multicorewareinc.com wrote:
> # HG changeset patch
> # User Aarthi Thirumalai
> # Date 1430225684 -19800
> # Tue Apr 28 18:24:44 2015 +0530
> # Node ID 9a66b31df0d07a7146694763de119cb2e0ca2cfa
> # Parent 13290abce29209be5206cce66e436c8ff3682540
> log: make qTreeCnt as stack arrays to avoid non determinism in 2 pass
>
> Currently, the qTreeCnt arrays collected in the log stats for each slice type are not protected and may cause
> inconsistency in 2-pass encodes when multiple rows finish simultaneously.
>
> diff -r 13290abce292 -r 9a66b31df0d0 source/encoder/frameencoder.cpp
> --- a/source/encoder/frameencoder.cpp Mon Apr 27 14:15:28 2015 -0500
> +++ b/source/encoder/frameencoder.cpp Tue Apr 28 18:24:44 2015 +0530
> @@ -823,7 +823,12 @@
> const uint32_t numCols = m_numCols;
> const uint32_t lineStartCUAddr = row * numCols;
> bool bIsVbv = m_param->rc.vbvBufferSize > 0 && m_param->rc.vbvMaxBitrate > 0;
> -
> + /* These states store the count of inter,intra and skip ctus within quad tree structure of each CU */
> + uint32_t qTreeInterCnt[4];
> + uint32_t qTreeIntraCnt[4];
> + uint32_t qTreeSkipCnt[4];
Queued, using an enum here instead of the hard-coded array size 4.
> + for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
> + qTreeIntraCnt[depth] = qTreeInterCnt[depth] = qTreeSkipCnt[depth] = 0;
> while (curRow.completed < numCols)
> {
> ProfileScopeEvent(encodeCTU);
> @@ -896,7 +901,7 @@
> curRow.completed++;
>
> if (m_param->bLogCuStats || m_param->rc.bStatWrite)
> - curEncData.m_rowStat[row].sumQpAq += collectCTUStatistics(*ctu);
> + curEncData.m_rowStat[row].sumQpAq += collectCTUStatistics(*ctu, qTreeInterCnt, qTreeIntraCnt, qTreeSkipCnt);
> else if (m_param->rc.aqMode)
> curEncData.m_rowStat[row].sumQpAq += calcCTUQP(*ctu);
>
> @@ -906,18 +911,17 @@
> curRow.rowStats.mvBits += best.mvBits;
> curRow.rowStats.coeffBits += best.coeffBits;
> curRow.rowStats.miscBits += best.totalBits - (best.mvBits + best.coeffBits);
> - StatisticLog* log = &m_sliceTypeLog[slice->m_sliceType];
>
> for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
> {
> /* 1 << shift == number of 8x8 blocks at current depth */
> int shift = 2 * (g_maxCUDepth - depth);
> - curRow.rowStats.iCuCnt += log->qTreeIntraCnt[depth] << shift;
> - curRow.rowStats.pCuCnt += log->qTreeInterCnt[depth] << shift;
> - curRow.rowStats.skipCuCnt += log->qTreeSkipCnt[depth] << shift;
> + curRow.rowStats.iCuCnt += qTreeIntraCnt[depth] << shift;
> + curRow.rowStats.pCuCnt += qTreeInterCnt[depth] << shift;
> + curRow.rowStats.skipCuCnt += qTreeSkipCnt[depth] << shift;
>
> // clear the row cu data from thread local object
> - log->qTreeIntraCnt[depth] = log->qTreeInterCnt[depth] = log->qTreeSkipCnt[depth] = 0;
> + qTreeIntraCnt[depth] = qTreeInterCnt[depth] = qTreeSkipCnt[depth] = 0;
> }
> }
>
> @@ -1094,7 +1098,7 @@
> }
>
> /* collect statistics about CU coding decisions, return total QP */
> -int FrameEncoder::collectCTUStatistics(const CUData& ctu)
> +int FrameEncoder::collectCTUStatistics(const CUData& ctu, uint32_t* qtreeInterCnt, uint32_t* qtreeIntraCnt, uint32_t* qtreeSkipCnt)
> {
> StatisticLog* log = &m_sliceTypeLog[ctu.m_slice->m_sliceType];
> int totQP = 0;
> @@ -1108,14 +1112,14 @@
>
> log->totalCu++;
> log->cntIntra[depth]++;
> - log->qTreeIntraCnt[depth]++;
> + qtreeIntraCnt[depth]++;
> totQP += ctu.m_qp[absPartIdx] * (ctu.m_numPartitions >> (depth * 2));
>
> if (ctu.m_predMode[absPartIdx] == MODE_NONE)
> {
> log->totalCu--;
> log->cntIntra[depth]--;
> - log->qTreeIntraCnt[depth]--;
> + qtreeIntraCnt[depth]--;
> }
> else if (ctu.m_partSize[absPartIdx] != SIZE_2Nx2N)
> {
> @@ -1150,12 +1154,12 @@
> {
> log->totalCu--;
> log->cntSkipCu[depth]++;
> - log->qTreeSkipCnt[depth]++;
> + qtreeSkipCnt[depth]++;
> }
> else if (ctu.isInter(absPartIdx))
> {
> log->cntInter[depth]++;
> - log->qTreeInterCnt[depth]++;
> + qtreeInterCnt[depth]++;
>
> if (ctu.m_partSize[absPartIdx] < AMP_ID)
> log->cuInterDistribution[depth][ctu.m_partSize[absPartIdx]]++;
> @@ -1165,7 +1169,7 @@
> else if (ctu.isIntra(absPartIdx))
> {
> log->cntIntra[depth]++;
> - log->qTreeIntraCnt[depth]++;
> + qtreeIntraCnt[depth]++;
>
> if (ctu.m_partSize[absPartIdx] != SIZE_2Nx2N)
> {
> diff -r 13290abce292 -r 9a66b31df0d0 source/encoder/frameencoder.h
> --- a/source/encoder/frameencoder.h Mon Apr 27 14:15:28 2015 -0500
> +++ b/source/encoder/frameencoder.h Tue Apr 28 18:24:44 2015 +0530
> @@ -63,11 +63,6 @@
> uint64_t cntTotalCu[4];
> uint64_t totalCu;
>
> - /* These states store the count of inter,intra and skip ctus within quad tree structure of each CU */
> - uint32_t qTreeInterCnt[4];
> - uint32_t qTreeIntraCnt[4];
> - uint32_t qTreeSkipCnt[4];
> -
> StatisticLog()
> {
> memset(this, 0, sizeof(StatisticLog));
> @@ -226,7 +221,7 @@
> void encodeSlice();
>
> void threadMain();
> - int collectCTUStatistics(const CUData& ctu);
> + int collectCTUStatistics(const CUData& ctu, uint32_t* qtreeInterCnt, uint32_t* qtreeIntraCnt, uint32_t* qtreeSkipCnt);
> int calcCTUQP(const CUData& ctu);
> void noiseReductionUpdate();
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
--
Steve Borho
More information about the x265-devel
mailing list