[x265] [PATCH] get same output between single and multi threading
Steve Borho
steve at borho.org
Mon Sep 30 21:17:52 CEST 2013
On Mon, Sep 30, 2013 at 1:23 AM, Min Chen <chenm003 at 163.com> wrote:
> # HG changeset patch
> # User Min Chen <chenm003 at 163.com>
> # Date 1380521904 -28800
> # Node ID cd4d431e292d0935c154daf0917dbb6816cdbca8
> # Parent 55edc34e253c14d3eccb83a7d1db43774349ff9a
> get same output between single and multi threading
>
> Output mistake reasons:
>
> 1. CABAC Table Initialize
> The HM decides the best table for CABAC, but with frame parallelism
> we can't get this information before the thread starts, so we have to
> disable it for now.
> Side effect: may lose some compression performance
>
> 2. SAO Global Disable
> The HM decides the SAO global disable flag depending on the previous slice
> of the same type, but here we can't get the right statistics information
> before starting, so we have to disable it.
> Side effect: more computational cost since we always try SAO on every LCU
>
> 3. CABAC status m_fracBits is not reset.
> This HM bug is still alive; we found more instances of it here.
>
> diff -r 55edc34e253c -r cd4d431e292d source/Lib/TLibEncoder/TEncTop.cpp
> --- a/source/Lib/TLibEncoder/TEncTop.cpp Sat Sep 28 22:54:44 2013
> -0500
> +++ b/source/Lib/TLibEncoder/TEncTop.cpp Mon Sep 30 14:18:24 2013
> +0800
> @@ -823,7 +823,7 @@
> pps->setDeblockingFilterOverrideEnabledFlag(!m_loopFilterOffsetInPPS);
> pps->setPicDisableDeblockingFilterFlag(!param.bEnableLoopFilter);
> pps->setLog2ParallelMergeLevelMinus2(m_log2ParallelMergeLevelMinus2);
> - pps->setCabacInitPresentFlag(CABAC_INIT_PRESENT_FLAG);
> + pps->setCabacInitPresentFlag(param.frameNumThreads > 1 ? 0 :
> CABAC_INIT_PRESENT_FLAG);
>
> pps->setNumRefIdxL0DefaultActive(1);
> pps->setNumRefIdxL1DefaultActive(1);
> diff -r 55edc34e253c -r cd4d431e292d source/encoder/cturow.h
> --- a/source/encoder/cturow.h Sat Sep 28 22:54:44 2013 -0500
> +++ b/source/encoder/cturow.h Mon Sep 30 14:18:24 2013 +0800
> @@ -63,9 +63,22 @@
>
> void destroy();
>
> - void init()
> + void init(TComSlice *slice)
> {
> m_active = 0;
> +
> + // Note: Reset status to avoid frame parallelism output mistake
> on different thread number
> + for (UInt depth = 0; depth < g_maxCUDepth + 1; depth++)
> + {
> + for (int ciIdx = 0; ciIdx < CI_NUM; ciIdx++)
> + {
> + m_rdSbacCoders[depth][ciIdx]->setSlice(slice);
> + m_rdSbacCoders[depth][ciIdx]->resetEntropy();
> + m_binCodersCABAC[depth][ciIdx]->m_fracBits = 0;
> + }
> + }
> + m_rdGoOnSbacCoder.setSlice(slice);
> + m_rdGoOnSbacCoder.resetEntropy();
> }
>
> void processCU(TComDataCU *cu, TComSlice *slice, TEncSbac
> *bufferSBac, bool bSaveCabac);
> diff -r 55edc34e253c -r cd4d431e292d source/encoder/frameencoder.cpp
> --- a/source/encoder/frameencoder.cpp Sat Sep 28 22:54:44 2013 -0500
> +++ b/source/encoder/frameencoder.cpp Mon Sep 30 14:18:24 2013 +0800
> @@ -877,12 +877,14 @@
> void FrameEncoder::compressCTURows()
> {
> PPAScopeEvent(FrameEncoder_compressRows);
> + TComSlice* slice = m_pic->getSlice();
> +
> // reset entropy coders
> m_sbacCoder.init(&m_binCoderCABAC);
> for (int i = 0; i < this->m_numRows; i++)
> {
> - m_rows[i].init();
> - m_rows[i].m_entropyCoder.setEntropyCoder(&m_sbacCoder,
> m_pic->getSlice());
> + m_rows[i].init(slice);
> + m_rows[i].m_entropyCoder.setEntropyCoder(&m_sbacCoder, slice);
> m_rows[i].m_entropyCoder.resetEntropy();
>
> m_rows[i].m_rdSbacCoders[0][CI_CURR_BEST]->load(&m_sbacCoder);
> @@ -891,7 +893,6 @@
> }
>
> UInt refLagRows = ((m_cfg->param.searchRange + NTAPS_LUMA/2 +
> g_maxCUHeight - 1) / g_maxCUHeight) + 1;
> - TComSlice* slice = m_pic->getSlice();
> int numPredDir = slice->isInterP() ? 1 : slice->isInterB() ? 2 : 0;
>
> m_frameFilter.start(m_pic);
> diff -r 55edc34e253c -r cd4d431e292d source/encoder/framefilter.cpp
> --- a/source/encoder/framefilter.cpp Sat Sep 28 22:54:44 2013 -0500
> +++ b/source/encoder/framefilter.cpp Mon Sep 30 14:18:24 2013 +0800
> @@ -98,6 +98,13 @@
> SAOParam* saoParam = pic->getPicSym()->getSaoParam();
> m_sao.resetSAOParam(saoParam);
> m_sao.rdoSaoUnitRowInit(saoParam);
> +
> + // NOTE: Disable SAO automatic turn off when Frame Parallelism
> for output exact
> + if (m_cfg->param.frameNumThreads)
>
param.frameNumThreads is always != 0. Should this be checking for > 1?
> + {
> + saoParam->bSaoFlag[0] = true;
> + saoParam->bSaoFlag[1] = true;
> + }
> }
> }
>
>
--
Steve Borho
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20130930/21bb20bc/attachment.html>
More information about the x265-devel
mailing list