[x265] [PATCH] frameencoder: remove second encodeCU() pass over CTUs when SAO is disabled
Steve Borho
steve at borho.org
Mon Sep 8 13:37:16 CEST 2014
On 09/08, Deepthi Nandakumar wrote:
> On Sat, Sep 6, 2014 at 10:08 PM, Steve Borho <steve at borho.org> wrote:
>
> > # HG changeset patch
> > # User Steve Borho <steve at borho.org>
> > # Date 1409932577 -7200
> > # Fri Sep 05 17:56:17 2014 +0200
> > # Node ID 07d69bce1760a28be1b1ee1821dfeb3335602422
> > # Parent 795878af39730deb24e2ee0e585c625084bb031b
> > frameencoder: remove second encodeCU() pass over CTUs when SAO is disabled
> >
> > This is a performance optimization: it allows the encoder to generate the
> > final bitstream of each CTU as it is compressed and cache hot.
> >
> > When SAO is enabled, SAO analysis must be performed and coded at the start
> > of the CTU, but SAO analysis currently requires surrounding CTUs to be
> > encoded, making the second pass unavoidable.
> >
> > diff -r 795878af3973 -r 07d69bce1760 source/encoder/frameencoder.cpp
> > --- a/source/encoder/frameencoder.cpp Fri Sep 05 16:03:44 2014 +0200
> > +++ b/source/encoder/frameencoder.cpp Fri Sep 05 17:56:17 2014 +0200
> > @@ -192,16 +192,6 @@
> > }
> > }
> >
> > - uint32_t numSubstreams = m_param->bEnableWavefront ?
> > m_frame->getPicSym()->getFrameHeightInCU() : 1;
> > - if (!m_outStreams)
> > - {
> > - m_outStreams = new Bitstream[numSubstreams];
> > - m_substreamSizes = X265_MALLOC(uint32_t, numSubstreams);
> > - }
> > - else
> > - for (uint32_t i = 0; i < numSubstreams; i++)
> > - m_outStreams[i].resetBits();
> > -
> > /* Get the QP for this frame from rate control. This call may block
> > until
> > * frames ahead of it in encode order have called rateControlEnd() */
> > int qp = m_top->m_rateControl->rateControlStart(m_frame, &m_rce,
> > m_top);
> > @@ -214,6 +204,24 @@
> >
> > m_frameFilter.start(m_frame, m_initSliceContext, qp);
> >
> > + // reset entropy coders
> > + m_entropyCoder.load(m_initSliceContext);
> > + for (int i = 0; i < m_numRows; i++)
> > + m_rows[i].init(m_initSliceContext);
> > +
> > + uint32_t numSubstreams = m_param->bEnableWavefront ?
> > m_frame->getPicSym()->getFrameHeightInCU() : 1;
> > + if (!m_outStreams)
> > + {
> > + m_outStreams = new Bitstream[numSubstreams];
> > + m_substreamSizes = X265_MALLOC(uint32_t, numSubstreams);
> > + if (!m_param->bEnableSAO)
> > + for (uint32_t i = 0; i < numSubstreams; i++)
> > +
> > m_rows[i].rdEntropyCoders[0][CI_CURR_BEST].setBitstream(&m_outStreams[i]);
> > + }
> > + else
> > + for (uint32_t i = 0; i < numSubstreams; i++)
> > + m_outStreams[i].resetBits();
> > +
> > if (m_frame->m_lowres.bKeyframe)
> > {
> > if (m_param->bEmitHRDSEI)
> > @@ -328,7 +336,7 @@
> > m_entropyCoder.setBitstream(&m_bs);
> > m_entropyCoder.codeSliceHeader(slice);
> >
> > - // re-encode each row of CUs for the final time (TODO: get rid of
> > this second pass)
> > + // finish encode of each CTU row
> > encodeSlice();
> >
> > // serialize each row, record final lengths in slice header
> > @@ -409,8 +417,40 @@
> > const uint32_t widthInLCUs =
> > m_frame->getPicSym()->getFrameWidthInCU();
> > const uint32_t lastCUAddr = (slice->m_endCUAddr +
> > m_frame->getNumPartInCU() - 1) / m_frame->getNumPartInCU();
> > const int numSubstreams = m_param->bEnableWavefront ?
> > m_frame->getPicSym()->getFrameHeightInCU() : 1;
> > +
> > + if (!m_param->bEnableSAO)
> > + {
> > + /* terminate each row and collect stats */
> > + for (uint32_t cuAddr = 0; cuAddr < lastCUAddr; cuAddr++)
> > + {
> > + uint32_t col = cuAddr % widthInLCUs;
> > +
> > + if (m_param->bEnableWavefront && col == widthInLCUs - 1)
> > + {
> > + uint32_t lin = cuAddr / widthInLCUs;
> > + uint32_t subStrm = lin % numSubstreams;
> > +
> > m_rows[subStrm].rdEntropyCoders[0][CI_CURR_BEST].codeTerminatingBit(1);
> > +
> > m_rows[subStrm].rdEntropyCoders[0][CI_CURR_BEST].codeSliceFinish();
> > + m_outStreams[subStrm].writeByteAlignment();
> > + }
> > +
> > + // Collect Frame Stats for 2 pass
> > + TComDataCU* cu = m_frame->getCU(cuAddr);
> > + m_frameStats.mvBits += cu->m_mvBits;
> > + m_frameStats.coeffBits += cu->m_coeffBits;
> > + m_frameStats.miscBits += cu->m_totalBits - (cu->m_mvBits +
> > cu->m_coeffBits);
> > + }
> > + if (!m_param->bEnableWavefront)
> > + {
> > +
> > m_rows[0].rdEntropyCoders[0][CI_CURR_BEST].codeTerminatingBit(1);
> > + m_rows[0].rdEntropyCoders[0][CI_CURR_BEST].codeSliceFinish();
> > + m_outStreams[0].writeByteAlignment();
> > + }
> > +
> > + return;
> > + }
> > +
> > SAOParam *saoParam = slice->m_pic->getPicSym()->m_saoParam;
> > -
> > for (uint32_t cuAddr = 0; cuAddr < lastCUAddr; cuAddr++)
> > {
> > uint32_t col = cuAddr % widthInLCUs;
> > @@ -487,11 +527,6 @@
> > PPAScopeEvent(FrameEncoder_compressRows);
> > Slice* slice = m_frame->m_picSym->m_slice;
> >
> > - // reset entropy coders
> > - m_entropyCoder.load(m_initSliceContext);
> > - for (int i = 0; i < m_numRows; i++)
> > - m_rows[i].init(m_initSliceContext);
> > -
> > m_bAllRowsStop = false;
> > m_vbvResetTriggerRow = -1;
> >
> > @@ -672,15 +707,17 @@
> > }
> >
> > if (m_param->bEnableWavefront && col == 0 && row > 0)
> > + {
> > // Load SBAC coder context from previous row.
> > +
> > curRow.rdEntropyCoders[0][CI_CURR_BEST].copyState(m_initSliceContext);
> >
> > curRow.rdEntropyCoders[0][CI_CURR_BEST].loadContexts(m_rows[row -
> > 1].bufferEntropyCoder);
> >
>
> It's the same thing in encodeSlice as well, but why are we copying state
> from m_initSliceContext, and contexts from the saved previous-row coder?
> Shouldn't both state and contexts be copied from the previous-row coder?
I don't know the exact reasons for it, but with WPP only the contexts
are copied from the row above. The rest of the state is initialized
uniformly for each row.
--
Steve Borho
More information about the x265-devel
mailing list