[x265] [PATCH] frameencoder: remove second encodeCU() pass over CTUs when SAO is disabled

Steve Borho steve at borho.org
Fri Sep 5 17:57:32 CEST 2014


# HG changeset patch
# User Steve Borho <steve at borho.org>
# Date 1409932577 -7200
#      Fri Sep 05 17:56:17 2014 +0200
# Node ID 07d69bce1760a28be1b1ee1821dfeb3335602422
# Parent  795878af39730deb24e2ee0e585c625084bb031b
frameencoder: remove second encodeCU() pass over CTUs when SAO is disabled

This is a performance optimization: it allows the encoder to generate the final
bitstream of each CTU as soon as it is compressed, while it is still cache hot.

When SAO is enabled, SAO analysis must be performed and coded at the start of
the CTU, but SAO analysis currently requires surrounding CTUs to be encoded,
making the second pass unavoidable.

diff -r 795878af3973 -r 07d69bce1760 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Fri Sep 05 16:03:44 2014 +0200
+++ b/source/encoder/frameencoder.cpp	Fri Sep 05 17:56:17 2014 +0200
@@ -192,16 +192,6 @@
         }
     }
 
-    uint32_t numSubstreams = m_param->bEnableWavefront ? m_frame->getPicSym()->getFrameHeightInCU() : 1;
-    if (!m_outStreams)
-    {
-        m_outStreams = new Bitstream[numSubstreams];
-        m_substreamSizes = X265_MALLOC(uint32_t, numSubstreams);
-    }
-    else
-        for (uint32_t i = 0; i < numSubstreams; i++)
-            m_outStreams[i].resetBits();
-
     /* Get the QP for this frame from rate control. This call may block until
      * frames ahead of it in encode order have called rateControlEnd() */
     int qp = m_top->m_rateControl->rateControlStart(m_frame, &m_rce, m_top);
@@ -214,6 +204,24 @@
 
     m_frameFilter.start(m_frame, m_initSliceContext, qp);
 
+    // reset entropy coders
+    m_entropyCoder.load(m_initSliceContext);
+    for (int i = 0; i < m_numRows; i++)
+        m_rows[i].init(m_initSliceContext);
+
+    uint32_t numSubstreams = m_param->bEnableWavefront ? m_frame->getPicSym()->getFrameHeightInCU() : 1;
+    if (!m_outStreams)
+    {
+        m_outStreams = new Bitstream[numSubstreams];
+        m_substreamSizes = X265_MALLOC(uint32_t, numSubstreams);
+        if (!m_param->bEnableSAO)
+            for (uint32_t i = 0; i < numSubstreams; i++)
+                m_rows[i].rdEntropyCoders[0][CI_CURR_BEST].setBitstream(&m_outStreams[i]);
+    }
+    else
+        for (uint32_t i = 0; i < numSubstreams; i++)
+            m_outStreams[i].resetBits();
+
     if (m_frame->m_lowres.bKeyframe)
     {
         if (m_param->bEmitHRDSEI)
@@ -328,7 +336,7 @@
     m_entropyCoder.setBitstream(&m_bs);
     m_entropyCoder.codeSliceHeader(slice);
 
-    // re-encode each row of CUs for the final time (TODO: get rid of this second pass)
+    // finish encode of each CTU row
     encodeSlice();
 
     // serialize each row, record final lengths in slice header
@@ -409,8 +417,40 @@
     const uint32_t widthInLCUs = m_frame->getPicSym()->getFrameWidthInCU();
     const uint32_t lastCUAddr = (slice->m_endCUAddr + m_frame->getNumPartInCU() - 1) / m_frame->getNumPartInCU();
     const int numSubstreams = m_param->bEnableWavefront ? m_frame->getPicSym()->getFrameHeightInCU() : 1;
+
+    if (!m_param->bEnableSAO)
+    {
+        /* terminate each row and collect stats */
+        for (uint32_t cuAddr = 0; cuAddr < lastCUAddr; cuAddr++)
+        {
+            uint32_t col = cuAddr % widthInLCUs;
+
+            if (m_param->bEnableWavefront && col == widthInLCUs - 1)
+            {
+                uint32_t lin = cuAddr / widthInLCUs;
+                uint32_t subStrm = lin % numSubstreams;
+                m_rows[subStrm].rdEntropyCoders[0][CI_CURR_BEST].codeTerminatingBit(1);
+                m_rows[subStrm].rdEntropyCoders[0][CI_CURR_BEST].codeSliceFinish();
+                m_outStreams[subStrm].writeByteAlignment();
+            }
+
+            // Collect Frame Stats for 2 pass
+            TComDataCU* cu = m_frame->getCU(cuAddr);
+            m_frameStats.mvBits += cu->m_mvBits;
+            m_frameStats.coeffBits += cu->m_coeffBits;
+            m_frameStats.miscBits += cu->m_totalBits - (cu->m_mvBits + cu->m_coeffBits);
+        }
+        if (!m_param->bEnableWavefront)
+        {
+            m_rows[0].rdEntropyCoders[0][CI_CURR_BEST].codeTerminatingBit(1);
+            m_rows[0].rdEntropyCoders[0][CI_CURR_BEST].codeSliceFinish();
+            m_outStreams[0].writeByteAlignment();
+        }
+
+        return;
+    }
+
     SAOParam *saoParam = slice->m_pic->getPicSym()->m_saoParam;
-
     for (uint32_t cuAddr = 0; cuAddr < lastCUAddr; cuAddr++)
     {
         uint32_t col = cuAddr % widthInLCUs;
@@ -487,11 +527,6 @@
     PPAScopeEvent(FrameEncoder_compressRows);
     Slice* slice = m_frame->m_picSym->m_slice;
 
-    // reset entropy coders
-    m_entropyCoder.load(m_initSliceContext);
-    for (int i = 0; i < m_numRows; i++)
-        m_rows[i].init(m_initSliceContext);
-
     m_bAllRowsStop = false;
     m_vbvResetTriggerRow = -1;
 
@@ -672,15 +707,17 @@
         }
 
         if (m_param->bEnableWavefront && col == 0 && row > 0)
+        {
             // Load SBAC coder context from previous row.
+            curRow.rdEntropyCoders[0][CI_CURR_BEST].copyState(m_initSliceContext);
             curRow.rdEntropyCoders[0][CI_CURR_BEST].loadContexts(m_rows[row - 1].bufferEntropyCoder);
+        }
 
         tld.cuCoder.m_quant.setQPforQuant(cu);
         tld.cuCoder.compressCU(cu); // Does all the CU analysis
 
         /* advance top-level CI_CURR_BEST to include the context of this CTU.
-         * Note that if SAO was disabled this could directly write to a
-         * bitstream object and we could skip most of encodeSlice() */
+         * if SAO is disabled, this writes final CTU bitstream */
         curRow.rdEntropyCoders[0][CI_CURR_BEST].encodeCU(cu);
 
         if (m_param->bEnableWavefront && col == 1)


More information about the x265-devel mailing list