[x265] [PATCH] nal: refactor nal code, marshall packets directly into output buffer

Steve Borho steve at borho.org
Thu Jun 19 23:37:39 CEST 2014


# HG changeset patch
# User Steve Borho <steve at borho.org>
# Date 1403212948 18000
#      Thu Jun 19 16:22:28 2014 -0500
# Node ID ba9c58a4bee005f82688ca9907b87916a7cf0e9d
# Parent  ecccd5401d27b3aa5f2333295c933518653d73ef
nal: refactor nal code, marshall packets directly into output buffer

This removes two malloc/free pairs for every NAL unit and removes yet another
set of memcopies at the end of each frame encode. We're now writing the escaped
NAL packets directly into the buffer handed back to the user.

We preserve the max size of this output buffer to prevent having to do any
reallocations once the encoder is running.

diff -r ecccd5401d27 -r ba9c58a4bee0 source/common/common.h
--- a/source/common/common.h	Thu Jun 19 22:13:36 2014 +0900
+++ b/source/common/common.h	Thu Jun 19 16:22:28 2014 -0500
@@ -156,8 +156,6 @@
 #define X265_LOWRES_CU_SIZE   8
 #define X265_LOWRES_CU_BITS   3
 
-#define MAX_NAL_UNITS 12
-
 #define X265_MALLOC(type, count)    (type*)x265_malloc(sizeof(type) * (count))
 #define X265_FREE(ptr)              x265_free(ptr)
 #define CHECKED_MALLOC(var, type, count) \
diff -r ecccd5401d27 -r ba9c58a4bee0 source/encoder/api.cpp
--- a/source/encoder/api.cpp	Thu Jun 19 22:13:36 2014 +0900
+++ b/source/encoder/api.cpp	Thu Jun 19 16:22:28 2014 -0500
@@ -71,30 +71,16 @@
 extern "C"
 int x265_encoder_headers(x265_encoder *enc, x265_nal **pp_nal, uint32_t *pi_nal)
 {
-    if (!pp_nal || !enc)
-        return -1;
-
-    Encoder *encoder = static_cast<Encoder*>(enc);
-
-    int ret = 0;
-    NALUnit *nalunits[MAX_NAL_UNITS];
-    memset(nalunits, 0, sizeof(nalunits));
-    if (encoder->getStreamHeaders(nalunits) > 0)
+    if (pp_nal && enc)
     {
-        int nalcount = encoder->extractNalData(nalunits, ret);
-        *pp_nal = &encoder->m_nals[0];
-        if (pi_nal) *pi_nal = nalcount;
-    }
-    else if (pi_nal)
-    {
-        *pi_nal = 0;
-        ret = -1;
+        Encoder *encoder = static_cast<Encoder*>(enc);
+        encoder->getStreamHeaders();
+        *pp_nal = &encoder->m_nalList.m_nal[0];
+        if (pi_nal) *pi_nal = encoder->m_nalList.m_numNal;
+        return encoder->m_nalList.m_occupancy;
     }
 
-    for (int i = 0; i < MAX_NAL_UNITS; i++)
-        delete nalunits[i];
-
-    return ret;
+    return -1;
 }
 
 extern "C"
@@ -114,23 +100,16 @@
         return -1;
 
     Encoder *encoder = static_cast<Encoder*>(enc);
-    NALUnit *nalunits[MAX_NAL_UNITS];
-    memset(nalunits, 0, sizeof(nalunits));
-    int numEncoded = encoder->encode(!pic_in, pic_in, pic_out, nalunits);
+    int numEncoded = encoder->encode(!pic_in, pic_in, pic_out);
 
     if (pp_nal && numEncoded > 0)
     {
-        int memsize;
-        int nalcount = encoder->extractNalData(nalunits, memsize);
-        *pp_nal = &encoder->m_nals[0];
-        if (pi_nal) *pi_nal = nalcount;
+        *pp_nal = &encoder->m_nalList.m_nal[0];
+        if (pi_nal) *pi_nal = encoder->m_nalList.m_numNal;
     }
     else if (pi_nal)
         *pi_nal = 0;
 
-    for (int i = 0; i < MAX_NAL_UNITS; i++)
-        delete nalunits[i];
-
     return numEncoded;
 }
 
diff -r ecccd5401d27 -r ba9c58a4bee0 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Thu Jun 19 22:13:36 2014 +0900
+++ b/source/encoder/encoder.cpp	Thu Jun 19 16:22:28 2014 -0500
@@ -144,7 +144,7 @@
         for (int i = 0; i < m_totalFrameThreads; i++)
         {
             // Ensure frame encoder is idle before destroying it
-            m_frameEncoder[i].getEncodedPicture(NULL);
+            m_frameEncoder[i].getEncodedPicture(m_nalList);
             m_frameEncoder[i].destroy();
         }
 
@@ -190,9 +190,10 @@
     m_encodeStartTime = x265_mdate();
 }
 
-int Encoder::getStreamHeaders(NALUnit **nalunits)
+void Encoder::getStreamHeaders()
 {
-    return m_frameEncoder->getStreamHeaders(nalunits);
+    TComOutputBitstream bs;
+    m_frameEncoder->getStreamHeaders(m_nalList, bs);
 }
 
 void Encoder::updateVbvPlan(RateControl* rc)
@@ -215,49 +216,13 @@
     }
 }
 
-#define VERBOSE_RATE 0
-#if VERBOSE_RATE
-static const char* nalUnitTypeToString(NalUnitType type)
-{
-    switch (type)
-    {
-    case NAL_UNIT_CODED_SLICE_TRAIL_R:    return "TRAIL_R";
-    case NAL_UNIT_CODED_SLICE_TRAIL_N:    return "TRAIL_N";
-    case NAL_UNIT_CODED_SLICE_TLA_R:      return "TLA_R";
-    case NAL_UNIT_CODED_SLICE_TSA_N:      return "TSA_N";
-    case NAL_UNIT_CODED_SLICE_STSA_R:     return "STSA_R";
-    case NAL_UNIT_CODED_SLICE_STSA_N:     return "STSA_N";
-    case NAL_UNIT_CODED_SLICE_BLA_W_LP:   return "BLA_W_LP";
-    case NAL_UNIT_CODED_SLICE_BLA_W_RADL: return "BLA_W_RADL";
-    case NAL_UNIT_CODED_SLICE_BLA_N_LP:   return "BLA_N_LP";
-    case NAL_UNIT_CODED_SLICE_IDR_W_RADL: return "IDR_W_RADL";
-    case NAL_UNIT_CODED_SLICE_IDR_N_LP:   return "IDR_N_LP";
-    case NAL_UNIT_CODED_SLICE_CRA:        return "CRA";
-    case NAL_UNIT_CODED_SLICE_RADL_R:     return "RADL_R";
-    case NAL_UNIT_CODED_SLICE_RASL_R:     return "RASL_R";
-    case NAL_UNIT_VPS:                    return "VPS";
-    case NAL_UNIT_SPS:                    return "SPS";
-    case NAL_UNIT_PPS:                    return "PPS";
-    case NAL_UNIT_ACCESS_UNIT_DELIMITER:  return "AUD";
-    case NAL_UNIT_EOS:                    return "EOS";
-    case NAL_UNIT_EOB:                    return "EOB";
-    case NAL_UNIT_FILLER_DATA:            return "FILLER";
-    case NAL_UNIT_PREFIX_SEI:             return "SEI";
-    case NAL_UNIT_SUFFIX_SEI:             return "SEI";
-    default:                              return "UNK";
-    }
-}
-
-#endif // if VERBOSE_RATE
-
 /**
  \param   flush               force encoder to encode a frame
  \param   pic_in              input original YUV picture or NULL
  \param   pic_out             pointer to reconstructed picture struct
- \param   nalunits            output NAL packets
- \retval                      number of encoded pictures
+ \retval                      number of encoded pictures, m_nalList contains access unit
  */
-int Encoder::encode(bool flush, const x265_picture* pic_in, x265_picture *pic_out, NALUnit **nalunits)
+int Encoder::encode(bool flush, const x265_picture* pic_in, x265_picture *pic_out)
 {
     if (m_aborted)
         return -1;
@@ -348,7 +313,7 @@
     // getEncodedPicture() should block until the FrameEncoder has completed
     // encoding the frame.  This is how back-pressure through the API is
     // accomplished when the encoder is full.
-    TComPic *out = curEncoder->getEncodedPicture(nalunits);
+    TComPic *out = curEncoder->getEncodedPicture(m_nalList);
 
     if (!out && flush)
     {
@@ -362,7 +327,7 @@
         {
             curEncoder = &m_frameEncoder[m_curEncoder];
             m_curEncoder = (m_curEncoder + 1) % m_param->frameNumThreads;
-            out = curEncoder->getEncodedPicture(nalunits);
+            out = curEncoder->getEncodedPicture(m_nalList);
         }
         while (!out && flushed != m_curEncoder);
     }
@@ -426,26 +391,21 @@
                 m_numChromaWPBiFrames++;
         }
 
-        /* calculate the size of the access unit, excluding:
-         *  - any AnnexB contributions (start_code_prefix, zero_byte, etc.,)
-         *  - SEI NAL units
-         */
-        uint32_t numRBSPBytes = 0;
-        for (int count = 0; nalunits[count] != NULL; count++)
+        uint64_t bytes = 0;
+        for (uint32_t i = 0; i < m_nalList.m_numNal; i++)
         {
-            uint32_t numRBSPBytes_nal = nalunits[count]->m_packetSize;
-#if VERBOSE_RATE
-            printf("*** %6s numBytesInNALunit: %u\n", nalUnitTypeToString(nalunits[count]->m_nalUnitType), numRBSPBytes_nal);
-#endif
-            if (nalunits[count]->m_nalUnitType != NAL_UNIT_PREFIX_SEI && nalunits[count]->m_nalUnitType != NAL_UNIT_SUFFIX_SEI)
+            int type = m_nalList.m_nal[i].type;
+
+            // exclude SEI
+            if (type != NAL_UNIT_PREFIX_SEI && type != NAL_UNIT_SUFFIX_SEI)
             {
-                numRBSPBytes += numRBSPBytes_nal;
+                bytes += m_nalList.m_nal[i].sizeBytes;
+                // and exclude start code prefix
+                bytes -= (!i || type == NAL_UNIT_SPS || type == NAL_UNIT_PPS) ? 4 : 3;
             }
         }
-
-        uint64_t bits = numRBSPBytes * 8;
-        m_rateControl->rateControlEnd(out, bits, &curEncoder->m_rce);
-        finishFrameStats(out, curEncoder, bits);
+        m_rateControl->rateControlEnd(out, bytes << 3, &curEncoder->m_rce);
+        finishFrameStats(out, curEncoder, bytes << 3);
 
         // Allow this frame to be recycled if no frame encoders are using it for reference
         if (!pic_out)
@@ -1456,67 +1416,3 @@
     m_pcmLog2MaxSize = 5;
     m_bPCMFilterDisableFlag = false;
 }
-
-int Encoder::extractNalData(NALUnit **nalunits, int& memsize)
-{
-    int offset = 0;
-    int nalcount = 0;
-    int num = 0;
-
-    memsize = 0;
-    for (; num < MAX_NAL_UNITS && nalunits[num] != NULL; num++)
-    {
-        const NALUnit& temp = *nalunits[num];
-        memsize += temp.m_packetSize + 4;
-    }
-
-    X265_FREE(m_packetData);
-    X265_FREE(m_nals);
-    CHECKED_MALLOC(m_packetData, char, memsize);
-    CHECKED_MALLOC(m_nals, x265_nal, num);
-
-    memsize = 0;
-
-    /* Copy NAL output packets into x265_nal_t structures */
-    for (; nalcount < num; nalcount++)
-    {
-        const NALUnit& nalu = *nalunits[nalcount];
-        int size; /* size of annexB unit in bytes */
-
-        static const char start_code_prefix[] = { 0, 0, 0, 1 };
-        if (nalcount == 0 || nalu.m_nalUnitType == NAL_UNIT_SPS || nalu.m_nalUnitType == NAL_UNIT_PPS)
-        {
-            /* From AVC, When any of the following conditions are fulfilled, the
-             * zero_byte syntax element shall be present:
-             *  - the nal_unit_type within the nal_unit() is equal to 7 (sequence
-             *    parameter set) or 8 (picture parameter set),
-             *  - the byte stream NAL unit syntax structure contains the first NAL
-             *    unit of an access unit in decoding order, as specified by subclause
-             *    7.4.1.2.3.
-             */
-            ::memcpy(m_packetData + memsize, start_code_prefix, 4);
-            size = 4;
-        }
-        else
-        {
-            ::memcpy(m_packetData + memsize, start_code_prefix + 1, 3);
-            size = 3;
-        }
-        memsize += size;
-        ::memcpy(m_packetData + memsize, nalu.m_nalUnitData, nalu.m_packetSize);
-        memsize += nalu.m_packetSize;
-
-        m_nals[nalcount].type = nalu.m_nalUnitType;
-        m_nals[nalcount].sizeBytes = size + nalu.m_packetSize;
-    }
-
-    /* Setup payload pointers, now that we're done adding content to m_packetData */
-    for (int i = 0; i < nalcount; i++)
-    {
-        m_nals[i].payload = (uint8_t*)m_packetData + offset;
-        offset += m_nals[i].sizeBytes;
-    }
-
-fail:
-    return nalcount;
-}
diff -r ecccd5401d27 -r ba9c58a4bee0 source/encoder/encoder.h
--- a/source/encoder/encoder.h	Thu Jun 19 22:13:36 2014 +0900
+++ b/source/encoder/encoder.h	Thu Jun 19 16:22:28 2014 -0500
@@ -25,10 +25,8 @@
 #define X265_ENCODER_H
 
 #include "x265.h"
-
 #include "TLibCommon/TComSlice.h"
-
-#include "piclist.h"
+#include "nal.h"
 
 struct x265_encoder {};
 
@@ -67,7 +65,6 @@
 class Lookahead;
 class RateControl;
 class ThreadPool;
-struct NALUnit;
 
 class Encoder : public x265_encoder
 {
@@ -114,6 +111,7 @@
 
     int                m_conformanceMode;
     TComVPS            m_vps;
+    NALList            m_nalList;
 
     /* profile & level */
     Profile::Name      m_profile;
@@ -200,9 +198,9 @@
     void initSPS(TComSPS *sps);
     void initPPS(TComPPS *pps);
 
-    int encode(bool bEos, const x265_picture* pic, x265_picture *pic_out, NALUnit **nalunits);
+    int encode(bool bEos, const x265_picture* pic, x265_picture *pic_out);
 
-    int getStreamHeaders(NALUnit **nalunits);
+    void getStreamHeaders();
 
     void fetchStats(x265_stats* stats, size_t statsSizeBytes);
 
@@ -218,8 +216,6 @@
 
     void configure(x265_param *param);
 
-    int  extractNalData(NALUnit **nalunits, int& memsize);
-
     void updateVbvPlan(RateControl* rc);
 
 protected:
diff -r ecccd5401d27 -r ba9c58a4bee0 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Thu Jun 19 22:13:36 2014 +0900
+++ b/source/encoder/frameencoder.cpp	Thu Jun 19 16:22:28 2014 -0500
@@ -50,10 +50,6 @@
     , m_param(NULL)
     , m_pic(NULL)
 {
-    for (int i = 0; i < MAX_NAL_UNITS; i++)
-        m_nalList[i] = NULL;
-
-    m_nalCount = 0;
     m_totalTime = 0;
     m_bAllRowsStop = false;
     m_vbvResetTriggerRow = -1;
@@ -73,10 +69,6 @@
     m_threadActive = false;
     m_enable.trigger();
 
-    // flush condition, release queued NALs
-    for (int i = 0; i < m_nalCount; i++)
-        delete m_nalList[i];
-
     if (m_rows)
     {
         for (int i = 0; i < m_numRows; ++i)
@@ -229,42 +221,26 @@
     }
 }
 
-int FrameEncoder::getStreamHeaders(NALUnit **nalunits)
+void FrameEncoder::getStreamHeaders(NALList& list, TComOutputBitstream& bs)
 {
-    TComOutputBitstream bs;
     TEncEntropy* entropyCoder = getEntropyCoder(0);
 
+    /* headers for start of bitstream */
     entropyCoder->setEntropyCoder(&m_sbacCoder, NULL);
     entropyCoder->setBitstream(&bs);
+    entropyCoder->encodeVPS(&m_top->m_vps);
+    bs.writeByteAlignment();
+    list.serialize(NAL_UNIT_VPS, bs);
 
-    int count = 0;
+    bs.clear();
+    entropyCoder->encodeSPS(&m_sps);
+    bs.writeByteAlignment();
+    list.serialize(NAL_UNIT_SPS, bs);
 
-    /* headers for start of bitstream */
-    nalunits[count] = new NALUnit;
-    if (nalunits[count])
-    {
-        entropyCoder->encodeVPS(&m_top->m_vps);
-        bs.writeByteAlignment();
-        nalunits[count++]->serialize(NAL_UNIT_VPS, bs);
-    }
-
-    nalunits[count] = new NALUnit;
-    if (nalunits[count])
-    {
-        bs.clear();
-        entropyCoder->encodeSPS(&m_sps);
-        bs.writeByteAlignment();
-        nalunits[count++]->serialize(NAL_UNIT_SPS, bs);
-    }
-
-    nalunits[count] = new NALUnit;
-    if (nalunits[count])
-    {
-        bs.clear();
-        entropyCoder->encodePPS(&m_pps);
-        bs.writeByteAlignment();
-        nalunits[count++]->serialize(NAL_UNIT_PPS, bs);
-    }
+    bs.clear();
+    entropyCoder->encodePPS(&m_pps);
+    bs.writeByteAlignment();
+    list.serialize(NAL_UNIT_PPS, bs);
 
     if (m_param->bEmitHRDSEI)
     {
@@ -275,16 +251,10 @@
         sei.m_numSpsIdsMinus1 = 0;
         sei.m_activeSeqParamSetId = m_sps.getSPSId();
 
-        nalunits[count] = new NALUnit;
-        if (nalunits[count])
-        {
-            bs.clear();
-            sei.write(bs, m_sps);
-            nalunits[count++]->serialize(NAL_UNIT_PREFIX_SEI, bs);
-        }
+        bs.clear();
+        sei.write(bs, m_sps);
+        list.serialize(NAL_UNIT_PREFIX_SEI, bs);
     }
-
-    return count;
 }
 
 void FrameEncoder::initSlice(TComPic* pic)
@@ -389,7 +359,6 @@
     int          chFmt             = slice->getSPS()->getChromaFormatIdc();
     int          totalCoded        = (int)m_top->m_encodedFrameNum - 1;
 
-    m_nalCount = 0;
     entropyCoder->setEntropyCoder(&m_sbacCoder, NULL);
 
     /* Emit access unit delimiter unless this is the first frame and the user is
@@ -397,46 +366,38 @@
      * unit) */
     if (m_param->bEnableAccessUnitDelimiters && (m_pic->getPOC() || m_param->bRepeatHeaders))
     {
-        m_nalList[m_nalCount] = new NALUnit;
-        if (m_nalList[m_nalCount])
-        {
-            entropyCoder->setBitstream(&m_bs);
-            m_bs.clear();
-            entropyCoder->encodeAUD(slice);
-            m_bs.writeByteAlignment();
-            m_nalList[m_nalCount++]->serialize(NAL_UNIT_ACCESS_UNIT_DELIMITER, m_bs);
-        }
+        m_bs.clear();
+        entropyCoder->setBitstream(&m_bs);
+        entropyCoder->encodeAUD(slice);
+        m_bs.writeByteAlignment();
+        m_nalList.serialize(NAL_UNIT_ACCESS_UNIT_DELIMITER, m_bs);
     }
     if (m_pic->m_lowres.bKeyframe)
     {
         if (m_param->bRepeatHeaders)
-            m_nalCount += getStreamHeaders(m_nalList + m_nalCount);
+            getStreamHeaders(m_nalList, m_bs);
 
         if (m_param->bEmitHRDSEI)
         {
-            m_nalList[m_nalCount] = new NALUnit;
-            if (m_nalList[m_nalCount])
-            {
-                SEIBufferingPeriod* bpSei = &m_top->m_rateControl->m_bufPeriodSEI;
-                bpSei->m_bpSeqParameterSetId = m_sps.getSPSId();
-                bpSei->m_rapCpbParamsPresentFlag = 0;
+            SEIBufferingPeriod* bpSei = &m_top->m_rateControl->m_bufPeriodSEI;
+            bpSei->m_bpSeqParameterSetId = m_sps.getSPSId();
+            bpSei->m_rapCpbParamsPresentFlag = 0;
 
-                // for the concatenation, it can be set to one during splicing.
-                bpSei->m_concatenationFlag = 0;
+            // for the concatenation, it can be set to one during splicing.
+            bpSei->m_concatenationFlag = 0;
 
-                // since the temporal layer HRD is not ready, we assumed it is fixed
-                bpSei->m_auCpbRemovalDelayDelta = 1;
-                bpSei->m_cpbDelayOffset = 0;
-                bpSei->m_dpbDelayOffset = 0;
+            // since the temporal layer HRD is not ready, we assumed it is fixed
+            bpSei->m_auCpbRemovalDelayDelta = 1;
+            bpSei->m_cpbDelayOffset = 0;
+            bpSei->m_dpbDelayOffset = 0;
 
-                // hrdFullness() calculates the initial CPB removal delay and offset
-                m_top->m_rateControl->hrdFullness(bpSei);
+            // hrdFullness() calculates the initial CPB removal delay and offset
+            m_top->m_rateControl->hrdFullness(bpSei);
 
-                m_bs.clear();
-                bpSei->write(m_bs, m_sps);
+            m_bs.clear();
+            bpSei->write(m_bs, m_sps);
 
-                m_nalList[m_nalCount++]->serialize(NAL_UNIT_PREFIX_SEI, m_bs);
-            }
+            m_nalList.serialize(NAL_UNIT_PREFIX_SEI, m_bs);
 
             m_top->m_lastBPSEI = totalCoded;
         }
@@ -449,20 +410,16 @@
         // m_recoveryPocCnt. Our encoder does not use references prior to the most recent CRA,
         // so all pictures following the CRA in POC order are guaranteed to be displayable,
         // so m_recoveryPocCnt is always 0.
-        m_nalList[m_nalCount] = new NALUnit;
-        if (m_nalList[m_nalCount])
-        {
-            SEIRecoveryPoint sei_recovery_point;
-            sei_recovery_point.m_recoveryPocCnt = 0;
-            sei_recovery_point.m_exactMatchingFlag = true;
-            sei_recovery_point.m_brokenLinkFlag = false;
+        SEIRecoveryPoint sei_recovery_point;
+        sei_recovery_point.m_recoveryPocCnt = 0;
+        sei_recovery_point.m_exactMatchingFlag = true;
+        sei_recovery_point.m_brokenLinkFlag = false;
 
-            m_bs.clear();
-            sei_recovery_point.write(m_bs, *slice->getSPS());
-            m_bs.writeByteAlignment();
+        m_bs.clear();
+        sei_recovery_point.write(m_bs, *slice->getSPS());
+        m_bs.writeByteAlignment();
 
-            m_nalList[m_nalCount++]->serialize(NAL_UNIT_PREFIX_SEI, m_bs);
-        }
+        m_nalList.serialize(NAL_UNIT_PREFIX_SEI, m_bs);
     }
 
     if (m_param->bEmitHRDSEI || !!m_param->interlaceMode)
@@ -501,13 +458,9 @@
             sei->m_picDpbOutputDelay = slice->getSPS()->getNumReorderPics(0) + poc - totalCoded;
         }
 
-        m_nalList[m_nalCount] = new NALUnit;
-        if (m_nalList[m_nalCount])
-        {
-            m_bs.clear();
-            sei->write(m_bs, m_sps);
-            m_nalList[m_nalCount++]->serialize(NAL_UNIT_PREFIX_SEI, m_bs);
-        }
+        m_bs.clear();
+        sei->write(m_bs, m_sps);
+        m_nalList.serialize(NAL_UNIT_PREFIX_SEI, m_bs);
     }
 
     int qp = slice->getSliceQp();
@@ -621,51 +574,47 @@
             m_outStreams[i].clear();
     slice->allocSubstreamSizes(numSubstreams);
 
-    m_nalList[m_nalCount] = new NALUnit;
-    if (m_nalList[m_nalCount])
+    m_bs.clear();
+    m_sbacCoder.init(&m_binCoderCABAC);
+    entropyCoder->setEntropyCoder(&m_sbacCoder, slice);
+    entropyCoder->resetEntropy();
+    entropyCoder->setBitstream(&m_bs);
+    entropyCoder->encodeSliceHeader(slice);
+
+    // re-encode each row of CUs for the final time (TODO: get rid of this second pass)
+    for (int i = 0; i < m_numRows; i++)
     {
-        m_bs.clear();
-        m_sbacCoder.init(&m_binCoderCABAC);
-        entropyCoder->setEntropyCoder(&m_sbacCoder, slice);
-        entropyCoder->resetEntropy();
-        entropyCoder->setBitstream(&m_bs);
-        entropyCoder->encodeSliceHeader(slice);
+        m_rows[i].m_entropyCoder.setEntropyCoder(&m_rows[i].m_sbacCoder, slice);
+        m_rows[i].m_entropyCoder.resetEntropy();
+    }
+    getSbacCoder(0)->load(&m_sbacCoder);
+    entropyCoder->setEntropyCoder(getSbacCoder(0), slice);
+    entropyCoder->resetEntropy();
+    entropyCoder->setBitstream(&m_outStreams[0]);
+    m_sbacCoder.load(getSbacCoder(0));
+    encodeSlice(m_outStreams);
 
-        // re-encode each row of CUs for the final time (TODO: get rid of this second pass)
-        for (int i = 0; i < m_numRows; i++)
-        {
-            m_rows[i].m_entropyCoder.setEntropyCoder(&m_rows[i].m_sbacCoder, slice);
-            m_rows[i].m_entropyCoder.resetEntropy();
-        }
-        getSbacCoder(0)->load(&m_sbacCoder);
-        entropyCoder->setEntropyCoder(getSbacCoder(0), slice);
-        entropyCoder->resetEntropy();
-        entropyCoder->setBitstream(&m_outStreams[0]);
-        m_sbacCoder.load(getSbacCoder(0));
-        encodeSlice(m_outStreams);
+    // flush per-row streams
+    for (uint32_t i = 0; i < numSubstreams; i++)
+    {
+        entropyCoder->setEntropyCoder(getSbacCoder(i), slice);
+        entropyCoder->setBitstream(&m_outStreams[i]);
+        entropyCoder->encodeTerminatingBit(1);
+        entropyCoder->encodeSliceFinish();
+        m_outStreams[i].writeByteAlignment();
+    }
 
-        // flush per-row streams
-        for (uint32_t i = 0; i < numSubstreams; i++)
-        {
-            entropyCoder->setEntropyCoder(getSbacCoder(i), slice);
-            entropyCoder->setBitstream(&m_outStreams[i]);
-            entropyCoder->encodeTerminatingBit(1);
-            entropyCoder->encodeSliceFinish();
-            m_outStreams[i].writeByteAlignment();
-        }
+    uint32_t totalBytes;
+    uint8_t *concatStreams = m_nalList.serializeMultiple(slice->getSubstreamSizes(), totalBytes, numSubstreams, m_outStreams);
 
-        uint32_t totalBytes;
-        uint8_t *concatStreams = m_nalList[m_nalCount]->serializeMultiple(slice->getSubstreamSizes(), totalBytes, numSubstreams, m_outStreams);
+    // complete the slice header by writing WPP row-starts
+    entropyCoder->setEntropyCoder(&m_sbacCoder, slice);
+    entropyCoder->setBitstream(&m_bs);
+    entropyCoder->encodeTilesWPPEntryPoint(slice);
+    m_bs.writeByteAlignment();
 
-        // complete the slice header by writing WPP row-starts
-        entropyCoder->setEntropyCoder(&m_sbacCoder, slice);
-        entropyCoder->setBitstream(&m_bs);
-        entropyCoder->encodeTilesWPPEntryPoint(slice);
-        m_bs.writeByteAlignment();
-
-        m_nalList[m_nalCount++]->serialize(slice->getNalUnitType(), m_bs, concatStreams, totalBytes);
-        X265_FREE(concatStreams);
-    }
+    m_nalList.serialize(slice->getNalUnitType(), m_bs, concatStreams, totalBytes);
+    X265_FREE(concatStreams);
 
     if (m_param->decodedPictureHashSEI)
     {
@@ -693,15 +642,12 @@
                 checksumFinish(m_checksum[i], m_seiReconPictureDigest.m_digest[i]);
             }
         }
-        m_nalList[m_nalCount] = new NALUnit;
-        if (m_nalList[m_nalCount])
-        {
-            m_bs.clear();
-            m_seiReconPictureDigest.write(m_bs, *slice->getSPS());
-            m_bs.writeByteAlignment();
 
-            m_nalList[m_nalCount++]->serialize(NAL_UNIT_SUFFIX_SEI, m_bs);
-        }
+        m_bs.clear();
+        m_seiReconPictureDigest.write(m_bs, *slice->getSPS());
+        m_bs.writeByteAlignment();
+
+        m_nalList.serialize(NAL_UNIT_SUFFIX_SEI, m_bs);
     }
 
     // Decrement referenced frame reference counts, allow them to be recycled
@@ -1250,7 +1196,7 @@
     return Clip3(MIN_QP, MAX_MAX_QP, (int)(qp + 0.5));
 }
 
-TComPic *FrameEncoder::getEncodedPicture(NALUnit **nalunits)
+TComPic *FrameEncoder::getEncodedPicture(NALList& output)
 {
     if (m_pic)
     {
@@ -1259,14 +1205,7 @@
 
         TComPic *ret = m_pic;
         m_pic = NULL;
-
-        if (nalunits)
-        {
-            // move NALs from member variable to user's container
-            X265_CHECK(m_nalCount <= MAX_NAL_UNITS, "NAL unit overflow\n");
-            ::memcpy(nalunits, m_nalList, sizeof(NALUnit*) * m_nalCount);
-            m_nalCount = 0;
-        }
+        output.takeContents(m_nalList);
         return ret;
     }
 
diff -r ecccd5401d27 -r ba9c58a4bee0 source/encoder/frameencoder.h
--- a/source/encoder/frameencoder.h	Thu Jun 19 22:13:36 2014 +0900
+++ b/source/encoder/frameencoder.h	Thu Jun 19 16:22:28 2014 -0500
@@ -41,13 +41,13 @@
 #include "cturow.h"
 #include "ratecontrol.h"
 #include "reference.h"
+#include "nal.h"
 
 namespace x265 {
 // private x265 namespace
 
 class ThreadPool;
 class Encoder;
-struct NALUnit;
 
 // Manages the wave-front processing of a single encoding frame
 class FrameEncoder : public WaveFront, public Thread
@@ -126,7 +126,7 @@
     /* Frame singletons, last the life of the encoder */
     TEncSampleAdaptiveOffset* getSAO()         { return &m_frameFilter.m_sao; }
 
-    int getStreamHeaders(NALUnit **nalunits);
+    void getStreamHeaders(NALList& list, TComOutputBitstream& bs);
 
     void initSlice(TComPic* pic);
 
@@ -139,7 +139,7 @@
     void encodeSlice(TComOutputBitstream* substreams);
 
     /* blocks until worker thread is done, returns encoded picture and bitstream */
-    TComPic *getEncodedPicture(NALUnit **nalunits);
+    TComPic *getEncodedPicture(NALList& list);
 
     void setLambda(int qp, int row);
 
@@ -186,11 +186,9 @@
     TComOutputBitstream      m_bs;
     TComOutputBitstream*     m_outStreams;
     NoiseReduction           m_nr;
+    NALList                  m_nalList;
 
-    /* Picture being encoded, and its output NAL list */
     TComPic*                 m_pic;
-    NALUnit*                 m_nalList[MAX_NAL_UNITS];
-    int                      m_nalCount;
 
     int                      m_filterRowDelay;
     Event                    m_completionEvent;
diff -r ecccd5401d27 -r ba9c58a4bee0 source/encoder/nal.cpp
--- a/source/encoder/nal.cpp	Thu Jun 19 22:13:36 2014 +0900
+++ b/source/encoder/nal.cpp	Thu Jun 19 16:22:28 2014 -0500
@@ -25,29 +25,79 @@
 #include "TLibCommon/TComBitStream.h"
 #include "nal.h"
 
-namespace x265 {
-// private namespace
+using namespace x265;
 
-void NALUnit::serialize(NalUnitType nalUnitType, const TComOutputBitstream& bs, uint8_t* extra, uint32_t extraBytes)
+void NALList::takeContents(NALList& other)
 {
-    uint32_t bitsSize = bs.getNumberOfWrittenBytes();
+    /* take other NAL buffer, discard our old one */
+    X265_FREE(m_buffer);
+    m_buffer = other.m_buffer;
+    m_allocSize = other.m_allocSize;
+    m_occupancy = other.m_occupancy;
+
+    /* copy packet data */
+    m_numNal = other.m_numNal;
+    memcpy(m_nal, other.m_nal, sizeof(x265_nal) * m_numNal);
+
+    /* reset other list, re-allocate their buffer with same size */
+    other.m_numNal = 0;
+    other.m_occupancy = 0;
+    other.m_buffer = X265_MALLOC(uint8_t, m_allocSize);
+}
+
+void NALList::serialize(NalUnitType nalUnitType, const TComOutputBitstream& bs, uint8_t* extra, uint32_t extraBytes)
+{
+    static const char startCodePrefix[] = { 0, 0, 0, 1 };
+
+    uint32_t payloadSize = bs.getNumberOfWrittenBytes();
     const uint8_t* bpayload = bs.getFIFO();
     if (!bpayload)
         return;
 
-    /* padded allocation for emulation prevention bytes */
-    uint8_t* out = m_nalUnitData = X265_MALLOC(uint8_t, 2 + bitsSize + (bitsSize >> 1) + extraBytes);
-    if (!out)
-        return;
+    uint32_t nextSize = m_occupancy + sizeof(startCodePrefix) + 2 + payloadSize + (payloadSize >> 1) + extraBytes;
+    if (nextSize > m_allocSize)
+    {
+        uint8_t *temp = X265_MALLOC(uint8_t, nextSize);
+        if (temp)
+        {
+            memcpy(temp, m_buffer, m_occupancy);
 
-    /* 16bit NAL header:
+            /* fixup existing payload pointers */
+            for (uint32_t i = 0; i < m_numNal; i++)
+                m_nal[i].payload = temp + (m_nal[i].payload - m_buffer);
+
+            X265_FREE(m_buffer);
+            m_buffer = temp;
+            m_allocSize = nextSize;
+        }
+        else
+        {
+            x265_log(NULL, X265_LOG_ERROR, "Unable to realloc access unit buffer");
+            return;
+        }
+    }
+
+    uint8_t *out = m_buffer + m_occupancy;
+    uint32_t bytes = 0;
+
+    if (!m_numNal || nalUnitType == NAL_UNIT_SPS || nalUnitType == NAL_UNIT_PPS)
+    {
+        memcpy(out, startCodePrefix, 4);
+        bytes += 4;
+    }
+    else
+    {
+        memcpy(out, startCodePrefix + 1, 3);
+        bytes += 3;
+    }
+
+    /* 16 bit NAL header:
      * forbidden_zero_bit       1-bit
      * nal_unit_type            6-bits
      * nuh_reserved_zero_6bits  6-bits
      * nuh_temporal_id_plus1    3-bits */
-    out[0] = (uint8_t)nalUnitType << 1;
-    out[1] = 1;
-    uint32_t bytes = 2;
+    out[bytes++] = (uint8_t)nalUnitType << 1;
+    out[bytes++] = 1;
 
     /* 7.4.1 ...
      * Within the NAL unit, the following three-byte sequences shall not occur at
@@ -56,7 +106,7 @@
      *  - 0x000001
      *  - 0x000002
      */
-    for (uint32_t i = 0; i < bitsSize; i++)
+    for (uint32_t i = 0; i < payloadSize; i++)
     {
         if (i > 2 && !out[bytes - 2] && !out[bytes - 3] && out[bytes - 1] <= 0x03)
         {
@@ -71,6 +121,7 @@
 
     if (extra)
     {
+        /* these bytes were escaped by serializeMultiple */
         memcpy(out + bytes, extra, extraBytes);
         bytes += extraBytes;
     }
@@ -82,16 +133,20 @@
      */
     if (!out[bytes - 1])
         out[bytes++] = 0x03;
+    m_occupancy += bytes;
 
-    X265_CHECK(bytes <= 2 + bitsSize + (bitsSize >> 1) + extraBytes, "NAL buffer overflow\n");
+    X265_CHECK(bytes <= 2 + payloadSize + (payloadSize >> 1) + extraBytes, "NAL buffer overflow\n");
+    X265_CHECK(m_numNal < MAX_NAL_UNITS, "NAL count overflow\n");
 
-    m_nalUnitType = nalUnitType;
-    m_packetSize = bytes;
+    x265_nal& nal = m_nal[m_numNal++];
+    nal.type = nalUnitType;
+    nal.sizeBytes = bytes;
+    nal.payload = out;
 }
 
 /* concatenate and escape multiple sub-streams, return final escaped lengths and
  * concatenated buffer. Caller is responsible for freeing the returned buffer */
-uint8_t *NALUnit::serializeMultiple(uint32_t* streamSizeBytes, uint32_t& totalBytes, uint32_t streamCount, const TComOutputBitstream* streams)
+uint8_t *NALList::serializeMultiple(uint32_t* streamSizeBytes, uint32_t& totalBytes, uint32_t streamCount, const TComOutputBitstream* streams)
 {
     uint32_t estSize = 0;
     for (uint32_t s = 0; s < streamCount; s++)
@@ -130,4 +185,3 @@
     return out;
 }
 
-}
diff -r ecccd5401d27 -r ba9c58a4bee0 source/encoder/nal.h
--- a/source/encoder/nal.h	Thu Jun 19 22:13:36 2014 +0900
+++ b/source/encoder/nal.h	Thu Jun 19 16:22:28 2014 -0500
@@ -25,23 +25,34 @@
 #define X265_NAL_H
 
 #include "common.h"
+#include "x265.h"
 
 namespace x265 {
 // private namespace
 
 class TComOutputBitstream;
 
-struct NALUnit
+class NALList
 {
-    uint32_t    m_packetSize;
-    uint8_t*    m_nalUnitData;
-    NalUnitType m_nalUnitType;
+    static const int MAX_NAL_UNITS = 16;
 
-    NALUnit() : m_packetSize(0), m_nalUnitData(NULL), m_nalUnitType(NAL_UNIT_INVALID) {}
-    ~NALUnit() { X265_FREE(m_nalUnitData); }
+public:
+
+    x265_nal    m_nal[MAX_NAL_UNITS];
+    uint32_t    m_numNal;
+
+    uint8_t*    m_buffer;
+    uint32_t    m_occupancy;
+    uint32_t    m_allocSize;
+
+    NALList() : m_numNal(0), m_buffer(NULL), m_occupancy(0), m_allocSize(0) {}
+    ~NALList() { X265_FREE(m_buffer); }
+
+    void takeContents(NALList& other);
 
     void serialize(NalUnitType nalUnitType, const TComOutputBitstream& bs, uint8_t *extra = NULL, uint32_t extraBytes = 0);
-    uint8_t *serializeMultiple(uint32_t* streamSizeBytes, uint32_t& totalBytes, uint32_t streamCount, const TComOutputBitstream* streams);
+
+    static uint8_t *serializeMultiple(uint32_t* streamSizeBytes, uint32_t& totalBytes, uint32_t streamCount, const TComOutputBitstream* streams);
 };
 
 }


More information about the x265-devel mailing list