[x265-commits] [x265] nal: greatly simplify NAL header generation, do not use TComOutputBitstream

Thu Jun 12 07:05:17 CEST 2014

details:   http://hg.videolan.org/x265/rev/7868d22e535d
branches:  
changeset: 7032:7868d22e535d
user:      Steve Borho <steve at borho.org>
date:      Wed Jun 11 12:46:04 2014 -0500
description:
nal: greatly simplify NAL header generation, do not use TComOutputBitstream

This saves a malloc/free and a great deal of needless overhead
Subject: [x265] nal: msvc and its integer conversion warnings

details:   http://hg.videolan.org/x265/rev/b76c4e415cdd
branches:  
changeset: 7033:b76c4e415cdd
user:      Steve Borho <steve at borho.org>
date:      Wed Jun 11 14:17:51 2014 -0500
description:
nal: msvc and its integer conversion warnings
Subject: [x265] frameencoder: remove obsolete check

details:   http://hg.videolan.org/x265/rev/ea6f7da090ef
branches:  
changeset: 7034:ea6f7da090ef
user:      Steve Borho <steve at borho.org>
date:      Wed Jun 11 21:27:17 2014 -0500
description:
frameencoder: remove obsolete check
Subject: [x265] TComDataCU: nits

details:   http://hg.videolan.org/x265/rev/ebe26cc5fdd8
branches:  
changeset: 7035:ebe26cc5fdd8
user:      Steve Borho <steve at borho.org>
date:      Wed Jun 11 21:27:24 2014 -0500
description:
TComDataCU: nits
Subject: [x265] compress: remove floating point math to avoid needing to use EMMS

details:   http://hg.videolan.org/x265/rev/8da75c4dbbc0
branches:  
changeset: 7036:8da75c4dbbc0
user:      Steve Borho <steve at borho.org>
date:      Wed Jun 11 21:35:08 2014 -0500
description:
compress: remove floating point math to avoid needing to use EMMS
Subject: [x265] weight: ensure weight table is initialized on failure, simplify weightAnalyse()

details:   http://hg.videolan.org/x265/rev/10a4c2d42d3f
branches:  
changeset: 7037:10a4c2d42d3f
user:      Steve Borho <steve at borho.org>
date:      Wed Jun 11 21:42:36 2014 -0500
description:
weight: ensure weight table is initialized on failure, simplify weightAnalyse()
Subject: [x265] compress: nit comment spacing

details:   http://hg.videolan.org/x265/rev/e8df9b57eb09
branches:  
changeset: 7038:e8df9b57eb09
user:      Steve Borho <steve at borho.org>
date:      Wed Jun 11 21:35:54 2014 -0500
description:
compress: nit comment spacing

diffstat:

 source/Lib/TLibCommon/TComDataCU.h  |  16 +++++++-------
 source/encoder/compress.cpp         |  41 ++++++++++++++++--------------------
 source/encoder/frameencoder.cpp     |   5 ----
 source/encoder/nal.cpp              |  25 +++++++++------------
 source/encoder/weightPrediction.cpp |  28 +++++++++++-------------
 5 files changed, 50 insertions(+), 65 deletions(-)

diffs (truncated from 344 to 300 lines):

diff -r 20d74192e097 -r e8df9b57eb09 source/Lib/TLibCommon/TComDataCU.h

--- a/source/Lib/TLibCommon/TComDataCU.h	Tue Jun 10 20:07:24 2014 -0500
+++ b/source/Lib/TLibCommon/TComDataCU.h	Wed Jun 11 21:35:54 2014 -0500
@@ -222,7 +222,7 @@ public:
 
     char*         getPartitionSize()                      { return m_partSizes; }
 
-    PartSize      getPartitionSize(uint32_t idx)              { return static_cast<PartSize>(m_partSizes[idx]); }
+    PartSize      getPartitionSize(uint32_t idx)          { return static_cast<PartSize>(m_partSizes[idx]); }
 
     void          setPartSizeSubParts(PartSize eMode, uint32_t absPartIdx, uint32_t depth);
     void          setCUTransquantBypassSubParts(bool flag, uint32_t absPartIdx, uint32_t depth);
@@ -235,23 +235,23 @@ public:
 
     char*         getPredictionMode()                 { return m_predModes; }
 
-    PredMode      getPredictionMode(uint32_t idx)         { return static_cast<PredMode>(m_predModes[idx]); }
+    PredMode      getPredictionMode(uint32_t idx)     { return static_cast<PredMode>(m_predModes[idx]); }
 
     bool*         getCUTransquantBypass()             { return m_cuTransquantBypass; }
 
-    bool          getCUTransquantBypass(uint32_t idx)     { return m_cuTransquantBypass[idx]; }
+    bool          getCUTransquantBypass(uint32_t idx) { return m_cuTransquantBypass[idx]; }
 
     void          setPredModeSubParts(PredMode eMode, uint32_t absPartIdx, uint32_t depth);
 
     uint8_t*      getCUSize()                     { return m_cuSize; }
 
-    uint8_t       getCUSize(uint32_t idx)            { return m_cuSize[idx]; }
+    uint8_t       getCUSize(uint32_t idx)         { return m_cuSize[idx]; }
 
-    char*         getQP()                        { return m_qp; }
+    char*         getQP()                         { return m_qp; }
 
-    char          getQP(uint32_t idx)                { return m_qp[idx]; }
+    char          getQP(uint32_t idx)             { return m_qp[idx]; }
 
-    void          setQP(uint32_t idx, char value)    { m_qp[idx] =  value; }
+    void          setQP(uint32_t idx, char value) { m_qp[idx] =  value; }
 
     void          setQPSubParts(int qp,   uint32_t absPartIdx, uint32_t depth);
     int           getLastValidPartIdx(int absPartIdx);
@@ -447,7 +447,7 @@ public:
     // member functions for RD cost storage
     // -------------------------------------------------------------------------------------------------------------------
 
-    uint32_t&     getTotalNumPart()               { return m_numPartitions; }
+    uint32_t&     getTotalNumPart()     { return m_numPartitions; }
 
     uint32_t      getCoefScanIdx(uint32_t absPartIdx, uint32_t log2TrSize, bool bIsLuma, bool bIsIntra);
 
diff -r 20d74192e097 -r e8df9b57eb09 source/encoder/compress.cpp
--- a/source/encoder/compress.cpp	Tue Jun 10 20:07:24 2014 -0500
+++ b/source/encoder/compress.cpp	Wed Jun 11 21:35:54 2014 -0500
@@ -383,7 +383,7 @@ void TEncCu::xCompressInterCU(TComDataCU
         char currentQP = outTempCU->getQP(0);
         char previousQP = colocated0->getQP(0);
         uint8_t delta = 0, minDepth0 = 4, minDepth1 = 4;
-        double sum0 = 0, sum1 = 0, avgDepth0 = 0, avgDepth1 = 0, avgDepth = 0;
+        uint32_t sum0 = 0, sum1 = 0;
         for (uint32_t i = 0; i < outTempCU->getTotalNumPart(); i = i + 4)
         {
             if (colocated0 && colocated0->getDepth(i) < minDepth0)
@@ -396,12 +396,9 @@ void TEncCu::xCompressInterCU(TComDataCU
                 sum1 += (colocated1->getDepth(i) * 4);
         }
 
-        avgDepth0 = sum0 / outTempCU->getTotalNumPart();
-        avgDepth1 = sum1 / outTempCU->getTotalNumPart();
-        avgDepth = (avgDepth0 + avgDepth1) / 2;
-
+        uint32_t avgDepth2 = (sum0 + sum1) / outTempCU->getTotalNumPart();
         minDepth = X265_MIN(minDepth0, minDepth1);
-        if (((currentQP - previousQP) < 0) || (((currentQP - previousQP) >= 0) && ((avgDepth - minDepth) > 0.5)))
+        if (((currentQP - previousQP) < 0) || (((currentQP - previousQP) >= 0) && ((avgDepth2 - 2 * minDepth) > 1)))
             delta = 0;
         else
             delta = 1;
@@ -441,17 +438,17 @@ void TEncCu::xCompressInterCU(TComDataCU
 
             if (!earlyskip)
             {
-                /*Compute 2Nx2N mode costs*/
+                /* Compute 2Nx2N mode costs */
                 {
                     xComputeCostInter(m_interCU_2Nx2N[depth], m_modePredYuv[0][depth], SIZE_2Nx2N);
-                    /*Choose best mode; initialise outBestCU to 2Nx2N*/
+                    /* Choose best mode; initialise outBestCU to 2Nx2N */
                     outBestCU = m_interCU_2Nx2N[depth];
                     tempYuv = m_modePredYuv[0][depth];
                     m_modePredYuv[0][depth] = m_bestPredYuv[depth];
                     m_bestPredYuv[depth] = tempYuv;
                 }
 
-                /*Compute Rect costs*/
+                /* Compute Rect costs */
                 if (m_param->bEnableRectInter)
                 {
                     xComputeCostInter(m_interCU_Nx2N[depth], m_modePredYuv[1][depth], SIZE_Nx2N);
@@ -476,7 +473,7 @@ void TEncCu::xCompressInterCU(TComDataCU
 
                 if (m_param->rdLevel > 2)
                 {
-                    //calculate the motion compensation for chroma for the best mode selected
+                    // calculate the motion compensation for chroma for the best mode selected
                     int numPart = outBestCU->getNumPartInter();
                     for (int partIdx = 0; partIdx < numPart; partIdx++)
                     {
@@ -501,7 +498,7 @@ void TEncCu::xCompressInterCU(TComDataCU
                 /* Check for Intra in inter frames only if its a P-slice*/
                 if (slice->getSliceType() == P_SLICE)
                 {
-                    /*compute intra cost */
+                    /* compute intra cost */
                     bool bdoIntra = true;
 
                     if (m_param->rdLevel > 2)
@@ -612,7 +609,7 @@ void TEncCu::xCompressInterCU(TComDataCU
                 m_bestMergeRecoYuv[depth] = tempYuv;
             }
 
-            if (m_param->rdLevel > 0) //checkDQP can be done only after residual encoding is done
+            if (m_param->rdLevel > 0) // checkDQP can be done only after residual encoding is done
                 xCheckDQP(outBestCU);
             /* Disable recursive analysis for whole CUs temporarily */
             if ((outBestCU != 0) && (outBestCU->isSkipped(0)))
@@ -649,7 +646,6 @@ void TEncCu::xCompressInterCU(TComDataCU
 #endif
         {
             uint64_t totalCostNeigh = 0, totalCostCU = 0, totalCountNeigh = 0, totalCountCU = 0;
-            double avgCost = 0;
             TComDataCU* above = outTempCU->getCUAbove();
             TComDataCU* aboveLeft = outTempCU->getCUAboveLeft();
             TComDataCU* aboveRight = outTempCU->getCUAboveRight();
@@ -679,13 +675,12 @@ void TEncCu::xCompressInterCU(TComDataCU
                 totalCountNeigh += left->m_count[depth];
             }
 
-            //giving 60% weight to all CU's and 40% weight to neighbour CU's
+            // give 60% weight to all CU's and 40% weight to neighbour CU's
+            uint64_t avgCost = 0;
             if (totalCountNeigh + totalCountCU)
-                avgCost = ((0.6 * totalCostCU) + (0.4 * totalCostNeigh)) / ((0.6 * totalCountCU) + (0.4 * totalCountNeigh));
+                avgCost = ((3 * totalCostCU) + (2 * totalCostNeigh)) / ((3 * totalCountCU) + (2 * totalCountNeigh));
 
-            float lambda = 1.0f;
-
-            if (outBestCU->m_totalRDCost < lambda * avgCost && avgCost != 0 && depth != 0)
+            if (outBestCU->m_totalRDCost < avgCost && avgCost != 0 && depth != 0)
             {
                 /* Copy Best data to Picture for next partition prediction. */
                 outBestCU->copyToPic((uint8_t)depth);
@@ -710,7 +705,7 @@ void TEncCu::xCompressInterCU(TComDataCU
                 ((subTempPartCU->getCUPelX() < slice->getSPS()->getPicWidthInLumaSamples()) &&
                  (subTempPartCU->getCUPelY() < slice->getSPS()->getPicHeightInLumaSamples())))
             {
-                if (0 == nextDepth_partIndex) //initialize RD with previous depth buffer
+                if (0 == nextDepth_partIndex) // initialize RD with previous depth buffer
                 {
                     m_rdSbacCoders[nextDepth][CI_CURR_BEST]->load(m_rdSbacCoders[depth][CI_CURR_BEST]);
                 }
@@ -873,7 +868,7 @@ void TEncCu::encodeResidue(TComDataCU* l
     {
         if (!lcu->getSkipFlag(absPartIdx))
         {
-            //Calculate Residue
+            // Calculate Residue
             pixel* src2 = m_bestPredYuv[0]->getLumaAddr(absPartIdx);
             pixel* src1 = m_origYuv[0]->getLumaAddr(absPartIdx);
             int16_t* dst = m_tmpResiYuv[depth]->getLumaAddr();
@@ -897,7 +892,7 @@ void TEncCu::encodeResidue(TComDataCU* l
             dststride = m_tmpResiYuv[depth]->m_cwidth;
             primitives.chroma[m_param->internalCsp].sub_ps[part](dst, dststride, src1, src2, src1stride, src2stride);
 
-            //Residual encoding
+            // Residual encoding
             m_search->residualTransformQuantInter(cu, 0, m_tmpResiYuv[depth], cu->getDepth(0), true);
             xCheckDQP(cu);
 
@@ -910,7 +905,7 @@ void TEncCu::encodeResidue(TComDataCU* l
             {
                 cu->copyCodedToPic(depth);
 
-                //Generate Recon
+                // Generate Recon
                 pixel* pred = m_bestPredYuv[0]->getLumaAddr(absPartIdx);
                 int16_t* res = m_tmpResiYuv[depth]->getLumaAddr();
                 pixel* reco = m_bestRecoYuv[depth]->getLumaAddr();
@@ -937,7 +932,7 @@ void TEncCu::encodeResidue(TComDataCU* l
             }
         }
 
-        //Generate Recon
+        // Generate Recon
         TComPicYuv* rec = pic->getPicYuvRec();
         int part = partitionFromSize(cu->getCUSize(0));
         pixel* src = m_bestPredYuv[0]->getLumaAddr(absPartIdx);
diff -r 20d74192e097 -r e8df9b57eb09 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Tue Jun 10 20:07:24 2014 -0500
+++ b/source/encoder/frameencoder.cpp	Wed Jun 11 21:35:54 2014 -0500
@@ -494,14 +494,9 @@ void FrameEncoder::compressFrame()
     bool bUseWeightP = slice->getSliceType() == P_SLICE && slice->getPPS()->getUseWP();
     bool bUseWeightB = slice->getSliceType() == B_SLICE && slice->getPPS()->getWPBiPred();
     if (bUseWeightP || bUseWeightB)
-    {
-        X265_CHECK(slice->getPPS()->getUseWP(), "weightp not enabled in PPS, but in use\n");
         weightAnalyse(*slice, *m_cfg->m_param);
-    }
     else
-    {
         slice->resetWpScaling();
-    }
 
     // Generate motion references
     int numPredDir = slice->isInterP() ? 1 : slice->isInterB() ? 2 : 0;
diff -r 20d74192e097 -r e8df9b57eb09 source/encoder/nal.cpp
--- a/source/encoder/nal.cpp	Tue Jun 10 20:07:24 2014 -0500
+++ b/source/encoder/nal.cpp	Wed Jun 11 21:35:54 2014 -0500
@@ -30,27 +30,24 @@ namespace x265 {
 
 void NALUnit::serialize(NalUnitType nalUnitType, const TComOutputBitstream& bs)
 {
-    TComOutputBitstream header;
-    header.write(0, 1);           // forbidden_zero_bit
-    header.write(nalUnitType, 6); // nal_unit_type
-    header.write(0, 6);           // nuh_reserved_zero_6bits
-    header.write(1, 3);           // nuh_temporal_id_plus1
-
-    uint32_t headerSize = header.getNumberOfWrittenBytes();
-    const uint8_t* hpayload = header.getFIFO();
-
     uint32_t bitsSize = bs.getNumberOfWrittenBytes();
     const uint8_t* bpayload = bs.getFIFO();
-    if (!bpayload || !hpayload)
+    if (!bpayload)
         return;
 
     /* padded allocation for emulation prevention bytes */
-    uint8_t* out = m_nalUnitData = X265_MALLOC(uint8_t, headerSize + bitsSize + (bitsSize >> 1));
+    uint8_t* out = m_nalUnitData = X265_MALLOC(uint8_t, 2 + bitsSize + (bitsSize >> 1));
     if (!out)
         return;
 
-    memcpy(out, hpayload, headerSize);
-    uint32_t bytes = headerSize;
+    /* 16bit NAL header:
+     * forbidden_zero_bit       1-bit
+     * nal_unit_type            6-bits
+     * nuh_reserved_zero_6bits  6-bits
+     * nuh_temporal_id_plus1    3-bits */
+    out[0] = (uint8_t)nalUnitType << 1;
+    out[1] = 1;
+    uint32_t bytes = 2;
 
     /* 7.4.1 ...
      * Within the NAL unit, the following three-byte sequences shall not occur at
@@ -80,7 +77,7 @@ void NALUnit::serialize(NalUnitType nalU
     if (!out[bytes - 1])
         out[bytes++] = 0x03;
 
-    X265_CHECK(bytes <= headerSize + bitsSize + (bitsSize >> 1), "NAL buffer overflow\n");
+    X265_CHECK(bytes <= 2 + bitsSize + (bitsSize >> 1), "NAL buffer overflow\n");
 
     m_nalUnitType = nalUnitType;
     m_packetSize = bytes;
diff -r 20d74192e097 -r e8df9b57eb09 source/encoder/weightPrediction.cpp
--- a/source/encoder/weightPrediction.cpp	Tue Jun 10 20:07:24 2014 -0500
+++ b/source/encoder/weightPrediction.cpp	Wed Jun 11 21:35:54 2014 -0500
@@ -30,7 +30,7 @@
 #include "bitstream.h"
 
 using namespace x265;
-namespace weightp {
+namespace {
 struct Cache
 {
     const int * intraCost;
@@ -225,13 +225,16 @@ uint32_t weightCost(pixel *         fenc
     x265_emms();
     return cost;
 }
+}
 
-void analyzeWeights(TComSlice& slice, x265_param& param, wpScalingParam wp[2][MAX_NUM_REF][3])
+namespace x265 {
+void weightAnalyse(TComSlice& slice, x265_param& param)
 {
+    wpScalingParam wp[2][MAX_NUM_REF][3];
     TComPicYuv *fencYuv = slice.getPic()->getPicYuvOrg();
     Lowres& fenc        = slice.getPic()->m_lowres;
 
-    weightp::Cache cache;
+    Cache cache;
 
     memset(&cache, 0, sizeof(cache));
     cache.intraCost = fenc.intraCost;
@@ -245,7 +248,10 @@ void analyzeWeights(TComSlice& slice, x2
     /* Use single allocation for motion compensated ref and weight buffers */
     pixel *mcbuf = X265_MALLOC(pixel, 2 * fencYuv->getStride() * fencYuv->getHeight());
     if (!mcbuf)