[x265-commits] [x265] search: made separate functions for encoding cbfs in xEst...

Fri Nov 7 19:56:05 CET 2014

details:   http://hg.videolan.org/x265/rev/0b7c709335b2
branches:  
changeset: 8794:0b7c709335b2
user:      Ashok Kumar Mishra<ashok at multicorewareinc.com>
date:      Wed Nov 05 20:34:26 2014 +0530
description:
search: made separate functions for encoding cbfs in xEstimateResidualQT()
Subject: [x265] search: made a function for null cost calculation in xEstimateResidualQT()

details:   http://hg.videolan.org/x265/rev/522baf03fbbd
branches:  
changeset: 8795:522baf03fbbd
user:      Ashok Kumar Mishra<ashok at multicorewareinc.com>
date:      Wed Nov 05 16:23:42 2014 +0530
description:
search: made a function for null cost calculation in xEstimateResidualQT()
Subject: [x265] [REVIEW PATCH/OUTPUT CHANGED]search: removed multiple encode Coefficients from estimateResidualQT()

details:   http://hg.videolan.org/x265/rev/eb5a9eb03dd6
branches:  
changeset: 8796:eb5a9eb03dd6
user:      Ashok Kumar Mishra<ashok at multicorewareinc.com>
date:      Wed Nov 05 20:34:26 2014 +0530
description:
[REVIEW PATCH/OUTPUT CHANGED]search: removed multiple encode Coefficients from estimateResidualQT()

Tried to remove the repeated coefficient-encoding passes from the estimateResidualQT() function.
Coefficients are encoded in three stages: once for the distortion calculation and twice for the split and unsplit
block cost calculations. I have added comments where I changed the code.
Subject: [x265] fix typo

details:   http://hg.videolan.org/x265/rev/4f034e3adef8
branches:  
changeset: 8797:4f034e3adef8
user:      Satoshi Nakagawa <nakagawa424 at oki.com>
date:      Fri Nov 07 17:22:01 2014 +0900
description:
fix typo
Subject: [x265] search: fix warnings

details:   http://hg.videolan.org/x265/rev/7338b1f1f43d
branches:  
changeset: 8798:7338b1f1f43d
user:      Deepthi Nandakumar <deepthi at multicorewareinc.com>
date:      Fri Nov 07 16:26:22 2014 +0530
description:
search: fix warnings
Subject: [x265] fix bug in 522baf03fbbd

details:   http://hg.videolan.org/x265/rev/f2130a4dc876
branches:  
changeset: 8799:f2130a4dc876
user:      Satoshi Nakagawa <nakagawa424 at oki.com>
date:      Fri Nov 07 19:29:27 2014 +0900
description:
fix bug in 522baf03fbbd
Subject: [x265] entropy: white-space nits

details:   http://hg.videolan.org/x265/rev/429742055057
branches:  
changeset: 8800:429742055057
user:      Steve Borho <steve at borho.org>
date:      Fri Nov 07 11:01:41 2014 -0600
description:
entropy: white-space nits
Subject: [x265] entropy: rename encodeBinContext to bitsCodeBin, make const

details:   http://hg.videolan.org/x265/rev/a1ee9422183b
branches:  
changeset: 8801:a1ee9422183b
user:      Steve Borho <steve at borho.org>
date:      Fri Nov 07 11:07:04 2014 -0600
description:
entropy: rename encodeBinContext to bitsCodeBin, make const

The function does not modify the context, so there is no need to pass it by
reference, and the function can be const. Also, group the bit-counting RDO
functions together.
Subject: [x265] entropy: use bitsCodeBin in intra mode bit estimate functions

details:   http://hg.videolan.org/x265/rev/84fc74874406
branches:  
changeset: 8802:84fc74874406
user:      Steve Borho <steve at borho.org>
date:      Fri Nov 07 11:09:59 2014 -0600
description:
entropy: use bitsCodeBin in intra mode bit estimate functions
Subject: [x265] entropy: inline bit counting functions

details:   http://hg.videolan.org/x265/rev/ca7873cab172
branches:  
changeset: 8803:ca7873cab172
user:      Steve Borho <steve at borho.org>
date:      Fri Nov 07 11:32:25 2014 -0600
description:
entropy: inline bit counting functions
Subject: [x265] entropy: inline methods which mapped to encodeBin() calls

details:   http://hg.videolan.org/x265/rev/640d2936e699
branches:  
changeset: 8804:640d2936e699
user:      Steve Borho <steve at borho.org>
date:      Fri Nov 07 12:31:55 2014 -0600
description:
entropy: inline methods which mapped to encodeBin() calls
Subject: [x265] entropy: ensure X265_CHECK() has braces

details:   http://hg.videolan.org/x265/rev/0fd8e0c5272a
branches:  
changeset: 8805:0fd8e0c5272a
user:      Steve Borho <steve at borho.org>
date:      Fri Nov 07 12:32:40 2014 -0600
description:
entropy: ensure X265_CHECK() has braces
Subject: [x265] entropy: nit

details:   http://hg.videolan.org/x265/rev/b55799a2f5ad
branches:  
changeset: 8806:b55799a2f5ad
user:      Steve Borho <steve at borho.org>
date:      Fri Nov 07 12:45:29 2014 -0600
description:
entropy: nit

diffstat:

 source/encoder/analysis.cpp |    2 +-
 source/encoder/entropy.cpp  |   92 +--------
 source/encoder/entropy.h    |   52 +++-
 source/encoder/search.cpp   |  459 ++++++++++++++++++++++---------------------
 source/encoder/search.h     |    4 +-
 5 files changed, 270 insertions(+), 339 deletions(-)

diffs (truncated from 985 to 300 lines):

diff -r bc4f3dab51db -r b55799a2f5ad source/encoder/analysis.cpp

--- a/source/encoder/analysis.cpp	Fri Nov 07 11:43:15 2014 +0900
+++ b/source/encoder/analysis.cpp	Fri Nov 07 12:45:29 2014 -0600
@@ -1739,7 +1739,7 @@ bool Analysis::recursionDepthCheck(const
     }
 
     // give 60% weight to all CU's and 40% weight to neighbour CU's
-    if (neighCost + cuCount)
+    if (neighCount + cuCount)
     {
         uint64_t avgCost = ((3 * cuCost) + (2 * neighCost)) / ((3 * cuCount) + (2 * neighCount));
         uint64_t curCost = m_param->rdLevel > 1 ? bestMode.rdCost : bestMode.sa8dCost;
diff -r bc4f3dab51db -r b55799a2f5ad source/encoder/entropy.cpp
--- a/source/encoder/entropy.cpp	Fri Nov 07 11:43:15 2014 +0900
+++ b/source/encoder/entropy.cpp	Fri Nov 07 12:45:29 2014 -0600
@@ -397,7 +397,9 @@ void Entropy::codeSliceHeader(const Slic
         // Ideally this process should not be repeated for each slice in a picture
         if (slice.isIRAP())
             for (int picIdx = 0; picIdx < slice.m_rps.numberOfPictures; picIdx++)
+            {
                 X265_CHECK(!slice.m_rps.bUsed[picIdx], "pic unused failure\n");
+            }
 #endif
 
         WRITE_FLAG(0, "short_term_ref_pic_set_sps_flag");
@@ -590,9 +592,9 @@ void Entropy::encodeCU(const CUData& cu,
 void Entropy::finishCU(const CUData& cu, uint32_t absPartIdx, uint32_t depth)
 {
     const Slice* slice = cu.m_slice;
-    X265_CHECK(cu.m_slice->m_endCUAddr == cu.m_slice->realEndAddress(slice->m_endCUAddr), "real end address expected\n");
     uint32_t realEndAddress = slice->m_endCUAddr;
     uint32_t cuAddr = cu.getSCUAddr() + absPartIdx;
+    X265_CHECK(realEndAddress == cu.m_slice->realEndAddress(slice->m_endCUAddr), "real end address expected\n");
 
     uint32_t granularityMask = g_maxCUSize - 1;
     uint32_t cuSize = 1 << cu.m_log2CUSize[absPartIdx];
@@ -1144,11 +1146,6 @@ void Entropy::copyFrom(const Entropy& sr
     markValid();
 }
 
-void Entropy::codeMVPIdx(uint32_t symbol)
-{
-    encodeBin(symbol, m_contextState[OFF_MVP_IDX_CTX]);
-}
-
 void Entropy::codePartSize(const CUData& cu, uint32_t absPartIdx, uint32_t depth)
 {
     PartSize partSize = (PartSize)cu.m_partSize[absPartIdx];
@@ -1199,32 +1196,6 @@ void Entropy::codePartSize(const CUData&
     }
 }
 
-void Entropy::codePredMode(int predMode)
-{
-    encodeBin(predMode == MODE_INTRA ? 1 : 0, m_contextState[OFF_PRED_MODE_CTX]);
-}
-
-void Entropy::codeCUTransquantBypassFlag(uint32_t symbol)
-{
-    encodeBin(symbol, m_contextState[OFF_TQUANT_BYPASS_FLAG_CTX]);
-}
-
-void Entropy::codeSkipFlag(const CUData& cu, uint32_t absPartIdx)
-{
-    // get context function is here
-    uint32_t symbol = cu.isSkipped(absPartIdx) ? 1 : 0;
-    uint32_t ctxSkip = cu.getCtxSkipFlag(absPartIdx);
-
-    encodeBin(symbol, m_contextState[OFF_SKIP_FLAG_CTX + ctxSkip]);
-}
-
-void Entropy::codeMergeFlag(const CUData& cu, uint32_t absPartIdx)
-{
-    const uint32_t symbol = cu.m_mergeFlag[absPartIdx] ? 1 : 0;
-
-    encodeBin(symbol, m_contextState[OFF_MERGE_FLAG_EXT_CTX]);
-}
-
 void Entropy::codeMergeIndex(const CUData& cu, uint32_t absPartIdx)
 {
     uint32_t numCand = cu.m_slice->m_maxNumMergeCand;
@@ -1245,37 +1216,6 @@ void Entropy::codeMergeIndex(const CUDat
     }
 }
 
-void Entropy::codeSplitFlag(const CUData& cu, uint32_t absPartIdx, uint32_t depth)
-{
-    X265_CHECK(depth < g_maxCUDepth, "invalid depth\n");
-
-    uint32_t ctx           = cu.getCtxSplitFlag(absPartIdx, depth);
-    uint32_t currSplitFlag = (cu.m_cuDepth[absPartIdx] > depth) ? 1 : 0;
-
-    X265_CHECK(ctx < 3, "ctx out of range\n");
-    encodeBin(currSplitFlag, m_contextState[OFF_SPLIT_FLAG_CTX + ctx]);
-}
-
-void Entropy::codeTransformSubdivFlag(uint32_t symbol, uint32_t ctx)
-{
-    encodeBin(symbol, m_contextState[OFF_TRANS_SUBDIV_FLAG_CTX + ctx]);
-}
-
-uint32_t Entropy::bitsIntraModeNonMPM() const
-{
-    uint32_t mstate = m_contextState[OFF_ADI_CTX];
-    uint32_t bits = ((uint32_t)(m_fracBits & 32767) + sbacGetEntropyBits(mstate, 0)) >> 15;
-    return bits + 5; /* fixed cost for encodeBinsEP() */
-}
-
-uint32_t Entropy::bitsIntraModeMPM(const uint32_t preds[3], uint32_t dir) const
-{
-    X265_CHECK(dir == preds[0] || dir == preds[1] || dir == preds[2], "dir must be a most probable mode\n");
-    uint32_t mstate = m_contextState[OFF_ADI_CTX];
-    uint32_t bits = ((uint32_t)(m_fracBits & 32767) + sbacGetEntropyBits(mstate, 1)) >> 15;
-    return bits + (dir == preds[0] ? 1 : 2);
-}
-
 void Entropy::codeIntraDirLumaAng(const CUData& cu, uint32_t absPartIdx, bool isMultiple)
 {
     uint32_t dir[4], j;
@@ -1479,12 +1419,6 @@ void Entropy::codeQtCbf(const CUData& cu
     encodeBin(cbf, m_contextState[OFF_QT_CBF_CTX + ctx]);
 }
 
-void Entropy::codeQtCbf(uint32_t cbf, TextType ttype, uint32_t trDepth)
-{
-    uint32_t ctx = ctxCbf[ttype][trDepth];
-    encodeBin(cbf, m_contextState[OFF_QT_CBF_CTX + ctx]);
-}
-
 void Entropy::codeTransformSkipFlags(const CUData& cu, uint32_t absPartIdx, uint32_t trSize, TextType ttype)
 {
     if (cu.m_tqBypass[absPartIdx])
@@ -1496,26 +1430,6 @@ void Entropy::codeTransformSkipFlags(con
     encodeBin(useTransformSkip, m_contextState[OFF_TRANSFORMSKIP_FLAG_CTX + (ttype ? NUM_TRANSFORMSKIP_FLAG_CTX : 0)]);
 }
 
-void Entropy::codeQtRootCbf(uint32_t cbf)
-{
-    encodeBin(cbf, m_contextState[OFF_QT_ROOT_CBF_CTX]);
-}
-
-void Entropy::codeQtCbfZero(TextType ttype, uint32_t trDepth)
-{
-    // this function is only used to estimate the bits when cbf is 0
-    // and will never be called when writing the bitsream.
-    uint32_t ctx = ctxCbf[ttype][trDepth];
-    encodeBin(0, m_contextState[OFF_QT_CBF_CTX + ctx]);
-}
-
-void Entropy::codeQtRootCbfZero()
-{
-    // this function is only used to estimate the bits when cbf is 0
-    // and will never be called when writing the bistream.
-    encodeBin(0, m_contextState[OFF_QT_ROOT_CBF_CTX]);
-}
-
 /** Encode (X,Y) position of the last significant coefficient
  * \param posx X component of last coefficient
  * \param posy Y component of last coefficient
diff -r bc4f3dab51db -r b55799a2f5ad source/encoder/entropy.h
--- a/source/encoder/entropy.h	Fri Nov 07 11:43:15 2014 +0900
+++ b/source/encoder/entropy.h	Fri Nov 07 12:45:29 2014 -0600
@@ -27,6 +27,7 @@
 #include "common.h"
 #include "bitstream.h"
 #include "frame.h"
+#include "cudata.h"
 #include "contexts.h"
 #include "slice.h"
 
@@ -35,8 +36,6 @@ namespace x265 {
 
 struct SaoCtuParam;
 struct EstBitsSbac;
-class CUData;
-struct CUGeom;
 class ScalingList;
 
 enum SplitType
@@ -154,41 +153,48 @@ public:
     void finishSlice()                 { encodeBinTrm(1); finish(); dynamic_cast<Bitstream*>(m_bitIf)->writeByteAlignment(); }
 
     void encodeCTU(const CUData& cu, const CUGeom& cuGeom);
-    void codeSaoOffset(const SaoCtuParam& ctuParam, int plane);
-    void codeSaoMerge(uint32_t code)   { encodeBin(code, m_contextState[OFF_SAO_MERGE_FLAG_CTX]); }
 
-    void codeCUTransquantBypassFlag(uint32_t symbol);
-    void codeSkipFlag(const CUData& cu, uint32_t absPartIdx);
-    void codeMergeFlag(const CUData& cu, uint32_t absPartIdx);
+    void codeIntraDirLumaAng(const CUData& cu, uint32_t absPartIdx, bool isMultiple);
+    void codeIntraDirChroma(const CUData& cu, uint32_t absPartIdx, uint32_t *chromaDirMode);
+
     void codeMergeIndex(const CUData& cu, uint32_t absPartIdx);
-    void codeSplitFlag(const CUData& cu, uint32_t absPartIdx, uint32_t depth);
-    void codeMVPIdx(uint32_t symbol);
     void codeMvd(const CUData& cu, uint32_t absPartIdx, int list);
 
     void codePartSize(const CUData& cu, uint32_t absPartIdx, uint32_t depth);
-    void codePredMode(int predMode);
     void codePredInfo(const CUData& cu, uint32_t absPartIdx);
-    void codeTransformSubdivFlag(uint32_t symbol, uint32_t ctx);
     void codeQtCbf(const CUData& cu, uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t width, uint32_t height, TextType ttype, uint32_t trDepth, bool lowestLevel);
     void codeQtCbf(const CUData& cu, uint32_t absPartIdx, TextType ttype, uint32_t trDepth);
-    void codeQtCbf(uint32_t cbf, TextType ttype, uint32_t trDepth);
-    void codeQtCbfZero(TextType ttype, uint32_t trDepth);
-    void codeQtRootCbfZero();
     void codeCoeff(const CUData& cu, uint32_t absPartIdx, uint32_t depth, bool& bCodeDQP, uint32_t depthRange[2]);
     void codeCoeffNxN(const CUData& cu, const coeff_t* coef, uint32_t absPartIdx, uint32_t log2TrSize, TextType ttype);
 
-    uint32_t bitsIntraModeNonMPM() const;
-    uint32_t bitsIntraModeMPM(const uint32_t preds[3], uint32_t dir) const;
-    void codeIntraDirLumaAng(const CUData& cu, uint32_t absPartIdx, bool isMultiple);
-    void codeIntraDirChroma(const CUData& cu, uint32_t absPartIdx, uint32_t *chromaDirMode);
+    inline void codeSaoMerge(uint32_t code)                          { encodeBin(code, m_contextState[OFF_SAO_MERGE_FLAG_CTX]); }
+    inline void codeMVPIdx(uint32_t symbol)                          { encodeBin(symbol, m_contextState[OFF_MVP_IDX_CTX]); }
+    inline void codeMergeFlag(const CUData& cu, uint32_t absPartIdx) { encodeBin(cu.m_mergeFlag[absPartIdx], m_contextState[OFF_MERGE_FLAG_EXT_CTX]); }
+    inline void codeSkipFlag(const CUData& cu, uint32_t absPartIdx)  { encodeBin(cu.isSkipped(absPartIdx), m_contextState[OFF_SKIP_FLAG_CTX + cu.getCtxSkipFlag(absPartIdx)]); }
+    inline void codeSplitFlag(const CUData& cu, uint32_t absPartIdx, uint32_t depth) { encodeBin(cu.m_cuDepth[absPartIdx] > depth, m_contextState[OFF_SPLIT_FLAG_CTX + cu.getCtxSplitFlag(absPartIdx, depth)]); }
+    inline void codeTransformSubdivFlag(uint32_t symbol, uint32_t ctx)    { encodeBin(symbol, m_contextState[OFF_TRANS_SUBDIV_FLAG_CTX + ctx]); }
+    inline void codePredMode(int predMode)                                { encodeBin(predMode == MODE_INTRA ? 1 : 0, m_contextState[OFF_PRED_MODE_CTX]); }
+    inline void codeCUTransquantBypassFlag(uint32_t symbol)               { encodeBin(symbol, m_contextState[OFF_TQUANT_BYPASS_FLAG_CTX]); }
+    inline void codeQtCbf(uint32_t cbf, TextType ttype, uint32_t trDepth) { encodeBin(cbf, m_contextState[OFF_QT_CBF_CTX + ctxCbf[ttype][trDepth]]); }
+    inline void codeQtRootCbf(uint32_t cbf)                               { encodeBin(cbf, m_contextState[OFF_QT_ROOT_CBF_CTX]); }
 
-    // RDO functions
+    void codeSaoOffset(const SaoCtuParam& ctuParam, int plane);
+
+    /* RDO functions */
     void estBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const;
     void estCBFBit(EstBitsSbac& estBitsSbac) const;
     void estSignificantCoeffGroupMapBit(EstBitsSbac& estBitsSbac, bool bIsLuma) const;
     void estSignificantMapBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const;
     void estSignificantCoefficientsBit(EstBitsSbac& estBitsSbac, bool bIsLuma) const;
 
+    inline uint32_t bitsIntraModeNonMPM() const { return bitsCodeBin(0, m_contextState[OFF_ADI_CTX]) + 5; }
+    inline uint32_t bitsIntraModeMPM(const uint32_t preds[3], uint32_t dir) const { return bitsCodeBin(1, m_contextState[OFF_ADI_CTX]) + (dir == preds[0] ? 1 : 2); }
+    inline uint32_t estimateCbfBits(uint32_t cbf, TextType ttype, uint32_t trDepth) const { return bitsCodeBin(cbf, m_contextState[OFF_QT_CBF_CTX + ctxCbf[ttype][trDepth]]); }
+
+    /* these functions are only used to estimate the bits when cbf is 0 and will never be called when writing the bitstream. */
+    inline void codeQtRootCbfZero() { encodeBin(0, m_contextState[OFF_QT_ROOT_CBF_CTX]); }
+    inline void codeQtCbfZero(TextType ttype, uint32_t trDepth) { encodeBin(0, m_contextState[OFF_QT_CBF_CTX + ctxCbf[ttype][trDepth]]); }
+
 private:
 
     /* CABAC private methods */
@@ -200,6 +206,13 @@ private:
     void encodeBinsEP(uint32_t binValues, int numBins);
     void encodeBinTrm(uint32_t binValue);
 
+    /* return the bits of encoding the context bin without updating */
+    inline uint32_t bitsCodeBin(uint32_t binValue, uint8_t ctxModel) const
+    {
+        uint64_t fracBits = (m_fracBits & 32767) + sbacGetEntropyBits(ctxModel, binValue);
+        return (uint32_t)(fracBits >> 15);
+    }
+
     void encodeCU(const CUData& cu, const CUGeom &cuGeom, uint32_t absPartIdx, uint32_t depth, bool& bEncodeDQP);
     void finishCU(const CUData& cu, uint32_t absPartIdx, uint32_t depth);
 
@@ -217,7 +230,6 @@ private:
     void codePredWeightTable(const Slice& slice);
     void codeInterDir(const CUData& cu, uint32_t absPartIdx);
     void codePUWise(const CUData& cu, uint32_t absPartIdx);
-    void codeQtRootCbf(uint32_t cbf);
     void codeRefFrmIdxPU(const CUData& cu, uint32_t absPartIdx, int list);
     void codeRefFrmIdx(const CUData& cu, uint32_t absPartIdx, int list);
 
diff -r bc4f3dab51db -r b55799a2f5ad source/encoder/search.cpp
--- a/source/encoder/search.cpp	Fri Nov 07 11:43:15 2014 +0900
+++ b/source/encoder/search.cpp	Fri Nov 07 12:45:29 2014 -0600
@@ -2719,6 +2719,16 @@ void Search::residualTransformQuantInter
     }
 }
 
+uint64_t Search::estimateNullCbfCost(uint32_t &dist, uint32_t &psyEnergy, uint32_t tuDepth, TextType compId)
+{
+    uint32_t nullBits = m_entropyCoder.estimateCbfBits(0, compId, tuDepth);
+
+    if (m_rdCost.m_psyRd)
+        return m_rdCost.calcPsyRdCost(dist, nullBits, psyEnergy);
+    else
+        return m_rdCost.calcRdCost(dist, nullBits);
+}
+
 void Search::estimateResidualQT(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, ShortYuv& resiYuv, Cost& outCosts, uint32_t depthRange[2])
 {
     CUData& cu = mode.cu;
@@ -2726,6 +2736,7 @@ void Search::estimateResidualQT(Mode& mo
 
     bool bCheckSplit = log2TrSize > depthRange[0];
     bool bCheckFull = log2TrSize <= depthRange[1];
+    bool bSplitPresentFlag = bCheckSplit && bCheckFull;
 
     if (cu.m_partSize[absPartIdx] != SIZE_2Nx2N && depth == cu.m_cuDepth[absPartIdx] && bCheckSplit)
         bCheckFull = false;
@@ -2751,9 +2762,9 @@ void Search::estimateResidualQT(Mode& mo
 
     uint8_t  cbfFlag[MAX_NUM_COMPONENT][2 /*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = { { 0, 0 }, {0, 0}, {0, 0} };
     uint32_t numSig[MAX_NUM_COMPONENT][2 /*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = { { 0, 0 }, {0, 0}, {0, 0} };
-    uint32_t singleBitsComp[MAX_NUM_COMPONENT][2 /*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = { { 0, 0 }, { 0, 0 }, { 0, 0 } };