[x265-commits] [x265] TEncCU: cleanup

Tue Mar 18 02:52:25 CET 2014

details:   http://hg.videolan.org/x265/rev/cf92f28e5f93
branches:  
changeset: 6525:cf92f28e5f93
user:      Deepthi Nandakumar <deepthi at multicorewareinc.com>
date:      Mon Mar 17 19:53:41 2014 +0530
description:
TEncCU: cleanup
Subject: [x265] TEncCU: fix previous bad patch import

details:   http://hg.videolan.org/x265/rev/8dbcfae4dffc
branches:  
changeset: 6526:8dbcfae4dffc
user:      Deepthi Nandakumar <deepthi at multicorewareinc.com>
date:      Tue Mar 18 05:39:35 2014 +0530
description:
TEncCU: fix previous bad patch import
Subject: [x265] threading: introduce ThreadSafeInteger class

details:   http://hg.videolan.org/x265/rev/c0155c7bb6ca
branches:  
changeset: 6527:c0155c7bb6ca
user:      Steve Borho <steve at borho.org>
date:      Mon Mar 17 15:27:27 2014 -0500
description:
threading: introduce ThreadSafeInteger class

This class uses a condition variable to implement a producer/consumer access
protocol with a single writer and multiple readers for safe multi-core
synchronization.
Subject: [x265] y4m: fix copy-paste bug in range checks

details:   http://hg.videolan.org/x265/rev/c688c11a0f12
branches:  
changeset: 6528:c688c11a0f12
user:      Steve Borho <steve at borho.org>
date:      Mon Mar 17 16:07:42 2014 -0500
description:
y4m: fix copy-paste bug in range checks
Subject: [x265] encoder: cleanup nits

details:   http://hg.videolan.org/x265/rev/6627c821ca1f
branches:  
changeset: 6529:6627c821ca1f
user:      Steve Borho <steve at borho.org>
date:      Mon Mar 17 15:27:40 2014 -0500
description:
encoder: cleanup nits
Subject: [x265] cleanup unused m_mvField{A,B,C}

details:   http://hg.videolan.org/x265/rev/7dbae9022757
branches:  
changeset: 6530:7dbae9022757
user:      Satoshi Nakagawa <nakagawa424 at oki.com>
date:      Sun Mar 16 17:45:55 2014 +0900
description:
cleanup unused m_mvField{A,B,C}
Subject: [x265] cleanup m_pattern

details:   http://hg.videolan.org/x265/rev/d600c8f8f036
branches:  
changeset: 6531:d600c8f8f036
user:      Satoshi Nakagawa <nakagawa424 at oki.com>
date:      Mon Mar 17 19:28:51 2014 +0900
description:
cleanup m_pattern
Subject: [x265] optimize: rewrite TEncBinCABAC::encodeBin

details:   http://hg.videolan.org/x265/rev/3bbcf9f8a701
branches:  
changeset: 6532:3bbcf9f8a701
user:      Min Chen <chenm003 at 163.com>
date:      Mon Mar 17 18:12:43 2014 -0700
description:
optimize: rewrite TEncBinCABAC::encodeBin
Subject: [x265] DC only for HIGH_BIT_DEPTH

details:   http://hg.videolan.org/x265/rev/7b86d42683be
branches:  
changeset: 6533:7b86d42683be
user:      Satoshi Nakagawa <nakagawa424 at oki.com>
date:      Sun Mar 16 23:37:56 2014 +0900
description:
DC only for HIGH_BIT_DEPTH

diffstat:

 source/Lib/TLibCommon/ContextTables.h        |    2 +-
 source/Lib/TLibCommon/TComDataCU.cpp         |    3 -
 source/Lib/TLibCommon/TComDataCU.h           |   12 --
 source/Lib/TLibCommon/TComPattern.h          |   26 +++---
 source/Lib/TLibCommon/TComPic.cpp            |    2 +-
 source/Lib/TLibCommon/TComPic.h              |    2 +-
 source/Lib/TLibCommon/TComRom.cpp            |   12 --
 source/Lib/TLibCommon/TComRom.h              |    1 -
 source/Lib/TLibCommon/TComTrQuant.cpp        |   11 +-
 source/Lib/TLibEncoder/TEncBinCoderCABAC.cpp |   58 ++++++++-----
 source/Lib/TLibEncoder/TEncBinCoderCABAC.h   |    1 -
 source/Lib/TLibEncoder/TEncCu.cpp            |   11 --
 source/Lib/TLibEncoder/TEncCu.h              |    1 -
 source/Lib/TLibEncoder/TEncSbac.cpp          |    2 +-
 source/Lib/TLibEncoder/TEncSbac.h            |    4 +-
 source/Lib/TLibEncoder/TEncSearch.cpp        |   14 +-
 source/common/threading.h                    |  111 +++++++++++++++++++++++++++
 source/encoder/compress.cpp                  |    6 +-
 source/encoder/dpb.cpp                       |    2 +-
 source/encoder/encoder.cpp                   |   14 ---
 source/encoder/encoder.h                     |    9 +-
 source/encoder/frameencoder.cpp              |   32 +-----
 source/encoder/frameencoder.h                |    4 -
 source/encoder/framefilter.cpp               |    3 +-
 source/input/y4m.cpp                         |    2 +-
 25 files changed, 196 insertions(+), 149 deletions(-)

diffs (truncated from 791 to 300 lines):

diff -r 8d5deb7cafd8 -r 7b86d42683be source/Lib/TLibCommon/ContextTables.h

--- a/source/Lib/TLibCommon/ContextTables.h	Mon Mar 17 00:47:24 2014 -0500
+++ b/source/Lib/TLibCommon/ContextTables.h	Sun Mar 16 23:37:56 2014 +0900
@@ -136,7 +136,7 @@ typedef struct ContextModel
     uint8_t bBinsCoded;
 } ContextModel;
 
-extern const int     g_entropyBits[128];
+extern const uint32_t g_entropyBits[128];
 extern const uint8_t g_nextState[128][2];
 uint8_t sbacInit(int qp, int initValue);   ///< initialize state with initial probability
 
diff -r 8d5deb7cafd8 -r 7b86d42683be source/Lib/TLibCommon/TComDataCU.cpp
--- a/source/Lib/TLibCommon/TComDataCU.cpp	Mon Mar 17 00:47:24 2014 -0500
+++ b/source/Lib/TLibCommon/TComDataCU.cpp	Sun Mar 16 23:37:56 2014 +0900
@@ -87,7 +87,6 @@ TComDataCU::TComDataCU()
     m_iPCMSampleY = NULL;
     m_iPCMSampleCb = NULL;
     m_iPCMSampleCr = NULL;
-    m_pattern = NULL;
     m_cuAboveLeft = NULL;
     m_cuAboveRight = NULL;
     m_cuAbove = NULL;
@@ -157,7 +156,6 @@ bool TComDataCU::create(uint32_t numPart
     CHECKED_MALLOC(m_iPCMSampleY, Pel, width * height);
     CHECKED_MALLOC(m_iPCMSampleCb, Pel, (width >> m_hChromaShift) * (height >> m_vChromaShift));
     CHECKED_MALLOC(m_iPCMSampleCr, Pel, (width >> m_hChromaShift) * (height >> m_vChromaShift));
-    CHECKED_MALLOC(m_pattern, TComPattern, 1);
 
     memset(m_partSizes, SIZE_NONE, numPartition * sizeof(*m_partSizes));
     return ok;
@@ -169,7 +167,6 @@ fail:
 
 void TComDataCU::destroy()
 {
-    X265_FREE(m_pattern);
     X265_FREE(m_qp);
     X265_FREE(m_depth);
     X265_FREE(m_cuSize);
diff -r 8d5deb7cafd8 -r 7b86d42683be source/Lib/TLibCommon/TComDataCU.h
--- a/source/Lib/TLibCommon/TComDataCU.h	Mon Mar 17 00:47:24 2014 -0500
+++ b/source/Lib/TLibCommon/TComDataCU.h	Sun Mar 16 23:37:56 2014 +0900
@@ -86,7 +86,6 @@ private:
 
     TComPic*      m_pic;            ///< picture class pointer
     TComSlice*    m_slice;          ///< slice header pointer
-    TComPattern*  m_pattern;        ///< neighbor access class pointer
 
     // -------------------------------------------------------------------------------------------------------------------
     // CU description
@@ -134,9 +133,6 @@ private:
     TComDataCU*   m_cuAbove;         ///< pointer of above CU
     TComDataCU*   m_cuLeft;          ///< pointer of left CU
     TComDataCU*   m_cuColocated[2];  ///< pointer of temporally colocated CU's for both directions
-    TComMvField   m_mvFieldA;        ///< motion vector of position A
-    TComMvField   m_mvFieldB;        ///< motion vector of position B
-    TComMvField   m_mvFieldC;        ///< motion vector of position C
 
     // -------------------------------------------------------------------------------------------------------------------
     // coding tool information
@@ -217,8 +213,6 @@ public:
 
     uint32_t      getCUPelY()                      { return m_cuPelY; }
 
-    TComPattern*  getPattern()                     { return m_pattern; }
-
     UChar*        getDepth()                       { return m_depth; }
 
     UChar         getDepth(uint32_t idx)           { return m_depth[idx]; }
@@ -381,12 +375,6 @@ public:
 
     void          clipMv(MV& outMV);
 
-    void          getMvPredLeft(MV& mvPred)       { mvPred = m_mvFieldA.mv; }
-
-    void          getMvPredAbove(MV& mvPred)      { mvPred = m_mvFieldB.mv; }
-
-    void          getMvPredAboveRight(MV& mvPred) { mvPred = m_mvFieldC.mv; }
-
     // -------------------------------------------------------------------------------------------------------------------
     // utility functions for neighboring information
     // -------------------------------------------------------------------------------------------------------------------
diff -r 8d5deb7cafd8 -r 7b86d42683be source/Lib/TLibCommon/TComPattern.h
--- a/source/Lib/TLibCommon/TComPattern.h	Mon Mar 17 00:47:24 2014 -0500
+++ b/source/Lib/TLibCommon/TComPattern.h	Sun Mar 16 23:37:56 2014 +0900
@@ -68,29 +68,29 @@ public:
     // -------------------------------------------------------------------------------------------------------------------
 
     /// set parameters from pixel buffers for accessing neighboring pixels
-    void initAdiPattern(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth, pixel* adiBuf,
-                        int strideOrig, int heightOrig, pixel* refAbove, pixel* refLeft,
-                        pixel* refAboveFlt, pixel* refLeftFlt);
+    static void initAdiPattern(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth, pixel* adiBuf,
+                               int strideOrig, int heightOrig, pixel* refAbove, pixel* refLeft,
+                               pixel* refAboveFlt, pixel* refLeftFlt);
 
     /// set luma parameters from CU data for accessing ADI data
-    void  initAdiPattern(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth, pixel* adiBuf,
-                         int strideOrig, int heightOrig);
+    static void  initAdiPattern(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth, pixel* adiBuf,
+                                int strideOrig, int heightOrig);
 
     /// set chroma parameters from CU data for accessing ADI data
-    void  initAdiPatternChroma(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth,
-                               pixel* adiBuf, int strideOrig, int heightOrig, int chromaId);
+    static void  initAdiPatternChroma(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth,
+                                      pixel* adiBuf, int strideOrig, int heightOrig, int chromaId);
 
 private:
 
     /// padding of unavailable reference samples for intra prediction
-    void fillReferenceSamples(pixel* roiOrigin, pixel* adiTemp, bool* bNeighborFlags, int numIntraNeighbor, int unitSize, int numUnitsInCU, int totalUnits, uint32_t cuWidth, uint32_t cuHeight, uint32_t width, uint32_t height, int picStride);
+    static void fillReferenceSamples(pixel* roiOrigin, pixel* adiTemp, bool* bNeighborFlags, int numIntraNeighbor, int unitSize, int numUnitsInCU, int totalUnits, uint32_t cuWidth, uint32_t cuHeight, uint32_t width, uint32_t height, int picStride);
 
     /// constrained intra prediction
-    bool  isAboveLeftAvailable(TComDataCU* cu, uint32_t partIdxLT);
-    int   isAboveAvailable(TComDataCU* cu, uint32_t partIdxLT, uint32_t partIdxRT, bool* bValidFlags);
-    int   isLeftAvailable(TComDataCU* cu, uint32_t partIdxLT, uint32_t partIdxLB, bool* bValidFlags);
-    int   isAboveRightAvailable(TComDataCU* cu, uint32_t partIdxLT, uint32_t partIdxRT, bool* bValidFlags);
-    int   isBelowLeftAvailable(TComDataCU* cu, uint32_t partIdxLT, uint32_t partIdxLB, bool* bValidFlags);
+    static bool  isAboveLeftAvailable(TComDataCU* cu, uint32_t partIdxLT);
+    static int   isAboveAvailable(TComDataCU* cu, uint32_t partIdxLT, uint32_t partIdxRT, bool* bValidFlags);
+    static int   isLeftAvailable(TComDataCU* cu, uint32_t partIdxLT, uint32_t partIdxLB, bool* bValidFlags);
+    static int   isAboveRightAvailable(TComDataCU* cu, uint32_t partIdxLT, uint32_t partIdxRT, bool* bValidFlags);
+    static int   isBelowLeftAvailable(TComDataCU* cu, uint32_t partIdxLT, uint32_t partIdxLB, bool* bValidFlags);
 };
 }
 //! \}
diff -r 8d5deb7cafd8 -r 7b86d42683be source/Lib/TLibCommon/TComPic.cpp
--- a/source/Lib/TLibCommon/TComPic.cpp	Mon Mar 17 00:47:24 2014 -0500
+++ b/source/Lib/TLibCommon/TComPic.cpp	Sun Mar 16 23:37:56 2014 +0900
@@ -66,7 +66,7 @@ TComPic::TComPic()
     , m_cuCostsForVbv(NULL)
     , m_intraCuCostsForVbv(NULL)
 {
-    m_reconRowCount = 0;
+    m_reconRowCount.set(0);
     m_countRefEncoders = 0;
     memset(&m_lowres, 0, sizeof(m_lowres));
     m_next = NULL;
diff -r 8d5deb7cafd8 -r 7b86d42683be source/Lib/TLibCommon/TComPic.h
--- a/source/Lib/TLibCommon/TComPic.h	Mon Mar 17 00:47:24 2014 -0500
+++ b/source/Lib/TLibCommon/TComPic.h	Sun Mar 16 23:37:56 2014 +0900
@@ -77,7 +77,7 @@ private:
 public:
 
     //** Frame Parallelism - notification between FrameEncoders of available motion reference rows **
-    volatile uint32_t     m_reconRowCount;      // count of CTU rows completely reconstructed and extended for motion reference
+    ThreadSafeInteger     m_reconRowCount;      // count of CTU rows completely reconstructed and extended for motion reference
     volatile uint32_t     m_countRefEncoders;   // count of FrameEncoder threads monitoring m_reconRowCount
     void*                 m_userData;           // user provided pointer passed in with this picture
 
diff -r 8d5deb7cafd8 -r 7b86d42683be source/Lib/TLibCommon/TComRom.cpp
--- a/source/Lib/TLibCommon/TComRom.cpp	Mon Mar 17 00:47:24 2014 -0500
+++ b/source/Lib/TLibCommon/TComRom.cpp	Sun Mar 16 23:37:56 2014 +0900
@@ -573,18 +573,6 @@ const uint8_t g_lpsTable[64][4] =
     {   2,   2,   2,   2 }
 };
 
-const uint8_t g_renormTable[32] =
-{
-    6,  5,  4,  4,
-    3,  3,  3,  3,
-    2,  2,  2,  2,
-    2,  2,  2,  2,
-    1,  1,  1,  1,
-    1,  1,  1,  1,
-    1,  1,  1,  1,
-    1,  1,  1,  1
-};
-
 const uint8_t x265_exp2_lut[64] =
 {
     0,  3,  6,  8,  11, 14,  17,  20,  23,  26,  29,  32,  36,  39,  42,  45,
diff -r 8d5deb7cafd8 -r 7b86d42683be source/Lib/TLibCommon/TComRom.h
--- a/source/Lib/TLibCommon/TComRom.h	Mon Mar 17 00:47:24 2014 -0500
+++ b/source/Lib/TLibCommon/TComRom.h	Sun Mar 16 23:37:56 2014 +0900
@@ -272,7 +272,6 @@ extern const double x265_lambda2_tab_I[M
 extern const double x265_lambda2_non_I[MAX_MAX_QP + 1];
 // CABAC tables
 extern const uint8_t g_lpsTable[64][4];
-extern const uint8_t g_renormTable[32];
 extern const uint8_t x265_exp2_lut[64];
 }
 
diff -r 8d5deb7cafd8 -r 7b86d42683be source/Lib/TLibCommon/TComTrQuant.cpp
--- a/source/Lib/TLibCommon/TComTrQuant.cpp	Mon Mar 17 00:47:24 2014 -0500
+++ b/source/Lib/TLibCommon/TComTrQuant.cpp	Sun Mar 16 23:37:56 2014 +0900
@@ -402,18 +402,19 @@ void TComTrQuant::invtransformNxN(bool t
 
         const uint32_t log2BlockSize = log2TrSize - 2;
 
-#if HIGH_BIT_DEPTH
-        lastPos = !lastPos; // prevent warning
-#else
         // DC only
         if (lastPos == 0 && !((trSize == 4) && (mode != REG_DCT)))
         {
-            int dc_val = (((m_tmpCoeff[0] * 64 + 64) >> 7) * 64 + 2048) >> 12;
+            const int shift_1st = 7;
+            const int add_1st = 1 << (shift_1st - 1);
+            const int shift_2nd = 12 - (X265_DEPTH - 8);
+            const int add_2nd = 1 << (shift_2nd - 1);
+
+            int dc_val = (((m_tmpCoeff[0] * 64 + add_1st) >> shift_1st) * 64 + add_2nd) >> shift_2nd;
             primitives.blockfill_s[log2BlockSize](residual, stride, dc_val);
 
             return;
         }
-#endif
 
         // TODO: this may need larger data types for X265_DEPTH > 8
         primitives.idct[IDCT_4x4 + log2BlockSize - ((trSize == 4) && (mode != REG_DCT))](m_tmpCoeff, residual, stride);
diff -r 8d5deb7cafd8 -r 7b86d42683be source/Lib/TLibEncoder/TEncBinCoderCABAC.cpp
--- a/source/Lib/TLibEncoder/TEncBinCoderCABAC.cpp	Mon Mar 17 00:47:24 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncBinCoderCABAC.cpp	Sun Mar 16 23:37:56 2014 +0900
@@ -37,6 +37,7 @@
 
 #include "TEncBinCoderCABAC.h"
 #include "TLibCommon/TComRom.h"
+#include "threading.h"  // CLZ32
 
 using namespace x265;
 
@@ -180,7 +181,7 @@ void TEncBinCABAC::encodeBin(uint32_t bi
 
     uint32_t mstate = ctxModel.m_state;
 
-    ctxModel.m_state = sbacNext(ctxModel.m_state, binValue);
+    ctxModel.m_state = sbacNext(mstate, binValue);
 
     if (m_bIsCounter)
     {
@@ -194,25 +195,32 @@ void TEncBinCABAC::encodeBin(uint32_t bi
     uint32_t lps = g_lpsTable[state][(m_range >> 6) & 3];
     m_range -= lps;
 
-    int numBits = g_renormTable[lps >> 3];
+    assert(lps != 0);
+
+    int numBits = (uint32_t)(m_range - 256) >> 31;
+    uint32_t low = m_low;
+    uint32_t range = m_range;
     if (binValue != mps)
     {
-        m_low     = (m_low + m_range) << numBits;
-        m_range   = lps << numBits;
+        // NOTE: lps is non-zero and the maximum of idx is 8 because lps less than 256
+        //numBits   = g_renormTable[lps >> 3];
+        unsigned long idx;
+        CLZ32(idx, lps);
+        numBits = 8 - idx;
+        if (numBits >= 6)
+            numBits = 6;
+
+        low    += range;
+        range   = lps;
     }
-    else
-    {
-        if (m_range >= 256)
-        {
-            return;
-        }
-        numBits = 1;
-        m_low <<= 1;
-        m_range <<= 1;
-    }
+    m_low = (low << numBits);
+    m_range = (range << numBits);
     m_bitsLeft += numBits;
 
-    testAndWriteOut();
+    if (m_bitsLeft >= 0)
+    {
+        writeOut();
+    }
 }
 
 /**
@@ -240,7 +248,10 @@ void TEncBinCABAC::encodeBinEP(uint32_t 
     }
     m_bitsLeft++;
 
-    testAndWriteOut();
+    if (m_bitsLeft >= 0)
+    {
+        writeOut();
+    }
 }
 
 /**
@@ -274,14 +285,20 @@ void TEncBinCABAC::encodeBinsEP(uint32_t
         binValues -= pattern << numBins;
         m_bitsLeft += 8;
 
-        testAndWriteOut();
+        if (m_bitsLeft >= 0)
+        {
+            writeOut();