[x265-commits] [x265] cmake: nit

Mon Oct 6 00:42:34 CEST 2014

details:   http://hg.videolan.org/x265/rev/fc856c00d49b
branches:  
changeset: 8226:fc856c00d49b
user:      Steve Borho <steve at borho.org>
date:      Sun Oct 05 15:55:26 2014 -0500
description:
cmake: nit
Subject: [x265] threading: nits

details:   http://hg.videolan.org/x265/rev/1867fb89298c
branches:  
changeset: 8227:1867fb89298c
user:      Steve Borho <steve at borho.org>
date:      Sun Oct 05 15:55:35 2014 -0500
description:
threading: nits
Subject: [x265] threadpool: nit

details:   http://hg.videolan.org/x265/rev/ed5b9320afca
branches:  
changeset: 8228:ed5b9320afca
user:      Steve Borho <steve at borho.org>
date:      Sun Oct 05 17:01:50 2014 -0500
description:
threadpool: nit
Subject: [x265] entropy: give each Search instance its own set of RD contexts

details:   http://hg.videolan.org/x265/rev/5420f2a29522
branches:  
changeset: 8229:5420f2a29522
user:      Steve Borho <steve at borho.org>
date:      Sun Oct 05 10:40:48 2014 -0500
description:
entropy: give each Search instance its own set of RD contexts

This gives each ThreadLocalData a complete set of working contexts so each
thread can measure RD cost (for the same row) independently of one another. There
were contention problems with the 'temp', 'rqtRoot' and 'rqtTest' contexts.

For this to work we have to sync the 'cur' context to the slave prior
to it performing any RD measurements.

This commit finally removes the CI_IDX enum and uses a simple struct to hold
the contexts per depth. The member variables were renamed from
"m_rdEntropyCoders" to "m_rdContexts" since these coders are only ever used to
save and restore CABAC state (never to code with).

This change exposed a bug. The next patch adds some tools to catch this class of
bug and the patch after that fixes it.
Subject: [x265] entropy: add a mechanism to detect reads without writes in checked builds

details:   http://hg.videolan.org/x265/rev/ead3d26c7747
branches:  
changeset: 8230:ead3d26c7747
user:      Steve Borho <steve at borho.org>
date:      Sun Oct 05 11:47:26 2014 -0500
description:
entropy: add a mechanism to detect reads without writes in checked builds
Subject: [x265] analysis: fix CABAC context state handling after splits [CHANGES OUTPUTS]

details:   http://hg.videolan.org/x265/rev/d07fbd3bdecc
branches:  
changeset: 8231:d07fbd3bdecc
user:      Steve Borho <steve at borho.org>
date:      Sun Oct 05 17:16:19 2014 -0500
description:
analysis: fix CABAC context state handling after splits [CHANGES OUTPUTS]

At rdLevel <= 4, if split is chosen, copy the depth+1 'next' context to the depth 'next' context.

This fixes a long-standing bug in the slow preset and above, and improves
compression efficiency.

diffstat:

 source/CMakeLists.txt           |    2 +-
 source/Lib/TLibCommon/TypeDef.h |   12 ---
 source/common/threading.cpp     |    6 -
 source/common/threadpool.cpp    |    7 +-
 source/encoder/analysis.cpp     |  137 +++++++++++++++++++++------------------
 source/encoder/analysis.h       |    2 +-
 source/encoder/entropy.cpp      |   12 +++-
 source/encoder/entropy.h        |    9 ++-
 source/encoder/frameencoder.cpp |   28 ++-----
 source/encoder/frameencoder.h   |    9 +-
 source/encoder/framefilter.cpp  |    4 +-
 source/encoder/sao.cpp          |   36 +++++-----
 source/encoder/sao.h            |    9 ++-
 source/encoder/search.cpp       |   94 ++++++++++++++++-----------
 source/encoder/search.h         |   16 ++++-
 15 files changed, 207 insertions(+), 176 deletions(-)

diffs (truncated from 1269 to 300 lines):

diff -r 997b210ab94a -r d07fbd3bdecc source/CMakeLists.txt

--- a/source/CMakeLists.txt	Sun Oct 05 10:28:25 2014 -0500
+++ b/source/CMakeLists.txt	Sun Oct 05 17:16:19 2014 -0500
@@ -59,7 +59,7 @@ if(UNIX)
     SET(PLATFORM_LIBS pthread)
     find_library(LIBRT rt)
     if(LIBRT)
-        SET(PLATFORM_LIBS ${PLATFORM_LIBS} rt)
+        set(PLATFORM_LIBS ${PLATFORM_LIBS} rt)
     endif()
 endif(UNIX)
 
diff -r 997b210ab94a -r d07fbd3bdecc source/Lib/TLibCommon/TypeDef.h
--- a/source/Lib/TLibCommon/TypeDef.h	Sun Oct 05 10:28:25 2014 -0500
+++ b/source/Lib/TLibCommon/TypeDef.h	Sun Oct 05 17:16:19 2014 -0500
@@ -78,18 +78,6 @@ enum TextType
     MAX_NUM_COMPONENT = 3
 };
 
-// index for SBAC based RD optimization
-enum CI_IDX
-{
-    CI_CURR_BEST = 0,   // best mode index
-    CI_NEXT_BEST,       // next best index
-    CI_TEMP_BEST,       // temporal index
-    CI_QT_TRAFO_TEST,
-    CI_QT_TRAFO_ROOT,
-    CI_NUM,             // total number
-    CI_NUM_SAO   = 3,
-};
-
 // motion vector predictor direction used in AMVP
 enum MVP_DIR
 {
diff -r 997b210ab94a -r d07fbd3bdecc source/common/threading.cpp
--- a/source/common/threading.cpp	Sun Oct 05 10:28:25 2014 -0500
+++ b/source/common/threading.cpp	Sun Oct 05 17:16:19 2014 -0500
@@ -56,17 +56,13 @@ bool Thread::start()
 void Thread::stop()
 {
     if (this->thread)
-    {
         WaitForSingleObject(this->thread, INFINITE);
-    }
 }
 
 Thread::~Thread()
 {
     if (this->thread)
-    {
         CloseHandle(this->thread);
-    }
 }
 
 #else /* POSIX / pthreads */
@@ -96,9 +92,7 @@ bool Thread::start()
 void Thread::stop()
 {
     if (this->thread)
-    {
         pthread_join(this->thread, NULL);
-    }
 }
 
 Thread::~Thread() {}
diff -r 997b210ab94a -r d07fbd3bdecc source/common/threadpool.cpp
--- a/source/common/threadpool.cpp	Sun Oct 05 10:28:25 2014 -0500
+++ b/source/common/threadpool.cpp	Sun Oct 05 17:16:19 2014 -0500
@@ -122,15 +122,12 @@ public:
 
     int getThreadCount() const { return m_numThreads; }
 
+    bool IsValid() const       { return m_ok; }
+
     void release();
 
     void Stop();
 
-    bool IsValid() const
-    {
-        return m_ok;
-    }
-
     void enqueueJobProvider(JobProvider &);
 
     void dequeueJobProvider(JobProvider &);
diff -r 997b210ab94a -r d07fbd3bdecc source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Sun Oct 05 10:28:25 2014 -0500
+++ b/source/encoder/analysis.cpp	Sun Oct 05 17:16:19 2014 -0500
@@ -287,14 +287,13 @@ void Analysis::parallelAnalysisJob(int t
         slave = &m_tld[threadId].analysis;
         slave->m_me.setSourcePlane(fenc->getLumaAddr(), fenc->getStride());
         slave->m_log = &slave->m_sliceTypeLog[cu->m_slice->m_sliceType];
-        slave->m_rdEntropyCoders = this->m_rdEntropyCoders;
         m_origYuv[0]->copyPartToYuv(slave->m_origYuv[depth], m_curCUData->encodeIdx);
         slave->setQP(cu->m_slice, m_rdCost.m_qp);
         if (!jobId || m_param->rdLevel > 4)
         {
             slave->m_quant.setQPforQuant(cu);
             slave->m_quant.m_nr = m_quant.m_nr;
-            slave->m_rdEntropyCoders[depth][CI_CURR_BEST].load(m_rdEntropyCoders[depth][CI_CURR_BEST]);
+            slave->m_rdContexts[depth].cur.load(m_rdContexts[depth].cur);
         }
     }
 
@@ -413,12 +412,15 @@ void Analysis::parallelME(int threadId, 
     }
 }
 
-void Analysis::compressCU(TComDataCU* cu)
+void Analysis::compressCTU(TComDataCU* ctu, const Entropy& initialContext)
 {
-    Frame* pic = cu->m_pic;
-    uint32_t cuAddr = cu->m_cuAddr;
+    Frame* pic = ctu->m_pic;
+    uint32_t cuAddr = ctu->m_cuAddr;
 
-    if (cu->m_slice->m_pps->bUseDQP)
+    invalidateContexts(0);
+    m_rdContexts[0].cur.load(initialContext);
+
+    if (ctu->m_slice->m_pps->bUseDQP)
         m_bEncodeDQP = true;
 
     // initialize CU data
@@ -426,27 +428,27 @@ void Analysis::compressCU(TComDataCU* cu
     m_tempCU[0]->initCU(pic, cuAddr);
 
     // analysis of CU
-    uint32_t numPartition = cu->m_cuLocalData->numPartitions;
+    uint32_t numPartition = ctu->m_cuLocalData->numPartitions;
     if (m_bestCU[0]->m_slice->m_sliceType == I_SLICE)
     {
         if (m_param->analysisMode == X265_ANALYSIS_LOAD && pic->m_intraData)
         {
             uint32_t zOrder = 0;
-            compressSharedIntraCTU(m_bestCU[0], m_tempCU[0], false, cu->m_cuLocalData, 
-                &pic->m_intraData->depth[cuAddr * cu->m_numPartitions],
-                &pic->m_intraData->partSizes[cuAddr * cu->m_numPartitions],
-                &pic->m_intraData->modes[cuAddr * cu->m_numPartitions], zOrder);
+            compressSharedIntraCTU(m_bestCU[0], m_tempCU[0], false, ctu->m_cuLocalData, 
+                &pic->m_intraData->depth[cuAddr * ctu->m_numPartitions],
+                &pic->m_intraData->partSizes[cuAddr * ctu->m_numPartitions],
+                &pic->m_intraData->modes[cuAddr * ctu->m_numPartitions], zOrder);
         }
         else
         {
-            compressIntraCU(m_bestCU[0], m_tempCU[0], false, cu->m_cuLocalData);
+            compressIntraCU(m_bestCU[0], m_tempCU[0], false, ctu->m_cuLocalData);
             if (m_param->analysisMode == X265_ANALYSIS_SAVE && pic->m_intraData)
             {
-                memcpy(&pic->m_intraData->depth[cuAddr * cu->m_numPartitions], m_bestCU[0]->getDepth(), sizeof(uint8_t) * numPartition);
-                memcpy(&pic->m_intraData->modes[cuAddr * cu->m_numPartitions], m_bestCU[0]->getLumaIntraDir(), sizeof(uint8_t) * numPartition);
-                memcpy(&pic->m_intraData->partSizes[cuAddr * cu->m_numPartitions], m_bestCU[0]->getPartitionSize(), sizeof(char) * numPartition);
+                memcpy(&pic->m_intraData->depth[cuAddr * ctu->m_numPartitions], m_bestCU[0]->getDepth(), sizeof(uint8_t) * numPartition);
+                memcpy(&pic->m_intraData->modes[cuAddr * ctu->m_numPartitions], m_bestCU[0]->getLumaIntraDir(), sizeof(uint8_t) * numPartition);
+                memcpy(&pic->m_intraData->partSizes[cuAddr * ctu->m_numPartitions], m_bestCU[0]->getPartitionSize(), sizeof(char) * numPartition);
                 pic->m_intraData->cuAddr[cuAddr] = cuAddr;
-                pic->m_intraData->poc[cuAddr]    = cu->m_pic->m_POC;
+                pic->m_intraData->poc[cuAddr]    = ctu->m_pic->m_POC;
             }
         }
         if (m_param->bLogCuStats || m_param->rc.bStatWrite)
@@ -455,18 +457,18 @@ void Analysis::compressCU(TComDataCU* cu
             do
             {
                 m_log->totalCu++;
-                uint32_t depth = cu->getDepth(i);
+                uint32_t depth = ctu->getDepth(i);
                 int next = numPartition >> (depth * 2);
                 m_log->qTreeIntraCnt[depth]++;
-                if (depth == g_maxCUDepth && cu->getPartitionSize(i) != SIZE_2Nx2N)
+                if (depth == g_maxCUDepth && ctu->getPartitionSize(i) != SIZE_2Nx2N)
                     m_log->cntIntraNxN++;
                 else
                 {
                     m_log->cntIntra[depth]++;
-                    if (cu->getLumaIntraDir(i) > 1)
+                    if (ctu->getLumaIntraDir(i) > 1)
                         m_log->cuIntraDistribution[depth][ANGULAR_MODE_ID]++;
                     else
-                        m_log->cuIntraDistribution[depth][cu->getLumaIntraDir(i)]++;
+                        m_log->cuIntraDistribution[depth][ctu->getLumaIntraDir(i)]++;
                 }
                 i += next;
             }
@@ -481,20 +483,20 @@ void Analysis::compressCU(TComDataCU* cu
 
             /* At the start of analysis, the best CU is a null pointer
              * On return, it points to the CU encode with best chosen mode */
-            compressInterCU_rd0_4(outBestCU, m_tempCU[0], cu, 0, cu->m_cuLocalData, false, 0, 4);
+            compressInterCU_rd0_4(outBestCU, m_tempCU[0], ctu, 0, ctu->m_cuLocalData, false, 0, 4);
         }
         else
-            compressInterCU_rd5_6(m_bestCU[0], m_tempCU[0], 0, cu->m_cuLocalData);
+            compressInterCU_rd5_6(m_bestCU[0], m_tempCU[0], 0, ctu->m_cuLocalData);
 
         if (m_param->bLogCuStats || m_param->rc.bStatWrite)
         {
             uint32_t i = 0;
             do
             {
-                uint32_t depth = cu->getDepth(i);
+                uint32_t depth = ctu->getDepth(i);
                 m_log->cntTotalCu[depth]++;
                 int next = numPartition >> (depth * 2);
-                if (cu->isSkipped(i))
+                if (ctu->isSkipped(i))
                 {
                     m_log->cntSkipCu[depth]++;
                     m_log->qTreeSkipCnt[depth]++;
@@ -502,29 +504,29 @@ void Analysis::compressCU(TComDataCU* cu
                 else
                 {
                     m_log->totalCu++;
-                    if (cu->getPredictionMode(0) == MODE_INTER)
+                    if (ctu->getPredictionMode(0) == MODE_INTER)
                     {
                         m_log->cntInter[depth]++;
                         m_log->qTreeInterCnt[depth]++;
-                        if (cu->getPartitionSize(0) < AMP_ID)
-                            m_log->cuInterDistribution[depth][cu->getPartitionSize(0)]++;
+                        if (ctu->getPartitionSize(0) < AMP_ID)
+                            m_log->cuInterDistribution[depth][ctu->getPartitionSize(0)]++;
                         else
                             m_log->cuInterDistribution[depth][AMP_ID]++;
                     }
-                    else if (cu->getPredictionMode(0) == MODE_INTRA)
+                    else if (ctu->getPredictionMode(0) == MODE_INTRA)
                     {
                         m_log->qTreeIntraCnt[depth]++;
-                        if (depth == g_maxCUDepth && cu->getPartitionSize(0) == SIZE_NxN)
+                        if (depth == g_maxCUDepth && ctu->getPartitionSize(0) == SIZE_NxN)
                         {
                             m_log->cntIntraNxN++;
                         }
                         else
                         {
                             m_log->cntIntra[depth]++;
-                            if (cu->getLumaIntraDir(0) > 1)
+                            if (ctu->getLumaIntraDir(0) > 1)
                                 m_log->cuIntraDistribution[depth][ANGULAR_MODE_ID]++;
                             else
-                                m_log->cuIntraDistribution[depth][cu->getLumaIntraDir(0)]++;
+                                m_log->cuIntraDistribution[depth][ctu->getLumaIntraDir(0)]++;
                         }
                     }
                 }
@@ -583,7 +585,8 @@ void Analysis::compressIntraCU(TComDataC
     // further split
     if (cu_split_flag)
     {
-        uint32_t    nextDepth     = depth + 1;
+        uint32_t nextDepth = depth + 1;
+        invalidateContexts(nextDepth);
         TComDataCU* subBestPartCU = m_bestCU[nextDepth];
         TComDataCU* subTempPartCU = m_tempCU[nextDepth];
         for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++)
@@ -594,10 +597,10 @@ void Analysis::compressIntraCU(TComDataC
             if (child_cu->flags & CU::PRESENT)
             {
                 subTempPartCU->initSubCU(outTempCU, child_cu, partUnitIdx, nextDepth, qp); // clear sub partition datas or init.
-                if (0 == partUnitIdx) //initialize RD with previous depth buffer
-                    m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[depth][CI_CURR_BEST]);
+                if (!partUnitIdx)
+                    m_rdContexts[nextDepth].cur.load(m_rdContexts[depth].cur);
                 else
-                    m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[nextDepth][CI_NEXT_BEST]);
+                    m_rdContexts[nextDepth].cur.load(m_rdContexts[nextDepth].next);
 
                 compressIntraCU(subBestPartCU, subTempPartCU, nextDepth, child_cu);
                 outTempCU->copyPartFrom(subBestPartCU, child_cu, partUnitIdx, nextDepth); // Keep best part data to current temporary data.
@@ -645,7 +648,7 @@ void Analysis::compressIntraCU(TComDataC
                 outTempCU->setQPSubParts(outTempCU->getRefQP(targetPartIdx), 0, depth); // set QP to default QP
         }
 
-        m_rdEntropyCoders[nextDepth][CI_NEXT_BEST].store(m_rdEntropyCoders[depth][CI_TEMP_BEST]);
+        m_rdContexts[nextDepth].next.store(m_rdContexts[depth].temp);
         checkBestMode(outBestCU, outTempCU, depth); // RD compare current CU against split
     }
 
@@ -718,7 +721,8 @@ void Analysis::compressSharedIntraCTU(TC
     // further split
     if (cu_split_flag && bSubBranch)
     {
-        uint32_t    nextDepth     = depth + 1;
+        uint32_t nextDepth = depth + 1;
+        invalidateContexts(nextDepth);
         TComDataCU* subBestPartCU = m_bestCU[nextDepth];
         TComDataCU* subTempPartCU = m_tempCU[nextDepth];
         for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++)
@@ -731,9 +735,9 @@ void Analysis::compressSharedIntraCTU(TC
                 subTempPartCU->initSubCU(outTempCU, child_cu, partUnitIdx, nextDepth, qp); // clear sub partition datas or init.
 
                 if (partUnitIdx) // initialize RD with previous depth buffer
-                    m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[nextDepth][CI_NEXT_BEST]);
+                    m_rdContexts[nextDepth].cur.load(m_rdContexts[nextDepth].next);
                 else
-                    m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[depth][CI_CURR_BEST]);
+                    m_rdContexts[nextDepth].cur.load(m_rdContexts[depth].cur);