[x265-commits] [x265] cmake: nit
Steve Borho
steve at borho.org
Mon Oct 6 00:42:34 CEST 2014
details: http://hg.videolan.org/x265/rev/fc856c00d49b
branches:
changeset: 8226:fc856c00d49b
user: Steve Borho <steve at borho.org>
date: Sun Oct 05 15:55:26 2014 -0500
description:
cmake: nit
Subject: [x265] threading: nits
details: http://hg.videolan.org/x265/rev/1867fb89298c
branches:
changeset: 8227:1867fb89298c
user: Steve Borho <steve at borho.org>
date: Sun Oct 05 15:55:35 2014 -0500
description:
threading: nits
Subject: [x265] threadpool: nit
details: http://hg.videolan.org/x265/rev/ed5b9320afca
branches:
changeset: 8228:ed5b9320afca
user: Steve Borho <steve at borho.org>
date: Sun Oct 05 17:01:50 2014 -0500
description:
threadpool: nit
Subject: [x265] entropy: give each Search instance its own set of RD contexts
details: http://hg.videolan.org/x265/rev/5420f2a29522
branches:
changeset: 8229:5420f2a29522
user: Steve Borho <steve at borho.org>
date: Sun Oct 05 10:40:48 2014 -0500
description:
entropy: give each Search instance its own set of RD contexts
This gives each ThreadLocalData a complete set of working contexts so each
thread can measure RD cost (for the same row) independently of one another. There
were contention problems with the 'temp' and 'rqtRoot' and 'rqtTest' contexts.
For this to work we have to sync the 'cur' context to the slave prior
to it performing any RD measurements.
This commit finally removes the CI_IDX enums and uses a simple struct to hold
the contexts per depth; and the member variables were renamed from
"m_rdEntropyCoders" to "m_rdContexts" since these coders are only ever used to
save and restore CABAC state (never to code with).
This change exposed a bug. The next patch adds some tools to catch this class of
bug and the patch after that fixes it.
Subject: [x265] entropy: add a mechanism to detect reads without writes in checked builds
details: http://hg.videolan.org/x265/rev/ead3d26c7747
branches:
changeset: 8230:ead3d26c7747
user: Steve Borho <steve at borho.org>
date: Sun Oct 05 11:47:26 2014 -0500
description:
entropy: add a mechanism to detect reads without writes in checked builds
Subject: [x265] analysis: fix CABAC context state handling after splits [CHANGES OUTPUTS]
details: http://hg.videolan.org/x265/rev/d07fbd3bdecc
branches:
changeset: 8231:d07fbd3bdecc
user: Steve Borho <steve at borho.org>
date: Sun Oct 05 17:16:19 2014 -0500
description:
analysis: fix CABAC context state handling after splits [CHANGES OUTPUTS]
In RDlevel<=4, if split is chosen then copy depth+1 'next' to depth 'next'.
This fixes a long-standing bug in presets slow and above, and improves
compression efficiency.
diffstat:
source/CMakeLists.txt | 2 +-
source/Lib/TLibCommon/TypeDef.h | 12 ---
source/common/threading.cpp | 6 -
source/common/threadpool.cpp | 7 +-
source/encoder/analysis.cpp | 137 +++++++++++++++++++++------------------
source/encoder/analysis.h | 2 +-
source/encoder/entropy.cpp | 12 +++-
source/encoder/entropy.h | 9 ++-
source/encoder/frameencoder.cpp | 28 ++-----
source/encoder/frameencoder.h | 9 +-
source/encoder/framefilter.cpp | 4 +-
source/encoder/sao.cpp | 36 +++++-----
source/encoder/sao.h | 9 ++-
source/encoder/search.cpp | 94 ++++++++++++++++-----------
source/encoder/search.h | 16 ++++-
15 files changed, 207 insertions(+), 176 deletions(-)
diffs (truncated from 1269 to 300 lines):
diff -r 997b210ab94a -r d07fbd3bdecc source/CMakeLists.txt
--- a/source/CMakeLists.txt Sun Oct 05 10:28:25 2014 -0500
+++ b/source/CMakeLists.txt Sun Oct 05 17:16:19 2014 -0500
@@ -59,7 +59,7 @@ if(UNIX)
SET(PLATFORM_LIBS pthread)
find_library(LIBRT rt)
if(LIBRT)
- SET(PLATFORM_LIBS ${PLATFORM_LIBS} rt)
+ set(PLATFORM_LIBS ${PLATFORM_LIBS} rt)
endif()
endif(UNIX)
diff -r 997b210ab94a -r d07fbd3bdecc source/Lib/TLibCommon/TypeDef.h
--- a/source/Lib/TLibCommon/TypeDef.h Sun Oct 05 10:28:25 2014 -0500
+++ b/source/Lib/TLibCommon/TypeDef.h Sun Oct 05 17:16:19 2014 -0500
@@ -78,18 +78,6 @@ enum TextType
MAX_NUM_COMPONENT = 3
};
-// index for SBAC based RD optimization
-enum CI_IDX
-{
- CI_CURR_BEST = 0, // best mode index
- CI_NEXT_BEST, // next best index
- CI_TEMP_BEST, // temporal index
- CI_QT_TRAFO_TEST,
- CI_QT_TRAFO_ROOT,
- CI_NUM, // total number
- CI_NUM_SAO = 3,
-};
-
// motion vector predictor direction used in AMVP
enum MVP_DIR
{
diff -r 997b210ab94a -r d07fbd3bdecc source/common/threading.cpp
--- a/source/common/threading.cpp Sun Oct 05 10:28:25 2014 -0500
+++ b/source/common/threading.cpp Sun Oct 05 17:16:19 2014 -0500
@@ -56,17 +56,13 @@ bool Thread::start()
void Thread::stop()
{
if (this->thread)
- {
WaitForSingleObject(this->thread, INFINITE);
- }
}
Thread::~Thread()
{
if (this->thread)
- {
CloseHandle(this->thread);
- }
}
#else /* POSIX / pthreads */
@@ -96,9 +92,7 @@ bool Thread::start()
void Thread::stop()
{
if (this->thread)
- {
pthread_join(this->thread, NULL);
- }
}
Thread::~Thread() {}
diff -r 997b210ab94a -r d07fbd3bdecc source/common/threadpool.cpp
--- a/source/common/threadpool.cpp Sun Oct 05 10:28:25 2014 -0500
+++ b/source/common/threadpool.cpp Sun Oct 05 17:16:19 2014 -0500
@@ -122,15 +122,12 @@ public:
int getThreadCount() const { return m_numThreads; }
+ bool IsValid() const { return m_ok; }
+
void release();
void Stop();
- bool IsValid() const
- {
- return m_ok;
- }
-
void enqueueJobProvider(JobProvider &);
void dequeueJobProvider(JobProvider &);
diff -r 997b210ab94a -r d07fbd3bdecc source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Sun Oct 05 10:28:25 2014 -0500
+++ b/source/encoder/analysis.cpp Sun Oct 05 17:16:19 2014 -0500
@@ -287,14 +287,13 @@ void Analysis::parallelAnalysisJob(int t
slave = &m_tld[threadId].analysis;
slave->m_me.setSourcePlane(fenc->getLumaAddr(), fenc->getStride());
slave->m_log = &slave->m_sliceTypeLog[cu->m_slice->m_sliceType];
- slave->m_rdEntropyCoders = this->m_rdEntropyCoders;
m_origYuv[0]->copyPartToYuv(slave->m_origYuv[depth], m_curCUData->encodeIdx);
slave->setQP(cu->m_slice, m_rdCost.m_qp);
if (!jobId || m_param->rdLevel > 4)
{
slave->m_quant.setQPforQuant(cu);
slave->m_quant.m_nr = m_quant.m_nr;
- slave->m_rdEntropyCoders[depth][CI_CURR_BEST].load(m_rdEntropyCoders[depth][CI_CURR_BEST]);
+ slave->m_rdContexts[depth].cur.load(m_rdContexts[depth].cur);
}
}
@@ -413,12 +412,15 @@ void Analysis::parallelME(int threadId,
}
}
-void Analysis::compressCU(TComDataCU* cu)
+void Analysis::compressCTU(TComDataCU* ctu, const Entropy& initialContext)
{
- Frame* pic = cu->m_pic;
- uint32_t cuAddr = cu->m_cuAddr;
+ Frame* pic = ctu->m_pic;
+ uint32_t cuAddr = ctu->m_cuAddr;
- if (cu->m_slice->m_pps->bUseDQP)
+ invalidateContexts(0);
+ m_rdContexts[0].cur.load(initialContext);
+
+ if (ctu->m_slice->m_pps->bUseDQP)
m_bEncodeDQP = true;
// initialize CU data
@@ -426,27 +428,27 @@ void Analysis::compressCU(TComDataCU* cu
m_tempCU[0]->initCU(pic, cuAddr);
// analysis of CU
- uint32_t numPartition = cu->m_cuLocalData->numPartitions;
+ uint32_t numPartition = ctu->m_cuLocalData->numPartitions;
if (m_bestCU[0]->m_slice->m_sliceType == I_SLICE)
{
if (m_param->analysisMode == X265_ANALYSIS_LOAD && pic->m_intraData)
{
uint32_t zOrder = 0;
- compressSharedIntraCTU(m_bestCU[0], m_tempCU[0], false, cu->m_cuLocalData,
- &pic->m_intraData->depth[cuAddr * cu->m_numPartitions],
- &pic->m_intraData->partSizes[cuAddr * cu->m_numPartitions],
- &pic->m_intraData->modes[cuAddr * cu->m_numPartitions], zOrder);
+ compressSharedIntraCTU(m_bestCU[0], m_tempCU[0], false, ctu->m_cuLocalData,
+ &pic->m_intraData->depth[cuAddr * ctu->m_numPartitions],
+ &pic->m_intraData->partSizes[cuAddr * ctu->m_numPartitions],
+ &pic->m_intraData->modes[cuAddr * ctu->m_numPartitions], zOrder);
}
else
{
- compressIntraCU(m_bestCU[0], m_tempCU[0], false, cu->m_cuLocalData);
+ compressIntraCU(m_bestCU[0], m_tempCU[0], false, ctu->m_cuLocalData);
if (m_param->analysisMode == X265_ANALYSIS_SAVE && pic->m_intraData)
{
- memcpy(&pic->m_intraData->depth[cuAddr * cu->m_numPartitions], m_bestCU[0]->getDepth(), sizeof(uint8_t) * numPartition);
- memcpy(&pic->m_intraData->modes[cuAddr * cu->m_numPartitions], m_bestCU[0]->getLumaIntraDir(), sizeof(uint8_t) * numPartition);
- memcpy(&pic->m_intraData->partSizes[cuAddr * cu->m_numPartitions], m_bestCU[0]->getPartitionSize(), sizeof(char) * numPartition);
+ memcpy(&pic->m_intraData->depth[cuAddr * ctu->m_numPartitions], m_bestCU[0]->getDepth(), sizeof(uint8_t) * numPartition);
+ memcpy(&pic->m_intraData->modes[cuAddr * ctu->m_numPartitions], m_bestCU[0]->getLumaIntraDir(), sizeof(uint8_t) * numPartition);
+ memcpy(&pic->m_intraData->partSizes[cuAddr * ctu->m_numPartitions], m_bestCU[0]->getPartitionSize(), sizeof(char) * numPartition);
pic->m_intraData->cuAddr[cuAddr] = cuAddr;
- pic->m_intraData->poc[cuAddr] = cu->m_pic->m_POC;
+ pic->m_intraData->poc[cuAddr] = ctu->m_pic->m_POC;
}
}
if (m_param->bLogCuStats || m_param->rc.bStatWrite)
@@ -455,18 +457,18 @@ void Analysis::compressCU(TComDataCU* cu
do
{
m_log->totalCu++;
- uint32_t depth = cu->getDepth(i);
+ uint32_t depth = ctu->getDepth(i);
int next = numPartition >> (depth * 2);
m_log->qTreeIntraCnt[depth]++;
- if (depth == g_maxCUDepth && cu->getPartitionSize(i) != SIZE_2Nx2N)
+ if (depth == g_maxCUDepth && ctu->getPartitionSize(i) != SIZE_2Nx2N)
m_log->cntIntraNxN++;
else
{
m_log->cntIntra[depth]++;
- if (cu->getLumaIntraDir(i) > 1)
+ if (ctu->getLumaIntraDir(i) > 1)
m_log->cuIntraDistribution[depth][ANGULAR_MODE_ID]++;
else
- m_log->cuIntraDistribution[depth][cu->getLumaIntraDir(i)]++;
+ m_log->cuIntraDistribution[depth][ctu->getLumaIntraDir(i)]++;
}
i += next;
}
@@ -481,20 +483,20 @@ void Analysis::compressCU(TComDataCU* cu
/* At the start of analysis, the best CU is a null pointer
* On return, it points to the CU encode with best chosen mode */
- compressInterCU_rd0_4(outBestCU, m_tempCU[0], cu, 0, cu->m_cuLocalData, false, 0, 4);
+ compressInterCU_rd0_4(outBestCU, m_tempCU[0], ctu, 0, ctu->m_cuLocalData, false, 0, 4);
}
else
- compressInterCU_rd5_6(m_bestCU[0], m_tempCU[0], 0, cu->m_cuLocalData);
+ compressInterCU_rd5_6(m_bestCU[0], m_tempCU[0], 0, ctu->m_cuLocalData);
if (m_param->bLogCuStats || m_param->rc.bStatWrite)
{
uint32_t i = 0;
do
{
- uint32_t depth = cu->getDepth(i);
+ uint32_t depth = ctu->getDepth(i);
m_log->cntTotalCu[depth]++;
int next = numPartition >> (depth * 2);
- if (cu->isSkipped(i))
+ if (ctu->isSkipped(i))
{
m_log->cntSkipCu[depth]++;
m_log->qTreeSkipCnt[depth]++;
@@ -502,29 +504,29 @@ void Analysis::compressCU(TComDataCU* cu
else
{
m_log->totalCu++;
- if (cu->getPredictionMode(0) == MODE_INTER)
+ if (ctu->getPredictionMode(0) == MODE_INTER)
{
m_log->cntInter[depth]++;
m_log->qTreeInterCnt[depth]++;
- if (cu->getPartitionSize(0) < AMP_ID)
- m_log->cuInterDistribution[depth][cu->getPartitionSize(0)]++;
+ if (ctu->getPartitionSize(0) < AMP_ID)
+ m_log->cuInterDistribution[depth][ctu->getPartitionSize(0)]++;
else
m_log->cuInterDistribution[depth][AMP_ID]++;
}
- else if (cu->getPredictionMode(0) == MODE_INTRA)
+ else if (ctu->getPredictionMode(0) == MODE_INTRA)
{
m_log->qTreeIntraCnt[depth]++;
- if (depth == g_maxCUDepth && cu->getPartitionSize(0) == SIZE_NxN)
+ if (depth == g_maxCUDepth && ctu->getPartitionSize(0) == SIZE_NxN)
{
m_log->cntIntraNxN++;
}
else
{
m_log->cntIntra[depth]++;
- if (cu->getLumaIntraDir(0) > 1)
+ if (ctu->getLumaIntraDir(0) > 1)
m_log->cuIntraDistribution[depth][ANGULAR_MODE_ID]++;
else
- m_log->cuIntraDistribution[depth][cu->getLumaIntraDir(0)]++;
+ m_log->cuIntraDistribution[depth][ctu->getLumaIntraDir(0)]++;
}
}
}
@@ -583,7 +585,8 @@ void Analysis::compressIntraCU(TComDataC
// further split
if (cu_split_flag)
{
- uint32_t nextDepth = depth + 1;
+ uint32_t nextDepth = depth + 1;
+ invalidateContexts(nextDepth);
TComDataCU* subBestPartCU = m_bestCU[nextDepth];
TComDataCU* subTempPartCU = m_tempCU[nextDepth];
for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++)
@@ -594,10 +597,10 @@ void Analysis::compressIntraCU(TComDataC
if (child_cu->flags & CU::PRESENT)
{
subTempPartCU->initSubCU(outTempCU, child_cu, partUnitIdx, nextDepth, qp); // clear sub partition datas or init.
- if (0 == partUnitIdx) //initialize RD with previous depth buffer
- m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[depth][CI_CURR_BEST]);
+ if (!partUnitIdx)
+ m_rdContexts[nextDepth].cur.load(m_rdContexts[depth].cur);
else
- m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[nextDepth][CI_NEXT_BEST]);
+ m_rdContexts[nextDepth].cur.load(m_rdContexts[nextDepth].next);
compressIntraCU(subBestPartCU, subTempPartCU, nextDepth, child_cu);
outTempCU->copyPartFrom(subBestPartCU, child_cu, partUnitIdx, nextDepth); // Keep best part data to current temporary data.
@@ -645,7 +648,7 @@ void Analysis::compressIntraCU(TComDataC
outTempCU->setQPSubParts(outTempCU->getRefQP(targetPartIdx), 0, depth); // set QP to default QP
}
- m_rdEntropyCoders[nextDepth][CI_NEXT_BEST].store(m_rdEntropyCoders[depth][CI_TEMP_BEST]);
+ m_rdContexts[nextDepth].next.store(m_rdContexts[depth].temp);
checkBestMode(outBestCU, outTempCU, depth); // RD compare current CU against split
}
@@ -718,7 +721,8 @@ void Analysis::compressSharedIntraCTU(TC
// further split
if (cu_split_flag && bSubBranch)
{
- uint32_t nextDepth = depth + 1;
+ uint32_t nextDepth = depth + 1;
+ invalidateContexts(nextDepth);
TComDataCU* subBestPartCU = m_bestCU[nextDepth];
TComDataCU* subTempPartCU = m_tempCU[nextDepth];
for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++)
@@ -731,9 +735,9 @@ void Analysis::compressSharedIntraCTU(TC
subTempPartCU->initSubCU(outTempCU, child_cu, partUnitIdx, nextDepth, qp); // clear sub partition datas or init.
if (partUnitIdx) // initialize RD with previous depth buffer
- m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[nextDepth][CI_NEXT_BEST]);
+ m_rdContexts[nextDepth].cur.load(m_rdContexts[nextDepth].next);
else
- m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[depth][CI_CURR_BEST]);
+ m_rdContexts[nextDepth].cur.load(m_rdContexts[depth].cur);
More information about the x265-commits
mailing list