[x265] [PATCH 2 of 4] limitTU : use spatial and temporal CUs' TU depth to limit recursion
bhavna at multicorewareinc.com
bhavna at multicorewareinc.com
Thu Nov 17 12:36:08 CET 2016
# HG changeset patch
# User Bhavna Hariharan <bhavna at multicorewareinc.com>
# Date 1479365378 -19800
# Thu Nov 17 12:19:38 2016 +0530
# Node ID 07a4e4d785a69f719922129ca5997b12552bb4ab
# Parent da1c770fa6e905fe341705b3f95a201a1a31fcf9
limitTU : use spatial and temporal CUs' TU depth to limit recursion
diff -r da1c770fa6e9 -r 07a4e4d785a6 source/common/cudata.cpp
--- a/source/common/cudata.cpp Tue Nov 15 11:34:06 2016 +0530
+++ b/source/common/cudata.cpp Thu Nov 17 12:19:38 2016 +0530
@@ -295,6 +295,9 @@
/* initialize the remaining CU data in one memset */
memset(m_cuDepth, 0, (frame.m_param->internalCsp == X265_CSP_I400 ? BytesPerPartition - 11 : BytesPerPartition - 7) * m_numPartitions);
+
+ for (int8_t i = 0; i < NUM_TU_DEPTH; i++)
+ m_refTuDepth[i] = -1;
uint32_t widthInCU = m_slice->m_sps->numCuInWidth;
m_cuLeft = (m_cuAddr % widthInCU) ? m_encData->getPicCTU(m_cuAddr - 1) : NULL;
diff -r da1c770fa6e9 -r 07a4e4d785a6 source/common/cudata.h
--- a/source/common/cudata.h Tue Nov 15 11:34:06 2016 +0530
+++ b/source/common/cudata.h Thu Nov 17 12:19:38 2016 +0530
@@ -28,6 +28,8 @@
#include "slice.h"
#include "mv.h"
+#define NUM_TU_DEPTH 21
+
namespace X265_NS {
// private namespace
@@ -204,6 +206,7 @@
enum { BytesPerPartition = 21 }; // combined sizeof() of all per-part data
coeff_t* m_trCoeff[3]; // transformed coefficient buffer per plane
+ int8_t m_refTuDepth[NUM_TU_DEPTH]; // TU depth of CU at depths 0, 1 and 2
MV* m_mv[2]; // array of motion vectors per list
MV* m_mvd[2]; // array of coded motion vector deltas per list
diff -r da1c770fa6e9 -r 07a4e4d785a6 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Tue Nov 15 11:34:06 2016 +0530
+++ b/source/encoder/analysis.cpp Thu Nov 17 12:19:38 2016 +0530
@@ -203,6 +203,57 @@
return *m_modeDepth[0].bestMode;
}
+/* Predict the maximum TU depth for the CU at cuGeom from reference and neighbour
+ * CTUs by averaging m_refTuDepth[cuGeom.geomRecurId] over the CTU at parentCTU's
+ * address in the first list-0 reference (and the first list-1 reference for B
+ * slices) and over parentCTU's above, above-left, above-right and left CTUs.
+ * Returns a limit in 0..3, or -1 (no limit) when the average exceeds 2.5.
+ * NOTE(review): entries still at the -1 reset value are averaged as-is. */
+int32_t Analysis::loadTUDepth(CUGeom cuGeom, CUData parentCTU)
+{
+    float predDepth = 0;
+    uint8_t count = 0;
+    /* Temporal candidates: getPicCTU(m_cuAddr) makes the lookup follow parentCTU;
+     * dereferencing m_picCTU directly reads the same CTU for every address. */
+    const int numLists = m_slice->isInterB() ? 2 : 1;
+    for (int list = 0; list < numLists; list++)
+    {
+        const CUData* refCTU = m_slice->m_refFrameList[list][0]->m_encData->getPicCTU(parentCTU.m_cuAddr);
+        predDepth += refCTU->m_refTuDepth[cuGeom.geomRecurId];
+        count++;
+    }
+    /* Spatial candidates: the diagonal CTUs are only consulted when an above CTU exists */
+    if (parentCTU.m_cuAbove)
+    {
+        predDepth += parentCTU.m_cuAbove->m_refTuDepth[cuGeom.geomRecurId];
+        count++;
+        if (parentCTU.m_cuAboveLeft)
+        {
+            predDepth += parentCTU.m_cuAboveLeft->m_refTuDepth[cuGeom.geomRecurId];
+            count++;
+        }
+        if (parentCTU.m_cuAboveRight)
+        {
+            predDepth += parentCTU.m_cuAboveRight->m_refTuDepth[cuGeom.geomRecurId];
+            count++;
+        }
+    }
+    if (parentCTU.m_cuLeft)
+    {
+        predDepth += parentCTU.m_cuLeft->m_refTuDepth[cuGeom.geomRecurId];
+        count++;
+    }
+    predDepth /= count; // count >= 1: the list-0 candidate is always added
+    /* Thresholds on the average; a negative average also lands in the "< 1" case */
+    if (predDepth == 0)
+        return 0;
+    if (predDepth < 1)
+        return 1;
+    if (predDepth <= 1.5)
+        return 2;
+    return predDepth <= 2.5 ? 3 : -1;
+}
+
void Analysis::tryLossless(const CUGeom& cuGeom)
{
ModeDepth& md = m_modeDepth[cuGeom.depth];
@@ -326,6 +377,15 @@
checkBestMode(md.pred[PRED_INTRA_NxN], depth);
}
+ if (limitTU == X265_TU_LIMIT_NEIGH && cuGeom.log2CUSize >= 4)
+ {
+ CUData* ctu = md.bestMode->cu.m_encData->getPicCTU(parentCTU.m_cuAddr);
+ int8_t maxTUDepth = -1;
+ for (uint32_t i = 0; i < cuGeom.numPartitions; i++)
+ maxTUDepth = X265_MAX(maxTUDepth, md.bestMode->cu.m_tuDepth[i]);
+ ctu->m_refTuDepth[cuGeom.geomRecurId] = maxTUDepth;
+ }
+
if (m_bTryLossless)
tryLossless(cuGeom);
@@ -894,6 +954,9 @@
bool skipRectAmp = false;
bool chooseMerge = false;
+ if (limitTU == X265_TU_LIMIT_NEIGH && cuGeom.log2CUSize >= 4)
+ m_maxTUDepth = loadTUDepth(cuGeom, parentCTU);
+
SplitData splitData[4];
splitData[0].initSplitCUData();
splitData[1].initSplitCUData();
@@ -1400,6 +1463,17 @@
if (m_param->rdLevel)
md.bestMode->reconYuv.copyToPicYuv(reconPic, cuAddr, cuGeom.absPartIdx);
+ if (limitTU == X265_TU_LIMIT_NEIGH && cuGeom.log2CUSize >= 4)
+ {
+ if (mightNotSplit)
+ {
+ CUData* ctu = md.bestMode->cu.m_encData->getPicCTU(parentCTU.m_cuAddr);
+ int8_t maxTUDepth = -1;
+ for (uint32_t i = 0; i < cuGeom.numPartitions; i++)
+ maxTUDepth = X265_MAX(maxTUDepth, md.bestMode->cu.m_tuDepth[i]);
+ ctu->m_refTuDepth[cuGeom.geomRecurId] = maxTUDepth;
+ }
+ }
return splitCUData;
}
@@ -1424,6 +1498,9 @@
md.pred[PRED_2Nx2N].rdCost = 0;
}
+ if (limitTU == X265_TU_LIMIT_NEIGH && cuGeom.log2CUSize >= 4)
+ m_maxTUDepth = loadTUDepth(cuGeom, parentCTU);
+
SplitData splitData[4];
splitData[0].initSplitCUData();
splitData[1].initSplitCUData();
@@ -1750,7 +1827,17 @@
if (mightSplit)
addSplitFlagCost(*md.bestMode, cuGeom.depth);
}
-
+ if (limitTU == X265_TU_LIMIT_NEIGH && cuGeom.log2CUSize >= 4)
+ {
+ if (mightNotSplit)
+ {
+ CUData* ctu = md.bestMode->cu.m_encData->getPicCTU(parentCTU.m_cuAddr);
+ int8_t maxTUDepth = -1;
+ for (uint32_t i = 0; i < cuGeom.numPartitions; i++)
+ maxTUDepth = X265_MAX(maxTUDepth, md.bestMode->cu.m_tuDepth[i]);
+ ctu->m_refTuDepth[cuGeom.geomRecurId] = maxTUDepth;
+ }
+ }
/* compare split RD cost against best cost */
if (mightSplit && !skipRecursion)
checkBestMode(md.pred[PRED_SPLIT], depth);
diff -r da1c770fa6e9 -r 07a4e4d785a6 source/encoder/analysis.h
--- a/source/encoder/analysis.h Tue Nov 15 11:34:06 2016 +0530
+++ b/source/encoder/analysis.h Thu Nov 17 12:19:38 2016 +0530
@@ -116,6 +116,7 @@
void destroy();
Mode& compressCTU(CUData& ctu, Frame& frame, const CUGeom& cuGeom, const Entropy& initialContext);
+ int32_t loadTUDepth(CUGeom cuGeom, CUData parentCTU);
protected:
/* Analysis data for save/load mode, writes/reads data based on absPartIdx */
diff -r da1c770fa6e9 -r 07a4e4d785a6 source/encoder/search.cpp
--- a/source/encoder/search.cpp Tue Nov 15 11:34:06 2016 +0530
+++ b/source/encoder/search.cpp Thu Nov 17 12:19:38 2016 +0530
@@ -67,7 +67,7 @@
m_param = NULL;
m_slice = NULL;
m_frame = NULL;
- m_maxTUDepth = 0;
+ m_maxTUDepth = -1;
}
bool Search::initSearch(const x265_param& param, ScalingList& scalingList)
@@ -2625,12 +2625,17 @@
uint32_t tuDepthRange[2];
cu.getInterTUQtDepthRange(tuDepthRange, 0);
+ if (limitTU == X265_TU_LIMIT_NEIGH)
+ {
+ int maxLog2CUSize = (int)g_log2Size[m_param->maxCUSize];
+ m_maxTUDepth = x265_clip3(maxLog2CUSize - (int32_t)tuDepthRange[1], maxLog2CUSize - (int32_t)tuDepthRange[0], m_maxTUDepth);
+ }
m_entropyCoder.load(m_rqt[depth].cur);
- if (m_param->limitTU == X265_TU_LIMIT_DFS)
- m_maxTUDepth = 0;
- else if (m_param->limitTU == X265_TU_LIMIT_BFS)
+ if (limitTU == X265_TU_LIMIT_DFS)
+ m_maxTUDepth = -1;
+ else if (limitTU == X265_TU_LIMIT_BFS)
memset(&m_cacheTU, 0, sizeof(TUInfoCache));
Cost costs;
@@ -2893,7 +2898,7 @@
uint32_t ycbf = 0, ucbf = 0, vcbf = 0;
for (uint32_t qIdx = 0, qPartIdx = absPartIdx; qIdx < 4; ++qIdx, qPartIdx += qNumParts)
{
- if (m_param->limitTU == X265_TU_LIMIT_DFS && tuDepth == 0 && qIdx == 1)
+ if (limitTU == X265_TU_LIMIT_DFS && tuDepth == 0 && qIdx == 1)
{
// Fetch maximum TU depth of first sub partition to limit recursion of others
for (uint32_t i = 0; i < cuGeom.numPartitions / 4; i++)
@@ -2944,12 +2949,7 @@
bool bSaveTUData = false, bLoadTUData = false;
uint32_t idx = 0;
- if (m_param->limitTU == X265_TU_LIMIT_DFS && m_maxTUDepth)
- {
- uint32_t log2MaxTrSize = cuGeom.log2CUSize - m_maxTUDepth;
- bCheckSplit = log2TrSize > log2MaxTrSize;
- }
- else if (m_param->limitTU == X265_TU_LIMIT_BFS && splitMore >= 0)
+ if (limitTU == X265_TU_LIMIT_BFS && splitMore >= 0)
{
if (bCheckSplit && bCheckFull && tuDepth)
{
@@ -2968,6 +2968,14 @@
}
}
}
+ else if (limitTU == X265_TU_LIMIT_DFS || limitTU == X265_TU_LIMIT_NEIGH)
+ {
+ if (bCheckSplit && m_maxTUDepth >= 0)
+ {
+ uint32_t log2MaxTrSize = cuGeom.log2CUSize - m_maxTUDepth;
+ bCheckSplit = log2TrSize > log2MaxTrSize;
+ }
+ }
bool bSplitPresentFlag = bCheckSplit && bCheckFull;
@@ -3495,7 +3503,7 @@
{
if (splitCost.rdcost < fullCost.rdcost)
{
- if (m_param->limitTU == X265_TU_LIMIT_BFS)
+ if (limitTU == X265_TU_LIMIT_BFS)
{
uint32_t nextlog2TrSize = cuGeom.log2CUSize - (tuDepth + 1);
bool nextSplit = nextlog2TrSize > depthRange[0];
diff -r da1c770fa6e9 -r 07a4e4d785a6 source/encoder/search.h
--- a/source/encoder/search.h Tue Nov 15 11:34:06 2016 +0530
+++ b/source/encoder/search.h Thu Nov 17 12:19:38 2016 +0530
@@ -276,7 +276,7 @@
bool m_bFrameParallel;
uint32_t m_numLayers;
uint32_t m_refLagPixels;
- uint32_t m_maxTUDepth;
+ int32_t m_maxTUDepth;
uint16_t limitTU;
int16_t m_sliceMaxY;
More information about the x265-devel
mailing list