[x265-commits] [x265] refine tuDepth related
Satoshi Nakagawa
nakagawa424 at oki.com
Mon Dec 8 18:47:55 CET 2014
details: http://hg.videolan.org/x265/rev/53f7efef5ebd
branches:
changeset: 8953:53f7efef5ebd
user: Satoshi Nakagawa <nakagawa424 at oki.com>
date: Sat Dec 06 17:17:59 2014 +0900
description:
refine tuDepth related
diffstat:
source/common/cudata.h | 2 +-
source/encoder/analysis.cpp | 8 +-
source/encoder/entropy.cpp | 297 +++++++-----------
source/encoder/entropy.h | 25 +-
source/encoder/search.cpp | 696 ++++++++++++++++++++-----------------------
source/encoder/search.h | 32 +-
6 files changed, 469 insertions(+), 591 deletions(-)
diffs (truncated from 1989 to 300 lines):
diff -r 35d086074bb5 -r 53f7efef5ebd source/common/cudata.h
--- a/source/common/cudata.h Fri Dec 05 10:59:33 2014 -0600
+++ b/source/common/cudata.h Sat Dec 06 17:17:59 2014 +0900
@@ -192,7 +192,7 @@ public:
void setPUMv(int list, const MV& mv, int absPartIdx, int puIdx);
void setPURefIdx(int list, int8_t refIdx, int absPartIdx, int puIdx);
- uint8_t getCbf(uint32_t absPartIdx, TextType ttype, uint32_t trDepth) const { return (m_cbf[ttype][absPartIdx] >> trDepth) & 0x1; }
+ uint8_t getCbf(uint32_t absPartIdx, TextType ttype, uint32_t tuDepth) const { return (m_cbf[ttype][absPartIdx] >> tuDepth) & 0x1; }
uint8_t getQtRootCbf(uint32_t absPartIdx) const { return m_cbf[0][absPartIdx] || m_cbf[1][absPartIdx] || m_cbf[2][absPartIdx]; }
int8_t getRefQP(uint32_t currAbsIdxInCTU) const;
uint32_t getInterMergeCandidates(uint32_t absPartIdx, uint32_t puIdx, MVField (*mvFieldNeighbours)[2], uint8_t* interDirNeighbours) const;
diff -r 35d086074bb5 -r 53f7efef5ebd source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Fri Dec 05 10:59:33 2014 -0600
+++ b/source/encoder/analysis.cpp Sat Dec 06 17:17:59 2014 +0900
@@ -943,8 +943,8 @@ void Analysis::compressInterCU_rd0_4(con
uint32_t tuDepthRange[2];
cu.getIntraTUQtDepthRange(tuDepthRange, 0);
- uint32_t initTrDepth = cu.m_partSize[0] != SIZE_2Nx2N;
- residualTransformQuantIntra(*md.bestMode, cuGeom, initTrDepth, 0, tuDepthRange);
+ uint32_t initTuDepth = cu.m_partSize[0] != SIZE_2Nx2N;
+ residualTransformQuantIntra(*md.bestMode, cuGeom, initTuDepth, 0, tuDepthRange);
getBestIntraModeChroma(*md.bestMode, cuGeom);
residualQTIntraChroma(*md.bestMode, cuGeom, 0, 0);
md.bestMode->reconYuv.copyFromPicYuv(*m_frame->m_reconPic, cu.m_cuAddr, cuGeom.encodeIdx); // TODO:
@@ -1682,8 +1682,8 @@ void Analysis::encodeResidue(const CUDat
uint32_t tuDepthRange[2];
cu.getIntraTUQtDepthRange(tuDepthRange, 0);
- uint32_t initTrDepth = cu.m_partSize[0] != SIZE_2Nx2N;
- residualTransformQuantIntra(*bestMode, cuGeom, initTrDepth, 0, tuDepthRange);
+ uint32_t initTuDepth = cu.m_partSize[0] != SIZE_2Nx2N;
+ residualTransformQuantIntra(*bestMode, cuGeom, initTuDepth, 0, tuDepthRange);
getBestIntraModeChroma(*bestMode, cuGeom);
residualQTIntraChroma(*bestMode, cuGeom, 0, 0);
}
diff -r 35d086074bb5 -r 53f7efef5ebd source/encoder/entropy.cpp
--- a/source/encoder/entropy.cpp Fri Dec 05 10:59:33 2014 -0600
+++ b/source/encoder/entropy.cpp Sat Dec 06 17:17:59 2014 +0900
@@ -529,10 +529,10 @@ void Entropy::encodeCU(const CUData& ctu
if (!cuUnsplitFlag)
{
- uint32_t qNumParts = (NUM_CU_PARTITIONS >> (depth << 1)) >> 2;
- for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++, absPartIdx += qNumParts)
+ uint32_t qNumParts = cuGeom.numPartitions >> 2;
+ for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
{
- const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);
+ const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + qIdx);
if (childGeom.flags & CUGeom::PRESENT)
encodeCU(ctu, childGeom, absPartIdx, depth + 1, bEncodeDQP);
}
@@ -545,11 +545,10 @@ void Entropy::encodeCU(const CUData& ctu
if (depth < ctu.m_cuDepth[absPartIdx] && depth < g_maxCUDepth)
{
- uint32_t qNumParts = (NUM_CU_PARTITIONS >> (depth << 1)) >> 2;
-
- for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++, absPartIdx += qNumParts)
+ uint32_t qNumParts = cuGeom.numPartitions >> 2;
+ for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
{
- const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);
+ const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + qIdx);
encodeCU(ctu, childGeom, absPartIdx, depth + 1, bEncodeDQP);
}
return;
@@ -582,7 +581,7 @@ void Entropy::encodeCU(const CUData& ctu
ctu.getInterTUQtDepthRange(tuDepthRange, absPartIdx);
// Encode Coefficients, allow codeCoeff() to modify bEncodeDQP
- codeCoeff(ctu, absPartIdx, depth, bEncodeDQP, tuDepthRange);
+ codeCoeff(ctu, absPartIdx, bEncodeDQP, tuDepthRange);
// --- write terminating bit ---
finishCU(ctu, absPartIdx, depth);
@@ -619,41 +618,18 @@ void Entropy::finishCU(const CUData& ctu
}
}
-void Entropy::encodeTransform(const CUData& cu, CoeffCodeState& state, uint32_t offsetLuma, uint32_t offsetChroma, uint32_t absPartIdx,
- uint32_t absPartIdxStep, uint32_t depth, uint32_t log2TrSize, uint32_t trIdx, bool& bCodeDQP, uint32_t depthRange[2])
+void Entropy::encodeTransform(const CUData& cu, uint32_t absPartIdx, uint32_t tuDepth, uint32_t log2TrSize,
+ bool& bCodeDQP, const uint32_t depthRange[2])
{
- const bool subdiv = cu.m_tuDepth[absPartIdx] + cu.m_cuDepth[absPartIdx] > (uint8_t)depth;
- uint32_t hChromaShift = cu.m_hChromaShift;
- uint32_t vChromaShift = cu.m_vChromaShift;
- uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, trIdx);
- uint32_t cbfU = cu.getCbf(absPartIdx, TEXT_CHROMA_U, trIdx);
- uint32_t cbfV = cu.getCbf(absPartIdx, TEXT_CHROMA_V, trIdx);
-
- if (!trIdx)
- state.bakAbsPartIdxCU = absPartIdx;
-
- if (log2TrSize == 2 && cu.m_chromaFormat != X265_CSP_I444)
- {
- uint32_t partNum = NUM_CU_PARTITIONS >> ((depth - 1) << 1);
- if (!(absPartIdx & (partNum - 1)))
- {
- state.bakAbsPartIdx = absPartIdx;
- state.bakChromaOffset = offsetChroma;
- }
- else if ((absPartIdx & (partNum - 1)) == (partNum - 1))
- {
- cbfU = cu.getCbf(state.bakAbsPartIdx, TEXT_CHROMA_U, trIdx);
- cbfV = cu.getCbf(state.bakAbsPartIdx, TEXT_CHROMA_V, trIdx);
- }
- }
+ const bool subdiv = cu.m_tuDepth[absPartIdx] > tuDepth;
/* in each of these conditions, the subdiv flag is implied and not signaled,
* so we have checks to make sure the implied value matches our intentions */
- if (cu.isIntra(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && depth == cu.m_cuDepth[absPartIdx])
+ if (cu.isIntra(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && !tuDepth)
{
X265_CHECK(subdiv, "intra NxN requires TU depth below CU depth\n");
}
- else if (cu.isInter(absPartIdx) && (cu.m_partSize[absPartIdx] != SIZE_2Nx2N) && depth == cu.m_cuDepth[absPartIdx] &&
+ else if (cu.isInter(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && !tuDepth &&
cu.m_slice->m_sps->quadtreeTUMaxDepthInter == 1)
{
X265_CHECK(subdiv, "inter TU must be smaller than CU when not 2Nx2N part size: log2TrSize %d, depthRange[0] %d\n", log2TrSize, depthRange[0]);
@@ -672,127 +648,111 @@ void Entropy::encodeTransform(const CUDa
codeTransformSubdivFlag(subdiv, 5 - log2TrSize);
}
- const uint32_t trDepthCurr = depth - cu.m_cuDepth[absPartIdx];
- const bool bFirstCbfOfCU = trDepthCurr == 0;
-
- bool mCodeAll = true;
- const uint32_t numPels = 1 << (log2TrSize * 2 - hChromaShift - vChromaShift);
- if (numPels < (MIN_TU_SIZE * MIN_TU_SIZE))
- mCodeAll = false;
-
- if (bFirstCbfOfCU || mCodeAll)
+ uint32_t hChromaShift = cu.m_hChromaShift;
+ uint32_t vChromaShift = cu.m_vChromaShift;
+ bool bSmallChroma = (log2TrSize - hChromaShift < 2);
+ if (!tuDepth || !bSmallChroma)
{
- uint32_t tuSize = 1 << log2TrSize;
- if (bFirstCbfOfCU || cu.getCbf(absPartIdx, TEXT_CHROMA_U, trDepthCurr - 1))
- codeQtCbf(cu, absPartIdx, absPartIdxStep, (tuSize >> hChromaShift), (tuSize >> vChromaShift), TEXT_CHROMA_U, trDepthCurr, (subdiv == 0));
- if (bFirstCbfOfCU || cu.getCbf(absPartIdx, TEXT_CHROMA_V, trDepthCurr - 1))
- codeQtCbf(cu, absPartIdx, absPartIdxStep, (tuSize >> hChromaShift), (tuSize >> vChromaShift), TEXT_CHROMA_V, trDepthCurr, (subdiv == 0));
+ if (!tuDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_U, tuDepth - 1))
+ codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_U, tuDepth, !subdiv);
+ if (!tuDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_V, tuDepth - 1))
+ codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_V, tuDepth, !subdiv);
}
else
{
- X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_U, trDepthCurr) == cu.getCbf(absPartIdx, TEXT_CHROMA_U, trDepthCurr - 1), "chroma xform size match failure\n");
- X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_V, trDepthCurr) == cu.getCbf(absPartIdx, TEXT_CHROMA_V, trDepthCurr - 1), "chroma xform size match failure\n");
+ X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_U, tuDepth) == cu.getCbf(absPartIdx, TEXT_CHROMA_U, tuDepth - 1), "chroma xform size match failure\n");
+ X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_V, tuDepth) == cu.getCbf(absPartIdx, TEXT_CHROMA_V, tuDepth - 1), "chroma xform size match failure\n");
}
if (subdiv)
{
- log2TrSize--;
- uint32_t numCoeff = 1 << (log2TrSize * 2);
- uint32_t numCoeffC = (numCoeff >> (hChromaShift + vChromaShift));
- trIdx++;
- ++depth;
- absPartIdxStep >>= 2;
- const uint32_t partNum = NUM_CU_PARTITIONS >> (depth << 1);
+ --log2TrSize;
+ ++tuDepth;
- encodeTransform(cu, state, offsetLuma, offsetChroma, absPartIdx, absPartIdxStep, depth, log2TrSize, trIdx, bCodeDQP, depthRange);
+ uint32_t qNumParts = 1 << (log2TrSize - LOG2_UNIT_SIZE) * 2;
- absPartIdx += partNum;
- offsetLuma += numCoeff;
- offsetChroma += numCoeffC;
- encodeTransform(cu, state, offsetLuma, offsetChroma, absPartIdx, absPartIdxStep, depth, log2TrSize, trIdx, bCodeDQP, depthRange);
+ encodeTransform(cu, absPartIdx + 0 * qNumParts, tuDepth, log2TrSize, bCodeDQP, depthRange);
+ encodeTransform(cu, absPartIdx + 1 * qNumParts, tuDepth, log2TrSize, bCodeDQP, depthRange);
+ encodeTransform(cu, absPartIdx + 2 * qNumParts, tuDepth, log2TrSize, bCodeDQP, depthRange);
+ encodeTransform(cu, absPartIdx + 3 * qNumParts, tuDepth, log2TrSize, bCodeDQP, depthRange);
+ return;
+ }
- absPartIdx += partNum;
- offsetLuma += numCoeff;
- offsetChroma += numCoeffC;
- encodeTransform(cu, state, offsetLuma, offsetChroma, absPartIdx, absPartIdxStep, depth, log2TrSize, trIdx, bCodeDQP, depthRange);
+ uint32_t absPartIdxC = bSmallChroma ? absPartIdx & 0xFC : absPartIdx;
- absPartIdx += partNum;
- offsetLuma += numCoeff;
- offsetChroma += numCoeffC;
- encodeTransform(cu, state, offsetLuma, offsetChroma, absPartIdx, absPartIdxStep, depth, log2TrSize, trIdx, bCodeDQP, depthRange);
+ if (cu.isInter(absPartIdxC) && !tuDepth && !cu.getCbf(absPartIdxC, TEXT_CHROMA_U, 0) && !cu.getCbf(absPartIdxC, TEXT_CHROMA_V, 0))
+ {
+ X265_CHECK(cu.getCbf(absPartIdxC, TEXT_LUMA, 0), "CBF should have been set\n");
+ }
+ else
+ codeQtCbfLuma(cu, absPartIdx, tuDepth);
+
+ uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, tuDepth);
+ uint32_t cbfU = cu.getCbf(absPartIdxC, TEXT_CHROMA_U, tuDepth);
+ uint32_t cbfV = cu.getCbf(absPartIdxC, TEXT_CHROMA_V, tuDepth);
+ if (!(cbfY || cbfU || cbfV))
+ return;
+
+ // dQP: only for CTU once
+ if (cu.m_slice->m_pps->bUseDQP && bCodeDQP)
+ {
+ uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
+ uint32_t absPartIdxLT = absPartIdx & (0xFF << (log2CUSize - LOG2_UNIT_SIZE) * 2);
+ codeDeltaQP(cu, absPartIdxLT);
+ bCodeDQP = false;
+ }
+
+ if (cbfY)
+ {
+ uint32_t coeffOffset = absPartIdx << (LOG2_UNIT_SIZE * 2);
+ codeCoeffNxN(cu, cu.m_trCoeff[0] + coeffOffset, absPartIdx, log2TrSize, TEXT_LUMA);
+ if (!(cbfU || cbfV))
+ return;
+ }
+
+ if (bSmallChroma)
+ {
+ if ((absPartIdx & 3) != 3)
+ return;
+
+ const uint32_t log2TrSizeC = 2;
+ const bool splitIntoSubTUs = (cu.m_chromaFormat == X265_CSP_I422);
+ const uint32_t curPartNum = 4;
+ uint32_t coeffOffsetC = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
+ for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
+ {
+ TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdxC);
+ const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
+ do
+ {
+ if (cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, tuDepth + splitIntoSubTUs))
+ {
+ uint32_t subTUOffset = tuIterator.section << (log2TrSizeC * 2);
+ codeCoeffNxN(cu, coeffChroma + coeffOffsetC + subTUOffset, tuIterator.absPartIdxTURelCU, log2TrSizeC, (TextType)chromaId);
+ }
+ }
+ while (tuIterator.isNextSection());
+ }
}
else
{
- if (cu.isInter(absPartIdx) && depth == cu.m_cuDepth[absPartIdx] && !cu.getCbf(absPartIdx, TEXT_CHROMA_U, 0) && !cu.getCbf(absPartIdx, TEXT_CHROMA_V, 0))
+ uint32_t log2TrSizeC = log2TrSize - hChromaShift;
+ const bool splitIntoSubTUs = (cu.m_chromaFormat == X265_CSP_I422);
+ uint32_t curPartNum = 1 << (log2TrSize - LOG2_UNIT_SIZE) * 2;
+ uint32_t coeffOffsetC = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
+ for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
{
- X265_CHECK(cu.getCbf(absPartIdx, TEXT_LUMA, 0), "CBF should have been set\n");
- }
- else
- codeQtCbf(cu, absPartIdx, TEXT_LUMA, cu.m_tuDepth[absPartIdx]);
-
- if (cbfY || cbfU || cbfV)
- {
- // dQP: only for CTU once
- if (cu.m_slice->m_pps->bUseDQP)
+ TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdxC);
+ const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
+ do
{
- if (bCodeDQP)
+ if (cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, tuDepth + splitIntoSubTUs))
{
- codeDeltaQP(cu, state.bakAbsPartIdxCU);
- bCodeDQP = false;
+ uint32_t subTUOffset = tuIterator.section << (log2TrSizeC * 2);
+ codeCoeffNxN(cu, coeffChroma + coeffOffsetC + subTUOffset, tuIterator.absPartIdxTURelCU, log2TrSizeC, (TextType)chromaId);
}
}
- }
- if (cbfY)
- codeCoeffNxN(cu, cu.m_trCoeff[0] + offsetLuma, absPartIdx, log2TrSize, TEXT_LUMA);
-
- int chFmt = cu.m_chromaFormat;
- if (log2TrSize == 2 && chFmt != X265_CSP_I444)
- {
- uint32_t partNum = NUM_CU_PARTITIONS >> ((depth - 1) << 1);
- if ((absPartIdx & (partNum - 1)) == (partNum - 1))
- {
- const uint32_t log2TrSizeC = 2;
- const bool splitIntoSubTUs = (chFmt == X265_CSP_I422);
-
- uint32_t curPartNum = NUM_CU_PARTITIONS >> ((depth - 1) << 1);
-
- for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
- {
- TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, state.bakAbsPartIdx);
More information about the x265-commits mailing list