[x265] refine tuDepth related
Satoshi Nakagawa
nakagawa424 at oki.com
Sat Dec 6 09:20:37 CET 2014
# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1417853879 -32400
# Sat Dec 06 17:17:59 2014 +0900
# Node ID 94d0bc6841dd6e17dd8af83a3e0294faeb341019
# Parent 35d086074bb545d70bf8beea563f8904d324855f
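refine tuDepth related code: rename trDepth to tuDepth, drop CoeffCodeState from encodeTransform, and split codeQtCbf into codeQtCbfLuma/codeQtCbfChroma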
diff -r 35d086074bb5 -r 94d0bc6841dd source/common/cudata.h
--- a/source/common/cudata.h Fri Dec 05 10:59:33 2014 -0600
+++ b/source/common/cudata.h Sat Dec 06 17:17:59 2014 +0900
@@ -192,7 +192,7 @@
void setPUMv(int list, const MV& mv, int absPartIdx, int puIdx);
void setPURefIdx(int list, int8_t refIdx, int absPartIdx, int puIdx);
- uint8_t getCbf(uint32_t absPartIdx, TextType ttype, uint32_t trDepth) const { return (m_cbf[ttype][absPartIdx] >> trDepth) & 0x1; }
+ uint8_t getCbf(uint32_t absPartIdx, TextType ttype, uint32_t tuDepth) const { return (m_cbf[ttype][absPartIdx] >> tuDepth) & 0x1; }
uint8_t getQtRootCbf(uint32_t absPartIdx) const { return m_cbf[0][absPartIdx] || m_cbf[1][absPartIdx] || m_cbf[2][absPartIdx]; }
int8_t getRefQP(uint32_t currAbsIdxInCTU) const;
uint32_t getInterMergeCandidates(uint32_t absPartIdx, uint32_t puIdx, MVField (*mvFieldNeighbours)[2], uint8_t* interDirNeighbours) const;
diff -r 35d086074bb5 -r 94d0bc6841dd source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Fri Dec 05 10:59:33 2014 -0600
+++ b/source/encoder/analysis.cpp Sat Dec 06 17:17:59 2014 +0900
@@ -943,8 +943,8 @@
uint32_t tuDepthRange[2];
cu.getIntraTUQtDepthRange(tuDepthRange, 0);
- uint32_t initTrDepth = cu.m_partSize[0] != SIZE_2Nx2N;
- residualTransformQuantIntra(*md.bestMode, cuGeom, initTrDepth, 0, tuDepthRange);
+ uint32_t initTuDepth = cu.m_partSize[0] != SIZE_2Nx2N;
+ residualTransformQuantIntra(*md.bestMode, cuGeom, initTuDepth, 0, tuDepthRange);
getBestIntraModeChroma(*md.bestMode, cuGeom);
residualQTIntraChroma(*md.bestMode, cuGeom, 0, 0);
md.bestMode->reconYuv.copyFromPicYuv(*m_frame->m_reconPic, cu.m_cuAddr, cuGeom.encodeIdx); // TODO:
@@ -1682,8 +1682,8 @@
uint32_t tuDepthRange[2];
cu.getIntraTUQtDepthRange(tuDepthRange, 0);
- uint32_t initTrDepth = cu.m_partSize[0] != SIZE_2Nx2N;
- residualTransformQuantIntra(*bestMode, cuGeom, initTrDepth, 0, tuDepthRange);
+ uint32_t initTuDepth = cu.m_partSize[0] != SIZE_2Nx2N;
+ residualTransformQuantIntra(*bestMode, cuGeom, initTuDepth, 0, tuDepthRange);
getBestIntraModeChroma(*bestMode, cuGeom);
residualQTIntraChroma(*bestMode, cuGeom, 0, 0);
}
diff -r 35d086074bb5 -r 94d0bc6841dd source/encoder/entropy.cpp
--- a/source/encoder/entropy.cpp Fri Dec 05 10:59:33 2014 -0600
+++ b/source/encoder/entropy.cpp Sat Dec 06 17:17:59 2014 +0900
@@ -529,10 +529,10 @@
if (!cuUnsplitFlag)
{
- uint32_t qNumParts = (NUM_CU_PARTITIONS >> (depth << 1)) >> 2;
- for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++, absPartIdx += qNumParts)
+ uint32_t qNumParts = cuGeom.numPartitions >> 2;
+ for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
{
- const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);
+ const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + qIdx);
if (childGeom.flags & CUGeom::PRESENT)
encodeCU(ctu, childGeom, absPartIdx, depth + 1, bEncodeDQP);
}
@@ -545,11 +545,10 @@
if (depth < ctu.m_cuDepth[absPartIdx] && depth < g_maxCUDepth)
{
- uint32_t qNumParts = (NUM_CU_PARTITIONS >> (depth << 1)) >> 2;
-
- for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++, absPartIdx += qNumParts)
+ uint32_t qNumParts = cuGeom.numPartitions >> 2;
+ for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
{
- const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);
+ const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + qIdx);
encodeCU(ctu, childGeom, absPartIdx, depth + 1, bEncodeDQP);
}
return;
@@ -582,7 +581,7 @@
ctu.getInterTUQtDepthRange(tuDepthRange, absPartIdx);
// Encode Coefficients, allow codeCoeff() to modify bEncodeDQP
- codeCoeff(ctu, absPartIdx, depth, bEncodeDQP, tuDepthRange);
+ codeCoeff(ctu, absPartIdx, bEncodeDQP, tuDepthRange);
// --- write terminating bit ---
finishCU(ctu, absPartIdx, depth);
@@ -619,41 +618,18 @@
}
}
-void Entropy::encodeTransform(const CUData& cu, CoeffCodeState& state, uint32_t offsetLuma, uint32_t offsetChroma, uint32_t absPartIdx,
- uint32_t absPartIdxStep, uint32_t depth, uint32_t log2TrSize, uint32_t trIdx, bool& bCodeDQP, uint32_t depthRange[2])
+void Entropy::encodeTransform(const CUData& cu, uint32_t absPartIdx, uint32_t tuDepth, uint32_t log2TrSize,
+ bool& bCodeDQP, const uint32_t depthRange[2])
{
- const bool subdiv = cu.m_tuDepth[absPartIdx] + cu.m_cuDepth[absPartIdx] > (uint8_t)depth;
- uint32_t hChromaShift = cu.m_hChromaShift;
- uint32_t vChromaShift = cu.m_vChromaShift;
- uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, trIdx);
- uint32_t cbfU = cu.getCbf(absPartIdx, TEXT_CHROMA_U, trIdx);
- uint32_t cbfV = cu.getCbf(absPartIdx, TEXT_CHROMA_V, trIdx);
-
- if (!trIdx)
- state.bakAbsPartIdxCU = absPartIdx;
-
- if (log2TrSize == 2 && cu.m_chromaFormat != X265_CSP_I444)
- {
- uint32_t partNum = NUM_CU_PARTITIONS >> ((depth - 1) << 1);
- if (!(absPartIdx & (partNum - 1)))
- {
- state.bakAbsPartIdx = absPartIdx;
- state.bakChromaOffset = offsetChroma;
- }
- else if ((absPartIdx & (partNum - 1)) == (partNum - 1))
- {
- cbfU = cu.getCbf(state.bakAbsPartIdx, TEXT_CHROMA_U, trIdx);
- cbfV = cu.getCbf(state.bakAbsPartIdx, TEXT_CHROMA_V, trIdx);
- }
- }
+ const bool subdiv = cu.m_tuDepth[absPartIdx] > tuDepth;
/* in each of these conditions, the subdiv flag is implied and not signaled,
* so we have checks to make sure the implied value matches our intentions */
- if (cu.isIntra(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && depth == cu.m_cuDepth[absPartIdx])
+ if (cu.isIntra(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && !tuDepth)
{
X265_CHECK(subdiv, "intra NxN requires TU depth below CU depth\n");
}
- else if (cu.isInter(absPartIdx) && (cu.m_partSize[absPartIdx] != SIZE_2Nx2N) && depth == cu.m_cuDepth[absPartIdx] &&
+ else if (cu.isInter(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && !tuDepth &&
cu.m_slice->m_sps->quadtreeTUMaxDepthInter == 1)
{
X265_CHECK(subdiv, "inter TU must be smaller than CU when not 2Nx2N part size: log2TrSize %d, depthRange[0] %d\n", log2TrSize, depthRange[0]);
@@ -672,127 +648,111 @@
codeTransformSubdivFlag(subdiv, 5 - log2TrSize);
}
- const uint32_t trDepthCurr = depth - cu.m_cuDepth[absPartIdx];
- const bool bFirstCbfOfCU = trDepthCurr == 0;
-
- bool mCodeAll = true;
- const uint32_t numPels = 1 << (log2TrSize * 2 - hChromaShift - vChromaShift);
- if (numPels < (MIN_TU_SIZE * MIN_TU_SIZE))
- mCodeAll = false;
-
- if (bFirstCbfOfCU || mCodeAll)
+ uint32_t hChromaShift = cu.m_hChromaShift;
+ uint32_t vChromaShift = cu.m_vChromaShift;
+ bool bSmallChroma = (log2TrSize - hChromaShift < 2);
+ if (!tuDepth || !bSmallChroma)
{
- uint32_t tuSize = 1 << log2TrSize;
- if (bFirstCbfOfCU || cu.getCbf(absPartIdx, TEXT_CHROMA_U, trDepthCurr - 1))
- codeQtCbf(cu, absPartIdx, absPartIdxStep, (tuSize >> hChromaShift), (tuSize >> vChromaShift), TEXT_CHROMA_U, trDepthCurr, (subdiv == 0));
- if (bFirstCbfOfCU || cu.getCbf(absPartIdx, TEXT_CHROMA_V, trDepthCurr - 1))
- codeQtCbf(cu, absPartIdx, absPartIdxStep, (tuSize >> hChromaShift), (tuSize >> vChromaShift), TEXT_CHROMA_V, trDepthCurr, (subdiv == 0));
+ if (!tuDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_U, tuDepth - 1))
+ codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_U, tuDepth, !subdiv);
+ if (!tuDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_V, tuDepth - 1))
+ codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_V, tuDepth, !subdiv);
}
else
{
- X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_U, trDepthCurr) == cu.getCbf(absPartIdx, TEXT_CHROMA_U, trDepthCurr - 1), "chroma xform size match failure\n");
- X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_V, trDepthCurr) == cu.getCbf(absPartIdx, TEXT_CHROMA_V, trDepthCurr - 1), "chroma xform size match failure\n");
+ X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_U, tuDepth) == cu.getCbf(absPartIdx, TEXT_CHROMA_U, tuDepth - 1), "chroma xform size match failure\n");
+ X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_V, tuDepth) == cu.getCbf(absPartIdx, TEXT_CHROMA_V, tuDepth - 1), "chroma xform size match failure\n");
}
if (subdiv)
{
- log2TrSize--;
- uint32_t numCoeff = 1 << (log2TrSize * 2);
- uint32_t numCoeffC = (numCoeff >> (hChromaShift + vChromaShift));
- trIdx++;
- ++depth;
- absPartIdxStep >>= 2;
- const uint32_t partNum = NUM_CU_PARTITIONS >> (depth << 1);
+ --log2TrSize;
+ ++tuDepth;
- encodeTransform(cu, state, offsetLuma, offsetChroma, absPartIdx, absPartIdxStep, depth, log2TrSize, trIdx, bCodeDQP, depthRange);
+ uint32_t qNumParts = 1 << (log2TrSize - LOG2_UNIT_SIZE) * 2;
- absPartIdx += partNum;
- offsetLuma += numCoeff;
- offsetChroma += numCoeffC;
- encodeTransform(cu, state, offsetLuma, offsetChroma, absPartIdx, absPartIdxStep, depth, log2TrSize, trIdx, bCodeDQP, depthRange);
+ encodeTransform(cu, absPartIdx + 0 * qNumParts, tuDepth, log2TrSize, bCodeDQP, depthRange);
+ encodeTransform(cu, absPartIdx + 1 * qNumParts, tuDepth, log2TrSize, bCodeDQP, depthRange);
+ encodeTransform(cu, absPartIdx + 2 * qNumParts, tuDepth, log2TrSize, bCodeDQP, depthRange);
+ encodeTransform(cu, absPartIdx + 3 * qNumParts, tuDepth, log2TrSize, bCodeDQP, depthRange);
+ return;
+ }
- absPartIdx += partNum;
- offsetLuma += numCoeff;
- offsetChroma += numCoeffC;
- encodeTransform(cu, state, offsetLuma, offsetChroma, absPartIdx, absPartIdxStep, depth, log2TrSize, trIdx, bCodeDQP, depthRange);
+ uint32_t absPartIdxC = bSmallChroma ? absPartIdx & 0xFC : absPartIdx;
- absPartIdx += partNum;
- offsetLuma += numCoeff;
- offsetChroma += numCoeffC;
- encodeTransform(cu, state, offsetLuma, offsetChroma, absPartIdx, absPartIdxStep, depth, log2TrSize, trIdx, bCodeDQP, depthRange);
+ if (cu.isInter(absPartIdxC) && !tuDepth && !cu.getCbf(absPartIdxC, TEXT_CHROMA_U, 0) && !cu.getCbf(absPartIdxC, TEXT_CHROMA_V, 0))
+ {
+ X265_CHECK(cu.getCbf(absPartIdxC, TEXT_LUMA, 0), "CBF should have been set\n");
+ }
+ else
+ codeQtCbfLuma(cu, absPartIdx, tuDepth);
+
+ uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, tuDepth);
+ uint32_t cbfU = cu.getCbf(absPartIdxC, TEXT_CHROMA_U, tuDepth);
+ uint32_t cbfV = cu.getCbf(absPartIdxC, TEXT_CHROMA_V, tuDepth);
+ if (!(cbfY || cbfU || cbfV))
+ return;
+
+ // dQP: only for CTU once
+ if (cu.m_slice->m_pps->bUseDQP && bCodeDQP)
+ {
+ uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
+ uint32_t absPartIdxLT = absPartIdx & (0xFF << (log2CUSize - LOG2_UNIT_SIZE) * 2);
+ codeDeltaQP(cu, absPartIdxLT);
+ bCodeDQP = false;
+ }
+
+ if (cbfY)
+ {
+ uint32_t coeffOffset = absPartIdx << (LOG2_UNIT_SIZE * 2);
+ codeCoeffNxN(cu, cu.m_trCoeff[0] + coeffOffset, absPartIdx, log2TrSize, TEXT_LUMA);
+ if (!(cbfU || cbfV))
+ return;
+ }
+
+ if (bSmallChroma)
+ {
+ if ((absPartIdx & 3) != 3)
+ return;
+
+ const uint32_t log2TrSizeC = 2;
+ const bool splitIntoSubTUs = (cu.m_chromaFormat == X265_CSP_I422);
+ const uint32_t curPartNum = 4;
+ uint32_t coeffOffsetC = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
+ for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
+ {
+ TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdxC);
+ const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
+ do
+ {
+ if (cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, tuDepth + splitIntoSubTUs))
+ {
+ uint32_t subTUOffset = tuIterator.section << (log2TrSizeC * 2);
+ codeCoeffNxN(cu, coeffChroma + coeffOffsetC + subTUOffset, tuIterator.absPartIdxTURelCU, log2TrSizeC, (TextType)chromaId);
+ }
+ }
+ while (tuIterator.isNextSection());
+ }
}
else
{
- if (cu.isInter(absPartIdx) && depth == cu.m_cuDepth[absPartIdx] && !cu.getCbf(absPartIdx, TEXT_CHROMA_U, 0) && !cu.getCbf(absPartIdx, TEXT_CHROMA_V, 0))
+ uint32_t log2TrSizeC = log2TrSize - hChromaShift;
+ const bool splitIntoSubTUs = (cu.m_chromaFormat == X265_CSP_I422);
+ uint32_t curPartNum = 1 << (log2TrSize - LOG2_UNIT_SIZE) * 2;
+ uint32_t coeffOffsetC = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
+ for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
{
- X265_CHECK(cu.getCbf(absPartIdx, TEXT_LUMA, 0), "CBF should have been set\n");
- }
- else
- codeQtCbf(cu, absPartIdx, TEXT_LUMA, cu.m_tuDepth[absPartIdx]);
-
- if (cbfY || cbfU || cbfV)
- {
- // dQP: only for CTU once
- if (cu.m_slice->m_pps->bUseDQP)
+ TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdxC);
+ const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
+ do
{
- if (bCodeDQP)
+ if (cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, tuDepth + splitIntoSubTUs))
{
- codeDeltaQP(cu, state.bakAbsPartIdxCU);
- bCodeDQP = false;
+ uint32_t subTUOffset = tuIterator.section << (log2TrSizeC * 2);
+ codeCoeffNxN(cu, coeffChroma + coeffOffsetC + subTUOffset, tuIterator.absPartIdxTURelCU, log2TrSizeC, (TextType)chromaId);
}
}
- }
- if (cbfY)
- codeCoeffNxN(cu, cu.m_trCoeff[0] + offsetLuma, absPartIdx, log2TrSize, TEXT_LUMA);
-
- int chFmt = cu.m_chromaFormat;
- if (log2TrSize == 2 && chFmt != X265_CSP_I444)
- {
- uint32_t partNum = NUM_CU_PARTITIONS >> ((depth - 1) << 1);
- if ((absPartIdx & (partNum - 1)) == (partNum - 1))
- {
- const uint32_t log2TrSizeC = 2;
- const bool splitIntoSubTUs = (chFmt == X265_CSP_I422);
-
- uint32_t curPartNum = NUM_CU_PARTITIONS >> ((depth - 1) << 1);
-
- for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
- {
- TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, state.bakAbsPartIdx);
- const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
- do
- {
- uint32_t cbf = cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, trIdx + splitIntoSubTUs);
- if (cbf)
- {
- uint32_t subTUOffset = tuIterator.section << (log2TrSizeC * 2);
- codeCoeffNxN(cu, coeffChroma + state.bakChromaOffset + subTUOffset, tuIterator.absPartIdxTURelCU, log2TrSizeC, (TextType)chromaId);
- }
- }
- while (tuIterator.isNextSection());
- }
- }
- }
- else
- {
- uint32_t log2TrSizeC = log2TrSize - hChromaShift;
- const bool splitIntoSubTUs = (chFmt == X265_CSP_I422);
- uint32_t curPartNum = NUM_CU_PARTITIONS >> (depth << 1);
- for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
- {
- TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdx);
- const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
- do
- {
- uint32_t cbf = cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, trIdx + splitIntoSubTUs);
- if (cbf)
- {
- uint32_t subTUOffset = tuIterator.section << (log2TrSizeC * 2);
- codeCoeffNxN(cu, coeffChroma + offsetChroma + subTUOffset, tuIterator.absPartIdxTURelCU, log2TrSizeC, (TextType)chromaId);
- }
- }
- while (tuIterator.isNextSection());
- }
+ while (tuIterator.isNextSection());
}
}
}
@@ -811,12 +771,12 @@
if (cu.m_chromaFormat == X265_CSP_I444 && cu.m_partSize[absPartIdx] != SIZE_2Nx2N)
{
- uint32_t partOffset = (NUM_CU_PARTITIONS >> (cu.m_cuDepth[absPartIdx] << 1)) >> 2;
- for (uint32_t i = 1; i <= 3; i++)
+ uint32_t qNumParts = 1 << (cu.m_log2CUSize[absPartIdx] - 1 - LOG2_UNIT_SIZE) * 2;
+ for (uint32_t qIdx = 1; qIdx < 4; ++qIdx)
{
- uint32_t offset = absPartIdx + i * partOffset;
- cu.getAllowedChromaDir(offset, chromaDirMode);
- codeIntraDirChroma(cu, offset, chromaDirMode);
+ absPartIdx += qNumParts;
+ cu.getAllowedChromaDir(absPartIdx, chromaDirMode);
+ codeIntraDirChroma(cu, absPartIdx, chromaDirMode);
}
}
}
@@ -868,7 +828,7 @@
codeRefFrmIdx(cu, absPartIdx, list);
}
-void Entropy::codeCoeff(const CUData& cu, uint32_t absPartIdx, uint32_t depth, bool& bCodeDQP, uint32_t depthRange[2])
+void Entropy::codeCoeff(const CUData& cu, uint32_t absPartIdx, bool& bCodeDQP, const uint32_t depthRange[2])
{
if (!cu.isIntra(absPartIdx))
{
@@ -878,12 +838,8 @@
return;
}
- uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
- uint32_t lumaOffset = absPartIdx << (LOG2_UNIT_SIZE * 2);
- uint32_t chromaOffset = lumaOffset >> (cu.m_hChromaShift + cu.m_vChromaShift);
- uint32_t absPartIdxStep = NUM_CU_PARTITIONS >> (depth << 1);
- CoeffCodeState state;
- encodeTransform(cu, state, lumaOffset, chromaOffset, absPartIdx, absPartIdxStep, depth, log2CUSize, 0, bCodeDQP, depthRange);
+ uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
+ encodeTransform(cu, absPartIdx, 0, log2CUSize, bCodeDQP, depthRange);
}
void Entropy::codeSaoOffset(const SaoCtuParam& ctuParam, int plane)
@@ -1222,12 +1178,12 @@
uint32_t preds[4][3];
int predIdx[4];
uint32_t partNum = isMultiple && cu.m_partSize[absPartIdx] != SIZE_2Nx2N ? 4 : 1;
- uint32_t partOffset = (NUM_CU_PARTITIONS >> (cu.m_cuDepth[absPartIdx] << 1)) >> 2;
+ uint32_t qNumParts = 1 << (cu.m_log2CUSize[absPartIdx] - 1 - LOG2_UNIT_SIZE) * 2;
- for (j = 0; j < partNum; j++)
+ for (j = 0; j < partNum; j++, absPartIdx += qNumParts)
{
- dir[j] = cu.m_lumaIntraDir[absPartIdx + partOffset * j];
- cu.getIntraDirLumaPredictor(absPartIdx + partOffset * j, preds[j]);
+ dir[j] = cu.m_lumaIntraDir[absPartIdx];
+ cu.getIntraDirLumaPredictor(absPartIdx, preds[j]);
predIdx[j] = -1;
for (uint32_t i = 0; i < 3; i++)
if (dir[j] == preds[j][i])
@@ -1382,40 +1338,25 @@
}
}
-void Entropy::codeQtCbf(const CUData& cu, uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t width, uint32_t height, TextType ttype, uint32_t trDepth, bool lowestLevel)
+void Entropy::codeQtCbfChroma(const CUData& cu, uint32_t absPartIdx, TextType ttype, uint32_t tuDepth, bool lowestLevel)
{
- uint32_t ctx = ctxCbf[ttype][trDepth];
+ uint32_t ctx = tuDepth + 2;
- bool canQuadSplit = (width >= (MIN_TU_SIZE * 2)) && (height >= (MIN_TU_SIZE * 2));
- uint32_t lowestTUDepth = trDepth + ((!lowestLevel && !canQuadSplit) ? 1 : 0); // unsplittable TUs inherit their parent's CBF
+ uint32_t log2TrSize = cu.m_log2CUSize[absPartIdx] - tuDepth;
+ bool canQuadSplit = (log2TrSize - cu.m_hChromaShift > 2);
+ uint32_t lowestTUDepth = tuDepth + ((!lowestLevel && !canQuadSplit) ? 1 : 0); // unsplittable TUs inherit their parent's CBF
- if ((width != height) && (lowestLevel || !canQuadSplit)) // if sub-TUs are present
+ if (cu.m_chromaFormat == X265_CSP_I422 && (lowestLevel || !canQuadSplit)) // if sub-TUs are present
{
uint32_t subTUDepth = lowestTUDepth + 1; // if this is the lowest level of the TU-tree, the sub-TUs are directly below.
// Otherwise, this must be the level above the lowest level (as specified above)
- uint32_t partIdxesPerSubTU = absPartIdxStep >> 1;
+ uint32_t tuNumParts = 1 << ((log2TrSize - LOG2_UNIT_SIZE) * 2 - 1);
- for (uint32_t subTU = 0; subTU < 2; subTU++)
- {
- uint32_t subTUAbsPartIdx = absPartIdx + (subTU * partIdxesPerSubTU);
- uint32_t cbf = cu.getCbf(subTUAbsPartIdx, ttype, subTUDepth);
-
- encodeBin(cbf, m_contextState[OFF_QT_CBF_CTX + ctx]);
- }
+ encodeBin(cu.getCbf(absPartIdx , ttype, subTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
+ encodeBin(cu.getCbf(absPartIdx + tuNumParts, ttype, subTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
}
else
- {
- uint32_t cbf = cu.getCbf(absPartIdx, ttype, lowestTUDepth);
-
- encodeBin(cbf, m_contextState[OFF_QT_CBF_CTX + ctx]);
- }
-}
-
-void Entropy::codeQtCbf(const CUData& cu, uint32_t absPartIdx, TextType ttype, uint32_t trDepth)
-{
- uint32_t ctx = ctxCbf[ttype][trDepth];
- uint32_t cbf = cu.getCbf(absPartIdx, ttype, trDepth);
- encodeBin(cbf, m_contextState[OFF_QT_CBF_CTX + ctx]);
+ encodeBin(cu.getCbf(absPartIdx, ttype, lowestTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
}
void Entropy::codeTransformSkipFlags(const CUData& cu, uint32_t absPartIdx, uint32_t trSize, TextType ttype)
diff -r 35d086074bb5 -r 94d0bc6841dd source/encoder/entropy.h
--- a/source/encoder/entropy.h Fri Dec 05 10:59:33 2014 -0600
+++ b/source/encoder/entropy.h Sat Dec 06 17:17:59 2014 +0900
@@ -162,9 +162,10 @@
void codePartSize(const CUData& cu, uint32_t absPartIdx, uint32_t depth);
void codePredInfo(const CUData& cu, uint32_t absPartIdx);
- void codeQtCbf(const CUData& cu, uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t width, uint32_t height, TextType ttype, uint32_t trDepth, bool lowestLevel);
- void codeQtCbf(const CUData& cu, uint32_t absPartIdx, TextType ttype, uint32_t trDepth);
- void codeCoeff(const CUData& cu, uint32_t absPartIdx, uint32_t depth, bool& bCodeDQP, uint32_t depthRange[2]);
+ inline void codeQtCbfLuma(const CUData& cu, uint32_t absPartIdx, uint32_t tuDepth) { codeQtCbfLuma(cu.getCbf(absPartIdx, TEXT_LUMA, tuDepth), tuDepth); }
+
+ void codeQtCbfChroma(const CUData& cu, uint32_t absPartIdx, TextType ttype, uint32_t tuDepth, bool lowestLevel);
+ void codeCoeff(const CUData& cu, uint32_t absPartIdx, bool& bCodeDQP, const uint32_t depthRange[2]);
void codeCoeffNxN(const CUData& cu, const coeff_t* coef, uint32_t absPartIdx, uint32_t log2TrSize, TextType ttype);
inline void codeSaoMerge(uint32_t code) { encodeBin(code, m_contextState[OFF_SAO_MERGE_FLAG_CTX]); }
@@ -175,7 +176,8 @@
inline void codeTransformSubdivFlag(uint32_t symbol, uint32_t ctx) { encodeBin(symbol, m_contextState[OFF_TRANS_SUBDIV_FLAG_CTX + ctx]); }
inline void codePredMode(int predMode) { encodeBin(predMode == MODE_INTRA ? 1 : 0, m_contextState[OFF_PRED_MODE_CTX]); }
inline void codeCUTransquantBypassFlag(uint32_t symbol) { encodeBin(symbol, m_contextState[OFF_TQUANT_BYPASS_FLAG_CTX]); }
- inline void codeQtCbf(uint32_t cbf, TextType ttype, uint32_t trDepth) { encodeBin(cbf, m_contextState[OFF_QT_CBF_CTX + ctxCbf[ttype][trDepth]]); }
+ inline void codeQtCbfLuma(uint32_t cbf, uint32_t tuDepth) { encodeBin(cbf, m_contextState[OFF_QT_CBF_CTX + !tuDepth]); }
+ inline void codeQtCbfChroma(uint32_t cbf, uint32_t tuDepth) { encodeBin(cbf, m_contextState[OFF_QT_CBF_CTX + 2 + tuDepth]); }
inline void codeQtRootCbf(uint32_t cbf) { encodeBin(cbf, m_contextState[OFF_QT_ROOT_CBF_CTX]); }
void codeSaoOffset(const SaoCtuParam& ctuParam, int plane);
@@ -189,11 +191,10 @@
inline uint32_t bitsIntraModeNonMPM() const { return bitsCodeBin(0, m_contextState[OFF_ADI_CTX]) + 5; }
inline uint32_t bitsIntraModeMPM(const uint32_t preds[3], uint32_t dir) const { return bitsCodeBin(1, m_contextState[OFF_ADI_CTX]) + (dir == preds[0] ? 1 : 2); }
- inline uint32_t estimateCbfBits(uint32_t cbf, TextType ttype, uint32_t trDepth) const { return bitsCodeBin(cbf, m_contextState[OFF_QT_CBF_CTX + ctxCbf[ttype][trDepth]]); }
+ inline uint32_t estimateCbfBits(uint32_t cbf, TextType ttype, uint32_t tuDepth) const { return bitsCodeBin(cbf, m_contextState[OFF_QT_CBF_CTX + ctxCbf[ttype][tuDepth]]); }
/* these functions are only used to estimate the bits when cbf is 0 and will never be called when writing the bistream. */
inline void codeQtRootCbfZero() { encodeBin(0, m_contextState[OFF_QT_ROOT_CBF_CTX]); }
- inline void codeQtCbfZero(TextType ttype, uint32_t trDepth) { encodeBin(0, m_contextState[OFF_QT_CBF_CTX + ctxCbf[ttype][trDepth]]); }
private:
@@ -239,16 +240,8 @@
void codeLastSignificantXY(uint32_t posx, uint32_t posy, uint32_t log2TrSize, bool bIsLuma, uint32_t scanIdx);
void codeTransformSkipFlags(const CUData& cu, uint32_t absPartIdx, uint32_t trSize, TextType ttype);
- struct CoeffCodeState
- {
- uint32_t bakAbsPartIdx;
- uint32_t bakChromaOffset;
- uint32_t bakAbsPartIdxCU;
- };
-
- void encodeTransform(const CUData& cu, CoeffCodeState& state, uint32_t offsetLumaOffset, uint32_t offsetChroma,
- uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t depth, uint32_t log2TrSize, uint32_t trIdx,
- bool& bCodeDQP, uint32_t depthRange[2]);
+ void encodeTransform(const CUData& cu, uint32_t absPartIdx, uint32_t tuDepth, uint32_t log2TrSize,
+ bool& bCodeDQP, const uint32_t depthRange[2]);
void copyFrom(const Entropy& src);
void copyContextsFrom(const Entropy& src);
diff -r 35d086074bb5 -r 94d0bc6841dd source/encoder/search.cpp
--- a/source/encoder/search.cpp Fri Dec 05 10:59:33 2014 -0600
+++ b/source/encoder/search.cpp Sat Dec 06 17:17:59 2014 +0900
@@ -165,70 +165,55 @@
void Search::invalidateContexts(int) {}
#endif
-void Search::codeSubdivCbfQTChroma(const CUData& cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t width, uint32_t height)
+void Search::codeSubdivCbfQTChroma(const CUData& cu, uint32_t tuDepth, uint32_t absPartIdx)
{
- uint32_t fullDepth = cu.m_cuDepth[0] + trDepth;
- uint32_t tuDepthL = cu.m_tuDepth[absPartIdx];
- uint32_t subdiv = tuDepthL > trDepth;
+ uint32_t fullDepth = cu.m_cuDepth[0] + tuDepth;
+ uint32_t subdiv = tuDepth < cu.m_tuDepth[absPartIdx];
uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
- bool mCodeAll = true;
- const uint32_t numPels = 1 << (log2TrSize * 2 - m_hChromaShift - m_vChromaShift);
- if (numPels < (MIN_TU_SIZE * MIN_TU_SIZE))
- mCodeAll = false;
-
- if (mCodeAll)
+ if (!(log2TrSize - m_hChromaShift < 2))
{
- if (!trDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_U, trDepth - 1))
- m_entropyCoder.codeQtCbf(cu, absPartIdx, absPartIdxStep, (width >> m_hChromaShift), (height >> m_vChromaShift), TEXT_CHROMA_U, trDepth, !subdiv);
-
- if (!trDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_V, trDepth - 1))
- m_entropyCoder.codeQtCbf(cu, absPartIdx, absPartIdxStep, (width >> m_hChromaShift), (height >> m_vChromaShift), TEXT_CHROMA_V, trDepth, !subdiv);
+ if (!tuDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_U, tuDepth - 1))
+ m_entropyCoder.codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_U, tuDepth, !subdiv);
+ if (!tuDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_V, tuDepth - 1))
+ m_entropyCoder.codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_V, tuDepth, !subdiv);
}
if (subdiv)
{
- absPartIdxStep >>= 2;
- width >>= 1;
- height >>= 1;
-
- uint32_t qtPartNum = NUM_CU_PARTITIONS >> ((fullDepth + 1) << 1);
- for (uint32_t part = 0; part < 4; part++)
- codeSubdivCbfQTChroma(cu, trDepth + 1, absPartIdx + part * qtPartNum, absPartIdxStep, width, height);
+ uint32_t qNumParts = 1 << (log2TrSize - 1 - LOG2_UNIT_SIZE) * 2;
+ for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
+ codeSubdivCbfQTChroma(cu, tuDepth + 1, absPartIdx);
}
}
-void Search::codeCoeffQTChroma(const CUData& cu, uint32_t trDepth, uint32_t absPartIdx, TextType ttype)
+void Search::codeCoeffQTChroma(const CUData& cu, uint32_t tuDepth, uint32_t absPartIdx, TextType ttype)
{
- if (!cu.getCbf(absPartIdx, ttype, trDepth))
+ if (!cu.getCbf(absPartIdx, ttype, tuDepth))
return;
- uint32_t fullDepth = cu.m_cuDepth[0] + trDepth;
- uint32_t tuDepthL = cu.m_tuDepth[absPartIdx];
-
- if (tuDepthL > trDepth)
+ uint32_t fullDepth = cu.m_cuDepth[0] + tuDepth;
+ uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
+
+ if (tuDepth < cu.m_tuDepth[absPartIdx])
{
- uint32_t qtPartNum = NUM_CU_PARTITIONS >> ((fullDepth + 1) << 1);
- for (uint32_t part = 0; part < 4; part++)
- codeCoeffQTChroma(cu, trDepth + 1, absPartIdx + part * qtPartNum, ttype);
+ uint32_t qNumParts = 1 << (log2TrSize - 1 - LOG2_UNIT_SIZE) * 2;
+ for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
+ codeCoeffQTChroma(cu, tuDepth + 1, absPartIdx, ttype);
return;
}
- uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
-
- uint32_t trDepthC = trDepth;
+ uint32_t tuDepthC = tuDepth;
uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
-
- if (log2TrSizeC == 1)
+
+ if (log2TrSizeC < 2)
{
- X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444 && trDepth, "transform size too small\n");
- trDepthC--;
- log2TrSizeC++;
- uint32_t qpdiv = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] + trDepthC) << 1);
- bool bFirstQ = ((absPartIdx & (qpdiv - 1)) == 0);
- if (!bFirstQ)
+ X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444 && tuDepth, "invalid tuDepth\n");
+ if (absPartIdx & 3)
return;
+ log2TrSizeC = 2;
+ tuDepthC--;
}
uint32_t qtLayer = log2TrSize - 2;
@@ -245,17 +230,17 @@
uint32_t coeffOffset = absPartIdx << (LOG2_UNIT_SIZE * 2 - 1);
coeff_t* coeff = m_rqt[qtLayer].coeffRQT[ttype] + coeffOffset;
uint32_t subTUSize = 1 << (log2TrSizeC * 2);
- uint32_t partIdxesPerSubTU = NUM_CU_PARTITIONS >> (((cu.m_cuDepth[absPartIdx] + trDepthC) << 1) + 1);
- if (cu.getCbf(absPartIdx, ttype, trDepth + 1))
+ uint32_t tuNumParts = 2 << ((log2TrSizeC - LOG2_UNIT_SIZE) * 2);
+ if (cu.getCbf(absPartIdx, ttype, tuDepth + 1))
m_entropyCoder.codeCoeffNxN(cu, coeff, absPartIdx, log2TrSizeC, ttype);
- if (cu.getCbf(absPartIdx + partIdxesPerSubTU, ttype, trDepth + 1))
- m_entropyCoder.codeCoeffNxN(cu, coeff + subTUSize, absPartIdx + partIdxesPerSubTU, log2TrSizeC, ttype);
+ if (cu.getCbf(absPartIdx + tuNumParts, ttype, tuDepth + 1))
+ m_entropyCoder.codeCoeffNxN(cu, coeff + subTUSize, absPartIdx + tuNumParts, log2TrSizeC, ttype);
}
}
-void Search::codeIntraLumaQT(Mode& mode, const CUGeom& cuGeom, uint32_t trDepth, uint32_t absPartIdx, bool bAllowSplit, Cost& outCost, uint32_t depthRange[2])
+void Search::codeIntraLumaQT(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx, bool bAllowSplit, Cost& outCost, const uint32_t depthRange[2])
{
- uint32_t fullDepth = mode.cu.m_cuDepth[0] + trDepth;
+ uint32_t fullDepth = mode.cu.m_cuDepth[0] + tuDepth;
uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
uint32_t qtLayer = log2TrSize - 2;
uint32_t sizeIdx = log2TrSize - 2;
@@ -289,13 +274,13 @@
// init availability pattern
uint32_t lumaPredMode = cu.m_lumaIntraDir[absPartIdx];
- initAdiPattern(cu, cuGeom, absPartIdx, trDepth, lumaPredMode);
+ initAdiPattern(cu, cuGeom, absPartIdx, tuDepth, lumaPredMode);
// get prediction signal
predIntraLumaAng(lumaPredMode, pred, stride, log2TrSize);
cu.setTransformSkipSubParts(0, TEXT_LUMA, absPartIdx, fullDepth);
- cu.setTUDepthSubParts(trDepth, absPartIdx, fullDepth);
+ cu.setTUDepthSubParts(tuDepth, absPartIdx, fullDepth);
uint32_t coeffOffsetY = absPartIdx << (LOG2_UNIT_SIZE * 2);
coeff_t* coeffY = m_rqt[qtLayer].coeffRQT[0] + coeffOffsetY;
@@ -316,7 +301,7 @@
// no coded residual, recon = pred
primitives.luma_copy_pp[sizeIdx](reconQt, reconQtStride, pred, stride);
- bCBF = !!numSig << trDepth;
+ bCBF = !!numSig << tuDepth;
cu.setCbfSubParts(bCBF, TEXT_LUMA, absPartIdx, fullDepth);
fullCost.distortion = primitives.sse_pp[sizeIdx](reconQt, reconQtStride, fenc, stride);
@@ -340,21 +325,21 @@
}
else
{
- uint32_t qtNumParts = cuGeom.numPartitions >> 2;
- if (!trDepth)
+ uint32_t qNumParts = cuGeom.numPartitions >> 2;
+ if (!tuDepth)
{
- for (uint32_t part = 0; part < 4; part++)
- m_entropyCoder.codeIntraDirLumaAng(cu, part * qtNumParts, false);
+ for (uint32_t qIdx = 0; qIdx < 4; ++qIdx)
+ m_entropyCoder.codeIntraDirLumaAng(cu, qIdx * qNumParts, false);
}
- else if (!(absPartIdx & (qtNumParts - 1)))
+ else if (!(absPartIdx & (qNumParts - 1)))
m_entropyCoder.codeIntraDirLumaAng(cu, absPartIdx, false);
}
if (log2TrSize != depthRange[0])
m_entropyCoder.codeTransformSubdivFlag(0, 5 - log2TrSize);
- m_entropyCoder.codeQtCbf(cu, absPartIdx, TEXT_LUMA, cu.m_tuDepth[absPartIdx]);
-
- if (cu.getCbf(absPartIdx, TEXT_LUMA, trDepth))
+ m_entropyCoder.codeQtCbfLuma(!!numSig, tuDepth);
+
+ if (cu.getCbf(absPartIdx, TEXT_LUMA, tuDepth))
m_entropyCoder.codeCoeffNxN(cu, coeffY, absPartIdx, log2TrSize, TEXT_LUMA);
fullCost.bits = m_entropyCoder.getNumberOfWrittenBits();
@@ -382,8 +367,7 @@
}
// code split block
- uint32_t qPartsDiv = NUM_CU_PARTITIONS >> ((fullDepth + 1) << 1);
- uint32_t absPartIdxSub = absPartIdx;
+ uint32_t qNumParts = 1 << (log2TrSize - 1 - LOG2_UNIT_SIZE) * 2;
int checkTransformSkip = m_slice->m_pps->bTransformSkipEnabled && (log2TrSize - 1) <= MAX_LOG2_TS_SIZE && !cu.m_tqBypass[0];
if (m_param->bEnableTSkipFast)
@@ -391,17 +375,17 @@
Cost splitCost;
uint32_t cbf = 0;
- for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++, absPartIdxSub += qPartsDiv)
+ for (uint32_t qIdx = 0, qPartIdx = absPartIdx; qIdx < 4; ++qIdx, qPartIdx += qNumParts)
{
if (checkTransformSkip)
- codeIntraLumaTSkip(mode, cuGeom, trDepth + 1, absPartIdxSub, splitCost);
+ codeIntraLumaTSkip(mode, cuGeom, tuDepth + 1, qPartIdx, splitCost);
else
- codeIntraLumaQT(mode, cuGeom, trDepth + 1, absPartIdxSub, bAllowSplit, splitCost, depthRange);
-
- cbf |= cu.getCbf(absPartIdxSub, TEXT_LUMA, trDepth + 1);
+ codeIntraLumaQT(mode, cuGeom, tuDepth + 1, qPartIdx, bAllowSplit, splitCost, depthRange);
+
+ cbf |= cu.getCbf(qPartIdx, TEXT_LUMA, tuDepth + 1);
}
- for (uint32_t offs = 0; offs < 4 * qPartsDiv; offs++)
- cu.m_cbf[0][absPartIdx + offs] |= (cbf << trDepth);
+ for (uint32_t offs = 0; offs < 4 * qNumParts; offs++)
+ cu.m_cbf[0][absPartIdx + offs] |= (cbf << tuDepth);
if (mightNotSplit && log2TrSize != depthRange[0])
{
@@ -430,7 +414,7 @@
m_entropyCoder.load(m_rqt[fullDepth].rqtTest);
// recover transform index and Cbf values
- cu.setTUDepthSubParts(trDepth, absPartIdx, fullDepth);
+ cu.setTUDepthSubParts(tuDepth, absPartIdx, fullDepth);
cu.setCbfSubParts(bCBF, TEXT_LUMA, absPartIdx, fullDepth);
cu.setTransformSkipSubParts(0, TEXT_LUMA, absPartIdx, fullDepth);
}
@@ -447,9 +431,9 @@
outCost.energy += fullCost.energy;
}
-void Search::codeIntraLumaTSkip(Mode& mode, const CUGeom& cuGeom, uint32_t trDepth, uint32_t absPartIdx, Cost& outCost)
+void Search::codeIntraLumaTSkip(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx, Cost& outCost)
{
- uint32_t fullDepth = mode.cu.m_cuDepth[0] + trDepth;
+ uint32_t fullDepth = mode.cu.m_cuDepth[0] + tuDepth;
uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
uint32_t tuSize = 1 << log2TrSize;
@@ -472,12 +456,12 @@
// init availability pattern
uint32_t lumaPredMode = cu.m_lumaIntraDir[absPartIdx];
- initAdiPattern(cu, cuGeom, absPartIdx, trDepth, lumaPredMode);
+ initAdiPattern(cu, cuGeom, absPartIdx, tuDepth, lumaPredMode);
// get prediction signal
predIntraLumaAng(lumaPredMode, pred, stride, log2TrSize);
- cu.setTUDepthSubParts(trDepth, absPartIdx, fullDepth);
+ cu.setTUDepthSubParts(tuDepth, absPartIdx, fullDepth);
uint32_t qtLayer = log2TrSize - 2;
uint32_t coeffOffsetY = absPartIdx << (LOG2_UNIT_SIZE * 2);
@@ -525,7 +509,7 @@
uint32_t tmpDist = primitives.sse_pp[sizeIdx](tmpRecon, tmpReconStride, fenc, stride);
cu.setTransformSkipSubParts(useTSkip, TEXT_LUMA, absPartIdx, fullDepth);
- cu.setCbfSubParts((!!numSig) << trDepth, TEXT_LUMA, absPartIdx, fullDepth);
+ cu.setCbfSubParts((!!numSig) << tuDepth, TEXT_LUMA, absPartIdx, fullDepth);
if (useTSkip)
m_entropyCoder.load(m_rqt[fullDepth].rqtRoot);
@@ -550,20 +534,20 @@
}
else
{
- uint32_t qtNumParts = cuGeom.numPartitions >> 2;
- if (!trDepth)
+ uint32_t qNumParts = cuGeom.numPartitions >> 2;
+ if (!tuDepth)
{
- for (uint32_t part = 0; part < 4; part++)
- m_entropyCoder.codeIntraDirLumaAng(cu, part * qtNumParts, false);
+ for (uint32_t qIdx = 0; qIdx < 4; ++qIdx)
+ m_entropyCoder.codeIntraDirLumaAng(cu, qIdx * qNumParts, false);
}
- else if (!(absPartIdx & (qtNumParts - 1)))
+ else if (!(absPartIdx & (qNumParts - 1)))
m_entropyCoder.codeIntraDirLumaAng(cu, absPartIdx, false);
}
m_entropyCoder.codeTransformSubdivFlag(0, 5 - log2TrSize);
- m_entropyCoder.codeQtCbf(cu, absPartIdx, TEXT_LUMA, cu.m_tuDepth[absPartIdx]);
-
- if (cu.getCbf(absPartIdx, TEXT_LUMA, trDepth))
+ m_entropyCoder.codeQtCbfLuma(!!numSig, tuDepth);
+
+ if (cu.getCbf(absPartIdx, TEXT_LUMA, tuDepth))
m_entropyCoder.codeCoeffNxN(cu, coeff, absPartIdx, log2TrSize, TEXT_LUMA);
uint32_t tmpBits = m_entropyCoder.getNumberOfWrittenBits();
@@ -598,7 +582,7 @@
else if (checkTransformSkip)
{
cu.setTransformSkipSubParts(0, TEXT_LUMA, absPartIdx, fullDepth);
- cu.setCbfSubParts(bCBF << trDepth, TEXT_LUMA, absPartIdx, fullDepth);
+ cu.setCbfSubParts(bCBF << tuDepth, TEXT_LUMA, absPartIdx, fullDepth);
m_entropyCoder.load(m_rqt[fullDepth].rqtTemp);
}
@@ -614,11 +598,11 @@
}
/* fast luma intra residual generation. Only perform the minimum number of TU splits required by the CU size */
-void Search::residualTransformQuantIntra(Mode& mode, const CUGeom& cuGeom, uint32_t trDepth, uint32_t absPartIdx, uint32_t depthRange[2])
+void Search::residualTransformQuantIntra(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx, const uint32_t depthRange[2])
{
CUData& cu = mode.cu;
- uint32_t fullDepth = cu.m_cuDepth[0] + trDepth;
+ uint32_t fullDepth = cu.m_cuDepth[0] + tuDepth;
uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
bool bCheckFull = log2TrSize <= depthRange[1];
@@ -642,11 +626,11 @@
uint32_t coeffOffsetY = absPartIdx << (LOG2_UNIT_SIZE * 2);
coeff_t* coeff = cu.m_trCoeff[TEXT_LUMA] + coeffOffsetY;
- initAdiPattern(cu, cuGeom, absPartIdx, trDepth, lumaPredMode);
+ initAdiPattern(cu, cuGeom, absPartIdx, tuDepth, lumaPredMode);
predIntraLumaAng(lumaPredMode, pred, stride, log2TrSize);
X265_CHECK(!cu.m_transformSkip[TEXT_LUMA][absPartIdx], "unexpected tskip flag in residualTransformQuantIntra\n");
- cu.setTUDepthSubParts(trDepth, absPartIdx, fullDepth);
+ cu.setTUDepthSubParts(tuDepth, absPartIdx, fullDepth);
primitives.calcresidual[sizeIdx](fenc, pred, residual, stride);
uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeff, log2TrSize, TEXT_LUMA, absPartIdx, false);
@@ -654,7 +638,7 @@
{
m_quant.invtransformNxN(cu.m_tqBypass[absPartIdx], residual, stride, coeff, log2TrSize, TEXT_LUMA, true, false, numSig);
primitives.luma_add_ps[sizeIdx](picReconY, picStride, pred, residual, stride, stride);
- cu.setCbfSubParts(1 << trDepth, TEXT_LUMA, absPartIdx, fullDepth);
+ cu.setCbfSubParts(1 << tuDepth, TEXT_LUMA, absPartIdx, fullDepth);
}
else
{
@@ -667,26 +651,25 @@
X265_CHECK(log2TrSize > depthRange[0], "intra luma split state failure\n");
/* code split block */
- uint32_t qPartsDiv = NUM_CU_PARTITIONS >> ((fullDepth + 1) << 1);
+ uint32_t qNumParts = 1 << (log2TrSize - 1 - LOG2_UNIT_SIZE) * 2;
uint32_t cbf = 0;
- for (uint32_t subPartIdx = 0, absPartIdxSub = absPartIdx; subPartIdx < 4; subPartIdx++, absPartIdxSub += qPartsDiv)
+ for (uint32_t qIdx = 0, qPartIdx = absPartIdx; qIdx < 4; ++qIdx, qPartIdx += qNumParts)
{
- residualTransformQuantIntra(mode, cuGeom, trDepth + 1, absPartIdxSub, depthRange);
- cbf |= cu.getCbf(absPartIdxSub, TEXT_LUMA, trDepth + 1);
+ residualTransformQuantIntra(mode, cuGeom, tuDepth + 1, qPartIdx, depthRange);
+ cbf |= cu.getCbf(qPartIdx, TEXT_LUMA, tuDepth + 1);
}
- for (uint32_t offs = 0; offs < 4 * qPartsDiv; offs++)
- cu.m_cbf[TEXT_LUMA][absPartIdx + offs] |= (cbf << trDepth);
+ for (uint32_t offs = 0; offs < 4 * qNumParts; offs++)
+ cu.m_cbf[TEXT_LUMA][absPartIdx + offs] |= (cbf << tuDepth);
}
}
-void Search::extractIntraResultQT(CUData& cu, Yuv& reconYuv, uint32_t trDepth, uint32_t absPartIdx)
+void Search::extractIntraResultQT(CUData& cu, Yuv& reconYuv, uint32_t tuDepth, uint32_t absPartIdx)
{
- uint32_t fullDepth = cu.m_cuDepth[0] + trDepth;
- uint32_t tuDepth = cu.m_tuDepth[absPartIdx];
-
- if (tuDepth == trDepth)
+ uint32_t fullDepth = cu.m_cuDepth[0] + tuDepth;
+ uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
+
+ if (tuDepth == cu.m_tuDepth[absPartIdx])
{
- uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
uint32_t qtLayer = log2TrSize - 2;
// copy transform coefficients
@@ -700,88 +683,80 @@
}
else
{
- uint32_t numQPart = NUM_CU_PARTITIONS >> ((fullDepth + 1) << 1);
- for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
- extractIntraResultQT(cu, reconYuv, trDepth + 1, absPartIdx + subPartIdx * numQPart);
+ uint32_t qNumParts = 1 << (log2TrSize - 1 - LOG2_UNIT_SIZE) * 2;
+ for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
+ extractIntraResultQT(cu, reconYuv, tuDepth + 1, absPartIdx);
}
}
+inline void offsetCBFs(uint8_t subTUCBF[2]) // rewrites both entries of subTUCBF in place
+{
+ uint8_t combinedCBF = subTUCBF[0] | subTUCBF[1]; // OR of the two incoming flags
+ subTUCBF[0] = subTUCBF[0] << 1 | combinedCBF; // (own flag << 1) | combined: own flag moves to bit 1, combined flag lands in bit 0
+ subTUCBF[1] = subTUCBF[1] << 1 | combinedCBF; // same packing for the second entry
+}
+
/* 4:2:2 post-TU split processing */
-void Search::offsetSubTUCBFs(CUData& cu, TextType ttype, uint32_t trDepth, uint32_t absPartIdx)
+void Search::offsetSubTUCBFs(CUData& cu, TextType ttype, uint32_t tuDepth, uint32_t absPartIdx) // parameter renamed trDepth -> tuDepth; type and position unchanged
 {
 uint32_t depth = cu.m_cuDepth[0];
- uint32_t fullDepth = depth + trDepth;
+ uint32_t fullDepth = depth + tuDepth;
 uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
- uint32_t trDepthC = trDepth;
 if (log2TrSize == 2)
 {
- X265_CHECK(m_csp != X265_CSP_I444 && trDepthC, "trDepthC invalid\n");
- trDepthC--;
+ X265_CHECK(m_csp != X265_CSP_I444 && tuDepth, "invalid tuDepth\n");
+ ++log2TrSize; // bump 2 -> 3 so the part count below is derived from the next-larger size (replaces the old trDepthC-- adjustment)
 }
- uint32_t partIdxesPerSubTU = (NUM_CU_PARTITIONS >> ((depth + trDepthC) << 1)) >> 1;
+ uint32_t tuNumParts = 1 << ((log2TrSize - LOG2_UNIT_SIZE) * 2 - 1); // half of 1 << ((log2TrSize - LOG2_UNIT_SIZE) * 2); presumably the part count of one sub-TU - confirm against LOG2_UNIT_SIZE
 // move the CBFs down a level and set the parent CBF
 uint8_t subTUCBF[2];
- uint8_t combinedSubTUCBF = 0;
-
- for (uint32_t subTU = 0; subTU < 2; subTU++)
- {
- const uint32_t subTUAbsPartIdx = absPartIdx + (subTU * partIdxesPerSubTU);
-
- subTUCBF[subTU] = cu.getCbf(subTUAbsPartIdx, ttype, trDepth);
- combinedSubTUCBF |= subTUCBF[subTU];
- }
-
- for (uint32_t subTU = 0; subTU < 2; subTU++)
- {
- const uint32_t subTUAbsPartIdx = absPartIdx + (subTU * partIdxesPerSubTU);
- const uint8_t compositeCBF = (subTUCBF[subTU] << 1) | combinedSubTUCBF;
-
- cu.setCbfPartRange((compositeCBF << trDepth), ttype, subTUAbsPartIdx, partIdxesPerSubTU);
- }
+ subTUCBF[0] = cu.getCbf(absPartIdx , ttype, tuDepth); // flag read at absPartIdx (first half)
+ subTUCBF[1] = cu.getCbf(absPartIdx+ tuNumParts, ttype, tuDepth); // flag read tuNumParts further on (second half)
+ offsetCBFs(subTUCBF); // packs each entry as (own << 1) | (first | second), same value the removed loops computed as compositeCBF
+
+ cu.setCbfPartRange(subTUCBF[0] << tuDepth, ttype, absPartIdx , tuNumParts); // write back over the first tuNumParts parts, shifted up by tuDepth
+ cu.setCbfPartRange(subTUCBF[1] << tuDepth, ttype, absPartIdx + tuNumParts, tuNumParts); // write back over the following tuNumParts parts
 }
/* returns distortion */
-uint32_t Search::codeIntraChromaQt(Mode& mode, const CUGeom& cuGeom, uint32_t trDepth, uint32_t absPartIdx, uint32_t& psyEnergy)
+uint32_t Search::codeIntraChromaQt(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx, uint32_t& psyEnergy)
{
CUData& cu = mode.cu;
- uint32_t fullDepth = cu.m_cuDepth[0] + trDepth;
- uint32_t tuDepthL = cu.m_tuDepth[absPartIdx];
-
- if (tuDepthL > trDepth)
+ uint32_t fullDepth = cu.m_cuDepth[0] + tuDepth;
+ uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
+
+ if (tuDepth < cu.m_tuDepth[absPartIdx])
{
- uint32_t qPartsDiv = NUM_CU_PARTITIONS >> ((fullDepth + 1) << 1);
+ uint32_t qNumParts = 1 << (log2TrSize - 1 - LOG2_UNIT_SIZE) * 2;
uint32_t outDist = 0, splitCbfU = 0, splitCbfV = 0;
- for (uint32_t subPartIdx = 0, absPartIdxSub = absPartIdx; subPartIdx < 4; subPartIdx++, absPartIdxSub += qPartsDiv)
+ for (uint32_t qIdx = 0, qPartIdx = absPartIdx; qIdx < 4; ++qIdx, qPartIdx += qNumParts)
{
- outDist += codeIntraChromaQt(mode, cuGeom, trDepth + 1, absPartIdxSub, psyEnergy);
- splitCbfU |= cu.getCbf(absPartIdxSub, TEXT_CHROMA_U, trDepth + 1);
- splitCbfV |= cu.getCbf(absPartIdxSub, TEXT_CHROMA_V, trDepth + 1);
+ outDist += codeIntraChromaQt(mode, cuGeom, tuDepth + 1, qPartIdx, psyEnergy);
+ splitCbfU |= cu.getCbf(qPartIdx, TEXT_CHROMA_U, tuDepth + 1);
+ splitCbfV |= cu.getCbf(qPartIdx, TEXT_CHROMA_V, tuDepth + 1);
}
- for (uint32_t offs = 0; offs < 4 * qPartsDiv; offs++)
+ for (uint32_t offs = 0; offs < 4 * qNumParts; offs++)
{
- cu.m_cbf[TEXT_CHROMA_U][absPartIdx + offs] |= (splitCbfU << trDepth);
- cu.m_cbf[TEXT_CHROMA_V][absPartIdx + offs] |= (splitCbfV << trDepth);
+ cu.m_cbf[TEXT_CHROMA_U][absPartIdx + offs] |= (splitCbfU << tuDepth);
+ cu.m_cbf[TEXT_CHROMA_V][absPartIdx + offs] |= (splitCbfV << tuDepth);
}
return outDist;
}
- uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
- uint32_t trDepthC = trDepth;
- if (log2TrSizeC == 1)
+ uint32_t tuDepthC = tuDepth;
+ if (log2TrSizeC < 2)
{
- X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444 && trDepth, "invalid trDepth\n");
- trDepthC--;
- log2TrSizeC++;
- uint32_t qpdiv = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] + trDepthC) << 1);
- bool bFirstQ = ((absPartIdx & (qpdiv - 1)) == 0);
- if (!bFirstQ)
+ X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444 && tuDepth, "invalid tuDepth\n");
+ if (absPartIdx & 3)
return 0;
+ log2TrSizeC = 2;
+ tuDepthC--;
}
if (m_bEnableRDOQ)
@@ -790,13 +765,13 @@
bool checkTransformSkip = m_slice->m_pps->bTransformSkipEnabled && log2TrSizeC <= MAX_LOG2_TS_SIZE && !cu.m_tqBypass[0];
checkTransformSkip &= !m_param->bEnableTSkipFast || (log2TrSize <= MAX_LOG2_TS_SIZE && cu.m_transformSkip[TEXT_LUMA][absPartIdx]);
if (checkTransformSkip)
- return codeIntraChromaTSkip(mode, cuGeom, trDepth, trDepthC, absPartIdx, psyEnergy);
+ return codeIntraChromaTSkip(mode, cuGeom, tuDepth, tuDepthC, absPartIdx, psyEnergy);
uint32_t qtLayer = log2TrSize - 2;
uint32_t tuSize = 1 << log2TrSizeC;
uint32_t outDist = 0;
- uint32_t curPartNum = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] + trDepthC) << 1);
+ uint32_t curPartNum = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] + tuDepthC) << 1);
const SplitType splitType = (m_csp == X265_CSP_I422) ? VERTICAL_SPLIT : DONT_SPLIT;
for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
@@ -823,7 +798,7 @@
intptr_t picStride = m_frame->m_reconPic->m_strideC;
// init availability pattern
- initAdiPatternChroma(cu, cuGeom, absPartIdxC, trDepthC, chromaId);
+ initAdiPatternChroma(cu, cuGeom, absPartIdxC, tuDepthC, chromaId);
pixel* chromaPred = getAdiChromaBuf(chromaId, tuSize);
uint32_t chromaPredMode = cu.m_chromaIntraDir[absPartIdxC];
@@ -844,7 +819,7 @@
{
m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeffC, log2TrSizeC, ttype, true, false, numSig);
primitives.luma_add_ps[sizeIdxC](reconQt, reconQtStride, pred, residual, stride, stride);
- cu.setCbfPartRange(1 << trDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);
+ cu.setCbfPartRange(1 << tuDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);
}
else
{
@@ -864,19 +839,19 @@
while (tuIterator.isNextSection());
if (splitType == VERTICAL_SPLIT)
- offsetSubTUCBFs(cu, ttype, trDepth, absPartIdx);
+ offsetSubTUCBFs(cu, ttype, tuDepth, absPartIdx);
}
return outDist;
}
/* returns distortion */
-uint32_t Search::codeIntraChromaTSkip(Mode& mode, const CUGeom& cuGeom, uint32_t trDepth, uint32_t trDepthC, uint32_t absPartIdx, uint32_t& psyEnergy)
+uint32_t Search::codeIntraChromaTSkip(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t tuDepthC, uint32_t absPartIdx, uint32_t& psyEnergy)
{
CUData& cu = mode.cu;
- uint32_t fullDepth = cu.m_cuDepth[0] + trDepth;
+ uint32_t fullDepth = cu.m_cuDepth[0] + tuDepth;
uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
- uint32_t log2TrSizeC = 2;
+ const uint32_t log2TrSizeC = 2;
uint32_t tuSize = 4;
uint32_t qtLayer = log2TrSize - 2;
uint32_t outDist = 0;
@@ -889,7 +864,7 @@
ALIGN_VAR_32(coeff_t, tskipCoeffC[MAX_TS_SIZE * MAX_TS_SIZE]);
ALIGN_VAR_32(pixel, tskipReconC[MAX_TS_SIZE * MAX_TS_SIZE]);
- uint32_t curPartNum = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] + trDepthC) << 1);
+ uint32_t curPartNum = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] + tuDepthC) << 1);
const SplitType splitType = (m_csp == X265_CSP_I422) ? VERTICAL_SPLIT : DONT_SPLIT;
for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
@@ -905,7 +880,7 @@
pixel* pred = mode.predYuv.getChromaAddr(chromaId, absPartIdxC);
int16_t* residual = m_rqt[cuGeom.depth].tmpResiYuv.getChromaAddr(chromaId, absPartIdxC);
uint32_t stride = mode.fencYuv->m_csize;
- uint32_t sizeIdxC = log2TrSizeC - 2;
+ const uint32_t sizeIdxC = log2TrSizeC - 2;
uint32_t coeffOffsetC = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (m_hChromaShift + m_vChromaShift));
coeff_t* coeffC = m_rqt[qtLayer].coeffRQT[chromaId] + coeffOffsetC;
@@ -913,7 +888,7 @@
uint32_t reconQtStride = m_rqt[qtLayer].reconQtYuv.m_csize;
// init availability pattern
- initAdiPatternChroma(cu, cuGeom, absPartIdxC, trDepthC, chromaId);
+ initAdiPatternChroma(cu, cuGeom, absPartIdxC, tuDepthC, chromaId);
pixel* chromaPred = getAdiChromaBuf(chromaId, tuSize);
uint32_t chromaPredMode = cu.m_chromaIntraDir[absPartIdxC];
@@ -945,7 +920,7 @@
{
m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeff, log2TrSizeC, ttype, true, useTSkip, numSig);
primitives.luma_add_ps[sizeIdxC](recon, reconStride, pred, residual, stride, stride);
- cu.setCbfPartRange(1 << trDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);
+ cu.setCbfPartRange(1 << tuDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);
}
else if (useTSkip)
{
@@ -996,7 +971,7 @@
primitives.luma_copy_pp[sizeIdxC](reconQt, reconQtStride, tskipReconC, MAX_TS_SIZE);
}
- cu.setCbfPartRange(bCbf << trDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);
+ cu.setCbfPartRange(bCbf << tuDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);
cu.setTransformSkipPartRange(bTSkip, ttype, absPartIdxC, tuIterator.absPartIdxStep);
pixel* reconPicC = m_frame->m_reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.encodeIdx + absPartIdxC);
@@ -1009,34 +984,27 @@
while (tuIterator.isNextSection());
if (splitType == VERTICAL_SPLIT)
- offsetSubTUCBFs(cu, ttype, trDepth, absPartIdx);
+ offsetSubTUCBFs(cu, ttype, tuDepth, absPartIdx);
}
m_entropyCoder.load(m_rqt[fullDepth].rqtRoot);
return outDist;
}
-void Search::extractIntraResultChromaQT(CUData& cu, Yuv& reconYuv, uint32_t absPartIdx, uint32_t trDepth, bool tuQuad)
+void Search::extractIntraResultChromaQT(CUData& cu, Yuv& reconYuv, uint32_t absPartIdx, uint32_t tuDepth)
{
- uint32_t fullDepth = cu.m_cuDepth[0] + trDepth;
+ uint32_t fullDepth = cu.m_cuDepth[0] + tuDepth;
uint32_t tuDepthL = cu.m_tuDepth[absPartIdx];
-
- if (tuDepthL == trDepth)
+ uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
+ uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
+
+ if (tuDepthL == tuDepth || log2TrSizeC == 2)
{
- uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
- uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
-
- if (tuQuad)
- {
- log2TrSizeC++; /* extract one 4x4 instead of 4 2x2 */
- trDepth--; /* also adjust the number of coeff read */
- }
-
// copy transform coefficients
uint32_t numCoeffC = 1 << (log2TrSizeC * 2 + (m_csp == X265_CSP_I422));
uint32_t coeffOffsetC = absPartIdx << (LOG2_UNIT_SIZE * 2 - (m_hChromaShift + m_vChromaShift));
- uint32_t qtLayer = log2TrSize - 2;
+ uint32_t qtLayer = log2TrSize - 2 - (tuDepthL - tuDepth);
coeff_t* coeffSrcU = m_rqt[qtLayer].coeffRQT[1] + coeffOffsetC;
coeff_t* coeffSrcV = m_rqt[qtLayer].coeffRQT[2] + coeffOffsetC;
coeff_t* coeffDstU = cu.m_trCoeff[1] + coeffOffsetC;
@@ -1049,38 +1017,29 @@
}
else
{
- if (g_maxLog2CUSize - fullDepth - 1 == 2 && m_csp != X265_CSP_I444)
- /* no such thing as chroma 2x2, so extract one 4x4 instead of 4 2x2 */
- extractIntraResultChromaQT(cu, reconYuv, absPartIdx, trDepth + 1, true);
- else
- {
- uint32_t numQPart = NUM_CU_PARTITIONS >> ((fullDepth + 1) << 1);
- for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
- extractIntraResultChromaQT(cu, reconYuv, absPartIdx + subPartIdx * numQPart, trDepth + 1, false);
- }
+ uint32_t qNumParts = 1 << (log2TrSize - 1 - LOG2_UNIT_SIZE) * 2;
+ for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
+ extractIntraResultChromaQT(cu, reconYuv, absPartIdx, tuDepth + 1);
}
}
-void Search::residualQTIntraChroma(Mode& mode, const CUGeom& cuGeom, uint32_t trDepth, uint32_t absPartIdx)
+void Search::residualQTIntraChroma(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx)
{
CUData& cu = mode.cu;
- uint32_t fullDepth = cu.m_cuDepth[0] + trDepth;
- uint32_t tuDepthL = cu.m_tuDepth[absPartIdx];
+ uint32_t fullDepth = cu.m_cuDepth[0] + tuDepth;
+ uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
- if (tuDepthL == trDepth)
+ if (tuDepth == cu.m_tuDepth[absPartIdx])
{
- uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
- uint32_t trDepthC = trDepth;
- if (log2TrSizeC == 1)
+ uint32_t tuDepthC = tuDepth;
+ if (log2TrSizeC < 2)
{
- X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444 && trDepth > 0, "invalid trDepth\n");
- trDepthC--;
- log2TrSizeC++;
- uint32_t qpdiv = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] + trDepthC) << 1);
- bool bFirstQ = ((absPartIdx & (qpdiv - 1)) == 0);
- if (!bFirstQ)
+ X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444 && tuDepth, "invalid tuDepth\n");
+ if (absPartIdx & 3)
return;
+ log2TrSizeC = 2;
+ tuDepthC--;
}
ShortYuv& resiYuv = m_rqt[cuGeom.depth].tmpResiYuv;
@@ -1088,7 +1047,7 @@
uint32_t stride = mode.fencYuv->m_csize;
const int sizeIdxC = log2TrSizeC - 2;
- uint32_t curPartNum = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] + trDepthC) << 1);
+ uint32_t curPartNum = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] + tuDepthC) << 1);
const SplitType splitType = (m_csp == X265_CSP_I422) ? VERTICAL_SPLIT : DONT_SPLIT;
for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
@@ -1113,7 +1072,7 @@
if (chromaPredMode == DM_CHROMA_IDX)
chromaPredMode = cu.m_lumaIntraDir[(m_csp == X265_CSP_I444) ? absPartIdxC : 0];
chromaPredMode = (m_csp == X265_CSP_I422) ? g_chroma422IntraAngleMappingTable[chromaPredMode] : chromaPredMode;
- initAdiPatternChroma(cu, cuGeom, absPartIdxC, trDepthC, chromaId);
+ initAdiPatternChroma(cu, cuGeom, absPartIdxC, tuDepthC, chromaId);
pixel* chromaPred = getAdiChromaBuf(chromaId, tuSize);
predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, log2TrSizeC, m_csp);
@@ -1127,7 +1086,7 @@
m_quant.invtransformNxN(cu.m_tqBypass[absPartIdxC], residual, stride, coeff, log2TrSizeC, ttype, true, false, numSig);
primitives.luma_add_ps[sizeIdxC](recon, stride, pred, residual, stride, stride);
primitives.luma_copy_pp[sizeIdxC](picReconC, picStride, recon, stride);
- cu.setCbfPartRange(1 << trDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);
+ cu.setCbfPartRange(1 << tuDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);
}
else
{
@@ -1139,23 +1098,23 @@
while (tuIterator.isNextSection());
if (splitType == VERTICAL_SPLIT)
- offsetSubTUCBFs(cu, (TextType)chromaId, trDepth, absPartIdx);
+ offsetSubTUCBFs(cu, (TextType)chromaId, tuDepth, absPartIdx);
}
}
else
{
- uint32_t qPartsDiv = NUM_CU_PARTITIONS >> ((fullDepth + 1) << 1);
+ uint32_t qNumParts = 1 << (log2TrSize - 1 - LOG2_UNIT_SIZE) * 2;
uint32_t splitCbfU = 0, splitCbfV = 0;
- for (uint32_t subPartIdx = 0, absPartIdxC = absPartIdx; subPartIdx < 4; subPartIdx++, absPartIdxC += qPartsDiv)
+ for (uint32_t qIdx = 0, qPartIdx = absPartIdx; qIdx < 4; ++qIdx, qPartIdx += qNumParts)
{
- residualQTIntraChroma(mode, cuGeom, trDepth + 1, absPartIdxC);
- splitCbfU |= cu.getCbf(absPartIdxC, TEXT_CHROMA_U, trDepth + 1);
- splitCbfV |= cu.getCbf(absPartIdxC, TEXT_CHROMA_V, trDepth + 1);
+ residualQTIntraChroma(mode, cuGeom, tuDepth + 1, qPartIdx);
+ splitCbfU |= cu.getCbf(qPartIdx, TEXT_CHROMA_U, tuDepth + 1);
+ splitCbfV |= cu.getCbf(qPartIdx, TEXT_CHROMA_V, tuDepth + 1);
}
- for (uint32_t offs = 0; offs < 4 * qPartsDiv; offs++)
+ for (uint32_t offs = 0; offs < 4 * qNumParts; offs++)
{
- cu.m_cbf[1][absPartIdx + offs] |= (splitCbfU << trDepth);
- cu.m_cbf[2][absPartIdx + offs] |= (splitCbfV << trDepth);
+ cu.m_cbf[1][absPartIdx + offs] |= (splitCbfU << tuDepth);
+ cu.m_cbf[2][absPartIdx + offs] |= (splitCbfV << tuDepth);
}
}
}
@@ -1190,7 +1149,7 @@
intraMode.mvBits = m_entropyCoder.getNumberOfWrittenBits();
bool bCodeDQP = m_slice->m_pps->bUseDQP;
- m_entropyCoder.codeCoeff(cu, 0, depth, bCodeDQP, tuDepthRange);
+ m_entropyCoder.codeCoeff(cu, 0, bCodeDQP, tuDepthRange);
m_entropyCoder.store(intraMode.contexts);
intraMode.totalBits = m_entropyCoder.getNumberOfWrittenBits();
intraMode.coeffBits = intraMode.totalBits - intraMode.mvBits;
@@ -1210,13 +1169,13 @@
cu.setPartSizeSubParts(SIZE_2Nx2N);
cu.setPredModeSubParts(MODE_INTRA);
- const uint32_t initTrDepth = 0;
- uint32_t log2TrSize = cu.m_log2CUSize[0] - initTrDepth;
+ const uint32_t initTuDepth = 0;
+ uint32_t log2TrSize = cu.m_log2CUSize[0] - initTuDepth;
uint32_t tuSize = 1 << log2TrSize;
const uint32_t absPartIdx = 0;
// Reference sample smoothing
- initAdiPattern(cu, cuGeom, absPartIdx, initTrDepth, ALL_IDX);
+ initAdiPattern(cu, cuGeom, absPartIdx, initTuDepth, ALL_IDX);
const pixel* fenc = intraMode.fencYuv->m_buf[0];
uint32_t stride = intraMode.fencYuv->m_size;
@@ -1365,7 +1324,7 @@
}
}
- cu.setLumaIntraDirSubParts((uint8_t)bmode, absPartIdx, depth + initTrDepth);
+ cu.setLumaIntraDirSubParts((uint8_t)bmode, absPartIdx, depth + initTuDepth);
intraMode.initCosts();
intraMode.totalBits = bbits;
intraMode.distortion = bsad;
@@ -1406,7 +1365,7 @@
intraMode.mvBits += m_entropyCoder.getNumberOfWrittenBits();
bool bCodeDQP = m_slice->m_pps->bUseDQP;
- m_entropyCoder.codeCoeff(cu, 0, cuGeom.depth, bCodeDQP, tuDepthRange);
+ m_entropyCoder.codeCoeff(cu, 0, bCodeDQP, tuDepthRange);
intraMode.totalBits = m_entropyCoder.getNumberOfWrittenBits();
intraMode.coeffBits = intraMode.totalBits - intraMode.mvBits;
@@ -1417,7 +1376,7 @@
updateModeCost(intraMode);
}
-uint32_t Search::estIntraPredQT(Mode &intraMode, const CUGeom& cuGeom, uint32_t depthRange[2], uint8_t* sharedModes)
+uint32_t Search::estIntraPredQT(Mode &intraMode, const CUGeom& cuGeom, const uint32_t depthRange[2], uint8_t* sharedModes)
{
CUData& cu = intraMode.cu;
Yuv* reconYuv = &intraMode.reconYuv;
@@ -1425,9 +1384,9 @@
const Yuv* fencYuv = intraMode.fencYuv;
uint32_t depth = cu.m_cuDepth[0];
- uint32_t initTrDepth = cu.m_partSize[0] != SIZE_2Nx2N;
- uint32_t numPU = 1 << (2 * initTrDepth);
- uint32_t log2TrSize = cu.m_log2CUSize[0] - initTrDepth;
+ uint32_t initTuDepth = cu.m_partSize[0] != SIZE_2Nx2N;
+ uint32_t numPU = 1 << (2 * initTuDepth);
+ uint32_t log2TrSize = cu.m_log2CUSize[0] - initTuDepth;
uint32_t tuSize = 1 << log2TrSize;
uint32_t qNumParts = cuGeom.numPartitions >> 2;
uint32_t sizeIdx = log2TrSize - 2;
@@ -1446,7 +1405,7 @@
else
{
// Reference sample smoothing
- initAdiPattern(cu, cuGeom, absPartIdx, initTrDepth, ALL_IDX);
+ initAdiPattern(cu, cuGeom, absPartIdx, initTuDepth, ALL_IDX);
// determine set of modes to be tested (using prediction signal only)
const pixel* fenc = fencYuv->getLumaAddr(absPartIdx);
@@ -1549,7 +1508,7 @@
* levels and at higher depths */
uint64_t candCostList[MAX_RD_INTRA_MODES];
uint32_t rdModeList[MAX_RD_INTRA_MODES];
- int maxCandCount = 2 + m_param->rdLevel + ((depth + initTrDepth) >> 1);
+ int maxCandCount = 2 + m_param->rdLevel + ((depth + initTuDepth) >> 1);
for (int i = 0; i < maxCandCount; i++)
candCostList[i] = MAX_INT64;
@@ -1565,29 +1524,29 @@
if (candCostList[i] == MAX_INT64)
break;
m_entropyCoder.load(m_rqt[depth].cur);
- cu.setLumaIntraDirSubParts(rdModeList[i], absPartIdx, depth + initTrDepth);
+ cu.setLumaIntraDirSubParts(rdModeList[i], absPartIdx, depth + initTuDepth);
Cost icosts;
if (checkTransformSkip)
- codeIntraLumaTSkip(intraMode, cuGeom, initTrDepth, absPartIdx, icosts);
+ codeIntraLumaTSkip(intraMode, cuGeom, initTuDepth, absPartIdx, icosts);
else
- codeIntraLumaQT(intraMode, cuGeom, initTrDepth, absPartIdx, false, icosts, depthRange);
+ codeIntraLumaQT(intraMode, cuGeom, initTuDepth, absPartIdx, false, icosts, depthRange);
COPY2_IF_LT(bcost, icosts.rdcost, bmode, rdModeList[i]);
}
}
/* remeasure best mode, allowing TU splits */
- cu.setLumaIntraDirSubParts(bmode, absPartIdx, depth + initTrDepth);
+ cu.setLumaIntraDirSubParts(bmode, absPartIdx, depth + initTuDepth);
m_entropyCoder.load(m_rqt[depth].cur);
Cost icosts;
if (checkTransformSkip)
- codeIntraLumaTSkip(intraMode, cuGeom, initTrDepth, absPartIdx, icosts);
+ codeIntraLumaTSkip(intraMode, cuGeom, initTuDepth, absPartIdx, icosts);
else
- codeIntraLumaQT(intraMode, cuGeom, initTrDepth, absPartIdx, true, icosts, depthRange);
+ codeIntraLumaQT(intraMode, cuGeom, initTuDepth, absPartIdx, true, icosts, depthRange);
totalDistortion += icosts.distortion;
- extractIntraResultQT(cu, *reconYuv, initTrDepth, absPartIdx);
+ extractIntraResultQT(cu, *reconYuv, initTuDepth, absPartIdx);
// set reconstruction for next intra prediction blocks
if (pu != numPU - 1)
@@ -1607,9 +1566,8 @@
if (numPU > 1)
{
uint32_t combCbfY = 0;
- uint32_t partIdx = 0;
- for (uint32_t part = 0; part < 4; part++, partIdx += qNumParts)
- combCbfY |= cu.getCbf(partIdx, TEXT_LUMA, 1);
+ for (uint32_t qIdx = 0, qPartIdx = 0; qIdx < 4; ++qIdx, qPartIdx += qNumParts)
+ combCbfY |= cu.getCbf(qPartIdx, TEXT_LUMA, 1);
for (uint32_t offs = 0; offs < 4 * qNumParts; offs++)
cu.m_cbf[0][offs] |= combCbfY;
@@ -1684,19 +1642,18 @@
Yuv& reconYuv = intraMode.reconYuv;
uint32_t depth = cu.m_cuDepth[0];
- uint32_t initTrDepth = cu.m_partSize[0] != SIZE_2Nx2N && m_csp == X265_CSP_I444;
- uint32_t log2TrSize = cu.m_log2CUSize[0] - initTrDepth;
+ uint32_t initTuDepth = cu.m_partSize[0] != SIZE_2Nx2N && m_csp == X265_CSP_I444;
+ uint32_t log2TrSize = cu.m_log2CUSize[0] - initTuDepth;
uint32_t absPartStep = (NUM_CU_PARTITIONS >> (depth << 1));
uint32_t totalDistortion = 0;
int part = partitionFromLog2Size(log2TrSize);
- TURecurse tuIterator((initTrDepth == 0) ? DONT_SPLIT : QUAD_SPLIT, absPartStep, 0);
+ TURecurse tuIterator((initTuDepth == 0) ? DONT_SPLIT : QUAD_SPLIT, absPartStep, 0);
do
{
uint32_t absPartIdxC = tuIterator.absPartIdxTURelCU;
- int cuSize = 1 << cu.m_log2CUSize[absPartIdxC];
uint32_t bestMode = 0;
uint32_t bestDist = 0;
@@ -1715,9 +1672,9 @@
// restore context models
m_entropyCoder.load(m_rqt[depth].cur);
- cu.setChromIntraDirSubParts(modeList[mode], absPartIdxC, depth + initTrDepth);
+ cu.setChromIntraDirSubParts(modeList[mode], absPartIdxC, depth + initTuDepth);
uint32_t psyEnergy = 0;
- uint32_t dist = codeIntraChromaQt(intraMode, cuGeom, initTrDepth, absPartIdxC, psyEnergy);
+ uint32_t dist = codeIntraChromaQt(intraMode, cuGeom, initTuDepth, absPartIdxC, psyEnergy);
if (m_slice->m_pps->bTransformSkipEnabled)
m_entropyCoder.load(m_rqt[depth].cur);
@@ -1731,14 +1688,14 @@
}
else
{
- uint32_t qtNumParts = cuGeom.numPartitions >> 2;
- if (!(absPartIdxC & (qtNumParts - 1)))
+ uint32_t qNumParts = cuGeom.numPartitions >> 2;
+ if (!(absPartIdxC & (qNumParts - 1)))
m_entropyCoder.codeIntraDirChroma(cu, absPartIdxC, modeList);
}
- codeSubdivCbfQTChroma(cu, initTrDepth, absPartIdxC, tuIterator.absPartIdxStep, cuSize, cuSize);
- codeCoeffQTChroma(cu, initTrDepth, absPartIdxC, TEXT_CHROMA_U);
- codeCoeffQTChroma(cu, initTrDepth, absPartIdxC, TEXT_CHROMA_V);
+ codeSubdivCbfQTChroma(cu, initTuDepth, absPartIdxC);
+ codeCoeffQTChroma(cu, initTuDepth, absPartIdxC, TEXT_CHROMA_U);
+ codeCoeffQTChroma(cu, initTuDepth, absPartIdxC, TEXT_CHROMA_V);
uint32_t bits = m_entropyCoder.getNumberOfWrittenBits();
uint64_t cost = m_rdCost.m_psyRd ? m_rdCost.calcPsyRdCost(dist, bits, psyEnergy) : m_rdCost.calcRdCost(dist, bits);
@@ -1747,7 +1704,7 @@
bestCost = cost;
bestDist = dist;
bestMode = modeList[mode];
- extractIntraResultChromaQT(cu, reconYuv, absPartIdxC, initTrDepth, false);
+ extractIntraResultChromaQT(cu, reconYuv, absPartIdxC, initTuDepth);
memcpy(m_qtTempCbf[1], cu.m_cbf[1] + absPartIdxC, tuIterator.absPartIdxStep * sizeof(uint8_t));
memcpy(m_qtTempCbf[2], cu.m_cbf[2] + absPartIdxC, tuIterator.absPartIdxStep * sizeof(uint8_t));
memcpy(m_qtTempTransformSkipFlag[1], cu.m_transformSkip[1] + absPartIdxC, tuIterator.absPartIdxStep * sizeof(uint8_t));
@@ -1775,23 +1732,23 @@
memcpy(cu.m_cbf[2] + absPartIdxC, m_qtTempCbf[2], tuIterator.absPartIdxStep * sizeof(uint8_t));
memcpy(cu.m_transformSkip[1] + absPartIdxC, m_qtTempTransformSkipFlag[1], tuIterator.absPartIdxStep * sizeof(uint8_t));
memcpy(cu.m_transformSkip[2] + absPartIdxC, m_qtTempTransformSkipFlag[2], tuIterator.absPartIdxStep * sizeof(uint8_t));
- cu.setChromIntraDirSubParts(bestMode, absPartIdxC, depth + initTrDepth);
+ cu.setChromIntraDirSubParts(bestMode, absPartIdxC, depth + initTuDepth);
totalDistortion += bestDist;
}
while (tuIterator.isNextSection());
- if (initTrDepth != 0)
+ if (initTuDepth != 0)
{
uint32_t combCbfU = 0;
uint32_t combCbfV = 0;
- uint32_t partIdx = 0;
- for (uint32_t p = 0; p < 4; p++, partIdx += tuIterator.absPartIdxStep)
+ uint32_t qNumParts = tuIterator.absPartIdxStep;
+ for (uint32_t qIdx = 0, qPartIdx = 0; qIdx < 4; ++qIdx, qPartIdx += qNumParts)
{
- combCbfU |= cu.getCbf(partIdx, TEXT_CHROMA_U, 1);
- combCbfV |= cu.getCbf(partIdx, TEXT_CHROMA_V, 1);
+ combCbfU |= cu.getCbf(qPartIdx, TEXT_CHROMA_U, 1);
+ combCbfV |= cu.getCbf(qPartIdx, TEXT_CHROMA_V, 1);
}
- for (uint32_t offs = 0; offs < 4 * tuIterator.absPartIdxStep; offs++)
+ for (uint32_t offs = 0; offs < 4 * qNumParts; offs++)
{
cu.m_cbf[1][offs] |= combCbfU;
cu.m_cbf[2][offs] |= combCbfV;
@@ -2562,7 +2519,7 @@
uint32_t mvBits = m_entropyCoder.getNumberOfWrittenBits();
bool bCodeDQP = m_slice->m_pps->bUseDQP;
- m_entropyCoder.codeCoeff(cu, 0, cu.m_cuDepth[0], bCodeDQP, tuDepthRange);
+ m_entropyCoder.codeCoeff(cu, 0, bCodeDQP, tuDepthRange);
bits = m_entropyCoder.getNumberOfWrittenBits();
coeffBits = bits - mvBits;
@@ -2589,7 +2546,7 @@
updateModeCost(interMode);
}
-void Search::residualTransformQuantInter(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, uint32_t depthRange[2])
+void Search::residualTransformQuantInter(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, const uint32_t depthRange[2])
{
CUData& cu = mode.cu;
X265_CHECK(cu.m_cuDepth[0] == cu.m_cuDepth[absPartIdx], "invalid depth\n");
@@ -2607,13 +2564,12 @@
uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
bool bCodeChroma = true;
uint32_t tuDepthC = tuDepth;
- if (log2TrSizeC == 1)
+ if (log2TrSizeC < 2)
{
- X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444, "tuQuad check failed\n");
- log2TrSizeC++;
+ X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444 && tuDepth, "invalid tuDepth\n");
+ log2TrSizeC = 2;
tuDepthC--;
- uint32_t qpdiv = NUM_CU_PARTITIONS >> ((depth - 1) << 1);
- bCodeChroma = ((absPartIdx & (qpdiv - 1)) == 0);
+ bCodeChroma = !(absPartIdx & 3);
}
uint32_t absPartIdxStep = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] + tuDepthC) << 1);
@@ -2707,16 +2663,16 @@
{
X265_CHECK(log2TrSize > depthRange[0], "residualTransformQuantInter recursion check failure\n");
- const uint32_t qPartNumSubdiv = NUM_CU_PARTITIONS >> ((depth + 1) << 1);
+ uint32_t qNumParts = 1 << (log2TrSize - 1 - LOG2_UNIT_SIZE) * 2;
uint32_t ycbf = 0, ucbf = 0, vcbf = 0;
- for (uint32_t i = 0; i < 4; i++)
+ for (uint32_t qIdx = 0, qPartIdx = absPartIdx; qIdx < 4; ++qIdx, qPartIdx += qNumParts)
{
- residualTransformQuantInter(mode, cuGeom, absPartIdx + i * qPartNumSubdiv, depth + 1, depthRange);
- ycbf |= cu.getCbf(absPartIdx + i * qPartNumSubdiv, TEXT_LUMA, tuDepth + 1);
- ucbf |= cu.getCbf(absPartIdx + i * qPartNumSubdiv, TEXT_CHROMA_U, tuDepth + 1);
- vcbf |= cu.getCbf(absPartIdx + i * qPartNumSubdiv, TEXT_CHROMA_V, tuDepth + 1);
+ residualTransformQuantInter(mode, cuGeom, qPartIdx, depth + 1, depthRange);
+ ycbf |= cu.getCbf(qPartIdx, TEXT_LUMA, tuDepth + 1);
+ ucbf |= cu.getCbf(qPartIdx, TEXT_CHROMA_U, tuDepth + 1);
+ vcbf |= cu.getCbf(qPartIdx, TEXT_CHROMA_V, tuDepth + 1);
}
- for (uint32_t i = 0; i < 4 * qPartNumSubdiv; i++)
+ for (uint32_t i = 0; i < 4 * qNumParts; i++)
{
cu.m_cbf[TEXT_LUMA][absPartIdx + i] |= ycbf << tuDepth;
cu.m_cbf[TEXT_CHROMA_U][absPartIdx + i] |= ucbf << tuDepth;
@@ -2735,7 +2691,7 @@
return m_rdCost.calcRdCost(dist, nullBits);
}
-void Search::estimateResidualQT(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, ShortYuv& resiYuv, Cost& outCosts, uint32_t depthRange[2])
+void Search::estimateResidualQT(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, ShortYuv& resiYuv, Cost& outCosts, const uint32_t depthRange[2])
{
CUData& cu = mode.cu;
uint32_t log2TrSize = g_maxLog2CUSize - depth;
@@ -2754,12 +2710,12 @@
uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
bool bCodeChroma = true;
uint32_t tuDepthC = tuDepth;
- if ((log2TrSize == 2) && !(m_csp == X265_CSP_I444))
+ if (log2TrSizeC < 2)
{
- log2TrSizeC++;
+ X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444 && tuDepth, "invalid tuDepth\n");
+ log2TrSizeC = 2;
tuDepthC--;
- uint32_t qpdiv = NUM_CU_PARTITIONS >> ((depth - 1) << 1);
- bCodeChroma = ((absPartIdx & (qpdiv - 1)) == 0);
+ bCodeChroma = !(absPartIdx & 3);
}
// code full block
@@ -2814,7 +2770,7 @@
// Coding luma cbf flag has been removed from here. The context for cbf flag is different for each depth.
// So it is valid if we encode coefficients and then cbfs at least for analysis.
-// m_entropyCoder.codeQtCbf(cbfFlag[TEXT_LUMA][0], TEXT_LUMA, tuDepth);
+// m_entropyCoder.codeQtCbfLuma(cbfFlag[TEXT_LUMA][0], tuDepth);
if (cbfFlag[TEXT_LUMA][0])
m_entropyCoder.codeCoeffNxN(cu, coeffCurY, absPartIdx, log2TrSize, TEXT_LUMA);
@@ -2919,7 +2875,7 @@
cbfFlag[chromaId][tuIterator.section] = !!numSig[chromaId][tuIterator.section];
//Coding cbf flags has been removed from here
-// m_entropyCoder.codeQtCbf(cbfFlag[chromaId][tuIterator.section], (TextType)chromaId, tuDepth);
+// m_entropyCoder.codeQtCbfChroma(cbfFlag[chromaId][tuIterator.section], tuDepth);
if (cbfFlag[chromaId][tuIterator.section])
m_entropyCoder.codeCoeffNxN(cu, coeffCurC + subTUOffset, absPartIdxC, log2TrSizeC, (TextType)chromaId);
uint32_t newBits = m_entropyCoder.getNumberOfWrittenBits();
@@ -3019,7 +2975,7 @@
if (numSigTSkipY)
{
m_entropyCoder.resetBits();
- m_entropyCoder.codeQtCbf(!!numSigTSkipY, TEXT_LUMA, tuDepth);
+ m_entropyCoder.codeQtCbfLuma(!!numSigTSkipY, tuDepth);
m_entropyCoder.codeCoeffNxN(cu, tsCoeffY, absPartIdx, log2TrSize, TEXT_LUMA);
const uint32_t skipSingleBitsY = m_entropyCoder.getNumberOfWrittenBits();
@@ -3090,7 +3046,7 @@
if (numSigTSkipC)
{
- m_entropyCoder.codeQtCbf(!!numSigTSkipC, (TextType)chromaId, tuDepth);
+ m_entropyCoder.codeQtCbfChroma(!!numSigTSkipC, tuDepth);
m_entropyCoder.codeCoeffNxN(cu, tsCoeffC, absPartIdxC, log2TrSizeC, (TextType)chromaId);
singleBits[chromaId][tuIterator.section] = m_entropyCoder.getNumberOfWrittenBits();
@@ -3139,17 +3095,17 @@
for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
{
if (!splitIntoSubTUs)
- m_entropyCoder.codeQtCbf(cbfFlag[chromaId][0], (TextType)chromaId, tuDepth);
+ m_entropyCoder.codeQtCbfChroma(cbfFlag[chromaId][0], tuDepth);
else
{
offsetSubTUCBFs(cu, (TextType)chromaId, tuDepth, absPartIdx);
- for (uint32_t subTU = 0; subTU < 2; subTU++)
- m_entropyCoder.codeQtCbf(cbfFlag[chromaId][subTU], (TextType)chromaId, tuDepth);
+ m_entropyCoder.codeQtCbfChroma(cbfFlag[chromaId][0], tuDepth);
+ m_entropyCoder.codeQtCbfChroma(cbfFlag[chromaId][1], tuDepth);
}
}
}
- m_entropyCoder.codeQtCbf(cbfFlag[TEXT_LUMA][0], TEXT_LUMA, tuDepth);
+ m_entropyCoder.codeQtCbfLuma(cbfFlag[TEXT_LUMA][0], tuDepth);
uint32_t cbfBits = m_entropyCoder.getNumberOfWrittenBits();
@@ -3199,16 +3155,16 @@
splitCost.bits = m_entropyCoder.getNumberOfWrittenBits();
}
- const uint32_t qPartNumSubdiv = NUM_CU_PARTITIONS >> ((depth + 1) << 1);
+ uint32_t qNumParts = 1 << (log2TrSize - 1 - LOG2_UNIT_SIZE) * 2;
uint32_t ycbf = 0, ucbf = 0, vcbf = 0;
- for (uint32_t i = 0; i < 4; ++i)
+ for (uint32_t qIdx = 0, qPartIdx = absPartIdx; qIdx < 4; ++qIdx, qPartIdx += qNumParts)
{
- estimateResidualQT(mode, cuGeom, absPartIdx + i * qPartNumSubdiv, depth + 1, resiYuv, splitCost, depthRange);
- ycbf |= cu.getCbf(absPartIdx + i * qPartNumSubdiv, TEXT_LUMA, tuDepth + 1);
- ucbf |= cu.getCbf(absPartIdx + i * qPartNumSubdiv, TEXT_CHROMA_U, tuDepth + 1);
- vcbf |= cu.getCbf(absPartIdx + i * qPartNumSubdiv, TEXT_CHROMA_V, tuDepth + 1);
+ estimateResidualQT(mode, cuGeom, qPartIdx, depth + 1, resiYuv, splitCost, depthRange);
+ ycbf |= cu.getCbf(qPartIdx, TEXT_LUMA, tuDepth + 1);
+ ucbf |= cu.getCbf(qPartIdx, TEXT_CHROMA_U, tuDepth + 1);
+ vcbf |= cu.getCbf(qPartIdx, TEXT_CHROMA_V, tuDepth + 1);
}
- for (uint32_t i = 0; i < 4 * qPartNumSubdiv; ++i)
+ for (uint32_t i = 0; i < 4 * qNumParts; ++i)
{
cu.m_cbf[0][absPartIdx + i] |= ycbf << tuDepth;
cu.m_cbf[1][absPartIdx + i] |= ucbf << tuDepth;
@@ -3248,15 +3204,18 @@
cu.setTransformSkipSubParts(bestTransformMode[TEXT_LUMA][0], TEXT_LUMA, absPartIdx, depth);
if (bCodeChroma)
{
- const uint32_t numberOfSections = splitIntoSubTUs ? 2 : 1;
-
- uint32_t partIdxesPerSubTU = absPartIdxStep >> (splitIntoSubTUs ? 1 : 0);
- for (uint32_t subTUIndex = 0; subTUIndex < numberOfSections; subTUIndex++)
+ if (!splitIntoSubTUs)
{
- const uint32_t subTUPartIdx = absPartIdx + (subTUIndex * partIdxesPerSubTU);
-
- cu.setTransformSkipPartRange(bestTransformMode[TEXT_CHROMA_U][subTUIndex], TEXT_CHROMA_U, subTUPartIdx, partIdxesPerSubTU);
- cu.setTransformSkipPartRange(bestTransformMode[TEXT_CHROMA_V][subTUIndex], TEXT_CHROMA_V, subTUPartIdx, partIdxesPerSubTU);
+ cu.setTransformSkipSubParts(bestTransformMode[TEXT_CHROMA_U][0], TEXT_CHROMA_U, absPartIdx, depth);
+ cu.setTransformSkipSubParts(bestTransformMode[TEXT_CHROMA_V][0], TEXT_CHROMA_V, absPartIdx, depth);
+ }
+ else
+ {
+ uint32_t tuNumParts = absPartIdxStep >> 1;
+ cu.setTransformSkipPartRange(bestTransformMode[TEXT_CHROMA_U][0], TEXT_CHROMA_U, absPartIdx , tuNumParts);
+ cu.setTransformSkipPartRange(bestTransformMode[TEXT_CHROMA_U][1], TEXT_CHROMA_U, absPartIdx + tuNumParts, tuNumParts);
+ cu.setTransformSkipPartRange(bestTransformMode[TEXT_CHROMA_V][0], TEXT_CHROMA_V, absPartIdx , tuNumParts);
+ cu.setTransformSkipPartRange(bestTransformMode[TEXT_CHROMA_V][1], TEXT_CHROMA_V, absPartIdx + tuNumParts, tuNumParts);
}
}
X265_CHECK(bCheckFull, "check-full must be set\n");
@@ -3268,23 +3227,21 @@
if (bCodeChroma)
{
- uint32_t numberOfSections = splitIntoSubTUs ? 2 : 1;
- uint32_t partIdxesPerSubTU = absPartIdxStep >> (splitIntoSubTUs ? 1 : 0);
-
- for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
+ if (!splitIntoSubTUs)
{
- for (uint32_t subTUIndex = 0; subTUIndex < numberOfSections; subTUIndex++)
- {
- const uint32_t subTUPartIdx = absPartIdx + (subTUIndex * partIdxesPerSubTU);
-
- if (splitIntoSubTUs)
- {
- uint8_t combinedSubTUCBF = cbfFlag[chromaId][0] | cbfFlag[chromaId][1];
- cu.setCbfPartRange(((cbfFlag[chromaId][subTUIndex] << 1) | combinedSubTUCBF) << tuDepth, (TextType)chromaId, subTUPartIdx, partIdxesPerSubTU);
- }
- else
- cu.setCbfPartRange(cbfFlag[chromaId][subTUIndex] << tuDepth, (TextType)chromaId, subTUPartIdx, partIdxesPerSubTU);
- }
+ cu.setCbfSubParts(cbfFlag[TEXT_CHROMA_U][0] << tuDepth, TEXT_CHROMA_U, absPartIdx, depth);
+ cu.setCbfSubParts(cbfFlag[TEXT_CHROMA_V][0] << tuDepth, TEXT_CHROMA_V, absPartIdx, depth);
+ }
+ else
+ {
+ uint32_t tuNumParts = absPartIdxStep >> 1;
+
+ offsetCBFs(cbfFlag[TEXT_CHROMA_U]);
+ offsetCBFs(cbfFlag[TEXT_CHROMA_V]);
+ cu.setCbfPartRange(cbfFlag[TEXT_CHROMA_U][0] << tuDepth, TEXT_CHROMA_U, absPartIdx , tuNumParts);
+ cu.setCbfPartRange(cbfFlag[TEXT_CHROMA_U][1] << tuDepth, TEXT_CHROMA_U, absPartIdx + tuNumParts, tuNumParts);
+ cu.setCbfPartRange(cbfFlag[TEXT_CHROMA_V][0] << tuDepth, TEXT_CHROMA_V, absPartIdx , tuNumParts);
+ cu.setCbfPartRange(cbfFlag[TEXT_CHROMA_V][1] << tuDepth, TEXT_CHROMA_V, absPartIdx + tuNumParts, tuNumParts);
}
}
@@ -3294,74 +3251,62 @@
outCosts.energy += fullCost.energy;
}
-void Search::codeInterSubdivCbfQT(CUData& cu, uint32_t absPartIdx, const uint32_t depth, uint32_t depthRange[2])
+void Search::codeInterSubdivCbfQT(CUData& cu, uint32_t absPartIdx, const uint32_t depth, const uint32_t depthRange[2])
{
X265_CHECK(cu.m_cuDepth[0] == cu.m_cuDepth[absPartIdx], "depth not matching\n");
X265_CHECK(cu.isInter(absPartIdx), "codeInterSubdivCbfQT() with intra block\n");
+ const uint32_t tuDepth = depth - cu.m_cuDepth[0];
+ const bool bSubdiv = tuDepth != cu.m_tuDepth[absPartIdx];
+ const uint32_t log2TrSize = g_maxLog2CUSize - depth;
+
+ if (!(log2TrSize - m_hChromaShift < 2))
+ {
+ if (!tuDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_U, tuDepth - 1))
+ m_entropyCoder.codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_U, tuDepth, !bSubdiv);
+ if (!tuDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_V, tuDepth - 1))
+ m_entropyCoder.codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_V, tuDepth, !bSubdiv);
+ }
+ else
+ {
+ X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_U, tuDepth) == cu.getCbf(absPartIdx, TEXT_CHROMA_U, tuDepth - 1), "chroma CBF not matching\n");
+ X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_V, tuDepth) == cu.getCbf(absPartIdx, TEXT_CHROMA_V, tuDepth - 1), "chroma CBF not matching\n");
+ }
+
+ if (!bSubdiv)
+ {
+ m_entropyCoder.codeQtCbfLuma(cu, absPartIdx, tuDepth);
+ }
+ else
+ {
+ uint32_t qNumParts = 1 << (log2TrSize -1 - LOG2_UNIT_SIZE) * 2;
+ for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
+ codeInterSubdivCbfQT(cu, absPartIdx, depth + 1, depthRange);
+ }
+}
+
+void Search::encodeResidualQT(CUData& cu, uint32_t absPartIdx, const uint32_t depth, TextType ttype, const uint32_t depthRange[2])
+{
+ X265_CHECK(cu.m_cuDepth[0] == cu.m_cuDepth[absPartIdx], "depth not matching\n");
+ X265_CHECK(cu.isInter(absPartIdx), "encodeResidualQT() with intra block\n");
+
const uint32_t curTuDepth = depth - cu.m_cuDepth[0];
const uint32_t tuDepth = cu.m_tuDepth[absPartIdx];
const bool bSubdiv = curTuDepth != tuDepth;
const uint32_t log2TrSize = g_maxLog2CUSize - depth;
- const bool splitIntoSubTUs = (m_csp == X265_CSP_I422);
- uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
- uint32_t trWidthC = 1 << log2TrSizeC;
- uint32_t trHeightC = splitIntoSubTUs ? (trWidthC << 1) : trWidthC;
-
- bool mCodeAll = true;
- const uint32_t numPels = trWidthC * trHeightC;
- if (numPels < (MIN_TU_SIZE * MIN_TU_SIZE))
- mCodeAll = false;
-
- if (mCodeAll)
- {
- uint32_t absPartIdxStep = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] + curTuDepth) << 1);
- if (!curTuDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_U, curTuDepth - 1))
- m_entropyCoder.codeQtCbf(cu, absPartIdx, absPartIdxStep, trWidthC, trHeightC, TEXT_CHROMA_U, curTuDepth, !bSubdiv);
- if (!curTuDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_V, curTuDepth - 1))
- m_entropyCoder.codeQtCbf(cu, absPartIdx, absPartIdxStep, trWidthC, trHeightC, TEXT_CHROMA_V, curTuDepth, !bSubdiv);
- }
- else
- {
- X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_U, curTuDepth) == cu.getCbf(absPartIdx, TEXT_CHROMA_U, curTuDepth - 1), "chroma CBF not matching\n");
- X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_V, curTuDepth) == cu.getCbf(absPartIdx, TEXT_CHROMA_V, curTuDepth - 1), "chroma CBF not matching\n");
- }
-
- if (!bSubdiv)
- {
- m_entropyCoder.codeQtCbf(cu, absPartIdx, TEXT_LUMA, tuDepth);
- }
- else
- {
- const uint32_t qpartNumSubdiv = NUM_CU_PARTITIONS >> ((depth + 1) << 1);
- for (uint32_t i = 0; i < 4; ++i)
- codeInterSubdivCbfQT(cu, absPartIdx + i * qpartNumSubdiv, depth + 1, depthRange);
- }
-}
-
-void Search::encodeResidualQT(CUData& cu, uint32_t absPartIdx, const uint32_t depth, TextType ttype, uint32_t depthRange[2])
-{
- X265_CHECK(cu.m_cuDepth[0] == cu.m_cuDepth[absPartIdx], "depth not matching\n");
- X265_CHECK(cu.isInter(absPartIdx), "encodeResidualQT() with intra block\n");
-
- const uint32_t curTuDepth = depth - cu.m_cuDepth[0];
- const uint32_t tuDepth = cu.m_tuDepth[absPartIdx];
- const bool bSubdiv = curTuDepth != tuDepth;
-
if (bSubdiv)
{
if (cu.getCbf(absPartIdx, ttype, curTuDepth))
{
- const uint32_t qpartNumSubdiv = NUM_CU_PARTITIONS >> ((depth + 1) << 1);
- for (uint32_t i = 0; i < 4; ++i)
- encodeResidualQT(cu, absPartIdx + i * qpartNumSubdiv, depth + 1, ttype, depthRange);
+ uint32_t qNumParts = 1 << (log2TrSize - 1 - LOG2_UNIT_SIZE) * 2;
+ for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
+ encodeResidualQT(cu, absPartIdx, depth + 1, ttype, depthRange);
}
+ return;
}
else
{
- const uint32_t log2TrSize = g_maxLog2CUSize - depth;
-
const bool splitIntoSubTUs = (m_csp == X265_CSP_I422);
uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
@@ -3373,12 +3318,12 @@
// Chroma
bool bCodeChroma = true;
uint32_t tuDepthC = tuDepth;
- if ((log2TrSize == 2) && !(m_csp == X265_CSP_I444))
+ if (log2TrSize == 2 && m_csp != X265_CSP_I444)
{
+ X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444 && tuDepth, "invalid tuDepth\n");
log2TrSizeC++;
tuDepthC--;
- uint32_t qpdiv = NUM_CU_PARTITIONS >> ((depth - 1) << 1);
- bCodeChroma = ((absPartIdx & (qpdiv - 1)) == 0);
+ bCodeChroma = !(absPartIdx & 3);
}
if (ttype == TEXT_LUMA && cu.getCbf(absPartIdx, TEXT_LUMA, tuDepth))
@@ -3399,21 +3344,21 @@
}
else
{
- uint32_t partIdxesPerSubTU = NUM_CU_PARTITIONS >> (((cu.m_cuDepth[absPartIdx] + tuDepthC) << 1) + 1);
+ uint32_t tuNumParts = 2 << ((log2TrSizeC - LOG2_UNIT_SIZE) * 2);
uint32_t subTUSize = 1 << (log2TrSizeC * 2);
if (ttype == TEXT_CHROMA_U && cu.getCbf(absPartIdx, TEXT_CHROMA_U, tuDepth))
{
if (cu.getCbf(absPartIdx, ttype, tuDepth + 1))
m_entropyCoder.codeCoeffNxN(cu, coeffCurU, absPartIdx, log2TrSizeC, TEXT_CHROMA_U);
- if (cu.getCbf(absPartIdx + partIdxesPerSubTU, ttype, tuDepth + 1))
- m_entropyCoder.codeCoeffNxN(cu, coeffCurU + subTUSize, absPartIdx + partIdxesPerSubTU, log2TrSizeC, TEXT_CHROMA_U);
+ if (cu.getCbf(absPartIdx + tuNumParts, ttype, tuDepth + 1))
+ m_entropyCoder.codeCoeffNxN(cu, coeffCurU + subTUSize, absPartIdx + tuNumParts, log2TrSizeC, TEXT_CHROMA_U);
}
if (ttype == TEXT_CHROMA_V && cu.getCbf(absPartIdx, TEXT_CHROMA_V, tuDepth))
{
if (cu.getCbf(absPartIdx, ttype, tuDepth + 1))
m_entropyCoder.codeCoeffNxN(cu, coeffCurV, absPartIdx, log2TrSizeC, TEXT_CHROMA_V);
- if (cu.getCbf(absPartIdx + partIdxesPerSubTU, ttype, tuDepth + 1))
- m_entropyCoder.codeCoeffNxN(cu, coeffCurV + subTUSize, absPartIdx + partIdxesPerSubTU, log2TrSizeC, TEXT_CHROMA_V);
+ if (cu.getCbf(absPartIdx + tuNumParts, ttype, tuDepth + 1))
+ m_entropyCoder.codeCoeffNxN(cu, coeffCurV + subTUSize, absPartIdx + tuNumParts, log2TrSizeC, TEXT_CHROMA_V);
}
}
}
@@ -3425,28 +3370,27 @@
X265_CHECK(cu.m_cuDepth[0] == cu.m_cuDepth[absPartIdx], "depth not matching\n");
const uint32_t curTrMode = depth - cu.m_cuDepth[0];
const uint32_t tuDepth = cu.m_tuDepth[absPartIdx];
+ const uint32_t log2TrSize = g_maxLog2CUSize - depth;
if (curTrMode < tuDepth)
{
- uint32_t qPartNumSubdiv = NUM_CU_PARTITIONS >> ((depth + 1) << 1);
- for (uint32_t i = 0; i < 4; i++, absPartIdx += qPartNumSubdiv)
+ uint32_t qNumParts = 1 << (log2TrSize - 1 - LOG2_UNIT_SIZE) * 2;
+ for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
saveResidualQTData(cu, resiYuv, absPartIdx, depth + 1);
return;
}
- const uint32_t log2TrSize = g_maxLog2CUSize - depth;
const uint32_t qtLayer = log2TrSize - 2;
uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
bool bCodeChroma = true;
uint32_t tuDepthC = tuDepth;
- if (log2TrSizeC == 1)
+ if (log2TrSizeC < 2)
{
- X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444, "tuQuad check failed\n");
- log2TrSizeC++;
+ X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444 && tuDepth, "invalid tuDepth\n");
+ log2TrSizeC = 2;
tuDepthC--;
- uint32_t qpdiv = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] + tuDepthC) << 1);
- bCodeChroma = ((absPartIdx & (qpdiv - 1)) == 0);
+ bCodeChroma = !(absPartIdx & 3);
}
m_rqt[qtLayer].resiQtYuv.copyPartToPartLuma(resiYuv, absPartIdx, log2TrSize);
diff -r 35d086074bb5 -r 94d0bc6841dd source/encoder/search.h
--- a/source/encoder/search.h Fri Dec 05 10:59:33 2014 -0600
+++ b/source/encoder/search.h Sat Dec 06 17:17:59 2014 +0900
@@ -178,9 +178,9 @@
void encodeResAndCalcRdSkipCU(Mode& interMode);
// encode residual without rd-cost
- void residualTransformQuantInter(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, uint32_t depthRange[2]);
- void residualTransformQuantIntra(Mode& mode, const CUGeom& cuGeom, uint32_t trDepth, uint32_t absPartIdx, uint32_t depthRange[2]);
- void residualQTIntraChroma(Mode& mode, const CUGeom& cuGeom, uint32_t trDepth, uint32_t absPartIdx);
+ void residualTransformQuantInter(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, const uint32_t depthRange[2]);
+ void residualTransformQuantIntra(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx, const uint32_t depthRange[2]);
+ void residualQTIntraChroma(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx);
// pick be chroma mode from available using just sa8d costs
void getBestIntraModeChroma(Mode& intraMode, const CUGeom& cuGeom);
@@ -204,14 +204,14 @@
void saveResidualQTData(CUData& cu, ShortYuv& resiYuv, uint32_t absPartIdx, uint32_t depth);
// RDO search of luma intra modes; result is fully encoded luma. luma distortion is returned
- uint32_t estIntraPredQT(Mode &intraMode, const CUGeom& cuGeom, uint32_t depthRange[2], uint8_t* sharedModes);
+ uint32_t estIntraPredQT(Mode &intraMode, const CUGeom& cuGeom, const uint32_t depthRange[2], uint8_t* sharedModes);
// RDO select best chroma mode from luma; result is fully encode chroma. chroma distortion is returned
uint32_t estIntraPredChromaQT(Mode &intraMode, const CUGeom& cuGeom);
- void codeSubdivCbfQTChroma(const CUData& cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t width, uint32_t height);
- void codeInterSubdivCbfQT(CUData& cu, uint32_t absPartIdx, const uint32_t depth, uint32_t depthRange[2]);
- void codeCoeffQTChroma(const CUData& cu, uint32_t trDepth, uint32_t absPartIdx, TextType ttype);
+ void codeSubdivCbfQTChroma(const CUData& cu, uint32_t tuDepth, uint32_t absPartIdx);
+ void codeInterSubdivCbfQT(CUData& cu, uint32_t absPartIdx, const uint32_t depth, const uint32_t depthRange[2]);
+ void codeCoeffQTChroma(const CUData& cu, uint32_t tuDepth, uint32_t absPartIdx, TextType ttype);
struct Cost
{
@@ -223,23 +223,23 @@
};
uint64_t estimateNullCbfCost(uint32_t &dist, uint32_t &psyEnergy, uint32_t tuDepth, TextType compId);
- void estimateResidualQT(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, ShortYuv& resiYuv, Cost& costs, uint32_t depthRange[2]);
+ void estimateResidualQT(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, ShortYuv& resiYuv, Cost& costs, const uint32_t depthRange[2]);
// estimate bit cost of residual QT
- void encodeResidualQT(CUData& cu, uint32_t absPartIdx, uint32_t depth, TextType ttype, uint32_t depthRange[2]);
+ void encodeResidualQT(CUData& cu, uint32_t absPartIdx, uint32_t depth, TextType ttype, const uint32_t depthRange[2]);
// generate prediction, generate residual and recon. if bAllowSplit, find optimal RQT splits
- void codeIntraLumaQT(Mode& mode, const CUGeom& cuGeom, uint32_t trDepth, uint32_t absPartIdx, bool bAllowSplit, Cost& costs, uint32_t depthRange[2]);
- void codeIntraLumaTSkip(Mode& mode, const CUGeom& cuGeom, uint32_t trDepth, uint32_t absPartIdx, Cost& costs);
- void extractIntraResultQT(CUData& cu, Yuv& reconYuv, uint32_t trDepth, uint32_t absPartIdx);
+ void codeIntraLumaQT(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx, bool bAllowSplit, Cost& costs, const uint32_t depthRange[2]);
+ void codeIntraLumaTSkip(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx, Cost& costs);
+ void extractIntraResultQT(CUData& cu, Yuv& reconYuv, uint32_t tuDepth, uint32_t absPartIdx);
// generate chroma prediction, generate residual and recon
- uint32_t codeIntraChromaQt(Mode& mode, const CUGeom& cuGeom, uint32_t trDepth, uint32_t absPartIdx, uint32_t& psyEnergy);
- uint32_t codeIntraChromaTSkip(Mode& mode, const CUGeom& cuGeom, uint32_t trDepth, uint32_t trDepthC, uint32_t absPartIdx, uint32_t& psyEnergy);
- void extractIntraResultChromaQT(CUData& cu, Yuv& reconYuv, uint32_t absPartIdx, uint32_t trDepth, bool tuQuad);
+ uint32_t codeIntraChromaQt(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx, uint32_t& psyEnergy);
+ uint32_t codeIntraChromaTSkip(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t tuDepthC, uint32_t absPartIdx, uint32_t& psyEnergy);
+ void extractIntraResultChromaQT(CUData& cu, Yuv& reconYuv, uint32_t absPartIdx, uint32_t tuDepth);
// reshuffle CBF flags after coding a pair of 4:2:2 chroma blocks
- void offsetSubTUCBFs(CUData& cu, TextType ttype, uint32_t trDepth, uint32_t absPartIdx);
+ void offsetSubTUCBFs(CUData& cu, TextType ttype, uint32_t tuDepth, uint32_t absPartIdx);
struct MergeData
{
More information about the x265-devel
mailing list