[x265] refine tskip-related code
Satoshi Nakagawa
nakagawa424 at oki.com
Tue Jun 10 11:56:36 CEST 2014
# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1402394075 -32400
# Tue Jun 10 18:54:35 2014 +0900
# Node ID b6302b087ea414d52fe76050acd2889e34b352c8
# Parent 0cbc7320c9f2904bb1439dca70fd278ea42ed5aa
refine tskip related
diff -r 0cbc7320c9f2 -r b6302b087ea4 source/Lib/TLibEncoder/TEncEntropy.cpp
--- a/source/Lib/TLibEncoder/TEncEntropy.cpp Mon Jun 09 11:34:11 2014 +0530
+++ b/source/Lib/TLibEncoder/TEncEntropy.cpp Tue Jun 10 18:54:35 2014 +0900
@@ -211,7 +211,7 @@
void TEncEntropy::xEncodeTransform(TComDataCU* cu, uint32_t offsetLuma, uint32_t offsetChroma, uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t depth, uint32_t tuSize, uint32_t trIdx, bool& bCodeDQP)
{
const uint32_t subdiv = cu->getTransformIdx(absPartIdx) + cu->getDepth(absPartIdx) > depth;
- const uint32_t log2TrafoSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - depth;
+ const uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - depth;
uint32_t hChromaShift = cu->getHorzChromaShift();
uint32_t vChromaShift = cu->getVertChromaShift();
uint32_t cbfY = cu->getCbf(absPartIdx, TEXT_LUMA, trIdx);
@@ -223,7 +223,7 @@
m_bakAbsPartIdxCU = absPartIdx;
}
- if ((log2TrafoSize == 2) && !(cu->getChromaFormat() == CHROMA_444))
+ if ((log2TrSize == 2) && !(cu->getChromaFormat() == CHROMA_444))
{
uint32_t partNum = cu->getPic()->getNumPartInCU() >> ((depth - 1) << 1);
if ((absPartIdx & (partNum - 1)) == 0)
@@ -244,7 +244,7 @@
}
else if (cu->getPredictionMode(absPartIdx) == MODE_INTER && (cu->getPartitionSize(absPartIdx) != SIZE_2Nx2N) && depth == cu->getDepth(absPartIdx) && (cu->getSlice()->getSPS()->getQuadtreeTUMaxDepthInter() == 1))
{
- if (log2TrafoSize > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx))
+ if (log2TrSize > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx))
{
X265_CHECK(subdiv, "subdivision state failure\n");
}
@@ -253,22 +253,22 @@
X265_CHECK(!subdiv, "subdivision state failure\n");
}
}
- else if (log2TrafoSize > cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize())
+ else if (log2TrSize > cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize())
{
X265_CHECK(subdiv, "subdivision state failure\n");
}
- else if (log2TrafoSize == cu->getSlice()->getSPS()->getQuadtreeTULog2MinSize())
+ else if (log2TrSize == cu->getSlice()->getSPS()->getQuadtreeTULog2MinSize())
{
X265_CHECK(!subdiv, "subdivision state failure\n");
}
- else if (log2TrafoSize == cu->getQuadtreeTULog2MinSizeInCU(absPartIdx))
+ else if (log2TrSize == cu->getQuadtreeTULog2MinSizeInCU(absPartIdx))
{
X265_CHECK(!subdiv, "subdivision state failure\n");
}
else
{
- X265_CHECK(log2TrafoSize > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx), "transform size failure\n");
- m_entropyCoderIf->codeTransformSubdivFlag(subdiv, 5 - log2TrafoSize);
+ X265_CHECK(log2TrSize > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx), "transform size failure\n");
+ m_entropyCoderIf->codeTransformSubdivFlag(subdiv, 5 - log2TrSize);
}
const uint32_t trDepthCurr = depth - cu->getDepth(absPartIdx);
@@ -365,12 +365,12 @@
}
int chFmt = cu->getChromaFormat();
- if ((log2TrafoSize == 2) && !(chFmt == CHROMA_444))
+ if ((log2TrSize == 2) && !(chFmt == CHROMA_444))
{
uint32_t partNum = cu->getPic()->getNumPartInCU() >> ((depth - 1) << 1);
if ((absPartIdx & (partNum - 1)) == (partNum - 1))
{
- uint32_t trSizeC = 1 << log2TrafoSize;
+ uint32_t trSizeC = 1 << log2TrSize;
const bool splitIntoSubTUs = (chFmt == CHROMA_422);
uint32_t curPartNum = cu->getPic()->getNumPartInCU() >> ((depth - 1) << 1);
diff -r 0cbc7320c9f2 -r b6302b087ea4 source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Jun 09 11:34:11 2014 +0530
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Tue Jun 10 18:54:35 2014 +0900
@@ -160,37 +160,37 @@
uint32_t fullDepth = cu->getDepth(0) + trDepth;
uint32_t trMode = cu->getTransformIdx(absPartIdx);
uint32_t subdiv = (trMode > trDepth ? 1 : 0);
- uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
+ uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
if (cu->getPredictionMode(0) == MODE_INTRA && cu->getPartitionSize(0) == SIZE_NxN && trDepth == 0)
{
X265_CHECK(subdiv, "subdivision not present\n");
}
- else if (trSizeLog2 > cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize())
+ else if (log2TrSize > cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize())
{
X265_CHECK(subdiv, "subdivision not present\n");
}
- else if (trSizeLog2 == cu->getSlice()->getSPS()->getQuadtreeTULog2MinSize())
+ else if (log2TrSize == cu->getSlice()->getSPS()->getQuadtreeTULog2MinSize())
{
X265_CHECK(!subdiv, "subdivision present\n");
}
- else if (trSizeLog2 == cu->getQuadtreeTULog2MinSizeInCU(absPartIdx))
+ else if (log2TrSize == cu->getQuadtreeTULog2MinSizeInCU(absPartIdx))
{
X265_CHECK(!subdiv, "subdivision present\n");
}
else
{
- X265_CHECK(trSizeLog2 > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx), "transform size too small\n");
+ X265_CHECK(log2TrSize > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx), "transform size too small\n");
if (bLuma)
{
- m_entropyCoder->encodeTransformSubdivFlag(subdiv, 5 - trSizeLog2);
+ m_entropyCoder->encodeTransformSubdivFlag(subdiv, 5 - log2TrSize);
}
}
if (bChroma)
{
int chFmt = cu->getChromaFormat();
- if ((trSizeLog2 > 2) && !(chFmt == CHROMA_444))
+ if ((log2TrSize > 2) && !(chFmt == CHROMA_444))
{
if (trDepth == 0 || cu->getCbf(absPartIdx, TEXT_CHROMA_U, trDepth - 1))
m_entropyCoder->encodeQtCbf(cu, absPartIdx, absPartIdxStep, (width >> m_hChromaShift), (height >> m_vChromaShift), TEXT_CHROMA_U, trDepth, (subdiv == 0));
@@ -245,9 +245,9 @@
uint32_t origTrDepth = trDepth;
- uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
+ uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
int chFmt = cu->getChromaFormat();
- if ((ttype != TEXT_LUMA) && (trSizeLog2 == 2) && !(chFmt == CHROMA_444))
+ if ((ttype != TEXT_LUMA) && (log2TrSize == 2) && !(chFmt == CHROMA_444))
{
X265_CHECK(trDepth > 0, "transform size too small\n");
trDepth--;
@@ -267,7 +267,7 @@
uint32_t height = cu->getCUSize(0) >> (trDepth + cspy);
height = splitIntoSubTUs ? height >> 1 : height;
uint32_t coeffOffset = absPartIdx << (cu->getPic()->getLog2UnitSize() * 2 - (cspx + cspy));
- uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
+ uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
coeff_t* coeff = m_qtTempCoeff[ttype][qtLayer] + coeffOffset;
if (width == height)
@@ -386,35 +386,32 @@
}
void TEncSearch::xIntraCodingLumaBlk(TComDataCU* cu,
- uint32_t trDepth,
uint32_t absPartIdx,
+ uint32_t log2TrSize,
TComYuv* fencYuv,
TComYuv* predYuv,
ShortYuv* resiYuv,
+ uint32_t& cbf,
uint32_t& outDist)
{
- uint32_t fullDepth = cu->getDepth(0) + trDepth;
- uint32_t tuSize = cu->getCUSize(0) >> trDepth;
+ uint32_t tuSize = 1 << log2TrSize;
uint32_t stride = fencYuv->getStride();
pixel* fenc = fencYuv->getLumaAddr(absPartIdx);
pixel* pred = predYuv->getLumaAddr(absPartIdx);
int16_t* residual = resiYuv->getLumaAddr(absPartIdx);
- int part = partitionFromSize(tuSize);
- int sizeIdx = g_convertToBit[tuSize];
-
- uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
- uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
+
+ uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
coeff_t* coeff = m_qtTempCoeff[0][qtLayer] + coeffOffsetY;
-
int16_t* reconQt = m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx);
X265_CHECK(m_qtTempShortYuv[qtLayer].m_width == MAX_CU_SIZE, "width is not max CU size\n");
const uint32_t reconQtStride = MAX_CU_SIZE;
-
uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
pixel* reconIPred = cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);
uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getStride();
bool useTransformSkip = !!cu->getTransformSkip(absPartIdx, TEXT_LUMA);
+ int part = partitionFromSize(tuSize);
+ int sizeIdx = log2TrSize - 2;
//===== get residual signal =====
X265_CHECK(!((intptr_t)fenc & (tuSize - 1)), "fenc alignment check fail\n");
@@ -430,9 +427,8 @@
}
//--- transform and quantization ---
- uint32_t absSum = 0;
+ uint32_t absSum;
int lastPos = -1;
- cu->setTrIdxSubParts(trDepth, absPartIdx, fullDepth);
int chFmt = cu->getChromaFormat();
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
@@ -441,7 +437,7 @@
absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, tuSize, TEXT_LUMA, absPartIdx, &lastPos, useTransformSkip);
//--- set coded block flag ---
- cu->setCbfSubParts((absSum ? 1 : 0) << trDepth, TEXT_LUMA, absPartIdx, fullDepth);
+ cbf = absSum ? 1 : 0;
if (absSum)
{
@@ -449,7 +445,7 @@
int scalingListType = 0 + TEXT_LUMA;
X265_CHECK(scalingListType < 6, "scalingListType invalid %d\n", scalingListType);
m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), cu->getLumaIntraDir(absPartIdx), residual, stride, coeff, tuSize, scalingListType, useTransformSkip, lastPos);
- X265_CHECK(tuSize <= 32, "tuSize is too large %d\n", tuSize);
+ X265_CHECK(log2TrSize <= 5, "log2TrSize is too large %d\n", log2TrSize);
//===== reconstruction =====
primitives.calcrecon[sizeIdx](pred, residual, reconQt, reconIPred, stride, reconQtStride, reconIPredStride);
//===== update distortion =====
@@ -461,66 +457,48 @@
memset(coeff, 0, sizeof(coeff_t) * tuSize * tuSize);
#endif
//===== reconstruction =====
- primitives.luma_copy_ps[part](reconQt, reconQtStride, pred, stride);
- primitives.luma_copy_pp[part](reconIPred, reconIPredStride, pred, stride);
+ primitives.square_copy_ps[sizeIdx](reconQt, reconQtStride, pred, stride);
+ primitives.square_copy_pp[sizeIdx](reconIPred, reconIPredStride, pred, stride);
//===== update distortion =====
outDist += primitives.sse_pp[part](pred, stride, fenc, stride);
}
}
void TEncSearch::xIntraCodingChromaBlk(TComDataCU* cu,
- uint32_t trDepth,
uint32_t absPartIdx,
- uint32_t absPartIdxStep,
+ uint32_t log2TrSize,
TComYuv* fencYuv,
TComYuv* predYuv,
ShortYuv* resiYuv,
+ uint32_t& cbf,
uint32_t& outDist,
- uint32_t chromaId)
+ uint32_t chromaId,
+ uint32_t log2TrSizeC)
{
- uint32_t fullDepth = cu->getDepth(0) + trDepth;
- uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
- int chFmt = cu->getChromaFormat();
-
- uint32_t origTrDepth = trDepth;
-
- if ((trSizeLog2 == 2) && !(chFmt == CHROMA_444))
- {
- X265_CHECK(trDepth > 0, "trDepth should be non-zero\n");
- trDepth--;
- uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trDepth) << 1);
- bool bFirstQ = ((absPartIdx & (qpdiv - 1)) == 0);
- bool bSecondQ = (chFmt == CHROMA_422) ? ((absPartIdx & (qpdiv - 1)) == 2) : false;
- if ((!bFirstQ) && (!bSecondQ))
- {
- return;
- }
- }
-
- TextType ttype = (TextType)chromaId;
- uint32_t tuSize = cu->getCUSize(0) >> (trDepth + m_hChromaShift);
- uint32_t stride = fencYuv->getCStride();
- pixel* fenc = fencYuv->getChromaAddr(chromaId, absPartIdx);
- pixel* pred = predYuv->getChromaAddr(chromaId, absPartIdx);
- int16_t* residual = resiYuv->getChromaAddr(chromaId, absPartIdx);
-
- uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
+ TextType ttype = (TextType)chromaId;
+ uint32_t tuSize = 1 << log2TrSizeC;
+ uint32_t stride = fencYuv->getCStride();
+ pixel* fenc = fencYuv->getChromaAddr(chromaId, absPartIdx);
+ pixel* pred = predYuv->getChromaAddr(chromaId, absPartIdx);
+ int16_t* residual = resiYuv->getChromaAddr(chromaId, absPartIdx);
+
+ uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
uint32_t coeffOffsetC = absPartIdx << (cu->getPic()->getLog2UnitSize() * 2 - (m_hChromaShift + m_vChromaShift));
- coeff_t* coeff = m_qtTempCoeff[chromaId][qtlayer] + coeffOffsetC;
- int16_t* reconQt = m_qtTempShortYuv[qtlayer].getChromaAddr(chromaId, absPartIdx);
- uint32_t reconQtStride = m_qtTempShortYuv[qtlayer].m_cwidth;
+ coeff_t* coeff = m_qtTempCoeff[chromaId][qtLayer] + coeffOffsetC;
+ int16_t* reconQt = m_qtTempShortYuv[qtLayer].getChromaAddr(chromaId, absPartIdx);
+ uint32_t reconQtStride = m_qtTempShortYuv[qtLayer].m_cwidth;
uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
pixel* reconIPred = cu->getPic()->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), zorder);
uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getCStride();
- bool useTransformSkipChroma = !!cu->getTransformSkip(absPartIdx, ttype);
+ bool useTransformSkipC = !!cu->getTransformSkip(absPartIdx, ttype);
int part = partitionFromSize(tuSize);
- int sizeIdx = g_convertToBit[tuSize];
+ int sizeIdxC = log2TrSizeC - 2;
//===== get residual signal =====
X265_CHECK(!((intptr_t)fenc & (tuSize - 1)), "fenc alignment check fail\n");
X265_CHECK(!((intptr_t)pred & (tuSize - 1)), "pred alignment check fail\n");
X265_CHECK(!((intptr_t)residual & (tuSize - 1)), "residual alignment check fail\n");
- primitives.calcresidual[sizeIdx](fenc, pred, residual, stride);
+ primitives.calcresidual[sizeIdxC](fenc, pred, residual, stride);
//===== transform and quantization =====
//--- init rate estimation arrays for RDOQ ---
@@ -530,9 +508,10 @@
}
//--- transform and quantization ---
- uint32_t absSum = 0;
+ uint32_t absSum;
int lastPos = -1;
+ int chFmt = cu->getChromaFormat();
int curChromaQpOffset;
if (ttype == TEXT_CHROMA_U)
{
@@ -545,10 +524,10 @@
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
m_trQuant->selectLambda(TEXT_CHROMA);
- absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, tuSize, ttype, absPartIdx, &lastPos, useTransformSkipChroma);
+ absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, tuSize, ttype, absPartIdx, &lastPos, useTransformSkipC);
//--- set coded block flag ---
- cu->setCbfPartRange((((absSum > 0) ? 1 : 0) << origTrDepth), ttype, absPartIdx, absPartIdxStep);
+ cbf = absSum ? 1 : 0;
uint32_t dist;
if (absSum)
@@ -556,10 +535,10 @@
//--- inverse transform ---
int scalingListType = 0 + ttype;
X265_CHECK(scalingListType < 6, "scalingListType invalid %d\n", scalingListType);
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, residual, stride, coeff, tuSize, scalingListType, useTransformSkipChroma, lastPos);
- X265_CHECK(tuSize <= 32, "tuSize is too large %d\n", tuSize);
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, residual, stride, coeff, tuSize, scalingListType, useTransformSkipC, lastPos);
+ X265_CHECK(log2TrSizeC <= 5, "log2TrSizeC is too large %d\n", log2TrSizeC);
//===== reconstruction =====
- primitives.calcrecon[sizeIdx](pred, residual, reconQt, reconIPred, stride, reconQtStride, reconIPredStride);
+ primitives.calcrecon[sizeIdxC](pred, residual, reconQt, reconIPred, stride, reconQtStride, reconIPredStride);
//===== update distortion =====
dist = primitives.sse_sp[part](reconQt, reconQtStride, fenc, stride);
}
@@ -569,8 +548,8 @@
memset(coeff, 0, sizeof(coeff_t) * tuSize * tuSize);
#endif
//===== reconstruction =====
- primitives.square_copy_ps[sizeIdx](reconQt, reconQtStride, pred, stride);
- primitives.square_copy_pp[sizeIdx](reconIPred, reconIPredStride, pred, stride);
+ primitives.square_copy_ps[sizeIdxC](reconQt, reconQtStride, pred, stride);
+ primitives.square_copy_pp[sizeIdxC](reconIPred, reconIPredStride, pred, stride);
//===== update distortion =====
dist = primitives.sse_pp[part](pred, stride, fenc, stride);
}
@@ -597,9 +576,9 @@
uint64_t& rdCost)
{
uint32_t fullDepth = cu->getDepth(0) + trDepth;
- uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
- bool bCheckFull = (trSizeLog2 <= cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize());
- bool bCheckSplit = (trSizeLog2 > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx));
+ uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
+ bool bCheckFull = (log2TrSize <= cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize());
+ bool bCheckSplit = (log2TrSize > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx));
int maxTuSize = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize();
int isIntraSlice = (cu->getSlice()->getSliceType() == I_SLICE);
@@ -610,12 +589,12 @@
if (m_cfg->m_param->rdPenalty && !isIntraSlice)
{
// in addition don't check split if TU size is less or equal to 16x16 TU size for non-intra slice
- noSplitIntraMaxTuSize = (trSizeLog2 <= X265_MIN(maxTuSize, 4));
+ noSplitIntraMaxTuSize = (log2TrSize <= X265_MIN(maxTuSize, 4));
// if maximum RD-penalty don't check TU size 32x32
if (m_cfg->m_param->rdPenalty == 2)
{
- bCheckFull = (trSizeLog2 <= X265_MIN(maxTuSize, 4));
+ bCheckFull = (log2TrSize <= X265_MIN(maxTuSize, 4));
}
}
if (bCheckFirst && noSplitIntraMaxTuSize)
@@ -631,10 +610,10 @@
if (bCheckFull)
{
- uint32_t tuSize = 1 << trSizeLog2;
+ uint32_t tuSize = 1 << log2TrSize;
bool checkTransformSkip = (cu->getSlice()->getPPS()->getUseTransformSkip() &&
- trSizeLog2 <= LOG2_MAX_TS_SIZE &&
+ log2TrSize <= LOG2_MAX_TS_SIZE &&
!cu->getCUTransquantBypass(0));
if (checkTransformSkip)
{
@@ -657,6 +636,8 @@
//===== get prediction signal =====
predIntraLumaAng(lumaPredMode, pred, stride, tuSize);
+ cu->setTrIdxSubParts(trDepth, absPartIdx, fullDepth);
+
if (checkTransformSkip || checkTQbypass)
{
//----- store original entropy coding status -----
@@ -680,8 +661,8 @@
}
//----- code luma block with given intra prediction mode and store Cbf-----
- xIntraCodingLumaBlk(cu, trDepth, absPartIdx, fencYuv, predYuv, resiYuv, singleDistYTmp);
- singleCbfYTmp = cu->getCbf(absPartIdx, TEXT_LUMA, trDepth);
+ xIntraCodingLumaBlk(cu, absPartIdx, log2TrSize, fencYuv, predYuv, resiYuv, singleCbfYTmp, singleDistYTmp);
+ cu->setCbfSubParts(singleCbfYTmp << trDepth, TEXT_LUMA, absPartIdx, fullDepth);
singleTQbypass = cu->getCUTransquantBypass(absPartIdx);
if ((modeId == 1) && (singleCbfYTmp == 0) && checkTransformSkip)
@@ -704,7 +685,7 @@
bestModeId = modeId;
if (bestModeId == firstCheckId)
{
- xStoreIntraResultQT(cu, trDepth, absPartIdx);
+ xStoreIntraResultQT(cu, absPartIdx, log2TrSize);
m_rdGoOnSbacCoder->store(m_rdSbacCoders[fullDepth][CI_TEMP_BEST]);
}
}
@@ -722,7 +703,7 @@
if (bestModeId == firstCheckId)
{
- xLoadIntraResultQT(cu, trDepth, absPartIdx);
+ xLoadIntraResultQT(cu, absPartIdx, log2TrSize);
cu->setCbfSubParts(singleCbfY << trDepth, TEXT_LUMA, absPartIdx, fullDepth);
m_rdGoOnSbacCoder->load(m_rdSbacCoders[fullDepth][CI_TEMP_BEST]);
}
@@ -733,13 +714,11 @@
//----- code luma block with given intra prediction mode and store Cbf-----
cu->setTransformSkipSubParts(0, TEXT_LUMA, absPartIdx, fullDepth);
- xIntraCodingLumaBlk(cu, trDepth, absPartIdx, fencYuv, predYuv, resiYuv, singleDistY);
-
- if (bCheckSplit)
- singleCbfY = cu->getCbf(absPartIdx, TEXT_LUMA, trDepth);
+ xIntraCodingLumaBlk(cu, absPartIdx, log2TrSize, fencYuv, predYuv, resiYuv, singleCbfY, singleDistY);
+ cu->setCbfSubParts(singleCbfY << trDepth, TEXT_LUMA, absPartIdx, fullDepth);
uint32_t singleBits = xGetIntraBitsQT(cu, trDepth, absPartIdx, 0, true, false);
- if (m_cfg->m_param->rdPenalty && (trSizeLog2 == 5) && !isIntraSlice)
+ if (m_cfg->m_param->rdPenalty && (log2TrSize == 5) && !isIntraSlice)
singleBits *= 4;
singleCost = m_rdCost->calcRdCost(singleDistY, singleBits);
@@ -804,15 +783,16 @@
cu->setTransformSkipSubParts(bestModeId, TEXT_LUMA, absPartIdx, fullDepth);
//--- set reconstruction for next intra prediction blocks ---
- uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
+ uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
- int16_t* src = m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx);
+ int16_t* reconQt = m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx);
X265_CHECK(m_qtTempShortYuv[qtLayer].m_width == MAX_CU_SIZE, "width is not max CU size\n");
- const uint32_t srcstride = MAX_CU_SIZE;
+ const uint32_t reconQtStride = MAX_CU_SIZE;
+
pixel* dst = cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);
uint32_t dststride = cu->getPic()->getPicYuvRec()->getStride();
- int sizeIdx = trSizeLog2 - 2;
- primitives.square_copy_sp[sizeIdx](dst, dststride, src, srcstride);
+ int sizeIdx = log2TrSize - 2;
+ primitives.square_copy_sp[sizeIdx](dst, dststride, reconQt, reconQtStride);
}
outDistY += singleDistY;
@@ -828,9 +808,9 @@
TComYuv* reconYuv)
{
uint32_t fullDepth = cu->getDepth(0) + trDepth;
- uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
- bool bCheckFull = (trSizeLog2 <= cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize());
- bool bCheckSplit = (trSizeLog2 > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx));
+ uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
+ bool bCheckFull = (log2TrSize <= cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize());
+ bool bCheckSplit = (log2TrSize > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx));
int maxTuSize = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize();
int isIntraSlice = (cu->getSlice()->getSliceType() == I_SLICE);
@@ -838,7 +818,7 @@
if (m_cfg->m_param->rdPenalty == 2 && !isIntraSlice)
{
// if maximum RD-penalty don't check TU size 32x32
- bCheckFull = (trSizeLog2 <= X265_MIN(maxTuSize, 4));
+ bCheckFull = (log2TrSize <= X265_MIN(maxTuSize, 4));
}
if (bCheckFull)
{
@@ -846,7 +826,7 @@
//----- code luma block with given intra prediction mode and store Cbf-----
uint32_t lumaPredMode = cu->getLumaIntraDir(absPartIdx);
- uint32_t tuSize = cu->getCUSize(0) >> trDepth;
+ uint32_t tuSize = 1 << log2TrSize;
int chFmt = cu->getChromaFormat();
uint32_t stride = fencYuv->getStride();
pixel* fenc = fencYuv->getLumaAddr(absPartIdx);
@@ -867,17 +847,18 @@
//===== get prediction signal =====
predIntraLumaAng(lumaPredMode, pred, stride, tuSize);
+ cu->setTrIdxSubParts(trDepth, absPartIdx, fullDepth);
+
//===== get residual signal =====
X265_CHECK(!((intptr_t)fenc & (tuSize - 1)), "fenc alignment failure\n");
X265_CHECK(!((intptr_t)pred & (tuSize - 1)), "pred alignment failure\n");
X265_CHECK(!((intptr_t)residual & (tuSize - 1)), "residual alignment failure\n");
- int sizeIdx = g_convertToBit[tuSize];
+ int sizeIdx = log2TrSize - 2;
primitives.calcresidual[sizeIdx](fenc, pred, residual, stride);
//===== transform and quantization =====
uint32_t absSum = 0;
int lastPos = -1;
- cu->setTrIdxSubParts(trDepth, absPartIdx, fullDepth);
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
m_trQuant->selectLambda(TEXT_LUMA);
@@ -941,18 +922,18 @@
if (trMode == trDepth)
{
- uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
- uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
+ uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
+ uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
//===== copy transform coefficients =====
- uint32_t numCoeffY = 1 << (trSizeLog2 * 2);
+ uint32_t numCoeffY = 1 << (log2TrSize * 2);
uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
- coeff_t* coeffSrcY = m_qtTempCoeff[0][qtlayer] + coeffOffsetY;
+ coeff_t* coeffSrcY = m_qtTempCoeff[0][qtLayer] + coeffOffsetY;
coeff_t* coeffDestY = cu->getCoeffY() + coeffOffsetY;
::memcpy(coeffDestY, coeffSrcY, sizeof(coeff_t) * numCoeffY);
//===== copy reconstruction =====
- m_qtTempShortYuv[qtlayer].copyPartToPartLuma(reconYuv, absPartIdx, 1 << trSizeLog2);
+ m_qtTempShortYuv[qtLayer].copyPartToPartLuma(reconYuv, absPartIdx, 1 << log2TrSize);
}
else
{
@@ -964,164 +945,116 @@
}
}
-void TEncSearch::xStoreIntraResultQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx)
+void TEncSearch::xStoreIntraResultQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize)
{
- uint32_t fullDepth = cu->getDepth(0) + trDepth;
- uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
- uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
+ uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
//===== copy transform coefficients =====
- uint32_t numCoeffY = 1 << (trSizeLog2 * 2);
+ uint32_t numCoeffY = 1 << (log2TrSize * 2);
uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
- coeff_t* coeffSrcY = m_qtTempCoeff[0][qtlayer] + coeffOffsetY;
+ coeff_t* coeffSrcY = m_qtTempCoeff[0][qtLayer] + coeffOffsetY;
coeff_t* coeffDstY = m_qtTempTUCoeff[0];
-
::memcpy(coeffDstY, coeffSrcY, sizeof(coeff_t) * numCoeffY);
//===== copy reconstruction =====
- m_qtTempShortYuv[qtlayer].copyPartToPartLuma(&m_qtTempTransformSkipYuv, absPartIdx, 1 << trSizeLog2);
+ pixel* reconTs = m_qtTempTransformSkipYuv.getLumaAddr(absPartIdx);
+ uint32_t reconTsStride = m_qtTempTransformSkipYuv.getStride();
+ int16_t* reconQt = m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx);
+ X265_CHECK(m_qtTempShortYuv[qtLayer].m_width == MAX_CU_SIZE, "width is not max CU size\n");
+ const uint32_t reconQtStride = MAX_CU_SIZE;
+ int sizeIdx = log2TrSize - 2;
+ primitives.square_copy_sp[sizeIdx](reconTs, reconTsStride, reconQt, reconQtStride);
}
-void TEncSearch::xLoadIntraResultQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx)
+void TEncSearch::xLoadIntraResultQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize)
{
- uint32_t fullDepth = cu->getDepth(0) + trDepth;
- uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
- uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
+ uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
//===== copy transform coefficients =====
- uint32_t numCoeffY = 1 << (trSizeLog2 * 2);
+ uint32_t numCoeffY = 1 << (log2TrSize * 2);
uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
- coeff_t* coeffDstY = m_qtTempCoeff[0][qtlayer] + coeffOffsetY;
+ coeff_t* coeffDstY = m_qtTempCoeff[0][qtLayer] + coeffOffsetY;
coeff_t* coeffSrcY = m_qtTempTUCoeff[0];
-
::memcpy(coeffDstY, coeffSrcY, sizeof(coeff_t) * numCoeffY);
//===== copy reconstruction =====
- uint32_t trSize = 1 << trSizeLog2;
- m_qtTempTransformSkipYuv.copyPartToPartLuma(&m_qtTempShortYuv[qtlayer], absPartIdx, trSize);
-
- uint32_t zOrder = cu->getZorderIdxInCU() + absPartIdx;
- pixel* reconIPred = cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zOrder);
- uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getStride();
- int16_t* reconQt = m_qtTempShortYuv[qtlayer].getLumaAddr(absPartIdx);
- X265_CHECK(m_qtTempShortYuv[qtlayer].m_width == MAX_CU_SIZE, "width is not max CU size\n");
+ pixel* reconTs = m_qtTempTransformSkipYuv.getLumaAddr(absPartIdx);
+ uint32_t reconTsStride = m_qtTempTransformSkipYuv.getStride();
+ int16_t* reconQt = m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx);
+ X265_CHECK(m_qtTempShortYuv[qtLayer].m_width == MAX_CU_SIZE, "width is not max CU size\n");
const uint32_t reconQtStride = MAX_CU_SIZE;
- int sizeIdx = trSizeLog2 - 2;
- primitives.square_copy_sp[sizeIdx](reconIPred, reconIPredStride, reconQt, reconQtStride);
+ int sizeIdx = log2TrSize - 2;
+ primitives.square_copy_ps[sizeIdx](reconQt, reconQtStride, reconTs, reconTsStride);
+
+ uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
+ pixel* reconIPred = cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);
+ uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getStride();
+ primitives.square_copy_pp[sizeIdx](reconIPred, reconIPredStride, reconTs, reconTsStride);
}
-void TEncSearch::xStoreIntraResultChromaQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t chromaId, const bool splitIntoSubTUs)
-{
- assert(chromaId == 1 || chromaId == 2);
-
- uint32_t fullDepth = cu->getDepth(0) + trDepth;
- uint32_t trMode = cu->getTransformIdx(absPartIdx);
-
- if (trMode == trDepth)
- {
- uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
- uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
- int chFmt = cu->getChromaFormat();
-
- bool bChromaSame = false;
- if (trSizeLog2 == 2 && !(chFmt == CHROMA_444))
- {
- X265_CHECK(trDepth > 0, "invalid trDepth\n");
- trDepth--;
- uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trDepth) << 1);
- bool bFirstQ = ((absPartIdx & (qpdiv - 1)) == 0);
- bool bSecondQ = (chFmt == CHROMA_422) ? ((absPartIdx & (qpdiv - 1)) == 2) : false;
- if ((!bFirstQ) && (!bSecondQ))
- {
- return;
- }
- bChromaSame = true;
- }
- uint32_t width = cu->getCUSize(absPartIdx) >> (trDepth + m_hChromaShift);
- uint32_t height = cu->getCUSize(absPartIdx) >> (trDepth + m_vChromaShift);
- height = splitIntoSubTUs ? height >> 1 : height;
- uint32_t numCoeffC = width * height;
- uint32_t coeffOffsetC = absPartIdx << (cu->getPic()->getLog2UnitSize() * 2 - (m_hChromaShift + m_vChromaShift));
-
- coeff_t* coeffSrc = m_qtTempCoeff[chromaId][qtlayer] + coeffOffsetC;
- coeff_t* coeffDst = m_qtTempTUCoeff[chromaId];
- ::memcpy(coeffDst, coeffSrc, sizeof(coeff_t) * numCoeffC);
-
- //===== copy reconstruction =====
- uint32_t lumaSize = 1 << (bChromaSame ? trSizeLog2 + 1 : trSizeLog2);
- m_qtTempShortYuv[qtlayer].copyPartToPartYuvChroma(&m_qtTempTransformSkipYuv, absPartIdx, lumaSize, chromaId, splitIntoSubTUs);
- }
-}
-
-void TEncSearch::xLoadIntraResultChromaQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t chromaId)
+void TEncSearch::xStoreIntraResultChromaQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize, uint32_t log2TrSizeC, uint32_t chromaId)
{
X265_CHECK(chromaId == 1 || chromaId == 2, "invalid chroma id");
- uint32_t fullDepth = cu->getDepth(0) + trDepth;
- uint32_t trMode = cu->getTransformIdx(absPartIdx);
-
- if (trMode == trDepth)
- {
- uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
- uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
- int chFmt = cu->getChromaFormat();
- const bool splitIntoSubTUs = (chFmt == CHROMA_422);
-
- uint32_t trSizeCLog2 = trSizeLog2 - m_hChromaShift;
- bool bChromaSame = false;
- if (trSizeLog2 == 2 && !(chFmt == CHROMA_444))
- {
- X265_CHECK(trDepth > 0, "invalid trDepth\n");
- trDepth--;
- trSizeCLog2++;
- uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trDepth) << 1);
- bool bFirstQ = ((absPartIdx & (qpdiv - 1)) == 0);
- bool bSecondQ = (chFmt == CHROMA_422) ? ((absPartIdx & (qpdiv - 1)) == 2) : false;
- if ((!bFirstQ) && (!bSecondQ))
- {
- return;
- }
- bChromaSame = true;
- }
-
- //===== copy transform coefficients =====
- uint32_t numCoeffC = 1 << (trSizeCLog2 * 2);
- uint32_t coeffOffsetC = absPartIdx << (cu->getPic()->getLog2UnitSize() * 2 - (m_hChromaShift + m_vChromaShift));
-
- coeff_t* coeffDst = m_qtTempCoeff[chromaId][qtlayer] + coeffOffsetC;
- coeff_t* coeffSrc = m_qtTempTUCoeff[chromaId];
- ::memcpy(coeffDst, coeffSrc, sizeof(coeff_t) * numCoeffC);
-
- //===== copy reconstruction =====
- uint32_t lumaSize = 1 << (bChromaSame ? trSizeLog2 + 1 : trSizeLog2);
- m_qtTempTransformSkipYuv.copyPartToPartChroma(&m_qtTempShortYuv[qtlayer], absPartIdx, lumaSize, chromaId, splitIntoSubTUs);
-
- uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
- uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getCStride();
-
- pixel* reconIPred = cu->getPic()->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), zorder);
- int16_t* reconQt = m_qtTempShortYuv[qtlayer].getChromaAddr(chromaId, absPartIdx);
- uint32_t reconQtStride = m_qtTempShortYuv[qtlayer].m_cwidth;
- int sizeIdxC = trSizeCLog2 - 2;
- primitives.square_copy_sp[sizeIdxC](reconIPred, reconIPredStride, reconQt, reconQtStride);
- }
+ uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
+
+ //===== copy transform coefficients =====
+ uint32_t numCoeffC = 1 << (log2TrSizeC * 2);
+ uint32_t coeffOffsetC = absPartIdx << (cu->getPic()->getLog2UnitSize() * 2 - (m_hChromaShift + m_vChromaShift));
+ coeff_t* coeffSrcC = m_qtTempCoeff[chromaId][qtLayer] + coeffOffsetC;
+ coeff_t* coeffDstC = m_qtTempTUCoeff[chromaId];
+ ::memcpy(coeffDstC, coeffSrcC, sizeof(coeff_t) * numCoeffC);
+
+ //===== copy reconstruction =====
+ pixel* reconTs = m_qtTempTransformSkipYuv.getChromaAddr(chromaId, absPartIdx);
+ uint32_t reconTsStride = m_qtTempTransformSkipYuv.getCStride();
+ int16_t* reconQt = m_qtTempShortYuv[qtLayer].getChromaAddr(chromaId, absPartIdx);
+ uint32_t reconQtStride = m_qtTempShortYuv[qtLayer].m_cwidth;
+ int sizeIdxC = log2TrSizeC - 2;
+ primitives.square_copy_sp[sizeIdxC](reconTs, reconTsStride, reconQt, reconQtStride);
+}
+
+void TEncSearch::xLoadIntraResultChromaQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize, uint32_t log2TrSizeC, uint32_t chromaId)
+{
+ X265_CHECK(chromaId == 1 || chromaId == 2, "invalid chroma id");
+
+ uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
+
+ //===== copy transform coefficients =====
+ uint32_t numCoeffC = 1 << (log2TrSizeC * 2);
+ uint32_t coeffOffsetC = absPartIdx << (cu->getPic()->getLog2UnitSize() * 2 - (m_hChromaShift + m_vChromaShift));
+ coeff_t* coeffDstC = m_qtTempCoeff[chromaId][qtLayer] + coeffOffsetC;
+ coeff_t* coeffSrcC = m_qtTempTUCoeff[chromaId];
+ ::memcpy(coeffDstC, coeffSrcC, sizeof(coeff_t) * numCoeffC);
+
+ //===== copy reconstruction =====
+ pixel* reconTs = m_qtTempTransformSkipYuv.getChromaAddr(chromaId, absPartIdx);
+ uint32_t reconTsStride = m_qtTempTransformSkipYuv.getCStride();
+ int16_t* reconQt = m_qtTempShortYuv[qtLayer].getChromaAddr(chromaId, absPartIdx);
+ uint32_t reconQtStride = m_qtTempShortYuv[qtLayer].m_cwidth;
+ int sizeIdxC = log2TrSizeC - 2;
+ primitives.square_copy_ps[sizeIdxC](reconQt, reconQtStride, reconTs, reconTsStride);
+
+ uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
+ pixel* reconIPred = cu->getPic()->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), zorder);
+ uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getCStride();
+ primitives.square_copy_pp[sizeIdxC](reconIPred, reconIPredStride, reconTs, reconTsStride);
}
void TEncSearch::offsetSubTUCBFs(TComDataCU* cu, TextType ttype, uint32_t trDepth, uint32_t absPartIdx)
{
uint32_t depth = cu->getDepth(0);
uint32_t fullDepth = depth + trDepth;
- uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
-
- uint32_t actualTrDepth = trDepth;
-
- if ((trSizeLog2 == 2) && !(cu->getChromaFormat() == CHROMA_444))
+ uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
+
+ uint32_t trDepthC = trDepth;
+ if ((log2TrSize == 2) && !(cu->getChromaFormat() == CHROMA_444))
{
- X265_CHECK(actualTrDepth > 0, "actualTrDepth invalid\n");
- actualTrDepth--;
+ X265_CHECK(trDepthC > 0, "trDepthC invalid\n");
+ trDepthC--;
}
- uint32_t partIdxesPerSubTU = (cu->getPic()->getNumPartInCU() >> ((depth + actualTrDepth) << 1)) >> 1;
+ uint32_t partIdxesPerSubTU = (cu->getPic()->getNumPartInCU() >> ((depth + trDepthC) << 1)) >> 1;
//move the CBFs down a level and set the parent CBF
uint8_t subTUCBF[2];
@@ -1158,15 +1091,16 @@
if (trMode == trDepth)
{
int chFmt = cu->getChromaFormat();
- uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
- uint32_t trSizeCLog2 = trSizeLog2 - m_hChromaShift;
- uint32_t actualTrDepth = trDepth;
- if ((trSizeLog2 == 2) && !(chFmt == CHROMA_444))
+ uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
+ uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
+
+ uint32_t trDepthC = trDepth;
+ if ((log2TrSize == 2) && !(chFmt == CHROMA_444))
{
X265_CHECK(trDepth > 0, "invalid trDepth\n");
- actualTrDepth--;
- trSizeCLog2++;
- uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + actualTrDepth) << 1);
+ trDepthC--;
+ log2TrSizeC++;
+ uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trDepthC) << 1);
bool bFirstQ = ((absPartIdx & (qpdiv - 1)) == 0);
if (!bFirstQ)
{
@@ -1174,12 +1108,12 @@
}
}
- uint32_t tuSize = cu->getCUSize(0) >> (actualTrDepth + m_hChromaShift);
+ uint32_t tuSize = 1 << log2TrSizeC;
uint32_t stride = fencYuv->getCStride();
const bool splitIntoSubTUs = (chFmt == CHROMA_422);
bool checkTransformSkip = (cu->getSlice()->getPPS()->getUseTransformSkip() &&
- trSizeCLog2 <= LOG2_MAX_TS_SIZE &&
+ log2TrSizeC <= LOG2_MAX_TS_SIZE &&
!cu->getCUTransquantBypass(0));
if (m_cfg->m_param->bEnableTSkipFast)
@@ -1200,7 +1134,7 @@
for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
{
TComTURecurse tuIterator;
- uint32_t curPartNum = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + actualTrDepth) << 1);
+ uint32_t curPartNum = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trDepthC) << 1);
initSection(&tuIterator, splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdx);
do
@@ -1209,7 +1143,7 @@
pixel* pred = predYuv->getChromaAddr(chromaId, absPartIdxC);
//===== init availability pattern =====
- TComPattern::initAdiPatternChroma(cu, absPartIdxC, actualTrDepth, m_predBuf, chromaId);
+ TComPattern::initAdiPatternChroma(cu, absPartIdxC, trDepthC, m_predBuf, chromaId);
pixel* chromaPred = TComPattern::getAdiChromaBuf(chromaId, tuSize, m_predBuf);
uint32_t chromaPredMode = cu->getChromaIntraDir(absPartIdxC);
@@ -1223,6 +1157,8 @@
//===== get prediction signal =====
predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, tuSize, chFmt);
+ uint32_t singleCbfC = 0;
+
if (checkTransformSkip)
{
// use RDO to decide whether Cr/Cb takes TS
@@ -1231,7 +1167,6 @@
uint64_t singleCost = MAX_INT64;
int bestModeId = 0;
uint32_t singleDistC = 0;
- uint32_t singleCbfC = 0;
uint32_t singleDistCTmp = 0;
uint64_t singleCostTmp = 0;
uint32_t singleCbfCTmp = 0;
@@ -1243,9 +1178,8 @@
cu->setTransformSkipPartRange(chromaModeId, (TextType)chromaId, absPartIdxC, tuIterator.m_absPartIdxStep);
singleDistCTmp = 0;
- xIntraCodingChromaBlk(cu, trDepth, absPartIdxC, tuIterator.m_absPartIdxStep, fencYuv, predYuv, resiYuv, singleDistCTmp, chromaId);
-
- singleCbfCTmp = cu->getCbf(absPartIdxC, (TextType)chromaId, trDepth);
+ xIntraCodingChromaBlk(cu, absPartIdxC, log2TrSize, fencYuv, predYuv, resiYuv, singleCbfCTmp, singleDistCTmp, chromaId, log2TrSizeC);
+ cu->setCbfPartRange(singleCbfCTmp << trDepth, (TextType)chromaId, absPartIdxC, tuIterator.m_absPartIdxStep);
if (chromaModeId == 1 && singleCbfCTmp == 0)
{
@@ -1267,7 +1201,7 @@
if (bestModeId == firstCheckId)
{
- xStoreIntraResultChromaQT(cu, trDepth, absPartIdxC, chromaId, splitIntoSubTUs);
+ xStoreIntraResultChromaQT(cu, absPartIdxC, log2TrSize, log2TrSizeC, chromaId);
m_rdGoOnSbacCoder->store(m_rdSbacCoders[fullDepth][CI_TEMP_BEST]);
}
}
@@ -1279,7 +1213,7 @@
if (bestModeId == firstCheckId)
{
- xLoadIntraResultChromaQT(cu, trDepth, absPartIdxC, chromaId);
+ xLoadIntraResultChromaQT(cu, absPartIdxC, log2TrSize, log2TrSizeC, chromaId);
cu->setCbfPartRange(singleCbfC << trDepth, (TextType)chromaId, absPartIdxC, tuIterator.m_absPartIdxStep);
m_rdGoOnSbacCoder->load(m_rdSbacCoders[fullDepth][CI_TEMP_BEST]);
@@ -1297,7 +1231,8 @@
else
{
cu->setTransformSkipPartRange(0, (TextType)chromaId, absPartIdxC, tuIterator.m_absPartIdxStep);
- xIntraCodingChromaBlk(cu, trDepth, absPartIdxC, tuIterator.m_absPartIdxStep, fencYuv, predYuv, resiYuv, outDist, chromaId);
+ xIntraCodingChromaBlk(cu, absPartIdxC, log2TrSize, fencYuv, predYuv, resiYuv, singleCbfC, outDist, chromaId, log2TrSizeC);
+ cu->setCbfPartRange(singleCbfC << trDepth, (TextType)chromaId, absPartIdxC, tuIterator.m_absPartIdxStep);
}
}
while (isNextSection(&tuIterator));
@@ -1337,14 +1272,16 @@
if (trMode == trDepth)
{
int chFmt = cu->getChromaFormat();
- uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
- uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
+ uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
+ uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
+ uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
bool bChromaSame = false;
- if ((trSizeLog2 == 2) && !(chFmt == CHROMA_444))
+ if ((log2TrSize == 2) && !(chFmt == CHROMA_444))
{
X265_CHECK(trDepth > 0, "invalid trDepth\n");
trDepth--;
+ log2TrSizeC++;
uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trDepth) << 1);
if ((absPartIdx & (qpdiv - 1)) != 0)
{
@@ -1355,20 +1292,18 @@
//===== copy transform coefficients =====
- uint32_t width = cu->getCUSize(absPartIdx) >> (trDepth + m_hChromaShift);
- uint32_t height = cu->getCUSize(absPartIdx) >> (trDepth + m_vChromaShift);
- uint32_t numCoeffC = width * height;
+ uint32_t numCoeffC = 1 << (log2TrSizeC * 2 + (chFmt == CHROMA_422));
uint32_t coeffOffsetC = absPartIdx << (cu->getPic()->getLog2UnitSize() * 2 - (m_hChromaShift + m_vChromaShift));
- coeff_t* coeffSrcU = m_qtTempCoeff[1][qtlayer] + coeffOffsetC;
- coeff_t* coeffSrcV = m_qtTempCoeff[2][qtlayer] + coeffOffsetC;
+ coeff_t* coeffSrcU = m_qtTempCoeff[1][qtLayer] + coeffOffsetC;
+ coeff_t* coeffSrcV = m_qtTempCoeff[2][qtLayer] + coeffOffsetC;
coeff_t* coeffDstU = cu->getCoeffCb() + coeffOffsetC;
coeff_t* coeffDstV = cu->getCoeffCr() + coeffOffsetC;
::memcpy(coeffDstU, coeffSrcU, sizeof(coeff_t) * numCoeffC);
::memcpy(coeffDstV, coeffSrcV, sizeof(coeff_t) * numCoeffC);
//===== copy reconstruction =====
- m_qtTempShortYuv[qtlayer].copyPartToPartChroma(reconYuv, absPartIdx, 1 << trSizeLog2, (bChromaSame && (chFmt != CHROMA_422)));
+ m_qtTempShortYuv[qtLayer].copyPartToPartChroma(reconYuv, absPartIdx, 1 << log2TrSize, (bChromaSame && (chFmt != CHROMA_422)));
}
else
{
@@ -1394,14 +1329,16 @@
if (trMode == trDepth)
{
int chFmt = cu->getChromaFormat();
- uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
+ uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
+ uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
uint32_t origTrDepth = trDepth;
- uint32_t actualTrDepth = trDepth;
- if ((trSizeLog2 == 2) && !(chFmt == CHROMA_444))
+ uint32_t trDepthC = trDepth;
+ if ((log2TrSize == 2) && !(chFmt == CHROMA_444))
{
X265_CHECK(trDepth > 0, "invalid trDepth\n");
- actualTrDepth--;
- uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + actualTrDepth) << 1);
+ trDepthC--;
+ log2TrSizeC++;
+ uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trDepthC) << 1);
bool bFirstQ = ((absPartIdx & (qpdiv - 1)) == 0);
if (!bFirstQ)
{
@@ -1409,16 +1346,16 @@
}
}
- uint32_t tuSize = cu->getCUSize(0) >> (actualTrDepth + m_hChromaShift);
+ uint32_t tuSize = 1 << log2TrSizeC;
uint32_t stride = fencYuv->getCStride();
const bool splitIntoSubTUs = (chFmt == CHROMA_422);
- int sizeIdx = g_convertToBit[tuSize];
+ int sizeIdxC = log2TrSizeC - 2;
int part = partitionFromSize(tuSize);
for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
{
TComTURecurse tuIterator;
- uint32_t curPartNum = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + actualTrDepth) << 1);
+ uint32_t curPartNum = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trDepthC) << 1);
initSection(&tuIterator, splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdx);
do
@@ -1436,8 +1373,8 @@
pixel* reconIPred = cu->getPic()->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), zorder);
uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getCStride();
- //bool useTransformSkipChroma = cu->getTransformSkip(absPartIdxC, ttype);
- const bool useTransformSkipChroma = false;
+ //bool useTransformSkipC = cu->getTransformSkip(absPartIdxC, ttype);
+ const bool useTransformSkipC = false;
cu->setTransformSkipPartRange(0, ttype, absPartIdxC, tuIterator.m_absPartIdxStep);
uint32_t chromaPredMode = cu->getChromaIntraDir(absPartIdxC);
@@ -1449,7 +1386,7 @@
}
chromaPredMode = (chFmt == CHROMA_422) ? g_chroma422IntraAngleMappingTable[chromaPredMode] : chromaPredMode;
//===== init availability pattern =====
- TComPattern::initAdiPatternChroma(cu, absPartIdxC, actualTrDepth, m_predBuf, chromaId);
+ TComPattern::initAdiPatternChroma(cu, absPartIdxC, trDepthC, m_predBuf, chromaId);
pixel* chromaPred = TComPattern::getAdiChromaBuf(chromaId, tuSize, m_predBuf);
//===== get prediction signal =====
@@ -1459,7 +1396,7 @@
X265_CHECK(!((intptr_t)fenc & (tuSize - 1)), "fenc alignment failure\n");
X265_CHECK(!((intptr_t)pred & (tuSize - 1)), "pred alignment failure\n");
X265_CHECK(!((intptr_t)residual & (tuSize - 1)), "residual alignment failure\n");
- primitives.calcresidual[sizeIdx](fenc, pred, residual, stride);
+ primitives.calcresidual[sizeIdxC](fenc, pred, residual, stride);
//--- transform and quantization ---
uint32_t absSum = 0;
@@ -1478,7 +1415,7 @@
m_trQuant->selectLambda(TEXT_CHROMA);
- absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, tuSize, ttype, absPartIdxC, &lastPos, useTransformSkipChroma);
+ absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, tuSize, ttype, absPartIdxC, &lastPos, useTransformSkipC);
//--- set coded block flag ---
cu->setCbfPartRange((((absSum > 0) ? 1 : 0) << origTrDepth), ttype, absPartIdxC, tuIterator.m_absPartIdxStep);
@@ -1488,12 +1425,12 @@
//--- inverse transform ---
int scalingListType = 0 + ttype;
X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, residual, stride, coeff, tuSize, scalingListType, useTransformSkipChroma, lastPos);
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, residual, stride, coeff, tuSize, scalingListType, useTransformSkipC, lastPos);
//===== reconstruction =====
// use square primitives
primitives.chroma[CHROMA_444].add_ps[part](recon, stride, pred, residual, stride, stride);
- primitives.square_copy_pp[sizeIdx](reconIPred, reconIPredStride, recon, stride);
+ primitives.square_copy_pp[sizeIdxC](reconIPred, reconIPredStride, recon, stride);
}
else
{
@@ -1502,8 +1439,8 @@
#endif
//===== reconstruction =====
- primitives.square_copy_pp[sizeIdx](recon, stride, pred, stride);
- primitives.square_copy_pp[sizeIdx](reconIPred, reconIPredStride, pred, stride);
+ primitives.square_copy_pp[sizeIdxC](recon, stride, pred, stride);
+ primitives.square_copy_pp[sizeIdxC](reconIPred, reconIPredStride, pred, stride);
}
}
while (isNextSection(&tuIterator));
@@ -2840,17 +2777,17 @@
{
X265_CHECK(cu->getDepth(0) == cu->getDepth(absPartIdx), "invalid depth\n");
const uint32_t trMode = depth - cu->getDepth(0);
- const uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - depth;
+ const uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - depth;
const uint32_t setCbf = 1 << trMode;
int chFmt = cu->getChromaFormat();
bool bSplitFlag = ((cu->getSlice()->getSPS()->getQuadtreeTUMaxDepthInter() == 1) && cu->getPredictionMode(absPartIdx) == MODE_INTER && (cu->getPartitionSize(absPartIdx) != SIZE_2Nx2N));
bool bCheckFull;
- if (bSplitFlag && depth == cu->getDepth(absPartIdx) && (trSizeLog2 > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx)))
+ if (bSplitFlag && depth == cu->getDepth(absPartIdx) && (log2TrSize > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx)))
bCheckFull = false;
else
- bCheckFull = (trSizeLog2 <= cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize());
- const bool bCheckSplit = (trSizeLog2 > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx));
+ bCheckFull = (log2TrSize <= cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize());
+ const bool bCheckSplit = (log2TrSize > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx));
X265_CHECK(bCheckFull || bCheckSplit, "check-full or check-split must be set\n");
// code full block
@@ -2858,12 +2795,12 @@
int lastPosY = -1, lastPosU = -1, lastPosV = -1;
if (bCheckFull)
{
- uint32_t trSizeCLog2 = trSizeLog2 - m_hChromaShift;
+ uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
bool bCodeChroma = true;
uint32_t trModeC = trMode;
- if ((trSizeLog2 == 2) && !(chFmt == CHROMA_444))
+ if ((log2TrSize == 2) && !(chFmt == CHROMA_444))
{
- trSizeCLog2++;
+ log2TrSizeC++;
trModeC--;
uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((depth - 1) << 1);
bCodeChroma = ((absPartIdx & (qpdiv - 1)) == 0);
@@ -2878,10 +2815,10 @@
coeff_t *coeffCurU = cu->getCoeffCb() + coeffOffsetC;
coeff_t *coeffCurV = cu->getCoeffCr() + coeffOffsetC;
- uint32_t trSize = 1 << trSizeLog2;
- uint32_t trSizeC = 1 << trSizeCLog2;
- uint32_t sizeIdx = trSizeLog2 - 2;
- uint32_t sizeIdxC = trSizeCLog2 - 2;
+ uint32_t trSize = 1 << log2TrSize;
+ uint32_t trSizeC = 1 << log2TrSizeC;
+ uint32_t sizeIdx = log2TrSize - 2;
+ uint32_t sizeIdxC = log2TrSizeC - 2;
cu->setTrIdxSubParts(depth - cu->getDepth(0), absPartIdx, depth);
cu->setTransformSkipSubParts(0, TEXT_LUMA, absPartIdx, depth);
@@ -3026,26 +2963,26 @@
{
X265_CHECK(cu->getDepth(0) == cu->getDepth(absPartIdx), "depth not matching\n");
const uint32_t trMode = depth - cu->getDepth(0);
- const uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - depth;
+ const uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - depth;
const uint32_t subTUDepth = trMode + 1;
const uint32_t setCbf = 1 << trMode;
int chFmt = cu->getChromaFormat();
bool bSplitFlag = ((cu->getSlice()->getSPS()->getQuadtreeTUMaxDepthInter() == 1) && cu->getPredictionMode(absPartIdx) == MODE_INTER && (cu->getPartitionSize(absPartIdx) != SIZE_2Nx2N));
bool bCheckFull;
- if (bSplitFlag && depth == cu->getDepth(absPartIdx) && (trSizeLog2 > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx)))
+ if (bSplitFlag && depth == cu->getDepth(absPartIdx) && (log2TrSize > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx)))
bCheckFull = false;
else
- bCheckFull = (trSizeLog2 <= cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize());
- const bool bCheckSplit = (trSizeLog2 > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx));
+ bCheckFull = (log2TrSize <= cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize());
+ const bool bCheckSplit = (log2TrSize > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx));
X265_CHECK(bCheckFull || bCheckSplit, "check-full or check-split must be set\n");
- uint32_t trSizeCLog2 = trSizeLog2 - m_hChromaShift;
+ uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
bool bCodeChroma = true;
uint32_t trModeC = trMode;
- if ((trSizeLog2 == 2) && !(chFmt == CHROMA_444))
+ if ((log2TrSize == 2) && !(chFmt == CHROMA_444))
{
- trSizeCLog2++;
+ log2TrSizeC++;
trModeC--;
uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((depth - 1) << 1);
bCodeChroma = ((absPartIdx & (qpdiv - 1)) == 0);
@@ -3066,27 +3003,27 @@
uint32_t bestsubTUCBF[MAX_NUM_COMPONENT][2];
m_rdGoOnSbacCoder->store(m_rdSbacCoders[depth][CI_QT_TRAFO_ROOT]);
- uint32_t trSize = 1 << trSizeLog2;
+ uint32_t trSize = 1 << log2TrSize;
const bool splitIntoSubTUs = (chFmt == CHROMA_422);
uint32_t absPartIdxStep = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trModeC) << 1);
// code full block
if (bCheckFull)
{
- uint32_t trSizeC = 1 << trSizeCLog2;
- int sizeIdx = trSizeLog2 - 2;
- int sizeIdxC = trSizeCLog2 - 2;
- const uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
+ uint32_t trSizeC = 1 << log2TrSizeC;
+ int sizeIdx = log2TrSize - 2;
+ int sizeIdxC = log2TrSizeC - 2;
+ const uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
uint32_t coeffOffsetC = coeffOffsetY >> (m_hChromaShift + m_vChromaShift);
- coeff_t *coeffCurY = m_qtTempCoeff[0][qtlayer] + coeffOffsetY;
- coeff_t *coeffCurU = m_qtTempCoeff[1][qtlayer] + coeffOffsetC;
- coeff_t *coeffCurV = m_qtTempCoeff[2][qtlayer] + coeffOffsetC;
+ coeff_t *coeffCurY = m_qtTempCoeff[0][qtLayer] + coeffOffsetY;
+ coeff_t *coeffCurU = m_qtTempCoeff[1][qtLayer] + coeffOffsetC;
+ coeff_t *coeffCurV = m_qtTempCoeff[2][qtLayer] + coeffOffsetC;
cu->setTrIdxSubParts(depth - cu->getDepth(0), absPartIdx, depth);
bool checkTransformSkip = cu->getSlice()->getPPS()->getUseTransformSkip() && !cu->getCUTransquantBypass(0);
- bool checkTransformSkipY = checkTransformSkip && trSizeLog2 <= LOG2_MAX_TS_SIZE;
- bool checkTransformSkipUV = checkTransformSkip && trSizeCLog2 <= LOG2_MAX_TS_SIZE;
+ bool checkTransformSkipY = checkTransformSkip && log2TrSize <= LOG2_MAX_TS_SIZE;
+ bool checkTransformSkipUV = checkTransformSkip && log2TrSizeC <= LOG2_MAX_TS_SIZE;
cu->setTransformSkipSubParts(0, TEXT_LUMA, absPartIdx, depth);
@@ -3161,8 +3098,8 @@
while (isNextSection(&tuIterator));
}
- const uint32_t numCoeffY = 1 << (trSizeLog2 * 2);
- const uint32_t numCoeffC = 1 << (trSizeCLog2 * 2);
+ const uint32_t numCoeffY = 1 << (log2TrSize * 2);
+ const uint32_t numCoeffC = 1 << (log2TrSizeC * 2);
for (uint32_t subTUIndex = 0; subTUIndex < 2; subTUIndex++)
{
@@ -3173,10 +3110,10 @@
int partSize = partitionFromSize(trSize);
uint32_t distY = primitives.sse_sp[partSize](resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width, (pixel*)RDCost::zeroPel, trSize);
- int16_t *curResiY = m_qtTempShortYuv[qtlayer].getLumaAddr(absPartIdx);
- X265_CHECK(m_qtTempShortYuv[qtlayer].m_width == MAX_CU_SIZE, "width not full CU\n");
+ int16_t *curResiY = m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx);
+ X265_CHECK(m_qtTempShortYuv[qtLayer].m_width == MAX_CU_SIZE, "width not full CU\n");
const uint32_t strideResiY = MAX_CU_SIZE;
- const uint32_t strideResiC = m_qtTempShortYuv[qtlayer].m_cwidth;
+ const uint32_t strideResiC = m_qtTempShortYuv[qtLayer].m_cwidth;
if (outZeroDist)
{
@@ -3253,8 +3190,8 @@
uint32_t absPartIdxC = tuIterator.m_absPartIdxTURelCU;
uint32_t subTUBufferOffset = trSizeC * trSizeC * tuIterator.m_section;
- int16_t *curResiU = m_qtTempShortYuv[qtlayer].getCbAddr(absPartIdxC);
- int16_t *curResiV = m_qtTempShortYuv[qtlayer].getCrAddr(absPartIdxC);
+ int16_t *curResiU = m_qtTempShortYuv[qtLayer].getCbAddr(absPartIdxC);
+ int16_t *curResiV = m_qtTempShortYuv[qtLayer].getCrAddr(absPartIdxC);
distU = m_rdCost->scaleChromaDistCb(primitives.sse_sp[partSizeC](resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth, (pixel*)RDCost::zeroPel, trSizeC));
@@ -3399,11 +3336,8 @@
uint32_t nonZeroDistY = 0, absSumTransformSkipY;
uint64_t singleCostY = MAX_INT64;
- coeff_t bestCoeffY[MAX_TS_SIZE * MAX_TS_SIZE];
- memcpy(bestCoeffY, coeffCurY, sizeof(coeff_t) * numCoeffY);
-
- int16_t bestResiY[MAX_TS_SIZE * MAX_TS_SIZE];
- primitives.square_copy_ss[sizeIdx](bestResiY, trSize, curResiY, strideResiY);
+ ALIGN_VAR_32(coeff_t, tsCoeffY[MAX_TS_SIZE * MAX_TS_SIZE]);
+ ALIGN_VAR_32(int16_t, tsResiY[MAX_TS_SIZE * MAX_TS_SIZE]);
m_rdGoOnSbacCoder->load(m_rdSbacCoders[depth][CI_QT_TRAFO_ROOT]);
@@ -3417,7 +3351,7 @@
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
m_trQuant->selectLambda(TEXT_LUMA);
- absSumTransformSkipY = m_trQuant->transformNxN(cu, resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width, coeffCurY,
+ absSumTransformSkipY = m_trQuant->transformNxN(cu, resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width, tsCoeffY,
trSize, TEXT_LUMA, absPartIdx, &lastPosTransformSkip[TEXT_LUMA][0], true, curuseRDOQ);
cu->setCbfSubParts(absSumTransformSkipY ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);
@@ -3425,7 +3359,7 @@
{
m_entropyCoder->resetBits();
m_entropyCoder->encodeQtCbf(cu, absPartIdx, 0, trSize, trSize, TEXT_LUMA, trMode, true);
- m_entropyCoder->encodeCoeffNxN(cu, coeffCurY, absPartIdx, trSize, TEXT_LUMA);
+ m_entropyCoder->encodeCoeffNxN(cu, tsCoeffY, absPartIdx, trSize, TEXT_LUMA);
const uint32_t skipSingleBitsY = m_entropyCoder->getNumberOfWrittenBits();
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
@@ -3433,10 +3367,10 @@
int scalingListType = 3 + TEXT_LUMA;
X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiY, strideResiY, coeffCurY, trSize, scalingListType, true, lastPosTransformSkip[TEXT_LUMA][0]);
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, tsResiY, trSize, tsCoeffY, trSize, scalingListType, true, lastPosTransformSkip[TEXT_LUMA][0]);
nonZeroDistY = primitives.sse_ss[partSize](resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width,
- curResiY, strideResiY);
+ tsResiY, trSize);
singleCostY = m_rdCost->calcRdCost(nonZeroDistY, skipSingleBitsY);
}
@@ -3444,14 +3378,14 @@
if (!absSumTransformSkipY || minCost[TEXT_LUMA][0] < singleCostY)
{
cu->setTransformSkipSubParts(0, TEXT_LUMA, absPartIdx, depth);
- memcpy(coeffCurY, bestCoeffY, sizeof(coeff_t) * numCoeffY);
- primitives.square_copy_ss[sizeIdx](curResiY, strideResiY, bestResiY, trSize);
}
else
{
singleDistComp[TEXT_LUMA][0] = nonZeroDistY;
absSum[TEXT_LUMA][0] = absSumTransformSkipY;
bestTransformMode[TEXT_LUMA][0] = 1;
+ memcpy(coeffCurY, tsCoeffY, sizeof(coeff_t) * numCoeffY);
+ primitives.square_copy_ss[sizeIdx](curResiY, strideResiY, tsResiY, trSize);
}
cu->setCbfSubParts(absSum[TEXT_LUMA][0] ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);
@@ -3475,16 +3409,13 @@
uint32_t absPartIdxC = tuIterator.m_absPartIdxTURelCU;
uint32_t subTUBufferOffset = trSizeC * trSizeC * tuIterator.m_section;
- int16_t *curResiU = m_qtTempShortYuv[qtlayer].getCbAddr(absPartIdxC);
- int16_t *curResiV = m_qtTempShortYuv[qtlayer].getCrAddr(absPartIdxC);
-
- coeff_t bestCoeffU[MAX_TS_SIZE * MAX_TS_SIZE], bestCoeffV[MAX_TS_SIZE * MAX_TS_SIZE];
- memcpy(bestCoeffU, coeffCurU + subTUBufferOffset, sizeof(coeff_t) * numCoeffC);
- memcpy(bestCoeffV, coeffCurV + subTUBufferOffset, sizeof(coeff_t) * numCoeffC);
-
- int16_t bestResiU[MAX_TS_SIZE * MAX_TS_SIZE], bestResiV[MAX_TS_SIZE * MAX_TS_SIZE];
- primitives.square_copy_ss[sizeIdxC](bestResiU, trSizeC, curResiU, strideResiC);
- primitives.square_copy_ss[sizeIdxC](bestResiV, trSizeC, curResiV, strideResiC);
+ int16_t *curResiU = m_qtTempShortYuv[qtLayer].getCbAddr(absPartIdxC);
+ int16_t *curResiV = m_qtTempShortYuv[qtLayer].getCrAddr(absPartIdxC);
+
+ ALIGN_VAR_32(coeff_t, tsCoeffU[MAX_TS_SIZE * MAX_TS_SIZE]);
+ ALIGN_VAR_32(int16_t, tsResiU[MAX_TS_SIZE * MAX_TS_SIZE]);
+ ALIGN_VAR_32(coeff_t, tsCoeffV[MAX_TS_SIZE * MAX_TS_SIZE]);
+ ALIGN_VAR_32(int16_t, tsResiV[MAX_TS_SIZE * MAX_TS_SIZE]);
cu->setTransformSkipPartRange(1, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
cu->setTransformSkipPartRange(1, TEXT_CHROMA_V, absPartIdxC, tuIterator.m_absPartIdxStep);
@@ -3498,11 +3429,11 @@
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
m_trQuant->selectLambda(TEXT_CHROMA);
- absSumTransformSkipU = m_trQuant->transformNxN(cu, resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth, coeffCurU + subTUBufferOffset,
+ absSumTransformSkipU = m_trQuant->transformNxN(cu, resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth, tsCoeffU,
trSizeC, TEXT_CHROMA_U, absPartIdxC, &lastPosTransformSkip[TEXT_CHROMA_U][tuIterator.m_section], true, curuseRDOQ);
curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
- absSumTransformSkipV = m_trQuant->transformNxN(cu, resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth, coeffCurV + subTUBufferOffset,
+ absSumTransformSkipV = m_trQuant->transformNxN(cu, resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth, tsCoeffV,
trSizeC, TEXT_CHROMA_V, absPartIdxC, &lastPosTransformSkip[TEXT_CHROMA_V][tuIterator.m_section], true, curuseRDOQ);
cu->setCbfPartRange(absSumTransformSkipU ? setCbf : 0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
@@ -3514,7 +3445,7 @@
if (absSumTransformSkipU)
{
m_entropyCoder->encodeQtCbf(cu, absPartIdxC, tuIterator.m_absPartIdxStep, trSizeC, trSizeC, TEXT_CHROMA_U, trMode, true);
- m_entropyCoder->encodeCoeffNxN(cu, coeffCurU + subTUBufferOffset, absPartIdxC, trSizeC, TEXT_CHROMA_U);
+ m_entropyCoder->encodeCoeffNxN(cu, tsCoeffU, absPartIdxC, trSizeC, TEXT_CHROMA_U);
singleBitsComp[TEXT_CHROMA_U][tuIterator.m_section] = m_entropyCoder->getNumberOfWrittenBits();
curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();
@@ -3522,10 +3453,10 @@
int scalingListType = 3 + TEXT_CHROMA_U;
X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiU, strideResiC, coeffCurU + subTUBufferOffset,
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, tsResiU, trSizeC, tsCoeffU,
trSizeC, scalingListType, true, lastPosTransformSkip[TEXT_CHROMA_U][tuIterator.m_section]);
uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth,
- curResiU, strideResiC);
+ tsResiU, trSizeC);
nonZeroDistU = m_rdCost->scaleChromaDistCb(dist);
singleCostU = m_rdCost->calcRdCost(nonZeroDistU, singleBitsComp[TEXT_CHROMA_U][tuIterator.m_section]);
}
@@ -3533,21 +3464,20 @@
if (!absSumTransformSkipU || minCost[TEXT_CHROMA_U][tuIterator.m_section] < singleCostU)
{
cu->setTransformSkipPartRange(0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
-
- memcpy(coeffCurU + subTUBufferOffset, bestCoeffU, sizeof(coeff_t) * numCoeffC);
- primitives.square_copy_ss[sizeIdxC](curResiU, strideResiC, bestResiU, trSizeC);
}
else
{
singleDistComp[TEXT_CHROMA_U][tuIterator.m_section] = nonZeroDistU;
absSum[TEXT_CHROMA_U][tuIterator.m_section] = absSumTransformSkipU;
bestTransformMode[TEXT_CHROMA_U][tuIterator.m_section] = 1;
+ memcpy(coeffCurU + subTUBufferOffset, tsCoeffU, sizeof(coeff_t) * numCoeffC);
+ primitives.square_copy_ss[sizeIdxC](curResiU, strideResiC, tsResiU, trSizeC);
}
if (absSumTransformSkipV)
{
m_entropyCoder->encodeQtCbf(cu, absPartIdxC, tuIterator.m_absPartIdxStep, trSizeC, trSizeC, TEXT_CHROMA_V, trMode, true);
- m_entropyCoder->encodeCoeffNxN(cu, coeffCurV + subTUBufferOffset, absPartIdxC, trSizeC, TEXT_CHROMA_V);
+ m_entropyCoder->encodeCoeffNxN(cu, tsCoeffV, absPartIdxC, trSizeC, TEXT_CHROMA_V);
singleBitsComp[TEXT_CHROMA_V][tuIterator.m_section] = m_entropyCoder->getNumberOfWrittenBits() - singleBitsComp[TEXT_CHROMA_U][tuIterator.m_section];
curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
@@ -3555,10 +3485,10 @@
int scalingListType = 3 + TEXT_CHROMA_V;
X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiV, strideResiC, coeffCurV + subTUBufferOffset,
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, tsResiV, trSizeC, tsCoeffV,
trSizeC, scalingListType, true, lastPosTransformSkip[TEXT_CHROMA_V][tuIterator.m_section]);
uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth,
- curResiV, strideResiC);
+ tsResiV, trSizeC);
nonZeroDistV = m_rdCost->scaleChromaDistCr(dist);
singleCostV = m_rdCost->calcRdCost(nonZeroDistV, singleBitsComp[TEXT_CHROMA_V][tuIterator.m_section]);
}
@@ -3566,15 +3496,14 @@
if (!absSumTransformSkipV || minCost[TEXT_CHROMA_V][tuIterator.m_section] < singleCostV)
{
cu->setTransformSkipPartRange(0, TEXT_CHROMA_V, absPartIdxC, tuIterator.m_absPartIdxStep);
-
- memcpy(coeffCurV + subTUBufferOffset, bestCoeffV, sizeof(coeff_t) * numCoeffC);
- primitives.square_copy_ss[sizeIdxC](curResiV, strideResiC, bestResiV, trSizeC);
}
else
{
singleDistComp[TEXT_CHROMA_V][tuIterator.m_section] = nonZeroDistV;
absSum[TEXT_CHROMA_V][tuIterator.m_section] = absSumTransformSkipV;
bestTransformMode[TEXT_CHROMA_V][tuIterator.m_section] = 1;
+ memcpy(coeffCurV + subTUBufferOffset, tsCoeffV, sizeof(coeff_t) * numCoeffC);
+ primitives.square_copy_ss[sizeIdxC](curResiV, strideResiC, tsResiV, trSizeC);
}
cu->setCbfPartRange(absSum[TEXT_CHROMA_U][tuIterator.m_section] ? setCbf : 0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
@@ -3588,9 +3517,9 @@
m_entropyCoder->resetBits();
- if (trSizeLog2 > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx))
+ if (log2TrSize > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx))
{
- m_entropyCoder->encodeTransformSubdivFlag(0, 5 - trSizeLog2);
+ m_entropyCoder->encodeTransformSubdivFlag(0, 5 - log2TrSize);
}
if (bCodeChroma)
@@ -3793,21 +3722,21 @@
const uint32_t curTrMode = depth - cu->getDepth(0);
const uint32_t trMode = cu->getTransformIdx(absPartIdx);
const bool bSubdiv = curTrMode != trMode;
- const uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - depth;
- uint32_t trSizeCLog2 = trSizeLog2 - m_hChromaShift;
+ const uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - depth;
+ uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
int chFmt = cu->getChromaFormat();
const bool splitIntoSubTUs = (chFmt == CHROMA_422);
- if (bSubdivAndCbf && trSizeLog2 <= cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() && trSizeLog2 > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx))
+ if (bSubdivAndCbf && log2TrSize <= cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() && log2TrSize > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx))
{
- m_entropyCoder->encodeTransformSubdivFlag(bSubdiv, 5 - trSizeLog2);
+ m_entropyCoder->encodeTransformSubdivFlag(bSubdiv, 5 - log2TrSize);
}
X265_CHECK(cu->getPredictionMode(absPartIdx) != MODE_INTRA, "xEncodeResidualQT() with intra block\n");
bool mCodeAll = true;
- uint32_t trSize = 1 << trSizeLog2;
- uint32_t trWidthC = 1 << trSizeCLog2;
+ uint32_t trSize = 1 << log2TrSize;
+ uint32_t trWidthC = 1 << log2TrSizeC;
uint32_t trHeightC = splitIntoSubTUs ? (trWidthC << 1) : trWidthC;
const uint32_t numPels = trWidthC * trHeightC;
@@ -3841,16 +3770,16 @@
if (!bSubdiv)
{
//Luma
- const uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
+ const uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
- coeff_t *coeffCurY = m_qtTempCoeff[0][qtlayer] + coeffOffsetY;
+ coeff_t *coeffCurY = m_qtTempCoeff[0][qtLayer] + coeffOffsetY;
//Chroma
bool bCodeChroma = true;
uint32_t trModeC = trMode;
- if ((trSizeLog2 == 2) && !(chFmt == CHROMA_444))
+ if ((log2TrSize == 2) && !(chFmt == CHROMA_444))
{
- trSizeCLog2++;
+ log2TrSizeC++;
trModeC--;
uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((depth - 1) << 1);
bCodeChroma = ((absPartIdx & (qpdiv - 1)) == 0);
@@ -3869,9 +3798,9 @@
if (bCodeChroma)
{
uint32_t coeffOffsetC = coeffOffsetY >> (m_hChromaShift + m_vChromaShift);
- coeff_t *coeffCurU = m_qtTempCoeff[1][qtlayer] + coeffOffsetC;
- coeff_t *coeffCurV = m_qtTempCoeff[2][qtlayer] + coeffOffsetC;
- uint32_t trSizeC = 1 << trSizeCLog2;
+ coeff_t *coeffCurU = m_qtTempCoeff[1][qtLayer] + coeffOffsetC;
+ coeff_t *coeffCurV = m_qtTempCoeff[2][qtLayer] + coeffOffsetC;
+ uint32_t trSizeC = 1 << log2TrSizeC;
if (!splitIntoSubTUs)
{
@@ -3928,16 +3857,16 @@
if (curTrMode == trMode)
{
int chFmt = cu->getChromaFormat();
- const uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - depth;
- const uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
-
- uint32_t trSizeCLog2 = trSizeLog2 - m_hChromaShift;
+ const uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - depth;
+ const uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
+
+ uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
bool bCodeChroma = true;
bool bChromaSame = false;
uint32_t trModeC = trMode;
- if ((trSizeLog2 == 2) && !(chFmt == CHROMA_444))
+ if ((log2TrSize == 2) && !(chFmt == CHROMA_444))
{
- trSizeCLog2++;
+ log2TrSizeC++;
trModeC--;
uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trModeC) << 1);
bCodeChroma = ((absPartIdx & (qpdiv - 1)) == 0);
@@ -3946,28 +3875,28 @@
if (bSpatial)
{
- uint32_t trSize = 1 << trSizeLog2;
- m_qtTempShortYuv[qtlayer].copyPartToPartLuma(resiYuv, absPartIdx, trSize);
+ uint32_t trSize = 1 << log2TrSize;
+ m_qtTempShortYuv[qtLayer].copyPartToPartLuma(resiYuv, absPartIdx, trSize);
if (bCodeChroma)
{
- m_qtTempShortYuv[qtlayer].copyPartToPartChroma(resiYuv, absPartIdx, trSize, (bChromaSame && (chFmt != CHROMA_422)));
+ m_qtTempShortYuv[qtLayer].copyPartToPartChroma(resiYuv, absPartIdx, trSize, (bChromaSame && (chFmt != CHROMA_422)));
}
}
else
{
- uint32_t numCoeffY = 1 << (trSizeLog2 * 2);
+ uint32_t numCoeffY = 1 << (log2TrSize * 2);
uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
- coeff_t* coeffSrcY = m_qtTempCoeff[0][qtlayer] + coeffOffsetY;
+ coeff_t* coeffSrcY = m_qtTempCoeff[0][qtLayer] + coeffOffsetY;
coeff_t* coeffDstY = cu->getCoeffY() + coeffOffsetY;
::memcpy(coeffDstY, coeffSrcY, sizeof(coeff_t) * numCoeffY);
if (bCodeChroma)
{
- uint32_t numCoeffC = 1 << (trSizeCLog2 * 2 + (chFmt == CHROMA_422));
+ uint32_t numCoeffC = 1 << (log2TrSizeC * 2 + (chFmt == CHROMA_422));
uint32_t coeffOffsetC = coeffOffsetY >> (m_hChromaShift + m_vChromaShift);
- coeff_t* coeffSrcU = m_qtTempCoeff[1][qtlayer] + coeffOffsetC;
- coeff_t* coeffSrcV = m_qtTempCoeff[2][qtlayer] + coeffOffsetC;
+ coeff_t* coeffSrcU = m_qtTempCoeff[1][qtLayer] + coeffOffsetC;
+ coeff_t* coeffSrcV = m_qtTempCoeff[2][qtLayer] + coeffOffsetC;
coeff_t* coeffDstU = cu->getCoeffCb() + coeffOffsetC;
coeff_t* coeffDstV = cu->getCoeffCr() + coeffOffsetC;
::memcpy(coeffDstU, coeffSrcU, sizeof(coeff_t) * numCoeffC);
diff -r 0cbc7320c9f2 -r b6302b087ea4 source/Lib/TLibEncoder/TEncSearch.h
--- a/source/Lib/TLibEncoder/TEncSearch.h Mon Jun 09 11:34:11 2014 +0530
+++ b/source/Lib/TLibEncoder/TEncSearch.h Tue Jun 10 18:54:35 2014 +0900
@@ -207,11 +207,11 @@
void xEncIntraHeader(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, bool bLuma, bool bChroma);
uint32_t xGetIntraBitsQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t absPartIdxStep, bool bLuma, bool bChroma);
uint32_t xGetIntraBitsQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t chromaId, const bool splitIntoSubTUs);
- void xIntraCodingLumaBlk(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* fencYuv, TComYuv* predYuv,
- ShortYuv* resiYuv, uint32_t& outDist);
+ void xIntraCodingLumaBlk(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize, TComYuv* fencYuv, TComYuv* predYuv,
+ ShortYuv* resiYuv, uint32_t& cbf, uint32_t& outDist);
- void xIntraCodingChromaBlk(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t absPartIdxStep, TComYuv* fencYuv, TComYuv* predYuv,
- ShortYuv* resiYuv, uint32_t& outDist, uint32_t chromaId);
+ void xIntraCodingChromaBlk(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize, TComYuv* fencYuv, TComYuv* predYuv,
+ ShortYuv* resiYuv, uint32_t& cbf, uint32_t& outDist, uint32_t chromaId, uint32_t log2TrSizeC);
void xRecurIntraChromaCodingQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* fencYuv,
TComYuv* predYuv, ShortYuv* resiYuv, uint32_t& outDist);
@@ -223,10 +223,10 @@
void xSetIntraResultChromaQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* reconYuv);
- void xStoreIntraResultQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx);
- void xLoadIntraResultQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx);
- void xStoreIntraResultChromaQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t chromaId, const bool splitIntoSubTUs);
- void xLoadIntraResultChromaQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t chromaId);
+ void xStoreIntraResultQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize);
+ void xLoadIntraResultQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize);
+ void xStoreIntraResultChromaQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize, uint32_t log2TrSizeC, uint32_t chromaId);
+ void xLoadIntraResultChromaQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize, uint32_t log2TrSizeC, uint32_t chromaId);
// --------------------------------------------------------------------------------------------
// Inter search (AMP)
More information about the x265-devel mailing list