[x265] refine tskip related
Steve Borho
steve at borho.org
Tue Jun 10 19:58:06 CEST 2014
On Tue, Jun 10, 2014 at 4:56 AM, Satoshi Nakagawa <nakagawa424 at oki.com> wrote:
> # HG changeset patch
> # User Satoshi Nakagawa <nakagawa424 at oki.com>
> # Date 1402394075 -32400
> # Tue Jun 10 18:54:35 2014 +0900
> # Node ID b6302b087ea414d52fe76050acd2889e34b352c8
> # Parent 0cbc7320c9f2904bb1439dca70fd278ea42ed5aa
> refine tskip related
Rebased on top of the quant patch and queued for testing.
> diff -r 0cbc7320c9f2 -r b6302b087ea4 source/Lib/TLibEncoder/TEncEntropy.cpp
> --- a/source/Lib/TLibEncoder/TEncEntropy.cpp Mon Jun 09 11:34:11 2014 +0530
> +++ b/source/Lib/TLibEncoder/TEncEntropy.cpp Tue Jun 10 18:54:35 2014 +0900
> @@ -211,7 +211,7 @@
> void TEncEntropy::xEncodeTransform(TComDataCU* cu, uint32_t offsetLuma, uint32_t offsetChroma, uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t depth, uint32_t tuSize, uint32_t trIdx, bool& bCodeDQP)
> {
> const uint32_t subdiv = cu->getTransformIdx(absPartIdx) + cu->getDepth(absPartIdx) > depth;
> - const uint32_t log2TrafoSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - depth;
> + const uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - depth;
> uint32_t hChromaShift = cu->getHorzChromaShift();
> uint32_t vChromaShift = cu->getVertChromaShift();
> uint32_t cbfY = cu->getCbf(absPartIdx, TEXT_LUMA, trIdx);
> @@ -223,7 +223,7 @@
> m_bakAbsPartIdxCU = absPartIdx;
> }
>
> - if ((log2TrafoSize == 2) && !(cu->getChromaFormat() == CHROMA_444))
> + if ((log2TrSize == 2) && !(cu->getChromaFormat() == CHROMA_444))
> {
> uint32_t partNum = cu->getPic()->getNumPartInCU() >> ((depth - 1) << 1);
> if ((absPartIdx & (partNum - 1)) == 0)
> @@ -244,7 +244,7 @@
> }
> else if (cu->getPredictionMode(absPartIdx) == MODE_INTER && (cu->getPartitionSize(absPartIdx) != SIZE_2Nx2N) && depth == cu->getDepth(absPartIdx) && (cu->getSlice()->getSPS()->getQuadtreeTUMaxDepthInter() == 1))
> {
> - if (log2TrafoSize > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx))
> + if (log2TrSize > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx))
> {
> X265_CHECK(subdiv, "subdivision state failure\n");
> }
> @@ -253,22 +253,22 @@
> X265_CHECK(!subdiv, "subdivision state failure\n");
> }
> }
> - else if (log2TrafoSize > cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize())
> + else if (log2TrSize > cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize())
> {
> X265_CHECK(subdiv, "subdivision state failure\n");
> }
> - else if (log2TrafoSize == cu->getSlice()->getSPS()->getQuadtreeTULog2MinSize())
> + else if (log2TrSize == cu->getSlice()->getSPS()->getQuadtreeTULog2MinSize())
> {
> X265_CHECK(!subdiv, "subdivision state failure\n");
> }
> - else if (log2TrafoSize == cu->getQuadtreeTULog2MinSizeInCU(absPartIdx))
> + else if (log2TrSize == cu->getQuadtreeTULog2MinSizeInCU(absPartIdx))
> {
> X265_CHECK(!subdiv, "subdivision state failure\n");
> }
> else
> {
> - X265_CHECK(log2TrafoSize > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx), "transform size failure\n");
> - m_entropyCoderIf->codeTransformSubdivFlag(subdiv, 5 - log2TrafoSize);
> + X265_CHECK(log2TrSize > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx), "transform size failure\n");
> + m_entropyCoderIf->codeTransformSubdivFlag(subdiv, 5 - log2TrSize);
> }
>
> const uint32_t trDepthCurr = depth - cu->getDepth(absPartIdx);
> @@ -365,12 +365,12 @@
> }
>
> int chFmt = cu->getChromaFormat();
> - if ((log2TrafoSize == 2) && !(chFmt == CHROMA_444))
> + if ((log2TrSize == 2) && !(chFmt == CHROMA_444))
> {
> uint32_t partNum = cu->getPic()->getNumPartInCU() >> ((depth - 1) << 1);
> if ((absPartIdx & (partNum - 1)) == (partNum - 1))
> {
> - uint32_t trSizeC = 1 << log2TrafoSize;
> + uint32_t trSizeC = 1 << log2TrSize;
> const bool splitIntoSubTUs = (chFmt == CHROMA_422);
>
> uint32_t curPartNum = cu->getPic()->getNumPartInCU() >> ((depth - 1) << 1);
> diff -r 0cbc7320c9f2 -r b6302b087ea4 source/Lib/TLibEncoder/TEncSearch.cpp
> --- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Jun 09 11:34:11 2014 +0530
> +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Tue Jun 10 18:54:35 2014 +0900
> @@ -160,37 +160,37 @@
> uint32_t fullDepth = cu->getDepth(0) + trDepth;
> uint32_t trMode = cu->getTransformIdx(absPartIdx);
> uint32_t subdiv = (trMode > trDepth ? 1 : 0);
> - uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
> + uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
>
> if (cu->getPredictionMode(0) == MODE_INTRA && cu->getPartitionSize(0) == SIZE_NxN && trDepth == 0)
> {
> X265_CHECK(subdiv, "subdivision not present\n");
> }
> - else if (trSizeLog2 > cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize())
> + else if (log2TrSize > cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize())
> {
> X265_CHECK(subdiv, "subdivision not present\n");
> }
> - else if (trSizeLog2 == cu->getSlice()->getSPS()->getQuadtreeTULog2MinSize())
> + else if (log2TrSize == cu->getSlice()->getSPS()->getQuadtreeTULog2MinSize())
> {
> X265_CHECK(!subdiv, "subdivision present\n");
> }
> - else if (trSizeLog2 == cu->getQuadtreeTULog2MinSizeInCU(absPartIdx))
> + else if (log2TrSize == cu->getQuadtreeTULog2MinSizeInCU(absPartIdx))
> {
> X265_CHECK(!subdiv, "subdivision present\n");
> }
> else
> {
> - X265_CHECK(trSizeLog2 > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx), "transform size too small\n");
> + X265_CHECK(log2TrSize > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx), "transform size too small\n");
> if (bLuma)
> {
> - m_entropyCoder->encodeTransformSubdivFlag(subdiv, 5 - trSizeLog2);
> + m_entropyCoder->encodeTransformSubdivFlag(subdiv, 5 - log2TrSize);
> }
> }
>
> if (bChroma)
> {
> int chFmt = cu->getChromaFormat();
> - if ((trSizeLog2 > 2) && !(chFmt == CHROMA_444))
> + if ((log2TrSize > 2) && !(chFmt == CHROMA_444))
> {
> if (trDepth == 0 || cu->getCbf(absPartIdx, TEXT_CHROMA_U, trDepth - 1))
> m_entropyCoder->encodeQtCbf(cu, absPartIdx, absPartIdxStep, (width >> m_hChromaShift), (height >> m_vChromaShift), TEXT_CHROMA_U, trDepth, (subdiv == 0));
> @@ -245,9 +245,9 @@
>
> uint32_t origTrDepth = trDepth;
>
> - uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
> + uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
> int chFmt = cu->getChromaFormat();
> - if ((ttype != TEXT_LUMA) && (trSizeLog2 == 2) && !(chFmt == CHROMA_444))
> + if ((ttype != TEXT_LUMA) && (log2TrSize == 2) && !(chFmt == CHROMA_444))
> {
> X265_CHECK(trDepth > 0, "transform size too small\n");
> trDepth--;
> @@ -267,7 +267,7 @@
> uint32_t height = cu->getCUSize(0) >> (trDepth + cspy);
> height = splitIntoSubTUs ? height >> 1 : height;
> uint32_t coeffOffset = absPartIdx << (cu->getPic()->getLog2UnitSize() * 2 - (cspx + cspy));
> - uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
> + uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
> coeff_t* coeff = m_qtTempCoeff[ttype][qtLayer] + coeffOffset;
>
> if (width == height)
> @@ -386,35 +386,32 @@
> }
>
> void TEncSearch::xIntraCodingLumaBlk(TComDataCU* cu,
> - uint32_t trDepth,
> uint32_t absPartIdx,
> + uint32_t log2TrSize,
> TComYuv* fencYuv,
> TComYuv* predYuv,
> ShortYuv* resiYuv,
> + uint32_t& cbf,
> uint32_t& outDist)
> {
> - uint32_t fullDepth = cu->getDepth(0) + trDepth;
> - uint32_t tuSize = cu->getCUSize(0) >> trDepth;
> + uint32_t tuSize = 1 << log2TrSize;
> uint32_t stride = fencYuv->getStride();
> pixel* fenc = fencYuv->getLumaAddr(absPartIdx);
> pixel* pred = predYuv->getLumaAddr(absPartIdx);
> int16_t* residual = resiYuv->getLumaAddr(absPartIdx);
> - int part = partitionFromSize(tuSize);
> - int sizeIdx = g_convertToBit[tuSize];
> -
> - uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
> - uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
> +
> + uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
> uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
> coeff_t* coeff = m_qtTempCoeff[0][qtLayer] + coeffOffsetY;
> -
> int16_t* reconQt = m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx);
> X265_CHECK(m_qtTempShortYuv[qtLayer].m_width == MAX_CU_SIZE, "width is not max CU size\n");
> const uint32_t reconQtStride = MAX_CU_SIZE;
> -
> uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
> pixel* reconIPred = cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);
> uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getStride();
> bool useTransformSkip = !!cu->getTransformSkip(absPartIdx, TEXT_LUMA);
> + int part = partitionFromSize(tuSize);
> + int sizeIdx = log2TrSize - 2;
>
> //===== get residual signal =====
> X265_CHECK(!((intptr_t)fenc & (tuSize - 1)), "fenc alignment check fail\n");
> @@ -430,9 +427,8 @@
> }
>
> //--- transform and quantization ---
> - uint32_t absSum = 0;
> + uint32_t absSum;
> int lastPos = -1;
> - cu->setTrIdxSubParts(trDepth, absPartIdx, fullDepth);
>
> int chFmt = cu->getChromaFormat();
> m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
> @@ -441,7 +437,7 @@
> absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, tuSize, TEXT_LUMA, absPartIdx, &lastPos, useTransformSkip);
>
> //--- set coded block flag ---
> - cu->setCbfSubParts((absSum ? 1 : 0) << trDepth, TEXT_LUMA, absPartIdx, fullDepth);
> + cbf = absSum ? 1 : 0;
>
> if (absSum)
> {
> @@ -449,7 +445,7 @@
> int scalingListType = 0 + TEXT_LUMA;
> X265_CHECK(scalingListType < 6, "scalingListType invalid %d\n", scalingListType);
> m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), cu->getLumaIntraDir(absPartIdx), residual, stride, coeff, tuSize, scalingListType, useTransformSkip, lastPos);
> - X265_CHECK(tuSize <= 32, "tuSize is too large %d\n", tuSize);
> + X265_CHECK(log2TrSize <= 5, "log2TrSize is too large %d\n", log2TrSize);
> //===== reconstruction =====
> primitives.calcrecon[sizeIdx](pred, residual, reconQt, reconIPred, stride, reconQtStride, reconIPredStride);
> //===== update distortion =====
> @@ -461,66 +457,48 @@
> memset(coeff, 0, sizeof(coeff_t) * tuSize * tuSize);
> #endif
> //===== reconstruction =====
> - primitives.luma_copy_ps[part](reconQt, reconQtStride, pred, stride);
> - primitives.luma_copy_pp[part](reconIPred, reconIPredStride, pred, stride);
> + primitives.square_copy_ps[sizeIdx](reconQt, reconQtStride, pred, stride);
> + primitives.square_copy_pp[sizeIdx](reconIPred, reconIPredStride, pred, stride);
> //===== update distortion =====
> outDist += primitives.sse_pp[part](pred, stride, fenc, stride);
> }
> }
>
> void TEncSearch::xIntraCodingChromaBlk(TComDataCU* cu,
> - uint32_t trDepth,
> uint32_t absPartIdx,
> - uint32_t absPartIdxStep,
> + uint32_t log2TrSize,
> TComYuv* fencYuv,
> TComYuv* predYuv,
> ShortYuv* resiYuv,
> + uint32_t& cbf,
> uint32_t& outDist,
> - uint32_t chromaId)
> + uint32_t chromaId,
> + uint32_t log2TrSizeC)
> {
> - uint32_t fullDepth = cu->getDepth(0) + trDepth;
> - uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
> - int chFmt = cu->getChromaFormat();
> -
> - uint32_t origTrDepth = trDepth;
> -
> - if ((trSizeLog2 == 2) && !(chFmt == CHROMA_444))
> - {
> - X265_CHECK(trDepth > 0, "trDepth should be non-zero\n");
> - trDepth--;
> - uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trDepth) << 1);
> - bool bFirstQ = ((absPartIdx & (qpdiv - 1)) == 0);
> - bool bSecondQ = (chFmt == CHROMA_422) ? ((absPartIdx & (qpdiv - 1)) == 2) : false;
> - if ((!bFirstQ) && (!bSecondQ))
> - {
> - return;
> - }
> - }
> -
> - TextType ttype = (TextType)chromaId;
> - uint32_t tuSize = cu->getCUSize(0) >> (trDepth + m_hChromaShift);
> - uint32_t stride = fencYuv->getCStride();
> - pixel* fenc = fencYuv->getChromaAddr(chromaId, absPartIdx);
> - pixel* pred = predYuv->getChromaAddr(chromaId, absPartIdx);
> - int16_t* residual = resiYuv->getChromaAddr(chromaId, absPartIdx);
> -
> - uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
> + TextType ttype = (TextType)chromaId;
> + uint32_t tuSize = 1 << log2TrSizeC;
> + uint32_t stride = fencYuv->getCStride();
> + pixel* fenc = fencYuv->getChromaAddr(chromaId, absPartIdx);
> + pixel* pred = predYuv->getChromaAddr(chromaId, absPartIdx);
> + int16_t* residual = resiYuv->getChromaAddr(chromaId, absPartIdx);
> +
> + uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
> uint32_t coeffOffsetC = absPartIdx << (cu->getPic()->getLog2UnitSize() * 2 - (m_hChromaShift + m_vChromaShift));
> - coeff_t* coeff = m_qtTempCoeff[chromaId][qtlayer] + coeffOffsetC;
> - int16_t* reconQt = m_qtTempShortYuv[qtlayer].getChromaAddr(chromaId, absPartIdx);
> - uint32_t reconQtStride = m_qtTempShortYuv[qtlayer].m_cwidth;
> + coeff_t* coeff = m_qtTempCoeff[chromaId][qtLayer] + coeffOffsetC;
> + int16_t* reconQt = m_qtTempShortYuv[qtLayer].getChromaAddr(chromaId, absPartIdx);
> + uint32_t reconQtStride = m_qtTempShortYuv[qtLayer].m_cwidth;
> uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
> pixel* reconIPred = cu->getPic()->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), zorder);
> uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getCStride();
> - bool useTransformSkipChroma = !!cu->getTransformSkip(absPartIdx, ttype);
> + bool useTransformSkipC = !!cu->getTransformSkip(absPartIdx, ttype);
> int part = partitionFromSize(tuSize);
> - int sizeIdx = g_convertToBit[tuSize];
> + int sizeIdxC = log2TrSizeC - 2;
>
> //===== get residual signal =====
> X265_CHECK(!((intptr_t)fenc & (tuSize - 1)), "fenc alignment check fail\n");
> X265_CHECK(!((intptr_t)pred & (tuSize - 1)), "pred alignment check fail\n");
> X265_CHECK(!((intptr_t)residual & (tuSize - 1)), "residual alignment check fail\n");
> - primitives.calcresidual[sizeIdx](fenc, pred, residual, stride);
> + primitives.calcresidual[sizeIdxC](fenc, pred, residual, stride);
>
> //===== transform and quantization =====
> //--- init rate estimation arrays for RDOQ ---
> @@ -530,9 +508,10 @@
> }
>
> //--- transform and quantization ---
> - uint32_t absSum = 0;
> + uint32_t absSum;
> int lastPos = -1;
>
> + int chFmt = cu->getChromaFormat();
> int curChromaQpOffset;
> if (ttype == TEXT_CHROMA_U)
> {
> @@ -545,10 +524,10 @@
> m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
> m_trQuant->selectLambda(TEXT_CHROMA);
>
> - absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, tuSize, ttype, absPartIdx, &lastPos, useTransformSkipChroma);
> + absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, tuSize, ttype, absPartIdx, &lastPos, useTransformSkipC);
>
> //--- set coded block flag ---
> - cu->setCbfPartRange((((absSum > 0) ? 1 : 0) << origTrDepth), ttype, absPartIdx, absPartIdxStep);
> + cbf = absSum ? 1 : 0;
>
> uint32_t dist;
> if (absSum)
> @@ -556,10 +535,10 @@
> //--- inverse transform ---
> int scalingListType = 0 + ttype;
> X265_CHECK(scalingListType < 6, "scalingListType invalid %d\n", scalingListType);
> - m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, residual, stride, coeff, tuSize, scalingListType, useTransformSkipChroma, lastPos);
> - X265_CHECK(tuSize <= 32, "tuSize is too large %d\n", tuSize);
> + m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, residual, stride, coeff, tuSize, scalingListType, useTransformSkipC, lastPos);
> + X265_CHECK(log2TrSizeC <= 5, "log2TrSizeC is too large %d\n", log2TrSizeC);
> //===== reconstruction =====
> - primitives.calcrecon[sizeIdx](pred, residual, reconQt, reconIPred, stride, reconQtStride, reconIPredStride);
> + primitives.calcrecon[sizeIdxC](pred, residual, reconQt, reconIPred, stride, reconQtStride, reconIPredStride);
> //===== update distortion =====
> dist = primitives.sse_sp[part](reconQt, reconQtStride, fenc, stride);
> }
> @@ -569,8 +548,8 @@
> memset(coeff, 0, sizeof(coeff_t) * tuSize * tuSize);
> #endif
> //===== reconstruction =====
> - primitives.square_copy_ps[sizeIdx](reconQt, reconQtStride, pred, stride);
> - primitives.square_copy_pp[sizeIdx](reconIPred, reconIPredStride, pred, stride);
> + primitives.square_copy_ps[sizeIdxC](reconQt, reconQtStride, pred, stride);
> + primitives.square_copy_pp[sizeIdxC](reconIPred, reconIPredStride, pred, stride);
> //===== update distortion =====
> dist = primitives.sse_pp[part](pred, stride, fenc, stride);
> }
> @@ -597,9 +576,9 @@
> uint64_t& rdCost)
> {
> uint32_t fullDepth = cu->getDepth(0) + trDepth;
> - uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
> - bool bCheckFull = (trSizeLog2 <= cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize());
> - bool bCheckSplit = (trSizeLog2 > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx));
> + uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
> + bool bCheckFull = (log2TrSize <= cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize());
> + bool bCheckSplit = (log2TrSize > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx));
>
> int maxTuSize = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize();
> int isIntraSlice = (cu->getSlice()->getSliceType() == I_SLICE);
> @@ -610,12 +589,12 @@
> if (m_cfg->m_param->rdPenalty && !isIntraSlice)
> {
> // in addition don't check split if TU size is less or equal to 16x16 TU size for non-intra slice
> - noSplitIntraMaxTuSize = (trSizeLog2 <= X265_MIN(maxTuSize, 4));
> + noSplitIntraMaxTuSize = (log2TrSize <= X265_MIN(maxTuSize, 4));
>
> // if maximum RD-penalty don't check TU size 32x32
> if (m_cfg->m_param->rdPenalty == 2)
> {
> - bCheckFull = (trSizeLog2 <= X265_MIN(maxTuSize, 4));
> + bCheckFull = (log2TrSize <= X265_MIN(maxTuSize, 4));
> }
> }
> if (bCheckFirst && noSplitIntraMaxTuSize)
> @@ -631,10 +610,10 @@
>
> if (bCheckFull)
> {
> - uint32_t tuSize = 1 << trSizeLog2;
> + uint32_t tuSize = 1 << log2TrSize;
>
> bool checkTransformSkip = (cu->getSlice()->getPPS()->getUseTransformSkip() &&
> - trSizeLog2 <= LOG2_MAX_TS_SIZE &&
> + log2TrSize <= LOG2_MAX_TS_SIZE &&
> !cu->getCUTransquantBypass(0));
> if (checkTransformSkip)
> {
> @@ -657,6 +636,8 @@
> //===== get prediction signal =====
> predIntraLumaAng(lumaPredMode, pred, stride, tuSize);
>
> + cu->setTrIdxSubParts(trDepth, absPartIdx, fullDepth);
> +
> if (checkTransformSkip || checkTQbypass)
> {
> //----- store original entropy coding status -----
> @@ -680,8 +661,8 @@
> }
>
> //----- code luma block with given intra prediction mode and store Cbf-----
> - xIntraCodingLumaBlk(cu, trDepth, absPartIdx, fencYuv, predYuv, resiYuv, singleDistYTmp);
> - singleCbfYTmp = cu->getCbf(absPartIdx, TEXT_LUMA, trDepth);
> + xIntraCodingLumaBlk(cu, absPartIdx, log2TrSize, fencYuv, predYuv, resiYuv, singleCbfYTmp, singleDistYTmp);
> + cu->setCbfSubParts(singleCbfYTmp << trDepth, TEXT_LUMA, absPartIdx, fullDepth);
> singleTQbypass = cu->getCUTransquantBypass(absPartIdx);
>
> if ((modeId == 1) && (singleCbfYTmp == 0) && checkTransformSkip)
> @@ -704,7 +685,7 @@
> bestModeId = modeId;
> if (bestModeId == firstCheckId)
> {
> - xStoreIntraResultQT(cu, trDepth, absPartIdx);
> + xStoreIntraResultQT(cu, absPartIdx, log2TrSize);
> m_rdGoOnSbacCoder->store(m_rdSbacCoders[fullDepth][CI_TEMP_BEST]);
> }
> }
> @@ -722,7 +703,7 @@
>
> if (bestModeId == firstCheckId)
> {
> - xLoadIntraResultQT(cu, trDepth, absPartIdx);
> + xLoadIntraResultQT(cu, absPartIdx, log2TrSize);
> cu->setCbfSubParts(singleCbfY << trDepth, TEXT_LUMA, absPartIdx, fullDepth);
> m_rdGoOnSbacCoder->load(m_rdSbacCoders[fullDepth][CI_TEMP_BEST]);
> }
> @@ -733,13 +714,11 @@
>
> //----- code luma block with given intra prediction mode and store Cbf-----
> cu->setTransformSkipSubParts(0, TEXT_LUMA, absPartIdx, fullDepth);
> - xIntraCodingLumaBlk(cu, trDepth, absPartIdx, fencYuv, predYuv, resiYuv, singleDistY);
> -
> - if (bCheckSplit)
> - singleCbfY = cu->getCbf(absPartIdx, TEXT_LUMA, trDepth);
> + xIntraCodingLumaBlk(cu, absPartIdx, log2TrSize, fencYuv, predYuv, resiYuv, singleCbfY, singleDistY);
> + cu->setCbfSubParts(singleCbfY << trDepth, TEXT_LUMA, absPartIdx, fullDepth);
>
> uint32_t singleBits = xGetIntraBitsQT(cu, trDepth, absPartIdx, 0, true, false);
> - if (m_cfg->m_param->rdPenalty && (trSizeLog2 == 5) && !isIntraSlice)
> + if (m_cfg->m_param->rdPenalty && (log2TrSize == 5) && !isIntraSlice)
> singleBits *= 4;
>
> singleCost = m_rdCost->calcRdCost(singleDistY, singleBits);
> @@ -804,15 +783,16 @@
> cu->setTransformSkipSubParts(bestModeId, TEXT_LUMA, absPartIdx, fullDepth);
>
> //--- set reconstruction for next intra prediction blocks ---
> - uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
> + uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
> uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
> - int16_t* src = m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx);
> + int16_t* reconQt = m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx);
> X265_CHECK(m_qtTempShortYuv[qtLayer].m_width == MAX_CU_SIZE, "width is not max CU size\n");
> - const uint32_t srcstride = MAX_CU_SIZE;
> + const uint32_t reconQtStride = MAX_CU_SIZE;
> +
> pixel* dst = cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);
> uint32_t dststride = cu->getPic()->getPicYuvRec()->getStride();
> - int sizeIdx = trSizeLog2 - 2;
> - primitives.square_copy_sp[sizeIdx](dst, dststride, src, srcstride);
> + int sizeIdx = log2TrSize - 2;
> + primitives.square_copy_sp[sizeIdx](dst, dststride, reconQt, reconQtStride);
> }
>
> outDistY += singleDistY;
> @@ -828,9 +808,9 @@
> TComYuv* reconYuv)
> {
> uint32_t fullDepth = cu->getDepth(0) + trDepth;
> - uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
> - bool bCheckFull = (trSizeLog2 <= cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize());
> - bool bCheckSplit = (trSizeLog2 > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx));
> + uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
> + bool bCheckFull = (log2TrSize <= cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize());
> + bool bCheckSplit = (log2TrSize > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx));
>
> int maxTuSize = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize();
> int isIntraSlice = (cu->getSlice()->getSliceType() == I_SLICE);
> @@ -838,7 +818,7 @@
> if (m_cfg->m_param->rdPenalty == 2 && !isIntraSlice)
> {
> // if maximum RD-penalty don't check TU size 32x32
> - bCheckFull = (trSizeLog2 <= X265_MIN(maxTuSize, 4));
> + bCheckFull = (log2TrSize <= X265_MIN(maxTuSize, 4));
> }
> if (bCheckFull)
> {
> @@ -846,7 +826,7 @@
>
> //----- code luma block with given intra prediction mode and store Cbf-----
> uint32_t lumaPredMode = cu->getLumaIntraDir(absPartIdx);
> - uint32_t tuSize = cu->getCUSize(0) >> trDepth;
> + uint32_t tuSize = 1 << log2TrSize;
> int chFmt = cu->getChromaFormat();
> uint32_t stride = fencYuv->getStride();
> pixel* fenc = fencYuv->getLumaAddr(absPartIdx);
> @@ -867,17 +847,18 @@
> //===== get prediction signal =====
> predIntraLumaAng(lumaPredMode, pred, stride, tuSize);
>
> + cu->setTrIdxSubParts(trDepth, absPartIdx, fullDepth);
> +
> //===== get residual signal =====
> X265_CHECK(!((intptr_t)fenc & (tuSize - 1)), "fenc alignment failure\n");
> X265_CHECK(!((intptr_t)pred & (tuSize - 1)), "pred alignment failure\n");
> X265_CHECK(!((intptr_t)residual & (tuSize - 1)), "residual alignment failure\n");
> - int sizeIdx = g_convertToBit[tuSize];
> + int sizeIdx = log2TrSize - 2;
> primitives.calcresidual[sizeIdx](fenc, pred, residual, stride);
>
> //===== transform and quantization =====
> uint32_t absSum = 0;
> int lastPos = -1;
> - cu->setTrIdxSubParts(trDepth, absPartIdx, fullDepth);
>
> m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
> m_trQuant->selectLambda(TEXT_LUMA);
> @@ -941,18 +922,18 @@
>
> if (trMode == trDepth)
> {
> - uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
> - uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
> + uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
> + uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
>
> //===== copy transform coefficients =====
> - uint32_t numCoeffY = 1 << (trSizeLog2 * 2);
> + uint32_t numCoeffY = 1 << (log2TrSize * 2);
> uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
> - coeff_t* coeffSrcY = m_qtTempCoeff[0][qtlayer] + coeffOffsetY;
> + coeff_t* coeffSrcY = m_qtTempCoeff[0][qtLayer] + coeffOffsetY;
> coeff_t* coeffDestY = cu->getCoeffY() + coeffOffsetY;
> ::memcpy(coeffDestY, coeffSrcY, sizeof(coeff_t) * numCoeffY);
>
> //===== copy reconstruction =====
> - m_qtTempShortYuv[qtlayer].copyPartToPartLuma(reconYuv, absPartIdx, 1 << trSizeLog2);
> + m_qtTempShortYuv[qtLayer].copyPartToPartLuma(reconYuv, absPartIdx, 1 << log2TrSize);
> }
> else
> {
> @@ -964,164 +945,116 @@
> }
> }
>
> -void TEncSearch::xStoreIntraResultQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx)
> +void TEncSearch::xStoreIntraResultQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize)
> {
> - uint32_t fullDepth = cu->getDepth(0) + trDepth;
> - uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
> - uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
> + uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
>
> //===== copy transform coefficients =====
> - uint32_t numCoeffY = 1 << (trSizeLog2 * 2);
> + uint32_t numCoeffY = 1 << (log2TrSize * 2);
> uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
> - coeff_t* coeffSrcY = m_qtTempCoeff[0][qtlayer] + coeffOffsetY;
> + coeff_t* coeffSrcY = m_qtTempCoeff[0][qtLayer] + coeffOffsetY;
> coeff_t* coeffDstY = m_qtTempTUCoeff[0];
> -
> ::memcpy(coeffDstY, coeffSrcY, sizeof(coeff_t) * numCoeffY);
>
> //===== copy reconstruction =====
> - m_qtTempShortYuv[qtlayer].copyPartToPartLuma(&m_qtTempTransformSkipYuv, absPartIdx, 1 << trSizeLog2);
> + pixel* reconTs = m_qtTempTransformSkipYuv.getLumaAddr(absPartIdx);
> + uint32_t reconTsStride = m_qtTempTransformSkipYuv.getStride();
> + int16_t* reconQt = m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx);
> + X265_CHECK(m_qtTempShortYuv[qtLayer].m_width == MAX_CU_SIZE, "width is not max CU size\n");
> + const uint32_t reconQtStride = MAX_CU_SIZE;
> + int sizeIdx = log2TrSize - 2;
> + primitives.square_copy_sp[sizeIdx](reconTs, reconTsStride, reconQt, reconQtStride);
> }
>
> -void TEncSearch::xLoadIntraResultQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx)
> +void TEncSearch::xLoadIntraResultQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize)
> {
> - uint32_t fullDepth = cu->getDepth(0) + trDepth;
> - uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
> - uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
> + uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
>
> //===== copy transform coefficients =====
> - uint32_t numCoeffY = 1 << (trSizeLog2 * 2);
> + uint32_t numCoeffY = 1 << (log2TrSize * 2);
> uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
> - coeff_t* coeffDstY = m_qtTempCoeff[0][qtlayer] + coeffOffsetY;
> + coeff_t* coeffDstY = m_qtTempCoeff[0][qtLayer] + coeffOffsetY;
> coeff_t* coeffSrcY = m_qtTempTUCoeff[0];
> -
> ::memcpy(coeffDstY, coeffSrcY, sizeof(coeff_t) * numCoeffY);
>
> //===== copy reconstruction =====
> - uint32_t trSize = 1 << trSizeLog2;
> - m_qtTempTransformSkipYuv.copyPartToPartLuma(&m_qtTempShortYuv[qtlayer], absPartIdx, trSize);
> -
> - uint32_t zOrder = cu->getZorderIdxInCU() + absPartIdx;
> - pixel* reconIPred = cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zOrder);
> - uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getStride();
> - int16_t* reconQt = m_qtTempShortYuv[qtlayer].getLumaAddr(absPartIdx);
> - X265_CHECK(m_qtTempShortYuv[qtlayer].m_width == MAX_CU_SIZE, "width is not max CU size\n");
> + pixel* reconTs = m_qtTempTransformSkipYuv.getLumaAddr(absPartIdx);
> + uint32_t reconTsStride = m_qtTempTransformSkipYuv.getStride();
> + int16_t* reconQt = m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx);
> + X265_CHECK(m_qtTempShortYuv[qtLayer].m_width == MAX_CU_SIZE, "width is not max CU size\n");
> const uint32_t reconQtStride = MAX_CU_SIZE;
> - int sizeIdx = trSizeLog2 - 2;
> - primitives.square_copy_sp[sizeIdx](reconIPred, reconIPredStride, reconQt, reconQtStride);
> + int sizeIdx = log2TrSize - 2;
> + primitives.square_copy_ps[sizeIdx](reconQt, reconQtStride, reconTs, reconTsStride);
> +
> + uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
> + pixel* reconIPred = cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);
> + uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getStride();
> + primitives.square_copy_pp[sizeIdx](reconIPred, reconIPredStride, reconTs, reconTsStride);
> }
>
> -void TEncSearch::xStoreIntraResultChromaQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t chromaId, const bool splitIntoSubTUs)
> -{
> - assert(chromaId == 1 || chromaId == 2);
> -
> - uint32_t fullDepth = cu->getDepth(0) + trDepth;
> - uint32_t trMode = cu->getTransformIdx(absPartIdx);
> -
> - if (trMode == trDepth)
> - {
> - uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
> - uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
> - int chFmt = cu->getChromaFormat();
> -
> - bool bChromaSame = false;
> - if (trSizeLog2 == 2 && !(chFmt == CHROMA_444))
> - {
> - X265_CHECK(trDepth > 0, "invalid trDepth\n");
> - trDepth--;
> - uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trDepth) << 1);
> - bool bFirstQ = ((absPartIdx & (qpdiv - 1)) == 0);
> - bool bSecondQ = (chFmt == CHROMA_422) ? ((absPartIdx & (qpdiv - 1)) == 2) : false;
> - if ((!bFirstQ) && (!bSecondQ))
> - {
> - return;
> - }
> - bChromaSame = true;
> - }
> - uint32_t width = cu->getCUSize(absPartIdx) >> (trDepth + m_hChromaShift);
> - uint32_t height = cu->getCUSize(absPartIdx) >> (trDepth + m_vChromaShift);
> - height = splitIntoSubTUs ? height >> 1 : height;
> - uint32_t numCoeffC = width * height;
> - uint32_t coeffOffsetC = absPartIdx << (cu->getPic()->getLog2UnitSize() * 2 - (m_hChromaShift + m_vChromaShift));
> -
> - coeff_t* coeffSrc = m_qtTempCoeff[chromaId][qtlayer] + coeffOffsetC;
> - coeff_t* coeffDst = m_qtTempTUCoeff[chromaId];
> - ::memcpy(coeffDst, coeffSrc, sizeof(coeff_t) * numCoeffC);
> -
> - //===== copy reconstruction =====
> - uint32_t lumaSize = 1 << (bChromaSame ? trSizeLog2 + 1 : trSizeLog2);
> - m_qtTempShortYuv[qtlayer].copyPartToPartYuvChroma(&m_qtTempTransformSkipYuv, absPartIdx, lumaSize, chromaId, splitIntoSubTUs);
> - }
> -}
> -
> -void TEncSearch::xLoadIntraResultChromaQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t chromaId)
> +void TEncSearch::xStoreIntraResultChromaQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize, uint32_t log2TrSizeC, uint32_t chromaId)
> {
> X265_CHECK(chromaId == 1 || chromaId == 2, "invalid chroma id");
>
> - uint32_t fullDepth = cu->getDepth(0) + trDepth;
> - uint32_t trMode = cu->getTransformIdx(absPartIdx);
> -
> - if (trMode == trDepth)
> - {
> - uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
> - uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
> - int chFmt = cu->getChromaFormat();
> - const bool splitIntoSubTUs = (chFmt == CHROMA_422);
> -
> - uint32_t trSizeCLog2 = trSizeLog2 - m_hChromaShift;
> - bool bChromaSame = false;
> - if (trSizeLog2 == 2 && !(chFmt == CHROMA_444))
> - {
> - X265_CHECK(trDepth > 0, "invalid trDepth\n");
> - trDepth--;
> - trSizeCLog2++;
> - uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trDepth) << 1);
> - bool bFirstQ = ((absPartIdx & (qpdiv - 1)) == 0);
> - bool bSecondQ = (chFmt == CHROMA_422) ? ((absPartIdx & (qpdiv - 1)) == 2) : false;
> - if ((!bFirstQ) && (!bSecondQ))
> - {
> - return;
> - }
> - bChromaSame = true;
> - }
> -
> - //===== copy transform coefficients =====
> - uint32_t numCoeffC = 1 << (trSizeCLog2 * 2);
> - uint32_t coeffOffsetC = absPartIdx << (cu->getPic()->getLog2UnitSize() * 2 - (m_hChromaShift + m_vChromaShift));
> -
> - coeff_t* coeffDst = m_qtTempCoeff[chromaId][qtlayer] + coeffOffsetC;
> - coeff_t* coeffSrc = m_qtTempTUCoeff[chromaId];
> - ::memcpy(coeffDst, coeffSrc, sizeof(coeff_t) * numCoeffC);
> -
> - //===== copy reconstruction =====
> - uint32_t lumaSize = 1 << (bChromaSame ? trSizeLog2 + 1 : trSizeLog2);
> - m_qtTempTransformSkipYuv.copyPartToPartChroma(&m_qtTempShortYuv[qtlayer], absPartIdx, lumaSize, chromaId, splitIntoSubTUs);
> -
> - uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
> - uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getCStride();
> -
> - pixel* reconIPred = cu->getPic()->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), zorder);
> - int16_t* reconQt = m_qtTempShortYuv[qtlayer].getChromaAddr(chromaId, absPartIdx);
> - uint32_t reconQtStride = m_qtTempShortYuv[qtlayer].m_cwidth;
> - int sizeIdxC = trSizeCLog2 - 2;
> - primitives.square_copy_sp[sizeIdxC](reconIPred, reconIPredStride, reconQt, reconQtStride);
> - }
> + uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
> +
> + //===== copy transform coefficients =====
> + uint32_t numCoeffC = 1 << (log2TrSizeC * 2);
> + uint32_t coeffOffsetC = absPartIdx << (cu->getPic()->getLog2UnitSize() * 2 - (m_hChromaShift + m_vChromaShift));
> + coeff_t* coeffSrcC = m_qtTempCoeff[chromaId][qtLayer] + coeffOffsetC;
> + coeff_t* coeffDstC = m_qtTempTUCoeff[chromaId];
> + ::memcpy(coeffDstC, coeffSrcC, sizeof(coeff_t) * numCoeffC);
> +
> + //===== copy reconstruction =====
> + pixel* reconTs = m_qtTempTransformSkipYuv.getChromaAddr(chromaId, absPartIdx);
> + uint32_t reconTsStride = m_qtTempTransformSkipYuv.getCStride();
> + int16_t* reconQt = m_qtTempShortYuv[qtLayer].getChromaAddr(chromaId, absPartIdx);
> + uint32_t reconQtStride = m_qtTempShortYuv[qtLayer].m_cwidth;
> + int sizeIdxC = log2TrSizeC - 2;
> + primitives.square_copy_sp[sizeIdxC](reconTs, reconTsStride, reconQt, reconQtStride);
> +}
> +
> +void TEncSearch::xLoadIntraResultChromaQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize, uint32_t log2TrSizeC, uint32_t chromaId)
> +{
> + X265_CHECK(chromaId == 1 || chromaId == 2, "invalid chroma id");
> +
> + uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
> +
> + //===== copy transform coefficients =====
> + uint32_t numCoeffC = 1 << (log2TrSizeC * 2);
> + uint32_t coeffOffsetC = absPartIdx << (cu->getPic()->getLog2UnitSize() * 2 - (m_hChromaShift + m_vChromaShift));
> + coeff_t* coeffDstC = m_qtTempCoeff[chromaId][qtLayer] + coeffOffsetC;
> + coeff_t* coeffSrcC = m_qtTempTUCoeff[chromaId];
> + ::memcpy(coeffDstC, coeffSrcC, sizeof(coeff_t) * numCoeffC);
> +
> + //===== copy reconstruction =====
> + pixel* reconTs = m_qtTempTransformSkipYuv.getChromaAddr(chromaId, absPartIdx);
> + uint32_t reconTsStride = m_qtTempTransformSkipYuv.getCStride();
> + int16_t* reconQt = m_qtTempShortYuv[qtLayer].getChromaAddr(chromaId, absPartIdx);
> + uint32_t reconQtStride = m_qtTempShortYuv[qtLayer].m_cwidth;
> + int sizeIdxC = log2TrSizeC - 2;
> + primitives.square_copy_ps[sizeIdxC](reconQt, reconQtStride, reconTs, reconTsStride);
> +
> + uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
> + pixel* reconIPred = cu->getPic()->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), zorder);
> + uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getCStride();
> + primitives.square_copy_pp[sizeIdxC](reconIPred, reconIPredStride, reconTs, reconTsStride);
> }
>
> void TEncSearch::offsetSubTUCBFs(TComDataCU* cu, TextType ttype, uint32_t trDepth, uint32_t absPartIdx)
> {
> uint32_t depth = cu->getDepth(0);
> uint32_t fullDepth = depth + trDepth;
> - uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
> -
> - uint32_t actualTrDepth = trDepth;
> -
> - if ((trSizeLog2 == 2) && !(cu->getChromaFormat() == CHROMA_444))
> + uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
> +
> + uint32_t trDepthC = trDepth;
> + if ((log2TrSize == 2) && !(cu->getChromaFormat() == CHROMA_444))
> {
> - X265_CHECK(actualTrDepth > 0, "actualTrDepth invalid\n");
> - actualTrDepth--;
> + X265_CHECK(trDepthC > 0, "trDepthC invalid\n");
> + trDepthC--;
> }
>
> - uint32_t partIdxesPerSubTU = (cu->getPic()->getNumPartInCU() >> ((depth + actualTrDepth) << 1)) >> 1;
> + uint32_t partIdxesPerSubTU = (cu->getPic()->getNumPartInCU() >> ((depth + trDepthC) << 1)) >> 1;
>
> //move the CBFs down a level and set the parent CBF
> uint8_t subTUCBF[2];
> @@ -1158,15 +1091,16 @@
> if (trMode == trDepth)
> {
> int chFmt = cu->getChromaFormat();
> - uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
> - uint32_t trSizeCLog2 = trSizeLog2 - m_hChromaShift;
> - uint32_t actualTrDepth = trDepth;
> - if ((trSizeLog2 == 2) && !(chFmt == CHROMA_444))
> + uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
> + uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
> +
> + uint32_t trDepthC = trDepth;
> + if ((log2TrSize == 2) && !(chFmt == CHROMA_444))
> {
> X265_CHECK(trDepth > 0, "invalid trDepth\n");
> - actualTrDepth--;
> - trSizeCLog2++;
> - uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + actualTrDepth) << 1);
> + trDepthC--;
> + log2TrSizeC++;
> + uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trDepthC) << 1);
> bool bFirstQ = ((absPartIdx & (qpdiv - 1)) == 0);
> if (!bFirstQ)
> {
> @@ -1174,12 +1108,12 @@
> }
> }
>
> - uint32_t tuSize = cu->getCUSize(0) >> (actualTrDepth + m_hChromaShift);
> + uint32_t tuSize = 1 << log2TrSizeC;
> uint32_t stride = fencYuv->getCStride();
> const bool splitIntoSubTUs = (chFmt == CHROMA_422);
>
> bool checkTransformSkip = (cu->getSlice()->getPPS()->getUseTransformSkip() &&
> - trSizeCLog2 <= LOG2_MAX_TS_SIZE &&
> + log2TrSizeC <= LOG2_MAX_TS_SIZE &&
> !cu->getCUTransquantBypass(0));
>
> if (m_cfg->m_param->bEnableTSkipFast)
> @@ -1200,7 +1134,7 @@
> for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
> {
> TComTURecurse tuIterator;
> - uint32_t curPartNum = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + actualTrDepth) << 1);
> + uint32_t curPartNum = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trDepthC) << 1);
> initSection(&tuIterator, splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdx);
>
> do
> @@ -1209,7 +1143,7 @@
> pixel* pred = predYuv->getChromaAddr(chromaId, absPartIdxC);
>
> //===== init availability pattern =====
> - TComPattern::initAdiPatternChroma(cu, absPartIdxC, actualTrDepth, m_predBuf, chromaId);
> + TComPattern::initAdiPatternChroma(cu, absPartIdxC, trDepthC, m_predBuf, chromaId);
> pixel* chromaPred = TComPattern::getAdiChromaBuf(chromaId, tuSize, m_predBuf);
>
> uint32_t chromaPredMode = cu->getChromaIntraDir(absPartIdxC);
> @@ -1223,6 +1157,8 @@
> //===== get prediction signal =====
> predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, tuSize, chFmt);
>
> + uint32_t singleCbfC = 0;
> +
> if (checkTransformSkip)
> {
> // use RDO to decide whether Cr/Cb takes TS
> @@ -1231,7 +1167,6 @@
> uint64_t singleCost = MAX_INT64;
> int bestModeId = 0;
> uint32_t singleDistC = 0;
> - uint32_t singleCbfC = 0;
> uint32_t singleDistCTmp = 0;
> uint64_t singleCostTmp = 0;
> uint32_t singleCbfCTmp = 0;
> @@ -1243,9 +1178,8 @@
> cu->setTransformSkipPartRange(chromaModeId, (TextType)chromaId, absPartIdxC, tuIterator.m_absPartIdxStep);
>
> singleDistCTmp = 0;
> - xIntraCodingChromaBlk(cu, trDepth, absPartIdxC, tuIterator.m_absPartIdxStep, fencYuv, predYuv, resiYuv, singleDistCTmp, chromaId);
> -
> - singleCbfCTmp = cu->getCbf(absPartIdxC, (TextType)chromaId, trDepth);
> + xIntraCodingChromaBlk(cu, absPartIdxC, log2TrSize, fencYuv, predYuv, resiYuv, singleCbfCTmp, singleDistCTmp, chromaId, log2TrSizeC);
> + cu->setCbfPartRange(singleCbfCTmp << trDepth, (TextType)chromaId, absPartIdxC, tuIterator.m_absPartIdxStep);
>
> if (chromaModeId == 1 && singleCbfCTmp == 0)
> {
> @@ -1267,7 +1201,7 @@
>
> if (bestModeId == firstCheckId)
> {
> - xStoreIntraResultChromaQT(cu, trDepth, absPartIdxC, chromaId, splitIntoSubTUs);
> + xStoreIntraResultChromaQT(cu, absPartIdxC, log2TrSize, log2TrSizeC, chromaId);
> m_rdGoOnSbacCoder->store(m_rdSbacCoders[fullDepth][CI_TEMP_BEST]);
> }
> }
> @@ -1279,7 +1213,7 @@
>
> if (bestModeId == firstCheckId)
> {
> - xLoadIntraResultChromaQT(cu, trDepth, absPartIdxC, chromaId);
> + xLoadIntraResultChromaQT(cu, absPartIdxC, log2TrSize, log2TrSizeC, chromaId);
> cu->setCbfPartRange(singleCbfC << trDepth, (TextType)chromaId, absPartIdxC, tuIterator.m_absPartIdxStep);
>
> m_rdGoOnSbacCoder->load(m_rdSbacCoders[fullDepth][CI_TEMP_BEST]);
> @@ -1297,7 +1231,8 @@
> else
> {
> cu->setTransformSkipPartRange(0, (TextType)chromaId, absPartIdxC, tuIterator.m_absPartIdxStep);
> - xIntraCodingChromaBlk(cu, trDepth, absPartIdxC, tuIterator.m_absPartIdxStep, fencYuv, predYuv, resiYuv, outDist, chromaId);
> + xIntraCodingChromaBlk(cu, absPartIdxC, log2TrSize, fencYuv, predYuv, resiYuv, singleCbfC, outDist, chromaId, log2TrSizeC);
> + cu->setCbfPartRange(singleCbfC << trDepth, (TextType)chromaId, absPartIdxC, tuIterator.m_absPartIdxStep);
> }
> }
> while (isNextSection(&tuIterator));
> @@ -1337,14 +1272,16 @@
> if (trMode == trDepth)
> {
> int chFmt = cu->getChromaFormat();
> - uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
> - uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
> + uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
> + uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
> + uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
>
> bool bChromaSame = false;
> - if ((trSizeLog2 == 2) && !(chFmt == CHROMA_444))
> + if ((log2TrSize == 2) && !(chFmt == CHROMA_444))
> {
> X265_CHECK(trDepth > 0, "invalid trDepth\n");
> trDepth--;
> + log2TrSizeC++;
> uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trDepth) << 1);
> if ((absPartIdx & (qpdiv - 1)) != 0)
> {
> @@ -1355,20 +1292,18 @@
>
> //===== copy transform coefficients =====
>
> - uint32_t width = cu->getCUSize(absPartIdx) >> (trDepth + m_hChromaShift);
> - uint32_t height = cu->getCUSize(absPartIdx) >> (trDepth + m_vChromaShift);
> - uint32_t numCoeffC = width * height;
> + uint32_t numCoeffC = 1 << (log2TrSizeC * 2 + (chFmt == CHROMA_422));
> uint32_t coeffOffsetC = absPartIdx << (cu->getPic()->getLog2UnitSize() * 2 - (m_hChromaShift + m_vChromaShift));
>
> - coeff_t* coeffSrcU = m_qtTempCoeff[1][qtlayer] + coeffOffsetC;
> - coeff_t* coeffSrcV = m_qtTempCoeff[2][qtlayer] + coeffOffsetC;
> + coeff_t* coeffSrcU = m_qtTempCoeff[1][qtLayer] + coeffOffsetC;
> + coeff_t* coeffSrcV = m_qtTempCoeff[2][qtLayer] + coeffOffsetC;
> coeff_t* coeffDstU = cu->getCoeffCb() + coeffOffsetC;
> coeff_t* coeffDstV = cu->getCoeffCr() + coeffOffsetC;
> ::memcpy(coeffDstU, coeffSrcU, sizeof(coeff_t) * numCoeffC);
> ::memcpy(coeffDstV, coeffSrcV, sizeof(coeff_t) * numCoeffC);
>
> //===== copy reconstruction =====
> - m_qtTempShortYuv[qtlayer].copyPartToPartChroma(reconYuv, absPartIdx, 1 << trSizeLog2, (bChromaSame && (chFmt != CHROMA_422)));
> + m_qtTempShortYuv[qtLayer].copyPartToPartChroma(reconYuv, absPartIdx, 1 << log2TrSize, (bChromaSame && (chFmt != CHROMA_422)));
> }
> else
> {
> @@ -1394,14 +1329,16 @@
> if (trMode == trDepth)
> {
> int chFmt = cu->getChromaFormat();
> - uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
> + uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
> + uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
> uint32_t origTrDepth = trDepth;
> - uint32_t actualTrDepth = trDepth;
> - if ((trSizeLog2 == 2) && !(chFmt == CHROMA_444))
> + uint32_t trDepthC = trDepth;
> + if ((log2TrSize == 2) && !(chFmt == CHROMA_444))
> {
> X265_CHECK(trDepth > 0, "invalid trDepth\n");
> - actualTrDepth--;
> - uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + actualTrDepth) << 1);
> + trDepthC--;
> + log2TrSizeC++;
> + uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trDepthC) << 1);
> bool bFirstQ = ((absPartIdx & (qpdiv - 1)) == 0);
> if (!bFirstQ)
> {
> @@ -1409,16 +1346,16 @@
> }
> }
>
> - uint32_t tuSize = cu->getCUSize(0) >> (actualTrDepth + m_hChromaShift);
> + uint32_t tuSize = 1 << log2TrSizeC;
> uint32_t stride = fencYuv->getCStride();
> const bool splitIntoSubTUs = (chFmt == CHROMA_422);
> - int sizeIdx = g_convertToBit[tuSize];
> + int sizeIdxC = log2TrSizeC - 2;
> int part = partitionFromSize(tuSize);
>
> for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
> {
> TComTURecurse tuIterator;
> - uint32_t curPartNum = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + actualTrDepth) << 1);
> + uint32_t curPartNum = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trDepthC) << 1);
> initSection(&tuIterator, splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdx);
>
> do
> @@ -1436,8 +1373,8 @@
> pixel* reconIPred = cu->getPic()->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), zorder);
> uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getCStride();
>
> - //bool useTransformSkipChroma = cu->getTransformSkip(absPartIdxC, ttype);
> - const bool useTransformSkipChroma = false;
> + //bool useTransformSkipC = cu->getTransformSkip(absPartIdxC, ttype);
> + const bool useTransformSkipC = false;
> cu->setTransformSkipPartRange(0, ttype, absPartIdxC, tuIterator.m_absPartIdxStep);
>
> uint32_t chromaPredMode = cu->getChromaIntraDir(absPartIdxC);
> @@ -1449,7 +1386,7 @@
> }
> chromaPredMode = (chFmt == CHROMA_422) ? g_chroma422IntraAngleMappingTable[chromaPredMode] : chromaPredMode;
> //===== init availability pattern =====
> - TComPattern::initAdiPatternChroma(cu, absPartIdxC, actualTrDepth, m_predBuf, chromaId);
> + TComPattern::initAdiPatternChroma(cu, absPartIdxC, trDepthC, m_predBuf, chromaId);
> pixel* chromaPred = TComPattern::getAdiChromaBuf(chromaId, tuSize, m_predBuf);
>
> //===== get prediction signal =====
> @@ -1459,7 +1396,7 @@
> X265_CHECK(!((intptr_t)fenc & (tuSize - 1)), "fenc alignment failure\n");
> X265_CHECK(!((intptr_t)pred & (tuSize - 1)), "pred alignment failure\n");
> X265_CHECK(!((intptr_t)residual & (tuSize - 1)), "residual alignment failure\n");
> - primitives.calcresidual[sizeIdx](fenc, pred, residual, stride);
> + primitives.calcresidual[sizeIdxC](fenc, pred, residual, stride);
>
> //--- transform and quantization ---
> uint32_t absSum = 0;
> @@ -1478,7 +1415,7 @@
>
> m_trQuant->selectLambda(TEXT_CHROMA);
>
> - absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, tuSize, ttype, absPartIdxC, &lastPos, useTransformSkipChroma);
> + absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, tuSize, ttype, absPartIdxC, &lastPos, useTransformSkipC);
>
> //--- set coded block flag ---
> cu->setCbfPartRange((((absSum > 0) ? 1 : 0) << origTrDepth), ttype, absPartIdxC, tuIterator.m_absPartIdxStep);
> @@ -1488,12 +1425,12 @@
> //--- inverse transform ---
> int scalingListType = 0 + ttype;
> X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);
> - m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, residual, stride, coeff, tuSize, scalingListType, useTransformSkipChroma, lastPos);
> + m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, residual, stride, coeff, tuSize, scalingListType, useTransformSkipC, lastPos);
>
> //===== reconstruction =====
> // use square primitives
> primitives.chroma[CHROMA_444].add_ps[part](recon, stride, pred, residual, stride, stride);
> - primitives.square_copy_pp[sizeIdx](reconIPred, reconIPredStride, recon, stride);
> + primitives.square_copy_pp[sizeIdxC](reconIPred, reconIPredStride, recon, stride);
> }
> else
> {
> @@ -1502,8 +1439,8 @@
> #endif
>
> //===== reconstruction =====
> - primitives.square_copy_pp[sizeIdx](recon, stride, pred, stride);
> - primitives.square_copy_pp[sizeIdx](reconIPred, reconIPredStride, pred, stride);
> + primitives.square_copy_pp[sizeIdxC](recon, stride, pred, stride);
> + primitives.square_copy_pp[sizeIdxC](reconIPred, reconIPredStride, pred, stride);
> }
> }
> while (isNextSection(&tuIterator));
> @@ -2840,17 +2777,17 @@
> {
> X265_CHECK(cu->getDepth(0) == cu->getDepth(absPartIdx), "invalid depth\n");
> const uint32_t trMode = depth - cu->getDepth(0);
> - const uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - depth;
> + const uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - depth;
> const uint32_t setCbf = 1 << trMode;
> int chFmt = cu->getChromaFormat();
>
> bool bSplitFlag = ((cu->getSlice()->getSPS()->getQuadtreeTUMaxDepthInter() == 1) && cu->getPredictionMode(absPartIdx) == MODE_INTER && (cu->getPartitionSize(absPartIdx) != SIZE_2Nx2N));
> bool bCheckFull;
> - if (bSplitFlag && depth == cu->getDepth(absPartIdx) && (trSizeLog2 > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx)))
> + if (bSplitFlag && depth == cu->getDepth(absPartIdx) && (log2TrSize > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx)))
> bCheckFull = false;
> else
> - bCheckFull = (trSizeLog2 <= cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize());
> - const bool bCheckSplit = (trSizeLog2 > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx));
> + bCheckFull = (log2TrSize <= cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize());
> + const bool bCheckSplit = (log2TrSize > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx));
> X265_CHECK(bCheckFull || bCheckSplit, "check-full or check-split must be set\n");
>
> // code full block
> @@ -2858,12 +2795,12 @@
> int lastPosY = -1, lastPosU = -1, lastPosV = -1;
> if (bCheckFull)
> {
> - uint32_t trSizeCLog2 = trSizeLog2 - m_hChromaShift;
> + uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
> bool bCodeChroma = true;
> uint32_t trModeC = trMode;
> - if ((trSizeLog2 == 2) && !(chFmt == CHROMA_444))
> + if ((log2TrSize == 2) && !(chFmt == CHROMA_444))
> {
> - trSizeCLog2++;
> + log2TrSizeC++;
> trModeC--;
> uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((depth - 1) << 1);
> bCodeChroma = ((absPartIdx & (qpdiv - 1)) == 0);
> @@ -2878,10 +2815,10 @@
> coeff_t *coeffCurU = cu->getCoeffCb() + coeffOffsetC;
> coeff_t *coeffCurV = cu->getCoeffCr() + coeffOffsetC;
>
> - uint32_t trSize = 1 << trSizeLog2;
> - uint32_t trSizeC = 1 << trSizeCLog2;
> - uint32_t sizeIdx = trSizeLog2 - 2;
> - uint32_t sizeIdxC = trSizeCLog2 - 2;
> + uint32_t trSize = 1 << log2TrSize;
> + uint32_t trSizeC = 1 << log2TrSizeC;
> + uint32_t sizeIdx = log2TrSize - 2;
> + uint32_t sizeIdxC = log2TrSizeC - 2;
> cu->setTrIdxSubParts(depth - cu->getDepth(0), absPartIdx, depth);
>
> cu->setTransformSkipSubParts(0, TEXT_LUMA, absPartIdx, depth);
> @@ -3026,26 +2963,26 @@
> {
> X265_CHECK(cu->getDepth(0) == cu->getDepth(absPartIdx), "depth not matching\n");
> const uint32_t trMode = depth - cu->getDepth(0);
> - const uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - depth;
> + const uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - depth;
> const uint32_t subTUDepth = trMode + 1;
> const uint32_t setCbf = 1 << trMode;
> int chFmt = cu->getChromaFormat();
>
> bool bSplitFlag = ((cu->getSlice()->getSPS()->getQuadtreeTUMaxDepthInter() == 1) && cu->getPredictionMode(absPartIdx) == MODE_INTER && (cu->getPartitionSize(absPartIdx) != SIZE_2Nx2N));
> bool bCheckFull;
> - if (bSplitFlag && depth == cu->getDepth(absPartIdx) && (trSizeLog2 > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx)))
> + if (bSplitFlag && depth == cu->getDepth(absPartIdx) && (log2TrSize > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx)))
> bCheckFull = false;
> else
> - bCheckFull = (trSizeLog2 <= cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize());
> - const bool bCheckSplit = (trSizeLog2 > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx));
> + bCheckFull = (log2TrSize <= cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize());
> + const bool bCheckSplit = (log2TrSize > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx));
> X265_CHECK(bCheckFull || bCheckSplit, "check-full or check-split must be set\n");
>
> - uint32_t trSizeCLog2 = trSizeLog2 - m_hChromaShift;
> + uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
> bool bCodeChroma = true;
> uint32_t trModeC = trMode;
> - if ((trSizeLog2 == 2) && !(chFmt == CHROMA_444))
> + if ((log2TrSize == 2) && !(chFmt == CHROMA_444))
> {
> - trSizeCLog2++;
> + log2TrSizeC++;
> trModeC--;
> uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((depth - 1) << 1);
> bCodeChroma = ((absPartIdx & (qpdiv - 1)) == 0);
> @@ -3066,27 +3003,27 @@
> uint32_t bestsubTUCBF[MAX_NUM_COMPONENT][2];
> m_rdGoOnSbacCoder->store(m_rdSbacCoders[depth][CI_QT_TRAFO_ROOT]);
>
> - uint32_t trSize = 1 << trSizeLog2;
> + uint32_t trSize = 1 << log2TrSize;
> const bool splitIntoSubTUs = (chFmt == CHROMA_422);
> uint32_t absPartIdxStep = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trModeC) << 1);
>
> // code full block
> if (bCheckFull)
> {
> - uint32_t trSizeC = 1 << trSizeCLog2;
> - int sizeIdx = trSizeLog2 - 2;
> - int sizeIdxC = trSizeCLog2 - 2;
> - const uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
> + uint32_t trSizeC = 1 << log2TrSizeC;
> + int sizeIdx = log2TrSize - 2;
> + int sizeIdxC = log2TrSizeC - 2;
> + const uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
> uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
> uint32_t coeffOffsetC = coeffOffsetY >> (m_hChromaShift + m_vChromaShift);
> - coeff_t *coeffCurY = m_qtTempCoeff[0][qtlayer] + coeffOffsetY;
> - coeff_t *coeffCurU = m_qtTempCoeff[1][qtlayer] + coeffOffsetC;
> - coeff_t *coeffCurV = m_qtTempCoeff[2][qtlayer] + coeffOffsetC;
> + coeff_t *coeffCurY = m_qtTempCoeff[0][qtLayer] + coeffOffsetY;
> + coeff_t *coeffCurU = m_qtTempCoeff[1][qtLayer] + coeffOffsetC;
> + coeff_t *coeffCurV = m_qtTempCoeff[2][qtLayer] + coeffOffsetC;
>
> cu->setTrIdxSubParts(depth - cu->getDepth(0), absPartIdx, depth);
> bool checkTransformSkip = cu->getSlice()->getPPS()->getUseTransformSkip() && !cu->getCUTransquantBypass(0);
> - bool checkTransformSkipY = checkTransformSkip && trSizeLog2 <= LOG2_MAX_TS_SIZE;
> - bool checkTransformSkipUV = checkTransformSkip && trSizeCLog2 <= LOG2_MAX_TS_SIZE;
> + bool checkTransformSkipY = checkTransformSkip && log2TrSize <= LOG2_MAX_TS_SIZE;
> + bool checkTransformSkipUV = checkTransformSkip && log2TrSizeC <= LOG2_MAX_TS_SIZE;
>
> cu->setTransformSkipSubParts(0, TEXT_LUMA, absPartIdx, depth);
>
> @@ -3161,8 +3098,8 @@
> while (isNextSection(&tuIterator));
> }
>
> - const uint32_t numCoeffY = 1 << (trSizeLog2 * 2);
> - const uint32_t numCoeffC = 1 << (trSizeCLog2 * 2);
> + const uint32_t numCoeffY = 1 << (log2TrSize * 2);
> + const uint32_t numCoeffC = 1 << (log2TrSizeC * 2);
>
> for (uint32_t subTUIndex = 0; subTUIndex < 2; subTUIndex++)
> {
> @@ -3173,10 +3110,10 @@
>
> int partSize = partitionFromSize(trSize);
> uint32_t distY = primitives.sse_sp[partSize](resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width, (pixel*)RDCost::zeroPel, trSize);
> - int16_t *curResiY = m_qtTempShortYuv[qtlayer].getLumaAddr(absPartIdx);
> - X265_CHECK(m_qtTempShortYuv[qtlayer].m_width == MAX_CU_SIZE, "width not full CU\n");
> + int16_t *curResiY = m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx);
> + X265_CHECK(m_qtTempShortYuv[qtLayer].m_width == MAX_CU_SIZE, "width not full CU\n");
> const uint32_t strideResiY = MAX_CU_SIZE;
> - const uint32_t strideResiC = m_qtTempShortYuv[qtlayer].m_cwidth;
> + const uint32_t strideResiC = m_qtTempShortYuv[qtLayer].m_cwidth;
>
> if (outZeroDist)
> {
> @@ -3253,8 +3190,8 @@
> uint32_t absPartIdxC = tuIterator.m_absPartIdxTURelCU;
> uint32_t subTUBufferOffset = trSizeC * trSizeC * tuIterator.m_section;
>
> - int16_t *curResiU = m_qtTempShortYuv[qtlayer].getCbAddr(absPartIdxC);
> - int16_t *curResiV = m_qtTempShortYuv[qtlayer].getCrAddr(absPartIdxC);
> + int16_t *curResiU = m_qtTempShortYuv[qtLayer].getCbAddr(absPartIdxC);
> + int16_t *curResiV = m_qtTempShortYuv[qtLayer].getCrAddr(absPartIdxC);
>
> distU = m_rdCost->scaleChromaDistCb(primitives.sse_sp[partSizeC](resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth, (pixel*)RDCost::zeroPel, trSizeC));
>
> @@ -3399,11 +3336,8 @@
> uint32_t nonZeroDistY = 0, absSumTransformSkipY;
> uint64_t singleCostY = MAX_INT64;
>
> - coeff_t bestCoeffY[MAX_TS_SIZE * MAX_TS_SIZE];
> - memcpy(bestCoeffY, coeffCurY, sizeof(coeff_t) * numCoeffY);
> -
> - int16_t bestResiY[MAX_TS_SIZE * MAX_TS_SIZE];
> - primitives.square_copy_ss[sizeIdx](bestResiY, trSize, curResiY, strideResiY);
> + ALIGN_VAR_32(coeff_t, tsCoeffY[MAX_TS_SIZE * MAX_TS_SIZE]);
> + ALIGN_VAR_32(int16_t, tsResiY[MAX_TS_SIZE * MAX_TS_SIZE]);
>
> m_rdGoOnSbacCoder->load(m_rdSbacCoders[depth][CI_QT_TRAFO_ROOT]);
>
> @@ -3417,7 +3351,7 @@
> m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
>
> m_trQuant->selectLambda(TEXT_LUMA);
> - absSumTransformSkipY = m_trQuant->transformNxN(cu, resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width, coeffCurY,
> + absSumTransformSkipY = m_trQuant->transformNxN(cu, resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width, tsCoeffY,
> trSize, TEXT_LUMA, absPartIdx, &lastPosTransformSkip[TEXT_LUMA][0], true, curuseRDOQ);
> cu->setCbfSubParts(absSumTransformSkipY ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);
>
> @@ -3425,7 +3359,7 @@
> {
> m_entropyCoder->resetBits();
> m_entropyCoder->encodeQtCbf(cu, absPartIdx, 0, trSize, trSize, TEXT_LUMA, trMode, true);
> - m_entropyCoder->encodeCoeffNxN(cu, coeffCurY, absPartIdx, trSize, TEXT_LUMA);
> + m_entropyCoder->encodeCoeffNxN(cu, tsCoeffY, absPartIdx, trSize, TEXT_LUMA);
> const uint32_t skipSingleBitsY = m_entropyCoder->getNumberOfWrittenBits();
>
> m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
> @@ -3433,10 +3367,10 @@
> int scalingListType = 3 + TEXT_LUMA;
> X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);
>
> - m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiY, strideResiY, coeffCurY, trSize, scalingListType, true, lastPosTransformSkip[TEXT_LUMA][0]);
> + m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, tsResiY, trSize, tsCoeffY, trSize, scalingListType, true, lastPosTransformSkip[TEXT_LUMA][0]);
>
> nonZeroDistY = primitives.sse_ss[partSize](resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width,
> - curResiY, strideResiY);
> + tsResiY, trSize);
>
> singleCostY = m_rdCost->calcRdCost(nonZeroDistY, skipSingleBitsY);
> }
> @@ -3444,14 +3378,14 @@
> if (!absSumTransformSkipY || minCost[TEXT_LUMA][0] < singleCostY)
> {
> cu->setTransformSkipSubParts(0, TEXT_LUMA, absPartIdx, depth);
> - memcpy(coeffCurY, bestCoeffY, sizeof(coeff_t) * numCoeffY);
> - primitives.square_copy_ss[sizeIdx](curResiY, strideResiY, bestResiY, trSize);
> }
> else
> {
> singleDistComp[TEXT_LUMA][0] = nonZeroDistY;
> absSum[TEXT_LUMA][0] = absSumTransformSkipY;
> bestTransformMode[TEXT_LUMA][0] = 1;
> + memcpy(coeffCurY, tsCoeffY, sizeof(coeff_t) * numCoeffY);
> + primitives.square_copy_ss[sizeIdx](curResiY, strideResiY, tsResiY, trSize);
> }
>
> cu->setCbfSubParts(absSum[TEXT_LUMA][0] ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);
> @@ -3475,16 +3409,13 @@
> uint32_t absPartIdxC = tuIterator.m_absPartIdxTURelCU;
> uint32_t subTUBufferOffset = trSizeC * trSizeC * tuIterator.m_section;
>
> - int16_t *curResiU = m_qtTempShortYuv[qtlayer].getCbAddr(absPartIdxC);
> - int16_t *curResiV = m_qtTempShortYuv[qtlayer].getCrAddr(absPartIdxC);
> -
> - coeff_t bestCoeffU[MAX_TS_SIZE * MAX_TS_SIZE], bestCoeffV[MAX_TS_SIZE * MAX_TS_SIZE];
> - memcpy(bestCoeffU, coeffCurU + subTUBufferOffset, sizeof(coeff_t) * numCoeffC);
> - memcpy(bestCoeffV, coeffCurV + subTUBufferOffset, sizeof(coeff_t) * numCoeffC);
> -
> - int16_t bestResiU[MAX_TS_SIZE * MAX_TS_SIZE], bestResiV[MAX_TS_SIZE * MAX_TS_SIZE];
> - primitives.square_copy_ss[sizeIdxC](bestResiU, trSizeC, curResiU, strideResiC);
> - primitives.square_copy_ss[sizeIdxC](bestResiV, trSizeC, curResiV, strideResiC);
> + int16_t *curResiU = m_qtTempShortYuv[qtLayer].getCbAddr(absPartIdxC);
> + int16_t *curResiV = m_qtTempShortYuv[qtLayer].getCrAddr(absPartIdxC);
> +
> + ALIGN_VAR_32(coeff_t, tsCoeffU[MAX_TS_SIZE * MAX_TS_SIZE]);
> + ALIGN_VAR_32(int16_t, tsResiU[MAX_TS_SIZE * MAX_TS_SIZE]);
> + ALIGN_VAR_32(coeff_t, tsCoeffV[MAX_TS_SIZE * MAX_TS_SIZE]);
> + ALIGN_VAR_32(int16_t, tsResiV[MAX_TS_SIZE * MAX_TS_SIZE]);
>
> cu->setTransformSkipPartRange(1, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
> cu->setTransformSkipPartRange(1, TEXT_CHROMA_V, absPartIdxC, tuIterator.m_absPartIdxStep);
> @@ -3498,11 +3429,11 @@
> m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
> m_trQuant->selectLambda(TEXT_CHROMA);
>
> - absSumTransformSkipU = m_trQuant->transformNxN(cu, resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth, coeffCurU + subTUBufferOffset,
> + absSumTransformSkipU = m_trQuant->transformNxN(cu, resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth, tsCoeffU,
> trSizeC, TEXT_CHROMA_U, absPartIdxC, &lastPosTransformSkip[TEXT_CHROMA_U][tuIterator.m_section], true, curuseRDOQ);
> curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
> m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
> - absSumTransformSkipV = m_trQuant->transformNxN(cu, resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth, coeffCurV + subTUBufferOffset,
> + absSumTransformSkipV = m_trQuant->transformNxN(cu, resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth, tsCoeffV,
> trSizeC, TEXT_CHROMA_V, absPartIdxC, &lastPosTransformSkip[TEXT_CHROMA_V][tuIterator.m_section], true, curuseRDOQ);
>
> cu->setCbfPartRange(absSumTransformSkipU ? setCbf : 0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
> @@ -3514,7 +3445,7 @@
> if (absSumTransformSkipU)
> {
> m_entropyCoder->encodeQtCbf(cu, absPartIdxC, tuIterator.m_absPartIdxStep, trSizeC, trSizeC, TEXT_CHROMA_U, trMode, true);
> - m_entropyCoder->encodeCoeffNxN(cu, coeffCurU + subTUBufferOffset, absPartIdxC, trSizeC, TEXT_CHROMA_U);
> + m_entropyCoder->encodeCoeffNxN(cu, tsCoeffU, absPartIdxC, trSizeC, TEXT_CHROMA_U);
> singleBitsComp[TEXT_CHROMA_U][tuIterator.m_section] = m_entropyCoder->getNumberOfWrittenBits();
>
> curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();
> @@ -3522,10 +3453,10 @@
>
> int scalingListType = 3 + TEXT_CHROMA_U;
> X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);
> - m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiU, strideResiC, coeffCurU + subTUBufferOffset,
> + m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, tsResiU, trSizeC, tsCoeffU,
> trSizeC, scalingListType, true, lastPosTransformSkip[TEXT_CHROMA_U][tuIterator.m_section]);
> uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth,
> - curResiU, strideResiC);
> + tsResiU, trSizeC);
> nonZeroDistU = m_rdCost->scaleChromaDistCb(dist);
> singleCostU = m_rdCost->calcRdCost(nonZeroDistU, singleBitsComp[TEXT_CHROMA_U][tuIterator.m_section]);
> }
> @@ -3533,21 +3464,20 @@
> if (!absSumTransformSkipU || minCost[TEXT_CHROMA_U][tuIterator.m_section] < singleCostU)
> {
> cu->setTransformSkipPartRange(0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
> -
> - memcpy(coeffCurU + subTUBufferOffset, bestCoeffU, sizeof(coeff_t) * numCoeffC);
> - primitives.square_copy_ss[sizeIdxC](curResiU, strideResiC, bestResiU, trSizeC);
> }
> else
> {
> singleDistComp[TEXT_CHROMA_U][tuIterator.m_section] = nonZeroDistU;
> absSum[TEXT_CHROMA_U][tuIterator.m_section] = absSumTransformSkipU;
> bestTransformMode[TEXT_CHROMA_U][tuIterator.m_section] = 1;
> + memcpy(coeffCurU + subTUBufferOffset, tsCoeffU, sizeof(coeff_t) * numCoeffC);
> + primitives.square_copy_ss[sizeIdxC](curResiU, strideResiC, tsResiU, trSizeC);
> }
>
> if (absSumTransformSkipV)
> {
> m_entropyCoder->encodeQtCbf(cu, absPartIdxC, tuIterator.m_absPartIdxStep, trSizeC, trSizeC, TEXT_CHROMA_V, trMode, true);
> - m_entropyCoder->encodeCoeffNxN(cu, coeffCurV + subTUBufferOffset, absPartIdxC, trSizeC, TEXT_CHROMA_V);
> + m_entropyCoder->encodeCoeffNxN(cu, tsCoeffV, absPartIdxC, trSizeC, TEXT_CHROMA_V);
> singleBitsComp[TEXT_CHROMA_V][tuIterator.m_section] = m_entropyCoder->getNumberOfWrittenBits() - singleBitsComp[TEXT_CHROMA_U][tuIterator.m_section];
>
> curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
> @@ -3555,10 +3485,10 @@
>
> int scalingListType = 3 + TEXT_CHROMA_V;
> X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);
> - m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiV, strideResiC, coeffCurV + subTUBufferOffset,
> + m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, tsResiV, trSizeC, tsCoeffV,
> trSizeC, scalingListType, true, lastPosTransformSkip[TEXT_CHROMA_V][tuIterator.m_section]);
> uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth,
> - curResiV, strideResiC);
> + tsResiV, trSizeC);
> nonZeroDistV = m_rdCost->scaleChromaDistCr(dist);
> singleCostV = m_rdCost->calcRdCost(nonZeroDistV, singleBitsComp[TEXT_CHROMA_V][tuIterator.m_section]);
> }
> @@ -3566,15 +3496,14 @@
> if (!absSumTransformSkipV || minCost[TEXT_CHROMA_V][tuIterator.m_section] < singleCostV)
> {
> cu->setTransformSkipPartRange(0, TEXT_CHROMA_V, absPartIdxC, tuIterator.m_absPartIdxStep);
> -
> - memcpy(coeffCurV + subTUBufferOffset, bestCoeffV, sizeof(coeff_t) * numCoeffC);
> - primitives.square_copy_ss[sizeIdxC](curResiV, strideResiC, bestResiV, trSizeC);
> }
> else
> {
> singleDistComp[TEXT_CHROMA_V][tuIterator.m_section] = nonZeroDistV;
> absSum[TEXT_CHROMA_V][tuIterator.m_section] = absSumTransformSkipV;
> bestTransformMode[TEXT_CHROMA_V][tuIterator.m_section] = 1;
> + memcpy(coeffCurV + subTUBufferOffset, tsCoeffV, sizeof(coeff_t) * numCoeffC);
> + primitives.square_copy_ss[sizeIdxC](curResiV, strideResiC, tsResiV, trSizeC);
> }
>
> cu->setCbfPartRange(absSum[TEXT_CHROMA_U][tuIterator.m_section] ? setCbf : 0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
> @@ -3588,9 +3517,9 @@
>
> m_entropyCoder->resetBits();
>
> - if (trSizeLog2 > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx))
> + if (log2TrSize > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx))
> {
> - m_entropyCoder->encodeTransformSubdivFlag(0, 5 - trSizeLog2);
> + m_entropyCoder->encodeTransformSubdivFlag(0, 5 - log2TrSize);
> }
>
> if (bCodeChroma)
> @@ -3793,21 +3722,21 @@
> const uint32_t curTrMode = depth - cu->getDepth(0);
> const uint32_t trMode = cu->getTransformIdx(absPartIdx);
> const bool bSubdiv = curTrMode != trMode;
> - const uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - depth;
> - uint32_t trSizeCLog2 = trSizeLog2 - m_hChromaShift;
> + const uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - depth;
> + uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
> int chFmt = cu->getChromaFormat();
> const bool splitIntoSubTUs = (chFmt == CHROMA_422);
>
> - if (bSubdivAndCbf && trSizeLog2 <= cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() && trSizeLog2 > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx))
> + if (bSubdivAndCbf && log2TrSize <= cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() && log2TrSize > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx))
> {
> - m_entropyCoder->encodeTransformSubdivFlag(bSubdiv, 5 - trSizeLog2);
> + m_entropyCoder->encodeTransformSubdivFlag(bSubdiv, 5 - log2TrSize);
> }
>
> X265_CHECK(cu->getPredictionMode(absPartIdx) != MODE_INTRA, "xEncodeResidualQT() with intra block\n");
>
> bool mCodeAll = true;
> - uint32_t trSize = 1 << trSizeLog2;
> - uint32_t trWidthC = 1 << trSizeCLog2;
> + uint32_t trSize = 1 << log2TrSize;
> + uint32_t trWidthC = 1 << log2TrSizeC;
> uint32_t trHeightC = splitIntoSubTUs ? (trWidthC << 1) : trWidthC;
>
> const uint32_t numPels = trWidthC * trHeightC;
> @@ -3841,16 +3770,16 @@
> if (!bSubdiv)
> {
> //Luma
> - const uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
> + const uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
> uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
> - coeff_t *coeffCurY = m_qtTempCoeff[0][qtlayer] + coeffOffsetY;
> + coeff_t *coeffCurY = m_qtTempCoeff[0][qtLayer] + coeffOffsetY;
>
> //Chroma
> bool bCodeChroma = true;
> uint32_t trModeC = trMode;
> - if ((trSizeLog2 == 2) && !(chFmt == CHROMA_444))
> + if ((log2TrSize == 2) && !(chFmt == CHROMA_444))
> {
> - trSizeCLog2++;
> + log2TrSizeC++;
> trModeC--;
> uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((depth - 1) << 1);
> bCodeChroma = ((absPartIdx & (qpdiv - 1)) == 0);
> @@ -3869,9 +3798,9 @@
> if (bCodeChroma)
> {
> uint32_t coeffOffsetC = coeffOffsetY >> (m_hChromaShift + m_vChromaShift);
> - coeff_t *coeffCurU = m_qtTempCoeff[1][qtlayer] + coeffOffsetC;
> - coeff_t *coeffCurV = m_qtTempCoeff[2][qtlayer] + coeffOffsetC;
> - uint32_t trSizeC = 1 << trSizeCLog2;
> + coeff_t *coeffCurU = m_qtTempCoeff[1][qtLayer] + coeffOffsetC;
> + coeff_t *coeffCurV = m_qtTempCoeff[2][qtLayer] + coeffOffsetC;
> + uint32_t trSizeC = 1 << log2TrSizeC;
>
> if (!splitIntoSubTUs)
> {
> @@ -3928,16 +3857,16 @@
> if (curTrMode == trMode)
> {
> int chFmt = cu->getChromaFormat();
> - const uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - depth;
> - const uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
> -
> - uint32_t trSizeCLog2 = trSizeLog2 - m_hChromaShift;
> + const uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - depth;
> + const uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
> +
> + uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
> bool bCodeChroma = true;
> bool bChromaSame = false;
> uint32_t trModeC = trMode;
> - if ((trSizeLog2 == 2) && !(chFmt == CHROMA_444))
> + if ((log2TrSize == 2) && !(chFmt == CHROMA_444))
> {
> - trSizeCLog2++;
> + log2TrSizeC++;
> trModeC--;
> uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trModeC) << 1);
> bCodeChroma = ((absPartIdx & (qpdiv - 1)) == 0);
> @@ -3946,28 +3875,28 @@
>
> if (bSpatial)
> {
> - uint32_t trSize = 1 << trSizeLog2;
> - m_qtTempShortYuv[qtlayer].copyPartToPartLuma(resiYuv, absPartIdx, trSize);
> + uint32_t trSize = 1 << log2TrSize;
> + m_qtTempShortYuv[qtLayer].copyPartToPartLuma(resiYuv, absPartIdx, trSize);
>
> if (bCodeChroma)
> {
> - m_qtTempShortYuv[qtlayer].copyPartToPartChroma(resiYuv, absPartIdx, trSize, (bChromaSame && (chFmt != CHROMA_422)));
> + m_qtTempShortYuv[qtLayer].copyPartToPartChroma(resiYuv, absPartIdx, trSize, (bChromaSame && (chFmt != CHROMA_422)));
> }
> }
> else
> {
> - uint32_t numCoeffY = 1 << (trSizeLog2 * 2);
> + uint32_t numCoeffY = 1 << (log2TrSize * 2);
> uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
> - coeff_t* coeffSrcY = m_qtTempCoeff[0][qtlayer] + coeffOffsetY;
> + coeff_t* coeffSrcY = m_qtTempCoeff[0][qtLayer] + coeffOffsetY;
> coeff_t* coeffDstY = cu->getCoeffY() + coeffOffsetY;
> ::memcpy(coeffDstY, coeffSrcY, sizeof(coeff_t) * numCoeffY);
> if (bCodeChroma)
> {
> - uint32_t numCoeffC = 1 << (trSizeCLog2 * 2 + (chFmt == CHROMA_422));
> + uint32_t numCoeffC = 1 << (log2TrSizeC * 2 + (chFmt == CHROMA_422));
> uint32_t coeffOffsetC = coeffOffsetY >> (m_hChromaShift + m_vChromaShift);
>
> - coeff_t* coeffSrcU = m_qtTempCoeff[1][qtlayer] + coeffOffsetC;
> - coeff_t* coeffSrcV = m_qtTempCoeff[2][qtlayer] + coeffOffsetC;
> + coeff_t* coeffSrcU = m_qtTempCoeff[1][qtLayer] + coeffOffsetC;
> + coeff_t* coeffSrcV = m_qtTempCoeff[2][qtLayer] + coeffOffsetC;
> coeff_t* coeffDstU = cu->getCoeffCb() + coeffOffsetC;
> coeff_t* coeffDstV = cu->getCoeffCr() + coeffOffsetC;
> ::memcpy(coeffDstU, coeffSrcU, sizeof(coeff_t) * numCoeffC);
> diff -r 0cbc7320c9f2 -r b6302b087ea4 source/Lib/TLibEncoder/TEncSearch.h
> --- a/source/Lib/TLibEncoder/TEncSearch.h Mon Jun 09 11:34:11 2014 +0530
> +++ b/source/Lib/TLibEncoder/TEncSearch.h Tue Jun 10 18:54:35 2014 +0900
> @@ -207,11 +207,11 @@
> void xEncIntraHeader(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, bool bLuma, bool bChroma);
> uint32_t xGetIntraBitsQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t absPartIdxStep, bool bLuma, bool bChroma);
> uint32_t xGetIntraBitsQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t chromaId, const bool splitIntoSubTUs);
> - void xIntraCodingLumaBlk(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* fencYuv, TComYuv* predYuv,
> - ShortYuv* resiYuv, uint32_t& outDist);
> + void xIntraCodingLumaBlk(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize, TComYuv* fencYuv, TComYuv* predYuv,
> + ShortYuv* resiYuv, uint32_t& cbf, uint32_t& outDist);
>
> - void xIntraCodingChromaBlk(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t absPartIdxStep, TComYuv* fencYuv, TComYuv* predYuv,
> - ShortYuv* resiYuv, uint32_t& outDist, uint32_t chromaId);
> + void xIntraCodingChromaBlk(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize, TComYuv* fencYuv, TComYuv* predYuv,
> + ShortYuv* resiYuv, uint32_t& cbf, uint32_t& outDist, uint32_t chromaId, uint32_t log2TrSizeC);
>
> void xRecurIntraChromaCodingQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* fencYuv,
> TComYuv* predYuv, ShortYuv* resiYuv, uint32_t& outDist);
> @@ -223,10 +223,10 @@
>
> void xSetIntraResultChromaQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* reconYuv);
>
> - void xStoreIntraResultQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx);
> - void xLoadIntraResultQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx);
> - void xStoreIntraResultChromaQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t chromaId, const bool splitIntoSubTUs);
> - void xLoadIntraResultChromaQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t chromaId);
> + void xStoreIntraResultQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize);
> + void xLoadIntraResultQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize);
> + void xStoreIntraResultChromaQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize, uint32_t log2TrSizeC, uint32_t chromaId);
> + void xLoadIntraResultChromaQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize, uint32_t log2TrSizeC, uint32_t chromaId);
>
> // --------------------------------------------------------------------------------------------
> // Inter search (AMP)
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
--
Steve Borho
More information about the x265-devel mailing list