[x265] refine block size related
Satoshi Nakagawa
nakagawa424 at oki.com
Fri May 23 06:37:27 CEST 2014
# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1400819691 -32400
# Fri May 23 13:34:51 2014 +0900
# Node ID 085be1ffd4a9752f64f8422e404985527e890921
# Parent 5134e76aa729b6fece18701fdc00390c2f2ffb32
refine block size related
diff -r 5134e76aa729 -r 085be1ffd4a9 source/Lib/TLibCommon/TComBitStream.cpp
--- a/source/Lib/TLibCommon/TComBitStream.cpp Thu May 22 21:46:21 2014 -0500
+++ b/source/Lib/TLibCommon/TComBitStream.cpp Fri May 23 13:34:51 2014 +0900
@@ -88,7 +88,7 @@
/* any modulo 8 remainder of num_total_bits cannot be written this time,
* and will be held until next time. */
uint32_t num_total_bits = numBits + m_num_held_bits;
- uint32_t next_num_held_bits = num_total_bits % 8;
+ uint32_t next_num_held_bits = num_total_bits & 7;
/* form a byte aligned word (write_bits), by concatenating any held bits
* with the new bits, discarding the bits that will form the next_held_bits.
diff -r 5134e76aa729 -r 085be1ffd4a9 source/Lib/TLibCommon/TComDataCU.cpp
--- a/source/Lib/TLibCommon/TComDataCU.cpp Thu May 22 21:46:21 2014 -0500
+++ b/source/Lib/TLibCommon/TComDataCU.cpp Fri May 23 13:34:51 2014 +0900
@@ -1337,7 +1337,7 @@
{
uint32_t curPartNum = m_pic->getNumPartInCU() >> (depth << 1);
- return ((m_absIdxInLCU + absPartIdx) % curPartNum) == 0;
+ return ((m_absIdxInLCU + absPartIdx) & (curPartNum - 1)) == 0;
}
void TComDataCU::setPartSizeSubParts(PartSize mode, uint32_t absPartIdx, uint32_t depth)
diff -r 5134e76aa729 -r 085be1ffd4a9 source/Lib/TLibCommon/TComPrediction.cpp
--- a/source/Lib/TLibCommon/TComPrediction.cpp Thu May 22 21:46:21 2014 -0500
+++ b/source/Lib/TLibCommon/TComPrediction.cpp Fri May 23 13:34:51 2014 +0900
@@ -117,15 +117,15 @@
{
bool bFilter;
- if (dirMode == DC_IDX)
+ if (dirMode == DC_IDX || tuSize <= 4)
{
- bFilter = false; // no smoothing for DC or LM chroma
+ bFilter = false; // no smoothing for DC or for blocks of size 4 or smaller
}
else
{
int diff = std::min<int>(abs((int)dirMode - HOR_IDX), abs((int)dirMode - VER_IDX));
- uint32_t sizeIndex = g_convertToBit[tuSize];
- bFilter = diff > intraFilterThreshold[sizeIndex];
+ uint32_t sizeIdx = g_convertToBit[tuSize];
+ bFilter = diff > intraFilterThreshold[sizeIdx];
}
return bFilter;
@@ -134,7 +134,7 @@
void TComPrediction::predIntraLumaAng(uint32_t dirMode, pixel* dst, intptr_t stride, int tuSize)
{
X265_CHECK(tuSize >= 4 && tuSize <= 64, "intra block size is out of range\n");
- int log2BlkSize = g_convertToBit[tuSize];
+ int sizeIdx = g_convertToBit[tuSize];
bool bUseFilteredPredictions = TComPrediction::filteringIntraReferenceSamples(dirMode, tuSize);
pixel *refLft, *refAbv;
@@ -148,13 +148,13 @@
}
bool bFilter = tuSize <= 16 && dirMode != PLANAR_IDX;
- primitives.intra_pred[log2BlkSize][dirMode](dst, stride, refLft, refAbv, dirMode, bFilter);
+ primitives.intra_pred[sizeIdx][dirMode](dst, stride, refLft, refAbv, dirMode, bFilter);
}
// Angular chroma
void TComPrediction::predIntraChromaAng(pixel* src, uint32_t dirMode, pixel* dst, intptr_t stride, int tuSize, int chFmt)
{
- int log2BlkSize = g_convertToBit[tuSize];
+ int sizeIdx = g_convertToBit[tuSize];
uint32_t tuSize2 = tuSize << 1;
// Create the prediction
@@ -222,7 +222,7 @@
}
}
- primitives.intra_pred[log2BlkSize][dirMode](dst, stride, refLft + tuSize - 1, refAbv + tuSize - 1, dirMode, 0);
+ primitives.intra_pred[sizeIdx][dirMode](dst, stride, refLft + tuSize - 1, refAbv + tuSize - 1, dirMode, 0);
}
/** Function for checking identical motion.
diff -r 5134e76aa729 -r 085be1ffd4a9 source/Lib/TLibCommon/TComSlice.h
--- a/source/Lib/TLibCommon/TComSlice.h Thu May 22 21:46:21 2014 -0500
+++ b/source/Lib/TLibCommon/TComSlice.h Fri May 23 13:34:51 2014 +0900
@@ -906,6 +906,8 @@
void setLog2DiffMaxMinCodingBlockSize(int val) { m_log2DiffMaxMinCodingBlockSize = val; }
+ int getLog2MaxCodingBlockSize() const { return m_log2MinCodingBlockSize + m_log2DiffMaxMinCodingBlockSize; }
+
void setMaxCUSize(uint32_t u) { m_maxCUSize = u; }
uint32_t getMaxCUSize() const { return m_maxCUSize; }
diff -r 5134e76aa729 -r 085be1ffd4a9 source/Lib/TLibCommon/TComYuv.cpp
--- a/source/Lib/TLibCommon/TComYuv.cpp Thu May 22 21:46:21 2014 -0500
+++ b/source/Lib/TLibCommon/TComYuv.cpp Fri May 23 13:34:51 2014 +0900
@@ -186,7 +186,7 @@
void TComYuv::copyPartToPartLuma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize)
{
- int part = partitionFromSizes(lumaSize, lumaSize);
+ int part = partitionFromSize(lumaSize);
int16_t* dst = dstPicYuv->getLumaAddr(partIdx);
uint32_t dststride = dstPicYuv->m_width;
@@ -196,7 +196,7 @@
void TComYuv::copyPartToPartChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, uint32_t chromaId, const bool splitIntoSubTUs)
{
- int part = splitIntoSubTUs ? NUM_CHROMA_PARTITIONS422 : partitionFromSizes(lumaSize, lumaSize);
+ int part = splitIntoSubTUs ? NUM_CHROMA_PARTITIONS422 : partitionFromSize(lumaSize);
if (chromaId == 1)
{
@@ -235,7 +235,7 @@
void TComYuv::addClip(TComYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t partSize)
{
- int part = partitionFromSizes(partSize, partSize);
+ int part = partitionFromSize(partSize);
addClipLuma(srcYuv0, srcYuv1, part);
addClipChroma(srcYuv0, srcYuv1, part);
diff -r 5134e76aa729 -r 085be1ffd4a9 source/Lib/TLibEncoder/TEncCu.cpp
--- a/source/Lib/TLibEncoder/TEncCu.cpp Thu May 22 21:46:21 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncCu.cpp Fri May 23 13:34:51 2014 +0900
@@ -571,13 +571,14 @@
m_origYuv[0]->copyPartToYuv(m_origYuv[depth], outBestCU->getZorderIdxInCU());
}
+ uint32_t cuSize = outTempCU->getCUSize(0);
TComSlice* slice = outTempCU->getSlice();
if (!bInsidePicture)
{
uint32_t lpelx = outBestCU->getCUPelX();
uint32_t tpely = outBestCU->getCUPelY();
- uint32_t rpelx = lpelx + outBestCU->getCUSize(0);
- uint32_t bpely = tpely + outBestCU->getCUSize(0);
+ uint32_t rpelx = lpelx + cuSize;
+ uint32_t bpely = tpely + cuSize;
bInsidePicture = (rpelx <= slice->getSPS()->getPicWidthInLumaSamples() &&
bpely <= slice->getSPS()->getPicHeightInLumaSamples());
}
@@ -592,7 +593,7 @@
if (depth == g_maxCUDepth - g_addCUDepth)
{
- if (outTempCU->getCUSize(0) > (1 << slice->getSPS()->getQuadtreeTULog2MinSize()))
+ if (cuSize > (1 << slice->getSPS()->getQuadtreeTULog2MinSize()))
{
xCheckRDCostIntra(outBestCU, outTempCU, SIZE_NxN);
}
@@ -715,13 +716,14 @@
bool doNotBlockPu = true;
bool earlyDetectionSkipMode = false;
+ uint32_t cuSize = outTempCU->getCUSize(0);
TComSlice* slice = outTempCU->getSlice();
if (!bInsidePicture)
{
uint32_t lpelx = outBestCU->getCUPelX();
uint32_t tpely = outBestCU->getCUPelY();
- uint32_t rpelx = lpelx + outBestCU->getCUSize(0);
- uint32_t bpely = tpely + outBestCU->getCUSize(0);
+ uint32_t rpelx = lpelx + cuSize;
+ uint32_t bpely = tpely + cuSize;
bInsidePicture = (rpelx <= slice->getSPS()->getPicWidthInLumaSamples() &&
bpely <= slice->getSPS()->getPicHeightInLumaSamples());
}
@@ -765,7 +767,7 @@
if (slice->getSliceType() != I_SLICE)
{
// 2Nx2N, NxN
- if (!(outBestCU->getCUSize(0) == 8))
+ if (!(cuSize == 8))
{
if (depth == g_maxCUDepth - g_addCUDepth && doNotBlockPu)
{
@@ -899,7 +901,7 @@
if (depth == g_maxCUDepth - g_addCUDepth)
{
- if (outTempCU->getCUSize(0) > (1 << slice->getSPS()->getQuadtreeTULog2MinSize()))
+ if (cuSize > (1 << slice->getSPS()->getQuadtreeTULog2MinSize()))
{
xCheckRDCostIntraInInter(outBestCU, outTempCU, SIZE_NxN);
outTempCU->initEstData(depth);
@@ -908,10 +910,10 @@
}
// test PCM
if (slice->getSPS()->getUsePCM()
- && outTempCU->getCUSize(0) <= (1 << slice->getSPS()->getPCMLog2MaxSize())
- && outTempCU->getCUSize(0) >= (1 << slice->getSPS()->getPCMLog2MinSize()))
+ && cuSize <= (1 << slice->getSPS()->getPCMLog2MaxSize())
+ && cuSize >= (1 << slice->getSPS()->getPCMLog2MinSize()))
{
- uint32_t rawbits = (2 * X265_DEPTH + X265_DEPTH) * outBestCU->getCUSize(0) * outBestCU->getCUSize(0) / 2;
+ uint32_t rawbits = (2 * X265_DEPTH + X265_DEPTH) * cuSize * cuSize / 2;
uint32_t bestbits = outBestCU->m_totalBits;
if ((bestbits > rawbits) || (outBestCU->m_totalCost > m_rdCost->calcRdCost(0, rawbits)))
{
@@ -1045,6 +1047,7 @@
uint32_t posy = (externalAddress / pic->getFrameWidthInCU()) * g_maxCUSize + g_rasterToPelY[g_zscanToRaster[internalAddress]];
uint32_t width = slice->getSPS()->getPicWidthInLumaSamples();
uint32_t height = slice->getSPS()->getPicHeightInLumaSamples();
+ uint32_t cuSize = cu->getCUSize(absPartIdx);
while (posx >= width || posy >= height)
{
@@ -1070,8 +1073,8 @@
uint32_t uiGranularityWidth = g_maxCUSize;
posx = cu->getCUPelX() + g_rasterToPelX[g_zscanToRaster[absPartIdx]];
posy = cu->getCUPelY() + g_rasterToPelY[g_zscanToRaster[absPartIdx]];
- bool granularityBoundary = ((posx + cu->getCUSize(absPartIdx)) % uiGranularityWidth == 0 || (posx + cu->getCUSize(absPartIdx) == width))
- && ((posy + cu->getCUSize(absPartIdx)) % uiGranularityWidth == 0 || (posy + cu->getCUSize(absPartIdx) == height));
+ bool granularityBoundary = ((posx + cuSize) % uiGranularityWidth == 0 || (posx + cuSize == width))
+ && ((posy + cuSize) % uiGranularityWidth == 0 || (posy + cuSize == height));
if (granularityBoundary)
{
diff -r 5134e76aa729 -r 085be1ffd4a9 source/Lib/TLibEncoder/TEncEntropy.cpp
--- a/source/Lib/TLibEncoder/TEncEntropy.cpp Thu May 22 21:46:21 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncEntropy.cpp Fri May 23 13:34:51 2014 +0900
@@ -212,7 +212,7 @@
void TEncEntropy::xEncodeTransform(TComDataCU* cu, uint32_t offsetLuma, uint32_t offsetChroma, uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t depth, uint32_t tuSize, uint32_t trIdx, bool& bCodeDQP)
{
const uint32_t subdiv = cu->getTransformIdx(absPartIdx) + cu->getDepth(absPartIdx) > depth;
- const uint32_t log2TrafoSize = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUSize()] + 2 - depth;
+ const uint32_t log2TrafoSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - depth;
uint32_t hChromaShift = cu->getHorzChromaShift();
uint32_t vChromaShift = cu->getVertChromaShift();
uint32_t cbfY = cu->getCbf(absPartIdx, TEXT_LUMA, trIdx);
@@ -227,12 +227,12 @@
if ((log2TrafoSize == 2) && !(cu->getChromaFormat() == CHROMA_444))
{
uint32_t partNum = cu->getPic()->getNumPartInCU() >> ((depth - 1) << 1);
- if ((absPartIdx % partNum) == 0)
+ if ((absPartIdx & (partNum - 1)) == 0)
{
m_bakAbsPartIdx = absPartIdx;
m_bakChromaOffset = offsetChroma;
}
- else if ((absPartIdx % partNum) == (partNum - 1))
+ else if ((absPartIdx & (partNum - 1)) == (partNum - 1))
{
cbfU = cu->getCbf(m_bakAbsPartIdx, TEXT_CHROMA_U, trIdx);
cbfV = cu->getCbf(m_bakAbsPartIdx, TEXT_CHROMA_V, trIdx);
@@ -369,9 +369,9 @@
if ((log2TrafoSize == 2) && !(chFmt == CHROMA_444))
{
uint32_t partNum = cu->getPic()->getNumPartInCU() >> ((depth - 1) << 1);
- if ((absPartIdx % partNum) == (partNum - 1))
+ if ((absPartIdx & (partNum - 1)) == (partNum - 1))
{
- uint32_t trWidthC = log2TrafoSize << 1;
+ uint32_t trSizeC = 1 << log2TrafoSize;
const bool splitIntoSubTUs = (chFmt == CHROMA_422);
uint32_t curPartNum = cu->getPic()->getNumPartInCU() >> ((depth - 1) << 1);
@@ -384,10 +384,10 @@
do
{
uint32_t cbf = cu->getCbf(tuIterator.m_absPartIdxTURelCU, (TextType)chromaId, trIdx + splitIntoSubTUs);
- uint32_t subTUIndex = tuIterator.m_section * trWidthC * trWidthC;
+ uint32_t subTUIndex = tuIterator.m_section * trSizeC * trSizeC;
if (cbf)
{
- m_entropyCoderIf->codeCoeffNxN(cu, (coeffChroma + m_bakChromaOffset + subTUIndex), tuIterator.m_absPartIdxTURelCU, trWidthC, (TextType)chromaId);
+ m_entropyCoderIf->codeCoeffNxN(cu, (coeffChroma + m_bakChromaOffset + subTUIndex), tuIterator.m_absPartIdxTURelCU, trSizeC, (TextType)chromaId);
}
}
while (isNextTUSection(&tuIterator));
@@ -396,10 +396,8 @@
}
else
{
- uint32_t trWidthC = tuSize >> hChromaShift;
- uint32_t trHeightC = tuSize >> vChromaShift;
+ uint32_t trSizeC = tuSize >> hChromaShift;
const bool splitIntoSubTUs = (chFmt == CHROMA_422);
- trHeightC = splitIntoSubTUs ? trHeightC >> 1 : trHeightC;
uint32_t curPartNum = cu->getPic()->getNumPartInCU() >> (depth << 1);
for (uint32_t chromaId = TEXT_CHROMA; chromaId < MAX_NUM_COMPONENT; chromaId++)
{
@@ -409,10 +407,10 @@
do
{
uint32_t cbf = cu->getCbf(tuIterator.m_absPartIdxTURelCU, (TextType)chromaId, trIdx + splitIntoSubTUs);
- uint32_t subTUIndex = tuIterator.m_section * trWidthC * trHeightC;
+ uint32_t subTUIndex = tuIterator.m_section * trSizeC * trSizeC;
if (cbf)
{
- m_entropyCoderIf->codeCoeffNxN(cu, (coeffChroma + offsetChroma + subTUIndex), tuIterator.m_absPartIdxTURelCU, trWidthC, (TextType)chromaId);
+ m_entropyCoderIf->codeCoeffNxN(cu, (coeffChroma + offsetChroma + subTUIndex), tuIterator.m_absPartIdxTURelCU, trSizeC, (TextType)chromaId);
}
}
while (isNextTUSection(&tuIterator));
diff -r 5134e76aa729 -r 085be1ffd4a9 source/Lib/TLibEncoder/TEncSbac.cpp
--- a/source/Lib/TLibEncoder/TEncSbac.cpp Thu May 22 21:46:21 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncSbac.cpp Fri May 23 13:34:51 2014 +0900
@@ -1930,8 +1930,7 @@
m_binIf->encodePCMAlignBits();
uint32_t lumaOffset = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
- uint32_t chromaOffset = lumaOffset >> 2;
- //uint32_t chromaOffset = lumaOffset >> (m_hChromaShift + m_vChromaShift);
+ uint32_t chromaOffset = lumaOffset >> (cu->getHorzChromaShift() + cu->getVertChromaShift());
uint32_t width;
uint32_t height;
uint32_t sampleBits;
diff -r 5134e76aa729 -r 085be1ffd4a9 source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Thu May 22 21:46:21 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Fri May 23 13:34:51 2014 +0900
@@ -160,7 +160,7 @@
uint32_t fullDepth = cu->getDepth(0) + trDepth;
uint32_t trMode = cu->getTransformIdx(absPartIdx);
uint32_t subdiv = (trMode > trDepth ? 1 : 0);
- uint32_t trSizeLog2 = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUSize()] + 2 - fullDepth;
+ uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
if (cu->getPredictionMode(0) == MODE_INTRA && cu->getPartitionSize(0) == SIZE_NxN && trDepth == 0)
{
@@ -245,14 +245,14 @@
uint32_t origTrDepth = trDepth;
- uint32_t trSizeLog2 = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUSize()] + 2 - fullDepth;
+ uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
int chFmt = cu->getChromaFormat();
if ((ttype != TEXT_LUMA) && (trSizeLog2 == 2) && !(chFmt == CHROMA_444))
{
X265_CHECK(trDepth > 0, "transform size too small\n");
trDepth--;
uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trDepth) << 1);
- bool bFirstQ = ((absPartIdx % qpdiv) == 0);
+ bool bFirstQ = ((absPartIdx & (qpdiv - 1)) == 0);
if (!bFirstQ)
{
return;
@@ -290,7 +290,7 @@
{
uint32_t subTUSize = width * width;
uint32_t partIdxesPerSubTU = cu->getPic()->getNumPartInCU() >> (((cu->getDepth(absPartIdx) + trDepth) << 1) + 1);
-
+
if (cu->getCbf(absPartIdx, ttype, origTrDepth + 1))
m_entropyCoder->encodeCoeffNxN(cu, coeff, absPartIdx, width, ttype);
if (cu->getCbf(absPartIdx + partIdxesPerSubTU, ttype, origTrDepth + 1))
@@ -346,7 +346,7 @@
m_entropyCoder->encodeIntraDirModeLuma(cu, part * qtNumParts);
}
}
- else if ((absPartIdx % qtNumParts) == 0)
+ else if ((absPartIdx & (qtNumParts - 1)) == 0)
{
m_entropyCoder->encodeIntraDirModeLuma(cu, absPartIdx);
}
@@ -366,7 +366,7 @@
{
uint32_t qtNumParts = cu->getTotalNumPart() >> 2;
X265_CHECK(trDepth > 0, "unexpected trDepth %d\n", trDepth);
- if ((absPartIdx % qtNumParts) == 0)
+ if ((absPartIdx & (qtNumParts - 1)) == 0)
m_entropyCoder->encodeIntraDirModeChroma(cu, absPartIdx);
}
}
@@ -418,12 +418,13 @@
pixel* fenc = fencYuv->getLumaAddr(absPartIdx);
pixel* pred = predYuv->getLumaAddr(absPartIdx);
int16_t* residual = resiYuv->getLumaAddr(absPartIdx);
- int part = partitionFromSizes(tuSize, tuSize);
-
- uint32_t trSizeLog2 = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUSize() >> fullDepth] + 2;
+ int part = partitionFromSize(tuSize);
+ int sizeIdx = g_convertToBit[tuSize];
+
+ uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
- uint32_t numCoeffPerInc = cu->getSlice()->getSPS()->getMaxCUSize() * cu->getSlice()->getSPS()->getMaxCUSize() >> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1);
- coeff_t* coeff = m_qtTempCoeffY[qtLayer] + numCoeffPerInc * absPartIdx;
+ uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
+ coeff_t* coeff = m_qtTempCoeffY[qtLayer] + coeffOffsetY;
int16_t* reconQt = m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx);
@@ -435,10 +436,10 @@
bool useTransformSkip = !!cu->getTransformSkip(absPartIdx, TEXT_LUMA);
//===== get residual signal =====
- X265_CHECK(!((uint32_t)(size_t)fenc & (tuSize - 1)), "fenc alignment check fail\n");
- X265_CHECK(!((uint32_t)(size_t)pred & (tuSize - 1)), "pred alignment check fail\n");
- X265_CHECK(!((uint32_t)(size_t)residual & (tuSize - 1)), "residual alignment check fail\n");
- primitives.calcresidual[(int)g_convertToBit[tuSize]](fenc, pred, residual, stride);
+ X265_CHECK(!((intptr_t)fenc & (tuSize - 1)), "fenc alignment check fail\n");
+ X265_CHECK(!((intptr_t)pred & (tuSize - 1)), "pred alignment check fail\n");
+ X265_CHECK(!((intptr_t)residual & (tuSize - 1)), "residual alignment check fail\n");
+ primitives.calcresidual[sizeIdx](fenc, pred, residual, stride);
//===== transform and quantization =====
//--- init rate estimation arrays for RDOQ ---
@@ -462,7 +463,6 @@
cu->setCbfSubParts((absSum ? 1 : 0) << trDepth, TEXT_LUMA, absPartIdx, fullDepth);
//--- inverse transform ---
- int size = g_convertToBit[tuSize];
if (absSum)
{
int scalingListType = 0 + TEXT_LUMA;
@@ -473,12 +473,12 @@
{
int16_t* resiTmp = residual;
memset(coeff, 0, sizeof(coeff_t) * tuSize * tuSize);
- primitives.blockfill_s[size](resiTmp, stride, 0);
+ primitives.blockfill_s[sizeIdx](resiTmp, stride, 0);
}
X265_CHECK(tuSize <= 32, "tuSize is too large %d\n", tuSize);
//===== reconstruction =====
- primitives.calcrecon[size](pred, residual, reconQt, reconIPred, stride, MAX_CU_SIZE, reconIPredStride);
+ primitives.calcrecon[sizeIdx](pred, residual, reconQt, reconIPred, stride, MAX_CU_SIZE, reconIPredStride);
//===== update distortion =====
outDist += primitives.sse_sp[part](reconQt, MAX_CU_SIZE, fenc, stride);
}
@@ -494,7 +494,7 @@
uint32_t chromaId)
{
uint32_t fullDepth = cu->getDepth(0) + trDepth;
- uint32_t trSizeLog2 = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUSize() >> fullDepth] + 2;
+ uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
int chFmt = cu->getChromaFormat();
uint32_t origTrDepth = trDepth;
@@ -504,8 +504,8 @@
X265_CHECK(trDepth > 0, "trDepth should be non-zero\n");
trDepth--;
uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trDepth) << 1);
- bool bFirstQ = ((absPartIdx % qpdiv) == 0);
- bool bSecondQ = (chFmt == CHROMA_422) ? ((absPartIdx % qpdiv) == 2) : false;
+ bool bFirstQ = ((absPartIdx & (qpdiv - 1)) == 0);
+ bool bSecondQ = (chFmt == CHROMA_422) ? ((absPartIdx & (qpdiv - 1)) == 2) : false;
if ((!bFirstQ) && (!bSecondQ))
{
return;
@@ -520,22 +520,22 @@
int16_t* residual = (chromaId == 1) ? resiYuv->getCbAddr(absPartIdx) : resiYuv->getCrAddr(absPartIdx);
uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
- uint32_t numCoeffPerInc = (cu->getSlice()->getSPS()->getMaxCUSize() * cu->getSlice()->getSPS()->getMaxCUSize() >> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1)) >> (m_hChromaShift + m_vChromaShift);
- coeff_t* coeff = (chromaId == 1 ? m_qtTempCoeffCb[qtlayer] : m_qtTempCoeffCr[qtlayer]) + numCoeffPerInc * absPartIdx;
+ uint32_t coeffOffsetC = absPartIdx << (cu->getPic()->getLog2UnitSize() * 2 - (m_hChromaShift + m_vChromaShift));
+ coeff_t* coeff = (chromaId == 1 ? m_qtTempCoeffCb[qtlayer] : m_qtTempCoeffCr[qtlayer]) + coeffOffsetC;
int16_t* reconQt = (chromaId == 1) ? m_qtTempShortYuv[qtlayer].getCbAddr(absPartIdx) : m_qtTempShortYuv[qtlayer].getCrAddr(absPartIdx);
uint32_t reconQtStride = m_qtTempShortYuv[qtlayer].m_cwidth;
uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
pixel* reconIPred = (chromaId == 1) ? cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder) : cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder);
uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getCStride();
bool useTransformSkipChroma = !!cu->getTransformSkip(absPartIdx, ttype);
- int part = partitionFromSizes(tuSize, tuSize);
+ int part = partitionFromSize(tuSize);
+ int sizeIdx = g_convertToBit[tuSize];
//===== get residual signal =====
- X265_CHECK(!((uint32_t)(size_t)fenc & (tuSize - 1)), "fenc alignment check fail\n");
- X265_CHECK(!((uint32_t)(size_t)pred & (tuSize - 1)), "pred alignment check fail\n");
- X265_CHECK(!((uint32_t)(size_t)residual & (tuSize - 1)), "residual alignment check fail\n");
- int size = g_convertToBit[tuSize];
- primitives.calcresidual[size](fenc, pred, residual, stride);
+ X265_CHECK(!((intptr_t)fenc & (tuSize - 1)), "fenc alignment check fail\n");
+ X265_CHECK(!((intptr_t)pred & (tuSize - 1)), "pred alignment check fail\n");
+ X265_CHECK(!((intptr_t)residual & (tuSize - 1)), "residual alignment check fail\n");
+ primitives.calcresidual[sizeIdx](fenc, pred, residual, stride);
//===== transform and quantization =====
{
@@ -577,14 +577,14 @@
{
int16_t* resiTmp = residual;
memset(coeff, 0, sizeof(coeff_t) * tuSize * tuSize);
- primitives.blockfill_s[size](resiTmp, stride, 0);
+ primitives.blockfill_s[sizeIdx](resiTmp, stride, 0);
}
}
X265_CHECK(((intptr_t)residual & (tuSize - 1)) == 0, "residual alignment check failure\n");
X265_CHECK(tuSize <= 32, "tuSize invalud\n");
//===== reconstruction =====
- primitives.calcrecon[size](pred, residual, reconQt, reconIPred, stride, reconQtStride, reconIPredStride);
+ primitives.calcrecon[sizeIdx](pred, residual, reconQt, reconIPred, stride, reconQtStride, reconIPredStride);
//===== update distortion =====
uint32_t dist = primitives.sse_sp[part](reconQt, reconQtStride, fenc, stride);
if (ttype == TEXT_CHROMA_U)
@@ -612,7 +612,7 @@
uint64_t& rdCost)
{
uint32_t fullDepth = cu->getDepth(0) + trDepth;
- uint32_t trSizeLog2 = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUSize() >> fullDepth] + 2;
+ uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
bool bCheckFull = (trSizeLog2 <= cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize());
bool bCheckSplit = (trSizeLog2 > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx));
@@ -829,7 +829,7 @@
TComYuv* reconYuv)
{
uint32_t fullDepth = cu->getDepth(0) + trDepth;
- uint32_t trSizeLog2 = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUSize() >> fullDepth] + 2;
+ uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
bool bCheckFull = (trSizeLog2 <= cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize());
bool bCheckSplit = (trSizeLog2 > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx));
@@ -854,9 +854,8 @@
pixel* pred = predYuv->getLumaAddr(absPartIdx);
int16_t* residual = resiYuv->getLumaAddr(absPartIdx);
pixel* recon = reconYuv->getLumaAddr(absPartIdx);
-
- uint32_t numCoeffPerInc = cu->getSlice()->getSPS()->getMaxCUSize() * cu->getSlice()->getSPS()->getMaxCUSize() >> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1);
- coeff_t* coeff = cu->getCoeffY() + numCoeffPerInc * absPartIdx;
+ uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
+ coeff_t* coeff = cu->getCoeffY() + coeffOffsetY;
uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
pixel* reconIPred = cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);
@@ -870,10 +869,11 @@
predIntraLumaAng(lumaPredMode, pred, stride, tuSize);
//===== get residual signal =====
- X265_CHECK(!((uint32_t)(size_t)fenc & (tuSize - 1)), "fenc alignment failure\n");
- X265_CHECK(!((uint32_t)(size_t)pred & (tuSize - 1)), "pred alignment failure\n");
- X265_CHECK(!((uint32_t)(size_t)residual & (tuSize - 1)), "residual alignment failure\n");
- primitives.calcresidual[(int)g_convertToBit[tuSize]](fenc, pred, residual, stride);
+ X265_CHECK(!((intptr_t)fenc & (tuSize - 1)), "fenc alignment failure\n");
+ X265_CHECK(!((intptr_t)pred & (tuSize - 1)), "pred alignment failure\n");
+ X265_CHECK(!((intptr_t)residual & (tuSize - 1)), "residual alignment failure\n");
+ int sizeIdx = g_convertToBit[tuSize];
+ primitives.calcresidual[sizeIdx](fenc, pred, residual, stride);
//===== transform and quantization =====
uint32_t absSum = 0;
@@ -888,7 +888,6 @@
cu->setCbfSubParts((absSum ? 1 : 0) << trDepth, TEXT_LUMA, absPartIdx, fullDepth);
//--- inverse transform ---
- int size = g_convertToBit[tuSize];
if (absSum)
{
int scalingListType = 0 + TEXT_LUMA;
@@ -899,12 +898,12 @@
{
int16_t* resiTmp = residual;
memset(coeff, 0, sizeof(coeff_t) * tuSize * tuSize);
- primitives.blockfill_s[size](resiTmp, stride, 0);
+ primitives.blockfill_s[sizeIdx](resiTmp, stride, 0);
}
//Generate Recon
X265_CHECK(tuSize <= 32, "tuSize is too large\n");
- int part = partitionFromSizes(tuSize, tuSize);
+ int part = partitionFromSize(tuSize);
primitives.luma_add_ps[part](recon, stride, pred, residual, stride, stride);
primitives.blockcpy_pp(tuSize, tuSize, reconIPred, reconIPredStride, recon, stride);
}
@@ -939,14 +938,14 @@
if (trMode == trDepth)
{
- uint32_t trSizeLog2 = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUSize() >> fullDepth] + 2;
+ uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
//===== copy transform coefficients =====
- uint32_t numCoeffY = (cu->getSlice()->getSPS()->getMaxCUSize() * cu->getSlice()->getSPS()->getMaxCUSize()) >> (fullDepth << 1);
- uint32_t numCoeffIncY = (cu->getSlice()->getSPS()->getMaxCUSize() * cu->getSlice()->getSPS()->getMaxCUSize()) >> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1);
- coeff_t* coeffSrcY = m_qtTempCoeffY[qtlayer] + (numCoeffIncY * absPartIdx);
- coeff_t* coeffDestY = cu->getCoeffY() + (numCoeffIncY * absPartIdx);
+ uint32_t numCoeffY = 1 << (trSizeLog2 * 2);
+ uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
+ coeff_t* coeffSrcY = m_qtTempCoeffY[qtlayer] + coeffOffsetY;
+ coeff_t* coeffDestY = cu->getCoeffY() + coeffOffsetY;
::memcpy(coeffDestY, coeffSrcY, sizeof(coeff_t) * numCoeffY);
//===== copy reconstruction =====
@@ -964,15 +963,14 @@
void TEncSearch::xStoreIntraResultQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx)
{
- uint32_t fullMode = cu->getDepth(0) + trDepth;
-
- uint32_t trSizeLog2 = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUSize() >> fullMode] + 2;
+ uint32_t fullDepth = cu->getDepth(0) + trDepth;
+ uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
//===== copy transform coefficients =====
- uint32_t numCoeffY = (cu->getSlice()->getSPS()->getMaxCUSize() * cu->getSlice()->getSPS()->getMaxCUSize()) >> (fullMode << 1);
- uint32_t numCoeffIncY = (cu->getSlice()->getSPS()->getMaxCUSize() * cu->getSlice()->getSPS()->getMaxCUSize()) >> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1);
- coeff_t* coeffSrcY = m_qtTempCoeffY[qtlayer] + (numCoeffIncY * absPartIdx);
+ uint32_t numCoeffY = 1 << (trSizeLog2 * 2);
+ uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
+ coeff_t* coeffSrcY = m_qtTempCoeffY[qtlayer] + coeffOffsetY;
coeff_t* coeffDstY = m_qtTempTUCoeffY;
::memcpy(coeffDstY, coeffSrcY, sizeof(coeff_t) * numCoeffY);
@@ -984,14 +982,13 @@
void TEncSearch::xLoadIntraResultQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx)
{
uint32_t fullDepth = cu->getDepth(0) + trDepth;
-
- uint32_t trSizeLog2 = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUSize() >> fullDepth] + 2;
+ uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
//===== copy transform coefficients =====
- uint32_t numCoeffY = (cu->getSlice()->getSPS()->getMaxCUSize() * cu->getSlice()->getSPS()->getMaxCUSize()) >> (fullDepth << 1);
- uint32_t numCoeffIncY = (cu->getSlice()->getSPS()->getMaxCUSize() * cu->getSlice()->getSPS()->getMaxCUSize()) >> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1);
- coeff_t* coeffDstY = m_qtTempCoeffY[qtlayer] + (numCoeffIncY * absPartIdx);
+ uint32_t numCoeffY = 1 << (trSizeLog2 * 2);
+ uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
+ coeff_t* coeffDstY = m_qtTempCoeffY[qtlayer] + coeffOffsetY;
coeff_t* coeffSrcY = m_qtTempTUCoeffY;
::memcpy(coeffDstY, coeffSrcY, sizeof(coeff_t) * numCoeffY);
@@ -1008,25 +1005,27 @@
X265_CHECK(m_qtTempShortYuv[qtlayer].m_width == MAX_CU_SIZE, "width is not max CU size\n");
}
-void TEncSearch::xStoreIntraResultChromaQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t stateU0V1Both2, const bool splitIntoSubTUs)
+void TEncSearch::xStoreIntraResultChromaQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t chromaId, const bool splitIntoSubTUs)
{
+ assert(chromaId == 1 || chromaId == 2);
+
uint32_t fullDepth = cu->getDepth(0) + trDepth;
uint32_t trMode = cu->getTransformIdx(absPartIdx);
if (trMode == trDepth)
{
- uint32_t trSizeLog2 = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUSize() >> fullDepth] + 2;
+ uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
int chFmt = cu->getChromaFormat();
bool bChromaSame = false;
if (trSizeLog2 == 2 && !(chFmt == CHROMA_444))
{
- X265_CHECK(trDepth > 0, "trDepth is invalid\n");
+ X265_CHECK(trDepth > 0, "invalid trDepth\n");
trDepth--;
uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trDepth) << 1);
- bool bFirstQ = ((absPartIdx % qpdiv) == 0);
- bool bSecondQ = (chFmt == CHROMA_422) ? ((absPartIdx % qpdiv) == 2) : false;
+ bool bFirstQ = ((absPartIdx & (qpdiv - 1)) == 0);
+ bool bSecondQ = (chFmt == CHROMA_422) ? ((absPartIdx & (qpdiv - 1)) == 2) : false;
if ((!bFirstQ) && (!bSecondQ))
{
return;
@@ -1037,35 +1036,37 @@
uint32_t height = cu->getCUSize(absPartIdx) >> (trDepth + m_vChromaShift);
height = splitIntoSubTUs ? height >> 1 : height;
uint32_t numCoeffC = width * height;
-
- uint32_t numCoeffIncC = (cu->getSlice()->getSPS()->getMaxCUSize() * cu->getSlice()->getSPS()->getMaxCUSize()) >> ((cu->getSlice()->getSPS()->getMaxCUDepth() << 1) + (m_hChromaShift + m_vChromaShift));
- if (stateU0V1Both2 == 1 || stateU0V1Both2 == 3)
+ uint32_t coeffOffsetC = absPartIdx << (cu->getPic()->getLog2UnitSize() * 2 - (m_hChromaShift + m_vChromaShift));
+
+ if (chromaId == 1)
{
- coeff_t* coeffSrcU = m_qtTempCoeffCb[qtlayer] + (numCoeffIncC * absPartIdx);
+ coeff_t* coeffSrcU = m_qtTempCoeffCb[qtlayer] + coeffOffsetC;
coeff_t* coeffDstU = m_qtTempTUCoeffCb;
::memcpy(coeffDstU, coeffSrcU, sizeof(coeff_t) * numCoeffC);
}
- if (stateU0V1Both2 == 2 || stateU0V1Both2 == 3)
+ if (chromaId == 2)
{
- coeff_t* coeffSrcV = m_qtTempCoeffCr[qtlayer] + (numCoeffIncC * absPartIdx);
+ coeff_t* coeffSrcV = m_qtTempCoeffCr[qtlayer] + coeffOffsetC;
coeff_t* coeffDstV = m_qtTempTUCoeffCr;
::memcpy(coeffDstV, coeffSrcV, sizeof(coeff_t) * numCoeffC);
}
//===== copy reconstruction =====
uint32_t lumaSize = 1 << (bChromaSame ? trSizeLog2 + 1 : trSizeLog2);
- m_qtTempShortYuv[qtlayer].copyPartToPartYuvChroma(&m_qtTempTransformSkipYuv, absPartIdx, lumaSize, stateU0V1Both2, splitIntoSubTUs);
+ m_qtTempShortYuv[qtlayer].copyPartToPartYuvChroma(&m_qtTempTransformSkipYuv, absPartIdx, lumaSize, chromaId, splitIntoSubTUs);
}
}
-void TEncSearch::xLoadIntraResultChromaQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t stateU0V1Both2, const bool splitIntoSubTUs)
+void TEncSearch::xLoadIntraResultChromaQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t chromaId, const bool splitIntoSubTUs)
{
+ assert(chromaId == 1 || chromaId == 2);
+
uint32_t fullDepth = cu->getDepth(0) + trDepth;
uint32_t trMode = cu->getTransformIdx(absPartIdx);
if (trMode == trDepth)
{
- uint32_t trSizeLog2 = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUSize() >> fullDepth] + 2;
+ uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
int chFmt = cu->getChromaFormat();
@@ -1075,8 +1076,8 @@
X265_CHECK(trDepth > 0, "invalid trDepth\n");
trDepth--;
uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trDepth) << 1);
- bool bFirstQ = ((absPartIdx % qpdiv) == 0);
- bool bSecondQ = (chFmt == CHROMA_422) ? ((absPartIdx % qpdiv) == 2) : false;
+ bool bFirstQ = ((absPartIdx & (qpdiv - 1)) == 0);
+ bool bSecondQ = (chFmt == CHROMA_422) ? ((absPartIdx & (qpdiv - 1)) == 2) : false;
if ((!bFirstQ) && (!bSecondQ))
{
return;
@@ -1085,45 +1086,44 @@
}
//===== copy transform coefficients =====
- uint32_t trWidth = cu->getCUSize(absPartIdx) >> (trDepth + m_hChromaShift);
- uint32_t trHeight = cu->getCUSize(absPartIdx) >> (trDepth + m_vChromaShift);
- trHeight = splitIntoSubTUs ? trHeight >> 1 : trHeight;
- uint32_t numCoeffC = trWidth * trHeight;
-
- uint32_t numCoeffIncC = (cu->getSlice()->getSPS()->getMaxCUSize() * cu->getSlice()->getSPS()->getMaxCUSize()) >> ((cu->getSlice()->getSPS()->getMaxCUDepth() << 1) + (m_hChromaShift + m_vChromaShift));
-
- if (stateU0V1Both2 == 1 || stateU0V1Both2 == 3)
+ uint32_t trWidthC = cu->getCUSize(absPartIdx) >> (trDepth + m_hChromaShift);
+ uint32_t trHeightC = cu->getCUSize(absPartIdx) >> (trDepth + m_vChromaShift);
+ trHeightC = splitIntoSubTUs ? trHeightC >> 1 : trHeightC;
+ uint32_t numCoeffC = trWidthC * trHeightC;
+ uint32_t coeffOffsetC = absPartIdx << (cu->getPic()->getLog2UnitSize() * 2 - (m_hChromaShift + m_vChromaShift));
+
+ if (chromaId == 1)
{
- coeff_t* coeffDstU = m_qtTempCoeffCb[qtlayer] + (numCoeffIncC * absPartIdx);
+ coeff_t* coeffDstU = m_qtTempCoeffCb[qtlayer] + coeffOffsetC;
coeff_t* coeffSrcU = m_qtTempTUCoeffCb;
::memcpy(coeffDstU, coeffSrcU, sizeof(coeff_t) * numCoeffC);
}
- if (stateU0V1Both2 == 2 || stateU0V1Both2 == 3)
+ if (chromaId == 2)
{
- coeff_t* coeffDstV = m_qtTempCoeffCr[qtlayer] + (numCoeffIncC * absPartIdx);
+ coeff_t* coeffDstV = m_qtTempCoeffCr[qtlayer] + coeffOffsetC;
coeff_t* coeffSrcV = m_qtTempTUCoeffCr;
::memcpy(coeffDstV, coeffSrcV, sizeof(coeff_t) * numCoeffC);
}
//===== copy reconstruction =====
uint32_t lumaSize = 1 << (bChromaSame ? trSizeLog2 + 1 : trSizeLog2);
- m_qtTempTransformSkipYuv.copyPartToPartChroma(&m_qtTempShortYuv[qtlayer], absPartIdx, lumaSize, stateU0V1Both2, splitIntoSubTUs);
+ m_qtTempTransformSkipYuv.copyPartToPartChroma(&m_qtTempShortYuv[qtlayer], absPartIdx, lumaSize, chromaId, splitIntoSubTUs);
uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
uint32_t reconQtStride = m_qtTempShortYuv[qtlayer].m_cwidth;
uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getCStride();
- if (stateU0V1Both2 == 1 || stateU0V1Both2 == 3)
+ if (chromaId == 1)
{
pixel* reconIPred = cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder);
int16_t* reconQt = m_qtTempShortYuv[qtlayer].getCbAddr(absPartIdx);
- primitives.blockcpy_ps(trWidth, trHeight, reconIPred, reconIPredStride, reconQt, reconQtStride);
+ primitives.blockcpy_ps(trWidthC, trHeightC, reconIPred, reconIPredStride, reconQt, reconQtStride);
}
- if (stateU0V1Both2 == 2 || stateU0V1Both2 == 3)
+ if (chromaId == 2)
{
pixel* reconIPred = cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder);
int16_t* reconQt = m_qtTempShortYuv[qtlayer].getCrAddr(absPartIdx);
- primitives.blockcpy_ps(trWidth, trHeight, reconIPred, reconIPredStride, reconQt, reconQtStride);
+ primitives.blockcpy_ps(trWidthC, trHeightC, reconIPred, reconIPredStride, reconQt, reconQtStride);
}
}
}
@@ -1132,7 +1132,7 @@
{
uint32_t depth = cu->getDepth(0);
uint32_t fullDepth = depth + trDepth;
- uint32_t trSizeLog2 = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUSize() >> fullDepth] + 2;
+ uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
uint32_t actualTrDepth = trDepth;
@@ -1179,7 +1179,7 @@
if (trMode == trDepth)
{
int chFmt = cu->getChromaFormat();
- uint32_t trSizeLog2 = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUSize() >> fullDepth] + 2;
+ uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
uint32_t trSizeCLog2 = trSizeLog2 - m_hChromaShift;
uint32_t actualTrDepth = trDepth;
if ((trSizeLog2 == 2) && !(chFmt == CHROMA_444))
@@ -1188,7 +1188,7 @@
actualTrDepth--;
trSizeCLog2++;
uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + actualTrDepth) << 1);
- bool bFirstQ = ((absPartIdx % qpdiv) == 0);
+ bool bFirstQ = ((absPartIdx & (qpdiv - 1)) == 0);
if (!bFirstQ)
{
return;
@@ -1288,7 +1288,7 @@
if (bestModeId == firstCheckId)
{
- xStoreIntraResultChromaQT(cu, trDepth, absPartIdxC, (TextType)chromaId, splitIntoSubTUs);
+ xStoreIntraResultChromaQT(cu, trDepth, absPartIdxC, chromaId, splitIntoSubTUs);
m_rdGoOnSbacCoder->store(m_rdSbacCoders[fullDepth][CI_TEMP_BEST]);
}
}
@@ -1300,7 +1300,7 @@
if (bestModeId == firstCheckId)
{
- xLoadIntraResultChromaQT(cu, trDepth, absPartIdxC, (TextType)chromaId, splitIntoSubTUs);
+ xLoadIntraResultChromaQT(cu, trDepth, absPartIdxC, chromaId, splitIntoSubTUs);
cu->setCbfPartRange(singleCbfC << trDepth, (TextType)chromaId, absPartIdxC, tuIterator.m_absPartIdxStep);
m_rdGoOnSbacCoder->load(m_rdSbacCoders[fullDepth][CI_TEMP_BEST]);
@@ -1358,7 +1358,7 @@
if (trMode == trDepth)
{
int chFmt = cu->getChromaFormat();
- uint32_t trSizeLog2 = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUSize() >> fullDepth] + 2;
+ uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
bool bChromaSame = false;
@@ -1367,7 +1367,7 @@
X265_CHECK(trDepth > 0, "invalid trDepth\n");
trDepth--;
uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trDepth) << 1);
- if ((absPartIdx % qpdiv) != 0)
+ if ((absPartIdx & (qpdiv - 1)) != 0)
{
return;
}
@@ -1379,12 +1379,12 @@
uint32_t width = cu->getCUSize(absPartIdx) >> (trDepth + m_hChromaShift);
uint32_t height = cu->getCUSize(absPartIdx) >> (trDepth + m_vChromaShift);
uint32_t numCoeffC = width * height;
- uint32_t numCoeffIncC = ((cu->getSlice()->getSPS()->getMaxCUSize() >> m_hChromaShift) * (cu->getSlice()->getSPS()->getMaxCUSize() >> m_vChromaShift)) >> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1);
-
- coeff_t* coeffSrcU = m_qtTempCoeffCb[qtlayer] + (numCoeffIncC * absPartIdx);
- coeff_t* coeffSrcV = m_qtTempCoeffCr[qtlayer] + (numCoeffIncC * absPartIdx);
- coeff_t* coeffDstU = cu->getCoeffCb() + (numCoeffIncC * absPartIdx);
- coeff_t* coeffDstV = cu->getCoeffCr() + (numCoeffIncC * absPartIdx);
+ uint32_t coeffOffsetC = absPartIdx << (cu->getPic()->getLog2UnitSize() * 2 - (m_hChromaShift + m_vChromaShift));
+
+ coeff_t* coeffSrcU = m_qtTempCoeffCb[qtlayer] + coeffOffsetC;
+ coeff_t* coeffSrcV = m_qtTempCoeffCr[qtlayer] + coeffOffsetC;
+ coeff_t* coeffDstU = cu->getCoeffCb() + coeffOffsetC;
+ coeff_t* coeffDstV = cu->getCoeffCr() + coeffOffsetC;
::memcpy(coeffDstU, coeffSrcU, sizeof(coeff_t) * numCoeffC);
::memcpy(coeffDstV, coeffSrcV, sizeof(coeff_t) * numCoeffC);
@@ -1415,7 +1415,7 @@
if (trMode == trDepth)
{
int chFmt = cu->getChromaFormat();
- uint32_t trSizeLog2 = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUSize() >> fullDepth] + 2;
+ uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
uint32_t origTrDepth = trDepth;
uint32_t actualTrDepth = trDepth;
if ((trSizeLog2 == 2) && !(chFmt == CHROMA_444))
@@ -1423,7 +1423,7 @@
X265_CHECK(trDepth > 0, "invalid trDepth\n");
actualTrDepth--;
uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + actualTrDepth) << 1);
- bool bFirstQ = ((absPartIdx % qpdiv) == 0);
+ bool bFirstQ = ((absPartIdx & (qpdiv - 1)) == 0);
if (!bFirstQ)
{
return;
@@ -1433,6 +1433,7 @@
uint32_t tuSize = cu->getCUSize(0) >> (actualTrDepth + m_hChromaShift);
uint32_t stride = fencYuv->getCStride();
const bool splitIntoSubTUs = (chFmt == CHROMA_422);
+ int sizeIdx = g_convertToBit[tuSize];
for (int chromaId = TEXT_CHROMA; chromaId < MAX_NUM_COMPONENT; chromaId++)
{
@@ -1450,8 +1451,8 @@
pixel* pred = (chromaId == 1) ? predYuv->getCbAddr(absPartIdxC) : predYuv->getCrAddr(absPartIdxC);
int16_t* residual = (chromaId == 1) ? resiYuv->getCbAddr(absPartIdxC) : resiYuv->getCrAddr(absPartIdxC);
pixel* recon = (chromaId == 1) ? reconYuv->getCbAddr(absPartIdxC) : reconYuv->getCrAddr(absPartIdxC);
- uint32_t numCoeffPerInc = (cu->getSlice()->getSPS()->getMaxCUSize() * cu->getSlice()->getSPS()->getMaxCUSize() >> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1)) >> (m_hChromaShift + m_vChromaShift);
- coeff_t* coeff = (chromaId == 1 ? cu->getCoeffCb() : cu->getCoeffCr()) + numCoeffPerInc * absPartIdxC;
+ uint32_t coeffOffsetC = absPartIdxC << (cu->getPic()->getLog2UnitSize() * 2 - (m_hChromaShift + m_vChromaShift));
+ coeff_t* coeff = (chromaId == 1 ? cu->getCoeffCb() : cu->getCoeffCr()) + coeffOffsetC;
uint32_t zorder = cu->getZorderIdxInCU() + absPartIdxC;
pixel* reconIPred = (chromaId == 1) ? cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder) : cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder);
uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getCStride();
@@ -1474,11 +1475,10 @@
predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, tuSize, chFmt);
//===== get residual signal =====
- X265_CHECK(!((uint32_t)(size_t)fenc & (tuSize - 1)), "fenc alignment failure\n");
- X265_CHECK(!((uint32_t)(size_t)pred & (tuSize - 1)), "pred alignment failure\n");
- X265_CHECK(!((uint32_t)(size_t)residual & (tuSize - 1)), "residual alignment failure\n");
- int size = g_convertToBit[tuSize];
- primitives.calcresidual[size](fenc, pred, residual, stride);
+ X265_CHECK(!((intptr_t)fenc & (tuSize - 1)), "fenc alignment failure\n");
+ X265_CHECK(!((intptr_t)pred & (tuSize - 1)), "pred alignment failure\n");
+ X265_CHECK(!((intptr_t)residual & (tuSize - 1)), "residual alignment failure\n");
+ primitives.calcresidual[sizeIdx](fenc, pred, residual, stride);
//--- transform and quantization ---
uint32_t absSum = 0;
@@ -1513,7 +1513,7 @@
{
int16_t* resiTmp = residual;
memset(coeff, 0, sizeof(coeff_t) * tuSize * tuSize);
- primitives.blockfill_s[size](resiTmp, stride, 0);
+ primitives.blockfill_s[sizeIdx](resiTmp, stride, 0);
}
//===== reconstruction =====
@@ -1521,7 +1521,7 @@
X265_CHECK(tuSize <= 32, "tuSize out of range\n");
// use square primitive
- int part = partitionFromSizes(tuSize, tuSize);
+ int part = partitionFromSize(tuSize);
primitives.chroma[CHROMA_444].add_ps[part](recon, stride, pred, residual, stride, stride);
primitives.chroma[CHROMA_444].copy_pp[part](reconIPred, reconIPredStride, recon, stride);
}
@@ -1565,7 +1565,7 @@
uint32_t overallDistY = 0;
uint32_t candNum;
uint64_t candCostList[FAST_UDI_MAX_RDMODE_NUM];
- uint32_t tuSizeIdx = g_convertToBit[tuSize]; // log2(tuSize) - 2
+ uint32_t sizeIdx = g_convertToBit[tuSize]; // log2(tuSize) - 2
static const uint8_t intraModeNumFast[] = { 8, 8, 3, 3, 3 }; // 4x4, 8x8, 16x16, 32x32, 64x64
//===== loop over partitions =====
@@ -1581,7 +1581,7 @@
pixel* fenc = fencYuv->getLumaAddr(pu, tuSize);
uint32_t stride = predYuv->getStride();
uint32_t rdModeList[FAST_UDI_MAX_RDMODE_NUM];
- int numModesForFullRD = intraModeNumFast[tuSizeIdx];
+ int numModesForFullRD = intraModeNumFast[sizeIdx];
bool doFastSearch = (numModesForFullRD != numModesAvailable);
if (doFastSearch)
@@ -1629,6 +1629,7 @@
scaleTuSize = 32;
scaleStride = 32;
costShift = 2;
+ sizeIdx = 5 - 2; // g_convertToBit[scaleTuSize];
// Filtered and Unfiltered refAbove and refLeft pointing to above and left.
above = aboveScale;
@@ -1637,11 +1638,10 @@
leftFiltered = leftScale;
}
- int log2SizeMinus2 = g_convertToBit[scaleTuSize];
- pixelcmp_t sa8d = primitives.sa8d[log2SizeMinus2];
+ pixelcmp_t sa8d = primitives.sa8d[sizeIdx];
// DC
- primitives.intra_pred[log2SizeMinus2][DC_IDX](tmp, scaleStride, left, above, 0, (scaleTuSize <= 16));
+ primitives.intra_pred[sizeIdx][DC_IDX](tmp, scaleStride, left, above, 0, (scaleTuSize <= 16));
modeCosts[DC_IDX] = sa8d(fenc, scaleStride, tmp, scaleStride) << costShift;
pixel *abovePlanar = above;
@@ -1654,13 +1654,13 @@
}
// PLANAR
- primitives.intra_pred[log2SizeMinus2][PLANAR_IDX](tmp, scaleStride, leftPlanar, abovePlanar, 0, 0);
+ primitives.intra_pred[sizeIdx][PLANAR_IDX](tmp, scaleStride, leftPlanar, abovePlanar, 0, 0);
modeCosts[PLANAR_IDX] = sa8d(fenc, scaleStride, tmp, scaleStride) << costShift;
// Transpose NxN
- primitives.transpose[log2SizeMinus2](buf_trans, fenc, scaleStride);
-
- primitives.intra_pred_allangs[log2SizeMinus2](tmp, above, left, aboveFiltered, leftFiltered, (scaleTuSize <= 16));
+ primitives.transpose[sizeIdx](buf_trans, fenc, scaleStride);
+
+ primitives.intra_pred_allangs[sizeIdx](tmp, above, left, aboveFiltered, leftFiltered, (scaleTuSize <= 16));
for (uint32_t mode = 2; mode < numModesAvailable; mode++)
{
@@ -1786,7 +1786,7 @@
if (pu != numPU - 1)
{
uint32_t zorder = cu->getZorderIdxInCU() + partOffset;
- int part = partitionFromSizes(tuSize, tuSize);
+ int part = partitionFromSize(tuSize);
pixel* dst = cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);
uint32_t dststride = cu->getPic()->getPicYuvRec()->getStride();
pixel* src = reconYuv->getLumaAddr(partOffset);
@@ -1844,6 +1844,8 @@
scaleTuSize = 32;
costShift = 2;
}
+ int sizeIdx = g_convertToBit[scaleTuSize];
+ pixelcmp_t sa8d = primitives.sa8d[sizeIdx];
TComPattern::initAdiPatternChroma(cu, absPartIdx, trDepth, m_predBuf, 1);
TComPattern::initAdiPatternChroma(cu, absPartIdx, trDepth, m_predBuf, 2);
@@ -1866,8 +1868,6 @@
//===== get prediction signal =====
predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, scaleTuSize, chFmt);
- int log2SizeMinus2 = g_convertToBit[scaleTuSize];
- pixelcmp_t sa8d = primitives.sa8d[log2SizeMinus2];
cost += sa8d(fenc, stride, pred, stride) << costShift;
}
@@ -1980,7 +1980,7 @@
if (!isLastSection(&tuIterator))
{
- uint32_t compWidth = (cu->getCUSize(0) >> m_hChromaShift) >> initTrDepth;
+ uint32_t compWidth = (cu->getCUSize(0) >> m_hChromaShift) >> initTrDepth;
uint32_t compHeight = (cu->getCUSize(0) >> m_vChromaShift) >> initTrDepth;
uint32_t zorder = cu->getZorderIdxInCU() + tuIterator.m_partOffset;
pixel* dst = cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder);
@@ -2662,8 +2662,7 @@
uint32_t bits = 0, bestBits = 0;
uint32_t distortion = 0, bdist = 0;
- uint32_t width = cu->getCUSize(0);
- uint32_t height = cu->getCUSize(0);
+ uint32_t cuSize = cu->getCUSize(0);
// No residual coding : SKIP mode
if (bSkipRes)
@@ -2672,10 +2671,10 @@
predYuv->copyToPartYuv(outReconYuv, 0);
// Luma
- int part = partitionFromSizes(width, height);
+ int part = partitionFromSize(cuSize);
distortion = primitives.sse_pp[part](fencYuv->getLumaAddr(), fencYuv->getStride(), outReconYuv->getLumaAddr(), outReconYuv->getStride());
// Chroma
- part = partitionFromSizes(width >> m_hChromaShift, height >> m_vChromaShift);
+ part = partitionFromSizes(cuSize >> m_hChromaShift, cuSize >> m_vChromaShift);
distortion += m_rdCost->scaleChromaDistCb(primitives.sse_pp[part](fencYuv->getCbAddr(), fencYuv->getCStride(), outReconYuv->getCbAddr(), outReconYuv->getCStride()));
distortion += m_rdCost->scaleChromaDistCr(primitives.sse_pp[part](fencYuv->getCrAddr(), fencYuv->getCStride(), outReconYuv->getCrAddr(), outReconYuv->getCStride()));
@@ -2695,9 +2694,9 @@
cu->m_totalDistortion = distortion;
if (m_rdCost->psyRdEnabled())
{
- int size = g_convertToBit[cu->getCUSize(0)];
+ int size = g_convertToBit[cuSize];
uint32_t psyRdCost = m_rdCost->psyCost(size, fencYuv->getLumaAddr(), fencYuv->getStride(),
- outReconYuv->getLumaAddr(), outReconYuv->getStride());
+ outReconYuv->getLumaAddr(), outReconYuv->getStride());
cu->m_totalCost = m_rdCost->calcPsyRdCost(cu->m_totalDistortion, cu->m_totalBits, psyRdCost);
}
else
@@ -2718,7 +2717,7 @@
bits = 0;
distortion = 0;
- outResiYuv->subtract(fencYuv, predYuv, width);
+ outResiYuv->subtract(fencYuv, predYuv, cuSize);
m_rdGoOnSbacCoder->load(m_rdSbacCoders[cu->getDepth(0)][CI_CURR_BEST]);
xEstimateResidualQT(cu, 0, outResiYuv, cu->getDepth(0), cost, bits, distortion, &zeroDistortion, curUseRDOQ);
@@ -2739,9 +2738,9 @@
::memset(cu->getCbf(TEXT_LUMA), 0, qpartnum * sizeof(uint8_t));
::memset(cu->getCbf(TEXT_CHROMA_U), 0, qpartnum * sizeof(uint8_t));
::memset(cu->getCbf(TEXT_CHROMA_V), 0, qpartnum * sizeof(uint8_t));
- ::memset(cu->getCoeffY(), 0, width * height * sizeof(coeff_t));
- ::memset(cu->getCoeffCb(), 0, width * height * sizeof(coeff_t) >> (m_hChromaShift + m_vChromaShift));
- ::memset(cu->getCoeffCr(), 0, width * height * sizeof(coeff_t) >> (m_hChromaShift + m_vChromaShift));
+ ::memset(cu->getCoeffY(), 0, cuSize * cuSize * sizeof(coeff_t));
+ ::memset(cu->getCoeffCb(), 0, cuSize * cuSize * sizeof(coeff_t) >> (m_hChromaShift + m_vChromaShift));
+ ::memset(cu->getCoeffCr(), 0, cuSize * cuSize * sizeof(coeff_t) >> (m_hChromaShift + m_vChromaShift));
cu->setTransformSkipSubParts(0, 0, 0, 0, cu->getDepth(0));
}
else
@@ -2771,7 +2770,7 @@
if (cu->getQtRootCbf(0))
{
- outReconYuv->addClip(predYuv, outBestResiYuv, width);
+ outReconYuv->addClip(predYuv, outBestResiYuv, cuSize);
}
else
{
@@ -2779,16 +2778,16 @@
}
// update with clipped distortion and cost (qp estimation loop uses unclipped values)
- int part = partitionFromSizes(width, height);
+ int part = partitionFromSize(cuSize);
bdist = primitives.sse_pp[part](fencYuv->getLumaAddr(), fencYuv->getStride(), outReconYuv->getLumaAddr(), outReconYuv->getStride());
- part = partitionFromSizes(width >> cu->getHorzChromaShift(), height >> cu->getVertChromaShift());
+ part = partitionFromSizes(cuSize >> m_hChromaShift, cuSize >> m_vChromaShift);
bdist += m_rdCost->scaleChromaDistCb(primitives.sse_pp[part](fencYuv->getCbAddr(), fencYuv->getCStride(), outReconYuv->getCbAddr(), outReconYuv->getCStride()));
bdist += m_rdCost->scaleChromaDistCr(primitives.sse_pp[part](fencYuv->getCrAddr(), fencYuv->getCStride(), outReconYuv->getCrAddr(), outReconYuv->getCStride()));
if (m_rdCost->psyRdEnabled())
{
- int size = g_convertToBit[cu->getCUSize(0)];
+ int size = g_convertToBit[cuSize];
uint32_t psyRdCost = m_rdCost->psyCost(size, fencYuv->getLumaAddr(), fencYuv->getStride(),
- outReconYuv->getLumaAddr(), outReconYuv->getStride());
+ outReconYuv->getLumaAddr(), outReconYuv->getStride());
bcost = m_rdCost->calcPsyRdCost(bdist, bestBits, psyRdCost);
}
else
@@ -2847,8 +2846,7 @@
{
X265_CHECK(cu->getDepth(0) == cu->getDepth(absPartIdx), "invalid depth\n");
const uint32_t trMode = depth - cu->getDepth(0);
- const uint32_t trSizeLog2 = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUSize() >> depth] + 2;
- uint32_t trSizeCLog2 = trSizeLog2 - m_hChromaShift;
+ const uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - depth;
const uint32_t setCbf = 1 << trMode;
int chFmt = cu->getChromaFormat();
@@ -2861,34 +2859,35 @@
const bool bCheckSplit = (trSizeLog2 > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx));
X265_CHECK(bCheckFull || bCheckSplit, "check-full or check-split must be set\n");
- bool bCodeChroma = true;
- uint32_t trModeC = trMode;
- if ((trSizeLog2 == 2) && !(chFmt == CHROMA_444))
- {
- trSizeCLog2++;
- trModeC--;
- uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((depth - 1) << 1);
- bCodeChroma = ((absPartIdx % qpdiv) == 0);
- }
-
- const bool splitIntoSubTUs = (chFmt == CHROMA_422);
- uint32_t absPartIdxStep = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trModeC) << 1);
-
// code full block
uint32_t absSumY = 0, absSumU = 0, absSumV = 0;
int lastPosY = -1, lastPosU = -1, lastPosV = -1;
if (bCheckFull)
{
- const uint32_t numCoeffPerAbsPartIdxIncrement = cu->getSlice()->getSPS()->getMaxCUSize() * cu->getSlice()->getSPS()->getMaxCUSize() >> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1);
-
- coeff_t *coeffCurY = cu->getCoeffY() + (numCoeffPerAbsPartIdxIncrement * absPartIdx);
- coeff_t *coeffCurU = cu->getCoeffCb() + (numCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift + m_vChromaShift));
- coeff_t *coeffCurV = cu->getCoeffCr() + (numCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift + m_vChromaShift));
-
- int trWidth = 0, trHeight = 0, trWidthC = 0, trHeightC = 0;
-
- trWidth = trHeight = 1 << trSizeLog2;
- trWidthC = trHeightC = 1 << trSizeCLog2;
+ uint32_t trSizeCLog2 = trSizeLog2 - m_hChromaShift;
+ bool bCodeChroma = true;
+ uint32_t trModeC = trMode;
+ if ((trSizeLog2 == 2) && !(chFmt == CHROMA_444))
+ {
+ trSizeCLog2++;
+ trModeC--;
+ uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((depth - 1) << 1);
+ bCodeChroma = ((absPartIdx & (qpdiv - 1)) == 0);
+ }
+
+ const bool splitIntoSubTUs = (chFmt == CHROMA_422);
+ uint32_t absPartIdxStep = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trModeC) << 1);
+
+ uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
+ uint32_t coeffOffsetC = coeffOffsetY >> (m_hChromaShift + m_vChromaShift);
+ coeff_t *coeffCurY = cu->getCoeffY() + coeffOffsetY;
+ coeff_t *coeffCurU = cu->getCoeffCb() + coeffOffsetC;
+ coeff_t *coeffCurV = cu->getCoeffCr() + coeffOffsetC;
+
+ uint32_t trSize = 1 << trSizeLog2;
+ uint32_t trSizeC = 1 << trSizeCLog2;
+ uint32_t sizeIdx = trSizeLog2 - 2;
+ uint32_t sizeIdxC = trSizeCLog2 - 2;
cu->setTrIdxSubParts(depth - cu->getDepth(0), absPartIdx, depth);
cu->setTransformSkipSubParts(0, TEXT_LUMA, absPartIdx, depth);
@@ -2897,7 +2896,7 @@
m_trQuant->selectLambda(TEXT_LUMA);
absSumY = m_trQuant->transformNxN(cu, resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width, coeffCurY,
- trWidth, TEXT_LUMA, absPartIdx, &lastPosY, false, curuseRDOQ);
+ trSize, TEXT_LUMA, absPartIdx, &lastPosY, false, curuseRDOQ);
cu->setCbfSubParts(absSumY ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);
@@ -2909,13 +2908,12 @@
int scalingListType = 3 + TEXT_LUMA;
X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiY, resiYuv->m_width, coeffCurY, trWidth, scalingListType, false, lastPosY); //this is for inter mode only
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiY, resiYuv->m_width, coeffCurY, trSize, scalingListType, false, lastPosY); //this is for inter mode only
}
else
{
int16_t *ptr = resiYuv->getLumaAddr(absPartIdx);
- X265_CHECK(trWidth == trHeight, "square transform expected\n");
- primitives.blockfill_s[(int)g_convertToBit[trWidth]](ptr, resiYuv->m_width, 0);
+ primitives.blockfill_s[sizeIdx](ptr, resiYuv->m_width, 0);
}
cu->setCbfSubParts(absSumY ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);
@@ -2924,13 +2922,10 @@
TComTURecurse tuIterator;
initSection(&tuIterator, splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, absPartIdxStep, absPartIdx);
- uint32_t widthC = trWidthC;
- uint32_t heightC = trWidthC;
-
do
{
uint32_t absPartIdxC = tuIterator.m_absPartIdxTURelCU;
- uint32_t subTUBufferOffset = widthC * heightC * tuIterator.m_section;
+ uint32_t subTUBufferOffset = trSizeC * trSizeC * tuIterator.m_section;
cu->setTransformSkipPartRange(0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
cu->setTransformSkipPartRange(0, TEXT_CHROMA_V, absPartIdxC, tuIterator.m_absPartIdxStep);
@@ -2941,12 +2936,12 @@
m_trQuant->selectLambda(TEXT_CHROMA);
absSumU = m_trQuant->transformNxN(cu, resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth, coeffCurU + subTUBufferOffset,
- trWidthC, TEXT_CHROMA_U, absPartIdxC, &lastPosU, false, curuseRDOQ);
+ trSizeC, TEXT_CHROMA_U, absPartIdxC, &lastPosU, false, curuseRDOQ);
curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
absSumV = m_trQuant->transformNxN(cu, resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth, coeffCurV + subTUBufferOffset,
- trWidthC, TEXT_CHROMA_V, absPartIdxC, &lastPosV, false, curuseRDOQ);
+ trSizeC, TEXT_CHROMA_V, absPartIdxC, &lastPosV, false, curuseRDOQ);
cu->setCbfPartRange(absSumU ? setCbf : 0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
cu->setCbfPartRange(absSumV ? setCbf : 0, TEXT_CHROMA_V, absPartIdxC, tuIterator.m_absPartIdxStep);
@@ -2960,13 +2955,12 @@
int scalingListType = 3 + TEXT_CHROMA_U;
X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, pcResiCurrU, resiYuv->m_cwidth, coeffCurU + subTUBufferOffset, trWidthC, scalingListType, false, lastPosU);
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, pcResiCurrU, resiYuv->m_cwidth, coeffCurU + subTUBufferOffset, trSizeC, scalingListType, false, lastPosU);
}
else
{
int16_t *ptr = resiYuv->getCbAddr(absPartIdxC);
- X265_CHECK(widthC == heightC, "square chroma transform expected\n");
- primitives.blockfill_s[(int)g_convertToBit[trWidthC]](ptr, resiYuv->m_cwidth, 0);
+ primitives.blockfill_s[sizeIdxC](ptr, resiYuv->m_cwidth, 0);
}
if (absSumV)
{
@@ -2976,13 +2970,12 @@
int scalingListType = 3 + TEXT_CHROMA_V;
X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiV, resiYuv->m_cwidth, coeffCurV + subTUBufferOffset, trWidthC, scalingListType, false, lastPosV);
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiV, resiYuv->m_cwidth, coeffCurV + subTUBufferOffset, trSizeC, scalingListType, false, lastPosV);
}
else
{
int16_t *ptr = resiYuv->getCrAddr(absPartIdxC);
- X265_CHECK(widthC == heightC, "square chroma transform expected\n");
- primitives.blockfill_s[(int)g_convertToBit[trWidthC]](ptr, resiYuv->m_cwidth, 0);
+ primitives.blockfill_s[sizeIdxC](ptr, resiYuv->m_cwidth, 0);
}
cu->setCbfPartRange(absSumU ? setCbf : 0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
cu->setCbfPartRange(absSumV ? setCbf : 0, TEXT_CHROMA_V, absPartIdxC, tuIterator.m_absPartIdxStep);
@@ -3040,8 +3033,7 @@
{
X265_CHECK(cu->getDepth(0) == cu->getDepth(absPartIdx), "depth not matching\n");
const uint32_t trMode = depth - cu->getDepth(0);
- const uint32_t trSizeLog2 = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUSize() >> depth] + 2;
- uint32_t trSizeCLog2 = trSizeLog2 - m_hChromaShift;
+ const uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - depth;
const uint32_t subTUDepth = trMode + 1;
const uint32_t setCbf = 1 << trMode;
int chFmt = cu->getChromaFormat();
@@ -3055,6 +3047,7 @@
const bool bCheckSplit = (trSizeLog2 > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx));
X265_CHECK(bCheckFull || bCheckSplit, "check-full or check-split must be set\n");
+ uint32_t trSizeCLog2 = trSizeLog2 - m_hChromaShift;
bool bCodeChroma = true;
uint32_t trModeC = trMode;
if ((trSizeLog2 == 2) && !(chFmt == CHROMA_444))
@@ -3062,7 +3055,7 @@
trSizeCLog2++;
trModeC--;
uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((depth - 1) << 1);
- bCodeChroma = ((absPartIdx % qpdiv) == 0);
+ bCodeChroma = ((absPartIdx & (qpdiv - 1)) == 0);
}
// code full block
@@ -3080,22 +3073,20 @@
uint32_t bestsubTUCBF[MAX_NUM_COMPONENT][2];
m_rdGoOnSbacCoder->store(m_rdSbacCoders[depth][CI_QT_TRAFO_ROOT]);
- int trWidth = 0, trHeight = 0, trWidthC = 0, trHeightC = 0;
-
- trWidth = trHeight = 1 << trSizeLog2;
- trWidthC = 1 << trSizeCLog2;
- trHeightC = (chFmt == CHROMA_422) ? (trWidthC << 1) : trWidthC;
+ uint32_t trSize = 1 << trSizeLog2;
const bool splitIntoSubTUs = (chFmt == CHROMA_422);
uint32_t absPartIdxStep = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trModeC) << 1);
// code full block
if (bCheckFull)
{
- const uint32_t numCoeffPerAbsPartIdxIncrement = cu->getSlice()->getSPS()->getMaxCUSize() * cu->getSlice()->getSPS()->getMaxCUSize() >> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1);
+ uint32_t trSizeC = 1 << trSizeCLog2;
const uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
- coeff_t *coeffCurY = m_qtTempCoeffY[qtlayer] + (numCoeffPerAbsPartIdxIncrement * absPartIdx);
- coeff_t *coeffCurU = m_qtTempCoeffCb[qtlayer] + (numCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift + m_vChromaShift));
- coeff_t *coeffCurV = m_qtTempCoeffCr[qtlayer] + (numCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift + m_vChromaShift));
+ uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
+ uint32_t coeffOffsetC = coeffOffsetY >> (m_hChromaShift + m_vChromaShift);
+ coeff_t *coeffCurY = m_qtTempCoeffY[qtlayer] + coeffOffsetY;
+ coeff_t *coeffCurU = m_qtTempCoeffCb[qtlayer] + coeffOffsetC;
+ coeff_t *coeffCurV = m_qtTempCoeffCr[qtlayer] + coeffOffsetC;
cu->setTrIdxSubParts(depth - cu->getDepth(0), absPartIdx, depth);
bool checkTransformSkip = cu->getSlice()->getPPS()->getUseTransformSkip() && !cu->getCUTransquantBypass(0);
@@ -3106,21 +3097,21 @@
if (m_cfg->bEnableRDOQ && curuseRDOQ)
{
- m_entropyCoder->estimateBit(m_trQuant->m_estBitsSbac, trWidth, TEXT_LUMA);
+ m_entropyCoder->estimateBit(m_trQuant->m_estBitsSbac, trSize, TEXT_LUMA);
}
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
m_trQuant->selectLambda(TEXT_LUMA);
absSum[TEXT_LUMA][0] = m_trQuant->transformNxN(cu, resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width, coeffCurY,
- trWidth, TEXT_LUMA, absPartIdx, &lastPos[TEXT_LUMA][0], false, curuseRDOQ);
+ trSize, TEXT_LUMA, absPartIdx, &lastPos[TEXT_LUMA][0], false, curuseRDOQ);
cu->setCbfSubParts(absSum[TEXT_LUMA][0] ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);
m_entropyCoder->resetBits();
- m_entropyCoder->encodeQtCbf(cu, absPartIdx, 0, trWidth, trHeight, TEXT_LUMA, trMode, true);
+ m_entropyCoder->encodeQtCbf(cu, absPartIdx, 0, trSize, trSize, TEXT_LUMA, trMode, true);
if (absSum[TEXT_LUMA][0])
- m_entropyCoder->encodeCoeffNxN(cu, coeffCurY, absPartIdx, trWidth, TEXT_LUMA);
+ m_entropyCoder->encodeCoeffNxN(cu, coeffCurY, absPartIdx, trSize, TEXT_LUMA);
singleBitsComp[TEXT_LUMA][0] = m_entropyCoder->getNumberOfWrittenBits();
uint32_t singleBitsPrev = singleBitsComp[TEXT_LUMA][0];
@@ -3130,20 +3121,17 @@
TComTURecurse tuIterator;
initSection(&tuIterator, splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, absPartIdxStep, absPartIdx);
- uint32_t widthC = trWidthC;
- uint32_t heightC = splitIntoSubTUs ? (trHeightC >> 1) : trHeightC;
-
do
{
uint32_t absPartIdxC = tuIterator.m_absPartIdxTURelCU;
- uint32_t subTUBufferOffset = widthC * heightC * tuIterator.m_section;
+ uint32_t subTUBufferOffset = trSizeC * trSizeC * tuIterator.m_section;
cu->setTransformSkipPartRange(0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
cu->setTransformSkipPartRange(0, TEXT_CHROMA_V, absPartIdxC, tuIterator.m_absPartIdxStep);
if (m_cfg->bEnableRDOQ && curuseRDOQ)
{
- m_entropyCoder->estimateBit(m_trQuant->m_estBitsSbac, widthC, TEXT_CHROMA);
+ m_entropyCoder->estimateBit(m_trQuant->m_estBitsSbac, trSizeC, TEXT_CHROMA);
}
//Cb transform
int curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();
@@ -3152,24 +3140,24 @@
m_trQuant->selectLambda(TEXT_CHROMA);
absSum[TEXT_CHROMA_U][tuIterator.m_section] = m_trQuant->transformNxN(cu, resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth, coeffCurU + subTUBufferOffset,
- widthC, TEXT_CHROMA_U, absPartIdxC, &lastPos[TEXT_CHROMA_U][tuIterator.m_section], false, curuseRDOQ);
+ trSizeC, TEXT_CHROMA_U, absPartIdxC, &lastPos[TEXT_CHROMA_U][tuIterator.m_section], false, curuseRDOQ);
//Cr transform
curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
absSum[TEXT_CHROMA_V][tuIterator.m_section] = m_trQuant->transformNxN(cu, resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth, coeffCurV + subTUBufferOffset,
- widthC, TEXT_CHROMA_V, absPartIdxC, &lastPos[TEXT_CHROMA_V][tuIterator.m_section], false, curuseRDOQ);
+ trSizeC, TEXT_CHROMA_V, absPartIdxC, &lastPos[TEXT_CHROMA_V][tuIterator.m_section], false, curuseRDOQ);
cu->setCbfPartRange(absSum[TEXT_CHROMA_U][tuIterator.m_section] ? setCbf : 0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
cu->setCbfPartRange(absSum[TEXT_CHROMA_V][tuIterator.m_section] ? setCbf : 0, TEXT_CHROMA_V, absPartIdxC, tuIterator.m_absPartIdxStep);
- m_entropyCoder->encodeQtCbf(cu, absPartIdxC, tuIterator.m_absPartIdxStep, widthC, heightC, TEXT_CHROMA_U, trMode, true);
+ m_entropyCoder->encodeQtCbf(cu, absPartIdxC, tuIterator.m_absPartIdxStep, trSizeC, trSizeC, TEXT_CHROMA_U, trMode, true);
if (absSum[TEXT_CHROMA_U][tuIterator.m_section])
- m_entropyCoder->encodeCoeffNxN(cu, coeffCurU + subTUBufferOffset, absPartIdxC, widthC, TEXT_CHROMA_U);
+ m_entropyCoder->encodeCoeffNxN(cu, coeffCurU + subTUBufferOffset, absPartIdxC, trSizeC, TEXT_CHROMA_U);
singleBitsComp[TEXT_CHROMA_U][tuIterator.m_section] = m_entropyCoder->getNumberOfWrittenBits() - singleBitsPrev;
- m_entropyCoder->encodeQtCbf(cu, absPartIdxC, tuIterator.m_absPartIdxStep, widthC, heightC, TEXT_CHROMA_V, trMode, true);
+ m_entropyCoder->encodeQtCbf(cu, absPartIdxC, tuIterator.m_absPartIdxStep, trSizeC, trSizeC, TEXT_CHROMA_V, trMode, true);
if (absSum[TEXT_CHROMA_V][tuIterator.m_section])
- m_entropyCoder->encodeCoeffNxN(cu, coeffCurV + subTUBufferOffset, absPartIdxC, widthC, TEXT_CHROMA_V);
+ m_entropyCoder->encodeCoeffNxN(cu, coeffCurV + subTUBufferOffset, absPartIdxC, trSizeC, TEXT_CHROMA_V);
uint32_t newBits = m_entropyCoder->getNumberOfWrittenBits();
singleBitsComp[TEXT_CHROMA_V][tuIterator.m_section] = newBits - (singleBitsPrev + singleBitsComp[TEXT_CHROMA_U][tuIterator.m_section]);
@@ -3187,8 +3175,8 @@
minCost[TEXT_CHROMA_V][subTUIndex] = MAX_INT64;
}
- int partSize = partitionFromSizes(trWidth, trHeight);
- uint32_t distY = primitives.sse_sp[partSize](resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width, (pixel*)RDCost::zeroPel, trWidth);
+ int partSize = partitionFromSize(trSize);
+ uint32_t distY = primitives.sse_sp[partSize](resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width, (pixel*)RDCost::zeroPel, trSize);
if (outZeroDist)
{
@@ -3203,7 +3191,7 @@
int scalingListType = 3 + TEXT_LUMA;
X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);
X265_CHECK(m_qtTempShortYuv[qtlayer].m_width == MAX_CU_SIZE, "width not full CU\n");
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiY, MAX_CU_SIZE, coeffCurY, trWidth, scalingListType, false, lastPos[TEXT_LUMA][0]); //this is for inter mode only
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiY, MAX_CU_SIZE, coeffCurY, trSize, scalingListType, false, lastPos[TEXT_LUMA][0]); //this is for inter mode only
const uint32_t nonZeroDistY = primitives.sse_ss[partSize](resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width, m_qtTempShortYuv[qtlayer].getLumaAddr(absPartIdx), MAX_CU_SIZE);
if (cu->isLosslessCoded(0))
@@ -3250,8 +3238,8 @@
{
int16_t *ptr = m_qtTempShortYuv[qtlayer].getLumaAddr(absPartIdx);
X265_CHECK(m_qtTempShortYuv[qtlayer].m_width == MAX_CU_SIZE, "width not full CU\n");
- X265_CHECK(trWidth == trHeight, "not square block\n");
- primitives.blockfill_s[(int)g_convertToBit[trWidth]](ptr, MAX_CU_SIZE, 0);
+ int sizeIdx = trSizeLog2 - 2;
+ primitives.blockfill_s[sizeIdx](ptr, MAX_CU_SIZE, 0);
}
cu->setCbfSubParts(absSum[TEXT_LUMA][0] ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);
@@ -3262,18 +3250,15 @@
TComTURecurse tuIterator;
initSection(&tuIterator, splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, absPartIdxStep, absPartIdx);
- uint32_t widthC = trWidthC;
- uint32_t heightC = splitIntoSubTUs ? (trHeightC >> 1) : trHeightC;
-
- int partSizeC = partitionFromSizes(widthC, heightC);
- const uint32_t numSamplesChroma = widthC * heightC;
+ int partSizeC = partitionFromSize(trSizeC);
+ const uint32_t numSamplesChroma = trSizeC * trSizeC;
do
{
uint32_t absPartIdxC = tuIterator.m_absPartIdxTURelCU;
- uint32_t subTUBufferOffset = widthC * heightC * tuIterator.m_section;
-
- distU = m_rdCost->scaleChromaDistCb(primitives.sse_sp[partSizeC](resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth, (pixel*)RDCost::zeroPel, widthC));
+ uint32_t subTUBufferOffset = trSizeC * trSizeC * tuIterator.m_section;
+
+ distU = m_rdCost->scaleChromaDistCb(primitives.sse_sp[partSizeC](resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth, (pixel*)RDCost::zeroPel, trSizeC));
if (outZeroDist)
{
@@ -3289,7 +3274,7 @@
int scalingListType = 3 + TEXT_CHROMA_U;
X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);
m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, pcResiCurrU, m_qtTempShortYuv[qtlayer].m_cwidth, coeffCurU + subTUBufferOffset,
- widthC, scalingListType, false, lastPos[TEXT_CHROMA_U][tuIterator.m_section]);
+ trSizeC, scalingListType, false, lastPos[TEXT_CHROMA_U][tuIterator.m_section]);
uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth,
m_qtTempShortYuv[qtlayer].getCbAddr(absPartIdxC),
m_qtTempShortYuv[qtlayer].m_cwidth);
@@ -3339,11 +3324,11 @@
{
int16_t *ptr = m_qtTempShortYuv[qtlayer].getCbAddr(absPartIdxC);
const uint32_t stride = m_qtTempShortYuv[qtlayer].m_cwidth;
- X265_CHECK(widthC == heightC, "square chroma transform expected\n");
- primitives.blockfill_s[(int)g_convertToBit[widthC]](ptr, stride, 0);
+ int sizeIdxC = trSizeCLog2 - 2;
+ primitives.blockfill_s[sizeIdxC](ptr, stride, 0);
}
- distV = m_rdCost->scaleChromaDistCr(primitives.sse_sp[partSizeC](resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth, (pixel*)RDCost::zeroPel, widthC));
+ distV = m_rdCost->scaleChromaDistCr(primitives.sse_sp[partSizeC](resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth, (pixel*)RDCost::zeroPel, trSizeC));
if (outZeroDist)
{
*outZeroDist += distV;
@@ -3357,7 +3342,7 @@
int scalingListType = 3 + TEXT_CHROMA_V;
X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);
m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiV, m_qtTempShortYuv[qtlayer].m_cwidth, coeffCurV + subTUBufferOffset,
- widthC, scalingListType, false, lastPos[TEXT_CHROMA_V][tuIterator.m_section]);
+ trSizeC, scalingListType, false, lastPos[TEXT_CHROMA_V][tuIterator.m_section]);
uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth,
m_qtTempShortYuv[qtlayer].getCrAddr(absPartIdxC),
m_qtTempShortYuv[qtlayer].m_cwidth);
@@ -3407,8 +3392,8 @@
{
int16_t *ptr = m_qtTempShortYuv[qtlayer].getCrAddr(absPartIdxC);
const uint32_t stride = m_qtTempShortYuv[qtlayer].m_cwidth;
- X265_CHECK(widthC == heightC, "square chroma transform expected\n");
- primitives.blockfill_s[(int)g_convertToBit[widthC]](ptr, stride, 0);
+ int sizeIdxC = trSizeCLog2 - 2;
+ primitives.blockfill_s[sizeIdxC](ptr, stride, 0);
}
cu->setCbfPartRange(absSum[TEXT_CHROMA_U][tuIterator.m_section] ? setCbf : 0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
@@ -3430,9 +3415,9 @@
memcpy(bestCoeffY, coeffCurY, sizeof(coeff_t) * numSamplesLuma);
int16_t bestResiY[32 * 32];
- for (int i = 0; i < trHeight; ++i)
+ for (int i = 0; i < trSize; ++i)
{
- memcpy(bestResiY + i * trWidth, curResiY + i * MAX_CU_SIZE, sizeof(int16_t) * trWidth);
+ memcpy(bestResiY + i * trSize, curResiY + i * MAX_CU_SIZE, sizeof(int16_t) * trSize);
}
m_rdGoOnSbacCoder->load(m_rdSbacCoders[depth][CI_QT_TRAFO_ROOT]);
@@ -3441,21 +3426,21 @@
if (m_cfg->bEnableRDOQTS)
{
- m_entropyCoder->estimateBit(m_trQuant->m_estBitsSbac, trWidth, TEXT_LUMA);
+ m_entropyCoder->estimateBit(m_trQuant->m_estBitsSbac, trSize, TEXT_LUMA);
}
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
m_trQuant->selectLambda(TEXT_LUMA);
absSumTransformSkipY = m_trQuant->transformNxN(cu, resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width, coeffCurY,
- trWidth, TEXT_LUMA, absPartIdx, &lastPosTransformSkip[TEXT_LUMA][0], true, curuseRDOQ);
+ trSize, TEXT_LUMA, absPartIdx, &lastPosTransformSkip[TEXT_LUMA][0], true, curuseRDOQ);
cu->setCbfSubParts(absSumTransformSkipY ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);
if (absSumTransformSkipY)
{
m_entropyCoder->resetBits();
- m_entropyCoder->encodeQtCbf(cu, absPartIdx, 0, trWidth, trHeight, TEXT_LUMA, trMode, true);
- m_entropyCoder->encodeCoeffNxN(cu, coeffCurY, absPartIdx, trWidth, TEXT_LUMA);
+ m_entropyCoder->encodeQtCbf(cu, absPartIdx, 0, trSize, trSize, TEXT_LUMA, trMode, true);
+ m_entropyCoder->encodeCoeffNxN(cu, coeffCurY, absPartIdx, trSize, TEXT_LUMA);
const uint32_t skipSingleBitsY = m_entropyCoder->getNumberOfWrittenBits();
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
@@ -3464,7 +3449,7 @@
X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);
X265_CHECK(m_qtTempShortYuv[qtlayer].m_width == MAX_CU_SIZE, "width not full CU\n");
- m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiY, MAX_CU_SIZE, coeffCurY, trWidth, scalingListType, true, lastPosTransformSkip[TEXT_LUMA][0]);
+ m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiY, MAX_CU_SIZE, coeffCurY, trSize, scalingListType, true, lastPosTransformSkip[TEXT_LUMA][0]);
nonZeroDistY = primitives.sse_ss[partSize](resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width,
m_qtTempShortYuv[qtlayer].getLumaAddr(absPartIdx),
@@ -3477,9 +3462,9 @@
{
cu->setTransformSkipSubParts(0, TEXT_LUMA, absPartIdx, depth);
memcpy(coeffCurY, bestCoeffY, sizeof(coeff_t) * numSamplesLuma);
- for (int i = 0; i < trHeight; ++i)
+ for (int i = 0; i < trSize; ++i)
{
- memcpy(curResiY + i * MAX_CU_SIZE, &bestResiY[i * trWidth], sizeof(int16_t) * trWidth);
+ memcpy(curResiY + i * MAX_CU_SIZE, &bestResiY[i * trSize], sizeof(int16_t) * trSize);
}
}
else
@@ -3503,16 +3488,13 @@
TComTURecurse tuIterator;
initSection(&tuIterator, splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, absPartIdxStep, absPartIdx);
- uint32_t widthC = trWidthC;
- uint32_t heightC = splitIntoSubTUs ? (trHeightC >> 1) : trHeightC;
-
- int partSizeC = partitionFromSizes(widthC, heightC);
- const uint32_t numSamplesChroma = widthC * heightC;
+ int partSizeC = partitionFromSize(trSizeC);
+ const uint32_t numSamplesChroma = trSizeC * trSizeC;
do
{
uint32_t absPartIdxC = tuIterator.m_absPartIdxTURelCU;
- uint32_t subTUBufferOffset = widthC * heightC * tuIterator.m_section;
+ uint32_t subTUBufferOffset = trSizeC * trSizeC * tuIterator.m_section;
int16_t *curResiU = m_qtTempShortYuv[qtlayer].getCbAddr(absPartIdxC);
int16_t *curResiV = m_qtTempShortYuv[qtlayer].getCrAddr(absPartIdxC);
@@ -3523,10 +3505,10 @@
memcpy(bestCoeffV, coeffCurV + subTUBufferOffset, sizeof(coeff_t) * numSamplesChroma);
int16_t bestResiU[32 * 32], bestResiV[32 * 32];
- for (int i = 0; i < heightC; ++i)
+ for (int i = 0; i < trSizeC; ++i)
{
- memcpy(&bestResiU[i * widthC], curResiU + i * stride, sizeof(int16_t) * widthC);
- memcpy(&bestResiV[i * widthC], curResiV + i * stride, sizeof(int16_t) * widthC);
+ memcpy(&bestResiU[i * trSizeC], curResiU + i * stride, sizeof(int16_t) * trSizeC);
+ memcpy(&bestResiV[i * trSizeC], curResiV + i * stride, sizeof(int16_t) * trSizeC);
}
cu->setTransformSkipPartRange(1, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
@@ -3534,7 +3516,7 @@
if (m_cfg->bEnableRDOQTS)
{
- m_entropyCoder->estimateBit(m_trQuant->m_estBitsSbac, widthC, TEXT_CHROMA);
+ m_entropyCoder->estimateBit(m_trQuant->m_estBitsSbac, trSizeC, TEXT_CHROMA);
}
int curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();
@@ -3542,11 +3524,11 @@
m_trQuant->selectLambda(TEXT_CHROMA);
absSumTransformSkipU = m_trQuant->transformNxN(cu, resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth, coeffCurU + subTUBufferOffset,
- widthC, TEXT_CHROMA_U, absPartIdxC, &lastPosTransformSkip[TEXT_CHROMA_U][tuIterator.m_section], true, curuseRDOQ);
+ trSizeC, TEXT_CHROMA_U, absPartIdxC, &lastPosTransformSkip[TEXT_CHROMA_U][tuIterator.m_section], true, curuseRDOQ);
curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
absSumTransformSkipV = m_trQuant->transformNxN(cu, resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth, coeffCurV + subTUBufferOffset,
- widthC, TEXT_CHROMA_V, absPartIdxC, &lastPosTransformSkip[TEXT_CHROMA_V][tuIterator.m_section], true, curuseRDOQ);
+ trSizeC, TEXT_CHROMA_V, absPartIdxC, &lastPosTransformSkip[TEXT_CHROMA_V][tuIterator.m_section], true, curuseRDOQ);
cu->setCbfPartRange(absSumTransformSkipU ? setCbf : 0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
cu->setCbfPartRange(absSumTransformSkipV ? setCbf : 0, TEXT_CHROMA_V, absPartIdxC, tuIterator.m_absPartIdxStep);
@@ -3556,8 +3538,8 @@
if (absSumTransformSkipU)
{
- m_entropyCoder->encodeQtCbf(cu, absPartIdxC, tuIterator.m_absPartIdxStep, widthC, heightC, TEXT_CHROMA_U, trMode, true);
- m_entropyCoder->encodeCoeffNxN(cu, coeffCurU + subTUBufferOffset, absPartIdxC, widthC, TEXT_CHROMA_U);
+ m_entropyCoder->encodeQtCbf(cu, absPartIdxC, tuIterator.m_absPartIdxStep, trSizeC, trSizeC, TEXT_CHROMA_U, trMode, true);
+ m_entropyCoder->encodeCoeffNxN(cu, coeffCurU + subTUBufferOffset, absPartIdxC, trSizeC, TEXT_CHROMA_U);
singleBitsComp[TEXT_CHROMA_U][tuIterator.m_section] = m_entropyCoder->getNumberOfWrittenBits();
curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();
@@ -3566,7 +3548,7 @@
int scalingListType = 3 + TEXT_CHROMA_U;
X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);
m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiU, m_qtTempShortYuv[qtlayer].m_cwidth, coeffCurU + subTUBufferOffset,
- widthC, scalingListType, true, lastPosTransformSkip[TEXT_CHROMA_U][tuIterator.m_section]);
+ trSizeC, scalingListType, true, lastPosTransformSkip[TEXT_CHROMA_U][tuIterator.m_section]);
uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth,
m_qtTempShortYuv[qtlayer].getCbAddr(absPartIdxC),
m_qtTempShortYuv[qtlayer].m_cwidth);
@@ -3579,9 +3561,9 @@
cu->setTransformSkipPartRange(0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
memcpy(coeffCurU + subTUBufferOffset, bestCoeffU, sizeof(coeff_t) * numSamplesChroma);
- for (int i = 0; i < heightC; ++i)
+ for (int i = 0; i < trSizeC; ++i)
{
- memcpy(curResiU + i * stride, &bestResiU[i * widthC], sizeof(int16_t) * widthC);
+ memcpy(curResiU + i * stride, &bestResiU[i * trSizeC], sizeof(int16_t) * trSizeC);
}
}
else
@@ -3593,8 +3575,8 @@
if (absSumTransformSkipV)
{
- m_entropyCoder->encodeQtCbf(cu, absPartIdxC, tuIterator.m_absPartIdxStep, widthC, heightC, TEXT_CHROMA_V, trMode, true);
- m_entropyCoder->encodeCoeffNxN(cu, coeffCurV + subTUBufferOffset, absPartIdxC, widthC, TEXT_CHROMA_V);
+ m_entropyCoder->encodeQtCbf(cu, absPartIdxC, tuIterator.m_absPartIdxStep, trSizeC, trSizeC, TEXT_CHROMA_V, trMode, true);
+ m_entropyCoder->encodeCoeffNxN(cu, coeffCurV + subTUBufferOffset, absPartIdxC, trSizeC, TEXT_CHROMA_V);
singleBitsComp[TEXT_CHROMA_V][tuIterator.m_section] = m_entropyCoder->getNumberOfWrittenBits() - singleBitsComp[TEXT_CHROMA_U][tuIterator.m_section];
curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
@@ -3603,7 +3585,7 @@
int scalingListType = 3 + TEXT_CHROMA_V;
X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);
m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiV, m_qtTempShortYuv[qtlayer].m_cwidth, coeffCurV + subTUBufferOffset,
- widthC, scalingListType, true, lastPosTransformSkip[TEXT_CHROMA_V][tuIterator.m_section]);
+ trSizeC, scalingListType, true, lastPosTransformSkip[TEXT_CHROMA_V][tuIterator.m_section]);
uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth,
m_qtTempShortYuv[qtlayer].getCrAddr(absPartIdxC),
m_qtTempShortYuv[qtlayer].m_cwidth);
@@ -3616,9 +3598,9 @@
cu->setTransformSkipPartRange(0, TEXT_CHROMA_V, absPartIdxC, tuIterator.m_absPartIdxStep);
memcpy(coeffCurV + subTUBufferOffset, bestCoeffV, sizeof(coeff_t) * numSamplesChroma);
- for (int i = 0; i < heightC; ++i)
+ for (int i = 0; i < trSizeC; ++i)
{
- memcpy(curResiV + i * stride, &bestResiV[i * widthC], sizeof(int16_t) * widthC);
+ memcpy(curResiV + i * stride, &bestResiV[i * trSizeC], sizeof(int16_t) * trSizeC);
}
}
else
@@ -3651,36 +3633,37 @@
offsetSubTUCBFs(cu, TEXT_CHROMA_V, trMode, absPartIdx);
}
- m_entropyCoder->encodeQtCbf(cu, absPartIdx, absPartIdxStep, trWidthC, trHeightC, TEXT_CHROMA_U, trMode, true);
- m_entropyCoder->encodeQtCbf(cu, absPartIdx, absPartIdxStep, trWidthC, trHeightC, TEXT_CHROMA_V, trMode, true);
+ uint32_t trHeightC = (chFmt == CHROMA_422) ? (trSizeC << 1) : trSizeC;
+ m_entropyCoder->encodeQtCbf(cu, absPartIdx, absPartIdxStep, trSizeC, trHeightC, TEXT_CHROMA_U, trMode, true);
+ m_entropyCoder->encodeQtCbf(cu, absPartIdx, absPartIdxStep, trSizeC, trHeightC, TEXT_CHROMA_V, trMode, true);
}
- m_entropyCoder->encodeQtCbf(cu, absPartIdx, 0, trWidth, trHeight, TEXT_LUMA, trMode, true);
+ m_entropyCoder->encodeQtCbf(cu, absPartIdx, 0, trSize, trSize, TEXT_LUMA, trMode, true);
if (absSum[TEXT_LUMA][0])
- m_entropyCoder->encodeCoeffNxN(cu, coeffCurY, absPartIdx, trWidth, TEXT_LUMA);
+ m_entropyCoder->encodeCoeffNxN(cu, coeffCurY, absPartIdx, trSize, TEXT_LUMA);
if (bCodeChroma)
{
if (!splitIntoSubTUs)
{
if (absSum[TEXT_CHROMA_U][0])
- m_entropyCoder->encodeCoeffNxN(cu, coeffCurU, absPartIdx, trWidthC, TEXT_CHROMA_U);
+ m_entropyCoder->encodeCoeffNxN(cu, coeffCurU, absPartIdx, trSizeC, TEXT_CHROMA_U);
if (absSum[TEXT_CHROMA_V][0])
- m_entropyCoder->encodeCoeffNxN(cu, coeffCurV, absPartIdx, trWidthC, TEXT_CHROMA_V);
+ m_entropyCoder->encodeCoeffNxN(cu, coeffCurV, absPartIdx, trSizeC, TEXT_CHROMA_V);
}
else
{
- uint32_t subTUSize = trWidthC * trWidthC;
+ uint32_t subTUSize = trSizeC * trSizeC;
uint32_t partIdxesPerSubTU = absPartIdxStep >> 1;
if (absSum[TEXT_CHROMA_U][0])
- m_entropyCoder->encodeCoeffNxN(cu, coeffCurU, absPartIdx, trWidthC, TEXT_CHROMA_U);
+ m_entropyCoder->encodeCoeffNxN(cu, coeffCurU, absPartIdx, trSizeC, TEXT_CHROMA_U);
if (absSum[TEXT_CHROMA_U][1])
- m_entropyCoder->encodeCoeffNxN(cu, coeffCurU + subTUSize, absPartIdx + partIdxesPerSubTU, trWidthC, TEXT_CHROMA_U);
+ m_entropyCoder->encodeCoeffNxN(cu, coeffCurU + subTUSize, absPartIdx + partIdxesPerSubTU, trSizeC, TEXT_CHROMA_U);
if (absSum[TEXT_CHROMA_V][0])
- m_entropyCoder->encodeCoeffNxN(cu, coeffCurV, absPartIdx, trWidthC, TEXT_CHROMA_V);
+ m_entropyCoder->encodeCoeffNxN(cu, coeffCurV, absPartIdx, trSizeC, TEXT_CHROMA_V);
if (absSum[TEXT_CHROMA_V][1])
- m_entropyCoder->encodeCoeffNxN(cu, coeffCurV + subTUSize, absPartIdx + partIdxesPerSubTU, trWidthC, TEXT_CHROMA_V);
+ m_entropyCoder->encodeCoeffNxN(cu, coeffCurV + subTUSize, absPartIdx + partIdxesPerSubTU, trSizeC, TEXT_CHROMA_V);
}
}
@@ -3842,7 +3825,7 @@
const uint32_t curTrMode = depth - cu->getDepth(0);
const uint32_t trMode = cu->getTransformIdx(absPartIdx);
const bool bSubdiv = curTrMode != trMode;
- const uint32_t trSizeLog2 = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUSize() >> depth] + 2;
+ const uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - depth;
uint32_t trSizeCLog2 = trSizeLog2 - m_hChromaShift;
int chFmt = cu->getChromaFormat();
const bool splitIntoSubTUs = (chFmt == CHROMA_422);
@@ -3855,8 +3838,7 @@
X265_CHECK(cu->getPredictionMode(absPartIdx) != MODE_INTRA, "xEncodeResidualQT() with intra block\n");
bool mCodeAll = true;
- uint32_t trWidth = 1 << trSizeLog2;
- uint32_t trHeight = trWidth;
+ uint32_t trSize = 1 << trSizeLog2;
uint32_t trWidthC = 1 << trSizeCLog2;
uint32_t trHeightC = splitIntoSubTUs ? (trWidthC << 1) : trWidthC;
@@ -3891,13 +3873,11 @@
if (!bSubdiv)
{
//Luma
- const uint32_t numCoeffPerAbsPartIdxIncrement = cu->getSlice()->getSPS()->getMaxCUSize() * cu->getSlice()->getSPS()->getMaxCUSize() >> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1);
const uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
- coeff_t *coeffCurY = m_qtTempCoeffY[qtlayer] + numCoeffPerAbsPartIdxIncrement * absPartIdx;
+ uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
+ coeff_t *coeffCurY = m_qtTempCoeffY[qtlayer] + coeffOffsetY;
//Chroma
- coeff_t *coeffCurU = m_qtTempCoeffCb[qtlayer] + (numCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift + m_vChromaShift));
- coeff_t *coeffCurV = m_qtTempCoeffCr[qtlayer] + (numCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift + m_vChromaShift));
bool bCodeChroma = true;
uint32_t trModeC = trMode;
if ((trSizeLog2 == 2) && !(chFmt == CHROMA_444))
@@ -3905,21 +3885,24 @@
trSizeCLog2++;
trModeC--;
uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((depth - 1) << 1);
- bCodeChroma = ((absPartIdx % qpdiv) == 0);
+ bCodeChroma = ((absPartIdx & (qpdiv - 1)) == 0);
}
if (bSubdivAndCbf)
{
- m_entropyCoder->encodeQtCbf(cu, absPartIdx, 0, trWidth, trHeight, TEXT_LUMA, trMode, true);
+ m_entropyCoder->encodeQtCbf(cu, absPartIdx, 0, trSize, trSize, TEXT_LUMA, trMode, true);
}
else
{
if (ttype == TEXT_LUMA && cu->getCbf(absPartIdx, TEXT_LUMA, trMode))
{
- m_entropyCoder->encodeCoeffNxN(cu, coeffCurY, absPartIdx, trWidth, TEXT_LUMA);
+ m_entropyCoder->encodeCoeffNxN(cu, coeffCurY, absPartIdx, trSize, TEXT_LUMA);
}
if (bCodeChroma)
{
+ uint32_t coeffOffsetC = coeffOffsetY >> (m_hChromaShift + m_vChromaShift);
+ coeff_t *coeffCurU = m_qtTempCoeffCb[qtlayer] + coeffOffsetC;
+ coeff_t *coeffCurV = m_qtTempCoeffCr[qtlayer] + coeffOffsetC;
uint32_t trSizeC = 1 << trSizeCLog2;
if (!splitIntoSubTUs)
@@ -3977,10 +3960,10 @@
if (curTrMode == trMode)
{
int chFmt = cu->getChromaFormat();
- const uint32_t trSizeLog2 = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUSize() >> depth] + 2;
- uint32_t trSizeCLog2 = trSizeLog2 - m_hChromaShift;
+ const uint32_t trSizeLog2 = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - depth;
const uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
+ uint32_t trSizeCLog2 = trSizeLog2 - m_hChromaShift;
bool bCodeChroma = true;
bool bChromaSame = false;
uint32_t trModeC = trMode;
@@ -3989,15 +3972,14 @@
trSizeCLog2++;
trModeC--;
uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trModeC) << 1);
- bCodeChroma = ((absPartIdx % qpdiv) == 0);
+ bCodeChroma = ((absPartIdx & (qpdiv - 1)) == 0);
bChromaSame = true;
}
if (bSpatial)
{
- uint32_t trWidth = 1 << trSizeLog2;
- uint32_t trHeight = 1 << trSizeLog2;
- m_qtTempShortYuv[qtlayer].copyPartToPartLuma(resiYuv, absPartIdx, trWidth, trHeight);
+ uint32_t trSize = 1 << trSizeLog2;
+ m_qtTempShortYuv[qtlayer].copyPartToPartLuma(resiYuv, absPartIdx, trSize, trSize);
if (bCodeChroma)
{
@@ -4006,21 +3988,20 @@
}
else
{
- uint32_t numCoeffPerAbsPartIdxIncrement = cu->getSlice()->getSPS()->getMaxCUSize() * cu->getSlice()->getSPS()->getMaxCUSize() >> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1);
- uint32_t numCoeffY = (1 << (trSizeLog2 << 1));
- coeff_t* coeffSrcY = m_qtTempCoeffY[qtlayer] + numCoeffPerAbsPartIdxIncrement * absPartIdx;
- coeff_t* coeffDstY = cu->getCoeffY() + numCoeffPerAbsPartIdxIncrement * absPartIdx;
+ uint32_t numCoeffY = 1 << (trSizeLog2 * 2);
+ uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
+ coeff_t* coeffSrcY = m_qtTempCoeffY[qtlayer] + coeffOffsetY;
+ coeff_t* coeffDstY = cu->getCoeffY() + coeffOffsetY;
::memcpy(coeffDstY, coeffSrcY, sizeof(coeff_t) * numCoeffY);
if (bCodeChroma)
{
- uint32_t trWidthC = 1 << trSizeCLog2;
- uint32_t trHeightC = (chFmt == CHROMA_422) ? (trWidthC << 1) : trWidthC;
- uint32_t numCoeffC = trWidthC * trHeightC;
-
- coeff_t* coeffSrcU = m_qtTempCoeffCb[qtlayer] + (numCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift + m_vChromaShift));
- coeff_t* coeffSrcV = m_qtTempCoeffCr[qtlayer] + (numCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift + m_vChromaShift));
- coeff_t* coeffDstU = cu->getCoeffCb() + (numCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift + m_vChromaShift));
- coeff_t* coeffDstV = cu->getCoeffCr() + (numCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift + m_vChromaShift));
+ uint32_t numCoeffC = 1 << (trSizeCLog2 * 2 + (chFmt == CHROMA_422));
+ uint32_t coeffOffsetC = coeffOffsetY >> (m_hChromaShift + m_vChromaShift);
+
+ coeff_t* coeffSrcU = m_qtTempCoeffCb[qtlayer] + coeffOffsetC;
+ coeff_t* coeffSrcV = m_qtTempCoeffCr[qtlayer] + coeffOffsetC;
+ coeff_t* coeffDstU = cu->getCoeffCb() + coeffOffsetC;
+ coeff_t* coeffDstV = cu->getCoeffCr() + coeffOffsetC;
::memcpy(coeffDstU, coeffSrcU, sizeof(coeff_t) * numCoeffC);
::memcpy(coeffDstV, coeffSrcV, sizeof(coeff_t) * numCoeffC);
}
diff -r 5134e76aa729 -r 085be1ffd4a9 source/Lib/TLibEncoder/TEncSearch.h
--- a/source/Lib/TLibEncoder/TEncSearch.h Thu May 22 21:46:21 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncSearch.h Fri May 23 13:34:51 2014 +0900
@@ -229,8 +229,8 @@
void xStoreIntraResultQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx);
void xLoadIntraResultQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx);
- void xStoreIntraResultChromaQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t stateU0V1Both2, const bool splitIntoSubTUs);
- void xLoadIntraResultChromaQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t stateU0V1Both2, const bool splitIntoSubTUs);
+ void xStoreIntraResultChromaQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t chromaId, const bool splitIntoSubTUs);
+ void xLoadIntraResultChromaQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t chromaId, const bool splitIntoSubTUs);
// --------------------------------------------------------------------------------------------
// Inter search (AMP)
diff -r 5134e76aa729 -r 085be1ffd4a9 source/common/pixel.cpp
--- a/source/common/pixel.cpp Thu May 22 21:46:21 2014 -0500
+++ b/source/common/pixel.cpp Fri May 23 13:34:51 2014 +0900
@@ -1151,11 +1151,11 @@
p.calcrecon[BLOCK_32x32] = calcRecons<32>;
p.calcrecon[BLOCK_64x64] = NULL;
- p.transpose[0] = transpose<4>;
- p.transpose[1] = transpose<8>;
- p.transpose[2] = transpose<16>;
- p.transpose[3] = transpose<32>;
- p.transpose[4] = transpose<64>;
+ p.transpose[BLOCK_4x4] = transpose<4>;
+ p.transpose[BLOCK_8x8] = transpose<8>;
+ p.transpose[BLOCK_16x16] = transpose<16>;
+ p.transpose[BLOCK_32x32] = transpose<32>;
+ p.transpose[BLOCK_64x64] = transpose<64>;
p.weight_pp = weight_pp_c;
p.weight_sp = weight_sp_c;
diff -r 5134e76aa729 -r 085be1ffd4a9 source/common/primitives.cpp
--- a/source/common/primitives.cpp Thu May 22 21:46:21 2014 -0500
+++ b/source/common/primitives.cpp Fri May 23 13:34:51 2014 +0900
@@ -29,7 +29,7 @@
namespace x265 {
// x265 private namespace
-uint8_t lumaPartitioneMapTable[] =
+extern const uint8_t lumaPartitionMapTable[] =
{
// 4 8 12 16 20 24 28 32 36 40 44 48 52 56 60 64
LUMA_4x4, LUMA_4x8, 255, LUMA_4x16, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 4
@@ -50,6 +50,11 @@
255, 255, 255, LUMA_64x16, 255, 255, 255, LUMA_64x32, 255, 255, 255, LUMA_64x48, 255, 255, 255, LUMA_64x64 // 64
};
+extern const uint8_t lumaSquarePartitionMapTable[] =
+{
+ LUMA_4x4, LUMA_8x8, 255, LUMA_16x16, 255, 255, 255, LUMA_32x32, 255, 255, 255, 255, 255, 255, 255, LUMA_64x64
+};
+
/* the "authoritative" set of encoder primitives */
EncoderPrimitives primitives;
diff -r 5134e76aa729 -r 085be1ffd4a9 source/common/primitives.h
--- a/source/common/primitives.h Thu May 22 21:46:21 2014 -0500
+++ b/source/common/primitives.h Fri May 23 13:34:51 2014 +0900
@@ -103,14 +103,23 @@
inline int partitionFromSizes(int width, int height)
{
X265_CHECK(((width | height) & ~(4 | 8 | 16 | 32 | 64)) == 0, "Invalid block width/height\n");
- extern uint8_t lumaPartitioneMapTable[];
+ extern const uint8_t lumaPartitionMapTable[];
int w = (width >> 2) - 1;
int h = (height >> 2) - 1;
- int part = (int)lumaPartitioneMapTable[(w << 4) + h];
+ int part = (int)lumaPartitionMapTable[(w << 4) + h];
X265_CHECK(part != 255, "Invalid block width %d height %d\n", width, height);
return part;
}
+inline int partitionFromSize(int size)
+{
+ X265_CHECK((size & ~(4 | 8 | 16 | 32 | 64)) == 0, "Invalid block size\n");
+ extern const uint8_t lumaSquarePartitionMapTable[];
+ int part = (int)lumaSquarePartitionMapTable[(size >> 2) - 1];
+ X265_CHECK(part != 255, "Invalid block size %d\n", size);
+ return part;
+}
+
typedef int (*pixelcmp_t)(pixel *fenc, intptr_t fencstride, pixel *fref, intptr_t frefstride); // fenc is aligned
typedef int (*pixelcmp_ss_t)(int16_t *fenc, intptr_t fencstride, int16_t *fref, intptr_t frefstride);
typedef int (*pixelcmp_sp_t)(int16_t *fenc, intptr_t fencstride, pixel *fref, intptr_t frefstride);
diff -r 5134e76aa729 -r 085be1ffd4a9 source/common/shortyuv.cpp
--- a/source/common/shortyuv.cpp Thu May 22 21:46:21 2014 -0500
+++ b/source/common/shortyuv.cpp Fri May 23 13:34:51 2014 +0900
@@ -84,7 +84,7 @@
void ShortYuv::subtract(TComYuv* srcYuv0, TComYuv* srcYuv1, uint32_t partSize)
{
- int part = partitionFromSizes(partSize, partSize);
+ int part = partitionFromSize(partSize);
pixel* srcY0 = srcYuv0->getLumaAddr();
pixel* srcY1 = srcYuv1->getLumaAddr();
@@ -136,7 +136,7 @@
void ShortYuv::copyPartToPartChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, bool bChromaSame)
{
- int part = partitionFromSizes(lumaSize, lumaSize);
+ int part = partitionFromSize(lumaSize);
part = ((part == 0) && (m_csp == CHROMA_422)) ? 1 : part;
int16_t* srcU = getCbAddr(partIdx);
@@ -158,7 +158,7 @@
void ShortYuv::copyPartToPartChroma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, bool bChromaSame)
{
- int part = partitionFromSizes(lumaSize, lumaSize);
+ int part = partitionFromSize(lumaSize);
int16_t* srcU = getCbAddr(partIdx);
int16_t* srcV = getCrAddr(partIdx);
pixel* dstU = dstPicYuv->getCbAddr(partIdx);
@@ -181,7 +181,7 @@
void ShortYuv::copyPartToPartShortChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, uint32_t chromaId)
{
- int part = partitionFromSizes(lumaSize, lumaSize);
+ int part = partitionFromSize(lumaSize);
if (chromaId == 0)
{
@@ -214,7 +214,9 @@
void ShortYuv::copyPartToPartYuvChroma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, uint32_t chromaId, const bool splitIntoSubTUs)
{
- int part = splitIntoSubTUs ? NUM_CHROMA_PARTITIONS422 : partitionFromSizes(lumaSize, lumaSize);
+ assert(chromaId == 1 || chromaId == 2);
+
+ int part = splitIntoSubTUs ? NUM_CHROMA_PARTITIONS422 : partitionFromSize(lumaSize);
if (chromaId == 1)
{
@@ -224,7 +226,7 @@
uint32_t dstStride = dstPicYuv->getCStride();
primitives.chroma[m_csp].copy_sp[part](dstU, dstStride, srcU, srcStride);
}
- else if (chromaId == 2)
+ else
{
int16_t* srcV = getCrAddr(partIdx);
pixel* dstV = dstPicYuv->getCrAddr(partIdx);
@@ -232,16 +234,4 @@
uint32_t dstStride = dstPicYuv->getCStride();
primitives.chroma[m_csp].copy_sp[part](dstV, dstStride, srcV, srcStride);
}
- else
- {
- int16_t* srcU = getCbAddr(partIdx);
- int16_t* srcV = getCrAddr(partIdx);
- pixel* dstU = dstPicYuv->getCbAddr(partIdx);
- pixel* dstV = dstPicYuv->getCrAddr(partIdx);
-
- uint32_t srcStride = m_cwidth;
- uint32_t dstStride = dstPicYuv->getCStride();
- primitives.chroma[m_csp].copy_sp[part](dstU, dstStride, srcU, srcStride);
- primitives.chroma[m_csp].copy_sp[part](dstV, dstStride, srcV, srcStride);
- }
}
diff -r 5134e76aa729 -r 085be1ffd4a9 source/common/vec/blockcopy-sse3.cpp
--- a/source/common/vec/blockcopy-sse3.cpp Thu May 22 21:46:21 2014 -0500
+++ b/source/common/vec/blockcopy-sse3.cpp Fri May 23 13:34:51 2014 +0900
@@ -30,7 +30,7 @@
#if HIGH_BIT_DEPTH
void blockcopy_pp(int bx, int by, pixel *dst, intptr_t dstride, pixel *src, intptr_t sstride)
{
- if ((bx & 7) || (((size_t)dst | (size_t)src | sstride | dstride) & 15))
+ if ((bx & 7) || (((intptr_t)dst | (intptr_t)src | sstride | dstride) & 15))
{
// slow path, irregular memory alignments or sizes
for (int y = 0; y < by; y++)
@@ -60,7 +60,7 @@
#else // if HIGH_BIT_DEPTH
void blockcopy_pp(int bx, int by, pixel *dst, intptr_t dstride, pixel *src, intptr_t sstride)
{
- size_t aligncheck = (size_t)dst | (size_t)src | bx | sstride | dstride;
+ intptr_t aligncheck = (intptr_t)dst | (intptr_t)src | bx | sstride | dstride;
if (!(aligncheck & 15))
{
@@ -91,7 +91,7 @@
void blockcopy_ps(int bx, int by, pixel *dst, intptr_t dstride, int16_t *src, intptr_t sstride)
{
- size_t aligncheck = (size_t)dst | (size_t)src | bx | sstride | dstride;
+ intptr_t aligncheck = (intptr_t)dst | (intptr_t)src | bx | sstride | dstride;
if (!(aligncheck & 15))
{
@@ -134,7 +134,7 @@
void pixeladd_ss(int bx, int by, int16_t *dst, intptr_t dstride, int16_t *src0, int16_t *src1, intptr_t sstride0, intptr_t sstride1)
{
- size_t aligncheck = (size_t)dst | (size_t)src0 | sstride0 | sstride1 | dstride;
+ intptr_t aligncheck = (intptr_t)dst | (intptr_t)src0 | sstride0 | sstride1 | dstride;
if (!(aligncheck & 15) && !(bx & 7))
{
diff -r 5134e76aa729 -r 085be1ffd4a9 source/encoder/compress.cpp
--- a/source/encoder/compress.cpp Thu May 22 21:46:21 2014 -0500
+++ b/source/encoder/compress.cpp Fri May 23 13:34:51 2014 +0900
@@ -138,8 +138,8 @@
leftFiltered = leftScale;
}
- int log2SizeMinus2 = g_convertToBit[scaleTuSize];
- pixelcmp_t sa8d = primitives.sa8d[log2SizeMinus2];
+ int sizeIdx = g_convertToBit[scaleTuSize];
+ pixelcmp_t sa8d = primitives.sa8d[sizeIdx];
uint32_t preds[3];
cu->getIntraDirLumaPredictor(partOffset, preds);
@@ -148,7 +148,7 @@
uint32_t rbits = m_search->xModeBitsRemIntra(cu, partOffset, depth, preds, mpms);
// DC
- primitives.intra_pred[log2SizeMinus2][DC_IDX](tmp, scaleStride, left, above, 0, (scaleTuSize <= 16));
+ primitives.intra_pred[sizeIdx][DC_IDX](tmp, scaleStride, left, above, 0, (scaleTuSize <= 16));
bsad = costMultiplier * sa8d(fenc, scaleStride, tmp, scaleStride);
bmode = mode = DC_IDX;
bbits = !(mpms & ((uint64_t)1 << mode)) ? rbits : m_search->xModeBitsIntra(cu, mode, partOffset, depth);
@@ -164,7 +164,7 @@
}
// PLANAR
- primitives.intra_pred[log2SizeMinus2][PLANAR_IDX](tmp, scaleStride, leftPlanar, abovePlanar, 0, 0);
+ primitives.intra_pred[sizeIdx][PLANAR_IDX](tmp, scaleStride, leftPlanar, abovePlanar, 0, 0);
sad = costMultiplier * sa8d(fenc, scaleStride, tmp, scaleStride);
mode = PLANAR_IDX;
bits = !(mpms & ((uint64_t)1 << mode)) ? rbits : m_search->xModeBitsIntra(cu, mode, partOffset, depth);
@@ -172,9 +172,9 @@
COPY4_IF_LT(bcost, cost, bmode, mode, bsad, sad, bbits, bits);
// Transpose NxN
- primitives.transpose[log2SizeMinus2](buf_trans, fenc, scaleStride);
+ primitives.transpose[sizeIdx](buf_trans, fenc, scaleStride);
- primitives.intra_pred_allangs[log2SizeMinus2](tmp, above, left, aboveFiltered, leftFiltered, (scaleTuSize <= 16));
+ primitives.intra_pred_allangs[sizeIdx](tmp, above, left, aboveFiltered, leftFiltered, (scaleTuSize <= 16));
for (mode = 2; mode < 35; mode++)
{
@@ -211,9 +211,9 @@
outTempCU->m_totalBits = 0;
if (m_search->predInterSearch(outTempCU, outPredYuv, bUseMRG, false))
{
- int part = g_convertToBit[outTempCU->getCUSize(0)];
- uint32_t distortion = primitives.sa8d[part](m_origYuv[depth]->getLumaAddr(), m_origYuv[depth]->getStride(),
- outPredYuv->getLumaAddr(), outPredYuv->getStride());
+ int sizeIdx = g_convertToBit[outTempCU->getCUSize(0)];
+ uint32_t distortion = primitives.sa8d[sizeIdx](m_origYuv[depth]->getLumaAddr(), m_origYuv[depth]->getStride(),
+ outPredYuv->getLumaAddr(), outPredYuv->getStride());
outTempCU->m_totalDistortion = distortion;
outTempCU->m_totalCost = m_rdCost->calcRdSADCost(distortion, outTempCU->m_totalBits);
}
@@ -243,7 +243,7 @@
outBestCU->setPredModeSubParts(MODE_INTER, 0, depth);
outBestCU->setMergeFlag(0, true);
- int part = g_convertToBit[outTempCU->getCUSize(0)];
+ int sizeIdx = g_convertToBit[outTempCU->getCUSize(0)];
int bestMergeCand = -1;
for (uint32_t mergeCand = 0; mergeCand < maxNumMergeCand; ++mergeCand)
@@ -262,8 +262,8 @@
m_search->motionCompensation(outTempCU, m_tmpPredYuv[depth], REF_PIC_LIST_X, 0, true, false);
uint32_t bitsCand = getTUBits(mergeCand, maxNumMergeCand);
outTempCU->m_totalBits = bitsCand;
- outTempCU->m_totalDistortion = primitives.sa8d[part](m_origYuv[depth]->getLumaAddr(), m_origYuv[depth]->getStride(),
- m_tmpPredYuv[depth]->getLumaAddr(), m_tmpPredYuv[depth]->getStride());
+ outTempCU->m_totalDistortion = primitives.sa8d[sizeIdx](m_origYuv[depth]->getLumaAddr(), m_origYuv[depth]->getStride(),
+ m_tmpPredYuv[depth]->getLumaAddr(), m_tmpPredYuv[depth]->getStride());
outTempCU->m_totalCost = m_rdCost->calcRdSADCost(outTempCU->m_totalDistortion, outTempCU->m_totalBits);
if (outTempCU->m_totalCost < outBestCU->m_totalCost)
@@ -866,7 +866,7 @@
uint32_t src2stride = m_bestPredYuv[0]->getStride();
uint32_t src1stride = m_origYuv[0]->getStride();
uint32_t dststride = m_tmpResiYuv[depth]->m_width;
- int part = partitionFromSizes(cu->getCUSize(0), cu->getCUSize(0));
+ int part = partitionFromSize(cu->getCUSize(0));
primitives.luma_sub_ps[part](dst, dststride, src1, src2, src1stride, src2stride);
src2 = m_bestPredYuv[0]->getCbAddr(absPartIdx);
@@ -925,7 +925,7 @@
//Generate Recon
TComPicYuv* rec = pic->getPicYuvRec();
- int part = partitionFromSizes(cu->getCUSize(0), cu->getCUSize(0));
+ int part = partitionFromSize(cu->getCUSize(0));
pixel* src = m_bestPredYuv[0]->getLumaAddr(absPartIdx);
pixel* dst = rec->getLumaAddr(cu->getAddr(), absPartIdx);
uint32_t srcstride = m_bestPredYuv[0]->getStride();
diff -r 5134e76aa729 -r 085be1ffd4a9 source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp Thu May 22 21:46:21 2014 -0500
+++ b/source/encoder/slicetype.cpp Fri May 23 13:34:51 2014 +0900
@@ -1604,7 +1604,7 @@
}
if (!fenc->bIntraCalculated)
{
- int nLog2SizeMinus2 = g_convertToBit[cuSize]; // partition size
+ int sizeIdx = g_convertToBit[cuSize]; // partition size
pixel _above0[X265_LOWRES_CU_SIZE * 4 + 1], *const above0 = _above0 + 2 * X265_LOWRES_CU_SIZE;
pixel _above1[X265_LOWRES_CU_SIZE * 4 + 1], *const above1 = _above1 + 2 * X265_LOWRES_CU_SIZE;
@@ -1643,16 +1643,16 @@
int predsize = cuSize * cuSize;
// generate 35 intra predictions into tmp
- primitives.intra_pred[nLog2SizeMinus2][DC_IDX](predictions, cuSize, left0, above0, 0, (cuSize <= 16));
+ primitives.intra_pred[sizeIdx][DC_IDX](predictions, cuSize, left0, above0, 0, (cuSize <= 16));
pixel *above = (cuSize >= 8) ? above1 : above0;
pixel *left = (cuSize >= 8) ? left1 : left0;
- primitives.intra_pred[nLog2SizeMinus2][PLANAR_IDX](predictions + predsize, cuSize, left, above, 0, 0);
- primitives.intra_pred_allangs[nLog2SizeMinus2](predictions + 2 * predsize, above0, left0, above1, left1, (cuSize <= 16));
+ primitives.intra_pred[sizeIdx][PLANAR_IDX](predictions + predsize, cuSize, left, above, 0, 0);
+ primitives.intra_pred_allangs[sizeIdx](predictions + 2 * predsize, above0, left0, above1, left1, (cuSize <= 16));
// calculate 35 satd costs, keep least cost
ALIGN_VAR_32(pixel, buf_trans[32 * 32]);
- primitives.transpose[nLog2SizeMinus2](buf_trans, me.fenc, FENC_STRIDE);
- pixelcmp_t satd = primitives.satd[partitionFromSizes(cuSize, cuSize)];
+ primitives.transpose[sizeIdx](buf_trans, me.fenc, FENC_STRIDE);
+ pixelcmp_t satd = primitives.satd[partitionFromSize(cuSize)];
int icost = me.COST_MAX, cost;
for (uint32_t mode = 0; mode < 35; mode++)
{
More information about the x265-devel mailing list