[x265] [PATCH 2 of 2] refine intra tskip related.
Satoshi Nakagawa
nakagawa424 at oki.com
Tue Jun 24 08:50:51 CEST 2014
# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1403592156 -32400
# Tue Jun 24 15:42:36 2014 +0900
# Node ID ed2786407c46be823515c78cf23d7e0f32ee10fc
# Parent 3af58371c5ff95fc838db106610423f2c0ee8265
refine intra tskip related.
diff -r 3af58371c5ff -r ed2786407c46 source/Lib/TLibCommon/TComYuv.cpp
--- a/source/Lib/TLibCommon/TComYuv.cpp Tue Jun 24 15:41:55 2014 +0900
+++ b/source/Lib/TLibCommon/TComYuv.cpp Tue Jun 24 15:42:36 2014 +0900
@@ -197,21 +197,6 @@
primitives.luma_copy_ps[part](dst, dststride, getLumaAddr(partIdx), getStride());
}
-void TComYuv::copyPartToPartChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, uint32_t chromaId, const bool splitIntoSubTUs)
-{
- X265_CHECK(chromaId == 1 || chromaId == 2, "invalid chroma id");
-
- int part = splitIntoSubTUs ? NUM_CHROMA_PARTITIONS422 : partitionFromSize(lumaSize);
-
- pixel* src = getChromaAddr(chromaId, partIdx);
- int16_t* dst = dstPicYuv->getChromaAddr(chromaId, partIdx);
-
- uint32_t srcstride = getCStride();
- uint32_t dststride = dstPicYuv->m_cwidth;
-
- primitives.chroma[m_csp].copy_ps[part](dst, dststride, src, srcstride);
-}
-
void TComYuv::addClip(TComYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t partSize)
{
int part = partitionFromSize(partSize);
diff -r 3af58371c5ff -r ed2786407c46 source/Lib/TLibCommon/TComYuv.h
--- a/source/Lib/TLibCommon/TComYuv.h Tue Jun 24 15:41:55 2014 +0900
+++ b/source/Lib/TLibCommon/TComYuv.h Tue Jun 24 15:42:36 2014 +0900
@@ -131,7 +131,6 @@
void copyPartToPartYuv(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma);
void copyPartToPartLuma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize);
- void copyPartToPartChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, uint32_t chromaId, const bool splitIntoSubTUs);
// ------------------------------------------------------------------------------------------------------------------
// Algebraic operation for YUV buffer
diff -r 3af58371c5ff -r ed2786407c46 source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Tue Jun 24 15:41:55 2014 +0900
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Tue Jun 24 15:42:36 2014 +0900
@@ -57,9 +57,6 @@
m_qtTempCoeff[2] = NULL;
m_qtTempTrIdx = NULL;
m_qtTempShortYuv = NULL;
- m_qtTempTUCoeff[0] = NULL;
- m_qtTempTUCoeff[1] = NULL;
- m_qtTempTUCoeff[2] = NULL;
for (int i = 0; i < 3; i++)
{
m_qtTempTransformSkipFlag[i] = NULL;
@@ -83,14 +80,12 @@
m_qtTempShortYuv[i].destroy();
}
- X265_FREE(m_qtTempTUCoeff[0]);
X265_FREE(m_qtTempTrIdx);
X265_FREE(m_qtTempCbf[0]);
X265_FREE(m_qtTempTransformSkipFlag[0]);
delete[] m_qtTempCoeff[0];
delete[] m_qtTempShortYuv;
- m_qtTempTransformSkipYuv.destroy();
}
bool TEncSearch::init(Encoder* top, RDCost* rdCost, TComTrQuant* trQuant)
@@ -133,11 +128,7 @@
m_qtTempTransformSkipFlag[1] = m_qtTempTransformSkipFlag[0] + numPartitions;
m_qtTempTransformSkipFlag[2] = m_qtTempTransformSkipFlag[0] + numPartitions * 2;
- CHECKED_MALLOC(m_qtTempTUCoeff[0], coeff_t, MAX_CU_SIZE * MAX_CU_SIZE * 3);
- m_qtTempTUCoeff[1] = m_qtTempTUCoeff[0] + MAX_CU_SIZE * MAX_CU_SIZE;
- m_qtTempTUCoeff[2] = m_qtTempTUCoeff[0] + MAX_CU_SIZE * MAX_CU_SIZE * 2;
-
- return m_qtTempTransformSkipYuv.create(g_maxCUSize, g_maxCUSize, m_param->internalCsp);
+ return true;
fail:
return false;
@@ -224,7 +215,7 @@
}
}
-void TEncSearch::xEncCoeffQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TextType ttype, const bool splitIntoSubTUs)
+void TEncSearch::xEncCoeffQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TextType ttype)
{
if (!cu->getCbf(absPartIdx, ttype, trDepth))
return;
@@ -238,7 +229,7 @@
uint32_t qtPartNum = cu->getPic()->getNumPartInCU() >> ((fullDepth + 1) << 1);
for (uint32_t part = 0; part < 4; part++)
{
- xEncCoeffQT(cu, trDepth + 1, absPartIdx + part * qtPartNum, ttype, splitIntoSubTUs);
+ xEncCoeffQT(cu, trDepth + 1, absPartIdx + part * qtPartNum, ttype);
}
return;
@@ -254,8 +245,7 @@
trDepth--;
uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trDepth) << 1);
bool bFirstQ = ((absPartIdx & (qpdiv - 1)) == 0);
- bool bSecondQ = (chFmt == CHROMA_422 && splitIntoSubTUs) ? ((absPartIdx & (qpdiv - 1)) == 2) : false;
- if ((!bFirstQ) && (!bSecondQ))
+ if (!bFirstQ)
{
return;
}
@@ -267,7 +257,6 @@
int cspy = chroma ? m_vChromaShift : 0;
uint32_t width = cu->getCUSize(0) >> (trDepth + cspx);
uint32_t height = cu->getCUSize(0) >> (trDepth + cspy);
- height = splitIntoSubTUs ? height >> 1 : height;
uint32_t coeffOffset = absPartIdx << (cu->getPic()->getLog2UnitSize() * 2 - (cspx + cspy));
uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
coeff_t* coeff = m_qtTempCoeff[ttype][qtLayer] + coeffOffset;
@@ -370,20 +359,34 @@
if (bLuma)
{
- xEncCoeffQT(cu, trDepth, absPartIdx, TEXT_LUMA, false);
+ xEncCoeffQT(cu, trDepth, absPartIdx, TEXT_LUMA);
}
if (bChroma)
{
- xEncCoeffQT(cu, trDepth, absPartIdx, TEXT_CHROMA_U, false);
- xEncCoeffQT(cu, trDepth, absPartIdx, TEXT_CHROMA_V, false);
+ xEncCoeffQT(cu, trDepth, absPartIdx, TEXT_CHROMA_U);
+ xEncCoeffQT(cu, trDepth, absPartIdx, TEXT_CHROMA_V);
}
return m_entropyCoder->getNumberOfWrittenBits();
}
-uint32_t TEncSearch::xGetIntraBitsQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t chromaId, const bool splitIntoSubTUs)
+uint32_t TEncSearch::xGetIntraBitsQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t log2TrSize, coeff_t* coeff)
{
m_entropyCoder->resetBits();
- xEncCoeffQT(cu, trDepth, absPartIdx, (TextType)chromaId, splitIntoSubTUs);
+ xEncIntraHeader(cu, trDepth, absPartIdx, true, false);
+ xEncSubdivCbfQT(cu, trDepth, absPartIdx, 0, cu->getCUSize(absPartIdx), cu->getCUSize(absPartIdx), true, false);
+
+ if (cu->getCbf(absPartIdx, TEXT_LUMA, trDepth))
+ {
+ m_entropyCoder->encodeCoeffNxN(cu, coeff, absPartIdx, 1 << log2TrSize, TEXT_LUMA);
+ }
+
+ return m_entropyCoder->getNumberOfWrittenBits();
+}
+
+uint32_t TEncSearch::xGetIntraBitsQTChroma(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSizeC, uint32_t chromaId, coeff_t* coeff)
+{
+ m_entropyCoder->resetBits();
+ m_entropyCoder->encodeCoeffNxN(cu, coeff, absPartIdx, 1 << log2TrSizeC, (TextType)chromaId);
return m_entropyCoder->getNumberOfWrittenBits();
}
@@ -393,6 +396,9 @@
TComYuv* fencYuv,
TComYuv* predYuv,
ShortYuv* resiYuv,
+ int16_t* reconQt,
+ uint32_t reconQtStride,
+ coeff_t* coeff,
uint32_t& cbf,
uint32_t& outDist)
{
@@ -402,12 +408,6 @@
pixel* pred = predYuv->getLumaAddr(absPartIdx);
int16_t* residual = resiYuv->getLumaAddr(absPartIdx);
- uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
- uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
- coeff_t* coeff = m_qtTempCoeff[0][qtLayer] + coeffOffsetY;
- int16_t* reconQt = m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx);
- X265_CHECK(m_qtTempShortYuv[qtLayer].m_width == MAX_CU_SIZE, "width is not max CU size\n");
- const uint32_t reconQtStride = MAX_CU_SIZE;
uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
pixel* reconIPred = cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);
uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getStride();
@@ -468,10 +468,12 @@
void TEncSearch::xIntraCodingChromaBlk(TComDataCU* cu,
uint32_t absPartIdx,
- uint32_t log2TrSize,
TComYuv* fencYuv,
TComYuv* predYuv,
ShortYuv* resiYuv,
+ int16_t* reconQt,
+ uint32_t reconQtStride,
+ coeff_t* coeff,
uint32_t& cbf,
uint32_t& outDist,
uint32_t chromaId,
@@ -484,11 +486,6 @@
pixel* pred = predYuv->getChromaAddr(chromaId, absPartIdx);
int16_t* residual = resiYuv->getChromaAddr(chromaId, absPartIdx);
- uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
- uint32_t coeffOffsetC = absPartIdx << (cu->getPic()->getLog2UnitSize() * 2 - (m_hChromaShift + m_vChromaShift));
- coeff_t* coeff = m_qtTempCoeff[chromaId][qtLayer] + coeffOffsetC;
- int16_t* reconQt = m_qtTempShortYuv[qtLayer].getChromaAddr(chromaId, absPartIdx);
- uint32_t reconQtStride = m_qtTempShortYuv[qtLayer].m_cwidth;
uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
pixel* reconIPred = cu->getPic()->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), zorder);
uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getCStride();
@@ -641,6 +638,13 @@
cu->setTrIdxSubParts(trDepth, absPartIdx, fullDepth);
+ uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
+ uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
+ coeff_t* coeffY = m_qtTempCoeff[0][qtLayer] + coeffOffsetY;
+ int16_t* reconQt = m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx);
+ X265_CHECK(m_qtTempShortYuv[qtLayer].m_width == MAX_CU_SIZE, "width is not max CU size\n");
+ const uint32_t reconQtStride = MAX_CU_SIZE;
+
if (checkTransformSkip || checkTQbypass)
{
//----- store original entropy coding status -----
@@ -653,8 +657,15 @@
bool singleTQbypass = 0;
const int firstCheckId = 0;
+ ALIGN_VAR_32(coeff_t, tsCoeffY[32 * 32]);
+ ALIGN_VAR_32(int16_t, tsReconY[32 * 32]);
+
for (int modeId = firstCheckId; modeId < 2; modeId++)
{
+ coeff_t* coeff = (modeId ? tsCoeffY : coeffY);
+ int16_t* recon = (modeId ? tsReconY : reconQt);
+ uint32_t reconStride = (modeId ? tuSize : reconQtStride);
+
singleDistYTmp = 0;
singlePsyEnergyYTmp = 0;
cu->setTransformSkipSubParts(checkTransformSkip ? modeId : 0, TEXT_LUMA, absPartIdx, fullDepth);
@@ -666,7 +677,7 @@
}
//----- code luma block with given intra prediction mode and store Cbf-----
- xIntraCodingLumaBlk(cu, absPartIdx, log2TrSize, fencYuv, predYuv, resiYuv, singleCbfYTmp, singleDistYTmp);
+ xIntraCodingLumaBlk(cu, absPartIdx, log2TrSize, fencYuv, predYuv, resiYuv, recon, reconStride, coeff, singleCbfYTmp, singleDistYTmp);
if (m_rdCost->psyRdEnabled())
{
uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
@@ -679,11 +690,11 @@
if ((modeId == 1) && (singleCbfYTmp == 0) && checkTransformSkip)
{
// In order not to code TS flag when cbf is zero, the case for TS with cbf being zero is forbidden.
- singleCostTmp = MAX_INT64;
+ break;
}
else
{
- uint32_t singleBits = xGetIntraBitsQT(cu, trDepth, absPartIdx, 0, true, false);
+ uint32_t singleBits = xGetIntraBitsQTLuma(cu, trDepth, absPartIdx, log2TrSize, coeff);
if (m_rdCost->psyRdEnabled())
singleCostTmp = m_rdCost->calcPsyRdCost(singleDistYTmp, singleBits, singlePsyEnergyYTmp);
else
@@ -700,7 +711,6 @@
bestModeId = modeId;
if (bestModeId == firstCheckId)
{
- xStoreIntraResultQT(cu, absPartIdx, log2TrSize);
m_rdGoOnSbacCoder->store(m_rdSbacCoders[fullDepth][CI_TEMP_BEST]);
}
}
@@ -718,10 +728,16 @@
if (bestModeId == firstCheckId)
{
- xLoadIntraResultQT(cu, absPartIdx, log2TrSize);
+ xLoadIntraResultQT(cu, absPartIdx, log2TrSize, reconQt, reconQtStride);
cu->setCbfSubParts(singleCbfY << trDepth, TEXT_LUMA, absPartIdx, fullDepth);
m_rdGoOnSbacCoder->load(m_rdSbacCoders[fullDepth][CI_TEMP_BEST]);
}
+ else
+ {
+ ::memcpy(coeffY, tsCoeffY, sizeof(coeff_t) << (log2TrSize * 2));
+ int sizeIdx = log2TrSize - 2;
+ primitives.square_copy_ss[sizeIdx](reconQt, reconQtStride, tsReconY, tuSize);
+ }
}
else
{
@@ -729,7 +745,7 @@
//----- code luma block with given intra prediction mode and store Cbf-----
cu->setTransformSkipSubParts(0, TEXT_LUMA, absPartIdx, fullDepth);
- xIntraCodingLumaBlk(cu, absPartIdx, log2TrSize, fencYuv, predYuv, resiYuv, singleCbfY, singleDistY);
+ xIntraCodingLumaBlk(cu, absPartIdx, log2TrSize, fencYuv, predYuv, resiYuv, reconQt, reconQtStride, coeffY, singleCbfY, singleDistY);
if (m_rdCost->psyRdEnabled())
{
uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
@@ -738,7 +754,7 @@
}
cu->setCbfSubParts(singleCbfY << trDepth, TEXT_LUMA, absPartIdx, fullDepth);
- uint32_t singleBits = xGetIntraBitsQT(cu, trDepth, absPartIdx, 0, true, false);
+ uint32_t singleBits = xGetIntraBitsQTLuma(cu, trDepth, absPartIdx, log2TrSize, coeffY);
if (m_param->rdPenalty && (log2TrSize == 5) && !isIntraSlice)
singleBits *= 4;
@@ -960,11 +976,10 @@
uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
//===== copy transform coefficients =====
- uint32_t numCoeffY = 1 << (log2TrSize * 2);
uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
coeff_t* coeffSrcY = m_qtTempCoeff[0][qtLayer] + coeffOffsetY;
coeff_t* coeffDestY = cu->getCoeffY() + coeffOffsetY;
- ::memcpy(coeffDestY, coeffSrcY, sizeof(coeff_t) * numCoeffY);
+ ::memcpy(coeffDestY, coeffSrcY, sizeof(coeff_t) << (log2TrSize * 2));
//===== copy reconstruction =====
m_qtTempShortYuv[qtLayer].copyPartToPartLuma(reconYuv, absPartIdx, 1 << log2TrSize);
@@ -979,100 +994,28 @@
}
}
-void TEncSearch::xStoreIntraResultQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize)
+void TEncSearch::xLoadIntraResultQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize,
+ int16_t* reconQt, uint32_t reconQtStride)
{
- uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
-
- //===== copy transform coefficients =====
- uint32_t numCoeffY = 1 << (log2TrSize * 2);
- uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
- coeff_t* coeffSrcY = m_qtTempCoeff[0][qtLayer] + coeffOffsetY;
- coeff_t* coeffDstY = m_qtTempTUCoeff[0];
- ::memcpy(coeffDstY, coeffSrcY, sizeof(coeff_t) * numCoeffY);
-
//===== copy reconstruction =====
- pixel* reconTs = m_qtTempTransformSkipYuv.getLumaAddr(absPartIdx);
- uint32_t reconTsStride = m_qtTempTransformSkipYuv.getStride();
- int16_t* reconQt = m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx);
- X265_CHECK(m_qtTempShortYuv[qtLayer].m_width == MAX_CU_SIZE, "width is not max CU size\n");
- const uint32_t reconQtStride = MAX_CU_SIZE;
int sizeIdx = log2TrSize - 2;
- primitives.square_copy_sp[sizeIdx](reconTs, reconTsStride, reconQt, reconQtStride);
-}
-
-void TEncSearch::xLoadIntraResultQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize)
-{
- uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
-
- //===== copy transform coefficients =====
- uint32_t numCoeffY = 1 << (log2TrSize * 2);
- uint32_t coeffOffsetY = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
- coeff_t* coeffDstY = m_qtTempCoeff[0][qtLayer] + coeffOffsetY;
- coeff_t* coeffSrcY = m_qtTempTUCoeff[0];
- ::memcpy(coeffDstY, coeffSrcY, sizeof(coeff_t) * numCoeffY);
-
- //===== copy reconstruction =====
- pixel* reconTs = m_qtTempTransformSkipYuv.getLumaAddr(absPartIdx);
- uint32_t reconTsStride = m_qtTempTransformSkipYuv.getStride();
- int16_t* reconQt = m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx);
- X265_CHECK(m_qtTempShortYuv[qtLayer].m_width == MAX_CU_SIZE, "width is not max CU size\n");
- const uint32_t reconQtStride = MAX_CU_SIZE;
- int sizeIdx = log2TrSize - 2;
- primitives.square_copy_ps[sizeIdx](reconQt, reconQtStride, reconTs, reconTsStride);
-
uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
pixel* reconIPred = cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);
uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getStride();
- primitives.square_copy_pp[sizeIdx](reconIPred, reconIPredStride, reconTs, reconTsStride);
+ primitives.square_copy_sp[sizeIdx](reconIPred, reconIPredStride, reconQt, reconQtStride);
}
-void TEncSearch::xStoreIntraResultChromaQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize, uint32_t log2TrSizeC, uint32_t chromaId)
+void TEncSearch::xLoadIntraResultChromaQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSizeC, uint32_t chromaId,
+ int16_t* reconQt, uint32_t reconQtStride)
{
X265_CHECK(chromaId == 1 || chromaId == 2, "invalid chroma id");
- uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
-
- //===== copy transform coefficients =====
- uint32_t numCoeffC = 1 << (log2TrSizeC * 2);
- uint32_t coeffOffsetC = absPartIdx << (cu->getPic()->getLog2UnitSize() * 2 - (m_hChromaShift + m_vChromaShift));
- coeff_t* coeffSrcC = m_qtTempCoeff[chromaId][qtLayer] + coeffOffsetC;
- coeff_t* coeffDstC = m_qtTempTUCoeff[chromaId];
- ::memcpy(coeffDstC, coeffSrcC, sizeof(coeff_t) * numCoeffC);
-
//===== copy reconstruction =====
- pixel* reconTs = m_qtTempTransformSkipYuv.getChromaAddr(chromaId, absPartIdx);
- uint32_t reconTsStride = m_qtTempTransformSkipYuv.getCStride();
- int16_t* reconQt = m_qtTempShortYuv[qtLayer].getChromaAddr(chromaId, absPartIdx);
- uint32_t reconQtStride = m_qtTempShortYuv[qtLayer].m_cwidth;
int sizeIdxC = log2TrSizeC - 2;
- primitives.square_copy_sp[sizeIdxC](reconTs, reconTsStride, reconQt, reconQtStride);
-}
-
-void TEncSearch::xLoadIntraResultChromaQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize, uint32_t log2TrSizeC, uint32_t chromaId)
-{
- X265_CHECK(chromaId == 1 || chromaId == 2, "invalid chroma id");
-
- uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
-
- //===== copy transform coefficients =====
- uint32_t numCoeffC = 1 << (log2TrSizeC * 2);
- uint32_t coeffOffsetC = absPartIdx << (cu->getPic()->getLog2UnitSize() * 2 - (m_hChromaShift + m_vChromaShift));
- coeff_t* coeffDstC = m_qtTempCoeff[chromaId][qtLayer] + coeffOffsetC;
- coeff_t* coeffSrcC = m_qtTempTUCoeff[chromaId];
- ::memcpy(coeffDstC, coeffSrcC, sizeof(coeff_t) * numCoeffC);
-
- //===== copy reconstruction =====
- pixel* reconTs = m_qtTempTransformSkipYuv.getChromaAddr(chromaId, absPartIdx);
- uint32_t reconTsStride = m_qtTempTransformSkipYuv.getCStride();
- int16_t* reconQt = m_qtTempShortYuv[qtLayer].getChromaAddr(chromaId, absPartIdx);
- uint32_t reconQtStride = m_qtTempShortYuv[qtLayer].m_cwidth;
- int sizeIdxC = log2TrSizeC - 2;
- primitives.square_copy_ps[sizeIdxC](reconQt, reconQtStride, reconTs, reconTsStride);
-
uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
pixel* reconIPred = cu->getPic()->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), zorder);
uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getCStride();
- primitives.square_copy_pp[sizeIdxC](reconIPred, reconIPredStride, reconTs, reconTsStride);
+ primitives.square_copy_sp[sizeIdxC](reconIPred, reconIPredStride, reconQt, reconQtStride);
}
void TEncSearch::offsetSubTUCBFs(TComDataCU* cu, TextType ttype, uint32_t trDepth, uint32_t absPartIdx)
@@ -1150,6 +1093,8 @@
log2TrSizeC <= LOG2_MAX_TS_SIZE &&
!cu->getCUTransquantBypass(0));
+ uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
+
if (m_param->bEnableTSkipFast)
{
checkTransformSkip &= ((cu->getCUSize(0) >> trDepth) <= 4);
@@ -1194,6 +1139,11 @@
uint32_t singleCbfC = 0;
uint32_t singlePsyEnergyTmp = 0;
+ int16_t* reconQt = m_qtTempShortYuv[qtLayer].getChromaAddr(chromaId, absPartIdxC);
+ uint32_t reconQtStride = m_qtTempShortYuv[qtLayer].m_cwidth;
+ uint32_t coeffOffsetC = absPartIdxC << (cu->getPic()->getLog2UnitSize() * 2 - (m_hChromaShift + m_vChromaShift));
+ coeff_t* coeffC = m_qtTempCoeff[chromaId][qtLayer] + coeffOffsetC;
+
if (checkTransformSkip)
{
// use RDO to decide whether Cr/Cb takes TS
@@ -1208,22 +1158,29 @@
const int firstCheckId = 0;
+ ALIGN_VAR_32(coeff_t, tsCoeffC[MAX_TS_SIZE * MAX_TS_SIZE]);
+ ALIGN_VAR_32(int16_t, tsReconC[MAX_TS_SIZE * MAX_TS_SIZE]);
+
for (int chromaModeId = firstCheckId; chromaModeId < 2; chromaModeId++)
{
+ coeff_t* coeff = (chromaModeId ? tsCoeffC : coeffC);
+ int16_t* recon = (chromaModeId ? tsReconC : reconQt);
+ uint32_t reconStride = (chromaModeId ? tuSize : reconQtStride);
+
cu->setTransformSkipPartRange(chromaModeId, (TextType)chromaId, absPartIdxC, tuIterator.m_absPartIdxStep);
singleDistCTmp = 0;
- xIntraCodingChromaBlk(cu, absPartIdxC, log2TrSize, fencYuv, predYuv, resiYuv, singleCbfCTmp, singleDistCTmp, chromaId, log2TrSizeC);
+ xIntraCodingChromaBlk(cu, absPartIdxC, fencYuv, predYuv, resiYuv, recon, reconStride, coeff, singleCbfCTmp, singleDistCTmp, chromaId, log2TrSizeC);
cu->setCbfPartRange(singleCbfCTmp << trDepth, (TextType)chromaId, absPartIdxC, tuIterator.m_absPartIdxStep);
if (chromaModeId == 1 && singleCbfCTmp == 0)
{
//In order not to code TS flag when cbf is zero, the case for TS with cbf being zero is forbidden.
- singleCostTmp = MAX_INT64;
+ break;
}
else
{
- uint32_t bitsTmp = singleCbfCTmp ? xGetIntraBitsQTChroma(cu, trDepth, absPartIdxC, chromaId, splitIntoSubTUs) : 0;
+ uint32_t bitsTmp = singleCbfCTmp ? xGetIntraBitsQTChroma(cu, absPartIdxC, log2TrSizeC, chromaId, coeff) : 0;
if (m_rdCost->psyRdEnabled())
{
uint32_t zorder = cu->getZorderIdxInCU() + absPartIdxC;
@@ -1244,7 +1201,6 @@
singlePsyEnergy = singlePsyEnergyTmp;
if (bestModeId == firstCheckId)
{
- xStoreIntraResultChromaQT(cu, absPartIdxC, log2TrSize, log2TrSizeC, chromaId);
m_rdGoOnSbacCoder->store(m_rdSbacCoders[fullDepth][CI_TEMP_BEST]);
}
}
@@ -1256,11 +1212,16 @@
if (bestModeId == firstCheckId)
{
- xLoadIntraResultChromaQT(cu, absPartIdxC, log2TrSize, log2TrSizeC, chromaId);
+ xLoadIntraResultChromaQT(cu, absPartIdxC, log2TrSizeC, chromaId, reconQt, reconQtStride);
cu->setCbfPartRange(singleCbfC << trDepth, (TextType)chromaId, absPartIdxC, tuIterator.m_absPartIdxStep);
-
m_rdGoOnSbacCoder->load(m_rdSbacCoders[fullDepth][CI_TEMP_BEST]);
}
+ else
+ {
+ ::memcpy(coeffC, tsCoeffC, sizeof(coeff_t) << (log2TrSizeC * 2));
+ int sizeIdxC = log2TrSizeC - 2;
+ primitives.square_copy_ss[sizeIdxC](reconQt, reconQtStride, tsReconC, tuSize);
+ }
cu->setTransformSkipPartRange(bestModeId, (TextType)chromaId, absPartIdxC, tuIterator.m_absPartIdxStep);
@@ -1274,7 +1235,7 @@
else
{
cu->setTransformSkipPartRange(0, (TextType)chromaId, absPartIdxC, tuIterator.m_absPartIdxStep);
- xIntraCodingChromaBlk(cu, absPartIdxC, log2TrSize, fencYuv, predYuv, resiYuv, singleCbfC, outDist, chromaId, log2TrSizeC);
+ xIntraCodingChromaBlk(cu, absPartIdxC, fencYuv, predYuv, resiYuv, reconQt, reconQtStride, coeffC, singleCbfC, outDist, chromaId, log2TrSizeC);
if (m_rdCost->psyRdEnabled())
{
uint32_t zorder = cu->getZorderIdxInCU() + absPartIdxC;
diff -r 3af58371c5ff -r ed2786407c46 source/Lib/TLibEncoder/TEncSearch.h
--- a/source/Lib/TLibEncoder/TEncSearch.h Tue Jun 24 15:41:55 2014 +0900
+++ b/source/Lib/TLibEncoder/TEncSearch.h Tue Jun 24 15:42:36 2014 +0900
@@ -116,9 +116,7 @@
uint8_t* m_qtTempTrIdx;
uint8_t* m_qtTempCbf[3];
- coeff_t* m_qtTempTUCoeff[3];
uint8_t* m_qtTempTransformSkipFlag[3];
- TComYuv m_qtTempTransformSkipYuv;
public:
// interface to classes
@@ -202,15 +200,18 @@
void xEncSubdivCbfQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t width, uint32_t height, bool bLuma, bool bChroma);
- void xEncCoeffQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TextType ttype, const bool splitIntoSubTUs);
+ void xEncCoeffQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TextType ttype);
void xEncIntraHeader(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, bool bLuma, bool bChroma);
uint32_t xGetIntraBitsQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t absPartIdxStep, bool bLuma, bool bChroma);
- uint32_t xGetIntraBitsQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t chromaId, const bool splitIntoSubTUs);
- void xIntraCodingLumaBlk(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize, TComYuv* fencYuv, TComYuv* predYuv,
- ShortYuv* resiYuv, uint32_t& cbf, uint32_t& outDist);
+ uint32_t xGetIntraBitsQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t log2TrSize, coeff_t* coeff);
+ uint32_t xGetIntraBitsQTChroma(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSizeC, uint32_t chromaId, coeff_t* coeff);
+ void xIntraCodingLumaBlk(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv,
+ int16_t* reconQt, uint32_t reconQtStride, coeff_t* coeff,
+ uint32_t& cbf, uint32_t& outDist);
- void xIntraCodingChromaBlk(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize, TComYuv* fencYuv, TComYuv* predYuv,
- ShortYuv* resiYuv, uint32_t& cbf, uint32_t& outDist, uint32_t chromaId, uint32_t log2TrSizeC);
+ void xIntraCodingChromaBlk(TComDataCU* cu, uint32_t absPartIdx, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv,
+ int16_t* reconQt, uint32_t reconQtStride, coeff_t* coeff,
+ uint32_t& cbf, uint32_t& outDist, uint32_t chromaId, uint32_t log2TrSizeC);
void xRecurIntraChromaCodingQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* fencYuv,
TComYuv* predYuv, ShortYuv* resiYuv, uint32_t& outDist);
@@ -222,10 +223,10 @@
void xSetIntraResultChromaQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* reconYuv);
- void xStoreIntraResultQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize);
- void xLoadIntraResultQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize);
- void xStoreIntraResultChromaQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize, uint32_t log2TrSizeC, uint32_t chromaId);
- void xLoadIntraResultChromaQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize, uint32_t log2TrSizeC, uint32_t chromaId);
+ void xLoadIntraResultQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize,
+ int16_t* reconQt, uint32_t reconQtStride);
+ void xLoadIntraResultChromaQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSizeC, uint32_t chromaId,
+ int16_t* reconQt, uint32_t reconQtStride);
// --------------------------------------------------------------------------------------------
// Inter search (AMP)
diff -r 3af58371c5ff -r ed2786407c46 source/common/shortyuv.cpp
--- a/source/common/shortyuv.cpp Tue Jun 24 15:41:55 2014 +0900
+++ b/source/common/shortyuv.cpp Tue Jun 24 15:42:36 2014 +0900
@@ -212,16 +212,3 @@
uint32_t dstStride = dstPicYuv->m_cwidth;
primitives.chroma[m_csp].copy_ss[part](dst, dstStride, src, srcStride);
}
-
-void ShortYuv::copyPartToPartYuvChroma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, uint32_t chromaId, const bool splitIntoSubTUs)
-{
- X265_CHECK(chromaId == 1 || chromaId == 2, "invalid chroma id");
-
- int part = splitIntoSubTUs ? NUM_CHROMA_PARTITIONS422 : partitionFromSize(lumaSize);
-
- int16_t* src = getChromaAddr(chromaId, partIdx);
- pixel* dst = dstPicYuv->getChromaAddr(chromaId, partIdx);
- uint32_t srcStride = m_cwidth;
- uint32_t dstStride = dstPicYuv->getCStride();
- primitives.chroma[m_csp].copy_sp[part](dst, dstStride, src, srcStride);
-}
diff -r 3af58371c5ff -r ed2786407c46 source/common/shortyuv.h
--- a/source/common/shortyuv.h Tue Jun 24 15:41:55 2014 +0900
+++ b/source/common/shortyuv.h Tue Jun 24 15:42:36 2014 +0900
@@ -100,7 +100,6 @@
void copyPartToPartLuma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t partSize);
void copyPartToPartLuma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height);
void copyPartToPartChroma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, bool bChromaSame);
- void copyPartToPartYuvChroma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, uint32_t chromaId, const bool splitIntoSubTUs);
// -------------------------------------------------------------------------------------------------------------------
// member functions to support multiple color space formats
More information about the x265-devel mailing list