[x265] cleanup unused intra !bLumaOnly path
Satoshi Nakagawa
nakagawa424 at oki.com
Mon Mar 10 07:15:49 CET 2014
# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1394431967 -32400
# Mon Mar 10 15:12:47 2014 +0900
# Node ID 11ec5eda3c18279bf79fe5f0e1437dcdbf70b991
# Parent 82a66870fc8b35519cbd7cb4f803740aba98b98b
cleanup unused intra !bLumaOnly path
diff -r 82a66870fc8b -r 11ec5eda3c18 source/Lib/TLibCommon/TComDataCU.cpp
--- a/source/Lib/TLibCommon/TComDataCU.cpp Sun Mar 09 21:49:30 2014 -0500
+++ b/source/Lib/TLibCommon/TComDataCU.cpp Mon Mar 10 15:12:47 2014 +0900
@@ -2442,23 +2442,6 @@
outMV.y = X265_MIN(ymax, X265_MAX(ymin, (int)outMV.y));
}
-uint32_t TComDataCU::getIntraSizeIdx(uint32_t absPartIdx)
-{
- uint32_t shift = (m_partSizes[absPartIdx] == SIZE_NxN ? 1 : 0);
-
- UChar width = m_cuSize[absPartIdx] >> shift;
- uint32_t cnt = 0;
-
- while (width)
- {
- cnt++;
- width >>= 1;
- }
-
- cnt -= 2;
- return cnt > 6 ? 6 : cnt;
-}
-
/** Set a I_PCM flag for all sub-partitions of a partition.
* \param bIpcmFlag I_PCM flag
* \param absPartIdx partition index
diff -r 82a66870fc8b -r 11ec5eda3c18 source/Lib/TLibCommon/TComDataCU.h
--- a/source/Lib/TLibCommon/TComDataCU.h Sun Mar 09 21:49:30 2014 -0500
+++ b/source/Lib/TLibCommon/TComDataCU.h Mon Mar 10 15:12:47 2014 +0900
@@ -445,8 +445,6 @@
// member functions for symbol prediction (most probable / mode conversion)
// -------------------------------------------------------------------------------------------------------------------
- uint32_t getIntraSizeIdx(uint32_t absPartIdx);
-
void getAllowedChromaDir(uint32_t absPartIdx, uint32_t* modeList);
int getIntraDirLumaPredictor(uint32_t absPartIdx, int32_t* intraDirPred);
diff -r 82a66870fc8b -r 11ec5eda3c18 source/Lib/TLibCommon/TComYuv.cpp
--- a/source/Lib/TLibCommon/TComYuv.cpp Sun Mar 09 21:49:30 2014 -0500
+++ b/source/Lib/TLibCommon/TComYuv.cpp Mon Mar 10 15:12:47 2014 +0900
@@ -189,7 +189,7 @@
}
}
-void TComYuv::copyPartToPartShort(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, bool bChroma, bool bChromaSame)
+void TComYuv::copyPartToPartLuma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize)
{
int part = partitionFromSizes(lumaSize, lumaSize);
@@ -197,24 +197,6 @@
uint32_t dststride = dstPicYuv->m_width;
primitives.luma_copy_ps[part](dst, dststride, getLumaAddr(partIdx), getStride());
-
- if (bChroma)
- {
- int16_t* dstU = dstPicYuv->getCbAddr(partIdx);
- int16_t* dstV = dstPicYuv->getCrAddr(partIdx);
- dststride = dstPicYuv->m_cwidth;
-
- if (bChromaSame)
- {
- primitives.luma_copy_ps[part](dstU, dststride, getCbAddr(partIdx), getCStride());
- primitives.luma_copy_ps[part](dstV, dststride, getCrAddr(partIdx), getCStride());
- }
- else
- {
- primitives.chroma[m_csp].copy_ps[part](dstU, dststride, getCbAddr(partIdx), getCStride());
- primitives.chroma[m_csp].copy_ps[part](dstV, dststride, getCrAddr(partIdx), getCStride());
- }
- }
}
void TComYuv::copyPartToPartChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, uint32_t chromaId)
diff -r 82a66870fc8b -r 11ec5eda3c18 source/Lib/TLibCommon/TComYuv.h
--- a/source/Lib/TLibCommon/TComYuv.h Sun Mar 09 21:49:30 2014 -0500
+++ b/source/Lib/TLibCommon/TComYuv.h Mon Mar 10 15:12:47 2014 +0900
@@ -132,7 +132,7 @@
// Copy YUV partition buffer to other YUV partition buffer
void copyPartToPartYuv(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma);
- void copyPartToPartShort(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, bool bChroma, bool bChromaSame);
+ void copyPartToPartLuma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize);
void copyPartToPartChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, uint32_t chromaId);
// ------------------------------------------------------------------------------------------------------------------
diff -r 82a66870fc8b -r 11ec5eda3c18 source/Lib/TLibEncoder/TEncCu.cpp
--- a/source/Lib/TLibEncoder/TEncCu.cpp Sun Mar 09 21:49:30 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncCu.cpp Mon Mar 10 15:12:47 2014 +0900
@@ -1374,16 +1374,15 @@
{
//PPAScopeEvent(TEncCU_xCheckRDCostIntra + depth);
uint32_t depth = outTempCU->getDepth(0);
- uint32_t preCalcDistC = 0;
outTempCU->setSkipFlagSubParts(false, 0, depth);
outTempCU->setPartSizeSubParts(partSize, 0, depth);
outTempCU->setPredModeSubParts(MODE_INTRA, 0, depth);
outTempCU->setCUTransquantBypassSubParts(m_CUTransquantBypassFlagValue, 0, depth);
- m_search->estIntraPredQT(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth], preCalcDistC, true);
+ m_search->estIntraPredQT(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth]);
- m_search->estIntraPredChromaQT(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth], preCalcDistC);
+ m_search->estIntraPredChromaQT(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth]);
m_entropyCoder->resetBits();
if (outTempCU->getSlice()->getPPS()->getTransquantBypassEnableFlag())
@@ -1421,16 +1420,9 @@
outTempCU->setPredModeSubParts(MODE_INTRA, 0, depth);
outTempCU->setCUTransquantBypassSubParts(m_CUTransquantBypassFlagValue, 0, depth);
- bool bSeparateLumaChroma = true; // choose estimation mode
- uint32_t preCalcDistC = 0;
- if (!bSeparateLumaChroma)
- {
- m_search->preestChromaPredMode(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth]);
- }
- m_search->estIntraPredQT(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth],
- preCalcDistC, bSeparateLumaChroma);
+ m_search->estIntraPredQT(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth]);
- m_search->estIntraPredChromaQT(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth], preCalcDistC);
+ m_search->estIntraPredChromaQT(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth]);
m_entropyCoder->resetBits();
if (outTempCU->getSlice()->getPPS()->getTransquantBypassEnableFlag())
diff -r 82a66870fc8b -r 11ec5eda3c18 source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Sun Mar 09 21:49:30 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Mar 10 15:12:47 2014 +0900
@@ -636,12 +636,10 @@
void TEncSearch::xRecurIntraCodingQT(TComDataCU* cu,
uint32_t trDepth,
uint32_t absPartIdx,
- bool bLumaOnly,
TComYuv* fencYuv,
TComYuv* predYuv,
- ShortYuv* resiYuv,
+ ShortYuv* resiYuv,
uint32_t& outDistY,
- uint32_t& outDistC,
bool bCheckFirst,
uint64_t& rdCost)
{
@@ -674,15 +672,11 @@
uint64_t singleCost = MAX_INT64;
uint32_t singleDistY = 0;
- uint32_t singleDistC = 0;
uint32_t singleCbfY = 0;
- uint32_t singleCbfU = 0;
- uint32_t singleCbfV = 0;
bool checkTransformSkip = cu->getSlice()->getPPS()->getUseTransformSkip();
uint32_t widthTransformSkip = cu->getCUSize(0) >> trDepth;
uint32_t heightTransformSkip = cu->getCUSize(0) >> trDepth;
int bestModeId = 0;
- int bestModeIdUV[2] = { 0, 0 };
checkTransformSkip &= (widthTransformSkip == 4 && heightTransformSkip == 4);
checkTransformSkip &= (!cu->getCUTransquantBypass(0));
@@ -700,38 +694,18 @@
m_rdGoOnSbacCoder->store(m_rdSbacCoders[fullDepth][CI_QT_TRAFO_ROOT]);
uint32_t singleDistYTmp = 0;
- uint32_t singleDistCTmp = 0;
uint32_t singleCbfYTmp = 0;
- uint32_t singleCbfUTmp = 0;
- uint32_t singleCbfVTmp = 0;
uint64_t singleCostTmp = 0;
const int firstCheckId = 0;
- uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + (trDepth - 1)) << 1);
- bool bFirstQ = ((absPartIdx % qpdiv) == 0);
-
for (int modeId = firstCheckId; modeId < 2; modeId++)
{
singleDistYTmp = 0;
- singleDistCTmp = 0;
cu->setTransformSkipSubParts(modeId, TEXT_LUMA, absPartIdx, fullDepth);
//----- code luma block with given intra prediction mode and store Cbf-----
bool bReusePred = modeId != firstCheckId;
xIntraCodingLumaBlk(cu, trDepth, absPartIdx, fencYuv, predYuv, resiYuv, singleDistYTmp, bReusePred);
singleCbfYTmp = cu->getCbf(absPartIdx, TEXT_LUMA, trDepth);
- //----- code chroma blocks with given intra prediction mode and store Cbf-----
- if (!bLumaOnly)
- {
- if (bFirstQ)
- {
- cu->setTransformSkipSubParts(modeId, TEXT_CHROMA_U, absPartIdx, fullDepth);
- cu->setTransformSkipSubParts(modeId, TEXT_CHROMA_V, absPartIdx, fullDepth);
- }
- xIntraCodingChromaBlk(cu, trDepth, absPartIdx, fencYuv, predYuv, resiYuv, singleDistCTmp, 0, bReusePred);
- xIntraCodingChromaBlk(cu, trDepth, absPartIdx, fencYuv, predYuv, resiYuv, singleDistCTmp, 1, bReusePred);
- singleCbfUTmp = cu->getCbf(absPartIdx, TEXT_CHROMA_U, trDepth);
- singleCbfVTmp = cu->getCbf(absPartIdx, TEXT_CHROMA_V, trDepth);
- }
//----- determine rate and r-d cost -----
if (modeId == 1 && singleCbfYTmp == 0)
{
@@ -740,22 +714,19 @@
}
else
{
- uint32_t singleBits = xGetIntraBitsQT(cu, trDepth, absPartIdx, true, !bLumaOnly);
- singleCostTmp = m_rdCost->calcRdCost(singleDistYTmp + singleDistCTmp, singleBits);
+ uint32_t singleBits = xGetIntraBitsQT(cu, trDepth, absPartIdx, true, false);
+ singleCostTmp = m_rdCost->calcRdCost(singleDistYTmp, singleBits);
}
if (singleCostTmp < singleCost)
{
singleCost = singleCostTmp;
singleDistY = singleDistYTmp;
- singleDistC = singleDistCTmp;
singleCbfY = singleCbfYTmp;
- singleCbfU = singleCbfUTmp;
- singleCbfV = singleCbfVTmp;
bestModeId = modeId;
if (bestModeId == firstCheckId)
{
- xStoreIntraResultQT(cu, trDepth, absPartIdx, bLumaOnly);
+ xStoreIntraResultQT(cu, trDepth, absPartIdx);
m_rdGoOnSbacCoder->store(m_rdSbacCoders[fullDepth][CI_TEMP_BEST]);
}
}
@@ -769,37 +740,10 @@
if (bestModeId == firstCheckId)
{
- xLoadIntraResultQT(cu, trDepth, absPartIdx, bLumaOnly);
+ xLoadIntraResultQT(cu, trDepth, absPartIdx);
cu->setCbfSubParts(singleCbfY << trDepth, TEXT_LUMA, absPartIdx, fullDepth);
- if (!bLumaOnly)
- {
- if (bFirstQ)
- {
- cu->setCbfSubParts(singleCbfU << trDepth, TEXT_CHROMA_U, absPartIdx, cu->getDepth(0) + trDepth - 1);
- cu->setCbfSubParts(singleCbfV << trDepth, TEXT_CHROMA_V, absPartIdx, cu->getDepth(0) + trDepth - 1);
- }
- }
m_rdGoOnSbacCoder->load(m_rdSbacCoders[fullDepth][CI_TEMP_BEST]);
}
-
- if (!bLumaOnly)
- {
- bestModeIdUV[0] = bestModeIdUV[1] = bestModeId;
- if (bFirstQ && bestModeId == 1)
- {
- //In order not to code TS flag when cbf is zero, the case for TS with cbf being zero is forbidden.
- if (singleCbfU == 0)
- {
- cu->setTransformSkipSubParts(0, TEXT_CHROMA_U, absPartIdx, fullDepth);
- bestModeIdUV[0] = 0;
- }
- if (singleCbfV == 0)
- {
- cu->setTransformSkipSubParts(0, TEXT_CHROMA_V, absPartIdx, fullDepth);
- bestModeIdUV[1] = 0;
- }
- }
- }
}
else
{
@@ -814,26 +758,13 @@
{
singleCbfY = cu->getCbf(absPartIdx, TEXT_LUMA, trDepth);
}
- //----- code chroma blocks with given intra prediction mode and store Cbf-----
- if (!bLumaOnly)
- {
- cu->setTransformSkipSubParts(0, TEXT_CHROMA_U, absPartIdx, fullDepth);
- cu->setTransformSkipSubParts(0, TEXT_CHROMA_V, absPartIdx, fullDepth);
- xIntraCodingChromaBlk(cu, trDepth, absPartIdx, fencYuv, predYuv, resiYuv, singleDistC, 0);
- xIntraCodingChromaBlk(cu, trDepth, absPartIdx, fencYuv, predYuv, resiYuv, singleDistC, 1);
- if (bCheckSplit)
- {
- singleCbfU = cu->getCbf(absPartIdx, TEXT_CHROMA_U, trDepth);
- singleCbfV = cu->getCbf(absPartIdx, TEXT_CHROMA_V, trDepth);
- }
- }
//----- determine rate and r-d cost -----
- uint32_t singleBits = xGetIntraBitsQT(cu, trDepth, absPartIdx, true, !bLumaOnly);
+ uint32_t singleBits = xGetIntraBitsQT(cu, trDepth, absPartIdx, true, false);
if (m_cfg->param->rdPenalty && (trSizeLog2 == 5) && !isIntraSlice)
{
singleBits = singleBits * 4;
}
- singleCost = m_rdCost->calcRdCost(singleDistY + singleDistC, singleBits);
+ singleCost = m_rdCost->calcRdCost(singleDistY, singleBits);
}
}
@@ -853,24 +784,16 @@
//----- code splitted block -----
uint64_t splitCost = 0;
uint32_t splitDistY = 0;
- uint32_t splitDistC = 0;
uint32_t qPartsDiv = cu->getPic()->getNumPartInCU() >> ((fullDepth + 1) << 1);
uint32_t absPartIdxSub = absPartIdx;
uint32_t splitCbfY = 0;
- uint32_t splitCbfU = 0;
- uint32_t splitCbfV = 0;
for (uint32_t part = 0; part < 4; part++, absPartIdxSub += qPartsDiv)
{
- xRecurIntraCodingQT(cu, trDepth + 1, absPartIdxSub, bLumaOnly, fencYuv, predYuv, resiYuv, splitDistY, splitDistC, bCheckFirst, splitCost);
+ xRecurIntraCodingQT(cu, trDepth + 1, absPartIdxSub, fencYuv, predYuv, resiYuv, splitDistY, bCheckFirst, splitCost);
splitCbfY |= cu->getCbf(absPartIdxSub, TEXT_LUMA, trDepth + 1);
- if (!bLumaOnly)
- {
- splitCbfU |= cu->getCbf(absPartIdxSub, TEXT_CHROMA_U, trDepth + 1);
- splitCbfV |= cu->getCbf(absPartIdxSub, TEXT_CHROMA_V, trDepth + 1);
- }
}
for (uint32_t offs = 0; offs < 4 * qPartsDiv; offs++)
@@ -878,27 +801,18 @@
cu->getCbf(TEXT_LUMA)[absPartIdx + offs] |= (splitCbfY << trDepth);
}
- if (!bLumaOnly)
- {
- for (uint32_t offs = 0; offs < 4 * qPartsDiv; offs++)
- {
- cu->getCbf(TEXT_CHROMA_U)[absPartIdx + offs] |= (splitCbfU << trDepth);
- cu->getCbf(TEXT_CHROMA_V)[absPartIdx + offs] |= (splitCbfV << trDepth);
- }
- }
//----- restore context states -----
m_rdGoOnSbacCoder->load(m_rdSbacCoders[fullDepth][CI_QT_TRAFO_ROOT]);
//----- determine rate and r-d cost -----
- uint32_t splitBits = xGetIntraBitsQT(cu, trDepth, absPartIdx, true, !bLumaOnly);
- splitCost = m_rdCost->calcRdCost(splitDistY + splitDistC, splitBits);
+ uint32_t splitBits = xGetIntraBitsQT(cu, trDepth, absPartIdx, true, false);
+ splitCost = m_rdCost->calcRdCost(splitDistY, splitBits);
//===== compare and set best =====
if (splitCost < singleCost)
{
//--- update cost ---
outDistY += splitDistY;
- outDistC += splitDistC;
rdCost += splitCost;
return;
}
@@ -910,13 +824,6 @@
cu->setTrIdxSubParts(trDepth, absPartIdx, fullDepth);
cu->setCbfSubParts(singleCbfY << trDepth, TEXT_LUMA, absPartIdx, fullDepth);
cu->setTransformSkipSubParts(bestModeId, TEXT_LUMA, absPartIdx, fullDepth);
- if (!bLumaOnly)
- {
- cu->setCbfSubParts(singleCbfU << trDepth, TEXT_CHROMA_U, absPartIdx, fullDepth);
- cu->setCbfSubParts(singleCbfV << trDepth, TEXT_CHROMA_V, absPartIdx, fullDepth);
- cu->setTransformSkipSubParts(bestModeIdUV[0], TEXT_CHROMA_U, absPartIdx, fullDepth);
- cu->setTransformSkipSubParts(bestModeIdUV[1], TEXT_CHROMA_V, absPartIdx, fullDepth);
- }
//--- set reconstruction for next intra prediction blocks ---
uint32_t width = cu->getCUSize(0) >> trDepth;
@@ -928,34 +835,18 @@
Pel* dst = cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);
uint32_t dststride = cu->getPic()->getPicYuvRec()->getStride();
primitives.blockcpy_ps(width, height, dst, dststride, src, MAX_CU_SIZE);
-
- if (!bLumaOnly)
- {
- width >>= m_hChromaShift;
- height >>= m_vChromaShift;
- src = m_qtTempShortYuv[qtLayer].getCbAddr(absPartIdx);
- uint32_t srcstride = m_qtTempShortYuv[qtLayer].m_cwidth;
- dst = cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder);
- dststride = cu->getPic()->getPicYuvRec()->getCStride();
- primitives.blockcpy_ps(width, height, dst, dststride, src, srcstride);
- src = m_qtTempShortYuv[qtLayer].getCrAddr(absPartIdx);
- dst = cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder);
- primitives.blockcpy_ps(width, height, dst, dststride, src, srcstride);
- }
}
outDistY += singleDistY;
- outDistC += singleDistC;
rdCost += singleCost;
}
void TEncSearch::residualTransformQuantIntra(TComDataCU* cu,
uint32_t trDepth,
uint32_t absPartIdx,
- bool bLumaOnly,
TComYuv* fencYuv,
TComYuv* predYuv,
- ShortYuv* resiYuv,
+ ShortYuv* resiYuv,
TComYuv* reconYuv)
{
uint32_t fullDepth = cu->getDepth(0) + trDepth;
@@ -1054,7 +945,7 @@
for (uint32_t part = 0; part < 4; part++, absPartIdxSub += qPartsDiv)
{
- residualTransformQuantIntra(cu, trDepth + 1, absPartIdxSub, bLumaOnly, fencYuv, predYuv, resiYuv, reconYuv);
+ residualTransformQuantIntra(cu, trDepth + 1, absPartIdxSub, fencYuv, predYuv, resiYuv, reconYuv);
splitCbfY |= cu->getCbf(absPartIdxSub, TEXT_LUMA, trDepth + 1);
}
@@ -1067,7 +958,7 @@
}
}
-void TEncSearch::xSetIntraResultQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, bool bLumaOnly, TComYuv* reconYuv)
+void TEncSearch::xSetIntraResultQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* reconYuv)
{
uint32_t fullDepth = cu->getDepth(0) + trDepth;
uint32_t trMode = cu->getTransformIdx(absPartIdx);
@@ -1077,16 +968,6 @@
uint32_t trSizeLog2 = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUSize() >> fullDepth] + 2;
uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
- bool bSkipChroma = false;
- bool bChromaSame = false;
- if (!bLumaOnly && trSizeLog2 == 2)
- {
- assert(trDepth > 0);
- uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trDepth - 1) << 1);
- bSkipChroma = ((absPartIdx % qpdiv) != 0);
- bChromaSame = true;
- }
-
//===== copy transform coefficients =====
uint32_t numCoeffY = (cu->getSlice()->getSPS()->getMaxCUSize() * cu->getSlice()->getSPS()->getMaxCUSize()) >> (fullDepth << 1);
uint32_t numCoeffIncY = (cu->getSlice()->getSPS()->getMaxCUSize() * cu->getSlice()->getSPS()->getMaxCUSize()) >> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1);
@@ -1094,54 +975,26 @@
TCoeff* coeffDestY = cu->getCoeffY() + (numCoeffIncY * absPartIdx);
::memcpy(coeffDestY, coeffSrcY, sizeof(TCoeff) * numCoeffY);
- if (!bLumaOnly && !bSkipChroma)
- {
- uint32_t numCoeffC = (bChromaSame ? numCoeffY : numCoeffY >> 2);
- uint32_t numCoeffIncC = numCoeffIncY >> 2;
- TCoeff* coeffSrcU = m_qtTempCoeffCb[qtlayer] + (numCoeffIncC * absPartIdx);
- TCoeff* coeffSrcV = m_qtTempCoeffCr[qtlayer] + (numCoeffIncC * absPartIdx);
- TCoeff* coeffDstU = cu->getCoeffCb() + (numCoeffIncC * absPartIdx);
- TCoeff* coeffDstV = cu->getCoeffCr() + (numCoeffIncC * absPartIdx);
- ::memcpy(coeffDstU, coeffSrcU, sizeof(TCoeff) * numCoeffC);
- ::memcpy(coeffDstV, coeffSrcV, sizeof(TCoeff) * numCoeffC);
- }
-
//===== copy reconstruction =====
m_qtTempShortYuv[qtlayer].copyPartToPartLuma(reconYuv, absPartIdx, 1 << trSizeLog2, 1 << trSizeLog2);
- if (!bLumaOnly && !bSkipChroma)
- {
- uint32_t trSizeCLog2 = (bChromaSame ? trSizeLog2 : trSizeLog2 - 1);
- m_qtTempShortYuv[qtlayer].copyPartToPartChroma(reconYuv, absPartIdx, 1 << trSizeCLog2, 1 << trSizeCLog2);
- }
}
else
{
uint32_t numQPart = cu->getPic()->getNumPartInCU() >> ((fullDepth + 1) << 1);
for (uint32_t part = 0; part < 4; part++)
{
- xSetIntraResultQT(cu, trDepth + 1, absPartIdx + part * numQPart, bLumaOnly, reconYuv);
+ xSetIntraResultQT(cu, trDepth + 1, absPartIdx + part * numQPart, reconYuv);
}
}
}
-void TEncSearch::xStoreIntraResultQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, bool bLumaOnly)
+void TEncSearch::xStoreIntraResultQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx)
{
uint32_t fullMode = cu->getDepth(0) + trDepth;
uint32_t trSizeLog2 = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUSize() >> fullMode] + 2;
uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
- bool bSkipChroma = false;
- bool bChromaSame = false;
-
- if (!bLumaOnly && trSizeLog2 == 2)
- {
- assert(trDepth > 0);
- uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trDepth - 1) << 1);
- bSkipChroma = ((absPartIdx % qpdiv) != 0);
- bChromaSame = true;
- }
-
//===== copy transform coefficients =====
uint32_t numCoeffY = (cu->getSlice()->getSPS()->getMaxCUSize() * cu->getSlice()->getSPS()->getMaxCUSize()) >> (fullMode << 1);
uint32_t numCoeffIncY = (cu->getSlice()->getSPS()->getMaxCUSize() * cu->getSlice()->getSPS()->getMaxCUSize()) >> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1);
@@ -1149,46 +1002,17 @@
TCoeff* coeffDstY = m_qtTempTUCoeffY;
::memcpy(coeffDstY, coeffSrcY, sizeof(TCoeff) * numCoeffY);
- if (!bLumaOnly && !bSkipChroma)
- {
- uint32_t numCoeffC = (bChromaSame ? numCoeffY : numCoeffY >> 2);
- uint32_t numCoeffIncC = numCoeffIncY >> 2;
- TCoeff* coeffSrcU = m_qtTempCoeffCb[qtlayer] + (numCoeffIncC * absPartIdx);
- TCoeff* coeffSrcV = m_qtTempCoeffCr[qtlayer] + (numCoeffIncC * absPartIdx);
- TCoeff* coeffDstU = m_qtTempTUCoeffCb;
- TCoeff* coeffDstV = m_qtTempTUCoeffCr;
- ::memcpy(coeffDstU, coeffSrcU, sizeof(TCoeff) * numCoeffC);
- ::memcpy(coeffDstV, coeffSrcV, sizeof(TCoeff) * numCoeffC);
- }
-
//===== copy reconstruction =====
m_qtTempShortYuv[qtlayer].copyPartToPartLuma(&m_qtTempTransformSkipYuv, absPartIdx, 1 << trSizeLog2, 1 << trSizeLog2);
-
- if (!bLumaOnly && !bSkipChroma)
- {
- uint32_t trSizeCLog2 = (bChromaSame ? trSizeLog2 : trSizeLog2 - 1);
- m_qtTempShortYuv[qtlayer].copyPartToPartChroma(&m_qtTempTransformSkipYuv, absPartIdx, 1 << trSizeCLog2, 1 << trSizeCLog2);
- }
}
-void TEncSearch::xLoadIntraResultQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, bool bLumaOnly)
+void TEncSearch::xLoadIntraResultQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx)
{
uint32_t fullDepth = cu->getDepth(0) + trDepth;
uint32_t trSizeLog2 = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUSize() >> fullDepth] + 2;
uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
- bool bSkipChroma = false;
- bool bChromaSame = false;
-
- if (!bLumaOnly && trSizeLog2 == 2)
- {
- assert(trDepth > 0);
- uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trDepth - 1) << 1);
- bSkipChroma = ((absPartIdx % qpdiv) != 0);
- bChromaSame = true;
- }
-
//===== copy transform coefficients =====
uint32_t numCoeffY = (cu->getSlice()->getSPS()->getMaxCUSize() * cu->getSlice()->getSPS()->getMaxCUSize()) >> (fullDepth << 1);
uint32_t numCoeffIncY = (cu->getSlice()->getSPS()->getMaxCUSize() * cu->getSlice()->getSPS()->getMaxCUSize()) >> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1);
@@ -1196,44 +1020,16 @@
TCoeff* coeffSrcY = m_qtTempTUCoeffY;
::memcpy(coeffDstY, coeffSrcY, sizeof(TCoeff) * numCoeffY);
- if (!bLumaOnly && !bSkipChroma)
- {
- uint32_t numCoeffC = (bChromaSame ? numCoeffY : numCoeffY >> 2);
- uint32_t numCoeffIncC = numCoeffIncY >> 2;
- TCoeff* coeffDstU = m_qtTempCoeffCb[qtlayer] + (numCoeffIncC * absPartIdx);
- TCoeff* coeffDstV = m_qtTempCoeffCr[qtlayer] + (numCoeffIncC * absPartIdx);
- TCoeff* coeffSrcU = m_qtTempTUCoeffCb;
- TCoeff* coeffSrcV = m_qtTempTUCoeffCr;
- ::memcpy(coeffDstU, coeffSrcU, sizeof(TCoeff) * numCoeffC);
- ::memcpy(coeffDstV, coeffSrcV, sizeof(TCoeff) * numCoeffC);
- }
-
//===== copy reconstruction =====
- m_qtTempTransformSkipYuv.copyPartToPartShort(&m_qtTempShortYuv[qtlayer], absPartIdx, 1 << trSizeLog2, !bLumaOnly && !bSkipChroma, bChromaSame);
+ uint32_t trSize = 1 << trSizeLog2;
+ m_qtTempTransformSkipYuv.copyPartToPartLuma(&m_qtTempShortYuv[qtlayer], absPartIdx, trSize);
uint32_t zOrder = cu->getZorderIdxInCU() + absPartIdx;
pixel* reconIPred = cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zOrder);
uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getStride();
int16_t* reconQt = m_qtTempShortYuv[qtlayer].getLumaAddr(absPartIdx);
- uint32_t width = cu->getCUSize(0) >> trDepth;
- uint32_t height = cu->getCUSize(0) >> trDepth;
- primitives.blockcpy_ps(width, height, reconIPred, reconIPredStride, reconQt, MAX_CU_SIZE);
+ primitives.blockcpy_ps(trSize, trSize, reconIPred, reconIPredStride, reconQt, MAX_CU_SIZE);
assert(m_qtTempShortYuv[qtlayer].m_width == MAX_CU_SIZE);
-
- if (!bLumaOnly && !bSkipChroma)
- {
- width >>= 1;
- height >>= 1;
- reconIPred = cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zOrder);
- reconIPredStride = cu->getPic()->getPicYuvRec()->getCStride();
- reconQt = m_qtTempShortYuv[qtlayer].getCbAddr(absPartIdx);
- uint32_t reconQtStride = m_qtTempShortYuv[qtlayer].m_cwidth;
- primitives.blockcpy_ps(width, height, reconIPred, reconIPredStride, reconQt, reconQtStride);
-
- reconIPred = cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zOrder);
- reconQt = m_qtTempShortYuv[qtlayer].getCrAddr(absPartIdx);
- primitives.blockcpy_ps(width, height, reconIPred, reconIPredStride, reconQt, reconQtStride);
- }
}
void TEncSearch::xStoreIntraResultChromaQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t stateU0V1Both2)
@@ -1684,66 +1480,19 @@
}
}
-void TEncSearch::preestChromaPredMode(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv)
-{
- uint32_t width = cu->getCUSize(0) >> 1;
- uint32_t height = cu->getCUSize(0) >> 1;
- uint32_t stride = fencYuv->getCStride();
- int chFmt = cu->getChromaFormat();
- Pel* fencU = fencYuv->getCbAddr(0);
- Pel* fencV = fencYuv->getCrAddr(0);
- Pel* predU = predYuv->getCbAddr(0);
- Pel* predV = predYuv->getCrAddr(0);
-
- //===== init pattern =====
- assert(width == height);
- cu->getPattern()->initAdiPatternChroma(cu, 0, 0, m_predBuf, m_predBufStride, m_predBufHeight, 0);
- cu->getPattern()->initAdiPatternChroma(cu, 0, 0, m_predBuf, m_predBufStride, m_predBufHeight, 1);
- Pel* patChromaU = TComPattern::getAdiChromaBuf(0, height, m_predBuf);
- Pel* patChromaV = TComPattern::getAdiChromaBuf(1, height, m_predBuf);
-
- //===== get best prediction modes (using SAD) =====
- uint32_t minMode = 0;
- uint32_t maxMode = 4;
- uint32_t bestMode = MAX_UINT;
- uint32_t minSAD = MAX_UINT;
- pixelcmp_t sa8d = primitives.sa8d[(int)g_convertToBit[width]];
- for (uint32_t mode = minMode; mode < maxMode; mode++)
- {
- //--- get prediction ---
- predIntraChromaAng(patChromaU, mode, predU, stride, width, height, chFmt);
- predIntraChromaAng(patChromaV, mode, predV, stride, width, height, chFmt);
-
- //--- get SAD ---
- uint32_t sad = sa8d(fencU, stride, predU, stride) + sa8d(fencV, stride, predV, stride);
-
- //--- check ---
- if (sad < minSAD)
- {
- minSAD = sad;
- bestMode = mode;
- }
- }
-
- x265_emms();
-
- //===== set chroma pred mode =====
- cu->setChromIntraDirSubParts(bestMode, 0, cu->getDepth(0));
-}
-
-void TEncSearch::estIntraPredQT(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv, uint32_t& outDistC, bool bLumaOnly)
+void TEncSearch::estIntraPredQT(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv)
{
uint32_t depth = cu->getDepth(0);
- uint32_t numPU = cu->getNumPartInter();
uint32_t initTrDepth = cu->getPartitionSize(0) == SIZE_2Nx2N ? 0 : 1;
- uint32_t width = cu->getCUSize(0) >> initTrDepth;
+ uint32_t numPU = 1 << (2 * initTrDepth);
+ uint32_t puSize = cu->getCUSize(0) >> initTrDepth;
uint32_t qNumParts = cu->getTotalNumPart() >> 2;
- uint32_t widthBit = cu->getIntraSizeIdx(0);
+ uint32_t qPartNum = cu->getPic()->getNumPartInCU() >> ((depth + initTrDepth) << 1);
uint32_t overallDistY = 0;
- uint32_t overallDistC = 0;
uint32_t candNum;
uint64_t candCostList[FAST_UDI_MAX_RDMODE_NUM];
- const UChar intraModeNumFast[7] = { 3, 8, 8, 3, 3, 3, 3 }; // 2x2, 4x4, 8x8, 16x16, 32x32, 64x64, 128x128
+ uint32_t puSizeIdx = g_convertToBit[puSize]; // log2(puSize) - 2
+ static const UChar intraModeNumFast[] = { 8, 8, 3, 3, 3 }; // 4x4, 8x8, 16x16, 32x32, 64x64
//===== set QP and clear Cbf =====
if (cu->getSlice()->getPPS()->getUseDQP() == true)
@@ -1764,10 +1513,10 @@
//===== determine set of modes to be tested (using prediction signal only) =====
const int numModesAvailable = 35; //total number of Intra modes
- Pel* fenc = fencYuv->getLumaAddr(pu, width);
+ Pel* fenc = fencYuv->getLumaAddr(pu, puSize);
uint32_t stride = predYuv->getStride();
uint32_t rdModeList[FAST_UDI_MAX_RDMODE_NUM];
- int numModesForFullRD = intraModeNumFast[widthBit];
+ int numModesForFullRD = intraModeNumFast[puSizeIdx];
bool doFastSearch = (numModesForFullRD != numModesAvailable);
if (doFastSearch)
@@ -1782,19 +1531,19 @@
candNum = 0;
uint32_t modeCosts[35];
- Pel *above = m_refAbove + width - 1;
- Pel *aboveFiltered = m_refAboveFlt + width - 1;
- Pel *left = m_refLeft + width - 1;
- Pel *leftFiltered = m_refLeftFlt + width - 1;
+ Pel *above = m_refAbove + puSize - 1;
+ Pel *aboveFiltered = m_refAboveFlt + puSize - 1;
+ Pel *left = m_refLeft + puSize - 1;
+ Pel *leftFiltered = m_refLeftFlt + puSize - 1;
// 33 Angle modes once
ALIGN_VAR_32(Pel, buf_trans[32 * 32]);
ALIGN_VAR_32(Pel, tmp[33 * 32 * 32]);
- int scaleWidth = width;
+ int scaleSize = puSize;
int scaleStride = stride;
- int costMultiplier = 1;
-
- if (width > 32)
+ int costShift = 0;
+
+ if (puSize > 32)
{
// origin is 64x64, we scale to 32x32 and setup required parameters
ALIGN_VAR_32(Pel, bufScale[32 * 32]);
@@ -1811,9 +1560,9 @@
primitives.scale1D_128to64(aboveScale + 1, above + 1, 0);
primitives.scale1D_128to64(leftScale + 1, left + 1, 0);
- scaleWidth = 32;
+ scaleSize = 32;
scaleStride = 32;
- costMultiplier = 4;
+ costShift = 2;
// Filtered and Unfiltered refAbove and refLeft pointing to above and left.
above = aboveScale;
@@ -1822,17 +1571,17 @@
leftFiltered = leftScale;
}
- int log2SizeMinus2 = g_convertToBit[scaleWidth];
+ int log2SizeMinus2 = g_convertToBit[scaleSize];
pixelcmp_t sa8d = primitives.sa8d[log2SizeMinus2];
// DC
- primitives.intra_pred[log2SizeMinus2][DC_IDX](tmp, scaleStride, left, above, 0, (scaleWidth <= 16));
- modeCosts[DC_IDX] = costMultiplier * sa8d(fenc, scaleStride, tmp, scaleStride);
+ primitives.intra_pred[log2SizeMinus2][DC_IDX](tmp, scaleStride, left, above, 0, (scaleSize <= 16));
+ modeCosts[DC_IDX] = sa8d(fenc, scaleStride, tmp, scaleStride) << costShift;
Pel *abovePlanar = above;
Pel *leftPlanar = left;
- if (width >= 8 && width <= 32)
+ if (puSize >= 8 && puSize <= 32)
{
abovePlanar = aboveFiltered;
leftPlanar = leftFiltered;
@@ -1840,19 +1589,19 @@
// PLANAR
primitives.intra_pred[log2SizeMinus2][PLANAR_IDX](tmp, scaleStride, leftPlanar, abovePlanar, 0, 0);
- modeCosts[PLANAR_IDX] = costMultiplier * sa8d(fenc, scaleStride, tmp, scaleStride);
+ modeCosts[PLANAR_IDX] = sa8d(fenc, scaleStride, tmp, scaleStride) << costShift;
// Transpose NxN
primitives.transpose[log2SizeMinus2](buf_trans, fenc, scaleStride);
- primitives.intra_pred_allangs[log2SizeMinus2](tmp, above, left, aboveFiltered, leftFiltered, (scaleWidth <= 16));
+ primitives.intra_pred_allangs[log2SizeMinus2](tmp, above, left, aboveFiltered, leftFiltered, (scaleSize <= 16));
for (uint32_t mode = 2; mode < numModesAvailable; mode++)
{
bool modeHor = (mode < 18);
Pel *cmp = (modeHor ? buf_trans : fenc);
- intptr_t srcStride = (modeHor ? scaleWidth : scaleStride);
- modeCosts[mode] = costMultiplier * sa8d(cmp, srcStride, &tmp[(mode - 2) * (scaleWidth * scaleWidth)], scaleWidth);
+ intptr_t srcStride = (modeHor ? scaleSize : scaleStride);
+ modeCosts[mode] = sa8d(cmp, srcStride, &tmp[(mode - 2) * (scaleSize * scaleSize)], scaleSize) << costShift;
}
// Find N least cost modes. N = numModesForFullRD
@@ -1899,7 +1648,6 @@
//===== check modes (using r-d costs) =====
uint32_t bestPUMode = 0;
uint32_t bestPUDistY = 0;
- uint32_t bestPUDistC = 0;
uint64_t bestPUCost = MAX_INT64;
for (uint32_t mode = 0; mode < numModesForFullRD; mode++)
{
@@ -1913,28 +1661,21 @@
// determine residual for partition
uint32_t puDistY = 0;
- uint32_t puDistC = 0;
uint64_t puCost = 0;
- xRecurIntraCodingQT(cu, initTrDepth, partOffset, bLumaOnly, fencYuv, predYuv, resiYuv, puDistY, puDistC, true, puCost);
+ xRecurIntraCodingQT(cu, initTrDepth, partOffset, fencYuv, predYuv, resiYuv, puDistY, true, puCost);
// check r-d cost
if (puCost < bestPUCost)
{
bestPUMode = origMode;
bestPUDistY = puDistY;
- bestPUDistC = puDistC;
bestPUCost = puCost;
- xSetIntraResultQT(cu, initTrDepth, partOffset, bLumaOnly, reconYuv);
-
- uint32_t qPartNum = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + initTrDepth) << 1);
+ xSetIntraResultQT(cu, initTrDepth, partOffset, reconYuv);
+
::memcpy(m_qtTempTrIdx, cu->getTransformIdx() + partOffset, qPartNum * sizeof(UChar));
::memcpy(m_qtTempCbf[0], cu->getCbf(TEXT_LUMA) + partOffset, qPartNum * sizeof(UChar));
- ::memcpy(m_qtTempCbf[1], cu->getCbf(TEXT_CHROMA_U) + partOffset, qPartNum * sizeof(UChar));
- ::memcpy(m_qtTempCbf[2], cu->getCbf(TEXT_CHROMA_V) + partOffset, qPartNum * sizeof(UChar));
::memcpy(m_qtTempTransformSkipFlag[0], cu->getTransformSkip(TEXT_LUMA) + partOffset, qPartNum * sizeof(UChar));
- ::memcpy(m_qtTempTransformSkipFlag[1], cu->getTransformSkip(TEXT_CHROMA_U) + partOffset, qPartNum * sizeof(UChar));
- ::memcpy(m_qtTempTransformSkipFlag[2], cu->getTransformSkip(TEXT_CHROMA_V) + partOffset, qPartNum * sizeof(UChar));
}
} // Mode loop
@@ -1948,84 +1689,40 @@
// determine residual for partition
uint32_t puDistY = 0;
- uint32_t puDistC = 0;
uint64_t puCost = 0;
- xRecurIntraCodingQT(cu, initTrDepth, partOffset, bLumaOnly, fencYuv, predYuv, resiYuv, puDistY, puDistC, false, puCost);
+ xRecurIntraCodingQT(cu, initTrDepth, partOffset, fencYuv, predYuv, resiYuv, puDistY, false, puCost);
// check r-d cost
if (puCost < bestPUCost)
{
bestPUMode = origMode;
bestPUDistY = puDistY;
- bestPUDistC = puDistC;
-
- xSetIntraResultQT(cu, initTrDepth, partOffset, bLumaOnly, reconYuv);
-
- uint32_t qPartNum = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + initTrDepth) << 1);
+
+ xSetIntraResultQT(cu, initTrDepth, partOffset, reconYuv);
+
::memcpy(m_qtTempTrIdx, cu->getTransformIdx() + partOffset, qPartNum * sizeof(UChar));
::memcpy(m_qtTempCbf[0], cu->getCbf(TEXT_LUMA) + partOffset, qPartNum * sizeof(UChar));
- ::memcpy(m_qtTempCbf[1], cu->getCbf(TEXT_CHROMA_U) + partOffset, qPartNum * sizeof(UChar));
- ::memcpy(m_qtTempCbf[2], cu->getCbf(TEXT_CHROMA_V) + partOffset, qPartNum * sizeof(UChar));
::memcpy(m_qtTempTransformSkipFlag[0], cu->getTransformSkip(TEXT_LUMA) + partOffset, qPartNum * sizeof(UChar));
- ::memcpy(m_qtTempTransformSkipFlag[1], cu->getTransformSkip(TEXT_CHROMA_U) + partOffset, qPartNum * sizeof(UChar));
- ::memcpy(m_qtTempTransformSkipFlag[2], cu->getTransformSkip(TEXT_CHROMA_V) + partOffset, qPartNum * sizeof(UChar));
}
} // Mode loop
//--- update overall distortion ---
overallDistY += bestPUDistY;
- overallDistC += bestPUDistC;
//--- update transform index and cbf ---
- uint32_t qPartNum = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + initTrDepth) << 1);
::memcpy(cu->getTransformIdx() + partOffset, m_qtTempTrIdx, qPartNum * sizeof(UChar));
::memcpy(cu->getCbf(TEXT_LUMA) + partOffset, m_qtTempCbf[0], qPartNum * sizeof(UChar));
- ::memcpy(cu->getCbf(TEXT_CHROMA_U) + partOffset, m_qtTempCbf[1], qPartNum * sizeof(UChar));
- ::memcpy(cu->getCbf(TEXT_CHROMA_V) + partOffset, m_qtTempCbf[2], qPartNum * sizeof(UChar));
::memcpy(cu->getTransformSkip(TEXT_LUMA) + partOffset, m_qtTempTransformSkipFlag[0], qPartNum * sizeof(UChar));
- ::memcpy(cu->getTransformSkip(TEXT_CHROMA_U) + partOffset, m_qtTempTransformSkipFlag[1], qPartNum * sizeof(UChar));
- ::memcpy(cu->getTransformSkip(TEXT_CHROMA_V) + partOffset, m_qtTempTransformSkipFlag[2], qPartNum * sizeof(UChar));
//--- set reconstruction for next intra prediction blocks ---
if (pu != numPU - 1)
{
- bool bSkipChroma = false;
- bool bChromaSame = false;
- uint32_t trSizeLog2 = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUSize() >> (cu->getDepth(0) + initTrDepth)] + 2;
- if (!bLumaOnly && trSizeLog2 == 2)
- {
- assert(initTrDepth > 0);
- bSkipChroma = (pu != 0);
- bChromaSame = true;
- }
-
- uint32_t compWidth = cu->getCUSize(0) >> initTrDepth;
- uint32_t compHeight = cu->getCUSize(0) >> initTrDepth;
uint32_t zorder = cu->getZorderIdxInCU() + partOffset;
- int part = partitionFromSizes(compWidth, compHeight);
+ int part = partitionFromSizes(puSize, puSize);
Pel* dst = cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);
uint32_t dststride = cu->getPic()->getPicYuvRec()->getStride();
Pel* src = reconYuv->getLumaAddr(partOffset);
uint32_t srcstride = reconYuv->getStride();
primitives.luma_copy_pp[part](dst, dststride, src, srcstride);
-
- if (!bLumaOnly && !bSkipChroma)
- {
- dst = cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder);
- dststride = cu->getPic()->getPicYuvRec()->getCStride();
- src = reconYuv->getCbAddr(partOffset);
- srcstride = reconYuv->getCStride();
- if (bChromaSame)
- primitives.luma_copy_pp[part](dst, dststride, src, srcstride);
- else
- primitives.blockcpy_pp(compWidth, compHeight, dst, dststride, src, srcstride);
-
- dst = cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder);
- src = reconYuv->getCrAddr(partOffset);
- if (bChromaSame)
- primitives.luma_copy_pp[part](dst, dststride, src, srcstride);
- else
- primitives.blockcpy_pp(compWidth, compHeight, dst, dststride, src, srcstride);
- }
}
//=== update PU data ====
@@ -2036,21 +1733,15 @@
if (numPU > 1)
{ // set Cbf for all blocks
uint32_t combCbfY = 0;
- uint32_t combCbfU = 0;
- uint32_t combCbfV = 0;
uint32_t partIdx = 0;
for (uint32_t part = 0; part < 4; part++, partIdx += qNumParts)
{
combCbfY |= cu->getCbf(partIdx, TEXT_LUMA, 1);
- combCbfU |= cu->getCbf(partIdx, TEXT_CHROMA_U, 1);
- combCbfV |= cu->getCbf(partIdx, TEXT_CHROMA_V, 1);
}
for (uint32_t offs = 0; offs < 4 * qNumParts; offs++)
{
cu->getCbf(TEXT_LUMA)[offs] |= combCbfY;
- cu->getCbf(TEXT_CHROMA_U)[offs] |= combCbfU;
- cu->getCbf(TEXT_CHROMA_V)[offs] |= combCbfV;
}
}
@@ -2058,8 +1749,7 @@
m_rdGoOnSbacCoder->load(m_rdSbacCoders[depth][CI_CURR_BEST]);
//===== set distortion (rate and r-d costs are determined later) =====
- outDistC = overallDistC;
- cu->m_totalDistortion = overallDistY + overallDistC;
+ cu->m_totalDistortion = overallDistY;
}
void TEncSearch::getBestIntraModeChroma(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv)
@@ -2080,13 +1770,13 @@
uint32_t stride = fencYuv->getCStride();
int scaleWidth = width;
int scaleStride = stride;
- int costMultiplier = 1;
+ int costShift = 0;
if (width > 32)
{
scaleWidth = 32;
scaleStride = 32;
- costMultiplier = 4;
+ costShift = 2;
}
cu->getPattern()->initAdiPatternChroma(cu, absPartIdx, trDepth, m_predBuf, m_predBufStride, m_predBufHeight, 0);
@@ -2111,7 +1801,7 @@
predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, width, height, chFmt);
int log2SizeMinus2 = g_convertToBit[scaleWidth];
pixelcmp_t sa8d = primitives.sa8d[log2SizeMinus2];
- sad = costMultiplier * sa8d(fenc, scaleStride, pred, scaleStride);
+ sad = sa8d(fenc, scaleStride, pred, scaleStride) << costShift;
cost += sad;
}
@@ -2150,9 +1840,8 @@
void TEncSearch::estIntraPredChromaQT(TComDataCU* cu,
TComYuv* fencYuv,
TComYuv* predYuv,
- ShortYuv* resiYuv,
- TComYuv* reconYuv,
- uint32_t preCalcDistC)
+ ShortYuv* resiYuv,
+ TComYuv* reconYuv)
{
uint32_t depth = cu->getDepth(0);
uint32_t initTrDepth = (cu->getPartitionSize(0) != SIZE_2Nx2N) && (cu->getChromaFormat() == CHROMA_444 ? 1 : 0);
@@ -2236,7 +1925,7 @@
::memcpy(cu->getTransformSkip(TEXT_CHROMA_U) + m_partOffset, m_qtTempTransformSkipFlag[1], m_absPartIdxStep * sizeof(UChar));
::memcpy(cu->getTransformSkip(TEXT_CHROMA_V) + m_partOffset, m_qtTempTransformSkipFlag[2], m_absPartIdxStep * sizeof(UChar));
cu->setChromIntraDirSubParts(bestMode, m_partOffset, depth + initTrDepth);
- cu->m_totalDistortion += bestDist - preCalcDistC;
+ cu->m_totalDistortion += bestDist;
}
while (isNextSection());
@@ -3196,7 +2885,7 @@
else if (cu->getPredictionMode(0) == MODE_INTRA)
{
uint32_t initTrDepth = cu->getPartitionSize(0) == SIZE_2Nx2N ? 0 : 1;
- residualTransformQuantIntra(cu, initTrDepth, 0, true, fencYuv, predYuv, resiYuv, reconYuv);
+ residualTransformQuantIntra(cu, initTrDepth, 0, fencYuv, predYuv, resiYuv, reconYuv);
getBestIntraModeChroma(cu, fencYuv, predYuv);
residualQTIntrachroma(cu, 0, 0, fencYuv, predYuv, resiYuv, reconYuv);
}
diff -r 82a66870fc8b -r 11ec5eda3c18 source/Lib/TLibEncoder/TEncSearch.h
--- a/source/Lib/TLibEncoder/TEncSearch.h Sun Mar 09 21:49:30 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncSearch.h Mon Mar 10 15:12:47 2014 +0900
@@ -145,13 +145,12 @@
uint32_t xModeBitsIntra(TComDataCU* cu, uint32_t mode, uint32_t partOffset, uint32_t depth, uint32_t initTrDepth);
uint32_t xUpdateCandList(uint32_t mode, uint64_t cost, uint32_t fastCandNum, uint32_t* CandModeList, uint64_t* CandCostList);
- void preestChromaPredMode(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv);
- void estIntraPredQT(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv, uint32_t& ruiDistC, bool bLumaOnly);
+ void estIntraPredQT(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv);
void getBestIntraModeChroma(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv);
void estIntraPredChromaQT(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv,
- TComYuv* reconYuv, uint32_t precalcDistC);
+ TComYuv* reconYuv);
/// encoder estimation - inter prediction (non-skip)
bool predInterSearch(TComDataCU* cu, TComYuv* predYuv, bool bUseMRG, bool bChroma);
@@ -168,10 +167,10 @@
void IPCMSearch(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv);
- void xRecurIntraCodingQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, bool bLumaOnly, TComYuv* fencYuv,
- TComYuv* predYuv, ShortYuv* resiYuv, uint32_t& distY, uint32_t& distC, bool bCheckFirst,
+ void xRecurIntraCodingQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* fencYuv,
+ TComYuv* predYuv, ShortYuv* resiYuv, uint32_t& distY, bool bCheckFirst,
uint64_t& dRDCost);
- void xSetIntraResultQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, bool bLumaOnly, TComYuv* reconYuv);
+ void xSetIntraResultQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* reconYuv);
void generateCoeffRecon(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv, bool skipRes);
@@ -210,15 +209,15 @@
void xRecurIntraChromaCodingQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* fencYuv,
TComYuv* predYuv, ShortYuv* resiYuv, uint32_t& outDist);
- void residualTransformQuantIntra(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, bool bLumaOnly, TComYuv* fencYuv,
+ void residualTransformQuantIntra(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* fencYuv,
TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv);
void residualQTIntrachroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* fencYuv,
TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv);
void xSetIntraResultChromaQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* reconYuv);
- void xStoreIntraResultQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, bool bLumaOnly);
- void xLoadIntraResultQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, bool bLumaOnly);
+ void xStoreIntraResultQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx);
+ void xLoadIntraResultQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx);
void xStoreIntraResultChromaQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t stateU0V1Both2);
void xLoadIntraResultChromaQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t stateU0V1Both2);
diff -r 82a66870fc8b -r 11ec5eda3c18 source/encoder/compress.cpp
--- a/source/encoder/compress.cpp Sun Mar 09 21:49:30 2014 -0500
+++ b/source/encoder/compress.cpp Mon Mar 10 15:12:47 2014 +0900
@@ -36,23 +36,22 @@
{
uint64_t puCost = 0;
uint32_t puDistY = 0;
- uint32_t puDistC = 0;
uint32_t depth = cu->getDepth(0);
uint32_t initTrDepth = cu->getPartitionSize(0) == SIZE_2Nx2N ? 0 : 1;
// set context models
m_search->m_rdGoOnSbacCoder->load(m_search->m_rdSbacCoders[depth][CI_CURR_BEST]);
- m_search->xRecurIntraCodingQT(cu, initTrDepth, 0, true, fencYuv, predYuv, outResiYuv, puDistY, puDistC, false, puCost);
- m_search->xSetIntraResultQT(cu, initTrDepth, 0, true, outReconYuv);
+ m_search->xRecurIntraCodingQT(cu, initTrDepth, 0, fencYuv, predYuv, outResiYuv, puDistY, false, puCost);
+ m_search->xSetIntraResultQT(cu, initTrDepth, 0, outReconYuv);
//=== update PU data ====
cu->copyToPic(cu->getDepth(0), 0, initTrDepth);
//===== set distortion (rate and r-d costs are determined later) =====
- cu->m_totalDistortion = puDistY + puDistC;
+ cu->m_totalDistortion = puDistY;
- m_search->estIntraPredChromaQT(cu, fencYuv, predYuv, outResiYuv, outReconYuv, puDistC);
+ m_search->estIntraPredChromaQT(cu, fencYuv, predYuv, outResiYuv, outReconYuv);
m_entropyCoder->resetBits();
if (cu->getSlice()->getPPS()->getTransquantBypassEnableFlag())
{
More information about the x265-devel
mailing list