[x265] [PATCH RFC] psyrd: use psyrdcost for PU/TU decision for inter and intra
sumalatha at multicorewareinc.com
sumalatha at multicorewareinc.com
Thu Jun 12 08:27:51 CEST 2014
# HG changeset patch
# User Sumalatha Polureddy<sumalatha at multicorewareinc.com>
# Date 1402554461 -19800
# Node ID 2d1e3f44b469bb60504a8cc62f07a9dc00091d88
# Parent e8df9b57eb0966959b189f4e2b9b7d01972456e8
psyrd: use psyrdcost for PU/TU decision for inter and intra
diff -r e8df9b57eb09 -r 2d1e3f44b469 source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Wed Jun 11 21:35:54 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Thu Jun 12 11:57:41 2014 +0530
@@ -625,6 +625,7 @@
uint64_t singleCost = MAX_INT64;
uint32_t singleDistY = 0;
+ uint32_t singlePsyEnergyY = 0;
uint32_t singleCbfY = 0;
int bestModeId = 0;
bool bestTQbypass = 0;
@@ -663,6 +664,7 @@
m_rdGoOnSbacCoder->store(m_rdSbacCoders[fullDepth][CI_QT_TRAFO_ROOT]);
uint32_t singleDistYTmp = 0;
+ uint32_t singlePsyEnergyYTmp = 0;
uint32_t singleCbfYTmp = 0;
uint64_t singleCostTmp = 0;
bool singleTQbypass = 0;
@@ -671,6 +673,7 @@
for (int modeId = firstCheckId; modeId < 2; modeId++)
{
singleDistYTmp = 0;
+ singlePsyEnergyYTmp = 0;
cu->setTransformSkipSubParts(checkTransformSkip ? modeId : 0, TEXT_LUMA, absPartIdx, fullDepth);
bool bIsLossLess = modeId != firstCheckId;
@@ -681,7 +684,14 @@
//----- code luma block with given intra prediction mode and store Cbf-----
xIntraCodingLumaBlk(cu, trDepth, absPartIdx, fencYuv, predYuv, resiYuv, singleDistYTmp);
- singleCbfYTmp = cu->getCbf(absPartIdx, TEXT_LUMA, trDepth);
+ if (m_rdCost->psyRdEnabled())
+ {
+ int size = g_convertToBit[cu->getCUSize(0) >> trDepth];
+ uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
+ singlePsyEnergyYTmp = m_rdCost->psyCost(size, fencYuv->getLumaAddr(absPartIdx), fencYuv->getStride(),
+ cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder), cu->getPic()->getPicYuvRec()->getStride());
+ }
+ singleCbfYTmp = cu->getCbf(absPartIdx, TEXT_LUMA, trDepth);
singleTQbypass = cu->getCUTransquantBypass(absPartIdx);
if ((modeId == 1) && (singleCbfYTmp == 0) && checkTransformSkip)
@@ -692,13 +702,17 @@
else
{
uint32_t singleBits = xGetIntraBitsQT(cu, trDepth, absPartIdx, 0, true, false);
- singleCostTmp = m_rdCost->calcRdCost(singleDistYTmp, singleBits);
+ if (m_rdCost->psyRdEnabled())
+ singleCostTmp = m_rdCost->calcPsyRdCost(singleDistYTmp, singleBits, singlePsyEnergyYTmp);
+ else
+ singleCostTmp = m_rdCost->calcRdCost(singleDistYTmp, singleBits);
}
if (singleCostTmp < singleCost)
{
singleCost = singleCostTmp;
singleDistY = singleDistYTmp;
+ singlePsyEnergyY = singlePsyEnergyYTmp;
singleCbfY = singleCbfYTmp;
bestTQbypass = singleTQbypass;
bestModeId = modeId;
@@ -734,6 +748,13 @@
//----- code luma block with given intra prediction mode and store Cbf-----
cu->setTransformSkipSubParts(0, TEXT_LUMA, absPartIdx, fullDepth);
xIntraCodingLumaBlk(cu, trDepth, absPartIdx, fencYuv, predYuv, resiYuv, singleDistY);
+ if (m_rdCost->psyRdEnabled())
+ {
+ int size = g_convertToBit[cu->getCUSize(0) >> trDepth];
+ uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
+ singlePsyEnergyY = m_rdCost->psyCost(size, fencYuv->getLumaAddr(absPartIdx), fencYuv->getStride(),
+ cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder), cu->getPic()->getPicYuvRec()->getStride());
+ }
if (bCheckSplit)
singleCbfY = cu->getCbf(absPartIdx, TEXT_LUMA, trDepth);
@@ -742,7 +763,10 @@
if (m_cfg->m_param->rdPenalty && (trSizeLog2 == 5) && !isIntraSlice)
singleBits *= 4;
- singleCost = m_rdCost->calcRdCost(singleDistY, singleBits);
+ if (m_rdCost->psyRdEnabled())
+ singleCost = m_rdCost->calcPsyRdCost(singleDistY, singleBits, singlePsyEnergyY);
+ else
+ singleCost = m_rdCost->calcRdCost(singleDistY, singleBits);
}
}
@@ -762,6 +786,7 @@
//----- code splitted block -----
uint64_t splitCost = 0;
uint32_t splitDistY = 0;
+ uint32_t splitPsyEnergy = 0;
uint32_t qPartsDiv = cu->getPic()->getNumPartInCU() >> ((fullDepth + 1) << 1);
uint32_t absPartIdxSub = absPartIdx;
@@ -769,8 +794,10 @@
for (uint32_t part = 0; part < 4; part++, absPartIdxSub += qPartsDiv)
{
+ cu->m_psyEnergy = 0;
xRecurIntraCodingQT(cu, trDepth + 1, absPartIdxSub, fencYuv, predYuv, resiYuv, splitDistY, bCheckFirst, splitCost);
+ splitPsyEnergy += cu->m_psyEnergy;
splitCbfY |= cu->getCbf(absPartIdxSub, TEXT_LUMA, trDepth + 1);
}
@@ -784,7 +811,10 @@
//----- determine rate and r-d cost -----
uint32_t splitBits = xGetIntraBitsQT(cu, trDepth, absPartIdx, 0, true, false);
- splitCost = m_rdCost->calcRdCost(splitDistY, splitBits);
+ if (m_rdCost->psyRdEnabled())
+ splitCost = m_rdCost->calcPsyRdCost(splitDistY, splitBits, splitPsyEnergy);
+ else
+ splitCost = m_rdCost->calcRdCost(splitDistY, splitBits);
//===== compare and set best =====
if (splitCost < singleCost)
@@ -792,8 +822,11 @@
//--- update cost ---
outDistY += splitDistY;
rdCost += splitCost;
+ cu->m_psyEnergy = splitPsyEnergy;
return;
}
+ else
+ cu->m_psyEnergy = singlePsyEnergyY;
//----- set entropy coding status -----
m_rdGoOnSbacCoder->load(m_rdSbacCoders[fullDepth][CI_QT_TRAFO_TEST]);
@@ -817,6 +850,7 @@
outDistY += singleDistY;
rdCost += singleCost;
+ cu->m_psyEnergy = singlePsyEnergyY;
}
void TEncSearch::residualTransformQuantIntra(TComDataCU* cu,
@@ -1196,7 +1230,7 @@
checkTransformSkip &= (nbLumaSkip > 0);
}
}
-
+ uint32_t singlePsyEnergy = 0;
for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
{
TComTURecurse tuIterator;
@@ -1222,7 +1256,7 @@
chromaPredMode = (chFmt == CHROMA_422) ? g_chroma422IntraAngleMappingTable[chromaPredMode] : chromaPredMode;
//===== get prediction signal =====
predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, tuSize, chFmt);
-
+ uint32_t singlePsyEnergyTmp = 0;
if (checkTransformSkip)
{
// use RDO to decide whether Cr/Cb takes TS
@@ -1255,7 +1289,17 @@
else
{
uint32_t bitsTmp = xGetIntraBitsQTChroma(cu, trDepth, absPartIdxC, chromaId, splitIntoSubTUs);
- singleCostTmp = m_rdCost->calcRdCost(singleDistCTmp, bitsTmp);
+ if (m_rdCost->psyRdEnabled())
+ {
+ int chFmt = cu->getChromaFormat();
+ int size = g_convertToBit[(cu->getCUSize(0)) >> (trDepth + m_hChromaShift - ((trSizeLog2 == 2) && !(chFmt == CHROMA_444)))];
+ uint32_t zorder = cu->getZorderIdxInCU() + absPartIdxC;
+ singlePsyEnergyTmp = m_rdCost->psyCost(size, fencYuv->getChromaAddr(chromaId, absPartIdxC), fencYuv->getCStride(),
+ cu->getPic()->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), zorder), cu->getPic()->getPicYuvRec()->getCStride());
+ singleCostTmp = m_rdCost->calcPsyRdCost(singleDistCTmp, bitsTmp, singlePsyEnergyTmp);
+ }
+ else
+ singleCostTmp = m_rdCost->calcRdCost(singleDistCTmp, bitsTmp);
}
if (singleCostTmp < singleCost)
@@ -1264,7 +1308,7 @@
singleDistC = singleDistCTmp;
bestModeId = chromaModeId;
singleCbfC = singleCbfCTmp;
-
+ singlePsyEnergy = singlePsyEnergyTmp;
if (bestModeId == firstCheckId)
{
xStoreIntraResultChromaQT(cu, trDepth, absPartIdxC, chromaId, splitIntoSubTUs);
@@ -1298,7 +1342,16 @@
{
cu->setTransformSkipPartRange(0, (TextType)chromaId, absPartIdxC, tuIterator.m_absPartIdxStep);
xIntraCodingChromaBlk(cu, trDepth, absPartIdxC, tuIterator.m_absPartIdxStep, fencYuv, predYuv, resiYuv, outDist, chromaId);
+ if (m_rdCost->psyRdEnabled())
+ {
+ int chFmt = cu->getChromaFormat();
+ int size = g_convertToBit[(cu->getCUSize(0)) >> (trDepth + m_hChromaShift - ((trSizeLog2 == 2) && !(chFmt == CHROMA_444)))];
+ uint32_t zorder = cu->getZorderIdxInCU() + absPartIdxC;
+ singlePsyEnergyTmp = m_rdCost->psyCost(size, fencYuv->getChromaAddr(chromaId, absPartIdxC), fencYuv->getCStride(),
+ cu->getPic()->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), zorder), cu->getPic()->getPicYuvRec()->getCStride());
+ }
}
+ singlePsyEnergy += singlePsyEnergyTmp;
}
while (isNextSection(&tuIterator));
@@ -1307,20 +1360,24 @@
offsetSubTUCBFs(cu, (TextType)chromaId, trDepth, absPartIdx);
}
}
+ cu->m_psyEnergy = singlePsyEnergy;
}
else
{
uint32_t splitCbfU = 0;
uint32_t splitCbfV = 0;
+ uint32_t splitPsyEnergy = 0;
uint32_t qPartsDiv = cu->getPic()->getNumPartInCU() >> ((fullDepth + 1) << 1);
uint32_t absPartIdxSub = absPartIdx;
for (uint32_t part = 0; part < 4; part++, absPartIdxSub += qPartsDiv)
{
xRecurIntraChromaCodingQT(cu, trDepth + 1, absPartIdxSub, fencYuv, predYuv, resiYuv, outDist);
+ splitPsyEnergy += cu->m_psyEnergy;
splitCbfU |= cu->getCbf(absPartIdxSub, TEXT_CHROMA_U, trDepth + 1);
splitCbfV |= cu->getCbf(absPartIdxSub, TEXT_CHROMA_V, trDepth + 1);
}
+ cu->m_psyEnergy = splitPsyEnergy;
for (uint32_t offs = 0; offs < 4 * qPartsDiv; offs++)
{
cu->getCbf(TEXT_CHROMA_U)[absPartIdx + offs] |= (splitCbfU << trDepth);
@@ -1945,8 +2002,11 @@
}
uint32_t bits = xGetIntraBitsQT(cu, initTrDepth, absPartIdxC, tuIterator.m_absPartIdxStep, false, true);
- uint64_t cost = m_rdCost->calcRdCost(dist, bits);
-
+ uint64_t cost = 0;
+ if (m_rdCost->psyRdEnabled())
+ cost = m_rdCost->calcPsyRdCost(dist, bits, cu->m_psyEnergy);
+ else
+ cost = m_rdCost->calcRdCost(dist, bits);
//----- compare -----
if (cost < bestCost)
{
@@ -2676,9 +2736,7 @@
cu->m_totalPsyCost = m_rdCost->calcPsyRdCost(cu->m_totalDistortion, cu->m_totalBits, cu->m_psyEnergy);
}
else
- {
cu->m_totalRDCost = m_rdCost->calcRdCost(cu->m_totalDistortion, cu->m_totalBits);
- }
m_rdGoOnSbacCoder->store(m_rdSbacCoders[depth][CI_TEMP_BEST]);
@@ -2715,12 +2773,22 @@
distortion = 0;
m_rdGoOnSbacCoder->load(m_rdSbacCoders[depth][CI_CURR_BEST]);
- xEstimateResidualQT(cu, 0, outResiYuv, depth, cost, bits, distortion, &zeroDistortion, curUseRDOQ);
+ xEstimateResidualQT(cu, 0, fencYuv, predYuv, outResiYuv, depth, cost, bits, distortion, &zeroDistortion, curUseRDOQ);
m_entropyCoder->resetBits();
m_entropyCoder->encodeQtRootCbfZero(cu);
uint32_t zeroResiBits = m_entropyCoder->getNumberOfWrittenBits();
- uint64_t zeroCost = m_rdCost->calcRdCost(zeroDistortion, zeroResiBits);
+ uint64_t zeroCost = 0;
+ uint32_t zeroPsyEnergyY = 0;
+ if (m_rdCost->psyRdEnabled())
+ {
+ int size = g_convertToBit[cuSize];
+ zeroPsyEnergyY = m_rdCost->psyCost(size, fencYuv->getLumaAddr(), fencYuv->getStride(),
+ (pixel*)RDCost::zeroPel, MAX_CU_SIZE); // TODO: verify that the psy energy of fenc measured against a zero block is a valid counterpart to the zero-residual distortion
+ zeroCost = m_rdCost->calcPsyRdCost(zeroDistortion, zeroResiBits, zeroPsyEnergyY);
+ }
+ else
+ zeroCost = m_rdCost->calcRdCost(zeroDistortion, zeroResiBits);
if (cu->isLosslessCoded(0))
{
zeroCost = cost + 1;
@@ -2728,6 +2796,7 @@
if (zeroCost < cost)
{
distortion = zeroDistortion;
+ cu->m_psyEnergy = zeroPsyEnergyY;
const uint32_t qpartnum = cu->getPic()->getNumPartInCU() >> (depth << 1);
::memset(cu->getTransformIdx(), 0, qpartnum * sizeof(uint8_t));
@@ -2750,7 +2819,10 @@
bits = xSymbolBitsInter(cu);
- cost = m_rdCost->calcRdCost(distortion, bits);
+ if (m_rdCost->psyRdEnabled())
+ cost = m_rdCost->calcPsyRdCost(distortion, bits, cu->m_psyEnergy);
+ else
+ cost = m_rdCost->calcRdCost(distortion, bits);
if (cost < bestCost)
{
@@ -2789,9 +2861,7 @@
cu->m_totalPsyCost = m_rdCost->calcPsyRdCost(bestDist, bestBits, cu->m_psyEnergy);
}
else
- {
cu->m_totalRDCost = m_rdCost->calcRdCost(bestDist, bestBits);
- }
cu->m_totalBits = bestBits;
cu->m_totalDistortion = bestDist;
@@ -3016,6 +3086,8 @@
void TEncSearch::xEstimateResidualQT(TComDataCU* cu,
uint32_t absPartIdx,
+ TComYuv* fencYuv,
+ TComYuv* predYuv,
ShortYuv* resiYuv,
const uint32_t depth,
uint64_t & rdCost,
@@ -3055,8 +3127,10 @@
uint64_t singleCost = MAX_INT64;
uint32_t singleBits = 0;
uint32_t singleDist = 0;
+ uint32_t singlePsyEnergy = 0;
uint32_t singleBitsComp[MAX_NUM_COMPONENT][2 /*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = { { 0, 0 }, { 0, 0 }, { 0, 0 } };
uint32_t singleDistComp[MAX_NUM_COMPONENT][2 /*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = { { 0, 0 }, { 0, 0 }, { 0, 0 } };
+ uint32_t singlePsyEnergyComp[MAX_NUM_COMPONENT][2] = { { 0, 0 }, { 0, 0 }, { 0, 0 } };
uint32_t absSum[MAX_NUM_COMPONENT][2 /*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = { { 0, 0 }, { 0, 0 }, { 0, 0 } };
uint32_t bestTransformMode[MAX_NUM_COMPONENT][2 /*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = { { 0, 0 }, { 0, 0 }, { 0, 0 } };
int lastPos[MAX_NUM_COMPONENT][2 /*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = { { -1, -1 }, { -1, -1 }, { -1, -1 } };
@@ -3173,6 +3247,13 @@
int partSize = partitionFromSize(trSize);
uint32_t distY = primitives.sse_sp[partSize](resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width, (pixel*)RDCost::zeroPel, trSize);
+ uint32_t psyEnergyY = 0;
+ if (m_rdCost->psyRdEnabled())
+ {
+ int size = g_convertToBit[trSize];
+ psyEnergyY = m_rdCost->psyCost(size, fencYuv->getLumaAddr(absPartIdx), fencYuv->getStride(),
+ (pixel*)RDCost::zeroPel, cu->getPic()->getPicYuvRec()->getStride()); // TODO: verify that the psy energy of fenc measured against a zero block is a valid counterpart to the zero-residual distortion
+ }
int16_t *curResiY = m_qtTempShortYuv[qtlayer].getLumaAddr(absPartIdx);
X265_CHECK(m_qtTempShortYuv[qtlayer].m_width == MAX_CU_SIZE, "width not full CU\n");
const uint32_t strideResiY = MAX_CU_SIZE;
@@ -3191,17 +3272,40 @@
m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiY, strideResiY, coeffCurY, trSize, scalingListType, false, lastPos[TEXT_LUMA][0]); //this is for inter mode only
const uint32_t nonZeroDistY = primitives.sse_ss[partSize](resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width, curResiY, strideResiY);
+ uint32_t nonZeroPsyEnergyY = 0;
+ if (m_rdCost->psyRdEnabled())
+ {
+ pixel* pred = predYuv->getLumaAddr(absPartIdx);
+ uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
+ pixel* reconIPred = cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);
+ uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getStride();
+ uint32_t stride = fencYuv->getStride();
+ //===== reconstruction =====
+ primitives.luma_add_ps[partSize](reconIPred, reconIPredStride, pred, curResiY, stride, strideResiY);
+ int size = g_convertToBit[trSize];
+ nonZeroPsyEnergyY = m_rdCost->psyCost(size, fencYuv->getLumaAddr(absPartIdx), fencYuv->getStride(),
+ cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder), cu->getPic()->getPicYuvRec()->getStride());
+ }
if (cu->isLosslessCoded(0))
{
distY = nonZeroDistY;
+ psyEnergyY = nonZeroPsyEnergyY;
}
else
{
- const uint64_t singleCostY = m_rdCost->calcRdCost(nonZeroDistY, singleBitsComp[TEXT_LUMA][0]);
+ uint64_t singleCostY = 0;
+ if (m_rdCost->psyRdEnabled())
+ singleCostY = m_rdCost->calcPsyRdCost(nonZeroDistY, singleBitsComp[TEXT_LUMA][0], nonZeroPsyEnergyY);
+ else
+ singleCostY = m_rdCost->calcRdCost(nonZeroDistY, singleBitsComp[TEXT_LUMA][0]);
m_entropyCoder->resetBits();
m_entropyCoder->encodeQtCbfZero(cu, TEXT_LUMA, trMode);
const uint32_t nullBitsY = m_entropyCoder->getNumberOfWrittenBits();
- const uint64_t nullCostY = m_rdCost->calcRdCost(distY, nullBitsY);
+ uint64_t nullCostY = 0;
+ if (m_rdCost->psyRdEnabled())
+ nullCostY = m_rdCost->calcPsyRdCost(distY, nullBitsY, psyEnergyY);
+ else
+ nullCostY = m_rdCost->calcRdCost(distY, nullBitsY);
if (nullCostY < singleCostY)
{
absSum[TEXT_LUMA][0] = 0;
@@ -3216,6 +3320,7 @@
else
{
distY = nonZeroDistY;
+ psyEnergyY = nonZeroPsyEnergyY;
if (checkTransformSkipY)
{
minCost[TEXT_LUMA][0] = singleCostY;
@@ -3228,11 +3333,14 @@
m_entropyCoder->resetBits();
m_entropyCoder->encodeQtCbfZero(cu, TEXT_LUMA, trMode);
const uint32_t nullBitsY = m_entropyCoder->getNumberOfWrittenBits();
- minCost[TEXT_LUMA][0] = m_rdCost->calcRdCost(distY, nullBitsY);
+ if (m_rdCost->psyRdEnabled())
+ minCost[TEXT_LUMA][0] = m_rdCost->calcPsyRdCost(distY, nullBitsY, psyEnergyY);
+ else
+ minCost[TEXT_LUMA][0] = m_rdCost->calcRdCost(distY, nullBitsY);
}
singleDistComp[TEXT_LUMA][0] = distY;
-
+ singlePsyEnergyComp[TEXT_LUMA][0] = psyEnergyY;
if (!absSum[TEXT_LUMA][0])
{
primitives.blockfill_s[sizeIdx](curResiY, strideResiY, 0);
@@ -3241,6 +3349,8 @@
uint32_t distU = 0;
uint32_t distV = 0;
+ uint32_t psyEnergyU = 0;
+ uint32_t psyEnergyV = 0;
if (bCodeChroma)
{
TComTURecurse tuIterator;
@@ -3274,18 +3384,40 @@
uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth,
curResiU, strideResiC);
const uint32_t nonZeroDistU = m_rdCost->scaleChromaDistCb(dist);
-
+ uint32_t nonZeroPsyEnergyU = 0;
+ if (m_rdCost->psyRdEnabled())
+ {
+ pixel* pred = predYuv->getCbAddr(absPartIdxC);
+ uint32_t zorder = cu->getZorderIdxInCU() + absPartIdxC;
+ pixel* reconIPred = cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder);
+ uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getCStride();
+ uint32_t stride = fencYuv->getCStride();
+ //===== reconstruction =====
+ primitives.luma_add_ps[partSizeC](reconIPred, reconIPredStride, pred, curResiU, stride, strideResiC);
+ int size = g_convertToBit[trSizeC];
+ nonZeroPsyEnergyU = m_rdCost->psyCost(size, fencYuv->getCbAddr(absPartIdxC), fencYuv->getCStride(),
+ cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder), cu->getPic()->getPicYuvRec()->getCStride());
+ }
if (cu->isLosslessCoded(0))
{
distU = nonZeroDistU;
+ psyEnergyU = nonZeroPsyEnergyU;
}
else
{
- const uint64_t singleCostU = m_rdCost->calcRdCost(nonZeroDistU, singleBitsComp[TEXT_CHROMA_U][tuIterator.m_section]);
+ uint64_t singleCostU = 0;
+ if (m_rdCost->psyRdEnabled())
+ singleCostU = m_rdCost->calcPsyRdCost(nonZeroDistU, singleBitsComp[TEXT_CHROMA_U][tuIterator.m_section], nonZeroPsyEnergyU);
+ else
+ singleCostU = m_rdCost->calcRdCost(nonZeroDistU, singleBitsComp[TEXT_CHROMA_U][tuIterator.m_section]);
m_entropyCoder->resetBits();
m_entropyCoder->encodeQtCbfZero(cu, TEXT_CHROMA_U, trMode);
const uint32_t nullBitsU = m_entropyCoder->getNumberOfWrittenBits();
- const uint64_t nullCostU = m_rdCost->calcRdCost(distU, nullBitsU);
+ uint64_t nullCostU = 0;
+ if (m_rdCost->psyRdEnabled())
+ nullCostU = m_rdCost->calcPsyRdCost(distU, nullBitsU, psyEnergyU);
+ else
+ nullCostU = m_rdCost->calcRdCost(distU, nullBitsU);
if (nullCostU < singleCostU)
{
absSum[TEXT_CHROMA_U][tuIterator.m_section] = 0;
@@ -3300,6 +3432,7 @@
else
{
distU = nonZeroDistU;
+ psyEnergyU = nonZeroPsyEnergyU;
if (checkTransformSkipUV)
{
minCost[TEXT_CHROMA_U][tuIterator.m_section] = singleCostU;
@@ -3312,10 +3445,14 @@
m_entropyCoder->resetBits();
m_entropyCoder->encodeQtCbfZero(cu, TEXT_CHROMA_U, trModeC);
const uint32_t nullBitsU = m_entropyCoder->getNumberOfWrittenBits();
- minCost[TEXT_CHROMA_U][tuIterator.m_section] = m_rdCost->calcRdCost(distU, nullBitsU);
+ if (m_rdCost->psyRdEnabled())
+ minCost[TEXT_CHROMA_U][tuIterator.m_section] = m_rdCost->calcPsyRdCost(distU, nullBitsU, psyEnergyU);
+ else
+ minCost[TEXT_CHROMA_U][tuIterator.m_section] = m_rdCost->calcRdCost(distU, nullBitsU);
}
singleDistComp[TEXT_CHROMA_U][tuIterator.m_section] = distU;
+ singlePsyEnergyComp[TEXT_CHROMA_U][tuIterator.m_section] = psyEnergyU;
if (!absSum[TEXT_CHROMA_U][tuIterator.m_section])
{
@@ -3339,18 +3476,41 @@
uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth,
curResiV, strideResiC);
const uint32_t nonZeroDistV = m_rdCost->scaleChromaDistCr(dist);
-
+ uint32_t nonZeroPsyEnergyV = 0;
+
+ if (m_rdCost->psyRdEnabled())
+ {
+ pixel* pred = predYuv->getCrAddr(absPartIdxC);
+ uint32_t zorder = cu->getZorderIdxInCU() + absPartIdxC;
+ pixel* reconIPred = cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder);
+ uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getCStride();
+ uint32_t stride = fencYuv->getCStride();
+ //===== reconstruction =====
+ primitives.luma_add_ps[partSizeC](reconIPred, reconIPredStride, pred, curResiV, stride, strideResiC);
+ int size = g_convertToBit[trSizeC];
+ nonZeroPsyEnergyV = m_rdCost->psyCost(size, fencYuv->getCrAddr(absPartIdxC), fencYuv->getCStride(),
+ cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder), cu->getPic()->getPicYuvRec()->getCStride());
+ }
if (cu->isLosslessCoded(0))
{
distV = nonZeroDistV;
+ psyEnergyV = nonZeroPsyEnergyV;
}
else
{
- const uint64_t singleCostV = m_rdCost->calcRdCost(nonZeroDistV, singleBitsComp[TEXT_CHROMA_V][tuIterator.m_section]);
+ uint64_t singleCostV = 0;
+ if (m_rdCost->psyRdEnabled())
+ singleCostV = m_rdCost->calcPsyRdCost(nonZeroDistV, singleBitsComp[TEXT_CHROMA_V][tuIterator.m_section], nonZeroPsyEnergyV);
+ else
+ singleCostV = m_rdCost->calcRdCost(nonZeroDistV, singleBitsComp[TEXT_CHROMA_V][tuIterator.m_section]);
m_entropyCoder->resetBits();
m_entropyCoder->encodeQtCbfZero(cu, TEXT_CHROMA_V, trMode);
const uint32_t nullBitsV = m_entropyCoder->getNumberOfWrittenBits();
- const uint64_t nullCostV = m_rdCost->calcRdCost(distV, nullBitsV);
+ uint64_t nullCostV = 0;
+ if (m_rdCost->psyRdEnabled())
+ nullCostV = m_rdCost->calcPsyRdCost(distV, nullBitsV, psyEnergyV);
+ else
+ nullCostV = m_rdCost->calcRdCost(distV, nullBitsV);
if (nullCostV < singleCostV)
{
absSum[TEXT_CHROMA_V][tuIterator.m_section] = 0;
@@ -3365,6 +3525,7 @@
else
{
distV = nonZeroDistV;
+ psyEnergyV = nonZeroPsyEnergyV;
if (checkTransformSkipUV)
{
minCost[TEXT_CHROMA_V][tuIterator.m_section] = singleCostV;
@@ -3377,10 +3538,14 @@
m_entropyCoder->resetBits();
m_entropyCoder->encodeQtCbfZero(cu, TEXT_CHROMA_V, trModeC);
const uint32_t nullBitsV = m_entropyCoder->getNumberOfWrittenBits();
- minCost[TEXT_CHROMA_V][tuIterator.m_section] = m_rdCost->calcRdCost(distV, nullBitsV);
+ if (m_rdCost->psyRdEnabled())
+ minCost[TEXT_CHROMA_V][tuIterator.m_section] = m_rdCost->calcPsyRdCost(distV, nullBitsV, psyEnergyV);
+ else
+ minCost[TEXT_CHROMA_V][tuIterator.m_section] = m_rdCost->calcRdCost(distV, nullBitsV);
}
singleDistComp[TEXT_CHROMA_V][tuIterator.m_section] = distV;
+ singlePsyEnergyComp[TEXT_CHROMA_V][tuIterator.m_section] = psyEnergyV;
if (!absSum[TEXT_CHROMA_V][tuIterator.m_section])
{
@@ -3397,6 +3562,7 @@
if (checkTransformSkipY)
{
uint32_t nonZeroDistY = 0, absSumTransformSkipY;
+ uint32_t nonZeroPsyEnergyY = 0;
uint64_t singleCostY = MAX_INT64;
coeff_t bestCoeffY[MAX_TS_SIZE * MAX_TS_SIZE];
@@ -3438,7 +3604,22 @@
nonZeroDistY = primitives.sse_ss[partSize](resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width,
curResiY, strideResiY);
- singleCostY = m_rdCost->calcRdCost(nonZeroDistY, skipSingleBitsY);
+ if (m_rdCost->psyRdEnabled())
+ {
+ pixel* pred = predYuv->getLumaAddr(absPartIdx);
+ uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
+ pixel* reconIPred = cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);
+ uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getStride();
+ uint32_t stride = fencYuv->getStride();
+ //===== reconstruction =====
+ primitives.luma_add_ps[partSize](reconIPred, reconIPredStride, pred, curResiY, stride, strideResiY);
+ int size = g_convertToBit[trSize];
+ nonZeroPsyEnergyY = m_rdCost->psyCost(size, fencYuv->getLumaAddr(absPartIdx), fencYuv->getStride(),
+ cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder), cu->getPic()->getPicYuvRec()->getStride());
+ singleCostY = m_rdCost->calcPsyRdCost(nonZeroDistY, skipSingleBitsY, nonZeroPsyEnergyY);
+ }
+ else
+ singleCostY = m_rdCost->calcRdCost(nonZeroDistY, skipSingleBitsY);
}
if (!absSumTransformSkipY || minCost[TEXT_LUMA][0] < singleCostY)
@@ -3450,6 +3631,7 @@
else
{
singleDistComp[TEXT_LUMA][0] = nonZeroDistY;
+ singlePsyEnergyComp[TEXT_LUMA][0] = nonZeroPsyEnergyY;
absSum[TEXT_LUMA][0] = absSumTransformSkipY;
bestTransformMode[TEXT_LUMA][0] = 1;
}
@@ -3460,6 +3642,7 @@
if (bCodeChroma && checkTransformSkipUV)
{
uint32_t nonZeroDistU = 0, nonZeroDistV = 0, absSumTransformSkipU, absSumTransformSkipV;
+ uint32_t nonZeroPsyEnergyU = 0, nonZeroPsyEnergyV = 0;
uint64_t singleCostU = MAX_INT64;
uint64_t singleCostV = MAX_INT64;
@@ -3527,7 +3710,22 @@
uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth,
curResiU, strideResiC);
nonZeroDistU = m_rdCost->scaleChromaDistCb(dist);
- singleCostU = m_rdCost->calcRdCost(nonZeroDistU, singleBitsComp[TEXT_CHROMA_U][tuIterator.m_section]);
+ if (m_rdCost->psyRdEnabled())
+ {
+ pixel* pred = predYuv->getCbAddr(absPartIdxC);
+ uint32_t zorder = cu->getZorderIdxInCU() + absPartIdxC;
+ pixel* reconIPred = cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder);
+ uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getCStride();
+ uint32_t stride = fencYuv->getCStride();
+ //===== reconstruction =====
+ primitives.luma_add_ps[partSizeC](reconIPred, reconIPredStride, pred, curResiU, stride, strideResiC);
+ int size = g_convertToBit[trSizeC];
+ nonZeroPsyEnergyU = m_rdCost->psyCost(size, fencYuv->getCbAddr(absPartIdxC), fencYuv->getCStride(),
+ cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder), cu->getPic()->getPicYuvRec()->getCStride());
+ singleCostU = m_rdCost->calcPsyRdCost(nonZeroDistU, singleBitsComp[TEXT_CHROMA_U][tuIterator.m_section], nonZeroPsyEnergyU);
+ }
+ else
+ singleCostU = m_rdCost->calcRdCost(nonZeroDistU, singleBitsComp[TEXT_CHROMA_U][tuIterator.m_section]);
}
if (!absSumTransformSkipU || minCost[TEXT_CHROMA_U][tuIterator.m_section] < singleCostU)
@@ -3540,6 +3738,7 @@
else
{
singleDistComp[TEXT_CHROMA_U][tuIterator.m_section] = nonZeroDistU;
+ singlePsyEnergyComp[TEXT_CHROMA_U][tuIterator.m_section] = nonZeroPsyEnergyU;
absSum[TEXT_CHROMA_U][tuIterator.m_section] = absSumTransformSkipU;
bestTransformMode[TEXT_CHROMA_U][tuIterator.m_section] = 1;
}
@@ -3560,7 +3759,22 @@
uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth,
curResiV, strideResiC);
nonZeroDistV = m_rdCost->scaleChromaDistCr(dist);
- singleCostV = m_rdCost->calcRdCost(nonZeroDistV, singleBitsComp[TEXT_CHROMA_V][tuIterator.m_section]);
+ if (m_rdCost->psyRdEnabled())
+ {
+ pixel* pred = predYuv->getCrAddr(absPartIdxC);
+ uint32_t zorder = cu->getZorderIdxInCU() + absPartIdxC;
+ pixel* reconIPred = cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder);
+ uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getCStride();
+ uint32_t stride = fencYuv->getCStride();
+ //===== reconstruction =====
+ primitives.luma_add_ps[partSizeC](reconIPred, reconIPredStride, pred, curResiV, stride, strideResiC);
+ int size = g_convertToBit[trSizeC];
+ nonZeroPsyEnergyV = m_rdCost->psyCost(size, fencYuv->getCrAddr(absPartIdxC), fencYuv->getCStride(),
+ cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder), cu->getPic()->getPicYuvRec()->getCStride());
+ singleCostV = m_rdCost->calcPsyRdCost(nonZeroDistV, singleBitsComp[TEXT_CHROMA_V][tuIterator.m_section], nonZeroPsyEnergyV);
+ }
+ else
+ singleCostV = m_rdCost->calcRdCost(nonZeroDistV, singleBitsComp[TEXT_CHROMA_V][tuIterator.m_section]);
}
if (!absSumTransformSkipV || minCost[TEXT_CHROMA_V][tuIterator.m_section] < singleCostV)
@@ -3573,6 +3787,7 @@
else
{
singleDistComp[TEXT_CHROMA_V][tuIterator.m_section] = nonZeroDistV;
+ singlePsyEnergyComp[TEXT_CHROMA_V][tuIterator.m_section] = nonZeroPsyEnergyV;
absSum[TEXT_CHROMA_V][tuIterator.m_section] = absSumTransformSkipV;
bestTransformMode[TEXT_CHROMA_V][tuIterator.m_section] = 1;
}
@@ -3636,6 +3851,7 @@
}
singleDist += singleDistComp[TEXT_LUMA][0];
+ singlePsyEnergy += singlePsyEnergyComp[TEXT_LUMA][0]; // TODO: check whether the chroma psy energy needs to be added as well
for (uint32_t subTUIndex = 0; subTUIndex < 2; subTUIndex++)
{
singleDist += singleDistComp[TEXT_CHROMA_U][subTUIndex];
@@ -3643,7 +3859,10 @@
}
singleBits = m_entropyCoder->getNumberOfWrittenBits();
- singleCost = m_rdCost->calcRdCost(singleDist, singleBits);
+ if (m_rdCost->psyRdEnabled())
+ singleCost = m_rdCost->calcPsyRdCost(singleDist, singleBits, singlePsyEnergy);
+ else
+ singleCost = m_rdCost->calcRdCost(singleDist, singleBits);
bestCBF[TEXT_LUMA] = cu->getCbf(absPartIdx, TEXT_LUMA, trMode);
if (bCodeChroma)
@@ -3674,7 +3893,7 @@
uint32_t subdivDist = 0;
uint32_t subdivBits = 0;
uint64_t subDivCost = 0;
-
+ uint32_t subDivPsyEnergy = 0;
bestCBF[TEXT_LUMA] = cu->getCbf(absPartIdx, TEXT_LUMA, trMode);
if (bCodeChroma)
{
@@ -3695,7 +3914,9 @@
const uint32_t qPartNumSubdiv = cu->getPic()->getNumPartInCU() >> ((depth + 1) << 1);
for (uint32_t i = 0; i < 4; ++i)
{
- xEstimateResidualQT(cu, absPartIdx + i * qPartNumSubdiv, resiYuv, depth + 1, subDivCost, subdivBits, subdivDist, bCheckFull ? NULL : outZeroDist);
+ cu->m_psyEnergy = 0;
+ xEstimateResidualQT(cu, absPartIdx + i * qPartNumSubdiv, fencYuv, predYuv, resiYuv, depth + 1, subDivCost, subdivBits, subdivDist, bCheckFull ? NULL : outZeroDist);
+ subDivPsyEnergy += cu->m_psyEnergy;
}
uint32_t ycbf = 0;
@@ -3724,8 +3945,10 @@
xEncodeResidualQT(cu, absPartIdx, depth, false, TEXT_CHROMA_V);
subdivBits = m_entropyCoder->getNumberOfWrittenBits();
- subDivCost = m_rdCost->calcRdCost(subdivDist, subdivBits);
-
+ if (m_rdCost->psyRdEnabled())
+ subDivCost = m_rdCost->calcPsyRdCost(subdivDist, subdivBits, subDivPsyEnergy);
+ else
+ subDivCost = m_rdCost->calcRdCost(subdivDist, subdivBits);
if (ycbf || ucbf || vcbf || !bCheckFull)
{
if (subDivCost < singleCost)
@@ -3733,8 +3956,11 @@
rdCost += subDivCost;
outBits += subdivBits;
outDist += subdivDist;
+ cu->m_psyEnergy = subDivPsyEnergy;
return;
}
+ else
+ cu->m_psyEnergy = singlePsyEnergy;
}
cu->setTransformSkipSubParts(bestTransformMode[TEXT_LUMA][0], TEXT_LUMA, absPartIdx, depth);
@@ -3758,7 +3984,7 @@
rdCost += singleCost;
outBits += singleBits;
outDist += singleDist;
-
+ cu->m_psyEnergy = singlePsyEnergy;
cu->setTrIdxSubParts(trMode, absPartIdx, depth);
cu->setCbfSubParts(absSum[TEXT_LUMA][0] ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);
diff -r e8df9b57eb09 -r 2d1e3f44b469 source/Lib/TLibEncoder/TEncSearch.h
--- a/source/Lib/TLibEncoder/TEncSearch.h Wed Jun 11 21:35:54 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncSearch.h Thu Jun 12 11:57:41 2014 +0530
@@ -179,7 +179,7 @@
void generateCoeffRecon(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv, bool skipRes);
- void xEstimateResidualQT(TComDataCU* cu, uint32_t absPartIdx, ShortYuv* resiYuv, uint32_t depth,
+ void xEstimateResidualQT(TComDataCU* cu, uint32_t absPartIdx, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, uint32_t depth,
uint64_t &rdCost, uint32_t &outBits, uint32_t &outDist, uint32_t *puiZeroDist, bool curUseRDOQ = true);
void xSetResidualQTData(TComDataCU* cu, uint32_t absPartIdx, ShortYuv* resiYuv, uint32_t depth, bool bSpatial);
More information about the x265-devel mailing list