[x265] [PATCH] implementation - Intra and Merge in no-rdo
sumalatha at multicorewareinc.com
sumalatha at multicorewareinc.com
Wed Jul 3 15:13:39 CEST 2013
# HG changeset patch
# User sumalatha
# Date 1372857189 -19800
# Node ID af7e99a736333aaa532e29f1be225a51a4ac9b60
# Parent 209cce8f38beba4f66ba5d3bf1a1b373b4768090
implementation - Intra and Merge in no-rdo
diff -r 209cce8f38be -r af7e99a73633 source/Lib/TLibCommon/TComPrediction.h
--- a/source/Lib/TLibCommon/TComPrediction.h Wed Jul 03 12:07:43 2013 +0530
+++ b/source/Lib/TLibCommon/TComPrediction.h Wed Jul 03 18:43:09 2013 +0530
@@ -64,9 +64,6 @@
Int m_iPredBufStride;
Int m_iPredBufHeight;
- //reference sample for IntraPrediction
- Pel *refAbove, *refAboveFlt, *refLeft, *refLeftFlt;
-
TComYuv m_acYuvPred[2];
TShortYUV m_acShortPred[2];
TComYuv m_cYuvPredTemp;
@@ -99,6 +96,9 @@
TComPrediction();
virtual ~TComPrediction();
+ //reference sample for IntraPrediction
+ Pel *refAbove, *refAboveFlt, *refLeft, *refLeftFlt;
+
Void initTempBuff();
// inter
diff -r 209cce8f38be -r af7e99a73633 source/Lib/TLibEncoder/TEncCu.h
--- a/source/Lib/TLibEncoder/TEncCu.h Wed Jul 03 12:07:43 2013 +0530
+++ b/source/Lib/TLibEncoder/TEncCu.h Wed Jul 03 18:43:09 2013 +0530
@@ -73,7 +73,7 @@
TComDataCU** m_InterCU_Nx2N;
TComDataCU** m_IntrainInterCU;
TComDataCU** m_MergeCU;
- TComDataCU** m_MergeBestCU;
+ TComDataCU** m_MergeBestCU;
TComDataCU** m_ppcBestCU; ///< Best CUs in each depth
TComDataCU** m_ppcTempCU; ///< Temporary CUs in each depth
UChar m_uhTotalDepth;
@@ -111,11 +111,17 @@
Bool m_abortFlag; // This flag is used to abort the recursive CU check when the child CU cost is greater than the parent CU
public:
+
Void set_pppcRDSbacCoder(TEncSbac*** pppcRDSbacCoder) { m_pppcRDSbacCoder = pppcRDSbacCoder; }
+
Void set_pcEntropyCoder(TEncEntropy* pcEntropyCoder) { m_pcEntropyCoder = pcEntropyCoder; }
+
Void set_pcPredSearch(TEncSearch* pcPredSearch) { m_pcPredSearch = pcPredSearch; }
+
Void set_pcRDGoOnSbacCoder(TEncSbac* pcRDGoOnSbacCoder) { m_pcRDGoOnSbacCoder = pcRDGoOnSbacCoder; }
+
Void set_pcTrQuant(TComTrQuant* pcTrQuant) { m_pcTrQuant = pcTrQuant; }
+
Void set_pcRdCost(TComRdCost* pcRdCost) { m_pcRdCost = pcRdCost; }
/// copy parameters from encoder class
@@ -147,12 +153,13 @@
Int xComputeQP(TComDataCU* pcCU, UInt uiDepth);
Void xCheckBestMode(TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, UInt uiDepth);
-
+
Void xCheckRDCostMerge2Nx2N(TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, Bool *earlyDetectionSkipMode);
- Void xComputeCostMerge2Nx2N(TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU);
- Void xComputeCostIntrainInter(TComDataCU*& rpcTempCU, PartSize eSize,UInt index);
+ Void xComputeCostMerge2Nx2N(TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, TComYuv*& bestPredYuv, TComYuv*& tmpPredYuv);
+ Void xComputeCostIntrainInter(TComDataCU*& rpcTempCU, PartSize eSize, UInt index);
Void xCheckRDCostInter(TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, PartSize ePartSize, Bool bUseMRG = false);
Void xComputeCostInter(TComDataCU*& rpcTempCU, PartSize ePartSize, UInt Index, Bool bUseMRG = false);
+ Void xEncodeIntrainInter(TComDataCU*& pcCU, TComYuv* pcYuvOrg, TComYuv* pcYuvPred, TShortYUV*& rpcYuvResi, TComYuv*& rpcYuvRec);
Void xCheckRDCostIntra(TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, PartSize ePartSize);
Void xCheckRDCostIntrainInter(TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, PartSize eSize);
Void xCheckDQP(TComDataCU* pcCU);
diff -r 209cce8f38be -r af7e99a73633 source/Lib/TLibEncoder/TEncSearch.h
--- a/source/Lib/TLibEncoder/TEncSearch.h Wed Jul 03 12:07:43 2013 +0530
+++ b/source/Lib/TLibEncoder/TEncSearch.h Wed Jul 03 18:43:09 2013 +0530
@@ -65,7 +65,7 @@
/// encoder search class
class TEncSearch : public TComPrediction
{
-private:
+public:
x265::MotionEstimate m_me;
x265::BitCost m_bc; // TODO: m_bc will go away with HM ME
@@ -117,8 +117,7 @@
x265::MV m_acMvPredictors[3];
// RD computation
- TEncSbac*** m_pppcRDSbacCoder;
- TEncSbac* m_pcRDGoOnSbacCoder;
+
DistParam m_cDistParam;
// Misc.
@@ -130,6 +129,9 @@
public:
+ TEncSbac*** m_pppcRDSbacCoder;
+ TEncSbac* m_pcRDGoOnSbacCoder;
+
Void set_pppcRDSbacCoder(TEncSbac*** pppcRDSbacCoder) { m_pppcRDSbacCoder = pppcRDSbacCoder; }
Void set_pcEntropyCoder(TEncEntropy* pcEntropyCoder) { m_pcEntropyCoder = pcEntropyCoder; }
@@ -142,6 +144,8 @@
virtual ~TEncSearch();
Void init(TEncCfg* pcEncCfg, TComRdCost* pcRdCost, TComTrQuant *pcTrQuant);
+ UInt xModeBitsIntra(TComDataCU* pcCU, UInt uiMode, UInt uiPU, UInt uiPartOffset, UInt uiDepth, UInt uiInitTrDepth);
+ UInt xUpdateCandList(UInt uiMode, UInt64 uiCost, UInt uiFastCandNum, UInt * CandModeList, UInt64 * CandCostList);
protected:
@@ -215,6 +219,23 @@
Void IPCMSearch(TComDataCU* pcCU, TComYuv* pcOrgYuv, TComYuv*& rpcPredYuv, TShortYUV*& rpcResiYuv, TComYuv*& rpcRecoYuv);
UInt estimateHeaderBits(TComDataCU* pcCU, UInt uiAbsPartIdx);
+ Void xRecurIntraCodingQT(TComDataCU * pcCU,
+ UInt uiTrDepth,
+ UInt uiAbsPartIdx,
+ Bool bLumaOnly,
+ TComYuv * pcOrgYuv,
+ TComYuv * pcPredYuv,
+ TShortYUV * pcResiYuv,
+ UInt & ruiDistY,
+ UInt & ruiDistC,
+ Bool bCheckFirst,
+ UInt64 & dRDCost);
+
+ Void xSetIntraResultQT(TComDataCU* pcCU,
+ UInt uiTrDepth,
+ UInt uiAbsPartIdx,
+ Bool bLumaOnly,
+ TComYuv* pcRecoYuv);
protected:
@@ -268,24 +289,6 @@
UInt uiChromaId,
Int default0Save1Load2 = 0);
- Void xRecurIntraCodingQT(TComDataCU * pcCU,
- UInt uiTrDepth,
- UInt uiAbsPartIdx,
- Bool bLumaOnly,
- TComYuv * pcOrgYuv,
- TComYuv * pcPredYuv,
- TShortYUV * pcResiYuv,
- UInt & ruiDistY,
- UInt & ruiDistC,
- Bool bCheckFirst,
- UInt64 & dRDCost);
-
- Void xSetIntraResultQT(TComDataCU* pcCU,
- UInt uiTrDepth,
- UInt uiAbsPartIdx,
- Bool bLumaOnly,
- TComYuv* pcRecoYuv);
-
Void xRecurIntraChromaCodingQT(TComDataCU* pcCU,
UInt uiTrDepth,
UInt uiAbsPartIdx,
@@ -408,17 +411,17 @@
x265::MV& rcMv,
UInt& ruiSAD);
- Void xPatternSearchFracDIF(TComDataCU* pcCU,
+ Void xPatternSearchFracDIF(TComDataCU* pcCU,
TComPattern* pcPatternKey,
- Pel* piRefY,
- Int iRefStride,
- x265::MV* pcMvInt,
- x265::MV& rcMvHalf,
- x265::MV& rcMvQter,
- UInt& ruiCost,
- Bool biPred,
- TComPicYuv* refPic,
- UInt uiPartAddr);
+ Pel* piRefY,
+ Int iRefStride,
+ x265::MV* pcMvInt,
+ x265::MV& rcMvHalf,
+ x265::MV& rcMvQter,
+ UInt& ruiCost,
+ Bool biPred,
+ TComPicYuv* refPic,
+ UInt uiPartAddr);
Void xExtDIFUpSamplingH(TComPattern* pcPattern, Bool biPred);
Void xExtDIFUpSamplingQ(TComPattern* pcPatternKey, x265::MV halfPelRef, Bool biPred);
@@ -431,9 +434,6 @@
Void xEstimateResidualQT(TComDataCU* pcCU, UInt uiQuadrant, UInt uiAbsPartIdx, UInt absTUPartIdx, TShortYUV* pcResi, const UInt uiDepth, UInt64 &rdCost, UInt &ruiBits, UInt &ruiDist, UInt *puiZeroDist);
Void xSetResidualQTData(TComDataCU* pcCU, UInt uiQuadrant, UInt uiAbsPartIdx, UInt absTUPartIdx, TShortYUV* pcResi, UInt uiDepth, Bool bSpatial);
- UInt xModeBitsIntra(TComDataCU* pcCU, UInt uiMode, UInt uiPU, UInt uiPartOffset, UInt uiDepth, UInt uiInitTrDepth);
- UInt xUpdateCandList(UInt uiMode, UInt64 uiCost, UInt uiFastCandNum, UInt * CandModeList, UInt64 * CandCostList);
-
// -------------------------------------------------------------------------------------------------------------------
// compute symbol bits
// -------------------------------------------------------------------------------------------------------------------
diff -r 209cce8f38be -r af7e99a73633 source/encoder/compress.cpp
--- a/source/encoder/compress.cpp Wed Jul 03 12:07:43 2013 +0530
+++ b/source/encoder/compress.cpp Wed Jul 03 18:43:09 2013 +0530
@@ -35,45 +35,199 @@
extern FILE* fp1;
#endif
-Void TEncCu::xComputeCostIntrainInter(TComDataCU*& rpcTempCU, PartSize eSize, UInt index)
+Void TEncCu::xEncodeIntrainInter(TComDataCU*& pcCU, TComYuv* pcYuvOrg, TComYuv* pcYuvPred, TShortYUV*& rpcYuvResi, TComYuv*& rpcYuvRec)
{
- //PPAScopeEvent(TEncCU_xCheckRDCostIntra + uiDepth);
+ UInt64 dPUCost = 0;
+ UInt uiPUDistY = 0;
+ UInt uiPUDistC = 0;
+ UInt uiDepth = pcCU->getDepth(0);
+ UInt uiInitTrDepth = pcCU->getPartitionSize(0) == SIZE_2Nx2N ? 0 : 1;
- UChar uiDepth = rpcTempCU->getDepth(0);
- rpcTempCU->setSkipFlagSubParts(false, 0, uiDepth);
- rpcTempCU->setPartSizeSubParts(eSize, 0, uiDepth);
- rpcTempCU->setPredModeSubParts(MODE_INTRA, 0, uiDepth);
- rpcTempCU->setCUTransquantBypassSubParts(m_pcEncCfg->getCUTransquantBypassFlagValue(), 0, uiDepth);
+ // set context models
+ m_pcPredSearch->m_pcRDGoOnSbacCoder->load(m_pcPredSearch->m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST]);
- UInt uiPreCalcDistC = 0;
+ m_pcPredSearch->xRecurIntraCodingQT(pcCU, uiInitTrDepth, 0, true, pcYuvOrg, pcYuvPred, rpcYuvResi, uiPUDistY, uiPUDistC, false, dPUCost);
+ m_pcPredSearch->xSetIntraResultQT(pcCU, uiInitTrDepth, 0, true, rpcYuvRec);
- m_pcPredSearch->estIntraPredQT(rpcTempCU, m_ppcOrigYuv[uiDepth], m_ppcPredYuvMode[index][uiDepth], m_ppcResiYuvTemp[uiDepth], m_ppcRecoYuvTemp[uiDepth], uiPreCalcDistC, true);
+ //=== update PU data ====
+ pcCU->copyToPic(pcCU->getDepth(0), 0, uiInitTrDepth);
+ //===== set distortion (rate and r-d costs are determined later) =====
+ pcCU->getTotalDistortion() = uiPUDistY + uiPUDistC;
- m_ppcRecoYuvTemp[uiDepth]->copyToPicLuma(rpcTempCU->getPic()->getPicYuvRec(), rpcTempCU->getAddr(), rpcTempCU->getZorderIdxInCU());
+ rpcYuvRec->copyToPicLuma(pcCU->getPic()->getPicYuvRec(), pcCU->getAddr(), pcCU->getZorderIdxInCU());
- m_pcPredSearch->estIntraPredChromaQT(rpcTempCU, m_ppcOrigYuv[uiDepth], m_ppcPredYuvMode[index][uiDepth], m_ppcResiYuvTemp[uiDepth], m_ppcRecoYuvTemp[uiDepth], uiPreCalcDistC);
-
+ //m_pcPredSearch->estIntraPredChromaQT(pcCU, m_ppcOrigYuv[uiDepth], m_ppcPredYuvTemp[uiDepth], m_ppcResiYuvTemp[uiDepth], m_ppcRecoYuvTemp[uiDepth], uiPUDistC);
+ m_pcPredSearch->estIntraPredChromaQT(pcCU, pcYuvOrg, pcYuvPred, rpcYuvResi, rpcYuvRec, uiPUDistC);
m_pcEntropyCoder->resetBits();
- if (rpcTempCU->getSlice()->getPPS()->getTransquantBypassEnableFlag())
+ if (pcCU->getSlice()->getPPS()->getTransquantBypassEnableFlag())
{
- m_pcEntropyCoder->encodeCUTransquantBypassFlag(rpcTempCU, 0, true);
+ m_pcEntropyCoder->encodeCUTransquantBypassFlag(pcCU, 0, true);
}
- m_pcEntropyCoder->encodeSkipFlag(rpcTempCU, 0, true);
- m_pcEntropyCoder->encodePredMode(rpcTempCU, 0, true);
- m_pcEntropyCoder->encodePartSize(rpcTempCU, 0, uiDepth, true);
- m_pcEntropyCoder->encodePredInfo(rpcTempCU, 0, true);
- m_pcEntropyCoder->encodeIPCMInfo(rpcTempCU, 0, true);
+ m_pcEntropyCoder->encodeSkipFlag(pcCU, 0, true);
+ m_pcEntropyCoder->encodePredMode(pcCU, 0, true);
+ m_pcEntropyCoder->encodePartSize(pcCU, 0, uiDepth, true);
+ m_pcEntropyCoder->encodePredInfo(pcCU, 0, true);
+ m_pcEntropyCoder->encodeIPCMInfo(pcCU, 0, true);
// Encode Coefficients
Bool bCodeDQP = getdQPFlag();
- // m_pcEntropyCoder->encodeCoeff(rpcTempCU, 0, uiDepth, rpcTempCU->getWidth(0), rpcTempCU->getHeight(0), bCodeDQP);
+ m_pcEntropyCoder->encodeCoeff(pcCU, 0, uiDepth, pcCU->getWidth(0), pcCU->getHeight(0), bCodeDQP);
setdQPFlag(bCodeDQP);
m_pcRDGoOnSbacCoder->store(m_pppcRDSbacCoder[uiDepth][CI_TEMP_BEST]);
- rpcTempCU->getTotalBits() = m_pcEntropyCoder->getNumberOfWrittenBits();
- rpcTempCU->getTotalBins() = ((TEncBinCABAC*)((TEncSbac*)m_pcEntropyCoder->m_pcEntropyCoderIf)->getEncBinIf())->getBinsCoded();
- rpcTempCU->getTotalCost() = m_pcRdCost->calcRdCost(rpcTempCU->getTotalDistortion(), rpcTempCU->getTotalBits());
+ pcCU->getTotalBits() = m_pcEntropyCoder->getNumberOfWrittenBits();
+ pcCU->getTotalBins() = ((TEncBinCABAC*)((TEncSbac*)m_pcEntropyCoder->m_pcEntropyCoderIf)->getEncBinIf())->getBinsCoded();
+ pcCU->getTotalCost() = m_pcRdCost->calcRdCost(pcCU->getTotalDistortion(), pcCU->getTotalBits());
+}
+
+Void TEncCu::xComputeCostIntrainInter(TComDataCU*& pcCU, PartSize eSize, UInt index)
+{
+ UInt uiDepth = pcCU->getDepth(0);
+ UInt uiInitTrDepth = pcCU->getPartitionSize(0) == SIZE_2Nx2N ? 0 : 1;
+ UInt uiWidth = pcCU->getWidth(0) >> uiInitTrDepth;
+ UInt uiWidthBit = pcCU->getIntraSizeIdx(0);
+ UInt64 CandCostList[FAST_UDI_MAX_RDMODE_NUM];
+ UInt CandNum;
+
+ UInt uiPartOffset = 0;
+
+ pcCU->setSkipFlagSubParts(false, 0, uiDepth);
+ pcCU->setPartSizeSubParts(eSize, 0, uiDepth);
+ pcCU->setPredModeSubParts(MODE_INTRA, 0, uiDepth);
+ pcCU->setCUTransquantBypassSubParts(m_pcEncCfg->getCUTransquantBypassFlagValue(), 0, uiDepth);
+
+ //===== init pattern for luma prediction =====
+ pcCU->getPattern()->initPattern(pcCU, uiInitTrDepth, uiPartOffset);
+ // Reference sample smoothing
+ pcCU->getPattern()->initAdiPattern(pcCU, uiPartOffset, uiInitTrDepth, m_pcPredSearch->getPredicBuf(), m_pcPredSearch->getPredicBufWidth(), m_pcPredSearch->getPredicBufHeight(), m_pcPredSearch->refAbove, m_pcPredSearch->refLeft, m_pcPredSearch->refAboveFlt, m_pcPredSearch->refLeftFlt);
+
+ //===== determine set of modes to be tested (using prediction signal only) =====
+ UInt numModesAvailable = 35; //total number of Intra modes
+ Pel* piOrg = m_ppcOrigYuv[uiDepth]->getLumaAddr(0, uiWidth);
+ Pel* piPred = m_ppcPredYuvMode[5][uiDepth]->getLumaAddr(0, uiWidth);
+ UInt uiStride = m_ppcPredYuvMode[5][uiDepth]->getStride();
+ UInt uiRdModeList[FAST_UDI_MAX_RDMODE_NUM];
+ UInt numModesForFullRD = g_aucIntraModeNumFast[uiWidthBit];
+ Int nLog2SizeMinus2 = g_aucConvertToBit[uiWidth];
+ x265::pixelcmp sa8d = x265::primitives.sa8d[nLog2SizeMinus2];
+ {
+ assert(numModesForFullRD < numModesAvailable);
+
+ for (UInt i = 0; i < numModesForFullRD; i++)
+ {
+ CandCostList[i] = MAX_INT64;
+ }
+
+ CandNum = 0;
+ UInt uiSads[35];
+ Bool bFilter = (uiWidth <= 16);
+ Pel *ptrSrc = m_pcPredSearch->getPredicBuf();
+
+ // 1
+ primitives.getIPredDC((pixel*)ptrSrc + ADI_BUF_STRIDE + 1, ADI_BUF_STRIDE, (pixel*)piPred, uiStride, uiWidth, bFilter);
+ uiSads[DC_IDX] = sa8d((pixel*)piOrg, uiStride, (pixel*)piPred, uiStride);
+
+ // 0
+ if (uiWidth >= 8 && uiWidth <= 32)
+ {
+ ptrSrc += ADI_BUF_STRIDE * (2 * uiWidth + 1);
+ }
+ primitives.getIPredPlanar((pixel*)ptrSrc + ADI_BUF_STRIDE + 1, ADI_BUF_STRIDE, (pixel*)piPred, uiStride, uiWidth);
+ uiSads[PLANAR_IDX] = sa8d((pixel*)piOrg, uiStride, (pixel*)piPred, uiStride);
+
+ // 33 Angle modes once
+ if (uiWidth <= 16)
+ {
+ ALIGN_VAR_32(Pel, buf1[MAX_CU_SIZE * MAX_CU_SIZE]);
+ ALIGN_VAR_32(Pel, tmp[33 * MAX_CU_SIZE * MAX_CU_SIZE]);
+
+ // Transpose NxN
+ x265::primitives.transpose[nLog2SizeMinus2]((pixel*)buf1, (pixel*)piOrg, uiStride);
+
+ Pel *pAbove0 = m_pcPredSearch->refAbove + uiWidth - 1;
+ Pel *pAbove1 = m_pcPredSearch->refAboveFlt + uiWidth - 1;
+ Pel *pLeft0 = m_pcPredSearch->refLeft + uiWidth - 1;
+ Pel *pLeft1 = m_pcPredSearch->refLeftFlt + uiWidth - 1;
+
+ x265::primitives.getIPredAngs[nLog2SizeMinus2]((pixel*)tmp, (pixel*)pAbove0, (pixel*)pLeft0, (pixel*)pAbove1, (pixel*)pLeft1, (uiWidth <= 16));
+
+ // TODO: We need SATD_x4 here
+ for (UInt uiMode = 2; uiMode < numModesAvailable; uiMode++)
+ {
+ bool modeHor = (uiMode < 18);
+ Pel *pSrc = (modeHor ? buf1 : piOrg);
+ intptr_t srcStride = (modeHor ? uiWidth : uiStride);
+
+ // use hadamard transform here
+ UInt uiSad = sa8d((pixel*)pSrc, srcStride, (pixel*)&tmp[(uiMode - 2) * (uiWidth * uiWidth)], uiWidth);
+ uiSads[uiMode] = uiSad;
+ }
+ }
+ else
+ {
+ for (UInt uiMode = 2; uiMode < numModesAvailable; uiMode++)
+ {
+ m_pcPredSearch->predIntraLumaAng(pcCU->getPattern(), uiMode, piPred, uiStride, uiWidth);
+
+ // use hadamard transform here
+ UInt uiSad = sa8d((pixel*)piOrg, uiStride, (pixel*)piPred, uiStride);
+ uiSads[uiMode] = uiSad;
+ }
+ }
+
+ for (UInt uiMode = 0; uiMode < numModesAvailable; uiMode++)
+ {
+ UInt uiSad = uiSads[uiMode];
+ UInt iModeBits = m_pcPredSearch->xModeBitsIntra(pcCU, uiMode, 0, uiPartOffset, uiDepth, uiInitTrDepth);
+ UInt64 cost = m_pcRdCost->calcRdSADCost(uiSad, iModeBits);
+ CandNum += m_pcPredSearch->xUpdateCandList(uiMode, cost, numModesForFullRD, uiRdModeList, CandCostList); //Find N least cost modes. N = numModesForFullRD
+ }
+
+ Int uiPreds[3] = { -1, -1, -1 };
+ Int iMode = -1;
+ Int numCand = pcCU->getIntraDirLumaPredictor(uiPartOffset, uiPreds, &iMode);
+ if (iMode >= 0)
+ {
+ numCand = iMode;
+ }
+
+ for (Int j = 0; j < numCand; j++)
+ {
+ Bool mostProbableModeIncluded = false;
+ UInt mostProbableMode = uiPreds[j];
+
+ for (UInt i = 0; i < numModesForFullRD; i++)
+ {
+ mostProbableModeIncluded |= (mostProbableMode == uiRdModeList[i]);
+ }
+
+ if (!mostProbableModeIncluded)
+ {
+ uiRdModeList[numModesForFullRD++] = mostProbableMode;
+ }
+ }
+ }
+
+ //determine predyuv for the best mode
+ UInt uiOrgMode = uiRdModeList[0];
+
+ pcCU->setLumaIntraDirSubParts(uiOrgMode, uiPartOffset, uiDepth + uiInitTrDepth);
+
+ // set context models
+ m_pcRDGoOnSbacCoder->load(m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST]);
+
+ // determine residual for partition
+ UInt uiPUDistY = 0;
+ UInt uiPUDistC = 0;
+ UInt64 dPUCost = 0;
+ m_pcPredSearch->xRecurIntraCodingQT(pcCU, uiInitTrDepth, uiPartOffset, true, m_ppcOrigYuv[uiDepth], m_ppcPredYuvMode[index][uiDepth], m_ppcResiYuvTemp[uiDepth], uiPUDistY, uiPUDistC, false, dPUCost);
+
+ //UInt partEnum = PartitionFromSizes(pcCU->getWidth(0), pcCU->getHeight(0));
+ // UInt SATD = primitives.satd[partEnum]((pixel*)m_ppcOrigYuv[uiDepth]->getLumaAddr(), m_ppcOrigYuv[uiDepth]->getStride(),
+ // (pixel*)m_ppcPredYuvMode[index][uiDepth]->getLumaAddr(), m_ppcPredYuvMode[index][uiDepth]->getStride());
+
+ // pcCU->getTotalCost() = SATD;
}
/** check RD costs for a CU block encoded with merge
@@ -81,7 +235,8 @@
* \param rpcTempCU
* \returns Void
*/
-Void TEncCu::xComputeCostMerge2Nx2N(TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU)
+
+Void TEncCu::xComputeCostMerge2Nx2N(TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, TComYuv*& bestPredYuv, TComYuv*& tmpPredYuv)
{
assert(rpcTempCU->getSlice()->getSliceType() != I_SLICE);
TComMvField cMvFieldNeighbours[MRG_MAX_NUM_CANDS << 1]; // double length for mv of both lists
@@ -97,61 +252,88 @@
rpcTempCU->setPartSizeSubParts(SIZE_2Nx2N, 0, uhDepth); // interprets depth relative to LCU level
rpcTempCU->setCUTransquantBypassSubParts(m_pcEncCfg->getCUTransquantBypassFlagValue(), 0, uhDepth);
rpcTempCU->getInterMergeCandidates(0, 0, cMvFieldNeighbours, uhInterDirNeighbours, numValidMergeCand);
-
- x265::MotionEstimate me_merge; // TODO: use m_pcPredSearch->m_me here
- me_merge.setSourcePlane((pixel*)m_ppcOrigYuv[uhDepth]->getLumaAddr(),
- m_ppcOrigYuv[uhDepth]->getStride());
-
- for (Int uiMergeCand = 0; uiMergeCand < numValidMergeCand; ++uiMergeCand)
+ Int mergeCandBuffer[MRG_MAX_NUM_CANDS];
+ for (UInt ui = 0; ui < numValidMergeCand; ++ui)
{
- // set MC parameters
- rpcTempCU->setPredModeSubParts(MODE_INTER, 0, uhDepth); // interprets depth relative to LCU level
- rpcTempCU->setCUTransquantBypassSubParts(m_pcEncCfg->getCUTransquantBypassFlagValue(), 0, uhDepth);
- rpcTempCU->setPartSizeSubParts(SIZE_2Nx2N, 0, uhDepth); // interprets depth relative to LCU level
- rpcTempCU->setMergeFlagSubParts(true, 0, 0, uhDepth); // interprets depth relative to LCU level
- rpcTempCU->setMergeIndexSubParts(uiMergeCand, 0, 0, uhDepth); // interprets depth relative to LCU level
- rpcTempCU->setInterDirSubParts(uhInterDirNeighbours[uiMergeCand], 0, 0, uhDepth); // interprets depth relative to LCU level
- rpcTempCU->getCUMvField(REF_PIC_LIST_0)->setAllMvField(cMvFieldNeighbours[0 + 2 * uiMergeCand], SIZE_2Nx2N, 0, 0); // interprets depth relative to rpcTempCU level
- rpcTempCU->getCUMvField(REF_PIC_LIST_1)->setAllMvField(cMvFieldNeighbours[1 + 2 * uiMergeCand], SIZE_2Nx2N, 0, 0); // interprets depth relative to rpcTempCU level
-
- // do MC
- m_pcPredSearch->motionCompensation(rpcTempCU, m_ppcPredYuvMode[4][uhDepth]);
-
- /*Todo: Fix the satd cost estimates. Why is merge being chosen in high motion areas: estimated distortion is too low?*/
-
- m_pcPredSearch->encodeResAndCalcRdInterCU(rpcTempCU,
- m_ppcOrigYuv[uhDepth],
- m_ppcPredYuvMode[4][uhDepth],
- m_ppcResiYuvTemp[uhDepth],
- m_ppcResiYuvBest[uhDepth],
- m_ppcRecoYuvTemp[uhDepth],
- (true));
-
- Int orgQP = rpcTempCU->getQP(0);
-
- if (rpcTempCU->getTotalCost() < rpcBestCU->getTotalCost())
- {
- TComDataCU* tmp = rpcTempCU;
- rpcTempCU = rpcBestCU;
- rpcBestCU = tmp;
- // Change Prediction data
- TComYuv* pcYuv = NULL;
- pcYuv = m_ppcPredYuvMode[3][uhDepth];
- m_ppcPredYuvMode[3][uhDepth] = m_ppcPredYuvMode[4][uhDepth];
- m_ppcPredYuvMode[4][uhDepth] = pcYuv;
- pcYuv = m_ppcRecoYuvBest[uhDepth];
- m_ppcRecoYuvBest[uhDepth] = m_ppcRecoYuvTemp[uhDepth];
- m_ppcRecoYuvTemp[uhDepth] = pcYuv;
- }
-
- rpcTempCU->initEstData(uhDepth, orgQP);
+ mergeCandBuffer[ui] = 0;
}
- me_merge.setSourcePU(0, rpcBestCU->getWidth(0), rpcBestCU->getHeight(0));
-
- rpcBestCU->getTotalCost() = me_merge.bufSATD((pixel*)m_ppcPredYuvMode[3][uhDepth]->getLumaAddr(),
- m_ppcPredYuvMode[3][uhDepth]->getStride());
- x265_emms();
+ Bool bestIsSkip = false;
+
+ UInt iteration;
+ if (rpcTempCU->isLosslessCoded(0))
+ {
+ iteration = 1;
+ }
+ else
+ {
+ iteration = 2;
+ }
+
+ for (UInt uiNoResidual = 0; uiNoResidual < iteration; ++uiNoResidual)
+ {
+ for (UInt uiMergeCand = 0; uiMergeCand < numValidMergeCand; ++uiMergeCand)
+ {
+ if (!(uiNoResidual == 1 && mergeCandBuffer[uiMergeCand] == 1))
+ {
+ if (!(bestIsSkip && uiNoResidual == 0))
+ {
+ // set MC parameters
+ rpcTempCU->setPredModeSubParts(MODE_INTER, 0, uhDepth); // interprets depth relative to LCU level
+ rpcTempCU->setCUTransquantBypassSubParts(m_pcEncCfg->getCUTransquantBypassFlagValue(), 0, uhDepth);
+ rpcTempCU->setPartSizeSubParts(SIZE_2Nx2N, 0, uhDepth); // interprets depth relative to LCU level
+ rpcTempCU->setMergeFlagSubParts(true, 0, 0, uhDepth); // interprets depth relative to LCU level
+ rpcTempCU->setMergeIndexSubParts(uiMergeCand, 0, 0, uhDepth); // interprets depth relative to LCU level
+ rpcTempCU->setInterDirSubParts(uhInterDirNeighbours[uiMergeCand], 0, 0, uhDepth); // interprets depth relative to LCU level
+ rpcTempCU->getCUMvField(REF_PIC_LIST_0)->setAllMvField(cMvFieldNeighbours[0 + 2 * uiMergeCand], SIZE_2Nx2N, 0, 0); // interprets depth relative to rpcTempCU level
+ rpcTempCU->getCUMvField(REF_PIC_LIST_1)->setAllMvField(cMvFieldNeighbours[1 + 2 * uiMergeCand], SIZE_2Nx2N, 0, 0); // interprets depth relative to rpcTempCU level
+
+ // do MC
+ m_pcPredSearch->motionCompensation(rpcTempCU, tmpPredYuv);
+
+ /*Todo: Fix the satd cost estimates. Why is merge being chosen in high motion areas: estimated distortion is too low?*/
+
+ UInt partEnum = PartitionFromSizes(rpcTempCU->getWidth(0), rpcTempCU->getHeight(0));
+ UInt SATD = primitives.satd[partEnum]((pixel*)m_ppcOrigYuv[uhDepth]->getLumaAddr(), m_ppcOrigYuv[uhDepth]->getStride(),
+ (pixel*)tmpPredYuv->getLumaAddr(), tmpPredYuv->getStride());
+ x265_emms();
+ rpcTempCU->getTotalDistortion() = SATD;
+ rpcTempCU->getTotalCost() = SATD;
+
+ if (uiNoResidual == 0)
+ {
+ if (rpcTempCU->getQtRootCbf(0) == 0)
+ {
+ mergeCandBuffer[uiMergeCand] = 1;
+ }
+ }
+
+ rpcTempCU->setSkipFlagSubParts(rpcTempCU->getQtRootCbf(0) == 0, 0, uhDepth);
+ Int orgQP = rpcTempCU->getQP(0);
+
+ //xCheckBestMode(rpcBestCU, rpcTempCU, uhDepth);
+ if (rpcTempCU->getTotalCost() < rpcBestCU->getTotalCost())
+ {
+ TComDataCU* tmp = rpcTempCU;
+ rpcTempCU = rpcBestCU;
+ rpcBestCU = tmp;
+ // Change Prediction data
+ TComYuv* pcYuv = NULL;
+ pcYuv = bestPredYuv;
+ bestPredYuv = tmpPredYuv;
+ tmpPredYuv = pcYuv;
+ }
+
+ rpcTempCU->initEstData(uhDepth, orgQP);
+
+ if (m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip)
+ {
+ bestIsSkip = rpcTempCU->getQtRootCbf(0) == 0;
+ }
+ }
+ }
+ }
+ }
}
Void TEncCu::xComputeCostInter(TComDataCU*& rpcTempCU, PartSize ePartSize, UInt Index, Bool bUseMRG)
@@ -235,7 +417,7 @@
/*Compute Merge Cost */
#if 1
- xComputeCostMerge2Nx2N(m_MergeBestCU[uiDepth], m_MergeCU[uiDepth]);
+ xComputeCostMerge2Nx2N(m_MergeBestCU[uiDepth], m_MergeCU[uiDepth], m_ppcPredYuvMode[3][uiDepth], m_ppcPredYuvMode[4][uiDepth]);
rpcBestCU = m_MergeBestCU[uiDepth];
YuvTemp = m_ppcPredYuvMode[3][uiDepth];
m_ppcPredYuvMode[3][uiDepth] = m_ppcPredYuvBest[uiDepth];
@@ -286,16 +468,32 @@
m_ppcPredYuvBest[uiDepth] = YuvTemp;
}
- /* Perform encode residual for the best mode chosen only*/
- if(m_MergeBestCU[uiDepth] != rpcBestCU){
m_pcPredSearch->encodeResAndCalcRdInterCU(rpcBestCU, m_ppcOrigYuv[uiDepth], m_ppcPredYuvBest[uiDepth], m_ppcResiYuvTemp[uiDepth], m_ppcResiYuvBest[uiDepth], m_ppcRecoYuvBest[uiDepth], false);
- }
- else{
- rpcBestCU->getTotalCost() = m_pcRdCost->calcRdCost(rpcBestCU->getTotalDistortion(), rpcBestCU->getTotalBits());
+
+ /*compute intra cost */
+
+ /* if(rpcBestCU->getCbf(0, TEXT_LUMA) != 0 ||
+ rpcBestCU->getCbf(0, TEXT_CHROMA_U) != 0 ||
+ rpcBestCU->getCbf(0, TEXT_CHROMA_V) != 0)*/
+ {
+ xComputeCostIntrainInter(m_IntrainInterCU[uiDepth], SIZE_2Nx2N, 5);
+ xEncodeIntrainInter(m_IntrainInterCU[uiDepth], m_ppcOrigYuv[uiDepth], m_ppcPredYuvMode[5][uiDepth], m_ppcResiYuvTemp[uiDepth], m_ppcRecoYuvTemp[uiDepth]);
+
+ if (m_IntrainInterCU[uiDepth]->getTotalCost() < rpcBestCU->getTotalCost())
+ {
+ rpcBestCU = m_IntrainInterCU[uiDepth];
+
+ YuvTemp = m_ppcPredYuvMode[5][uiDepth];
+ m_ppcPredYuvMode[5][uiDepth] = m_ppcPredYuvBest[uiDepth];
+ m_ppcPredYuvBest[uiDepth] = YuvTemp;
+ TComYuv* tmpPic = m_ppcRecoYuvBest[uiDepth];
+ m_ppcRecoYuvBest[uiDepth] = m_ppcRecoYuvTemp[uiDepth];
+ m_ppcRecoYuvTemp[uiDepth] = tmpPic;
+ }
}
/* Disable recursive analysis for whole CUs temporarily*/
- if (rpcBestCU->isSkipped(0))
+ if ((rpcBestCU != 0) && (rpcBestCU->isSkipped(0)))
{
#if CU_STAT_LOGFILE
cntSkipCu[uiDepth]++;
@@ -321,7 +519,7 @@
#if CU_STAT_LOGFILE
if (rpcBestCU)
{
- fprintf(fp1, "\n Width : %d ,Inter 2Nx2N_Merge : %d , 2Nx2N : %d , 2NxN : %d, Nx2N : %d ", rpcBestCU->getWidth(0), m_MergeBestCU[uiDepth]->getTotalCost(), m_InterCU_2Nx2N[uiDepth]->getTotalCost(), m_InterCU_2NxN[uiDepth]->getTotalCost(), m_InterCU_Nx2N[uiDepth]->getTotalCost());
+ fprintf(fp1, "\n Width : %d ,Inter 2Nx2N_Merge : %d , 2Nx2N : %d , 2NxN : %d, Nx2N : %d , intra : %d", rpcBestCU->getWidth(0), m_MergeBestCU[uiDepth]->getTotalCost(), m_InterCU_2Nx2N[uiDepth]->getTotalCost(), m_InterCU_2NxN[uiDepth]->getTotalCost(), m_InterCU_Nx2N[uiDepth]->getTotalCost(), m_IntrainInterCU[uiDepth]->getTotalCost());
}
#endif
@@ -402,6 +600,43 @@
m_pppcRDSbacCoder[uhNextDepth][CI_NEXT_BEST]->store(m_pppcRDSbacCoder[uiDepth][CI_TEMP_BEST]);
+#if CU_STAT_LOGFILE
+ if (rpcBestCU != 0)
+ {
+ if (rpcBestCU->getTotalCost() < rpcTempCU->getTotalCost())
+ {
+ if (rpcBestCU->getPredictionMode(0) == MODE_INTER)
+ {
+ cntInter[uiDepth]++;
+ if (rpcBestCU->getPartitionSize(0) < 3)
+ {
+ cuInterDistribution[uiDepth][rpcBestCU->getPartitionSize(0)]++;
+ }
+ else
+ {
+ cuInterDistribution[uiDepth][3]++;
+ }
+ }
+ else if (rpcBestCU->getPredictionMode(0) == MODE_INTRA)
+ {
+ cntIntra[uiDepth]++;
+ if (rpcBestCU->getLumaIntraDir()[0] > 1)
+ {
+ cuIntraDistribution[uiDepth][2]++;
+ }
+ else
+ {
+ cuIntraDistribution[uiDepth][rpcBestCU->getLumaIntraDir()[0]]++;
+ }
+ }
+ }
+ else
+ {
+ cntSplit[uiDepth]++;
+ }
+ }
+#endif // if LOGGING
+
/*If Best Mode is not NULL; then compare costs. Else assign best mode to Sub-CU costs
Copy Recon data from Temp structure to Best structure*/
if (rpcBestCU)
@@ -422,39 +657,6 @@
m_ppcRecoYuvTemp[uiDepth] = m_ppcRecoYuvBest[uiDepth];
m_ppcRecoYuvBest[uiDepth] = YuvTemp;
}
-#if CU_STAT_LOGFILE
- if (rpcBestCU->getTotalCost() < rpcTempCU->getTotalCost())
- {
- if (rpcBestCU->getPredictionMode(0) == MODE_INTER)
- {
- cntInter[uiDepth]++;
- if (rpcBestCU->getPartitionSize(0) < 3)
- {
- cuInterDistribution[uiDepth][rpcBestCU->getPartitionSize(0)]++;
- }
- else
- {
- cuInterDistribution[uiDepth][3]++;
- }
- }
- else if (rpcBestCU->getPredictionMode(0) == MODE_INTRA)
- {
- cntIntra[uiDepth]++;
- if (rpcBestCU->getLumaIntraDir()[0] > 1)
- {
- cuIntraDistribution[uiDepth][2]++;
- }
- else
- {
- cuIntraDistribution[uiDepth][rpcBestCU->getLumaIntraDir()[0]]++;
- }
- }
- }
- else
- {
- cntSplit[uiDepth]++;
- }
-#endif // if LOGGING
}
#if CU_STAT_LOGFILE
-------------- next part --------------
A non-text attachment was scrubbed...
Name: July_3_HEVC.patch
Type: text/x-patch
Size: 34633 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20130703/7d93032f/attachment-0001.bin>
More information about the x265-devel mailing list