[x265] [PATCH] implementation - Intra and Merge in no - rdo

sumalatha at multicorewareinc.com sumalatha at multicorewareinc.com
Wed Jul 3 15:13:39 CEST 2013


# HG changeset patch
# User sumalatha
# Date 1372857189 -19800
# Node ID af7e99a736333aaa532e29f1be225a51a4ac9b60
# Parent  209cce8f38beba4f66ba5d3bf1a1b373b4768090
implementation - Intra and Merge in no - rdo

diff -r 209cce8f38be -r af7e99a73633 source/Lib/TLibCommon/TComPrediction.h
--- a/source/Lib/TLibCommon/TComPrediction.h	Wed Jul 03 12:07:43 2013 +0530
+++ b/source/Lib/TLibCommon/TComPrediction.h	Wed Jul 03 18:43:09 2013 +0530
@@ -64,9 +64,6 @@
     Int       m_iPredBufStride;
     Int       m_iPredBufHeight;
 
-    //reference sample for IntraPrediction
-    Pel *refAbove, *refAboveFlt, *refLeft, *refLeftFlt;
-
     TComYuv   m_acYuvPred[2];
     TShortYUV   m_acShortPred[2];
     TComYuv   m_cYuvPredTemp;
@@ -99,6 +96,9 @@
     TComPrediction();
     virtual ~TComPrediction();
 
+    //reference sample for IntraPrediction
+    Pel *refAbove, *refAboveFlt, *refLeft, *refLeftFlt;
+
     Void    initTempBuff();
 
     // inter
diff -r 209cce8f38be -r af7e99a73633 source/Lib/TLibEncoder/TEncCu.h
--- a/source/Lib/TLibEncoder/TEncCu.h	Wed Jul 03 12:07:43 2013 +0530
+++ b/source/Lib/TLibEncoder/TEncCu.h	Wed Jul 03 18:43:09 2013 +0530
@@ -73,7 +73,7 @@
     TComDataCU**            m_InterCU_Nx2N;
     TComDataCU**            m_IntrainInterCU;
     TComDataCU**            m_MergeCU;
-    TComDataCU**            m_MergeBestCU; 
+    TComDataCU**            m_MergeBestCU;
     TComDataCU**            m_ppcBestCU;    ///< Best CUs in each depth
     TComDataCU**            m_ppcTempCU;    ///< Temporary CUs in each depth
     UChar                   m_uhTotalDepth;
@@ -111,11 +111,17 @@
     Bool                    m_abortFlag; // This flag is used to abort the recursive CU check when the child CU cost is greater than the parent CU
 
 public:
+
     Void set_pppcRDSbacCoder(TEncSbac*** pppcRDSbacCoder) { m_pppcRDSbacCoder = pppcRDSbacCoder; }
+
     Void set_pcEntropyCoder(TEncEntropy* pcEntropyCoder) { m_pcEntropyCoder = pcEntropyCoder; }
+
     Void set_pcPredSearch(TEncSearch* pcPredSearch) { m_pcPredSearch = pcPredSearch; }
+
     Void set_pcRDGoOnSbacCoder(TEncSbac* pcRDGoOnSbacCoder) { m_pcRDGoOnSbacCoder = pcRDGoOnSbacCoder; }
+
     Void set_pcTrQuant(TComTrQuant* pcTrQuant) { m_pcTrQuant = pcTrQuant; }
+
     Void set_pcRdCost(TComRdCost* pcRdCost) { m_pcRdCost = pcRdCost; }
 
     /// copy parameters from encoder class
@@ -147,12 +153,13 @@
 
     Int   xComputeQP(TComDataCU* pcCU, UInt uiDepth);
     Void  xCheckBestMode(TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, UInt uiDepth);
-    
+
     Void  xCheckRDCostMerge2Nx2N(TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, Bool *earlyDetectionSkipMode);
-    Void  xComputeCostMerge2Nx2N(TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU);
-    Void  xComputeCostIntrainInter(TComDataCU*& rpcTempCU, PartSize eSize,UInt index);
+    Void xComputeCostMerge2Nx2N(TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, TComYuv*& bestPredYuv, TComYuv*& tmpPredYuv);
+    Void  xComputeCostIntrainInter(TComDataCU*& rpcTempCU, PartSize eSize, UInt index);
     Void  xCheckRDCostInter(TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, PartSize ePartSize, Bool bUseMRG = false);
     Void  xComputeCostInter(TComDataCU*& rpcTempCU, PartSize ePartSize, UInt Index, Bool bUseMRG = false);
+    Void  xEncodeIntrainInter(TComDataCU*& pcCU, TComYuv* pcYuvOrg, TComYuv* pcYuvPred, TShortYUV*& rpcYuvResi, TComYuv*& rpcYuvRec);
     Void  xCheckRDCostIntra(TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, PartSize ePartSize);
     Void  xCheckRDCostIntrainInter(TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, PartSize eSize);
     Void  xCheckDQP(TComDataCU* pcCU);
diff -r 209cce8f38be -r af7e99a73633 source/Lib/TLibEncoder/TEncSearch.h
--- a/source/Lib/TLibEncoder/TEncSearch.h	Wed Jul 03 12:07:43 2013 +0530
+++ b/source/Lib/TLibEncoder/TEncSearch.h	Wed Jul 03 18:43:09 2013 +0530
@@ -65,7 +65,7 @@
 /// encoder search class
 class TEncSearch : public TComPrediction
 {
-private:
+public:
 
     x265::MotionEstimate m_me;
     x265::BitCost        m_bc; // TODO: m_bc will go away with HM ME
@@ -117,8 +117,7 @@
     x265::MV        m_acMvPredictors[3];
 
     // RD computation
-    TEncSbac***     m_pppcRDSbacCoder;
-    TEncSbac*       m_pcRDGoOnSbacCoder;
+
     DistParam       m_cDistParam;
 
     // Misc.
@@ -130,6 +129,9 @@
 
 public:
 
+    TEncSbac***     m_pppcRDSbacCoder;
+    TEncSbac*       m_pcRDGoOnSbacCoder;
+
     Void set_pppcRDSbacCoder(TEncSbac*** pppcRDSbacCoder) { m_pppcRDSbacCoder = pppcRDSbacCoder; }
 
     Void set_pcEntropyCoder(TEncEntropy* pcEntropyCoder) { m_pcEntropyCoder = pcEntropyCoder; }
@@ -142,6 +144,8 @@
     virtual ~TEncSearch();
 
     Void init(TEncCfg* pcEncCfg, TComRdCost* pcRdCost, TComTrQuant *pcTrQuant);
+    UInt  xModeBitsIntra(TComDataCU* pcCU, UInt uiMode, UInt uiPU, UInt uiPartOffset, UInt uiDepth, UInt uiInitTrDepth);
+    UInt  xUpdateCandList(UInt uiMode, UInt64 uiCost, UInt uiFastCandNum, UInt * CandModeList, UInt64 * CandCostList);
 
 protected:
 
@@ -215,6 +219,23 @@
     Void IPCMSearch(TComDataCU* pcCU, TComYuv* pcOrgYuv, TComYuv*& rpcPredYuv, TShortYUV*& rpcResiYuv, TComYuv*& rpcRecoYuv);
 
     UInt estimateHeaderBits(TComDataCU* pcCU, UInt uiAbsPartIdx);
+    Void  xRecurIntraCodingQT(TComDataCU * pcCU,
+                              UInt         uiTrDepth,
+                              UInt         uiAbsPartIdx,
+                              Bool         bLumaOnly,
+                              TComYuv *    pcOrgYuv,
+                              TComYuv *    pcPredYuv,
+                              TShortYUV *  pcResiYuv,
+                              UInt &       ruiDistY,
+                              UInt &       ruiDistC,
+                              Bool         bCheckFirst,
+                              UInt64 &     dRDCost);
+
+    Void  xSetIntraResultQT(TComDataCU* pcCU,
+                            UInt        uiTrDepth,
+                            UInt        uiAbsPartIdx,
+                            Bool        bLumaOnly,
+                            TComYuv*    pcRecoYuv);
 
 protected:
 
@@ -268,24 +289,6 @@
                                 UInt        uiChromaId,
                                 Int         default0Save1Load2 = 0);
 
-    Void  xRecurIntraCodingQT(TComDataCU * pcCU,
-                              UInt         uiTrDepth,
-                              UInt         uiAbsPartIdx,
-                              Bool         bLumaOnly,
-                              TComYuv *    pcOrgYuv,
-                              TComYuv *    pcPredYuv,
-                              TShortYUV *  pcResiYuv,
-                              UInt &       ruiDistY,
-                              UInt &       ruiDistC,
-                              Bool         bCheckFirst,
-                              UInt64 &     dRDCost);
-
-    Void  xSetIntraResultQT(TComDataCU* pcCU,
-                            UInt        uiTrDepth,
-                            UInt        uiAbsPartIdx,
-                            Bool        bLumaOnly,
-                            TComYuv*    pcRecoYuv);
-
     Void  xRecurIntraChromaCodingQT(TComDataCU* pcCU,
                                     UInt        uiTrDepth,
                                     UInt        uiAbsPartIdx,
@@ -408,17 +411,17 @@
                         x265::MV&    rcMv,
                         UInt&        ruiSAD);
 
-    Void xPatternSearchFracDIF(TComDataCU* pcCU,
+    Void xPatternSearchFracDIF(TComDataCU*  pcCU,
                                TComPattern* pcPatternKey,
-                               Pel* piRefY,
-                               Int iRefStride,
-                               x265::MV* pcMvInt,
-                               x265::MV& rcMvHalf,
-                               x265::MV& rcMvQter,
-                               UInt& ruiCost,
-                               Bool biPred,
-                               TComPicYuv* refPic,
-                               UInt uiPartAddr);
+                               Pel*         piRefY,
+                               Int          iRefStride,
+                               x265::MV*    pcMvInt,
+                               x265::MV&    rcMvHalf,
+                               x265::MV&    rcMvQter,
+                               UInt&        ruiCost,
+                               Bool         biPred,
+                               TComPicYuv*  refPic,
+                               UInt         uiPartAddr);
 
     Void xExtDIFUpSamplingH(TComPattern* pcPattern, Bool biPred);
     Void xExtDIFUpSamplingQ(TComPattern* pcPatternKey, x265::MV halfPelRef, Bool biPred);
@@ -431,9 +434,6 @@
     Void xEstimateResidualQT(TComDataCU* pcCU, UInt uiQuadrant, UInt uiAbsPartIdx, UInt absTUPartIdx, TShortYUV* pcResi, const UInt uiDepth, UInt64 &rdCost, UInt &ruiBits, UInt &ruiDist, UInt *puiZeroDist);
     Void xSetResidualQTData(TComDataCU* pcCU, UInt uiQuadrant, UInt uiAbsPartIdx, UInt absTUPartIdx, TShortYUV* pcResi, UInt uiDepth, Bool bSpatial);
 
-    UInt  xModeBitsIntra(TComDataCU* pcCU, UInt uiMode, UInt uiPU, UInt uiPartOffset, UInt uiDepth, UInt uiInitTrDepth);
-    UInt  xUpdateCandList(UInt uiMode, UInt64 uiCost, UInt uiFastCandNum, UInt * CandModeList, UInt64 * CandCostList);
-
     // -------------------------------------------------------------------------------------------------------------------
     // compute symbol bits
     // -------------------------------------------------------------------------------------------------------------------
diff -r 209cce8f38be -r af7e99a73633 source/encoder/compress.cpp
--- a/source/encoder/compress.cpp	Wed Jul 03 12:07:43 2013 +0530
+++ b/source/encoder/compress.cpp	Wed Jul 03 18:43:09 2013 +0530
@@ -35,45 +35,199 @@
 extern FILE* fp1;
 #endif
 
-Void TEncCu::xComputeCostIntrainInter(TComDataCU*& rpcTempCU, PartSize eSize, UInt index)
+Void TEncCu::xEncodeIntrainInter(TComDataCU*& pcCU, TComYuv* pcYuvOrg, TComYuv* pcYuvPred,  TShortYUV*& rpcYuvResi, TComYuv*& rpcYuvRec) // codes pcCU as intra (luma, then chroma) and fills in its bits, bins and cost
 {
-    //PPAScopeEvent(TEncCU_xCheckRDCostIntra + uiDepth);
+    UInt64   dPUCost = 0;   // cost output of xRecurIntraCodingQT; not read again in this function
+    UInt   uiPUDistY = 0;   // luma distortion, filled in by xRecurIntraCodingQT
+    UInt   uiPUDistC = 0;   // chroma distortion; also handed to estIntraPredChromaQT below
+    UInt   uiDepth = pcCU->getDepth(0);
+    UInt    uiInitTrDepth  = pcCU->getPartitionSize(0) == SIZE_2Nx2N ? 0 : 1; // 0 for a 2Nx2N CU, 1 for any other partition size
 
-    UChar uiDepth = rpcTempCU->getDepth(0);
-    rpcTempCU->setSkipFlagSubParts(false, 0, uiDepth);
-    rpcTempCU->setPartSizeSubParts(eSize, 0, uiDepth);
-    rpcTempCU->setPredModeSubParts(MODE_INTRA, 0, uiDepth);
-    rpcTempCU->setCUTransquantBypassSubParts(m_pcEncCfg->getCUTransquantBypassFlagValue(), 0, uiDepth);
+    // set context models: reload the search object's coder from this depth's CI_CURR_BEST state
+    m_pcPredSearch->m_pcRDGoOnSbacCoder->load(m_pcPredSearch->m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST]);
 
-    UInt uiPreCalcDistC = 0;
+    m_pcPredSearch->xRecurIntraCodingQT(pcCU, uiInitTrDepth, 0, true, pcYuvOrg, pcYuvPred, rpcYuvResi, uiPUDistY, uiPUDistC, false, dPUCost); // bLumaOnly = true, bCheckFirst = false
+    m_pcPredSearch->xSetIntraResultQT(pcCU, uiInitTrDepth, 0, true, rpcYuvRec); // luma-only result goes to the caller's reconstruction buffer
 
-    m_pcPredSearch->estIntraPredQT(rpcTempCU, m_ppcOrigYuv[uiDepth],  m_ppcPredYuvMode[index][uiDepth], m_ppcResiYuvTemp[uiDepth], m_ppcRecoYuvTemp[uiDepth], uiPreCalcDistC, true);
+    //=== update PU data ====
+    pcCU->copyToPic(pcCU->getDepth(0), 0, uiInitTrDepth);
+    //===== set distortion (rate and r-d costs are determined later) =====
+    pcCU->getTotalDistortion() = uiPUDistY + uiPUDistC;
 
-    m_ppcRecoYuvTemp[uiDepth]->copyToPicLuma(rpcTempCU->getPic()->getPicYuvRec(), rpcTempCU->getAddr(), rpcTempCU->getZorderIdxInCU());
+    rpcYuvRec->copyToPicLuma(pcCU->getPic()->getPicYuvRec(), pcCU->getAddr(), pcCU->getZorderIdxInCU()); // publish reconstructed luma to the picture's recon plane before the chroma pass
 
-    m_pcPredSearch->estIntraPredChromaQT(rpcTempCU, m_ppcOrigYuv[uiDepth], m_ppcPredYuvMode[index][uiDepth], m_ppcResiYuvTemp[uiDepth], m_ppcRecoYuvTemp[uiDepth], uiPreCalcDistC);
-
+    // chroma pass runs on the caller-supplied buffers rather than on the m_ppc*Yuv members directly
+    m_pcPredSearch->estIntraPredChromaQT(pcCU, pcYuvOrg, pcYuvPred, rpcYuvResi, rpcYuvRec, uiPUDistC);
     m_pcEntropyCoder->resetBits();
-    if (rpcTempCU->getSlice()->getPPS()->getTransquantBypassEnableFlag())
+    if (pcCU->getSlice()->getPPS()->getTransquantBypassEnableFlag())
     {
-        m_pcEntropyCoder->encodeCUTransquantBypassFlag(rpcTempCU, 0, true);
+        m_pcEntropyCoder->encodeCUTransquantBypassFlag(pcCU, 0, true);
     }
-    m_pcEntropyCoder->encodeSkipFlag(rpcTempCU, 0,          true);
-    m_pcEntropyCoder->encodePredMode(rpcTempCU, 0,          true);
-    m_pcEntropyCoder->encodePartSize(rpcTempCU, 0, uiDepth, true);
-    m_pcEntropyCoder->encodePredInfo(rpcTempCU, 0,          true);
-    m_pcEntropyCoder->encodeIPCMInfo(rpcTempCU, 0, true);
+    m_pcEntropyCoder->encodeSkipFlag(pcCU, 0,          true);
+    m_pcEntropyCoder->encodePredMode(pcCU, 0,          true);
+    m_pcEntropyCoder->encodePartSize(pcCU, 0, uiDepth, true);
+    m_pcEntropyCoder->encodePredInfo(pcCU, 0,          true);
+    m_pcEntropyCoder->encodeIPCMInfo(pcCU, 0, true);
 
     // Encode Coefficients
     Bool bCodeDQP = getdQPFlag();
-    // m_pcEntropyCoder->encodeCoeff(rpcTempCU, 0, uiDepth, rpcTempCU->getWidth(0), rpcTempCU->getHeight(0), bCodeDQP);
+    m_pcEntropyCoder->encodeCoeff(pcCU, 0, uiDepth, pcCU->getWidth(0), pcCU->getHeight(0), bCodeDQP); // coefficient coding is now executed (this call was commented out before this change)
     setdQPFlag(bCodeDQP);
 
     m_pcRDGoOnSbacCoder->store(m_pppcRDSbacCoder[uiDepth][CI_TEMP_BEST]);
 
-    rpcTempCU->getTotalBits() = m_pcEntropyCoder->getNumberOfWrittenBits();
-    rpcTempCU->getTotalBins() = ((TEncBinCABAC*)((TEncSbac*)m_pcEntropyCoder->m_pcEntropyCoderIf)->getEncBinIf())->getBinsCoded();
-    rpcTempCU->getTotalCost() = m_pcRdCost->calcRdCost(rpcTempCU->getTotalDistortion(), rpcTempCU->getTotalBits());
+    pcCU->getTotalBits() = m_pcEntropyCoder->getNumberOfWrittenBits(); // bits written since the resetBits() call above
+    pcCU->getTotalBins() = ((TEncBinCABAC*)((TEncSbac*)m_pcEntropyCoder->m_pcEntropyCoderIf)->getEncBinIf())->getBinsCoded();
+    pcCU->getTotalCost() = m_pcRdCost->calcRdCost(pcCU->getTotalDistortion(), pcCU->getTotalBits()); // final cost from total distortion and total bits
+}
+
+/** Fast intra mode decision for a CU analysed in an inter slice.
+ *  Marks pcCU as a non-skipped intra CU of partition eSize, scores all 35 luma modes by sa8d
+ *  distortion plus mode bits, takes the first entry of the resulting candidate list as the
+ *  luma mode and runs xRecurIntraCodingQT for it, predicting into m_ppcPredYuvMode[index][depth].
+ * \param pcCU   CU to analyse; skip flag, partition size, pred mode and luma direction are overwritten
+ * \param eSize  partition size stored in the CU
+ * \param index  row of m_ppcPredYuvMode that receives the trial and final luma predictions
+ */
+Void TEncCu::xComputeCostIntrainInter(TComDataCU*& pcCU, PartSize eSize, UInt index)
+{
+    UInt    uiDepth        = pcCU->getDepth(0);
+    UInt    uiPartOffset   = 0;
+    // Store the mode fields first: uiInitTrDepth below is read back from the CU's partition size and must reflect eSize.
+    pcCU->setSkipFlagSubParts(false, 0, uiDepth);
+    pcCU->setPartSizeSubParts(eSize, 0, uiDepth);
+    pcCU->setPredModeSubParts(MODE_INTRA, 0, uiDepth);
+    pcCU->setCUTransquantBypassSubParts(m_pcEncCfg->getCUTransquantBypassFlagValue(), 0, uiDepth);
+
+    UInt    uiInitTrDepth  = pcCU->getPartitionSize(0) == SIZE_2Nx2N ? 0 : 1;
+    UInt    uiWidth        = pcCU->getWidth(0) >> uiInitTrDepth;
+    UInt    uiWidthBit     = pcCU->getIntraSizeIdx(0);
+    UInt64  CandCostList[FAST_UDI_MAX_RDMODE_NUM];
+
+    //===== init pattern for luma prediction =====
+    pcCU->getPattern()->initPattern(pcCU, uiInitTrDepth, uiPartOffset);
+    // Reference sample smoothing
+    pcCU->getPattern()->initAdiPattern(pcCU, uiPartOffset, uiInitTrDepth, m_pcPredSearch->getPredicBuf(),  m_pcPredSearch->getPredicBufWidth(),  m_pcPredSearch->getPredicBufHeight(), m_pcPredSearch->refAbove, m_pcPredSearch->refLeft, m_pcPredSearch->refAboveFlt, m_pcPredSearch->refLeftFlt);
+
+    //===== determine set of modes to be tested (using prediction signal only) =====
+    UInt numModesAvailable = 35; //total number of Intra modes
+    Pel* piOrg         = m_ppcOrigYuv[uiDepth]->getLumaAddr(0, uiWidth);
+    Pel* piPred        = m_ppcPredYuvMode[index][uiDepth]->getLumaAddr(0, uiWidth); // trial predictions use the caller-selected row (was hard-coded to 5)
+    UInt uiStride      = m_ppcPredYuvMode[index][uiDepth]->getStride();
+    UInt uiRdModeList[FAST_UDI_MAX_RDMODE_NUM];
+    UInt numModesForFullRD = g_aucIntraModeNumFast[uiWidthBit];
+    Int nLog2SizeMinus2 = g_aucConvertToBit[uiWidth];
+    x265::pixelcmp sa8d = x265::primitives.sa8d[nLog2SizeMinus2];
+    {
+        assert(numModesForFullRD < numModesAvailable);
+
+        for (UInt i = 0; i < numModesForFullRD; i++)
+        {
+            CandCostList[i] = MAX_INT64;
+        }
+
+        UInt uiSads[35];
+        Bool bFilter = (uiWidth <= 16);
+        Pel *ptrSrc = m_pcPredSearch->getPredicBuf();
+
+        // mode 1: DC
+        primitives.getIPredDC((pixel*)ptrSrc + ADI_BUF_STRIDE + 1, ADI_BUF_STRIDE, (pixel*)piPred, uiStride, uiWidth, bFilter);
+        uiSads[DC_IDX] = sa8d((pixel*)piOrg, uiStride, (pixel*)piPred, uiStride);
+
+        // mode 0: planar
+        if (uiWidth >= 8 && uiWidth <= 32) // presumably switches to the filtered reference samples in the buffer - TODO confirm
+        {
+            ptrSrc += ADI_BUF_STRIDE * (2 * uiWidth + 1);
+        }
+        primitives.getIPredPlanar((pixel*)ptrSrc + ADI_BUF_STRIDE + 1, ADI_BUF_STRIDE, (pixel*)piPred, uiStride, uiWidth);
+        uiSads[PLANAR_IDX] = sa8d((pixel*)piOrg, uiStride, (pixel*)piPred, uiStride);
+
+        // 33 Angle modes once
+        if (uiWidth <= 16)
+        {
+            ALIGN_VAR_32(Pel, buf1[MAX_CU_SIZE * MAX_CU_SIZE]);
+            ALIGN_VAR_32(Pel, tmp[33 * MAX_CU_SIZE * MAX_CU_SIZE]);
+
+            // Transpose NxN
+            x265::primitives.transpose[nLog2SizeMinus2]((pixel*)buf1, (pixel*)piOrg, uiStride);
+
+            Pel *pAbove0 = m_pcPredSearch->refAbove    + uiWidth - 1;
+            Pel *pAbove1 = m_pcPredSearch->refAboveFlt + uiWidth - 1;
+            Pel *pLeft0  = m_pcPredSearch->refLeft     + uiWidth - 1;
+            Pel *pLeft1  = m_pcPredSearch->refLeftFlt  + uiWidth - 1;
+
+            x265::primitives.getIPredAngs[nLog2SizeMinus2]((pixel*)tmp, (pixel*)pAbove0, (pixel*)pLeft0, (pixel*)pAbove1, (pixel*)pLeft1, (uiWidth <= 16));
+
+            // TODO: We need SATD_x4 here
+            for (UInt uiMode = 2; uiMode < numModesAvailable; uiMode++)
+            {
+                bool modeHor = (uiMode < 18); // modes below 18 are compared against the transposed source block
+                Pel *pSrc = (modeHor ? buf1 : piOrg);
+                intptr_t srcStride = (modeHor ? uiWidth : uiStride);
+
+                // use hadamard transform here
+                UInt uiSad = sa8d((pixel*)pSrc, srcStride, (pixel*)&tmp[(uiMode - 2) * (uiWidth * uiWidth)], uiWidth);
+                uiSads[uiMode] = uiSad;
+            }
+        }
+        else
+        {
+            for (UInt uiMode = 2; uiMode < numModesAvailable; uiMode++)
+            {
+                m_pcPredSearch->predIntraLumaAng(pcCU->getPattern(), uiMode, piPred, uiStride, uiWidth);
+
+                // use hadamard transform here
+                UInt uiSad = sa8d((pixel*)piOrg, uiStride, (pixel*)piPred, uiStride);
+                uiSads[uiMode] = uiSad;
+            }
+        }
+
+        for (UInt uiMode = 0; uiMode < numModesAvailable; uiMode++)
+        {
+            UInt uiSad = uiSads[uiMode];
+            UInt iModeBits = m_pcPredSearch->xModeBitsIntra(pcCU, uiMode, 0, uiPartOffset, uiDepth, uiInitTrDepth);
+            UInt64 cost = m_pcRdCost->calcRdSADCost(uiSad, iModeBits);
+            m_pcPredSearch->xUpdateCandList(uiMode, cost, numModesForFullRD, uiRdModeList, CandCostList);      //Find N least cost  modes. N = numModesForFullRD
+        }
+
+        Int uiPreds[3] = { -1, -1, -1 };
+        Int iMode = -1;
+        Int numCand = pcCU->getIntraDirLumaPredictor(uiPartOffset, uiPreds, &iMode);
+        if (iMode >= 0)
+        {
+            numCand = iMode;
+        }
+
+        for (Int j = 0; j < numCand; j++) // append each most-probable mode that is not already in the candidate list
+        {
+            Bool mostProbableModeIncluded = false;
+            UInt mostProbableMode = uiPreds[j];
+
+            for (UInt i = 0; i < numModesForFullRD; i++)
+            {
+                mostProbableModeIncluded |= (mostProbableMode == uiRdModeList[i]);
+            }
+
+            if (!mostProbableModeIncluded)
+            {
+                uiRdModeList[numModesForFullRD++] = mostProbableMode;
+            }
+        }
+    }
+
+    //determine predyuv for the best mode
+    UInt uiOrgMode = uiRdModeList[0];
+    pcCU->setLumaIntraDirSubParts(uiOrgMode, uiPartOffset, uiDepth + uiInitTrDepth);
+
+    // set context models
+    m_pcRDGoOnSbacCoder->load(m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST]);
+
+    // determine residual for partition
+    UInt   uiPUDistY = 0;
+    UInt   uiPUDistC = 0;
+    UInt64 dPUCost   = 0;
+    m_pcPredSearch->xRecurIntraCodingQT(pcCU, uiInitTrDepth, uiPartOffset, true, m_ppcOrigYuv[uiDepth], m_ppcPredYuvMode[index][uiDepth], m_ppcResiYuvTemp[uiDepth], uiPUDistY, uiPUDistC, false, dPUCost);
+    // uiPUDistY, uiPUDistC and dPUCost are not written back to pcCU here; xEncodeIntrainInter sets the CU distortion, bits and cost.
 }
 
 /** check RD costs for a CU block encoded with merge
@@ -81,7 +235,8 @@
  * \param rpcTempCU
  * \returns Void
  */
-Void TEncCu::xComputeCostMerge2Nx2N(TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU)
+
+Void TEncCu::xComputeCostMerge2Nx2N(TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, TComYuv*& bestPredYuv, TComYuv*& tmpPredYuv)
 {
     assert(rpcTempCU->getSlice()->getSliceType() != I_SLICE);
     TComMvField  cMvFieldNeighbours[MRG_MAX_NUM_CANDS << 1]; // double length for mv of both lists
@@ -97,61 +252,88 @@
     rpcTempCU->setPartSizeSubParts(SIZE_2Nx2N, 0, uhDepth); // interprets depth relative to LCU level
     rpcTempCU->setCUTransquantBypassSubParts(m_pcEncCfg->getCUTransquantBypassFlagValue(), 0, uhDepth);
     rpcTempCU->getInterMergeCandidates(0, 0, cMvFieldNeighbours, uhInterDirNeighbours, numValidMergeCand);
-
-    x265::MotionEstimate me_merge; // TODO: use m_pcPredSearch->m_me here
-    me_merge.setSourcePlane((pixel*)m_ppcOrigYuv[uhDepth]->getLumaAddr(),
-                            m_ppcOrigYuv[uhDepth]->getStride());
-
-    for (Int uiMergeCand = 0; uiMergeCand < numValidMergeCand; ++uiMergeCand)
+    Int mergeCandBuffer[MRG_MAX_NUM_CANDS];
+    for (UInt ui = 0; ui < numValidMergeCand; ++ui)
     {
-        // set MC parameters
-        rpcTempCU->setPredModeSubParts(MODE_INTER, 0, uhDepth);             // interprets depth relative to LCU level
-        rpcTempCU->setCUTransquantBypassSubParts(m_pcEncCfg->getCUTransquantBypassFlagValue(),     0, uhDepth);
-        rpcTempCU->setPartSizeSubParts(SIZE_2Nx2N, 0, uhDepth);             // interprets depth relative to LCU level
-        rpcTempCU->setMergeFlagSubParts(true, 0, 0, uhDepth);             // interprets depth relative to LCU level
-        rpcTempCU->setMergeIndexSubParts(uiMergeCand, 0, 0, uhDepth);             // interprets depth relative to LCU level
-        rpcTempCU->setInterDirSubParts(uhInterDirNeighbours[uiMergeCand], 0, 0, uhDepth);             // interprets depth relative to LCU level
-        rpcTempCU->getCUMvField(REF_PIC_LIST_0)->setAllMvField(cMvFieldNeighbours[0 + 2 * uiMergeCand], SIZE_2Nx2N, 0, 0);             // interprets depth relative to rpcTempCU level
-        rpcTempCU->getCUMvField(REF_PIC_LIST_1)->setAllMvField(cMvFieldNeighbours[1 + 2 * uiMergeCand], SIZE_2Nx2N, 0, 0);             // interprets depth relative to rpcTempCU level
-
-        // do MC
-        m_pcPredSearch->motionCompensation(rpcTempCU, m_ppcPredYuvMode[4][uhDepth]);
-
-        /*Todo: Fix the satd cost estimates. Why is merge being chosen in high motion areas: estimated distortion is too low?*/
-
-        m_pcPredSearch->encodeResAndCalcRdInterCU(rpcTempCU,
-                                                  m_ppcOrigYuv[uhDepth],
-                                                  m_ppcPredYuvMode[4][uhDepth],
-                                                  m_ppcResiYuvTemp[uhDepth],
-                                                  m_ppcResiYuvBest[uhDepth],
-                                                  m_ppcRecoYuvTemp[uhDepth],
-                                                  (true));
-
-        Int orgQP = rpcTempCU->getQP(0);
-
-        if (rpcTempCU->getTotalCost() < rpcBestCU->getTotalCost())
-        {
-            TComDataCU* tmp = rpcTempCU;
-            rpcTempCU = rpcBestCU;
-            rpcBestCU = tmp;
-            // Change Prediction data
-            TComYuv* pcYuv = NULL;
-            pcYuv =  m_ppcPredYuvMode[3][uhDepth];
-            m_ppcPredYuvMode[3][uhDepth]  = m_ppcPredYuvMode[4][uhDepth];
-            m_ppcPredYuvMode[4][uhDepth] = pcYuv;
-            pcYuv = m_ppcRecoYuvBest[uhDepth];
-            m_ppcRecoYuvBest[uhDepth] = m_ppcRecoYuvTemp[uhDepth];
-            m_ppcRecoYuvTemp[uhDepth] = pcYuv;         
-        }
-
-        rpcTempCU->initEstData(uhDepth, orgQP);
+        mergeCandBuffer[ui] = 0;
     }
 
-    me_merge.setSourcePU(0, rpcBestCU->getWidth(0), rpcBestCU->getHeight(0));
-   
-    rpcBestCU->getTotalCost() = me_merge.bufSATD((pixel*)m_ppcPredYuvMode[3][uhDepth]->getLumaAddr(),
-                                                       m_ppcPredYuvMode[3][uhDepth]->getStride());
-    x265_emms();
+    Bool bestIsSkip = false;
+
+    UInt iteration;
+    if (rpcTempCU->isLosslessCoded(0))
+    {
+        iteration = 1;
+    }
+    else
+    {
+        iteration = 2;
+    }
+
+    for (UInt uiNoResidual = 0; uiNoResidual < iteration; ++uiNoResidual)
+    {
+        for (UInt uiMergeCand = 0; uiMergeCand < numValidMergeCand; ++uiMergeCand)
+        {
+            if (!(uiNoResidual == 1 && mergeCandBuffer[uiMergeCand] == 1))
+            {
+                if (!(bestIsSkip && uiNoResidual == 0))
+                {
+                    // set MC parameters
+                    rpcTempCU->setPredModeSubParts(MODE_INTER, 0, uhDepth); // interprets depth relative to LCU level
+                    rpcTempCU->setCUTransquantBypassSubParts(m_pcEncCfg->getCUTransquantBypassFlagValue(),     0, uhDepth);
+                    rpcTempCU->setPartSizeSubParts(SIZE_2Nx2N, 0, uhDepth); // interprets depth relative to LCU level
+                    rpcTempCU->setMergeFlagSubParts(true, 0, 0, uhDepth); // interprets depth relative to LCU level
+                    rpcTempCU->setMergeIndexSubParts(uiMergeCand, 0, 0, uhDepth); // interprets depth relative to LCU level
+                    rpcTempCU->setInterDirSubParts(uhInterDirNeighbours[uiMergeCand], 0, 0, uhDepth); // interprets depth relative to LCU level
+                    rpcTempCU->getCUMvField(REF_PIC_LIST_0)->setAllMvField(cMvFieldNeighbours[0 + 2 * uiMergeCand], SIZE_2Nx2N, 0, 0); // interprets depth relative to rpcTempCU level
+                    rpcTempCU->getCUMvField(REF_PIC_LIST_1)->setAllMvField(cMvFieldNeighbours[1 + 2 * uiMergeCand], SIZE_2Nx2N, 0, 0); // interprets depth relative to rpcTempCU level
+
+                    // do MC
+                    m_pcPredSearch->motionCompensation(rpcTempCU, tmpPredYuv);
+
+                    /*Todo: Fix the satd cost estimates. Why is merge being chosen in high motion areas: estimated distortion is too low?*/
+
+                    UInt partEnum = PartitionFromSizes(rpcTempCU->getWidth(0), rpcTempCU->getHeight(0));
+                    UInt SATD = primitives.satd[partEnum]((pixel*)m_ppcOrigYuv[uhDepth]->getLumaAddr(), m_ppcOrigYuv[uhDepth]->getStride(),
+                                                          (pixel*)tmpPredYuv->getLumaAddr(), tmpPredYuv->getStride());
+                    x265_emms();
+                    rpcTempCU->getTotalDistortion() = SATD;
+                    rpcTempCU->getTotalCost() = SATD;
+
+                    if (uiNoResidual == 0)
+                    {
+                        if (rpcTempCU->getQtRootCbf(0) == 0)
+                        {
+                            mergeCandBuffer[uiMergeCand] = 1;
+                        }
+                    }
+
+                    rpcTempCU->setSkipFlagSubParts(rpcTempCU->getQtRootCbf(0) == 0, 0, uhDepth);
+                    Int orgQP = rpcTempCU->getQP(0);
+
+                    //xCheckBestMode(rpcBestCU, rpcTempCU, uhDepth);
+                    if (rpcTempCU->getTotalCost() < rpcBestCU->getTotalCost())
+                    {
+                        TComDataCU* tmp = rpcTempCU;
+                        rpcTempCU = rpcBestCU;
+                        rpcBestCU = tmp;
+                        // Change Prediction data
+                        TComYuv* pcYuv = NULL;
+                        pcYuv =  bestPredYuv;
+                        bestPredYuv  = tmpPredYuv;
+                        tmpPredYuv = pcYuv;
+                    }
+
+                    rpcTempCU->initEstData(uhDepth, orgQP);
+
+                    if (m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip)
+                    {
+                        bestIsSkip = rpcTempCU->getQtRootCbf(0) == 0;
+                    }
+                }
+            }
+        }
+    }
 }
 
 Void TEncCu::xComputeCostInter(TComDataCU*& rpcTempCU, PartSize ePartSize, UInt Index, Bool bUseMRG)
@@ -235,7 +417,7 @@
         /*Compute  Merge Cost  */
 #if 1
 
-        xComputeCostMerge2Nx2N(m_MergeBestCU[uiDepth], m_MergeCU[uiDepth]);
+        xComputeCostMerge2Nx2N(m_MergeBestCU[uiDepth], m_MergeCU[uiDepth],  m_ppcPredYuvMode[3][uiDepth], m_ppcPredYuvMode[4][uiDepth]);
         rpcBestCU = m_MergeBestCU[uiDepth];
         YuvTemp = m_ppcPredYuvMode[3][uiDepth];
         m_ppcPredYuvMode[3][uiDepth] = m_ppcPredYuvBest[uiDepth];
@@ -286,16 +468,32 @@
             m_ppcPredYuvBest[uiDepth] = YuvTemp;
         }
 
-        /* Perform encode residual for the best mode chosen only*/
-        if(m_MergeBestCU[uiDepth] != rpcBestCU){
         m_pcPredSearch->encodeResAndCalcRdInterCU(rpcBestCU, m_ppcOrigYuv[uiDepth], m_ppcPredYuvBest[uiDepth], m_ppcResiYuvTemp[uiDepth], m_ppcResiYuvBest[uiDepth], m_ppcRecoYuvBest[uiDepth], false);
-        }
-        else{
-            rpcBestCU->getTotalCost() =  m_pcRdCost->calcRdCost(rpcBestCU->getTotalDistortion(), rpcBestCU->getTotalBits());
+
+        /*compute intra cost */
+
+        /* if(rpcBestCU->getCbf(0, TEXT_LUMA) != 0   ||
+               rpcBestCU->getCbf(0, TEXT_CHROMA_U) != 0   ||
+               rpcBestCU->getCbf(0, TEXT_CHROMA_V) != 0)*/
+        {
+            xComputeCostIntrainInter(m_IntrainInterCU[uiDepth], SIZE_2Nx2N, 5);
+            xEncodeIntrainInter(m_IntrainInterCU[uiDepth], m_ppcOrigYuv[uiDepth], m_ppcPredYuvMode[5][uiDepth], m_ppcResiYuvTemp[uiDepth],  m_ppcRecoYuvTemp[uiDepth]);
+
+            if (m_IntrainInterCU[uiDepth]->getTotalCost() < rpcBestCU->getTotalCost())
+            {
+                rpcBestCU = m_IntrainInterCU[uiDepth];
+
+                YuvTemp = m_ppcPredYuvMode[5][uiDepth];
+                m_ppcPredYuvMode[5][uiDepth] = m_ppcPredYuvBest[uiDepth];
+                m_ppcPredYuvBest[uiDepth] = YuvTemp;
+                TComYuv* tmpPic = m_ppcRecoYuvBest[uiDepth];
+                m_ppcRecoYuvBest[uiDepth] =  m_ppcRecoYuvTemp[uiDepth];
+                m_ppcRecoYuvTemp[uiDepth] = tmpPic;
+            }
         }
 
         /* Disable recursive analysis for whole CUs temporarily*/
-        if (rpcBestCU->isSkipped(0))
+        if ((rpcBestCU != 0) && (rpcBestCU->isSkipped(0)))
         {
 #if CU_STAT_LOGFILE
             cntSkipCu[uiDepth]++;
@@ -321,7 +519,7 @@
 #if CU_STAT_LOGFILE
     if (rpcBestCU)
     {
-        fprintf(fp1, "\n Width : %d ,Inter 2Nx2N_Merge : %d , 2Nx2N : %d , 2NxN : %d, Nx2N : %d ", rpcBestCU->getWidth(0), m_MergeBestCU[uiDepth]->getTotalCost(), m_InterCU_2Nx2N[uiDepth]->getTotalCost(), m_InterCU_2NxN[uiDepth]->getTotalCost(), m_InterCU_Nx2N[uiDepth]->getTotalCost());
+        fprintf(fp1, "\n Width : %d ,Inter 2Nx2N_Merge : %d , 2Nx2N : %d , 2NxN : %d, Nx2N : %d , intra : %d", rpcBestCU->getWidth(0), m_MergeBestCU[uiDepth]->getTotalCost(), m_InterCU_2Nx2N[uiDepth]->getTotalCost(), m_InterCU_2NxN[uiDepth]->getTotalCost(), m_InterCU_Nx2N[uiDepth]->getTotalCost(), m_IntrainInterCU[uiDepth]->getTotalCost());
     }
 #endif
 
@@ -402,6 +600,43 @@
 
         m_pppcRDSbacCoder[uhNextDepth][CI_NEXT_BEST]->store(m_pppcRDSbacCoder[uiDepth][CI_TEMP_BEST]);
 
+#if  CU_STAT_LOGFILE
+        if (rpcBestCU != 0)
+        {
+            if (rpcBestCU->getTotalCost() < rpcTempCU->getTotalCost())
+            {
+                if (rpcBestCU->getPredictionMode(0) == MODE_INTER)
+                {
+                    cntInter[uiDepth]++;
+                    if (rpcBestCU->getPartitionSize(0) < 3)
+                    {
+                        cuInterDistribution[uiDepth][rpcBestCU->getPartitionSize(0)]++;
+                    }
+                    else
+                    {
+                        cuInterDistribution[uiDepth][3]++;
+                    }
+                }
+                else if (rpcBestCU->getPredictionMode(0) == MODE_INTRA)
+                {
+                    cntIntra[uiDepth]++;
+                    if (rpcBestCU->getLumaIntraDir()[0] > 1)
+                    {
+                        cuIntraDistribution[uiDepth][2]++;
+                    }
+                    else
+                    {
+                        cuIntraDistribution[uiDepth][rpcBestCU->getLumaIntraDir()[0]]++;
+                    }
+                }
+            }
+            else
+            {
+                cntSplit[uiDepth]++;
+            }
+        }
+#endif // if  LOGGING
+
         /*If Best Mode is not NULL; then compare costs. Else assign best mode to Sub-CU costs
         Copy Recon data from Temp structure to Best structure*/
         if (rpcBestCU)
@@ -422,39 +657,6 @@
             m_ppcRecoYuvTemp[uiDepth] = m_ppcRecoYuvBest[uiDepth];
             m_ppcRecoYuvBest[uiDepth] = YuvTemp;
         }
-#if  CU_STAT_LOGFILE
-        if (rpcBestCU->getTotalCost() < rpcTempCU->getTotalCost())
-        {
-            if (rpcBestCU->getPredictionMode(0) == MODE_INTER)
-            {
-                cntInter[uiDepth]++;
-                if (rpcBestCU->getPartitionSize(0) < 3)
-                {
-                    cuInterDistribution[uiDepth][rpcBestCU->getPartitionSize(0)]++;
-                }
-                else
-                {
-                    cuInterDistribution[uiDepth][3]++;
-                }
-            }
-            else if (rpcBestCU->getPredictionMode(0) == MODE_INTRA)
-            {
-                cntIntra[uiDepth]++;
-                if (rpcBestCU->getLumaIntraDir()[0] > 1)
-                {
-                    cuIntraDistribution[uiDepth][2]++;
-                }
-                else
-                {
-                    cuIntraDistribution[uiDepth][rpcBestCU->getLumaIntraDir()[0]]++;
-                }
-            }
-        }
-        else
-        {
-            cntSplit[uiDepth]++;
-        }
-#endif // if  LOGGING
     }
 
 #if CU_STAT_LOGFILE
-------------- next part --------------
A non-text attachment was scrubbed...
Name: July_3_HEVC.patch
Type: text/x-patch
Size: 34633 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20130703/7d93032f/attachment-0001.bin>


More information about the x265-devel mailing list