[x265] [PATCH] Added IntraInInter functions in compress.cpp and made minor modifications: changed a few protected variables to public in TEncSearch.h

sumalatha at multicorewareinc.com sumalatha at multicorewareinc.com
Mon Jul 1 13:42:49 CEST 2013


# HG changeset patch
# User sumalatha
# Date 1372678949 -19800
# Node ID 6acd8e9aaa3b19f863e68f21de0304ca48966ee0
# Parent  30c0e5591120904f989953e9c74038253d607e45
 Added IntraInInter functions in compress.cpp and made minor modifications: changed a few protected variables to public in TEncSearch.h

diff -r 30c0e5591120 -r 6acd8e9aaa3b source/Lib/TLibCommon/TComPrediction.h
--- a/source/Lib/TLibCommon/TComPrediction.h	Sun Jun 30 00:03:01 2013 -0500
+++ b/source/Lib/TLibCommon/TComPrediction.h	Mon Jul 01 17:12:29 2013 +0530
@@ -64,9 +64,6 @@
     Int       m_iPredBufStride;
     Int       m_iPredBufHeight;
 
-    //reference sample for IntraPrediction
-    Pel *refAbove, *refAboveFlt, *refLeft, *refLeftFlt;
-
     TComYuv   m_acYuvPred[2];
     TShortYUV   m_acShortPred[2];
     TComYuv   m_cYuvPredTemp;
@@ -99,6 +96,9 @@
     TComPrediction();
     virtual ~TComPrediction();
 
+    //reference sample for IntraPrediction
+    Pel *refAbove, *refAboveFlt, *refLeft, *refLeftFlt;
+
     Void    initTempBuff();
 
     // inter
diff -r 30c0e5591120 -r 6acd8e9aaa3b source/Lib/TLibEncoder/TEncCu.h
--- a/source/Lib/TLibEncoder/TEncCu.h	Sun Jun 30 00:03:01 2013 -0500
+++ b/source/Lib/TLibEncoder/TEncCu.h	Mon Jul 01 17:12:29 2013 +0530
@@ -73,7 +73,7 @@
     TComDataCU**            m_InterCU_Nx2N;
     TComDataCU**            m_IntrainInterCU;
     TComDataCU**            m_MergeCU;
-    TComDataCU**            m_MergeBestCU; 
+    TComDataCU**            m_MergeBestCU;
     TComDataCU**            m_ppcBestCU;    ///< Best CUs in each depth
     TComDataCU**            m_ppcTempCU;    ///< Temporary CUs in each depth
     UChar                   m_uhTotalDepth;
@@ -111,11 +111,17 @@
     Bool                    m_abortFlag; // This flag is used to abort the recursive CU check when the child CU cost is greater than the parent CU
 
 public:
+
     Void set_pppcRDSbacCoder(TEncSbac*** pppcRDSbacCoder) { m_pppcRDSbacCoder = pppcRDSbacCoder; }
+
     Void set_pcEntropyCoder(TEncEntropy* pcEntropyCoder) { m_pcEntropyCoder = pcEntropyCoder; }
+
     Void set_pcPredSearch(TEncSearch* pcPredSearch) { m_pcPredSearch = pcPredSearch; }
+
     Void set_pcRDGoOnSbacCoder(TEncSbac* pcRDGoOnSbacCoder) { m_pcRDGoOnSbacCoder = pcRDGoOnSbacCoder; }
+
     Void set_pcTrQuant(TComTrQuant* pcTrQuant) { m_pcTrQuant = pcTrQuant; }
+
     Void set_pcRdCost(TComRdCost* pcRdCost) { m_pcRdCost = pcRdCost; }
 
     /// copy parameters from encoder class
@@ -147,12 +153,13 @@
 
     Int   xComputeQP(TComDataCU* pcCU, UInt uiDepth);
     Void  xCheckBestMode(TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, UInt uiDepth);
-    
+
     Void  xCheckRDCostMerge2Nx2N(TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, Bool *earlyDetectionSkipMode);
     Void  xComputeCostMerge2Nx2N(TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU);
-    Void  xComputeCostIntrainInter(TComDataCU*& rpcTempCU, PartSize eSize,UInt index);
+    Void  xComputeCostIntrainInter(TComDataCU*& rpcTempCU, PartSize eSize, UInt index);
     Void  xCheckRDCostInter(TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, PartSize ePartSize, Bool bUseMRG = false);
     Void  xComputeCostInter(TComDataCU*& rpcTempCU, PartSize ePartSize, UInt Index, Bool bUseMRG = false);
+    Void  xEncodeIntrainInter(TComDataCU*& pcCU, TComYuv* pcYuvOrg, TComYuv* pcYuvPred, TShortYUV*& rpcYuvResi, TComYuv*& rpcYuvRec);
     Void  xCheckRDCostIntra(TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, PartSize ePartSize);
     Void  xCheckRDCostIntrainInter(TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, PartSize eSize);
     Void  xCheckDQP(TComDataCU* pcCU);
diff -r 30c0e5591120 -r 6acd8e9aaa3b source/Lib/TLibEncoder/TEncSearch.h
--- a/source/Lib/TLibEncoder/TEncSearch.h	Sun Jun 30 00:03:01 2013 -0500
+++ b/source/Lib/TLibEncoder/TEncSearch.h	Mon Jul 01 17:12:29 2013 +0530
@@ -65,7 +65,7 @@
 /// encoder search class
 class TEncSearch : public TComPrediction
 {
-private:
+public:
 
     x265::MotionEstimate m_me;
     x265::BitCost        m_bc; // TODO: m_bc will go away with HM ME
@@ -117,8 +117,7 @@
     x265::MV        m_acMvPredictors[3];
 
     // RD computation
-    TEncSbac***     m_pppcRDSbacCoder;
-    TEncSbac*       m_pcRDGoOnSbacCoder;
+
     DistParam       m_cDistParam;
 
     // Misc.
@@ -130,6 +129,9 @@
 
 public:
 
+    TEncSbac***     m_pppcRDSbacCoder;
+    TEncSbac*       m_pcRDGoOnSbacCoder;
+
     Void set_pppcRDSbacCoder(TEncSbac*** pppcRDSbacCoder) { m_pppcRDSbacCoder = pppcRDSbacCoder; }
 
     Void set_pcEntropyCoder(TEncEntropy* pcEntropyCoder) { m_pcEntropyCoder = pcEntropyCoder; }
@@ -142,6 +144,8 @@
     virtual ~TEncSearch();
 
     Void init(TEncCfg* pcEncCfg, TComRdCost* pcRdCost, TComTrQuant *pcTrQuant);
+    UInt  xModeBitsIntra(TComDataCU* pcCU, UInt uiMode, UInt uiPU, UInt uiPartOffset, UInt uiDepth, UInt uiInitTrDepth);
+    UInt  xUpdateCandList(UInt uiMode, UInt64 uiCost, UInt uiFastCandNum, UInt * CandModeList, UInt64 * CandCostList);
 
 protected:
 
@@ -215,6 +219,23 @@
     Void IPCMSearch(TComDataCU* pcCU, TComYuv* pcOrgYuv, TComYuv*& rpcPredYuv, TShortYUV*& rpcResiYuv, TComYuv*& rpcRecoYuv);
 
     UInt estimateHeaderBits(TComDataCU* pcCU, UInt uiAbsPartIdx);
+    Void  xRecurIntraCodingQT(TComDataCU * pcCU,
+                              UInt         uiTrDepth,
+                              UInt         uiAbsPartIdx,
+                              Bool         bLumaOnly,
+                              TComYuv *    pcOrgYuv,
+                              TComYuv *    pcPredYuv,
+                              TShortYUV *  pcResiYuv,
+                              UInt &       ruiDistY,
+                              UInt &       ruiDistC,
+                              Bool         bCheckFirst,
+                              UInt64 &     dRDCost);
+
+    Void  xSetIntraResultQT(TComDataCU* pcCU,
+                            UInt        uiTrDepth,
+                            UInt        uiAbsPartIdx,
+                            Bool        bLumaOnly,
+                            TComYuv*    pcRecoYuv);
 
 protected:
 
@@ -268,24 +289,6 @@
                                 UInt        uiChromaId,
                                 Int         default0Save1Load2 = 0);
 
-    Void  xRecurIntraCodingQT(TComDataCU * pcCU,
-                              UInt         uiTrDepth,
-                              UInt         uiAbsPartIdx,
-                              Bool         bLumaOnly,
-                              TComYuv *    pcOrgYuv,
-                              TComYuv *    pcPredYuv,
-                              TShortYUV *  pcResiYuv,
-                              UInt &       ruiDistY,
-                              UInt &       ruiDistC,
-                              Bool         bCheckFirst,
-                              UInt64 &     dRDCost);
-
-    Void  xSetIntraResultQT(TComDataCU* pcCU,
-                            UInt        uiTrDepth,
-                            UInt        uiAbsPartIdx,
-                            Bool        bLumaOnly,
-                            TComYuv*    pcRecoYuv);
-
     Void  xRecurIntraChromaCodingQT(TComDataCU* pcCU,
                                     UInt        uiTrDepth,
                                     UInt        uiAbsPartIdx,
@@ -408,17 +411,17 @@
                         x265::MV&    rcMv,
                         UInt&        ruiSAD);
 
-    Void xPatternSearchFracDIF(TComDataCU* pcCU,
+    Void xPatternSearchFracDIF(TComDataCU*  pcCU,
                                TComPattern* pcPatternKey,
-                               Pel* piRefY,
-                               Int iRefStride,
-                               x265::MV* pcMvInt,
-                               x265::MV& rcMvHalf,
-                               x265::MV& rcMvQter,
-                               UInt& ruiCost,
-                               Bool biPred,
-                               TComPicYuv* refPic,
-                               UInt uiPartAddr);
+                               Pel*         piRefY,
+                               Int          iRefStride,
+                               x265::MV*    pcMvInt,
+                               x265::MV&    rcMvHalf,
+                               x265::MV&    rcMvQter,
+                               UInt&        ruiCost,
+                               Bool         biPred,
+                               TComPicYuv*  refPic,
+                               UInt         uiPartAddr);
 
     Void xExtDIFUpSamplingH(TComPattern* pcPattern, Bool biPred);
     Void xExtDIFUpSamplingQ(TComPattern* pcPatternKey, x265::MV halfPelRef, Bool biPred);
@@ -431,9 +434,6 @@
     Void xEstimateResidualQT(TComDataCU* pcCU, UInt uiQuadrant, UInt uiAbsPartIdx, UInt absTUPartIdx, TShortYUV* pcResi, const UInt uiDepth, UInt64 &rdCost, UInt &ruiBits, UInt &ruiDist, UInt *puiZeroDist);
     Void xSetResidualQTData(TComDataCU* pcCU, UInt uiQuadrant, UInt uiAbsPartIdx, UInt absTUPartIdx, TShortYUV* pcResi, UInt uiDepth, Bool bSpatial);
 
-    UInt  xModeBitsIntra(TComDataCU* pcCU, UInt uiMode, UInt uiPU, UInt uiPartOffset, UInt uiDepth, UInt uiInitTrDepth);
-    UInt  xUpdateCandList(UInt uiMode, UInt64 uiCost, UInt uiFastCandNum, UInt * CandModeList, UInt64 * CandCostList);
-
     // -------------------------------------------------------------------------------------------------------------------
     // compute symbol bits
     // -------------------------------------------------------------------------------------------------------------------
diff -r 30c0e5591120 -r 6acd8e9aaa3b source/encoder/compress.cpp
--- a/source/encoder/compress.cpp	Sun Jun 30 00:03:01 2013 -0500
+++ b/source/encoder/compress.cpp	Mon Jul 01 17:12:29 2013 +0530
@@ -35,45 +35,222 @@
 extern FILE* fp1;
 #endif
 
-Void TEncCu::xComputeCostIntrainInter(TComDataCU*& rpcTempCU, PartSize eSize, UInt index)
+Void TEncCu::xEncodeIntrainInter(TComDataCU*& pcCU, TComYuv* pcYuvOrg, TComYuv* pcYuvPred,  TShortYUV*& rpcYuvResi, TComYuv*& rpcYuvRec)
 {
-    //PPAScopeEvent(TEncCU_xCheckRDCostIntra + uiDepth);
+    UInt64   dPUCost = 0;
+    UInt   uiPUDistY = 0;
+    UInt   uiPUDistC = 0;
+    UInt   uiDepth = pcCU->getDepth(0);
+    UInt    uiInitTrDepth  = pcCU->getPartitionSize(0) == SIZE_2Nx2N ? 0 : 1;
 
-    UChar uiDepth = rpcTempCU->getDepth(0);
-    rpcTempCU->setSkipFlagSubParts(false, 0, uiDepth);
-    rpcTempCU->setPartSizeSubParts(eSize, 0, uiDepth);
-    rpcTempCU->setPredModeSubParts(MODE_INTRA, 0, uiDepth);
-    rpcTempCU->setCUTransquantBypassSubParts(m_pcEncCfg->getCUTransquantBypassFlagValue(), 0, uiDepth);
+    // set context models
+    m_pcPredSearch->m_pcRDGoOnSbacCoder->load(m_pcPredSearch->m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST]);
 
-    UInt uiPreCalcDistC = 0;
+    m_pcPredSearch->xRecurIntraCodingQT(pcCU, uiInitTrDepth, 0, true, pcYuvOrg, pcYuvPred, rpcYuvResi, uiPUDistY, uiPUDistC, false, dPUCost);
 
-    m_pcPredSearch->estIntraPredQT(rpcTempCU, m_ppcOrigYuv[uiDepth],  m_ppcPredYuvMode[index][uiDepth], m_ppcResiYuvTemp[uiDepth], m_ppcRecoYuvTemp[uiDepth], uiPreCalcDistC, true);
+    m_pcPredSearch->xSetIntraResultQT(pcCU, uiInitTrDepth, 0, true, rpcYuvRec);
 
-    m_ppcRecoYuvTemp[uiDepth]->copyToPicLuma(rpcTempCU->getPic()->getPicYuvRec(), rpcTempCU->getAddr(), rpcTempCU->getZorderIdxInCU());
+    UInt uiQPartNum = pcCU->getPic()->getNumPartInCU() >> ((pcCU->getDepth(0) + uiInitTrDepth) << 1);
+    ::memcpy(m_pcPredSearch->m_puhQTTempTrIdx,  pcCU->getTransformIdx()       + 0, uiQPartNum * sizeof(UChar));
+    ::memcpy(m_pcPredSearch->m_puhQTTempCbf[0], pcCU->getCbf(TEXT_LUMA) + 0, uiQPartNum * sizeof(UChar));
+    ::memcpy(m_pcPredSearch->m_puhQTTempCbf[1], pcCU->getCbf(TEXT_CHROMA_U) + 0, uiQPartNum * sizeof(UChar));
+    ::memcpy(m_pcPredSearch->m_puhQTTempCbf[2], pcCU->getCbf(TEXT_CHROMA_V) + 0, uiQPartNum * sizeof(UChar));
+    ::memcpy(m_pcPredSearch->m_puhQTTempTransformSkipFlag[0], pcCU->getTransformSkip(TEXT_LUMA)     + 0, uiQPartNum * sizeof(UChar));
+    ::memcpy(m_pcPredSearch->m_puhQTTempTransformSkipFlag[1], pcCU->getTransformSkip(TEXT_CHROMA_U) + 0, uiQPartNum * sizeof(UChar));
+    ::memcpy(m_pcPredSearch->m_puhQTTempTransformSkipFlag[2], pcCU->getTransformSkip(TEXT_CHROMA_V) + 0, uiQPartNum * sizeof(UChar));
 
-    m_pcPredSearch->estIntraPredChromaQT(rpcTempCU, m_ppcOrigYuv[uiDepth], m_ppcPredYuvMode[index][uiDepth], m_ppcResiYuvTemp[uiDepth], m_ppcRecoYuvTemp[uiDepth], uiPreCalcDistC);
+    //--- update transform index and cbf ---
+
+    ::memcpy(pcCU->getTransformIdx()       + 0, m_pcPredSearch->m_puhQTTempTrIdx,  uiQPartNum * sizeof(UChar));
+    ::memcpy(pcCU->getCbf(TEXT_LUMA) + 0, m_pcPredSearch->m_puhQTTempCbf[0], uiQPartNum * sizeof(UChar));
+    ::memcpy(pcCU->getCbf(TEXT_CHROMA_U) + 0, m_pcPredSearch->m_puhQTTempCbf[1], uiQPartNum * sizeof(UChar));
+    ::memcpy(pcCU->getCbf(TEXT_CHROMA_V) + 0, m_pcPredSearch->m_puhQTTempCbf[2], uiQPartNum * sizeof(UChar));
+    ::memcpy(pcCU->getTransformSkip(TEXT_LUMA)     + 0, m_pcPredSearch->m_puhQTTempTransformSkipFlag[0], uiQPartNum * sizeof(UChar));
+    ::memcpy(pcCU->getTransformSkip(TEXT_CHROMA_U) + 0, m_pcPredSearch->m_puhQTTempTransformSkipFlag[1], uiQPartNum * sizeof(UChar));
+    ::memcpy(pcCU->getTransformSkip(TEXT_CHROMA_V) + 0, m_pcPredSearch->m_puhQTTempTransformSkipFlag[2], uiQPartNum * sizeof(UChar));
+
+    //=== update PU data ====
+    pcCU->copyToPic(pcCU->getDepth(0), 0, uiInitTrDepth);
+    //===== reset context models =====
+    m_pcRDGoOnSbacCoder->load(m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST]);
+
+    //===== set distortion (rate and r-d costs are determined later) =====
+    pcCU->getTotalDistortion() = uiPUDistY + uiPUDistC;
+
+    rpcYuvRec->copyToPicLuma(pcCU->getPic()->getPicYuvRec(), pcCU->getAddr(), pcCU->getZorderIdxInCU());
+
+    m_pcPredSearch->estIntraPredChromaQT(pcCU, m_ppcOrigYuv[uiDepth], m_ppcPredYuvTemp[uiDepth], m_ppcResiYuvTemp[uiDepth], m_ppcRecoYuvTemp[uiDepth], uiPUDistC);
 
     m_pcEntropyCoder->resetBits();
-    if (rpcTempCU->getSlice()->getPPS()->getTransquantBypassEnableFlag())
+    if (pcCU->getSlice()->getPPS()->getTransquantBypassEnableFlag())
     {
-        m_pcEntropyCoder->encodeCUTransquantBypassFlag(rpcTempCU, 0, true);
+        m_pcEntropyCoder->encodeCUTransquantBypassFlag(pcCU, 0, true);
     }
-    m_pcEntropyCoder->encodeSkipFlag(rpcTempCU, 0,          true);
-    m_pcEntropyCoder->encodePredMode(rpcTempCU, 0,          true);
-    m_pcEntropyCoder->encodePartSize(rpcTempCU, 0, uiDepth, true);
-    m_pcEntropyCoder->encodePredInfo(rpcTempCU, 0,          true);
-    m_pcEntropyCoder->encodeIPCMInfo(rpcTempCU, 0, true);
+    m_pcEntropyCoder->encodeSkipFlag(pcCU, 0,          true);
+    m_pcEntropyCoder->encodePredMode(pcCU, 0,          true);
+    m_pcEntropyCoder->encodePartSize(pcCU, 0, uiDepth, true);
+    m_pcEntropyCoder->encodePredInfo(pcCU, 0,          true);
+    m_pcEntropyCoder->encodeIPCMInfo(pcCU, 0, true);
 
     // Encode Coefficients
     Bool bCodeDQP = getdQPFlag();
-    // m_pcEntropyCoder->encodeCoeff(rpcTempCU, 0, uiDepth, rpcTempCU->getWidth(0), rpcTempCU->getHeight(0), bCodeDQP);
+    m_pcEntropyCoder->encodeCoeff(pcCU, 0, uiDepth, pcCU->getWidth(0), pcCU->getHeight(0), bCodeDQP);
     setdQPFlag(bCodeDQP);
 
     m_pcRDGoOnSbacCoder->store(m_pppcRDSbacCoder[uiDepth][CI_TEMP_BEST]);
 
-    rpcTempCU->getTotalBits() = m_pcEntropyCoder->getNumberOfWrittenBits();
-    rpcTempCU->getTotalBins() = ((TEncBinCABAC*)((TEncSbac*)m_pcEntropyCoder->m_pcEntropyCoderIf)->getEncBinIf())->getBinsCoded();
-    rpcTempCU->getTotalCost() = m_pcRdCost->calcRdCost(rpcTempCU->getTotalDistortion(), rpcTempCU->getTotalBits());
+    pcCU->getTotalBits() = m_pcEntropyCoder->getNumberOfWrittenBits();
+    pcCU->getTotalBins() = ((TEncBinCABAC*)((TEncSbac*)m_pcEntropyCoder->m_pcEntropyCoderIf)->getEncBinIf())->getBinsCoded();
+    pcCU->getTotalCost() = m_pcRdCost->calcRdCost(pcCU->getTotalDistortion(), pcCU->getTotalBits());
+}
+
+Void TEncCu::xComputeCostIntrainInter(TComDataCU*& pcCU, PartSize eSize, UInt index)
+{
+    UInt    uiDepth        = pcCU->getDepth(0);
+    UInt    uiInitTrDepth  = pcCU->getPartitionSize(0) == SIZE_2Nx2N ? 0 : 1;
+    UInt    uiWidth        = pcCU->getWidth(0) >> uiInitTrDepth;
+    UInt    uiWidthBit     = pcCU->getIntraSizeIdx(0);
+    UInt64  CandCostList[FAST_UDI_MAX_RDMODE_NUM];
+    UInt    CandNum;
+
+    UInt uiPartOffset = 0;
+
+    pcCU->setSkipFlagSubParts(false, 0, uiDepth);
+    pcCU->setPartSizeSubParts(eSize, 0, uiDepth);
+    pcCU->setPredModeSubParts(MODE_INTRA, 0, uiDepth);
+    pcCU->setCUTransquantBypassSubParts(m_pcEncCfg->getCUTransquantBypassFlagValue(), 0, uiDepth);
+
+    //===== init pattern for luma prediction =====
+    pcCU->getPattern()->initPattern(pcCU, uiInitTrDepth, uiPartOffset);
+    // Reference sample smoothing
+    pcCU->getPattern()->initAdiPattern(pcCU, uiPartOffset, uiInitTrDepth, m_pcPredSearch->getPredicBuf(),  m_pcPredSearch->getPredicBufWidth(),  m_pcPredSearch->getPredicBufHeight(), m_pcPredSearch->refAbove, m_pcPredSearch->refLeft, m_pcPredSearch->refAboveFlt, m_pcPredSearch->refLeftFlt);
+
+    //===== determine set of modes to be tested (using prediction signal only) =====
+    UInt numModesAvailable = 35; //total number of Intra modes
+    Pel* piOrg         = m_ppcOrigYuv[uiDepth]->getLumaAddr(0, uiWidth);
+    Pel* piPred        = m_ppcPredYuvMode[5][uiDepth]->getLumaAddr(0, uiWidth);
+    UInt uiStride      = m_ppcPredYuvMode[5][uiDepth]->getStride();
+    UInt uiRdModeList[FAST_UDI_MAX_RDMODE_NUM];
+    UInt numModesForFullRD = g_aucIntraModeNumFast[uiWidthBit];
+    Int nLog2SizeMinus2 = g_aucConvertToBit[uiWidth];
+    x265::pixelcmp sa8d = x265::primitives.sa8d[nLog2SizeMinus2];
+    {
+        assert(numModesForFullRD < numModesAvailable);
+
+        for (UInt i = 0; i < numModesForFullRD; i++)
+        {
+            CandCostList[i] = MAX_INT64;
+        }
+
+        CandNum = 0;
+        UInt uiSads[35];
+        Bool bFilter = (uiWidth <= 16);
+        Pel *ptrSrc = m_pcPredSearch->getPredicBuf();
+
+        // 1
+        primitives.getIPredDC((pixel*)ptrSrc + ADI_BUF_STRIDE + 1, ADI_BUF_STRIDE, (pixel*)piPred, uiStride, uiWidth, bFilter);
+        uiSads[DC_IDX] = sa8d((pixel*)piOrg, uiStride, (pixel*)piPred, uiStride);
+
+        // 0
+        if (uiWidth >= 8 && uiWidth <= 32)
+        {
+            ptrSrc += ADI_BUF_STRIDE * (2 * uiWidth + 1);
+        }
+        primitives.getIPredPlanar((pixel*)ptrSrc + ADI_BUF_STRIDE + 1, ADI_BUF_STRIDE, (pixel*)piPred, uiStride, uiWidth);
+        uiSads[PLANAR_IDX] = sa8d((pixel*)piOrg, uiStride, (pixel*)piPred, uiStride);
+
+        // 33 Angle modes once
+        if (uiWidth <= 16)
+        {
+            ALIGN_VAR_32(Pel, buf1[MAX_CU_SIZE * MAX_CU_SIZE]);
+            ALIGN_VAR_32(Pel, tmp[33 * MAX_CU_SIZE * MAX_CU_SIZE]);
+
+            // Transpose NxN
+            x265::primitives.transpose[nLog2SizeMinus2]((pixel*)buf1, (pixel*)piOrg, uiStride);
+
+            Pel *pAbove0 = m_pcPredSearch->refAbove    + uiWidth - 1;
+            Pel *pAbove1 = m_pcPredSearch->refAboveFlt + uiWidth - 1;
+            Pel *pLeft0  = m_pcPredSearch->refLeft     + uiWidth - 1;
+            Pel *pLeft1  = m_pcPredSearch->refLeftFlt  + uiWidth - 1;
+
+            x265::primitives.getIPredAngs[nLog2SizeMinus2]((pixel*)tmp, (pixel*)pAbove0, (pixel*)pLeft0, (pixel*)pAbove1, (pixel*)pLeft1, (uiWidth <= 16));
+
+            // TODO: We need SATD_x4 here
+            for (UInt uiMode = 2; uiMode < numModesAvailable; uiMode++)
+            {
+                bool modeHor = (uiMode < 18);
+                Pel *pSrc = (modeHor ? buf1 : piOrg);
+                intptr_t srcStride = (modeHor ? uiWidth : uiStride);
+
+                // use hadamard transform here
+                UInt uiSad = sa8d((pixel*)pSrc, srcStride, (pixel*)&tmp[(uiMode - 2) * (uiWidth * uiWidth)], uiWidth);
+                uiSads[uiMode] = uiSad;
+            }
+        }
+        else
+        {
+            for (UInt uiMode = 2; uiMode < numModesAvailable; uiMode++)
+            {
+                m_pcPredSearch->predIntraLumaAng(pcCU->getPattern(), uiMode, piPred, uiStride, uiWidth);
+
+                // use hadamard transform here
+                UInt uiSad = sa8d((pixel*)piOrg, uiStride, (pixel*)piPred, uiStride);
+                uiSads[uiMode] = uiSad;
+            }
+        }
+
+        for (UInt uiMode = 0; uiMode < numModesAvailable; uiMode++)
+        {
+            UInt uiSad = uiSads[uiMode];
+            UInt iModeBits = m_pcPredSearch->xModeBitsIntra(pcCU, uiMode, 0, uiPartOffset, uiDepth, uiInitTrDepth);
+            UInt64 cost = m_pcRdCost->calcRdSADCost(uiSad, iModeBits);
+            CandNum += m_pcPredSearch->xUpdateCandList(uiMode, cost, numModesForFullRD, uiRdModeList, CandCostList);      //Find N least cost  modes. N = numModesForFullRD
+        }
+
+        Int uiPreds[3] = { -1, -1, -1 };
+        Int iMode = -1;
+        Int numCand = pcCU->getIntraDirLumaPredictor(uiPartOffset, uiPreds, &iMode);
+        if (iMode >= 0)
+        {
+            numCand = iMode;
+        }
+
+        for (Int j = 0; j < numCand; j++)
+        {
+            Bool mostProbableModeIncluded = false;
+            UInt mostProbableMode = uiPreds[j];
+
+            for (UInt i = 0; i < numModesForFullRD; i++)
+            {
+                mostProbableModeIncluded |= (mostProbableMode == uiRdModeList[i]);
+            }
+
+            if (!mostProbableModeIncluded)
+            {
+                uiRdModeList[numModesForFullRD++] = mostProbableMode;
+            }
+        }
+    }
+
+    //determine predyuv for the best mode
+    UInt uiOrgMode = uiRdModeList[0];
+
+    pcCU->setLumaIntraDirSubParts(uiOrgMode, uiPartOffset, uiDepth + uiInitTrDepth);
+
+    // set context models
+    m_pcRDGoOnSbacCoder->load(m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST]);
+
+    // determine residual for partition
+    UInt   uiPUDistY = 0;
+    UInt   uiPUDistC = 0;
+    UInt64 dPUCost   = 0;
+    m_pcPredSearch->xRecurIntraCodingQT(pcCU, uiInitTrDepth, uiPartOffset, true, m_ppcOrigYuv[uiDepth], m_ppcPredYuvMode[5][uiDepth], m_ppcResiYuvTemp[uiDepth], uiPUDistY, uiPUDistC, false, dPUCost);
+
+    UInt partEnum = PartitionFromSizes(pcCU->getWidth(0), pcCU->getHeight(0));
+    UInt SATD = primitives.satd[partEnum]((pixel*)m_ppcOrigYuv[uiDepth]->getLumaAddr(), m_ppcOrigYuv[uiDepth]->getStride(),
+                                          (pixel*)m_ppcPredYuvMode[index][uiDepth]->getLumaAddr(),  m_ppcPredYuvMode[index][uiDepth]->getStride());
+
+    pcCU->getTotalCost() = SATD;
 }
 
 /** check RD costs for a CU block encoded with merge
@@ -141,16 +318,16 @@
             m_ppcPredYuvMode[4][uhDepth] = pcYuv;
             pcYuv = m_ppcRecoYuvBest[uhDepth];
             m_ppcRecoYuvBest[uhDepth] = m_ppcRecoYuvTemp[uhDepth];
-            m_ppcRecoYuvTemp[uhDepth] = pcYuv;         
+            m_ppcRecoYuvTemp[uhDepth] = pcYuv;
         }
 
         rpcTempCU->initEstData(uhDepth, orgQP);
     }
 
     me_merge.setSourcePU(0, rpcBestCU->getWidth(0), rpcBestCU->getHeight(0));
-   
+
     rpcBestCU->getTotalCost() = me_merge.bufSATD((pixel*)m_ppcPredYuvMode[3][uhDepth]->getLumaAddr(),
-                                                       m_ppcPredYuvMode[3][uhDepth]->getStride());
+                                                 m_ppcPredYuvMode[3][uhDepth]->getStride());
     x265_emms();
 }
 
@@ -258,6 +435,8 @@
             xComputeCostInter(m_InterCU_Nx2N[uiDepth], SIZE_Nx2N, 1);
             xComputeCostInter(m_InterCU_2NxN[uiDepth], SIZE_2NxN, 2);
         }
+        /*compute intra cost */
+        xComputeCostIntrainInter(m_IntrainInterCU[uiDepth], SIZE_2Nx2N, 5);
 
         /*Choose best mode; initialise rpcBestCU to 2Nx2N*/
         if (m_InterCU_2Nx2N[uiDepth]->getTotalCost() < rpcBestCU->getTotalCost())
@@ -286,12 +465,27 @@
             m_ppcPredYuvBest[uiDepth] = YuvTemp;
         }
 
-        /* Perform encode residual for the best mode chosen only*/
-        if(m_MergeBestCU[uiDepth] != rpcBestCU){
-        m_pcPredSearch->encodeResAndCalcRdInterCU(rpcBestCU, m_ppcOrigYuv[uiDepth], m_ppcPredYuvBest[uiDepth], m_ppcResiYuvTemp[uiDepth], m_ppcResiYuvBest[uiDepth], m_ppcRecoYuvBest[uiDepth], false);
+        if (m_IntrainInterCU[uiDepth]->getTotalCost() < rpcBestCU->getTotalCost())
+        {
+            rpcBestCU = m_IntrainInterCU[uiDepth];
+
+            YuvTemp = m_ppcPredYuvMode[5][uiDepth];
+            m_ppcPredYuvMode[5][uiDepth] = m_ppcPredYuvBest[uiDepth];
+            m_ppcPredYuvBest[uiDepth] = YuvTemp;
+            xEncodeIntrainInter(rpcBestCU, m_ppcOrigYuv[uiDepth], m_ppcPredYuvBest[uiDepth], m_ppcResiYuvBest[uiDepth],  m_ppcRecoYuvBest[uiDepth]);
         }
-        else{
-            rpcBestCU->getTotalCost() =  m_pcRdCost->calcRdCost(rpcBestCU->getTotalDistortion(), rpcBestCU->getTotalBits());
+
+        else
+        {
+            /* Perform encode residual for the best mode chosen only*/
+            if (m_MergeBestCU[uiDepth] != rpcBestCU)
+            {
+                m_pcPredSearch->encodeResAndCalcRdInterCU(rpcBestCU, m_ppcOrigYuv[uiDepth], m_ppcPredYuvBest[uiDepth], m_ppcResiYuvTemp[uiDepth], m_ppcResiYuvBest[uiDepth], m_ppcRecoYuvBest[uiDepth], false);
+            }
+            else
+            {
+                rpcBestCU->getTotalCost() =  m_pcRdCost->calcRdCost(rpcBestCU->getTotalDistortion(), rpcBestCU->getTotalBits());
+            }
         }
 
         /* Disable recursive analysis for whole CUs temporarily*/
@@ -321,7 +515,7 @@
 #if CU_STAT_LOGFILE
     if (rpcBestCU)
     {
-        fprintf(fp1, "\n Width : %d ,Inter 2Nx2N_Merge : %d , 2Nx2N : %d , 2NxN : %d, Nx2N : %d ", rpcBestCU->getWidth(0), m_MergeBestCU[uiDepth]->getTotalCost(), m_InterCU_2Nx2N[uiDepth]->getTotalCost(), m_InterCU_2NxN[uiDepth]->getTotalCost(), m_InterCU_Nx2N[uiDepth]->getTotalCost());
+        fprintf(fp1, "\n Width : %d ,Inter 2Nx2N_Merge : %d , 2Nx2N : %d , 2NxN : %d, Nx2N : %d , intra : %d", rpcBestCU->getWidth(0), m_MergeBestCU[uiDepth]->getTotalCost(), m_InterCU_2Nx2N[uiDepth]->getTotalCost(), m_InterCU_2NxN[uiDepth]->getTotalCost(), m_InterCU_Nx2N[uiDepth]->getTotalCost(), m_IntrainInterCU[uiDepth]->getTotalCost());
     }
 #endif
 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: July_1_HEVC.patch
Type: text/x-patch
Size: 26249 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20130701/50185372/attachment-0001.bin>


More information about the x265-devel mailing list