[x265] [PATCH 1 of 4] analysis: cleanup intra analysis functions

kavitha at multicorewareinc.com kavitha at multicorewareinc.com
Wed Oct 14 08:35:45 CEST 2015


# HG changeset patch
# User Kavitha Sampath <kavitha at multicorewareinc.com>
# Date 1444628646 -19800
#      Mon Oct 12 11:14:06 2015 +0530
# Node ID c7e8ab6338aa97067f5823f4547b288dde0aeb54
# Parent  b6156a08b1def3584647f26096866c1a0c11e54a
analysis: cleanup intra analysis functions

During the analysis-load mode, the predicted luma and chroma modes saved in
shared buffers are no longer passed explicitly to intra analysis functions.
Instead the predicted modes are copied to parentCTU, thereby enabling the intra
analysis functions to decide implicitly if modes should be reused.

diff -r b6156a08b1de -r c7e8ab6338aa source/common/cudata.cpp
--- a/source/common/cudata.cpp	Fri Oct 09 20:45:59 2015 +0530
+++ b/source/common/cudata.cpp	Mon Oct 12 11:14:06 2015 +0530
@@ -199,6 +199,7 @@
     m_qp        = (int8_t*)charBuf; charBuf += m_numPartitions;
     m_log2CUSize         = charBuf; charBuf += m_numPartitions;
     m_lumaIntraDir       = charBuf; charBuf += m_numPartitions;
+    m_chromaIntraDir     = charBuf; charBuf += m_numPartitions;
     m_tqBypass           = charBuf; charBuf += m_numPartitions;
     m_refIdx[0] = (int8_t*)charBuf; charBuf += m_numPartitions;
     m_refIdx[1] = (int8_t*)charBuf; charBuf += m_numPartitions;
@@ -216,7 +217,6 @@
     m_cbf[0]             = charBuf; charBuf += m_numPartitions;
     m_cbf[1]             = charBuf; charBuf += m_numPartitions;
     m_cbf[2]             = charBuf; charBuf += m_numPartitions;
-    m_chromaIntraDir     = charBuf; charBuf += m_numPartitions;
 
     X265_CHECK(charBuf == dataPool.charMemBlock + (m_numPartitions * BytesPerPartition) * (instance + 1), "CU data layout is broken\n");
 
@@ -246,7 +246,8 @@
     /* sequential memsets */
     m_partSet((uint8_t*)m_qp, (uint8_t)qp);
     m_partSet(m_log2CUSize,   (uint8_t)g_maxLog2CUSize);
-    m_partSet(m_lumaIntraDir, (uint8_t)DC_IDX);
+    m_partSet(m_lumaIntraDir, (uint8_t)ALL_IDX);
+    m_partSet(m_chromaIntraDir, (uint8_t)ALL_IDX);
     m_partSet(m_tqBypass,     (uint8_t)frame.m_encData->m_param->bLossless);
     if (m_slice->m_sliceType != I_SLICE)
     {
@@ -257,7 +258,7 @@
     X265_CHECK(!(frame.m_encData->m_param->bLossless && !m_slice->m_pps->bTransquantBypassEnabled), "lossless enabled without TQbypass in PPS\n");
 
     /* initialize the remaining CU data in one memset */
-    memset(m_cuDepth, 0, (BytesPerPartition - 6) * m_numPartitions);
+    memset(m_cuDepth, 0, (BytesPerPartition - 7) * m_numPartitions);
 
     uint32_t widthInCU = m_slice->m_sps->numCuInWidth;
     m_cuLeft = (m_cuAddr % widthInCU) ? m_encData->getPicCTU(m_cuAddr - 1) : NULL;
@@ -284,14 +285,15 @@
     m_partSet((uint8_t*)m_qp, (uint8_t)qp);
 
     m_partSet(m_log2CUSize,   (uint8_t)cuGeom.log2CUSize);
-    m_partSet(m_lumaIntraDir, (uint8_t)DC_IDX);
+    m_partSet(m_lumaIntraDir, (uint8_t)ALL_IDX);
+    m_partSet(m_chromaIntraDir, (uint8_t)ALL_IDX);
     m_partSet(m_tqBypass,     (uint8_t)m_encData->m_param->bLossless);
     m_partSet((uint8_t*)m_refIdx[0], (uint8_t)REF_NOT_VALID);
     m_partSet((uint8_t*)m_refIdx[1], (uint8_t)REF_NOT_VALID);
     m_partSet(m_cuDepth,      (uint8_t)cuGeom.depth);
 
     /* initialize the remaining CU data in one memset */
-    memset(m_predMode, 0, (BytesPerPartition - 7) * m_numPartitions);
+    memset(m_predMode, 0, (BytesPerPartition - 8) * m_numPartitions);
 }
 
 /* Copy the results of a sub-part (split) CU to the parent CU */
diff -r b6156a08b1de -r c7e8ab6338aa source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Fri Oct 09 20:45:59 2015 +0530
+++ b/source/encoder/analysis.cpp	Mon Oct 12 11:14:06 2015 +0530
@@ -72,7 +72,6 @@
 
 Analysis::Analysis()
 {
-    m_reuseIntraDataCTU = NULL;
     m_reuseInterDataCTU = NULL;
     m_reuseRef = NULL;
     m_reuseBestMergeCand = NULL;
@@ -138,17 +137,12 @@
     m_modeDepth[0].fencYuv.copyFromPicYuv(*m_frame->m_fencPic, ctu.m_cuAddr, 0);
 
     uint32_t numPartition = ctu.m_numPartitions;
-    if (m_param->analysisMode)
+    if (m_param->analysisMode && m_slice->m_sliceType != I_SLICE)
     {
-        if (m_slice->m_sliceType == I_SLICE)
-            m_reuseIntraDataCTU = (analysis_intra_data*)m_frame->m_analysisData.intraData;
-        else
-        {
-            int numPredDir = m_slice->isInterP() ? 1 : 2;
-            m_reuseInterDataCTU = (analysis_inter_data*)m_frame->m_analysisData.interData;
-            m_reuseRef = &m_reuseInterDataCTU->ref[ctu.m_cuAddr * X265_MAX_PRED_MODE_PER_CTU * numPredDir];
-            m_reuseBestMergeCand = &m_reuseInterDataCTU->bestMergeCand[ctu.m_cuAddr * CUGeom::MAX_GEOMS];
-        }
+        int numPredDir = m_slice->isInterP() ? 1 : 2;
+        m_reuseInterDataCTU = (analysis_inter_data*)m_frame->m_analysisData.interData;
+        m_reuseRef = &m_reuseInterDataCTU->ref[ctu.m_cuAddr * X265_MAX_PRED_MODE_PER_CTU * numPredDir];
+        m_reuseBestMergeCand = &m_reuseInterDataCTU->bestMergeCand[ctu.m_cuAddr * CUGeom::MAX_GEOMS];
     }
 
     ProfileCUScope(ctu, totalCTUTime, totalCTUs);
@@ -156,14 +150,22 @@
     uint32_t zOrder = 0;
     if (m_slice->m_sliceType == I_SLICE)
     {
+        analysis_intra_data* intraDataCTU = (analysis_intra_data*)m_frame->m_analysisData.intraData;
+        if (m_param->analysisMode == X265_ANALYSIS_LOAD)
+        {
+            memcpy(ctu.m_cuDepth, &intraDataCTU->depth[ctu.m_cuAddr * numPartition], sizeof(uint8_t) * numPartition);
+            memcpy(ctu.m_lumaIntraDir, &intraDataCTU->modes[ctu.m_cuAddr * numPartition], sizeof(uint8_t) * numPartition);
+            memcpy(ctu.m_partSize, &intraDataCTU->partSizes[ctu.m_cuAddr * numPartition], sizeof(char) * numPartition);
+            memcpy(ctu.m_chromaIntraDir, &intraDataCTU->chromaModes[ctu.m_cuAddr * numPartition], sizeof(uint8_t) * numPartition);
+        }
         compressIntraCU(ctu, cuGeom, zOrder, qp);
-        if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_frame->m_analysisData.intraData)
+        if (m_param->analysisMode == X265_ANALYSIS_SAVE && intraDataCTU)
         {
             CUData* bestCU = &m_modeDepth[0].bestMode->cu;
-            memcpy(&m_reuseIntraDataCTU->depth[ctu.m_cuAddr * numPartition], bestCU->m_cuDepth, sizeof(uint8_t) * numPartition);
-            memcpy(&m_reuseIntraDataCTU->modes[ctu.m_cuAddr * numPartition], bestCU->m_lumaIntraDir, sizeof(uint8_t) * numPartition);
-            memcpy(&m_reuseIntraDataCTU->partSizes[ctu.m_cuAddr * numPartition], bestCU->m_partSize, sizeof(uint8_t) * numPartition);
-            memcpy(&m_reuseIntraDataCTU->chromaModes[ctu.m_cuAddr * numPartition], bestCU->m_chromaIntraDir, sizeof(uint8_t) * numPartition);
+            memcpy(&intraDataCTU->depth[ctu.m_cuAddr * numPartition], bestCU->m_cuDepth, sizeof(uint8_t) * numPartition);
+            memcpy(&intraDataCTU->modes[ctu.m_cuAddr * numPartition], bestCU->m_lumaIntraDir, sizeof(uint8_t) * numPartition);
+            memcpy(&intraDataCTU->partSizes[ctu.m_cuAddr * numPartition], bestCU->m_partSize, sizeof(uint8_t) * numPartition);
+            memcpy(&intraDataCTU->chromaModes[ctu.m_cuAddr * numPartition], bestCU->m_chromaIntraDir, sizeof(uint8_t) * numPartition);
         }
     }
     else
@@ -210,8 +212,7 @@
         md.pred[PRED_LOSSLESS].initCosts();
         md.pred[PRED_LOSSLESS].cu.initLosslessCU(md.bestMode->cu, cuGeom);
         PartSize size = (PartSize)md.pred[PRED_LOSSLESS].cu.m_partSize[0];
-        uint8_t* modes = md.pred[PRED_LOSSLESS].cu.m_lumaIntraDir;
-        checkIntra(md.pred[PRED_LOSSLESS], cuGeom, size, modes, NULL);
+        checkIntra(md.pred[PRED_LOSSLESS], cuGeom, size);
         checkBestMode(md.pred[PRED_LOSSLESS], cuGeom.depth);
     }
     else
@@ -233,42 +234,40 @@
     bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
     bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
 
-    if (m_param->analysisMode == X265_ANALYSIS_LOAD)
+    bool bAlreadyDecided = parentCTU.m_lumaIntraDir[cuGeom.absPartIdx] != (uint8_t)ALL_IDX;
+    bool bDecidedDepth = parentCTU.m_cuDepth[cuGeom.absPartIdx] == depth;
+
+    // stop recursion if we reach the depth of previous analysis decision
+    mightSplit &= !(bAlreadyDecided && bDecidedDepth);
+
+    if (bAlreadyDecided)
     {
-        uint8_t* reuseDepth  = &m_reuseIntraDataCTU->depth[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
-        uint8_t* reuseModes  = &m_reuseIntraDataCTU->modes[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
-        char* reusePartSizes = &m_reuseIntraDataCTU->partSizes[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
-        uint8_t* reuseChromaModes = &m_reuseIntraDataCTU->chromaModes[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
-
-        if (mightNotSplit && depth == reuseDepth[zOrder] && zOrder == cuGeom.absPartIdx)
+        if (bDecidedDepth)
         {
-            PartSize size = (PartSize)reusePartSizes[zOrder];
-            Mode& mode = size == SIZE_2Nx2N ? md.pred[PRED_INTRA] : md.pred[PRED_INTRA_NxN];
+            Mode& mode = md.pred[0];
+            md.bestMode = &mode;
             mode.cu.initSubCU(parentCTU, cuGeom, qp);
-            checkIntra(mode, cuGeom, size, &reuseModes[zOrder], &reuseChromaModes[zOrder]);
-            checkBestMode(mode, depth);
+            memcpy(mode.cu.m_lumaIntraDir, parentCTU.m_lumaIntraDir + cuGeom.absPartIdx, cuGeom.numPartitions);
+            memcpy(mode.cu.m_chromaIntraDir, parentCTU.m_chromaIntraDir + cuGeom.absPartIdx, cuGeom.numPartitions);
+            checkIntra(mode, cuGeom, (PartSize)parentCTU.m_partSize[cuGeom.absPartIdx]);
 
             if (m_bTryLossless)
                 tryLossless(cuGeom);
 
             if (mightSplit)
                 addSplitFlagCost(*md.bestMode, cuGeom.depth);
-
-            // increment zOrder offset to point to next best depth in sharedDepth buffer
-            zOrder += g_depthInc[g_maxCUDepth - 1][reuseDepth[zOrder]];
-            mightSplit = false;
         }
     }
     else if (mightNotSplit)
     {
         md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom, qp);
-        checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL, NULL);
+        checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N);
         checkBestMode(md.pred[PRED_INTRA], depth);
 
         if (cuGeom.log2CUSize == 3 && m_slice->m_sps->quadtreeTULog2MinSize < 3)
         {
             md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom, qp);
-            checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN, NULL, NULL);
+            checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN);
             checkBestMode(md.pred[PRED_INTRA_NxN], depth);
         }
 
@@ -451,9 +450,9 @@
             switch (pmode.modes[task])
             {
             case PRED_INTRA:
-                slave.checkIntra(md.pred[PRED_INTRA], pmode.cuGeom, SIZE_2Nx2N, NULL, NULL);
+                slave.checkIntra(md.pred[PRED_INTRA], pmode.cuGeom, SIZE_2Nx2N);
                 if (pmode.cuGeom.log2CUSize == 3 && m_slice->m_sps->quadtreeTULog2MinSize < 3)
-                    slave.checkIntra(md.pred[PRED_INTRA_NxN], pmode.cuGeom, SIZE_NxN, NULL, NULL);
+                    slave.checkIntra(md.pred[PRED_INTRA_NxN], pmode.cuGeom, SIZE_NxN);
                 break;
 
             case PRED_2Nx2N:
@@ -1369,13 +1368,13 @@
                 {
                     ProfileCounter(parentCTU, totalIntraCU[cuGeom.depth]);
                     md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom, qp);
-                    checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL, NULL);
+                    checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N);
                     checkBestMode(md.pred[PRED_INTRA], depth);
 
                     if (cuGeom.log2CUSize == 3 && m_slice->m_sps->quadtreeTULog2MinSize < 3)
                     {
                         md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom, qp);
-                        checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN, NULL, NULL);
+                        checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN);
                         checkBestMode(md.pred[PRED_INTRA_NxN], depth);
                     }
                 }
diff -r b6156a08b1de -r c7e8ab6338aa source/encoder/analysis.h
--- a/source/encoder/analysis.h	Fri Oct 09 20:45:59 2015 +0530
+++ b/source/encoder/analysis.h	Mon Oct 12 11:14:06 2015 +0530
@@ -104,7 +104,6 @@
 protected:
 
     /* Analysis data for load/save modes, keeps getting incremented as CTU analysis proceeds and data is consumed or read */
-    analysis_intra_data* m_reuseIntraDataCTU;
     analysis_inter_data* m_reuseInterDataCTU;
     int32_t*             m_reuseRef;
     uint32_t*            m_reuseBestMergeCand;
diff -r b6156a08b1de -r c7e8ab6338aa source/encoder/search.cpp
--- a/source/encoder/search.cpp	Fri Oct 09 20:45:59 2015 +0530
+++ b/source/encoder/search.cpp	Mon Oct 12 11:14:06 2015 +0530
@@ -1157,7 +1157,7 @@
     }
 }
 
-void Search::checkIntra(Mode& intraMode, const CUGeom& cuGeom, PartSize partSize, uint8_t* sharedModes, uint8_t* sharedChromaModes)
+void Search::checkIntra(Mode& intraMode, const CUGeom& cuGeom, PartSize partSize)
 {
     CUData& cu = intraMode.cu;
 
@@ -1168,8 +1168,8 @@
     cu.getIntraTUQtDepthRange(tuDepthRange, 0);
 
     intraMode.initCosts();
-    intraMode.lumaDistortion += estIntraPredQT(intraMode, cuGeom, tuDepthRange, sharedModes);
-    intraMode.chromaDistortion += estIntraPredChromaQT(intraMode, cuGeom, sharedChromaModes);
+    intraMode.lumaDistortion += estIntraPredQT(intraMode, cuGeom, tuDepthRange);
+    intraMode.chromaDistortion += estIntraPredChromaQT(intraMode, cuGeom);
     intraMode.distortion += intraMode.lumaDistortion + intraMode.chromaDistortion;
 
     m_entropyCoder.resetBits();
@@ -1387,7 +1387,7 @@
     extractIntraResultQT(cu, *reconYuv, 0, 0);
 
     intraMode.lumaDistortion = icosts.distortion;
-    intraMode.chromaDistortion = estIntraPredChromaQT(intraMode, cuGeom, NULL);
+    intraMode.chromaDistortion = estIntraPredChromaQT(intraMode, cuGeom);
     intraMode.distortion = intraMode.lumaDistortion + intraMode.chromaDistortion;
 
     m_entropyCoder.resetBits();
@@ -1415,7 +1415,7 @@
     checkDQP(intraMode, cuGeom);
 }
 
-uint32_t Search::estIntraPredQT(Mode &intraMode, const CUGeom& cuGeom, const uint32_t depthRange[2], uint8_t* sharedModes)
+uint32_t Search::estIntraPredQT(Mode &intraMode, const CUGeom& cuGeom, const uint32_t depthRange[2])
 {
     CUData& cu = intraMode.cu;
     Yuv* reconYuv = &intraMode.reconYuv;
@@ -1439,8 +1439,8 @@
     {
         uint32_t bmode = 0;
 
-        if (sharedModes)
-            bmode = sharedModes[puIdx];
+        if (intraMode.cu.m_lumaIntraDir[puIdx] != (uint8_t)ALL_IDX)
+            bmode = intraMode.cu.m_lumaIntraDir[puIdx];
         else
         {
             uint64_t candCostList[MAX_RD_INTRA_MODES];
@@ -1679,7 +1679,7 @@
     cu.setChromIntraDirSubParts(bestMode, 0, cuGeom.depth);
 }
 
-uint32_t Search::estIntraPredChromaQT(Mode &intraMode, const CUGeom& cuGeom, uint8_t* sharedChromaModes)
+uint32_t Search::estIntraPredChromaQT(Mode &intraMode, const CUGeom& cuGeom)
 {
     CUData& cu = intraMode.cu;
     Yuv& reconYuv = intraMode.reconYuv;
@@ -1707,10 +1707,10 @@
         uint32_t maxMode = NUM_CHROMA_MODE;
         uint32_t modeList[NUM_CHROMA_MODE];
 
-        if (sharedChromaModes && !initTuDepth)
+        if (intraMode.cu.m_chromaIntraDir[0] != (uint8_t)ALL_IDX && !initTuDepth)
         {
             for (uint32_t l = 0; l < NUM_CHROMA_MODE; l++)
-                modeList[l] = sharedChromaModes[0];
+                modeList[l] = intraMode.cu.m_chromaIntraDir[0];
             maxMode = 1;
         }
         else
diff -r b6156a08b1de -r c7e8ab6338aa source/encoder/search.h
--- a/source/encoder/search.h	Fri Oct 09 20:45:59 2015 +0530
+++ b/source/encoder/search.h	Mon Oct 12 11:14:06 2015 +0530
@@ -284,8 +284,8 @@
     // mark temp RD entropy contexts as uninitialized; useful for finding loads without stores
     void     invalidateContexts(int fromDepth);
 
-    // full RD search of intra modes. if sharedModes is not NULL, it directly uses them
-    void     checkIntra(Mode& intraMode, const CUGeom& cuGeom, PartSize partSize, uint8_t* sharedModes, uint8_t* sharedChromaModes);
+    // full RD search of intra modes
+    void     checkIntra(Mode& intraMode, const CUGeom& cuGeom, PartSize partSizes);
 
     // select best intra mode using only sa8d costs, cannot measure NxN intra
     void     checkIntraInInter(Mode& intraMode, const CUGeom& cuGeom);
@@ -351,10 +351,10 @@
     void     saveResidualQTData(CUData& cu, ShortYuv& resiYuv, uint32_t absPartIdx, uint32_t tuDepth);
 
     // RDO search of luma intra modes; result is fully encoded luma. luma distortion is returned
-    uint32_t estIntraPredQT(Mode &intraMode, const CUGeom& cuGeom, const uint32_t depthRange[2], uint8_t* sharedModes);
+    uint32_t estIntraPredQT(Mode &intraMode, const CUGeom& cuGeom, const uint32_t depthRange[2]);
 
     // RDO select best chroma mode from luma; result is fully encode chroma. chroma distortion is returned
-    uint32_t estIntraPredChromaQT(Mode &intraMode, const CUGeom& cuGeom, uint8_t* sharedChromaModes);
+    uint32_t estIntraPredChromaQT(Mode &intraMode, const CUGeom& cuGeom);
 
     void     codeSubdivCbfQTChroma(const CUData& cu, uint32_t tuDepth, uint32_t absPartIdx);
     void     codeInterSubdivCbfQT(CUData& cu, uint32_t absPartIdx, const uint32_t tuDepth, const uint32_t depthRange[2]);


More information about the x265-devel mailing list