[x265] [PATCH] analysis: reuse the bestSadCand in rd level 0 to 4 for skip and merge modes

gopu at multicorewareinc.com gopu at multicorewareinc.com
Tue Feb 3 08:59:00 CET 2015


# HG changeset patch
# User Gopu Govindaswamy <gopu at multicorewareinc.com>
# Date 1422950320 -19800
#      Tue Feb 03 13:28:40 2015 +0530
# Node ID 420bb1251dc7b9bf1dbb9e6f8e2655ea8f18fe1c
# Parent  c01267c2280b33047bed11d812880d17153040ed
analysis: reuse the bestSadCand in rd level 0 to 4 for skip and merge modes

diff -r c01267c2280b -r 420bb1251dc7 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Tue Feb 03 11:22:12 2015 +0530
+++ b/source/encoder/analysis.cpp	Tue Feb 03 13:28:40 2015 +0530
@@ -166,7 +166,7 @@
             * they are available for intra predictions */
             m_modeDepth[0].fencYuv.copyToPicYuv(*m_frame->m_reconPic, ctu.m_cuAddr, 0);
 
-            compressInterCU_rd0_4(ctu, cuGeom);
+            compressInterCU_rd0_4(ctu, cuGeom, zOrder);
 
             /* generate residual for entire CTU at once and copy to reconPic */
             encodeResidue(ctu, cuGeom);
@@ -174,16 +174,15 @@
         else if (m_param->bDistributeModeAnalysis && m_param->rdLevel >= 2)
             compressInterCU_dist(ctu, cuGeom);
         else if (m_param->rdLevel <= 4)
-            compressInterCU_rd0_4(ctu, cuGeom);
+            compressInterCU_rd0_4(ctu, cuGeom, zOrder);
         else
+            compressInterCU_rd5_6(ctu, cuGeom, zOrder);
+
+        if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_frame->m_analysisData.interData && !m_param->bDistributeModeAnalysis)
         {
-            compressInterCU_rd5_6(ctu, cuGeom, zOrder);
-            if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_frame->m_analysisData.interData)
-            {
-                CUData *bestCU = &m_modeDepth[0].bestMode->cu;
-                memcpy(&m_reuseInterDataCTU->depth[ctu.m_cuAddr * numPartition], bestCU->m_cuDepth, sizeof(uint8_t) * numPartition);
-                memcpy(&m_reuseInterDataCTU->modes[ctu.m_cuAddr * numPartition], bestCU->m_predMode, sizeof(uint8_t) * numPartition);
-            }
+            CUData *bestCU = &m_modeDepth[0].bestMode->cu;
+            memcpy(&m_reuseInterDataCTU->depth[ctu.m_cuAddr * numPartition], bestCU->m_cuDepth, sizeof(uint8_t) * numPartition);
+            memcpy(&m_reuseInterDataCTU->modes[ctu.m_cuAddr * numPartition], bestCU->m_predMode, sizeof(uint8_t) * numPartition);
         }
     }
 
@@ -789,7 +788,7 @@
         md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr, cuGeom.encodeIdx);
 }
 
-void Analysis::compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom)
+void Analysis::compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder)
 {
     uint32_t depth = cuGeom.depth;
     uint32_t cuAddr = parentCTU.m_cuAddr;
@@ -800,6 +799,30 @@
     bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
     uint32_t minDepth = topSkipMinDepth(parentCTU, cuGeom);
 
+    if (m_param->analysisMode == X265_ANALYSIS_LOAD)
+    {
+        uint8_t* reuseDepth  = &m_reuseInterDataCTU->depth[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
+        uint8_t* reuseModes  = &m_reuseInterDataCTU->modes[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
+        if (mightNotSplit && depth >= minDepth && depth == reuseDepth[zOrder] && zOrder == cuGeom.encodeIdx && reuseModes[zOrder] == MODE_SKIP)
+        {
+            md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom);
+            md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom);
+            checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
+
+            if (m_bTryLossless)
+                tryLossless(cuGeom);
+
+            if (mightSplit)
+                addSplitFlagCost(*md.bestMode, cuGeom.depth);
+
+            // advance the zOrder offset to the next saved depth entry in the reuseDepth buffer
+            zOrder += g_depthInc[g_maxCUDepth - 1][reuseDepth[zOrder]];
+
+            mightSplit = false;
+            mightNotSplit = false;
+        }
+    }
+
     if (mightNotSplit && depth >= minDepth)
     {
         bool bTryIntra = m_slice->m_sliceType != B_SLICE || m_param->bIntraInBFrames;
@@ -1022,7 +1045,7 @@
             {
                 m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childGeom.encodeIdx);
                 m_rqt[nextDepth].cur.load(*nextContext);
-                compressInterCU_rd0_4(parentCTU, childGeom);
+                compressInterCU_rd0_4(parentCTU, childGeom, zOrder);
 
                 // Save best CU and pred data for this sub CU
                 splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
@@ -1036,7 +1059,10 @@
                     nextContext = &nd.bestMode->contexts;
             }
             else
+            {
                 splitCU->setEmptyPart(childGeom, subPartIdx);
+                zOrder += g_depthInc[g_maxCUDepth - 1][nextDepth];
+            }
         }
         nextContext->store(splitPred->contexts);
 
@@ -1300,10 +1326,18 @@
 
     for (uint32_t i = 0; i < maxNumMergeCand; ++i)
     {
-        if (m_bFrameParallel &&
-            (mvFieldNeighbours[i][0].mv.y >= (m_param->searchRange + 1) * 4 ||
-            mvFieldNeighbours[i][1].mv.y >= (m_param->searchRange + 1) * 4))
-            continue;
+        if (m_param->analysisMode == X265_ANALYSIS_LOAD)
+        {
+            i = (int)*reuseBestMergeCand;
+            maxNumMergeCand = 1;
+        }
+        else
+        {
+            if (m_bFrameParallel &&
+                (mvFieldNeighbours[i][0].mv.y >= (m_param->searchRange + 1) * 4 ||
+                mvFieldNeighbours[i][1].mv.y >= (m_param->searchRange + 1) * 4))
+                continue;
+        }
 
         tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i; // merge candidate ID is stored in L0 MVP idx
         tempPred->cu.m_interDir[0] = interDirNeighbours[i];
@@ -1335,6 +1369,11 @@
     if (bestSadCand < 0)
         return;
 
+    if (m_param->analysisMode == X265_ANALYSIS_SAVE)
+        *reuseBestMergeCand = bestSadCand;
+    if (m_param->analysisMode)
+        reuseBestMergeCand++;
+
     /* calculate the motion compensation for chroma for the best mode selected */
     if (!m_bChromaSa8d) /* Chroma MC was done above */
     {
diff -r c01267c2280b -r 420bb1251dc7 source/encoder/analysis.h
--- a/source/encoder/analysis.h	Tue Feb 03 11:22:12 2015 +0530
+++ b/source/encoder/analysis.h	Tue Feb 03 13:28:40 2015 +0530
@@ -101,7 +101,7 @@
 
     /* full analysis for a P or B slice CU */
     void compressInterCU_dist(const CUData& parentCTU, const CUGeom& cuGeom);
-    void compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom);
+    void compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder);
     void compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder);
 
     /* measure merge and skip */


More information about the x265-devel mailing list