[x265] [PATCH] analysis: dump the best depth and re-use it for analysis-mode=load

gopu at multicorewareinc.com gopu at multicorewareinc.com
Thu Jan 8 13:05:44 CET 2015


# HG changeset patch
# User Gopu Govindaswamy <gopu at multicorewareinc.com>
# Date 1420713484 -19800
#      Thu Jan 08 16:08:04 2015 +0530
# Node ID f307fc02faf6111642737516cf34d3dd5f3b2820
# Parent  24da7b0accdafb1f9e7e54182c114cb264d09bab
analysis: dump the best depth and re-use it for analysis-mode=load

For inter frames we currently share only the best ref. In addition to that,
share the best depth and re-use it for analysis-mode=load. The best depth is
re-used only if the pred mode is MODE_SKIP; otherwise it is ignored. Currently
this change works only for rdlevel > 4.

diff -r 24da7b0accda -r f307fc02faf6 source/common/common.h
--- a/source/common/common.h	Wed Dec 24 10:34:59 2014 +0530
+++ b/source/common/common.h	Thu Jan 08 16:08:04 2015 +0530
@@ -353,10 +353,12 @@
     }
 };
 
-/* Stores inter (motion estimation) analysis data for a single frame */
+/* Stores inter analysis data for a single frame */
 struct analysis_inter_data
 {
     int32_t*    ref;
+    uint8_t*    depth;
+    uint8_t*    modes;
 };
 
 /* Stores intra analysis data for a single frame. This struct needs better packing */
diff -r 24da7b0accda -r f307fc02faf6 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Wed Dec 24 10:34:59 2014 +0530
+++ b/source/encoder/analysis.cpp	Thu Jan 08 16:08:04 2015 +0530
@@ -143,9 +143,9 @@
         }
     }
 
+    uint32_t zOrder = 0;
     if (m_slice->m_sliceType == I_SLICE)
     {
-        uint32_t zOrder = 0;
         compressIntraCU(ctu, cuGeom, zOrder);
         if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_frame->m_analysisData.intraData)
         {
@@ -173,7 +173,15 @@
         else if (m_param->rdLevel <= 4)
             compressInterCU_rd0_4(ctu, cuGeom);
         else
-            compressInterCU_rd5_6(ctu, cuGeom);
+        {
+            compressInterCU_rd5_6(ctu, cuGeom, zOrder);
+            if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_frame->m_analysisData.interData)
+            {
+                CUData *bestCU = &m_modeDepth[0].bestMode->cu;
+                memcpy(&m_reuseInterDataCTU->depth[ctu.m_cuAddr * numPartition], bestCU->m_cuDepth, sizeof(uint8_t) * numPartition);
+                memcpy(&m_reuseInterDataCTU->modes[ctu.m_cuAddr * numPartition], bestCU->m_predMode, sizeof(uint8_t) * numPartition);
+            }
+        }
     }
 
     return *m_modeDepth[0].bestMode;
@@ -1037,7 +1045,7 @@
         md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr, cuGeom.encodeIdx);
 }
 
-void Analysis::compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom)
+void Analysis::compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder)
 {
     uint32_t depth = cuGeom.depth;
     ModeDepth& md = m_modeDepth[depth];
@@ -1046,6 +1054,45 @@
     bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
     bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
 
+    if (m_param->analysisMode == X265_ANALYSIS_LOAD)
+    {
+        uint8_t* reuseDepth  = &m_reuseInterDataCTU->depth[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
+        uint8_t* reuseModes  = &m_reuseInterDataCTU->modes[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
+        if (mightNotSplit && depth == reuseDepth[zOrder] && zOrder == cuGeom.encodeIdx && reuseModes[zOrder] == MODE_SKIP)
+        {
+            md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom);
+            md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom);
+            checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
+
+            if ((m_slice->m_sliceType != B_SLICE || m_param->bIntraInBFrames) &&
+                (!m_param->bEnableCbfFastMode || md.bestMode->cu.getQtRootCbf(0)))
+            {
+                md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
+                checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL);
+                checkBestMode(md.pred[PRED_INTRA], depth);
+
+                if (depth == g_maxCUDepth && cuGeom.log2CUSize > m_slice->m_sps->quadtreeTULog2MinSize)
+                {
+                    md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom);
+                    checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN, &reuseModes[zOrder]);
+                    checkBestMode(md.pred[PRED_INTRA_NxN], depth);
+                }
+            }
+
+            if (m_bTryLossless)
+                tryLossless(cuGeom);
+
+            if (mightSplit)
+                addSplitFlagCost(*md.bestMode, cuGeom.depth);
+
+            // increment zOrder offset to point to next best depth in sharedDepth buffer
+            zOrder += g_depthInc[g_maxCUDepth - 1][reuseDepth[zOrder]];
+
+            mightSplit = false;
+            mightNotSplit = false;
+        }
+    }
+
     if (mightNotSplit)
     {
         md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom);
@@ -1177,7 +1224,7 @@
             {
                 m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childGeom.encodeIdx);
                 m_rqt[nextDepth].cur.load(*nextContext);
-                compressInterCU_rd5_6(parentCTU, childGeom);
+                compressInterCU_rd5_6(parentCTU, childGeom, zOrder);
 
                 // Save best CU and pred data for this sub CU
                 splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
@@ -1186,7 +1233,10 @@
                 nextContext = &nd.bestMode->contexts;
             }
             else
+            {
                 splitCU->setEmptyPart(childGeom, subPartIdx);
+                zOrder += g_depthInc[g_maxCUDepth - 1][nextDepth];
+            }
         }
         nextContext->store(splitPred->contexts);
         if (mightNotSplit)
diff -r 24da7b0accda -r f307fc02faf6 source/encoder/analysis.h
--- a/source/encoder/analysis.h	Wed Dec 24 10:34:59 2014 +0530
+++ b/source/encoder/analysis.h	Thu Jan 08 16:08:04 2015 +0530
@@ -101,7 +101,7 @@
     /* full analysis for a P or B slice CU */
     void compressInterCU_dist(const CUData& parentCTU, const CUGeom& cuGeom);
     void compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom);
-    void compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom);
+    void compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder);
 
     /* measure merge and skip */
     void checkMerge2Nx2N_rd0_4(Mode& skip, Mode& merge, const CUGeom& cuGeom);
diff -r 24da7b0accda -r f307fc02faf6 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Wed Dec 24 10:34:59 2014 +0530
+++ b/source/encoder/encoder.cpp	Thu Jan 08 16:08:04 2015 +0530
@@ -1578,6 +1578,8 @@
         analysis_inter_data *interData = (analysis_inter_data*)analysis->interData;
         CHECKED_MALLOC_ZERO(interData, analysis_inter_data, 1);
         CHECKED_MALLOC(interData->ref, int32_t, analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2);
+        CHECKED_MALLOC(interData->depth, uint8_t, analysis->numPartitions * analysis->numCUsInFrame);
+        CHECKED_MALLOC(interData->modes, uint8_t, analysis->numPartitions * analysis->numCUsInFrame);
         analysis->interData = interData;
     }
     return;
@@ -1599,6 +1601,8 @@
     else
     {
         X265_FREE(((analysis_inter_data*)analysis->interData)->ref);
+        X265_FREE(((analysis_inter_data*)analysis->interData)->depth);
+        X265_FREE(((analysis_inter_data*)analysis->interData)->modes);
         X265_FREE(analysis->interData);
     }
 }
@@ -1662,12 +1666,16 @@
     else if (analysis->sliceType == X265_TYPE_P)
     {
         X265_FREAD(((analysis_inter_data *)analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU, m_analysisFile);
+        X265_FREAD(((analysis_inter_data *)analysis->interData)->depth, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
+        X265_FREAD(((analysis_inter_data *)analysis->interData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
         consumedBytes += frameRecordSize;
         totalConsumedBytes = consumedBytes;
     }
     else
     {
         X265_FREAD(((analysis_inter_data *)analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2, m_analysisFile);
+        X265_FREAD(((analysis_inter_data *)analysis->interData)->depth, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
+        X265_FREAD(((analysis_inter_data *)analysis->interData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
         consumedBytes += frameRecordSize;
     }
 #undef X265_FREAD
@@ -1691,9 +1699,15 @@
     if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == X265_TYPE_I)
         analysis->frameRecordSize += sizeof(uint8_t) * analysis->numCUsInFrame * analysis->numPartitions * 3;
     else if (analysis->sliceType == X265_TYPE_P)
+    {
         analysis->frameRecordSize += sizeof(int32_t) * analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU;
+        analysis->frameRecordSize += sizeof(uint8_t) * analysis->numCUsInFrame * analysis->numPartitions * 2;
+    }
     else
+    {
         analysis->frameRecordSize += sizeof(int32_t) * analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2;
+        analysis->frameRecordSize += sizeof(uint8_t) * analysis->numCUsInFrame * analysis->numPartitions * 2;
+    }
 
     X265_FWRITE(&analysis->frameRecordSize, sizeof(uint32_t), 1, m_analysisFile);
     X265_FWRITE(&analysis->poc, sizeof(int), 1, m_analysisFile);
@@ -1709,11 +1723,15 @@
     }
     else if (analysis->sliceType == X265_TYPE_P)
     {
-        X265_FWRITE(((analysis_inter_data *)analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU, m_analysisFile);
+        X265_FWRITE(((analysis_inter_data*)analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU, m_analysisFile);
+        X265_FWRITE(((analysis_inter_data*)analysis->interData)->depth, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
+        X265_FWRITE(((analysis_inter_data*)analysis->interData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
     }
     else
     {
-        X265_FWRITE(((analysis_inter_data *)analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2, m_analysisFile);
+        X265_FWRITE(((analysis_inter_data*)analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2, m_analysisFile);
+        X265_FWRITE(((analysis_inter_data*)analysis->interData)->depth, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
+        X265_FWRITE(((analysis_inter_data*)analysis->interData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
     }
 #undef X265_FWRITE
 }


More information about the x265-devel mailing list