[x265] [PATCH] analysis: use AVC CU analysis-info for HEVC mode analysis

Fri Nov 17 14:53:25 CET 2017

# HG changeset patch
# User Praveen Tiwari <praveen at multicorewareinc.com>
# Date 1510926794 -19800
#      Fri Nov 17 19:23:14 2017 +0530
# Node ID 6b248ccb14169d2b0d5b84d50d94a153bd8f3b4f
# Parent  9723e8812e63ce51e38ede41f7d5edf73cad0849
analysis: use AVC CU analysis-info for HEVC mode analysis

This patch implements the functionality for analysis-reuse-level 7: we use the
AVC analysis info for the HEVC mode decision, and use the depth from the offload
for AVC sizes.

diff -r 9723e8812e63 -r 6b248ccb1416 source/common/cudata.cpp

--- a/source/common/cudata.cpp	Fri Nov 17 14:16:31 2017 +0530
+++ b/source/common/cudata.cpp	Fri Nov 17 19:23:14 2017 +0530
@@ -201,6 +201,8 @@
         m_cuDepth            = charBuf; charBuf += m_numPartitions;
         m_predMode           = charBuf; charBuf += m_numPartitions; /* the order up to here is important in initCTU() and initSubCU() */
         m_partSize           = charBuf; charBuf += m_numPartitions;
+        m_skipFlag[0]        = charBuf; charBuf += m_numPartitions;
+        m_skipFlag[1]        = charBuf; charBuf += m_numPartitions;
         m_mergeFlag          = charBuf; charBuf += m_numPartitions;
         m_interDir           = charBuf; charBuf += m_numPartitions;
         m_mvpIdx[0]          = charBuf; charBuf += m_numPartitions;
@@ -239,6 +241,8 @@
         m_cuDepth            = charBuf; charBuf += m_numPartitions;
         m_predMode           = charBuf; charBuf += m_numPartitions; /* the order up to here is important in initCTU() and initSubCU() */
         m_partSize           = charBuf; charBuf += m_numPartitions;
+        m_skipFlag[0]        = charBuf; charBuf += m_numPartitions;
+        m_skipFlag[1]        = charBuf; charBuf += m_numPartitions;
         m_mergeFlag          = charBuf; charBuf += m_numPartitions;
         m_interDir           = charBuf; charBuf += m_numPartitions;
         m_mvpIdx[0]          = charBuf; charBuf += m_numPartitions;
diff -r 9723e8812e63 -r 6b248ccb1416 source/common/cudata.h
--- a/source/common/cudata.h	Fri Nov 17 14:16:31 2017 +0530
+++ b/source/common/cudata.h	Fri Nov 17 19:23:14 2017 +0530
@@ -199,13 +199,14 @@
     uint8_t*      m_predMode;         // array of prediction modes
     uint8_t*      m_partSize;         // array of partition sizes
     uint8_t*      m_mergeFlag;        // array of merge flags
+    uint8_t*      m_skipFlag[2];
     uint8_t*      m_interDir;         // array of inter directions
     uint8_t*      m_mvpIdx[2];        // array of motion vector predictor candidates or merge candidate indices [0]
     uint8_t*      m_tuDepth;          // array of transform indices
     uint8_t*      m_transformSkip[3]; // array of transform skipping flags per plane
     uint8_t*      m_cbf[3];           // array of coded block flags (CBF) per plane
     uint8_t*      m_chromaIntraDir;   // array of intra directions (chroma)
-    enum { BytesPerPartition = 21 };  // combined sizeof() of all per-part data
+    enum { BytesPerPartition = 23 };  // combined sizeof() of all per-part data
 
     sse_t*        m_distortion;
     coeff_t*      m_trCoeff[3];       // transformed coefficient buffer per plane
diff -r 9723e8812e63 -r 6b248ccb1416 source/common/framedata.h
--- a/source/common/framedata.h	Fri Nov 17 14:16:31 2017 +0530
+++ b/source/common/framedata.h	Fri Nov 17 19:23:14 2017 +0530
@@ -195,6 +195,7 @@
     uint8_t*    mvpIdx[2];
     int8_t*     refIdx[2];
     MV*         mv[2];
+   int64_t*     sadCost;
 };
 
 struct analysis2PassFrameData
diff -r 9723e8812e63 -r 6b248ccb1416 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Fri Nov 17 14:16:31 2017 +0530
+++ b/source/encoder/analysis.cpp	Fri Nov 17 19:23:14 2017 +0530
@@ -75,6 +75,10 @@
     m_reuseInterDataCTU = NULL;
     m_reuseRef = NULL;
     m_bHD = false;
+    m_modeFlag[0] = false;
+    m_modeFlag[1] = false;
+    m_checkMergeAndSkipOnly[0] = false;
+    m_checkMergeAndSkipOnly[1] = false;
     m_evaluateInter = 0;
 }
 
@@ -247,6 +251,9 @@
             memcpy(ctu.m_cuDepth, &interDataCTU->depth[posCTU], sizeof(uint8_t) * numPartition);
             memcpy(ctu.m_predMode, &interDataCTU->modes[posCTU], sizeof(uint8_t) * numPartition);
             memcpy(ctu.m_partSize, &interDataCTU->partSize[posCTU], sizeof(uint8_t) * numPartition);
+            for (int list = 0; list < m_slice->isInterB() + 1; list++)
+                memcpy(ctu.m_skipFlag[list], &m_frame->m_analysisData.modeFlag[list][posCTU], sizeof(uint8_t) * numPartition);
+
             if ((m_slice->m_sliceType == P_SLICE || m_param->bIntraInBFrames) && !m_param->bMVType)
             {
                 analysis_intra_data* intraDataCTU = (analysis_intra_data*)m_frame->m_analysisData.intraData;
@@ -1162,7 +1169,11 @@
     PicYuv& reconPic = *m_frame->m_reconPic;
     SplitData splitCUData;
 
-    if ((m_param->bMVType && cuGeom.numPartitions > 16) || !m_param->bMVType)
+    bool bHEVCBlockAnalysis = (m_param->bMVType && cuGeom.numPartitions > 16);
+    bool bRefineAVCAnalysis = (m_param->analysisReuseLevel == 7 && (m_modeFlag[0] || m_modeFlag[1]));
+    bool bNooffloading = !m_param->bMVType;
+
+    if (bHEVCBlockAnalysis || bRefineAVCAnalysis || bNooffloading)
     {
         md.bestMode = NULL;
         bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
@@ -1296,7 +1307,7 @@
         }
 
         /* Step 1. Evaluate Merge/Skip candidates for likely early-outs, if skip mode was not set above */
-        if (mightNotSplit && depth >= minDepth && !md.bestMode && !bCtuInfoCheck) /* TODO: Re-evaluate if analysis load/save still works */
+        if ((mightNotSplit && depth >= minDepth && !md.bestMode && !bCtuInfoCheck) || (m_param->bMVType && (m_modeFlag[0] || m_modeFlag[1]))) /* TODO: Re-evaluate if analysis load/save still works */
         {
             /* Compute Merge Cost */
             md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
@@ -1307,7 +1318,7 @@
                 && md.bestMode && md.bestMode->cu.isSkipped(0); // TODO: sa8d threshold per depth
         }
 
-        if (md.bestMode && m_param->bEnableRecursionSkip && !bCtuInfoCheck)
+        if (md.bestMode && m_param->bEnableRecursionSkip && !bCtuInfoCheck && !(m_param->bMVType && (m_modeFlag[0] || m_modeFlag[1])))
         {
             skipRecursion = md.bestMode->cu.isSkipped(0);
             if (mightSplit && depth >= minDepth && !skipRecursion)
@@ -1319,6 +1330,9 @@
             }
         }
 
+        if (m_param->bMVType && md.bestMode && cuGeom.numPartitions <= 16)
+            skipRecursion = true;
+
         /* Step 2. Evaluate each of the 4 split sub-blocks in series */
         if (mightSplit && !skipRecursion)
         {
@@ -1374,6 +1388,10 @@
                 splitPred->sa8dCost = m_rdCost.calcRdSADCost((uint32_t)splitPred->distortion, splitPred->sa8dBits);
         }
 
+        /* If analysis mode is simple, do not evaluate other modes */
+        if ((m_param->bMVType && cuGeom.numPartitions <= 16) && (m_slice->m_sliceType == P_SLICE || m_slice->m_sliceType == B_SLICE))
+            mightNotSplit = !(m_checkMergeAndSkipOnly[0] || (m_checkMergeAndSkipOnly[0] && m_checkMergeAndSkipOnly[1]));
+
         /* Split CUs
          *   0  1
          *   2  3 */
@@ -1838,7 +1856,12 @@
     }
 
     SplitData splitCUData;
-    if ((m_param->bMVType && cuGeom.numPartitions > 16) || !m_param->bMVType)
+
+    bool bHEVCBlockAnalysis = (m_param->bMVType && cuGeom.numPartitions > 16);
+    bool bRefineAVCAnalysis = (m_param->analysisReuseLevel == 7 && (m_modeFlag[0] || m_modeFlag[1]));
+    bool bNooffloading = !m_param->bMVType;
+
+    if (bHEVCBlockAnalysis || bRefineAVCAnalysis || bNooffloading)
     {
         bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
         bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
@@ -1977,7 +2000,7 @@
         }
 
         /* Step 1. Evaluate Merge/Skip candidates for likely early-outs */
-        if (mightNotSplit && !md.bestMode && !bCtuInfoCheck)
+        if (mightNotSplit && !md.bestMode && !bCtuInfoCheck || (m_param->bMVType && (m_modeFlag[0] || m_modeFlag[1])))
         {
             md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
             md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
@@ -1993,6 +2016,9 @@
                 skipRecursion = md.bestMode && !md.bestMode->cu.getQtRootCbf(0);
         }
 
+        if (m_param->bMVType && md.bestMode && cuGeom.numPartitions <= 16)
+            skipRecursion = true;
+
         // estimate split cost
         /* Step 2. Evaluate each of the 4 split sub-blocks in series */
         if (mightSplit && !skipRecursion)
@@ -2045,6 +2071,10 @@
             checkDQPForSplitPred(*splitPred, cuGeom);
         }
 
+        /* If analysis mode is simple, do not evaluate other modes */
+        if ((m_param->bMVType && cuGeom.numPartitions <= 16) && (m_slice->m_sliceType == P_SLICE || m_slice->m_sliceType == B_SLICE))
+            mightNotSplit = !(m_checkMergeAndSkipOnly[0] || (m_checkMergeAndSkipOnly[0] && m_checkMergeAndSkipOnly[1]));
+
         /* Split CUs
          *   0  1
          *   2  3 */
@@ -2479,6 +2509,22 @@
                 checkDQPForSplitPred(*md.bestMode, cuGeom);
         }
 
+        if (m_param->bMVType && m_param->analysisReuseLevel == 7)
+        {
+            for (int list = 0; list < m_slice->isInterB() + 1; list++)
+            {
+                m_modeFlag[list] = true;
+                if (parentCTU.m_skipFlag[list][cuGeom.absPartIdx] == 1 && cuGeom.numPartitions <= 16)
+                    m_checkMergeAndSkipOnly[list] = true;
+            }
+            m_param->rdLevel > 4 ? compressInterCU_rd5_6(parentCTU, cuGeom, qp) : compressInterCU_rd0_4(parentCTU, cuGeom, qp);
+            for (int list = 0; list < m_slice->isInterB() + 1; list++)
+            {
+                m_modeFlag[list] = false;
+                m_checkMergeAndSkipOnly[list] = false;
+            }
+        }
+
         if (m_param->interRefine > 1 || (m_param->interRefine && parentCTU.m_predMode[cuGeom.absPartIdx] == MODE_SKIP  && !mode.cu.isSkipped(0)))
         {
             m_evaluateInter = 1;
diff -r 9723e8812e63 -r 6b248ccb1416 source/encoder/analysis.h
--- a/source/encoder/analysis.h	Fri Nov 17 14:16:31 2017 +0530
+++ b/source/encoder/analysis.h	Fri Nov 17 19:23:14 2017 +0530
@@ -110,6 +110,9 @@
     bool      m_bChromaSa8d;
     bool      m_bHD;
 
+    bool      m_modeFlag[2];
+    bool      m_checkMergeAndSkipOnly[2];
+
     Analysis();
 
     bool create(ThreadLocalData* tld);
diff -r 9723e8812e63 -r 6b248ccb1416 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Fri Nov 17 14:16:31 2017 +0530
+++ b/source/encoder/encoder.cpp	Fri Nov 17 19:23:14 2017 +0530
@@ -48,6 +48,12 @@
 const char g_sliceTypeToChar[] = {'B', 'P', 'I'};
 }
 
+/* Threshold for motion vector magnitude, based on experimental results.
+ * TODO: come up with an algorithm for an adaptive threshold */
+
+#define MVTHRESHOLD 10
+#define PU_2Nx2N 1
+
 static const char* defaultAnalysisFileName = "x265_analysis.dat";
 
 using namespace X265_NS;
@@ -565,6 +571,14 @@
                             (interData)->mvpIdx[k][cuPos + cuOffset] = (srcInterData)->mvpIdx[k][(mbIndex * 16) + cuOffset];
                             (interData)->refIdx[k][cuPos + cuOffset] = (srcInterData)->refIdx[k][(mbIndex * 16) + cuOffset];
                             memcpy(&(interData)->mv[k][cuPos + cuOffset], &(srcInterData)->mv[k][(mbIndex * 16) + cuOffset], sizeof(MV));
+                            if (m_param->analysisReuseLevel == 7)
+                            {
+                                int mv_x = ((analysis_inter_data *)curFrame->m_analysisData.interData)->mv[k][(mbIndex * 16) + cuOffset].x;
+                                int mv_y = ((analysis_inter_data *)curFrame->m_analysisData.interData)->mv[k][(mbIndex * 16) + cuOffset].y;
+                                double mv = sqrt(mv_x*mv_x + mv_y*mv_y);
+                                if (numPU == PU_2Nx2N && ((srcInterData)->depth[cuPos + cuOffset] == (m_param->maxCUSize >> 5)) && mv <= MVTHRESHOLD)
+                                    memset(&curFrame->m_analysisData.modeFlag[k][cuPos + cuOffset], 1, bytes);
+                            }
                         }
                     }
                 }
@@ -624,6 +638,7 @@
                     int bytes = curFrame->m_analysisData.numPartitions >> ((interData)->depth[d] * 2);
                     memset(&(currInterData)->depth[count], (interData)->depth[d], bytes);
                     memset(&(currInterData)->modes[count], (interData)->modes[d], bytes);
+                    memcpy(&(currInterData)->sadCost[count], &((analysis_inter_data*)analysis_data->interData)->sadCost[d], bytes);
                     if (m_param->analysisReuseLevel > 4)
                     {
                         memset(&(currInterData)->partSize[count], (interData)->partSize[d], bytes);
@@ -639,6 +654,14 @@
                                     (currInterData)->mvpIdx[i][count + pu] = (interData)->mvpIdx[i][d];
                                     (currInterData)->refIdx[i][count + pu] = (interData)->refIdx[i][d];
                                     memcpy(&(currInterData)->mv[i][count + pu], &(interData)->mv[i][d], sizeof(MV));
+                                    if (m_param->analysisReuseLevel == 7)
+                                    {
+                                        int mv_x = ((analysis_inter_data *)curFrame->m_analysisData.interData)->mv[i][count + pu].x;
+                                        int mv_y = ((analysis_inter_data *)curFrame->m_analysisData.interData)->mv[i][count + pu].y;
+                                        double mv = sqrt(mv_x*mv_x + mv_y*mv_y);
+                                        if (numPU == PU_2Nx2N && m_param->num4x4Partitions <= 16 && mv <= MVTHRESHOLD)
+                                            memset(&curFrame->m_analysisData.modeFlag[i][count + pu], 1, bytes);
+                                    }
                                 }
                             }
                         }
@@ -3116,12 +3139,14 @@
             if (m_param->analysisReuseLevel >= 7)
             {
                 X265_FREE(((analysis_inter_data*)analysis->interData)->interDir);
+                X265_FREE(((analysis_inter_data*)analysis->interData)->sadCost);
                 int numDir = analysis->sliceType == X265_TYPE_P ? 1 : 2;
                 for (int dir = 0; dir < numDir; dir++)
                 {
                     X265_FREE(((analysis_inter_data*)analysis->interData)->mvpIdx[dir]);
                     X265_FREE(((analysis_inter_data*)analysis->interData)->refIdx[dir]);
                     X265_FREE(((analysis_inter_data*)analysis->interData)->mv[dir]);
+                    X265_FREE(analysis->modeFlag[dir]);
                 }
             }
             else
diff -r 9723e8812e63 -r 6b248ccb1416 source/x265.h
--- a/source/x265.h	Fri Nov 17 14:16:31 2017 +0530
+++ b/source/x265.h	Fri Nov 17 19:23:14 2017 +0530
@@ -123,6 +123,7 @@
     void*            intraData;
     uint32_t         numCuInHeight;
     x265_lookahead_data lookahead;
+    uint8_t*         modeFlag[2];
 } x265_analysis_data;
 
 /* cu statistics */