[x265] [PATCH] analysis-load: reuse lowres MVs computed in save mode

sagar at multicorewareinc.com sagar at multicorewareinc.com
Thu Nov 26 05:24:53 CET 2015


# HG changeset patch
# User Kavitha Sampath <kavitha at multicorewareinc.com>
# Date 1446793431 -19800
#      Fri Nov 06 12:33:51 2015 +0530
# Node ID a5d409d087dccdd2bfcf2222b6329a685e86fd71
# Parent  400d625864498f0f08629579157fb2f67703b7a1
analysis-load: reuse lowres MVs computed in save mode

Since analysis-save caches the slice type, analysis-load skips the lookahead.
Hence the lowres MVs are not computed and are not added to the MV candidate
list for motion estimation. This introduces a mismatch between the output of
save mode and load mode. Caching the lowres MVs in save mode and reusing them
in load mode fixes this mismatch.

diff -r 400d62586449 -r a5d409d087dc source/common/common.h
--- a/source/common/common.h	Wed Nov 04 13:55:37 2015 +0530
+++ b/source/common/common.h	Fri Nov 06 12:33:51 2015 +0530
@@ -370,25 +370,6 @@
         delete[] ctuParam[2];
     }
 };
-
-/* Stores inter analysis data for a single frame */
-struct analysis_inter_data
-{
-    int32_t*    ref;
-    uint8_t*    depth;
-    uint8_t*    modes;
-    uint32_t*   bestMergeCand;
-};
-
-/* Stores intra analysis data for a single frame. This struct needs better packing */
-struct analysis_intra_data
-{
-    uint8_t*  depth;
-    uint8_t*  modes;
-    char*     partSizes;
-    uint8_t*  chromaModes;
-};
-
 enum TextType
 {
     TEXT_LUMA     = 0,  // luma
diff -r 400d62586449 -r a5d409d087dc source/common/framedata.h
--- a/source/common/framedata.h	Wed Nov 04 13:55:37 2015 +0530
+++ b/source/common/framedata.h	Fri Nov 06 12:33:51 2015 +0530
@@ -152,9 +152,26 @@
     bool create(const x265_param& param, const SPS& sps);
     void reinit(const SPS& sps);
     void destroy();
-
     inline CUData* getPicCTU(uint32_t ctuAddr) { return &m_picCTU[ctuAddr]; }
 };
+
+/* Stores intra analysis data for a single frame. This struct needs better packing */
+struct analysis_intra_data
+{
+    uint8_t*  depth;
+    uint8_t*  modes;
+    char*     partSizes;
+    uint8_t*  chromaModes;
+};
+
+/* Stores inter analysis data for a single frame */
+struct analysis_inter_data
+{
+    MV*         mv;
+    int32_t*    ref;
+    uint8_t*    depth;
+    uint8_t*    modes;
+    uint32_t*   bestMergeCand;
+};
 }
-
 #endif // ifndef X265_FRAMEDATA_H
diff -r 400d62586449 -r a5d409d087dc source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Wed Nov 04 13:55:37 2015 +0530
+++ b/source/encoder/analysis.cpp	Fri Nov 06 12:33:51 2015 +0530
@@ -75,8 +75,8 @@
     m_reuseInterDataCTU = NULL;
     m_reuseRef = NULL;
     m_reuseBestMergeCand = NULL;
+    m_reuseMv = NULL;
 }
-
 bool Analysis::create(ThreadLocalData *tld)
 {
     m_tld = tld;
@@ -143,8 +143,8 @@
         m_reuseInterDataCTU = (analysis_inter_data*)m_frame->m_analysisData.interData;
         m_reuseRef = &m_reuseInterDataCTU->ref[ctu.m_cuAddr * X265_MAX_PRED_MODE_PER_CTU * numPredDir];
         m_reuseBestMergeCand = &m_reuseInterDataCTU->bestMergeCand[ctu.m_cuAddr * CUGeom::MAX_GEOMS];
+        m_reuseMv = &m_reuseInterDataCTU->mv[ctu.m_cuAddr * X265_MAX_PRED_MODE_PER_CTU * numPredDir];
     }
-
     ProfileCUScope(ctu, totalCTUTime, totalCTUs);
 
     if (m_slice->m_sliceType == I_SLICE)
@@ -1797,11 +1797,15 @@
         first = *m_reuseBestMergeCand;
         last = first + 1;
         int numPred = m_slice->isInterB() + 1;
-
-        /* skip refs used for 2Nx2N, Nx2N, 2NxN inter predictions if best mode is SKIP */
+        /* skip refs and mvs used for 2Nx2N, Nx2N, 2NxN inter predictions if best mode is SKIP */
         m_reuseRef += numPred;
+        m_reuseMv += numPred;
         if (m_param->bEnableRectInter)
-            m_reuseRef += (numPred * 2 * 2);
+        {
+           int inc = numPred * 2 * 2;
+            m_reuseRef += inc;
+            m_reuseMv += inc;
+        }
     }
     int safeX, maxSafeMv;
     if (m_param->bIntraRefresh && m_slice->m_sliceType == P_SLICE)
@@ -1925,6 +1929,9 @@
             {
                 bestME[i].ref = *m_reuseRef;
                 m_reuseRef++;
+
+                bestME[i].mv = *m_reuseMv;
+                m_reuseMv++;
             }
         }
     }
@@ -1948,11 +1955,15 @@
         uint32_t numPU = interMode.cu.getNumPartInter(0);
         for (uint32_t puIdx = 0; puIdx < numPU; puIdx++)
         {
+            PredictionUnit pu(interMode.cu, cuGeom, puIdx);
             MotionData* bestME = interMode.bestME[puIdx];
             for (int32_t i = 0; i < numPredDir; i++)
             {
                 *m_reuseRef = bestME[i].ref;
                 m_reuseRef++;
+
+                *m_reuseMv = getLowresMV(interMode.cu, pu, i, bestME[i].ref);
+                m_reuseMv++;
             }
         }
     }
@@ -1975,6 +1986,9 @@
             {
                 bestME[i].ref = *m_reuseRef;
                 m_reuseRef++;
+
+                bestME[i].mv = *m_reuseMv;
+                m_reuseMv++;
             }
         }
     }
@@ -1989,11 +2003,15 @@
         uint32_t numPU = interMode.cu.getNumPartInter(0);
         for (uint32_t puIdx = 0; puIdx < numPU; puIdx++)
         {
+            PredictionUnit pu(interMode.cu, cuGeom, puIdx);
             MotionData* bestME = interMode.bestME[puIdx];
             for (int32_t i = 0; i < numPredDir; i++)
             {
                 *m_reuseRef = bestME[i].ref;
                 m_reuseRef++;
+
+                *m_reuseMv = getLowresMV(interMode.cu, pu, i, bestME[i].ref);
+                m_reuseMv++;
             }
         }
     }
diff -r 400d62586449 -r a5d409d087dc source/encoder/analysis.h
--- a/source/encoder/analysis.h	Wed Nov 04 13:55:37 2015 +0530
+++ b/source/encoder/analysis.h	Fri Nov 06 12:33:51 2015 +0530
@@ -117,12 +117,11 @@
     Mode& compressCTU(CUData& ctu, Frame& frame, const CUGeom& cuGeom, const Entropy& initialContext);
 
 protected:
-
     /* Analysis data for load/save modes, keeps getting incremented as CTU analysis proceeds and data is consumed or read */
     analysis_inter_data* m_reuseInterDataCTU;
+    MV*                  m_reuseMv;
     int32_t*             m_reuseRef;
     uint32_t*            m_reuseBestMergeCand;
-
     uint32_t m_splitRefIdx[4];
 
     /* full analysis for an I-slice CU */
diff -r 400d62586449 -r a5d409d087dc source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Wed Nov 04 13:55:37 2015 +0530
+++ b/source/encoder/encoder.cpp	Fri Nov 06 12:33:51 2015 +0530
@@ -1831,6 +1831,7 @@
         CHECKED_MALLOC(interData->depth, uint8_t, analysis->numPartitions * analysis->numCUsInFrame);
         CHECKED_MALLOC(interData->modes, uint8_t, analysis->numPartitions * analysis->numCUsInFrame);
         CHECKED_MALLOC_ZERO(interData->bestMergeCand, uint32_t, analysis->numCUsInFrame * CUGeom::MAX_GEOMS);
+        CHECKED_MALLOC_ZERO(interData->mv, MV, analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2);
         analysis->interData = interData;
     }
     return;
@@ -1856,6 +1857,7 @@
         X265_FREE(((analysis_inter_data*)analysis->interData)->depth);
         X265_FREE(((analysis_inter_data*)analysis->interData)->modes);
         X265_FREE(((analysis_inter_data*)analysis->interData)->bestMergeCand);
+        X265_FREE(((analysis_inter_data*)analysis->interData)->mv);
         X265_FREE(analysis->interData);
     }
 }
@@ -1923,6 +1925,7 @@
         X265_FREAD(((analysis_inter_data *)analysis->interData)->depth, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
         X265_FREAD(((analysis_inter_data *)analysis->interData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
         X265_FREAD(((analysis_inter_data *)analysis->interData)->bestMergeCand, sizeof(uint32_t), analysis->numCUsInFrame * CUGeom::MAX_GEOMS, m_analysisFile);
+        X265_FREAD(((analysis_inter_data *)analysis->interData)->mv, sizeof(MV), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU, m_analysisFile);
         consumedBytes += frameRecordSize;
         totalConsumedBytes = consumedBytes;
     }
@@ -1932,6 +1935,7 @@
         X265_FREAD(((analysis_inter_data *)analysis->interData)->depth, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
         X265_FREAD(((analysis_inter_data *)analysis->interData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
         X265_FREAD(((analysis_inter_data *)analysis->interData)->bestMergeCand, sizeof(uint32_t), analysis->numCUsInFrame * CUGeom::MAX_GEOMS, m_analysisFile);
+        X265_FREAD(((analysis_inter_data *)analysis->interData)->mv, sizeof(MV), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2, m_analysisFile);
         consumedBytes += frameRecordSize;
     }
 #undef X265_FREAD
@@ -1959,14 +1963,15 @@
         analysis->frameRecordSize += sizeof(int32_t) * analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU;
         analysis->frameRecordSize += sizeof(uint8_t) * analysis->numCUsInFrame * analysis->numPartitions * 2;
         analysis->frameRecordSize += sizeof(uint32_t) * analysis->numCUsInFrame * CUGeom::MAX_GEOMS;
+        analysis->frameRecordSize += sizeof(MV) * analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU;
     }
     else
     {
         analysis->frameRecordSize += sizeof(int32_t) * analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2;
         analysis->frameRecordSize += sizeof(uint8_t) * analysis->numCUsInFrame * analysis->numPartitions * 2;
         analysis->frameRecordSize += sizeof(uint32_t) * analysis->numCUsInFrame * CUGeom::MAX_GEOMS;
+        analysis->frameRecordSize += sizeof(MV) * analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2;
     }
-
     X265_FWRITE(&analysis->frameRecordSize, sizeof(uint32_t), 1, m_analysisFile);
     X265_FWRITE(&analysis->poc, sizeof(int), 1, m_analysisFile);
     X265_FWRITE(&analysis->sliceType, sizeof(int), 1, m_analysisFile);
@@ -1986,6 +1991,7 @@
         X265_FWRITE(((analysis_inter_data*)analysis->interData)->depth, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
         X265_FWRITE(((analysis_inter_data*)analysis->interData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
         X265_FWRITE(((analysis_inter_data*)analysis->interData)->bestMergeCand, sizeof(uint32_t), analysis->numCUsInFrame * CUGeom::MAX_GEOMS, m_analysisFile);
+        X265_FWRITE(((analysis_inter_data*)analysis->interData)->mv, sizeof(MV), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU, m_analysisFile);
     }
     else
     {
@@ -1993,6 +1999,7 @@
         X265_FWRITE(((analysis_inter_data*)analysis->interData)->depth, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
         X265_FWRITE(((analysis_inter_data*)analysis->interData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
         X265_FWRITE(((analysis_inter_data*)analysis->interData)->bestMergeCand, sizeof(uint32_t), analysis->numCUsInFrame * CUGeom::MAX_GEOMS, m_analysisFile);
+        X265_FWRITE(((analysis_inter_data*)analysis->interData)->mv, sizeof(MV), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2, m_analysisFile);
     }
 #undef X265_FWRITE
 }
diff -r 400d62586449 -r a5d409d087dc source/encoder/search.cpp
--- a/source/encoder/search.cpp	Wed Nov 04 13:55:37 2015 +0530
+++ b/source/encoder/search.cpp	Fri Nov 06 12:33:51 2015 +0530
@@ -2068,8 +2068,7 @@
                 const MV* amvp = interMode.amvpCand[list][ref];
                 int mvpIdx = selectMVP(cu, pu, amvp, list, ref);
                 MV mvmin, mvmax, outmv, mvp = amvp[mvpIdx];
-
-                MV lmv = getLowresMV(cu, pu, list, ref);
+                MV lmv = bestME[list].mv;
                 if (lmv.notZero())
                     mvc[numMvc++] = lmv;
 


More information about the x265-devel mailing list