[x265] [PATCH] analysis: Improve and fix indexing issue for analysis-load/save

Sagar Kotecha sagar at multicorewareinc.com
Wed Mar 9 09:51:59 CET 2016


# HG changeset patch
# User Sagar Kotecha<sagar at multicorewareinc.com>
# Date 1457415892 -19800
#      Tue Mar 08 11:14:52 2016 +0530
# Node ID 67b63012de821913ecde12c000d09a5a8e5c76fa
# Parent  42fd78c163fbae6992493ade1ac0f693b0d38d84
analysis: Improve and fix indexing issue for analysis-load/save

diff -r 42fd78c163fb -r 67b63012de82 source/common/cudata.cpp
--- a/source/common/cudata.cpp	Mon Mar 07 14:31:17 2016 +0530
+++ b/source/common/cudata.cpp	Tue Mar 08 11:14:52 2016 +0530
@@ -2089,6 +2089,7 @@
                 cu->absPartIdx = g_depthScanIdx[yOffset][xOffset] * 4;
                 cu->numPartitions = (NUM_4x4_PARTITIONS >> ((g_maxLog2CUSize - cu->log2CUSize) * 2));
                 cu->depth = g_log2Size[maxCUSize] - log2CUSize;
+                cu->geomRecurId = cuIdx;
 
                 cu->flags = 0;
                 CU_SET_FLAG(cu->flags, CUGeom::PRESENT, presentFlag);
diff -r 42fd78c163fb -r 67b63012de82 source/common/cudata.h
--- a/source/common/cudata.h	Mon Mar 07 14:31:17 2016 +0530
+++ b/source/common/cudata.h	Tue Mar 08 11:14:52 2016 +0530
@@ -87,6 +87,7 @@
     uint32_t numPartitions; // Number of 4x4 blocks in the CU
     uint32_t flags;         // CU flags.
     uint32_t depth;         // depth of this CU relative from CTU
+    uint32_t geomRecurId;   // Unique geom id from 0 to MAX_GEOMS - 1 for every depth
 };
 
 struct MVField
diff -r 42fd78c163fb -r 67b63012de82 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Mon Mar 07 14:31:17 2016 +0530
+++ b/source/encoder/analysis.cpp	Tue Mar 08 11:14:52 2016 +0530
@@ -147,6 +147,9 @@
         int numPredDir = m_slice->isInterP() ? 1 : 2;
         m_reuseInterDataCTU = (analysis_inter_data*)m_frame->m_analysisData.interData;
         m_reuseRef = &m_reuseInterDataCTU->ref[ctu.m_cuAddr * X265_MAX_PRED_MODE_PER_CTU * numPredDir];
+        if (m_param->analysisMode == X265_ANALYSIS_SAVE)
+            for (int i = 0; i < X265_MAX_PRED_MODE_PER_CTU * numPredDir; i++)
+                m_reuseRef[i] = -1;
     }
     ProfileCUScope(ctu, totalCTUTime, totalCTUs);
 
@@ -2075,15 +2078,15 @@
 
     if (m_param->analysisMode == X265_ANALYSIS_LOAD && m_reuseInterDataCTU)
     {
+        int refOffset = cuGeom.geomRecurId * 16 * numPredDir + partSize * numPredDir * 2;
+        int index = 0;
+
         uint32_t numPU = interMode.cu.getNumPartInter(0);
         for (uint32_t part = 0; part < numPU; part++)
         {
             MotionData* bestME = interMode.bestME[part];
             for (int32_t i = 0; i < numPredDir; i++)
-            {
-                bestME[i].ref = *m_reuseRef;
-                m_reuseRef++;
-            }
+                bestME[i].ref = m_reuseRef[refOffset + index++];
         }
     }
     predInterSearch(interMode, cuGeom, m_bChromaSa8d && (m_csp != X265_CSP_I400), refMask);
@@ -2102,15 +2105,15 @@
 
     if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_reuseInterDataCTU)
     {
+        int refOffset = cuGeom.geomRecurId * 16 * numPredDir + partSize * numPredDir * 2;
+        int index = 0;
+
         uint32_t numPU = interMode.cu.getNumPartInter(0);
         for (uint32_t puIdx = 0; puIdx < numPU; puIdx++)
         {
             MotionData* bestME = interMode.bestME[puIdx];
             for (int32_t i = 0; i < numPredDir; i++)
-            {
-                *m_reuseRef = bestME[i].ref;
-                m_reuseRef++;
-            }
+                m_reuseRef[refOffset + index++] = bestME[i].ref;
         }
     }
 }
@@ -2124,15 +2127,15 @@
 
     if (m_param->analysisMode == X265_ANALYSIS_LOAD && m_reuseInterDataCTU)
     {
+        int refOffset = cuGeom.geomRecurId * 16 * numPredDir + partSize * numPredDir * 2;
+        int index = 0;
+
         uint32_t numPU = interMode.cu.getNumPartInter(0);
         for (uint32_t puIdx = 0; puIdx < numPU; puIdx++)
         {
             MotionData* bestME = interMode.bestME[puIdx];
             for (int32_t i = 0; i < numPredDir; i++)
-            {
-                bestME[i].ref = *m_reuseRef;
-                m_reuseRef++;
-            }
+                bestME[i].ref = m_reuseRef[refOffset + index++];
         }
     }
     predInterSearch(interMode, cuGeom, m_csp != X265_CSP_I400, refMask);
@@ -2142,15 +2145,15 @@
 
     if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_reuseInterDataCTU)
     {
+        int refOffset = cuGeom.geomRecurId * 16 * numPredDir + partSize * numPredDir * 2;
+        int index = 0;
+
         uint32_t numPU = interMode.cu.getNumPartInter(0);
         for (uint32_t puIdx = 0; puIdx < numPU; puIdx++)
         {
             MotionData* bestME = interMode.bestME[puIdx];
             for (int32_t i = 0; i < numPredDir; i++)
-            {
-                *m_reuseRef = bestME[i].ref;
-                m_reuseRef++;
-            }
+                m_reuseRef[refOffset + index++] = bestME[i].ref;
         }
     }
 }
diff -r 42fd78c163fb -r 67b63012de82 source/encoder/search.cpp
--- a/source/encoder/search.cpp	Mon Mar 07 14:31:17 2016 +0530
+++ b/source/encoder/search.cpp	Tue Mar 08 11:14:52 2016 +0530
@@ -2130,8 +2130,8 @@
                     bestME[list].bits = bits;
                     bestME[list].mvCost  = mvCost;
                 }
-            }
-            bDoUnidir = false;
+                bDoUnidir = false;
+            }            
         }
         else if (m_param->bDistributeMotionEstimation)
         {


More information about the x265-devel mailing list