[x265] [PATCH] Clean up dynamic refinement

bhavna at multicorewareinc.com bhavna at multicorewareinc.com
Tue May 22 10:44:11 CEST 2018


# HG changeset patch
# User Bhavna Hariharan <bhavna at multicorewareinc.com>
# Date 1526964471 -19800
#      Tue May 22 10:17:51 2018 +0530
# Node ID 5587d9a25248075edadf94e1a78f6e11d091f651
# Parent  cc2c5e46f3c87d27e3602af30b06ba6a0fbe2704
Clean up dynamic refinement

This patch does the following:
1) Earlier, locks were used to avoid the possibility of race conditions while
copying data from CTU level to frame level. Now, the data is collected for each
row and when the entire frame completes analysis the row data is copied to the
frame. This method eliminates the possibility of a race condition without
having to employ locks.
2) Allocate memory for the CTU information from the data pool; this will avoid
fragmentation of data.

diff -r cc2c5e46f3c8 -r 5587d9a25248 source/common/common.h
--- a/source/common/common.h	Mon May 21 18:42:29 2018 +0530
+++ b/source/common/common.h	Tue May 22 10:17:51 2018 +0530
@@ -332,6 +332,8 @@
 #define START_CODE_OVERHEAD 3 
 #define FILLER_OVERHEAD (NAL_TYPE_OVERHEAD + START_CODE_OVERHEAD + 1)
 
+#define MAX_NUM_DYN_REFINE          ((NUM_CU_DEPTH - 1) * X265_REFINE_INTER_LEVELS)
+
 namespace X265_NS {
 
 enum { SAO_NUM_OFFSET = 4 };
diff -r cc2c5e46f3c8 -r 5587d9a25248 source/common/cudata.cpp
--- a/source/common/cudata.cpp	Mon May 21 18:42:29 2018 +0530
+++ b/source/common/cudata.cpp	Tue May 22 10:17:51 2018 +0530
@@ -274,6 +274,9 @@
         for (int i = 0; i < 3; i++)
             m_fAc_den[i] = m_fDc_den[i] = 0;
     }
+    m_collectCURd = dataPool.dynRefineRdBlock + (instance * MAX_NUM_DYN_REFINE);
+    m_collectCUVariance = dataPool.dynRefVarBlock + (instance * MAX_NUM_DYN_REFINE);
+    m_collectCUCount = dataPool.dynRefCntBlock + (instance * MAX_NUM_DYN_REFINE);
 }
 
 void CUData::initCTU(const Frame& frame, uint32_t cuAddr, int qp, uint32_t firstRowInSlice, uint32_t lastRowInSlice, uint32_t lastCuInSlice)
@@ -318,15 +321,9 @@
     m_cuAboveRight = (m_cuAbove && ((m_cuAddr % widthInCU) < (widthInCU - 1))) ? m_encData->getPicCTU(m_cuAddr - widthInCU + 1) : NULL;
     memset(m_distortion, 0, m_numPartitions * sizeof(sse_t));
 
-    if (m_encData->m_param->bDynamicRefine)
-    {
-        int size = m_encData->m_param->maxCUDepth * X265_REFINE_INTER_LEVELS;
-        CHECKED_MALLOC_ZERO(m_collectCURd, uint64_t, size);
-        CHECKED_MALLOC_ZERO(m_collectCUVariance, uint32_t, size);
-        CHECKED_MALLOC_ZERO(m_collectCUCount, uint32_t, size);
-    }
-fail:
-    return;
+    memset(m_collectCURd, 0, MAX_NUM_DYN_REFINE * sizeof(uint64_t));
+    memset(m_collectCUVariance, 0, MAX_NUM_DYN_REFINE * sizeof(uint32_t));
+    memset(m_collectCUCount, 0, MAX_NUM_DYN_REFINE * sizeof(uint32_t));
 }
 
 // initialize Sub partition
diff -r cc2c5e46f3c8 -r 5587d9a25248 source/common/cudata.h
--- a/source/common/cudata.h	Mon May 21 18:42:29 2018 +0530
+++ b/source/common/cudata.h	Tue May 22 10:17:51 2018 +0530
@@ -353,8 +353,12 @@
     coeff_t* trCoeffMemBlock;
     MV*      mvMemBlock;
     sse_t*   distortionMemBlock;
+    uint64_t* dynRefineRdBlock;
+    uint32_t* dynRefCntBlock;
+    uint32_t* dynRefVarBlock;
 
-    CUDataMemPool() { charMemBlock = NULL; trCoeffMemBlock = NULL; mvMemBlock = NULL; distortionMemBlock = NULL; }
+    CUDataMemPool() { charMemBlock = NULL; trCoeffMemBlock = NULL; mvMemBlock = NULL; distortionMemBlock = NULL; 
+                      dynRefineRdBlock = NULL; dynRefCntBlock = NULL; dynRefVarBlock = NULL;}
 
     bool create(uint32_t depth, uint32_t csp, uint32_t numInstances, const x265_param& param)
     {
@@ -373,6 +377,9 @@
         CHECKED_MALLOC(charMemBlock, uint8_t, numPartition * numInstances * CUData::BytesPerPartition);
         CHECKED_MALLOC_ZERO(mvMemBlock, MV, numPartition * 4 * numInstances);
         CHECKED_MALLOC(distortionMemBlock, sse_t, numPartition * numInstances);
+        CHECKED_MALLOC_ZERO(dynRefineRdBlock, uint64_t, MAX_NUM_DYN_REFINE * numInstances);
+        CHECKED_MALLOC_ZERO(dynRefCntBlock, uint32_t, MAX_NUM_DYN_REFINE * numInstances);
+        CHECKED_MALLOC_ZERO(dynRefVarBlock, uint32_t, MAX_NUM_DYN_REFINE * numInstances);
         return true;
     fail:
         return false;
@@ -384,6 +391,9 @@
         X265_FREE(mvMemBlock);
         X265_FREE(charMemBlock);
         X265_FREE(distortionMemBlock);
+        X265_FREE(dynRefineRdBlock);
+        X265_FREE(dynRefCntBlock);
+        X265_FREE(dynRefVarBlock);
     }
 };
 }
diff -r cc2c5e46f3c8 -r 5587d9a25248 source/common/framedata.h
--- a/source/common/framedata.h	Mon May 21 18:42:29 2018 +0530
+++ b/source/common/framedata.h	Tue May 22 10:17:51 2018 +0530
@@ -88,6 +88,11 @@
     uint64_t    cntInterPu[NUM_CU_DEPTH][INTER_MODES - 1];
     uint64_t    cntMergePu[NUM_CU_DEPTH][INTER_MODES - 1];
 
+    /* Feature values per row for dynamic refinement */
+    uint64_t       rowRdDyn[MAX_NUM_DYN_REFINE];
+    uint32_t       rowVarDyn[MAX_NUM_DYN_REFINE];
+    uint32_t       rowCntDyn[MAX_NUM_DYN_REFINE];
+
     FrameStats()
     {
         memset(this, 0, sizeof(FrameStats));
diff -r cc2c5e46f3c8 -r 5587d9a25248 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Mon May 21 18:42:29 2018 +0530
+++ b/source/encoder/frameencoder.cpp	Tue May 22 10:17:51 2018 +0530
@@ -956,6 +956,9 @@
         }  
     } // end of (m_param->maxSlices > 1)
 
+    if (m_param->bDynamicRefine && m_top->m_startPoint <= m_frame->m_encodeOrder) //Avoid collecting data that will not be used by future frames.
+        collectDynDataFrame();
+
     if (m_param->rc.bStatWrite)
     {
         int totalI = 0, totalP = 0, totalSkip = 0;
@@ -1494,31 +1497,12 @@
 
         // Does all the CU analysis, returns best top level mode decision
         Mode& best = tld.analysis.compressCTU(*ctu, *m_frame, m_cuGeoms[m_ctuGeomMap[cuAddr]], rowCoder);
-        if (m_param->bDynamicRefine)
-        {
-            if (m_top->m_startPoint <= m_frame->m_encodeOrder) // Avoid collecting data that will not be used by future frames.
-            {
-                ScopedLock dynLock(m_top->m_dynamicRefineLock);
-                for (uint32_t i = 0; i < X265_REFINE_INTER_LEVELS; i++)
-                {
-                    for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++)
-                    {
-                        int offset = (depth * X265_REFINE_INTER_LEVELS) + i;
-                        int curFrameIndex = m_frame->m_encodeOrder - m_top->m_startPoint;
-                        int index = (curFrameIndex * X265_REFINE_INTER_LEVELS * m_param->maxCUDepth) + offset;
-                        if (ctu->m_collectCUCount[offset])
-                        {
-                            m_top->m_variance[index] += ctu->m_collectCUVariance[offset];
-                            m_top->m_rdCost[index] += ctu->m_collectCURd[offset];
-                            m_top->m_trainingCount[index] += ctu->m_collectCUCount[offset];
-                        }
-                    }
-                }
-            }
-            X265_FREE_ZERO(ctu->m_collectCUVariance);
-            X265_FREE_ZERO(ctu->m_collectCURd);
-            X265_FREE_ZERO(ctu->m_collectCUCount);
-        }
+
+        /* startPoint > encodeOrder is true when the start point changes for
+        a new GOP but a few frames from the previous GOP are still incomplete.
+        The data of frames in this interval will not be used by any future frames. */
+        if (m_param->bDynamicRefine && m_top->m_startPoint <= m_frame->m_encodeOrder)
+            collectDynDataRow(*ctu, &curRow.rowStats);
 
         // take a sample of the current active worker count
         ATOMIC_ADD(&m_totalActiveWorkerCount, m_activeWorkerCount);
@@ -1901,6 +1885,46 @@
     if (ATOMIC_INC(&m_completionCount) == 2 * (int)m_numRows)
         m_completionEvent.trigger();
 }
+
+void FrameEncoder::collectDynDataRow(CUData& ctu, FrameStats* rowStats) // Adds one CTU's dynamic-refinement variance, RD cost and count into rowStats.
+{
+    for (uint32_t i = 0; i < X265_REFINE_INTER_LEVELS; i++) // i: refinement level
+    {
+        for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++)
+        {
+            int offset = (depth * X265_REFINE_INTER_LEVELS) + i; // depth-major index: X265_REFINE_INTER_LEVELS entries per depth
+            if (ctu.m_collectCUCount[offset]) // skip entries for which this CTU recorded no count
+            {
+                rowStats->rowVarDyn[offset] += ctu.m_collectCUVariance[offset];
+                rowStats->rowRdDyn[offset] += ctu.m_collectCURd[offset];
+                rowStats->rowCntDyn[offset] += ctu.m_collectCUCount[offset];
+            }
+        }
+    }
+}
+
+void FrameEncoder::collectDynDataFrame() // Sums the per-row dynamic-refinement stats of all m_numRows rows into the m_top arrays.
+{
+    for (uint32_t row = 0; row < m_numRows; row++)
+    {
+        for (uint32_t refLevel = 0; refLevel < X265_REFINE_INTER_LEVELS; refLevel++)
+        {
+            for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++)
+            {
+                int offset = (depth * X265_REFINE_INTER_LEVELS) + refLevel; // same depth-major layout as the row arrays
+                int curFrameIndex = m_frame->m_encodeOrder - m_top->m_startPoint; // frame position relative to m_top->m_startPoint
+                int index = (curFrameIndex * X265_REFINE_INTER_LEVELS * m_param->maxCUDepth) + offset; // this frame's segment of the m_top arrays, plus offset
+                if (m_rows[row].rowStats.rowCntDyn[offset]) // skip entries with a zero count in this row
+                {
+                    m_top->m_variance[index] += m_rows[row].rowStats.rowVarDyn[offset];
+                    m_top->m_rdCost[index] += m_rows[row].rowStats.rowRdDyn[offset];
+                    m_top->m_trainingCount[index] += m_rows[row].rowStats.rowCntDyn[offset];
+                }
+            }
+        }
+    }
+}
+
 void FrameEncoder::computeAvgTrainingData()
 {
     if (m_frame->m_lowres.bScenecut || m_frame->m_lowres.bKeyframe)
diff -r cc2c5e46f3c8 -r 5587d9a25248 source/encoder/frameencoder.h
--- a/source/encoder/frameencoder.h	Mon May 21 18:42:29 2018 +0530
+++ b/source/encoder/frameencoder.h	Tue May 22 10:17:51 2018 +0530
@@ -243,6 +243,8 @@
 #if ENABLE_LIBVMAF
     void vmafFrameLevelScore();
 #endif
+    void collectDynDataRow(CUData& ctu, FrameStats* rowStats);
+    void collectDynDataFrame();
 };
 }
 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: x265-clone.patch
Type: text/x-patch
Size: 9849 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20180522/817bb722/attachment-0001.bin>


More information about the x265-devel mailing list