[x265-commits] [x265] deblock: removed bonded task group support in filtering

Ashok Kumar Mishra ashok at multicorewareinc.com
Fri Sep 29 01:03:03 CEST 2017


details:   http://hg.videolan.org/x265/rev/1a7edb6fd993
branches:  
changeset: 11880:1a7edb6fd993
user:      Ashok Kumar Mishra <ashok at multicorewareinc.com>
date:      Thu Jun 29 13:13:56 2017 +0530
description:
deblock: removed bonded task group support in filtering
Since filtering is a very lightweight process compared to encoding a CTU,
there is no need to use a bonded task group for filtering. Removing the
bonded task group gives a small improvement in performance.
Subject: [x265] wavefront: fix for triggering rows for multiple slices when wpp is enabled

details:   http://hg.videolan.org/x265/rev/71f700844b0b
branches:  
changeset: 11881:71f700844b0b
user:      Ashok Kumar Mishra <ashok at multicorewareinc.com>
date:      Tue Sep 12 18:13:03 2017 +0530
description:
wavefront: fix for triggering rows for multiple slices when wpp is enabled
When WPP is enabled, rows must be triggered for encoding alternately across slices (one row from each slice in turn).
Subject: [x265] fix multiple insertion of payloadSize into bitstream

details:   http://hg.videolan.org/x265/rev/0967d0add97e
branches:  
changeset: 11882:0967d0add97e
user:      Bhavna Hariharan <bhavna at multicorewareinc.com>
date:      Mon Sep 25 18:35:07 2017 +0530
description:
fix multiple insertion of payloadSize into bitstream

bitbucket issue #369

# HG changeset patch
# User Bhavna Hariharan <bhavna at multicorewareinc.com>
# Date 1506344707 -19800
#      Mon Sep 25 18:35:07 2017 +0530
# Node ID 0fb6bc88eb81b22ae1ec693f67b715606025fe8d
# Parent  f8ae7afc1f61ed0db3b2f23f5d581706fe6ed677
fix multiple insertion of payloadSize into bitstream

bitbucket issue #369

diffstat:

 source/common/wavefront.cpp     |   6 +++
 source/common/wavefront.h       |   4 ++
 source/encoder/encoder.cpp      |   6 +-
 source/encoder/frameencoder.cpp |  73 +++++++++++++++++++---------------------
 source/encoder/framefilter.cpp  |   6 +--
 source/encoder/framefilter.h    |   6 +--
 6 files changed, 50 insertions(+), 51 deletions(-)

diffs (truncated from 319 to 300 lines):

diff -r f8ae7afc1f61 -r 0967d0add97e source/common/wavefront.cpp
--- a/source/common/wavefront.cpp	Mon Sep 11 11:12:19 2017 +0530
+++ b/source/common/wavefront.cpp	Mon Sep 25 18:35:07 2017 +0530
@@ -43,11 +43,17 @@ bool WaveFront::init(int numRows)
     if (m_externalDependencyBitmap)
         memset((void*)m_externalDependencyBitmap, 0, sizeof(uint32_t) * m_numWords);
 
+    m_row_to_idx = X265_MALLOC(uint32_t, m_numRows);
+    m_idx_to_row = X265_MALLOC(uint32_t, m_numRows);
+
     return m_internalDependencyBitmap && m_externalDependencyBitmap;
 }
 
 WaveFront::~WaveFront()
 {
+    x265_free((void*)m_row_to_idx);
+    x265_free((void*)m_idx_to_row);
+
     x265_free((void*)m_internalDependencyBitmap);
     x265_free((void*)m_externalDependencyBitmap);
 }
diff -r f8ae7afc1f61 -r 0967d0add97e source/common/wavefront.h
--- a/source/common/wavefront.h	Mon Sep 11 11:12:19 2017 +0530
+++ b/source/common/wavefront.h	Mon Sep 25 18:35:07 2017 +0530
@@ -52,6 +52,10 @@ private:
 
     int m_numRows;
 
+protected:
+    uint32_t *m_row_to_idx;
+    uint32_t *m_idx_to_row;
+
 public:
 
     WaveFront()
diff -r f8ae7afc1f61 -r 0967d0add97e source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Mon Sep 11 11:12:19 2017 +0530
+++ b/source/encoder/encoder.cpp	Mon Sep 25 18:35:07 2017 +0530
@@ -631,12 +631,12 @@ int Encoder::encode(const x265_picture* 
                 int32_t i = 0;
                 toneMap.payloadSize = 0;
                 while (m_cim[currentPOC][i] == 0xFF)
-                    toneMap.payloadSize += m_cim[currentPOC][i++] + 1;
-                toneMap.payloadSize += m_cim[currentPOC][i] + 1;
+                    toneMap.payloadSize += m_cim[currentPOC][i++];
+                toneMap.payloadSize += m_cim[currentPOC][i];
 
                 toneMap.payload = (uint8_t*)x265_malloc(sizeof(uint8_t) * toneMap.payloadSize);
                 toneMap.payloadType = USER_DATA_REGISTERED_ITU_T_T35;
-                memcpy(toneMap.payload, m_cim[currentPOC], toneMap.payloadSize);
+                memcpy(toneMap.payload, &m_cim[currentPOC][i+1], toneMap.payloadSize);
             }
         }
 #endif
diff -r f8ae7afc1f61 -r 0967d0add97e source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Mon Sep 11 11:12:19 2017 +0530
+++ b/source/encoder/frameencoder.cpp	Mon Sep 25 18:35:07 2017 +0530
@@ -701,6 +701,24 @@ void FrameEncoder::compressFrame()
     {
         m_rows[m_sliceBaseRow[sliceId]].active = true;
     }
+    if (m_param->bEnableWavefront)
+    {
+        int i = 0;
+        for (uint32_t rowInSlice = 0; rowInSlice < m_sliceGroupSize; rowInSlice++)
+        {
+            for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)
+            {
+                const uint32_t sliceStartRow = m_sliceBaseRow[sliceId];
+                const uint32_t sliceEndRow = m_sliceBaseRow[sliceId + 1] - 1;
+                const uint32_t row = sliceStartRow + rowInSlice;
+                if (row > sliceEndRow)
+                    continue;
+                m_row_to_idx[row] = i;
+                m_idx_to_row[i] = row;
+                i += 1;
+            }
+        }
+    }
 
     if (m_param->bEnableWavefront)
     {
@@ -735,11 +753,11 @@ void FrameEncoder::compressFrame()
                     }
                 }
 
-                enableRowEncoder(row); /* clear external dependency for this row */
+                enableRowEncoder(m_row_to_idx[row]); /* clear external dependency for this row */
                 if (!rowInSlice)
                 {
                     m_row0WaitTime = x265_mdate();
-                    enqueueRowEncoder(row); /* clear internal dependency, start wavefront */
+                    enqueueRowEncoder(m_row_to_idx[row]); /* clear internal dependency, start wavefront */
                 }
                 tryWakeOne();
             } // end of loop rowInSlice
@@ -1196,8 +1214,8 @@ void FrameEncoder::processRow(int row, i
     if (ATOMIC_INC(&m_activeWorkerCount) == 1 && m_stallStartTime)
         m_totalNoWorkerTime += x265_mdate() - m_stallStartTime;
 
-    const uint32_t realRow = row >> 1;
-    const uint32_t typeNum = row & 1;
+    const uint32_t realRow = m_idx_to_row[row >> 1];
+    const uint32_t typeNum = m_idx_to_row[row & 1];
 
     if (!typeNum)
         processRowEncoder(realRow, m_tld[threadId]);
@@ -1207,7 +1225,7 @@ void FrameEncoder::processRow(int row, i
 
         // NOTE: Active next row
         if (realRow != m_sliceBaseRow[m_rows[realRow].sliceId + 1] - 1)
-            enqueueRowFilter(realRow + 1);
+            enqueueRowFilter(m_row_to_idx[realRow + 1]);
     }
 
     if (ATOMIC_DEC(&m_activeWorkerCount) == 0)
@@ -1264,10 +1282,7 @@ void FrameEncoder::processRowEncoder(int
     if (bFirstRowInSlice && !curRow.completed)
     {
         // Load SBAC coder context from previous row and initialize row state.
-        //rowCoder.copyState(m_initSliceContext);
-        //rowCoder.loadContexts(m_rows[row - 1].bufferedEntropy);
-        rowCoder.load(m_initSliceContext);
-        //m_rows[row - 1].bufferedEntropy.loadContexts(m_initSliceContext);
+        rowCoder.load(m_initSliceContext);        
     }
 
     // calculate mean QP for consistent deltaQP signalling calculation
@@ -1328,9 +1343,7 @@ void FrameEncoder::processRowEncoder(int
             }
             curRow.avgQPComputed = 1;
         }
-    }
-
-    // TODO: specially case handle on first and last row
+    }    
 
     // Initialize restrict on MV range in slices
     tld.analysis.m_sliceMinY = -(int16_t)(rowInSlice * m_param->maxCUSize * 4) + 3 * 4;
@@ -1428,15 +1441,10 @@ void FrameEncoder::processRowEncoder(int
         {
             // NOTE: in VBV mode, we may reencode anytime, so we can't do Deblock stage-Horizon and SAO
             if (!bIsVbv)
-            {
-                // TODO: Multiple Threading
-                // Delay ONE row to avoid Intra Prediction Conflict
+            {                
+                // Delay one row to avoid intra prediction conflict
                 if (m_pool && !bFirstRowInSlice)
-                {
-                    // Waitting last threading finish
-                    m_frameFilter.m_parallelFilter[row - 1].waitForExit();
-
-                    // Processing new group
+                {                    
                     int allowCol = col;
 
                     // avoid race condition on last column
@@ -1446,15 +1454,11 @@ void FrameEncoder::processRowEncoder(int
                                                                   : m_frameFilter.m_parallelFilter[row - 2].m_lastCol.get()), (int)col);
                     }
                     m_frameFilter.m_parallelFilter[row - 1].m_allowedCol.set(allowCol);
-                    m_frameFilter.m_parallelFilter[row - 1].tryBondPeers(*this, 1);
                 }
 
                 // Last Row may start early
                 if (m_pool && bLastRowInSlice)
                 {
-                    // Waiting for the last thread to finish
-                    m_frameFilter.m_parallelFilter[row].waitForExit();
-
                     // Deblocking last row
                     int allowCol = col;
 
@@ -1465,7 +1469,6 @@ void FrameEncoder::processRowEncoder(int
                                                                   : m_frameFilter.m_parallelFilter[row - 1].m_lastCol.get()), (int)col);
                     }
                     m_frameFilter.m_parallelFilter[row].m_allowedCol.set(allowCol);
-                    m_frameFilter.m_parallelFilter[row].tryBondPeers(*this, 1);
                 }
             } // end of !bIsVbv
         }
@@ -1481,7 +1484,7 @@ void FrameEncoder::processRowEncoder(int
         FrameStats frameLog;
         curEncData.m_rowStat[row].sumQpAq += collectCTUStatistics(*ctu, &frameLog);
 
-        // copy no. of intra, inter Cu cnt per row into frame stats for 2 pass
+        // copy number of intra, inter cu per row into frame stats for 2 pass
         if (m_param->rc.bStatWrite)
         {
             curRow.rowStats.mvBits    += best.mvBits;
@@ -1537,7 +1540,6 @@ void FrameEncoder::processRowEncoder(int
             }
             
             // If current block is at row end checkpoint, call vbv ratecontrol.
-
             if (!m_param->bEnableWavefront && col == numCols - 1)
             {
                 double qpBase = curEncData.m_cuStat[cuAddr].baseQp;
@@ -1566,9 +1568,7 @@ void FrameEncoder::processRowEncoder(int
                     curEncData.m_rowStat[row].sumQpAq = 0;
                 }
             }
-
             // If current block is at row diagonal checkpoint, call vbv ratecontrol.
-
             else if (m_param->bEnableWavefront && row == col && row)
             {
                 if (m_param->rc.bEnableConstVbv)
@@ -1667,7 +1667,7 @@ void FrameEncoder::processRowEncoder(int
                 m_rows[row + 1].completed + 2 <= curRow.completed)
             {
                 m_rows[row + 1].active = true;
-                enqueueRowEncoder(row + 1);
+                enqueueRowEncoder(m_row_to_idx[row + 1]);
                 tryWakeOne(); /* wake up a sleeping thread or set the help wanted flag */
             }
         }
@@ -1683,7 +1683,7 @@ void FrameEncoder::processRowEncoder(int
         }
     }
 
-    /** this row of CTUs has been compressed **/
+    /* this row of CTUs has been compressed */
     if (m_param->bEnableWavefront && m_param->rc.bEnableConstVbv)
     {
         if (row == m_numRows - 1)
@@ -1740,13 +1740,10 @@ void FrameEncoder::processRowEncoder(int
     /* Processing left Deblock block with current threading */
     if ((m_param->bEnableLoopFilter | m_param->bEnableSAO) & (rowInSlice >= 2))
     {
-        /* TODO: Multiple Threading */
-
         /* Check conditional to start previous row process with current threading */
         if (m_frameFilter.m_parallelFilter[row - 2].m_lastDeblocked.get() == (int)numCols)
         {
             /* stop threading on current row and restart it */
-            m_frameFilter.m_parallelFilter[row - 1].waitForExit();
             m_frameFilter.m_parallelFilter[row - 1].m_allowedCol.set(numCols);
             m_frameFilter.m_parallelFilter[row - 1].processTasks(-1);
         }
@@ -1757,11 +1754,11 @@ void FrameEncoder::processRowEncoder(int
     {
         if (rowInSlice >= m_filterRowDelay)
         {
-            enableRowFilter(row - m_filterRowDelay);
+            enableRowFilter(m_row_to_idx[row - m_filterRowDelay]);
 
             /* NOTE: Activate filter if first row (row 0) */
             if (rowInSlice == m_filterRowDelay)
-                enqueueRowFilter(row - m_filterRowDelay);
+                enqueueRowFilter(m_row_to_idx[row - m_filterRowDelay]);
             tryWakeOne();
         }
 
@@ -1769,7 +1766,7 @@ void FrameEncoder::processRowEncoder(int
         {
             for (uint32_t i = endRowInSlicePlus1 - m_filterRowDelay; i < endRowInSlicePlus1; i++)
             {
-                enableRowFilter(i);
+                enableRowFilter(m_row_to_idx[i]);
             }
             tryWakeOne();
         }
@@ -1777,7 +1774,7 @@ void FrameEncoder::processRowEncoder(int
         // handle specially case - single row slice
         if  (bFirstRowInSlice & bLastRowInSlice)
         {
-            enqueueRowFilter(row);
+            enqueueRowFilter(m_row_to_idx[row]);
             tryWakeOne();
         }
     }
diff -r f8ae7afc1f61 -r 0967d0add97e source/encoder/framefilter.cpp
--- a/source/encoder/framefilter.cpp	Mon Sep 11 11:12:19 2017 +0530
+++ b/source/encoder/framefilter.cpp	Mon Sep 25 18:35:07 2017 +0530
@@ -582,10 +582,7 @@ void FrameFilter::processRow(int row)
     CUData* ctu = encData.getPicCTU(m_parallelFilter[row].m_rowAddr);
 
     /* Processing left block Deblock with current threading */
-    {
-        /* stop threading on current row */
-        m_parallelFilter[row].waitForExit();
-
+    {        
         /* Check to avoid previous row process slower than current row */
         X265_CHECK(ctu->m_bFirstRowInSlice || m_parallelFilter[row - 1].m_lastDeblocked.get() == m_numCols, "previous row not finish");
 
@@ -618,7 +615,6 @@ void FrameFilter::processRow(int row)
     }
 
     // this row of CTUs has been encoded
-
     if (!ctu->m_bFirstRowInSlice)
         processPostRow(row - 1);
 
diff -r f8ae7afc1f61 -r 0967d0add97e source/encoder/framefilter.h
--- a/source/encoder/framefilter.h	Mon Sep 11 11:12:19 2017 +0530
+++ b/source/encoder/framefilter.h	Mon Sep 25 18:35:07 2017 +0530
@@ -62,7 +62,7 @@ public:


More information about the x265-commits mailing list