[x265] [PATCH] framepp: thread of loopfilter

Min Chen chenm003 at 163.com
Thu Aug 8 13:12:42 CEST 2013


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1375960323 -28800
# Node ID cd4dd4ab924f36a7e6fe127ebc12f6da4c3073fc
# Parent  33aa6210de6d486b413f0a6ef82750a89d76c981
framepp: thread of loopfilter

diff -r 33aa6210de6d -r cd4dd4ab924f source/Lib/TLibCommon/TComLoopFilter.cpp
--- a/source/Lib/TLibCommon/TComLoopFilter.cpp	Wed Aug 07 22:36:10 2013 +0800
+++ b/source/Lib/TLibCommon/TComLoopFilter.cpp	Thu Aug 08 19:12:03 2013 +0800
@@ -48,8 +48,6 @@
 // Constants
 // ====================================================================================================================
 
-#define   EDGE_VER    0
-#define   EDGE_HOR    1
 #define   QpUV(iQpY)  (((iQpY) < 0) ? (iQpY) : (((iQpY) > 57) ? ((iQpY) - 6) : g_chromaScale[(iQpY)]))
 
 #define DEFAULT_INTRA_TC_OFFSET 2 ///< Default intra TC offset
@@ -163,6 +161,15 @@
     }
 }
 
+Void TComLoopFilter::loopFilterCU(TComDataCU* cu, int dir) // deblock a single CU along one edge direction
+{
+    ::memset(m_blockingStrength[dir], 0, sizeof(UChar) * m_numPartitions); // clear this direction's per-partition strengths
+    ::memset(m_bEdgeFilter[dir], 0, sizeof(Bool) * m_numPartitions); // clear this direction's per-partition edge flags
+
+    // CU-based deblocking for the requested direction (FrameFilter passes EDGE_VER or EDGE_HOR)
+    xDeblockCU(cu, 0, 0, dir);
+}
+
 // ====================================================================================================================
 // Protected member functions
 // ====================================================================================================================
diff -r 33aa6210de6d -r cd4dd4ab924f source/Lib/TLibCommon/TComLoopFilter.h
--- a/source/Lib/TLibCommon/TComLoopFilter.h	Wed Aug 07 22:36:10 2013 +0800
+++ b/source/Lib/TLibCommon/TComLoopFilter.h	Thu Aug 08 19:12:03 2013 +0800
@@ -45,6 +45,8 @@
 //! \{
 
 #define DEBLOCK_SMALLEST_BLOCK  8
+#define EDGE_VER                0
+#define EDGE_HOR                1
 
 /// parameters for deblocking filter
 typedef struct _LFCUParam
@@ -124,6 +126,8 @@
     /// picture-level deblocking filter
     Void loopFilterPic(TComPic* pic);
 
+    Void loopFilterCU(TComDataCU* cu, int dir);
+
     static Int getBeta(Int qp)
     {
         Int indexB = Clip3(0, MAX_QP, qp);
diff -r 33aa6210de6d -r cd4dd4ab924f source/Lib/TLibCommon/TComPic.cpp
--- a/source/Lib/TLibCommon/TComPic.cpp	Wed Aug 07 22:36:10 2013 +0800
+++ b/source/Lib/TLibCommon/TComPic.cpp	Thu Aug 08 19:12:03 2013 +0800
@@ -138,6 +138,7 @@
 
     int numRows = (height + maxHeight - 1) / maxHeight;
     m_complete_enc = new uint32_t[numRows]; // initial in FrameEncoder::encode()
+    m_complete_lft = new uint32_t[numRows]; // initialized in FrameFilter::start()
 }
 
 Void TComPic::destroy()
@@ -190,6 +191,11 @@
     {
         delete[] m_complete_enc;
     }
+
+    if (m_complete_lft)
+    {
+        delete[] m_complete_lft;
+    }
 }
 
 Void TComPic::compressMotion()
diff -r 33aa6210de6d -r cd4dd4ab924f source/Lib/TLibCommon/TComPic.h
--- a/source/Lib/TLibCommon/TComPic.h	Wed Aug 07 22:36:10 2013 +0800
+++ b/source/Lib/TLibCommon/TComPic.h	Thu Aug 08 19:12:03 2013 +0800
@@ -70,7 +70,8 @@
 
 public:
 
-    volatile uint32_t     *m_complete_enc;       // Array of Col number that was finished stage encode
+    volatile uint32_t*    m_complete_enc;       // Array of Col number that was finished stage encode
+    volatile uint32_t*    m_complete_lft;       // Array of Col number that was finished stage loopfilter
     x265::LookaheadFrame  m_lowres;
 
     TComPic();
diff -r 33aa6210de6d -r cd4dd4ab924f source/Lib/TLibEncoder/TEncGOP.cpp
--- a/source/Lib/TLibEncoder/TEncGOP.cpp	Wed Aug 07 22:36:10 2013 +0800
+++ b/source/Lib/TLibEncoder/TEncGOP.cpp	Thu Aug 08 19:12:03 2013 +0800
@@ -254,7 +254,6 @@
     TEncCavlc*            cavlcCoder   = frameEncoder->getCavlcCoder();
     TEncSbac*             sbacCoder    = frameEncoder->getSingletonSbac();
     TEncBinCABAC*         binCABAC     = frameEncoder->getBinCABAC();
-    TComLoopFilter*       loopFilter   = frameEncoder->getLoopFilter();
     TComBitCounter*       bitCounter   = frameEncoder->getBitCounter();
     TEncSampleAdaptiveOffset* sao      = frameEncoder->getSAO();
     Bool bBufferingPeriodSEIPresentInAU = false;
@@ -545,8 +544,7 @@
         //-- Loop filter
         if (m_cfg->param.bEnableLoopFilter)
         {
-            loopFilter->setCfg(m_pps.getLoopFilterAcrossTilesEnabledFlag());
-            loopFilter->loopFilterPic(pic);
+            frameEncoder->wait_lft();
         }
 
         if (m_sps.getUseSAO())
diff -r 33aa6210de6d -r cd4dd4ab924f source/PPA/ppaCPUEvents.h
--- a/source/PPA/ppaCPUEvents.h	Wed Aug 07 22:36:10 2013 +0800
+++ b/source/PPA/ppaCPUEvents.h	Thu Aug 08 19:12:03 2013 +0800
@@ -33,3 +33,4 @@
 PPA_REGISTER_CPU_EVENT(Thread_ProcessRow)
 PPA_REGISTER_CPU_EVENT(Thread_compressCU)
 PPA_REGISTER_CPU_EVENT(Thread_encodeCU)
+PPA_REGISTER_CPU_EVENT(Thread_filterCU)
diff -r 33aa6210de6d -r cd4dd4ab924f source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Wed Aug 07 22:36:10 2013 +0800
+++ b/source/encoder/frameencoder.cpp	Thu Aug 08 19:12:03 2013 +0800
@@ -133,6 +133,7 @@
     , m_slice(NULL)
     , m_pic(NULL)
     , m_rows(NULL)
+    , m_frameFilter(pool)
 {}
 
 void FrameEncoder::destroy()
@@ -155,10 +156,7 @@
         m_sao.destroy();
         m_sao.destroyEncBuffer();
     }
-    if (m_cfg->param.bEnableLoopFilter)
-    {
-        m_loopFilter.destroy();
-    }
+    m_frameFilter.destroy();
 }
 
 void FrameEncoder::init(TEncTop *top, int numRows)
@@ -176,10 +174,7 @@
         m_sao.create(top->param.sourceWidth, top->param.sourceHeight, g_maxCUWidth, g_maxCUHeight);
         m_sao.createEncBuffer();
     }
-    if (top->param.bEnableLoopFilter)
-    {
-        m_loopFilter.create(g_maxCUDepth);
-    }
+    m_frameFilter.init(top, numRows);
 
     m_rows = new CTURow[m_numRows];
     for (int i = 0; i < m_numRows; ++i)
@@ -196,6 +191,7 @@
 
 void FrameEncoder::encode(TComPic *pic, TComSlice *slice)
 {
+    int bEnableLoopFilter = m_cfg->param.bEnableLoopFilter;
     m_pic = pic;
     m_slice = slice;
 
@@ -210,12 +206,22 @@
         m_pic->m_complete_enc[i] = 0;
     }
 
+    m_frameFilter.start(pic, slice);
+
     if (!m_pool || !m_cfg->param.bEnableWavefront)
     {
         for (int i = 0; i < this->m_numRows; i++)
         {
             processRow(i);
         }
+        // Loopfilter
+        if (bEnableLoopFilter)
+        {
+            for (int i = 0; i < this->m_numRows; i++)
+            {
+                m_frameFilter.processRow(i);
+            }
+        }
     }
     else
     {
@@ -223,9 +229,16 @@
 
         // Enqueue first row, then block until worker threads complete the frame
         WaveFront::enqueueRow(0);
+
         m_completionEvent.wait();
 
         WaveFront::dequeue();
+
+        // Placeholder: we should not need to block on the loop filter here
+        if (bEnableLoopFilter)
+        {
+            //wait_lft();
+        }
     }
 }
 
@@ -259,6 +272,13 @@
         // Completed CU processing
         m_pic->m_complete_enc[row]++;
 
+        // Activate the loop filter for the row above
+        if (row > 0)
+        {
+            // NOTE: FrameFilter::enqueueRow() checks the row's active flag before enqueueing
+            m_frameFilter.enqueueRow(row - 1);
+        }
+
         if (m_pic->m_complete_enc[row] >= 2 && row < m_numRows - 1)
         {
             ScopedLock below(m_rows[row + 1].m_lock);
@@ -289,3 +309,194 @@
         m_completionEvent.trigger();
     }
 }
+
+// **************************************************************************
+// * LoopFilter
+// **************************************************************************
+FrameFilter::FrameFilter(ThreadPool* pool) // every pointer member starts NULL; init() allocates the buffers
+    : WaveFront(pool)
+    , m_cfg(NULL)
+    , m_slice(NULL)
+    , m_pic(NULL)
+    , m_loopFilter(NULL)
+    , m_complete_lftV(NULL)
+    , m_rows_active(NULL)
+    , m_locks(NULL)
+{} // NOTE: m_numRows is not initialized here; it is first assigned in init()
+
+// Releases everything init() allocated. Does not dereference m_cfg (NULL until init() runs)
+// and resets every pointer after freeing it, so a repeated destroy() cannot double-free.
+void FrameFilter::destroy()
+{
+    JobProvider::flush();  // ensure no worker threads are using this frame
+
+    // delete[] on a null pointer is a no-op, so no guards are needed
+    delete[] m_complete_lftV;
+    m_complete_lftV = NULL;
+
+    delete[] m_rows_active;
+    m_rows_active = NULL;
+
+    delete[] m_locks;
+    m_locks = NULL;
+
+    // Key off the pointer rather than m_cfg->param.bEnableLoopFilter: m_loopFilter is
+    // only allocated by init() when the loop filter is enabled, and is NULL otherwise.
+    if (m_loopFilter)
+    {
+        for (int i = 0; i < m_numRows; ++i)
+        {
+            m_loopFilter[i].destroy();
+        }
+
+        delete[] m_loopFilter;
+        m_loopFilter = NULL;
+    }
+}
+
+void FrameFilter::init(TEncTop *top, int numRows) // allocate per-row state and set up the wavefront job queue
+{
+    m_cfg = top;
+    m_numRows = numRows;
+
+    m_complete_lftV = new uint32_t[numRows]; // per-row counter, advanced after each EDGE_VER pass in processRow()
+    m_rows_active = new bool[numRows]; // per-row "already enqueued" flag; contents are set in start()
+    m_locks = new Lock[numRows]; // one lock per row
+
+    if (top->param.bEnableLoopFilter)
+    {
+        m_loopFilter = new TComLoopFilter[numRows]; // one filter instance per row
+        for (int i = 0; i < m_numRows; ++i)
+        {
+            m_loopFilter[i].create(g_maxCUDepth);
+        }
+    }
+
+    // If the wavefront job queue cannot be initialized, assert and clear m_pool
+    if (!WaveFront::init(m_numRows))
+    {
+        assert(!"Unable to initialize job queue.");
+        m_pool = NULL;
+    }
+}
+
+void FrameFilter::start(TComPic *pic, TComSlice *slice) // reset per-row progress for a new picture and enqueue this provider
+{
+    m_pic = pic;
+    m_slice = slice;
+
+    for (int i = 0; i < m_numRows; i++)
+    {
+        if (m_cfg->param.bEnableLoopFilter)
+        {
+            // TODO: this flag is probably unused now that Tiles have been removed
+            m_loopFilter[i].setCfg(slice->getPPS()->getLoopFilterAcrossTilesEnabledFlag());
+            m_pic->m_complete_lft[i] = 0;
+            m_rows_active[i] = false;
+            m_complete_lftV[i] = 0;
+        }
+        else
+        {
+            m_pic->m_complete_lft[i] = MAX_INT; // filter disabled: report the row as already finished, for SAO
+        }
+    }
+
+    if (m_cfg->param.bEnableLoopFilter && m_pool && m_cfg->param.bEnableWavefront) // same pool/wavefront test as FrameEncoder::encode()
+    {
+        WaveFront::enqueue();
+    }
+}
+
+void FrameFilter::wait()
+{
+    // Block until processRow() on the last row triggers the completion event, then dequeue
+    m_completionEvent.wait();
+    WaveFront::dequeue(); // NOTE(review): start() only enqueues when a pool exists and wavefront is on; confirm this is safe otherwise
+}
+
+void FrameFilter::enqueueRow(int row) // enqueue a row for filtering unless it is already marked active
+{
+    ScopedLock self(m_locks[row]); // m_rows_active[row] is read and written under the row's lock
+
+    if (!m_rows_active[row])
+    {
+        m_rows_active[row] = true;
+        WaveFront::enqueueRow(row);
+    }
+}
+
+void FrameFilter::processRow(int row)
+{
+    PPAScopeEvent(Thread_filterCU);
+
+    // Filters one row of CUs: EDGE_VER on CU 'col', then EDGE_HOR on CU 'col - 1'.
+    // Called by worker threads, and directly by FrameEncoder::encode() when wavefront is off.
+    const uint32_t numCols = m_pic->getPicSym()->getFrameWidthInCU();
+    const uint32_t lineStartCUAddr = row * numCols;
+    for (UInt col = m_complete_lftV[row]; col < numCols; col++) // resume from where a previous call stopped
+    {
+        {
+            // TODO: modify FindJob to avoid invalid status here
+            ScopedLock self(m_locks[row]);
+            if (row < m_numRows - 1 && m_pic->m_complete_enc[row + 1] < col + 1)
+            { // the row below has not been encoded up to this column yet: back off
+                m_rows_active[row] = false;
+                return;
+            }
+            if ( row == m_numRows - 1 && m_pic->m_complete_enc[row] < col + 1)
+            { // last row: wait for its own encode progress instead
+                m_rows_active[row] = false;
+                return;
+            }
+            if (row > 0 && m_complete_lftV[row - 1] < col + 1)
+            {
+                // the row above has not run its EDGE_VER pass this far yet: back off
+                m_rows_active[row] = false;
+                return;
+            }
+            // TODO: this works, but it needs stronger verification
+            //if (checkHigherPriorityRow(row))
+            //{
+            //    m_rows_active[row] = false;
+            //    return;
+            //}
+        }
+        const uint32_t cuAddr = lineStartCUAddr + col;
+        TComDataCU* cu = m_pic->getCU(cuAddr);
+
+        m_loopFilter[row].loopFilterCU(cu, EDGE_VER);
+        m_complete_lftV[row]++;
+
+        if (col > 0) // EDGE_HOR trails EDGE_VER by one CU
+        {
+            TComDataCU* cu_prev = m_pic->getCU(cuAddr - 1);
+            m_loopFilter[row].loopFilterCU(cu_prev, EDGE_HOR);
+            m_pic->m_complete_lft[row]++;
+        }
+
+        // Activate the next row when possible
+        if (m_complete_lftV[row] >= 2 && row < m_numRows - 1)
+        {
+            ScopedLock below(m_locks[row + 1]);
+            if (m_rows_active[row + 1] == false &&
+                (m_complete_lftV[row + 1] + 2 <= m_complete_lftV[row] || m_complete_lftV[row] == numCols))
+            {
+                m_rows_active[row + 1] = true;
+                WaveFront::enqueueRow(row + 1);
+            }
+        }
+    }
+
+    { // EDGE_HOR pass for the last CU of the row, which the loop above never reaches
+        TComDataCU* cu_prev = m_pic->getCU(lineStartCUAddr + numCols - 1);
+        m_loopFilter[row].loopFilterCU(cu_prev, EDGE_HOR);
+        m_pic->m_complete_lft[row]++;
+    }
+
+    // the last row of CUs has been filtered: signal that the frame is complete
+    if (row == m_numRows - 1)
+    {
+        m_completionEvent.trigger();
+    }
+}
+
diff -r 33aa6210de6d -r cd4dd4ab924f source/encoder/frameencoder.h
--- a/source/encoder/frameencoder.h	Wed Aug 07 22:36:10 2013 +0800
+++ b/source/encoder/frameencoder.h	Thu Aug 08 19:12:03 2013 +0800
@@ -2,6 +2,7 @@
  * Copyright (C) 2013 x265 project
  *
  * Authors: Chung Shin Yee <shinyee at multicorewareinc.com>
+ *          Min Chen <chenm003 at 163.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -72,6 +73,42 @@
     volatile bool       m_active;
 };
 
+// Manages the wave-front processing of a single frame loopfilter
+class FrameFilter : public WaveFront
+{
+public:
+
+    FrameFilter(ThreadPool *);
+
+    virtual ~FrameFilter() {}
+
+    void init(TEncTop *top, int numRows); // allocates the per-row arrays and filters
+
+    void destroy(); // frees what init() allocated
+
+    void start(TComPic *pic, TComSlice* slice); // resets per-row progress for a new picture
+    void wait(); // blocks on m_completionEvent, then dequeues
+
+    void enqueueRow(int row); // enqueues a row unless it is already marked active
+
+    void processRow(int row); // filters one row of CUs
+
+protected:
+    TEncCfg*                 m_cfg; // assigned in init()
+
+    TComSlice*               m_slice;
+    TComPic*                 m_pic;
+    volatile uint32_t*       m_complete_lftV; // per row: number of CUs whose EDGE_VER pass is done
+    volatile bool*           m_rows_active; // per row: set when enqueued, cleared when processRow() backs off
+    Lock*                    m_locks; // per row: taken around m_rows_active accesses
+
+public:
+    TComLoopFilter*          m_loopFilter; // one instance per row; allocated only when the loop filter is enabled
+
+    int                      m_numRows;
+    Event                    m_completionEvent; // triggered by processRow() once the last row is finished
+};
+
 // Manages the wave-front processing of a single encoding frame
 class FrameEncoder : public WaveFront
 {
@@ -163,8 +200,6 @@
     /* Frame singletons, last the life of the encoder */
     TEncSbac*               getSingletonSbac() { return &m_sbacCoder; }
 
-    TComLoopFilter*         getLoopFilter()    { return &m_loopFilter; }
-
     TEncSampleAdaptiveOffset* getSAO()         { return &m_sao; }
 
     TEncCavlc*              getCavlcCoder()    { return &m_cavlcCoder; }
@@ -190,12 +225,21 @@
         m_sbacCoder.init((TEncBinIf*)&m_binCoderCABAC);
     }
 
+    void wait_lft()
+    {
+        if (m_cfg->param.bEnableLoopFilter)
+        {
+            // FrameFilter::wait() blocks until filtering completes and already dequeues; do not dequeue a second time
+            m_frameFilter.wait();
+        }
+    }
+
 protected:
 
     TEncSbac                 m_sbacCoder;
     TEncBinCABAC             m_binCoderCABAC;
     TEncCavlc                m_cavlcCoder;
-    TComLoopFilter           m_loopFilter;
+    FrameFilter              m_frameFilter;
     TEncSampleAdaptiveOffset m_sao;
     TComBitCounter           m_bitCounter;
     TEncSlice                m_sliceEncoder;
@@ -208,6 +252,7 @@
     CTURow*                  m_rows;
     Event                    m_completionEvent;
 };
+
 }
 
 #endif // ifndef __FRAMEENCODER__



More information about the x265-devel mailing list