[x265] [PATCH] framepp: thread of loopfilter
Min Chen
chenm003 at 163.com
Thu Aug 8 13:12:42 CEST 2013
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1375960323 -28800
# Node ID cd4dd4ab924f36a7e6fe127ebc12f6da4c3073fc
# Parent 33aa6210de6d486b413f0a6ef82750a89d76c981
framepp: thread of loopfilter
diff -r 33aa6210de6d -r cd4dd4ab924f source/Lib/TLibCommon/TComLoopFilter.cpp
--- a/source/Lib/TLibCommon/TComLoopFilter.cpp Wed Aug 07 22:36:10 2013 +0800
+++ b/source/Lib/TLibCommon/TComLoopFilter.cpp Thu Aug 08 19:12:03 2013 +0800
@@ -48,8 +48,6 @@
// Constants
// ====================================================================================================================
-#define EDGE_VER 0
-#define EDGE_HOR 1
#define QpUV(iQpY) (((iQpY) < 0) ? (iQpY) : (((iQpY) > 57) ? ((iQpY) - 6) : g_chromaScale[(iQpY)]))
#define DEFAULT_INTRA_TC_OFFSET 2 ///< Default intra TC offset
@@ -163,6 +161,15 @@
}
}
+Void TComLoopFilter::loopFilterCU(TComDataCU* cu, int dir)
+{
+ // Zero the per-partition state kept for direction 'dir' (callers pass
+ // EDGE_VER or EDGE_HOR), then run the CU-based deblocking on this CU.
+ ::memset(m_blockingStrength[dir], 0, m_numPartitions * sizeof(UChar));
+ ::memset(m_bEdgeFilter[dir], 0, m_numPartitions * sizeof(Bool));
+ xDeblockCU(cu, 0, 0, dir);
+}
+
// ====================================================================================================================
// Protected member functions
// ====================================================================================================================
diff -r 33aa6210de6d -r cd4dd4ab924f source/Lib/TLibCommon/TComLoopFilter.h
--- a/source/Lib/TLibCommon/TComLoopFilter.h Wed Aug 07 22:36:10 2013 +0800
+++ b/source/Lib/TLibCommon/TComLoopFilter.h Thu Aug 08 19:12:03 2013 +0800
@@ -45,6 +45,8 @@
//! \{
#define DEBLOCK_SMALLEST_BLOCK 8
+#define EDGE_VER 0
+#define EDGE_HOR 1
/// parameters for deblocking filter
typedef struct _LFCUParam
@@ -124,6 +126,8 @@
/// picture-level deblocking filter
Void loopFilterPic(TComPic* pic);
+ Void loopFilterCU(TComDataCU* cu, int dir);
+
static Int getBeta(Int qp)
{
Int indexB = Clip3(0, MAX_QP, qp);
diff -r 33aa6210de6d -r cd4dd4ab924f source/Lib/TLibCommon/TComPic.cpp
--- a/source/Lib/TLibCommon/TComPic.cpp Wed Aug 07 22:36:10 2013 +0800
+++ b/source/Lib/TLibCommon/TComPic.cpp Thu Aug 08 19:12:03 2013 +0800
@@ -138,6 +138,7 @@
int numRows = (height + maxHeight - 1) / maxHeight;
m_complete_enc = new uint32_t[numRows]; // initial in FrameEncoder::encode()
+ m_complete_lft = new uint32_t[numRows]; // initial in FrameFilter::start()
}
Void TComPic::destroy()
@@ -190,6 +191,11 @@
{
delete[] m_complete_enc;
}
+
+ if (m_complete_lft)
+ {
+ delete[] m_complete_lft;
+ }
}
Void TComPic::compressMotion()
diff -r 33aa6210de6d -r cd4dd4ab924f source/Lib/TLibCommon/TComPic.h
--- a/source/Lib/TLibCommon/TComPic.h Wed Aug 07 22:36:10 2013 +0800
+++ b/source/Lib/TLibCommon/TComPic.h Thu Aug 08 19:12:03 2013 +0800
@@ -70,7 +70,8 @@
public:
- volatile uint32_t *m_complete_enc; // Array of Col number that was finished stage encode
+ volatile uint32_t* m_complete_enc; // Array of Col number that was finished stage encode
+ volatile uint32_t* m_complete_lft; // Array of Col number that was finished stage loopfilter
x265::LookaheadFrame m_lowres;
TComPic();
diff -r 33aa6210de6d -r cd4dd4ab924f source/Lib/TLibEncoder/TEncGOP.cpp
--- a/source/Lib/TLibEncoder/TEncGOP.cpp Wed Aug 07 22:36:10 2013 +0800
+++ b/source/Lib/TLibEncoder/TEncGOP.cpp Thu Aug 08 19:12:03 2013 +0800
@@ -254,7 +254,6 @@
TEncCavlc* cavlcCoder = frameEncoder->getCavlcCoder();
TEncSbac* sbacCoder = frameEncoder->getSingletonSbac();
TEncBinCABAC* binCABAC = frameEncoder->getBinCABAC();
- TComLoopFilter* loopFilter = frameEncoder->getLoopFilter();
TComBitCounter* bitCounter = frameEncoder->getBitCounter();
TEncSampleAdaptiveOffset* sao = frameEncoder->getSAO();
Bool bBufferingPeriodSEIPresentInAU = false;
@@ -545,8 +544,7 @@
//-- Loop filter
if (m_cfg->param.bEnableLoopFilter)
{
- loopFilter->setCfg(m_pps.getLoopFilterAcrossTilesEnabledFlag());
- loopFilter->loopFilterPic(pic);
+ frameEncoder->wait_lft();
}
if (m_sps.getUseSAO())
diff -r 33aa6210de6d -r cd4dd4ab924f source/PPA/ppaCPUEvents.h
--- a/source/PPA/ppaCPUEvents.h Wed Aug 07 22:36:10 2013 +0800
+++ b/source/PPA/ppaCPUEvents.h Thu Aug 08 19:12:03 2013 +0800
@@ -33,3 +33,4 @@
PPA_REGISTER_CPU_EVENT(Thread_ProcessRow)
PPA_REGISTER_CPU_EVENT(Thread_compressCU)
PPA_REGISTER_CPU_EVENT(Thread_encodeCU)
+PPA_REGISTER_CPU_EVENT(Thread_filterCU)
diff -r 33aa6210de6d -r cd4dd4ab924f source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp Wed Aug 07 22:36:10 2013 +0800
+++ b/source/encoder/frameencoder.cpp Thu Aug 08 19:12:03 2013 +0800
@@ -133,6 +133,7 @@
, m_slice(NULL)
, m_pic(NULL)
, m_rows(NULL)
+ , m_frameFilter(pool)
{}
void FrameEncoder::destroy()
@@ -155,10 +156,7 @@
m_sao.destroy();
m_sao.destroyEncBuffer();
}
- if (m_cfg->param.bEnableLoopFilter)
- {
- m_loopFilter.destroy();
- }
+ m_frameFilter.destroy();
}
void FrameEncoder::init(TEncTop *top, int numRows)
@@ -176,10 +174,7 @@
m_sao.create(top->param.sourceWidth, top->param.sourceHeight, g_maxCUWidth, g_maxCUHeight);
m_sao.createEncBuffer();
}
- if (top->param.bEnableLoopFilter)
- {
- m_loopFilter.create(g_maxCUDepth);
- }
+ m_frameFilter.init(top, numRows);
m_rows = new CTURow[m_numRows];
for (int i = 0; i < m_numRows; ++i)
@@ -196,6 +191,7 @@
void FrameEncoder::encode(TComPic *pic, TComSlice *slice)
{
+ int bEnableLoopFilter = m_cfg->param.bEnableLoopFilter;
m_pic = pic;
m_slice = slice;
@@ -210,12 +206,22 @@
m_pic->m_complete_enc[i] = 0;
}
+ m_frameFilter.start(pic, slice);
+
if (!m_pool || !m_cfg->param.bEnableWavefront)
{
for (int i = 0; i < this->m_numRows; i++)
{
processRow(i);
}
+ // Loopfilter
+ if (bEnableLoopFilter)
+ {
+ for (int i = 0; i < this->m_numRows; i++)
+ {
+ m_frameFilter.processRow(i);
+ }
+ }
}
else
{
@@ -223,9 +229,16 @@
// Enqueue first row, then block until worker threads complete the frame
WaveFront::enqueueRow(0);
+
m_completionEvent.wait();
WaveFront::dequeue();
+
+ // Placeholder: no need to block here, TEncGOP calls wait_lft() itself
+ if (bEnableLoopFilter)
+ {
+ //wait_lft();
+ }
}
}
@@ -259,6 +272,13 @@
// Completed CU processing
m_pic->m_complete_enc[row]++;
+ // Active Loopfilter
+ if (row > 0)
+ {
+ // NOTE: enqueueRow() checks the row's active flag, so repeated calls are harmless
+ m_frameFilter.enqueueRow(row - 1);
+ }
+
if (m_pic->m_complete_enc[row] >= 2 && row < m_numRows - 1)
{
ScopedLock below(m_rows[row + 1].m_lock);
@@ -289,3 +309,194 @@
m_completionEvent.trigger();
}
}
+
+// **************************************************************************
+// * LoopFilter
+// **************************************************************************
+// Members are initialized in declaration order; the arrays are allocated by init().
+FrameFilter::FrameFilter(ThreadPool* pool)
+ : WaveFront(pool)
+ , m_cfg(NULL), m_slice(NULL), m_pic(NULL)
+ , m_complete_lftV(NULL)
+ , m_rows_active(NULL)
+ , m_locks(NULL)
+ , m_loopFilter(NULL)
+ , m_numRows(0)
+{}
+
+// Releases everything init() allocated. Each pointer is reset to NULL after
+// it is freed, so calling destroy() a second time does not double-free.
+void FrameFilter::destroy()
+{
+ JobProvider::flush(); // ensure no worker threads are using this frame
+
+ // delete[] on a null pointer is a no-op, so no guards are needed
+ delete[] m_complete_lftV;
+ m_complete_lftV = NULL;
+
+ delete[] m_rows_active;
+ m_rows_active = NULL;
+
+ delete[] m_locks;
+ m_locks = NULL;
+
+ // Test the array itself instead of m_cfg->param.bEnableLoopFilter: m_cfg is
+ // NULL until init() runs, and init() only allocates the array when enabled.
+ if (m_loopFilter)
+ {
+ for (int i = 0; i < m_numRows; ++i)
+ {
+ m_loopFilter[i].destroy();
+ }
+
+ delete[] m_loopFilter;
+ m_loopFilter = NULL;
+ }
+}
+
+// Stores the configuration and allocates the per-row state used by the filter.
+void FrameFilter::init(TEncTop *top, int numRows)
+{
+ m_numRows = numRows;
+ m_cfg = top;
+ // one progress counter, one active flag and one lock per row
+ m_complete_lftV = new uint32_t[numRows];
+ m_rows_active = new bool[numRows];
+ m_locks = new Lock[numRows];
+
+ if (top->param.bEnableLoopFilter)
+ {
+ m_loopFilter = new TComLoopFilter[numRows];
+ for (int row = 0; row < numRows; row++)
+ {
+ m_loopFilter[row].create(g_maxCUDepth);
+ }
+ }
+
+ if (!WaveFront::init(numRows))
+ {
+ assert(!"Unable to initialize job queue.");
+ m_pool = NULL; // start() only enqueues when m_pool is set
+ }
+}
+
+// Resets per-row progress for a new picture; enqueues this provider when the
+// loop filter, a thread pool and wavefront are all enabled.
+void FrameFilter::start(TComPic *pic, TComSlice *slice)
+{
+ m_pic = pic;
+ m_slice = slice;
+ const bool filterOn = m_cfg->param.bEnableLoopFilter != 0;
+ for (int row = 0; row < m_numRows; row++)
+ {
+ if (!filterOn)
+ {
+ pic->m_complete_lft[row] = MAX_INT; // for SAO
+ continue;
+ }
+ // TODO: I think this flag unused since we remove Tiles
+ m_loopFilter[row].setCfg(slice->getPPS()->getLoopFilterAcrossTilesEnabledFlag());
+ pic->m_complete_lft[row] = 0;
+ m_rows_active[row] = false;
+ m_complete_lftV[row] = 0;
+ }
+
+ if (filterOn && m_pool && m_cfg->param.bEnableWavefront)
+ {
+ WaveFront::enqueue();
+ }
+}
+
+void FrameFilter::wait()
+{
+ // Block until the last row is filtered; dequeue only when start() enqueued us
+ m_completionEvent.wait();
+ if (m_pool && m_cfg->param.bEnableWavefront) { WaveFront::dequeue(); }
+}
+
+void FrameFilter::enqueueRow(int row)
+{
+ ScopedLock guard(m_locks[row]); // the flag is tested and set under the row lock
+ if (m_rows_active[row])
+ {
+ return; // row is already marked active, nothing to queue
+ }
+ m_rows_active[row] = true;
+ WaveFront::enqueueRow(row);
+}
+
+void FrameFilter::processRow(int row)
+{
+ PPAScopeEvent(Thread_filterCU);
+ // Deblocks the CUs of one row, resuming where a previous call stopped.
+ // Called by worker threads, and directly by FrameEncoder::encode() on the
+ // non-wavefront path. Returns early (row marked inactive) when a dependency is not met.
+ const uint32_t numCols = m_pic->getPicSym()->getFrameWidthInCU();
+ const uint32_t lineStartCUAddr = row * numCols;
+ for (UInt col = m_complete_lftV[row]; col < numCols; col++) // start after the CUs already EDGE_VER filtered
+ {
+ {
+ // TODO: modify FindJob to avoid invalid status here
+ ScopedLock self(m_locks[row]);
+ if (row < m_numRows - 1 && m_pic->m_complete_enc[row + 1] < col + 1) // row below not encoded through this column yet
+ {
+ m_rows_active[row] = false;
+ return;
+ }
+ if ( row == m_numRows - 1 && m_pic->m_complete_enc[row] < col + 1) // last row: this CU itself not encoded yet
+ {
+ m_rows_active[row] = false;
+ return;
+ }
+ if (row > 0 && m_complete_lftV[row - 1] < col + 1) // row above has not EDGE_VER filtered this column yet
+ {
+
+ m_rows_active[row] = false;
+ return;
+ }
+ // TODO: this works, but the dependency check probably needs stronger verification
+ //if (checkHigherPriorityRow(row))
+ //{
+ // m_rows_active[row] = false;
+ // return;
+ //}
+ }
+ const uint32_t cuAddr = lineStartCUAddr + col;
+ TComDataCU* cu = m_pic->getCU(cuAddr);
+
+ m_loopFilter[row].loopFilterCU(cu, EDGE_VER);
+ m_complete_lftV[row]++; // counts CUs whose EDGE_VER pass is done
+
+ if (col > 0) // EDGE_HOR trails EDGE_VER by one CU
+ {
+ TComDataCU* cu_prev = m_pic->getCU(cuAddr - 1);
+ m_loopFilter[row].loopFilterCU(cu_prev, EDGE_HOR);
+ m_pic->m_complete_lft[row]++; // counts CUs with both passes done
+ }
+
+ // Activate the next row when possible. NOTE(review): needs >= 2 finished CUs, so a one-column picture never gets here - confirm
+ if (m_complete_lftV[row] >= 2 && row < m_numRows - 1)
+ {
+ ScopedLock below(m_locks[row + 1]);
+ if (m_rows_active[row + 1] == false &&
+ (m_complete_lftV[row + 1] + 2 <= m_complete_lftV[row] || m_complete_lftV[row] == numCols))
+ {
+ m_rows_active[row + 1] = true;
+ WaveFront::enqueueRow(row + 1);
+ }
+ }
+ }
+
+ { // EDGE_HOR pass for the last CU of the row, which the loop above leaves pending
+ TComDataCU* cu_prev = m_pic->getCU(lineStartCUAddr + numCols - 1);
+ m_loopFilter[row].loopFilterCU(cu_prev, EDGE_HOR);
+ m_pic->m_complete_lft[row]++;
+ }
+
+ // the last row of CTUs has been filtered: signal the event wait() blocks on
+ if (row == m_numRows - 1)
+ {
+ m_completionEvent.trigger();
+ }
+}
+
diff -r 33aa6210de6d -r cd4dd4ab924f source/encoder/frameencoder.h
--- a/source/encoder/frameencoder.h Wed Aug 07 22:36:10 2013 +0800
+++ b/source/encoder/frameencoder.h Thu Aug 08 19:12:03 2013 +0800
@@ -2,6 +2,7 @@
* Copyright (C) 2013 x265 project
*
* Authors: Chung Shin Yee <shinyee at multicorewareinc.com>
+ * Min Chen <chenm003 at 163.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -72,6 +73,42 @@
volatile bool m_active;
};
+// Manages the wave-front processing of the loop filter for a single frame
+class FrameFilter : public WaveFront
+{
+public:
+
+ FrameFilter(ThreadPool *);
+
+ virtual ~FrameFilter() {} // frees nothing; destroy() releases the arrays
+
+ void init(TEncTop *top, int numRows); // stores the config and allocates per-row state
+
+ void destroy(); // flushes pending jobs and frees what init() allocated
+
+ void start(TComPic *pic, TComSlice* slice); // resets per-row state for a picture
+ void wait(); // blocks on m_completionEvent, then dequeues
+
+ void enqueueRow(int row); // marks a row active and queues it, once
+
+ void processRow(int row); // filters one row until done or a dependency is unmet
+
+protected:
+ TEncCfg* m_cfg;
+
+ TComSlice* m_slice;
+ TComPic* m_pic;
+ volatile uint32_t* m_complete_lftV; // per row: number of CUs with the EDGE_VER pass done
+ volatile bool* m_rows_active; // per row: set while the row is marked active
+ Lock* m_locks; // per row: taken around m_rows_active updates
+
+public:
+ TComLoopFilter* m_loopFilter; // one filter per row; allocated only when the loop filter is enabled
+
+ int m_numRows;
+ Event m_completionEvent; // triggered by processRow() when the last row finishes
+};
+
// Manages the wave-front processing of a single encoding frame
class FrameEncoder : public WaveFront
{
@@ -163,8 +200,6 @@
/* Frame singletons, last the life of the encoder */
TEncSbac* getSingletonSbac() { return &m_sbacCoder; }
- TComLoopFilter* getLoopFilter() { return &m_loopFilter; }
-
TEncSampleAdaptiveOffset* getSAO() { return &m_sao; }
TEncCavlc* getCavlcCoder() { return &m_cavlcCoder; }
@@ -190,12 +225,21 @@
m_sbacCoder.init((TEncBinIf*)&m_binCoderCABAC);
}
+ void wait_lft()
+ {
+ if (m_cfg->param.bEnableLoopFilter)
+ {
+ // FrameFilter::wait() blocks and then dequeues; a second dequeue() here is redundant
+ m_frameFilter.wait();
+ }
+ }
+
protected:
TEncSbac m_sbacCoder;
TEncBinCABAC m_binCoderCABAC;
TEncCavlc m_cavlcCoder;
- TComLoopFilter m_loopFilter;
+ FrameFilter m_frameFilter;
TEncSampleAdaptiveOffset m_sao;
TComBitCounter m_bitCounter;
TEncSlice m_sliceEncoder;
@@ -208,6 +252,7 @@
CTURow* m_rows;
Event m_completionEvent;
};
+
}
#endif // ifndef __FRAMEENCODER__
More information about the x265-devel
mailing list