[x265] [PATCH] refactor on FrameFilter and ParallelFilter, reduce duplicated data field
Min Chen
chenm003 at 163.com
Tue Jan 19 13:00:02 CET 2016
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1453204681 -28800
# Node ID 08dadacfe2cddfdea2c3a1e6f523c17ffa74bf09
# Parent 765864c3c6f02e2a3ec426974de7df7bbec7de58
refactor on FrameFilter and ParallelFilter, reduce duplicated data field
---
source/encoder/framefilter.cpp | 113 +++++++++++++++++++--------------------
source/encoder/framefilter.h | 32 ++++-------
2 files changed, 67 insertions(+), 78 deletions(-)
diff -r 765864c3c6f0 -r 08dadacfe2cd source/encoder/framefilter.cpp
--- a/source/encoder/framefilter.cpp Tue Jan 19 19:28:58 2016 +0800
+++ b/source/encoder/framefilter.cpp Tue Jan 19 19:58:01 2016 +0800
@@ -57,12 +57,14 @@
m_param = top->m_param;
m_frameEncoder = frame;
m_numRows = numRows;
+ m_numCols = numCols;
m_hChromaShift = CHROMA_H_SHIFT(m_param->internalCsp);
m_vChromaShift = CHROMA_V_SHIFT(m_param->internalCsp);
m_pad[0] = top->m_sps.conformanceWindow.rightOffset;
m_pad[1] = top->m_sps.conformanceWindow.bottomOffset;
m_saoRowDelay = m_param->bEnableLoopFilter ? 1 : 0;
m_lastHeight = (m_param->sourceHeight % g_maxCUSize) ? (m_param->sourceHeight % g_maxCUSize) : g_maxCUSize;
+ m_lastWidth = (m_param->sourceWidth % g_maxCUSize) ? (m_param->sourceWidth % g_maxCUSize) : g_maxCUSize;
if (m_param->bEnableSsim)
m_ssimBuf = X265_MALLOC(int, 8 * (m_param->sourceWidth / 4 + 3));
@@ -86,18 +88,13 @@
}
}
- const int lastWidth = (m_param->sourceWidth % g_maxCUSize) ? (m_param->sourceWidth % g_maxCUSize) : g_maxCUSize;
for(int row = 0; row < numRows; row++)
{
// Setting maximum bound information
- m_parallelFilter[row].m_numCols = numCols;
- m_parallelFilter[row].m_numRows = numRows;
m_parallelFilter[row].m_rowHeight = (row == numRows - 1) ? m_lastHeight : g_maxCUSize;
- m_parallelFilter[row].m_lastWidth = lastWidth;
- m_parallelFilter[row].m_param = m_param;
m_parallelFilter[row].m_row = row;
m_parallelFilter[row].m_rowAddr = row * numCols;
- m_parallelFilter[row].m_frameEncoder = m_frameEncoder;
+ m_parallelFilter[row].m_frameFilter = this;
if (row > 0)
m_parallelFilter[row].m_prevRow = &m_parallelFilter[row - 1];
@@ -122,7 +119,6 @@
m_parallelFilter[row].m_allowedCol.set(0);
m_parallelFilter[row].m_lastDeblocked.set(-1);
m_parallelFilter[row].m_encData = frame->m_encData;
- m_parallelFilter[row].m_frame = frame;
}
// Reset SAO common statistics
@@ -189,7 +185,7 @@
X265_CHECK(col * ctuWidth + ctuWidth <= m_sao.m_numCuInWidth * ctuWidth, "m_tmpU buffer beyond bound write detected");
// Chroma
- if (m_param->internalCsp != X265_CSP_I400)
+ if (m_frameFilter->m_param->internalCsp != X265_CSP_I400)
{
ctuWidth >>= m_sao.m_hChromaShift;
@@ -213,30 +209,30 @@
if (m_encData->m_slice->m_pps->bTransquantBypassEnabled)
{
- const CUGeom* cuGeoms = m_frameEncoder->m_cuGeoms;
- const uint32_t* ctuGeomMap = m_frameEncoder->m_ctuGeomMap;
+ const CUGeom* cuGeoms = m_frameFilter->m_frameEncoder->m_cuGeoms;
+ const uint32_t* ctuGeomMap = m_frameFilter->m_frameEncoder->m_ctuGeomMap;
uint32_t cuAddr = m_rowAddr + col;
const CUData* ctu = m_encData->getPicCTU(cuAddr);
- assert(m_frame->m_reconPic == m_encData->m_reconPic);
- origCUSampleRestoration(ctu, cuGeoms[ctuGeomMap[cuAddr]], *m_frame);
+ assert(m_frameFilter->m_frame->m_reconPic == m_encData->m_reconPic);
+ origCUSampleRestoration(ctu, cuGeoms[ctuGeomMap[cuAddr]], *m_frameFilter->m_frame);
}
}
// NOTE: MUST BE delay a row when Deblock enabled, the Deblock will modify above pixels in Horizon pass
-void FrameFilter::ParallelFilter::processPostCu(uint32_t col) const
+void FrameFilter::ParallelFilter::processPostCu(int col) const
{
// Update finished CU cursor
- m_frame->m_reconColCount[m_row].set(col);
+ m_frameFilter->m_frame->m_reconColCount[m_row].set(col);
// shortcut path for non-border area
- if ((col != 0) & (col != m_numCols - 1) & (m_row != 0) & (m_row != m_numRows - 1))
+ if ((col != 0) & (col != m_frameFilter->m_numCols - 1) & (m_row != 0) & (m_row != m_frameFilter->m_numRows - 1))
return;
- PicYuv *reconPic = m_frame->m_reconPic;
+ PicYuv *reconPic = m_frameFilter->m_frame->m_reconPic;
const uint32_t lineStartCUAddr = m_rowAddr + col;
const int realH = getCUHeight();
- const int realW = getCUWidth(col);
+ const int realW = m_frameFilter->getCUWidth(col);
const uint32_t lumaMarginX = reconPic->m_lumaMarginX;
const uint32_t lumaMarginY = reconPic->m_lumaMarginY;
@@ -248,17 +244,17 @@
const intptr_t strideC = reconPic->m_strideC;
pixel *pixY = reconPic->getLumaAddr(lineStartCUAddr);
// // MUST BE check I400 since m_picOrg uninitialize in that case
- pixel *pixU = (m_param->internalCsp != X265_CSP_I400) ? reconPic->getCbAddr(lineStartCUAddr) : NULL;
- pixel *pixV = (m_param->internalCsp != X265_CSP_I400) ? reconPic->getCrAddr(lineStartCUAddr) : NULL;
+ pixel *pixU = (m_frameFilter->m_param->internalCsp != X265_CSP_I400) ? reconPic->getCbAddr(lineStartCUAddr) : NULL;
+ pixel *pixV = (m_frameFilter->m_param->internalCsp != X265_CSP_I400) ? reconPic->getCrAddr(lineStartCUAddr) : NULL;
int copySizeY = realW;
int copySizeC = (realW >> hChromaShift);
- if ((col == 0) | (col == m_numCols - 1))
+ if ((col == 0) | (col == m_frameFilter->m_numCols - 1))
{
// TODO: improve by process on Left or Right only
primitives.extendRowBorder(reconPic->getLumaAddr(m_rowAddr), stride, reconPic->m_picWidth, realH, reconPic->m_lumaMarginX);
- if (m_param->internalCsp != X265_CSP_I400)
+ if (m_frameFilter->m_param->internalCsp != X265_CSP_I400)
{
primitives.extendRowBorder(reconPic->getCbAddr(m_rowAddr), strideC, reconPic->m_picWidth >> hChromaShift, realH >> vChromaShift, reconPic->m_chromaMarginX);
primitives.extendRowBorder(reconPic->getCrAddr(m_rowAddr), strideC, reconPic->m_picWidth >> hChromaShift, realH >> vChromaShift, reconPic->m_chromaMarginX);
@@ -266,7 +262,7 @@
}
// Extra Left and Right border on first and last CU
- if ((col == 0) | (col == m_numCols - 1))
+ if ((col == 0) | (col == m_frameFilter->m_numCols - 1))
{
copySizeY += lumaMarginX;
copySizeC += chromaMarginX;
@@ -286,7 +282,7 @@
for (uint32_t y = 0; y < lumaMarginY; y++)
memcpy(pixY - (y + 1) * stride, pixY, copySizeY * sizeof(pixel));
- if (m_param->internalCsp != X265_CSP_I400)
+ if (m_frameFilter->m_param->internalCsp != X265_CSP_I400)
{
for (uint32_t y = 0; y < chromaMarginY; y++)
{
@@ -297,7 +293,7 @@
}
// Border extend Bottom
- if (m_row == m_numRows - 1)
+ if (m_row == m_frameFilter->m_numRows - 1)
{
pixY += (realH - 1) * stride;
pixU += ((realH >> vChromaShift) - 1) * strideC;
@@ -305,7 +301,7 @@
for (uint32_t y = 0; y < lumaMarginY; y++)
memcpy(pixY + (y + 1) * stride, pixY, copySizeY * sizeof(pixel));
- if (m_param->internalCsp != X265_CSP_I400)
+ if (m_frameFilter->m_param->internalCsp != X265_CSP_I400)
{
for (uint32_t y = 0; y < chromaMarginY; y++)
{
@@ -320,12 +316,13 @@
void FrameFilter::ParallelFilter::processTasks(int /*workerThreadId*/)
{
SAOParam* saoParam = m_encData->m_saoParam;
- const CUGeom* cuGeoms = m_frameEncoder->m_cuGeoms;
- const uint32_t* ctuGeomMap = m_frameEncoder->m_ctuGeomMap;
+ const CUGeom* cuGeoms = m_frameFilter->m_frameEncoder->m_cuGeoms;
+ const uint32_t* ctuGeomMap = m_frameFilter->m_frameEncoder->m_ctuGeomMap;
PicYuv* reconPic = m_encData->m_reconPic;
const int colStart = m_lastCol.get();
// TODO: Waiting previous row finish or simple clip on it?
const int colEnd = m_allowedCol.get();
+ const int numCols = m_frameFilter->m_numCols;
// Avoid threading conflict
if (colStart >= colEnd)
@@ -335,7 +332,7 @@
{
const uint32_t cuAddr = m_rowAddr + col;
- if (m_param->bEnableLoopFilter)
+ if (m_frameFilter->m_param->bEnableLoopFilter)
{
const CUData* ctu = m_encData->getPicCTU(cuAddr);
deblockCTU(ctu, cuGeoms[ctuGeomMap[cuAddr]], Deblock::EDGE_VER);
@@ -343,17 +340,17 @@
if (col >= 1)
{
- if (m_param->bEnableLoopFilter)
+ if (m_frameFilter->m_param->bEnableLoopFilter)
{
const CUData* ctuPrev = m_encData->getPicCTU(cuAddr - 1);
deblockCTU(ctuPrev, cuGeoms[ctuGeomMap[cuAddr - 1]], Deblock::EDGE_HOR);
// When SAO Disable, setting column counter here
- if ((!m_param->bEnableSAO) & (m_row >= 1))
+ if ((!m_frameFilter->m_param->bEnableSAO) & (m_row >= 1))
m_prevRow->processPostCu(col - 1);
}
- if (m_param->bEnableSAO)
+ if (m_frameFilter->m_param->bEnableSAO)
{
// Save SAO bottom row reference pixels
copySaoAboveRef(reconPic, cuAddr - 1, col - 1);
@@ -381,58 +378,58 @@
m_lastCol.incr();
}
- if (colEnd == (int)m_numCols)
+ if (colEnd == numCols)
{
- const uint32_t cuAddr = m_rowAddr + m_numCols - 1;
+ const uint32_t cuAddr = m_rowAddr + numCols - 1;
- if (m_param->bEnableLoopFilter)
+ if (m_frameFilter->m_param->bEnableLoopFilter)
{
const CUData* ctuPrev = m_encData->getPicCTU(cuAddr);
deblockCTU(ctuPrev, cuGeoms[ctuGeomMap[cuAddr]], Deblock::EDGE_HOR);
// When SAO Disable, setting column counter here
- if ((!m_param->bEnableSAO) & (m_row >= 1))
- m_prevRow->processPostCu(m_numCols - 1);
+ if ((!m_frameFilter->m_param->bEnableSAO) & (m_row >= 1))
+ m_prevRow->processPostCu(numCols - 1);
}
// TODO: move processPostCu() into processSaoUnitCu()
- if (m_param->bEnableSAO)
+ if (m_frameFilter->m_param->bEnableSAO)
{
// Save SAO bottom row reference pixels
- copySaoAboveRef(reconPic, cuAddr, m_numCols - 1);
+ copySaoAboveRef(reconPic, cuAddr, numCols - 1);
// SAO Decide
// NOTE: reduce condition check for 1 CU only video, Why someone play with it?
- if (m_numCols >= 2)
- m_sao.rdoSaoUnitCu(saoParam, m_rowAddr, m_numCols - 2, cuAddr - 1);
+ if (numCols >= 2)
+ m_sao.rdoSaoUnitCu(saoParam, m_rowAddr, numCols - 2, cuAddr - 1);
- if (m_numCols >= 1)
- m_sao.rdoSaoUnitCu(saoParam, m_rowAddr, m_numCols - 1, cuAddr);
+ if (numCols >= 1)
+ m_sao.rdoSaoUnitCu(saoParam, m_rowAddr, numCols - 1, cuAddr);
// Process Previous Rows SAO CU
- if (m_row >= 1 && m_numCols >= 3)
+ if (m_row >= 1 && numCols >= 3)
{
- m_prevRow->processSaoUnitCu(saoParam, m_numCols - 3);
- m_prevRow->processPostCu(m_numCols - 3);
+ m_prevRow->processSaoUnitCu(saoParam, numCols - 3);
+ m_prevRow->processPostCu(numCols - 3);
}
- if (m_row >= 1 && m_numCols >= 2)
+ if (m_row >= 1 && numCols >= 2)
{
- m_prevRow->processSaoUnitCu(saoParam, m_numCols - 2);
- m_prevRow->processPostCu(m_numCols - 2);
+ m_prevRow->processSaoUnitCu(saoParam, numCols - 2);
+ m_prevRow->processPostCu(numCols - 2);
}
- if (m_row >= 1 && m_numCols >= 1)
+ if (m_row >= 1 && numCols >= 1)
{
- m_prevRow->processSaoUnitCu(saoParam, m_numCols - 1);
- m_prevRow->processPostCu(m_numCols - 1);
+ m_prevRow->processSaoUnitCu(saoParam, numCols - 1);
+ m_prevRow->processPostCu(numCols - 1);
}
// Setting column sync counter
if (m_row >= 1)
- m_frame->m_reconColCount[m_row - 1].set(m_numCols - 1);
+ m_frameFilter->m_frame->m_reconColCount[m_row - 1].set(numCols - 1);
}
- m_lastDeblocked.set(m_numCols);
+ m_lastDeblocked.set(numCols);
}
}
@@ -461,21 +458,21 @@
m_parallelFilter[row].waitForExit();
/* Check to avoid previous row process slower than current row */
- X265_CHECK((row < 1) || m_parallelFilter[row - 1].m_lastDeblocked.get() == (int)m_parallelFilter[row - 1].m_numCols, "previous row not finish");
+ X265_CHECK((row < 1) || m_parallelFilter[row - 1].m_lastDeblocked.get() == m_numCols, "previous row not finish");
- m_parallelFilter[row].m_allowedCol.set(m_parallelFilter[row].m_numCols);
+ m_parallelFilter[row].m_allowedCol.set(m_numCols);
m_parallelFilter[row].processTasks(-1);
if (row == m_numRows - 1)
{
/* TODO: Early start last row */
- if ((row >= 1) && (m_parallelFilter[row - 1].m_lastDeblocked.get() != (int)m_parallelFilter[row - 1].m_numCols))
+ if ((row >= 1) && (m_parallelFilter[row - 1].m_lastDeblocked.get() != m_numCols))
x265_log(m_param, X265_LOG_WARNING, "detected ParallelFilter race condition on last row\n");
/* Apply SAO on last row of CUs, because we always apply SAO on row[X-1] */
if (m_param->bEnableSAO)
{
- for(uint32_t col = 0; col < m_parallelFilter[row].m_numCols; col++)
+ for(int col = 0; col < m_numCols; col++)
{
// NOTE: must use processSaoUnitCu(), it include TQBypass logic
m_parallelFilter[row].processSaoUnitCu(saoParam, col);
@@ -483,7 +480,7 @@
}
// Process border extension on last row
- for(uint32_t col = 0; col < m_parallelFilter[row].m_numCols; col++)
+ for(int col = 0; col < m_numCols; col++)
{
// m_reconColCount will be set in processPostCu()
m_parallelFilter[row].processPostCu(col);
diff -r 765864c3c6f0 -r 08dadacfe2cd source/encoder/framefilter.h
--- a/source/encoder/framefilter.h Tue Jan 19 19:28:58 2016 +0800
+++ b/source/encoder/framefilter.h Tue Jan 19 19:58:01 2016 +0800
@@ -52,8 +52,10 @@
int m_pad[2];
int m_numRows;
+ int m_numCols;
int m_saoRowDelay;
int m_lastHeight;
+ int m_lastWidth;
void* m_ssimBuf; /* Temp storage for ssim computation */
@@ -61,15 +63,10 @@
class ParallelFilter : public BondedTaskGroup, public Deblock
{
public:
- uint32_t m_numCols;
- uint32_t m_numRows;
uint32_t m_rowHeight;
- uint32_t m_lastWidth;
- uint32_t m_row;
+ int m_row;
uint32_t m_rowAddr;
- x265_param* m_param;
- Frame* m_frame;
- FrameEncoder* m_frameEncoder;
+ FrameFilter* m_frameFilter;
FrameData* m_encData;
ParallelFilter* m_prevRow;
SAO m_sao;
@@ -78,15 +75,10 @@
ThreadSafeInteger m_lastDeblocked; /* The column that finished all of Deblock stages */
ParallelFilter()
- : m_numCols(0)
- , m_numRows(0)
- , m_rowHeight(0)
- , m_lastWidth(0)
+ : m_rowHeight(0)
, m_row(0)
, m_rowAddr(0)
- , m_param(NULL)
- , m_frame(NULL)
- , m_frameEncoder(NULL)
+ , m_frameFilter(NULL)
, m_encData(NULL)
, m_prevRow(NULL)
{
@@ -104,18 +96,13 @@
void copySaoAboveRef(PicYuv* reconPic, uint32_t cuAddr, int col);
// Post-Process (Border extension)
- void processPostCu(uint32_t col) const;
+ void processPostCu(int col) const;
uint32_t getCUHeight() const
{
return m_rowHeight;
}
- uint32_t getCUWidth(int colNum) const
- {
- return (colNum == (int)m_numCols - 1) ? m_lastWidth : g_maxCUSize;
- }
-
protected:
ParallelFilter operator=(const ParallelFilter&);
@@ -132,6 +119,11 @@
{
}
+ uint32_t getCUWidth(int colNum) const
+ {
+ return (colNum == (int)m_numCols - 1) ? m_lastWidth : g_maxCUSize;
+ }
+
void init(Encoder *top, FrameEncoder *frame, int numRows, uint32_t numCols);
void destroy();
More information about the x265-devel
mailing list