<div dir="ltr">Excellent, thanks Min. This is exactly what I had in mind.</div><div class="gmail_extra"><br><div class="gmail_quote">On Tue, Jan 19, 2016 at 5:30 PM, Min Chen <span dir="ltr"><<a href="mailto:chenm003@163.com" target="_blank">chenm003@163.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class="HOEnZb"><div class="h5"># HG changeset patch<br>
# User Min Chen <<a href="mailto:chenm003@163.com">chenm003@163.com</a>><br>
# Date 1453204681 -28800<br>
# Node ID 08dadacfe2cddfdea2c3a1e6f523c17ffa74bf09<br>
# Parent 765864c3c6f02e2a3ec426974de7df7bbec7de58<br>
refactor on FrameFilter and ParallelFilter, reduce duplicated data field<br>
---<br>
source/encoder/framefilter.cpp | 113 +++++++++++++++++++--------------------<br>
source/encoder/framefilter.h | 32 ++++-------<br>
2 files changed, 67 insertions(+), 78 deletions(-)<br>
<br>
diff -r 765864c3c6f0 -r 08dadacfe2cd source/encoder/framefilter.cpp<br>
--- a/source/encoder/framefilter.cpp Tue Jan 19 19:28:58 2016 +0800<br>
+++ b/source/encoder/framefilter.cpp Tue Jan 19 19:58:01 2016 +0800<br>
@@ -57,12 +57,14 @@<br>
m_param = top->m_param;<br>
m_frameEncoder = frame;<br>
m_numRows = numRows;<br>
+ m_numCols = numCols;<br>
m_hChromaShift = CHROMA_H_SHIFT(m_param->internalCsp);<br>
m_vChromaShift = CHROMA_V_SHIFT(m_param->internalCsp);<br>
m_pad[0] = top->m_sps.conformanceWindow.rightOffset;<br>
m_pad[1] = top->m_sps.conformanceWindow.bottomOffset;<br>
m_saoRowDelay = m_param->bEnableLoopFilter ? 1 : 0;<br>
m_lastHeight = (m_param->sourceHeight % g_maxCUSize) ? (m_param->sourceHeight % g_maxCUSize) : g_maxCUSize;<br>
+ m_lastWidth = (m_param->sourceWidth % g_maxCUSize) ? (m_param->sourceWidth % g_maxCUSize) : g_maxCUSize;<br>
<br>
if (m_param->bEnableSsim)<br>
m_ssimBuf = X265_MALLOC(int, 8 * (m_param->sourceWidth / 4 + 3));<br>
@@ -86,18 +88,13 @@<br>
}<br>
}<br>
<br>
- const int lastWidth = (m_param->sourceWidth % g_maxCUSize) ? (m_param->sourceWidth % g_maxCUSize) : g_maxCUSize;<br>
for(int row = 0; row < numRows; row++)<br>
{<br>
// Setting maximum bound information<br>
- m_parallelFilter[row].m_numCols = numCols;<br>
- m_parallelFilter[row].m_numRows = numRows;<br>
m_parallelFilter[row].m_rowHeight = (row == numRows - 1) ? m_lastHeight : g_maxCUSize;<br>
- m_parallelFilter[row].m_lastWidth = lastWidth;<br>
- m_parallelFilter[row].m_param = m_param;<br>
m_parallelFilter[row].m_row = row;<br>
m_parallelFilter[row].m_rowAddr = row * numCols;<br>
- m_parallelFilter[row].m_frameEncoder = m_frameEncoder;<br>
+ m_parallelFilter[row].m_frameFilter = this;<br>
<br>
if (row > 0)<br>
m_parallelFilter[row].m_prevRow = &m_parallelFilter[row - 1];<br>
@@ -122,7 +119,6 @@<br>
m_parallelFilter[row].m_allowedCol.set(0);<br>
m_parallelFilter[row].m_lastDeblocked.set(-1);<br>
m_parallelFilter[row].m_encData = frame->m_encData;<br>
- m_parallelFilter[row].m_frame = frame;<br>
}<br>
<br>
// Reset SAO common statistics<br>
@@ -189,7 +185,7 @@<br>
X265_CHECK(col * ctuWidth + ctuWidth <= m_sao.m_numCuInWidth * ctuWidth, "m_tmpU buffer beyond bound write detected");<br>
<br>
// Chroma<br>
- if (m_param->internalCsp != X265_CSP_I400)<br>
+ if (m_frameFilter->m_param->internalCsp != X265_CSP_I400)<br>
{<br>
ctuWidth >>= m_sao.m_hChromaShift;<br>
<br>
@@ -213,30 +209,30 @@<br>
<br>
if (m_encData->m_slice->m_pps->bTransquantBypassEnabled)<br>
{<br>
- const CUGeom* cuGeoms = m_frameEncoder->m_cuGeoms;<br>
- const uint32_t* ctuGeomMap = m_frameEncoder->m_ctuGeomMap;<br>
+ const CUGeom* cuGeoms = m_frameFilter->m_frameEncoder->m_cuGeoms;<br>
+ const uint32_t* ctuGeomMap = m_frameFilter->m_frameEncoder->m_ctuGeomMap;<br>
<br>
uint32_t cuAddr = m_rowAddr + col;<br>
const CUData* ctu = m_encData->getPicCTU(cuAddr);<br>
- assert(m_frame->m_reconPic == m_encData->m_reconPic);<br>
- origCUSampleRestoration(ctu, cuGeoms[ctuGeomMap[cuAddr]], *m_frame);<br>
+ assert(m_frameFilter->m_frame->m_reconPic == m_encData->m_reconPic);<br>
+ origCUSampleRestoration(ctu, cuGeoms[ctuGeomMap[cuAddr]], *m_frameFilter->m_frame);<br>
}<br>
}<br>
<br>
// NOTE: MUST BE delay a row when Deblock enabled, the Deblock will modify above pixels in Horizon pass<br>
-void FrameFilter::ParallelFilter::processPostCu(uint32_t col) const<br>
+void FrameFilter::ParallelFilter::processPostCu(int col) const<br>
{<br>
// Update finished CU cursor<br>
- m_frame->m_reconColCount[m_row].set(col);<br>
+ m_frameFilter->m_frame->m_reconColCount[m_row].set(col);<br>
<br>
// shortcut path for non-border area<br>
- if ((col != 0) & (col != m_numCols - 1) & (m_row != 0) & (m_row != m_numRows - 1))<br>
+ if ((col != 0) & (col != m_frameFilter->m_numCols - 1) & (m_row != 0) & (m_row != m_frameFilter->m_numRows - 1))<br>
return;<br>
<br>
- PicYuv *reconPic = m_frame->m_reconPic;<br>
+ PicYuv *reconPic = m_frameFilter->m_frame->m_reconPic;<br>
const uint32_t lineStartCUAddr = m_rowAddr + col;<br>
const int realH = getCUHeight();<br>
- const int realW = getCUWidth(col);<br>
+ const int realW = m_frameFilter->getCUWidth(col);<br>
<br>
const uint32_t lumaMarginX = reconPic->m_lumaMarginX;<br>
const uint32_t lumaMarginY = reconPic->m_lumaMarginY;<br>
@@ -248,17 +244,17 @@<br>
const intptr_t strideC = reconPic->m_strideC;<br>
pixel *pixY = reconPic->getLumaAddr(lineStartCUAddr);<br>
// // MUST BE check I400 since m_picOrg uninitialize in that case<br>
- pixel *pixU = (m_param->internalCsp != X265_CSP_I400) ? reconPic->getCbAddr(lineStartCUAddr) : NULL;<br>
- pixel *pixV = (m_param->internalCsp != X265_CSP_I400) ? reconPic->getCrAddr(lineStartCUAddr) : NULL;<br>
+ pixel *pixU = (m_frameFilter->m_param->internalCsp != X265_CSP_I400) ? reconPic->getCbAddr(lineStartCUAddr) : NULL;<br>
+ pixel *pixV = (m_frameFilter->m_param->internalCsp != X265_CSP_I400) ? reconPic->getCrAddr(lineStartCUAddr) : NULL;<br>
int copySizeY = realW;<br>
int copySizeC = (realW >> hChromaShift);<br>
<br>
- if ((col == 0) | (col == m_numCols - 1))<br>
+ if ((col == 0) | (col == m_frameFilter->m_numCols - 1))<br>
{<br>
// TODO: improve by process on Left or Right only<br>
primitives.extendRowBorder(reconPic->getLumaAddr(m_rowAddr), stride, reconPic->m_picWidth, realH, reconPic->m_lumaMarginX);<br>
<br>
- if (m_param->internalCsp != X265_CSP_I400)<br>
+ if (m_frameFilter->m_param->internalCsp != X265_CSP_I400)<br>
{<br>
primitives.extendRowBorder(reconPic->getCbAddr(m_rowAddr), strideC, reconPic->m_picWidth >> hChromaShift, realH >> vChromaShift, reconPic->m_chromaMarginX);<br>
primitives.extendRowBorder(reconPic->getCrAddr(m_rowAddr), strideC, reconPic->m_picWidth >> hChromaShift, realH >> vChromaShift, reconPic->m_chromaMarginX);<br>
@@ -266,7 +262,7 @@<br>
}<br>
<br>
// Extra Left and Right border on first and last CU<br>
- if ((col == 0) | (col == m_numCols - 1))<br>
+ if ((col == 0) | (col == m_frameFilter->m_numCols - 1))<br>
{<br>
copySizeY += lumaMarginX;<br>
copySizeC += chromaMarginX;<br>
@@ -286,7 +282,7 @@<br>
for (uint32_t y = 0; y < lumaMarginY; y++)<br>
memcpy(pixY - (y + 1) * stride, pixY, copySizeY * sizeof(pixel));<br>
<br>
- if (m_param->internalCsp != X265_CSP_I400)<br>
+ if (m_frameFilter->m_param->internalCsp != X265_CSP_I400)<br>
{<br>
for (uint32_t y = 0; y < chromaMarginY; y++)<br>
{<br>
@@ -297,7 +293,7 @@<br>
}<br>
<br>
// Border extend Bottom<br>
- if (m_row == m_numRows - 1)<br>
+ if (m_row == m_frameFilter->m_numRows - 1)<br>
{<br>
pixY += (realH - 1) * stride;<br>
pixU += ((realH >> vChromaShift) - 1) * strideC;<br>
@@ -305,7 +301,7 @@<br>
for (uint32_t y = 0; y < lumaMarginY; y++)<br>
memcpy(pixY + (y + 1) * stride, pixY, copySizeY * sizeof(pixel));<br>
<br>
- if (m_param->internalCsp != X265_CSP_I400)<br>
+ if (m_frameFilter->m_param->internalCsp != X265_CSP_I400)<br>
{<br>
for (uint32_t y = 0; y < chromaMarginY; y++)<br>
{<br>
@@ -320,12 +316,13 @@<br>
void FrameFilter::ParallelFilter::processTasks(int /*workerThreadId*/)<br>
{<br>
SAOParam* saoParam = m_encData->m_saoParam;<br>
- const CUGeom* cuGeoms = m_frameEncoder->m_cuGeoms;<br>
- const uint32_t* ctuGeomMap = m_frameEncoder->m_ctuGeomMap;<br>
+ const CUGeom* cuGeoms = m_frameFilter->m_frameEncoder->m_cuGeoms;<br>
+ const uint32_t* ctuGeomMap = m_frameFilter->m_frameEncoder->m_ctuGeomMap;<br>
PicYuv* reconPic = m_encData->m_reconPic;<br>
const int colStart = m_lastCol.get();<br>
// TODO: Waiting previous row finish or simple clip on it?<br>
const int colEnd = m_allowedCol.get();<br>
+ const int numCols = m_frameFilter->m_numCols;<br>
<br>
// Avoid threading conflict<br>
if (colStart >= colEnd)<br>
@@ -335,7 +332,7 @@<br>
{<br>
const uint32_t cuAddr = m_rowAddr + col;<br>
<br>
- if (m_param->bEnableLoopFilter)<br>
+ if (m_frameFilter->m_param->bEnableLoopFilter)<br>
{<br>
const CUData* ctu = m_encData->getPicCTU(cuAddr);<br>
deblockCTU(ctu, cuGeoms[ctuGeomMap[cuAddr]], Deblock::EDGE_VER);<br>
@@ -343,17 +340,17 @@<br>
<br>
if (col >= 1)<br>
{<br>
- if (m_param->bEnableLoopFilter)<br>
+ if (m_frameFilter->m_param->bEnableLoopFilter)<br>
{<br>
const CUData* ctuPrev = m_encData->getPicCTU(cuAddr - 1);<br>
deblockCTU(ctuPrev, cuGeoms[ctuGeomMap[cuAddr - 1]], Deblock::EDGE_HOR);<br>
<br>
// When SAO Disable, setting column counter here<br>
- if ((!m_param->bEnableSAO) & (m_row >= 1))<br>
+ if ((!m_frameFilter->m_param->bEnableSAO) & (m_row >= 1))<br>
m_prevRow->processPostCu(col - 1);<br>
}<br>
<br>
- if (m_param->bEnableSAO)<br>
+ if (m_frameFilter->m_param->bEnableSAO)<br>
{<br>
// Save SAO bottom row reference pixels<br>
copySaoAboveRef(reconPic, cuAddr - 1, col - 1);<br>
@@ -381,58 +378,58 @@<br>
m_lastCol.incr();<br>
}<br>
<br>
- if (colEnd == (int)m_numCols)<br>
+ if (colEnd == numCols)<br>
{<br>
- const uint32_t cuAddr = m_rowAddr + m_numCols - 1;<br>
+ const uint32_t cuAddr = m_rowAddr + numCols - 1;<br>
<br>
- if (m_param->bEnableLoopFilter)<br>
+ if (m_frameFilter->m_param->bEnableLoopFilter)<br>
{<br>
const CUData* ctuPrev = m_encData->getPicCTU(cuAddr);<br>
deblockCTU(ctuPrev, cuGeoms[ctuGeomMap[cuAddr]], Deblock::EDGE_HOR);<br>
<br>
// When SAO Disable, setting column counter here<br>
- if ((!m_param->bEnableSAO) & (m_row >= 1))<br>
- m_prevRow->processPostCu(m_numCols - 1);<br>
+ if ((!m_frameFilter->m_param->bEnableSAO) & (m_row >= 1))<br>
+ m_prevRow->processPostCu(numCols - 1);<br>
}<br>
<br>
// TODO: move processPostCu() into processSaoUnitCu()<br>
- if (m_param->bEnableSAO)<br>
+ if (m_frameFilter->m_param->bEnableSAO)<br>
{<br>
// Save SAO bottom row reference pixels<br>
- copySaoAboveRef(reconPic, cuAddr, m_numCols - 1);<br>
+ copySaoAboveRef(reconPic, cuAddr, numCols - 1);<br>
<br>
// SAO Decide<br>
// NOTE: reduce condition check for 1 CU only video, Why someone play with it?<br>
- if (m_numCols >= 2)<br>
- m_sao.rdoSaoUnitCu(saoParam, m_rowAddr, m_numCols - 2, cuAddr - 1);<br>
+ if (numCols >= 2)<br>
+ m_sao.rdoSaoUnitCu(saoParam, m_rowAddr, numCols - 2, cuAddr - 1);<br>
<br>
- if (m_numCols >= 1)<br>
- m_sao.rdoSaoUnitCu(saoParam, m_rowAddr, m_numCols - 1, cuAddr);<br>
+ if (numCols >= 1)<br>
+ m_sao.rdoSaoUnitCu(saoParam, m_rowAddr, numCols - 1, cuAddr);<br>
<br>
// Process Previous Rows SAO CU<br>
- if (m_row >= 1 && m_numCols >= 3)<br>
+ if (m_row >= 1 && numCols >= 3)<br>
{<br>
- m_prevRow->processSaoUnitCu(saoParam, m_numCols - 3);<br>
- m_prevRow->processPostCu(m_numCols - 3);<br>
+ m_prevRow->processSaoUnitCu(saoParam, numCols - 3);<br>
+ m_prevRow->processPostCu(numCols - 3);<br>
}<br>
<br>
- if (m_row >= 1 && m_numCols >= 2)<br>
+ if (m_row >= 1 && numCols >= 2)<br>
{<br>
- m_prevRow->processSaoUnitCu(saoParam, m_numCols - 2);<br>
- m_prevRow->processPostCu(m_numCols - 2);<br>
+ m_prevRow->processSaoUnitCu(saoParam, numCols - 2);<br>
+ m_prevRow->processPostCu(numCols - 2);<br>
}<br>
<br>
- if (m_row >= 1 && m_numCols >= 1)<br>
+ if (m_row >= 1 && numCols >= 1)<br>
{<br>
- m_prevRow->processSaoUnitCu(saoParam, m_numCols - 1);<br>
- m_prevRow->processPostCu(m_numCols - 1);<br>
+ m_prevRow->processSaoUnitCu(saoParam, numCols - 1);<br>
+ m_prevRow->processPostCu(numCols - 1);<br>
}<br>
<br>
// Setting column sync counter<br>
if (m_row >= 1)<br>
- m_frame->m_reconColCount[m_row - 1].set(m_numCols - 1);<br>
+ m_frameFilter->m_frame->m_reconColCount[m_row - 1].set(numCols - 1);<br>
}<br>
- m_lastDeblocked.set(m_numCols);<br>
+ m_lastDeblocked.set(numCols);<br>
}<br>
}<br>
<br>
@@ -461,21 +458,21 @@<br>
m_parallelFilter[row].waitForExit();<br>
<br>
/* Check to avoid previous row process slower than current row */<br>
- X265_CHECK((row < 1) || m_parallelFilter[row - 1].m_lastDeblocked.get() == (int)m_parallelFilter[row - 1].m_numCols, "previous row not finish");<br>
+ X265_CHECK((row < 1) || m_parallelFilter[row - 1].m_lastDeblocked.get() == m_numCols, "previous row not finish");<br>
<br>
- m_parallelFilter[row].m_allowedCol.set(m_parallelFilter[row].m_numCols);<br>
+ m_parallelFilter[row].m_allowedCol.set(m_numCols);<br>
m_parallelFilter[row].processTasks(-1);<br>
<br>
if (row == m_numRows - 1)<br>
{<br>
/* TODO: Early start last row */<br>
- if ((row >= 1) && (m_parallelFilter[row - 1].m_lastDeblocked.get() != (int)m_parallelFilter[row - 1].m_numCols))<br>
+ if ((row >= 1) && (m_parallelFilter[row - 1].m_lastDeblocked.get() != m_numCols))<br>
x265_log(m_param, X265_LOG_WARNING, "detected ParallelFilter race condition on last row\n");<br>
<br>
/* Apply SAO on last row of CUs, because we always apply SAO on row[X-1] */<br>
if (m_param->bEnableSAO)<br>
{<br>
- for(uint32_t col = 0; col < m_parallelFilter[row].m_numCols; col++)<br>
+ for(int col = 0; col < m_numCols; col++)<br>
{<br>
// NOTE: must use processSaoUnitCu(), it include TQBypass logic<br>
m_parallelFilter[row].processSaoUnitCu(saoParam, col);<br>
@@ -483,7 +480,7 @@<br>
}<br>
<br>
// Process border extension on last row<br>
- for(uint32_t col = 0; col < m_parallelFilter[row].m_numCols; col++)<br>
+ for(int col = 0; col < m_numCols; col++)<br>
{<br>
// m_reconColCount will be set in processPostCu()<br>
m_parallelFilter[row].processPostCu(col);<br>
diff -r 765864c3c6f0 -r 08dadacfe2cd source/encoder/framefilter.h<br>
--- a/source/encoder/framefilter.h Tue Jan 19 19:28:58 2016 +0800<br>
+++ b/source/encoder/framefilter.h Tue Jan 19 19:58:01 2016 +0800<br>
@@ -52,8 +52,10 @@<br>
int m_pad[2];<br>
<br>
int m_numRows;<br>
+ int m_numCols;<br>
int m_saoRowDelay;<br>
int m_lastHeight;<br>
+ int m_lastWidth;<br>
<br>
void* m_ssimBuf; /* Temp storage for ssim computation */<br>
<br>
@@ -61,15 +63,10 @@<br>
class ParallelFilter : public BondedTaskGroup, public Deblock<br>
{<br>
public:<br>
- uint32_t m_numCols;<br>
- uint32_t m_numRows;<br>
uint32_t m_rowHeight;<br>
- uint32_t m_lastWidth;<br>
- uint32_t m_row;<br>
+ int m_row;<br>
uint32_t m_rowAddr;<br>
- x265_param* m_param;<br>
- Frame* m_frame;<br>
- FrameEncoder* m_frameEncoder;<br>
+ FrameFilter* m_frameFilter;<br>
FrameData* m_encData;<br>
ParallelFilter* m_prevRow;<br>
SAO m_sao;<br>
@@ -78,15 +75,10 @@<br>
ThreadSafeInteger m_lastDeblocked; /* The column that finished all of Deblock stages */<br>
<br>
ParallelFilter()<br>
- : m_numCols(0)<br>
- , m_numRows(0)<br>
- , m_rowHeight(0)<br>
- , m_lastWidth(0)<br>
+ : m_rowHeight(0)<br>
, m_row(0)<br>
, m_rowAddr(0)<br>
- , m_param(NULL)<br>
- , m_frame(NULL)<br>
- , m_frameEncoder(NULL)<br>
+ , m_frameFilter(NULL)<br>
, m_encData(NULL)<br>
, m_prevRow(NULL)<br>
{<br>
@@ -104,18 +96,13 @@<br>
void copySaoAboveRef(PicYuv* reconPic, uint32_t cuAddr, int col);<br>
<br>
// Post-Process (Border extension)<br>
- void processPostCu(uint32_t col) const;<br>
+ void processPostCu(int col) const;<br>
<br>
uint32_t getCUHeight() const<br>
{<br>
return m_rowHeight;<br>
}<br>
<br>
- uint32_t getCUWidth(int colNum) const<br>
- {<br>
- return (colNum == (int)m_numCols - 1) ? m_lastWidth : g_maxCUSize;<br>
- }<br>
-<br>
protected:<br>
<br>
ParallelFilter operator=(const ParallelFilter&);<br>
@@ -132,6 +119,11 @@<br>
{<br>
}<br>
<br>
+ uint32_t getCUWidth(int colNum) const<br>
+ {<br>
+ return (colNum == (int)m_numCols - 1) ? m_lastWidth : g_maxCUSize;<br>
+ }<br>
+<br>
void init(Encoder *top, FrameEncoder *frame, int numRows, uint32_t numCols);<br>
void destroy();<br>
<br>
<br>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" rel="noreferrer" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</div></div></blockquote></div><br><br clear="all"><div><br></div>-- <br><div class="gmail_signature"><div dir="ltr"><div><div>Deepthi Nandakumar<br></div>Engineering Manager, x265<br></div>Multicoreware, Inc<br></div></div>
</div>