[x265] [PATCH 2 of 3] simplify logic for setting reconColCount[] and fix bug when both Deblock and SAO are disabled
Min Chen
chenm003 at 163.com
Tue Dec 22 02:49:09 CET 2015
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1450727188 21600
# Node ID d8c3eded1440670bde63e2fb5bec0e80ff6e2d67
# Parent bcc6d005cd852c043413d1f90aca05366acec514
simplify logic for setting reconColCount[] and fix bug when both Deblock and SAO are disabled
---
source/encoder/frameencoder.cpp | 5 +-
source/encoder/framefilter.cpp | 119 +++++++++++++++++++++++++++++++++------
source/encoder/framefilter.h | 8 +-
3 files changed, 108 insertions(+), 24 deletions(-)
diff -r bcc6d005cd85 -r d8c3eded1440 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp Mon Dec 21 13:46:26 2015 -0600
+++ b/source/encoder/frameencoder.cpp Mon Dec 21 13:46:28 2015 -0600
@@ -1142,8 +1142,7 @@
// Both Loopfilter and SAO Disabled
else
{
- m_frameFilter.m_parallelFilter[row].processPostCu(col);
- m_frame->m_reconColCount[row].set(col);
+ m_frameFilter.processPostCu(row, col);
}
if (m_param->bEnableWavefront && curRow.completed >= 2 && row < m_numRows - 1 &&
@@ -1257,9 +1256,9 @@
// Process border extension on last row
for(uint32_t col = 0; col < numCols; col++)
{
+ // m_reconColCount will be set in processPostCu()
m_frameFilter.m_parallelFilter[row].processPostCu(col);
}
- m_frame->m_reconColCount[row].set(numCols - 1);
}
}
diff -r bcc6d005cd85 -r d8c3eded1440 source/encoder/framefilter.cpp
--- a/source/encoder/framefilter.cpp Mon Dec 21 13:46:26 2015 -0600
+++ b/source/encoder/framefilter.cpp Mon Dec 21 13:46:28 2015 -0600
@@ -231,6 +231,9 @@
// NOTE: MUST BE delay a row when Deblock enabled, the Deblock will modify above pixels in Horizon pass
void FrameFilter::ParallelFilter::processPostCu(uint32_t col) const
{
+ // Update finished CU cursor
+ m_frame->m_reconColCount[m_row].set(col);
+
// shortcut path for non-border area
if ((col != 0) & (col != numCols - 1) & (m_row != 0) & (m_row != numRows - 1))
return;
@@ -352,10 +355,7 @@
// When SAO Disable, setting column counter here
if ((!m_param->bEnableSAO) & (m_row >= 1))
- {
m_prevRow->processPostCu(col - 1);
- m_frame->m_reconColCount[m_row - 1].set(col - 1);
- }
}
if (m_param->bEnableSAO)
@@ -378,7 +378,6 @@
// Must delay 1 row to avoid thread data race conflict
m_prevRow->processSaoUnitCu(saoParam, col - 3);
m_prevRow->processPostCu(col - 3);
- m_frame->m_reconColCount[m_row - 1].set(col - 3);
}
}
@@ -398,10 +397,7 @@
// When SAO Disable, setting column counter here
if ((!m_param->bEnableSAO) & (m_row >= 1))
- {
m_prevRow->processPostCu(numCols - 1);
- m_frame->m_reconColCount[m_row - 1].set(numCols - 1);
- }
}
// TODO: move processPostCu() into processSaoUnitCu()
@@ -456,7 +452,7 @@
if (!m_param->bEnableLoopFilter && !m_param->bEnableSAO)
{
- processRowPost(row);
+ processPostRow(row);
return;
}
FrameData& encData = *m_frame->m_encData;
@@ -467,7 +463,7 @@
// this row of CTUs has been encoded
if (row > 0)
- processRowPost(row - 1);
+ processPostRow(row - 1);
if (row == m_numRows - 1)
{
@@ -482,16 +478,105 @@
m_parallelFilter[0].m_sao.rdoSaoUnitRowEnd(saoParam, encData.m_slice->m_sps->numCUsInFrame);
}
- processRowPost(row);
+ processPostRow(row);
}
}
-uint32_t FrameFilter::getCUHeight(int rowNum) const
+// NOTE: This overload is for the case where both Deblock and SAO are disabled. It records col in m_reconColCount[row] and, when CU (row, col) lies on a frame edge, extends the reconstructed picture border.
+void FrameFilter::processPostCu(uint32_t row, uint32_t col) const
{
- return rowNum == m_numRows - 1 ? m_lastHeight : g_maxCUSize;
+ // Update finished CU cursor. NOTE(review): the cursor is published before the border extension below runs - confirm that waiters on m_reconColCount never read the padding area of this CU.
+ m_frame->m_reconColCount[row].set(col);
+
+ // Shortcut: a CU that touches no frame edge needs no border extension
+ if ((col != 0) & (col != FrameFilter::ParallelFilter::numCols - 1) & (row != 0) & (row != FrameFilter::ParallelFilter::numRows - 1))
+ return;
+
+ PicYuv *reconPic = m_frame->m_reconPic;
+ const uint32_t rowAddr = row * FrameFilter::ParallelFilter::numCols;
+ const uint32_t lineStartCUAddr = rowAddr + col;
+ const int realH = FrameFilter::ParallelFilter::getCUHeight(row);
+ const int realW = FrameFilter::ParallelFilter::getCUWidth(col);
+
+ const uint32_t lumaMarginX = reconPic->m_lumaMarginX;
+ const uint32_t lumaMarginY = reconPic->m_lumaMarginY;
+ const uint32_t chromaMarginX = reconPic->m_chromaMarginX;
+ const uint32_t chromaMarginY = reconPic->m_chromaMarginY;
+ const int hChromaShift = reconPic->m_hChromaShift;
+ const int vChromaShift = reconPic->m_vChromaShift;
+ const intptr_t stride = reconPic->m_stride;
+ const intptr_t strideC = reconPic->m_strideC;
+ pixel *pixY = reconPic->getLumaAddr(lineStartCUAddr);
+ // MUST check for I400, since m_picOrg is uninitialized in that case
+ pixel *pixU = (m_param->internalCsp != X265_CSP_I400) ? reconPic->getCbAddr(lineStartCUAddr) : NULL;
+ pixel *pixV = (m_param->internalCsp != X265_CSP_I400) ? reconPic->getCrAddr(lineStartCUAddr) : NULL;
+ int copySizeY = realW;
+ int copySizeC = (realW >> hChromaShift);
+
+ if ((col == 0) | (col == FrameFilter::ParallelFilter::numCols - 1))
+ {
+ // TODO: improve by processing only the Left or only the Right side
+ primitives.extendRowBorder(reconPic->getLumaAddr(rowAddr), stride, reconPic->m_picWidth, realH, reconPic->m_lumaMarginX);
+
+ if (m_param->internalCsp != X265_CSP_I400)
+ {
+ primitives.extendRowBorder(reconPic->getCbAddr(rowAddr), strideC, reconPic->m_picWidth >> hChromaShift, realH >> vChromaShift, reconPic->m_chromaMarginX);
+ primitives.extendRowBorder(reconPic->getCrAddr(rowAddr), strideC, reconPic->m_picWidth >> hChromaShift, realH >> vChromaShift, reconPic->m_chromaMarginX);
+ }
+ }
+
+ // The first and last CU of a row also copy the horizontal margin in the top/bottom extension below
+ if ((col == 0) | (col == FrameFilter::ParallelFilter::numCols - 1))
+ {
+ copySizeY += lumaMarginX;
+ copySizeC += chromaMarginX;
+ }
+
+ // First column: start the copy at the left padding area so it is extended together with the first CU. NOTE(review): pixU/pixV are NULL for I400, so the chroma adjustments here and in the bottom-border branch are arithmetic on null pointers (never dereferenced, the memcpy calls are guarded) - consider guarding them too.
+ if (col == 0)
+ {
+ pixY -= lumaMarginX;
+ pixU -= chromaMarginX;
+ pixV -= chromaMarginX;
+ }
+
+ // Extend the top border: replicate the first row of the CU upward into the vertical margin
+ if (row == 0)
+ {
+ for (uint32_t y = 0; y < lumaMarginY; y++)
+ memcpy(pixY - (y + 1) * stride, pixY, copySizeY * sizeof(pixel));
+
+ if (m_param->internalCsp != X265_CSP_I400)
+ {
+ for (uint32_t y = 0; y < chromaMarginY; y++)
+ {
+ memcpy(pixU - (y + 1) * strideC, pixU, copySizeC * sizeof(pixel));
+ memcpy(pixV - (y + 1) * strideC, pixV, copySizeC * sizeof(pixel));
+ }
+ }
+ }
+
+ // Extend the bottom border: move to the last row of the CU and replicate it downward into the vertical margin
+ if (row == FrameFilter::ParallelFilter::numRows - 1)
+ {
+ pixY += (realH - 1) * stride;
+ pixU += ((realH >> vChromaShift) - 1) * strideC;
+ pixV += ((realH >> vChromaShift) - 1) * strideC;
+ for (uint32_t y = 0; y < lumaMarginY; y++)
+ memcpy(pixY + (y + 1) * stride, pixY, copySizeY * sizeof(pixel));
+
+ if (m_param->internalCsp != X265_CSP_I400)
+ {
+ for (uint32_t y = 0; y < chromaMarginY; y++)
+ {
+ memcpy(pixU + (y + 1) * strideC, pixU, copySizeC * sizeof(pixel));
+ memcpy(pixV + (y + 1) * strideC, pixV, copySizeC * sizeof(pixel));
+ }
+ }
+ }
}
-void FrameFilter::processRowPost(int row)
+void FrameFilter::processPostRow(int row)
{
PicYuv *reconPic = m_frame->m_reconPic;
const uint32_t numCols = m_frame->m_encData->m_slice->m_sps->numCuInWidth;
@@ -507,7 +592,7 @@
intptr_t stride = reconPic->m_stride;
uint32_t width = reconPic->m_picWidth - m_pad[0];
- uint32_t height = getCUHeight(row);
+ uint32_t height = FrameFilter::ParallelFilter::getCUHeight(row);
uint64_t ssdY = computeSSD(fencPic->getLumaAddr(cuAddr), reconPic->getLumaAddr(cuAddr), stride, width, height);
m_frameEncoder->m_SSDY += ssdY;
@@ -547,7 +632,7 @@
}
if (m_param->decodedPictureHashSEI == 1)
{
- uint32_t height = getCUHeight(row);
+ uint32_t height = FrameFilter::ParallelFilter::getCUHeight(row);
uint32_t width = reconPic->m_picWidth;
intptr_t stride = reconPic->m_stride;
@@ -573,7 +658,7 @@
}
else if (m_param->decodedPictureHashSEI == 2)
{
- uint32_t height = getCUHeight(row);
+ uint32_t height = FrameFilter::ParallelFilter::getCUHeight(row);
uint32_t width = reconPic->m_picWidth;
intptr_t stride = reconPic->m_stride;
@@ -595,7 +680,7 @@
else if (m_param->decodedPictureHashSEI == 3)
{
uint32_t width = reconPic->m_picWidth;
- uint32_t height = getCUHeight(row);
+ uint32_t height = FrameFilter::ParallelFilter::getCUHeight(row);
intptr_t stride = reconPic->m_stride;
uint32_t cuHeight = g_maxCUSize;
diff -r bcc6d005cd85 -r d8c3eded1440 source/encoder/framefilter.h
--- a/source/encoder/framefilter.h Mon Dec 21 13:46:26 2015 -0600
+++ b/source/encoder/framefilter.h Mon Dec 21 13:46:28 2015 -0600
@@ -102,12 +102,12 @@
// Post-Process (Border extension)
void processPostCu(uint32_t col) const;
- uint32_t getCUHeight(int rowNum) const
+ static uint32_t getCUHeight(int rowNum)
{
return (rowNum == (int)numRows - 1) ? lastHeight : g_maxCUSize;
}
- uint32_t getCUWidth(int colNum) const
+ static uint32_t getCUWidth(int colNum)
{
return (colNum == (int)numCols - 1) ? lastWidth : g_maxCUSize;
}
@@ -134,8 +134,8 @@
void start(Frame *pic, Entropy& initState, int qp);
void processRow(int row);
- void processRowPost(int row);
- uint32_t getCUHeight(int rowNum) const;
+ void processPostRow(int row);
+ void processPostCu(uint32_t row, uint32_t col) const;
};
}
More information about the x265-devel
mailing list