[x265] [PATCH 1 of 3] do border extension on CU level and new counter for reconColCount
Min Chen
chenm003 at 163.com
Tue Dec 22 02:49:08 CET 2015
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1450727186 21600
# Node ID bcc6d005cd852c043413d1f90aca05366acec514
# Parent 942587f1ab4484ce69a818ce9c8adc59c38fe239
do border extension on CU level and new counter for reconColCount
---
source/common/frame.cpp | 11 +++
source/common/frame.h | 2 +
source/encoder/dpb.cpp | 6 ++
source/encoder/frameencoder.cpp | 13 +++
source/encoder/framefilter.cpp | 186 +++++++++++++++++++++++++++------------
source/encoder/framefilter.h | 17 ++++
6 files changed, 177 insertions(+), 58 deletions(-)
diff -r 942587f1ab44 -r bcc6d005cd85 source/common/frame.cpp
--- a/source/common/frame.cpp Wed Dec 16 09:08:00 2015 +0530
+++ b/source/common/frame.cpp Mon Dec 21 13:46:26 2015 -0600
@@ -33,6 +33,7 @@
m_bChromaExtended = false;
m_lowresInit = false;
m_reconRowCount.set(0);
+ m_reconColCount = NULL;
m_countRefEncoders = 0;
m_encData = NULL;
m_reconPic = NULL;
@@ -51,6 +52,10 @@
if (m_fencPic->create(param->sourceWidth, param->sourceHeight, param->internalCsp) &&
m_lowres.create(m_fencPic, param->bframes, !!param->rc.aqMode))
{
+ X265_CHECK((m_reconColCount == NULL), "m_reconColCount was initialized");
+ m_numRows = (m_fencPic->m_picHeight + g_maxCUSize - 1) / g_maxCUSize;
+ m_reconColCount = new ThreadSafeInteger[m_numRows];
+
if (quantOffsets)
{
int32_t cuCount = m_lowres.maxBlocksInRow * m_lowres.maxBlocksInCol;
@@ -122,6 +127,12 @@
m_reconPic = NULL;
}
+ if (m_reconColCount)
+ {
+ delete[] m_reconColCount;
+ m_reconColCount = NULL;
+ }
+
if (m_quantOffsets)
{
delete[] m_quantOffsets;
diff -r 942587f1ab44 -r bcc6d005cd85 source/common/frame.h
--- a/source/common/frame.h Wed Dec 16 09:08:00 2015 +0530
+++ b/source/common/frame.h Mon Dec 21 13:46:26 2015 -0600
@@ -63,6 +63,8 @@
/* Frame Parallelism - notification between FrameEncoders of available motion reference rows */
ThreadSafeInteger m_reconRowCount; // count of CTU rows completely reconstructed and extended for motion reference
+ ThreadSafeInteger* m_reconColCount; // count of CTU cols completely reconstructed and extended for motion reference
+ int32_t m_numRows;
volatile uint32_t m_countRefEncoders; // count of FrameEncoder threads monitoring m_reconRowCount
Frame* m_next; // PicList doubly linked list pointers
diff -r 942587f1ab44 -r bcc6d005cd85 source/encoder/dpb.cpp
--- a/source/encoder/dpb.cpp Wed Dec 16 09:08:00 2015 +0530
+++ b/source/encoder/dpb.cpp Mon Dec 21 13:46:26 2015 -0600
@@ -74,6 +74,12 @@
curFrame->m_reconRowCount.set(0);
curFrame->m_bChromaExtended = false;
+ // Reset column counter
+ X265_CHECK(curFrame->m_reconColCount != NULL, "curFrame->m_reconColCount check failure");
+ X265_CHECK(curFrame->m_numRows > 0, "curFrame->m_numRows check failure");
+ for(int32_t col = 0; col < curFrame->m_numRows; col++)
+ curFrame->m_reconColCount[col].set(0);
+
// iterator is invalidated by remove, restart scan
m_picList.remove(*curFrame);
iterFrame = m_picList.first();
diff -r 942587f1ab44 -r bcc6d005cd85 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp Wed Dec 16 09:08:00 2015 +0530
+++ b/source/encoder/frameencoder.cpp Mon Dec 21 13:46:26 2015 -0600
@@ -1139,6 +1139,12 @@
m_frameFilter.m_parallelFilter[row].tryBondPeers(*this, 1);
}
}
+ // Both Loopfilter and SAO Disabled
+ else
+ {
+ m_frameFilter.m_parallelFilter[row].processPostCu(col);
+ m_frame->m_reconColCount[row].set(col);
+ }
if (m_param->bEnableWavefront && curRow.completed >= 2 && row < m_numRows - 1 &&
(!m_bAllRowsStop || intRow + 1 < m_vbvResetTriggerRow))
@@ -1247,6 +1253,13 @@
m_frameFilter.m_parallelFilter[row].processSaoUnitCu(saoParam, col);
}
}
+
+ // Process border extension on last row
+ for(uint32_t col = 0; col < numCols; col++)
+ {
+ m_frameFilter.m_parallelFilter[row].processPostCu(col);
+ }
+ m_frame->m_reconColCount[row].set(numCols - 1);
}
}
diff -r 942587f1ab44 -r bcc6d005cd85 source/encoder/framefilter.cpp
--- a/source/encoder/framefilter.cpp Wed Dec 16 09:08:00 2015 +0530
+++ b/source/encoder/framefilter.cpp Mon Dec 21 13:46:26 2015 -0600
@@ -37,6 +37,8 @@
uint32_t FrameFilter::ParallelFilter::numCols = 0;
uint32_t FrameFilter::ParallelFilter::numRows = 0;
+uint32_t FrameFilter::ParallelFilter::lastHeight = 0;
+uint32_t FrameFilter::ParallelFilter::lastWidth = 0;
void FrameFilter::destroy()
{
@@ -65,7 +67,7 @@
m_pad[0] = top->m_sps.conformanceWindow.rightOffset;
m_pad[1] = top->m_sps.conformanceWindow.bottomOffset;
m_saoRowDelay = m_param->bEnableLoopFilter ? 1 : 0;
- m_lastHeight = m_param->sourceHeight % g_maxCUSize ? m_param->sourceHeight % g_maxCUSize : g_maxCUSize;
+ m_lastHeight = (m_param->sourceHeight % g_maxCUSize) ? (m_param->sourceHeight % g_maxCUSize) : g_maxCUSize;
if (m_param->bEnableSsim)
m_ssimBuf = X265_MALLOC(int, 8 * (m_param->sourceWidth / 4 + 3));
@@ -105,6 +107,8 @@
// Setting maximum columns
ParallelFilter::numCols = numCols;
ParallelFilter::numRows = numRows;
+ ParallelFilter::lastHeight = m_lastHeight;
+ ParallelFilter::lastWidth = (m_param->sourceWidth % g_maxCUSize) ? (m_param->sourceWidth % g_maxCUSize) : g_maxCUSize;
}
void FrameFilter::start(Frame *frame, Entropy& initState, int qp)
@@ -123,6 +127,7 @@
m_parallelFilter[row].m_allowedCol.set(0);
m_parallelFilter[row].m_lastDeblocked.set(-1);
m_parallelFilter[row].m_encData = frame->m_encData;
+ m_parallelFilter[row].m_frame = frame;
}
// Reset SAO common statistics
@@ -218,8 +223,98 @@
uint32_t cuAddr = m_rowAddr + col;
const CUData* ctu = m_encData->getPicCTU(cuAddr);
- assert(m_frameEncoder->m_frame->m_reconPic == m_encData->m_reconPic);
- origCUSampleRestoration(ctu, cuGeoms[ctuGeomMap[cuAddr]], *m_frameEncoder->m_frame);
+ assert(m_frame->m_reconPic == m_encData->m_reconPic);
+ origCUSampleRestoration(ctu, cuGeoms[ctuGeomMap[cuAddr]], *m_frame);
+ }
+}
+
+// Border extension for CTU `col` of this row. NOTE: must run one row late when deblocking is enabled, since the horizontal-edge deblock pass modifies pixels of the row above
+void FrameFilter::ParallelFilter::processPostCu(uint32_t col) const
+{
+ // Fast exit: only CTUs in the first/last column or the first/last row touch the picture border
+ if ((col != 0) & (col != numCols - 1) & (m_row != 0) & (m_row != numRows - 1))
+ return;
+
+ PicYuv *reconPic = m_frame->m_reconPic;
+ const uint32_t lineStartCUAddr = m_rowAddr + col;
+ const int realH = getCUHeight(m_row);
+ const int realW = getCUWidth(col);
+
+ const uint32_t lumaMarginX = reconPic->m_lumaMarginX;
+ const uint32_t lumaMarginY = reconPic->m_lumaMarginY;
+ const uint32_t chromaMarginX = reconPic->m_chromaMarginX;
+ const uint32_t chromaMarginY = reconPic->m_chromaMarginY;
+ const int hChromaShift = reconPic->m_hChromaShift;
+ const int vChromaShift = reconPic->m_vChromaShift;
+ const intptr_t stride = reconPic->m_stride;
+ const intptr_t strideC = reconPic->m_strideC;
+ pixel *pixY = reconPic->getLumaAddr(lineStartCUAddr);
+ // Chroma addresses are fetched only for non-I400 formats (per the original note, m_picOrg is uninitialized for I400); otherwise they stay NULL
+ pixel *pixU = (m_param->internalCsp != X265_CSP_I400) ? reconPic->getCbAddr(lineStartCUAddr) : NULL;
+ pixel *pixV = (m_param->internalCsp != X265_CSP_I400) ? reconPic->getCrAddr(lineStartCUAddr) : NULL;
+ int copySizeY = realW;
+ int copySizeC = (realW >> hChromaShift);
+
+ if ((col == 0) | (col == numCols - 1))
+ {
+ // TODO: improve by processing only the left or only the right side; currently the whole row is extended on both sides
+ primitives.extendRowBorder(reconPic->getLumaAddr(m_rowAddr), stride, reconPic->m_picWidth, realH, reconPic->m_lumaMarginX);
+
+ if (m_param->internalCsp != X265_CSP_I400)
+ {
+ primitives.extendRowBorder(reconPic->getCbAddr(m_rowAddr), strideC, reconPic->m_picWidth >> hChromaShift, realH >> vChromaShift, reconPic->m_chromaMarginX);
+ primitives.extendRowBorder(reconPic->getCrAddr(m_rowAddr), strideC, reconPic->m_picWidth >> hChromaShift, realH >> vChromaShift, reconPic->m_chromaMarginX);
+ }
+ }
+
+ // First and last CU also copy one horizontal margin. NOTE(review): with numCols == 1 both cases hold but only one margin is added to the copy size - confirm
+ if ((col == 0) | (col == numCols - 1))
+ {
+ copySizeY += lumaMarginX;
+ copySizeC += chromaMarginX;
+ }
+
+ // First column: start the copy inside the left padding area. NOTE(review): pixU/pixV are NULL for I400 yet are still adjusted here - confirm
+ if (col == 0)
+ {
+ pixY -= lumaMarginX;
+ pixU -= chromaMarginX;
+ pixV -= chromaMarginX;
+ }
+
+ // Top border: replicate the first pixel row upward, lumaMarginY times for luma and chromaMarginY times for chroma
+ if (m_row == 0)
+ {
+ for (uint32_t y = 0; y < lumaMarginY; y++)
+ memcpy(pixY - (y + 1) * stride, pixY, copySizeY * sizeof(pixel));
+
+ if (m_param->internalCsp != X265_CSP_I400)
+ {
+ for (uint32_t y = 0; y < chromaMarginY; y++)
+ {
+ memcpy(pixU - (y + 1) * strideC, pixU, copySizeC * sizeof(pixel));
+ memcpy(pixV - (y + 1) * strideC, pixV, copySizeC * sizeof(pixel));
+ }
+ }
+ }
+
+ // Bottom border: move to the last pixel row of this CTU row and replicate it downward into the margin
+ if (m_row == numRows - 1)
+ {
+ pixY += (realH - 1) * stride;
+ pixU += ((realH >> vChromaShift) - 1) * strideC;
+ pixV += ((realH >> vChromaShift) - 1) * strideC;
+ for (uint32_t y = 0; y < lumaMarginY; y++)
+ memcpy(pixY + (y + 1) * stride, pixY, copySizeY * sizeof(pixel));
+
+ if (m_param->internalCsp != X265_CSP_I400)
+ {
+ for (uint32_t y = 0; y < chromaMarginY; y++)
+ {
+ memcpy(pixU + (y + 1) * strideC, pixU, copySizeC * sizeof(pixel));
+ memcpy(pixV + (y + 1) * strideC, pixV, copySizeC * sizeof(pixel));
+ }
+ }
}
}
@@ -254,6 +349,13 @@
{
const CUData* ctuPrev = m_encData->getPicCTU(cuAddr - 1);
deblockCTU(ctuPrev, cuGeoms[ctuGeomMap[cuAddr - 1]], Deblock::EDGE_HOR);
+
+ // When SAO is disabled, set the column counter here
+ if ((!m_param->bEnableSAO) & (m_row >= 1))
+ {
+ m_prevRow->processPostCu(col - 1);
+ m_frame->m_reconColCount[m_row - 1].set(col - 1);
+ }
}
if (m_param->bEnableSAO)
@@ -275,6 +377,8 @@
{
// Must delay 1 row to avoid thread data race conflict
m_prevRow->processSaoUnitCu(saoParam, col - 3);
+ m_prevRow->processPostCu(col - 3);
+ m_frame->m_reconColCount[m_row - 1].set(col - 3);
}
}
@@ -291,8 +395,16 @@
{
const CUData* ctuPrev = m_encData->getPicCTU(cuAddr);
deblockCTU(ctuPrev, cuGeoms[ctuGeomMap[cuAddr]], Deblock::EDGE_HOR);
+
+ // When SAO is disabled, set the column counter here
+ if ((!m_param->bEnableSAO) & (m_row >= 1))
+ {
+ m_prevRow->processPostCu(numCols - 1);
+ m_frame->m_reconColCount[m_row - 1].set(numCols - 1);
+ }
}
+ // TODO: move processPostCu() into processSaoUnitCu()
if (m_param->bEnableSAO)
{
// Save SAO bottom row reference pixels
@@ -308,13 +420,26 @@
// Process Previous Rows SAO CU
if (m_row >= 1 && numCols >= 3)
+ {
m_prevRow->processSaoUnitCu(saoParam, numCols - 3);
+ m_prevRow->processPostCu(numCols - 3);
+ }
if (m_row >= 1 && numCols >= 2)
+ {
m_prevRow->processSaoUnitCu(saoParam, numCols - 2);
+ m_prevRow->processPostCu(numCols - 2);
+ }
if (m_row >= 1 && numCols >= 1)
+ {
m_prevRow->processSaoUnitCu(saoParam, numCols - 1);
+ m_prevRow->processPostCu(numCols - 1);
+ }
+
+ // Setting column sync counter
+ if (m_row >= 1)
+ m_frame->m_reconColCount[m_row - 1].set(numCols - 1);
}
m_lastDeblocked.set(numCols);
}
@@ -371,61 +496,6 @@
PicYuv *reconPic = m_frame->m_reconPic;
const uint32_t numCols = m_frame->m_encData->m_slice->m_sps->numCuInWidth;
const uint32_t lineStartCUAddr = row * numCols;
- const int realH = getCUHeight(row);
-
- // Border extend Left and Right
- primitives.extendRowBorder(reconPic->getLumaAddr(lineStartCUAddr), reconPic->m_stride, reconPic->m_picWidth, realH, reconPic->m_lumaMarginX);
- if (m_param->internalCsp != X265_CSP_I400)
- {
- primitives.extendRowBorder(reconPic->getCbAddr(lineStartCUAddr), reconPic->m_strideC, reconPic->m_picWidth >> m_hChromaShift, realH >> m_vChromaShift, reconPic->m_chromaMarginX);
- primitives.extendRowBorder(reconPic->getCrAddr(lineStartCUAddr), reconPic->m_strideC, reconPic->m_picWidth >> m_hChromaShift, realH >> m_vChromaShift, reconPic->m_chromaMarginX);
- }
-
- // Border extend Top
- if (!row)
- {
- const intptr_t stride = reconPic->m_stride;
- pixel *pixY = reconPic->getLumaAddr(lineStartCUAddr) - reconPic->m_lumaMarginX;
-
- for (uint32_t y = 0; y < reconPic->m_lumaMarginY; y++)
- memcpy(pixY - (y + 1) * stride, pixY, stride * sizeof(pixel));
-
- if (m_param->internalCsp != X265_CSP_I400)
- {
- const intptr_t strideC = reconPic->m_strideC;
- pixel *pixU = reconPic->getCbAddr(lineStartCUAddr) - reconPic->m_chromaMarginX;
- pixel *pixV = reconPic->getCrAddr(lineStartCUAddr) - reconPic->m_chromaMarginX;
-
- for (uint32_t y = 0; y < reconPic->m_chromaMarginY; y++)
- {
- memcpy(pixU - (y + 1) * strideC, pixU, strideC * sizeof(pixel));
- memcpy(pixV - (y + 1) * strideC, pixV, strideC * sizeof(pixel));
- }
- }
- }
-
- // Border extend Bottom
- if (row == m_numRows - 1)
- {
- const intptr_t stride = reconPic->m_stride;
- pixel *pixY = reconPic->getLumaAddr(lineStartCUAddr) - reconPic->m_lumaMarginX + (realH - 1) * stride;
-
- for (uint32_t y = 0; y < reconPic->m_lumaMarginY; y++)
- memcpy(pixY + (y + 1) * stride, pixY, stride * sizeof(pixel));
-
- if (m_param->internalCsp != X265_CSP_I400)
- {
- const intptr_t strideC = reconPic->m_strideC;
- pixel *pixU = reconPic->getCbAddr(lineStartCUAddr) - reconPic->m_chromaMarginX + ((realH >> m_vChromaShift) - 1) * strideC;
- pixel *pixV = reconPic->getCrAddr(lineStartCUAddr) - reconPic->m_chromaMarginX + ((realH >> m_vChromaShift) - 1) * strideC;
-
- for (uint32_t y = 0; y < reconPic->m_chromaMarginY; y++)
- {
- memcpy(pixU + (y + 1) * strideC, pixU, strideC * sizeof(pixel));
- memcpy(pixV + (y + 1) * strideC, pixV, strideC * sizeof(pixel));
- }
- }
- }
// Notify other FrameEncoders that this row of reconstructed pixels is available
m_frame->m_reconRowCount.incr();
diff -r 942587f1ab44 -r bcc6d005cd85 source/encoder/framefilter.h
--- a/source/encoder/framefilter.h Wed Dec 16 09:08:00 2015 +0530
+++ b/source/encoder/framefilter.h Mon Dec 21 13:46:26 2015 -0600
@@ -63,9 +63,12 @@
public:
static uint32_t numCols;
static uint32_t numRows;
+ static uint32_t lastHeight;
+ static uint32_t lastWidth;
uint32_t m_row;
uint32_t m_rowAddr;
x265_param* m_param;
+ Frame* m_frame;
FrameEncoder* m_frameEncoder;
FrameData* m_encData;
ParallelFilter* m_prevRow;
@@ -78,6 +81,7 @@
: m_row(0)
, m_rowAddr(0)
, m_param(NULL)
+ , m_frame(NULL)
, m_frameEncoder(NULL)
, m_encData(NULL)
, m_prevRow(NULL)
@@ -95,6 +99,19 @@
// Copy and Save SAO reference pixels for SAO Rdo decide
void copySaoAboveRef(PicYuv* reconPic, uint32_t cuAddr, int col);
+ // Post-Process (Border extension)
+ void processPostCu(uint32_t col) const;
+
+ uint32_t getCUHeight(int rowNum) const // returns lastHeight for the final CTU row, g_maxCUSize for every other row
+ {
+ return (rowNum == (int)numRows - 1) ? lastHeight : g_maxCUSize;
+ }
+
+ uint32_t getCUWidth(int colNum) const // returns lastWidth for the final CTU column, g_maxCUSize for every other column
+ {
+ return (colNum == (int)numCols - 1) ? lastWidth : g_maxCUSize;
+ }
+
protected:
ParallelFilter operator=(const ParallelFilter&);
More information about the x265-devel
mailing list