[x265] [PATCH 06 of 15] sao: merge tmpU1 and tmpU2 into tmpU, and copy these above reference pixels in every row based thread
Min Chen
chenm003 at 163.com
Wed Dec 2 18:28:29 CET 2015
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1449076356 21600
# Node ID 1c6f6e627722c767bb9484064a1cea6286c62103
# Parent eb20b66eebe7e9de04cec0f98f1c3c43e678fcf5
sao: merge tmpU1 and tmpU2 into tmpU, and copy these above reference pixels in every row based thread
---
source/encoder/frameencoder.cpp | 7 ++++++
source/encoder/framefilter.cpp | 44 +++++++++++++++++++++++++++-----------
source/encoder/framefilter.h | 3 ++
source/encoder/sao.cpp | 39 +++++++++------------------------
source/encoder/sao.h | 3 +-
5 files changed, 53 insertions(+), 43 deletions(-)
diff -r eb20b66eebe7 -r 1c6f6e627722 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp Wed Dec 02 11:12:32 2015 -0600
+++ b/source/encoder/frameencoder.cpp Wed Dec 02 11:12:36 2015 -0600
@@ -1124,6 +1124,13 @@
}
}
+ /* Case of DEBLOCK Disable and SAO Enable */
+ if (!m_param->bEnableLoopFilter && m_param->bEnableSAO)
+ {
+ PicYuv* reconPic = curEncData.m_reconPic;
+ m_frameFilter.m_parallelFilter[row].copySaoAboveRef(reconPic, cuAddr, col);
+ }
+
if (m_param->bEnableWavefront && curRow.completed >= 2 && row < m_numRows - 1 &&
(!m_bAllRowsStop || intRow + 1 < m_vbvResetTriggerRow))
{
diff -r eb20b66eebe7 -r 1c6f6e627722 source/encoder/framefilter.cpp
--- a/source/encoder/framefilter.cpp Wed Dec 02 11:12:32 2015 -0600
+++ b/source/encoder/framefilter.cpp Wed Dec 02 11:12:36 2015 -0600
@@ -69,7 +69,7 @@
if (m_param->bEnableSsim)
m_ssimBuf = X265_MALLOC(int, 8 * (m_param->sourceWidth / 4 + 3));
- if (m_param->bEnableLoopFilter)
+ if (m_param->bEnableLoopFilter | m_param->bEnableSAO)
m_parallelFilter = new ParallelFilter[numRows];
if (m_parallelFilter)
@@ -91,6 +91,7 @@
for(int row = 0; row < numRows; row++)
{
+ m_parallelFilter[row].m_param = m_param;
m_parallelFilter[row].m_rowAddr = row * numCols;
m_parallelFilter[row].m_frameEncoder = m_frameEncoder;
}
@@ -117,17 +118,39 @@
m_parallelFilter[row].m_encData = frame->m_encData;
}
- // Reset SAO global/common statistics
+ // Reset SAO common statistics
if (m_param->bEnableSAO)
m_parallelFilter[0].m_sao.resetStats();
}
}
+void FrameFilter::ParallelFilter::copySaoAboveRef(PicYuv* reconPic, uint32_t cuAddr, int col)
+{
+ // Copy the SAO top reference pixels: one CTU-wide run per plane, from reconPic at cuAddr into m_sao.m_tmpU[plane] at column 'col'
+ int ctuWidth = g_maxCUSize;
+ const pixel* recY = reconPic->getPlaneAddr(0, cuAddr) - (m_rowAddr == 0 ? 0 : reconPic->m_stride); // row 0: nothing is subtracted, so the CTU's own first row is the source; otherwise step back one stride (presumably the pixel row above - confirm)
+
+ // Luma: ctuWidth pixels written at offset col * ctuWidth of m_tmpU[0]
+ memcpy(&m_sao.m_tmpU[0][col * ctuWidth], recY, ctuWidth * sizeof(pixel));
+ X265_CHECK(col * ctuWidth + ctuWidth <= m_sao.m_numCuInWidth * ctuWidth, "m_tmpU buffer beyond bound write detected"); // NOTE(review): this bound check runs after the memcpy above has already written
+
+ // Chroma: ctuWidth is shifted right by m_sao.m_hChromaShift; U is copied to m_tmpU[1] and V to m_tmpU[2]
+ ctuWidth >>= m_sao.m_hChromaShift;
+
+ const pixel* recU = reconPic->getPlaneAddr(1, cuAddr) - (m_rowAddr == 0 ? 0 : reconPic->m_strideC);
+ const pixel* recV = reconPic->getPlaneAddr(2, cuAddr) - (m_rowAddr == 0 ? 0 : reconPic->m_strideC);
+ memcpy(&m_sao.m_tmpU[1][col * ctuWidth], recU, ctuWidth * sizeof(pixel));
+ memcpy(&m_sao.m_tmpU[2][col * ctuWidth], recV, ctuWidth * sizeof(pixel));
+
+ X265_CHECK(col * ctuWidth + ctuWidth <= m_sao.m_numCuInWidth * ctuWidth, "m_tmpU buffer beyond bound write detected"); // NOTE(review): both sides use the shifted (chroma) ctuWidth; the check runs after both chroma memcpy calls
+}
+
// NOTE: Single Threading only
void FrameFilter::ParallelFilter::processTasks(int /*workerThreadId*/)
{
const CUGeom* cuGeoms = m_frameEncoder->m_cuGeoms;
const uint32_t* ctuGeomMap = m_frameEncoder->m_ctuGeomMap;
+ PicYuv* reconPic = m_encData->m_reconPic;
const int colStart = m_lastCol.get();
// TODO: Waiting previous row finish or simple clip on it?
const int colEnd = m_allowedCol.get();
@@ -146,6 +169,9 @@
{
const CUData* ctuPrev = m_encData->getPicCTU(cuAddr - 1);
deblockCTU(ctuPrev, cuGeoms[ctuGeomMap[cuAddr - 1]], Deblock::EDGE_HOR);
+
+ if (m_param->bEnableSAO)
+ copySaoAboveRef(reconPic, cuAddr - 1, col - 1);
}
m_lastCol.incr();
}
@@ -155,6 +181,9 @@
const uint32_t cuAddr = m_rowAddr + numCols - 1;
const CUData* ctuPrev = m_encData->getPicCTU(cuAddr);
deblockCTU(ctuPrev, cuGeoms[ctuGeomMap[cuAddr]], Deblock::EDGE_HOR);
+
+ if (m_param->bEnableSAO)
+ copySaoAboveRef(reconPic, cuAddr, numCols - 1);
}
}
@@ -507,23 +536,12 @@
SAOParam* saoParam = encData.m_saoParam;
if (saoParam->bSaoFlag[0])
- {
m_parallelFilter[row].m_sao.processSaoUnitRow(saoParam->ctuParam[0], row, 0);
- if (row != m_numRows - 1)
- {
- memcpy(m_parallelFilter[row + 1].m_sao.m_tmpU1[0], m_parallelFilter[row].m_sao.m_tmpU1[0], sizeof(pixel) * m_param->sourceWidth);
- }
- }
if (saoParam->bSaoFlag[1])
{
m_parallelFilter[row].m_sao.processSaoUnitRow(saoParam->ctuParam[1], row, 1);
m_parallelFilter[row].m_sao.processSaoUnitRow(saoParam->ctuParam[2], row, 2);
- if (row != m_numRows - 1)
- {
- memcpy(m_parallelFilter[row + 1].m_sao.m_tmpU1[1], m_parallelFilter[row].m_sao.m_tmpU1[1], sizeof(pixel) * m_param->sourceWidth);
- memcpy(m_parallelFilter[row + 1].m_sao.m_tmpU1[2], m_parallelFilter[row].m_sao.m_tmpU1[2], sizeof(pixel) * m_param->sourceWidth);
- }
}
if (encData.m_slice->m_pps->bTransquantBypassEnabled)
diff -r eb20b66eebe7 -r 1c6f6e627722 source/encoder/framefilter.h
--- a/source/encoder/framefilter.h Wed Dec 02 11:12:32 2015 -0600
+++ b/source/encoder/framefilter.h Wed Dec 02 11:12:36 2015 -0600
@@ -63,6 +63,7 @@
public:
static uint32_t numCols;
uint32_t m_rowAddr;
+ x265_param* m_param;
FrameEncoder* m_frameEncoder;
FrameData* m_encData;
SAO m_sao;
@@ -71,6 +72,7 @@
ParallelFilter()
: m_rowAddr(0)
+ , m_param(NULL)
, m_frameEncoder(NULL)
, m_encData(NULL)
{
@@ -80,6 +82,7 @@
{ }
void processTasks(int workerThreadId);
+ void copySaoAboveRef(PicYuv* reconPic, uint32_t cuAddr, int col);
protected:
diff -r eb20b66eebe7 -r 1c6f6e627722 source/encoder/sao.cpp
--- a/source/encoder/sao.cpp Wed Dec 02 11:12:32 2015 -0600
+++ b/source/encoder/sao.cpp Wed Dec 02 11:12:36 2015 -0600
@@ -84,12 +84,9 @@
m_param = NULL;
m_clipTable = NULL;
m_clipTableBase = NULL;
- m_tmpU1[0] = NULL;
- m_tmpU1[1] = NULL;
- m_tmpU1[2] = NULL;
- m_tmpU2[0] = NULL;
- m_tmpU2[1] = NULL;
- m_tmpU2[2] = NULL;
+ m_tmpU[0] = NULL;
+ m_tmpU[1] = NULL;
+ m_tmpU[2] = NULL;
m_tmpL1 = NULL;
m_tmpL2 = NULL;
@@ -125,10 +122,9 @@
for (int i = 0; i < 3; i++)
{
// SAO asm code will read 1 pixel before and after, so pad by 2
- CHECKED_MALLOC(m_tmpU1[i], pixel, m_param->sourceWidth + 2);
- m_tmpU1[i] += 1;
- CHECKED_MALLOC(m_tmpU2[i], pixel, m_param->sourceWidth + 2);
- m_tmpU2[i] += 1;
+ // NOTE: m_param->sourceWidth + 2 would be enough, but the buffer is rounded up to a whole number of CTUs (up to 63 extra pixels) so that copySaoAboveRef() needs no boundary check
+ CHECKED_MALLOC(m_tmpU[i], pixel, m_numCuInWidth * g_maxCUSize + 2);
+ m_tmpU[i] += 1;
}
if (initCommon)
@@ -191,15 +187,10 @@
for (int i = 0; i < 3; i++)
{
- if (m_tmpU1[i])
+ if (m_tmpU[i])
{
- X265_FREE(m_tmpU1[i] - 1);
- m_tmpU1[i] = NULL;
- }
- if (m_tmpU2[i])
- {
- X265_FREE(m_tmpU2[i] - 1);
- m_tmpU2[i] = NULL;
+ X265_FREE(m_tmpU[i] - 1);
+ m_tmpU[i] = NULL;
}
}
@@ -325,7 +316,7 @@
}
tmpL = m_tmpL1;
- tmpU = &(m_tmpU1[plane][lpelx]);
+ tmpU = &(m_tmpU[plane][lpelx]);
}
switch (typeIdx)
@@ -615,6 +606,7 @@
uint32_t picWidth = m_param->sourceWidth;
int ctuWidth = g_maxCUSize;
int ctuHeight = g_maxCUSize;
+
if (plane)
{
picWidth >>= m_hChromaShift;
@@ -625,11 +617,6 @@
int addr = idxY * m_numCuInWidth;
pixel* rec = reconPic->getPlaneAddr(plane, addr);
- if (!idxY)
- {
- memcpy(m_tmpU1[plane], rec, sizeof(pixel) * picWidth);
- }
-
for (int i = 0; i < ctuHeight + 1; i++)
{
m_tmpL1[i] = rec[0];
@@ -638,8 +625,6 @@
rec -= (stride << 1);
- memcpy(m_tmpU2[plane], rec, sizeof(pixel) * picWidth);
-
for (int idxX = 0; idxX < m_numCuInWidth; idxX++)
{
addr = idxY * m_numCuInWidth + idxX;
@@ -682,8 +667,6 @@
}
}
}
-
- std::swap(m_tmpU1[plane], m_tmpU2[plane]);
}
void SAO::copySaoUnit(SaoCtuParam* saoUnitDst, const SaoCtuParam* saoUnitSrc)
diff -r eb20b66eebe7 -r 1c6f6e627722 source/encoder/sao.h
--- a/source/encoder/sao.h Wed Dec 02 11:12:32 2015 -0600
+++ b/source/encoder/sao.h Wed Dec 02 11:12:36 2015 -0600
@@ -92,8 +92,7 @@
pixel* m_clipTable;
pixel* m_clipTableBase;
- pixel* m_tmpU1[3];
- pixel* m_tmpU2[3];
+ pixel* m_tmpU[3];
pixel* m_tmpL1;
pixel* m_tmpL2;
More information about the x265-devel
mailing list