[x265] [PATCH 14 of 15] sao: reduce address operators by split into Luma and Chroma path
Min Chen
chenm003 at 163.com
Wed Dec 2 18:28:37 CET 2015
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1449076380 21600
# Node ID a6d88a08af3d48cb804aa61819bd45ee685d1f59
# Parent a3a9660c91b8eeb8f70869fc4022f939c01023f0
sao: reduce address operators by split into Luma and Chroma path
---
source/encoder/framefilter.cpp | 7 +--
source/encoder/sao.cpp | 133 ++++++++++++++++++++++++++++++++++------
source/encoder/sao.h | 3 +-
3 files changed, 118 insertions(+), 25 deletions(-)
diff -r a3a9660c91b8 -r a6d88a08af3d source/encoder/framefilter.cpp
--- a/source/encoder/framefilter.cpp Wed Dec 02 11:12:57 2015 -0600
+++ b/source/encoder/framefilter.cpp Wed Dec 02 11:13:00 2015 -0600
@@ -546,13 +546,10 @@
for(uint32_t col = 0; col < numCols; col++)
{
if (saoParam->bSaoFlag[0])
- m_parallelFilter[row].m_sao.processSaoUnitCu(saoParam->ctuParam[0], row, col, 0);
+ m_parallelFilter[row].m_sao.processSaoUnitCuLuma(saoParam->ctuParam[0], row, col);
if (saoParam->bSaoFlag[1])
- {
- m_parallelFilter[row].m_sao.processSaoUnitCu(saoParam->ctuParam[1], row, col, 1);
- m_parallelFilter[row].m_sao.processSaoUnitCu(saoParam->ctuParam[2], row, col, 2);
- }
+ m_parallelFilter[row].m_sao.processSaoUnitCuChroma(saoParam->ctuParam, row, col);
}
if (encData.m_slice->m_pps->bTransquantBypassEnabled)
diff -r a3a9660c91b8 -r a6d88a08af3d source/encoder/sao.cpp
--- a/source/encoder/sao.cpp Wed Dec 02 11:12:57 2015 -0600
+++ b/source/encoder/sao.cpp Wed Dec 02 11:13:00 2015 -0600
@@ -674,29 +674,21 @@
}
/* Process SAO unit */
-void SAO::processSaoUnitCu(SaoCtuParam* ctuParam, int idxY, int idxX, int plane)
+void SAO::processSaoUnitCuLuma(SaoCtuParam* ctuParam, int idxY, int idxX)
{
PicYuv* reconPic = m_frame->m_reconPic;
- intptr_t stride = plane ? reconPic->m_strideC : reconPic->m_stride;
- uint32_t picWidth = m_param->sourceWidth;
+ intptr_t stride = reconPic->m_stride;
int ctuWidth = g_maxCUSize;
int ctuHeight = g_maxCUSize;
- if (plane)
- {
- picWidth >>= m_hChromaShift;
- ctuWidth >>= m_hChromaShift;
- ctuHeight >>= m_vChromaShift;
- }
-
int addr = idxY * m_numCuInWidth + idxX;
- pixel* rec = reconPic->getPlaneAddr(plane, addr);
+ pixel* rec = reconPic->getLumaAddr(addr);
if (idxX == 0)
{
for (int i = 0; i < ctuHeight + 1; i++)
{
- m_tmpL1[plane][i] = rec[0];
+ m_tmpL1[0][i] = rec[0];
rec += stride;
}
}
@@ -706,10 +698,10 @@
if (idxX != (m_numCuInWidth - 1))
{
- rec = reconPic->getPlaneAddr(plane, addr);
+ rec = reconPic->getLumaAddr(addr);
for (int i = 0; i < ctuHeight + 1; i++)
{
- m_tmpL2[plane][i] = rec[ctuWidth - 1];
+ m_tmpL2[0][i] = rec[ctuWidth - 1];
rec += stride;
}
}
@@ -720,10 +712,10 @@
{
if (typeIdx == SAO_BO)
{
- memset(m_offsetBo[plane], 0, sizeof(m_offsetBo[0]));
+ memset(m_offsetBo[0], 0, sizeof(m_offsetBo[0]));
for (int i = 0; i < SAO_NUM_OFFSET; i++)
- m_offsetBo[plane][((ctuParam[addr].bandPos + i) & (SAO_NUM_BO_CLASSES - 1))] = (int8_t)(ctuParam[addr].offset[i] << SAO_BIT_INC);
+ m_offsetBo[0][((ctuParam[addr].bandPos + i) & (SAO_NUM_BO_CLASSES - 1))] = (int8_t)(ctuParam[addr].offset[i] << SAO_BIT_INC);
}
else // if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3)
{
@@ -733,12 +725,115 @@
offset[i + 1] = ctuParam[addr].offset[i] << SAO_BIT_INC;
for (int edgeType = 0; edgeType < NUM_EDGETYPE; edgeType++)
- m_offsetEo[plane][edgeType] = (int8_t)offset[s_eoTable[edgeType]];
+ m_offsetEo[0][edgeType] = (int8_t)offset[s_eoTable[edgeType]];
}
}
- processSaoCu(addr, typeIdx, plane);
+ processSaoCu(addr, typeIdx, 0);
}
- std::swap(m_tmpL1[plane], m_tmpL2[plane]);
+ std::swap(m_tmpL1[0], m_tmpL2[0]);
+}
+
+/* Process SAO unit (Chroma only): handles the Cb and Cr planes of the CTU at row idxY, column idxX, using the per-CTU parameters in ctuParam[1] (Cb) and ctuParam[2] (Cr) */
+void SAO::processSaoUnitCuChroma(SaoCtuParam* ctuParam[3], int idxY, int idxX)
+{
+    PicYuv* reconPic = m_frame->m_reconPic;
+    intptr_t stride = reconPic->m_strideC;
+    int ctuWidth = g_maxCUSize;
+    int ctuHeight = g_maxCUSize;
+    // Scale the CTU dimensions by the horizontal / vertical chroma shifts
+    {
+        ctuWidth >>= m_hChromaShift;
+        ctuHeight >>= m_vChromaShift;
+    }
+
+    int addr = idxY * m_numCuInWidth + idxX;
+    pixel* recCb = reconPic->getCbAddr(addr);
+    pixel* recCr = reconPic->getCrAddr(addr);
+    // First CTU of a row: copy the CTU's first column (ctuHeight + 1 samples) of each plane into m_tmpL1
+    if (idxX == 0)
+    {
+        for (int i = 0; i < ctuHeight + 1; i++)
+        {
+            m_tmpL1[1][i] = recCb[0];
+            m_tmpL1[2][i] = recCr[0];
+            recCb += stride;
+            recCr += stride;
+        }
+    }
+
+    bool mergeLeftFlagCb = (ctuParam[1][addr].mergeMode == SAO_MERGE_LEFT);
+    int typeIdxCb = ctuParam[1][addr].typeIdx;
+
+    bool mergeLeftFlagCr = (ctuParam[2][addr].mergeMode == SAO_MERGE_LEFT);
+    int typeIdxCr = ctuParam[2][addr].typeIdx;
+    // Not the last CTU of a row: copy the CTU's last column of each plane into m_tmpL2 before processSaoCu() is called
+    if (idxX != (m_numCuInWidth - 1))
+    {
+        recCb = reconPic->getCbAddr(addr);
+        recCr = reconPic->getCrAddr(addr);
+        for (int i = 0; i < ctuHeight + 1; i++)
+        {
+            m_tmpL2[1][i] = recCb[ctuWidth - 1];
+            m_tmpL2[2][i] = recCr[ctuWidth - 1];
+            recCb += stride;
+            recCr += stride;
+        }
+    }
+
+    // Process U (Cb, plane 1); the offset tables are rebuilt only when not merging left (presumably they still hold the left CTU's values otherwise - TODO confirm)
+    if (typeIdxCb >= 0)
+    {
+        if (!mergeLeftFlagCb)
+        {
+            if (typeIdxCb == SAO_BO)
+            {
+                memset(m_offsetBo[1], 0, sizeof(m_offsetBo[0]));
+
+                for (int i = 0; i < SAO_NUM_OFFSET; i++)
+                    m_offsetBo[1][((ctuParam[1][addr].bandPos + i) & (SAO_NUM_BO_CLASSES - 1))] = (int8_t)(ctuParam[1][addr].offset[i] << SAO_BIT_INC);
+            }
+            else // if (typeIdxCb == SAO_EO_0 || typeIdxCb == SAO_EO_1 || typeIdxCb == SAO_EO_2 || typeIdxCb == SAO_EO_3)
+            {
+                int offset[NUM_EDGETYPE];
+                offset[0] = 0;
+                for (int i = 0; i < SAO_NUM_OFFSET; i++)
+                    offset[i + 1] = ctuParam[1][addr].offset[i] << SAO_BIT_INC;
+
+                for (int edgeType = 0; edgeType < NUM_EDGETYPE; edgeType++)
+                    m_offsetEo[1][edgeType] = (int8_t)offset[s_eoTable[edgeType]];
+            }
+        }
+        processSaoCu(addr, typeIdxCb, 1);
+    }
+
+    // Process V (Cr, plane 2); same steps as for Cb, driven by the Cr parameters
+    if (typeIdxCr >= 0)
+    {
+        if (!mergeLeftFlagCr)
+        {
+            if (typeIdxCr == SAO_BO)
+            {
+                memset(m_offsetBo[2], 0, sizeof(m_offsetBo[0]));
+
+                for (int i = 0; i < SAO_NUM_OFFSET; i++)
+                    m_offsetBo[2][((ctuParam[2][addr].bandPos + i) & (SAO_NUM_BO_CLASSES - 1))] = (int8_t)(ctuParam[2][addr].offset[i] << SAO_BIT_INC);
+            }
+            else // if (typeIdxCr == SAO_EO_0 || typeIdxCr == SAO_EO_1 || typeIdxCr == SAO_EO_2 || typeIdxCr == SAO_EO_3)
+            {
+                int offset[NUM_EDGETYPE];
+                offset[0] = 0;
+                for (int i = 0; i < SAO_NUM_OFFSET; i++)
+                    offset[i + 1] = ctuParam[2][addr].offset[i] << SAO_BIT_INC;
+
+                for (int edgeType = 0; edgeType < NUM_EDGETYPE; edgeType++)
+                    m_offsetEo[2][edgeType] = (int8_t)offset[s_eoTable[edgeType]];
+            }
+        }
+        processSaoCu(addr, typeIdxCr, 2); // Cr must be filtered with its own type index, not Cb's
+    }
+    // Exchange the column buffers so the column saved in m_tmpL2 is seen as m_tmpL1 by the next call (presumably the next CTU's left neighbour - TODO confirm)
+    std::swap(m_tmpL1[1], m_tmpL2[1]);
+    std::swap(m_tmpL1[2], m_tmpL2[2]);
}
void SAO::copySaoUnit(SaoCtuParam* saoUnitDst, const SaoCtuParam* saoUnitSrc)
diff -r a3a9660c91b8 -r a6d88a08af3d source/encoder/sao.h
--- a/source/encoder/sao.h Wed Dec 02 11:12:57 2015 -0600
+++ b/source/encoder/sao.h Wed Dec 02 11:13:00 2015 -0600
@@ -132,7 +132,8 @@
// CTU-based SAO process without slice granularity
void processSaoCu(int addr, int typeIdx, int plane);
void processSaoUnitRow(SaoCtuParam* ctuParam, int idxY, int plane);
- void processSaoUnitCu(SaoCtuParam* ctuParam, int idxY, int idxX, int plane);
+ void processSaoUnitCuLuma(SaoCtuParam* ctuParam, int idxY, int idxX);
+ void processSaoUnitCuChroma(SaoCtuParam* ctuParam[3], int idxY, int idxX);
void copySaoUnit(SaoCtuParam* saoUnitDst, const SaoCtuParam* saoUnitSrc);
More information about the x265-devel
mailing list