[x265] [PATCH] SAO: no need to unroll chroma to avoid increased code size
Ashok Kumar Mishra
ashok at multicorewareinc.com
Tue Feb 16 16:12:02 CET 2016
# HG changeset patch
# User Ashok Kumar Mishra<ashok at multicorewareinc.com>
# Date 1455633570 -19800
# Tue Feb 16 20:09:30 2016 +0530
# Node ID 36751a3dce37e4f506f4bdec12e20ef665b42012
# Parent 33b02e2af5a4b38cd54d3f94f163aae640855dbf
SAO: no need to unroll chroma to avoid increased code size
diff -r 33b02e2af5a4 -r 36751a3dce37 source/encoder/sao.cpp
--- a/source/encoder/sao.cpp Tue Feb 16 20:09:30 2016 +0530
+++ b/source/encoder/sao.cpp Tue Feb 16 20:09:30 2016 +0530
@@ -113,7 +113,6 @@
m_clipTable = &(m_clipTableBase[rangeExt]);
// Share with fast clip lookup table
-
for (int i = 0; i < rangeExt; i++)
m_clipTableBase[i] = 0;
@@ -638,13 +637,8 @@
{
PicYuv* reconPic = m_frame->m_reconPic;
intptr_t stride = reconPic->m_strideC;
- int ctuWidth = g_maxCUSize;
- int ctuHeight = g_maxCUSize;
-
- {
- ctuWidth >>= m_hChromaShift;
- ctuHeight >>= m_vChromaShift;
- }
+ int ctuWidth = g_maxCUSize >> m_hChromaShift;
+ int ctuHeight = g_maxCUSize >> m_vChromaShift;
int addr = idxY * m_numCuInWidth + idxX;
pixel* recCb = reconPic->getCbAddr(addr);
@@ -652,88 +646,53 @@
if (idxX == 0)
{
- for (int i = 0; i < ctuHeight + 1; i++)
+ for (int i = 0; i < ctuHeight + 1; i++, recCb += stride, recCr += stride)
{
m_tmpL1[1][i] = recCb[0];
m_tmpL1[2][i] = recCr[0];
- recCb += stride;
- recCr += stride;
}
}
- bool mergeLeftFlagCb = (ctuParam[1][addr].mergeMode == SAO_MERGE_LEFT);
- int typeIdxCb = ctuParam[1][addr].typeIdx;
-
- bool mergeLeftFlagCr = (ctuParam[2][addr].mergeMode == SAO_MERGE_LEFT);
- int typeIdxCr = ctuParam[2][addr].typeIdx;
-
if (idxX != (m_numCuInWidth - 1))
{
recCb = reconPic->getCbAddr(addr);
recCr = reconPic->getCrAddr(addr);
- for (int i = 0; i < ctuHeight + 1; i++)
+ for (int i = 0; i < ctuHeight + 1; i++, recCb += stride, recCr += stride)
{
m_tmpL2[1][i] = recCb[ctuWidth - 1];
m_tmpL2[2][i] = recCr[ctuWidth - 1];
- recCb += stride;
- recCr += stride;
}
}
- // Process U
- if (typeIdxCb >= 0)
+ for (int plane = 1; plane < 3; plane++)
{
- if (!mergeLeftFlagCb)
+ int typeIdx = ctuParam[plane][addr].typeIdx;
+ if (typeIdx >= 0)
{
- if (typeIdxCb == SAO_BO)
+ if (ctuParam[plane][addr].mergeMode != SAO_MERGE_LEFT)
{
- memset(m_offsetBo[1], 0, sizeof(m_offsetBo[0]));
+ if (typeIdx == SAO_BO)
+ {
+ memset(m_offsetBo[plane], 0, sizeof(m_offsetBo[0]));
- for (int i = 0; i < SAO_NUM_OFFSET; i++)
- m_offsetBo[1][((ctuParam[1][addr].bandPos + i) & (SAO_NUM_BO_CLASSES - 1))] = (int8_t)(ctuParam[1][addr].offset[i] << SAO_BIT_INC);
- }
- else // if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3)
- {
- int offset[NUM_EDGETYPE];
- offset[0] = 0;
- for (int i = 0; i < SAO_NUM_OFFSET; i++)
- offset[i + 1] = ctuParam[1][addr].offset[i] << SAO_BIT_INC;
+ for (int i = 0; i < SAO_NUM_OFFSET; i++)
+ m_offsetBo[plane][((ctuParam[plane][addr].bandPos + i) & (SAO_NUM_BO_CLASSES - 1))] = (int8_t)(ctuParam[plane][addr].offset[i] << SAO_BIT_INC);
+ }
+ else // if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3)
+ {
+ int offset[NUM_EDGETYPE];
+ offset[0] = 0;
+ for (int i = 0; i < SAO_NUM_OFFSET; i++)
+ offset[i + 1] = ctuParam[plane][addr].offset[i] << SAO_BIT_INC;
- for (int edgeType = 0; edgeType < NUM_EDGETYPE; edgeType++)
- m_offsetEo[1][edgeType] = (int8_t)offset[s_eoTable[edgeType]];
+ for (int edgeType = 0; edgeType < NUM_EDGETYPE; edgeType++)
+ m_offsetEo[plane][edgeType] = (int8_t)offset[s_eoTable[edgeType]];
+ }
}
}
- processSaoCu(addr, typeIdxCb, 1);
+ processSaoCu(addr, typeIdx, plane);
+ std::swap(m_tmpL1[plane], m_tmpL2[plane]);
}
-
- // Process V
- if (typeIdxCr >= 0)
- {
- if (!mergeLeftFlagCr)
- {
- if (typeIdxCr == SAO_BO)
- {
- memset(m_offsetBo[2], 0, sizeof(m_offsetBo[0]));
-
- for (int i = 0; i < SAO_NUM_OFFSET; i++)
- m_offsetBo[2][((ctuParam[2][addr].bandPos + i) & (SAO_NUM_BO_CLASSES - 1))] = (int8_t)(ctuParam[2][addr].offset[i] << SAO_BIT_INC);
- }
- else // if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3)
- {
- int offset[NUM_EDGETYPE];
- offset[0] = 0;
- for (int i = 0; i < SAO_NUM_OFFSET; i++)
- offset[i + 1] = ctuParam[2][addr].offset[i] << SAO_BIT_INC;
-
- for (int edgeType = 0; edgeType < NUM_EDGETYPE; edgeType++)
- m_offsetEo[2][edgeType] = (int8_t)offset[s_eoTable[edgeType]];
- }
- }
- processSaoCu(addr, typeIdxCb, 2);
- }
-
- std::swap(m_tmpL1[1], m_tmpL2[1]);
- std::swap(m_tmpL1[2], m_tmpL2[2]);
}
/* Calculate SAO statistics for current CTU without non-crossing slice */
More information about the x265-devel mailing list