[x265] [PATCH] SAO: fixed max size of sao statistics buffer to 32
ashok at multicorewareinc.com
ashok at multicorewareinc.com
Tue Feb 16 16:10:39 CET 2016
# HG changeset patch
# User Ashok Kumar Mishra<ashok at multicorewareinc.com>
# Date 1455632166 -19800
# Tue Feb 16 19:46:06 2016 +0530
# Node ID dfc4d87df196e94be70757d648dfb935c100c32a
# Parent 11cb0f838b5a9ba2e38e1412e139d0cf94c66e7d
SAO: fixed max size of sao statistics buffer to 32
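The BO class index no longer carries the "+ 1" offset (classIdx = rec[x] >> boShift), so the existing asm primitive saoCuStatsBO_sse4 no longer matches the C reference and is disabled in this patch. Please update the asm primitive for saoCuStatsBO to the new indexing so that it can be re-enabled.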
diff -r 11cb0f838b5a -r dfc4d87df196 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Tue Feb 02 14:50:24 2016 +0530
+++ b/source/common/x86/asm-primitives.cpp Tue Feb 16 19:46:06 2016 +0530
@@ -2530,7 +2530,7 @@
p.pelFilterLumaStrong[0] = PFX(pelFilterLumaStrong_V_sse4);
p.pelFilterLumaStrong[1] = PFX(pelFilterLumaStrong_H_sse4);
- p.saoCuStatsBO = PFX(saoCuStatsBO_sse4);
+// p.saoCuStatsBO = PFX(saoCuStatsBO_sse4);
p.saoCuStatsE0 = PFX(saoCuStatsE0_sse4);
p.saoCuStatsE1 = PFX(saoCuStatsE1_sse4);
p.saoCuStatsE2 = PFX(saoCuStatsE2_sse4);
diff -r 11cb0f838b5a -r dfc4d87df196 source/encoder/sao.cpp
--- a/source/encoder/sao.cpp Tue Feb 02 14:50:24 2016 +0530
+++ b/source/encoder/sao.cpp Tue Feb 16 19:46:06 2016 +0530
@@ -1007,7 +1007,7 @@
{
for (x = (y < startY ? startX : 0); x < ctuWidth; x++)
{
- int classIdx = 1 + (rec[x] >> boShift);
+ int classIdx = rec[x] >> boShift;
stats[classIdx] += (fenc[x] - rec[x]);
count[classIdx]++;
}
@@ -1306,11 +1306,11 @@
int typeIdx = mergeSrcParam->typeIdx;
if (typeIdx >= 0)
{
- int bandPos = (typeIdx == SAO_BO) ? mergeSrcParam->bandPos : 0;
+ int bandPos = (typeIdx == SAO_BO) ? mergeSrcParam->bandPos : 1;
for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
{
int mergeOffset = mergeSrcParam->offset[classIdx];
- estDist += estSaoDist(m_count[plane][typeIdx][classIdx + bandPos + 1], mergeOffset, m_offsetOrg[plane][typeIdx][classIdx + bandPos + 1]);
+ estDist += estSaoDist(m_count[plane][typeIdx][classIdx + bandPos], mergeOffset, m_offsetOrg[plane][typeIdx][classIdx + bandPos]);
}
}
@@ -1393,7 +1393,7 @@
// BO
for (int plane = 0; plane < planes; plane++)
{
- for (int classIdx = 1; classIdx < SAO_NUM_BO_CLASSES + 1; classIdx++)
+ for (int classIdx = 0; classIdx < SAO_NUM_BO_CLASSES; classIdx++)
{
int32_t count = m_count[plane][SAO_BO][classIdx];
int32_t& offsetOrg = m_offsetOrg[plane][SAO_BO][classIdx];
@@ -1500,17 +1500,17 @@
//BO RDO
int64_t estDist = 0;
- for (int classIdx = 1; classIdx < SAO_NUM_BO_CLASSES + 1; classIdx++)
+ for (int classIdx = 0; classIdx < SAO_NUM_BO_CLASSES; classIdx++)
{
int32_t count = m_count[0][SAO_BO][classIdx];
int32_t& offsetOrg = m_offsetOrg[0][SAO_BO][classIdx];
int32_t& offsetOut = m_offset[0][SAO_BO][classIdx];
- distBOClasses[classIdx - 1] = 0;
- costBOClasses[classIdx - 1] = m_lumaLambda;
+ distBOClasses[classIdx] = 0;
+ costBOClasses[classIdx] = m_lumaLambda;
if (count)
- offsetOut = estIterOffset(SAO_BO, m_lumaLambda, offsetOut, count, offsetOrg, distBOClasses[classIdx - 1], costBOClasses[classIdx - 1]);
+ offsetOut = estIterOffset(SAO_BO, m_lumaLambda, offsetOut, count, offsetOrg, distBOClasses[classIdx], costBOClasses[classIdx]);
else
offsetOut = 0;
}
@@ -1538,7 +1538,7 @@
m_entropyCoder.load(m_rdContexts.temp);
m_entropyCoder.resetBits();
- m_entropyCoder.codeSaoOffsetBO(m_offset[0][SAO_BO] + (bestClassBO + 1), bestClassBO, 0);
+ m_entropyCoder.codeSaoOffsetBO(m_offset[0][SAO_BO] + bestClassBO, bestClassBO, 0);
uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
double cost = (double)estDist + m_lumaLambda * (double)estRate;
@@ -1552,7 +1552,7 @@
lclCtuParam->typeIdx = SAO_BO;
lclCtuParam->bandPos = bestClassBO;
for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
- lclCtuParam->offset[classIdx] = (int)m_offset[0][SAO_BO][classIdx + bestClassBO + 1];
+ lclCtuParam->offset[classIdx] = (int)m_offset[0][SAO_BO][classIdx + bestClassBO];
}
mergeDist[0] = ((double)bestDist / m_lumaLambda);
@@ -1636,17 +1636,17 @@
{
double bestRDCostBO = MAX_DOUBLE;
- for (int classIdx = 1; classIdx < SAO_NUM_BO_CLASSES + 1; classIdx++)
+ for (int classIdx = 0; classIdx < SAO_NUM_BO_CLASSES; classIdx++)
{
int32_t count = m_count[compIdx][SAO_BO][classIdx];
int32_t& offsetOrg = m_offsetOrg[compIdx][SAO_BO][classIdx];
int32_t& offsetOut = m_offset[compIdx][SAO_BO][classIdx];
- distBOClasses[classIdx - 1] = 0;
- costBOClasses[classIdx - 1] = m_chromaLambda;
+ distBOClasses[classIdx] = 0;
+ costBOClasses[classIdx] = m_chromaLambda;
if (count)
- offsetOut = estIterOffset(SAO_BO, m_chromaLambda, offsetOut, count, offsetOrg, distBOClasses[classIdx - 1], costBOClasses[classIdx - 1]);
+ offsetOut = estIterOffset(SAO_BO, m_chromaLambda, offsetOut, count, offsetOrg, distBOClasses[classIdx], costBOClasses[classIdx]);
else
offsetOut = 0;
}
@@ -1673,7 +1673,7 @@
m_entropyCoder.resetBits();
for (int compIdx = 0; compIdx < 2; compIdx++)
- m_entropyCoder.codeSaoOffsetBO(m_offset[compIdx + 1][SAO_BO] + (bestClassBO[compIdx] + 1), bestClassBO[compIdx], compIdx + 1);
+ m_entropyCoder.codeSaoOffsetBO(m_offset[compIdx + 1][SAO_BO] + bestClassBO[compIdx], bestClassBO[compIdx], compIdx + 1);
uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
double cost = (double)(estDist[0] + estDist[1]) + m_chromaLambda * (double)estRate;
@@ -1689,7 +1689,7 @@
lclCtuParam[compIdx]->typeIdx = SAO_BO;
lclCtuParam[compIdx]->bandPos = bestClassBO[compIdx];
for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
- lclCtuParam[compIdx]->offset[classIdx] = (int)m_offset[compIdx + 1][SAO_BO][classIdx + bestClassBO[compIdx] + 1];
+ lclCtuParam[compIdx]->offset[classIdx] = (int)m_offset[compIdx + 1][SAO_BO][classIdx + bestClassBO[compIdx]];
}
}
@@ -1703,14 +1703,13 @@
// NOTE: must put in namespace X265_NS since we need class SAO
void saoCuStatsBO_c(const int16_t *diff, const pixel *rec, intptr_t stride, int endX, int endY, int32_t *stats, int32_t *count)
{
- int x, y;
const int boShift = X265_DEPTH - SAO_BO_BITS;
- for (y = 0; y < endY; y++)
+ for (int y = 0; y < endY; y++)
{
- for (x = 0; x < endX; x++)
+ for (int x = 0; x < endX; x++)
{
- int classIdx = 1 + (rec[x] >> boShift);
+ int classIdx = rec[x] >> boShift;
stats[classIdx] += diff[x];
count[classIdx]++;
}
@@ -1722,7 +1721,6 @@
void saoCuStatsE0_c(const int16_t *diff, const pixel *rec, intptr_t stride, int endX, int endY, int32_t *stats, int32_t *count)
{
- int x, y;
int32_t tmp_stats[SAO::NUM_EDGETYPE];
int32_t tmp_count[SAO::NUM_EDGETYPE];
@@ -1731,10 +1729,10 @@
memset(tmp_stats, 0, sizeof(tmp_stats));
memset(tmp_count, 0, sizeof(tmp_count));
- for (y = 0; y < endY; y++)
+ for (int y = 0; y < endY; y++)
{
int signLeft = signOf(rec[0] - rec[-1]);
- for (x = 0; x < endX; x++)
+ for (int x = 0; x < endX; x++)
{
int signRight = signOf2(rec[x], rec[x + 1]);
X265_CHECK(signRight == signOf(rec[x] - rec[x + 1]), "signDown check failure\n");
@@ -1750,7 +1748,7 @@
rec += stride;
}
- for (x = 0; x < SAO::NUM_EDGETYPE; x++)
+ for (int x = 0; x < SAO::NUM_EDGETYPE; x++)
{
stats[SAO::s_eoTable[x]] += tmp_stats[x];
count[SAO::s_eoTable[x]] += tmp_count[x];
@@ -1762,7 +1760,6 @@
X265_CHECK(endX <= MAX_CU_SIZE, "endX check failure\n");
X265_CHECK(endY <= MAX_CU_SIZE, "endY check failure\n");
- int x, y;
int32_t tmp_stats[SAO::NUM_EDGETYPE];
int32_t tmp_count[SAO::NUM_EDGETYPE];
@@ -1770,9 +1767,9 @@
memset(tmp_count, 0, sizeof(tmp_count));
X265_CHECK(endX * endY <= (4096 - 16), "Assembly of saoE1 may overflow with this block size\n");
- for (y = 0; y < endY; y++)
+ for (int y = 0; y < endY; y++)
{
- for (x = 0; x < endX; x++)
+ for (int x = 0; x < endX; x++)
{
int signDown = signOf2(rec[x], rec[x + stride]);
X265_CHECK(signDown == signOf(rec[x] - rec[x + stride]), "signDown check failure\n");
@@ -1787,7 +1784,7 @@
rec += stride;
}
- for (x = 0; x < SAO::NUM_EDGETYPE; x++)
+ for (int x = 0; x < SAO::NUM_EDGETYPE; x++)
{
stats[SAO::s_eoTable[x]] += tmp_stats[x];
count[SAO::s_eoTable[x]] += tmp_count[x];
@@ -1799,17 +1796,16 @@
X265_CHECK(endX < MAX_CU_SIZE, "endX check failure\n");
X265_CHECK(endY < MAX_CU_SIZE, "endY check failure\n");
- int x, y;
int32_t tmp_stats[SAO::NUM_EDGETYPE];
int32_t tmp_count[SAO::NUM_EDGETYPE];
memset(tmp_stats, 0, sizeof(tmp_stats));
memset(tmp_count, 0, sizeof(tmp_count));
- for (y = 0; y < endY; y++)
+ for (int y = 0; y < endY; y++)
{
upBufft[0] = signOf(rec[stride] - rec[-1]);
- for (x = 0; x < endX; x++)
+ for (int x = 0; x < endX; x++)
{
int signDown = signOf2(rec[x], rec[x + stride + 1]);
X265_CHECK(signDown == signOf(rec[x] - rec[x + stride + 1]), "signDown check failure\n");
@@ -1825,7 +1821,7 @@
diff += MAX_CU_SIZE;
}
- for (x = 0; x < SAO::NUM_EDGETYPE; x++)
+ for (int x = 0; x < SAO::NUM_EDGETYPE; x++)
{
stats[SAO::s_eoTable[x]] += tmp_stats[x];
count[SAO::s_eoTable[x]] += tmp_count[x];
@@ -1837,16 +1833,15 @@
X265_CHECK(endX < MAX_CU_SIZE, "endX check failure\n");
X265_CHECK(endY < MAX_CU_SIZE, "endY check failure\n");
- int x, y;
int32_t tmp_stats[SAO::NUM_EDGETYPE];
int32_t tmp_count[SAO::NUM_EDGETYPE];
memset(tmp_stats, 0, sizeof(tmp_stats));
memset(tmp_count, 0, sizeof(tmp_count));
- for (y = 0; y < endY; y++)
+ for (int y = 0; y < endY; y++)
{
- for (x = 0; x < endX; x++)
+ for (int x = 0; x < endX; x++)
{
int signDown = signOf2(rec[x], rec[x + stride - 1]);
X265_CHECK(signDown == signOf(rec[x] - rec[x + stride - 1]), "signDown check failure\n");
@@ -1864,7 +1859,7 @@
diff += MAX_CU_SIZE;
}
- for (x = 0; x < SAO::NUM_EDGETYPE; x++)
+ for (int x = 0; x < SAO::NUM_EDGETYPE; x++)
{
stats[SAO::s_eoTable[x]] += tmp_stats[x];
count[SAO::s_eoTable[x]] += tmp_count[x];
diff -r 11cb0f838b5a -r dfc4d87df196 source/encoder/sao.h
--- a/source/encoder/sao.h Tue Feb 02 14:50:24 2016 +0530
+++ b/source/encoder/sao.h Tue Feb 16 19:46:06 2016 +0530
@@ -56,7 +56,7 @@
enum { SAO_MAX_DEPTH = 4 };
enum { SAO_BO_BITS = 5 };
- enum { MAX_NUM_SAO_CLASS = 33 };
+ enum { MAX_NUM_SAO_CLASS = 32 };
enum { SAO_BIT_INC = 0 }; /* in HM12.0, it wrote as X265_MAX(X265_DEPTH - 10, 0) */
enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
enum { NUM_EDGETYPE = 5 };
More information about the x265-devel mailing list