[x265] [PATCH 1 of 4] improve calcSaoStatsCu, use local temporary buffer to reduce array index operators
Min Chen
chenm003 at 163.com
Thu May 14 01:53:17 CEST 2015
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1431561161 25200
# Node ID 2cb7386376a40786ca6cbd02d9f19bc304942251
# Parent 0ffdd0f379adcbf6b3e49956580301211a01c203
improve calcSaoStatsCu: accumulate into local temporary buffers to reduce array indexing operations
---
source/encoder/sao.cpp | 91 ++++++++++++++++++++++++++++++++++++-----------
1 files changed, 69 insertions(+), 22 deletions(-)
diff -r 0ffdd0f379ad -r 2cb7386376a4 source/encoder/sao.cpp
--- a/source/encoder/sao.cpp Wed May 13 18:12:50 2015 +0530
+++ b/source/encoder/sao.cpp Wed May 13 16:52:41 2015 -0700
@@ -696,6 +696,11 @@
int8_t _upBuff1[MAX_CU_SIZE + 2], *upBuff1 = _upBuff1 + 1;
int8_t _upBufft[MAX_CU_SIZE + 2], *upBufft = _upBufft + 1;
+ // Dynamic Range: 64x64 samples (12 bits) x 14bpp = 26 bits, fits in int32_t
+ int32_t tmp_stats[NUM_EDGETYPE];
+ // TODO: improve by uint64_t, but need Haswell SHLX
+ uint16_t tmp_count[NUM_EDGETYPE];
+
// SAO_BO:
{
const int boShift = X265_DEPTH - SAO_BO_BITS;
@@ -736,30 +741,41 @@
skipB = plane ? 1 : 3;
skipR = plane ? 3 : 5;
}
- stats = m_offsetOrg[plane][SAO_EO_0];
- count = m_count[plane][SAO_EO_0];
fenc = fenc0;
rec = rec0;
startX = !lpelx;
endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR;
+
+ memset(tmp_stats, 0, sizeof(tmp_stats));
+ memset(tmp_count, 0, sizeof(tmp_count));
+
for (y = 0; y < ctuHeight - skipB; y++)
{
int signLeft = signOf(rec[startX] - rec[startX - 1]);
for (x = startX; x < endX; x++)
{
int signRight = signOf(rec[x] - rec[x + 1]);
- int edgeType = signRight + signLeft + 2;
+ uint32_t edgeType = signRight + signLeft + 2;
signLeft = -signRight;
- stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
- count[s_eoTable[edgeType]]++;
+ X265_CHECK((edgeType >= 0) && (edgeType <= 4), "edgeType check failure\n");
+ tmp_stats[edgeType] += (fenc[x] - rec[x]);
+ tmp_count[edgeType]++;
}
fenc += stride;
rec += stride;
}
+
+ stats = m_offsetOrg[plane][SAO_EO_0];
+ count = m_count[plane][SAO_EO_0];
+ for (x = 0; x < NUM_EDGETYPE; x++)
+ {
+ stats[s_eoTable[x]] += tmp_stats[x];
+ count[s_eoTable[x]] += tmp_count[x];
+ }
}
// SAO_EO_1: // dir: |
@@ -769,8 +785,6 @@
skipB = plane ? 2 : 4;
skipR = plane ? 2 : 4;
}
- stats = m_offsetOrg[plane][SAO_EO_1];
- count = m_count[plane][SAO_EO_1];
fenc = fenc0;
rec = rec0;
@@ -786,21 +800,32 @@
primitives.sign(upBuff1, rec, &rec[- stride], ctuWidth);
+ memset(tmp_stats, 0, sizeof(tmp_stats));
+ memset(tmp_count, 0, sizeof(tmp_count));
+
for (y = startY; y < endY; y++)
{
for (x = 0; x < endX; x++)
{
- int8_t signDown = signOf(rec[x] - rec[x + stride]);
- int edgeType = signDown + upBuff1[x] + 2;
- upBuff1[x] = -signDown;
+ int signDown = signOf(rec[x] - rec[x + stride]);
+ uint32_t edgeType = signDown + upBuff1[x] + 2;
+ upBuff1[x] = (int8_t)(-signDown);
- stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
- count[s_eoTable[edgeType]]++;
+ tmp_stats[edgeType] += (fenc[x] - rec[x]);
+ tmp_count[edgeType]++;
}
fenc += stride;
rec += stride;
}
+
+ stats = m_offsetOrg[plane][SAO_EO_1];
+ count = m_count[plane][SAO_EO_1];
+ for (x = 0; x < NUM_EDGETYPE; x++)
+ {
+ stats[s_eoTable[x]] += tmp_stats[x];
+ count[s_eoTable[x]] += tmp_count[x];
+ }
}
// SAO_EO_2: // dir: 135
@@ -829,16 +854,19 @@
primitives.sign(&upBuff1[startX], &rec[startX], &rec[startX - stride - 1], (endX - startX));
+ memset(tmp_stats, 0, sizeof(tmp_stats));
+ memset(tmp_count, 0, sizeof(tmp_count));
+
for (y = startY; y < endY; y++)
{
upBufft[startX] = signOf(rec[startX + stride] - rec[startX - 1]);
for (x = startX; x < endX; x++)
{
- int8_t signDown = signOf(rec[x] - rec[x + stride + 1]);
- int edgeType = signDown + upBuff1[x] + 2;
- upBufft[x + 1] = -signDown;
- stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
- count[s_eoTable[edgeType]]++;
+ int signDown = signOf(rec[x] - rec[x + stride + 1]);
+ uint32_t edgeType = signDown + upBuff1[x] + 2;
+ upBufft[x + 1] = (int8_t)(-signDown);
+ tmp_stats[edgeType] += (fenc[x] - rec[x]);
+ tmp_count[edgeType]++;
}
std::swap(upBuff1, upBufft);
@@ -846,6 +874,14 @@
rec += stride;
fenc += stride;
}
+
+ stats = m_offsetOrg[plane][SAO_EO_2];
+ count = m_count[plane][SAO_EO_2];
+ for (x = 0; x < NUM_EDGETYPE; x++)
+ {
+ stats[s_eoTable[x]] += tmp_stats[x];
+ count[s_eoTable[x]] += tmp_count[x];
+ }
}
// SAO_EO_3: // dir: 45
@@ -875,15 +911,18 @@
primitives.sign(&upBuff1[startX - 1], &rec[startX - 1], &rec[startX - 1 - stride + 1], (endX - startX + 1));
+ memset(tmp_stats, 0, sizeof(tmp_stats));
+ memset(tmp_count, 0, sizeof(tmp_count));
+
for (y = startY; y < endY; y++)
{
for (x = startX; x < endX; x++)
{
- int8_t signDown = signOf(rec[x] - rec[x + stride - 1]);
- int edgeType = signDown + upBuff1[x] + 2;
- upBuff1[x - 1] = -signDown;
- stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
- count[s_eoTable[edgeType]]++;
+ int signDown = signOf(rec[x] - rec[x + stride - 1]);
+ uint32_t edgeType = signDown + upBuff1[x] + 2;
+ upBuff1[x - 1] = (int8_t)(-signDown);
+ tmp_stats[edgeType] += (fenc[x] - rec[x]);
+ tmp_count[edgeType]++;
}
upBuff1[endX - 1] = signOf(rec[endX - 1 + stride] - rec[endX]);
@@ -891,6 +930,14 @@
rec += stride;
fenc += stride;
}
+
+ stats = m_offsetOrg[plane][SAO_EO_3];
+ count = m_count[plane][SAO_EO_3];
+ for (x = 0; x < NUM_EDGETYPE; x++)
+ {
+ stats[s_eoTable[x]] += tmp_stats[x];
+ count[s_eoTable[x]] += tmp_count[x];
+ }
}
}
}
More information about the x265-devel mailing list