[x265] [PATCH] SAO: cleanup processSAO()
ashok at multicorewareinc.com
ashok at multicorewareinc.com
Tue Feb 16 16:12:21 CET 2016
# HG changeset patch
# User Ashok Kumar Mishra<ashok at multicorewareinc.com>
# Date 1455633571 -19800
# Tue Feb 16 20:09:31 2016 +0530
# Node ID fe1168e6d6f4e633a0b138ebf5b7ad8d54779d15
# Parent 36751a3dce37e4f506f4bdec12e20ef665b42012
SAO: cleanup processSAO()
A separate asm primitive is required when the width and height are not the same
diff -r 36751a3dce37 -r fe1168e6d6f4 source/encoder/sao.cpp
--- a/source/encoder/sao.cpp Tue Feb 16 20:09:30 2016 +0530
+++ b/source/encoder/sao.cpp Tue Feb 16 20:09:31 2016 +0530
@@ -254,7 +254,6 @@
// CTU-based SAO process without slice granularity
void SAO::processSaoCu(int addr, int typeIdx, int plane)
{
- int x, y;
PicYuv* reconPic = m_frame->m_reconPic;
pixel* rec = reconPic->getPlaneAddr(plane, addr);
intptr_t stride = plane ? reconPic->m_strideC : reconPic->m_stride;
@@ -279,20 +278,13 @@
ctuWidth = rpelx - lpelx;
ctuHeight = bpely - tpely;
- int startX;
- int startY;
- int endX;
- int endY;
- pixel* tmpL;
- pixel* tmpU;
-
int8_t _upBuff1[MAX_CU_SIZE + 2], *upBuff1 = _upBuff1 + 1, signLeft1[2];
int8_t _upBufft[MAX_CU_SIZE + 2], *upBufft = _upBufft + 1;
memset(_upBuff1 + MAX_CU_SIZE, 0, 2 * sizeof(int8_t)); /* avoid valgrind uninit warnings */
- tmpL = m_tmpL1[plane];
- tmpU = &(m_tmpU[plane][lpelx]);
+ pixel* tmpL = m_tmpL1[plane];
+ pixel* tmpU = &(m_tmpU[plane][lpelx]);
int8_t* offsetEo = m_offsetEo[plane];
@@ -301,14 +293,14 @@
case SAO_EO_0: // dir: -
{
pixel firstPxl = 0, lastPxl = 0, row1FirstPxl = 0, row1LastPxl = 0;
- startX = !lpelx;
- endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
+ int startX = !lpelx;
+ int endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
if (ctuWidth & 15)
{
- for (y = 0; y < ctuHeight; y++)
+ for (int y = 0; y < ctuHeight; y++, rec += stride)
{
int signLeft = signOf(rec[startX] - tmpL[y]);
- for (x = startX; x < endX; x++)
+ for (int x = startX; x < endX; x++)
{
int signRight = signOf(rec[x] - rec[x + 1]);
int edgeType = signRight + signLeft + 2;
@@ -316,13 +308,11 @@
rec[x] = m_clipTable[rec[x] + offsetEo[edgeType]];
}
-
- rec += stride;
}
}
else
{
- for (y = 0; y < ctuHeight; y += 2)
+ for (int y = 0; y < ctuHeight; y += 2, rec += 2 * stride)
{
signLeft1[0] = signOf(rec[startX] - tmpL[y]);
signLeft1[1] = signOf(rec[stride + startX] - tmpL[y + 1]);
@@ -352,27 +342,25 @@
rec[ctuWidth - 1] = lastPxl;
rec[stride + ctuWidth - 1] = row1LastPxl;
}
-
- rec += 2 * stride;
}
}
break;
}
case SAO_EO_1: // dir: |
{
- startY = !tpely;
- endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
+ int startY = !tpely;
+ int endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
if (!tpely)
rec += stride;
if (ctuWidth & 15)
{
- for (x = 0; x < ctuWidth; x++)
+ for (int x = 0; x < ctuWidth; x++)
upBuff1[x] = signOf(rec[x] - tmpU[x]);
- for (y = startY; y < endY; y++)
+ for (int y = startY; y < endY; y++, rec += stride)
{
- for (x = 0; x < ctuWidth; x++)
+ for (int x = 0; x < ctuWidth; x++)
{
int8_t signDown = signOf(rec[x] - rec[x + stride]);
int edgeType = signDown + upBuff1[x] + 2;
@@ -380,8 +368,6 @@
rec[x] = m_clipTable[rec[x] + offsetEo[edgeType]];
}
-
- rec += stride;
}
}
else
@@ -389,11 +375,9 @@
primitives.sign(upBuff1, rec, tmpU, ctuWidth);
int diff = (endY - startY) % 2;
- for (y = startY; y < endY - diff; y += 2)
- {
+ for (int y = startY; y < endY - diff; y += 2, rec += 2 * stride)
primitives.saoCuOrgE1_2Rows(rec, upBuff1, offsetEo, stride, ctuWidth);
- rec += 2 * stride;
- }
+
if (diff & 1)
primitives.saoCuOrgE1(rec, upBuff1, offsetEo, stride, ctuWidth);
}
@@ -402,11 +386,11 @@
}
case SAO_EO_2: // dir: 135
{
- startX = !lpelx;
- endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
+ int startX = !lpelx;
+ int endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
- startY = !tpely;
- endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
+ int startY = !tpely;
+ int endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
if (!tpely)
rec += stride;
@@ -431,16 +415,16 @@
}
else
{
- for (x = startX; x < endX; x++)
+ for (int x = startX; x < endX; x++)
upBuff1[x] = signOf(rec[x] - tmpU[x - 1]);
}
if (ctuWidth & 15)
{
- for (y = startY; y < endY; y++)
+ for (int y = startY; y < endY; y++, rec += stride)
{
upBufft[startX] = signOf(rec[stride + startX] - tmpL[y]);
- for (x = startX; x < endX; x++)
+ for (int x = startX; x < endX; x++)
{
int8_t signDown = signOf(rec[x] - rec[x + stride + 1]);
int edgeType = signDown + upBuff1[x] + 2;
@@ -449,13 +433,11 @@
}
std::swap(upBuff1, upBufft);
-
- rec += stride;
}
}
else
{
- for (y = startY; y < endY; y++)
+ for (int y = startY; y < endY; y++, rec += stride)
{
int8_t iSignDown2 = signOf(rec[stride + startX] - tmpL[y]);
@@ -464,30 +446,29 @@
upBufft[startX] = iSignDown2;
std::swap(upBuff1, upBufft);
- rec += stride;
}
}
break;
}
case SAO_EO_3: // dir: 45
{
- startX = !lpelx;
- endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
+ int startX = !lpelx;
+ int endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
- startY = !tpely;
- endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
+ int startY = !tpely;
+ int endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
if (!tpely)
rec += stride;
if (ctuWidth & 15)
{
- for (x = startX - 1; x < endX; x++)
+ for (int x = startX - 1; x < endX; x++)
upBuff1[x] = signOf(rec[x] - tmpU[x + 1]);
- for (y = startY; y < endY; y++)
+ for (int y = startY; y < endY; y++, rec += stride)
{
- x = startX;
+ int x = startX;
int8_t signDown = signOf(rec[x] - tmpL[y + 1]);
int edgeType = signDown + upBuff1[x] + 2;
upBuff1[x - 1] = -signDown;
@@ -502,8 +483,6 @@
}
upBuff1[endX - 1] = signOf(rec[endX - 1 + stride] - rec[endX]);
-
- rec += stride;
}
}
else
@@ -522,9 +501,9 @@
if (rpelx == picWidth)
upBuff1[ctuWidth - 1] = lastSign;
- for (y = startY; y < endY; y++)
+ for (int y = startY; y < endY; y++, rec += stride)
{
- x = startX;
+ int x = startX;
int8_t signDown = signOf(rec[x] - tmpL[y + 1]);
int edgeType = signDown + upBuff1[x] + 2;
upBuff1[x - 1] = -signDown;
@@ -533,8 +512,6 @@
primitives.saoCuOrgE3[endX > 16](rec, upBuff1, offsetEo, stride - 1, startX, endX);
upBuff1[endX - 1] = signOf(rec[endX - 1 + stride] - rec[endX]);
-
- rec += stride;
}
}
@@ -548,24 +525,14 @@
{
#define SAO_BO_BITS 5
const int boShift = X265_DEPTH - SAO_BO_BITS;
- for (y = 0; y < ctuHeight; y++)
- {
- for (x = 0; x < ctuWidth; x++)
- {
- int val = rec[x] + offsetBo[rec[x] >> boShift];
- if (val < 0)
- val = 0;
- else if (val > ((1 << X265_DEPTH) - 1))
- val = ((1 << X265_DEPTH) - 1);
- rec[x] = (pixel)val;
- }
- rec += stride;
- }
+
+ for (int y = 0; y < ctuHeight; y++, rec += stride)
+ for (int x = 0; x < ctuWidth; x++)
+ rec[x] = x265_clip(rec[x] + offsetBo[rec[x] >> boShift]);
}
else
- {
primitives.saoCuOrgB0(rec, offsetBo, ctuWidth, ctuHeight, stride);
- }
+
break;
}
default: break;
More information about the x265-devel mailing list