[x265] [PATCH] sao: add saoCuOrgE3_2Rows function to process 2 rows
Divya Manivannan
divya at multicorewareinc.com
Tue Apr 14 06:58:09 CEST 2015
# HG changeset patch
# User Divya Manivannan <divya at multicorewareinc.com>
# Date 1428986909 -19800
# Tue Apr 14 10:18:29 2015 +0530
# Node ID e3017cf9bff50cdd6e89efdd8cf37c89ca0c62e3
# Parent 4cccf22b00ee188a72c8dc3896d7dc1613d855ad
sao: add saoCuOrgE3_2Rows function to process 2 rows
diff -r 4cccf22b00ee -r e3017cf9bff5 source/common/loopfilter.cpp
--- a/source/common/loopfilter.cpp Fri Apr 10 18:15:38 2015 -0500
+++ b/source/common/loopfilter.cpp Tue Apr 14 10:18:29 2015 +0530
@@ -122,6 +122,29 @@
}
}
+void processSaoCUE3_2Rows(pixel *rec, int8_t *upBuff1, int8_t *offsetEo, intptr_t stride, int startX, int endX, int8_t* signDown)
+{ // Adds the offsets in offsetEo, with clipping, to columns [startX, endX) of two consecutive rows of rec, in place.
+ int8_t signDown1; // sign of (rec[x] - rec[x + stride]) for the columns after startX
+ int8_t edgeType; // index into offsetEo: the two neighbour signs summed, biased by +2
+
+ for (int y = 0; y < 2; y++) // one pass per row; signDown[y] is the caller-supplied sign for column startX of row y
+ {
+ edgeType = signDown[y] + upBuff1[startX] + 2;
+ upBuff1[startX - 1] = -signDown[y]; // NOTE(review): this is upBuff1[-1] when startX == 0 - the buffer needs a valid element before index 0
+ rec[startX] = x265_clip(rec[startX] + offsetEo[edgeType]);
+
+ for (int x = startX + 1; x < endX; x++)
+ {
+ signDown1 = signOf(rec[x] - rec[x + stride]); // the sao.cpp caller in this patch passes (stride - 1), so this is the pixel one row down and one column left
+ edgeType = signDown1 + upBuff1[x] + 2;
+ upBuff1[x - 1] = -signDown1; // negated sign stored one slot to the left; it is read back at that index on the next row pass
+ rec[x] = x265_clip(rec[x] + offsetEo[edgeType]);
+ }
+ upBuff1[endX - 1] = signOf(rec[endX - 1 + stride + 1] - rec[endX]); // the loop above only writes slots up to endX - 2, so the last slot is set here
+ rec += stride + 1; // step to the next row; with the caller's (stride - 1) argument this equals one full row stride
+ }
+}
+
void processSaoCUB0(pixel* rec, const int8_t* offset, int ctuWidth, int ctuHeight, intptr_t stride)
{
#define SAO_BO_BITS 5
@@ -146,6 +169,7 @@
p.saoCuOrgE1_2Rows = processSaoCUE1_2Rows;
p.saoCuOrgE2 = processSaoCUE2;
p.saoCuOrgE3 = processSaoCUE3;
+ p.saoCuOrgE3_2Rows = processSaoCUE3_2Rows;
p.saoCuOrgB0 = processSaoCUB0;
p.sign = calSign;
}
diff -r 4cccf22b00ee -r e3017cf9bff5 source/common/primitives.h
--- a/source/common/primitives.h Fri Apr 10 18:15:38 2015 -0500
+++ b/source/common/primitives.h Tue Apr 14 10:18:29 2015 +0530
@@ -172,6 +172,7 @@
typedef void (*saoCuOrgE1_t)(pixel* rec, int8_t* upBuff1, int8_t* offsetEo, intptr_t stride, int width);
typedef void (*saoCuOrgE2_t)(pixel* rec, int8_t* pBufft, int8_t* pBuff1, int8_t* offsetEo, int lcuWidth, intptr_t stride);
typedef void (*saoCuOrgE3_t)(pixel* rec, int8_t* upBuff1, int8_t* m_offsetEo, intptr_t stride, int startX, int endX);
+typedef void (*saoCuOrgE3_2Rows_t)(pixel* rec, int8_t* upBuff1, int8_t* m_offsetEo, intptr_t stride, int startX, int endX, int8_t* signDown);
typedef void (*saoCuOrgB0_t)(pixel* rec, const int8_t* offsetBo, int ctuWidth, int ctuHeight, intptr_t stride);
typedef void (*sign_t)(int8_t *dst, const pixel *src1, const pixel *src2, const int endX);
typedef void (*planecopy_cp_t) (const uint8_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift);
@@ -277,6 +278,7 @@
saoCuOrgE1_t saoCuOrgE1, saoCuOrgE1_2Rows;
saoCuOrgE2_t saoCuOrgE2;
saoCuOrgE3_t saoCuOrgE3;
+ saoCuOrgE3_2Rows_t saoCuOrgE3_2Rows;
saoCuOrgB0_t saoCuOrgB0;
downscale_t frameInitLowres;
diff -r 4cccf22b00ee -r e3017cf9bff5 source/encoder/sao.cpp
--- a/source/encoder/sao.cpp Fri Apr 10 18:15:38 2015 -0500
+++ b/source/encoder/sao.cpp Tue Apr 14 10:18:29 2015 +0530
@@ -516,20 +516,28 @@
if (rpelx == picWidth)
upBuff1[ctuWidth - 1] = lastSign;
- for (y = startY; y < endY; y++)
- {
- x = startX;
- int8_t signDown = signOf(rec[x] - tmpL[y + 1]);
- int edgeType = signDown + upBuff1[x] + 2;
- upBuff1[x - 1] = -signDown;
- rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
-
- primitives.saoCuOrgE3(rec, upBuff1, m_offsetEo, stride - 1, startX, endX);
-
- upBuff1[endX - 1] = signOf(rec[endX - 1 + stride] - rec[endX]);
-
- rec += stride;
- }
+ int diff = endY - startY;
+ for (y = 0; y < diff / 2; y++)
+ {
+ int8_t signDown[2];
+ signDown[0] = signOf(rec[startX] - tmpL[y * 2 + 1 + startY]);
+ signDown[1] = signOf(rec[startX + stride] - tmpL[y * 2 + 2 + startY]);
+
+ primitives.saoCuOrgE3_2Rows(rec, upBuff1, m_offsetEo, stride - 1, startX, endX, signDown);
+
+ rec += 2 * stride;
+ }
+ if (diff & 1)
+ {
+ int8_t signDown1 = signOf(rec[startX] - tmpL[y * 2 + 1 + startY]);
+ int edgeType = signDown1 + upBuff1[startX] + 2;
+ upBuff1[startX - 1] = -signDown1;
+ rec[startX] = m_clipTable[rec[startX] + m_offsetEo[edgeType]];
+
+ primitives.saoCuOrgE3(rec, upBuff1, m_offsetEo, stride - 1, startX, endX);
+
+ upBuff1[endX - 1] = signOf(rec[endX - 1 + stride] - rec[endX]);
+ }
}
break;
diff -r 4cccf22b00ee -r e3017cf9bff5 source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp Fri Apr 10 18:15:38 2015 -0500
+++ b/source/test/pixelharness.cpp Tue Apr 14 10:18:29 2015 +0530
@@ -66,7 +66,7 @@
sbuf2[i] = (rand() % (2 * SMAX + 1)) - SMAX - 1; //max(SHORT_MIN, min(rand(), SMAX));
ibuf1[i] = (rand() % (2 * SMAX + 1)) - SMAX - 1;
psbuf1[i] = psbuf4[i] = (rand() % 65) - 32; // range is between -32 to 32
- psbuf2[i] = psbuf5[i] = (rand() % 3) - 1; // possible values {-1,0,1}
+ psbuf2[i] = psbuf5[i] = psbuf6[i] = psbuf7[i] = (rand() % 3) - 1; // possible values {-1,0,1}
psbuf3[i] = (rand() % 129) - 128;
sbuf3[i] = rand() % PIXEL_MAX; // for blockcopy only
}
@@ -1011,6 +1011,35 @@
return true;
}
+bool PixelHarness::check_saoCuOrgE3_2Rows_t(saoCuOrgE3_2Rows_t ref, saoCuOrgE3_2Rows_t opt)
+{ // Runs ref and opt on identically prepared inputs for ITERS iterations; returns false on the first output mismatch, true otherwise.
+ ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
+ ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
+
+ memset(ref_dest, 0xCD, sizeof(ref_dest)); // both pixel buffers start with the same byte fill so they can be compared directly
+ memset(opt_dest, 0xCD, sizeof(opt_dest));
+
+ int j = 0; // offset into the harness sign/offset buffers, advanced by INCR every iteration
+
+ for (int i = 0; i < ITERS; i++)
+ {
+ int stride = 16 * (rand() % 4 + 1); // 16, 32, 48 or 64
+ int start = rand() % 2; // 0 or 1
+ int end = (16 * (rand() % 4 + 1)) - rand() % 2; // a multiple of 16 in [16, 64], optionally reduced by 1
+
+ ref(ref_dest, psbuf2 + j, psbuf1 + j, stride, start, end, psbuf6 + j); // NOTE(review): with j == 0 and start == 0 the primitive writes upBuff1[-1], one byte before psbuf2 - confirm this is intended
+ checked(opt, opt_dest, psbuf5 + j, psbuf1 + j, stride, start, end, psbuf7 + j); // psbuf5/psbuf7 are filled with the same values as psbuf2/psbuf6 by the buffer setup in this patch; checked() is defined elsewhere - presumably it invokes opt
+
+ if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)) || memcmp(psbuf2, psbuf5, BUFFSIZE)) // both the filtered pixels and the updated sign buffers must match
+ return false;
+
+ reportfail();
+ j += INCR;
+ }
+
+ return true;
+}
+
bool PixelHarness::check_planecopy_sp(planecopy_sp_t ref, planecopy_sp_t opt)
{
ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
@@ -1721,6 +1750,15 @@
}
}
+ if (opt.saoCuOrgE3_2Rows)
+ {
+ if (!check_saoCuOrgE3_2Rows_t(ref.saoCuOrgE3_2Rows, opt.saoCuOrgE3_2Rows))
+ {
+ printf("SAO_EO_3_2Rows failed\n");
+ return false;
+ }
+ }
+
if (opt.saoCuOrgB0)
{
if (!check_saoCuOrgB0_t(ref.saoCuOrgB0, opt.saoCuOrgB0))
@@ -2104,6 +2142,12 @@
REPORT_SPEEDUP(opt.saoCuOrgE3, ref.saoCuOrgE3, pbuf1, psbuf2, psbuf1, 64, 0, 64);
}
+ if (opt.saoCuOrgE3_2Rows)
+ {
+ HEADER0("SAO_EO_3_2Rows");
+ REPORT_SPEEDUP(opt.saoCuOrgE3_2Rows, ref.saoCuOrgE3_2Rows, pbuf1, psbuf2, psbuf1, 64, 0, 64, psbuf6);
+ }
+
if (opt.saoCuOrgB0)
{
HEADER0("SAO_BO_0");
diff -r 4cccf22b00ee -r e3017cf9bff5 source/test/pixelharness.h
--- a/source/test/pixelharness.h Fri Apr 10 18:15:38 2015 -0500
+++ b/source/test/pixelharness.h Tue Apr 14 10:18:29 2015 +0530
@@ -51,6 +51,8 @@
int8_t psbuf3[BUFFSIZE];
int8_t psbuf4[BUFFSIZE];
int8_t psbuf5[BUFFSIZE];
+ int8_t psbuf6[BUFFSIZE];
+ int8_t psbuf7[BUFFSIZE];
int16_t sbuf1[BUFFSIZE];
int16_t sbuf2[BUFFSIZE];
@@ -98,6 +100,7 @@
bool check_saoCuOrgE1_t(saoCuOrgE1_t ref, saoCuOrgE1_t opt);
bool check_saoCuOrgE2_t(saoCuOrgE2_t ref, saoCuOrgE2_t opt);
bool check_saoCuOrgE3_t(saoCuOrgE3_t ref, saoCuOrgE3_t opt);
+ bool check_saoCuOrgE3_2Rows_t(saoCuOrgE3_2Rows_t ref, saoCuOrgE3_2Rows_t opt);
bool check_saoCuOrgB0_t(saoCuOrgB0_t ref, saoCuOrgB0_t opt);
bool check_planecopy_sp(planecopy_sp_t ref, planecopy_sp_t opt);
bool check_planecopy_cp(planecopy_cp_t ref, planecopy_cp_t opt);
More information about the x265-devel mailing list