[x265] [PATCH] sao: modify saoCuOrgE2 primitive to handle width=16 separately
Divya Manivannan
divya at multicorewareinc.com
Thu Apr 23 15:03:33 CEST 2015
# HG changeset patch
# User Divya Manivannan <divya at multicorewareinc.com>
# Date 1429792993 -19800
# Thu Apr 23 18:13:13 2015 +0530
# Node ID 861ffbedeaefd45eb6431d8ce6d5a3b4789f9a2c
# Parent cec68d3e37ef15c571cfa7f2784a12e944a2e2a7
sao: modify saoCuOrgE2 primitive to handle width=16 separately
diff -r cec68d3e37ef -r 861ffbedeaef source/common/loopfilter.cpp
--- a/source/common/loopfilter.cpp Wed Apr 22 21:35:55 2015 -0500
+++ b/source/common/loopfilter.cpp Thu Apr 23 18:13:13 2015 +0530
@@ -144,7 +144,8 @@
p.saoCuOrgE0 = processSaoCUE0;
p.saoCuOrgE1 = processSaoCUE1;
p.saoCuOrgE1_2Rows = processSaoCUE1_2Rows;
- p.saoCuOrgE2 = processSaoCUE2;
+ p.saoCuOrgE2[0] = processSaoCUE2;
+ p.saoCuOrgE2[1] = processSaoCUE2;
p.saoCuOrgE3[0] = processSaoCUE3;
p.saoCuOrgE3[1] = processSaoCUE3;
p.saoCuOrgB0 = processSaoCUB0;
diff -r cec68d3e37ef -r 861ffbedeaef source/common/primitives.h
--- a/source/common/primitives.h Wed Apr 22 21:35:55 2015 -0500
+++ b/source/common/primitives.h Thu Apr 23 18:13:13 2015 +0530
@@ -276,7 +276,7 @@
sign_t sign;
saoCuOrgE0_t saoCuOrgE0;
saoCuOrgE1_t saoCuOrgE1, saoCuOrgE1_2Rows;
- saoCuOrgE2_t saoCuOrgE2;
+ saoCuOrgE2_t saoCuOrgE2[2];
saoCuOrgE3_t saoCuOrgE3[2];
saoCuOrgB0_t saoCuOrgB0;
diff -r cec68d3e37ef -r 861ffbedeaef source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Wed Apr 22 21:35:55 2015 -0500
+++ b/source/common/x86/asm-primitives.cpp Thu Apr 23 18:13:13 2015 +0530
@@ -1496,7 +1496,8 @@
p.saoCuOrgE0 = x265_saoCuOrgE0_sse4;
p.saoCuOrgE1 = x265_saoCuOrgE1_sse4;
p.saoCuOrgE1_2Rows = x265_saoCuOrgE1_2Rows_sse4;
- p.saoCuOrgE2 = x265_saoCuOrgE2_sse4;
+ p.saoCuOrgE2[0] = x265_saoCuOrgE2_sse4;
+ p.saoCuOrgE2[1] = x265_saoCuOrgE2_sse4;
p.saoCuOrgE3[0] = x265_saoCuOrgE3_sse4;
p.saoCuOrgE3[1] = x265_saoCuOrgE3_sse4;
p.saoCuOrgB0 = x265_saoCuOrgB0_sse4;
diff -r cec68d3e37ef -r 861ffbedeaef source/encoder/sao.cpp
--- a/source/encoder/sao.cpp Wed Apr 22 21:35:55 2015 -0500
+++ b/source/encoder/sao.cpp Thu Apr 23 18:13:13 2015 +0530
@@ -442,8 +442,8 @@
int8_t one = upBufft[1];
int8_t two = upBufft[endX + 1];
- primitives.saoCuOrgE2(rec, upBufft, upBuff1, m_offsetEo, ctuWidth, stride);
- if (!lpelx)
+ primitives.saoCuOrgE2[ctuWidth > 16](rec, upBufft, upBuff1, m_offsetEo, ctuWidth, stride);
+ if (startX)
{
rec[0] = firstPxl;
upBufft[1] = one;
diff -r cec68d3e37ef -r 861ffbedeaef source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp Wed Apr 22 21:35:55 2015 -0500
+++ b/source/test/pixelharness.cpp Thu Apr 23 18:13:13 2015 +0530
@@ -951,7 +951,7 @@
return true;
}
-bool PixelHarness::check_saoCuOrgE2_t(saoCuOrgE2_t ref, saoCuOrgE2_t opt)
+bool PixelHarness::check_saoCuOrgE2_t(saoCuOrgE2_t ref[2], saoCuOrgE2_t opt[2])
{
ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
@@ -959,24 +959,29 @@
memset(ref_dest, 0xCD, sizeof(ref_dest));
memset(opt_dest, 0xCD, sizeof(opt_dest));
- int j = 0;
+ for (int id = 0; id < 2; id++)
+ {
+ int j = 0;
+ if (opt[id])
+ {
+ for (int i = 0; i < ITERS; i++)
+ {
+ int width = 16 * (1 << (id * (rand() % 2 + 1)));
+ int stride = width + 1;
- for (int i = 0; i < ITERS; i++)
- {
- int width = 16 * (rand() % 4 + 1);
- int stride = width + 1;
+ ref[width > 16](ref_dest, psbuf1 + j, psbuf2 + j, psbuf3 + j, width, stride);
+ checked(opt[width > 16], opt_dest, psbuf4 + j, psbuf2 + j, psbuf3 + j, width, stride);
- ref(ref_dest, psbuf1 + j, psbuf2 + j, psbuf3 + j, width, stride);
- checked(opt, opt_dest, psbuf4 + j, psbuf2 + j, psbuf3 + j, width, stride);
+ if (memcmp(psbuf1 + j, psbuf4 + j, width * sizeof(int8_t)))
+ return false;
- if (memcmp(psbuf1 + j, psbuf4 + j, width * sizeof(int8_t)))
- return false;
+ if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
+ return false;
- if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
- return false;
-
- reportfail();
- j += INCR;
+ reportfail();
+ j += INCR;
+ }
+ }
}
return true;
@@ -996,7 +1001,7 @@
{
int stride = 16 * (rand() % 4 + 1);
int start = rand() % 2;
- int end = (16 * (rand() % 4 + 1)) - rand() % 2;
+ int end = 16 - rand() % 2;
ref(ref_dest, psbuf2 + j, psbuf1 + j, stride, start, end);
checked(opt, opt_dest, psbuf5 + j, psbuf1 + j, stride, start, end);
@@ -1799,14 +1804,17 @@
}
}
- if (opt.saoCuOrgE2)
- {
- if (!check_saoCuOrgE2_t(ref.saoCuOrgE2, opt.saoCuOrgE2))
- {
- printf("SAO_EO_2 failed\n");
- return false;
- }
- }
+ if (opt.saoCuOrgE2[0] || opt.saoCuOrgE2[1])
+ {
+ saoCuOrgE2_t ref1[] = { ref.saoCuOrgE2[0], ref.saoCuOrgE2[1] };
+ saoCuOrgE2_t opt1[] = { opt.saoCuOrgE2[0], opt.saoCuOrgE2[1] };
+
+ if (!check_saoCuOrgE2_t(ref1, opt1))
+ {
+ printf("SAO_EO_2[0] && SAO_EO_2[1] failed\n");
+ return false;
+ }
+ }
if (opt.saoCuOrgE3[0])
{
@@ -2206,16 +2214,22 @@
REPORT_SPEEDUP(opt.saoCuOrgE1_2Rows, ref.saoCuOrgE1_2Rows, pbuf1, psbuf2, psbuf1, 64, 64);
}
- if (opt.saoCuOrgE2)
+ if (opt.saoCuOrgE2[0])
{
- HEADER0("SAO_EO_2");
- REPORT_SPEEDUP(opt.saoCuOrgE2, ref.saoCuOrgE2, pbuf1, psbuf1, psbuf2, psbuf3, 64, 64);
+ HEADER0("SAO_EO_2[0]");
+ REPORT_SPEEDUP(opt.saoCuOrgE2[0], ref.saoCuOrgE2[0], pbuf1, psbuf1, psbuf2, psbuf3, 16, 64);
+ }
+
+ if (opt.saoCuOrgE2[1])
+ {
+ HEADER0("SAO_EO_2[1]");
+ REPORT_SPEEDUP(opt.saoCuOrgE2[1], ref.saoCuOrgE2[1], pbuf1, psbuf1, psbuf2, psbuf3, 64, 64);
}
if (opt.saoCuOrgE3[0])
{
HEADER0("SAO_EO_3[0]");
- REPORT_SPEEDUP(opt.saoCuOrgE3[0], ref.saoCuOrgE3[0], pbuf1, psbuf2, psbuf1, 64, 0, 64);
+ REPORT_SPEEDUP(opt.saoCuOrgE3[0], ref.saoCuOrgE3[0], pbuf1, psbuf2, psbuf1, 64, 0, 16);
}
if (opt.saoCuOrgE3[1])
diff -r cec68d3e37ef -r 861ffbedeaef source/test/pixelharness.h
--- a/source/test/pixelharness.h Wed Apr 22 21:35:55 2015 -0500
+++ b/source/test/pixelharness.h Thu Apr 23 18:13:13 2015 +0530
@@ -96,7 +96,7 @@
bool check_addAvg(addAvg_t, addAvg_t);
bool check_saoCuOrgE0_t(saoCuOrgE0_t ref, saoCuOrgE0_t opt);
bool check_saoCuOrgE1_t(saoCuOrgE1_t ref, saoCuOrgE1_t opt);
- bool check_saoCuOrgE2_t(saoCuOrgE2_t ref, saoCuOrgE2_t opt);
+ bool check_saoCuOrgE2_t(saoCuOrgE2_t ref[], saoCuOrgE2_t opt[]);
bool check_saoCuOrgE3_t(saoCuOrgE3_t ref, saoCuOrgE3_t opt);
bool check_saoCuOrgE3_32_t(saoCuOrgE3_t ref, saoCuOrgE3_t opt);
bool check_saoCuOrgB0_t(saoCuOrgB0_t ref, saoCuOrgB0_t opt);
More information about the x265-devel mailing list