<div dir="ltr"><br><div class="gmail_extra"><br><br><div class="gmail_quote">On Fri, Oct 18, 2013 at 3:49 AM, <span dir="ltr">&lt;<a href="mailto:dnyaneshwar@multicorewareinc.com" target="_blank">dnyaneshwar@multicorewareinc.com</a>&gt;</span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Dnyaneshwar Gorade &lt;<a href="mailto:dnyaneshwar@multicorewareinc.com">dnyaneshwar@multicorewareinc.com</a>&gt;<br>
# Date 1382086085 -19800<br>
# Fri Oct 18 14:18:05 2013 +0530<br>
# Node ID 6d9bd6b6209e45cb49da804b23ad78424914b323<br>
# Parent d6d7187c5f4ea0978ebbddc1a559cea3712bf345<br>
added cvt32to16_shr_sse2 function to testbench.<br>
Speed up measured is almost 14x.<br></blockquote><div><br></div><div>pushed with minor improvements, please review</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
diff -r d6d7187c5f4e -r 6d9bd6b6209e source/test/pixelharness.cpp<br>
--- a/source/test/pixelharness.cpp Fri Oct 18 00:42:36 2013 -0500<br>
+++ b/source/test/pixelharness.cpp Fri Oct 18 14:18:05 2013 +0530<br>
@@ -45,10 +45,12 @@<br>
pbuf3 = (pixel*)X265_MALLOC(pixel, bufsize);<br>
pbuf4 = (pixel*)X265_MALLOC(pixel, bufsize);<br>
<br>
+ ibuf1 = (int*)X265_MALLOC(int, bufsize);<br>
+<br>
sbuf1 = (short*)X265_MALLOC(short, bufsize);<br>
sbuf2 = (short*)X265_MALLOC(short, bufsize);<br>
<br>
- if (!pbuf1 || !pbuf2 || !pbuf3 || !pbuf4 || !sbuf1 || !sbuf2)<br>
+ if (!pbuf1 || !pbuf2 || !pbuf3 || !pbuf4 || !sbuf1 || !sbuf2 || !ibuf1)<br>
{<br>
fprintf(stderr, "malloc failed, unable to initiate tests!\n");<br>
exit(1);<br>
@@ -63,6 +65,8 @@<br>
<br>
sbuf1[i] = (rand() & (2 * SHORT_MAX + 1)) - SHORT_MAX - 1; //max(SHORT_MIN, min(rand(), SHORT_MAX));<br>
sbuf2[i] = (rand() & (2 * SHORT_MAX + 1)) - SHORT_MAX - 1; //max(SHORT_MIN, min(rand(), SHORT_MAX));<br>
+<br>
+ ibuf1[i] = (rand() & (2 * SHORT_MAX + 1)) - SHORT_MAX - 1;<br>
}<br>
}<br>
<br>
@@ -481,6 +485,22 @@<br>
return true;<br>
}<br>
<br>
+bool PixelHarness::check_cvt32to16_shr_t(cvt32to16_shr_t ref, cvt32to16_shr_t opt)<br>
+{<br>
+ int shift = (rand() % 7 + 1);<br>
+<br>
+ ALIGN_VAR_16(short, ref_dest[64 * 64]);<br>
+ ALIGN_VAR_16(short, opt_dest[64 * 64]);<br>
+<br>
+ opt(opt_dest, ibuf1, STRIDE, shift, STRIDE);<br>
+ ref(ref_dest, ibuf1, STRIDE, shift, STRIDE);<br>
+<br>
+ if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(short)))<br>
+ return false;<br>
+<br>
+ return true;<br>
+}<br>
+<br>
bool PixelHarness::testPartition(int part, const EncoderPrimitives& ref, const EncoderPrimitives& opt)<br>
{<br>
if (opt.satd[part])<br>
@@ -615,6 +635,15 @@<br>
}<br>
}<br>
<br>
+ if (opt.cvt32to16_shr)<br>
+ {<br>
+ if (!check_cvt32to16_shr_t(ref.cvt32to16_shr, opt.cvt32to16_shr))<br>
+ {<br>
+ printf("cvt32to16 failed!\n");<br>
+ return false;<br>
+ }<br>
+ }<br>
+<br>
if (opt.blockcpy_pp)<br>
{<br>
if (!check_block_copy(ref.blockcpy_pp, opt.blockcpy_pp))<br>
@@ -810,6 +839,12 @@<br>
}<br>
}<br>
<br>
+ if (opt.cvt32to16_shr)<br>
+ {<br>
+ printf("cvt32to16 conversion");<br>
+ REPORT_SPEEDUP(opt.cvt32to16_shr, ref.cvt32to16_shr, sbuf1, ibuf1, 64, 5, 64);<br>
+ }<br>
+<br>
if (opt.blockcpy_pp)<br>
{<br>
printf("block cpy");<br>
diff -r d6d7187c5f4e -r 6d9bd6b6209e source/test/pixelharness.h<br>
--- a/source/test/pixelharness.h Fri Oct 18 00:42:36 2013 -0500<br>
+++ b/source/test/pixelharness.h Fri Oct 18 14:18:05 2013 +0530<br>
@@ -33,6 +33,8 @@<br>
<br>
pixel *pbuf1, *pbuf2, *pbuf3, *pbuf4;<br>
<br>
+ int *ibuf1;<br>
+<br>
short *sbuf1, *sbuf2;<br>
<br>
bool check_pixelcmp(pixelcmp_t ref, pixelcmp_t opt);<br>
@@ -52,6 +54,7 @@<br>
bool check_pixeladd_ss(pixeladd_ss_t ref, pixeladd_ss_t opt);<br>
bool check_pixeladd_pp(pixeladd_pp_t ref, pixeladd_pp_t opt);<br>
bool check_downscale_t(downscale_t ref, downscale_t opt);<br>
+ bool check_cvt32to16_shr_t(cvt32to16_shr_t ref, cvt32to16_shr_t opt);<br>
<br>
public:<br>
<br>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</blockquote></div><br><br clear="all"><div><br></div>-- <br>Steve Borho
</div></div>