<div dir="ltr"><br><div class="gmail_quote"><div dir="ltr"><div class="gmail_extra"><div class="gmail_quote"><div><div class="h5"><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">
+template<int N, int width><br>
+void interp_horiz_pp(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int height, int coeffIdx)<br>
+{<br>
+ int cStride = 1;<br>
+ short const * coeff= g_chromaFilter[coeffIdx];<br>
+ src -= (N / 2 - 1) * cStride;<br>
+ coeffIdx;<br>
+ int offset;<br>
+ short maxVal;<br>
+ int headRoom = IF_INTERNAL_PREC - X265_DEPTH;<br>
+ offset = (1 << (headRoom - 1));<br>
+ maxVal = (1 << X265_DEPTH) - 1;<br>
+<br>
+ int row, col;<br>
+ for (row = 0; row < height; row++)<br>
+ {<br>
+ for (col = 0; col < width; col++)<br>
+ {<br>
+ int sum;<br>
+<br>
+ sum = src[col + 0 * cStride] * coeff[0];<br>
+ sum += src[col + 1 * cStride] * coeff[1];<br>
+ if (N >= 4)<br>
+ {<br>
+ sum += src[col + 2 * cStride] * coeff[2];<br>
+ sum += src[col + 3 * cStride] * coeff[3];<br>
+ }<br></blockquote></div></div><div>>>the N>= 6 check seems out of place, unless we're going to instantiate a 7tap filter </div><div>Actually, I wanted to add a single C primitive for both chroma and luma; this is why I did not change the check conditions, as they will be required in the luma functions.</div>
<div><div class="h5"><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">
+ if (N >= 6)<br>
+ {<br>
+ sum += src[col + 4 * cStride] * coeff[4];<br>
+ sum += src[col + 5 * cStride] * coeff[5];<br>
+ }<br>
+ if (N == 8)<br>
+ {<br>
+ sum += src[col + 6 * cStride] * coeff[6];<br>
+ sum += src[col + 7 * cStride] * coeff[7];<br>
+ }<br>
+ short val = (short)(sum + offset) >> headRoom;<br>
+<br>
+ if (val < 0) val = 0;<br>
+ if (val > maxVal) val = maxVal;<br>
+ dst[col] = (pixel)val;<br>
+ }<br>
+<br>
+ src += srcStride;<br>
+ dst += dstStride;<br>
+ }<br>
+}<br>
}<br>
<br>
namespace x265 {<br>
diff -r 1087f1f3bf5a -r 39fc3c36e1b1 source/test/ipfilterharness.cpp<br>
--- a/source/test/ipfilterharness.cpp Tue Oct 15 20:57:54 2013 +0530<br>
+++ b/source/test/ipfilterharness.cpp Tue Oct 15 21:22:03 2013 +0530<br>
@@ -3,6 +3,7 @@<br>
*<br>
* Authors: Deepthi Devaki <<a href="mailto:deepthidevaki@multicorewareinc.com" target="_blank">deepthidevaki@multicorewareinc.com</a>>,<br>
* Rajesh Paulraj <<a href="mailto:rajesh@multicorewareinc.com" target="_blank">rajesh@multicorewareinc.com</a>><br>
+ * Praveen Kumar Tiwari <<a href="mailto:praveen@multicorewareinc.com" target="_blank">praveen@multicorewareinc.com</a>><br>
*<br>
* This program is free software; you can redistribute it and/or modify<br>
* it under the terms of the GNU General Public License as published by<br>
@@ -39,6 +40,18 @@<br>
"ipfilterV_pp<4>"<br>
};<br>
<br>
+const char* ChromaFilterPPNames[] =<br>
+{<br>
+ "interp_4tap_horiz_pp_w2",<br>
+ "interp_4tap_horiz_pp_w4",<br>
+ "interp_4tap_horiz_pp_w6",<br>
+ "interp_4tap_horiz_pp_w8",<br>
+ "interp_4tap_horiz_pp_w12",<br>
+ "interp_4tap_horiz_pp_w16",<br>
+ "interp_4tap_horiz_pp_w24",<br>
+ "interp_4tap_horiz_pp_w32"<br>
+};<br></blockquote><div><br></div></div></div><div>the names should correspond with the chroma size enums, which only specify a width. This string table should be re-usable for more than just 4tap horizontal pixel to pixel interpolation. Each element should just be "W2" or something similar so it can be used as:</div>
<div><br></div><div>printf("chroma_hpp[%s]: ", ChromaFilterName[w]);</div><div class="im"><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">
+<br>
IPFilterHarness::IPFilterHarness()<br>
{<br>
ipf_t_size = 200 * 200;<br>
@@ -262,6 +275,47 @@<br>
return true;<br>
}<br>
<br>
+bool IPFilterHarness::check_IPFilter_primitive(filter_pp_t ref, filter_pp_t opt)<br></blockquote><div><br></div></div><div>there needs to be chroma and luma versions of this function for the two filter lengths, or pass filter length as an argument</div>
<div class="im">
<div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">
+{<br>
+ int rand_height = rand() % 100; // Randomly generated Height<br></blockquote><div><br></div></div><div>I don't see a point to testing any sizes not used by the encoder; this just prevents possible optimizations in the primitive. Primitives that have fixed dimensions should be tested with those fixed dimensions used by the encoder.</div>
<div class="im">
<div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">
+ int rand_val, rand_srcStride, rand_dstStride, rand_coeffIdx;<br>
+<br>
+ for (int i = 0; i <= 100; i++)<br>
+ {<br>
+ memset(IPF_vec_output_p, 0, ipf_t_size); // Initialize output buffer to zero<br>
+ memset(IPF_C_output_p, 0, ipf_t_size); // Initialize output buffer to zero<br></blockquote><div><br></div></div><div>is memzero really necessary here? I don't think so</div><div class="im"><div><br></div>
<blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">
+<br>
+ rand_coeffIdx = rand() % 8; // Random coeffIdex in the filter<br></blockquote><div><br></div></div><div>>>chroma coeff index should be 1, 2, or 3</div><div><br></div><div>I think the chroma table is:</div>
<div>const short g_chromaFilter[8][NTAPS_CHROMA] =</div><div>{</div><div> { 0, 64, 0, 0 },</div><div> { -2, 58, 10, -2 },</div><div> { -4, 54, 16, -2 },</div><div> { -6, 46, 28, -4 },</div><div> { -4, 36, 36, -4 },</div>
<div> { -4, 28, 46, -6 },</div><div> { -2, 16, 54, -4 },</div><div> { -2, 10, 58, -2 }</div><div>};</div><div class="im"><div> <font color="#222222"> We also have the coeff table in a similar fashion, so I need coeffIdx values from 0 to 7.</font></div>
<div><br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">+ rand_val = rand() % 4; // Random offset in the filter<br>
</blockquote><div><br></div></div><div>rand_val is unused</div><div><div class="h5"><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">
+ rand_srcStride = rand() % 100; // Randomly generated srcStride<br>
+ rand_dstStride = rand() % 100; // Randomly generated dstStride<br>
+<br>
+ if (rand_srcStride < 32)<br>
+ rand_srcStride = 32;<br>
+<br>
+ if (rand_dstStride < 32)<br>
+ rand_dstStride = 32;<br>
+<br>
+ opt(pixel_buff + 3 * rand_srcStride,<br>
+ rand_srcStride,<br>
+ IPF_vec_output_p,<br>
+ rand_dstStride,<br>
+ rand_height, rand_coeffIdx<br>
+ );<br>
+ ref(pixel_buff + 3 * rand_srcStride,<br>
+ rand_srcStride,<br>
+ IPF_C_output_p,<br>
+ rand_dstStride,<br>
+ rand_height, rand_coeffIdx<br>
+ );<br>
+<br>
+ if (memcmp(IPF_vec_output_p, IPF_C_output_p, ipf_t_size))<br>
+ return false;<br>
+ }<br>
+<br>
+ return true;<br>
+}<br>
+<br>
bool IPFilterHarness::testCorrectness(const EncoderPrimitives& ref, const EncoderPrimitives& opt)<br>
{<br>
for (int value = 0; value < NUM_IPFILTER_P_P; value++)<br>
@@ -318,6 +372,18 @@<br>
}<br>
}<br>
<br>
+ for (int value = 0; value < NUM_CHROMA_PARTITIONS; value++)<br>
+ {<br>
+ if (opt.chroma_hpp[value])<br>
+ {<br></blockquote><div><br></div></div></div><div>this should test known heights for each width</div><div class="im"><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">
+ if (!check_IPFilter_primitive(ref.chroma_hpp[value], opt.chroma_hpp[value]))<br>
+ {<br>
+ printf("%s failed\n", ChromaFilterPPNames[value]);<br>
+ return false;<br>
+ }<br>
+ }<br>
+ }<br>
+<br>
return true;<br>
}<br>
<br>
@@ -372,4 +438,14 @@<br>
REPORT_SPEEDUP(opt.ipfilter_s2p, ref.ipfilter_s2p,<br>
short_buff, srcStride, IPF_vec_output_p, dstStride, width, height);<br>
}<br>
+<br>
+ for (int value = 0; value < NUM_CHROMA_PARTITIONS; value++)<br>
+ {<br></blockquote><div><br></div></div><div>this should measure performance at each height supported by each width</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">
<div class="im">
+ if (opt.chroma_hpp[value])<br>
+ {<br>
+ printf("%s\t", ChromaFilterPPNames[value]);<br>
+ REPORT_SPEEDUP(opt.chroma_hpp[value], ref.chroma_hpp[value],<br>
+ pixel_buff + 3 * srcStride, srcStride, IPF_vec_output_p, dstStride, height, 1);<br>
+ }<br>
+ }<br>
}<br>
diff -r 1087f1f3bf5a -r 39fc3c36e1b1 source/test/ipfilterharness.h<br>
--- a/source/test/ipfilterharness.h Tue Oct 15 20:57:54 2013 +0530<br>
+++ b/source/test/ipfilterharness.h Tue Oct 15 21:22:03 2013 +0530<br>
@@ -3,6 +3,7 @@<br>
*<br>
* Authors: Deepthi Devaki <<a href="mailto:deepthidevaki@multicorewareinc.com" target="_blank">deepthidevaki@multicorewareinc.com</a>>,<br>
* Rajesh Paulraj <<a href="mailto:rajesh@multicorewareinc.com" target="_blank">rajesh@multicorewareinc.com</a>><br>
+ * Praveen Kumar Tiwari <<a href="mailto:praveen@multicorewareinc.com" target="_blank">praveen@multicorewareinc.com</a>><br>
*<br>
* This program is free software; you can redistribute it and/or modify<br>
* it under the terms of the GNU General Public License as published by<br>
@@ -45,6 +46,7 @@<br>
bool check_IPFilter_primitive(ipfilter_sp_t ref, ipfilter_sp_t opt);<br>
bool check_IPFilter_primitive(ipfilter_p2s_t ref, ipfilter_p2s_t opt);<br>
bool check_IPFilter_primitive(ipfilter_s2p_t ref, ipfilter_s2p_t opt);<br>
+ bool check_IPFilter_primitive(filter_pp_t ref, filter_pp_t opt);<br>
<br>
public:<br>
<br></div>
_______________________________________________<br></blockquote></div></div></div>
<br></div><br></div>