<div dir="ltr"><br><div class="gmail_extra"><br><br><div class="gmail_quote">On Tue, Oct 1, 2013 at 2:51 AM, <span dir="ltr"><<a href="mailto:shazeb@multicorewareinc.com" target="_blank">shazeb@multicorewareinc.com</a>></span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Shazeb Nawaz Khan <<a href="mailto:shazeb@multicorewareinc.com">shazeb@multicorewareinc.com</a>><br>
# Date 1380607560 -19800<br>
# Tue Oct 01 11:36:00 2013 +0530<br>
# Node ID 07d712e6265cb6f052a55fe7a1448d48b5339acc<br>
# Parent b089a7ff0d73efa6b9da3ee98ba6c8191ff3652b<br>
Templating weightUnidir primitive to support pixel inputs<br>
<br>
To be used for full-pel planes<br>
<br>
diff -r b089a7ff0d73 -r 07d712e6265c source/common/pixel.cpp<br>
--- a/source/common/pixel.cpp Tue Oct 01 11:20:02 2013 +0530<br>
+++ b/source/common/pixel.cpp Tue Oct 01 11:36:00 2013 +0530<br>
@@ -514,17 +514,19 @@<br>
}<br>
}<br>
<br>
-void weightUnidir(short *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset)<br>
+template <typename T><br>
+void weightUnidir(void *srcAbstract, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset)<br>
{<br>
+ T *src = static_cast<T *>(srcAbstract);<br>
int x, y;<br>
for (y = height - 1; y >= 0; y--)<br>
{<br>
for (x = width - 1; x >= 0; )<br>
{<br>
// note: luma min width is 4<br>
- dst[x] = (pixel) Clip3(0, ((1 << X265_DEPTH) - 1), ((w0 * (src[x] + IF_INTERNAL_OFFS) + round) >> shift) + offset);<br>
+ dst[x] = (pixel) Clip3(0, ((1 << X265_DEPTH) - 1), ((w0 * (uint16_t) (src[x] + IF_INTERNAL_OFFS) + round) >> shift) + offset);<br></blockquote><div><br></div><div>FYI: this routine was previously relying on the C behavior of implicit integer promotion. When you multiply an int by a char, the char is implicitly promoted to an int prior to the multiplication.</div>
<div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
x--;<br>
- dst[x] = (pixel) Clip3(0, ((1 << X265_DEPTH) - 1), ((w0 * (src[x] + IF_INTERNAL_OFFS) + round) >> shift) + offset);<br>
+ dst[x] = (pixel) Clip3(0, ((1 << X265_DEPTH) - 1), ((w0 * (uint16_t) (src[x] + IF_INTERNAL_OFFS) + round) >> shift) + offset);<br>
x--;<br>
}<br>
<br>
@@ -842,7 +844,8 @@<br>
p.transpose[3] = transpose<32>;<br>
p.transpose[4] = transpose<64>;<br>
<br>
- p.weightpUni = weightUnidir;<br>
+ p.weightpUniPixel = weightUnidir<pixel>;<br>
+ p.weightpUni = weightUnidir<short>;<br></blockquote><div><br></div><div>don't use short, use uint16_t</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<br>
p.pixelsub_sp = pixelsub_sp_c;<br>
p.pixeladd_pp = pixeladd_pp_c;<br>
diff -r b089a7ff0d73 -r 07d712e6265c source/common/primitives.h<br>
--- a/source/common/primitives.h Tue Oct 01 11:20:02 2013 +0530<br>
+++ b/source/common/primitives.h Tue Oct 01 11:36:00 2013 +0530<br>
@@ -228,8 +228,7 @@<br>
typedef void (*filterRowV_N_t)(short *midA, intptr_t midStride, pixel *dstA, pixel *dstB, pixel *dstC, intptr_t dstStride, int width, int height, int marginX, int marginY, int row, int isLastRow);<br>
typedef void (*extendCURowBorder_t)(pixel* txt, intptr_t stride, int width, int height, int marginX);<br>
<br>
-<br>
-typedef void (*weightpUni_t)(short *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);<br></blockquote><div><br></div><div>Don't use void here. There should be two different function pointer typedefs, one for pixel inputs and one for uint16_t inputs.</div>
<div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+typedef void (*weightpUni_t)(void *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);<br>
typedef void (*scale_t)(pixel *dst, pixel *src, intptr_t stride);<br>
typedef void (*downscale_t)(pixel *src0, pixel *dstf, pixel *dsth, pixel *dstv, pixel *dstc,<br>
intptr_t src_stride, intptr_t dst_stride, int width, int height);<br>
@@ -286,6 +285,7 @@<br>
calcrecon_t calcrecon[NUM_SQUARE_BLOCKS];<br>
transpose_t transpose[NUM_SQUARE_BLOCKS];<br>
<br>
+ weightpUni_t weightpUniPixel;<br>
weightpUni_t weightpUni;<br>
pixelsub_sp_t pixelsub_sp;<br>
pixeladd_ss_t pixeladd_ss;<br>
diff -r b089a7ff0d73 -r 07d712e6265c source/common/vec/pixel.inc<br>
--- a/source/common/vec/pixel.inc Tue Oct 01 11:20:02 2013 +0530<br>
+++ b/source/common/vec/pixel.inc Tue Oct 01 11:36:00 2013 +0530<br>
@@ -469,7 +469,9 @@<br>
p.transpose[2] = transpose16;<br>
p.transpose[3] = transpose32;<br>
p.transpose[4] = transpose<64>;<br>
- p.weightpUni = weightUnidir;<br>
+ p.weightpUniPixel = weightUnidir<pixel>;<br>
+ p.weightpUni = weightUnidir<short>;<br>
+<br>
#endif<br>
<br>
#if !HIGH_BIT_DEPTH<br>
diff -r b089a7ff0d73 -r 07d712e6265c source/common/vec/pixel8.inc<br>
--- a/source/common/vec/pixel8.inc Tue Oct 01 11:20:02 2013 +0530<br>
+++ b/source/common/vec/pixel8.inc Tue Oct 01 11:36:00 2013 +0530<br>
@@ -8573,8 +8573,10 @@<br>
}<br>
}<br>
<br>
-void weightUnidir(short *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset)<br>
+template <typename T><br>
+void weightUnidir(void *srcAbstract, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset)<br>
{<br>
+ T* src = static_cast<T *> (srcAbstract);<br>
int x, y;<br>
Vec8s tmp;<br></blockquote><div><br></div><div>I'm surprised this could actually work; usually we need different functions when the source type changes because you need entirely different load instructions.</div>
<div><br></div></div></div></div>