<div dir="ltr"><br><div class="gmail_extra"><br><br><div class="gmail_quote">On Tue, Oct 1, 2013 at 2:51 AM,  <span dir="ltr"><<a href="mailto:shazeb@multicorewareinc.com" target="_blank">shazeb@multicorewareinc.com</a>></span> wrote:<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>

# User Shazeb Nawaz Khan <<a href="mailto:shazeb@multicorewareinc.com">shazeb@multicorewareinc.com</a>><br>

# Date 1380607560 -19800<br>

#      Tue Oct 01 11:36:00 2013 +0530<br>

# Node ID 07d712e6265cb6f052a55fe7a1448d48b5339acc<br>

# Parent  b089a7ff0d73efa6b9da3ee98ba6c8191ff3652b<br>

Templating weightUnidir primitive to support pixel inputs<br>

<br>

To be used for full-pel planes<br>

<br>

diff -r b089a7ff0d73 -r 07d712e6265c source/common/pixel.cpp<br>

--- a/source/common/pixel.cpp   Tue Oct 01 11:20:02 2013 +0530<br>

+++ b/source/common/pixel.cpp   Tue Oct 01 11:36:00 2013 +0530<br>

@@ -514,17 +514,19 @@<br>

     }<br>

 }<br>

<br>

-void weightUnidir(short *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset)<br>

+template <typename T><br>

+void weightUnidir(void *srcAbstract, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset)<br>

 {<br>

+    T *src = static_cast<T *>(srcAbstract);<br>

     int x, y;<br>

     for (y = height - 1; y >= 0; y--)<br>

     {<br>

         for (x = width - 1; x >= 0; )<br>

         {<br>

             // note: luma min width is 4<br>

-            dst[x] = (pixel) Clip3(0, ((1 << X265_DEPTH) - 1), ((w0 * (src[x] + IF_INTERNAL_OFFS) + round) >> shift) + offset);<br>

+            dst[x] = (pixel) Clip3(0, ((1 << X265_DEPTH) - 1), ((w0 * (uint16_t) (src[x] + IF_INTERNAL_OFFS) + round) >> shift) + offset);<br></blockquote><div><br></div><div>FYI: this routine was previously relying on the C behavior of upcasting integer operands.  When you multiply an int with a char, the char is implicitly upcast to an int prior to the multiplication.</div>

<div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

             x--;<br>

-            dst[x] = (pixel) Clip3(0, ((1 << X265_DEPTH) - 1), ((w0 * (src[x] + IF_INTERNAL_OFFS) + round) >> shift) + offset);<br>

+            dst[x] = (pixel) Clip3(0, ((1 << X265_DEPTH) - 1), ((w0 * (uint16_t) (src[x] + IF_INTERNAL_OFFS) + round) >> shift) + offset);<br>

             x--;<br>

         }<br>

<br>

@@ -842,7 +844,8 @@<br>

     p.transpose[3] = transpose<32>;<br>

     p.transpose[4] = transpose<64>;<br>

<br>

-    p.weightpUni = weightUnidir;<br>

+    p.weightpUniPixel = weightUnidir<pixel>;<br>

+    p.weightpUni = weightUnidir<short>;<br></blockquote><div><br></div><div>don't use short, use uint16_t</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">


<br>

     p.pixelsub_sp = pixelsub_sp_c;<br>

     p.pixeladd_pp = pixeladd_pp_c;<br>

diff -r b089a7ff0d73 -r 07d712e6265c source/common/primitives.h<br>

--- a/source/common/primitives.h        Tue Oct 01 11:20:02 2013 +0530<br>

+++ b/source/common/primitives.h        Tue Oct 01 11:36:00 2013 +0530<br>

@@ -228,8 +228,7 @@<br>

 typedef void (*filterRowV_N_t)(short *midA, intptr_t midStride, pixel *dstA, pixel *dstB, pixel *dstC, intptr_t dstStride, int width, int height, int marginX, int marginY, int row, int isLastRow);<br>

 typedef void (*extendCURowBorder_t)(pixel* txt, intptr_t stride, int width, int height, int marginX);<br>

<br>

-<br>

-typedef void (*weightpUni_t)(short *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);<br></blockquote><div><br></div><div>Don't use void here.  There should be two different function pointer typedefs, one for pixel inputs and one for uint16_t inputs.</div>

<div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+typedef void (*weightpUni_t)(void *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);<br>

 typedef void (*scale_t)(pixel *dst, pixel *src, intptr_t stride);<br>

 typedef void (*downscale_t)(pixel *src0, pixel *dstf, pixel *dsth, pixel *dstv, pixel *dstc,<br>

                             intptr_t src_stride, intptr_t dst_stride, int width, int height);<br>

@@ -286,6 +285,7 @@<br>

     calcrecon_t     calcrecon[NUM_SQUARE_BLOCKS];<br>

     transpose_t     transpose[NUM_SQUARE_BLOCKS];<br>

<br>

+    weightpUni_t    weightpUniPixel;<br>

     weightpUni_t    weightpUni;<br>

     pixelsub_sp_t   pixelsub_sp;<br>

     pixeladd_ss_t   pixeladd_ss;<br>

diff -r b089a7ff0d73 -r 07d712e6265c source/common/vec/pixel.inc<br>

--- a/source/common/vec/pixel.inc       Tue Oct 01 11:20:02 2013 +0530<br>

+++ b/source/common/vec/pixel.inc       Tue Oct 01 11:36:00 2013 +0530<br>

@@ -469,7 +469,9 @@<br>

     p.transpose[2] = transpose16;<br>

     p.transpose[3] = transpose32;<br>

     p.transpose[4] = transpose<64>;<br>

-    p.weightpUni = weightUnidir;<br>

+    p.weightpUniPixel = weightUnidir<pixel>;<br>

+    p.weightpUni = weightUnidir<short>;<br>

+<br>

 #endif<br>

<br>

 #if !HIGH_BIT_DEPTH<br>

diff -r b089a7ff0d73 -r 07d712e6265c source/common/vec/pixel8.inc<br>

--- a/source/common/vec/pixel8.inc      Tue Oct 01 11:20:02 2013 +0530<br>

+++ b/source/common/vec/pixel8.inc      Tue Oct 01 11:36:00 2013 +0530<br>

@@ -8573,8 +8573,10 @@<br>

     }<br>

 }<br>

<br>

-void weightUnidir(short *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset)<br>

+template <typename T><br>

+void weightUnidir(void *srcAbstract, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset)<br>

 {<br>

+    T* src = static_cast<T *> (srcAbstract);<br>

     int x, y;<br>

     Vec8s tmp;<br></blockquote><div><br></div><div>I'm surprised this could actually work; usually we need different functions when the source type changes because you use entirely different load instructions.</div>

<div><br></div></div></div></div>