[x265] [PATCH 2 of 4] Templating weightUnidir primitive to support pixel inputs

Shazeb Khan shazeb at multicorewareinc.com
Wed Oct 2 13:47:44 CEST 2013


# HG changeset patch
# User Shazeb Nawaz Khan <shazeb at multicorewareinc.com>
# Date 1380713440 -19800
# Node ID 09de1a4441d6b00c36c79c6ed90e296cbe144004
# Parent  c5dc3e37d76772b5e26c6996ab0df5ce325f54a7
Templating weightUnidir primitive to support pixel inputs

diff -r c5dc3e37d767 -r 09de1a4441d6 source/Lib/TLibCommon/TComWeightPrediction.cpp
--- a/source/Lib/TLibCommon/TComWeightPrediction.cpp    Tue Oct 01 17:28:19 2013 +0530
+++ b/source/Lib/TLibCommon/TComWeightPrediction.cpp    Wed Oct 02 17:00:40 2013 +0530
@@ -431,7 +431,7 @@
         srcStride = srcYuv0->m_width;
         dstStride  = outDstYuv->getStride();

-        primitives.weightpUni(srcY0, dstY, srcStride, dstStride, width,
height, w0, round, shift, offset);
+        primitives.weightpUni((uint16_t *)srcY0, dstY, srcStride,
dstStride, width, height, w0, round, shift, offset);
     }

     // Chroma U : --------------------------------------------
@@ -447,7 +447,7 @@
     width  >>= 1;
     height >>= 1;

-    primitives.weightpUni(srcU0, dstU, srcStride, dstStride, width,
height, w0, round, shift, offset);
+    primitives.weightpUni((uint16_t *)srcU0, dstU, srcStride, dstStride,
width, height, w0, round, shift, offset);

     // Chroma V : --------------------------------------------
     w0      = wp0[2].w;
@@ -455,7 +455,7 @@
     shift   = wp0[2].shift + shiftNum;
     round   = shift ? (1 << (shift - 1)) : 0;

-    primitives.weightpUni(srcV0, dstV, srcStride, dstStride, width,
height, w0, round, shift, offset);
+    primitives.weightpUni((uint16_t *)srcV0, dstV, srcStride, dstStride,
width, height, w0, round, shift, offset);
 }

 //=======================================================
diff -r c5dc3e37d767 -r 09de1a4441d6 source/common/pixel.cpp
--- a/source/common/pixel.cpp    Tue Oct 01 17:28:19 2013 +0530
+++ b/source/common/pixel.cpp    Wed Oct 02 17:00:40 2013 +0530
@@ -514,7 +514,8 @@
     }
 }

-void weightUnidir(short *src, pixel *dst, intptr_t srcStride, intptr_t
dstStride, int width, int height, int w0, int round, int shift, int offset)
+template <typename T>
+void weightUnidir(T *src, pixel *dst, intptr_t srcStride, intptr_t
dstStride, int width, int height, int w0, int round, int shift, int offset)
 {
     int x, y;
     for (y = height - 1; y >= 0; y--)
@@ -842,7 +843,8 @@
     p.transpose[3] = transpose<32>;
     p.transpose[4] = transpose<64>;

-    p.weightpUni = weightUnidir;
+    p.weightpUniPixel = weightUnidir<pixel>;
+    p.weightpUni = weightUnidir<uint16_t>;

     p.pixelsub_sp = pixelsub_sp_c;
     p.pixeladd_pp = pixeladd_pp_c;
diff -r c5dc3e37d767 -r 09de1a4441d6 source/common/primitives.h
--- a/source/common/primitives.h    Tue Oct 01 17:28:19 2013 +0530
+++ b/source/common/primitives.h    Wed Oct 02 17:00:40 2013 +0530
@@ -228,8 +228,8 @@
 typedef void (*filterRowV_N_t)(short *midA, intptr_t midStride, pixel
*dstA, pixel *dstB, pixel *dstC, intptr_t dstStride, int width, int height,
int marginX, int marginY, int row, int isLastRow);
 typedef void (*extendCURowBorder_t)(pixel* txt, intptr_t stride, int
width, int height, int marginX);

-
-typedef void (*weightpUni_t)(short *src, pixel *dst, intptr_t srcStride,
intptr_t dstStride, int width, int height, int w0, int round, int shift,
int offset);
+typedef void (*weightpUniPixel_t)(pixel *src, pixel *dst, intptr_t
srcStride, intptr_t dstStride, int width, int height, int w0, int round,
int shift, int offset);
+typedef void (*weightpUni_t)(uint16_t *src, pixel *dst, intptr_t
srcStride, intptr_t dstStride, int width, int height, int w0, int round,
int shift, int offset);
 typedef void (*scale_t)(pixel *dst, pixel *src, intptr_t stride);
 typedef void (*downscale_t)(pixel *src0, pixel *dstf, pixel *dsth, pixel
*dstv, pixel *dstc,
                             intptr_t src_stride, intptr_t dst_stride, int
width, int height);
@@ -286,6 +286,7 @@
     calcrecon_t     calcrecon[NUM_SQUARE_BLOCKS];
     transpose_t     transpose[NUM_SQUARE_BLOCKS];

+    weightpUniPixel_t    weightpUniPixel;
     weightpUni_t    weightpUni;
     pixelsub_sp_t   pixelsub_sp;
     pixeladd_ss_t   pixeladd_ss;
diff -r c5dc3e37d767 -r 09de1a4441d6 source/common/vec/pixel.inc
--- a/source/common/vec/pixel.inc    Tue Oct 01 17:28:19 2013 +0530
+++ b/source/common/vec/pixel.inc    Wed Oct 02 17:00:40 2013 +0530
@@ -469,7 +469,9 @@
     p.transpose[2] = transpose16;
     p.transpose[3] = transpose32;
     p.transpose[4] = transpose<64>;
-    p.weightpUni = weightUnidir;
+    p.weightpUniPixel = weightUnidir<pixel>;
+    p.weightpUni = weightUnidir<uint16_t>;
+
 #endif

 #if !HIGH_BIT_DEPTH
diff -r c5dc3e37d767 -r 09de1a4441d6 source/common/vec/pixel8.inc
--- a/source/common/vec/pixel8.inc    Tue Oct 01 17:28:19 2013 +0530
+++ b/source/common/vec/pixel8.inc    Wed Oct 02 17:00:40 2013 +0530
@@ -8573,7 +8573,8 @@
     }
 }

-void weightUnidir(short *src, pixel *dst, intptr_t srcStride, intptr_t
dstStride, int width, int height, int w0, int round, int shift, int offset)
+template <typename T>
+void weightUnidir(T *src, pixel *dst, intptr_t srcStride, intptr_t
dstStride, int width, int height, int w0, int round, int shift, int offset)
 {
     int x, y;
     Vec8s tmp;
diff -r c5dc3e37d767 -r 09de1a4441d6 source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp    Tue Oct 01 17:28:19 2013 +0530
+++ b/source/test/pixelharness.cpp    Wed Oct 02 17:00:40 2013 +0530
@@ -343,8 +343,8 @@
     int offset = (rand() % 256) - 128;
     for (int i = 0; i < ITERS; i++)
     {
-        opt(sbuf1 + j, opt_dest, 64, 64, width, height, w0, round, shift,
offset);
-        ref(sbuf1 + j, ref_dest, 64, 64, width, height, w0, round, shift,
offset);
+        opt((uint16_t*)sbuf1 + j, opt_dest, 64, 64, width, height, w0,
round, shift, offset);
+        ref((uint16_t*)sbuf1 + j, ref_dest, 64, 64, width, height, w0,
round, shift, offset);

         if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
             return false;
@@ -355,6 +355,34 @@
     return true;
 }

+bool PixelHarness::check_weightpUni(weightpUniPixel_t ref,
weightpUniPixel_t opt)
+{
+    ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
+    ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
+
+    memset(ref_dest, 0, 64 * 64 * sizeof(pixel));
+    memset(opt_dest, 0, 64 * 64 * sizeof(pixel));
+    int j = 0;
+    int width = (2 * rand()) % 64;
+    int height = 8;
+    int w0 = rand() % 256;
+    int shift = rand() % 12;
+    int round = shift ? (1 << (shift - 1)) : 0;
+    int offset = (rand() % 256) - 128;
+    for (int i = 0; i < ITERS; i++)
+    {
+        opt((pixel *)sbuf1 + j, opt_dest, 64, 64, width, height, w0,
round, shift, offset);
+        ref((pixel *)sbuf1 + j, ref_dest, 64, 64, width, height, w0,
round, shift, offset);
+
+        if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
+            return false;
+
+        j += INCR;
+    }
+
+    return true;
+}
+
 bool PixelHarness::check_pixelsub_sp(pixelsub_sp_t ref, pixelsub_sp_t opt)
 {
     ALIGN_VAR_16(short, ref_dest[64 * 64]);
@@ -604,6 +632,24 @@
         }
     }

+    if (opt.weightpUniPixel)
+    {
+        if (!check_weightpUni(ref.weightpUniPixel, opt.weightpUniPixel))
+        {
+            printf("Weighted Prediction for Unidir failed!\n");
+            return false;
+        }
+    }
+
+    if (opt.weightpUniPixel)
+    {
+        if (!check_weightpUni(ref.weightpUniPixel, opt.weightpUniPixel))
+        {
+            printf("Weighted Prediction for Unidir failed!\n");
+            return false;
+        }
+    }
+
     if (opt.weightpUni)
     {
         if (!check_weightpUni(ref.weightpUni, opt.weightpUni))
@@ -751,10 +797,16 @@
         REPORT_SPEEDUP(opt.blockcpy_sc, ref.blockcpy_sc, 64, 64,
(short*)pbuf1, FENC_STRIDE, (uint8_t*)pbuf2, STRIDE);
     }

+    if (opt.weightpUniPixel)
+    {
+        printf("WeightpUni");
+        REPORT_SPEEDUP(opt.weightpUniPixel, ref.weightpUniPixel, pbuf1,
pbuf2, 64, 64, 32, 32, 128, 1 << 9, 10, 100);
+    }
+
     if (opt.weightpUni)
     {
         printf("WeightpUni");
-        REPORT_SPEEDUP(opt.weightpUni, ref.weightpUni, sbuf1, pbuf1, 64,
64, 32, 32, 128, 1 << 9, 10, 100);
+        REPORT_SPEEDUP(opt.weightpUni, ref.weightpUni, (uint16_t*)sbuf1,
pbuf1, 64, 64, 32, 32, 128, 1 << 9, 10, 100);
     }

     if (opt.pixelsub_sp)
diff -r c5dc3e37d767 -r 09de1a4441d6 source/test/pixelharness.h
--- a/source/test/pixelharness.h    Tue Oct 01 17:28:19 2013 +0530
+++ b/source/test/pixelharness.h    Wed Oct 02 17:00:40 2013 +0530
@@ -46,6 +46,7 @@
     bool check_block_copy_s_c(blockcpy_sc_t ref, blockcpy_sc_t opt);
     bool check_calresidual(calcresidual_t ref, calcresidual_t opt);
     bool check_calcrecon(calcrecon_t ref, calcrecon_t opt);
+    bool check_weightpUni(weightpUniPixel_t ref, weightpUniPixel_t opt);
     bool check_weightpUni(weightpUni_t ref, weightpUni_t opt);
     bool check_pixelsub_sp(pixelsub_sp_t ref, pixelsub_sp_t opt);
     bool check_pixeladd_ss(pixeladd_ss_t ref, pixeladd_ss_t opt);
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131002/7bb6688f/attachment.html>


More information about the x265-devel mailing list