[x265] Fwd: [PATCH] Added C primitive and unit test code for chroma filter
Praveen Tiwari
praveen at multicorewareinc.com
Wed Oct 16 06:52:15 CEST 2013
> +template<int N, int width>
> +void interp_horiz_pp(pixel *src, intptr_t srcStride, pixel *dst, intptr_t
> dstStride, int height, int coeffIdx)
> +{
> + int cStride = 1;
> + short const * coeff= g_chromaFilter[coeffIdx];
> + src -= (N / 2 - 1) * cStride;
> + coeffIdx;
> + int offset;
> + short maxVal;
> + int headRoom = IF_INTERNAL_PREC - X265_DEPTH;
> + offset = (1 << (headRoom - 1));
> + maxVal = (1 << X265_DEPTH) - 1;
> +
> + int row, col;
> + for (row = 0; row < height; row++)
> + {
> + for (col = 0; col < width; col++)
> + {
> + int sum;
> +
> + sum = src[col + 0 * cStride] * coeff[0];
> + sum += src[col + 1 * cStride] * coeff[1];
> + if (N >= 4)
> + {
> + sum += src[col + 2 * cStride] * coeff[2];
> + sum += src[col + 3 * cStride] * coeff[3];
> + }
>
>>the N>= 6 check seems out of place, unless we're going to instantiate a
7tap filter
Actually, I wanted to add a single C primitive for chroma and luma this is
why I did not change check condition as they will be required in luma
functions.
> + if (N >= 6)
> + {
> + sum += src[col + 4 * cStride] * coeff[4];
> + sum += src[col + 5 * cStride] * coeff[5];
> + }
> + if (N == 8)
> + {
> + sum += src[col + 6 * cStride] * coeff[6];
> + sum += src[col + 7 * cStride] * coeff[7];
> + }
> + short val = (short)(sum + offset) >> headRoom;
> +
> + if (val < 0) val = 0;
> + if (val > maxVal) val = maxVal;
> + dst[col] = (pixel)val;
> + }
> +
> + src += srcStride;
> + dst += dstStride;
> + }
> +}
> }
>
> namespace x265 {
> diff -r 1087f1f3bf5a -r 39fc3c36e1b1 source/test/ipfilterharness.cpp
> --- a/source/test/ipfilterharness.cpp Tue Oct 15 20:57:54 2013 +0530
> +++ b/source/test/ipfilterharness.cpp Tue Oct 15 21:22:03 2013 +0530
> @@ -3,6 +3,7 @@
> *
> * Authors: Deepthi Devaki <deepthidevaki at multicorewareinc.com>,
> * Rajesh Paulraj <rajesh at multicorewareinc.com>
> + * Praveen Kumar Tiwari <praveen at multicorewareinc.com>
> *
> * This program is free software; you can redistribute it and/or modify
> * it under the terms of the GNU General Public License as published by
> @@ -39,6 +40,18 @@
> "ipfilterV_pp<4>"
> };
>
> +const char* ChromaFilterPPNames[] =
> +{
> + "interp_4tap_horiz_pp_w2",
> + "interp_4tap_horiz_pp_w4",
> + "interp_4tap_horiz_pp_w6",
> + "interp_4tap_horiz_pp_w8",
> + "interp_4tap_horiz_pp_w12",
> + "interp_4tap_horiz_pp_w16",
> + "interp_4tap_horiz_pp_w24",
> + "interp_4tap_horiz_pp_w32"
> +};
>
the names should correspond with the chroma size enums, which only specify
a width. This string table should be re-usable for more than just 4tap
horizontal pixel to pixel interpolation. Each element should just be "W2"
or something similar so it can be used as:
printf("chroma_hpp[%s]: ", ChromaFilterName[w]);
> +
> IPFilterHarness::IPFilterHarness()
> {
> ipf_t_size = 200 * 200;
> @@ -262,6 +275,47 @@
> return true;
> }
>
> +bool IPFilterHarness::check_IPFilter_primitive(filter_pp_t ref,
> filter_pp_t opt)
>
there needs to be chroma and luma versions of this function for the two
filter lengths, or pass filter length as an argument
> +{
> + int rand_height = rand() % 100; // Randomly generated
> Height
>
I don't see a point to testing any sizes not used by the encoder; this just
prevents possible optimizations in the primitive. Primitives that have
fixed dimensions should be tested with those fixed dimensions used by the
encoder.
> + int rand_val, rand_srcStride, rand_dstStride, rand_coeffIdx;
> +
> + for (int i = 0; i <= 100; i++)
> + {
> + memset(IPF_vec_output_p, 0, ipf_t_size); // Initialize
> output buffer to zero
> + memset(IPF_C_output_p, 0, ipf_t_size); // Initialize
> output buffer to zero
>
is memzero really necessary here? I don't think so
+
> + rand_coeffIdx = rand() % 8; // Random coeffIdex in
> the filter
>
>>chroma coeff index should be 1, 2, or 3
I think chroma table is
const short g_chromaFilter[8][NTAPS_CHROMA] =
{
{ 0, 64, 0, 0 },
{ -2, 58, 10, -2 },
{ -4, 54, 16, -2 },
{ -6, 46, 28, -4 },
{ -4, 36, 36, -4 },
{ -4, 28, 46, -6 },
{ -2, 16, 54, -4 },
{ -2, 10, 58, -2 }
};
we have coeff table also in similar fashion so I need 0 to 7 coeffIdex.
+ rand_val = rand() % 4; // Random offset in the
> filter
>
rand_val is unused
> + rand_srcStride = rand() % 100; // Randomly generated
> srcStride
> + rand_dstStride = rand() % 100; // Randomly generated
> dstStride
> +
> + if (rand_srcStride < 32)
> + rand_srcStride = 32;
> +
> + if (rand_dstStride < 32)
> + rand_dstStride = 32;
> +
> + opt(pixel_buff + 3 * rand_srcStride,
> + rand_srcStride,
> + IPF_vec_output_p,
> + rand_dstStride,
> + rand_height, rand_coeffIdx
> + );
> + ref(pixel_buff + 3 * rand_srcStride,
> + rand_srcStride,
> + IPF_C_output_p,
> + rand_dstStride,
> + rand_height, rand_coeffIdx
> + );
> +
> + if (memcmp(IPF_vec_output_p, IPF_C_output_p, ipf_t_size))
> + return false;
> + }
> +
> + return true;
> +}
> +
> bool IPFilterHarness::testCorrectness(const EncoderPrimitives& ref, const
> EncoderPrimitives& opt)
> {
> for (int value = 0; value < NUM_IPFILTER_P_P; value++)
> @@ -318,6 +372,18 @@
> }
> }
>
> + for (int value = 0; value < NUM_CHROMA_PARTITIONS; value++)
> + {
> + if (opt.chroma_hpp[value])
> + {
>
this should test known heights for each width
> + if (!check_IPFilter_primitive(ref.chroma_hpp[value],
> opt.chroma_hpp[value]))
> + {
> + printf("%s failed\n", ChromaFilterPPNames[value]);
> + return false;
> + }
> + }
> + }
> +
> return true;
> }
>
> @@ -372,4 +438,14 @@
> REPORT_SPEEDUP(opt.ipfilter_s2p, ref.ipfilter_s2p,
> short_buff, srcStride, IPF_vec_output_p,
> dstStride, width, height);
> }
> +
> + for (int value = 0; value < NUM_CHROMA_PARTITIONS; value++)
> + {
>
this should measure performance at each height supported by each width
> + if (opt.chroma_hpp[value])
> + {
> + printf("%s\t", ChromaFilterPPNames[value]);
> + REPORT_SPEEDUP(opt.chroma_hpp[value], ref.chroma_hpp[value],
> + pixel_buff + 3 * srcStride, srcStride,
> IPF_vec_output_p, dstStride, height, 1);
> + }
> + }
> }
> diff -r 1087f1f3bf5a -r 39fc3c36e1b1 source/test/ipfilterharness.h
> --- a/source/test/ipfilterharness.h Tue Oct 15 20:57:54 2013 +0530
> +++ b/source/test/ipfilterharness.h Tue Oct 15 21:22:03 2013 +0530
> @@ -3,6 +3,7 @@
> *
> * Authors: Deepthi Devaki <deepthidevaki at multicorewareinc.com>,
> * Rajesh Paulraj <rajesh at multicorewareinc.com>
> + * Praveen Kumar Tiwari <praveen at multicorewareinc.com>
> *
> * This program is free software; you can redistribute it and/or modify
> * it under the terms of the GNU General Public License as published by
> @@ -45,6 +46,7 @@
> bool check_IPFilter_primitive(ipfilter_sp_t ref, ipfilter_sp_t opt);
> bool check_IPFilter_primitive(ipfilter_p2s_t ref, ipfilter_p2s_t opt);
> bool check_IPFilter_primitive(ipfilter_s2p_t ref, ipfilter_s2p_t opt);
> + bool check_IPFilter_primitive(filter_pp_t ref, filter_pp_t opt);
>
> public:
>
> _______________________________________________
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131016/f38c1b0b/attachment.html>
More information about the x265-devel
mailing list