[x265] Fwd: [PATCH] Added C primitive and unit test code for chroma filter

Praveen Tiwari praveen at multicorewareinc.com
Wed Oct 16 06:52:15 CEST 2013


> +template<int N, int width>
> +void interp_horiz_pp(pixel *src, intptr_t srcStride, pixel *dst, intptr_t
> dstStride, int height, int coeffIdx)
> +{
> +    int cStride = 1;
> +    short const * coeff= g_chromaFilter[coeffIdx];
> +    src -= (N / 2 - 1) * cStride;
> +    coeffIdx;
> +    int offset;
> +    short maxVal;
> +    int headRoom = IF_INTERNAL_PREC - X265_DEPTH;
> +    offset =  (1 << (headRoom - 1));
> +    maxVal = (1 << X265_DEPTH) - 1;
> +
> +    int row, col;
> +    for (row = 0; row < height; row++)
> +    {
> +        for (col = 0; col < width; col++)
> +        {
> +            int sum;
> +
> +            sum  = src[col + 0 * cStride] * coeff[0];
> +            sum += src[col + 1 * cStride] * coeff[1];
> +            if (N >= 4)
> +            {
> +                sum += src[col + 2 * cStride] * coeff[2];
> +                sum += src[col + 3 * cStride] * coeff[3];
> +            }
>
>>the N>= 6 check seems out of place, unless we're going to instantiate a
7tap filter
Actually, I wanted to add a single C primitive for chroma and luma this is
why I did not change check condition as they will be required in luma
functions.

> +            if (N >= 6)
> +            {
> +                sum += src[col + 4 * cStride] * coeff[4];
> +                sum += src[col + 5 * cStride] * coeff[5];
> +            }
> +            if (N == 8)
> +            {
> +                sum += src[col + 6 * cStride] * coeff[6];
> +                sum += src[col + 7 * cStride] * coeff[7];
> +            }
> +            short val = (short)(sum + offset) >> headRoom;
> +
> +            if (val < 0) val = 0;
> +            if (val > maxVal) val = maxVal;
> +            dst[col] = (pixel)val;
> +        }
> +
> +        src += srcStride;
> +        dst += dstStride;
> +    }
> +}
>  }
>
>  namespace x265 {
> diff -r 1087f1f3bf5a -r 39fc3c36e1b1 source/test/ipfilterharness.cpp
> --- a/source/test/ipfilterharness.cpp   Tue Oct 15 20:57:54 2013 +0530
> +++ b/source/test/ipfilterharness.cpp   Tue Oct 15 21:22:03 2013 +0530
> @@ -3,6 +3,7 @@
>   *
>   * Authors: Deepthi Devaki <deepthidevaki at multicorewareinc.com>,
>   *          Rajesh Paulraj <rajesh at multicorewareinc.com>
> + *          Praveen  Kumar Tiwari <praveen at multicorewareinc.com>
>   *
>   * This program is free software; you can redistribute it and/or modify
>   * it under the terms of the GNU General Public License as published by
> @@ -39,6 +40,18 @@
>      "ipfilterV_pp<4>"
>  };
>
> +const char* ChromaFilterPPNames[] =
> +{
> +    "interp_4tap_horiz_pp_w2",
> +    "interp_4tap_horiz_pp_w4",
> +    "interp_4tap_horiz_pp_w6",
> +    "interp_4tap_horiz_pp_w8",
> +    "interp_4tap_horiz_pp_w12",
> +    "interp_4tap_horiz_pp_w16",
> +    "interp_4tap_horiz_pp_w24",
> +    "interp_4tap_horiz_pp_w32"
> +};
>

the names should correspond with the chroma size enums, which only specify
a width. This string table should be re-usable for more than just 4tap
horizontal pixel to pixel interpolation.  Each element should just be "W2"
or something similar so it can be used as:

printf("chroma_hpp[%s]: ", ChromaFilterName[w]);


> +
>  IPFilterHarness::IPFilterHarness()
>  {
>      ipf_t_size = 200 * 200;
> @@ -262,6 +275,47 @@
>      return true;
>  }
>
> +bool IPFilterHarness::check_IPFilter_primitive(filter_pp_t ref,
> filter_pp_t opt)
>

there needs to be chroma and luma versions of this function for the two
filter lengths, or pass filter length as an argument


> +{
> +    int rand_height = rand() % 100;                 // Randomly generated
> Height
>

I don't see a point to testing any sizes not used by the encoder; this just
prevents possible optimizations in the primitive.  Primitives that have
fixed dimensions should be tested with those fixed dimensions used by the
encoder.


> +    int rand_val, rand_srcStride, rand_dstStride, rand_coeffIdx;
> +
> +    for (int i = 0; i <= 100; i++)
> +    {
> +        memset(IPF_vec_output_p, 0, ipf_t_size);      // Initialize
> output buffer to zero
> +        memset(IPF_C_output_p, 0, ipf_t_size);        // Initialize
> output buffer to zero
>

is memzero really necessary here? I don't think so

+
> +        rand_coeffIdx = rand() % 8;                // Random coeffIdex in
> the filter
>

>>chroma coeff index should be 1, 2, or 3

I think chroma table is
const short g_chromaFilter[8][NTAPS_CHROMA] =
{
    {  0, 64,  0,  0 },
    { -2, 58, 10, -2 },
    { -4, 54, 16, -2 },
    { -6, 46, 28, -4 },
    { -4, 36, 36, -4 },
    { -4, 28, 46, -6 },
    { -2, 16, 54, -4 },
    { -2, 10, 58, -2 }
};
  we have coeff table also in similar fashion so I need 0 to 7 coeffIdex.

+        rand_val = rand() % 4;                     // Random offset in the
> filter
>

rand_val is unused


> +        rand_srcStride = rand() % 100;              // Randomly generated
> srcStride
> +        rand_dstStride = rand() % 100;              // Randomly generated
> dstStride
> +
> +        if (rand_srcStride < 32)
> +            rand_srcStride = 32;
> +
> +        if (rand_dstStride < 32)
> +            rand_dstStride = 32;
> +
> +        opt(pixel_buff + 3 * rand_srcStride,
> +            rand_srcStride,
> +            IPF_vec_output_p,
> +            rand_dstStride,
> +            rand_height, rand_coeffIdx
> +            );
> +        ref(pixel_buff + 3 * rand_srcStride,
> +            rand_srcStride,
> +            IPF_C_output_p,
> +            rand_dstStride,
> +            rand_height, rand_coeffIdx
> +            );
> +
> +        if (memcmp(IPF_vec_output_p, IPF_C_output_p, ipf_t_size))
> +            return false;
> +    }
> +
> +    return true;
> +}
> +
>  bool IPFilterHarness::testCorrectness(const EncoderPrimitives& ref, const
> EncoderPrimitives& opt)
>  {
>      for (int value = 0; value < NUM_IPFILTER_P_P; value++)
> @@ -318,6 +372,18 @@
>          }
>      }
>
> +    for (int value = 0; value < NUM_CHROMA_PARTITIONS; value++)
> +    {
> +        if (opt.chroma_hpp[value])
> +        {
>

this should test known heights for each width


> +            if (!check_IPFilter_primitive(ref.chroma_hpp[value],
> opt.chroma_hpp[value]))
> +            {
> +                printf("%s failed\n", ChromaFilterPPNames[value]);
> +                return false;
> +            }
> +        }
> +    }
> +
>      return true;
>  }
>
> @@ -372,4 +438,14 @@
>          REPORT_SPEEDUP(opt.ipfilter_s2p, ref.ipfilter_s2p,
>                         short_buff, srcStride, IPF_vec_output_p,
> dstStride, width, height);
>      }
> +
> +    for (int value = 0; value < NUM_CHROMA_PARTITIONS; value++)
> +    {
>

this should measure performance at each height supported by each width


> +        if (opt.chroma_hpp[value])
> +        {
> +            printf("%s\t", ChromaFilterPPNames[value]);
> +            REPORT_SPEEDUP(opt.chroma_hpp[value], ref.chroma_hpp[value],
> +                           pixel_buff + 3 * srcStride, srcStride,
> IPF_vec_output_p, dstStride, height, 1);
> +        }
> +    }
>  }
> diff -r 1087f1f3bf5a -r 39fc3c36e1b1 source/test/ipfilterharness.h
> --- a/source/test/ipfilterharness.h     Tue Oct 15 20:57:54 2013 +0530
> +++ b/source/test/ipfilterharness.h     Tue Oct 15 21:22:03 2013 +0530
> @@ -3,6 +3,7 @@
>   *
>   * Authors: Deepthi Devaki <deepthidevaki at multicorewareinc.com>,
>   *          Rajesh Paulraj <rajesh at multicorewareinc.com>
> + *          Praveen  Kumar Tiwari <praveen at multicorewareinc.com>
>   *
>   * This program is free software; you can redistribute it and/or modify
>   * it under the terms of the GNU General Public License as published by
> @@ -45,6 +46,7 @@
>      bool check_IPFilter_primitive(ipfilter_sp_t ref, ipfilter_sp_t opt);
>      bool check_IPFilter_primitive(ipfilter_p2s_t ref, ipfilter_p2s_t opt);
>      bool check_IPFilter_primitive(ipfilter_s2p_t ref, ipfilter_s2p_t opt);
> +    bool check_IPFilter_primitive(filter_pp_t ref, filter_pp_t opt);
>
>  public:
>
> _______________________________________________
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131016/f38c1b0b/attachment.html>


More information about the x265-devel mailing list