[x265] [PATCH REVIEW Only ] Chroma function, partion based call

Steve Borho steve at borho.org
Wed Oct 9 19:59:19 CEST 2013


On Wed, Oct 9, 2013 at 8:54 AM, <praveen at multicorewareinc.com> wrote:

> # HG changeset patch
> # User Praveen Tiwari
> # Date 1381326812 -19800
> # Node ID 37b42347a5baefe11822888d385e4c8422f4f3f3
> # Parent  fc7fbdd18bc0d6d7f98180332e065d83c054fe02
> Chroma function, partion based call
>
> diff -r fc7fbdd18bc0 -r 37b42347a5ba source/common/ipfilter.cpp
> --- a/source/common/ipfilter.cpp        Wed Oct 09 00:00:10 2013 -0500
> +++ b/source/common/ipfilter.cpp        Wed Oct 09 19:23:32 2013 +0530
> @@ -34,6 +34,56 @@
>  #pragma warning(disable: 4127) // conditional expression is constant,
> typical for templated functions
>  #endif
>
> +#define
> SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W2(FUNC_PREFIX, WIDTH)
> \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x4]  =
> FUNC_PREFIX<4,  WIDTH,  4>; \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x8]  =
> FUNC_PREFIX<4,  WIDTH,  8>;
> +
> +#define
> SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W4(FUNC_PREFIX, WIDTH)
> \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x2]  =
> FUNC_PREFIX<4,  WIDTH,  2>; \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x4]  =
> FUNC_PREFIX<4,  WIDTH,  4>; \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x8]  =
> FUNC_PREFIX<4,  WIDTH,  8>; \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x16]  =
> FUNC_PREFIX<4,  WIDTH,  16>;
> +
> +#define
> SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W6(FUNC_PREFIX, WIDTH)
> \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x8]  =
> FUNC_PREFIX<4,  WIDTH,  8>;
> +
> +#define
> SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W8(FUNC_PREFIX, WIDTH)
> \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x2]  =
> FUNC_PREFIX<4,  WIDTH,  2>; \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x4]  =
> FUNC_PREFIX<4,  WIDTH,  4>; \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x6]  =
> FUNC_PREFIX<4,  WIDTH,  6>; \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x8]  =
> FUNC_PREFIX<4,  WIDTH,  8>; \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x16]  =
> FUNC_PREFIX<4,  WIDTH,  16>; \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x32]  =
> FUNC_PREFIX<4,  WIDTH,  32>;
> +
> +#define
> SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W12(FUNC_PREFIX,
> WIDTH) \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x16]  =
> FUNC_PREFIX<4,  WIDTH,  16>;
> +
> +#define
> SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W16(FUNC_PREFIX,
> WIDTH) \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x4]  =
> FUNC_PREFIX<4,  WIDTH,  4>; \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x8]  =
> FUNC_PREFIX<4,  WIDTH,  8>; \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x12]  =
> FUNC_PREFIX<4,  WIDTH,  12>; \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x16]  =
> FUNC_PREFIX<4,  WIDTH,  16>; \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x32]  =
> FUNC_PREFIX<4,  WIDTH,  32>;
> +
> +#define
> SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W24(FUNC_PREFIX,
> WIDTH) \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x32]  =
> FUNC_PREFIX<4,  WIDTH,  32>;
> +
> +#define
> SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W32(FUNC_PREFIX,
> WIDTH) \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x8]  =
> FUNC_PREFIX<4,  WIDTH,  8>; \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x16]  =
> FUNC_PREFIX<4,  WIDTH,  16>; \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x24]  =
> FUNC_PREFIX<4,  WIDTH,  24>; \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x32]  =
> FUNC_PREFIX<4,  WIDTH,  32>;
> +
> +#define SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE(FUNC_PREFIX) \
> +    SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W2(FUNC_PREFIX,
>  2) \
> +    SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W4(FUNC_PREFIX,
>  4) \
> +    SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W6(FUNC_PREFIX,
>  6) \
> +    SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W8(FUNC_PREFIX,
>  8) \
> +    SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W12(FUNC_PREFIX,
> 12) \
> +    SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W16(FUNC_PREFIX,
> 16) \
> +    SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W24(FUNC_PREFIX,
> 24) \
> +    SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W32(FUNC_PREFIX,
> 32) \
> +
>

Passing width as an argument to a macro with the width in the name seems
rather redundant.  I think it would be more clear if you skipped the
intermediate macros and just listed them all in this single macro.

FUNC_PREFIX also seems odd for a macro with CHROMA_HORIZONTAL_PP in the name

 namespace {
>  template<int N>
>  void filterVertical_s_p(short *src, intptr_t srcStride, pixel *dst,
> intptr_t dstStride, int width, int height, short const *coeff)
> @@ -88,8 +138,8 @@
>      }
>  }
>
> -template<int N>
> -void filterHorizontal_p_p(pixel *src, intptr_t srcStride, pixel *dst,
> intptr_t dstStride, int width, int height, short const *coeff)
> +template<int N, int width, int height>
> +void filterHorizontal_p_p(pixel *src, intptr_t srcStride, pixel *dst,
> intptr_t dstStride, short const *coeff)
>  {
>

the original C primitive that takes width/height is still needed for the
odd block sizes used during subpel refine.


>      int cStride = 1;
>
> @@ -500,11 +550,13 @@
>
>  void Setup_C_IPFilterPrimitives(EncoderPrimitives& p)
>  {
> +
> +    SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE(filterHorizontal_p_p)
> +
>      p.ipfilter_pp[FILTER_H_P_P_8] = filterHorizontal_p_p<8>;
>      p.ipfilter_ps[FILTER_H_P_S_8] = filterHorizontal_p_s<8>;
>      p.ipfilter_ps[FILTER_V_P_S_8] = filterVertical_p_s<8>;
>      p.ipfilter_sp[FILTER_V_S_P_8] = filterVertical_s_p<8>;
> -    p.ipfilter_pp[FILTER_H_P_P_4] = filterHorizontal_p_p<4>;
>      p.ipfilter_ps[FILTER_H_P_S_4] = filterHorizontal_p_s<4>;
>      p.ipfilter_ps[FILTER_V_P_S_4] = filterVertical_p_s<4>;
>      p.ipfilter_sp[FILTER_V_S_P_4] = filterVertical_s_p<4>;
> diff -r fc7fbdd18bc0 -r 37b42347a5ba source/common/primitives.h
> --- a/source/common/primitives.h        Wed Oct 09 00:00:10 2013 -0500
> +++ b/source/common/primitives.h        Wed Oct 09 19:23:32 2013 +0530
> @@ -89,6 +89,17 @@
>      NUM_PARTITIONS
>  };
>
> +enum ChromaPartions
> +{
> +    CHROMA_HORIZONTAL_PP_PARTITION_2x4,
>  CHROMA_HORIZONTAL_PP_PARTITION_2x8,  CHROMA_HORIZONTAL_PP_PARTITION_4x2,
>  CHROMA_HORIZONTAL_PP_PARTITION_4x4,
> +    CHROMA_HORIZONTAL_PP_PARTITION_4x8,
>  CHROMA_HORIZONTAL_PP_PARTITION_4x16,  CHROMA_HORIZONTAL_PP_PARTITION_8x2,
>  CHROMA_HORIZONTAL_PP_PARTITION_8x4,
> +    CHROMA_HORIZONTAL_PP_PARTITION_8x6,
>  CHROMA_HORIZONTAL_PP_PARTITION_8x8,  CHROMA_HORIZONTAL_PP_PARTITION_8x16,
>  CHROMA_HORIZONTAL_PP_PARTITION_8x32,
> +    CHROMA_HORIZONTAL_PP_PARTITION_6x8,
>  CHROMA_HORIZONTAL_PP_PARTITION_12x16,
>  CHROMA_HORIZONTAL_PP_PARTITION_16x4,  CHROMA_HORIZONTAL_PP_PARTITION_16x8,
> +    CHROMA_HORIZONTAL_PP_PARTITION_16x12,
>  CHROMA_HORIZONTAL_PP_PARTITION_16x16,
>  CHROMA_HORIZONTAL_PP_PARTITION_16x32,
>  CHROMA_HORIZONTAL_PP_PARTITION_24x32,
> +    CHROMA_HORIZONTAL_PP_PARTITION_32x8,
>  CHROMA_HORIZONTAL_PP_PARTITION_32x16,
>  CHROMA_HORIZONTAL_PP_PARTITION_32x24,
>  CHROMA_HORIZONTAL_PP_PARTITION_32x32,
> +    NUM_CHROMA_HORIZONTAL_PP_PARTITIONS
> +};
>

the chroma partition table needs to be shareable between all the chroma
interpolation routines; these names are too specific (drop HORIZONTAL_PP).

This table needs to line up with the luma partition table, so you can take
a luma part index and use it directly in the chroma function tables to get
the appropriately 4:2:0 down-scaled chroma partition.  ..or.. you need to
make a mapping table to get from luma part idx to chroma part idx.


> +
>  enum SquareBlocks   // Routines can be indexed using log2n(width)
>  {
>      BLOCK_4x4,
> @@ -205,6 +216,8 @@
>  typedef void (*ssim_4x4x2_core_t)(const pixel *pix1, intptr_t stride1,
> const pixel *pix2, intptr_t stride2, ssim_t sums[2][4]);
>  typedef float (*ssim_end4_t)(ssim_t sum0[5][4], ssim_t sum1[5][4], int
> width);
>
> +typedef void (*chromaFilterHoriz_pp)(pixel *src, intptr_t srcStride,
> pixel *dst, intptr_t dstStride, const short *coeff);   // Modified argument
> list for chroma filter, removed width and height.
>

This same funcdef could be used for horizontal and vertical and luma, so
it's best to leave Chroma and Horiz from the name.  Also, funcdefs should
end in _t like the rest in this file.

Did we decide to put the coefficient tables into the assembly as byte
arrays?  if so, the last argument should be int coeffIdx (0..3)


> +
>  /* Define a structure containing function pointers to optimized encoder
>   * primitives.  Each pointer can reference either an assembly routine,
>   * a vectorized primitive, or a C function. */
> @@ -265,6 +278,8 @@
>      downscale_t     frame_init_lowres_core;
>      ssim_4x4x2_core_t ssim_4x4x2_core;
>      ssim_end4_t       ssim_end_4;
> +
> +    chromaFilterHoriz_pp
>  filterHorizontal_p_p[NUM_CHROMA_HORIZONTAL_PP_PARTITIONS];
>

taking everything above into account, this would be something like:

   filter_pp_t luma_hpp[NUM_PARTITIONS];
   filter_pp_t luma_vpp[NUM_PARTITIONS];
   filter_pp_t chroma_hpp[NUM_CHROMA_PARTITIONS];
   filter_pp_t chroma_vpp[NUM_CHROMA_PARTITIONS];


>  };
>
>  /* This copy of the table is what gets used by the encoder.
> diff -r fc7fbdd18bc0 -r 37b42347a5ba source/common/vec/ipfilter-sse41.cpp
> --- a/source/common/vec/ipfilter-sse41.cpp      Wed Oct 09 00:00:10 2013
> -0500
> +++ b/source/common/vec/ipfilter-sse41.cpp      Wed Oct 09 19:23:32 2013
> +0530
> @@ -1541,8 +1541,8 @@
>      -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0
>  };
>
> -template<int N>
> -void filterHorizontal_p_p(pixel *src, intptr_t srcStride, pixel *dst,
> intptr_t dstStride, int width, int height, short const *coeff)
> +template<int N, int width, int height>
> +void filterHorizontal_p_p(pixel *src, intptr_t srcStride, pixel *dst,
> intptr_t dstStride, short const *coeff)
>  {
>      assert(X265_DEPTH == 8);
>
> @@ -1656,9 +1656,35 @@
>  }
>
>  namespace x265 {
> +#define SETUP_PARTITION(W, H) \
> +    p.filterHorizontal_p_p[CHROMA_HORIZONTAL_PP_PARTITION_##W##x##H] =
> filterHorizontal_p_p##<4,  W,  H>;
> +
>  void Setup_Vec_IPFilterPrimitives_sse41(EncoderPrimitives& p)
>  {
> -    p.ipfilter_pp[FILTER_H_P_P_4] = filterHorizontal_p_p<4>;
> +        SETUP_PARTITION(2,  4);
> +        SETUP_PARTITION(2,  8);
> +        SETUP_PARTITION(4,  2);
> +        SETUP_PARTITION(4,  4);
> +        SETUP_PARTITION(4,  8);
> +        SETUP_PARTITION(4,  16);
> +        SETUP_PARTITION(6,  8);
> +        SETUP_PARTITION(8,  2);
> +        SETUP_PARTITION(8,  4);
> +        SETUP_PARTITION(8,  6);
> +        SETUP_PARTITION(8,  8);
> +        SETUP_PARTITION(8,  16);
> +        SETUP_PARTITION(12,  16);
> +        SETUP_PARTITION(16,  4);
> +        SETUP_PARTITION(16,  8);
> +        SETUP_PARTITION(16,  12);
> +        SETUP_PARTITION(16,  16);
> +        SETUP_PARTITION(16,  32);
> +        SETUP_PARTITION(24,  32);
> +        SETUP_PARTITION(32,  8);
> +        SETUP_PARTITION(32,  16);
> +        SETUP_PARTITION(32,  24);
> +        SETUP_PARTITION(32,  32);
> +
>      p.ipfilter_pp[FILTER_H_P_P_8] = filterHorizontal_p_p<8>;
>
>      p.ipfilter_pp[FILTER_V_P_P_4] = filterVertical_p_p<4>;
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>



-- 
Steve Borho
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131009/6c2a8955/attachment-0001.html>


More information about the x265-devel mailing list