[x265] [PATCH REVIEW Only] Chroma function, partition based call

Praveen Tiwari praveen at multicorewareinc.com
Wed Oct 9 16:12:47 CEST 2013


Steve, I have added partition-based calls for all sizes of the chroma function.
Can you tell me whether this is the right direction? If so, I will replace the
luma functions similarly. I have a few questions about the approach to
combining functions, as you said:
[12:29:36 PM] Steve Borho: if YFrac is 0, it would just do lumaH_pp.  If
XFrac is 0, it would just do lumaV_pp.  else it does src -> lumaH_ps ->
temp -> lumaV_sp -> dst

Are YFrac and XFrac template parameters? I think we need to combine
the C code of both functions into a single function and pass an extra buffer,
temp, as an argument. Would we also have to modify the intrinsic and asm code?

Regards,
Praveen Tiwari


On Wed, Oct 9, 2013 at 7:24 PM, <praveen at multicorewareinc.com> wrote:

> # HG changeset patch
> # User Praveen Tiwari
> # Date 1381326812 -19800
> # Node ID 37b42347a5baefe11822888d385e4c8422f4f3f3
> # Parent  fc7fbdd18bc0d6d7f98180332e065d83c054fe02
> Chroma function, partion based call
>
> diff -r fc7fbdd18bc0 -r 37b42347a5ba source/common/ipfilter.cpp
> --- a/source/common/ipfilter.cpp        Wed Oct 09 00:00:10 2013 -0500
> +++ b/source/common/ipfilter.cpp        Wed Oct 09 19:23:32 2013 +0530
> @@ -34,6 +34,56 @@
>  #pragma warning(disable: 4127) // conditional expression is constant,
> typical for templated functions
>  #endif
>
> +#define
> SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W2(FUNC_PREFIX, WIDTH)
> \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x4]  =
> FUNC_PREFIX<4,  WIDTH,  4>; \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x8]  =
> FUNC_PREFIX<4,  WIDTH,  8>;
> +
> +#define
> SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W4(FUNC_PREFIX, WIDTH)
> \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x2]  =
> FUNC_PREFIX<4,  WIDTH,  2>; \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x4]  =
> FUNC_PREFIX<4,  WIDTH,  4>; \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x8]  =
> FUNC_PREFIX<4,  WIDTH,  8>; \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x16]  =
> FUNC_PREFIX<4,  WIDTH,  16>;
> +
> +#define
> SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W6(FUNC_PREFIX, WIDTH)
> \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x8]  =
> FUNC_PREFIX<4,  WIDTH,  8>;
> +
> +#define
> SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W8(FUNC_PREFIX, WIDTH)
> \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x2]  =
> FUNC_PREFIX<4,  WIDTH,  2>; \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x4]  =
> FUNC_PREFIX<4,  WIDTH,  4>; \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x6]  =
> FUNC_PREFIX<4,  WIDTH,  6>; \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x8]  =
> FUNC_PREFIX<4,  WIDTH,  8>; \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x16]  =
> FUNC_PREFIX<4,  WIDTH,  16>; \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x32]  =
> FUNC_PREFIX<4,  WIDTH,  32>;
> +
> +#define
> SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W12(FUNC_PREFIX,
> WIDTH) \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x16]  =
> FUNC_PREFIX<4,  WIDTH,  16>;
> +
> +#define
> SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W16(FUNC_PREFIX,
> WIDTH) \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x4]  =
> FUNC_PREFIX<4,  WIDTH,  4>; \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x8]  =
> FUNC_PREFIX<4,  WIDTH,  8>; \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x12]  =
> FUNC_PREFIX<4,  WIDTH,  12>; \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x16]  =
> FUNC_PREFIX<4,  WIDTH,  16>; \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x32]  =
> FUNC_PREFIX<4,  WIDTH,  32>;
> +
> +#define
> SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W24(FUNC_PREFIX,
> WIDTH) \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x32]  =
> FUNC_PREFIX<4,  WIDTH,  32>;
> +
> +#define
> SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W32(FUNC_PREFIX,
> WIDTH) \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x8]  =
> FUNC_PREFIX<4,  WIDTH,  8>; \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x16]  =
> FUNC_PREFIX<4,  WIDTH,  16>; \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x24]  =
> FUNC_PREFIX<4,  WIDTH,  24>; \
> +    p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x32]  =
> FUNC_PREFIX<4,  WIDTH,  32>;
> +
> +#define SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE(FUNC_PREFIX) \
> +    SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W2(FUNC_PREFIX,
>  2) \
> +    SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W4(FUNC_PREFIX,
>  4) \
> +    SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W6(FUNC_PREFIX,
>  6) \
> +    SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W8(FUNC_PREFIX,
>  8) \
> +    SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W12(FUNC_PREFIX,
> 12) \
> +    SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W16(FUNC_PREFIX,
> 16) \
> +    SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W24(FUNC_PREFIX,
> 24) \
> +    SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W32(FUNC_PREFIX,
> 32) \
> +
>  namespace {
>  template<int N>
>  void filterVertical_s_p(short *src, intptr_t srcStride, pixel *dst,
> intptr_t dstStride, int width, int height, short const *coeff)
> @@ -88,8 +138,8 @@
>      }
>  }
>
> -template<int N>
> -void filterHorizontal_p_p(pixel *src, intptr_t srcStride, pixel *dst,
> intptr_t dstStride, int width, int height, short const *coeff)
> +template<int N, int width, int height>
> +void filterHorizontal_p_p(pixel *src, intptr_t srcStride, pixel *dst,
> intptr_t dstStride, short const *coeff)
>  {
>      int cStride = 1;
>
> @@ -500,11 +550,13 @@
>
>  void Setup_C_IPFilterPrimitives(EncoderPrimitives& p)
>  {
> +
> +    SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE(filterHorizontal_p_p)
> +
>      p.ipfilter_pp[FILTER_H_P_P_8] = filterHorizontal_p_p<8>;
>      p.ipfilter_ps[FILTER_H_P_S_8] = filterHorizontal_p_s<8>;
>      p.ipfilter_ps[FILTER_V_P_S_8] = filterVertical_p_s<8>;
>      p.ipfilter_sp[FILTER_V_S_P_8] = filterVertical_s_p<8>;
> -    p.ipfilter_pp[FILTER_H_P_P_4] = filterHorizontal_p_p<4>;
>      p.ipfilter_ps[FILTER_H_P_S_4] = filterHorizontal_p_s<4>;
>      p.ipfilter_ps[FILTER_V_P_S_4] = filterVertical_p_s<4>;
>      p.ipfilter_sp[FILTER_V_S_P_4] = filterVertical_s_p<4>;
> diff -r fc7fbdd18bc0 -r 37b42347a5ba source/common/primitives.h
> --- a/source/common/primitives.h        Wed Oct 09 00:00:10 2013 -0500
> +++ b/source/common/primitives.h        Wed Oct 09 19:23:32 2013 +0530
> @@ -89,6 +89,17 @@
>      NUM_PARTITIONS
>  };
>
> +enum ChromaPartions
> +{
> +    CHROMA_HORIZONTAL_PP_PARTITION_2x4,
>  CHROMA_HORIZONTAL_PP_PARTITION_2x8,  CHROMA_HORIZONTAL_PP_PARTITION_4x2,
>  CHROMA_HORIZONTAL_PP_PARTITION_4x4,
> +    CHROMA_HORIZONTAL_PP_PARTITION_4x8,
>  CHROMA_HORIZONTAL_PP_PARTITION_4x16,  CHROMA_HORIZONTAL_PP_PARTITION_8x2,
>  CHROMA_HORIZONTAL_PP_PARTITION_8x4,
> +    CHROMA_HORIZONTAL_PP_PARTITION_8x6,
>  CHROMA_HORIZONTAL_PP_PARTITION_8x8,  CHROMA_HORIZONTAL_PP_PARTITION_8x16,
>  CHROMA_HORIZONTAL_PP_PARTITION_8x32,
> +    CHROMA_HORIZONTAL_PP_PARTITION_6x8,
>  CHROMA_HORIZONTAL_PP_PARTITION_12x16,
>  CHROMA_HORIZONTAL_PP_PARTITION_16x4,  CHROMA_HORIZONTAL_PP_PARTITION_16x8,
> +    CHROMA_HORIZONTAL_PP_PARTITION_16x12,
>  CHROMA_HORIZONTAL_PP_PARTITION_16x16,
>  CHROMA_HORIZONTAL_PP_PARTITION_16x32,
>  CHROMA_HORIZONTAL_PP_PARTITION_24x32,
> +    CHROMA_HORIZONTAL_PP_PARTITION_32x8,
>  CHROMA_HORIZONTAL_PP_PARTITION_32x16,
>  CHROMA_HORIZONTAL_PP_PARTITION_32x24,
>  CHROMA_HORIZONTAL_PP_PARTITION_32x32,
> +    NUM_CHROMA_HORIZONTAL_PP_PARTITIONS
> +};
> +
>  enum SquareBlocks   // Routines can be indexed using log2n(width)
>  {
>      BLOCK_4x4,
> @@ -205,6 +216,8 @@
>  typedef void (*ssim_4x4x2_core_t)(const pixel *pix1, intptr_t stride1,
> const pixel *pix2, intptr_t stride2, ssim_t sums[2][4]);
>  typedef float (*ssim_end4_t)(ssim_t sum0[5][4], ssim_t sum1[5][4], int
> width);
>
> +typedef void (*chromaFilterHoriz_pp)(pixel *src, intptr_t srcStride,
> pixel *dst, intptr_t dstStride, const short *coeff);   // Modified argument
> list for chroma filter, removed width and height.
> +
>  /* Define a structure containing function pointers to optimized encoder
>   * primitives.  Each pointer can reference either an assembly routine,
>   * a vectorized primitive, or a C function. */
> @@ -265,6 +278,8 @@
>      downscale_t     frame_init_lowres_core;
>      ssim_4x4x2_core_t ssim_4x4x2_core;
>      ssim_end4_t       ssim_end_4;
> +
> +    chromaFilterHoriz_pp
>  filterHorizontal_p_p[NUM_CHROMA_HORIZONTAL_PP_PARTITIONS];
>  };
>
>  /* This copy of the table is what gets used by the encoder.
> diff -r fc7fbdd18bc0 -r 37b42347a5ba source/common/vec/ipfilter-sse41.cpp
> --- a/source/common/vec/ipfilter-sse41.cpp      Wed Oct 09 00:00:10 2013
> -0500
> +++ b/source/common/vec/ipfilter-sse41.cpp      Wed Oct 09 19:23:32 2013
> +0530
> @@ -1541,8 +1541,8 @@
>      -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0
>  };
>
> -template<int N>
> -void filterHorizontal_p_p(pixel *src, intptr_t srcStride, pixel *dst,
> intptr_t dstStride, int width, int height, short const *coeff)
> +template<int N, int width, int height>
> +void filterHorizontal_p_p(pixel *src, intptr_t srcStride, pixel *dst,
> intptr_t dstStride, short const *coeff)
>  {
>      assert(X265_DEPTH == 8);
>
> @@ -1656,9 +1656,35 @@
>  }
>
>  namespace x265 {
> +#define SETUP_PARTITION(W, H) \
> +    p.filterHorizontal_p_p[CHROMA_HORIZONTAL_PP_PARTITION_##W##x##H] =
> filterHorizontal_p_p##<4,  W,  H>;
> +
>  void Setup_Vec_IPFilterPrimitives_sse41(EncoderPrimitives& p)
>  {
> -    p.ipfilter_pp[FILTER_H_P_P_4] = filterHorizontal_p_p<4>;
> +        SETUP_PARTITION(2,  4);
> +        SETUP_PARTITION(2,  8);
> +        SETUP_PARTITION(4,  2);
> +        SETUP_PARTITION(4,  4);
> +        SETUP_PARTITION(4,  8);
> +        SETUP_PARTITION(4,  16);
> +        SETUP_PARTITION(6,  8);
> +        SETUP_PARTITION(8,  2);
> +        SETUP_PARTITION(8,  4);
> +        SETUP_PARTITION(8,  6);
> +        SETUP_PARTITION(8,  8);
> +        SETUP_PARTITION(8,  16);
> +        SETUP_PARTITION(12,  16);
> +        SETUP_PARTITION(16,  4);
> +        SETUP_PARTITION(16,  8);
> +        SETUP_PARTITION(16,  12);
> +        SETUP_PARTITION(16,  16);
> +        SETUP_PARTITION(16,  32);
> +        SETUP_PARTITION(24,  32);
> +        SETUP_PARTITION(32,  8);
> +        SETUP_PARTITION(32,  16);
> +        SETUP_PARTITION(32,  24);
> +        SETUP_PARTITION(32,  32);
> +
>      p.ipfilter_pp[FILTER_H_P_P_8] = filterHorizontal_p_p<8>;
>
>      p.ipfilter_pp[FILTER_V_P_P_4] = filterVertical_p_p<4>;
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131009/027aaac3/attachment.html>


More information about the x265-devel mailing list