[x265] [PATCH REVIEW Only ] Chroma function, partion based call
Praveen Tiwari
praveen at multicorewareinc.com
Wed Oct 9 16:12:47 CEST 2013
Steve, I have added partition-based calls for all sizes of the chroma function.
Can you tell me whether this is the right direction? If so, I will replace the
luma functions similarly. I have a few questions about the approach to
combining functions that you described:
[12:29:36 PM] Steve Borho: if YFrac is 0, it would just do lumaH_pp. If
XFrac is 0, it would just do lumaV_pp. else it does src -> lumaH_ps ->
temp -> lumaV_sp -> dst
Are YFrac and XFrac template parameters? I think we need to combine
both functions' C code into a single function and pass an extra temp buffer
as an argument. Would we then have to modify the intrinsic and asm code too?
Regards,
Praveen Tiwari
On Wed, Oct 9, 2013 at 7:24 PM, <praveen at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Praveen Tiwari
> # Date 1381326812 -19800
> # Node ID 37b42347a5baefe11822888d385e4c8422f4f3f3
> # Parent fc7fbdd18bc0d6d7f98180332e065d83c054fe02
> Chroma function, partion based call
>
> diff -r fc7fbdd18bc0 -r 37b42347a5ba source/common/ipfilter.cpp
> --- a/source/common/ipfilter.cpp Wed Oct 09 00:00:10 2013 -0500
> +++ b/source/common/ipfilter.cpp Wed Oct 09 19:23:32 2013 +0530
> @@ -34,6 +34,56 @@
> #pragma warning(disable: 4127) // conditional expression is constant,
> typical for templated functions
> #endif
>
> +#define
> SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W2(FUNC_PREFIX, WIDTH)
> \
> + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x4] =
> FUNC_PREFIX<4, WIDTH, 4>; \
> + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x8] =
> FUNC_PREFIX<4, WIDTH, 8>;
> +
> +#define
> SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W4(FUNC_PREFIX, WIDTH)
> \
> + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x2] =
> FUNC_PREFIX<4, WIDTH, 2>; \
> + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x4] =
> FUNC_PREFIX<4, WIDTH, 4>; \
> + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x8] =
> FUNC_PREFIX<4, WIDTH, 8>; \
> + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x16] =
> FUNC_PREFIX<4, WIDTH, 16>;
> +
> +#define
> SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W6(FUNC_PREFIX, WIDTH)
> \
> + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x8] =
> FUNC_PREFIX<4, WIDTH, 8>;
> +
> +#define
> SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W8(FUNC_PREFIX, WIDTH)
> \
> + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x2] =
> FUNC_PREFIX<4, WIDTH, 2>; \
> + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x4] =
> FUNC_PREFIX<4, WIDTH, 4>; \
> + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x6] =
> FUNC_PREFIX<4, WIDTH, 6>; \
> + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x8] =
> FUNC_PREFIX<4, WIDTH, 8>; \
> + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x16] =
> FUNC_PREFIX<4, WIDTH, 16>; \
> + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x32] =
> FUNC_PREFIX<4, WIDTH, 32>;
> +
> +#define
> SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W12(FUNC_PREFIX,
> WIDTH) \
> + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x16] =
> FUNC_PREFIX<4, WIDTH, 16>;
> +
> +#define
> SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W16(FUNC_PREFIX,
> WIDTH) \
> + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x4] =
> FUNC_PREFIX<4, WIDTH, 4>; \
> + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x8] =
> FUNC_PREFIX<4, WIDTH, 8>; \
> + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x12] =
> FUNC_PREFIX<4, WIDTH, 12>; \
> + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x16] =
> FUNC_PREFIX<4, WIDTH, 16>; \
> + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x32] =
> FUNC_PREFIX<4, WIDTH, 32>;
> +
> +#define
> SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W24(FUNC_PREFIX,
> WIDTH) \
> + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x32] =
> FUNC_PREFIX<4, WIDTH, 32>;
> +
> +#define
> SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W32(FUNC_PREFIX,
> WIDTH) \
> + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x8] =
> FUNC_PREFIX<4, WIDTH, 8>; \
> + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x16] =
> FUNC_PREFIX<4, WIDTH, 16>; \
> + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x24] =
> FUNC_PREFIX<4, WIDTH, 24>; \
> + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x32] =
> FUNC_PREFIX<4, WIDTH, 32>;
> +
> +#define SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE(FUNC_PREFIX) \
> + SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W2(FUNC_PREFIX,
> 2) \
> + SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W4(FUNC_PREFIX,
> 4) \
> + SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W6(FUNC_PREFIX,
> 6) \
> + SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W8(FUNC_PREFIX,
> 8) \
> + SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W12(FUNC_PREFIX,
> 12) \
> + SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W16(FUNC_PREFIX,
> 16) \
> + SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W24(FUNC_PREFIX,
> 24) \
> + SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W32(FUNC_PREFIX,
> 32) \
> +
> namespace {
> template<int N>
> void filterVertical_s_p(short *src, intptr_t srcStride, pixel *dst,
> intptr_t dstStride, int width, int height, short const *coeff)
> @@ -88,8 +138,8 @@
> }
> }
>
> -template<int N>
> -void filterHorizontal_p_p(pixel *src, intptr_t srcStride, pixel *dst,
> intptr_t dstStride, int width, int height, short const *coeff)
> +template<int N, int width, int height>
> +void filterHorizontal_p_p(pixel *src, intptr_t srcStride, pixel *dst,
> intptr_t dstStride, short const *coeff)
> {
> int cStride = 1;
>
> @@ -500,11 +550,13 @@
>
> void Setup_C_IPFilterPrimitives(EncoderPrimitives& p)
> {
> +
> + SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE(filterHorizontal_p_p)
> +
> p.ipfilter_pp[FILTER_H_P_P_8] = filterHorizontal_p_p<8>;
> p.ipfilter_ps[FILTER_H_P_S_8] = filterHorizontal_p_s<8>;
> p.ipfilter_ps[FILTER_V_P_S_8] = filterVertical_p_s<8>;
> p.ipfilter_sp[FILTER_V_S_P_8] = filterVertical_s_p<8>;
> - p.ipfilter_pp[FILTER_H_P_P_4] = filterHorizontal_p_p<4>;
> p.ipfilter_ps[FILTER_H_P_S_4] = filterHorizontal_p_s<4>;
> p.ipfilter_ps[FILTER_V_P_S_4] = filterVertical_p_s<4>;
> p.ipfilter_sp[FILTER_V_S_P_4] = filterVertical_s_p<4>;
> diff -r fc7fbdd18bc0 -r 37b42347a5ba source/common/primitives.h
> --- a/source/common/primitives.h Wed Oct 09 00:00:10 2013 -0500
> +++ b/source/common/primitives.h Wed Oct 09 19:23:32 2013 +0530
> @@ -89,6 +89,17 @@
> NUM_PARTITIONS
> };
>
> +enum ChromaPartions
> +{
> + CHROMA_HORIZONTAL_PP_PARTITION_2x4,
> CHROMA_HORIZONTAL_PP_PARTITION_2x8, CHROMA_HORIZONTAL_PP_PARTITION_4x2,
> CHROMA_HORIZONTAL_PP_PARTITION_4x4,
> + CHROMA_HORIZONTAL_PP_PARTITION_4x8,
> CHROMA_HORIZONTAL_PP_PARTITION_4x16, CHROMA_HORIZONTAL_PP_PARTITION_8x2,
> CHROMA_HORIZONTAL_PP_PARTITION_8x4,
> + CHROMA_HORIZONTAL_PP_PARTITION_8x6,
> CHROMA_HORIZONTAL_PP_PARTITION_8x8, CHROMA_HORIZONTAL_PP_PARTITION_8x16,
> CHROMA_HORIZONTAL_PP_PARTITION_8x32,
> + CHROMA_HORIZONTAL_PP_PARTITION_6x8,
> CHROMA_HORIZONTAL_PP_PARTITION_12x16,
> CHROMA_HORIZONTAL_PP_PARTITION_16x4, CHROMA_HORIZONTAL_PP_PARTITION_16x8,
> + CHROMA_HORIZONTAL_PP_PARTITION_16x12,
> CHROMA_HORIZONTAL_PP_PARTITION_16x16,
> CHROMA_HORIZONTAL_PP_PARTITION_16x32,
> CHROMA_HORIZONTAL_PP_PARTITION_24x32,
> + CHROMA_HORIZONTAL_PP_PARTITION_32x8,
> CHROMA_HORIZONTAL_PP_PARTITION_32x16,
> CHROMA_HORIZONTAL_PP_PARTITION_32x24,
> CHROMA_HORIZONTAL_PP_PARTITION_32x32,
> + NUM_CHROMA_HORIZONTAL_PP_PARTITIONS
> +};
> +
> enum SquareBlocks // Routines can be indexed using log2n(width)
> {
> BLOCK_4x4,
> @@ -205,6 +216,8 @@
> typedef void (*ssim_4x4x2_core_t)(const pixel *pix1, intptr_t stride1,
> const pixel *pix2, intptr_t stride2, ssim_t sums[2][4]);
> typedef float (*ssim_end4_t)(ssim_t sum0[5][4], ssim_t sum1[5][4], int
> width);
>
> +typedef void (*chromaFilterHoriz_pp)(pixel *src, intptr_t srcStride,
> pixel *dst, intptr_t dstStride, const short *coeff); // Modified argument
> list for chroma filter, removed width and height.
> +
> /* Define a structure containing function pointers to optimized encoder
> * primitives. Each pointer can reference either an assembly routine,
> * a vectorized primitive, or a C function. */
> @@ -265,6 +278,8 @@
> downscale_t frame_init_lowres_core;
> ssim_4x4x2_core_t ssim_4x4x2_core;
> ssim_end4_t ssim_end_4;
> +
> + chromaFilterHoriz_pp
> filterHorizontal_p_p[NUM_CHROMA_HORIZONTAL_PP_PARTITIONS];
> };
>
> /* This copy of the table is what gets used by the encoder.
> diff -r fc7fbdd18bc0 -r 37b42347a5ba source/common/vec/ipfilter-sse41.cpp
> --- a/source/common/vec/ipfilter-sse41.cpp Wed Oct 09 00:00:10 2013
> -0500
> +++ b/source/common/vec/ipfilter-sse41.cpp Wed Oct 09 19:23:32 2013
> +0530
> @@ -1541,8 +1541,8 @@
> -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0
> };
>
> -template<int N>
> -void filterHorizontal_p_p(pixel *src, intptr_t srcStride, pixel *dst,
> intptr_t dstStride, int width, int height, short const *coeff)
> +template<int N, int width, int height>
> +void filterHorizontal_p_p(pixel *src, intptr_t srcStride, pixel *dst,
> intptr_t dstStride, short const *coeff)
> {
> assert(X265_DEPTH == 8);
>
> @@ -1656,9 +1656,35 @@
> }
>
> namespace x265 {
> +#define SETUP_PARTITION(W, H) \
> + p.filterHorizontal_p_p[CHROMA_HORIZONTAL_PP_PARTITION_##W##x##H] =
> filterHorizontal_p_p##<4, W, H>;
> +
> void Setup_Vec_IPFilterPrimitives_sse41(EncoderPrimitives& p)
> {
> - p.ipfilter_pp[FILTER_H_P_P_4] = filterHorizontal_p_p<4>;
> + SETUP_PARTITION(2, 4);
> + SETUP_PARTITION(2, 8);
> + SETUP_PARTITION(4, 2);
> + SETUP_PARTITION(4, 4);
> + SETUP_PARTITION(4, 8);
> + SETUP_PARTITION(4, 16);
> + SETUP_PARTITION(6, 8);
> + SETUP_PARTITION(8, 2);
> + SETUP_PARTITION(8, 4);
> + SETUP_PARTITION(8, 6);
> + SETUP_PARTITION(8, 8);
> + SETUP_PARTITION(8, 16);
> + SETUP_PARTITION(12, 16);
> + SETUP_PARTITION(16, 4);
> + SETUP_PARTITION(16, 8);
> + SETUP_PARTITION(16, 12);
> + SETUP_PARTITION(16, 16);
> + SETUP_PARTITION(16, 32);
> + SETUP_PARTITION(24, 32);
> + SETUP_PARTITION(32, 8);
> + SETUP_PARTITION(32, 16);
> + SETUP_PARTITION(32, 24);
> + SETUP_PARTITION(32, 32);
> +
> p.ipfilter_pp[FILTER_H_P_P_8] = filterHorizontal_p_p<8>;
>
> p.ipfilter_pp[FILTER_V_P_P_4] = filterVertical_p_p<4>;
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131009/027aaac3/attachment.html>
More information about the x265-devel
mailing list