[x265] [PATCH] Modify primitives to support multiple color space formats
Steve Borho
steve at borho.org
Wed Jan 8 00:27:22 CET 2014
On Fri, Jan 3, 2014 at 6:45 AM, <ashok at multicorewareinc.com> wrote:
> # HG changeset patch
> # User ashok at multicorewareinc.com
> # Date 1388753074 -19800
> # Fri Jan 03 18:14:34 2014 +0530
> # Node ID 019ad3c515b3219497dfa51bd8f8c3a709b7ec5d
> # Parent 8137881d4cad4555e1128320d62dd56dd24ed3dc
> Modify primitives to support multiple color space formats
>
> diff -r 8137881d4cad -r 019ad3c515b3 source/common/TShortYUV.h
> --- a/source/common/TShortYUV.h Thu Jan 02 16:18:35 2014 +0530
> +++ b/source/common/TShortYUV.h Fri Jan 03 18:14:34 2014 +0530
> @@ -87,9 +87,9 @@
> // Access starting position of YUV partition unit buffer
> int16_t* getLumaAddr(unsigned int partUnitIdx) { return m_bufY +
> getAddrOffset(partUnitIdx, m_width); }
>
> - int16_t* getCbAddr(unsigned int partUnitIdx) { return m_bufCb +
> (getAddrOffset(partUnitIdx, m_cwidth) >> 1); }
> + int16_t* getCbAddr(unsigned int partUnitIdx) { return m_bufCb +
> (getAddrOffset(partUnitIdx, m_cwidth) >> m_hChromaShift); }
>
> - int16_t* getCrAddr(unsigned int partUnitIdx) { return m_bufCr +
> (getAddrOffset(partUnitIdx, m_cwidth) >> 1); }
> + int16_t* getCrAddr(unsigned int partUnitIdx) { return m_bufCr +
> (getAddrOffset(partUnitIdx, m_cwidth) >> m_hChromaShift); }
>
> // Access starting position of YUV transform unit buffer
> int16_t* getLumaAddr(unsigned int partIdx, unsigned int size) {
> return m_bufY + getAddrOffset(partIdx, size, m_width); }
> diff -r 8137881d4cad -r 019ad3c515b3 source/common/ipfilter.cpp
> --- a/source/common/ipfilter.cpp Thu Jan 02 16:18:35 2014 +0530
> +++ b/source/common/ipfilter.cpp Fri Jan 03 18:14:34 2014 +0530
> @@ -449,74 +449,108 @@
> namespace x265 {
> // x265 private namespace
>
> -#define CHROMA(W, H) \
> +#define CHROMA_420(W, H) \
> p.chroma[X265_CSP_I420].filter_hpp[CHROMA_ ## W ## x ## H] =
> interp_horiz_pp_c<4, W, H>; \
> p.chroma[X265_CSP_I420].filter_hps[CHROMA_ ## W ## x ## H] =
> interp_horiz_ps_c<4, W, H>; \
> - p.chroma[X265_CSP_I420].filter_vpp[CHROMA_ ## W ## x ## H] =
> interp_vert_pp_c<4, W, H>; \
> - p.chroma[X265_CSP_I420].filter_vps[CHROMA_ ## W ## x ## H] =
> interp_vert_ps_c<4, W, H>; \
> - p.chroma[X265_CSP_I420].filter_vsp[CHROMA_ ## W ## x ## H] =
> interp_vert_sp_c<4, W, H>; \
> + p.chroma[X265_CSP_I420].filter_vpp[CHROMA_ ## W ## x ## H] =
> interp_vert_pp_c<4, W, H>; \
> + p.chroma[X265_CSP_I420].filter_vps[CHROMA_ ## W ## x ## H] =
> interp_vert_ps_c<4, W, H>; \
> + p.chroma[X265_CSP_I420].filter_vsp[CHROMA_ ## W ## x ## H] =
> interp_vert_sp_c<4, W, H>; \
> p.chroma[X265_CSP_I420].filter_vss[CHROMA_ ## W ## x ## H] =
> interp_vert_ss_c<4, W, H>;
>
It's preferable to put whitespace changes into their own patch.
>
> +#define CHROMA_444(W, H) \
> + p.chroma[X265_CSP_I444].filter_hpp[LUMA_ ## W ## x ## H] =
> interp_horiz_pp_c<4, W, H>; \
> + p.chroma[X265_CSP_I444].filter_hps[LUMA_ ## W ## x ## H] =
> interp_horiz_ps_c<4, W, H>; \
> + p.chroma[X265_CSP_I444].filter_vpp[LUMA_ ## W ## x ## H] =
> interp_vert_pp_c<4, W, H>; \
> + p.chroma[X265_CSP_I444].filter_vps[LUMA_ ## W ## x ## H] =
> interp_vert_ps_c<4, W, H>; \
> + p.chroma[X265_CSP_I444].filter_vsp[LUMA_ ## W ## x ## H] =
> interp_vert_sp_c<4, W, H>; \
> + p.chroma[X265_CSP_I444].filter_vss[LUMA_ ## W ## x ## H] =
> interp_vert_ss_c<4, W, H>;
>
Does 4:4:4 use 4-tap filters for chroma?
> +
> #define LUMA(W, H) \
> p.luma_hpp[LUMA_ ## W ## x ## H] = interp_horiz_pp_c<8, W, H>; \
> p.luma_hps[LUMA_ ## W ## x ## H] = interp_horiz_ps_c<8, W, H>; \
> - p.luma_vpp[LUMA_ ## W ## x ## H] = interp_vert_pp_c<8, W, H>; \
> - p.luma_vps[LUMA_ ## W ## x ## H] = interp_vert_ps_c<8, W, H>; \
> - p.luma_vsp[LUMA_ ## W ## x ## H] = interp_vert_sp_c<8, W, H>; \
> - p.luma_vss[LUMA_ ## W ## x ## H] = interp_vert_ss_c<8, W, H>; \
> + p.luma_vpp[LUMA_ ## W ## x ## H] = interp_vert_pp_c<8, W, H>; \
> + p.luma_vps[LUMA_ ## W ## x ## H] = interp_vert_ps_c<8, W, H>; \
> + p.luma_vsp[LUMA_ ## W ## x ## H] = interp_vert_sp_c<8, W, H>; \
> + p.luma_vss[LUMA_ ## W ## x ## H] = interp_vert_ss_c<8, W, H>; \
> p.luma_hvpp[LUMA_ ## W ## x ## H] = interp_hv_pp_c<8, W, H>;
>
> void Setup_C_IPFilterPrimitives(EncoderPrimitives& p)
> {
> LUMA(4, 4);
> LUMA(8, 8);
> - CHROMA(4, 4);
> + CHROMA_420(4, 4);
> LUMA(4, 8);
> - CHROMA(2, 4);
> + CHROMA_420(2, 4);
> LUMA(8, 4);
> - CHROMA(4, 2);
> + CHROMA_420(4, 2);
> LUMA(16, 16);
> - CHROMA(8, 8);
> + CHROMA_420(8, 8);
> LUMA(16, 8);
> - CHROMA(8, 4);
> + CHROMA_420(8, 4);
> LUMA(8, 16);
> - CHROMA(4, 8);
> + CHROMA_420(4, 8);
> LUMA(16, 12);
> - CHROMA(8, 6);
> + CHROMA_420(8, 6);
> LUMA(12, 16);
> - CHROMA(6, 8);
> + CHROMA_420(6, 8);
> LUMA(16, 4);
> - CHROMA(8, 2);
> + CHROMA_420(8, 2);
> LUMA(4, 16);
> - CHROMA(2, 8);
> + CHROMA_420(2, 8);
> LUMA(32, 32);
> - CHROMA(16, 16);
> + CHROMA_420(16, 16);
> LUMA(32, 16);
> - CHROMA(16, 8);
> + CHROMA_420(16, 8);
> LUMA(16, 32);
> - CHROMA(8, 16);
> + CHROMA_420(8, 16);
> LUMA(32, 24);
> - CHROMA(16, 12);
> + CHROMA_420(16, 12);
> LUMA(24, 32);
> - CHROMA(12, 16);
> + CHROMA_420(12, 16);
> LUMA(32, 8);
> - CHROMA(16, 4);
> + CHROMA_420(16, 4);
> LUMA(8, 32);
> - CHROMA(4, 16);
> + CHROMA_420(4, 16);
> LUMA(64, 64);
> - CHROMA(32, 32);
> + CHROMA_420(32, 32);
> LUMA(64, 32);
> - CHROMA(32, 16);
> + CHROMA_420(32, 16);
> LUMA(32, 64);
> - CHROMA(16, 32);
> + CHROMA_420(16, 32);
> LUMA(64, 48);
> - CHROMA(32, 24);
> + CHROMA_420(32, 24);
> LUMA(48, 64);
> - CHROMA(24, 32);
> + CHROMA_420(24, 32);
> LUMA(64, 16);
> - CHROMA(32, 8);
> + CHROMA_420(32, 8);
> LUMA(16, 64);
> - CHROMA(8, 32);
> + CHROMA_420(8, 32);
> +
> + CHROMA_444(4, 4);
> + CHROMA_444(8, 8);
> + CHROMA_444(4, 8);
> + CHROMA_444(8, 4);
> + CHROMA_444(16, 16);
> + CHROMA_444(16, 8);
> + CHROMA_444(8, 16);
> + CHROMA_444(16, 12);
> + CHROMA_444(12, 16);
> + CHROMA_444(16, 4);
> + CHROMA_444(4, 16);
> + CHROMA_444(32, 32);
> + CHROMA_444(32, 16);
> + CHROMA_444(16, 32);
> + CHROMA_444(32, 24);
> + CHROMA_444(24, 32);
> + CHROMA_444(32, 8);
> + CHROMA_444(8, 32);
> + CHROMA_444(64, 64);
> + CHROMA_444(64, 32);
> + CHROMA_444(32, 64);
> + CHROMA_444(64, 48);
> + CHROMA_444(48, 64);
> + CHROMA_444(64, 16);
> + CHROMA_444(16, 64);
>
> p.ipfilter_ps[FILTER_V_P_S_8] = filterVertical_ps_c<8>;
> p.ipfilter_ps[FILTER_V_P_S_4] = filterVertical_ps_c<4>;
> @@ -525,7 +559,9 @@
>
> p.chroma_vsp = filterVertical_sp_c<4>;
> p.luma_p2s = filterConvertPelToShort_c<MAX_CU_SIZE>;
> - p.chroma_p2s = filterConvertPelToShort_c<MAX_CU_SIZE / 2>;
> +
> + p.chroma_p2s[X265_CSP_I444] = filterConvertPelToShort_c<MAX_CU_SIZE>;
> + p.chroma_p2s[X265_CSP_I420] = filterConvertPelToShort_c<MAX_CU_SIZE /
> 2>;
>
> p.extendRowBorder = extendCURowColBorder;
> }
> diff -r 8137881d4cad -r 019ad3c515b3 source/common/pixel.cpp
> --- a/source/common/pixel.cpp Thu Jan 02 16:18:35 2014 +0530
> +++ b/source/common/pixel.cpp Fri Jan 03 18:14:34 2014 +0530
> @@ -805,6 +805,27 @@
> namespace x265 {
> // x265 private namespace
>
> +#define CHROMA_420(W, H) \
> + p.chroma[X265_CSP_I420].copy_pp[CHROMA_ ## W ## x ## H] =
> blockcopy_pp_c<W, H>; \
> + p.chroma[X265_CSP_I420].copy_sp[CHROMA_ ## W ## x ## H] =
> blockcopy_sp_c<W, H>; \
> + p.chroma[X265_CSP_I420].copy_ps[CHROMA_ ## W ## x ## H] =
> blockcopy_ps_c<W, H>; \
> + p.chroma[X265_CSP_I420].sub_ps [CHROMA_ ## W ## x ## H] =
> pixel_sub_ps_c<W, H>; \
> + p.chroma[X265_CSP_I420].add_ps [CHROMA_ ## W ## x ## H] =
> pixel_add_ps_c<W, H>;
> +
> +#define CHROMA_444(W, H) \
> + p.chroma[X265_CSP_I444].copy_pp[LUMA_ ## W ## x ## H] =
> blockcopy_pp_c<W, H>; \
> + p.chroma[X265_CSP_I444].copy_sp[LUMA_ ## W ## x ## H] =
> blockcopy_sp_c<W, H>; \
> + p.chroma[X265_CSP_I444].copy_ps[LUMA_ ## W ## x ## H] =
> blockcopy_ps_c<W, H>; \
> + p.chroma[X265_CSP_I444].sub_ps [LUMA_ ## W ## x ## H] =
> pixel_sub_ps_c<W, H>; \
> + p.chroma[X265_CSP_I444].add_ps [LUMA_ ## W ## x ## H] =
> pixel_add_ps_c<W, H>;
> +
> +#define LUMA(W, H) \
> + p.luma_copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
> + p.luma_copy_sp[LUMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
> + p.luma_copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \
> + p.luma_sub_ps[LUMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; \
> + p.luma_add_ps[LUMA_ ## W ## x ## H] = pixel_add_ps_c<W, H>;
> +
> /* It should initialize entries for pixel functions defined in this file.
> */
> void Setup_C_PixelPrimitives(EncoderPrimitives &p)
> {
> @@ -840,69 +861,81 @@
> p.satd[LUMA_64x16] = satd8<64, 16>;
> p.satd[LUMA_16x64] = satd8<16, 64>;
>
> -#define CHROMA(W, H) \
> - p.chroma[X265_CSP_I420].copy_pp[CHROMA_ ## W ## x ## H] =
> blockcopy_pp_c<W, H>; \
> - p.chroma[X265_CSP_I420].copy_sp[CHROMA_ ## W ## x ## H] =
> blockcopy_sp_c<W, H>; \
> - p.chroma[X265_CSP_I420].copy_ps[CHROMA_ ## W ## x ## H] =
> blockcopy_ps_c<W, H>; \
> - p.chroma[X265_CSP_I420].sub_ps[CHROMA_ ## W ## x ## H] =
> pixel_sub_ps_c<W, H>; \
> - p.chroma[X265_CSP_I420].add_ps[CHROMA_ ## W ## x ## H] =
> pixel_add_ps_c<W, H>;
> -
> -#define LUMA(W, H) \
> - p.luma_copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
> - p.luma_copy_sp[LUMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
> - p.luma_copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \
> - p.luma_sub_ps[LUMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; \
> - p.luma_add_ps[LUMA_ ## W ## x ## H] = pixel_add_ps_c<W, H>;
> -
> LUMA(4, 4);
> LUMA(8, 8);
> - CHROMA(4, 4);
> + CHROMA_420(4, 4);
> LUMA(4, 8);
> - CHROMA(2, 4);
> + CHROMA_420(2, 4);
> LUMA(8, 4);
> - CHROMA(4, 2);
> + CHROMA_420(4, 2);
> LUMA(16, 16);
> - CHROMA(8, 8);
> + CHROMA_420(8, 8);
> LUMA(16, 8);
> - CHROMA(8, 4);
> + CHROMA_420(8, 4);
> LUMA(8, 16);
> - CHROMA(4, 8);
> + CHROMA_420(4, 8);
> LUMA(16, 12);
> - CHROMA(8, 6);
> + CHROMA_420(8, 6);
> LUMA(12, 16);
> - CHROMA(6, 8);
> + CHROMA_420(6, 8);
> LUMA(16, 4);
> - CHROMA(8, 2);
> + CHROMA_420(8, 2);
> LUMA(4, 16);
> - CHROMA(2, 8);
> + CHROMA_420(2, 8);
> LUMA(32, 32);
> - CHROMA(16, 16);
> + CHROMA_420(16, 16);
> LUMA(32, 16);
> - CHROMA(16, 8);
> + CHROMA_420(16, 8);
> LUMA(16, 32);
> - CHROMA(8, 16);
> + CHROMA_420(8, 16);
> LUMA(32, 24);
> - CHROMA(16, 12);
> + CHROMA_420(16, 12);
> LUMA(24, 32);
> - CHROMA(12, 16);
> + CHROMA_420(12, 16);
> LUMA(32, 8);
> - CHROMA(16, 4);
> + CHROMA_420(16, 4);
> LUMA(8, 32);
> - CHROMA(4, 16);
> + CHROMA_420(4, 16);
> LUMA(64, 64);
> - CHROMA(32, 32);
> + CHROMA_420(32, 32);
> LUMA(64, 32);
> - CHROMA(32, 16);
> + CHROMA_420(32, 16);
> LUMA(32, 64);
> - CHROMA(16, 32);
> + CHROMA_420(16, 32);
> LUMA(64, 48);
> - CHROMA(32, 24);
> + CHROMA_420(32, 24);
> LUMA(48, 64);
> - CHROMA(24, 32);
> + CHROMA_420(24, 32);
> LUMA(64, 16);
> - CHROMA(32, 8);
> + CHROMA_420(32, 8);
> LUMA(16, 64);
> - CHROMA(8, 32);
> + CHROMA_420(8, 32);
> +
> + CHROMA_444(4, 4);
> + CHROMA_444(8, 8);
> + CHROMA_444(4, 8);
> + CHROMA_444(8, 4);
> + CHROMA_444(16, 16);
> + CHROMA_444(16, 8);
> + CHROMA_444(8, 16);
> + CHROMA_444(16, 12);
> + CHROMA_444(12, 16);
> + CHROMA_444(16, 4);
> + CHROMA_444(4, 16);
> + CHROMA_444(32, 32);
> + CHROMA_444(32, 16);
> + CHROMA_444(16, 32);
> + CHROMA_444(32, 24);
> + CHROMA_444(24, 32);
> + CHROMA_444(32, 8);
> + CHROMA_444(8, 32);
> + CHROMA_444(64, 64);
> + CHROMA_444(64, 32);
> + CHROMA_444(32, 64);
> + CHROMA_444(64, 48);
> + CHROMA_444(48, 64);
> + CHROMA_444(64, 16);
> + CHROMA_444(16, 64);
>
There's no need to set up 4:4:4 pixel primitives here if they are exact
copies of the luma functions. This only makes the testbench take longer.
>
> SET_FUNC_PRIMITIVE_TABLE_C(sse_pp, sse, pixelcmp_t, pixel, pixel)
> SET_FUNC_PRIMITIVE_TABLE_C(sse_sp, sse, pixelcmp_sp_t, int16_t, pixel)
> diff -r 8137881d4cad -r 019ad3c515b3 source/common/primitives.h
> --- a/source/common/primitives.h Thu Jan 02 16:18:35 2014 +0530
> +++ b/source/common/primitives.h Fri Jan 03 18:14:34 2014 +0530
> @@ -75,7 +75,7 @@
> // 4:2:0 chroma partition sizes. These enums are just a convenience for
> indexing into the
> // chroma primitive arrays when instantiating templates. The function
> tables should always
> // be indexed by the luma partition enum
> -enum Chroma420Partions
> +enum Chroma420Partitions
> {
> CHROMA_2x2, // never used by HEVC
> CHROMA_4x4, CHROMA_4x2, CHROMA_2x4,
> @@ -240,7 +240,7 @@
> ipfilter_ps_t ipfilter_ps[NUM_IPFILTER_P_S];
> ipfilter_ss_t ipfilter_ss[NUM_IPFILTER_S_S];
> filter_p2s_t luma_p2s;
> - filter_p2s_t chroma_p2s;
> + filter_p2s_t chroma_p2s[NUM_CHROMA_PARTITIONS];
>
chroma_p2s should be moved into the per-colorspace struct.
> ipfilter_sp_t chroma_vsp;
>
> weightp_sp_t weight_sp;
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
--
Steve Borho
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20140107/b32687af/attachment-0001.html>
More information about the x265-devel mailing list