[x265] [PATCH] Modify primitives to support multiple color space formats

Steve Borho steve at borho.org
Wed Jan 8 00:27:22 CET 2014


On Fri, Jan 3, 2014 at 6:45 AM, <ashok at multicorewareinc.com> wrote:

> # HG changeset patch
> # User ashok at multicorewareinc.com
> # Date 1388753074 -19800
> #      Fri Jan 03 18:14:34 2014 +0530
> # Node ID 019ad3c515b3219497dfa51bd8f8c3a709b7ec5d
> # Parent  8137881d4cad4555e1128320d62dd56dd24ed3dc
> Modify primitives to support multiple color space formats
>
> diff -r 8137881d4cad -r 019ad3c515b3 source/common/TShortYUV.h
> --- a/source/common/TShortYUV.h Thu Jan 02 16:18:35 2014 +0530
> +++ b/source/common/TShortYUV.h Fri Jan 03 18:14:34 2014 +0530
> @@ -87,9 +87,9 @@
>      //  Access starting position of YUV partition unit buffer
>      int16_t* getLumaAddr(unsigned int partUnitIdx) { return m_bufY +
> getAddrOffset(partUnitIdx, m_width); }
>
> -    int16_t* getCbAddr(unsigned int partUnitIdx) { return m_bufCb +
> (getAddrOffset(partUnitIdx, m_cwidth) >> 1); }
> +    int16_t* getCbAddr(unsigned int partUnitIdx) { return m_bufCb +
> (getAddrOffset(partUnitIdx, m_cwidth) >> m_hChromaShift); }
>
> -    int16_t* getCrAddr(unsigned int partUnitIdx) { return m_bufCr +
> (getAddrOffset(partUnitIdx, m_cwidth) >> 1); }
> +    int16_t* getCrAddr(unsigned int partUnitIdx) { return m_bufCr +
> (getAddrOffset(partUnitIdx, m_cwidth) >> m_hChromaShift); }
>
>      //  Access starting position of YUV transform unit buffer
>      int16_t* getLumaAddr(unsigned int partIdx, unsigned int size) {
> return m_bufY + getAddrOffset(partIdx, size, m_width); }
> diff -r 8137881d4cad -r 019ad3c515b3 source/common/ipfilter.cpp
> --- a/source/common/ipfilter.cpp        Thu Jan 02 16:18:35 2014 +0530
> +++ b/source/common/ipfilter.cpp        Fri Jan 03 18:14:34 2014 +0530
> @@ -449,74 +449,108 @@
>  namespace x265 {
>  // x265 private namespace
>
> -#define CHROMA(W, H) \
> +#define CHROMA_420(W, H) \
>      p.chroma[X265_CSP_I420].filter_hpp[CHROMA_ ## W ## x ## H] =
> interp_horiz_pp_c<4, W, H>; \
>      p.chroma[X265_CSP_I420].filter_hps[CHROMA_ ## W ## x ## H] =
> interp_horiz_ps_c<4, W, H>; \
> -    p.chroma[X265_CSP_I420].filter_vpp[CHROMA_ ## W ## x ## H] =
> interp_vert_pp_c<4, W, H>; \
> -    p.chroma[X265_CSP_I420].filter_vps[CHROMA_ ## W ## x ## H] =
> interp_vert_ps_c<4, W, H>; \
> -    p.chroma[X265_CSP_I420].filter_vsp[CHROMA_ ## W ## x ## H] =
> interp_vert_sp_c<4, W, H>; \
> +    p.chroma[X265_CSP_I420].filter_vpp[CHROMA_ ## W ## x ## H] =
> interp_vert_pp_c<4, W, H>;  \
> +    p.chroma[X265_CSP_I420].filter_vps[CHROMA_ ## W ## x ## H] =
> interp_vert_ps_c<4, W, H>;  \
> +    p.chroma[X265_CSP_I420].filter_vsp[CHROMA_ ## W ## x ## H] =
> interp_vert_sp_c<4, W, H>;  \
>      p.chroma[X265_CSP_I420].filter_vss[CHROMA_ ## W ## x ## H] =
> interp_vert_ss_c<4, W, H>;
>

It's preferable to put whitespace changes into their own patch.


>
> +#define CHROMA_444(W, H) \
> +    p.chroma[X265_CSP_I444].filter_hpp[LUMA_ ## W ## x ## H] =
> interp_horiz_pp_c<4, W, H>; \
> +    p.chroma[X265_CSP_I444].filter_hps[LUMA_ ## W ## x ## H] =
> interp_horiz_ps_c<4, W, H>; \
> +    p.chroma[X265_CSP_I444].filter_vpp[LUMA_ ## W ## x ## H] =
> interp_vert_pp_c<4, W, H>;  \
> +    p.chroma[X265_CSP_I444].filter_vps[LUMA_ ## W ## x ## H] =
> interp_vert_ps_c<4, W, H>;  \
> +    p.chroma[X265_CSP_I444].filter_vsp[LUMA_ ## W ## x ## H] =
> interp_vert_sp_c<4, W, H>;  \
> +    p.chroma[X265_CSP_I444].filter_vss[LUMA_ ## W ## x ## H] =
> interp_vert_ss_c<4, W, H>;
>

Does 4:4:4 really use 4-tap filters for chroma?


> +
>  #define LUMA(W, H) \
>      p.luma_hpp[LUMA_ ## W ## x ## H]     = interp_horiz_pp_c<8, W, H>; \
>      p.luma_hps[LUMA_ ## W ## x ## H]     = interp_horiz_ps_c<8, W, H>; \
> -    p.luma_vpp[LUMA_ ## W ## x ## H]     = interp_vert_pp_c<8, W, H>; \
> -    p.luma_vps[LUMA_ ## W ## x ## H]     = interp_vert_ps_c<8, W, H>; \
> -    p.luma_vsp[LUMA_ ## W ## x ## H]     = interp_vert_sp_c<8, W, H>; \
> -    p.luma_vss[LUMA_ ## W ## x ## H]     = interp_vert_ss_c<8, W, H>; \
> +    p.luma_vpp[LUMA_ ## W ## x ## H]     = interp_vert_pp_c<8, W, H>;  \
> +    p.luma_vps[LUMA_ ## W ## x ## H]     = interp_vert_ps_c<8, W, H>;  \
> +    p.luma_vsp[LUMA_ ## W ## x ## H]     = interp_vert_sp_c<8, W, H>;  \
> +    p.luma_vss[LUMA_ ## W ## x ## H]     = interp_vert_ss_c<8, W, H>;  \
>      p.luma_hvpp[LUMA_ ## W ## x ## H]    = interp_hv_pp_c<8, W, H>;
>
>  void Setup_C_IPFilterPrimitives(EncoderPrimitives& p)
>  {
>      LUMA(4, 4);
>      LUMA(8, 8);
> -    CHROMA(4, 4);
> +    CHROMA_420(4,  4);
>      LUMA(4, 8);
> -    CHROMA(2, 4);
> +    CHROMA_420(2,  4);
>      LUMA(8, 4);
> -    CHROMA(4, 2);
> +    CHROMA_420(4,  2);
>      LUMA(16, 16);
> -    CHROMA(8, 8);
> +    CHROMA_420(8,  8);
>      LUMA(16,  8);
> -    CHROMA(8, 4);
> +    CHROMA_420(8,  4);
>      LUMA(8, 16);
> -    CHROMA(4, 8);
> +    CHROMA_420(4,  8);
>      LUMA(16, 12);
> -    CHROMA(8, 6);
> +    CHROMA_420(8,  6);
>      LUMA(12, 16);
> -    CHROMA(6, 8);
> +    CHROMA_420(6,  8);
>      LUMA(16,  4);
> -    CHROMA(8, 2);
> +    CHROMA_420(8,  2);
>      LUMA(4, 16);
> -    CHROMA(2, 8);
> +    CHROMA_420(2,  8);
>      LUMA(32, 32);
> -    CHROMA(16, 16);
> +    CHROMA_420(16, 16);
>      LUMA(32, 16);
> -    CHROMA(16, 8);
> +    CHROMA_420(16, 8);
>      LUMA(16, 32);
> -    CHROMA(8, 16);
> +    CHROMA_420(8,  16);
>      LUMA(32, 24);
> -    CHROMA(16, 12);
> +    CHROMA_420(16, 12);
>      LUMA(24, 32);
> -    CHROMA(12, 16);
> +    CHROMA_420(12, 16);
>      LUMA(32,  8);
> -    CHROMA(16, 4);
> +    CHROMA_420(16, 4);
>      LUMA(8, 32);
> -    CHROMA(4, 16);
> +    CHROMA_420(4,  16);
>      LUMA(64, 64);
> -    CHROMA(32, 32);
> +    CHROMA_420(32, 32);
>      LUMA(64, 32);
> -    CHROMA(32, 16);
> +    CHROMA_420(32, 16);
>      LUMA(32, 64);
> -    CHROMA(16, 32);
> +    CHROMA_420(16, 32);
>      LUMA(64, 48);
> -    CHROMA(32, 24);
> +    CHROMA_420(32, 24);
>      LUMA(48, 64);
> -    CHROMA(24, 32);
> +    CHROMA_420(24, 32);
>      LUMA(64, 16);
> -    CHROMA(32, 8);
> +    CHROMA_420(32, 8);
>      LUMA(16, 64);
> -    CHROMA(8, 32);
> +    CHROMA_420(8,  32);
> +
> +    CHROMA_444(4,  4);
> +    CHROMA_444(8,  8);
> +    CHROMA_444(4,  8);
> +    CHROMA_444(8,  4);
> +    CHROMA_444(16, 16);
> +    CHROMA_444(16, 8);
> +    CHROMA_444(8,  16);
> +    CHROMA_444(16, 12);
> +    CHROMA_444(12, 16);
> +    CHROMA_444(16, 4);
> +    CHROMA_444(4,  16);
> +    CHROMA_444(32, 32);
> +    CHROMA_444(32, 16);
> +    CHROMA_444(16, 32);
> +    CHROMA_444(32, 24);
> +    CHROMA_444(24, 32);
> +    CHROMA_444(32, 8);
> +    CHROMA_444(8,  32);
> +    CHROMA_444(64, 64);
> +    CHROMA_444(64, 32);
> +    CHROMA_444(32, 64);
> +    CHROMA_444(64, 48);
> +    CHROMA_444(48, 64);
> +    CHROMA_444(64, 16);
> +    CHROMA_444(16, 64);
>
>      p.ipfilter_ps[FILTER_V_P_S_8] = filterVertical_ps_c<8>;
>      p.ipfilter_ps[FILTER_V_P_S_4] = filterVertical_ps_c<4>;
> @@ -525,7 +559,9 @@
>
>      p.chroma_vsp = filterVertical_sp_c<4>;
>      p.luma_p2s = filterConvertPelToShort_c<MAX_CU_SIZE>;
> -    p.chroma_p2s = filterConvertPelToShort_c<MAX_CU_SIZE / 2>;
> +
> +    p.chroma_p2s[X265_CSP_I444] = filterConvertPelToShort_c<MAX_CU_SIZE>;
> +    p.chroma_p2s[X265_CSP_I420] = filterConvertPelToShort_c<MAX_CU_SIZE /
> 2>;
>
>      p.extendRowBorder = extendCURowColBorder;
>  }
> diff -r 8137881d4cad -r 019ad3c515b3 source/common/pixel.cpp
> --- a/source/common/pixel.cpp   Thu Jan 02 16:18:35 2014 +0530
> +++ b/source/common/pixel.cpp   Fri Jan 03 18:14:34 2014 +0530
> @@ -805,6 +805,27 @@
>  namespace x265 {
>  // x265 private namespace
>
> +#define CHROMA_420(W, H) \
> +    p.chroma[X265_CSP_I420].copy_pp[CHROMA_ ## W ## x ## H] =
> blockcopy_pp_c<W, H>; \
> +    p.chroma[X265_CSP_I420].copy_sp[CHROMA_ ## W ## x ## H] =
> blockcopy_sp_c<W, H>; \
> +    p.chroma[X265_CSP_I420].copy_ps[CHROMA_ ## W ## x ## H] =
> blockcopy_ps_c<W, H>; \
> +    p.chroma[X265_CSP_I420].sub_ps [CHROMA_ ## W ## x ## H] =
> pixel_sub_ps_c<W, H>; \
> +    p.chroma[X265_CSP_I420].add_ps [CHROMA_ ## W ## x ## H] =
> pixel_add_ps_c<W, H>;
> +
> +#define CHROMA_444(W, H) \
> +    p.chroma[X265_CSP_I444].copy_pp[LUMA_ ## W ## x ## H] =
> blockcopy_pp_c<W, H>; \
> +    p.chroma[X265_CSP_I444].copy_sp[LUMA_ ## W ## x ## H] =
> blockcopy_sp_c<W, H>; \
> +    p.chroma[X265_CSP_I444].copy_ps[LUMA_ ## W ## x ## H] =
> blockcopy_ps_c<W, H>; \
> +    p.chroma[X265_CSP_I444].sub_ps [LUMA_ ## W ## x ## H] =
> pixel_sub_ps_c<W, H>; \
> +    p.chroma[X265_CSP_I444].add_ps [LUMA_ ## W ## x ## H] =
> pixel_add_ps_c<W, H>;
> +
> +#define LUMA(W, H) \
> +    p.luma_copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
> +    p.luma_copy_sp[LUMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
> +    p.luma_copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \
> +    p.luma_sub_ps[LUMA_ ## W ## x ## H]  = pixel_sub_ps_c<W, H>; \
> +    p.luma_add_ps[LUMA_ ## W ## x ## H]  = pixel_add_ps_c<W, H>;
> +
>  /* It should initialize entries for pixel functions defined in this file.
> */
>  void Setup_C_PixelPrimitives(EncoderPrimitives &p)
>  {
> @@ -840,69 +861,81 @@
>      p.satd[LUMA_64x16] = satd8<64, 16>;
>      p.satd[LUMA_16x64] = satd8<16, 64>;
>
> -#define CHROMA(W, H) \
> -    p.chroma[X265_CSP_I420].copy_pp[CHROMA_ ## W ## x ## H] =
> blockcopy_pp_c<W, H>; \
> -    p.chroma[X265_CSP_I420].copy_sp[CHROMA_ ## W ## x ## H] =
> blockcopy_sp_c<W, H>; \
> -    p.chroma[X265_CSP_I420].copy_ps[CHROMA_ ## W ## x ## H] =
> blockcopy_ps_c<W, H>; \
> -    p.chroma[X265_CSP_I420].sub_ps[CHROMA_ ## W ## x ## H] =
> pixel_sub_ps_c<W, H>; \
> -    p.chroma[X265_CSP_I420].add_ps[CHROMA_ ## W ## x ## H] =
> pixel_add_ps_c<W, H>;
> -
> -#define LUMA(W, H) \
> -    p.luma_copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
> -    p.luma_copy_sp[LUMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
> -    p.luma_copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \
> -    p.luma_sub_ps[LUMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; \
> -    p.luma_add_ps[LUMA_ ## W ## x ## H] = pixel_add_ps_c<W, H>;
> -
>      LUMA(4, 4);
>      LUMA(8, 8);
> -    CHROMA(4, 4);
> +    CHROMA_420(4, 4);
>      LUMA(4, 8);
> -    CHROMA(2, 4);
> +    CHROMA_420(2, 4);
>      LUMA(8, 4);
> -    CHROMA(4, 2);
> +    CHROMA_420(4, 2);
>      LUMA(16, 16);
> -    CHROMA(8, 8);
> +    CHROMA_420(8,  8);
>      LUMA(16,  8);
> -    CHROMA(8, 4);
> +    CHROMA_420(8,  4);
>      LUMA(8, 16);
> -    CHROMA(4, 8);
> +    CHROMA_420(4,  8);
>      LUMA(16, 12);
> -    CHROMA(8, 6);
> +    CHROMA_420(8,  6);
>      LUMA(12, 16);
> -    CHROMA(6, 8);
> +    CHROMA_420(6,  8);
>      LUMA(16,  4);
> -    CHROMA(8, 2);
> +    CHROMA_420(8,  2);
>      LUMA(4, 16);
> -    CHROMA(2, 8);
> +    CHROMA_420(2,  8);
>      LUMA(32, 32);
> -    CHROMA(16, 16);
> +    CHROMA_420(16, 16);
>      LUMA(32, 16);
> -    CHROMA(16, 8);
> +    CHROMA_420(16, 8);
>      LUMA(16, 32);
> -    CHROMA(8, 16);
> +    CHROMA_420(8,  16);
>      LUMA(32, 24);
> -    CHROMA(16, 12);
> +    CHROMA_420(16, 12);
>      LUMA(24, 32);
> -    CHROMA(12, 16);
> +    CHROMA_420(12, 16);
>      LUMA(32,  8);
> -    CHROMA(16, 4);
> +    CHROMA_420(16, 4);
>      LUMA(8, 32);
> -    CHROMA(4, 16);
> +    CHROMA_420(4,  16);
>      LUMA(64, 64);
> -    CHROMA(32, 32);
> +    CHROMA_420(32, 32);
>      LUMA(64, 32);
> -    CHROMA(32, 16);
> +    CHROMA_420(32, 16);
>      LUMA(32, 64);
> -    CHROMA(16, 32);
> +    CHROMA_420(16, 32);
>      LUMA(64, 48);
> -    CHROMA(32, 24);
> +    CHROMA_420(32, 24);
>      LUMA(48, 64);
> -    CHROMA(24, 32);
> +    CHROMA_420(24, 32);
>      LUMA(64, 16);
> -    CHROMA(32, 8);
> +    CHROMA_420(32, 8);
>      LUMA(16, 64);
> -    CHROMA(8, 32);
> +    CHROMA_420(8,  32);
> +
> +    CHROMA_444(4,  4);
> +    CHROMA_444(8,  8);
> +    CHROMA_444(4,  8);
> +    CHROMA_444(8,  4);
> +    CHROMA_444(16, 16);
> +    CHROMA_444(16, 8);
> +    CHROMA_444(8,  16);
> +    CHROMA_444(16, 12);
> +    CHROMA_444(12, 16);
> +    CHROMA_444(16, 4);
> +    CHROMA_444(4,  16);
> +    CHROMA_444(32, 32);
> +    CHROMA_444(32, 16);
> +    CHROMA_444(16, 32);
> +    CHROMA_444(32, 24);
> +    CHROMA_444(24, 32);
> +    CHROMA_444(32, 8);
> +    CHROMA_444(8,  32);
> +    CHROMA_444(64, 64);
> +    CHROMA_444(64, 32);
> +    CHROMA_444(32, 64);
> +    CHROMA_444(64, 48);
> +    CHROMA_444(48, 64);
> +    CHROMA_444(64, 16);
> +    CHROMA_444(16, 64);
>

There's no need to set up 4:4:4 pixel primitives here if they are exact
copies of the luma functions.  This only makes the testbench take longer.


>
>      SET_FUNC_PRIMITIVE_TABLE_C(sse_pp, sse, pixelcmp_t, pixel, pixel)
>      SET_FUNC_PRIMITIVE_TABLE_C(sse_sp, sse, pixelcmp_sp_t, int16_t, pixel)
> diff -r 8137881d4cad -r 019ad3c515b3 source/common/primitives.h
> --- a/source/common/primitives.h        Thu Jan 02 16:18:35 2014 +0530
> +++ b/source/common/primitives.h        Fri Jan 03 18:14:34 2014 +0530
> @@ -75,7 +75,7 @@
>  // 4:2:0 chroma partition sizes. These enums are just a convenience for
> indexing into the
>  // chroma primitive arrays when instantiating templates. The function
> tables should always
>  // be indexed by the luma partition enum
> -enum Chroma420Partions
> +enum Chroma420Partitions
>  {
>      CHROMA_2x2, // never used by HEVC
>      CHROMA_4x4,   CHROMA_4x2,   CHROMA_2x4,
> @@ -240,7 +240,7 @@
>      ipfilter_ps_t   ipfilter_ps[NUM_IPFILTER_P_S];
>      ipfilter_ss_t   ipfilter_ss[NUM_IPFILTER_S_S];
>      filter_p2s_t    luma_p2s;
> -    filter_p2s_t    chroma_p2s;
> +    filter_p2s_t    chroma_p2s[NUM_CHROMA_PARTITIONS];
>

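chroma_p2s should be moved into the per-colorspace struct (p.chroma[csp]).
As written, the array is sized by NUM_CHROMA_PARTITIONS but is indexed by
colorspace (X265_CSP_I420 / X265_CSP_I444) in ipfilter.cpp.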


>      ipfilter_sp_t   chroma_vsp;
>
>      weightp_sp_t    weight_sp;
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>



-- 
Steve Borho
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20140107/b32687af/attachment-0001.html>


More information about the x265-devel mailing list