[x265] [PATCH 1 of 5] Refactor EncoderPrimitives under common
Steve Borho
steve at borho.org
Fri Jan 9 11:19:24 CET 2015
On 01/09, Kevin Wu wrote:
> # HG changeset patch
> # User Kevin Wu <kevin at multicorewareinc.com>
> # Date 1420752218 21600
> # Thu Jan 08 15:23:38 2015 -0600
> # Node ID c6ca0fd54aa7c50119c9e5bdbbd02d49abb45559
> # Parent 1924c460d1304d9ce775f35864712dd98f758f9f
> Refactor EncoderPrimitives under common.
This series is queued for testing.
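
For anyone skimming the diff: the shape of the change is that the flat per-partition
function tables in EncoderPrimitives are folded into per-block-size structs, so a
call site indexes by size once and then picks the primitive. A minimal sketch of the
before/after access pattern, using only names that appear in this patch (the call
arguments are assumed from the pixelcmp_t signature):

    /* before: one table per primitive, indexed by partition */
    cost = p.satd[LUMA_16x8](fenc, fencStride, fref, frefStride);

    /* after: one struct per partition, holding all of its primitives */
    cost = p.pu[LUMA_16x8].satd(fenc, fencStride, fref, frefStride);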
> diff -r 1924c460d130 -r c6ca0fd54aa7 source/common/dct.cpp
> --- a/source/common/dct.cpp Fri Jan 09 11:35:26 2015 +0530
> +++ b/source/common/dct.cpp Thu Jan 08 15:23:38 2015 -0600
> @@ -765,22 +765,22 @@
> p.dequant_normal = dequant_normal_c;
> p.quant = quant_c;
> p.nquant = nquant_c;
> - p.dct[DST_4x4] = dst4_c;
> - p.dct[DCT_4x4] = dct4_c;
> - p.dct[DCT_8x8] = dct8_c;
> - p.dct[DCT_16x16] = dct16_c;
> - p.dct[DCT_32x32] = dct32_c;
> - p.idct[IDST_4x4] = idst4_c;
> - p.idct[IDCT_4x4] = idct4_c;
> - p.idct[IDCT_8x8] = idct8_c;
> - p.idct[IDCT_16x16] = idct16_c;
> - p.idct[IDCT_32x32] = idct32_c;
> + p.dst4x4 = dst4_c;
> + p.cu[BLOCK_4x4].dct = dct4_c;
> + p.cu[BLOCK_8x8].dct = dct8_c;
> + p.cu[BLOCK_16x16].dct = dct16_c;
> + p.cu[BLOCK_32x32].dct = dct32_c;
> + p.idst4x4 = idst4_c;
> + p.cu[BLOCK_4x4].idct = idct4_c;
> + p.cu[BLOCK_8x8].idct = idct8_c;
> + p.cu[BLOCK_16x16].idct = idct16_c;
> + p.cu[BLOCK_32x32].idct = idct32_c;
> p.count_nonzero = count_nonzero_c;
> p.denoiseDct = denoiseDct_c;
>
> - p.copy_cnt[BLOCK_4x4] = copy_count<4>;
> - p.copy_cnt[BLOCK_8x8] = copy_count<8>;
> - p.copy_cnt[BLOCK_16x16] = copy_count<16>;
> - p.copy_cnt[BLOCK_32x32] = copy_count<32>;
> + p.cu[BLOCK_4x4].copy_cnt = copy_count<4>;
> + p.cu[BLOCK_8x8].copy_cnt = copy_count<8>;
> + p.cu[BLOCK_16x16].copy_cnt = copy_count<16>;
> + p.cu[BLOCK_32x32].copy_cnt = copy_count<32>;
> }
> }
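
Worth noting in the dct.cpp hunk above: DST_4x4/IDST_4x4 move out of the dct[]/idct[]
tables into standalone p.dst4x4/p.idst4x4 members, presumably because the DST only
ever exists at one block size, while the true DCT/IDCT become per-size cu[].dct and
cu[].idct entries.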
> diff -r 1924c460d130 -r c6ca0fd54aa7 source/common/ipfilter.cpp
> --- a/source/common/ipfilter.cpp Fri Jan 09 11:35:26 2015 +0530
> +++ b/source/common/ipfilter.cpp Thu Jan 08 15:23:38 2015 -0600
> @@ -373,37 +373,37 @@
> // x265 private namespace
>
> #define CHROMA_420(W, H) \
> - p.chroma[X265_CSP_I420].filter_hpp[CHROMA_ ## W ## x ## H] = interp_horiz_pp_c<4, W, H>; \
> - p.chroma[X265_CSP_I420].filter_hps[CHROMA_ ## W ## x ## H] = interp_horiz_ps_c<4, W, H>; \
> - p.chroma[X265_CSP_I420].filter_vpp[CHROMA_ ## W ## x ## H] = interp_vert_pp_c<4, W, H>; \
> - p.chroma[X265_CSP_I420].filter_vps[CHROMA_ ## W ## x ## H] = interp_vert_ps_c<4, W, H>; \
> - p.chroma[X265_CSP_I420].filter_vsp[CHROMA_ ## W ## x ## H] = interp_vert_sp_c<4, W, H>; \
> - p.chroma[X265_CSP_I420].filter_vss[CHROMA_ ## W ## x ## H] = interp_vert_ss_c<4, W, H>;
> + p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].filter_hpp = interp_horiz_pp_c<4, W, H>; \
> + p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].filter_hps = interp_horiz_ps_c<4, W, H>; \
> + p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].filter_vpp = interp_vert_pp_c<4, W, H>; \
> + p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].filter_vps = interp_vert_ps_c<4, W, H>; \
> + p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].filter_vsp = interp_vert_sp_c<4, W, H>; \
> + p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].filter_vss = interp_vert_ss_c<4, W, H>;
>
> #define CHROMA_422(W, H) \
> - p.chroma[X265_CSP_I422].filter_hpp[CHROMA422_ ## W ## x ## H] = interp_horiz_pp_c<4, W, H>; \
> - p.chroma[X265_CSP_I422].filter_hps[CHROMA422_ ## W ## x ## H] = interp_horiz_ps_c<4, W, H>; \
> - p.chroma[X265_CSP_I422].filter_vpp[CHROMA422_ ## W ## x ## H] = interp_vert_pp_c<4, W, H>; \
> - p.chroma[X265_CSP_I422].filter_vps[CHROMA422_ ## W ## x ## H] = interp_vert_ps_c<4, W, H>; \
> - p.chroma[X265_CSP_I422].filter_vsp[CHROMA422_ ## W ## x ## H] = interp_vert_sp_c<4, W, H>; \
> - p.chroma[X265_CSP_I422].filter_vss[CHROMA422_ ## W ## x ## H] = interp_vert_ss_c<4, W, H>;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].filter_hpp = interp_horiz_pp_c<4, W, H>; \
> + p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].filter_hps = interp_horiz_ps_c<4, W, H>; \
> + p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].filter_vpp = interp_vert_pp_c<4, W, H>; \
> + p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].filter_vps = interp_vert_ps_c<4, W, H>; \
> + p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].filter_vsp = interp_vert_sp_c<4, W, H>; \
> + p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].filter_vss = interp_vert_ss_c<4, W, H>;
>
> #define CHROMA_444(W, H) \
> - p.chroma[X265_CSP_I444].filter_hpp[LUMA_ ## W ## x ## H] = interp_horiz_pp_c<4, W, H>; \
> - p.chroma[X265_CSP_I444].filter_hps[LUMA_ ## W ## x ## H] = interp_horiz_ps_c<4, W, H>; \
> - p.chroma[X265_CSP_I444].filter_vpp[LUMA_ ## W ## x ## H] = interp_vert_pp_c<4, W, H>; \
> - p.chroma[X265_CSP_I444].filter_vps[LUMA_ ## W ## x ## H] = interp_vert_ps_c<4, W, H>; \
> - p.chroma[X265_CSP_I444].filter_vsp[LUMA_ ## W ## x ## H] = interp_vert_sp_c<4, W, H>; \
> - p.chroma[X265_CSP_I444].filter_vss[LUMA_ ## W ## x ## H] = interp_vert_ss_c<4, W, H>;
> + p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_hpp = interp_horiz_pp_c<4, W, H>; \
> + p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_hps = interp_horiz_ps_c<4, W, H>; \
> + p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vpp = interp_vert_pp_c<4, W, H>; \
> + p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vps = interp_vert_ps_c<4, W, H>; \
> + p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vsp = interp_vert_sp_c<4, W, H>; \
> + p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vss = interp_vert_ss_c<4, W, H>;
>
> #define LUMA(W, H) \
> - p.luma_hpp[LUMA_ ## W ## x ## H] = interp_horiz_pp_c<8, W, H>; \
> - p.luma_hps[LUMA_ ## W ## x ## H] = interp_horiz_ps_c<8, W, H>; \
> - p.luma_vpp[LUMA_ ## W ## x ## H] = interp_vert_pp_c<8, W, H>; \
> - p.luma_vps[LUMA_ ## W ## x ## H] = interp_vert_ps_c<8, W, H>; \
> - p.luma_vsp[LUMA_ ## W ## x ## H] = interp_vert_sp_c<8, W, H>; \
> - p.luma_vss[LUMA_ ## W ## x ## H] = interp_vert_ss_c<8, W, H>; \
> - p.luma_hvpp[LUMA_ ## W ## x ## H] = interp_hv_pp_c<8, W, H>;
> + p.pu[LUMA_ ## W ## x ## H].luma_hpp = interp_horiz_pp_c<8, W, H>; \
> + p.pu[LUMA_ ## W ## x ## H].luma_hps = interp_horiz_ps_c<8, W, H>; \
> + p.pu[LUMA_ ## W ## x ## H].luma_vpp = interp_vert_pp_c<8, W, H>; \
> + p.pu[LUMA_ ## W ## x ## H].luma_vps = interp_vert_ps_c<8, W, H>; \
> + p.pu[LUMA_ ## W ## x ## H].luma_vsp = interp_vert_sp_c<8, W, H>; \
> + p.pu[LUMA_ ## W ## x ## H].luma_vss = interp_vert_ss_c<8, W, H>; \
> + p.pu[LUMA_ ## W ## x ## H].luma_hvpp = interp_hv_pp_c<8, W, H>;
>
> void Setup_C_IPFilterPrimitives(EncoderPrimitives& p)
> {
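
As an illustration of the macro change above, assuming the usual token pasting,
LUMA(16, 8) now expands to seven assignments on a single pu[] entry instead of
indexing seven parallel tables:

    p.pu[LUMA_16x8].luma_hpp = interp_horiz_pp_c<8, 16, 8>;
    p.pu[LUMA_16x8].luma_hps = interp_horiz_ps_c<8, 16, 8>;
    p.pu[LUMA_16x8].luma_vpp = interp_vert_pp_c<8, 16, 8>;
    /* ...and likewise for luma_vps, luma_vsp, luma_vss, luma_hvpp */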
> diff -r 1924c460d130 -r c6ca0fd54aa7 source/common/lowres.h
> --- a/source/common/lowres.h Fri Jan 09 11:35:26 2015 +0530
> +++ b/source/common/lowres.h Thu Jan 08 15:23:38 2015 -0600
> @@ -69,7 +69,7 @@
> int qmvy = qmv.y + (qmv.y & 1);
> int hpelB = (qmvy & 2) | ((qmvx & 2) >> 1);
> pixel *frefB = lowresPlane[hpelB] + blockOffset + (qmvx >> 2) + (qmvy >> 2) * lumaStride;
> - primitives.pixelavg_pp[LUMA_8x8](buf, outstride, frefA, lumaStride, frefB, lumaStride, 32);
> + primitives.pu[LUMA_8x8].pixelavg_pp(buf, outstride, frefA, lumaStride, frefB, lumaStride, 32);
> return buf;
> }
> else
> @@ -91,7 +91,7 @@
> int qmvy = qmv.y + (qmv.y & 1);
> int hpelB = (qmvy & 2) | ((qmvx & 2) >> 1);
> pixel *frefB = lowresPlane[hpelB] + blockOffset + (qmvx >> 2) + (qmvy >> 2) * lumaStride;
> - primitives.pixelavg_pp[LUMA_8x8](subpelbuf, 8, frefA, lumaStride, frefB, lumaStride, 32);
> + primitives.pu[LUMA_8x8].pixelavg_pp(subpelbuf, 8, frefA, lumaStride, frefB, lumaStride, 32);
> return comp(fenc, FENC_STRIDE, subpelbuf, 8);
> }
> else
> diff -r 1924c460d130 -r c6ca0fd54aa7 source/common/pixel.cpp
> --- a/source/common/pixel.cpp Fri Jan 09 11:35:26 2015 +0530
> +++ b/source/common/pixel.cpp Thu Jan 08 15:23:38 2015 -0600
> @@ -33,58 +33,58 @@
> using namespace x265;
>
> #define SET_FUNC_PRIMITIVE_TABLE_C(FUNC_PREFIX, FUNC_PREFIX_DEF, DATA_TYPE1, DATA_TYPE2) \
> - p.FUNC_PREFIX[LUMA_4x4] = FUNC_PREFIX_DEF<4, 4, DATA_TYPE1, DATA_TYPE2>; \
> - p.FUNC_PREFIX[LUMA_8x8] = FUNC_PREFIX_DEF<8, 8, DATA_TYPE1, DATA_TYPE2>; \
> - p.FUNC_PREFIX[LUMA_8x4] = FUNC_PREFIX_DEF<8, 4, DATA_TYPE1, DATA_TYPE2>; \
> - p.FUNC_PREFIX[LUMA_4x8] = FUNC_PREFIX_DEF<4, 8, DATA_TYPE1, DATA_TYPE2>; \
> - p.FUNC_PREFIX[LUMA_16x16] = FUNC_PREFIX_DEF<16, 16, DATA_TYPE1, DATA_TYPE2>; \
> - p.FUNC_PREFIX[LUMA_16x8] = FUNC_PREFIX_DEF<16, 8, DATA_TYPE1, DATA_TYPE2>; \
> - p.FUNC_PREFIX[LUMA_8x16] = FUNC_PREFIX_DEF<8, 16, DATA_TYPE1, DATA_TYPE2>; \
> - p.FUNC_PREFIX[LUMA_16x12] = FUNC_PREFIX_DEF<16, 12, DATA_TYPE1, DATA_TYPE2>; \
> - p.FUNC_PREFIX[LUMA_12x16] = FUNC_PREFIX_DEF<12, 16, DATA_TYPE1, DATA_TYPE2>; \
> - p.FUNC_PREFIX[LUMA_16x4] = FUNC_PREFIX_DEF<16, 4, DATA_TYPE1, DATA_TYPE2>; \
> - p.FUNC_PREFIX[LUMA_4x16] = FUNC_PREFIX_DEF<4, 16, DATA_TYPE1, DATA_TYPE2>; \
> - p.FUNC_PREFIX[LUMA_32x32] = FUNC_PREFIX_DEF<32, 32, DATA_TYPE1, DATA_TYPE2>; \
> - p.FUNC_PREFIX[LUMA_32x16] = FUNC_PREFIX_DEF<32, 16, DATA_TYPE1, DATA_TYPE2>; \
> - p.FUNC_PREFIX[LUMA_16x32] = FUNC_PREFIX_DEF<16, 32, DATA_TYPE1, DATA_TYPE2>; \
> - p.FUNC_PREFIX[LUMA_32x24] = FUNC_PREFIX_DEF<32, 24, DATA_TYPE1, DATA_TYPE2>; \
> - p.FUNC_PREFIX[LUMA_24x32] = FUNC_PREFIX_DEF<24, 32, DATA_TYPE1, DATA_TYPE2>; \
> - p.FUNC_PREFIX[LUMA_32x8] = FUNC_PREFIX_DEF<32, 8, DATA_TYPE1, DATA_TYPE2>; \
> - p.FUNC_PREFIX[LUMA_8x32] = FUNC_PREFIX_DEF<8, 32, DATA_TYPE1, DATA_TYPE2>; \
> - p.FUNC_PREFIX[LUMA_64x64] = FUNC_PREFIX_DEF<64, 64, DATA_TYPE1, DATA_TYPE2>; \
> - p.FUNC_PREFIX[LUMA_64x32] = FUNC_PREFIX_DEF<64, 32, DATA_TYPE1, DATA_TYPE2>; \
> - p.FUNC_PREFIX[LUMA_32x64] = FUNC_PREFIX_DEF<32, 64, DATA_TYPE1, DATA_TYPE2>; \
> - p.FUNC_PREFIX[LUMA_64x48] = FUNC_PREFIX_DEF<64, 48, DATA_TYPE1, DATA_TYPE2>; \
> - p.FUNC_PREFIX[LUMA_48x64] = FUNC_PREFIX_DEF<48, 64, DATA_TYPE1, DATA_TYPE2>; \
> - p.FUNC_PREFIX[LUMA_64x16] = FUNC_PREFIX_DEF<64, 16, DATA_TYPE1, DATA_TYPE2>; \
> - p.FUNC_PREFIX[LUMA_16x64] = FUNC_PREFIX_DEF<16, 64, DATA_TYPE1, DATA_TYPE2>;
> + p.pu[LUMA_4x4].FUNC_PREFIX = FUNC_PREFIX_DEF<4, 4, DATA_TYPE1, DATA_TYPE2>; \
> + p.pu[LUMA_8x8].FUNC_PREFIX = FUNC_PREFIX_DEF<8, 8, DATA_TYPE1, DATA_TYPE2>; \
> + p.pu[LUMA_8x4].FUNC_PREFIX = FUNC_PREFIX_DEF<8, 4, DATA_TYPE1, DATA_TYPE2>; \
> + p.pu[LUMA_4x8].FUNC_PREFIX = FUNC_PREFIX_DEF<4, 8, DATA_TYPE1, DATA_TYPE2>; \
> + p.pu[LUMA_16x16].FUNC_PREFIX = FUNC_PREFIX_DEF<16, 16, DATA_TYPE1, DATA_TYPE2>; \
> + p.pu[LUMA_16x8].FUNC_PREFIX = FUNC_PREFIX_DEF<16, 8, DATA_TYPE1, DATA_TYPE2>; \
> + p.pu[LUMA_8x16].FUNC_PREFIX = FUNC_PREFIX_DEF<8, 16, DATA_TYPE1, DATA_TYPE2>; \
> + p.pu[LUMA_16x12].FUNC_PREFIX = FUNC_PREFIX_DEF<16, 12, DATA_TYPE1, DATA_TYPE2>; \
> + p.pu[LUMA_12x16].FUNC_PREFIX = FUNC_PREFIX_DEF<12, 16, DATA_TYPE1, DATA_TYPE2>; \
> + p.pu[LUMA_16x4].FUNC_PREFIX = FUNC_PREFIX_DEF<16, 4, DATA_TYPE1, DATA_TYPE2>; \
> + p.pu[LUMA_4x16].FUNC_PREFIX = FUNC_PREFIX_DEF<4, 16, DATA_TYPE1, DATA_TYPE2>; \
> + p.pu[LUMA_32x32].FUNC_PREFIX = FUNC_PREFIX_DEF<32, 32, DATA_TYPE1, DATA_TYPE2>; \
> + p.pu[LUMA_32x16].FUNC_PREFIX = FUNC_PREFIX_DEF<32, 16, DATA_TYPE1, DATA_TYPE2>; \
> + p.pu[LUMA_16x32].FUNC_PREFIX = FUNC_PREFIX_DEF<16, 32, DATA_TYPE1, DATA_TYPE2>; \
> + p.pu[LUMA_32x24].FUNC_PREFIX = FUNC_PREFIX_DEF<32, 24, DATA_TYPE1, DATA_TYPE2>; \
> + p.pu[LUMA_24x32].FUNC_PREFIX = FUNC_PREFIX_DEF<24, 32, DATA_TYPE1, DATA_TYPE2>; \
> + p.pu[LUMA_32x8].FUNC_PREFIX = FUNC_PREFIX_DEF<32, 8, DATA_TYPE1, DATA_TYPE2>; \
> + p.pu[LUMA_8x32].FUNC_PREFIX = FUNC_PREFIX_DEF<8, 32, DATA_TYPE1, DATA_TYPE2>; \
> + p.pu[LUMA_64x64].FUNC_PREFIX = FUNC_PREFIX_DEF<64, 64, DATA_TYPE1, DATA_TYPE2>; \
> + p.pu[LUMA_64x32].FUNC_PREFIX = FUNC_PREFIX_DEF<64, 32, DATA_TYPE1, DATA_TYPE2>; \
> + p.pu[LUMA_32x64].FUNC_PREFIX = FUNC_PREFIX_DEF<32, 64, DATA_TYPE1, DATA_TYPE2>; \
> + p.pu[LUMA_64x48].FUNC_PREFIX = FUNC_PREFIX_DEF<64, 48, DATA_TYPE1, DATA_TYPE2>; \
> + p.pu[LUMA_48x64].FUNC_PREFIX = FUNC_PREFIX_DEF<48, 64, DATA_TYPE1, DATA_TYPE2>; \
> + p.pu[LUMA_64x16].FUNC_PREFIX = FUNC_PREFIX_DEF<64, 16, DATA_TYPE1, DATA_TYPE2>; \
> + p.pu[LUMA_16x64].FUNC_PREFIX = FUNC_PREFIX_DEF<16, 64, DATA_TYPE1, DATA_TYPE2>;
>
> #define SET_FUNC_PRIMITIVE_TABLE_C2(FUNC_PREFIX) \
> - p.FUNC_PREFIX[LUMA_4x4] = FUNC_PREFIX<4, 4>; \
> - p.FUNC_PREFIX[LUMA_8x8] = FUNC_PREFIX<8, 8>; \
> - p.FUNC_PREFIX[LUMA_8x4] = FUNC_PREFIX<8, 4>; \
> - p.FUNC_PREFIX[LUMA_4x8] = FUNC_PREFIX<4, 8>; \
> - p.FUNC_PREFIX[LUMA_16x16] = FUNC_PREFIX<16, 16>; \
> - p.FUNC_PREFIX[LUMA_16x8] = FUNC_PREFIX<16, 8>; \
> - p.FUNC_PREFIX[LUMA_8x16] = FUNC_PREFIX<8, 16>; \
> - p.FUNC_PREFIX[LUMA_16x12] = FUNC_PREFIX<16, 12>; \
> - p.FUNC_PREFIX[LUMA_12x16] = FUNC_PREFIX<12, 16>; \
> - p.FUNC_PREFIX[LUMA_16x4] = FUNC_PREFIX<16, 4>; \
> - p.FUNC_PREFIX[LUMA_4x16] = FUNC_PREFIX<4, 16>; \
> - p.FUNC_PREFIX[LUMA_32x32] = FUNC_PREFIX<32, 32>; \
> - p.FUNC_PREFIX[LUMA_32x16] = FUNC_PREFIX<32, 16>; \
> - p.FUNC_PREFIX[LUMA_16x32] = FUNC_PREFIX<16, 32>; \
> - p.FUNC_PREFIX[LUMA_32x24] = FUNC_PREFIX<32, 24>; \
> - p.FUNC_PREFIX[LUMA_24x32] = FUNC_PREFIX<24, 32>; \
> - p.FUNC_PREFIX[LUMA_32x8] = FUNC_PREFIX<32, 8>; \
> - p.FUNC_PREFIX[LUMA_8x32] = FUNC_PREFIX<8, 32>; \
> - p.FUNC_PREFIX[LUMA_64x64] = FUNC_PREFIX<64, 64>; \
> - p.FUNC_PREFIX[LUMA_64x32] = FUNC_PREFIX<64, 32>; \
> - p.FUNC_PREFIX[LUMA_32x64] = FUNC_PREFIX<32, 64>; \
> - p.FUNC_PREFIX[LUMA_64x48] = FUNC_PREFIX<64, 48>; \
> - p.FUNC_PREFIX[LUMA_48x64] = FUNC_PREFIX<48, 64>; \
> - p.FUNC_PREFIX[LUMA_64x16] = FUNC_PREFIX<64, 16>; \
> - p.FUNC_PREFIX[LUMA_16x64] = FUNC_PREFIX<16, 64>;
> + p.pu[LUMA_4x4].FUNC_PREFIX = FUNC_PREFIX<4, 4>; \
> + p.pu[LUMA_8x8].FUNC_PREFIX = FUNC_PREFIX<8, 8>; \
> + p.pu[LUMA_8x4].FUNC_PREFIX = FUNC_PREFIX<8, 4>; \
> + p.pu[LUMA_4x8].FUNC_PREFIX = FUNC_PREFIX<4, 8>; \
> + p.pu[LUMA_16x16].FUNC_PREFIX = FUNC_PREFIX<16, 16>; \
> + p.pu[LUMA_16x8].FUNC_PREFIX = FUNC_PREFIX<16, 8>; \
> + p.pu[LUMA_8x16].FUNC_PREFIX = FUNC_PREFIX<8, 16>; \
> + p.pu[LUMA_16x12].FUNC_PREFIX = FUNC_PREFIX<16, 12>; \
> + p.pu[LUMA_12x16].FUNC_PREFIX = FUNC_PREFIX<12, 16>; \
> + p.pu[LUMA_16x4].FUNC_PREFIX = FUNC_PREFIX<16, 4>; \
> + p.pu[LUMA_4x16].FUNC_PREFIX = FUNC_PREFIX<4, 16>; \
> + p.pu[LUMA_32x32].FUNC_PREFIX = FUNC_PREFIX<32, 32>; \
> + p.pu[LUMA_32x16].FUNC_PREFIX = FUNC_PREFIX<32, 16>; \
> + p.pu[LUMA_16x32].FUNC_PREFIX = FUNC_PREFIX<16, 32>; \
> + p.pu[LUMA_32x24].FUNC_PREFIX = FUNC_PREFIX<32, 24>; \
> + p.pu[LUMA_24x32].FUNC_PREFIX = FUNC_PREFIX<24, 32>; \
> + p.pu[LUMA_32x8].FUNC_PREFIX = FUNC_PREFIX<32, 8>; \
> + p.pu[LUMA_8x32].FUNC_PREFIX = FUNC_PREFIX<8, 32>; \
> + p.pu[LUMA_64x64].FUNC_PREFIX = FUNC_PREFIX<64, 64>; \
> + p.pu[LUMA_64x32].FUNC_PREFIX = FUNC_PREFIX<64, 32>; \
> + p.pu[LUMA_32x64].FUNC_PREFIX = FUNC_PREFIX<32, 64>; \
> + p.pu[LUMA_64x48].FUNC_PREFIX = FUNC_PREFIX<64, 48>; \
> + p.pu[LUMA_48x64].FUNC_PREFIX = FUNC_PREFIX<48, 64>; \
> + p.pu[LUMA_64x16].FUNC_PREFIX = FUNC_PREFIX<64, 16>; \
> + p.pu[LUMA_16x64].FUNC_PREFIX = FUNC_PREFIX<16, 64>;
>
> namespace {
> // place functions in anonymous namespace (file static)
> @@ -1019,132 +1019,132 @@
> SET_FUNC_PRIMITIVE_TABLE_C2(pixelavg_pp)
>
> // satd
> - p.satd[LUMA_4x4] = satd_4x4;
> - p.satd[LUMA_8x8] = satd8<8, 8>;
> - p.satd[LUMA_8x4] = satd_8x4;
> - p.satd[LUMA_4x8] = satd4<4, 8>;
> - p.satd[LUMA_16x16] = satd8<16, 16>;
> - p.satd[LUMA_16x8] = satd8<16, 8>;
> - p.satd[LUMA_8x16] = satd8<8, 16>;
> - p.satd[LUMA_16x12] = satd8<16, 12>;
> - p.satd[LUMA_12x16] = satd4<12, 16>;
> - p.satd[LUMA_16x4] = satd8<16, 4>;
> - p.satd[LUMA_4x16] = satd4<4, 16>;
> - p.satd[LUMA_32x32] = satd8<32, 32>;
> - p.satd[LUMA_32x16] = satd8<32, 16>;
> - p.satd[LUMA_16x32] = satd8<16, 32>;
> - p.satd[LUMA_32x24] = satd8<32, 24>;
> - p.satd[LUMA_24x32] = satd8<24, 32>;
> - p.satd[LUMA_32x8] = satd8<32, 8>;
> - p.satd[LUMA_8x32] = satd8<8, 32>;
> - p.satd[LUMA_64x64] = satd8<64, 64>;
> - p.satd[LUMA_64x32] = satd8<64, 32>;
> - p.satd[LUMA_32x64] = satd8<32, 64>;
> - p.satd[LUMA_64x48] = satd8<64, 48>;
> - p.satd[LUMA_48x64] = satd8<48, 64>;
> - p.satd[LUMA_64x16] = satd8<64, 16>;
> - p.satd[LUMA_16x64] = satd8<16, 64>;
> + p.pu[LUMA_4x4].satd = satd_4x4;
> + p.pu[LUMA_8x8].satd = satd8<8, 8>;
> + p.pu[LUMA_8x4].satd = satd_8x4;
> + p.pu[LUMA_4x8].satd = satd4<4, 8>;
> + p.pu[LUMA_16x16].satd = satd8<16, 16>;
> + p.pu[LUMA_16x8].satd = satd8<16, 8>;
> + p.pu[LUMA_8x16].satd = satd8<8, 16>;
> + p.pu[LUMA_16x12].satd = satd8<16, 12>;
> + p.pu[LUMA_12x16].satd = satd4<12, 16>;
> + p.pu[LUMA_16x4].satd = satd8<16, 4>;
> + p.pu[LUMA_4x16].satd = satd4<4, 16>;
> + p.pu[LUMA_32x32].satd = satd8<32, 32>;
> + p.pu[LUMA_32x16].satd = satd8<32, 16>;
> + p.pu[LUMA_16x32].satd = satd8<16, 32>;
> + p.pu[LUMA_32x24].satd = satd8<32, 24>;
> + p.pu[LUMA_24x32].satd = satd8<24, 32>;
> + p.pu[LUMA_32x8].satd = satd8<32, 8>;
> + p.pu[LUMA_8x32].satd = satd8<8, 32>;
> + p.pu[LUMA_64x64].satd = satd8<64, 64>;
> + p.pu[LUMA_64x32].satd = satd8<64, 32>;
> + p.pu[LUMA_32x64].satd = satd8<32, 64>;
> + p.pu[LUMA_64x48].satd = satd8<64, 48>;
> + p.pu[LUMA_48x64].satd = satd8<48, 64>;
> + p.pu[LUMA_64x16].satd = satd8<64, 16>;
> + p.pu[LUMA_16x64].satd = satd8<16, 64>;
>
> - p.chroma[X265_CSP_I420].satd[CHROMA_2x2] = NULL;
> - p.chroma[X265_CSP_I420].satd[CHROMA_4x4] = satd_4x4;
> - p.chroma[X265_CSP_I420].satd[CHROMA_8x8] = satd8<8, 8>;
> - p.chroma[X265_CSP_I420].satd[CHROMA_16x16] = satd8<16, 16>;
> - p.chroma[X265_CSP_I420].satd[CHROMA_32x32] = satd8<32, 32>;
> + p.chroma[X265_CSP_I420].pu[CHROMA_2x2].satd = NULL;
> + p.chroma[X265_CSP_I420].pu[CHROMA_4x4].satd = satd_4x4;
> + p.chroma[X265_CSP_I420].pu[CHROMA_8x8].satd = satd8<8, 8>;
> + p.chroma[X265_CSP_I420].pu[CHROMA_16x16].satd = satd8<16, 16>;
> + p.chroma[X265_CSP_I420].pu[CHROMA_32x32].satd = satd8<32, 32>;
>
> - p.chroma[X265_CSP_I420].satd[CHROMA_4x2] = NULL;
> - p.chroma[X265_CSP_I420].satd[CHROMA_2x4] = NULL;
> - p.chroma[X265_CSP_I420].satd[CHROMA_8x4] = satd_8x4;
> - p.chroma[X265_CSP_I420].satd[CHROMA_4x8] = satd4<4, 8>;
> - p.chroma[X265_CSP_I420].satd[CHROMA_16x8] = satd8<16, 8>;
> - p.chroma[X265_CSP_I420].satd[CHROMA_8x16] = satd8<8, 16>;
> - p.chroma[X265_CSP_I420].satd[CHROMA_32x16] = satd8<32, 16>;
> - p.chroma[X265_CSP_I420].satd[CHROMA_16x32] = satd8<16, 32>;
> + p.chroma[X265_CSP_I420].pu[CHROMA_4x2].satd = NULL;
> + p.chroma[X265_CSP_I420].pu[CHROMA_2x4].satd = NULL;
> + p.chroma[X265_CSP_I420].pu[CHROMA_8x4].satd = satd_8x4;
> + p.chroma[X265_CSP_I420].pu[CHROMA_4x8].satd = satd4<4, 8>;
> + p.chroma[X265_CSP_I420].pu[CHROMA_16x8].satd = satd8<16, 8>;
> + p.chroma[X265_CSP_I420].pu[CHROMA_8x16].satd = satd8<8, 16>;
> + p.chroma[X265_CSP_I420].pu[CHROMA_32x16].satd = satd8<32, 16>;
> + p.chroma[X265_CSP_I420].pu[CHROMA_16x32].satd = satd8<16, 32>;
>
> - p.chroma[X265_CSP_I420].satd[CHROMA_8x6] = NULL;
> - p.chroma[X265_CSP_I420].satd[CHROMA_6x8] = NULL;
> - p.chroma[X265_CSP_I420].satd[CHROMA_8x2] = NULL;
> - p.chroma[X265_CSP_I420].satd[CHROMA_2x8] = NULL;
> - p.chroma[X265_CSP_I420].satd[CHROMA_16x12] = satd4<16, 12>;
> - p.chroma[X265_CSP_I420].satd[CHROMA_12x16] = satd4<12, 16>;
> - p.chroma[X265_CSP_I420].satd[CHROMA_16x4] = satd4<16, 4>;
> - p.chroma[X265_CSP_I420].satd[CHROMA_4x16] = satd4<4, 16>;
> - p.chroma[X265_CSP_I420].satd[CHROMA_32x24] = satd8<32, 24>;
> - p.chroma[X265_CSP_I420].satd[CHROMA_24x32] = satd8<24, 32>;
> - p.chroma[X265_CSP_I420].satd[CHROMA_32x8] = satd8<32, 8>;
> - p.chroma[X265_CSP_I420].satd[CHROMA_8x32] = satd8<8, 32>;
> + p.chroma[X265_CSP_I420].pu[CHROMA_8x6].satd = NULL;
> + p.chroma[X265_CSP_I420].pu[CHROMA_6x8].satd = NULL;
> + p.chroma[X265_CSP_I420].pu[CHROMA_8x2].satd = NULL;
> + p.chroma[X265_CSP_I420].pu[CHROMA_2x8].satd = NULL;
> + p.chroma[X265_CSP_I420].pu[CHROMA_16x12].satd = satd4<16, 12>;
> + p.chroma[X265_CSP_I420].pu[CHROMA_12x16].satd = satd4<12, 16>;
> + p.chroma[X265_CSP_I420].pu[CHROMA_16x4].satd = satd4<16, 4>;
> + p.chroma[X265_CSP_I420].pu[CHROMA_4x16].satd = satd4<4, 16>;
> + p.chroma[X265_CSP_I420].pu[CHROMA_32x24].satd = satd8<32, 24>;
> + p.chroma[X265_CSP_I420].pu[CHROMA_24x32].satd = satd8<24, 32>;
> + p.chroma[X265_CSP_I420].pu[CHROMA_32x8].satd = satd8<32, 8>;
> + p.chroma[X265_CSP_I420].pu[CHROMA_8x32].satd = satd8<8, 32>;
>
> - p.chroma[X265_CSP_I422].satd[CHROMA422_2x4] = NULL;
> - p.chroma[X265_CSP_I422].satd[CHROMA422_4x8] = satd4<4, 8>;
> - p.chroma[X265_CSP_I422].satd[CHROMA422_8x16] = satd8<8, 16>;
> - p.chroma[X265_CSP_I422].satd[CHROMA422_16x32] = satd8<16, 32>;
> - p.chroma[X265_CSP_I422].satd[CHROMA422_32x64] = satd8<32, 64>;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_2x4].satd = NULL;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_4x8].satd = satd4<4, 8>;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_8x16].satd = satd8<8, 16>;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_16x32].satd = satd8<16, 32>;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_32x64].satd = satd8<32, 64>;
>
> - p.chroma[X265_CSP_I422].satd[CHROMA422_4x4] = satd_4x4;
> - p.chroma[X265_CSP_I422].satd[CHROMA422_2x8] = NULL;
> - p.chroma[X265_CSP_I422].satd[CHROMA422_8x8] = satd8<8, 8>;
> - p.chroma[X265_CSP_I422].satd[CHROMA422_4x16] = satd4<4, 16>;
> - p.chroma[X265_CSP_I422].satd[CHROMA422_16x16] = satd8<16, 16>;
> - p.chroma[X265_CSP_I422].satd[CHROMA422_8x32] = satd8<8, 32>;
> - p.chroma[X265_CSP_I422].satd[CHROMA422_32x32] = satd8<32, 32>;
> - p.chroma[X265_CSP_I422].satd[CHROMA422_16x64] = satd8<16, 64>;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_4x4].satd = satd_4x4;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_2x8].satd = NULL;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_8x8].satd = satd8<8, 8>;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_4x16].satd = satd4<4, 16>;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_16x16].satd = satd8<16, 16>;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_8x32].satd = satd8<8, 32>;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_32x32].satd = satd8<32, 32>;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_16x64].satd = satd8<16, 64>;
>
> - p.chroma[X265_CSP_I422].satd[CHROMA422_8x12] = satd4<8, 12>;
> - p.chroma[X265_CSP_I422].satd[CHROMA422_6x16] = NULL;
> - p.chroma[X265_CSP_I422].satd[CHROMA422_8x4] = satd4<8, 4>;
> - p.chroma[X265_CSP_I422].satd[CHROMA422_2x16] = NULL;
> - p.chroma[X265_CSP_I422].satd[CHROMA422_16x24] = satd8<16, 24>;
> - p.chroma[X265_CSP_I422].satd[CHROMA422_12x32] = satd4<12, 32>;
> - p.chroma[X265_CSP_I422].satd[CHROMA422_16x8] = satd8<16, 8>;
> - p.chroma[X265_CSP_I422].satd[CHROMA422_4x32] = satd4<4, 32>;
> - p.chroma[X265_CSP_I422].satd[CHROMA422_32x48] = satd8<32, 48>;
> - p.chroma[X265_CSP_I422].satd[CHROMA422_24x64] = satd8<24, 64>;
> - p.chroma[X265_CSP_I422].satd[CHROMA422_32x16] = satd8<32, 16>;
> - p.chroma[X265_CSP_I422].satd[CHROMA422_8x64] = satd8<8, 64>;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_8x12].satd = satd4<8, 12>;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_6x16].satd = NULL;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_8x4].satd = satd4<8, 4>;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_2x16].satd = NULL;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_16x24].satd = satd8<16, 24>;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_12x32].satd = satd4<12, 32>;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_16x8].satd = satd8<16, 8>;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_4x32].satd = satd4<4, 32>;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_32x48].satd = satd8<32, 48>;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_24x64].satd = satd8<24, 64>;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_32x16].satd = satd8<32, 16>;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_8x64].satd = satd8<8, 64>;
>
> #define CHROMA_420(W, H) \
> - p.chroma[X265_CSP_I420].addAvg[CHROMA_ ## W ## x ## H] = addAvg<W, H>; \
> - p.chroma[X265_CSP_I420].copy_pp[CHROMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
> - p.chroma[X265_CSP_I420].copy_sp[CHROMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
> - p.chroma[X265_CSP_I420].copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \
> - p.chroma[X265_CSP_I420].copy_ss[CHROMA_ ## W ## x ## H] = blockcopy_ss_c<W, H>;
> + p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].addAvg = addAvg<W, H>; \
> + p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].copy_pp = blockcopy_pp_c<W, H>; \
> + p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].copy_sp = blockcopy_sp_c<W, H>; \
> + p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].copy_ps = blockcopy_ps_c<W, H>; \
> + p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].copy_ss = blockcopy_ss_c<W, H>;
>
> #define CHROMA_422(W, H) \
> - p.chroma[X265_CSP_I422].addAvg[CHROMA422_ ## W ## x ## H] = addAvg<W, H>; \
> - p.chroma[X265_CSP_I422].copy_pp[CHROMA422_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
> - p.chroma[X265_CSP_I422].copy_sp[CHROMA422_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
> - p.chroma[X265_CSP_I422].copy_ps[CHROMA422_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \
> - p.chroma[X265_CSP_I422].copy_ss[CHROMA422_ ## W ## x ## H] = blockcopy_ss_c<W, H>;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].addAvg = addAvg<W, H>; \
> + p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].copy_pp = blockcopy_pp_c<W, H>; \
> + p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].copy_sp = blockcopy_sp_c<W, H>; \
> + p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].copy_ps = blockcopy_ps_c<W, H>; \
> + p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].copy_ss = blockcopy_ss_c<W, H>;
>
> #define CHROMA_444(W, H) \
> - p.chroma[X265_CSP_I444].satd[LUMA_ ## W ## x ## H] = p.satd[LUMA_ ## W ## x ## H]; \
> - p.chroma[X265_CSP_I444].addAvg[LUMA_ ## W ## x ## H] = addAvg<W, H>; \
> - p.chroma[X265_CSP_I444].copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
> - p.chroma[X265_CSP_I444].copy_sp[LUMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
> - p.chroma[X265_CSP_I444].copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \
> - p.chroma[X265_CSP_I444].copy_ss[LUMA_ ## W ## x ## H] = blockcopy_ss_c<W, H>;
> + p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].satd = p.pu[LUMA_ ## W ## x ## H].satd; \
> + p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].addAvg = addAvg<W, H>; \
> + p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].copy_pp = blockcopy_pp_c<W, H>; \
> + p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].copy_sp = blockcopy_sp_c<W, H>; \
> + p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].copy_ps = blockcopy_ps_c<W, H>; \
> + p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].copy_ss = blockcopy_ss_c<W, H>;
>
> #define LUMA(W, H) \
> - p.luma_addAvg[LUMA_ ## W ## x ## H] = addAvg<W, H>; \
> - p.luma_copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
> - p.luma_copy_sp[LUMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
> - p.luma_copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \
> - p.luma_copy_ss[LUMA_ ## W ## x ## H] = blockcopy_ss_c<W, H>;
> + p.pu[LUMA_ ## W ## x ## H].luma_addAvg = addAvg<W, H>; \
> + p.pu[LUMA_ ## W ## x ## H].luma_copy_pp = blockcopy_pp_c<W, H>; \
> + p.pu[LUMA_ ## W ## x ## H].luma_copy_sp = blockcopy_sp_c<W, H>; \
> + p.pu[LUMA_ ## W ## x ## H].luma_copy_ps = blockcopy_ps_c<W, H>; \
> + p.pu[LUMA_ ## W ## x ## H].luma_copy_ss = blockcopy_ss_c<W, H>;
>
> #define LUMA_PIXELSUB(W, H) \
> - p.luma_sub_ps[LUMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; \
> - p.luma_add_ps[LUMA_ ## W ## x ## H] = pixel_add_ps_c<W, H>;
> + p.pu[LUMA_ ## W ## x ## H].luma_sub_ps = pixel_sub_ps_c<W, H>; \
> + p.pu[LUMA_ ## W ## x ## H].luma_add_ps = pixel_add_ps_c<W, H>;
>
> #define CHROMA_PIXELSUB_420(W, H) \
> - p.chroma[X265_CSP_I420].sub_ps[CHROMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; \
> - p.chroma[X265_CSP_I420].add_ps[CHROMA_ ## W ## x ## H] = pixel_add_ps_c<W, H>;
> + p.chroma[X265_CSP_I420].cu[CHROMA_ ## W ## x ## H].sub_ps = pixel_sub_ps_c<W, H>; \
> + p.chroma[X265_CSP_I420].cu[CHROMA_ ## W ## x ## H].add_ps = pixel_add_ps_c<W, H>;
>
> #define CHROMA_PIXELSUB_422(W, H) \
> - p.chroma[X265_CSP_I422].sub_ps[CHROMA422_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; \
> - p.chroma[X265_CSP_I422].add_ps[CHROMA422_ ## W ## x ## H] = pixel_add_ps_c<W, H>;
> + p.chroma[X265_CSP_I422].cu[CHROMA422_ ## W ## x ## H].sub_ps = pixel_sub_ps_c<W, H>; \
> + p.chroma[X265_CSP_I422].cu[CHROMA422_ ## W ## x ## H].add_ps = pixel_add_ps_c<W, H>;
>
> #define CHROMA_PIXELSUB_444(W, H) \
> - p.chroma[X265_CSP_I444].sub_ps[LUMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; \
> - p.chroma[X265_CSP_I444].add_ps[LUMA_ ## W ## x ## H] = pixel_add_ps_c<W, H>;
> + p.chroma[X265_CSP_I444].cu[LUMA_ ## W ## x ## H].sub_ps = pixel_sub_ps_c<W, H>; \
> + p.chroma[X265_CSP_I444].cu[LUMA_ ## W ## x ## H].add_ps = pixel_add_ps_c<W, H>;
>
> LUMA(4, 4);
> LUMA(8, 8);
> @@ -1269,89 +1269,89 @@
> SET_FUNC_PRIMITIVE_TABLE_C(sse_sp, sse, int16_t, pixel)
> SET_FUNC_PRIMITIVE_TABLE_C(sse_ss, sse, int16_t, int16_t)
>
> - p.blockfill_s[BLOCK_4x4] = blockfil_s_c<4>;
> - p.blockfill_s[BLOCK_8x8] = blockfil_s_c<8>;
> - p.blockfill_s[BLOCK_16x16] = blockfil_s_c<16>;
> - p.blockfill_s[BLOCK_32x32] = blockfil_s_c<32>;
> - p.blockfill_s[BLOCK_64x64] = blockfil_s_c<64>;
> + p.cu[BLOCK_4x4].blockfill_s = blockfil_s_c<4>;
> + p.cu[BLOCK_8x8].blockfill_s = blockfil_s_c<8>;
> + p.cu[BLOCK_16x16].blockfill_s = blockfil_s_c<16>;
> + p.cu[BLOCK_32x32].blockfill_s = blockfil_s_c<32>;
> + p.cu[BLOCK_64x64].blockfill_s = blockfil_s_c<64>;
>
> - p.cpy2Dto1D_shl[BLOCK_4x4] = cpy2Dto1D_shl<4>;
> - p.cpy2Dto1D_shl[BLOCK_8x8] = cpy2Dto1D_shl<8>;
> - p.cpy2Dto1D_shl[BLOCK_16x16] = cpy2Dto1D_shl<16>;
> - p.cpy2Dto1D_shl[BLOCK_32x32] = cpy2Dto1D_shl<32>;
> - p.cpy2Dto1D_shr[BLOCK_4x4] = cpy2Dto1D_shr<4>;
> - p.cpy2Dto1D_shr[BLOCK_8x8] = cpy2Dto1D_shr<8>;
> - p.cpy2Dto1D_shr[BLOCK_16x16] = cpy2Dto1D_shr<16>;
> - p.cpy2Dto1D_shr[BLOCK_32x32] = cpy2Dto1D_shr<32>;
> - p.cpy1Dto2D_shl[BLOCK_4x4] = cpy1Dto2D_shl<4>;
> - p.cpy1Dto2D_shl[BLOCK_8x8] = cpy1Dto2D_shl<8>;
> - p.cpy1Dto2D_shl[BLOCK_16x16] = cpy1Dto2D_shl<16>;
> - p.cpy1Dto2D_shl[BLOCK_32x32] = cpy1Dto2D_shl<32>;
> - p.cpy1Dto2D_shr[BLOCK_4x4] = cpy1Dto2D_shr<4>;
> - p.cpy1Dto2D_shr[BLOCK_8x8] = cpy1Dto2D_shr<8>;
> - p.cpy1Dto2D_shr[BLOCK_16x16] = cpy1Dto2D_shr<16>;
> - p.cpy1Dto2D_shr[BLOCK_32x32] = cpy1Dto2D_shr<32>;
> + p.cu[BLOCK_4x4].cpy2Dto1D_shl = cpy2Dto1D_shl<4>;
> + p.cu[BLOCK_8x8].cpy2Dto1D_shl = cpy2Dto1D_shl<8>;
> + p.cu[BLOCK_16x16].cpy2Dto1D_shl = cpy2Dto1D_shl<16>;
> + p.cu[BLOCK_32x32].cpy2Dto1D_shl = cpy2Dto1D_shl<32>;
> + p.cu[BLOCK_4x4].cpy2Dto1D_shr = cpy2Dto1D_shr<4>;
> + p.cu[BLOCK_8x8].cpy2Dto1D_shr = cpy2Dto1D_shr<8>;
> + p.cu[BLOCK_16x16].cpy2Dto1D_shr = cpy2Dto1D_shr<16>;
> + p.cu[BLOCK_32x32].cpy2Dto1D_shr = cpy2Dto1D_shr<32>;
> + p.cu[BLOCK_4x4].cpy1Dto2D_shl = cpy1Dto2D_shl<4>;
> + p.cu[BLOCK_8x8].cpy1Dto2D_shl = cpy1Dto2D_shl<8>;
> + p.cu[BLOCK_16x16].cpy1Dto2D_shl = cpy1Dto2D_shl<16>;
> + p.cu[BLOCK_32x32].cpy1Dto2D_shl = cpy1Dto2D_shl<32>;
> + p.cu[BLOCK_4x4].cpy1Dto2D_shr = cpy1Dto2D_shr<4>;
> + p.cu[BLOCK_8x8].cpy1Dto2D_shr = cpy1Dto2D_shr<8>;
> + p.cu[BLOCK_16x16].cpy1Dto2D_shr = cpy1Dto2D_shr<16>;
> + p.cu[BLOCK_32x32].cpy1Dto2D_shr = cpy1Dto2D_shr<32>;
>
> - p.sa8d[BLOCK_4x4] = satd_4x4;
> - p.sa8d[BLOCK_8x8] = sa8d_8x8;
> - p.sa8d[BLOCK_16x16] = sa8d_16x16;
> - p.sa8d[BLOCK_32x32] = sa8d16<32, 32>;
> - p.sa8d[BLOCK_64x64] = sa8d16<64, 64>;
> + p.cu[BLOCK_4x4].sa8d = satd_4x4;
> + p.cu[BLOCK_8x8].sa8d = sa8d_8x8;
> + p.cu[BLOCK_16x16].sa8d = sa8d_16x16;
> + p.cu[BLOCK_32x32].sa8d = sa8d16<32, 32>;
> + p.cu[BLOCK_64x64].sa8d = sa8d16<64, 64>;
>
> - p.psy_cost_pp[BLOCK_4x4] = psyCost_pp<BLOCK_4x4>;
> - p.psy_cost_pp[BLOCK_8x8] = psyCost_pp<BLOCK_8x8>;
> - p.psy_cost_pp[BLOCK_16x16] = psyCost_pp<BLOCK_16x16>;
> - p.psy_cost_pp[BLOCK_32x32] = psyCost_pp<BLOCK_32x32>;
> - p.psy_cost_pp[BLOCK_64x64] = psyCost_pp<BLOCK_64x64>;
> + p.cu[BLOCK_4x4].psy_cost_pp = psyCost_pp<BLOCK_4x4>;
> + p.cu[BLOCK_8x8].psy_cost_pp = psyCost_pp<BLOCK_8x8>;
> + p.cu[BLOCK_16x16].psy_cost_pp = psyCost_pp<BLOCK_16x16>;
> + p.cu[BLOCK_32x32].psy_cost_pp = psyCost_pp<BLOCK_32x32>;
> + p.cu[BLOCK_64x64].psy_cost_pp = psyCost_pp<BLOCK_64x64>;
>
> - p.psy_cost_ss[BLOCK_4x4] = psyCost_ss<BLOCK_4x4>;
> - p.psy_cost_ss[BLOCK_8x8] = psyCost_ss<BLOCK_8x8>;
> - p.psy_cost_ss[BLOCK_16x16] = psyCost_ss<BLOCK_16x16>;
> - p.psy_cost_ss[BLOCK_32x32] = psyCost_ss<BLOCK_32x32>;
> - p.psy_cost_ss[BLOCK_64x64] = psyCost_ss<BLOCK_64x64>;
> + p.cu[BLOCK_4x4].psy_cost_ss = psyCost_ss<BLOCK_4x4>;
> + p.cu[BLOCK_8x8].psy_cost_ss = psyCost_ss<BLOCK_8x8>;
> + p.cu[BLOCK_16x16].psy_cost_ss = psyCost_ss<BLOCK_16x16>;
> + p.cu[BLOCK_32x32].psy_cost_ss = psyCost_ss<BLOCK_32x32>;
> + p.cu[BLOCK_64x64].psy_cost_ss = psyCost_ss<BLOCK_64x64>;
>
> - p.sa8d_inter[LUMA_4x4] = satd_4x4;
> - p.sa8d_inter[LUMA_8x8] = sa8d_8x8;
> - p.sa8d_inter[LUMA_8x4] = satd_8x4;
> - p.sa8d_inter[LUMA_4x8] = satd4<4, 8>;
> - p.sa8d_inter[LUMA_16x16] = sa8d_16x16;
> - p.sa8d_inter[LUMA_16x8] = sa8d8<16, 8>;
> - p.sa8d_inter[LUMA_8x16] = sa8d8<8, 16>;
> - p.sa8d_inter[LUMA_16x12] = satd8<16, 12>;
> - p.sa8d_inter[LUMA_12x16] = satd4<12, 16>;
> - p.sa8d_inter[LUMA_4x16] = satd4<4, 16>;
> - p.sa8d_inter[LUMA_16x4] = satd8<16, 4>;
> - p.sa8d_inter[LUMA_32x32] = sa8d16<32, 32>;
> - p.sa8d_inter[LUMA_32x16] = sa8d16<32, 16>;
> - p.sa8d_inter[LUMA_16x32] = sa8d16<16, 32>;
> - p.sa8d_inter[LUMA_32x24] = sa8d8<32, 24>;
> - p.sa8d_inter[LUMA_24x32] = sa8d8<24, 32>;
> - p.sa8d_inter[LUMA_32x8] = sa8d8<32, 8>;
> - p.sa8d_inter[LUMA_8x32] = sa8d8<8, 32>;
> - p.sa8d_inter[LUMA_64x64] = sa8d16<64, 64>;
> - p.sa8d_inter[LUMA_64x32] = sa8d16<64, 32>;
> - p.sa8d_inter[LUMA_32x64] = sa8d16<32, 64>;
> - p.sa8d_inter[LUMA_64x48] = sa8d16<64, 48>;
> - p.sa8d_inter[LUMA_48x64] = sa8d16<48, 64>;
> - p.sa8d_inter[LUMA_64x16] = sa8d16<64, 16>;
> - p.sa8d_inter[LUMA_16x64] = sa8d16<16, 64>;
> + p.pu[LUMA_4x4].sa8d_inter = satd_4x4;
> + p.pu[LUMA_8x8].sa8d_inter = sa8d_8x8;
> + p.pu[LUMA_8x4].sa8d_inter = satd_8x4;
> + p.pu[LUMA_4x8].sa8d_inter = satd4<4, 8>;
> + p.pu[LUMA_16x16].sa8d_inter = sa8d_16x16;
> + p.pu[LUMA_16x8].sa8d_inter = sa8d8<16, 8>;
> + p.pu[LUMA_8x16].sa8d_inter = sa8d8<8, 16>;
> + p.pu[LUMA_16x12].sa8d_inter = satd8<16, 12>;
> + p.pu[LUMA_12x16].sa8d_inter = satd4<12, 16>;
> + p.pu[LUMA_4x16].sa8d_inter = satd4<4, 16>;
> + p.pu[LUMA_16x4].sa8d_inter = satd8<16, 4>;
> + p.pu[LUMA_32x32].sa8d_inter = sa8d16<32, 32>;
> + p.pu[LUMA_32x16].sa8d_inter = sa8d16<32, 16>;
> + p.pu[LUMA_16x32].sa8d_inter = sa8d16<16, 32>;
> + p.pu[LUMA_32x24].sa8d_inter = sa8d8<32, 24>;
> + p.pu[LUMA_24x32].sa8d_inter = sa8d8<24, 32>;
> + p.pu[LUMA_32x8].sa8d_inter = sa8d8<32, 8>;
> + p.pu[LUMA_8x32].sa8d_inter = sa8d8<8, 32>;
> + p.pu[LUMA_64x64].sa8d_inter = sa8d16<64, 64>;
> + p.pu[LUMA_64x32].sa8d_inter = sa8d16<64, 32>;
> + p.pu[LUMA_32x64].sa8d_inter = sa8d16<32, 64>;
> + p.pu[LUMA_64x48].sa8d_inter = sa8d16<64, 48>;
> + p.pu[LUMA_48x64].sa8d_inter = sa8d16<48, 64>;
> + p.pu[LUMA_64x16].sa8d_inter = sa8d16<64, 16>;
> + p.pu[LUMA_16x64].sa8d_inter = sa8d16<16, 64>;
>
> - p.calcresidual[BLOCK_4x4] = getResidual<4>;
> - p.calcresidual[BLOCK_8x8] = getResidual<8>;
> - p.calcresidual[BLOCK_16x16] = getResidual<16>;
> - p.calcresidual[BLOCK_32x32] = getResidual<32>;
> - p.calcresidual[BLOCK_64x64] = NULL;
> + p.cu[BLOCK_4x4].calcresidual = getResidual<4>;
> + p.cu[BLOCK_8x8].calcresidual = getResidual<8>;
> + p.cu[BLOCK_16x16].calcresidual = getResidual<16>;
> + p.cu[BLOCK_32x32].calcresidual = getResidual<32>;
> + p.cu[BLOCK_64x64].calcresidual = NULL;
>
> - p.transpose[BLOCK_4x4] = transpose<4>;
> - p.transpose[BLOCK_8x8] = transpose<8>;
> - p.transpose[BLOCK_16x16] = transpose<16>;
> - p.transpose[BLOCK_32x32] = transpose<32>;
> - p.transpose[BLOCK_64x64] = transpose<64>;
> + p.cu[BLOCK_4x4].transpose = transpose<4>;
> + p.cu[BLOCK_8x8].transpose = transpose<8>;
> + p.cu[BLOCK_16x16].transpose = transpose<16>;
> + p.cu[BLOCK_32x32].transpose = transpose<32>;
> + p.cu[BLOCK_64x64].transpose = transpose<64>;
>
> - p.ssd_s[BLOCK_4x4] = pixel_ssd_s_c<4>;
> - p.ssd_s[BLOCK_8x8] = pixel_ssd_s_c<8>;
> - p.ssd_s[BLOCK_16x16] = pixel_ssd_s_c<16>;
> - p.ssd_s[BLOCK_32x32] = pixel_ssd_s_c<32>;
> + p.cu[BLOCK_4x4].ssd_s = pixel_ssd_s_c<4>;
> + p.cu[BLOCK_8x8].ssd_s = pixel_ssd_s_c<8>;
> + p.cu[BLOCK_16x16].ssd_s = pixel_ssd_s_c<16>;
> + p.cu[BLOCK_32x32].ssd_s = pixel_ssd_s_c<32>;
>
> p.weight_pp = weight_pp_c;
> p.weight_sp = weight_sp_c;
> @@ -1362,10 +1362,10 @@
> p.ssim_4x4x2_core = ssim_4x4x2_core;
> p.ssim_end_4 = ssim_end_4;
>
> - p.var[BLOCK_8x8] = pixel_var<8>;
> - p.var[BLOCK_16x16] = pixel_var<16>;
> - p.var[BLOCK_32x32] = pixel_var<32>;
> - p.var[BLOCK_64x64] = pixel_var<64>;
> + p.cu[BLOCK_8x8].var = pixel_var<8>;
> + p.cu[BLOCK_16x16].var = pixel_var<16>;
> + p.cu[BLOCK_32x32].var = pixel_var<32>;
> + p.cu[BLOCK_64x64].var = pixel_var<64>;
> p.planecopy_cp = planecopy_cp_c;
> p.planecopy_sp = planecopy_sp_c;
> p.propagateCost = estimateCUPropagateCost;
> diff -r 1924c460d130 -r c6ca0fd54aa7 source/common/predict.cpp
> --- a/source/common/predict.cpp Fri Jan 09 11:35:26 2015 +0530
> +++ b/source/common/predict.cpp Thu Jan 08 15:23:38 2015 -0600
> @@ -334,13 +334,13 @@
> int yFrac = mv.y & 0x3;
>
> if (!(yFrac | xFrac))
> - primitives.luma_copy_pp[partEnum](dst, dstStride, src, srcStride);
> + primitives.pu[partEnum].luma_copy_pp(dst, dstStride, src, srcStride);
> else if (!yFrac)
> - primitives.luma_hpp[partEnum](src, srcStride, dst, dstStride, xFrac);
> + primitives.pu[partEnum].luma_hpp(src, srcStride, dst, dstStride, xFrac);
> else if (!xFrac)
> - primitives.luma_vpp[partEnum](src, srcStride, dst, dstStride, yFrac);
> + primitives.pu[partEnum].luma_vpp(src, srcStride, dst, dstStride, yFrac);
> else
> - primitives.luma_hvpp[partEnum](src, srcStride, dst, dstStride, xFrac, yFrac);
> + primitives.pu[partEnum].luma_hvpp(src, srcStride, dst, dstStride, xFrac, yFrac);
> }
>
> void Predict::predInterLumaShort(ShortYuv& dstSYuv, const PicYuv& refPic, const MV& mv) const
> @@ -363,16 +363,16 @@
> if (!(yFrac | xFrac))
> primitives.luma_p2s(src, srcStride, dst, m_puWidth, m_puHeight);
> else if (!yFrac)
> - primitives.luma_hps[partEnum](src, srcStride, dst, dstStride, xFrac, 0);
> + primitives.pu[partEnum].luma_hps(src, srcStride, dst, dstStride, xFrac, 0);
> else if (!xFrac)
> - primitives.luma_vps[partEnum](src, srcStride, dst, dstStride, yFrac);
> + primitives.pu[partEnum].luma_vps(src, srcStride, dst, dstStride, yFrac);
> else
> {
> int tmpStride = m_puWidth;
> int filterSize = NTAPS_LUMA;
> int halfFilterSize = (filterSize >> 1);
> - primitives.luma_hps[partEnum](src, srcStride, m_immedVals, tmpStride, xFrac, 1);
> - primitives.luma_vss[partEnum](m_immedVals + (halfFilterSize - 1) * tmpStride, tmpStride, dst, dstStride, yFrac);
> + primitives.pu[partEnum].luma_hps(src, srcStride, m_immedVals, tmpStride, xFrac, 1);
> + primitives.pu[partEnum].luma_vss(m_immedVals + (halfFilterSize - 1) * tmpStride, tmpStride, dst, dstStride, yFrac);
> }
> }
>
> @@ -399,18 +399,18 @@
>
> if (!(yFrac | xFrac))
> {
> - primitives.chroma[m_csp].copy_pp[partEnum](dstCb, dstStride, refCb, refStride);
> - primitives.chroma[m_csp].copy_pp[partEnum](dstCr, dstStride, refCr, refStride);
> + primitives.chroma[m_csp].pu[partEnum].copy_pp(dstCb, dstStride, refCb, refStride);
> + primitives.chroma[m_csp].pu[partEnum].copy_pp(dstCr, dstStride, refCr, refStride);
> }
> else if (!yFrac)
> {
> - primitives.chroma[m_csp].filter_hpp[partEnum](refCb, refStride, dstCb, dstStride, xFrac << (1 - m_hChromaShift));
> - primitives.chroma[m_csp].filter_hpp[partEnum](refCr, refStride, dstCr, dstStride, xFrac << (1 - m_hChromaShift));
> + primitives.chroma[m_csp].pu[partEnum].filter_hpp(refCb, refStride, dstCb, dstStride, xFrac << (1 - m_hChromaShift));
> + primitives.chroma[m_csp].pu[partEnum].filter_hpp(refCr, refStride, dstCr, dstStride, xFrac << (1 - m_hChromaShift));
> }
> else if (!xFrac)
> {
> - primitives.chroma[m_csp].filter_vpp[partEnum](refCb, refStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift));
> - primitives.chroma[m_csp].filter_vpp[partEnum](refCr, refStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift));
> + primitives.chroma[m_csp].pu[partEnum].filter_vpp(refCb, refStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift));
> + primitives.chroma[m_csp].pu[partEnum].filter_vpp(refCr, refStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift));
> }
> else
> {
> @@ -418,11 +418,11 @@
> int filterSize = NTAPS_CHROMA;
> int halfFilterSize = (filterSize >> 1);
>
> - primitives.chroma[m_csp].filter_hps[partEnum](refCb, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
> - primitives.chroma[m_csp].filter_vsp[partEnum](m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift));
> + primitives.chroma[m_csp].pu[partEnum].filter_hps(refCb, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
> + primitives.chroma[m_csp].pu[partEnum].filter_vsp(m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift));
>
> - primitives.chroma[m_csp].filter_hps[partEnum](refCr, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
> - primitives.chroma[m_csp].filter_vsp[partEnum](m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift));
> + primitives.chroma[m_csp].pu[partEnum].filter_hps(refCr, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
> + primitives.chroma[m_csp].pu[partEnum].filter_vsp(m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift));
> }
> }
>
> @@ -459,23 +459,23 @@
> }
> else if (!yFrac)
> {
> - primitives.chroma[m_csp].filter_hps[partEnum](refCb, refStride, dstCb, dstStride, xFrac << (1 - m_hChromaShift), 0);
> - primitives.chroma[m_csp].filter_hps[partEnum](refCr, refStride, dstCr, dstStride, xFrac << (1 - m_hChromaShift), 0);
> + primitives.chroma[m_csp].pu[partEnum].filter_hps(refCb, refStride, dstCb, dstStride, xFrac << (1 - m_hChromaShift), 0);
> + primitives.chroma[m_csp].pu[partEnum].filter_hps(refCr, refStride, dstCr, dstStride, xFrac << (1 - m_hChromaShift), 0);
> }
> else if (!xFrac)
> {
> - primitives.chroma[m_csp].filter_vps[partEnum](refCb, refStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift));
> - primitives.chroma[m_csp].filter_vps[partEnum](refCr, refStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift));
> + primitives.chroma[m_csp].pu[partEnum].filter_vps(refCb, refStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift));
> + primitives.chroma[m_csp].pu[partEnum].filter_vps(refCr, refStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift));
> }
> else
> {
> int extStride = cxWidth;
> int filterSize = NTAPS_CHROMA;
> int halfFilterSize = (filterSize >> 1);
> - primitives.chroma[m_csp].filter_hps[partEnum](refCb, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
> - primitives.chroma[m_csp].filter_vss[partEnum](m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift));
> - primitives.chroma[m_csp].filter_hps[partEnum](refCr, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
> - primitives.chroma[m_csp].filter_vss[partEnum](m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift));
> + primitives.chroma[m_csp].pu[partEnum].filter_hps(refCb, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
> + primitives.chroma[m_csp].pu[partEnum].filter_vss(m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift));
> + primitives.chroma[m_csp].pu[partEnum].filter_hps(refCr, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
> + primitives.chroma[m_csp].pu[partEnum].filter_vss(m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift));
> }
> }
>
> diff -r 1924c460d130 -r c6ca0fd54aa7 source/common/primitives.cpp
> --- a/source/common/primitives.cpp Fri Jan 09 11:35:26 2015 +0530
> +++ b/source/common/primitives.cpp Thu Jan 08 15:23:38 2015 -0600
> @@ -71,79 +71,79 @@
> /* copy reusable luma primitives to chroma 4:4:4 */
> for (int i = 0; i < NUM_LUMA_PARTITIONS; i++)
> {
> - p.chroma[X265_CSP_I444].copy_pp[i] = p.luma_copy_pp[i];
> - p.chroma[X265_CSP_I444].copy_ps[i] = p.luma_copy_ps[i];
> - p.chroma[X265_CSP_I444].copy_sp[i] = p.luma_copy_sp[i];
> - p.chroma[X265_CSP_I444].copy_ss[i] = p.luma_copy_ss[i];
> - p.chroma[X265_CSP_I444].addAvg[i] = p.luma_addAvg[i];
> - p.chroma[X265_CSP_I444].satd[i] = p.satd[i];
> + p.chroma[X265_CSP_I444].pu[i].copy_pp = p.pu[i].luma_copy_pp;
> + p.chroma[X265_CSP_I444].pu[i].copy_ps = p.pu[i].luma_copy_ps;
> + p.chroma[X265_CSP_I444].pu[i].copy_sp = p.pu[i].luma_copy_sp;
> + p.chroma[X265_CSP_I444].pu[i].copy_ss = p.pu[i].luma_copy_ss;
> + p.chroma[X265_CSP_I444].pu[i].addAvg = p.pu[i].luma_addAvg;
> + p.chroma[X265_CSP_I444].pu[i].satd = p.pu[i].satd;
> }
>
> for (int i = 0; i < NUM_SQUARE_BLOCKS; i++)
> {
> - p.chroma[X265_CSP_I444].add_ps[i] = p.luma_add_ps[i];
> - p.chroma[X265_CSP_I444].sub_ps[i] = p.luma_sub_ps[i];
> + p.chroma[X265_CSP_I444].cu[i].add_ps = p.pu[i].luma_add_ps;
> + p.chroma[X265_CSP_I444].cu[i].sub_ps = p.pu[i].luma_sub_ps;
> }
>
> - primitives.sa8d[BLOCK_4x4] = primitives.satd[LUMA_4x4];
> - primitives.sa8d[BLOCK_8x8] = primitives.sa8d_inter[LUMA_8x8];
> - primitives.sa8d[BLOCK_16x16] = primitives.sa8d_inter[LUMA_16x16];
> - primitives.sa8d[BLOCK_32x32] = primitives.sa8d_inter[LUMA_32x32];
> - primitives.sa8d[BLOCK_64x64] = primitives.sa8d_inter[LUMA_64x64];
> + primitives.cu[BLOCK_4x4].sa8d = primitives.pu[LUMA_4x4].satd;
> + primitives.cu[BLOCK_8x8].sa8d = primitives.pu[LUMA_8x8].sa8d_inter;
> + primitives.cu[BLOCK_16x16].sa8d = primitives.pu[LUMA_16x16].sa8d_inter;
> + primitives.cu[BLOCK_32x32].sa8d = primitives.pu[LUMA_32x32].sa8d_inter;
> + primitives.cu[BLOCK_64x64].sa8d = primitives.pu[LUMA_64x64].sa8d_inter;
>
> // SA8D devolves to SATD for blocks not even multiples of 8x8
> - primitives.sa8d_inter[LUMA_4x4] = primitives.satd[LUMA_4x4];
> - primitives.sa8d_inter[LUMA_4x8] = primitives.satd[LUMA_4x8];
> - primitives.sa8d_inter[LUMA_4x16] = primitives.satd[LUMA_4x16];
> - primitives.sa8d_inter[LUMA_8x4] = primitives.satd[LUMA_8x4];
> - primitives.sa8d_inter[LUMA_16x4] = primitives.satd[LUMA_16x4];
> - primitives.sa8d_inter[LUMA_16x12] = primitives.satd[LUMA_16x12];
> - primitives.sa8d_inter[LUMA_12x16] = primitives.satd[LUMA_12x16];
> + primitives.pu[LUMA_4x4].sa8d_inter = primitives.pu[LUMA_4x4].satd;
> + primitives.pu[LUMA_4x8].sa8d_inter = primitives.pu[LUMA_4x8].satd;
> + primitives.pu[LUMA_4x16].sa8d_inter = primitives.pu[LUMA_4x16].satd;
> + primitives.pu[LUMA_8x4].sa8d_inter = primitives.pu[LUMA_8x4].satd;
> + primitives.pu[LUMA_16x4].sa8d_inter = primitives.pu[LUMA_16x4].satd;
> + primitives.pu[LUMA_16x12].sa8d_inter = primitives.pu[LUMA_16x12].satd;
> + primitives.pu[LUMA_12x16].sa8d_inter = primitives.pu[LUMA_12x16].satd;
>
> // Chroma SATD can often reuse luma primitives
> - p.chroma[X265_CSP_I420].satd[CHROMA_4x4] = primitives.satd[LUMA_4x4];
> - p.chroma[X265_CSP_I420].satd[CHROMA_8x8] = primitives.satd[LUMA_8x8];
> - p.chroma[X265_CSP_I420].satd[CHROMA_16x16] = primitives.satd[LUMA_16x16];
> - p.chroma[X265_CSP_I420].satd[CHROMA_32x32] = primitives.satd[LUMA_32x32];
> + p.chroma[X265_CSP_I420].pu[CHROMA_4x4].satd = primitives.pu[LUMA_4x4].satd;
> + p.chroma[X265_CSP_I420].pu[CHROMA_8x8].satd = primitives.pu[LUMA_8x8].satd;
> + p.chroma[X265_CSP_I420].pu[CHROMA_16x16].satd = primitives.pu[LUMA_16x16].satd;
> + p.chroma[X265_CSP_I420].pu[CHROMA_32x32].satd = primitives.pu[LUMA_32x32].satd;
>
> - p.chroma[X265_CSP_I420].satd[CHROMA_8x4] = primitives.satd[LUMA_8x4];
> - p.chroma[X265_CSP_I420].satd[CHROMA_4x8] = primitives.satd[LUMA_4x8];
> - p.chroma[X265_CSP_I420].satd[CHROMA_16x8] = primitives.satd[LUMA_16x8];
> - p.chroma[X265_CSP_I420].satd[CHROMA_8x16] = primitives.satd[LUMA_8x16];
> - p.chroma[X265_CSP_I420].satd[CHROMA_32x16] = primitives.satd[LUMA_32x16];
> - p.chroma[X265_CSP_I420].satd[CHROMA_16x32] = primitives.satd[LUMA_16x32];
> + p.chroma[X265_CSP_I420].pu[CHROMA_8x4].satd = primitives.pu[LUMA_8x4].satd;
> + p.chroma[X265_CSP_I420].pu[CHROMA_4x8].satd = primitives.pu[LUMA_4x8].satd;
> + p.chroma[X265_CSP_I420].pu[CHROMA_16x8].satd = primitives.pu[LUMA_16x8].satd;
> + p.chroma[X265_CSP_I420].pu[CHROMA_8x16].satd = primitives.pu[LUMA_8x16].satd;
> + p.chroma[X265_CSP_I420].pu[CHROMA_32x16].satd = primitives.pu[LUMA_32x16].satd;
> + p.chroma[X265_CSP_I420].pu[CHROMA_16x32].satd = primitives.pu[LUMA_16x32].satd;
>
> - p.chroma[X265_CSP_I420].satd[CHROMA_16x12] = primitives.satd[LUMA_16x12];
> - p.chroma[X265_CSP_I420].satd[CHROMA_12x16] = primitives.satd[LUMA_12x16];
> - p.chroma[X265_CSP_I420].satd[CHROMA_16x4] = primitives.satd[LUMA_16x4];
> - p.chroma[X265_CSP_I420].satd[CHROMA_4x16] = primitives.satd[LUMA_4x16];
> - p.chroma[X265_CSP_I420].satd[CHROMA_32x24] = primitives.satd[LUMA_32x24];
> - p.chroma[X265_CSP_I420].satd[CHROMA_24x32] = primitives.satd[LUMA_24x32];
> - p.chroma[X265_CSP_I420].satd[CHROMA_32x8] = primitives.satd[LUMA_32x8];
> - p.chroma[X265_CSP_I420].satd[CHROMA_8x32] = primitives.satd[LUMA_8x32];
> + p.chroma[X265_CSP_I420].pu[CHROMA_16x12].satd = primitives.pu[LUMA_16x12].satd;
> + p.chroma[X265_CSP_I420].pu[CHROMA_12x16].satd = primitives.pu[LUMA_12x16].satd;
> + p.chroma[X265_CSP_I420].pu[CHROMA_16x4].satd = primitives.pu[LUMA_16x4].satd;
> + p.chroma[X265_CSP_I420].pu[CHROMA_4x16].satd = primitives.pu[LUMA_4x16].satd;
> + p.chroma[X265_CSP_I420].pu[CHROMA_32x24].satd = primitives.pu[LUMA_32x24].satd;
> + p.chroma[X265_CSP_I420].pu[CHROMA_24x32].satd = primitives.pu[LUMA_24x32].satd;
> + p.chroma[X265_CSP_I420].pu[CHROMA_32x8].satd = primitives.pu[LUMA_32x8].satd;
> + p.chroma[X265_CSP_I420].pu[CHROMA_8x32].satd = primitives.pu[LUMA_8x32].satd;
>
> - p.chroma[X265_CSP_I422].satd[CHROMA422_4x8] = primitives.satd[LUMA_4x8];
> - p.chroma[X265_CSP_I422].satd[CHROMA422_8x16] = primitives.satd[LUMA_8x16];
> - p.chroma[X265_CSP_I422].satd[CHROMA422_16x32] = primitives.satd[LUMA_16x32];
> - p.chroma[X265_CSP_I422].satd[CHROMA422_32x64] = primitives.satd[LUMA_32x64];
> + p.chroma[X265_CSP_I422].pu[CHROMA422_4x8].satd = primitives.pu[LUMA_4x8].satd;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_8x16].satd = primitives.pu[LUMA_8x16].satd;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_16x32].satd = primitives.pu[LUMA_16x32].satd;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_32x64].satd = primitives.pu[LUMA_32x64].satd;
>
> - p.chroma[X265_CSP_I422].satd[CHROMA422_4x4] = primitives.satd[LUMA_4x4];
> - p.chroma[X265_CSP_I422].satd[CHROMA422_8x8] = primitives.satd[LUMA_8x8];
> - p.chroma[X265_CSP_I422].satd[CHROMA422_4x16] = primitives.satd[LUMA_4x16];
> - p.chroma[X265_CSP_I422].satd[CHROMA422_16x16] = primitives.satd[LUMA_16x16];
> - p.chroma[X265_CSP_I422].satd[CHROMA422_8x32] = primitives.satd[LUMA_8x32];
> - p.chroma[X265_CSP_I422].satd[CHROMA422_32x32] = primitives.satd[LUMA_32x32];
> - p.chroma[X265_CSP_I422].satd[CHROMA422_16x64] = primitives.satd[LUMA_16x64];
> + p.chroma[X265_CSP_I422].pu[CHROMA422_4x4].satd = primitives.pu[LUMA_4x4].satd;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_8x8].satd = primitives.pu[LUMA_8x8].satd;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_4x16].satd = primitives.pu[LUMA_4x16].satd;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_16x16].satd = primitives.pu[LUMA_16x16].satd;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_8x32].satd = primitives.pu[LUMA_8x32].satd;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_32x32].satd = primitives.pu[LUMA_32x32].satd;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_16x64].satd = primitives.pu[LUMA_16x64].satd;
>
> //p.chroma[X265_CSP_I422].satd[CHROMA422_8x12] = satd4<8, 12>;
> - p.chroma[X265_CSP_I422].satd[CHROMA422_8x4] = primitives.satd[LUMA_8x4];
> + p.chroma[X265_CSP_I422].pu[CHROMA422_8x4].satd = primitives.pu[LUMA_8x4].satd;
> //p.chroma[X265_CSP_I422].satd[CHROMA422_16x24] = satd8<16, 24>;
> //p.chroma[X265_CSP_I422].satd[CHROMA422_12x32] = satd4<12, 32>;
> - p.chroma[X265_CSP_I422].satd[CHROMA422_16x8] = primitives.satd[LUMA_16x8];
> + p.chroma[X265_CSP_I422].pu[CHROMA422_16x8].satd = primitives.pu[LUMA_16x8].satd;
> //p.chroma[X265_CSP_I422].satd[CHROMA422_4x32] = satd4<4, 32>;
> //p.chroma[X265_CSP_I422].satd[CHROMA422_32x48] = satd8<32, 48>;
> //p.chroma[X265_CSP_I422].satd[CHROMA422_24x64] = satd8<24, 64>;
> - p.chroma[X265_CSP_I422].satd[CHROMA422_32x16] = primitives.satd[LUMA_32x16];
> + p.chroma[X265_CSP_I422].pu[CHROMA422_32x16].satd = primitives.pu[LUMA_32x16].satd;
> //p.chroma[X265_CSP_I422].satd[CHROMA422_8x64] = satd8<8, 64>;
> }
> }
> @@ -158,7 +158,7 @@
> cpuid = x265::cpu_detect();
>
> // initialize global variables
> - if (!primitives.sad[0])
> + if (!primitives.pu[0].sad)
> {
> Setup_C_Primitives(primitives);
>
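A small detail that keeps working after the reshuffle: the "already initialized"
check above becomes `if (!primitives.pu[0].sad)`, which still tests the same thing
as the old `if (!primitives.sad[0])` since pu[0] is LUMA_4x4 and sad is the first
member of the new PU struct.
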
> diff -r 1924c460d130 -r c6ca0fd54aa7 source/common/primitives.h
> --- a/source/common/primitives.h Fri Jan 09 11:35:26 2015 +0530
> +++ b/source/common/primitives.h Thu Jan 08 15:23:38 2015 -0600
> @@ -42,7 +42,7 @@
> LUMA_4x4, LUMA_8x8, LUMA_16x16, LUMA_32x32, LUMA_64x64,
> // Rectangular
> LUMA_8x4, LUMA_4x8,
> - LUMA_16x8, LUMA_8x16,
> + LUMA_16x8, LUMA_8x16,
> LUMA_32x16, LUMA_16x32,
> LUMA_64x32, LUMA_32x64,
> // Asymmetrical (0.75, 0.25)
> @@ -206,42 +206,76 @@
> * a vectorized primitive, or a C function. */
> struct EncoderPrimitives
> {
> - pixelcmp_t sad[NUM_LUMA_PARTITIONS]; // Sum of Differences for each size
> - pixelcmp_x3_t sad_x3[NUM_LUMA_PARTITIONS]; // Sum of Differences 3x for each size
> - pixelcmp_x4_t sad_x4[NUM_LUMA_PARTITIONS]; // Sum of Differences 4x for each size
> - pixelcmp_t sse_pp[NUM_LUMA_PARTITIONS]; // Sum of Square Error (pixel, pixel) fenc alignment not assumed
> - pixelcmp_ss_t sse_ss[NUM_LUMA_PARTITIONS]; // Sum of Square Error (short, short) fenc alignment not assumed
> - pixelcmp_sp_t sse_sp[NUM_LUMA_PARTITIONS]; // Sum of Square Error (short, pixel) fenc alignment not assumed
> - pixel_ssd_s_t ssd_s[NUM_SQUARE_BLOCKS - 1]; // Sum of Square Error (short) fenc alignment not assumed
> - pixelcmp_t satd[NUM_LUMA_PARTITIONS]; // Sum of Transformed differences (HADAMARD)
> - pixelcmp_t sa8d_inter[NUM_LUMA_PARTITIONS]; // sa8d primitives for motion search partitions
> - pixelcmp_t sa8d[NUM_SQUARE_BLOCKS]; // sa8d primitives for square intra blocks
> - pixelcmp_t psy_cost_pp[NUM_SQUARE_BLOCKS]; // difference in AC energy between two blocks
> - pixelcmp_ss_t psy_cost_ss[NUM_SQUARE_BLOCKS];
> + struct PU
> + {
> + pixelcmp_t sad; // Sum of Differences for each size
> + pixelcmp_x3_t sad_x3; // Sum of Differences 3x for each size
> + pixelcmp_x4_t sad_x4; // Sum of Differences 4x for each size
> + pixelcmp_t sse_pp; // Sum of Square Error (pixel, pixel) fenc alignment not assumed
> + pixelcmp_ss_t sse_ss; // Sum of Square Error (short, short) fenc alignment not assumed
> + pixelcmp_sp_t sse_sp; // Sum of Square Error (short, pixel) fenc alignment not assumed
> + pixelcmp_t satd; // Sum of Transformed differences (HADAMARD)
> + pixelcmp_t sa8d_inter; // sa8d primitives for motion search partitions
>
> - dct_t dct[NUM_DCTS];
> - idct_t idct[NUM_IDCTS];
> + pixelavg_pp_t pixelavg_pp;
> + addAvg_t luma_addAvg;
> +
> + filter_pp_t luma_hpp;
> + filter_hps_t luma_hps;
> + filter_pp_t luma_vpp;
> + filter_ps_t luma_vps;
> + filter_sp_t luma_vsp;
> + filter_ss_t luma_vss;
> + filter_hv_pp_t luma_hvpp;
> +
> + copy_pp_t luma_copy_pp;
> + copy_sp_t luma_copy_sp;
> + copy_ps_t luma_copy_ps;
> + copy_ss_t luma_copy_ss;
> +
> + pixel_sub_ps_t luma_sub_ps;
> + pixel_add_ps_t luma_add_ps;
> +
> + } pu[NUM_LUMA_PARTITIONS];
> +
> + struct CU
> + {
> + dct_t dct;
> + idct_t idct;
> + calcresidual_t calcresidual;
> + blockfill_s_t blockfill_s; // block fill with value
> + cpy2Dto1D_shl_t cpy2Dto1D_shl;
> + cpy2Dto1D_shr_t cpy2Dto1D_shr;
> + cpy1Dto2D_shl_t cpy1Dto2D_shl;
> + cpy1Dto2D_shr_t cpy1Dto2D_shr;
> + copy_cnt_t copy_cnt;
> +
> + transpose_t transpose;
> +
> + var_t var;
> +
> + pixelcmp_t sa8d; // sa8d primitives for square intra blocks
> + pixel_ssd_s_t ssd_s; // Sum of Square Error (short) fenc alignment not assumed
> + pixelcmp_t psy_cost_pp; // difference in AC energy between two blocks
> + pixelcmp_ss_t psy_cost_ss;
> +
> + } cu[NUM_SQUARE_BLOCKS];
> +
> + dct_t dst4x4;
> + idct_t idst4x4;
> +
> quant_t quant;
> nquant_t nquant;
> dequant_scaling_t dequant_scaling;
> dequant_normal_t dequant_normal;
> count_nonzero_t count_nonzero;
> denoiseDct_t denoiseDct;
> - calcresidual_t calcresidual[NUM_SQUARE_BLOCKS];
> - blockfill_s_t blockfill_s[NUM_SQUARE_BLOCKS]; // block fill with value
> - cpy2Dto1D_shl_t cpy2Dto1D_shl[NUM_SQUARE_BLOCKS - 1];
> - cpy2Dto1D_shr_t cpy2Dto1D_shr[NUM_SQUARE_BLOCKS - 1];
> - cpy1Dto2D_shl_t cpy1Dto2D_shl[NUM_SQUARE_BLOCKS - 1];
> - cpy1Dto2D_shr_t cpy1Dto2D_shr[NUM_SQUARE_BLOCKS - 1];
> - copy_cnt_t copy_cnt[NUM_SQUARE_BLOCKS - 1];
>
> intra_pred_t intra_pred[NUM_INTRA_MODE][NUM_TR_SIZE];
> intra_allangs_t intra_pred_allangs[NUM_TR_SIZE];
> - transpose_t transpose[NUM_SQUARE_BLOCKS];
> scale_t scale1D_128to64;
> scale_t scale2D_64to32;
>
> - var_t var[NUM_SQUARE_BLOCKS];
> ssim_4x4x2_core_t ssim_4x4x2_core;
> ssim_end4_t ssim_end_4;
>
> @@ -261,42 +295,36 @@
>
> weightp_sp_t weight_sp;
> weightp_pp_t weight_pp;
> - pixelavg_pp_t pixelavg_pp[NUM_LUMA_PARTITIONS];
> - addAvg_t luma_addAvg[NUM_LUMA_PARTITIONS];
>
> - filter_pp_t luma_hpp[NUM_LUMA_PARTITIONS];
> - filter_hps_t luma_hps[NUM_LUMA_PARTITIONS];
> - filter_pp_t luma_vpp[NUM_LUMA_PARTITIONS];
> - filter_ps_t luma_vps[NUM_LUMA_PARTITIONS];
> - filter_sp_t luma_vsp[NUM_LUMA_PARTITIONS];
> - filter_ss_t luma_vss[NUM_LUMA_PARTITIONS];
> - filter_hv_pp_t luma_hvpp[NUM_LUMA_PARTITIONS];
> filter_p2s_t luma_p2s;
>
> - copy_pp_t luma_copy_pp[NUM_LUMA_PARTITIONS];
> - copy_sp_t luma_copy_sp[NUM_LUMA_PARTITIONS];
> - copy_ps_t luma_copy_ps[NUM_LUMA_PARTITIONS];
> - copy_ss_t luma_copy_ss[NUM_LUMA_PARTITIONS];
> - pixel_sub_ps_t luma_sub_ps[NUM_SQUARE_BLOCKS];
> - pixel_add_ps_t luma_add_ps[NUM_SQUARE_BLOCKS];
> + struct Chroma
> + {
> + struct PUChroma
> + {
> + // ME and MC
> + pixelcmp_t satd;
> + filter_pp_t filter_vpp;
> + filter_ps_t filter_vps;
> + filter_sp_t filter_vsp;
> + filter_ss_t filter_vss;
> + filter_pp_t filter_hpp;
> + filter_hps_t filter_hps;
> + addAvg_t addAvg;
> + copy_pp_t copy_pp;
> + copy_sp_t copy_sp;
> + copy_ps_t copy_ps;
> + copy_ss_t copy_ss;
> + } pu[NUM_LUMA_PARTITIONS];
>
> - struct
> - {
> - pixelcmp_t satd[NUM_LUMA_PARTITIONS];
> - filter_pp_t filter_vpp[NUM_LUMA_PARTITIONS];
> - filter_ps_t filter_vps[NUM_LUMA_PARTITIONS];
> - filter_sp_t filter_vsp[NUM_LUMA_PARTITIONS];
> - filter_ss_t filter_vss[NUM_LUMA_PARTITIONS];
> - filter_pp_t filter_hpp[NUM_LUMA_PARTITIONS];
> - filter_hps_t filter_hps[NUM_LUMA_PARTITIONS];
> - addAvg_t addAvg[NUM_LUMA_PARTITIONS];
> - copy_pp_t copy_pp[NUM_LUMA_PARTITIONS];
> - copy_sp_t copy_sp[NUM_LUMA_PARTITIONS];
> - copy_ps_t copy_ps[NUM_LUMA_PARTITIONS];
> - copy_ss_t copy_ss[NUM_LUMA_PARTITIONS];
> - pixel_sub_ps_t sub_ps[NUM_SQUARE_BLOCKS];
> - pixel_add_ps_t add_ps[NUM_SQUARE_BLOCKS];
> - filter_p2s_t p2s;
> + struct CUChroma
> + {
> + pixelcmp_t sa8d;
> + pixel_sub_ps_t sub_ps;
> + pixel_add_ps_t add_ps;
> + } cu[NUM_SQUARE_BLOCKS];
> +
> + filter_p2s_t p2s;
> } chroma[X265_CSP_COUNT];
> };
>
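the new layout reads much better at the call sites. For anyone skimming
the thread: the old scheme kept one flat table per primitive, each
indexed by partition; the new scheme keeps one pu[]/cu[] table whose
entries bundle every primitive for that size. A minimal before/after
sketch (assuming the pixelcmp_t signature is unchanged by this series):

    // before: each primitive is its own table, indexed by partition
    int cost = primitives.satd[LUMA_16x16](fenc, fencStride, pred, predStride);

    // after: one index selects a struct holding all primitives for that
    // PU size, so related lookups share a single base address
    int cost = primitives.pu[LUMA_16x16].satd(fenc, fencStride, pred, predStride);
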
> diff -r 1924c460d130 -r c6ca0fd54aa7 source/common/quant.cpp
> --- a/source/common/quant.cpp Fri Jan 09 11:35:26 2015 +0530
> +++ b/source/common/quant.cpp Thu Jan 08 15:23:38 2015 -0600
> @@ -329,7 +329,7 @@
> if (cu.m_tqBypass[absPartIdx])
> {
> X265_CHECK(log2TrSize >= 2 && log2TrSize <= 5, "Block size mistake!\n");
> - return primitives.copy_cnt[sizeIdx](coeff, residual, resiStride);
> + return primitives.cu[sizeIdx].copy_cnt(coeff, residual, resiStride);
> }
>
> bool isLuma = ttype == TEXT_LUMA;
> @@ -341,21 +341,21 @@
> {
> #if X265_DEPTH <= 10
> X265_CHECK(transformShift >= 0, "invalid transformShift\n");
> - primitives.cpy2Dto1D_shl[sizeIdx](m_resiDctCoeff, residual, resiStride, transformShift);
> + primitives.cu[sizeIdx].cpy2Dto1D_shl(m_resiDctCoeff, residual, resiStride, transformShift);
> #else
> if (transformShift >= 0)
> - primitives.cpy2Dto1D_shl[sizeIdx](m_resiDctCoeff, residual, resiStride, transformShift);
> + primitives.cu[sizeIdx].cpy2Dto1D_shl(m_resiDctCoeff, residual, resiStride, transformShift);
> else
> - primitives.cpy2Dto1D_shr[sizeIdx](m_resiDctCoeff, residual, resiStride, -transformShift);
> + primitives.cu[sizeIdx].cpy2Dto1D_shr(m_resiDctCoeff, residual, resiStride, -transformShift);
> #endif
> }
> else
> {
> bool isIntra = cu.isIntra(absPartIdx);
> int useDST = !sizeIdx && isLuma && isIntra;
> - int index = DCT_4x4 + sizeIdx - useDST;
> + int index = BLOCK_4x4 + sizeIdx - useDST;
>
> - primitives.dct[index](residual, m_resiDctCoeff, resiStride);
> + primitives.cu[index].dct(residual, m_resiDctCoeff, resiStride);
>
> /* NOTE: if RDOQ is disabled globally, psy-rdoq is also disabled, so
> * there is no risk of performing this DCT unnecessarily */
> @@ -363,8 +363,8 @@
> {
> int trSize = 1 << log2TrSize;
> /* perform DCT on source pixels for psy-rdoq */
> - primitives.luma_copy_ps[sizeIdx](m_fencShortBuf, trSize, fenc, fencStride);
> - primitives.dct[index](m_fencShortBuf, m_fencDctCoeff, trSize);
> + primitives.pu[sizeIdx].luma_copy_ps(m_fencShortBuf, trSize, fenc, fencStride);
> + primitives.cu[index].dct(m_fencShortBuf, m_fencDctCoeff, trSize);
> }
>
> if (m_nr)
> @@ -411,7 +411,7 @@
> const uint32_t sizeIdx = log2TrSize - 2;
> if (transQuantBypass)
> {
> - primitives.cpy1Dto2D_shl[sizeIdx](residual, coeff, resiStride, 0);
> + primitives.cu[sizeIdx].cpy1Dto2D_shl(residual, coeff, resiStride, 0);
> return;
> }
>
> @@ -438,12 +438,12 @@
> {
> #if X265_DEPTH <= 10
> X265_CHECK(transformShift > 0, "invalid transformShift\n");
> - primitives.cpy1Dto2D_shr[sizeIdx](residual, m_resiDctCoeff, resiStride, transformShift);
> + primitives.cu[sizeIdx].cpy1Dto2D_shr(residual, m_resiDctCoeff, resiStride, transformShift);
> #else
> if (transformShift > 0)
> - primitives.cpy1Dto2D_shr[sizeIdx](residual, m_resiDctCoeff, resiStride, transformShift);
> + primitives.cu[sizeIdx].cpy1Dto2D_shr(residual, m_resiDctCoeff, resiStride, transformShift);
> else
> - primitives.cpy1Dto2D_shl[sizeIdx](residual, m_resiDctCoeff, resiStride, -transformShift);
> + primitives.cu[sizeIdx].cpy1Dto2D_shl(residual, m_resiDctCoeff, resiStride, -transformShift);
> #endif
> }
> else
> @@ -461,11 +461,11 @@
> const int add_2nd = 1 << (shift_2nd - 1);
>
> int dc_val = (((m_resiDctCoeff[0] * (64 >> 6) + add_1st) >> shift_1st) * (64 >> 3) + add_2nd) >> shift_2nd;
> - primitives.blockfill_s[sizeIdx](residual, resiStride, (int16_t)dc_val);
> + primitives.cu[sizeIdx].blockfill_s(residual, resiStride, (int16_t)dc_val);
> return;
> }
>
> - primitives.idct[IDCT_4x4 + sizeIdx - useDST](m_resiDctCoeff, residual, resiStride);
> + primitives.cu[BLOCK_4x4 + sizeIdx - useDST].idct(m_resiDctCoeff, residual, resiStride);
> }
> }
>
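one thing worth double-checking while the series is queued: with
dst4x4/idst4x4 pulled out of the cu[] table, "BLOCK_4x4 + sizeIdx -
useDST" evaluates to -1 for the 4x4 luma intra case (useDST is 1 there,
and BLOCK_4x4 is the first enumerator), so both the forward and inverse
paths index in front of cu[]. Assuming the standalone dst4x4/idst4x4
members are the intended targets, an explicit branch would avoid the
negative index; a sketch:

    /* dispatch 4x4 luma intra to the dedicated DST pointer instead of
     * deriving a (possibly negative) cu[] index */
    if (useDST)
        primitives.dst4x4(residual, m_resiDctCoeff, resiStride);
    else
        primitives.cu[sizeIdx].dct(residual, m_resiDctCoeff, resiStride);
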
> diff -r 1924c460d130 -r c6ca0fd54aa7 source/common/shortyuv.cpp
> --- a/source/common/shortyuv.cpp Fri Jan 09 11:35:26 2015 +0530
> +++ b/source/common/shortyuv.cpp Thu Jan 08 15:23:38 2015 -0600
> @@ -74,9 +74,9 @@
> void ShortYuv::subtract(const Yuv& srcYuv0, const Yuv& srcYuv1, uint32_t log2Size)
> {
> const int sizeIdx = log2Size - 2;
> - primitives.luma_sub_ps[sizeIdx](m_buf[0], m_size, srcYuv0.m_buf[0], srcYuv1.m_buf[0], srcYuv0.m_size, srcYuv1.m_size);
> - primitives.chroma[m_csp].sub_ps[sizeIdx](m_buf[1], m_csize, srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize, srcYuv1.m_csize);
> - primitives.chroma[m_csp].sub_ps[sizeIdx](m_buf[2], m_csize, srcYuv0.m_buf[2], srcYuv1.m_buf[2], srcYuv0.m_csize, srcYuv1.m_csize);
> + primitives.pu[sizeIdx].luma_sub_ps(m_buf[0], m_size, srcYuv0.m_buf[0], srcYuv1.m_buf[0], srcYuv0.m_size, srcYuv1.m_size);
> + primitives.chroma[m_csp].cu[sizeIdx].sub_ps(m_buf[1], m_csize, srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize, srcYuv1.m_csize);
> + primitives.chroma[m_csp].cu[sizeIdx].sub_ps(m_buf[2], m_csize, srcYuv0.m_buf[2], srcYuv1.m_buf[2], srcYuv0.m_csize, srcYuv1.m_csize);
> }
>
> void ShortYuv::copyPartToPartLuma(ShortYuv& dstYuv, uint32_t absPartIdx, uint32_t log2Size) const
> @@ -84,7 +84,7 @@
> const int16_t* src = getLumaAddr(absPartIdx);
> int16_t* dst = dstYuv.getLumaAddr(absPartIdx);
>
> - primitives.luma_copy_ss[log2Size - 2](dst, dstYuv.m_size, src, m_size);
> + primitives.pu[log2Size - 2].luma_copy_ss(dst, dstYuv.m_size, src, m_size);
> }
>
> void ShortYuv::copyPartToPartLuma(Yuv& dstYuv, uint32_t absPartIdx, uint32_t log2Size) const
> @@ -92,7 +92,7 @@
> const int16_t* src = getLumaAddr(absPartIdx);
> pixel* dst = dstYuv.getLumaAddr(absPartIdx);
>
> - primitives.luma_copy_sp[log2Size - 2](dst, dstYuv.m_size, src, m_size);
> + primitives.pu[log2Size - 2].luma_copy_sp(dst, dstYuv.m_size, src, m_size);
> }
>
> void ShortYuv::copyPartToPartChroma(ShortYuv& dstYuv, uint32_t absPartIdx, uint32_t log2SizeL) const
> @@ -103,8 +103,8 @@
> int16_t* dstU = dstYuv.getCbAddr(absPartIdx);
> int16_t* dstV = dstYuv.getCrAddr(absPartIdx);
>
> - primitives.chroma[m_csp].copy_ss[part](dstU, dstYuv.m_csize, srcU, m_csize);
> - primitives.chroma[m_csp].copy_ss[part](dstV, dstYuv.m_csize, srcV, m_csize);
> + primitives.chroma[m_csp].pu[part].copy_ss(dstU, dstYuv.m_csize, srcU, m_csize);
> + primitives.chroma[m_csp].pu[part].copy_ss(dstV, dstYuv.m_csize, srcV, m_csize);
> }
>
> void ShortYuv::copyPartToPartChroma(Yuv& dstYuv, uint32_t absPartIdx, uint32_t log2SizeL) const
> @@ -115,6 +115,6 @@
> pixel* dstU = dstYuv.getCbAddr(absPartIdx);
> pixel* dstV = dstYuv.getCrAddr(absPartIdx);
>
> - primitives.chroma[m_csp].copy_sp[part](dstU, dstYuv.m_csize, srcU, m_csize);
> - primitives.chroma[m_csp].copy_sp[part](dstV, dstYuv.m_csize, srcV, m_csize);
> + primitives.chroma[m_csp].pu[part].copy_sp(dstU, dstYuv.m_csize, srcU, m_csize);
> + primitives.chroma[m_csp].pu[part].copy_sp(dstV, dstYuv.m_csize, srcV, m_csize);
> }
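the shortyuv.cpp hunks rely on pu[] and cu[] sharing an index space for
square sizes: "log2Size - 2" only works as a pu[] index because the
square partitions lead the LumaPartitions enum in the same order as the
BLOCK_* enum. Assuming that ordering is kept, the implicit contract is
roughly:

    // hypothetical helper making the contract explicit: a log2 block
    // size maps onto pu[] and cu[] at the same index
    inline int squareIdxFromLog2Size(uint32_t log2Size)
    {
        X265_CHECK(log2Size >= 2 && log2Size <= 6, "invalid square block size\n");
        return (int)log2Size - 2; // LUMA_4x4/BLOCK_4x4 .. LUMA_64x64/BLOCK_64x64
    }
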
> diff -r 1924c460d130 -r c6ca0fd54aa7 source/common/vec/dct-sse3.cpp
> --- a/source/common/vec/dct-sse3.cpp Fri Jan 09 11:35:26 2015 +0530
> +++ b/source/common/vec/dct-sse3.cpp Thu Jan 08 15:23:38 2015 -0600
> @@ -1402,9 +1402,9 @@
> * still somewhat rare on end-user PCs we still compile and link these SSE3
> * intrinsic SIMD functions */
> #if !HIGH_BIT_DEPTH
> - p.idct[IDCT_8x8] = idct8;
> - p.idct[IDCT_16x16] = idct16;
> - p.idct[IDCT_32x32] = idct32;
> + p.cu[BLOCK_8x8].idct = idct8;
> + p.cu[BLOCK_16x16].idct = idct16;
> + p.cu[BLOCK_32x32].idct = idct32;
> #endif
> }
> }
> diff -r 1924c460d130 -r c6ca0fd54aa7 source/common/vec/dct-ssse3.cpp
> --- a/source/common/vec/dct-ssse3.cpp Fri Jan 09 11:35:26 2015 +0530
> +++ b/source/common/vec/dct-ssse3.cpp Thu Jan 08 15:23:38 2015 -0600
> @@ -1111,8 +1111,8 @@
> * still somewhat rare on end-user PCs we still compile and link these SSSE3
> * intrinsic SIMD functions */
> #if !HIGH_BIT_DEPTH
> - p.dct[DCT_16x16] = dct16;
> - p.dct[DCT_32x32] = dct32;
> + p.cu[BLOCK_16x16].dct = dct16;
> + p.cu[BLOCK_32x32].dct = dct32;
> #endif
> }
> }
> diff -r 1924c460d130 -r c6ca0fd54aa7 source/common/x86/asm-primitives.cpp
> --- a/source/common/x86/asm-primitives.cpp Fri Jan 09 11:35:26 2015 +0530
> +++ b/source/common/x86/asm-primitives.cpp Thu Jan 08 15:23:38 2015 -0600
> @@ -46,29 +46,29 @@
> const int filterSize = NTAPS_LUMA;
> const int halfFilterSize = filterSize >> 1;
>
> - x265::primitives.luma_hps[size](src, srcStride, immed, MAX_CU_SIZE, idxX, 1);
> - x265::primitives.luma_vsp[size](immed + (halfFilterSize - 1) * MAX_CU_SIZE, MAX_CU_SIZE, dst, dstStride, idxY);
> + x265::primitives.pu[size].luma_hps(src, srcStride, immed, MAX_CU_SIZE, idxX, 1);
> + x265::primitives.pu[size].luma_vsp(immed + (halfFilterSize - 1) * MAX_CU_SIZE, MAX_CU_SIZE, dst, dstStride, idxY);
> }
>
> #define INIT2_NAME(name1, name2, cpu) \
> - p.name1[LUMA_16x16] = x265_pixel_ ## name2 ## _16x16 ## cpu; \
> - p.name1[LUMA_16x8] = x265_pixel_ ## name2 ## _16x8 ## cpu;
> + p.pu[LUMA_16x16].name1 = x265_pixel_ ## name2 ## _16x16 ## cpu; \
> + p.pu[LUMA_16x8].name1 = x265_pixel_ ## name2 ## _16x8 ## cpu;
> #define INIT4_NAME(name1, name2, cpu) \
> INIT2_NAME(name1, name2, cpu) \
> - p.name1[LUMA_8x16] = x265_pixel_ ## name2 ## _8x16 ## cpu; \
> - p.name1[LUMA_8x8] = x265_pixel_ ## name2 ## _8x8 ## cpu;
> + p.pu[LUMA_8x16].name1 = x265_pixel_ ## name2 ## _8x16 ## cpu; \
> + p.pu[LUMA_8x8].name1 = x265_pixel_ ## name2 ## _8x8 ## cpu;
> #define INIT5_NAME(name1, name2, cpu) \
> INIT4_NAME(name1, name2, cpu) \
> - p.name1[LUMA_8x4] = x265_pixel_ ## name2 ## _8x4 ## cpu;
> + p.pu[LUMA_8x4].name1 = x265_pixel_ ## name2 ## _8x4 ## cpu;
> #define INIT6_NAME(name1, name2, cpu) \
> INIT5_NAME(name1, name2, cpu) \
> - p.name1[LUMA_4x8] = x265_pixel_ ## name2 ## _4x8 ## cpu;
> + p.pu[LUMA_4x8].name1 = x265_pixel_ ## name2 ## _4x8 ## cpu;
> #define INIT7_NAME(name1, name2, cpu) \
> INIT6_NAME(name1, name2, cpu) \
> - p.name1[LUMA_4x4] = x265_pixel_ ## name2 ## _4x4 ## cpu;
> + p.pu[LUMA_4x4].name1 = x265_pixel_ ## name2 ## _4x4 ## cpu;
> #define INIT8_NAME(name1, name2, cpu) \
> INIT7_NAME(name1, name2, cpu) \
> - p.name1[LUMA_4x16] = x265_pixel_ ## name2 ## _4x16 ## cpu;
> + p.pu[LUMA_4x16].name1 = x265_pixel_ ## name2 ## _4x16 ## cpu;
> #define INIT2(name, cpu) INIT2_NAME(name, name, cpu)
> #define INIT4(name, cpu) INIT4_NAME(name, name, cpu)
> #define INIT5(name, cpu) INIT5_NAME(name, name, cpu)
> @@ -77,220 +77,220 @@
> #define INIT8(name, cpu) INIT8_NAME(name, name, cpu)
>
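the INIT*_NAME rewrite is easy to misread at a glance: name1 is now
pasted as a struct member rather than a table name. Expanding
INIT2_NAME(satd, satd, _sse2) by hand, the generated assignments are:

    p.pu[LUMA_16x16].satd = x265_pixel_satd_16x16_sse2;
    p.pu[LUMA_16x8].satd  = x265_pixel_satd_16x8_sse2;
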
> #define HEVC_SATD(cpu) \
> - p.satd[LUMA_4x8] = x265_pixel_satd_4x8_ ## cpu; \
> - p.satd[LUMA_4x16] = x265_pixel_satd_4x16_ ## cpu; \
> - p.satd[LUMA_8x4] = x265_pixel_satd_8x4_ ## cpu; \
> - p.satd[LUMA_8x8] = x265_pixel_satd_8x8_ ## cpu; \
> - p.satd[LUMA_8x16] = x265_pixel_satd_8x16_ ## cpu; \
> - p.satd[LUMA_8x32] = x265_pixel_satd_8x32_ ## cpu; \
> - p.satd[LUMA_12x16] = x265_pixel_satd_12x16_ ## cpu; \
> - p.satd[LUMA_16x4] = x265_pixel_satd_16x4_ ## cpu; \
> - p.satd[LUMA_16x8] = x265_pixel_satd_16x8_ ## cpu; \
> - p.satd[LUMA_16x12] = x265_pixel_satd_16x12_ ## cpu; \
> - p.satd[LUMA_16x16] = x265_pixel_satd_16x16_ ## cpu; \
> - p.satd[LUMA_16x32] = x265_pixel_satd_16x32_ ## cpu; \
> - p.satd[LUMA_16x64] = x265_pixel_satd_16x64_ ## cpu; \
> - p.satd[LUMA_24x32] = x265_pixel_satd_24x32_ ## cpu; \
> - p.satd[LUMA_32x8] = x265_pixel_satd_32x8_ ## cpu; \
> - p.satd[LUMA_32x16] = x265_pixel_satd_32x16_ ## cpu; \
> - p.satd[LUMA_32x24] = x265_pixel_satd_32x24_ ## cpu; \
> - p.satd[LUMA_32x32] = x265_pixel_satd_32x32_ ## cpu; \
> - p.satd[LUMA_32x64] = x265_pixel_satd_32x64_ ## cpu; \
> - p.satd[LUMA_48x64] = x265_pixel_satd_48x64_ ## cpu; \
> - p.satd[LUMA_64x16] = x265_pixel_satd_64x16_ ## cpu; \
> - p.satd[LUMA_64x32] = x265_pixel_satd_64x32_ ## cpu; \
> - p.satd[LUMA_64x48] = x265_pixel_satd_64x48_ ## cpu; \
> - p.satd[LUMA_64x64] = x265_pixel_satd_64x64_ ## cpu;
> + p.pu[LUMA_4x8].satd = x265_pixel_satd_4x8_ ## cpu; \
> + p.pu[LUMA_4x16].satd = x265_pixel_satd_4x16_ ## cpu; \
> + p.pu[LUMA_8x4].satd = x265_pixel_satd_8x4_ ## cpu; \
> + p.pu[LUMA_8x8].satd = x265_pixel_satd_8x8_ ## cpu; \
> + p.pu[LUMA_8x16].satd = x265_pixel_satd_8x16_ ## cpu; \
> + p.pu[LUMA_8x32].satd = x265_pixel_satd_8x32_ ## cpu; \
> + p.pu[LUMA_12x16].satd = x265_pixel_satd_12x16_ ## cpu; \
> + p.pu[LUMA_16x4].satd = x265_pixel_satd_16x4_ ## cpu; \
> + p.pu[LUMA_16x8].satd = x265_pixel_satd_16x8_ ## cpu; \
> + p.pu[LUMA_16x12].satd = x265_pixel_satd_16x12_ ## cpu; \
> + p.pu[LUMA_16x16].satd = x265_pixel_satd_16x16_ ## cpu; \
> + p.pu[LUMA_16x32].satd = x265_pixel_satd_16x32_ ## cpu; \
> + p.pu[LUMA_16x64].satd = x265_pixel_satd_16x64_ ## cpu; \
> + p.pu[LUMA_24x32].satd = x265_pixel_satd_24x32_ ## cpu; \
> + p.pu[LUMA_32x8].satd = x265_pixel_satd_32x8_ ## cpu; \
> + p.pu[LUMA_32x16].satd = x265_pixel_satd_32x16_ ## cpu; \
> + p.pu[LUMA_32x24].satd = x265_pixel_satd_32x24_ ## cpu; \
> + p.pu[LUMA_32x32].satd = x265_pixel_satd_32x32_ ## cpu; \
> + p.pu[LUMA_32x64].satd = x265_pixel_satd_32x64_ ## cpu; \
> + p.pu[LUMA_48x64].satd = x265_pixel_satd_48x64_ ## cpu; \
> + p.pu[LUMA_64x16].satd = x265_pixel_satd_64x16_ ## cpu; \
> + p.pu[LUMA_64x32].satd = x265_pixel_satd_64x32_ ## cpu; \
> + p.pu[LUMA_64x48].satd = x265_pixel_satd_64x48_ ## cpu; \
> + p.pu[LUMA_64x64].satd = x265_pixel_satd_64x64_ ## cpu;
>
> #define SAD_X3(cpu) \
> - p.sad_x3[LUMA_16x8] = x265_pixel_sad_x3_16x8_ ## cpu; \
> - p.sad_x3[LUMA_16x12] = x265_pixel_sad_x3_16x12_ ## cpu; \
> - p.sad_x3[LUMA_16x16] = x265_pixel_sad_x3_16x16_ ## cpu; \
> - p.sad_x3[LUMA_16x32] = x265_pixel_sad_x3_16x32_ ## cpu; \
> - p.sad_x3[LUMA_16x64] = x265_pixel_sad_x3_16x64_ ## cpu; \
> - p.sad_x3[LUMA_32x8] = x265_pixel_sad_x3_32x8_ ## cpu; \
> - p.sad_x3[LUMA_32x16] = x265_pixel_sad_x3_32x16_ ## cpu; \
> - p.sad_x3[LUMA_32x24] = x265_pixel_sad_x3_32x24_ ## cpu; \
> - p.sad_x3[LUMA_32x32] = x265_pixel_sad_x3_32x32_ ## cpu; \
> - p.sad_x3[LUMA_32x64] = x265_pixel_sad_x3_32x64_ ## cpu; \
> - p.sad_x3[LUMA_24x32] = x265_pixel_sad_x3_24x32_ ## cpu; \
> - p.sad_x3[LUMA_48x64] = x265_pixel_sad_x3_48x64_ ## cpu; \
> - p.sad_x3[LUMA_64x16] = x265_pixel_sad_x3_64x16_ ## cpu; \
> - p.sad_x3[LUMA_64x32] = x265_pixel_sad_x3_64x32_ ## cpu; \
> - p.sad_x3[LUMA_64x48] = x265_pixel_sad_x3_64x48_ ## cpu; \
> - p.sad_x3[LUMA_64x64] = x265_pixel_sad_x3_64x64_ ## cpu
> + p.pu[LUMA_16x8].sad_x3 = x265_pixel_sad_x3_16x8_ ## cpu; \
> + p.pu[LUMA_16x12].sad_x3 = x265_pixel_sad_x3_16x12_ ## cpu; \
> + p.pu[LUMA_16x16].sad_x3 = x265_pixel_sad_x3_16x16_ ## cpu; \
> + p.pu[LUMA_16x32].sad_x3 = x265_pixel_sad_x3_16x32_ ## cpu; \
> + p.pu[LUMA_16x64].sad_x3 = x265_pixel_sad_x3_16x64_ ## cpu; \
> + p.pu[LUMA_32x8].sad_x3 = x265_pixel_sad_x3_32x8_ ## cpu; \
> + p.pu[LUMA_32x16].sad_x3 = x265_pixel_sad_x3_32x16_ ## cpu; \
> + p.pu[LUMA_32x24].sad_x3 = x265_pixel_sad_x3_32x24_ ## cpu; \
> + p.pu[LUMA_32x32].sad_x3 = x265_pixel_sad_x3_32x32_ ## cpu; \
> + p.pu[LUMA_32x64].sad_x3 = x265_pixel_sad_x3_32x64_ ## cpu; \
> + p.pu[LUMA_24x32].sad_x3 = x265_pixel_sad_x3_24x32_ ## cpu; \
> + p.pu[LUMA_48x64].sad_x3 = x265_pixel_sad_x3_48x64_ ## cpu; \
> + p.pu[LUMA_64x16].sad_x3 = x265_pixel_sad_x3_64x16_ ## cpu; \
> + p.pu[LUMA_64x32].sad_x3 = x265_pixel_sad_x3_64x32_ ## cpu; \
> + p.pu[LUMA_64x48].sad_x3 = x265_pixel_sad_x3_64x48_ ## cpu; \
> + p.pu[LUMA_64x64].sad_x3 = x265_pixel_sad_x3_64x64_ ## cpu
>
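for reference, the multi-reference SAD entry points keep their calling
convention; only the lookup moves. A usage sketch, assuming the existing
pixelcmp_x3_t signature (and, as before, that fenc lives in the
fixed-stride fenc buffer):

    static void scoreThreeRefs(const pixel* fenc, const pixel* ref0,
                               const pixel* ref1, const pixel* ref2,
                               intptr_t refStride, int32_t costs[3])
    {
        // one call produces the SAD of fenc against each of the three
        // references; all references share a single stride
        x265::primitives.pu[LUMA_16x8].sad_x3(fenc, ref0, ref1, ref2,
                                              refStride, costs);
    }
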
> #define SAD_X4(cpu) \
> - p.sad_x4[LUMA_16x8] = x265_pixel_sad_x4_16x8_ ## cpu; \
> - p.sad_x4[LUMA_16x12] = x265_pixel_sad_x4_16x12_ ## cpu; \
> - p.sad_x4[LUMA_16x16] = x265_pixel_sad_x4_16x16_ ## cpu; \
> - p.sad_x4[LUMA_16x32] = x265_pixel_sad_x4_16x32_ ## cpu; \
> - p.sad_x4[LUMA_16x64] = x265_pixel_sad_x4_16x64_ ## cpu; \
> - p.sad_x4[LUMA_32x8] = x265_pixel_sad_x4_32x8_ ## cpu; \
> - p.sad_x4[LUMA_32x16] = x265_pixel_sad_x4_32x16_ ## cpu; \
> - p.sad_x4[LUMA_32x24] = x265_pixel_sad_x4_32x24_ ## cpu; \
> - p.sad_x4[LUMA_32x32] = x265_pixel_sad_x4_32x32_ ## cpu; \
> - p.sad_x4[LUMA_32x64] = x265_pixel_sad_x4_32x64_ ## cpu; \
> - p.sad_x4[LUMA_24x32] = x265_pixel_sad_x4_24x32_ ## cpu; \
> - p.sad_x4[LUMA_48x64] = x265_pixel_sad_x4_48x64_ ## cpu; \
> - p.sad_x4[LUMA_64x16] = x265_pixel_sad_x4_64x16_ ## cpu; \
> - p.sad_x4[LUMA_64x32] = x265_pixel_sad_x4_64x32_ ## cpu; \
> - p.sad_x4[LUMA_64x48] = x265_pixel_sad_x4_64x48_ ## cpu; \
> - p.sad_x4[LUMA_64x64] = x265_pixel_sad_x4_64x64_ ## cpu
> + p.pu[LUMA_16x8].sad_x4 = x265_pixel_sad_x4_16x8_ ## cpu; \
> + p.pu[LUMA_16x12].sad_x4 = x265_pixel_sad_x4_16x12_ ## cpu; \
> + p.pu[LUMA_16x16].sad_x4 = x265_pixel_sad_x4_16x16_ ## cpu; \
> + p.pu[LUMA_16x32].sad_x4 = x265_pixel_sad_x4_16x32_ ## cpu; \
> + p.pu[LUMA_16x64].sad_x4 = x265_pixel_sad_x4_16x64_ ## cpu; \
> + p.pu[LUMA_32x8].sad_x4 = x265_pixel_sad_x4_32x8_ ## cpu; \
> + p.pu[LUMA_32x16].sad_x4 = x265_pixel_sad_x4_32x16_ ## cpu; \
> + p.pu[LUMA_32x24].sad_x4 = x265_pixel_sad_x4_32x24_ ## cpu; \
> + p.pu[LUMA_32x32].sad_x4 = x265_pixel_sad_x4_32x32_ ## cpu; \
> + p.pu[LUMA_32x64].sad_x4 = x265_pixel_sad_x4_32x64_ ## cpu; \
> + p.pu[LUMA_24x32].sad_x4 = x265_pixel_sad_x4_24x32_ ## cpu; \
> + p.pu[LUMA_48x64].sad_x4 = x265_pixel_sad_x4_48x64_ ## cpu; \
> + p.pu[LUMA_64x16].sad_x4 = x265_pixel_sad_x4_64x16_ ## cpu; \
> + p.pu[LUMA_64x32].sad_x4 = x265_pixel_sad_x4_64x32_ ## cpu; \
> + p.pu[LUMA_64x48].sad_x4 = x265_pixel_sad_x4_64x48_ ## cpu; \
> + p.pu[LUMA_64x64].sad_x4 = x265_pixel_sad_x4_64x64_ ## cpu
>
> #define SAD(cpu) \
> - p.sad[LUMA_8x32] = x265_pixel_sad_8x32_ ## cpu; \
> - p.sad[LUMA_16x4] = x265_pixel_sad_16x4_ ## cpu; \
> - p.sad[LUMA_16x12] = x265_pixel_sad_16x12_ ## cpu; \
> - p.sad[LUMA_16x32] = x265_pixel_sad_16x32_ ## cpu; \
> - p.sad[LUMA_16x64] = x265_pixel_sad_16x64_ ## cpu; \
> - p.sad[LUMA_32x8] = x265_pixel_sad_32x8_ ## cpu; \
> - p.sad[LUMA_32x16] = x265_pixel_sad_32x16_ ## cpu; \
> - p.sad[LUMA_32x24] = x265_pixel_sad_32x24_ ## cpu; \
> - p.sad[LUMA_32x32] = x265_pixel_sad_32x32_ ## cpu; \
> - p.sad[LUMA_32x64] = x265_pixel_sad_32x64_ ## cpu; \
> - p.sad[LUMA_64x16] = x265_pixel_sad_64x16_ ## cpu; \
> - p.sad[LUMA_64x32] = x265_pixel_sad_64x32_ ## cpu; \
> - p.sad[LUMA_64x48] = x265_pixel_sad_64x48_ ## cpu; \
> - p.sad[LUMA_64x64] = x265_pixel_sad_64x64_ ## cpu; \
> - p.sad[LUMA_48x64] = x265_pixel_sad_48x64_ ## cpu; \
> - p.sad[LUMA_24x32] = x265_pixel_sad_24x32_ ## cpu; \
> - p.sad[LUMA_12x16] = x265_pixel_sad_12x16_ ## cpu
> + p.pu[LUMA_8x32].sad = x265_pixel_sad_8x32_ ## cpu; \
> + p.pu[LUMA_16x4].sad = x265_pixel_sad_16x4_ ## cpu; \
> + p.pu[LUMA_16x12].sad = x265_pixel_sad_16x12_ ## cpu; \
> + p.pu[LUMA_16x32].sad = x265_pixel_sad_16x32_ ## cpu; \
> + p.pu[LUMA_16x64].sad = x265_pixel_sad_16x64_ ## cpu; \
> + p.pu[LUMA_32x8].sad = x265_pixel_sad_32x8_ ## cpu; \
> + p.pu[LUMA_32x16].sad = x265_pixel_sad_32x16_ ## cpu; \
> + p.pu[LUMA_32x24].sad = x265_pixel_sad_32x24_ ## cpu; \
> + p.pu[LUMA_32x32].sad = x265_pixel_sad_32x32_ ## cpu; \
> + p.pu[LUMA_32x64].sad = x265_pixel_sad_32x64_ ## cpu; \
> + p.pu[LUMA_64x16].sad = x265_pixel_sad_64x16_ ## cpu; \
> + p.pu[LUMA_64x32].sad = x265_pixel_sad_64x32_ ## cpu; \
> + p.pu[LUMA_64x48].sad = x265_pixel_sad_64x48_ ## cpu; \
> + p.pu[LUMA_64x64].sad = x265_pixel_sad_64x64_ ## cpu; \
> + p.pu[LUMA_48x64].sad = x265_pixel_sad_48x64_ ## cpu; \
> + p.pu[LUMA_24x32].sad = x265_pixel_sad_24x32_ ## cpu; \
> + p.pu[LUMA_12x16].sad = x265_pixel_sad_12x16_ ## cpu
>
> #define ASSGN_SSE(cpu) \
> - p.sse_pp[LUMA_8x8] = x265_pixel_ssd_8x8_ ## cpu; \
> - p.sse_pp[LUMA_8x4] = x265_pixel_ssd_8x4_ ## cpu; \
> - p.sse_pp[LUMA_16x16] = x265_pixel_ssd_16x16_ ## cpu; \
> - p.sse_pp[LUMA_16x4] = x265_pixel_ssd_16x4_ ## cpu; \
> - p.sse_pp[LUMA_16x8] = x265_pixel_ssd_16x8_ ## cpu; \
> - p.sse_pp[LUMA_8x16] = x265_pixel_ssd_8x16_ ## cpu; \
> - p.sse_pp[LUMA_16x12] = x265_pixel_ssd_16x12_ ## cpu; \
> - p.sse_pp[LUMA_32x32] = x265_pixel_ssd_32x32_ ## cpu; \
> - p.sse_pp[LUMA_32x16] = x265_pixel_ssd_32x16_ ## cpu; \
> - p.sse_pp[LUMA_16x32] = x265_pixel_ssd_16x32_ ## cpu; \
> - p.sse_pp[LUMA_8x32] = x265_pixel_ssd_8x32_ ## cpu; \
> - p.sse_pp[LUMA_32x8] = x265_pixel_ssd_32x8_ ## cpu; \
> - p.sse_pp[LUMA_32x24] = x265_pixel_ssd_32x24_ ## cpu; \
> - p.sse_pp[LUMA_32x64] = x265_pixel_ssd_32x64_ ## cpu; \
> - p.sse_pp[LUMA_16x64] = x265_pixel_ssd_16x64_ ## cpu
> + p.pu[LUMA_8x8].sse_pp = x265_pixel_ssd_8x8_ ## cpu; \
> + p.pu[LUMA_8x4].sse_pp = x265_pixel_ssd_8x4_ ## cpu; \
> + p.pu[LUMA_16x16].sse_pp = x265_pixel_ssd_16x16_ ## cpu; \
> + p.pu[LUMA_16x4].sse_pp = x265_pixel_ssd_16x4_ ## cpu; \
> + p.pu[LUMA_16x8].sse_pp = x265_pixel_ssd_16x8_ ## cpu; \
> + p.pu[LUMA_8x16].sse_pp = x265_pixel_ssd_8x16_ ## cpu; \
> + p.pu[LUMA_16x12].sse_pp = x265_pixel_ssd_16x12_ ## cpu; \
> + p.pu[LUMA_32x32].sse_pp = x265_pixel_ssd_32x32_ ## cpu; \
> + p.pu[LUMA_32x16].sse_pp = x265_pixel_ssd_32x16_ ## cpu; \
> + p.pu[LUMA_16x32].sse_pp = x265_pixel_ssd_16x32_ ## cpu; \
> + p.pu[LUMA_8x32].sse_pp = x265_pixel_ssd_8x32_ ## cpu; \
> + p.pu[LUMA_32x8].sse_pp = x265_pixel_ssd_32x8_ ## cpu; \
> + p.pu[LUMA_32x24].sse_pp = x265_pixel_ssd_32x24_ ## cpu; \
> + p.pu[LUMA_32x64].sse_pp = x265_pixel_ssd_32x64_ ## cpu; \
> + p.pu[LUMA_16x64].sse_pp = x265_pixel_ssd_16x64_ ## cpu
>
> #define ASSGN_SSE_SS(cpu) \
> - p.sse_ss[LUMA_4x4] = x265_pixel_ssd_ss_4x4_ ## cpu; \
> - p.sse_ss[LUMA_4x8] = x265_pixel_ssd_ss_4x8_ ## cpu; \
> - p.sse_ss[LUMA_4x16] = x265_pixel_ssd_ss_4x16_ ## cpu; \
> - p.sse_ss[LUMA_8x4] = x265_pixel_ssd_ss_8x4_ ## cpu; \
> - p.sse_ss[LUMA_8x8] = x265_pixel_ssd_ss_8x8_ ## cpu; \
> - p.sse_ss[LUMA_8x16] = x265_pixel_ssd_ss_8x16_ ## cpu; \
> - p.sse_ss[LUMA_8x32] = x265_pixel_ssd_ss_8x32_ ## cpu; \
> - p.sse_ss[LUMA_12x16] = x265_pixel_ssd_ss_12x16_ ## cpu; \
> - p.sse_ss[LUMA_16x4] = x265_pixel_ssd_ss_16x4_ ## cpu; \
> - p.sse_ss[LUMA_16x8] = x265_pixel_ssd_ss_16x8_ ## cpu; \
> - p.sse_ss[LUMA_16x12] = x265_pixel_ssd_ss_16x12_ ## cpu; \
> - p.sse_ss[LUMA_16x16] = x265_pixel_ssd_ss_16x16_ ## cpu; \
> - p.sse_ss[LUMA_16x32] = x265_pixel_ssd_ss_16x32_ ## cpu; \
> - p.sse_ss[LUMA_16x64] = x265_pixel_ssd_ss_16x64_ ## cpu; \
> - p.sse_ss[LUMA_24x32] = x265_pixel_ssd_ss_24x32_ ## cpu; \
> - p.sse_ss[LUMA_32x8] = x265_pixel_ssd_ss_32x8_ ## cpu; \
> - p.sse_ss[LUMA_32x16] = x265_pixel_ssd_ss_32x16_ ## cpu; \
> - p.sse_ss[LUMA_32x24] = x265_pixel_ssd_ss_32x24_ ## cpu; \
> - p.sse_ss[LUMA_32x32] = x265_pixel_ssd_ss_32x32_ ## cpu; \
> - p.sse_ss[LUMA_32x64] = x265_pixel_ssd_ss_32x64_ ## cpu; \
> - p.sse_ss[LUMA_48x64] = x265_pixel_ssd_ss_48x64_ ## cpu; \
> - p.sse_ss[LUMA_64x16] = x265_pixel_ssd_ss_64x16_ ## cpu; \
> - p.sse_ss[LUMA_64x32] = x265_pixel_ssd_ss_64x32_ ## cpu; \
> - p.sse_ss[LUMA_64x48] = x265_pixel_ssd_ss_64x48_ ## cpu; \
> - p.sse_ss[LUMA_64x64] = x265_pixel_ssd_ss_64x64_ ## cpu;
> + p.pu[LUMA_4x4].sse_ss = x265_pixel_ssd_ss_4x4_ ## cpu; \
> + p.pu[LUMA_4x8].sse_ss = x265_pixel_ssd_ss_4x8_ ## cpu; \
> + p.pu[LUMA_4x16].sse_ss = x265_pixel_ssd_ss_4x16_ ## cpu; \
> + p.pu[LUMA_8x4].sse_ss = x265_pixel_ssd_ss_8x4_ ## cpu; \
> + p.pu[LUMA_8x8].sse_ss = x265_pixel_ssd_ss_8x8_ ## cpu; \
> + p.pu[LUMA_8x16].sse_ss = x265_pixel_ssd_ss_8x16_ ## cpu; \
> + p.pu[LUMA_8x32].sse_ss = x265_pixel_ssd_ss_8x32_ ## cpu; \
> + p.pu[LUMA_12x16].sse_ss = x265_pixel_ssd_ss_12x16_ ## cpu; \
> + p.pu[LUMA_16x4].sse_ss = x265_pixel_ssd_ss_16x4_ ## cpu; \
> + p.pu[LUMA_16x8].sse_ss = x265_pixel_ssd_ss_16x8_ ## cpu; \
> + p.pu[LUMA_16x12].sse_ss = x265_pixel_ssd_ss_16x12_ ## cpu; \
> + p.pu[LUMA_16x16].sse_ss = x265_pixel_ssd_ss_16x16_ ## cpu; \
> + p.pu[LUMA_16x32].sse_ss = x265_pixel_ssd_ss_16x32_ ## cpu; \
> + p.pu[LUMA_16x64].sse_ss = x265_pixel_ssd_ss_16x64_ ## cpu; \
> + p.pu[LUMA_24x32].sse_ss = x265_pixel_ssd_ss_24x32_ ## cpu; \
> + p.pu[LUMA_32x8].sse_ss = x265_pixel_ssd_ss_32x8_ ## cpu; \
> + p.pu[LUMA_32x16].sse_ss = x265_pixel_ssd_ss_32x16_ ## cpu; \
> + p.pu[LUMA_32x24].sse_ss = x265_pixel_ssd_ss_32x24_ ## cpu; \
> + p.pu[LUMA_32x32].sse_ss = x265_pixel_ssd_ss_32x32_ ## cpu; \
> + p.pu[LUMA_32x64].sse_ss = x265_pixel_ssd_ss_32x64_ ## cpu; \
> + p.pu[LUMA_48x64].sse_ss = x265_pixel_ssd_ss_48x64_ ## cpu; \
> + p.pu[LUMA_64x16].sse_ss = x265_pixel_ssd_ss_64x16_ ## cpu; \
> + p.pu[LUMA_64x32].sse_ss = x265_pixel_ssd_ss_64x32_ ## cpu; \
> + p.pu[LUMA_64x48].sse_ss = x265_pixel_ssd_ss_64x48_ ## cpu; \
> + p.pu[LUMA_64x64].sse_ss = x265_pixel_ssd_ss_64x64_ ## cpu;
>
> #define SA8D_INTER_FROM_BLOCK(cpu) \
> - p.sa8d_inter[LUMA_4x8] = x265_pixel_satd_4x8_ ## cpu; \
> - p.sa8d_inter[LUMA_8x4] = x265_pixel_satd_8x4_ ## cpu; \
> - p.sa8d_inter[LUMA_4x16] = x265_pixel_satd_4x16_ ## cpu; \
> - p.sa8d_inter[LUMA_16x4] = x265_pixel_satd_16x4_ ## cpu; \
> - p.sa8d_inter[LUMA_12x16] = x265_pixel_satd_12x16_ ## cpu; \
> - p.sa8d_inter[LUMA_8x8] = x265_pixel_sa8d_8x8_ ## cpu; \
> - p.sa8d_inter[LUMA_16x16] = x265_pixel_sa8d_16x16_ ## cpu; \
> - p.sa8d_inter[LUMA_16x12] = x265_pixel_satd_16x12_ ## cpu; \
> - p.sa8d_inter[LUMA_16x8] = x265_pixel_sa8d_16x8_ ## cpu; \
> - p.sa8d_inter[LUMA_8x16] = x265_pixel_sa8d_8x16_ ## cpu; \
> - p.sa8d_inter[LUMA_32x24] = x265_pixel_sa8d_32x24_ ## cpu; \
> - p.sa8d_inter[LUMA_24x32] = x265_pixel_sa8d_24x32_ ## cpu; \
> - p.sa8d_inter[LUMA_32x8] = x265_pixel_sa8d_32x8_ ## cpu; \
> - p.sa8d_inter[LUMA_8x32] = x265_pixel_sa8d_8x32_ ## cpu; \
> - p.sa8d_inter[LUMA_32x32] = x265_pixel_sa8d_32x32_ ## cpu; \
> - p.sa8d_inter[LUMA_32x16] = x265_pixel_sa8d_32x16_ ## cpu; \
> - p.sa8d_inter[LUMA_16x32] = x265_pixel_sa8d_16x32_ ## cpu; \
> - p.sa8d_inter[LUMA_64x64] = x265_pixel_sa8d_64x64_ ## cpu; \
> - p.sa8d_inter[LUMA_64x32] = x265_pixel_sa8d_64x32_ ## cpu; \
> - p.sa8d_inter[LUMA_32x64] = x265_pixel_sa8d_32x64_ ## cpu; \
> - p.sa8d_inter[LUMA_64x48] = x265_pixel_sa8d_64x48_ ## cpu; \
> - p.sa8d_inter[LUMA_48x64] = x265_pixel_sa8d_48x64_ ## cpu; \
> - p.sa8d_inter[LUMA_64x16] = x265_pixel_sa8d_64x16_ ## cpu; \
> - p.sa8d_inter[LUMA_16x64] = x265_pixel_sa8d_16x64_ ## cpu;
> + p.pu[LUMA_4x8].sa8d_inter = x265_pixel_satd_4x8_ ## cpu; \
> + p.pu[LUMA_8x4].sa8d_inter = x265_pixel_satd_8x4_ ## cpu; \
> + p.pu[LUMA_4x16].sa8d_inter = x265_pixel_satd_4x16_ ## cpu; \
> + p.pu[LUMA_16x4].sa8d_inter = x265_pixel_satd_16x4_ ## cpu; \
> + p.pu[LUMA_12x16].sa8d_inter = x265_pixel_satd_12x16_ ## cpu; \
> + p.pu[LUMA_8x8].sa8d_inter = x265_pixel_sa8d_8x8_ ## cpu; \
> + p.pu[LUMA_16x16].sa8d_inter = x265_pixel_sa8d_16x16_ ## cpu; \
> + p.pu[LUMA_16x12].sa8d_inter = x265_pixel_satd_16x12_ ## cpu; \
> + p.pu[LUMA_16x8].sa8d_inter = x265_pixel_sa8d_16x8_ ## cpu; \
> + p.pu[LUMA_8x16].sa8d_inter = x265_pixel_sa8d_8x16_ ## cpu; \
> + p.pu[LUMA_32x24].sa8d_inter = x265_pixel_sa8d_32x24_ ## cpu; \
> + p.pu[LUMA_24x32].sa8d_inter = x265_pixel_sa8d_24x32_ ## cpu; \
> + p.pu[LUMA_32x8].sa8d_inter = x265_pixel_sa8d_32x8_ ## cpu; \
> + p.pu[LUMA_8x32].sa8d_inter = x265_pixel_sa8d_8x32_ ## cpu; \
> + p.pu[LUMA_32x32].sa8d_inter = x265_pixel_sa8d_32x32_ ## cpu; \
> + p.pu[LUMA_32x16].sa8d_inter = x265_pixel_sa8d_32x16_ ## cpu; \
> + p.pu[LUMA_16x32].sa8d_inter = x265_pixel_sa8d_16x32_ ## cpu; \
> + p.pu[LUMA_64x64].sa8d_inter = x265_pixel_sa8d_64x64_ ## cpu; \
> + p.pu[LUMA_64x32].sa8d_inter = x265_pixel_sa8d_64x32_ ## cpu; \
> + p.pu[LUMA_32x64].sa8d_inter = x265_pixel_sa8d_32x64_ ## cpu; \
> + p.pu[LUMA_64x48].sa8d_inter = x265_pixel_sa8d_64x48_ ## cpu; \
> + p.pu[LUMA_48x64].sa8d_inter = x265_pixel_sa8d_48x64_ ## cpu; \
> + p.pu[LUMA_64x16].sa8d_inter = x265_pixel_sa8d_64x16_ ## cpu; \
> + p.pu[LUMA_16x64].sa8d_inter = x265_pixel_sa8d_16x64_ ## cpu;
>
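also visible in the hunk above: sa8d_inter still routes partitions whose
dimensions are not multiples of 8 (4x8, 8x4, 4x16, 16x4, 12x16, 16x12)
to the satd kernels, since the 8x8 Hadamard needs complete 8x8
sub-blocks; the move into pu[] preserves that fallback unchanged:

    // after setup, these resolve to different kernel families:
    // p.pu[LUMA_12x16].sa8d_inter == x265_pixel_satd_12x16_sse2 (satd fallback)
    // p.pu[LUMA_16x16].sa8d_inter == x265_pixel_sa8d_16x16_sse2 (true sa8d)
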
> #define PIXEL_AVG(cpu) \
> - p.pixelavg_pp[LUMA_64x64] = x265_pixel_avg_64x64_ ## cpu; \
> - p.pixelavg_pp[LUMA_64x48] = x265_pixel_avg_64x48_ ## cpu; \
> - p.pixelavg_pp[LUMA_64x32] = x265_pixel_avg_64x32_ ## cpu; \
> - p.pixelavg_pp[LUMA_64x16] = x265_pixel_avg_64x16_ ## cpu; \
> - p.pixelavg_pp[LUMA_48x64] = x265_pixel_avg_48x64_ ## cpu; \
> - p.pixelavg_pp[LUMA_32x64] = x265_pixel_avg_32x64_ ## cpu; \
> - p.pixelavg_pp[LUMA_32x32] = x265_pixel_avg_32x32_ ## cpu; \
> - p.pixelavg_pp[LUMA_32x24] = x265_pixel_avg_32x24_ ## cpu; \
> - p.pixelavg_pp[LUMA_32x16] = x265_pixel_avg_32x16_ ## cpu; \
> - p.pixelavg_pp[LUMA_32x8] = x265_pixel_avg_32x8_ ## cpu; \
> - p.pixelavg_pp[LUMA_24x32] = x265_pixel_avg_24x32_ ## cpu; \
> - p.pixelavg_pp[LUMA_16x64] = x265_pixel_avg_16x64_ ## cpu; \
> - p.pixelavg_pp[LUMA_16x32] = x265_pixel_avg_16x32_ ## cpu; \
> - p.pixelavg_pp[LUMA_16x16] = x265_pixel_avg_16x16_ ## cpu; \
> - p.pixelavg_pp[LUMA_16x12] = x265_pixel_avg_16x12_ ## cpu; \
> - p.pixelavg_pp[LUMA_16x8] = x265_pixel_avg_16x8_ ## cpu; \
> - p.pixelavg_pp[LUMA_16x4] = x265_pixel_avg_16x4_ ## cpu; \
> - p.pixelavg_pp[LUMA_12x16] = x265_pixel_avg_12x16_ ## cpu; \
> - p.pixelavg_pp[LUMA_8x32] = x265_pixel_avg_8x32_ ## cpu; \
> - p.pixelavg_pp[LUMA_8x16] = x265_pixel_avg_8x16_ ## cpu; \
> - p.pixelavg_pp[LUMA_8x8] = x265_pixel_avg_8x8_ ## cpu; \
> - p.pixelavg_pp[LUMA_8x4] = x265_pixel_avg_8x4_ ## cpu;
> + p.pu[LUMA_64x64].pixelavg_pp = x265_pixel_avg_64x64_ ## cpu; \
> + p.pu[LUMA_64x48].pixelavg_pp = x265_pixel_avg_64x48_ ## cpu; \
> + p.pu[LUMA_64x32].pixelavg_pp = x265_pixel_avg_64x32_ ## cpu; \
> + p.pu[LUMA_64x16].pixelavg_pp = x265_pixel_avg_64x16_ ## cpu; \
> + p.pu[LUMA_48x64].pixelavg_pp = x265_pixel_avg_48x64_ ## cpu; \
> + p.pu[LUMA_32x64].pixelavg_pp = x265_pixel_avg_32x64_ ## cpu; \
> + p.pu[LUMA_32x32].pixelavg_pp = x265_pixel_avg_32x32_ ## cpu; \
> + p.pu[LUMA_32x24].pixelavg_pp = x265_pixel_avg_32x24_ ## cpu; \
> + p.pu[LUMA_32x16].pixelavg_pp = x265_pixel_avg_32x16_ ## cpu; \
> + p.pu[LUMA_32x8].pixelavg_pp = x265_pixel_avg_32x8_ ## cpu; \
> + p.pu[LUMA_24x32].pixelavg_pp = x265_pixel_avg_24x32_ ## cpu; \
> + p.pu[LUMA_16x64].pixelavg_pp = x265_pixel_avg_16x64_ ## cpu; \
> + p.pu[LUMA_16x32].pixelavg_pp = x265_pixel_avg_16x32_ ## cpu; \
> + p.pu[LUMA_16x16].pixelavg_pp = x265_pixel_avg_16x16_ ## cpu; \
> + p.pu[LUMA_16x12].pixelavg_pp = x265_pixel_avg_16x12_ ## cpu; \
> + p.pu[LUMA_16x8].pixelavg_pp = x265_pixel_avg_16x8_ ## cpu; \
> + p.pu[LUMA_16x4].pixelavg_pp = x265_pixel_avg_16x4_ ## cpu; \
> + p.pu[LUMA_12x16].pixelavg_pp = x265_pixel_avg_12x16_ ## cpu; \
> + p.pu[LUMA_8x32].pixelavg_pp = x265_pixel_avg_8x32_ ## cpu; \
> + p.pu[LUMA_8x16].pixelavg_pp = x265_pixel_avg_8x16_ ## cpu; \
> + p.pu[LUMA_8x8].pixelavg_pp = x265_pixel_avg_8x8_ ## cpu; \
> + p.pu[LUMA_8x4].pixelavg_pp = x265_pixel_avg_8x4_ ## cpu;
>
> #define PIXEL_AVG_W4(cpu) \
> - p.pixelavg_pp[LUMA_4x4] = x265_pixel_avg_4x4_ ## cpu; \
> - p.pixelavg_pp[LUMA_4x8] = x265_pixel_avg_4x8_ ## cpu; \
> - p.pixelavg_pp[LUMA_4x16] = x265_pixel_avg_4x16_ ## cpu;
> + p.pu[LUMA_4x4].pixelavg_pp = x265_pixel_avg_4x4_ ## cpu; \
> + p.pu[LUMA_4x8].pixelavg_pp = x265_pixel_avg_4x8_ ## cpu; \
> + p.pu[LUMA_4x16].pixelavg_pp = x265_pixel_avg_4x16_ ## cpu;
>
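the pixel-average table moves the same way, so bi-prediction call sites
only change their lookup. A sketch, assuming pixelavg_pp keeps its
x264-style signature (dst/stride, two sources with strides, and an int
weight where 32 selects the plain equal-weight average):

    static void biPredAvg(pixel* dst, intptr_t dstStride,
                          const pixel* pred0, intptr_t stride0,
                          const pixel* pred1, intptr_t stride1)
    {
        // merge two motion-compensated predictions into one bi-pred block
        x265::primitives.pu[LUMA_32x32].pixelavg_pp(dst, dstStride,
                                                    pred0, stride0,
                                                    pred1, stride1, 32);
    }
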
> #define SETUP_CHROMA_FUNC_DEF_420(W, H, cpu) \
> - p.chroma[X265_CSP_I420].filter_hpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \
> - p.chroma[X265_CSP_I420].filter_hps[CHROMA_ ## W ## x ## H] = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu; \
> - p.chroma[X265_CSP_I420].filter_vpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \
> - p.chroma[X265_CSP_I420].filter_vps[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu;
> + p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].filter_hpp = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \
> + p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].filter_hps = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu; \
> + p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].filter_vpp = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \
> + p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].filter_vps = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu;
>
> #define SETUP_CHROMA_FUNC_DEF_422(W, H, cpu) \
> - p.chroma[X265_CSP_I422].filter_hpp[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \
> - p.chroma[X265_CSP_I422].filter_hps[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu; \
> - p.chroma[X265_CSP_I422].filter_vpp[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \
> - p.chroma[X265_CSP_I422].filter_vps[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].filter_hpp = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \
> + p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].filter_hps = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu; \
> + p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].filter_vpp = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \
> + p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].filter_vps = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu;
>
> #define SETUP_CHROMA_FUNC_DEF_444(W, H, cpu) \
> - p.chroma[X265_CSP_I444].filter_hpp[LUMA_ ## W ## x ## H] = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \
> - p.chroma[X265_CSP_I444].filter_hps[LUMA_ ## W ## x ## H] = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu; \
> - p.chroma[X265_CSP_I444].filter_vpp[LUMA_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \
> - p.chroma[X265_CSP_I444].filter_vps[LUMA_ ## W ## x ## H] = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu;
> + p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_hpp = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \
> + p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_hps = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu; \
> + p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vpp = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \
> + p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vps = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu;
>
> #define SETUP_CHROMA_SP_FUNC_DEF_420(W, H, cpu) \
> - p.chroma[X265_CSP_I420].filter_vsp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu;
> + p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].filter_vsp = x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu;
>
> #define SETUP_CHROMA_SP_FUNC_DEF_422(W, H, cpu) \
> - p.chroma[X265_CSP_I422].filter_vsp[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].filter_vsp = x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu;
>
> #define SETUP_CHROMA_SP_FUNC_DEF_444(W, H, cpu) \
> - p.chroma[X265_CSP_I444].filter_vsp[LUMA_ ## W ## x ## H] = x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu;
> + p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vsp = x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu;
>
> #define SETUP_CHROMA_SS_FUNC_DEF_420(W, H, cpu) \
> - p.chroma[X265_CSP_I420].filter_vss[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu;
> + p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].filter_vss = x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu;
>
> #define SETUP_CHROMA_SS_FUNC_DEF_422(W, H, cpu) \
> - p.chroma[X265_CSP_I422].filter_vss[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].filter_vss = x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu;
>
> #define SETUP_CHROMA_SS_FUNC_DEF_444(W, H, cpu) \
> - p.chroma[X265_CSP_I444].filter_vss[LUMA_ ## W ## x ## H] = x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu;
> + p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vss = x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu;
>
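one subtlety the chroma macros make visible: the chroma pu[] array is
sized NUM_LUMA_PARTITIONS but indexed through a different enum per color
space, so callers have to translate through the matching partition
table:

    // the same pu[] slot means a different geometry per csp:
    p.chroma[X265_CSP_I420].pu[CHROMA_8x8].filter_hpp;     // 4:2:0 enum
    p.chroma[X265_CSP_I422].pu[CHROMA422_8x16].filter_hpp; // 4:2:2 enum
    p.chroma[X265_CSP_I444].pu[LUMA_8x8].filter_hpp;       // 4:4:4 reuses luma sizes
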
> #define CHROMA_FILTERS_420(cpu) \
> SETUP_CHROMA_FUNC_DEF_420(4, 4, cpu); \
> @@ -538,37 +538,37 @@
>
> #if HIGH_BIT_DEPTH // temporary, until all 10bit functions are completed
> #define SETUP_LUMA_FUNC_DEF(W, H, cpu) \
> - p.luma_hpp[LUMA_ ## W ## x ## H] = x265_interp_8tap_horiz_pp_ ## W ## x ## H ## cpu; \
> - p.luma_hps[LUMA_ ## W ## x ## H] = x265_interp_8tap_horiz_ps_ ## W ## x ## H ## cpu; \
> - p.luma_vpp[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_pp_ ## W ## x ## H ## cpu; \
> - p.luma_vps[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_ps_ ## W ## x ## H ## cpu; \
> - p.luma_vsp[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_sp_ ## W ## x ## H ## cpu; \
> - p.luma_hvpp[LUMA_ ## W ## x ## H] = interp_8tap_hv_pp_cpu<LUMA_ ## W ## x ## H>;
> + p.pu[LUMA_ ## W ## x ## H].luma_hpp = x265_interp_8tap_horiz_pp_ ## W ## x ## H ## cpu; \
> + p.pu[LUMA_ ## W ## x ## H].luma_hps = x265_interp_8tap_horiz_ps_ ## W ## x ## H ## cpu; \
> + p.pu[LUMA_ ## W ## x ## H].luma_vpp = x265_interp_8tap_vert_pp_ ## W ## x ## H ## cpu; \
> + p.pu[LUMA_ ## W ## x ## H].luma_vps = x265_interp_8tap_vert_ps_ ## W ## x ## H ## cpu; \
> + p.pu[LUMA_ ## W ## x ## H].luma_vsp = x265_interp_8tap_vert_sp_ ## W ## x ## H ## cpu; \
> + p.pu[LUMA_ ## W ## x ## H].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_ ## W ## x ## H>;
> #else
> #define SETUP_LUMA_FUNC_DEF(W, H, cpu) \
> - p.luma_hpp[LUMA_ ## W ## x ## H] = x265_interp_8tap_horiz_pp_ ## W ## x ## H ## cpu; \
> - p.luma_hps[LUMA_ ## W ## x ## H] = x265_interp_8tap_horiz_ps_ ## W ## x ## H ## cpu; \
> - p.luma_vpp[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_pp_ ## W ## x ## H ## cpu; \
> - p.luma_vps[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_ps_ ## W ## x ## H ## cpu; \
> - p.luma_vsp[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_sp_ ## W ## x ## H ## cpu; \
> - p.luma_hvpp[LUMA_ ## W ## x ## H] = interp_8tap_hv_pp_cpu<LUMA_ ## W ## x ## H>;
> + p.pu[LUMA_ ## W ## x ## H].luma_hpp = x265_interp_8tap_horiz_pp_ ## W ## x ## H ## cpu; \
> + p.pu[LUMA_ ## W ## x ## H].luma_hps = x265_interp_8tap_horiz_ps_ ## W ## x ## H ## cpu; \
> + p.pu[LUMA_ ## W ## x ## H].luma_vpp = x265_interp_8tap_vert_pp_ ## W ## x ## H ## cpu; \
> + p.pu[LUMA_ ## W ## x ## H].luma_vps = x265_interp_8tap_vert_ps_ ## W ## x ## H ## cpu; \
> + p.pu[LUMA_ ## W ## x ## H].luma_vsp = x265_interp_8tap_vert_sp_ ## W ## x ## H ## cpu; \
> + p.pu[LUMA_ ## W ## x ## H].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_ ## W ## x ## H>;
> #endif // if HIGH_BIT_DEPTH
>
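nit: both sides of the HIGH_BIT_DEPTH conditional around
SETUP_LUMA_FUNC_DEF are now character-for-character identical, so unless
the 10-bit list is expected to diverge again (per the "temporary"
comment) the #if/#else could collapse to a single definition:

    #define SETUP_LUMA_FUNC_DEF(W, H, cpu) \
        p.pu[LUMA_ ## W ## x ## H].luma_hpp = x265_interp_8tap_horiz_pp_ ## W ## x ## H ## cpu; \
        p.pu[LUMA_ ## W ## x ## H].luma_hps = x265_interp_8tap_horiz_ps_ ## W ## x ## H ## cpu; \
        p.pu[LUMA_ ## W ## x ## H].luma_vpp = x265_interp_8tap_vert_pp_ ## W ## x ## H ## cpu; \
        p.pu[LUMA_ ## W ## x ## H].luma_vps = x265_interp_8tap_vert_ps_ ## W ## x ## H ## cpu; \
        p.pu[LUMA_ ## W ## x ## H].luma_vsp = x265_interp_8tap_vert_sp_ ## W ## x ## H ## cpu; \
        p.pu[LUMA_ ## W ## x ## H].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_ ## W ## x ## H>;
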
> #define SETUP_LUMA_SUB_FUNC_DEF(W, H, cpu) \
> - p.luma_sub_ps[LUMA_ ## W ## x ## H] = x265_pixel_sub_ps_ ## W ## x ## H ## cpu; \
> - p.luma_add_ps[LUMA_ ## W ## x ## H] = x265_pixel_add_ps_ ## W ## x ## H ## cpu;
> + p.pu[LUMA_ ## W ## x ## H].luma_sub_ps = x265_pixel_sub_ps_ ## W ## x ## H ## cpu; \
> + p.pu[LUMA_ ## W ## x ## H].luma_add_ps = x265_pixel_add_ps_ ## W ## x ## H ## cpu;
>
> #define SETUP_LUMA_SP_FUNC_DEF(W, H, cpu) \
> - p.luma_vsp[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_sp_ ## W ## x ## H ## cpu;
> + p.pu[LUMA_ ## W ## x ## H].luma_vsp = x265_interp_8tap_vert_sp_ ## W ## x ## H ## cpu;
>
> #define SETUP_LUMA_SS_FUNC_DEF(W, H, cpu) \
> - p.luma_vss[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_ss_ ## W ## x ## H ## cpu;
> + p.pu[LUMA_ ## W ## x ## H].luma_vss = x265_interp_8tap_vert_ss_ ## W ## x ## H ## cpu;
>
> #define SETUP_LUMA_BLOCKCOPY(type, W, H, cpu) \
> - p.luma_copy_ ## type[LUMA_ ## W ## x ## H] = x265_blockcopy_ ## type ## _ ## W ## x ## H ## cpu;
> + p.pu[LUMA_ ## W ## x ## H].luma_copy_ ## type = x265_blockcopy_ ## type ## _ ## W ## x ## H ## cpu;
>
> #define SETUP_CHROMA_BLOCKCOPY(type, W, H, cpu) \
> - p.chroma[X265_CSP_I420].copy_ ## type[CHROMA_ ## W ## x ## H] = x265_blockcopy_ ## type ## _ ## W ## x ## H ## cpu;
> + p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].copy_ ## type = x265_blockcopy_ ## type ## _ ## W ## x ## H ## cpu;
>
> #define CHROMA_BLOCKCOPY(type, cpu) \
> SETUP_CHROMA_BLOCKCOPY(type, 2, 4, cpu); \
> @@ -597,7 +597,7 @@
> SETUP_CHROMA_BLOCKCOPY(type, 32, 32, cpu);
>
> #define SETUP_CHROMA_BLOCKCOPY_422(type, W, H, cpu) \
> - p.chroma[X265_CSP_I422].copy_ ## type[CHROMA422_ ## W ## x ## H] = x265_blockcopy_ ## type ## _ ## W ## x ## H ## cpu;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].copy_ ## type = x265_blockcopy_ ## type ## _ ## W ## x ## H ## cpu;
>
> #define CHROMA_BLOCKCOPY_422(type, cpu) \
> SETUP_CHROMA_BLOCKCOPY_422(type, 2, 8, cpu); \
> @@ -653,7 +653,7 @@
> SETUP_LUMA_BLOCKCOPY(type, 16, 64, cpu);
>
> #define SETUP_CHROMA_BLOCKCOPY_SP(W, H, cpu) \
> - p.chroma[X265_CSP_I420].copy_sp[CHROMA_ ## W ## x ## H] = x265_blockcopy_sp_ ## W ## x ## H ## cpu;
> + p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].copy_sp = x265_blockcopy_sp_ ## W ## x ## H ## cpu;
>
> #define CHROMA_BLOCKCOPY_SP(cpu) \
> SETUP_CHROMA_BLOCKCOPY_SP(2, 4, cpu); \
> @@ -682,7 +682,7 @@
> SETUP_CHROMA_BLOCKCOPY_SP(32, 32, cpu);
>
> #define SETUP_CHROMA_BLOCKCOPY_SP_422(W, H, cpu) \
> - p.chroma[X265_CSP_I422].copy_sp[CHROMA422_ ## W ## x ## H] = x265_blockcopy_sp_ ## W ## x ## H ## cpu;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].copy_sp = x265_blockcopy_sp_ ## W ## x ## H ## cpu;
>
> #define CHROMA_BLOCKCOPY_SP_422(cpu) \
> SETUP_CHROMA_BLOCKCOPY_SP_422(2, 8, cpu); \
> @@ -711,8 +711,8 @@
> SETUP_CHROMA_BLOCKCOPY_SP_422(32, 64, cpu);
>
> #define SETUP_CHROMA_PIXELSUB(W, H, cpu) \
> - p.chroma[X265_CSP_I420].sub_ps[CHROMA_ ## W ## x ## H] = x265_pixel_sub_ps_ ## W ## x ## H ## cpu; \
> - p.chroma[X265_CSP_I420].add_ps[CHROMA_ ## W ## x ## H] = x265_pixel_add_ps_ ## W ## x ## H ## cpu;
> + p.chroma[X265_CSP_I420].cu[CHROMA_ ## W ## x ## H].sub_ps = x265_pixel_sub_ps_ ## W ## x ## H ## cpu; \
> + p.chroma[X265_CSP_I420].cu[CHROMA_ ## W ## x ## H].add_ps = x265_pixel_add_ps_ ## W ## x ## H ## cpu;
>
> #define CHROMA_PIXELSUB_PS(cpu) \
> SETUP_CHROMA_PIXELSUB(4, 4, cpu); \
> @@ -721,8 +721,8 @@
> SETUP_CHROMA_PIXELSUB(32, 32, cpu);
>
> #define SETUP_CHROMA_PIXELSUB_422(W, H, cpu) \
> - p.chroma[X265_CSP_I422].sub_ps[CHROMA422_ ## W ## x ## H] = x265_pixel_sub_ps_ ## W ## x ## H ## cpu; \
> - p.chroma[X265_CSP_I422].add_ps[CHROMA422_ ## W ## x ## H] = x265_pixel_add_ps_ ## W ## x ## H ## cpu;
> + p.chroma[X265_CSP_I422].cu[CHROMA422_ ## W ## x ## H].sub_ps = x265_pixel_sub_ps_ ## W ## x ## H ## cpu; \
> + p.chroma[X265_CSP_I422].cu[CHROMA422_ ## W ## x ## H].add_ps = x265_pixel_add_ps_ ## W ## x ## H ## cpu;
>
> #define CHROMA_PIXELSUB_PS_422(cpu) \
> SETUP_CHROMA_PIXELSUB_422(4, 8, cpu); \
> @@ -819,7 +819,7 @@
> SETUP_LUMA_SS_FUNC_DEF(16, 64, cpu);
>
> #define SETUP_PIXEL_VAR_DEF(W, H, cpu) \
> - p.var[BLOCK_ ## W ## x ## H] = x265_pixel_var_ ## W ## x ## H ## cpu;
> + p.cu[BLOCK_ ## W ## x ## H].var = x265_pixel_var_ ## W ## x ## H ## cpu;
>
> #define LUMA_VAR(cpu) \
> SETUP_PIXEL_VAR_DEF(8, 8, cpu); \
> @@ -828,7 +828,7 @@
> SETUP_PIXEL_VAR_DEF(64, 64, cpu);
>
> #define SETUP_PIXEL_SSE_SP_DEF(W, H, cpu) \
> - p.sse_sp[LUMA_ ## W ## x ## H] = x265_pixel_ssd_sp_ ## W ## x ## H ## cpu;
> + p.pu[LUMA_ ## W ## x ## H].sse_sp = x265_pixel_ssd_sp_ ## W ## x ## H ## cpu;
>
> #define LUMA_SSE_SP(cpu) \
> SETUP_PIXEL_SSE_SP_DEF(4, 4, cpu); \
> @@ -858,7 +858,7 @@
> SETUP_PIXEL_SSE_SP_DEF(16, 64, cpu);
>
> #define SETUP_LUMA_ADDAVG_FUNC_DEF(W, H, cpu) \
> - p.luma_addAvg[LUMA_ ## W ## x ## H] = x265_addAvg_ ## W ## x ## H ## cpu;
> + p.pu[LUMA_ ## W ## x ## H].luma_addAvg = x265_addAvg_ ## W ## x ## H ## cpu;
>
> #define LUMA_ADDAVG(cpu) \
> SETUP_LUMA_ADDAVG_FUNC_DEF(4, 4, cpu); \
> @@ -888,7 +888,7 @@
> SETUP_LUMA_ADDAVG_FUNC_DEF(64, 64, cpu); \
>
> #define SETUP_CHROMA_ADDAVG_FUNC_DEF(W, H, cpu) \
> - p.chroma[X265_CSP_I420].addAvg[CHROMA_ ## W ## x ## H] = x265_addAvg_ ## W ## x ## H ## cpu;
> + p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].addAvg = x265_addAvg_ ## W ## x ## H ## cpu;
>
> #define CHROMA_ADDAVG(cpu) \
> SETUP_CHROMA_ADDAVG_FUNC_DEF(2, 4, cpu); \
> @@ -917,7 +917,7 @@
> SETUP_CHROMA_ADDAVG_FUNC_DEF(32, 32, cpu);
>
> #define SETUP_CHROMA_ADDAVG_FUNC_DEF_422(W, H, cpu) \
> - p.chroma[X265_CSP_I422].addAvg[CHROMA422_ ## W ## x ## H] = x265_addAvg_ ## W ## x ## H ## cpu;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].addAvg = x265_addAvg_ ## W ## x ## H ## cpu;
>
> #define CHROMA_ADDAVG_422(cpu) \
> SETUP_CHROMA_ADDAVG_FUNC_DEF_422(2, 8, cpu); \
> @@ -1054,10 +1054,10 @@
> SETUP_INTRA_ANG16_32(33, 33, cpu);
>
> #define SETUP_CHROMA_VERT_FUNC_DEF(W, H, cpu) \
> - p.chroma[X265_CSP_I420].filter_vss[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu; \
> - p.chroma[X265_CSP_I420].filter_vpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \
> - p.chroma[X265_CSP_I420].filter_vps[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu; \
> - p.chroma[X265_CSP_I420].filter_vsp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu;
> + p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].filter_vss = x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu; \
> + p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].filter_vpp = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \
> + p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].filter_vps = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu; \
> + p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].filter_vsp = x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu;
>
> #define CHROMA_VERT_FILTERS(cpu) \
> SETUP_CHROMA_VERT_FUNC_DEF(4, 4, cpu); \
> @@ -1088,10 +1088,10 @@
> SETUP_CHROMA_VERT_FUNC_DEF(6, 8, cpu);
>
> #define SETUP_CHROMA_VERT_FUNC_DEF_422(W, H, cpu) \
> - p.chroma[X265_CSP_I422].filter_vss[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu; \
> - p.chroma[X265_CSP_I422].filter_vpp[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \
> - p.chroma[X265_CSP_I422].filter_vps[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu; \
> - p.chroma[X265_CSP_I422].filter_vsp[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].filter_vss = x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu; \
> + p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].filter_vpp = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \
> + p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].filter_vps = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu; \
> + p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].filter_vsp = x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu;
>
> #define CHROMA_VERT_FILTERS_422(cpu) \
> SETUP_CHROMA_VERT_FUNC_DEF_422(4, 8, cpu); \
> @@ -1122,10 +1122,10 @@
> SETUP_CHROMA_VERT_FUNC_DEF_422(6, 16, cpu);
>
> #define SETUP_CHROMA_VERT_FUNC_DEF_444(W, H, cpu) \
> - p.chroma[X265_CSP_I444].filter_vss[LUMA_ ## W ## x ## H] = x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu; \
> - p.chroma[X265_CSP_I444].filter_vpp[LUMA_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \
> - p.chroma[X265_CSP_I444].filter_vps[LUMA_ ## W ## x ## H] = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu; \
> - p.chroma[X265_CSP_I444].filter_vsp[LUMA_ ## W ## x ## H] = x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu;
> + p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vss = x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu; \
> + p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vpp = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \
> + p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vps = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu; \
> + p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vsp = x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu;
>
> #define CHROMA_VERT_FILTERS_444(cpu) \
> SETUP_CHROMA_VERT_FUNC_DEF_444(8, 8, cpu); \
> @@ -1154,8 +1154,8 @@
> SETUP_CHROMA_VERT_FUNC_DEF_444(16, 64, cpu);
>
> #define SETUP_CHROMA_HORIZ_FUNC_DEF(W, H, cpu) \
> - p.chroma[X265_CSP_I420].filter_hpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \
> - p.chroma[X265_CSP_I420].filter_hps[CHROMA_ ## W ## x ## H] = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu;
> + p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].filter_hpp = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \
> + p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].filter_hps = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu;
>
> #define CHROMA_HORIZ_FILTERS(cpu) \
> SETUP_CHROMA_HORIZ_FUNC_DEF(4, 4, cpu); \
> @@ -1184,8 +1184,8 @@
> SETUP_CHROMA_HORIZ_FUNC_DEF(8, 32, cpu);
>
> #define SETUP_CHROMA_HORIZ_FUNC_DEF_422(W, H, cpu) \
> - p.chroma[X265_CSP_I422].filter_hpp[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \
> - p.chroma[X265_CSP_I422].filter_hps[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].filter_hpp = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \
> + p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].filter_hps = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu;
>
> #define CHROMA_HORIZ_FILTERS_422(cpu) \
> SETUP_CHROMA_HORIZ_FUNC_DEF_422(4, 8, cpu); \
> @@ -1214,8 +1214,8 @@
> SETUP_CHROMA_HORIZ_FUNC_DEF_422(8, 64, cpu);
>
> #define SETUP_CHROMA_HORIZ_FUNC_DEF_444(W, H, cpu) \
> - p.chroma[X265_CSP_I444].filter_hpp[LUMA_ ## W ## x ## H] = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \
> - p.chroma[X265_CSP_I444].filter_hps[LUMA_ ## W ## x ## H] = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu;
> + p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_hpp = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \
> + p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_hps = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu;
>
> #define CHROMA_HORIZ_FILTERS_444(cpu) \
> SETUP_CHROMA_HORIZ_FUNC_DEF_444(8, 8, cpu); \
> @@ -1257,44 +1257,44 @@
>
> INIT6(satd, _sse2);
> HEVC_SATD(sse2);
> - p.satd[LUMA_4x4] = x265_pixel_satd_4x4_mmx2;
> + p.pu[LUMA_4x4].satd = x265_pixel_satd_4x4_mmx2;
>
> - p.sa8d_inter[LUMA_4x4] = x265_pixel_satd_4x4_mmx2;
> + p.pu[LUMA_4x4].sa8d_inter = x265_pixel_satd_4x4_mmx2;
> SA8D_INTER_FROM_BLOCK(sse2);
> - p.sa8d_inter[LUMA_8x8] = x265_pixel_sa8d_8x8_sse2;
> - p.sa8d_inter[LUMA_16x16] = x265_pixel_sa8d_16x16_sse2;
> + p.pu[LUMA_8x8].sa8d_inter = x265_pixel_sa8d_8x8_sse2;
> + p.pu[LUMA_16x16].sa8d_inter = x265_pixel_sa8d_16x16_sse2;
>
> - p.sse_ss[LUMA_4x4] = x265_pixel_ssd_ss_4x4_mmx2;
> - p.sse_ss[LUMA_4x8] = x265_pixel_ssd_ss_4x8_mmx2;
> - p.sse_ss[LUMA_4x16] = x265_pixel_ssd_ss_4x16_mmx2;
> - p.sse_ss[LUMA_8x4] = x265_pixel_ssd_ss_8x4_sse2;
> - p.sse_ss[LUMA_8x8] = x265_pixel_ssd_ss_8x8_sse2;
> - p.sse_ss[LUMA_8x16] = x265_pixel_ssd_ss_8x16_sse2;
> - p.sse_ss[LUMA_8x32] = x265_pixel_ssd_ss_8x32_sse2;
> - p.sse_ss[LUMA_12x16] = x265_pixel_ssd_ss_12x16_sse2;
> - p.sse_ss[LUMA_16x4] = x265_pixel_ssd_ss_16x4_sse2;
> - p.sse_ss[LUMA_16x8] = x265_pixel_ssd_ss_16x8_sse2;
> - p.sse_ss[LUMA_16x12] = x265_pixel_ssd_ss_16x12_sse2;
> - p.sse_ss[LUMA_16x16] = x265_pixel_ssd_ss_16x16_sse2;
> - p.sse_ss[LUMA_16x32] = x265_pixel_ssd_ss_16x32_sse2;
> - p.sse_ss[LUMA_16x64] = x265_pixel_ssd_ss_16x64_sse2;
> - p.sse_ss[LUMA_24x32] = x265_pixel_ssd_ss_24x32_sse2;
> - p.sse_ss[LUMA_32x8] = x265_pixel_ssd_ss_32x8_sse2;
> - p.sse_ss[LUMA_32x16] = x265_pixel_ssd_ss_32x16_sse2;
> - p.sse_ss[LUMA_32x24] = x265_pixel_ssd_ss_32x24_sse2;
> - p.sse_ss[LUMA_32x32] = x265_pixel_ssd_ss_32x32_sse2;
> - p.sse_ss[LUMA_32x64] = x265_pixel_ssd_ss_32x64_sse2;
> - p.sse_ss[LUMA_48x64] = x265_pixel_ssd_ss_48x64_sse2;
> - p.sse_ss[LUMA_64x16] = x265_pixel_ssd_ss_64x16_sse2;
> - p.sse_ss[LUMA_64x32] = x265_pixel_ssd_ss_64x32_sse2;
> - p.sse_ss[LUMA_64x48] = x265_pixel_ssd_ss_64x48_sse2;
> - p.sse_ss[LUMA_64x64] = x265_pixel_ssd_ss_64x64_sse2;
> + p.pu[LUMA_4x4].sse_ss = x265_pixel_ssd_ss_4x4_mmx2;
> + p.pu[LUMA_4x8].sse_ss = x265_pixel_ssd_ss_4x8_mmx2;
> + p.pu[LUMA_4x16].sse_ss = x265_pixel_ssd_ss_4x16_mmx2;
> + p.pu[LUMA_8x4].sse_ss = x265_pixel_ssd_ss_8x4_sse2;
> + p.pu[LUMA_8x8].sse_ss = x265_pixel_ssd_ss_8x8_sse2;
> + p.pu[LUMA_8x16].sse_ss = x265_pixel_ssd_ss_8x16_sse2;
> + p.pu[LUMA_8x32].sse_ss = x265_pixel_ssd_ss_8x32_sse2;
> + p.pu[LUMA_12x16].sse_ss = x265_pixel_ssd_ss_12x16_sse2;
> + p.pu[LUMA_16x4].sse_ss = x265_pixel_ssd_ss_16x4_sse2;
> + p.pu[LUMA_16x8].sse_ss = x265_pixel_ssd_ss_16x8_sse2;
> + p.pu[LUMA_16x12].sse_ss = x265_pixel_ssd_ss_16x12_sse2;
> + p.pu[LUMA_16x16].sse_ss = x265_pixel_ssd_ss_16x16_sse2;
> + p.pu[LUMA_16x32].sse_ss = x265_pixel_ssd_ss_16x32_sse2;
> + p.pu[LUMA_16x64].sse_ss = x265_pixel_ssd_ss_16x64_sse2;
> + p.pu[LUMA_24x32].sse_ss = x265_pixel_ssd_ss_24x32_sse2;
> + p.pu[LUMA_32x8].sse_ss = x265_pixel_ssd_ss_32x8_sse2;
> + p.pu[LUMA_32x16].sse_ss = x265_pixel_ssd_ss_32x16_sse2;
> + p.pu[LUMA_32x24].sse_ss = x265_pixel_ssd_ss_32x24_sse2;
> + p.pu[LUMA_32x32].sse_ss = x265_pixel_ssd_ss_32x32_sse2;
> + p.pu[LUMA_32x64].sse_ss = x265_pixel_ssd_ss_32x64_sse2;
> + p.pu[LUMA_48x64].sse_ss = x265_pixel_ssd_ss_48x64_sse2;
> + p.pu[LUMA_64x16].sse_ss = x265_pixel_ssd_ss_64x16_sse2;
> + p.pu[LUMA_64x32].sse_ss = x265_pixel_ssd_ss_64x32_sse2;
> + p.pu[LUMA_64x48].sse_ss = x265_pixel_ssd_ss_64x48_sse2;
> + p.pu[LUMA_64x64].sse_ss = x265_pixel_ssd_ss_64x64_sse2;
>
> - p.transpose[BLOCK_4x4] = x265_transpose4_sse2;
> - p.transpose[BLOCK_8x8] = x265_transpose8_sse2;
> - p.transpose[BLOCK_16x16] = x265_transpose16_sse2;
> - p.transpose[BLOCK_32x32] = x265_transpose32_sse2;
> - p.transpose[BLOCK_64x64] = x265_transpose64_sse2;
> + p.cu[BLOCK_4x4].transpose = x265_transpose4_sse2;
> + p.cu[BLOCK_8x8].transpose = x265_transpose8_sse2;
> + p.cu[BLOCK_16x16].transpose = x265_transpose16_sse2;
> + p.cu[BLOCK_32x32].transpose = x265_transpose32_sse2;
> + p.cu[BLOCK_64x64].transpose = x265_transpose64_sse2;
>
> p.ssim_4x4x2_core = x265_pixel_ssim_4x4x2_core_sse2;
> p.ssim_end_4 = x265_pixel_ssim_end4_sse2;
> @@ -1303,43 +1303,43 @@
> LUMA_VAR(_sse2);
>
> SAD_X3(sse2);
> - p.sad_x3[LUMA_4x4] = x265_pixel_sad_x3_4x4_mmx2;
> - p.sad_x3[LUMA_4x8] = x265_pixel_sad_x3_4x8_mmx2;
> - p.sad_x3[LUMA_4x16] = x265_pixel_sad_x3_4x16_mmx2;
> - p.sad_x3[LUMA_8x4] = x265_pixel_sad_x3_8x4_sse2;
> - p.sad_x3[LUMA_8x8] = x265_pixel_sad_x3_8x8_sse2;
> - p.sad_x3[LUMA_8x16] = x265_pixel_sad_x3_8x16_sse2;
> - p.sad_x3[LUMA_8x32] = x265_pixel_sad_x3_8x32_sse2;
> - p.sad_x3[LUMA_16x4] = x265_pixel_sad_x3_16x4_sse2;
> - p.sad_x3[LUMA_12x16] = x265_pixel_sad_x3_12x16_mmx2;
> + p.pu[LUMA_4x4].sad_x3 = x265_pixel_sad_x3_4x4_mmx2;
> + p.pu[LUMA_4x8].sad_x3 = x265_pixel_sad_x3_4x8_mmx2;
> + p.pu[LUMA_4x16].sad_x3 = x265_pixel_sad_x3_4x16_mmx2;
> + p.pu[LUMA_8x4].sad_x3 = x265_pixel_sad_x3_8x4_sse2;
> + p.pu[LUMA_8x8].sad_x3 = x265_pixel_sad_x3_8x8_sse2;
> + p.pu[LUMA_8x16].sad_x3 = x265_pixel_sad_x3_8x16_sse2;
> + p.pu[LUMA_8x32].sad_x3 = x265_pixel_sad_x3_8x32_sse2;
> + p.pu[LUMA_16x4].sad_x3 = x265_pixel_sad_x3_16x4_sse2;
> + p.pu[LUMA_12x16].sad_x3 = x265_pixel_sad_x3_12x16_mmx2;
>
> SAD_X4(sse2);
> - p.sad_x4[LUMA_4x4] = x265_pixel_sad_x4_4x4_mmx2;
> - p.sad_x4[LUMA_4x8] = x265_pixel_sad_x4_4x8_mmx2;
> - p.sad_x4[LUMA_4x16] = x265_pixel_sad_x4_4x16_mmx2;
> - p.sad_x4[LUMA_8x4] = x265_pixel_sad_x4_8x4_sse2;
> - p.sad_x4[LUMA_8x8] = x265_pixel_sad_x4_8x8_sse2;
> - p.sad_x4[LUMA_8x16] = x265_pixel_sad_x4_8x16_sse2;
> - p.sad_x4[LUMA_8x32] = x265_pixel_sad_x4_8x32_sse2;
> - p.sad_x4[LUMA_16x4] = x265_pixel_sad_x4_16x4_sse2;
> - p.sad_x4[LUMA_12x16] = x265_pixel_sad_x4_12x16_mmx2;
> + p.pu[LUMA_4x4].sad_x4 = x265_pixel_sad_x4_4x4_mmx2;
> + p.pu[LUMA_4x8].sad_x4 = x265_pixel_sad_x4_4x8_mmx2;
> + p.pu[LUMA_4x16].sad_x4 = x265_pixel_sad_x4_4x16_mmx2;
> + p.pu[LUMA_8x4].sad_x4 = x265_pixel_sad_x4_8x4_sse2;
> + p.pu[LUMA_8x8].sad_x4 = x265_pixel_sad_x4_8x8_sse2;
> + p.pu[LUMA_8x16].sad_x4 = x265_pixel_sad_x4_8x16_sse2;
> + p.pu[LUMA_8x32].sad_x4 = x265_pixel_sad_x4_8x32_sse2;
> + p.pu[LUMA_16x4].sad_x4 = x265_pixel_sad_x4_16x4_sse2;
> + p.pu[LUMA_12x16].sad_x4 = x265_pixel_sad_x4_12x16_mmx2;
>
> - p.cpy2Dto1D_shl[BLOCK_4x4] = x265_cpy2Dto1D_shl_4_sse2;
> - p.cpy2Dto1D_shl[BLOCK_8x8] = x265_cpy2Dto1D_shl_8_sse2;
> - p.cpy2Dto1D_shl[BLOCK_16x16] = x265_cpy2Dto1D_shl_16_sse2;
> - p.cpy2Dto1D_shl[BLOCK_32x32] = x265_cpy2Dto1D_shl_32_sse2;
> - p.cpy2Dto1D_shr[BLOCK_4x4] = x265_cpy2Dto1D_shr_4_sse2;
> - p.cpy2Dto1D_shr[BLOCK_8x8] = x265_cpy2Dto1D_shr_8_sse2;
> - p.cpy2Dto1D_shr[BLOCK_16x16] = x265_cpy2Dto1D_shr_16_sse2;
> - p.cpy2Dto1D_shr[BLOCK_32x32] = x265_cpy2Dto1D_shr_32_sse2;
> - p.cpy1Dto2D_shl[BLOCK_4x4] = x265_cpy1Dto2D_shl_4_sse2;
> - p.cpy1Dto2D_shl[BLOCK_8x8] = x265_cpy1Dto2D_shl_8_sse2;
> - p.cpy1Dto2D_shl[BLOCK_16x16] = x265_cpy1Dto2D_shl_16_sse2;
> - p.cpy1Dto2D_shl[BLOCK_32x32] = x265_cpy1Dto2D_shl_32_sse2;
> - p.cpy1Dto2D_shr[BLOCK_4x4] = x265_cpy1Dto2D_shr_4_sse2;
> - p.cpy1Dto2D_shr[BLOCK_8x8] = x265_cpy1Dto2D_shr_8_sse2;
> - p.cpy1Dto2D_shr[BLOCK_16x16] = x265_cpy1Dto2D_shr_16_sse2;
> - p.cpy1Dto2D_shr[BLOCK_32x32] = x265_cpy1Dto2D_shr_32_sse2;
> + p.cu[BLOCK_4x4].cpy2Dto1D_shl = x265_cpy2Dto1D_shl_4_sse2;
> + p.cu[BLOCK_8x8].cpy2Dto1D_shl = x265_cpy2Dto1D_shl_8_sse2;
> + p.cu[BLOCK_16x16].cpy2Dto1D_shl = x265_cpy2Dto1D_shl_16_sse2;
> + p.cu[BLOCK_32x32].cpy2Dto1D_shl = x265_cpy2Dto1D_shl_32_sse2;
> + p.cu[BLOCK_4x4].cpy2Dto1D_shr = x265_cpy2Dto1D_shr_4_sse2;
> + p.cu[BLOCK_8x8].cpy2Dto1D_shr = x265_cpy2Dto1D_shr_8_sse2;
> + p.cu[BLOCK_16x16].cpy2Dto1D_shr = x265_cpy2Dto1D_shr_16_sse2;
> + p.cu[BLOCK_32x32].cpy2Dto1D_shr = x265_cpy2Dto1D_shr_32_sse2;
> + p.cu[BLOCK_4x4].cpy1Dto2D_shl = x265_cpy1Dto2D_shl_4_sse2;
> + p.cu[BLOCK_8x8].cpy1Dto2D_shl = x265_cpy1Dto2D_shl_8_sse2;
> + p.cu[BLOCK_16x16].cpy1Dto2D_shl = x265_cpy1Dto2D_shl_16_sse2;
> + p.cu[BLOCK_32x32].cpy1Dto2D_shl = x265_cpy1Dto2D_shl_32_sse2;
> + p.cu[BLOCK_4x4].cpy1Dto2D_shr = x265_cpy1Dto2D_shr_4_sse2;
> + p.cu[BLOCK_8x8].cpy1Dto2D_shr = x265_cpy1Dto2D_shr_8_sse2;
> + p.cu[BLOCK_16x16].cpy1Dto2D_shr = x265_cpy1Dto2D_shr_16_sse2;
> + p.cu[BLOCK_32x32].cpy1Dto2D_shr = x265_cpy1Dto2D_shr_32_sse2;
>
> CHROMA_PIXELSUB_PS(_sse2);
> CHROMA_PIXELSUB_PS_422(_sse2);
> @@ -1357,28 +1357,28 @@
> p.chroma[X265_CSP_I422].p2s = x265_chroma_p2s_sse2;
> p.chroma[X265_CSP_I444].p2s = x265_luma_p2s_sse2; // for i444 , chroma_p2s can be replaced by luma_p2s
>
> - p.blockfill_s[BLOCK_4x4] = x265_blockfill_s_4x4_sse2;
> - p.blockfill_s[BLOCK_8x8] = x265_blockfill_s_8x8_sse2;
> - p.blockfill_s[BLOCK_16x16] = x265_blockfill_s_16x16_sse2;
> - p.blockfill_s[BLOCK_32x32] = x265_blockfill_s_32x32_sse2;
> + p.cu[BLOCK_4x4].blockfill_s = x265_blockfill_s_4x4_sse2;
> + p.cu[BLOCK_8x8].blockfill_s = x265_blockfill_s_8x8_sse2;
> + p.cu[BLOCK_16x16].blockfill_s = x265_blockfill_s_16x16_sse2;
> + p.cu[BLOCK_32x32].blockfill_s = x265_blockfill_s_32x32_sse2;
>
> // TODO: overflow on 12-bits mode!
> - p.ssd_s[BLOCK_4x4] = x265_pixel_ssd_s_4_sse2;
> - p.ssd_s[BLOCK_8x8] = x265_pixel_ssd_s_8_sse2;
> - p.ssd_s[BLOCK_16x16] = x265_pixel_ssd_s_16_sse2;
> - p.ssd_s[BLOCK_32x32] = x265_pixel_ssd_s_32_sse2;
> + p.cu[BLOCK_4x4].ssd_s = x265_pixel_ssd_s_4_sse2;
> + p.cu[BLOCK_8x8].ssd_s = x265_pixel_ssd_s_8_sse2;
> + p.cu[BLOCK_16x16].ssd_s = x265_pixel_ssd_s_16_sse2;
> + p.cu[BLOCK_32x32].ssd_s = x265_pixel_ssd_s_32_sse2;
>
> - p.calcresidual[BLOCK_4x4] = x265_getResidual4_sse2;
> - p.calcresidual[BLOCK_8x8] = x265_getResidual8_sse2;
> - p.calcresidual[BLOCK_16x16] = x265_getResidual16_sse2;
> - p.calcresidual[BLOCK_32x32] = x265_getResidual32_sse2;
> + p.cu[BLOCK_4x4].calcresidual = x265_getResidual4_sse2;
> + p.cu[BLOCK_8x8].calcresidual = x265_getResidual8_sse2;
> + p.cu[BLOCK_16x16].calcresidual = x265_getResidual16_sse2;
> + p.cu[BLOCK_32x32].calcresidual = x265_getResidual32_sse2;
>
> - p.dct[DCT_4x4] = x265_dct4_sse2;
> - p.idct[IDCT_4x4] = x265_idct4_sse2;
> + p.cu[BLOCK_4x4].dct = x265_dct4_sse2;
> + p.cu[BLOCK_4x4].idct = x265_idct4_sse2;
> #if X86_64
> - p.idct[IDCT_8x8] = x265_idct8_sse2;
> + p.cu[BLOCK_8x8].idct = x265_idct8_sse2;
> #endif
> - p.idct[IDST_4x4] = x265_idst4_sse2;
> + p.idst4x4 = x265_idst4_sse2;
>
> LUMA_SS_FILTERS(_sse2);
> }
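worth spelling out for reviewers: DST/IDST exist only at 4x4 in HEVC, so they
cannot live in a per-size table; the patch promotes them to standalone
p.dst4x4 / p.idst4x4 members while DCT/IDCT are indexed through p.cu[]. A
rough sketch of the shape this gives the function table (the typedef name,
its signature, and the NUM_CU_SIZES bound below are assumptions for
illustration only; the real declarations live in common/primitives.h):

    #include <cstdint>
    #include <cstddef>

    // assumed transform-pointer signature, for illustration only
    typedef void (*dct_t)(const int16_t* src, int16_t* dst, intptr_t srcStride);

    enum { BLOCK_4x4, BLOCK_8x8, BLOCK_16x16, BLOCK_32x32, BLOCK_64x64, NUM_CU_SIZES };

    struct EncoderPrimitives
    {
        dct_t dst4x4;          // 4x4-only transforms need no size index
        dct_t idst4x4;
        struct CU
        {
            dct_t dct;         // forward transform for this block size
            dct_t idct;        // inverse transform for this block size
            // copy_cnt, blockfill_s, transpose, ... are bundled here too
        } cu[NUM_CU_SIZES];    // indexed BLOCK_4x4 .. BLOCK_64x64
    };
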
> @@ -1389,8 +1389,8 @@
>
> INTRA_ANG_SSSE3(ssse3);
>
> - p.dct[DST_4x4] = x265_dst4_ssse3;
> - p.idct[IDCT_8x8] = x265_idct8_ssse3;
> + p.dst4x4 = x265_dst4_ssse3;
> + p.cu[BLOCK_8x8].idct = x265_idct8_ssse3;
> p.count_nonzero = x265_count_nonzero_ssse3;
> }
> if (cpuMask & X265_CPU_SSE4)
> @@ -1405,7 +1405,7 @@
> CHROMA_VERT_FILTERS_SSE4_422(_sse4);
> CHROMA_HORIZ_FILTERS_444(_sse4);
>
> - p.dct[DCT_8x8] = x265_dct8_sse4;
> + p.cu[BLOCK_8x8].dct = x265_dct8_sse4;
> p.quant = x265_quant_sse4;
> p.nquant = x265_nquant_sse4;
> p.dequant_normal = x265_dequant_normal_sse4;
> @@ -1423,12 +1423,12 @@
> INTRA_ANG_SSE4_COMMON(sse4);
> INTRA_ANG_SSE4_HIGH(sse4);
>
> - p.psy_cost_pp[BLOCK_4x4] = x265_psyCost_pp_4x4_sse4;
> + p.cu[BLOCK_4x4].psy_cost_pp = x265_psyCost_pp_4x4_sse4;
> #if X86_64
> - p.psy_cost_pp[BLOCK_8x8] = x265_psyCost_pp_8x8_sse4;
> - p.psy_cost_pp[BLOCK_16x16] = x265_psyCost_pp_16x16_sse4;
> - p.psy_cost_pp[BLOCK_32x32] = x265_psyCost_pp_32x32_sse4;
> - p.psy_cost_pp[BLOCK_64x64] = x265_psyCost_pp_64x64_sse4;
> + p.cu[BLOCK_8x8].psy_cost_pp = x265_psyCost_pp_8x8_sse4;
> + p.cu[BLOCK_16x16].psy_cost_pp = x265_psyCost_pp_16x16_sse4;
> + p.cu[BLOCK_32x32].psy_cost_pp = x265_psyCost_pp_32x32_sse4;
> + p.cu[BLOCK_64x64].psy_cost_pp = x265_psyCost_pp_64x64_sse4;
> #endif
> }
> if (cpuMask & X265_CPU_XOP)
> @@ -1440,59 +1440,59 @@
> }
> if (cpuMask & X265_CPU_AVX2)
> {
> - p.dct[DCT_4x4] = x265_dct4_avx2;
> + p.cu[BLOCK_4x4].dct = x265_dct4_avx2;
> p.quant = x265_quant_avx2;
> p.nquant = x265_nquant_avx2;
> - p.dequant_normal = x265_dequant_normal_avx2;
> + p.dequant_normal = x265_dequant_normal_avx2;
> p.scale1D_128to64 = x265_scale1D_128to64_avx2;
> - p.cpy1Dto2D_shl[BLOCK_4x4] = x265_cpy1Dto2D_shl_4_avx2;
> - p.cpy1Dto2D_shl[BLOCK_8x8] = x265_cpy1Dto2D_shl_8_avx2;
> - p.cpy1Dto2D_shl[BLOCK_16x16] = x265_cpy1Dto2D_shl_16_avx2;
> - p.cpy1Dto2D_shl[BLOCK_32x32] = x265_cpy1Dto2D_shl_32_avx2;
> - p.cpy1Dto2D_shr[BLOCK_4x4] = x265_cpy1Dto2D_shr_4_avx2;
> - p.cpy1Dto2D_shr[BLOCK_8x8] = x265_cpy1Dto2D_shr_8_avx2;
> - p.cpy1Dto2D_shr[BLOCK_16x16] = x265_cpy1Dto2D_shr_16_avx2;
> - p.cpy1Dto2D_shr[BLOCK_32x32] = x265_cpy1Dto2D_shr_32_avx2;
> + p.cu[BLOCK_4x4].cpy1Dto2D_shl = x265_cpy1Dto2D_shl_4_avx2;
> + p.cu[BLOCK_8x8].cpy1Dto2D_shl = x265_cpy1Dto2D_shl_8_avx2;
> + p.cu[BLOCK_16x16].cpy1Dto2D_shl = x265_cpy1Dto2D_shl_16_avx2;
> + p.cu[BLOCK_32x32].cpy1Dto2D_shl = x265_cpy1Dto2D_shl_32_avx2;
> + p.cu[BLOCK_4x4].cpy1Dto2D_shr = x265_cpy1Dto2D_shr_4_avx2;
> + p.cu[BLOCK_8x8].cpy1Dto2D_shr = x265_cpy1Dto2D_shr_8_avx2;
> + p.cu[BLOCK_16x16].cpy1Dto2D_shr = x265_cpy1Dto2D_shr_16_avx2;
> + p.cu[BLOCK_32x32].cpy1Dto2D_shr = x265_cpy1Dto2D_shr_32_avx2;
> #if X86_64
> - p.dct[DCT_8x8] = x265_dct8_avx2;
> - p.dct[DCT_16x16] = x265_dct16_avx2;
> - p.dct[DCT_32x32] = x265_dct32_avx2;
> - p.idct[IDCT_4x4] = x265_idct4_avx2;
> - p.idct[IDCT_8x8] = x265_idct8_avx2;
> - p.idct[IDCT_16x16] = x265_idct16_avx2;
> - p.idct[IDCT_32x32] = x265_idct32_avx2;
> - p.transpose[BLOCK_8x8] = x265_transpose8_avx2;
> - p.transpose[BLOCK_16x16] = x265_transpose16_avx2;
> - p.transpose[BLOCK_32x32] = x265_transpose32_avx2;
> - p.transpose[BLOCK_64x64] = x265_transpose64_avx2;
> + p.cu[BLOCK_8x8].dct = x265_dct8_avx2;
> + p.cu[BLOCK_16x16].dct = x265_dct16_avx2;
> + p.cu[BLOCK_32x32].dct = x265_dct32_avx2;
> + p.cu[BLOCK_4x4].idct = x265_idct4_avx2;
> + p.cu[BLOCK_8x8].idct = x265_idct8_avx2;
> + p.cu[BLOCK_16x16].idct = x265_idct16_avx2;
> + p.cu[BLOCK_32x32].idct = x265_idct32_avx2;
> + p.cu[BLOCK_8x8].transpose = x265_transpose8_avx2;
> + p.cu[BLOCK_16x16].transpose = x265_transpose16_avx2;
> + p.cu[BLOCK_32x32].transpose = x265_transpose32_avx2;
> + p.cu[BLOCK_64x64].transpose = x265_transpose64_avx2;
> #endif
> }
> /* at HIGH_BIT_DEPTH, pixel == short so we can reuse a number of primitives */
> for (int i = 0; i < NUM_LUMA_PARTITIONS; i++)
> {
> - p.sse_pp[i] = (pixelcmp_t)p.sse_ss[i];
> - p.sse_sp[i] = (pixelcmp_sp_t)p.sse_ss[i];
> + p.pu[i].sse_pp = (pixelcmp_t)p.pu[i].sse_ss;
> + p.pu[i].sse_sp = (pixelcmp_sp_t)p.pu[i].sse_ss;
> }
>
> for (int i = 0; i < NUM_LUMA_PARTITIONS; i++)
> {
> - p.luma_copy_ps[i] = (copy_ps_t)p.luma_copy_ss[i];
> - p.luma_copy_sp[i] = (copy_sp_t)p.luma_copy_ss[i];
> - p.luma_copy_pp[i] = (copy_pp_t)p.luma_copy_ss[i];
> + p.pu[i].luma_copy_ps = (copy_ps_t)p.pu[i].luma_copy_ss;
> + p.pu[i].luma_copy_sp = (copy_sp_t)p.pu[i].luma_copy_ss;
> + p.pu[i].luma_copy_pp = (copy_pp_t)p.pu[i].luma_copy_ss;
> }
>
> for (int i = 0; i < NUM_CHROMA_PARTITIONS; i++)
> {
> - p.chroma[X265_CSP_I420].copy_ps[i] = (copy_ps_t)p.chroma[X265_CSP_I420].copy_ss[i];
> - p.chroma[X265_CSP_I420].copy_sp[i] = (copy_sp_t)p.chroma[X265_CSP_I420].copy_ss[i];
> - p.chroma[X265_CSP_I420].copy_pp[i] = (copy_pp_t)p.chroma[X265_CSP_I420].copy_ss[i];
> + p.chroma[X265_CSP_I420].pu[i].copy_ps = (copy_ps_t)p.chroma[X265_CSP_I420].pu[i].copy_ss;
> + p.chroma[X265_CSP_I420].pu[i].copy_sp = (copy_sp_t)p.chroma[X265_CSP_I420].pu[i].copy_ss;
> + p.chroma[X265_CSP_I420].pu[i].copy_pp = (copy_pp_t)p.chroma[X265_CSP_I420].pu[i].copy_ss;
> }
>
> for (int i = 0; i < NUM_CHROMA_PARTITIONS; i++)
> {
> - p.chroma[X265_CSP_I422].copy_ps[i] = (copy_ps_t)p.chroma[X265_CSP_I422].copy_ss[i];
> - p.chroma[X265_CSP_I422].copy_sp[i] = (copy_sp_t)p.chroma[X265_CSP_I422].copy_ss[i];
> - p.chroma[X265_CSP_I422].copy_pp[i] = (copy_pp_t)p.chroma[X265_CSP_I422].copy_ss[i];
> + p.chroma[X265_CSP_I422].pu[i].copy_ps = (copy_ps_t)p.chroma[X265_CSP_I422].pu[i].copy_ss;
> + p.chroma[X265_CSP_I422].pu[i].copy_sp = (copy_sp_t)p.chroma[X265_CSP_I422].pu[i].copy_ss;
> + p.chroma[X265_CSP_I422].pu[i].copy_pp = (copy_pp_t)p.chroma[X265_CSP_I422].pu[i].copy_ss;
> }
>
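the cast loops above are the payoff of the HIGH_BIT_DEPTH build defining
pixel as a 16-bit type: a short/short kernel and a pixel/pixel kernel then
share one calling convention, so a single routine can be registered as
sse_ss, sse_sp and sse_pp. A minimal standalone sketch of why the cast is
sound (signatures simplified; not the exact x265 typedefs):

    #include <cstdint>
    #include <cstddef>

    typedef uint16_t pixel; // HIGH_BIT_DEPTH: pixel is 16 bits, same width as int16_t

    // simplified comparison-primitive signatures, assumed for illustration
    typedef int (*pixelcmp_t)(const pixel*, intptr_t, const pixel*, intptr_t);
    typedef int (*pixelcmp_ss_t)(const int16_t*, intptr_t, const int16_t*, intptr_t);

    // a toy 4x4 sum-of-squared-differences over 16-bit samples
    static int ssd_ss(const int16_t* a, intptr_t sa, const int16_t* b, intptr_t sb)
    {
        int sum = 0;
        for (int y = 0; y < 4; y++)
            for (int x = 0; x < 4; x++)
            {
                int d = a[y * sa + x] - b[y * sb + x];
                sum += d * d;
            }
        return sum;
    }

    // both pointer types address 16-bit samples, so the ABI is identical and
    // one routine can stand behind either slot (10-bit pixel values also fit
    // in int16_t, so the arithmetic is unaffected)
    pixelcmp_ss_t sse_ss = ssd_ss;
    pixelcmp_t    sse_pp = (pixelcmp_t)ssd_ss;
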
> #else // if HIGH_BIT_DEPTH
> @@ -1502,7 +1502,7 @@
> INIT8(sad, _mmx2);
> INIT8(sad_x3, _mmx2);
> INIT8(sad_x4, _mmx2);
> - p.satd[LUMA_4x4] = x265_pixel_satd_4x4_mmx2;
> + p.pu[LUMA_4x4].satd = x265_pixel_satd_4x4_mmx2;
> p.frameInitLowres = x265_frame_init_lowres_core_mmx2;
>
> PIXEL_AVG(sse2);
> @@ -1541,52 +1541,52 @@
> // until all partitions are coded and commit smaller patches, easier to
> // review.
>
> - p.blockfill_s[BLOCK_4x4] = x265_blockfill_s_4x4_sse2;
> - p.blockfill_s[BLOCK_8x8] = x265_blockfill_s_8x8_sse2;
> - p.blockfill_s[BLOCK_16x16] = x265_blockfill_s_16x16_sse2;
> - p.blockfill_s[BLOCK_32x32] = x265_blockfill_s_32x32_sse2;
> + p.cu[BLOCK_4x4].blockfill_s = x265_blockfill_s_4x4_sse2;
> + p.cu[BLOCK_8x8].blockfill_s = x265_blockfill_s_8x8_sse2;
> + p.cu[BLOCK_16x16].blockfill_s = x265_blockfill_s_16x16_sse2;
> + p.cu[BLOCK_32x32].blockfill_s = x265_blockfill_s_32x32_sse2;
>
> - p.ssd_s[BLOCK_4x4] = x265_pixel_ssd_s_4_sse2;
> - p.ssd_s[BLOCK_8x8] = x265_pixel_ssd_s_8_sse2;
> - p.ssd_s[BLOCK_16x16] = x265_pixel_ssd_s_16_sse2;
> - p.ssd_s[BLOCK_32x32] = x265_pixel_ssd_s_32_sse2;
> + p.cu[BLOCK_4x4].ssd_s = x265_pixel_ssd_s_4_sse2;
> + p.cu[BLOCK_8x8].ssd_s = x265_pixel_ssd_s_8_sse2;
> + p.cu[BLOCK_16x16].ssd_s = x265_pixel_ssd_s_16_sse2;
> + p.cu[BLOCK_32x32].ssd_s = x265_pixel_ssd_s_32_sse2;
>
> p.frameInitLowres = x265_frame_init_lowres_core_sse2;
> SA8D_INTER_FROM_BLOCK(sse2);
>
> - p.cpy2Dto1D_shl[BLOCK_4x4] = x265_cpy2Dto1D_shl_4_sse2;
> - p.cpy2Dto1D_shl[BLOCK_8x8] = x265_cpy2Dto1D_shl_8_sse2;
> - p.cpy2Dto1D_shl[BLOCK_16x16] = x265_cpy2Dto1D_shl_16_sse2;
> - p.cpy2Dto1D_shl[BLOCK_32x32] = x265_cpy2Dto1D_shl_32_sse2;
> - p.cpy2Dto1D_shr[BLOCK_4x4] = x265_cpy2Dto1D_shr_4_sse2;
> - p.cpy2Dto1D_shr[BLOCK_8x8] = x265_cpy2Dto1D_shr_8_sse2;
> - p.cpy2Dto1D_shr[BLOCK_16x16] = x265_cpy2Dto1D_shr_16_sse2;
> - p.cpy2Dto1D_shr[BLOCK_32x32] = x265_cpy2Dto1D_shr_32_sse2;
> - p.cpy1Dto2D_shl[BLOCK_4x4] = x265_cpy1Dto2D_shl_4_sse2;
> - p.cpy1Dto2D_shl[BLOCK_8x8] = x265_cpy1Dto2D_shl_8_sse2;
> - p.cpy1Dto2D_shl[BLOCK_16x16] = x265_cpy1Dto2D_shl_16_sse2;
> - p.cpy1Dto2D_shl[BLOCK_32x32] = x265_cpy1Dto2D_shl_32_sse2;
> - p.cpy1Dto2D_shr[BLOCK_4x4] = x265_cpy1Dto2D_shr_4_sse2;
> - p.cpy1Dto2D_shr[BLOCK_8x8] = x265_cpy1Dto2D_shr_8_sse2;
> - p.cpy1Dto2D_shr[BLOCK_16x16] = x265_cpy1Dto2D_shr_16_sse2;
> - p.cpy1Dto2D_shr[BLOCK_32x32] = x265_cpy1Dto2D_shr_32_sse2;
> + p.cu[BLOCK_4x4].cpy2Dto1D_shl = x265_cpy2Dto1D_shl_4_sse2;
> + p.cu[BLOCK_8x8].cpy2Dto1D_shl = x265_cpy2Dto1D_shl_8_sse2;
> + p.cu[BLOCK_16x16].cpy2Dto1D_shl = x265_cpy2Dto1D_shl_16_sse2;
> + p.cu[BLOCK_32x32].cpy2Dto1D_shl = x265_cpy2Dto1D_shl_32_sse2;
> + p.cu[BLOCK_4x4].cpy2Dto1D_shr = x265_cpy2Dto1D_shr_4_sse2;
> + p.cu[BLOCK_8x8].cpy2Dto1D_shr = x265_cpy2Dto1D_shr_8_sse2;
> + p.cu[BLOCK_16x16].cpy2Dto1D_shr = x265_cpy2Dto1D_shr_16_sse2;
> + p.cu[BLOCK_32x32].cpy2Dto1D_shr = x265_cpy2Dto1D_shr_32_sse2;
> + p.cu[BLOCK_4x4].cpy1Dto2D_shl = x265_cpy1Dto2D_shl_4_sse2;
> + p.cu[BLOCK_8x8].cpy1Dto2D_shl = x265_cpy1Dto2D_shl_8_sse2;
> + p.cu[BLOCK_16x16].cpy1Dto2D_shl = x265_cpy1Dto2D_shl_16_sse2;
> + p.cu[BLOCK_32x32].cpy1Dto2D_shl = x265_cpy1Dto2D_shl_32_sse2;
> + p.cu[BLOCK_4x4].cpy1Dto2D_shr = x265_cpy1Dto2D_shr_4_sse2;
> + p.cu[BLOCK_8x8].cpy1Dto2D_shr = x265_cpy1Dto2D_shr_8_sse2;
> + p.cu[BLOCK_16x16].cpy1Dto2D_shr = x265_cpy1Dto2D_shr_16_sse2;
> + p.cu[BLOCK_32x32].cpy1Dto2D_shr = x265_cpy1Dto2D_shr_32_sse2;
>
> - p.calcresidual[BLOCK_4x4] = x265_getResidual4_sse2;
> - p.calcresidual[BLOCK_8x8] = x265_getResidual8_sse2;
> - p.transpose[BLOCK_4x4] = x265_transpose4_sse2;
> - p.transpose[BLOCK_8x8] = x265_transpose8_sse2;
> - p.transpose[BLOCK_16x16] = x265_transpose16_sse2;
> - p.transpose[BLOCK_32x32] = x265_transpose32_sse2;
> - p.transpose[BLOCK_64x64] = x265_transpose64_sse2;
> + p.cu[BLOCK_4x4].calcresidual = x265_getResidual4_sse2;
> + p.cu[BLOCK_8x8].calcresidual = x265_getResidual8_sse2;
> + p.cu[BLOCK_4x4].transpose = x265_transpose4_sse2;
> + p.cu[BLOCK_8x8].transpose = x265_transpose8_sse2;
> + p.cu[BLOCK_16x16].transpose = x265_transpose16_sse2;
> + p.cu[BLOCK_32x32].transpose = x265_transpose32_sse2;
> + p.cu[BLOCK_64x64].transpose = x265_transpose64_sse2;
> p.ssim_4x4x2_core = x265_pixel_ssim_4x4x2_core_sse2;
> p.ssim_end_4 = x265_pixel_ssim_end4_sse2;
>
> - p.dct[DCT_4x4] = x265_dct4_sse2;
> - p.idct[IDCT_4x4] = x265_idct4_sse2;
> + p.cu[BLOCK_4x4].dct = x265_dct4_sse2;
> + p.cu[BLOCK_4x4].idct = x265_idct4_sse2;
> #if X86_64
> - p.idct[IDCT_8x8] = x265_idct8_sse2;
> + p.cu[BLOCK_8x8].idct = x265_idct8_sse2;
> #endif
> - p.idct[IDST_4x4] = x265_idst4_sse2;
> + p.idst4x4 = x265_idst4_sse2;
>
> p.planecopy_sp = x265_downShift_16_sse2;
> }
> @@ -1594,7 +1594,7 @@
> {
> p.frameInitLowres = x265_frame_init_lowres_core_ssse3;
> SA8D_INTER_FROM_BLOCK(ssse3);
> - p.sse_pp[LUMA_4x4] = x265_pixel_ssd_4x4_ssse3;
> + p.pu[LUMA_4x4].sse_pp = x265_pixel_ssd_4x4_ssse3;
> ASSGN_SSE(ssse3);
> PIXEL_AVG(ssse3);
> PIXEL_AVG_W4(ssse3);
> @@ -1605,23 +1605,23 @@
> p.scale2D_64to32 = x265_scale2D_64to32_ssse3;
> SAD_X3(ssse3);
> SAD_X4(ssse3);
> - p.sad_x4[LUMA_8x4] = x265_pixel_sad_x4_8x4_ssse3;
> - p.sad_x4[LUMA_8x8] = x265_pixel_sad_x4_8x8_ssse3;
> - p.sad_x3[LUMA_8x16] = x265_pixel_sad_x3_8x16_ssse3;
> - p.sad_x4[LUMA_8x16] = x265_pixel_sad_x4_8x16_ssse3;
> - p.sad_x3[LUMA_8x32] = x265_pixel_sad_x3_8x32_ssse3;
> - p.sad_x4[LUMA_8x32] = x265_pixel_sad_x4_8x32_ssse3;
> + p.pu[LUMA_8x4].sad_x4 = x265_pixel_sad_x4_8x4_ssse3;
> + p.pu[LUMA_8x8].sad_x4 = x265_pixel_sad_x4_8x8_ssse3;
> + p.pu[LUMA_8x16].sad_x3 = x265_pixel_sad_x3_8x16_ssse3;
> + p.pu[LUMA_8x16].sad_x4 = x265_pixel_sad_x4_8x16_ssse3;
> + p.pu[LUMA_8x32].sad_x3 = x265_pixel_sad_x3_8x32_ssse3;
> + p.pu[LUMA_8x32].sad_x4 = x265_pixel_sad_x4_8x32_ssse3;
>
> - p.sad_x3[LUMA_12x16] = x265_pixel_sad_x3_12x16_ssse3;
> - p.sad_x4[LUMA_12x16] = x265_pixel_sad_x4_12x16_ssse3;
> + p.pu[LUMA_12x16].sad_x3 = x265_pixel_sad_x3_12x16_ssse3;
> + p.pu[LUMA_12x16].sad_x4 = x265_pixel_sad_x4_12x16_ssse3;
>
> p.luma_p2s = x265_luma_p2s_ssse3;
> p.chroma[X265_CSP_I420].p2s = x265_chroma_p2s_ssse3;
> p.chroma[X265_CSP_I422].p2s = x265_chroma_p2s_ssse3;
> p.chroma[X265_CSP_I444].p2s = x265_luma_p2s_ssse3; // for i444, chroma_p2s can use luma_p2s
>
> - p.dct[DST_4x4] = x265_dst4_ssse3;
> - p.idct[IDCT_8x8] = x265_idct8_ssse3;
> + p.dst4x4 = x265_dst4_ssse3;
> + p.cu[BLOCK_8x8].idct = x265_idct8_ssse3;
> p.count_nonzero = x265_count_nonzero_ssse3;
> }
> if (cpuMask & X265_CPU_SSE4)
> @@ -1638,21 +1638,21 @@
> CHROMA_ADDAVG_422(_sse4);
>
> // TODO: check POPCNT flag!
> - p.copy_cnt[BLOCK_4x4] = x265_copy_cnt_4_sse4;
> - p.copy_cnt[BLOCK_8x8] = x265_copy_cnt_8_sse4;
> - p.copy_cnt[BLOCK_16x16] = x265_copy_cnt_16_sse4;
> - p.copy_cnt[BLOCK_32x32] = x265_copy_cnt_32_sse4;
> + p.cu[BLOCK_4x4].copy_cnt = x265_copy_cnt_4_sse4;
> + p.cu[BLOCK_8x8].copy_cnt = x265_copy_cnt_8_sse4;
> + p.cu[BLOCK_16x16].copy_cnt = x265_copy_cnt_16_sse4;
> + p.cu[BLOCK_32x32].copy_cnt = x265_copy_cnt_32_sse4;
>
> HEVC_SATD(sse4);
> SA8D_INTER_FROM_BLOCK(sse4);
>
> - p.sse_pp[LUMA_12x16] = x265_pixel_ssd_12x16_sse4;
> - p.sse_pp[LUMA_24x32] = x265_pixel_ssd_24x32_sse4;
> - p.sse_pp[LUMA_48x64] = x265_pixel_ssd_48x64_sse4;
> - p.sse_pp[LUMA_64x16] = x265_pixel_ssd_64x16_sse4;
> - p.sse_pp[LUMA_64x32] = x265_pixel_ssd_64x32_sse4;
> - p.sse_pp[LUMA_64x48] = x265_pixel_ssd_64x48_sse4;
> - p.sse_pp[LUMA_64x64] = x265_pixel_ssd_64x64_sse4;
> + p.pu[LUMA_12x16].sse_pp = x265_pixel_ssd_12x16_sse4;
> + p.pu[LUMA_24x32].sse_pp = x265_pixel_ssd_24x32_sse4;
> + p.pu[LUMA_48x64].sse_pp = x265_pixel_ssd_48x64_sse4;
> + p.pu[LUMA_64x16].sse_pp = x265_pixel_ssd_64x16_sse4;
> + p.pu[LUMA_64x32].sse_pp = x265_pixel_ssd_64x32_sse4;
> + p.pu[LUMA_64x48].sse_pp = x265_pixel_ssd_64x48_sse4;
> + p.pu[LUMA_64x64].sse_pp = x265_pixel_ssd_64x64_sse4;
>
> LUMA_SSE_SP(_sse4);
>
> @@ -1673,17 +1673,17 @@
> ASSGN_SSE_SS(sse4);
>
> // MUST be done after LUMA_FILTERS() to overwrite default version
> - p.luma_hvpp[LUMA_8x8] = x265_interp_8tap_hv_pp_8x8_sse4;
> + p.pu[LUMA_8x8].luma_hvpp = x265_interp_8tap_hv_pp_8x8_sse4;
>
> - p.chroma[X265_CSP_I420].copy_sp[CHROMA_2x4] = x265_blockcopy_sp_2x4_sse4;
> - p.chroma[X265_CSP_I420].copy_sp[CHROMA_2x8] = x265_blockcopy_sp_2x8_sse4;
> - p.chroma[X265_CSP_I420].copy_sp[CHROMA_6x8] = x265_blockcopy_sp_6x8_sse4;
> + p.chroma[X265_CSP_I420].pu[CHROMA_2x4].copy_sp = x265_blockcopy_sp_2x4_sse4;
> + p.chroma[X265_CSP_I420].pu[CHROMA_2x8].copy_sp = x265_blockcopy_sp_2x8_sse4;
> + p.chroma[X265_CSP_I420].pu[CHROMA_6x8].copy_sp = x265_blockcopy_sp_6x8_sse4;
> CHROMA_BLOCKCOPY(ps, _sse4);
> CHROMA_BLOCKCOPY_422(ps, _sse4);
> LUMA_BLOCKCOPY(ps, _sse4);
>
> - p.calcresidual[BLOCK_16x16] = x265_getResidual16_sse4;
> - p.calcresidual[BLOCK_32x32] = x265_getResidual32_sse4;
> + p.cu[BLOCK_16x16].calcresidual = x265_getResidual16_sse4;
> + p.cu[BLOCK_32x32].calcresidual = x265_getResidual32_sse4;
> p.quant = x265_quant_sse4;
> p.nquant = x265_nquant_sse4;
> p.dequant_normal = x265_dequant_normal_sse4;
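the "MUST be done after LUMA_FILTERS()" note in this hunk is the rule for the
whole file: setup runs generic-to-specific, and a later assignment to the same
slot simply wins. A trivial illustration of that last-write-wins pattern (all
names invented for the example):

    #include <cstdio>

    typedef void (*filter_t)();

    static void generic_filter() { std::puts("generic C"); }
    static void sse4_filter()    { std::puts("hand-written SSE4"); }

    int main()
    {
        filter_t luma_hvpp = generic_filter; // bulk macro installs a default first
        luma_hvpp = sse4_filter;             // the specialized kernel overwrites it
        luma_hvpp();                         // prints "hand-written SSE4"
        return 0;
    }

reordering the two assignments would silently drop the assembly version, which
is why the comment insists on the ordering.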
> @@ -1707,14 +1707,14 @@
> INTRA_ANG_SSE4_COMMON(sse4);
> INTRA_ANG_SSE4(sse4);
>
> - p.dct[DCT_8x8] = x265_dct8_sse4;
> + p.cu[BLOCK_8x8].dct = x265_dct8_sse4;
> p.denoiseDct = x265_denoise_dct_sse4;
> - p.psy_cost_pp[BLOCK_4x4] = x265_psyCost_pp_4x4_sse4;
> + p.cu[BLOCK_4x4].psy_cost_pp = x265_psyCost_pp_4x4_sse4;
> #if X86_64
> - p.psy_cost_pp[BLOCK_8x8] = x265_psyCost_pp_8x8_sse4;
> - p.psy_cost_pp[BLOCK_16x16] = x265_psyCost_pp_16x16_sse4;
> - p.psy_cost_pp[BLOCK_32x32] = x265_psyCost_pp_32x32_sse4;
> - p.psy_cost_pp[BLOCK_64x64] = x265_psyCost_pp_64x64_sse4;
> + p.cu[BLOCK_8x8].psy_cost_pp = x265_psyCost_pp_8x8_sse4;
> + p.cu[BLOCK_16x16].psy_cost_pp = x265_psyCost_pp_16x16_sse4;
> + p.cu[BLOCK_32x32].psy_cost_pp = x265_psyCost_pp_32x32_sse4;
> + p.cu[BLOCK_64x64].psy_cost_pp = x265_psyCost_pp_64x64_sse4;
> #endif
> }
> if (cpuMask & X265_CPU_AVX)
> @@ -1727,36 +1727,36 @@
> ASSGN_SSE_SS(avx);
> SAD_X3(avx);
> SAD_X4(avx);
> - p.sad_x3[LUMA_12x16] = x265_pixel_sad_x3_12x16_avx;
> - p.sad_x4[LUMA_12x16] = x265_pixel_sad_x4_12x16_avx;
> - p.sad_x3[LUMA_16x4] = x265_pixel_sad_x3_16x4_avx;
> - p.sad_x4[LUMA_16x4] = x265_pixel_sad_x4_16x4_avx;
> + p.pu[LUMA_12x16].sad_x3 = x265_pixel_sad_x3_12x16_avx;
> + p.pu[LUMA_12x16].sad_x4 = x265_pixel_sad_x4_12x16_avx;
> + p.pu[LUMA_16x4].sad_x3 = x265_pixel_sad_x3_16x4_avx;
> + p.pu[LUMA_16x4].sad_x4 = x265_pixel_sad_x4_16x4_avx;
>
> p.ssim_4x4x2_core = x265_pixel_ssim_4x4x2_core_avx;
> p.ssim_end_4 = x265_pixel_ssim_end4_avx;
> - p.luma_copy_ss[LUMA_64x16] = x265_blockcopy_ss_64x16_avx;
> - p.luma_copy_ss[LUMA_64x32] = x265_blockcopy_ss_64x32_avx;
> - p.luma_copy_ss[LUMA_64x48] = x265_blockcopy_ss_64x48_avx;
> - p.luma_copy_ss[LUMA_64x64] = x265_blockcopy_ss_64x64_avx;
> + p.pu[LUMA_64x16].luma_copy_ss = x265_blockcopy_ss_64x16_avx;
> + p.pu[LUMA_64x32].luma_copy_ss = x265_blockcopy_ss_64x32_avx;
> + p.pu[LUMA_64x48].luma_copy_ss = x265_blockcopy_ss_64x48_avx;
> + p.pu[LUMA_64x64].luma_copy_ss = x265_blockcopy_ss_64x64_avx;
>
> - p.chroma[X265_CSP_I420].copy_pp[CHROMA_32x8] = x265_blockcopy_pp_32x8_avx;
> - p.luma_copy_pp[LUMA_32x8] = x265_blockcopy_pp_32x8_avx;
> + p.chroma[X265_CSP_I420].pu[CHROMA_32x8].copy_pp = x265_blockcopy_pp_32x8_avx;
> + p.pu[LUMA_32x8].luma_copy_pp = x265_blockcopy_pp_32x8_avx;
>
> - p.chroma[X265_CSP_I420].copy_pp[CHROMA_32x16] = x265_blockcopy_pp_32x16_avx;
> - p.chroma[X265_CSP_I422].copy_pp[CHROMA422_32x16] = x265_blockcopy_pp_32x16_avx;
> - p.luma_copy_pp[LUMA_32x16] = x265_blockcopy_pp_32x16_avx;
> + p.chroma[X265_CSP_I420].pu[CHROMA_32x16].copy_pp = x265_blockcopy_pp_32x16_avx;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_32x16].copy_pp = x265_blockcopy_pp_32x16_avx;
> + p.pu[LUMA_32x16].luma_copy_pp = x265_blockcopy_pp_32x16_avx;
>
> - p.chroma[X265_CSP_I420].copy_pp[CHROMA_32x24] = x265_blockcopy_pp_32x24_avx;
> - p.luma_copy_pp[LUMA_32x24] = x265_blockcopy_pp_32x24_avx;
> + p.chroma[X265_CSP_I420].pu[CHROMA_32x24].copy_pp = x265_blockcopy_pp_32x24_avx;
> + p.pu[LUMA_32x24].luma_copy_pp = x265_blockcopy_pp_32x24_avx;
>
> - p.chroma[X265_CSP_I420].copy_pp[CHROMA_32x32] = x265_blockcopy_pp_32x32_avx;
> - p.chroma[X265_CSP_I422].copy_pp[CHROMA422_32x32] = x265_blockcopy_pp_32x32_avx;
> - p.luma_copy_pp[LUMA_32x32] = x265_blockcopy_pp_32x32_avx;
> + p.chroma[X265_CSP_I420].pu[CHROMA_32x32].copy_pp = x265_blockcopy_pp_32x32_avx;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_32x32].copy_pp = x265_blockcopy_pp_32x32_avx;
> + p.pu[LUMA_32x32].luma_copy_pp = x265_blockcopy_pp_32x32_avx;
>
> - p.chroma[X265_CSP_I422].copy_pp[CHROMA422_32x48] = x265_blockcopy_pp_32x48_avx;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_32x48].copy_pp = x265_blockcopy_pp_32x48_avx;
>
> - p.chroma[X265_CSP_I422].copy_pp[CHROMA422_32x64] = x265_blockcopy_pp_32x64_avx;
> - p.luma_copy_pp[LUMA_32x64] = x265_blockcopy_pp_32x64_avx;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_32x64].copy_pp = x265_blockcopy_pp_32x64_avx;
> + p.pu[LUMA_32x64].luma_copy_pp = x265_blockcopy_pp_32x64_avx;
> }
> if (cpuMask & X265_CPU_XOP)
> {
> @@ -1771,139 +1771,139 @@
> INIT2(sad_x4, _avx2);
> INIT4(satd, _avx2);
> INIT2_NAME(sse_pp, ssd, _avx2);
> - p.sad_x4[LUMA_16x12] = x265_pixel_sad_x4_16x12_avx2;
> - p.sad_x4[LUMA_16x32] = x265_pixel_sad_x4_16x32_avx2;
> - p.ssd_s[BLOCK_32x32] = x265_pixel_ssd_s_32_avx2;
> + p.pu[LUMA_16x12].sad_x4 = x265_pixel_sad_x4_16x12_avx2;
> + p.pu[LUMA_16x32].sad_x4 = x265_pixel_sad_x4_16x32_avx2;
> + p.cu[BLOCK_32x32].ssd_s = x265_pixel_ssd_s_32_avx2;
>
> /* Need to update assembly code as per changed interface of the copy_cnt primitive, once
> * code is updated, avx2 version will be enabled */
>
> - p.copy_cnt[BLOCK_8x8] = x265_copy_cnt_8_avx2;
> - p.copy_cnt[BLOCK_16x16] = x265_copy_cnt_16_avx2;
> - p.copy_cnt[BLOCK_32x32] = x265_copy_cnt_32_avx2;
> + p.cu[BLOCK_8x8].copy_cnt = x265_copy_cnt_8_avx2;
> + p.cu[BLOCK_16x16].copy_cnt = x265_copy_cnt_16_avx2;
> + p.cu[BLOCK_32x32].copy_cnt = x265_copy_cnt_32_avx2;
>
> - p.blockfill_s[BLOCK_16x16] = x265_blockfill_s_16x16_avx2;
> - p.blockfill_s[BLOCK_32x32] = x265_blockfill_s_32x32_avx2;
> + p.cu[BLOCK_16x16].blockfill_s = x265_blockfill_s_16x16_avx2;
> + p.cu[BLOCK_32x32].blockfill_s = x265_blockfill_s_32x32_avx2;
>
> - p.cpy1Dto2D_shl[BLOCK_4x4] = x265_cpy1Dto2D_shl_4_avx2;
> - p.cpy1Dto2D_shl[BLOCK_8x8] = x265_cpy1Dto2D_shl_8_avx2;
> - p.cpy1Dto2D_shl[BLOCK_16x16] = x265_cpy1Dto2D_shl_16_avx2;
> - p.cpy1Dto2D_shl[BLOCK_32x32] = x265_cpy1Dto2D_shl_32_avx2;
> - p.cpy1Dto2D_shr[BLOCK_4x4] = x265_cpy1Dto2D_shr_4_avx2;
> - p.cpy1Dto2D_shr[BLOCK_8x8] = x265_cpy1Dto2D_shr_8_avx2;
> - p.cpy1Dto2D_shr[BLOCK_16x16] = x265_cpy1Dto2D_shr_16_avx2;
> - p.cpy1Dto2D_shr[BLOCK_32x32] = x265_cpy1Dto2D_shr_32_avx2;
> + p.cu[BLOCK_4x4].cpy1Dto2D_shl = x265_cpy1Dto2D_shl_4_avx2;
> + p.cu[BLOCK_8x8].cpy1Dto2D_shl = x265_cpy1Dto2D_shl_8_avx2;
> + p.cu[BLOCK_16x16].cpy1Dto2D_shl = x265_cpy1Dto2D_shl_16_avx2;
> + p.cu[BLOCK_32x32].cpy1Dto2D_shl = x265_cpy1Dto2D_shl_32_avx2;
> + p.cu[BLOCK_4x4].cpy1Dto2D_shr = x265_cpy1Dto2D_shr_4_avx2;
> + p.cu[BLOCK_8x8].cpy1Dto2D_shr = x265_cpy1Dto2D_shr_8_avx2;
> + p.cu[BLOCK_16x16].cpy1Dto2D_shr = x265_cpy1Dto2D_shr_16_avx2;
> + p.cu[BLOCK_32x32].cpy1Dto2D_shr = x265_cpy1Dto2D_shr_32_avx2;
>
> p.denoiseDct = x265_denoise_dct_avx2;
> - p.dct[DCT_4x4] = x265_dct4_avx2;
> + p.cu[BLOCK_4x4].dct = x265_dct4_avx2;
> p.quant = x265_quant_avx2;
> p.nquant = x265_nquant_avx2;
> p.dequant_normal = x265_dequant_normal_avx2;
>
> - p.chroma[X265_CSP_I420].copy_ss[CHROMA_16x4] = x265_blockcopy_ss_16x4_avx;
> - p.chroma[X265_CSP_I420].copy_ss[CHROMA_16x12] = x265_blockcopy_ss_16x12_avx;
> - p.chroma[X265_CSP_I420].copy_ss[CHROMA_16x8] = x265_blockcopy_ss_16x8_avx;
> - p.chroma[X265_CSP_I420].copy_ss[CHROMA_16x16] = x265_blockcopy_ss_16x16_avx;
> - p.chroma[X265_CSP_I420].copy_ss[CHROMA_16x32] = x265_blockcopy_ss_16x32_avx;
> - p.chroma[X265_CSP_I422].copy_ss[CHROMA422_16x8] = x265_blockcopy_ss_16x8_avx;
> - p.chroma[X265_CSP_I422].copy_ss[CHROMA422_16x16] = x265_blockcopy_ss_16x16_avx;
> - p.chroma[X265_CSP_I422].copy_ss[CHROMA422_16x24] = x265_blockcopy_ss_16x24_avx;
> - p.chroma[X265_CSP_I422].copy_ss[CHROMA422_16x32] = x265_blockcopy_ss_16x32_avx;
> - p.chroma[X265_CSP_I422].copy_ss[CHROMA422_16x64] = x265_blockcopy_ss_16x64_avx;
> + p.chroma[X265_CSP_I420].pu[CHROMA_16x4].copy_ss = x265_blockcopy_ss_16x4_avx;
> + p.chroma[X265_CSP_I420].pu[CHROMA_16x12].copy_ss = x265_blockcopy_ss_16x12_avx;
> + p.chroma[X265_CSP_I420].pu[CHROMA_16x8].copy_ss = x265_blockcopy_ss_16x8_avx;
> + p.chroma[X265_CSP_I420].pu[CHROMA_16x16].copy_ss = x265_blockcopy_ss_16x16_avx;
> + p.chroma[X265_CSP_I420].pu[CHROMA_16x32].copy_ss = x265_blockcopy_ss_16x32_avx;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_16x8] .copy_ss = x265_blockcopy_ss_16x8_avx;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_16x16].copy_ss = x265_blockcopy_ss_16x16_avx;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_16x24].copy_ss = x265_blockcopy_ss_16x24_avx;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_16x32].copy_ss = x265_blockcopy_ss_16x32_avx;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_16x64].copy_ss = x265_blockcopy_ss_16x64_avx;
> p.scale1D_128to64 = x265_scale1D_128to64_avx2;
>
> p.weight_pp = x265_weight_pp_avx2;
>
> #if X86_64
>
> - p.dct[DCT_8x8] = x265_dct8_avx2;
> - p.dct[DCT_16x16] = x265_dct16_avx2;
> - p.dct[DCT_32x32] = x265_dct32_avx2;
> - p.idct[IDCT_4x4] = x265_idct4_avx2;
> - p.idct[IDCT_8x8] = x265_idct8_avx2;
> - p.idct[IDCT_16x16] = x265_idct16_avx2;
> - p.idct[IDCT_32x32] = x265_idct32_avx2;
> + p.cu[BLOCK_8x8].dct = x265_dct8_avx2;
> + p.cu[BLOCK_16x16].dct = x265_dct16_avx2;
> + p.cu[BLOCK_32x32].dct = x265_dct32_avx2;
> + p.cu[BLOCK_4x4].idct = x265_idct4_avx2;
> + p.cu[BLOCK_8x8].idct = x265_idct8_avx2;
> + p.cu[BLOCK_16x16].idct = x265_idct16_avx2;
> + p.cu[BLOCK_32x32].idct = x265_idct32_avx2;
>
> - p.transpose[BLOCK_8x8] = x265_transpose8_avx2;
> - p.transpose[BLOCK_16x16] = x265_transpose16_avx2;
> - p.transpose[BLOCK_32x32] = x265_transpose32_avx2;
> - p.transpose[BLOCK_64x64] = x265_transpose64_avx2;
> + p.cu[BLOCK_8x8].transpose = x265_transpose8_avx2;
> + p.cu[BLOCK_16x16].transpose = x265_transpose16_avx2;
> + p.cu[BLOCK_32x32].transpose = x265_transpose32_avx2;
> + p.cu[BLOCK_64x64].transpose = x265_transpose64_avx2;
>
> - p.luma_vpp[LUMA_12x16] = x265_interp_8tap_vert_pp_12x16_avx2;
> + p.pu[LUMA_12x16].luma_vpp = x265_interp_8tap_vert_pp_12x16_avx2;
>
> - p.luma_vpp[LUMA_16x4] = x265_interp_8tap_vert_pp_16x4_avx2;
> - p.luma_vpp[LUMA_16x8] = x265_interp_8tap_vert_pp_16x8_avx2;
> - p.luma_vpp[LUMA_16x12] = x265_interp_8tap_vert_pp_16x12_avx2;
> - p.luma_vpp[LUMA_16x16] = x265_interp_8tap_vert_pp_16x16_avx2;
> - p.luma_vpp[LUMA_16x32] = x265_interp_8tap_vert_pp_16x32_avx2;
> - p.luma_vpp[LUMA_16x64] = x265_interp_8tap_vert_pp_16x64_avx2;
> + p.pu[LUMA_16x4].luma_vpp = x265_interp_8tap_vert_pp_16x4_avx2;
> + p.pu[LUMA_16x8].luma_vpp = x265_interp_8tap_vert_pp_16x8_avx2;
> + p.pu[LUMA_16x12].luma_vpp = x265_interp_8tap_vert_pp_16x12_avx2;
> + p.pu[LUMA_16x16].luma_vpp = x265_interp_8tap_vert_pp_16x16_avx2;
> + p.pu[LUMA_16x32].luma_vpp = x265_interp_8tap_vert_pp_16x32_avx2;
> + p.pu[LUMA_16x64].luma_vpp = x265_interp_8tap_vert_pp_16x64_avx2;
>
> - p.luma_vpp[LUMA_24x32] = x265_interp_8tap_vert_pp_24x32_avx2;
> + p.pu[LUMA_24x32].luma_vpp = x265_interp_8tap_vert_pp_24x32_avx2;
>
> - p.luma_vpp[LUMA_32x8] = x265_interp_8tap_vert_pp_32x8_avx2;
> - p.luma_vpp[LUMA_32x16] = x265_interp_8tap_vert_pp_32x16_avx2;
> - p.luma_vpp[LUMA_32x24] = x265_interp_8tap_vert_pp_32x24_avx2;
> - p.luma_vpp[LUMA_32x32] = x265_interp_8tap_vert_pp_32x32_avx2;
> - p.luma_vpp[LUMA_32x64] = x265_interp_8tap_vert_pp_32x64_avx2;
> + p.pu[LUMA_32x8].luma_vpp = x265_interp_8tap_vert_pp_32x8_avx2;
> + p.pu[LUMA_32x16].luma_vpp = x265_interp_8tap_vert_pp_32x16_avx2;
> + p.pu[LUMA_32x24].luma_vpp = x265_interp_8tap_vert_pp_32x24_avx2;
> + p.pu[LUMA_32x32].luma_vpp = x265_interp_8tap_vert_pp_32x32_avx2;
> + p.pu[LUMA_32x64].luma_vpp = x265_interp_8tap_vert_pp_32x64_avx2;
>
> - p.luma_vpp[LUMA_48x64] = x265_interp_8tap_vert_pp_48x64_avx2;
> + p.pu[LUMA_48x64].luma_vpp = x265_interp_8tap_vert_pp_48x64_avx2;
>
> - p.luma_vpp[LUMA_64x16] = x265_interp_8tap_vert_pp_64x16_avx2;
> - p.luma_vpp[LUMA_64x32] = x265_interp_8tap_vert_pp_64x32_avx2;
> - p.luma_vpp[LUMA_64x48] = x265_interp_8tap_vert_pp_64x48_avx2;
> - p.luma_vpp[LUMA_64x64] = x265_interp_8tap_vert_pp_64x64_avx2;
> + p.pu[LUMA_64x16].luma_vpp = x265_interp_8tap_vert_pp_64x16_avx2;
> + p.pu[LUMA_64x32].luma_vpp = x265_interp_8tap_vert_pp_64x32_avx2;
> + p.pu[LUMA_64x48].luma_vpp = x265_interp_8tap_vert_pp_64x48_avx2;
> + p.pu[LUMA_64x64].luma_vpp = x265_interp_8tap_vert_pp_64x64_avx2;
> #endif
> - p.luma_hpp[LUMA_4x4] = x265_interp_8tap_horiz_pp_4x4_avx2;
> + p.pu[LUMA_4x4].luma_hpp = x265_interp_8tap_horiz_pp_4x4_avx2;
>
> - p.luma_hpp[LUMA_8x4] = x265_interp_8tap_horiz_pp_8x4_avx2;
> - p.luma_hpp[LUMA_8x8] = x265_interp_8tap_horiz_pp_8x8_avx2;
> - p.luma_hpp[LUMA_8x16] = x265_interp_8tap_horiz_pp_8x16_avx2;
> - p.luma_hpp[LUMA_8x32] = x265_interp_8tap_horiz_pp_8x32_avx2;
> + p.pu[LUMA_8x4].luma_hpp = x265_interp_8tap_horiz_pp_8x4_avx2;
> + p.pu[LUMA_8x8].luma_hpp = x265_interp_8tap_horiz_pp_8x8_avx2;
> + p.pu[LUMA_8x16].luma_hpp = x265_interp_8tap_horiz_pp_8x16_avx2;
> + p.pu[LUMA_8x32].luma_hpp = x265_interp_8tap_horiz_pp_8x32_avx2;
>
> - p.luma_hpp[LUMA_16x4] = x265_interp_8tap_horiz_pp_16x4_avx2;
> - p.luma_hpp[LUMA_16x8] = x265_interp_8tap_horiz_pp_16x8_avx2;
> - p.luma_hpp[LUMA_16x12] = x265_interp_8tap_horiz_pp_16x12_avx2;
> - p.luma_hpp[LUMA_16x16] = x265_interp_8tap_horiz_pp_16x16_avx2;
> - p.luma_hpp[LUMA_16x32] = x265_interp_8tap_horiz_pp_16x32_avx2;
> - p.luma_hpp[LUMA_16x64] = x265_interp_8tap_horiz_pp_16x64_avx2;
> + p.pu[LUMA_16x4].luma_hpp = x265_interp_8tap_horiz_pp_16x4_avx2;
> + p.pu[LUMA_16x8].luma_hpp = x265_interp_8tap_horiz_pp_16x8_avx2;
> + p.pu[LUMA_16x12].luma_hpp = x265_interp_8tap_horiz_pp_16x12_avx2;
> + p.pu[LUMA_16x16].luma_hpp = x265_interp_8tap_horiz_pp_16x16_avx2;
> + p.pu[LUMA_16x32].luma_hpp = x265_interp_8tap_horiz_pp_16x32_avx2;
> + p.pu[LUMA_16x64].luma_hpp = x265_interp_8tap_horiz_pp_16x64_avx2;
>
> - p.luma_hpp[LUMA_32x8] = x265_interp_8tap_horiz_pp_32x8_avx2;
> - p.luma_hpp[LUMA_32x16] = x265_interp_8tap_horiz_pp_32x16_avx2;
> - p.luma_hpp[LUMA_32x24] = x265_interp_8tap_horiz_pp_32x24_avx2;
> - p.luma_hpp[LUMA_32x32] = x265_interp_8tap_horiz_pp_32x32_avx2;
> - p.luma_hpp[LUMA_32x64] = x265_interp_8tap_horiz_pp_32x64_avx2;
> + p.pu[LUMA_32x8].luma_hpp = x265_interp_8tap_horiz_pp_32x8_avx2;
> + p.pu[LUMA_32x16].luma_hpp = x265_interp_8tap_horiz_pp_32x16_avx2;
> + p.pu[LUMA_32x24].luma_hpp = x265_interp_8tap_horiz_pp_32x24_avx2;
> + p.pu[LUMA_32x32].luma_hpp = x265_interp_8tap_horiz_pp_32x32_avx2;
> + p.pu[LUMA_32x64].luma_hpp = x265_interp_8tap_horiz_pp_32x64_avx2;
>
> - p.luma_hpp[LUMA_64x64] = x265_interp_8tap_horiz_pp_64x64_avx2;
> - p.luma_hpp[LUMA_64x48] = x265_interp_8tap_horiz_pp_64x48_avx2;
> - p.luma_hpp[LUMA_64x32] = x265_interp_8tap_horiz_pp_64x32_avx2;
> - p.luma_hpp[LUMA_64x16] = x265_interp_8tap_horiz_pp_64x16_avx2;
> + p.pu[LUMA_64x64].luma_hpp = x265_interp_8tap_horiz_pp_64x64_avx2;
> + p.pu[LUMA_64x48].luma_hpp = x265_interp_8tap_horiz_pp_64x48_avx2;
> + p.pu[LUMA_64x32].luma_hpp = x265_interp_8tap_horiz_pp_64x32_avx2;
> + p.pu[LUMA_64x16].luma_hpp = x265_interp_8tap_horiz_pp_64x16_avx2;
>
> - p.luma_hpp[LUMA_48x64] = x265_interp_8tap_horiz_pp_48x64_avx2;
> + p.pu[LUMA_48x64].luma_hpp = x265_interp_8tap_horiz_pp_48x64_avx2;
>
> - p.chroma[X265_CSP_I420].filter_hpp[CHROMA_8x8] = x265_interp_4tap_horiz_pp_8x8_avx2;
> - p.chroma[X265_CSP_I420].filter_hpp[CHROMA_4x4] = x265_interp_4tap_horiz_pp_4x4_avx2;
> - p.chroma[X265_CSP_I420].filter_hpp[CHROMA_32x32] = x265_interp_4tap_horiz_pp_32x32_avx2;
> - p.chroma[X265_CSP_I420].filter_hpp[CHROMA_16x16] = x265_interp_4tap_horiz_pp_16x16_avx2;
> + p.chroma[X265_CSP_I420].pu[CHROMA_8x8].filter_hpp = x265_interp_4tap_horiz_pp_8x8_avx2;
> + p.chroma[X265_CSP_I420].pu[CHROMA_4x4].filter_hpp = x265_interp_4tap_horiz_pp_4x4_avx2;
> + p.chroma[X265_CSP_I420].pu[CHROMA_32x32].filter_hpp = x265_interp_4tap_horiz_pp_32x32_avx2;
> + p.chroma[X265_CSP_I420].pu[CHROMA_16x16].filter_hpp = x265_interp_4tap_horiz_pp_16x16_avx2;
>
> - p.luma_vpp[LUMA_4x4] = x265_interp_8tap_vert_pp_4x4_avx2;
> + p.pu[LUMA_4x4].luma_vpp = x265_interp_8tap_vert_pp_4x4_avx2;
>
> - p.luma_vpp[LUMA_8x4] = x265_interp_8tap_vert_pp_8x4_avx2;
> - p.luma_vpp[LUMA_8x8] = x265_interp_8tap_vert_pp_8x8_avx2;
> - p.luma_vpp[LUMA_8x16] = x265_interp_8tap_vert_pp_8x16_avx2;
> - p.luma_vpp[LUMA_8x32] = x265_interp_8tap_vert_pp_8x32_avx2;
> + p.pu[LUMA_8x4].luma_vpp = x265_interp_8tap_vert_pp_8x4_avx2;
> + p.pu[LUMA_8x8].luma_vpp = x265_interp_8tap_vert_pp_8x8_avx2;
> + p.pu[LUMA_8x16].luma_vpp = x265_interp_8tap_vert_pp_8x16_avx2;
> + p.pu[LUMA_8x32].luma_vpp = x265_interp_8tap_vert_pp_8x32_avx2;
>
> // color space i420
> - p.chroma[X265_CSP_I420].filter_vpp[CHROMA_4x4] = x265_interp_4tap_vert_pp_4x4_avx2;
> - p.chroma[X265_CSP_I420].filter_vpp[CHROMA_8x8] = x265_interp_4tap_vert_pp_8x8_avx2;
> + p.chroma[X265_CSP_I420].pu[CHROMA_4x4].filter_vpp = x265_interp_4tap_vert_pp_4x4_avx2;
> + p.chroma[X265_CSP_I420].pu[CHROMA_8x8].filter_vpp = x265_interp_4tap_vert_pp_8x8_avx2;
>
> // color space i422
> - p.chroma[X265_CSP_I422].filter_vpp[CHROMA422_4x4] = x265_interp_4tap_vert_pp_4x4_avx2;
> + p.chroma[X265_CSP_I422].pu[CHROMA422_4x4].filter_vpp = x265_interp_4tap_vert_pp_4x4_avx2;
>
> - p.luma_vps[LUMA_4x4] = x265_interp_8tap_vert_ps_4x4_avx2;
> + p.pu[LUMA_4x4].luma_vps = x265_interp_8tap_vert_ps_4x4_avx2;
>
> #if X86_64
> - p.chroma[X265_CSP_I420].filter_vpp[CHROMA_16x16] = x265_interp_4tap_vert_pp_16x16_avx2;
> - p.chroma[X265_CSP_I420].filter_vpp[CHROMA_32x32] = x265_interp_4tap_vert_pp_32x32_avx2;
> + p.chroma[X265_CSP_I420].pu[CHROMA_16x16].filter_vpp = x265_interp_4tap_vert_pp_16x16_avx2;
> + p.chroma[X265_CSP_I420].pu[CHROMA_32x32].filter_vpp = x265_interp_4tap_vert_pp_32x32_avx2;
> #endif
> }
> #endif // if HIGH_BIT_DEPTH
> diff -r 1924c460d130 -r c6ca0fd54aa7 source/common/yuv.cpp
> --- a/source/common/yuv.cpp Fri Jan 09 11:35:26 2015 +0530
> +++ b/source/common/yuv.cpp Thu Jan 08 15:23:38 2015 -0600
> @@ -81,32 +81,32 @@
> void Yuv::copyToPicYuv(PicYuv& dstPic, uint32_t cuAddr, uint32_t absPartIdx) const
> {
> pixel* dstY = dstPic.getLumaAddr(cuAddr, absPartIdx);
> - primitives.luma_copy_pp[m_part](dstY, dstPic.m_stride, m_buf[0], m_size);
> + primitives.pu[m_part].luma_copy_pp(dstY, dstPic.m_stride, m_buf[0], m_size);
>
> pixel* dstU = dstPic.getCbAddr(cuAddr, absPartIdx);
> pixel* dstV = dstPic.getCrAddr(cuAddr, absPartIdx);
> - primitives.chroma[m_csp].copy_pp[m_part](dstU, dstPic.m_strideC, m_buf[1], m_csize);
> - primitives.chroma[m_csp].copy_pp[m_part](dstV, dstPic.m_strideC, m_buf[2], m_csize);
> + primitives.chroma[m_csp].pu[m_part].copy_pp(dstU, dstPic.m_strideC, m_buf[1], m_csize);
> + primitives.chroma[m_csp].pu[m_part].copy_pp(dstV, dstPic.m_strideC, m_buf[2], m_csize);
> }
>
> void Yuv::copyFromPicYuv(const PicYuv& srcPic, uint32_t cuAddr, uint32_t absPartIdx)
> {
> const pixel* srcY = srcPic.getLumaAddr(cuAddr, absPartIdx);
> - primitives.luma_copy_pp[m_part](m_buf[0], m_size, srcY, srcPic.m_stride);
> + primitives.pu[m_part].luma_copy_pp(m_buf[0], m_size, srcY, srcPic.m_stride);
>
> const pixel* srcU = srcPic.getCbAddr(cuAddr, absPartIdx);
> const pixel* srcV = srcPic.getCrAddr(cuAddr, absPartIdx);
> - primitives.chroma[m_csp].copy_pp[m_part](m_buf[1], m_csize, srcU, srcPic.m_strideC);
> - primitives.chroma[m_csp].copy_pp[m_part](m_buf[2], m_csize, srcV, srcPic.m_strideC);
> + primitives.chroma[m_csp].pu[m_part].copy_pp(m_buf[1], m_csize, srcU, srcPic.m_strideC);
> + primitives.chroma[m_csp].pu[m_part].copy_pp(m_buf[2], m_csize, srcV, srcPic.m_strideC);
> }
>
> void Yuv::copyFromYuv(const Yuv& srcYuv)
> {
> X265_CHECK(m_size >= srcYuv.m_size, "invalid size\n");
>
> - primitives.luma_copy_pp[m_part](m_buf[0], m_size, srcYuv.m_buf[0], srcYuv.m_size);
> - primitives.chroma[m_csp].copy_pp[m_part](m_buf[1], m_csize, srcYuv.m_buf[1], srcYuv.m_csize);
> - primitives.chroma[m_csp].copy_pp[m_part](m_buf[2], m_csize, srcYuv.m_buf[2], srcYuv.m_csize);
> + primitives.pu[m_part].luma_copy_pp(m_buf[0], m_size, srcYuv.m_buf[0], srcYuv.m_size);
> + primitives.chroma[m_csp].pu[m_part].copy_pp(m_buf[1], m_csize, srcYuv.m_buf[1], srcYuv.m_csize);
> + primitives.chroma[m_csp].pu[m_part].copy_pp(m_buf[2], m_csize, srcYuv.m_buf[2], srcYuv.m_csize);
> }
>
> /* This version is intended for use by ME, which required FENC_STRIDE for luma fenc pixels */
> @@ -115,47 +115,47 @@
> X265_CHECK(m_size == FENC_STRIDE && m_size >= srcYuv.m_size, "PU buffer size mismatch\n");
>
> const pixel* srcY = srcYuv.m_buf[0] + getAddrOffset(absPartIdx, srcYuv.m_size);
> - primitives.luma_copy_pp[partEnum](m_buf[0], m_size, srcY, srcYuv.m_size);
> + primitives.pu[partEnum].luma_copy_pp(m_buf[0], m_size, srcY, srcYuv.m_size);
>
> if (bChroma)
> {
> const pixel* srcU = srcYuv.m_buf[1] + srcYuv.getChromaAddrOffset(absPartIdx);
> const pixel* srcV = srcYuv.m_buf[2] + srcYuv.getChromaAddrOffset(absPartIdx);
> - primitives.chroma[m_csp].copy_pp[partEnum](m_buf[1], m_csize, srcU, srcYuv.m_csize);
> - primitives.chroma[m_csp].copy_pp[partEnum](m_buf[2], m_csize, srcV, srcYuv.m_csize);
> + primitives.chroma[m_csp].pu[partEnum].copy_pp(m_buf[1], m_csize, srcU, srcYuv.m_csize);
> + primitives.chroma[m_csp].pu[partEnum].copy_pp(m_buf[2], m_csize, srcV, srcYuv.m_csize);
> }
> }
>
> void Yuv::copyToPartYuv(Yuv& dstYuv, uint32_t absPartIdx) const
> {
> pixel* dstY = dstYuv.getLumaAddr(absPartIdx);
> - primitives.luma_copy_pp[m_part](dstY, dstYuv.m_size, m_buf[0], m_size);
> + primitives.pu[m_part].luma_copy_pp(dstY, dstYuv.m_size, m_buf[0], m_size);
>
> pixel* dstU = dstYuv.getCbAddr(absPartIdx);
> pixel* dstV = dstYuv.getCrAddr(absPartIdx);
> - primitives.chroma[m_csp].copy_pp[m_part](dstU, dstYuv.m_csize, m_buf[1], m_csize);
> - primitives.chroma[m_csp].copy_pp[m_part](dstV, dstYuv.m_csize, m_buf[2], m_csize);
> + primitives.chroma[m_csp].pu[m_part].copy_pp(dstU, dstYuv.m_csize, m_buf[1], m_csize);
> + primitives.chroma[m_csp].pu[m_part].copy_pp(dstV, dstYuv.m_csize, m_buf[2], m_csize);
> }
>
> void Yuv::copyPartToYuv(Yuv& dstYuv, uint32_t absPartIdx) const
> {
> pixel* srcY = m_buf[0] + getAddrOffset(absPartIdx, m_size);
> pixel* dstY = dstYuv.m_buf[0];
> - primitives.luma_copy_pp[dstYuv.m_part](dstY, dstYuv.m_size, srcY, m_size);
> + primitives.pu[dstYuv.m_part].luma_copy_pp(dstY, dstYuv.m_size, srcY, m_size);
>
> pixel* srcU = m_buf[1] + getChromaAddrOffset(absPartIdx);
> pixel* srcV = m_buf[2] + getChromaAddrOffset(absPartIdx);
> pixel* dstU = dstYuv.m_buf[1];
> pixel* dstV = dstYuv.m_buf[2];
> - primitives.chroma[m_csp].copy_pp[dstYuv.m_part](dstU, dstYuv.m_csize, srcU, m_csize);
> - primitives.chroma[m_csp].copy_pp[dstYuv.m_part](dstV, dstYuv.m_csize, srcV, m_csize);
> + primitives.chroma[m_csp].pu[dstYuv.m_part].copy_pp(dstU, dstYuv.m_csize, srcU, m_csize);
> + primitives.chroma[m_csp].pu[dstYuv.m_part].copy_pp(dstV, dstYuv.m_csize, srcV, m_csize);
> }
>
> void Yuv::addClip(const Yuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t log2SizeL)
> {
> - primitives.luma_add_ps[log2SizeL - 2](m_buf[0], m_size, srcYuv0.m_buf[0], srcYuv1.m_buf[0], srcYuv0.m_size, srcYuv1.m_size);
> - primitives.chroma[m_csp].add_ps[log2SizeL - 2](m_buf[1], m_csize, srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize, srcYuv1.m_csize);
> - primitives.chroma[m_csp].add_ps[log2SizeL - 2](m_buf[2], m_csize, srcYuv0.m_buf[2], srcYuv1.m_buf[2], srcYuv0.m_csize, srcYuv1.m_csize);
> + primitives.pu[log2SizeL - 2].luma_add_ps(m_buf[0], m_size, srcYuv0.m_buf[0], srcYuv1.m_buf[0], srcYuv0.m_size, srcYuv1.m_size);
> + primitives.chroma[m_csp].cu[log2SizeL - 2].add_ps(m_buf[1], m_csize, srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize, srcYuv1.m_csize);
> + primitives.chroma[m_csp].cu[log2SizeL - 2].add_ps(m_buf[2], m_csize, srcYuv0.m_buf[2], srcYuv1.m_buf[2], srcYuv0.m_csize, srcYuv1.m_csize);
> }
>
> void Yuv::addAvg(const ShortYuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t absPartIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma)
> @@ -167,7 +167,7 @@
> const int16_t* srcY0 = srcYuv0.getLumaAddr(absPartIdx);
> const int16_t* srcY1 = srcYuv1.getLumaAddr(absPartIdx);
> pixel* dstY = getLumaAddr(absPartIdx);
> - primitives.luma_addAvg[part](srcY0, srcY1, dstY, srcYuv0.m_size, srcYuv1.m_size, m_size);
> + primitives.pu[part].luma_addAvg(srcY0, srcY1, dstY, srcYuv0.m_size, srcYuv1.m_size, m_size);
> }
> if (bChroma)
> {
> @@ -177,8 +177,8 @@
> const int16_t* srcV1 = srcYuv1.getCrAddr(absPartIdx);
> pixel* dstU = getCbAddr(absPartIdx);
> pixel* dstV = getCrAddr(absPartIdx);
> - primitives.chroma[m_csp].addAvg[part](srcU0, srcU1, dstU, srcYuv0.m_csize, srcYuv1.m_csize, m_csize);
> - primitives.chroma[m_csp].addAvg[part](srcV0, srcV1, dstV, srcYuv0.m_csize, srcYuv1.m_csize, m_csize);
> + primitives.chroma[m_csp].pu[part].addAvg(srcU0, srcU1, dstU, srcYuv0.m_csize, srcYuv1.m_csize, m_csize);
> + primitives.chroma[m_csp].pu[part].addAvg(srcV0, srcV1, dstV, srcYuv0.m_csize, srcYuv1.m_csize, m_csize);
> }
> }
>
> @@ -186,7 +186,7 @@
> {
> const pixel* src = getLumaAddr(absPartIdx);
> pixel* dst = dstYuv.getLumaAddr(absPartIdx);
> - primitives.luma_copy_pp[log2Size - 2](dst, dstYuv.m_size, src, m_size);
> + primitives.pu[log2Size - 2].luma_copy_pp(dst, dstYuv.m_size, src, m_size);
> }
>
> void Yuv::copyPartToPartChroma(Yuv& dstYuv, uint32_t absPartIdx, uint32_t log2SizeL) const
> @@ -196,6 +196,6 @@
> const pixel* srcV = getCrAddr(absPartIdx);
> pixel* dstU = dstYuv.getCbAddr(absPartIdx);
> pixel* dstV = dstYuv.getCrAddr(absPartIdx);
> - primitives.chroma[m_csp].copy_pp[part](dstU, dstYuv.m_csize, srcU, m_csize);
> - primitives.chroma[m_csp].copy_pp[part](dstV, dstYuv.m_csize, srcV, m_csize);
> + primitives.chroma[m_csp].pu[part].copy_pp(dstU, dstYuv.m_csize, srcU, m_csize);
> + primitives.chroma[m_csp].pu[part].copy_pp(dstV, dstYuv.m_csize, srcV, m_csize);
> }
--
Steve Borho