[x265] [PATCH 1 of 5] Refactor EncoderPrimitives under common
Kevin Wu
kevin at multicorewareinc.com
Fri Jan 9 07:53:48 CET 2015
# HG changeset patch
# User Kevin Wu <kevin at multicorewareinc.com>
# Date 1420752218 21600
# Thu Jan 08 15:23:38 2015 -0600
# Node ID c6ca0fd54aa7c50119c9e5bdbbd02d49abb45559
# Parent 1924c460d1304d9ce775f35864712dd98f758f9f
Refactor EncoderPrimitives under common.
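This restructures the flat per-partition tables (p.satd[LUMA_8x8], p.dct[DCT_8x8], etc.) into per-block structs: p.pu[] for motion partitions and p.cu[] for square transform blocks, with matching pu[]/cu[] sub-tables inside each chroma color space. The 4x4 DST/IDST no longer fit the square-block table and become the standalone members p.dst4x4 / p.idst4x4. As a rough before/after sketch (illustrative only, not part of the diff):

    // before: one array per primitive, indexed by partition
    p.satd[LUMA_8x8](fenc, fencStride, pred, predStride);

    // after: one struct per partition, holding all of its primitives
    p.pu[LUMA_8x8].satd(fenc, fencStride, pred, predStride);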
diff -r 1924c460d130 -r c6ca0fd54aa7 source/common/dct.cpp
--- a/source/common/dct.cpp Fri Jan 09 11:35:26 2015 +0530
+++ b/source/common/dct.cpp Thu Jan 08 15:23:38 2015 -0600
@@ -765,22 +765,22 @@
p.dequant_normal = dequant_normal_c;
p.quant = quant_c;
p.nquant = nquant_c;
- p.dct[DST_4x4] = dst4_c;
- p.dct[DCT_4x4] = dct4_c;
- p.dct[DCT_8x8] = dct8_c;
- p.dct[DCT_16x16] = dct16_c;
- p.dct[DCT_32x32] = dct32_c;
- p.idct[IDST_4x4] = idst4_c;
- p.idct[IDCT_4x4] = idct4_c;
- p.idct[IDCT_8x8] = idct8_c;
- p.idct[IDCT_16x16] = idct16_c;
- p.idct[IDCT_32x32] = idct32_c;
+ p.dst4x4 = dst4_c;
+ p.cu[BLOCK_4x4].dct = dct4_c;
+ p.cu[BLOCK_8x8].dct = dct8_c;
+ p.cu[BLOCK_16x16].dct = dct16_c;
+ p.cu[BLOCK_32x32].dct = dct32_c;
+ p.idst4x4 = idst4_c;
+ p.cu[BLOCK_4x4].idct = idct4_c;
+ p.cu[BLOCK_8x8].idct = idct8_c;
+ p.cu[BLOCK_16x16].idct = idct16_c;
+ p.cu[BLOCK_32x32].idct = idct32_c;
p.count_nonzero = count_nonzero_c;
p.denoiseDct = denoiseDct_c;
- p.copy_cnt[BLOCK_4x4] = copy_count<4>;
- p.copy_cnt[BLOCK_8x8] = copy_count<8>;
- p.copy_cnt[BLOCK_16x16] = copy_count<16>;
- p.copy_cnt[BLOCK_32x32] = copy_count<32>;
+ p.cu[BLOCK_4x4].copy_cnt = copy_count<4>;
+ p.cu[BLOCK_8x8].copy_cnt = copy_count<8>;
+ p.cu[BLOCK_16x16].copy_cnt = copy_count<16>;
+ p.cu[BLOCK_32x32].copy_cnt = copy_count<32>;
}
}
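With the transforms keyed by BLOCK_* size instead of the DCT_*/DST_* enums, callers derive the table index from log2TrSize and special-case the 4x4 intra-luma DST. A minimal sketch of the selection, mirroring the quant.cpp change later in this patch (argument names illustrative):

    uint32_t sizeIdx = log2TrSize - 2;    // BLOCK_4x4 == 0
    if (!sizeIdx && isLuma && isIntra)
        primitives.dst4x4(residual, coeff, stride);  // 4x4 intra luma uses the DST
    else
        primitives.cu[sizeIdx].dct(residual, coeff, stride);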
diff -r 1924c460d130 -r c6ca0fd54aa7 source/common/ipfilter.cpp
--- a/source/common/ipfilter.cpp Fri Jan 09 11:35:26 2015 +0530
+++ b/source/common/ipfilter.cpp Thu Jan 08 15:23:38 2015 -0600
@@ -373,37 +373,37 @@
// x265 private namespace
#define CHROMA_420(W, H) \
- p.chroma[X265_CSP_I420].filter_hpp[CHROMA_ ## W ## x ## H] = interp_horiz_pp_c<4, W, H>; \
- p.chroma[X265_CSP_I420].filter_hps[CHROMA_ ## W ## x ## H] = interp_horiz_ps_c<4, W, H>; \
- p.chroma[X265_CSP_I420].filter_vpp[CHROMA_ ## W ## x ## H] = interp_vert_pp_c<4, W, H>; \
- p.chroma[X265_CSP_I420].filter_vps[CHROMA_ ## W ## x ## H] = interp_vert_ps_c<4, W, H>; \
- p.chroma[X265_CSP_I420].filter_vsp[CHROMA_ ## W ## x ## H] = interp_vert_sp_c<4, W, H>; \
- p.chroma[X265_CSP_I420].filter_vss[CHROMA_ ## W ## x ## H] = interp_vert_ss_c<4, W, H>;
+ p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].filter_hpp = interp_horiz_pp_c<4, W, H>; \
+ p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].filter_hps = interp_horiz_ps_c<4, W, H>; \
+ p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].filter_vpp = interp_vert_pp_c<4, W, H>; \
+ p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].filter_vps = interp_vert_ps_c<4, W, H>; \
+ p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].filter_vsp = interp_vert_sp_c<4, W, H>; \
+ p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].filter_vss = interp_vert_ss_c<4, W, H>;
#define CHROMA_422(W, H) \
- p.chroma[X265_CSP_I422].filter_hpp[CHROMA422_ ## W ## x ## H] = interp_horiz_pp_c<4, W, H>; \
- p.chroma[X265_CSP_I422].filter_hps[CHROMA422_ ## W ## x ## H] = interp_horiz_ps_c<4, W, H>; \
- p.chroma[X265_CSP_I422].filter_vpp[CHROMA422_ ## W ## x ## H] = interp_vert_pp_c<4, W, H>; \
- p.chroma[X265_CSP_I422].filter_vps[CHROMA422_ ## W ## x ## H] = interp_vert_ps_c<4, W, H>; \
- p.chroma[X265_CSP_I422].filter_vsp[CHROMA422_ ## W ## x ## H] = interp_vert_sp_c<4, W, H>; \
- p.chroma[X265_CSP_I422].filter_vss[CHROMA422_ ## W ## x ## H] = interp_vert_ss_c<4, W, H>;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].filter_hpp = interp_horiz_pp_c<4, W, H>; \
+ p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].filter_hps = interp_horiz_ps_c<4, W, H>; \
+ p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].filter_vpp = interp_vert_pp_c<4, W, H>; \
+ p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].filter_vps = interp_vert_ps_c<4, W, H>; \
+ p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].filter_vsp = interp_vert_sp_c<4, W, H>; \
+ p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].filter_vss = interp_vert_ss_c<4, W, H>;
#define CHROMA_444(W, H) \
- p.chroma[X265_CSP_I444].filter_hpp[LUMA_ ## W ## x ## H] = interp_horiz_pp_c<4, W, H>; \
- p.chroma[X265_CSP_I444].filter_hps[LUMA_ ## W ## x ## H] = interp_horiz_ps_c<4, W, H>; \
- p.chroma[X265_CSP_I444].filter_vpp[LUMA_ ## W ## x ## H] = interp_vert_pp_c<4, W, H>; \
- p.chroma[X265_CSP_I444].filter_vps[LUMA_ ## W ## x ## H] = interp_vert_ps_c<4, W, H>; \
- p.chroma[X265_CSP_I444].filter_vsp[LUMA_ ## W ## x ## H] = interp_vert_sp_c<4, W, H>; \
- p.chroma[X265_CSP_I444].filter_vss[LUMA_ ## W ## x ## H] = interp_vert_ss_c<4, W, H>;
+ p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_hpp = interp_horiz_pp_c<4, W, H>; \
+ p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_hps = interp_horiz_ps_c<4, W, H>; \
+ p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vpp = interp_vert_pp_c<4, W, H>; \
+ p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vps = interp_vert_ps_c<4, W, H>; \
+ p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vsp = interp_vert_sp_c<4, W, H>; \
+ p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vss = interp_vert_ss_c<4, W, H>;
#define LUMA(W, H) \
- p.luma_hpp[LUMA_ ## W ## x ## H] = interp_horiz_pp_c<8, W, H>; \
- p.luma_hps[LUMA_ ## W ## x ## H] = interp_horiz_ps_c<8, W, H>; \
- p.luma_vpp[LUMA_ ## W ## x ## H] = interp_vert_pp_c<8, W, H>; \
- p.luma_vps[LUMA_ ## W ## x ## H] = interp_vert_ps_c<8, W, H>; \
- p.luma_vsp[LUMA_ ## W ## x ## H] = interp_vert_sp_c<8, W, H>; \
- p.luma_vss[LUMA_ ## W ## x ## H] = interp_vert_ss_c<8, W, H>; \
- p.luma_hvpp[LUMA_ ## W ## x ## H] = interp_hv_pp_c<8, W, H>;
+ p.pu[LUMA_ ## W ## x ## H].luma_hpp = interp_horiz_pp_c<8, W, H>; \
+ p.pu[LUMA_ ## W ## x ## H].luma_hps = interp_horiz_ps_c<8, W, H>; \
+ p.pu[LUMA_ ## W ## x ## H].luma_vpp = interp_vert_pp_c<8, W, H>; \
+ p.pu[LUMA_ ## W ## x ## H].luma_vps = interp_vert_ps_c<8, W, H>; \
+ p.pu[LUMA_ ## W ## x ## H].luma_vsp = interp_vert_sp_c<8, W, H>; \
+ p.pu[LUMA_ ## W ## x ## H].luma_vss = interp_vert_ss_c<8, W, H>; \
+ p.pu[LUMA_ ## W ## x ## H].luma_hvpp = interp_hv_pp_c<8, W, H>;
void Setup_C_IPFilterPrimitives(EncoderPrimitives& p)
{
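For reference, a single expansion of the new LUMA() macro, e.g. LUMA(16, 8), now targets the per-PU struct (expansion shown for illustration; the vps/vsp/vss/hvpp lines follow the same pattern):

    p.pu[LUMA_16x8].luma_hpp = interp_horiz_pp_c<8, 16, 8>;
    p.pu[LUMA_16x8].luma_hps = interp_horiz_ps_c<8, 16, 8>;
    p.pu[LUMA_16x8].luma_vpp = interp_vert_pp_c<8, 16, 8>;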
diff -r 1924c460d130 -r c6ca0fd54aa7 source/common/lowres.h
--- a/source/common/lowres.h Fri Jan 09 11:35:26 2015 +0530
+++ b/source/common/lowres.h Thu Jan 08 15:23:38 2015 -0600
@@ -69,7 +69,7 @@
int qmvy = qmv.y + (qmv.y & 1);
int hpelB = (qmvy & 2) | ((qmvx & 2) >> 1);
pixel *frefB = lowresPlane[hpelB] + blockOffset + (qmvx >> 2) + (qmvy >> 2) * lumaStride;
- primitives.pixelavg_pp[LUMA_8x8](buf, outstride, frefA, lumaStride, frefB, lumaStride, 32);
+ primitives.pu[LUMA_8x8].pixelavg_pp(buf, outstride, frefA, lumaStride, frefB, lumaStride, 32);
return buf;
}
else
@@ -91,7 +91,7 @@
int qmvy = qmv.y + (qmv.y & 1);
int hpelB = (qmvy & 2) | ((qmvx & 2) >> 1);
pixel *frefB = lowresPlane[hpelB] + blockOffset + (qmvx >> 2) + (qmvy >> 2) * lumaStride;
- primitives.pixelavg_pp[LUMA_8x8](subpelbuf, 8, frefA, lumaStride, frefB, lumaStride, 32);
+ primitives.pu[LUMA_8x8].pixelavg_pp(subpelbuf, 8, frefA, lumaStride, frefB, lumaStride, 32);
return comp(fenc, FENC_STRIDE, subpelbuf, 8);
}
else
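Context for the lowres call sites above: the lookahead keeps four half-pel planes and approximates a quarter-pel position by averaging the two nearest half-pel pixels, which is why pixelavg_pp is the only PU primitive needed here. The plane index packs the half-pel phase; a sketch of the selection already visible in the surrounding code (comments are my reading, not from the patch):

    int hpelA = (qmv.y & 2) | ((qmv.x & 2) >> 1);  // nearest half-pel plane
    int qmvx = qmv.x + (qmv.x & 1);                // round x up to half-pel
    int qmvy = qmv.y + (qmv.y & 1);                // round y up to half-pel
    int hpelB = (qmvy & 2) | ((qmvx & 2) >> 1);    // second-nearest plane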
diff -r 1924c460d130 -r c6ca0fd54aa7 source/common/pixel.cpp
--- a/source/common/pixel.cpp Fri Jan 09 11:35:26 2015 +0530
+++ b/source/common/pixel.cpp Thu Jan 08 15:23:38 2015 -0600
@@ -33,58 +33,58 @@
using namespace x265;
#define SET_FUNC_PRIMITIVE_TABLE_C(FUNC_PREFIX, FUNC_PREFIX_DEF, DATA_TYPE1, DATA_TYPE2) \
- p.FUNC_PREFIX[LUMA_4x4] = FUNC_PREFIX_DEF<4, 4, DATA_TYPE1, DATA_TYPE2>; \
- p.FUNC_PREFIX[LUMA_8x8] = FUNC_PREFIX_DEF<8, 8, DATA_TYPE1, DATA_TYPE2>; \
- p.FUNC_PREFIX[LUMA_8x4] = FUNC_PREFIX_DEF<8, 4, DATA_TYPE1, DATA_TYPE2>; \
- p.FUNC_PREFIX[LUMA_4x8] = FUNC_PREFIX_DEF<4, 8, DATA_TYPE1, DATA_TYPE2>; \
- p.FUNC_PREFIX[LUMA_16x16] = FUNC_PREFIX_DEF<16, 16, DATA_TYPE1, DATA_TYPE2>; \
- p.FUNC_PREFIX[LUMA_16x8] = FUNC_PREFIX_DEF<16, 8, DATA_TYPE1, DATA_TYPE2>; \
- p.FUNC_PREFIX[LUMA_8x16] = FUNC_PREFIX_DEF<8, 16, DATA_TYPE1, DATA_TYPE2>; \
- p.FUNC_PREFIX[LUMA_16x12] = FUNC_PREFIX_DEF<16, 12, DATA_TYPE1, DATA_TYPE2>; \
- p.FUNC_PREFIX[LUMA_12x16] = FUNC_PREFIX_DEF<12, 16, DATA_TYPE1, DATA_TYPE2>; \
- p.FUNC_PREFIX[LUMA_16x4] = FUNC_PREFIX_DEF<16, 4, DATA_TYPE1, DATA_TYPE2>; \
- p.FUNC_PREFIX[LUMA_4x16] = FUNC_PREFIX_DEF<4, 16, DATA_TYPE1, DATA_TYPE2>; \
- p.FUNC_PREFIX[LUMA_32x32] = FUNC_PREFIX_DEF<32, 32, DATA_TYPE1, DATA_TYPE2>; \
- p.FUNC_PREFIX[LUMA_32x16] = FUNC_PREFIX_DEF<32, 16, DATA_TYPE1, DATA_TYPE2>; \
- p.FUNC_PREFIX[LUMA_16x32] = FUNC_PREFIX_DEF<16, 32, DATA_TYPE1, DATA_TYPE2>; \
- p.FUNC_PREFIX[LUMA_32x24] = FUNC_PREFIX_DEF<32, 24, DATA_TYPE1, DATA_TYPE2>; \
- p.FUNC_PREFIX[LUMA_24x32] = FUNC_PREFIX_DEF<24, 32, DATA_TYPE1, DATA_TYPE2>; \
- p.FUNC_PREFIX[LUMA_32x8] = FUNC_PREFIX_DEF<32, 8, DATA_TYPE1, DATA_TYPE2>; \
- p.FUNC_PREFIX[LUMA_8x32] = FUNC_PREFIX_DEF<8, 32, DATA_TYPE1, DATA_TYPE2>; \
- p.FUNC_PREFIX[LUMA_64x64] = FUNC_PREFIX_DEF<64, 64, DATA_TYPE1, DATA_TYPE2>; \
- p.FUNC_PREFIX[LUMA_64x32] = FUNC_PREFIX_DEF<64, 32, DATA_TYPE1, DATA_TYPE2>; \
- p.FUNC_PREFIX[LUMA_32x64] = FUNC_PREFIX_DEF<32, 64, DATA_TYPE1, DATA_TYPE2>; \
- p.FUNC_PREFIX[LUMA_64x48] = FUNC_PREFIX_DEF<64, 48, DATA_TYPE1, DATA_TYPE2>; \
- p.FUNC_PREFIX[LUMA_48x64] = FUNC_PREFIX_DEF<48, 64, DATA_TYPE1, DATA_TYPE2>; \
- p.FUNC_PREFIX[LUMA_64x16] = FUNC_PREFIX_DEF<64, 16, DATA_TYPE1, DATA_TYPE2>; \
- p.FUNC_PREFIX[LUMA_16x64] = FUNC_PREFIX_DEF<16, 64, DATA_TYPE1, DATA_TYPE2>;
+ p.pu[LUMA_4x4].FUNC_PREFIX = FUNC_PREFIX_DEF<4, 4, DATA_TYPE1, DATA_TYPE2>; \
+ p.pu[LUMA_8x8].FUNC_PREFIX = FUNC_PREFIX_DEF<8, 8, DATA_TYPE1, DATA_TYPE2>; \
+ p.pu[LUMA_8x4].FUNC_PREFIX = FUNC_PREFIX_DEF<8, 4, DATA_TYPE1, DATA_TYPE2>; \
+ p.pu[LUMA_4x8].FUNC_PREFIX = FUNC_PREFIX_DEF<4, 8, DATA_TYPE1, DATA_TYPE2>; \
+ p.pu[LUMA_16x16].FUNC_PREFIX = FUNC_PREFIX_DEF<16, 16, DATA_TYPE1, DATA_TYPE2>; \
+ p.pu[LUMA_16x8].FUNC_PREFIX = FUNC_PREFIX_DEF<16, 8, DATA_TYPE1, DATA_TYPE2>; \
+ p.pu[LUMA_8x16].FUNC_PREFIX = FUNC_PREFIX_DEF<8, 16, DATA_TYPE1, DATA_TYPE2>; \
+ p.pu[LUMA_16x12].FUNC_PREFIX = FUNC_PREFIX_DEF<16, 12, DATA_TYPE1, DATA_TYPE2>; \
+ p.pu[LUMA_12x16].FUNC_PREFIX = FUNC_PREFIX_DEF<12, 16, DATA_TYPE1, DATA_TYPE2>; \
+ p.pu[LUMA_16x4].FUNC_PREFIX = FUNC_PREFIX_DEF<16, 4, DATA_TYPE1, DATA_TYPE2>; \
+ p.pu[LUMA_4x16].FUNC_PREFIX = FUNC_PREFIX_DEF<4, 16, DATA_TYPE1, DATA_TYPE2>; \
+ p.pu[LUMA_32x32].FUNC_PREFIX = FUNC_PREFIX_DEF<32, 32, DATA_TYPE1, DATA_TYPE2>; \
+ p.pu[LUMA_32x16].FUNC_PREFIX = FUNC_PREFIX_DEF<32, 16, DATA_TYPE1, DATA_TYPE2>; \
+ p.pu[LUMA_16x32].FUNC_PREFIX = FUNC_PREFIX_DEF<16, 32, DATA_TYPE1, DATA_TYPE2>; \
+ p.pu[LUMA_32x24].FUNC_PREFIX = FUNC_PREFIX_DEF<32, 24, DATA_TYPE1, DATA_TYPE2>; \
+ p.pu[LUMA_24x32].FUNC_PREFIX = FUNC_PREFIX_DEF<24, 32, DATA_TYPE1, DATA_TYPE2>; \
+ p.pu[LUMA_32x8].FUNC_PREFIX = FUNC_PREFIX_DEF<32, 8, DATA_TYPE1, DATA_TYPE2>; \
+ p.pu[LUMA_8x32].FUNC_PREFIX = FUNC_PREFIX_DEF<8, 32, DATA_TYPE1, DATA_TYPE2>; \
+ p.pu[LUMA_64x64].FUNC_PREFIX = FUNC_PREFIX_DEF<64, 64, DATA_TYPE1, DATA_TYPE2>; \
+ p.pu[LUMA_64x32].FUNC_PREFIX = FUNC_PREFIX_DEF<64, 32, DATA_TYPE1, DATA_TYPE2>; \
+ p.pu[LUMA_32x64].FUNC_PREFIX = FUNC_PREFIX_DEF<32, 64, DATA_TYPE1, DATA_TYPE2>; \
+ p.pu[LUMA_64x48].FUNC_PREFIX = FUNC_PREFIX_DEF<64, 48, DATA_TYPE1, DATA_TYPE2>; \
+ p.pu[LUMA_48x64].FUNC_PREFIX = FUNC_PREFIX_DEF<48, 64, DATA_TYPE1, DATA_TYPE2>; \
+ p.pu[LUMA_64x16].FUNC_PREFIX = FUNC_PREFIX_DEF<64, 16, DATA_TYPE1, DATA_TYPE2>; \
+ p.pu[LUMA_16x64].FUNC_PREFIX = FUNC_PREFIX_DEF<16, 64, DATA_TYPE1, DATA_TYPE2>;
#define SET_FUNC_PRIMITIVE_TABLE_C2(FUNC_PREFIX) \
- p.FUNC_PREFIX[LUMA_4x4] = FUNC_PREFIX<4, 4>; \
- p.FUNC_PREFIX[LUMA_8x8] = FUNC_PREFIX<8, 8>; \
- p.FUNC_PREFIX[LUMA_8x4] = FUNC_PREFIX<8, 4>; \
- p.FUNC_PREFIX[LUMA_4x8] = FUNC_PREFIX<4, 8>; \
- p.FUNC_PREFIX[LUMA_16x16] = FUNC_PREFIX<16, 16>; \
- p.FUNC_PREFIX[LUMA_16x8] = FUNC_PREFIX<16, 8>; \
- p.FUNC_PREFIX[LUMA_8x16] = FUNC_PREFIX<8, 16>; \
- p.FUNC_PREFIX[LUMA_16x12] = FUNC_PREFIX<16, 12>; \
- p.FUNC_PREFIX[LUMA_12x16] = FUNC_PREFIX<12, 16>; \
- p.FUNC_PREFIX[LUMA_16x4] = FUNC_PREFIX<16, 4>; \
- p.FUNC_PREFIX[LUMA_4x16] = FUNC_PREFIX<4, 16>; \
- p.FUNC_PREFIX[LUMA_32x32] = FUNC_PREFIX<32, 32>; \
- p.FUNC_PREFIX[LUMA_32x16] = FUNC_PREFIX<32, 16>; \
- p.FUNC_PREFIX[LUMA_16x32] = FUNC_PREFIX<16, 32>; \
- p.FUNC_PREFIX[LUMA_32x24] = FUNC_PREFIX<32, 24>; \
- p.FUNC_PREFIX[LUMA_24x32] = FUNC_PREFIX<24, 32>; \
- p.FUNC_PREFIX[LUMA_32x8] = FUNC_PREFIX<32, 8>; \
- p.FUNC_PREFIX[LUMA_8x32] = FUNC_PREFIX<8, 32>; \
- p.FUNC_PREFIX[LUMA_64x64] = FUNC_PREFIX<64, 64>; \
- p.FUNC_PREFIX[LUMA_64x32] = FUNC_PREFIX<64, 32>; \
- p.FUNC_PREFIX[LUMA_32x64] = FUNC_PREFIX<32, 64>; \
- p.FUNC_PREFIX[LUMA_64x48] = FUNC_PREFIX<64, 48>; \
- p.FUNC_PREFIX[LUMA_48x64] = FUNC_PREFIX<48, 64>; \
- p.FUNC_PREFIX[LUMA_64x16] = FUNC_PREFIX<64, 16>; \
- p.FUNC_PREFIX[LUMA_16x64] = FUNC_PREFIX<16, 64>;
+ p.pu[LUMA_4x4].FUNC_PREFIX = FUNC_PREFIX<4, 4>; \
+ p.pu[LUMA_8x8].FUNC_PREFIX = FUNC_PREFIX<8, 8>; \
+ p.pu[LUMA_8x4].FUNC_PREFIX = FUNC_PREFIX<8, 4>; \
+ p.pu[LUMA_4x8].FUNC_PREFIX = FUNC_PREFIX<4, 8>; \
+ p.pu[LUMA_16x16].FUNC_PREFIX = FUNC_PREFIX<16, 16>; \
+ p.pu[LUMA_16x8].FUNC_PREFIX = FUNC_PREFIX<16, 8>; \
+ p.pu[LUMA_8x16].FUNC_PREFIX = FUNC_PREFIX<8, 16>; \
+ p.pu[LUMA_16x12].FUNC_PREFIX = FUNC_PREFIX<16, 12>; \
+ p.pu[LUMA_12x16].FUNC_PREFIX = FUNC_PREFIX<12, 16>; \
+ p.pu[LUMA_16x4].FUNC_PREFIX = FUNC_PREFIX<16, 4>; \
+ p.pu[LUMA_4x16].FUNC_PREFIX = FUNC_PREFIX<4, 16>; \
+ p.pu[LUMA_32x32].FUNC_PREFIX = FUNC_PREFIX<32, 32>; \
+ p.pu[LUMA_32x16].FUNC_PREFIX = FUNC_PREFIX<32, 16>; \
+ p.pu[LUMA_16x32].FUNC_PREFIX = FUNC_PREFIX<16, 32>; \
+ p.pu[LUMA_32x24].FUNC_PREFIX = FUNC_PREFIX<32, 24>; \
+ p.pu[LUMA_24x32].FUNC_PREFIX = FUNC_PREFIX<24, 32>; \
+ p.pu[LUMA_32x8].FUNC_PREFIX = FUNC_PREFIX<32, 8>; \
+ p.pu[LUMA_8x32].FUNC_PREFIX = FUNC_PREFIX<8, 32>; \
+ p.pu[LUMA_64x64].FUNC_PREFIX = FUNC_PREFIX<64, 64>; \
+ p.pu[LUMA_64x32].FUNC_PREFIX = FUNC_PREFIX<64, 32>; \
+ p.pu[LUMA_32x64].FUNC_PREFIX = FUNC_PREFIX<32, 64>; \
+ p.pu[LUMA_64x48].FUNC_PREFIX = FUNC_PREFIX<64, 48>; \
+ p.pu[LUMA_48x64].FUNC_PREFIX = FUNC_PREFIX<48, 64>; \
+ p.pu[LUMA_64x16].FUNC_PREFIX = FUNC_PREFIX<64, 16>; \
+ p.pu[LUMA_16x64].FUNC_PREFIX = FUNC_PREFIX<16, 64>;
namespace {
// place functions in anonymous namespace (file static)
@@ -1019,132 +1019,132 @@
SET_FUNC_PRIMITIVE_TABLE_C2(pixelavg_pp)
// satd
- p.satd[LUMA_4x4] = satd_4x4;
- p.satd[LUMA_8x8] = satd8<8, 8>;
- p.satd[LUMA_8x4] = satd_8x4;
- p.satd[LUMA_4x8] = satd4<4, 8>;
- p.satd[LUMA_16x16] = satd8<16, 16>;
- p.satd[LUMA_16x8] = satd8<16, 8>;
- p.satd[LUMA_8x16] = satd8<8, 16>;
- p.satd[LUMA_16x12] = satd8<16, 12>;
- p.satd[LUMA_12x16] = satd4<12, 16>;
- p.satd[LUMA_16x4] = satd8<16, 4>;
- p.satd[LUMA_4x16] = satd4<4, 16>;
- p.satd[LUMA_32x32] = satd8<32, 32>;
- p.satd[LUMA_32x16] = satd8<32, 16>;
- p.satd[LUMA_16x32] = satd8<16, 32>;
- p.satd[LUMA_32x24] = satd8<32, 24>;
- p.satd[LUMA_24x32] = satd8<24, 32>;
- p.satd[LUMA_32x8] = satd8<32, 8>;
- p.satd[LUMA_8x32] = satd8<8, 32>;
- p.satd[LUMA_64x64] = satd8<64, 64>;
- p.satd[LUMA_64x32] = satd8<64, 32>;
- p.satd[LUMA_32x64] = satd8<32, 64>;
- p.satd[LUMA_64x48] = satd8<64, 48>;
- p.satd[LUMA_48x64] = satd8<48, 64>;
- p.satd[LUMA_64x16] = satd8<64, 16>;
- p.satd[LUMA_16x64] = satd8<16, 64>;
+ p.pu[LUMA_4x4].satd = satd_4x4;
+ p.pu[LUMA_8x8].satd = satd8<8, 8>;
+ p.pu[LUMA_8x4].satd = satd_8x4;
+ p.pu[LUMA_4x8].satd = satd4<4, 8>;
+ p.pu[LUMA_16x16].satd = satd8<16, 16>;
+ p.pu[LUMA_16x8].satd = satd8<16, 8>;
+ p.pu[LUMA_8x16].satd = satd8<8, 16>;
+ p.pu[LUMA_16x12].satd = satd8<16, 12>;
+ p.pu[LUMA_12x16].satd = satd4<12, 16>;
+ p.pu[LUMA_16x4].satd = satd8<16, 4>;
+ p.pu[LUMA_4x16].satd = satd4<4, 16>;
+ p.pu[LUMA_32x32].satd = satd8<32, 32>;
+ p.pu[LUMA_32x16].satd = satd8<32, 16>;
+ p.pu[LUMA_16x32].satd = satd8<16, 32>;
+ p.pu[LUMA_32x24].satd = satd8<32, 24>;
+ p.pu[LUMA_24x32].satd = satd8<24, 32>;
+ p.pu[LUMA_32x8].satd = satd8<32, 8>;
+ p.pu[LUMA_8x32].satd = satd8<8, 32>;
+ p.pu[LUMA_64x64].satd = satd8<64, 64>;
+ p.pu[LUMA_64x32].satd = satd8<64, 32>;
+ p.pu[LUMA_32x64].satd = satd8<32, 64>;
+ p.pu[LUMA_64x48].satd = satd8<64, 48>;
+ p.pu[LUMA_48x64].satd = satd8<48, 64>;
+ p.pu[LUMA_64x16].satd = satd8<64, 16>;
+ p.pu[LUMA_16x64].satd = satd8<16, 64>;
- p.chroma[X265_CSP_I420].satd[CHROMA_2x2] = NULL;
- p.chroma[X265_CSP_I420].satd[CHROMA_4x4] = satd_4x4;
- p.chroma[X265_CSP_I420].satd[CHROMA_8x8] = satd8<8, 8>;
- p.chroma[X265_CSP_I420].satd[CHROMA_16x16] = satd8<16, 16>;
- p.chroma[X265_CSP_I420].satd[CHROMA_32x32] = satd8<32, 32>;
+ p.chroma[X265_CSP_I420].pu[CHROMA_2x2].satd = NULL;
+ p.chroma[X265_CSP_I420].pu[CHROMA_4x4].satd = satd_4x4;
+ p.chroma[X265_CSP_I420].pu[CHROMA_8x8].satd = satd8<8, 8>;
+ p.chroma[X265_CSP_I420].pu[CHROMA_16x16].satd = satd8<16, 16>;
+ p.chroma[X265_CSP_I420].pu[CHROMA_32x32].satd = satd8<32, 32>;
- p.chroma[X265_CSP_I420].satd[CHROMA_4x2] = NULL;
- p.chroma[X265_CSP_I420].satd[CHROMA_2x4] = NULL;
- p.chroma[X265_CSP_I420].satd[CHROMA_8x4] = satd_8x4;
- p.chroma[X265_CSP_I420].satd[CHROMA_4x8] = satd4<4, 8>;
- p.chroma[X265_CSP_I420].satd[CHROMA_16x8] = satd8<16, 8>;
- p.chroma[X265_CSP_I420].satd[CHROMA_8x16] = satd8<8, 16>;
- p.chroma[X265_CSP_I420].satd[CHROMA_32x16] = satd8<32, 16>;
- p.chroma[X265_CSP_I420].satd[CHROMA_16x32] = satd8<16, 32>;
+ p.chroma[X265_CSP_I420].pu[CHROMA_4x2].satd = NULL;
+ p.chroma[X265_CSP_I420].pu[CHROMA_2x4].satd = NULL;
+ p.chroma[X265_CSP_I420].pu[CHROMA_8x4].satd = satd_8x4;
+ p.chroma[X265_CSP_I420].pu[CHROMA_4x8].satd = satd4<4, 8>;
+ p.chroma[X265_CSP_I420].pu[CHROMA_16x8].satd = satd8<16, 8>;
+ p.chroma[X265_CSP_I420].pu[CHROMA_8x16].satd = satd8<8, 16>;
+ p.chroma[X265_CSP_I420].pu[CHROMA_32x16].satd = satd8<32, 16>;
+ p.chroma[X265_CSP_I420].pu[CHROMA_16x32].satd = satd8<16, 32>;
- p.chroma[X265_CSP_I420].satd[CHROMA_8x6] = NULL;
- p.chroma[X265_CSP_I420].satd[CHROMA_6x8] = NULL;
- p.chroma[X265_CSP_I420].satd[CHROMA_8x2] = NULL;
- p.chroma[X265_CSP_I420].satd[CHROMA_2x8] = NULL;
- p.chroma[X265_CSP_I420].satd[CHROMA_16x12] = satd4<16, 12>;
- p.chroma[X265_CSP_I420].satd[CHROMA_12x16] = satd4<12, 16>;
- p.chroma[X265_CSP_I420].satd[CHROMA_16x4] = satd4<16, 4>;
- p.chroma[X265_CSP_I420].satd[CHROMA_4x16] = satd4<4, 16>;
- p.chroma[X265_CSP_I420].satd[CHROMA_32x24] = satd8<32, 24>;
- p.chroma[X265_CSP_I420].satd[CHROMA_24x32] = satd8<24, 32>;
- p.chroma[X265_CSP_I420].satd[CHROMA_32x8] = satd8<32, 8>;
- p.chroma[X265_CSP_I420].satd[CHROMA_8x32] = satd8<8, 32>;
+ p.chroma[X265_CSP_I420].pu[CHROMA_8x6].satd = NULL;
+ p.chroma[X265_CSP_I420].pu[CHROMA_6x8].satd = NULL;
+ p.chroma[X265_CSP_I420].pu[CHROMA_8x2].satd = NULL;
+ p.chroma[X265_CSP_I420].pu[CHROMA_2x8].satd = NULL;
+ p.chroma[X265_CSP_I420].pu[CHROMA_16x12].satd = satd4<16, 12>;
+ p.chroma[X265_CSP_I420].pu[CHROMA_12x16].satd = satd4<12, 16>;
+ p.chroma[X265_CSP_I420].pu[CHROMA_16x4].satd = satd4<16, 4>;
+ p.chroma[X265_CSP_I420].pu[CHROMA_4x16].satd = satd4<4, 16>;
+ p.chroma[X265_CSP_I420].pu[CHROMA_32x24].satd = satd8<32, 24>;
+ p.chroma[X265_CSP_I420].pu[CHROMA_24x32].satd = satd8<24, 32>;
+ p.chroma[X265_CSP_I420].pu[CHROMA_32x8].satd = satd8<32, 8>;
+ p.chroma[X265_CSP_I420].pu[CHROMA_8x32].satd = satd8<8, 32>;
- p.chroma[X265_CSP_I422].satd[CHROMA422_2x4] = NULL;
- p.chroma[X265_CSP_I422].satd[CHROMA422_4x8] = satd4<4, 8>;
- p.chroma[X265_CSP_I422].satd[CHROMA422_8x16] = satd8<8, 16>;
- p.chroma[X265_CSP_I422].satd[CHROMA422_16x32] = satd8<16, 32>;
- p.chroma[X265_CSP_I422].satd[CHROMA422_32x64] = satd8<32, 64>;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_2x4].satd = NULL;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_4x8].satd = satd4<4, 8>;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_8x16].satd = satd8<8, 16>;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_16x32].satd = satd8<16, 32>;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_32x64].satd = satd8<32, 64>;
- p.chroma[X265_CSP_I422].satd[CHROMA422_4x4] = satd_4x4;
- p.chroma[X265_CSP_I422].satd[CHROMA422_2x8] = NULL;
- p.chroma[X265_CSP_I422].satd[CHROMA422_8x8] = satd8<8, 8>;
- p.chroma[X265_CSP_I422].satd[CHROMA422_4x16] = satd4<4, 16>;
- p.chroma[X265_CSP_I422].satd[CHROMA422_16x16] = satd8<16, 16>;
- p.chroma[X265_CSP_I422].satd[CHROMA422_8x32] = satd8<8, 32>;
- p.chroma[X265_CSP_I422].satd[CHROMA422_32x32] = satd8<32, 32>;
- p.chroma[X265_CSP_I422].satd[CHROMA422_16x64] = satd8<16, 64>;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_4x4].satd = satd_4x4;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_2x8].satd = NULL;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_8x8].satd = satd8<8, 8>;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_4x16].satd = satd4<4, 16>;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_16x16].satd = satd8<16, 16>;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_8x32].satd = satd8<8, 32>;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_32x32].satd = satd8<32, 32>;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_16x64].satd = satd8<16, 64>;
- p.chroma[X265_CSP_I422].satd[CHROMA422_8x12] = satd4<8, 12>;
- p.chroma[X265_CSP_I422].satd[CHROMA422_6x16] = NULL;
- p.chroma[X265_CSP_I422].satd[CHROMA422_8x4] = satd4<8, 4>;
- p.chroma[X265_CSP_I422].satd[CHROMA422_2x16] = NULL;
- p.chroma[X265_CSP_I422].satd[CHROMA422_16x24] = satd8<16, 24>;
- p.chroma[X265_CSP_I422].satd[CHROMA422_12x32] = satd4<12, 32>;
- p.chroma[X265_CSP_I422].satd[CHROMA422_16x8] = satd8<16, 8>;
- p.chroma[X265_CSP_I422].satd[CHROMA422_4x32] = satd4<4, 32>;
- p.chroma[X265_CSP_I422].satd[CHROMA422_32x48] = satd8<32, 48>;
- p.chroma[X265_CSP_I422].satd[CHROMA422_24x64] = satd8<24, 64>;
- p.chroma[X265_CSP_I422].satd[CHROMA422_32x16] = satd8<32, 16>;
- p.chroma[X265_CSP_I422].satd[CHROMA422_8x64] = satd8<8, 64>;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_8x12].satd = satd4<8, 12>;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_6x16].satd = NULL;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_8x4].satd = satd4<8, 4>;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_2x16].satd = NULL;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_16x24].satd = satd8<16, 24>;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_12x32].satd = satd4<12, 32>;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_16x8].satd = satd8<16, 8>;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_4x32].satd = satd4<4, 32>;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_32x48].satd = satd8<32, 48>;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_24x64].satd = satd8<24, 64>;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_32x16].satd = satd8<32, 16>;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_8x64].satd = satd8<8, 64>;
#define CHROMA_420(W, H) \
- p.chroma[X265_CSP_I420].addAvg[CHROMA_ ## W ## x ## H] = addAvg<W, H>; \
- p.chroma[X265_CSP_I420].copy_pp[CHROMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
- p.chroma[X265_CSP_I420].copy_sp[CHROMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
- p.chroma[X265_CSP_I420].copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \
- p.chroma[X265_CSP_I420].copy_ss[CHROMA_ ## W ## x ## H] = blockcopy_ss_c<W, H>;
+ p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].addAvg = addAvg<W, H>; \
+ p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].copy_pp = blockcopy_pp_c<W, H>; \
+ p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].copy_sp = blockcopy_sp_c<W, H>; \
+ p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].copy_ps = blockcopy_ps_c<W, H>; \
+ p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].copy_ss = blockcopy_ss_c<W, H>;
#define CHROMA_422(W, H) \
- p.chroma[X265_CSP_I422].addAvg[CHROMA422_ ## W ## x ## H] = addAvg<W, H>; \
- p.chroma[X265_CSP_I422].copy_pp[CHROMA422_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
- p.chroma[X265_CSP_I422].copy_sp[CHROMA422_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
- p.chroma[X265_CSP_I422].copy_ps[CHROMA422_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \
- p.chroma[X265_CSP_I422].copy_ss[CHROMA422_ ## W ## x ## H] = blockcopy_ss_c<W, H>;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].addAvg = addAvg<W, H>; \
+ p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].copy_pp = blockcopy_pp_c<W, H>; \
+ p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].copy_sp = blockcopy_sp_c<W, H>; \
+ p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].copy_ps = blockcopy_ps_c<W, H>; \
+ p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].copy_ss = blockcopy_ss_c<W, H>;
#define CHROMA_444(W, H) \
- p.chroma[X265_CSP_I444].satd[LUMA_ ## W ## x ## H] = p.satd[LUMA_ ## W ## x ## H]; \
- p.chroma[X265_CSP_I444].addAvg[LUMA_ ## W ## x ## H] = addAvg<W, H>; \
- p.chroma[X265_CSP_I444].copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
- p.chroma[X265_CSP_I444].copy_sp[LUMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
- p.chroma[X265_CSP_I444].copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \
- p.chroma[X265_CSP_I444].copy_ss[LUMA_ ## W ## x ## H] = blockcopy_ss_c<W, H>;
+ p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].satd = p.pu[LUMA_ ## W ## x ## H].satd; \
+ p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].addAvg = addAvg<W, H>; \
+ p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].copy_pp = blockcopy_pp_c<W, H>; \
+ p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].copy_sp = blockcopy_sp_c<W, H>; \
+ p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].copy_ps = blockcopy_ps_c<W, H>; \
+ p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].copy_ss = blockcopy_ss_c<W, H>;
#define LUMA(W, H) \
- p.luma_addAvg[LUMA_ ## W ## x ## H] = addAvg<W, H>; \
- p.luma_copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
- p.luma_copy_sp[LUMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
- p.luma_copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \
- p.luma_copy_ss[LUMA_ ## W ## x ## H] = blockcopy_ss_c<W, H>;
+ p.pu[LUMA_ ## W ## x ## H].luma_addAvg = addAvg<W, H>; \
+ p.pu[LUMA_ ## W ## x ## H].luma_copy_pp = blockcopy_pp_c<W, H>; \
+ p.pu[LUMA_ ## W ## x ## H].luma_copy_sp = blockcopy_sp_c<W, H>; \
+ p.pu[LUMA_ ## W ## x ## H].luma_copy_ps = blockcopy_ps_c<W, H>; \
+ p.pu[LUMA_ ## W ## x ## H].luma_copy_ss = blockcopy_ss_c<W, H>;
#define LUMA_PIXELSUB(W, H) \
- p.luma_sub_ps[LUMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; \
- p.luma_add_ps[LUMA_ ## W ## x ## H] = pixel_add_ps_c<W, H>;
+ p.pu[LUMA_ ## W ## x ## H].luma_sub_ps = pixel_sub_ps_c<W, H>; \
+ p.pu[LUMA_ ## W ## x ## H].luma_add_ps = pixel_add_ps_c<W, H>;
#define CHROMA_PIXELSUB_420(W, H) \
- p.chroma[X265_CSP_I420].sub_ps[CHROMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; \
- p.chroma[X265_CSP_I420].add_ps[CHROMA_ ## W ## x ## H] = pixel_add_ps_c<W, H>;
+ p.chroma[X265_CSP_I420].cu[CHROMA_ ## W ## x ## H].sub_ps = pixel_sub_ps_c<W, H>; \
+ p.chroma[X265_CSP_I420].cu[CHROMA_ ## W ## x ## H].add_ps = pixel_add_ps_c<W, H>;
#define CHROMA_PIXELSUB_422(W, H) \
- p.chroma[X265_CSP_I422].sub_ps[CHROMA422_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; \
- p.chroma[X265_CSP_I422].add_ps[CHROMA422_ ## W ## x ## H] = pixel_add_ps_c<W, H>;
+ p.chroma[X265_CSP_I422].cu[CHROMA422_ ## W ## x ## H].sub_ps = pixel_sub_ps_c<W, H>; \
+ p.chroma[X265_CSP_I422].cu[CHROMA422_ ## W ## x ## H].add_ps = pixel_add_ps_c<W, H>;
#define CHROMA_PIXELSUB_444(W, H) \
- p.chroma[X265_CSP_I444].sub_ps[LUMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; \
- p.chroma[X265_CSP_I444].add_ps[LUMA_ ## W ## x ## H] = pixel_add_ps_c<W, H>;
+ p.chroma[X265_CSP_I444].cu[LUMA_ ## W ## x ## H].sub_ps = pixel_sub_ps_c<W, H>; \
+ p.chroma[X265_CSP_I444].cu[LUMA_ ## W ## x ## H].add_ps = pixel_add_ps_c<W, H>;
LUMA(4, 4);
LUMA(8, 8);
@@ -1269,89 +1269,89 @@
SET_FUNC_PRIMITIVE_TABLE_C(sse_sp, sse, int16_t, pixel)
SET_FUNC_PRIMITIVE_TABLE_C(sse_ss, sse, int16_t, int16_t)
- p.blockfill_s[BLOCK_4x4] = blockfil_s_c<4>;
- p.blockfill_s[BLOCK_8x8] = blockfil_s_c<8>;
- p.blockfill_s[BLOCK_16x16] = blockfil_s_c<16>;
- p.blockfill_s[BLOCK_32x32] = blockfil_s_c<32>;
- p.blockfill_s[BLOCK_64x64] = blockfil_s_c<64>;
+ p.cu[BLOCK_4x4].blockfill_s = blockfil_s_c<4>;
+ p.cu[BLOCK_8x8].blockfill_s = blockfil_s_c<8>;
+ p.cu[BLOCK_16x16].blockfill_s = blockfil_s_c<16>;
+ p.cu[BLOCK_32x32].blockfill_s = blockfil_s_c<32>;
+ p.cu[BLOCK_64x64].blockfill_s = blockfil_s_c<64>;
- p.cpy2Dto1D_shl[BLOCK_4x4] = cpy2Dto1D_shl<4>;
- p.cpy2Dto1D_shl[BLOCK_8x8] = cpy2Dto1D_shl<8>;
- p.cpy2Dto1D_shl[BLOCK_16x16] = cpy2Dto1D_shl<16>;
- p.cpy2Dto1D_shl[BLOCK_32x32] = cpy2Dto1D_shl<32>;
- p.cpy2Dto1D_shr[BLOCK_4x4] = cpy2Dto1D_shr<4>;
- p.cpy2Dto1D_shr[BLOCK_8x8] = cpy2Dto1D_shr<8>;
- p.cpy2Dto1D_shr[BLOCK_16x16] = cpy2Dto1D_shr<16>;
- p.cpy2Dto1D_shr[BLOCK_32x32] = cpy2Dto1D_shr<32>;
- p.cpy1Dto2D_shl[BLOCK_4x4] = cpy1Dto2D_shl<4>;
- p.cpy1Dto2D_shl[BLOCK_8x8] = cpy1Dto2D_shl<8>;
- p.cpy1Dto2D_shl[BLOCK_16x16] = cpy1Dto2D_shl<16>;
- p.cpy1Dto2D_shl[BLOCK_32x32] = cpy1Dto2D_shl<32>;
- p.cpy1Dto2D_shr[BLOCK_4x4] = cpy1Dto2D_shr<4>;
- p.cpy1Dto2D_shr[BLOCK_8x8] = cpy1Dto2D_shr<8>;
- p.cpy1Dto2D_shr[BLOCK_16x16] = cpy1Dto2D_shr<16>;
- p.cpy1Dto2D_shr[BLOCK_32x32] = cpy1Dto2D_shr<32>;
+ p.cu[BLOCK_4x4].cpy2Dto1D_shl = cpy2Dto1D_shl<4>;
+ p.cu[BLOCK_8x8].cpy2Dto1D_shl = cpy2Dto1D_shl<8>;
+ p.cu[BLOCK_16x16].cpy2Dto1D_shl = cpy2Dto1D_shl<16>;
+ p.cu[BLOCK_32x32].cpy2Dto1D_shl = cpy2Dto1D_shl<32>;
+ p.cu[BLOCK_4x4].cpy2Dto1D_shr = cpy2Dto1D_shr<4>;
+ p.cu[BLOCK_8x8].cpy2Dto1D_shr = cpy2Dto1D_shr<8>;
+ p.cu[BLOCK_16x16].cpy2Dto1D_shr = cpy2Dto1D_shr<16>;
+ p.cu[BLOCK_32x32].cpy2Dto1D_shr = cpy2Dto1D_shr<32>;
+ p.cu[BLOCK_4x4].cpy1Dto2D_shl = cpy1Dto2D_shl<4>;
+ p.cu[BLOCK_8x8].cpy1Dto2D_shl = cpy1Dto2D_shl<8>;
+ p.cu[BLOCK_16x16].cpy1Dto2D_shl = cpy1Dto2D_shl<16>;
+ p.cu[BLOCK_32x32].cpy1Dto2D_shl = cpy1Dto2D_shl<32>;
+ p.cu[BLOCK_4x4].cpy1Dto2D_shr = cpy1Dto2D_shr<4>;
+ p.cu[BLOCK_8x8].cpy1Dto2D_shr = cpy1Dto2D_shr<8>;
+ p.cu[BLOCK_16x16].cpy1Dto2D_shr = cpy1Dto2D_shr<16>;
+ p.cu[BLOCK_32x32].cpy1Dto2D_shr = cpy1Dto2D_shr<32>;
- p.sa8d[BLOCK_4x4] = satd_4x4;
- p.sa8d[BLOCK_8x8] = sa8d_8x8;
- p.sa8d[BLOCK_16x16] = sa8d_16x16;
- p.sa8d[BLOCK_32x32] = sa8d16<32, 32>;
- p.sa8d[BLOCK_64x64] = sa8d16<64, 64>;
+ p.cu[BLOCK_4x4].sa8d = satd_4x4;
+ p.cu[BLOCK_8x8].sa8d = sa8d_8x8;
+ p.cu[BLOCK_16x16].sa8d = sa8d_16x16;
+ p.cu[BLOCK_32x32].sa8d = sa8d16<32, 32>;
+ p.cu[BLOCK_64x64].sa8d = sa8d16<64, 64>;
- p.psy_cost_pp[BLOCK_4x4] = psyCost_pp<BLOCK_4x4>;
- p.psy_cost_pp[BLOCK_8x8] = psyCost_pp<BLOCK_8x8>;
- p.psy_cost_pp[BLOCK_16x16] = psyCost_pp<BLOCK_16x16>;
- p.psy_cost_pp[BLOCK_32x32] = psyCost_pp<BLOCK_32x32>;
- p.psy_cost_pp[BLOCK_64x64] = psyCost_pp<BLOCK_64x64>;
+ p.cu[BLOCK_4x4].psy_cost_pp = psyCost_pp<BLOCK_4x4>;
+ p.cu[BLOCK_8x8].psy_cost_pp = psyCost_pp<BLOCK_8x8>;
+ p.cu[BLOCK_16x16].psy_cost_pp = psyCost_pp<BLOCK_16x16>;
+ p.cu[BLOCK_32x32].psy_cost_pp = psyCost_pp<BLOCK_32x32>;
+ p.cu[BLOCK_64x64].psy_cost_pp = psyCost_pp<BLOCK_64x64>;
- p.psy_cost_ss[BLOCK_4x4] = psyCost_ss<BLOCK_4x4>;
- p.psy_cost_ss[BLOCK_8x8] = psyCost_ss<BLOCK_8x8>;
- p.psy_cost_ss[BLOCK_16x16] = psyCost_ss<BLOCK_16x16>;
- p.psy_cost_ss[BLOCK_32x32] = psyCost_ss<BLOCK_32x32>;
- p.psy_cost_ss[BLOCK_64x64] = psyCost_ss<BLOCK_64x64>;
+ p.cu[BLOCK_4x4].psy_cost_ss = psyCost_ss<BLOCK_4x4>;
+ p.cu[BLOCK_8x8].psy_cost_ss = psyCost_ss<BLOCK_8x8>;
+ p.cu[BLOCK_16x16].psy_cost_ss = psyCost_ss<BLOCK_16x16>;
+ p.cu[BLOCK_32x32].psy_cost_ss = psyCost_ss<BLOCK_32x32>;
+ p.cu[BLOCK_64x64].psy_cost_ss = psyCost_ss<BLOCK_64x64>;
- p.sa8d_inter[LUMA_4x4] = satd_4x4;
- p.sa8d_inter[LUMA_8x8] = sa8d_8x8;
- p.sa8d_inter[LUMA_8x4] = satd_8x4;
- p.sa8d_inter[LUMA_4x8] = satd4<4, 8>;
- p.sa8d_inter[LUMA_16x16] = sa8d_16x16;
- p.sa8d_inter[LUMA_16x8] = sa8d8<16, 8>;
- p.sa8d_inter[LUMA_8x16] = sa8d8<8, 16>;
- p.sa8d_inter[LUMA_16x12] = satd8<16, 12>;
- p.sa8d_inter[LUMA_12x16] = satd4<12, 16>;
- p.sa8d_inter[LUMA_4x16] = satd4<4, 16>;
- p.sa8d_inter[LUMA_16x4] = satd8<16, 4>;
- p.sa8d_inter[LUMA_32x32] = sa8d16<32, 32>;
- p.sa8d_inter[LUMA_32x16] = sa8d16<32, 16>;
- p.sa8d_inter[LUMA_16x32] = sa8d16<16, 32>;
- p.sa8d_inter[LUMA_32x24] = sa8d8<32, 24>;
- p.sa8d_inter[LUMA_24x32] = sa8d8<24, 32>;
- p.sa8d_inter[LUMA_32x8] = sa8d8<32, 8>;
- p.sa8d_inter[LUMA_8x32] = sa8d8<8, 32>;
- p.sa8d_inter[LUMA_64x64] = sa8d16<64, 64>;
- p.sa8d_inter[LUMA_64x32] = sa8d16<64, 32>;
- p.sa8d_inter[LUMA_32x64] = sa8d16<32, 64>;
- p.sa8d_inter[LUMA_64x48] = sa8d16<64, 48>;
- p.sa8d_inter[LUMA_48x64] = sa8d16<48, 64>;
- p.sa8d_inter[LUMA_64x16] = sa8d16<64, 16>;
- p.sa8d_inter[LUMA_16x64] = sa8d16<16, 64>;
+ p.pu[LUMA_4x4].sa8d_inter = satd_4x4;
+ p.pu[LUMA_8x8].sa8d_inter = sa8d_8x8;
+ p.pu[LUMA_8x4].sa8d_inter = satd_8x4;
+ p.pu[LUMA_4x8].sa8d_inter = satd4<4, 8>;
+ p.pu[LUMA_16x16].sa8d_inter = sa8d_16x16;
+ p.pu[LUMA_16x8].sa8d_inter = sa8d8<16, 8>;
+ p.pu[LUMA_8x16].sa8d_inter = sa8d8<8, 16>;
+ p.pu[LUMA_16x12].sa8d_inter = satd8<16, 12>;
+ p.pu[LUMA_12x16].sa8d_inter = satd4<12, 16>;
+ p.pu[LUMA_4x16].sa8d_inter = satd4<4, 16>;
+ p.pu[LUMA_16x4].sa8d_inter = satd8<16, 4>;
+ p.pu[LUMA_32x32].sa8d_inter = sa8d16<32, 32>;
+ p.pu[LUMA_32x16].sa8d_inter = sa8d16<32, 16>;
+ p.pu[LUMA_16x32].sa8d_inter = sa8d16<16, 32>;
+ p.pu[LUMA_32x24].sa8d_inter = sa8d8<32, 24>;
+ p.pu[LUMA_24x32].sa8d_inter = sa8d8<24, 32>;
+ p.pu[LUMA_32x8].sa8d_inter = sa8d8<32, 8>;
+ p.pu[LUMA_8x32].sa8d_inter = sa8d8<8, 32>;
+ p.pu[LUMA_64x64].sa8d_inter = sa8d16<64, 64>;
+ p.pu[LUMA_64x32].sa8d_inter = sa8d16<64, 32>;
+ p.pu[LUMA_32x64].sa8d_inter = sa8d16<32, 64>;
+ p.pu[LUMA_64x48].sa8d_inter = sa8d16<64, 48>;
+ p.pu[LUMA_48x64].sa8d_inter = sa8d16<48, 64>;
+ p.pu[LUMA_64x16].sa8d_inter = sa8d16<64, 16>;
+ p.pu[LUMA_16x64].sa8d_inter = sa8d16<16, 64>;
- p.calcresidual[BLOCK_4x4] = getResidual<4>;
- p.calcresidual[BLOCK_8x8] = getResidual<8>;
- p.calcresidual[BLOCK_16x16] = getResidual<16>;
- p.calcresidual[BLOCK_32x32] = getResidual<32>;
- p.calcresidual[BLOCK_64x64] = NULL;
+ p.cu[BLOCK_4x4].calcresidual = getResidual<4>;
+ p.cu[BLOCK_8x8].calcresidual = getResidual<8>;
+ p.cu[BLOCK_16x16].calcresidual = getResidual<16>;
+ p.cu[BLOCK_32x32].calcresidual = getResidual<32>;
+ p.cu[BLOCK_64x64].calcresidual = NULL;
- p.transpose[BLOCK_4x4] = transpose<4>;
- p.transpose[BLOCK_8x8] = transpose<8>;
- p.transpose[BLOCK_16x16] = transpose<16>;
- p.transpose[BLOCK_32x32] = transpose<32>;
- p.transpose[BLOCK_64x64] = transpose<64>;
+ p.cu[BLOCK_4x4].transpose = transpose<4>;
+ p.cu[BLOCK_8x8].transpose = transpose<8>;
+ p.cu[BLOCK_16x16].transpose = transpose<16>;
+ p.cu[BLOCK_32x32].transpose = transpose<32>;
+ p.cu[BLOCK_64x64].transpose = transpose<64>;
- p.ssd_s[BLOCK_4x4] = pixel_ssd_s_c<4>;
- p.ssd_s[BLOCK_8x8] = pixel_ssd_s_c<8>;
- p.ssd_s[BLOCK_16x16] = pixel_ssd_s_c<16>;
- p.ssd_s[BLOCK_32x32] = pixel_ssd_s_c<32>;
+ p.cu[BLOCK_4x4].ssd_s = pixel_ssd_s_c<4>;
+ p.cu[BLOCK_8x8].ssd_s = pixel_ssd_s_c<8>;
+ p.cu[BLOCK_16x16].ssd_s = pixel_ssd_s_c<16>;
+ p.cu[BLOCK_32x32].ssd_s = pixel_ssd_s_c<32>;
p.weight_pp = weight_pp_c;
p.weight_sp = weight_sp_c;
@@ -1362,10 +1362,10 @@
p.ssim_4x4x2_core = ssim_4x4x2_core;
p.ssim_end_4 = ssim_end_4;
- p.var[BLOCK_8x8] = pixel_var<8>;
- p.var[BLOCK_16x16] = pixel_var<16>;
- p.var[BLOCK_32x32] = pixel_var<32>;
- p.var[BLOCK_64x64] = pixel_var<64>;
+ p.cu[BLOCK_8x8].var = pixel_var<8>;
+ p.cu[BLOCK_16x16].var = pixel_var<16>;
+ p.cu[BLOCK_32x32].var = pixel_var<32>;
+ p.cu[BLOCK_64x64].var = pixel_var<64>;
p.planecopy_cp = planecopy_cp_c;
p.planecopy_sp = planecopy_sp_c;
p.propagateCost = estimateCUPropagateCost;
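With the table macros rewritten, SET_FUNC_PRIMITIVE_TABLE_C2(pixelavg_pp) now expands to per-PU assignments of this form (first three entries shown for illustration):

    p.pu[LUMA_4x4].pixelavg_pp = pixelavg_pp<4, 4>;
    p.pu[LUMA_8x8].pixelavg_pp = pixelavg_pp<8, 8>;
    p.pu[LUMA_8x4].pixelavg_pp = pixelavg_pp<8, 4>;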
diff -r 1924c460d130 -r c6ca0fd54aa7 source/common/predict.cpp
--- a/source/common/predict.cpp Fri Jan 09 11:35:26 2015 +0530
+++ b/source/common/predict.cpp Thu Jan 08 15:23:38 2015 -0600
@@ -334,13 +334,13 @@
int yFrac = mv.y & 0x3;
if (!(yFrac | xFrac))
- primitives.luma_copy_pp[partEnum](dst, dstStride, src, srcStride);
+ primitives.pu[partEnum].luma_copy_pp(dst, dstStride, src, srcStride);
else if (!yFrac)
- primitives.luma_hpp[partEnum](src, srcStride, dst, dstStride, xFrac);
+ primitives.pu[partEnum].luma_hpp(src, srcStride, dst, dstStride, xFrac);
else if (!xFrac)
- primitives.luma_vpp[partEnum](src, srcStride, dst, dstStride, yFrac);
+ primitives.pu[partEnum].luma_vpp(src, srcStride, dst, dstStride, yFrac);
else
- primitives.luma_hvpp[partEnum](src, srcStride, dst, dstStride, xFrac, yFrac);
+ primitives.pu[partEnum].luma_hvpp(src, srcStride, dst, dstStride, xFrac, yFrac);
}
void Predict::predInterLumaShort(ShortYuv& dstSYuv, const PicYuv& refPic, const MV& mv) const
@@ -363,16 +363,16 @@
if (!(yFrac | xFrac))
primitives.luma_p2s(src, srcStride, dst, m_puWidth, m_puHeight);
else if (!yFrac)
- primitives.luma_hps[partEnum](src, srcStride, dst, dstStride, xFrac, 0);
+ primitives.pu[partEnum].luma_hps(src, srcStride, dst, dstStride, xFrac, 0);
else if (!xFrac)
- primitives.luma_vps[partEnum](src, srcStride, dst, dstStride, yFrac);
+ primitives.pu[partEnum].luma_vps(src, srcStride, dst, dstStride, yFrac);
else
{
int tmpStride = m_puWidth;
int filterSize = NTAPS_LUMA;
int halfFilterSize = (filterSize >> 1);
- primitives.luma_hps[partEnum](src, srcStride, m_immedVals, tmpStride, xFrac, 1);
- primitives.luma_vss[partEnum](m_immedVals + (halfFilterSize - 1) * tmpStride, tmpStride, dst, dstStride, yFrac);
+ primitives.pu[partEnum].luma_hps(src, srcStride, m_immedVals, tmpStride, xFrac, 1);
+ primitives.pu[partEnum].luma_vss(m_immedVals + (halfFilterSize - 1) * tmpStride, tmpStride, dst, dstStride, yFrac);
}
}
@@ -399,18 +399,18 @@
if (!(yFrac | xFrac))
{
- primitives.chroma[m_csp].copy_pp[partEnum](dstCb, dstStride, refCb, refStride);
- primitives.chroma[m_csp].copy_pp[partEnum](dstCr, dstStride, refCr, refStride);
+ primitives.chroma[m_csp].pu[partEnum].copy_pp(dstCb, dstStride, refCb, refStride);
+ primitives.chroma[m_csp].pu[partEnum].copy_pp(dstCr, dstStride, refCr, refStride);
}
else if (!yFrac)
{
- primitives.chroma[m_csp].filter_hpp[partEnum](refCb, refStride, dstCb, dstStride, xFrac << (1 - m_hChromaShift));
- primitives.chroma[m_csp].filter_hpp[partEnum](refCr, refStride, dstCr, dstStride, xFrac << (1 - m_hChromaShift));
+ primitives.chroma[m_csp].pu[partEnum].filter_hpp(refCb, refStride, dstCb, dstStride, xFrac << (1 - m_hChromaShift));
+ primitives.chroma[m_csp].pu[partEnum].filter_hpp(refCr, refStride, dstCr, dstStride, xFrac << (1 - m_hChromaShift));
}
else if (!xFrac)
{
- primitives.chroma[m_csp].filter_vpp[partEnum](refCb, refStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift));
- primitives.chroma[m_csp].filter_vpp[partEnum](refCr, refStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift));
+ primitives.chroma[m_csp].pu[partEnum].filter_vpp(refCb, refStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift));
+ primitives.chroma[m_csp].pu[partEnum].filter_vpp(refCr, refStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift));
}
else
{
@@ -418,11 +418,11 @@
int filterSize = NTAPS_CHROMA;
int halfFilterSize = (filterSize >> 1);
- primitives.chroma[m_csp].filter_hps[partEnum](refCb, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
- primitives.chroma[m_csp].filter_vsp[partEnum](m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift));
+ primitives.chroma[m_csp].pu[partEnum].filter_hps(refCb, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
+ primitives.chroma[m_csp].pu[partEnum].filter_vsp(m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift));
- primitives.chroma[m_csp].filter_hps[partEnum](refCr, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
- primitives.chroma[m_csp].filter_vsp[partEnum](m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift));
+ primitives.chroma[m_csp].pu[partEnum].filter_hps(refCr, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
+ primitives.chroma[m_csp].pu[partEnum].filter_vsp(m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift));
}
}
@@ -459,23 +459,23 @@
}
else if (!yFrac)
{
- primitives.chroma[m_csp].filter_hps[partEnum](refCb, refStride, dstCb, dstStride, xFrac << (1 - m_hChromaShift), 0);
- primitives.chroma[m_csp].filter_hps[partEnum](refCr, refStride, dstCr, dstStride, xFrac << (1 - m_hChromaShift), 0);
+ primitives.chroma[m_csp].pu[partEnum].filter_hps(refCb, refStride, dstCb, dstStride, xFrac << (1 - m_hChromaShift), 0);
+ primitives.chroma[m_csp].pu[partEnum].filter_hps(refCr, refStride, dstCr, dstStride, xFrac << (1 - m_hChromaShift), 0);
}
else if (!xFrac)
{
- primitives.chroma[m_csp].filter_vps[partEnum](refCb, refStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift));
- primitives.chroma[m_csp].filter_vps[partEnum](refCr, refStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift));
+ primitives.chroma[m_csp].pu[partEnum].filter_vps(refCb, refStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift));
+ primitives.chroma[m_csp].pu[partEnum].filter_vps(refCr, refStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift));
}
else
{
int extStride = cxWidth;
int filterSize = NTAPS_CHROMA;
int halfFilterSize = (filterSize >> 1);
- primitives.chroma[m_csp].filter_hps[partEnum](refCb, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
- primitives.chroma[m_csp].filter_vss[partEnum](m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift));
- primitives.chroma[m_csp].filter_hps[partEnum](refCr, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
- primitives.chroma[m_csp].filter_vss[partEnum](m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift));
+ primitives.chroma[m_csp].pu[partEnum].filter_hps(refCb, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
+ primitives.chroma[m_csp].pu[partEnum].filter_vss(m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift));
+ primitives.chroma[m_csp].pu[partEnum].filter_hps(refCr, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
+ primitives.chroma[m_csp].pu[partEnum].filter_vss(m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift));
}
}
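The diagonal (xFrac && yFrac) cases above keep the same two-pass structure: the horizontal pass writes extra rows into m_immedVals so the vertical pass has its filter taps, and the vertical pass starts (halfFilterSize - 1) rows into that buffer. A hedged sketch of the buffer math the new call sites assume (the sizing comment is my inference, not stated in the patch):

    int tmpStride = m_puWidth;
    int filterSize = NTAPS_LUMA;           // 8-tap luma interpolation filter
    int halfFilterSize = filterSize >> 1;
    // m_immedVals must hold m_puHeight + filterSize - 1 rows of width m_puWidth
    primitives.pu[partEnum].luma_hps(src, srcStride, m_immedVals, tmpStride, xFrac, 1);
    primitives.pu[partEnum].luma_vss(m_immedVals + (halfFilterSize - 1) * tmpStride,
                                     tmpStride, dst, dstStride, yFrac);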
diff -r 1924c460d130 -r c6ca0fd54aa7 source/common/primitives.cpp
--- a/source/common/primitives.cpp Fri Jan 09 11:35:26 2015 +0530
+++ b/source/common/primitives.cpp Thu Jan 08 15:23:38 2015 -0600
@@ -71,79 +71,79 @@
/* copy reusable luma primitives to chroma 4:4:4 */
for (int i = 0; i < NUM_LUMA_PARTITIONS; i++)
{
- p.chroma[X265_CSP_I444].copy_pp[i] = p.luma_copy_pp[i];
- p.chroma[X265_CSP_I444].copy_ps[i] = p.luma_copy_ps[i];
- p.chroma[X265_CSP_I444].copy_sp[i] = p.luma_copy_sp[i];
- p.chroma[X265_CSP_I444].copy_ss[i] = p.luma_copy_ss[i];
- p.chroma[X265_CSP_I444].addAvg[i] = p.luma_addAvg[i];
- p.chroma[X265_CSP_I444].satd[i] = p.satd[i];
+ p.chroma[X265_CSP_I444].pu[i].copy_pp = p.pu[i].luma_copy_pp;
+ p.chroma[X265_CSP_I444].pu[i].copy_ps = p.pu[i].luma_copy_ps;
+ p.chroma[X265_CSP_I444].pu[i].copy_sp = p.pu[i].luma_copy_sp;
+ p.chroma[X265_CSP_I444].pu[i].copy_ss = p.pu[i].luma_copy_ss;
+ p.chroma[X265_CSP_I444].pu[i].addAvg = p.pu[i].luma_addAvg;
+ p.chroma[X265_CSP_I444].pu[i].satd = p.pu[i].satd;
}
for (int i = 0; i < NUM_SQUARE_BLOCKS; i++)
{
- p.chroma[X265_CSP_I444].add_ps[i] = p.luma_add_ps[i];
- p.chroma[X265_CSP_I444].sub_ps[i] = p.luma_sub_ps[i];
+ p.chroma[X265_CSP_I444].cu[i].add_ps = p.pu[i].luma_add_ps;
+ p.chroma[X265_CSP_I444].cu[i].sub_ps = p.pu[i].luma_sub_ps;
}
- primitives.sa8d[BLOCK_4x4] = primitives.satd[LUMA_4x4];
- primitives.sa8d[BLOCK_8x8] = primitives.sa8d_inter[LUMA_8x8];
- primitives.sa8d[BLOCK_16x16] = primitives.sa8d_inter[LUMA_16x16];
- primitives.sa8d[BLOCK_32x32] = primitives.sa8d_inter[LUMA_32x32];
- primitives.sa8d[BLOCK_64x64] = primitives.sa8d_inter[LUMA_64x64];
+ primitives.cu[BLOCK_4x4].sa8d = primitives.pu[LUMA_4x4].satd;
+ primitives.cu[BLOCK_8x8].sa8d = primitives.pu[LUMA_8x8].sa8d_inter;
+ primitives.cu[BLOCK_16x16].sa8d = primitives.pu[LUMA_16x16].sa8d_inter;
+ primitives.cu[BLOCK_32x32].sa8d = primitives.pu[LUMA_32x32].sa8d_inter;
+ primitives.cu[BLOCK_64x64].sa8d = primitives.pu[LUMA_64x64].sa8d_inter;
// SA8D devolves to SATD for blocks not even multiples of 8x8
- primitives.sa8d_inter[LUMA_4x4] = primitives.satd[LUMA_4x4];
- primitives.sa8d_inter[LUMA_4x8] = primitives.satd[LUMA_4x8];
- primitives.sa8d_inter[LUMA_4x16] = primitives.satd[LUMA_4x16];
- primitives.sa8d_inter[LUMA_8x4] = primitives.satd[LUMA_8x4];
- primitives.sa8d_inter[LUMA_16x4] = primitives.satd[LUMA_16x4];
- primitives.sa8d_inter[LUMA_16x12] = primitives.satd[LUMA_16x12];
- primitives.sa8d_inter[LUMA_12x16] = primitives.satd[LUMA_12x16];
+ primitives.pu[LUMA_4x4].sa8d_inter = primitives.pu[LUMA_4x4].satd;
+ primitives.pu[LUMA_4x8].sa8d_inter = primitives.pu[LUMA_4x8].satd;
+ primitives.pu[LUMA_4x16].sa8d_inter = primitives.pu[LUMA_4x16].satd;
+ primitives.pu[LUMA_8x4].sa8d_inter = primitives.pu[LUMA_8x4].satd;
+ primitives.pu[LUMA_16x4].sa8d_inter = primitives.pu[LUMA_16x4].satd;
+ primitives.pu[LUMA_16x12].sa8d_inter = primitives.pu[LUMA_16x12].satd;
+ primitives.pu[LUMA_12x16].sa8d_inter = primitives.pu[LUMA_12x16].satd;
// Chroma SATD can often reuse luma primitives
- p.chroma[X265_CSP_I420].satd[CHROMA_4x4] = primitives.satd[LUMA_4x4];
- p.chroma[X265_CSP_I420].satd[CHROMA_8x8] = primitives.satd[LUMA_8x8];
- p.chroma[X265_CSP_I420].satd[CHROMA_16x16] = primitives.satd[LUMA_16x16];
- p.chroma[X265_CSP_I420].satd[CHROMA_32x32] = primitives.satd[LUMA_32x32];
+ p.chroma[X265_CSP_I420].pu[CHROMA_4x4].satd = primitives.pu[LUMA_4x4].satd;
+ p.chroma[X265_CSP_I420].pu[CHROMA_8x8].satd = primitives.pu[LUMA_8x8].satd;
+ p.chroma[X265_CSP_I420].pu[CHROMA_16x16].satd = primitives.pu[LUMA_16x16].satd;
+ p.chroma[X265_CSP_I420].pu[CHROMA_32x32].satd = primitives.pu[LUMA_32x32].satd;
- p.chroma[X265_CSP_I420].satd[CHROMA_8x4] = primitives.satd[LUMA_8x4];
- p.chroma[X265_CSP_I420].satd[CHROMA_4x8] = primitives.satd[LUMA_4x8];
- p.chroma[X265_CSP_I420].satd[CHROMA_16x8] = primitives.satd[LUMA_16x8];
- p.chroma[X265_CSP_I420].satd[CHROMA_8x16] = primitives.satd[LUMA_8x16];
- p.chroma[X265_CSP_I420].satd[CHROMA_32x16] = primitives.satd[LUMA_32x16];
- p.chroma[X265_CSP_I420].satd[CHROMA_16x32] = primitives.satd[LUMA_16x32];
+ p.chroma[X265_CSP_I420].pu[CHROMA_8x4].satd = primitives.pu[LUMA_8x4].satd;
+ p.chroma[X265_CSP_I420].pu[CHROMA_4x8].satd = primitives.pu[LUMA_4x8].satd;
+ p.chroma[X265_CSP_I420].pu[CHROMA_16x8].satd = primitives.pu[LUMA_16x8].satd;
+ p.chroma[X265_CSP_I420].pu[CHROMA_8x16].satd = primitives.pu[LUMA_8x16].satd;
+ p.chroma[X265_CSP_I420].pu[CHROMA_32x16].satd = primitives.pu[LUMA_32x16].satd;
+ p.chroma[X265_CSP_I420].pu[CHROMA_16x32].satd = primitives.pu[LUMA_16x32].satd;
- p.chroma[X265_CSP_I420].satd[CHROMA_16x12] = primitives.satd[LUMA_16x12];
- p.chroma[X265_CSP_I420].satd[CHROMA_12x16] = primitives.satd[LUMA_12x16];
- p.chroma[X265_CSP_I420].satd[CHROMA_16x4] = primitives.satd[LUMA_16x4];
- p.chroma[X265_CSP_I420].satd[CHROMA_4x16] = primitives.satd[LUMA_4x16];
- p.chroma[X265_CSP_I420].satd[CHROMA_32x24] = primitives.satd[LUMA_32x24];
- p.chroma[X265_CSP_I420].satd[CHROMA_24x32] = primitives.satd[LUMA_24x32];
- p.chroma[X265_CSP_I420].satd[CHROMA_32x8] = primitives.satd[LUMA_32x8];
- p.chroma[X265_CSP_I420].satd[CHROMA_8x32] = primitives.satd[LUMA_8x32];
+ p.chroma[X265_CSP_I420].pu[CHROMA_16x12].satd = primitives.pu[LUMA_16x12].satd;
+ p.chroma[X265_CSP_I420].pu[CHROMA_12x16].satd = primitives.pu[LUMA_12x16].satd;
+ p.chroma[X265_CSP_I420].pu[CHROMA_16x4].satd = primitives.pu[LUMA_16x4].satd;
+ p.chroma[X265_CSP_I420].pu[CHROMA_4x16].satd = primitives.pu[LUMA_4x16].satd;
+ p.chroma[X265_CSP_I420].pu[CHROMA_32x24].satd = primitives.pu[LUMA_32x24].satd;
+ p.chroma[X265_CSP_I420].pu[CHROMA_24x32].satd = primitives.pu[LUMA_24x32].satd;
+ p.chroma[X265_CSP_I420].pu[CHROMA_32x8].satd = primitives.pu[LUMA_32x8].satd;
+ p.chroma[X265_CSP_I420].pu[CHROMA_8x32].satd = primitives.pu[LUMA_8x32].satd;
- p.chroma[X265_CSP_I422].satd[CHROMA422_4x8] = primitives.satd[LUMA_4x8];
- p.chroma[X265_CSP_I422].satd[CHROMA422_8x16] = primitives.satd[LUMA_8x16];
- p.chroma[X265_CSP_I422].satd[CHROMA422_16x32] = primitives.satd[LUMA_16x32];
- p.chroma[X265_CSP_I422].satd[CHROMA422_32x64] = primitives.satd[LUMA_32x64];
+ p.chroma[X265_CSP_I422].pu[CHROMA422_4x8].satd = primitives.pu[LUMA_4x8].satd;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_8x16].satd = primitives.pu[LUMA_8x16].satd;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_16x32].satd = primitives.pu[LUMA_16x32].satd;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_32x64].satd = primitives.pu[LUMA_32x64].satd;
- p.chroma[X265_CSP_I422].satd[CHROMA422_4x4] = primitives.satd[LUMA_4x4];
- p.chroma[X265_CSP_I422].satd[CHROMA422_8x8] = primitives.satd[LUMA_8x8];
- p.chroma[X265_CSP_I422].satd[CHROMA422_4x16] = primitives.satd[LUMA_4x16];
- p.chroma[X265_CSP_I422].satd[CHROMA422_16x16] = primitives.satd[LUMA_16x16];
- p.chroma[X265_CSP_I422].satd[CHROMA422_8x32] = primitives.satd[LUMA_8x32];
- p.chroma[X265_CSP_I422].satd[CHROMA422_32x32] = primitives.satd[LUMA_32x32];
- p.chroma[X265_CSP_I422].satd[CHROMA422_16x64] = primitives.satd[LUMA_16x64];
+ p.chroma[X265_CSP_I422].pu[CHROMA422_4x4].satd = primitives.pu[LUMA_4x4].satd;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_8x8].satd = primitives.pu[LUMA_8x8].satd;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_4x16].satd = primitives.pu[LUMA_4x16].satd;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_16x16].satd = primitives.pu[LUMA_16x16].satd;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_8x32].satd = primitives.pu[LUMA_8x32].satd;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_32x32].satd = primitives.pu[LUMA_32x32].satd;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_16x64].satd = primitives.pu[LUMA_16x64].satd;
//p.chroma[X265_CSP_I422].satd[CHROMA422_8x12] = satd4<8, 12>;
- p.chroma[X265_CSP_I422].satd[CHROMA422_8x4] = primitives.satd[LUMA_8x4];
+ p.chroma[X265_CSP_I422].pu[CHROMA422_8x4].satd = primitives.pu[LUMA_8x4].satd;
//p.chroma[X265_CSP_I422].satd[CHROMA422_16x24] = satd8<16, 24>;
//p.chroma[X265_CSP_I422].satd[CHROMA422_12x32] = satd4<12, 32>;
- p.chroma[X265_CSP_I422].satd[CHROMA422_16x8] = primitives.satd[LUMA_16x8];
+ p.chroma[X265_CSP_I422].pu[CHROMA422_16x8].satd = primitives.pu[LUMA_16x8].satd;
//p.chroma[X265_CSP_I422].satd[CHROMA422_4x32] = satd4<4, 32>;
//p.chroma[X265_CSP_I422].satd[CHROMA422_32x48] = satd8<32, 48>;
//p.chroma[X265_CSP_I422].satd[CHROMA422_24x64] = satd8<24, 64>;
- p.chroma[X265_CSP_I422].satd[CHROMA422_32x16] = primitives.satd[LUMA_32x16];
+ p.chroma[X265_CSP_I422].pu[CHROMA422_32x16].satd = primitives.pu[LUMA_32x16].satd;
//p.chroma[X265_CSP_I422].satd[CHROMA422_8x64] = satd8<8, 64>;
}
}
@@ -158,7 +158,7 @@
cpuid = x265::cpu_detect();
// initialize global variables
- if (!primitives.sad[0])
+ if (!primitives.pu[0].sad)
{
Setup_C_Primitives(primitives);
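p.pu[0].sad (the LUMA_4x4 SAD) serves as the initialization sentinel: it is the first pointer every build fills in, so a null value means the primitive tables were never set up. One observable consequence of the 4:4:4 aliasing above (an illustrative check, not in the patch):

    // after setup, 4:4:4 chroma shares luma's function pointers
    assert(primitives.chroma[X265_CSP_I444].pu[LUMA_8x8].satd ==
           primitives.pu[LUMA_8x8].satd);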
diff -r 1924c460d130 -r c6ca0fd54aa7 source/common/primitives.h
--- a/source/common/primitives.h Fri Jan 09 11:35:26 2015 +0530
+++ b/source/common/primitives.h Thu Jan 08 15:23:38 2015 -0600
@@ -42,7 +42,7 @@
LUMA_4x4, LUMA_8x8, LUMA_16x16, LUMA_32x32, LUMA_64x64,
// Rectangular
LUMA_8x4, LUMA_4x8,
- LUMA_16x8, LUMA_8x16,
+ LUMA_16x8, LUMA_8x16,
LUMA_32x16, LUMA_16x32,
LUMA_64x32, LUMA_32x64,
// Asymmetrical (0.75, 0.25)
@@ -206,42 +206,76 @@
* a vectorized primitive, or a C function. */
struct EncoderPrimitives
{
- pixelcmp_t sad[NUM_LUMA_PARTITIONS]; // Sum of Differences for each size
- pixelcmp_x3_t sad_x3[NUM_LUMA_PARTITIONS]; // Sum of Differences 3x for each size
- pixelcmp_x4_t sad_x4[NUM_LUMA_PARTITIONS]; // Sum of Differences 4x for each size
- pixelcmp_t sse_pp[NUM_LUMA_PARTITIONS]; // Sum of Square Error (pixel, pixel) fenc alignment not assumed
- pixelcmp_ss_t sse_ss[NUM_LUMA_PARTITIONS]; // Sum of Square Error (short, short) fenc alignment not assumed
- pixelcmp_sp_t sse_sp[NUM_LUMA_PARTITIONS]; // Sum of Square Error (short, pixel) fenc alignment not assumed
- pixel_ssd_s_t ssd_s[NUM_SQUARE_BLOCKS - 1]; // Sum of Square Error (short) fenc alignment not assumed
- pixelcmp_t satd[NUM_LUMA_PARTITIONS]; // Sum of Transformed differences (HADAMARD)
- pixelcmp_t sa8d_inter[NUM_LUMA_PARTITIONS]; // sa8d primitives for motion search partitions
- pixelcmp_t sa8d[NUM_SQUARE_BLOCKS]; // sa8d primitives for square intra blocks
- pixelcmp_t psy_cost_pp[NUM_SQUARE_BLOCKS]; // difference in AC energy between two blocks
- pixelcmp_ss_t psy_cost_ss[NUM_SQUARE_BLOCKS];
+ struct PU
+ {
+ pixelcmp_t sad; // Sum of Differences for each size
+ pixelcmp_x3_t sad_x3; // Sum of Differences 3x for each size
+ pixelcmp_x4_t sad_x4; // Sum of Differences 4x for each size
+ pixelcmp_t sse_pp; // Sum of Square Error (pixel, pixel) fenc alignment not assumed
+ pixelcmp_ss_t sse_ss; // Sum of Square Error (short, short) fenc alignment not assumed
+ pixelcmp_sp_t sse_sp; // Sum of Square Error (short, pixel) fenc alignment not assumed
+ pixelcmp_t satd; // Sum of Transformed differences (HADAMARD)
+ pixelcmp_t sa8d_inter; // sa8d primitives for motion search partitions
- dct_t dct[NUM_DCTS];
- idct_t idct[NUM_IDCTS];
+ pixelavg_pp_t pixelavg_pp;
+ addAvg_t luma_addAvg;
+
+ filter_pp_t luma_hpp;
+ filter_hps_t luma_hps;
+ filter_pp_t luma_vpp;
+ filter_ps_t luma_vps;
+ filter_sp_t luma_vsp;
+ filter_ss_t luma_vss;
+ filter_hv_pp_t luma_hvpp;
+
+ copy_pp_t luma_copy_pp;
+ copy_sp_t luma_copy_sp;
+ copy_ps_t luma_copy_ps;
+ copy_ss_t luma_copy_ss;
+
+ pixel_sub_ps_t luma_sub_ps;
+ pixel_add_ps_t luma_add_ps;
+
+ } pu[NUM_LUMA_PARTITIONS];
+
+ struct CU
+ {
+ dct_t dct;
+ idct_t idct;
+ calcresidual_t calcresidual;
+ blockfill_s_t blockfill_s; // block fill with value
+ cpy2Dto1D_shl_t cpy2Dto1D_shl;
+ cpy2Dto1D_shr_t cpy2Dto1D_shr;
+ cpy1Dto2D_shl_t cpy1Dto2D_shl;
+ cpy1Dto2D_shr_t cpy1Dto2D_shr;
+ copy_cnt_t copy_cnt;
+
+ transpose_t transpose;
+
+ var_t var;
+
+ pixelcmp_t sa8d; // sa8d primitives for square intra blocks
+ pixel_ssd_s_t ssd_s; // Sum of Square Error (short) fenc alignment not assumed
+ pixelcmp_t psy_cost_pp; // difference in AC energy between two blocks
+ pixelcmp_ss_t psy_cost_ss;
+
+ } cu[NUM_SQUARE_BLOCKS];
+
+ dct_t dst4x4;
+ idct_t idst4x4;
+
quant_t quant;
nquant_t nquant;
dequant_scaling_t dequant_scaling;
dequant_normal_t dequant_normal;
count_nonzero_t count_nonzero;
denoiseDct_t denoiseDct;
- calcresidual_t calcresidual[NUM_SQUARE_BLOCKS];
- blockfill_s_t blockfill_s[NUM_SQUARE_BLOCKS]; // block fill with value
- cpy2Dto1D_shl_t cpy2Dto1D_shl[NUM_SQUARE_BLOCKS - 1];
- cpy2Dto1D_shr_t cpy2Dto1D_shr[NUM_SQUARE_BLOCKS - 1];
- cpy1Dto2D_shl_t cpy1Dto2D_shl[NUM_SQUARE_BLOCKS - 1];
- cpy1Dto2D_shr_t cpy1Dto2D_shr[NUM_SQUARE_BLOCKS - 1];
- copy_cnt_t copy_cnt[NUM_SQUARE_BLOCKS - 1];
intra_pred_t intra_pred[NUM_INTRA_MODE][NUM_TR_SIZE];
intra_allangs_t intra_pred_allangs[NUM_TR_SIZE];
- transpose_t transpose[NUM_SQUARE_BLOCKS];
scale_t scale1D_128to64;
scale_t scale2D_64to32;
- var_t var[NUM_SQUARE_BLOCKS];
ssim_4x4x2_core_t ssim_4x4x2_core;
ssim_end4_t ssim_end_4;
@@ -261,42 +295,36 @@
weightp_sp_t weight_sp;
weightp_pp_t weight_pp;
- pixelavg_pp_t pixelavg_pp[NUM_LUMA_PARTITIONS];
- addAvg_t luma_addAvg[NUM_LUMA_PARTITIONS];
- filter_pp_t luma_hpp[NUM_LUMA_PARTITIONS];
- filter_hps_t luma_hps[NUM_LUMA_PARTITIONS];
- filter_pp_t luma_vpp[NUM_LUMA_PARTITIONS];
- filter_ps_t luma_vps[NUM_LUMA_PARTITIONS];
- filter_sp_t luma_vsp[NUM_LUMA_PARTITIONS];
- filter_ss_t luma_vss[NUM_LUMA_PARTITIONS];
- filter_hv_pp_t luma_hvpp[NUM_LUMA_PARTITIONS];
filter_p2s_t luma_p2s;
- copy_pp_t luma_copy_pp[NUM_LUMA_PARTITIONS];
- copy_sp_t luma_copy_sp[NUM_LUMA_PARTITIONS];
- copy_ps_t luma_copy_ps[NUM_LUMA_PARTITIONS];
- copy_ss_t luma_copy_ss[NUM_LUMA_PARTITIONS];
- pixel_sub_ps_t luma_sub_ps[NUM_SQUARE_BLOCKS];
- pixel_add_ps_t luma_add_ps[NUM_SQUARE_BLOCKS];
+ struct Chroma
+ {
+ struct PUChroma
+ {
+ // ME and MC
+ pixelcmp_t satd;
+ filter_pp_t filter_vpp;
+ filter_ps_t filter_vps;
+ filter_sp_t filter_vsp;
+ filter_ss_t filter_vss;
+ filter_pp_t filter_hpp;
+ filter_hps_t filter_hps;
+ addAvg_t addAvg;
+ copy_pp_t copy_pp;
+ copy_sp_t copy_sp;
+ copy_ps_t copy_ps;
+ copy_ss_t copy_ss;
+ } pu[NUM_LUMA_PARTITIONS];
- struct
- {
- pixelcmp_t satd[NUM_LUMA_PARTITIONS];
- filter_pp_t filter_vpp[NUM_LUMA_PARTITIONS];
- filter_ps_t filter_vps[NUM_LUMA_PARTITIONS];
- filter_sp_t filter_vsp[NUM_LUMA_PARTITIONS];
- filter_ss_t filter_vss[NUM_LUMA_PARTITIONS];
- filter_pp_t filter_hpp[NUM_LUMA_PARTITIONS];
- filter_hps_t filter_hps[NUM_LUMA_PARTITIONS];
- addAvg_t addAvg[NUM_LUMA_PARTITIONS];
- copy_pp_t copy_pp[NUM_LUMA_PARTITIONS];
- copy_sp_t copy_sp[NUM_LUMA_PARTITIONS];
- copy_ps_t copy_ps[NUM_LUMA_PARTITIONS];
- copy_ss_t copy_ss[NUM_LUMA_PARTITIONS];
- pixel_sub_ps_t sub_ps[NUM_SQUARE_BLOCKS];
- pixel_add_ps_t add_ps[NUM_SQUARE_BLOCKS];
- filter_p2s_t p2s;
+ struct CUChroma
+ {
+ pixelcmp_t sa8d;
+ pixel_sub_ps_t sub_ps;
+ pixel_add_ps_t add_ps;
+ } cu[NUM_SQUARE_BLOCKS];
+
+ filter_p2s_t p2s;
} chroma[X265_CSP_COUNT];
};
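The net effect of the header change above: the parallel per-primitive arrays indexed by a partition or block enum become two tables of structs, pu[] and cu[], each holding every primitive for that shape. A minimal before/after sketch of a call site (buffer and stride names here are illustrative, not from the patch):

    // before: one array per primitive, indexed by a partition/block enum
    int cost = primitives.satd[LUMA_16x16](fenc, fstride, pred, pstride);
    primitives.dct[DCT_16x16](residual, coeff, stride);

    // after: one struct per partition/block, holding all of its primitives
    int cost = primitives.pu[LUMA_16x16].satd(fenc, fstride, pred, pstride);
    primitives.cu[BLOCK_16x16].dct(residual, coeff, stride);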
diff -r 1924c460d130 -r c6ca0fd54aa7 source/common/quant.cpp
--- a/source/common/quant.cpp Fri Jan 09 11:35:26 2015 +0530
+++ b/source/common/quant.cpp Thu Jan 08 15:23:38 2015 -0600
@@ -329,7 +329,7 @@
if (cu.m_tqBypass[absPartIdx])
{
X265_CHECK(log2TrSize >= 2 && log2TrSize <= 5, "Block size mistake!\n");
- return primitives.copy_cnt[sizeIdx](coeff, residual, resiStride);
+ return primitives.cu[sizeIdx].copy_cnt(coeff, residual, resiStride);
}
bool isLuma = ttype == TEXT_LUMA;
@@ -341,21 +341,21 @@
{
#if X265_DEPTH <= 10
X265_CHECK(transformShift >= 0, "invalid transformShift\n");
- primitives.cpy2Dto1D_shl[sizeIdx](m_resiDctCoeff, residual, resiStride, transformShift);
+ primitives.cu[sizeIdx].cpy2Dto1D_shl(m_resiDctCoeff, residual, resiStride, transformShift);
#else
if (transformShift >= 0)
- primitives.cpy2Dto1D_shl[sizeIdx](m_resiDctCoeff, residual, resiStride, transformShift);
+ primitives.cu[sizeIdx].cpy2Dto1D_shl(m_resiDctCoeff, residual, resiStride, transformShift);
else
- primitives.cpy2Dto1D_shr[sizeIdx](m_resiDctCoeff, residual, resiStride, -transformShift);
+ primitives.cu[sizeIdx].cpy2Dto1D_shr(m_resiDctCoeff, residual, resiStride, -transformShift);
#endif
}
else
{
bool isIntra = cu.isIntra(absPartIdx);
int useDST = !sizeIdx && isLuma && isIntra;
- int index = DCT_4x4 + sizeIdx - useDST;
+ int index = BLOCK_4x4 + sizeIdx - useDST;
- primitives.dct[index](residual, m_resiDctCoeff, resiStride);
+ primitives.cu[index].dct(residual, m_resiDctCoeff, resiStride);
/* NOTE: if RDOQ is disabled globally, psy-rdoq is also disabled, so
* there is no risk of performing this DCT unnecessarily */
@@ -363,8 +363,8 @@
{
int trSize = 1 << log2TrSize;
/* perform DCT on source pixels for psy-rdoq */
- primitives.luma_copy_ps[sizeIdx](m_fencShortBuf, trSize, fenc, fencStride);
- primitives.dct[index](m_fencShortBuf, m_fencDctCoeff, trSize);
+ primitives.pu[sizeIdx].luma_copy_ps(m_fencShortBuf, trSize, fenc, fencStride);
+ primitives.cu[index].dct(m_fencShortBuf, m_fencDctCoeff, trSize);
}
if (m_nr)
@@ -411,7 +411,7 @@
const uint32_t sizeIdx = log2TrSize - 2;
if (transQuantBypass)
{
- primitives.cpy1Dto2D_shl[sizeIdx](residual, coeff, resiStride, 0);
+ primitives.cu[sizeIdx].cpy1Dto2D_shl(residual, coeff, resiStride, 0);
return;
}
@@ -438,12 +438,12 @@
{
#if X265_DEPTH <= 10
X265_CHECK(transformShift > 0, "invalid transformShift\n");
- primitives.cpy1Dto2D_shr[sizeIdx](residual, m_resiDctCoeff, resiStride, transformShift);
+ primitives.cu[sizeIdx].cpy1Dto2D_shr(residual, m_resiDctCoeff, resiStride, transformShift);
#else
if (transformShift > 0)
- primitives.cpy1Dto2D_shr[sizeIdx](residual, m_resiDctCoeff, resiStride, transformShift);
+ primitives.cu[sizeIdx].cpy1Dto2D_shr(residual, m_resiDctCoeff, resiStride, transformShift);
else
- primitives.cpy1Dto2D_shl[sizeIdx](residual, m_resiDctCoeff, resiStride, -transformShift);
+ primitives.cu[sizeIdx].cpy1Dto2D_shl(residual, m_resiDctCoeff, resiStride, -transformShift);
#endif
}
else
@@ -461,11 +461,11 @@
const int add_2nd = 1 << (shift_2nd - 1);
int dc_val = (((m_resiDctCoeff[0] * (64 >> 6) + add_1st) >> shift_1st) * (64 >> 3) + add_2nd) >> shift_2nd;
- primitives.blockfill_s[sizeIdx](residual, resiStride, (int16_t)dc_val);
+ primitives.cu[sizeIdx].blockfill_s(residual, resiStride, (int16_t)dc_val);
return;
}
- primitives.idct[IDCT_4x4 + sizeIdx - useDST](m_resiDctCoeff, residual, resiStride);
+ primitives.cu[BLOCK_4x4 + sizeIdx - useDST].idct(m_resiDctCoeff, residual, resiStride);
}
}
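A quick sanity check of the index arithmetic in the quant.cpp hunks above, assuming the BLOCK_ enum starts at BLOCK_4x4 == 0 (variable names as in the hunks):

    const uint32_t sizeIdx = log2TrSize - 2;
    // log2TrSize : 2    3    4     5
    // sizeIdx    : 0    1    2     3
    // cu[] slot  : 4x4  8x8  16x16 32x32

Note that the 4x4 DST keeps separate entry points (p.dst4x4 / p.idst4x4) rather than a cu[] slot, so when useDST is set the expression BLOCK_4x4 + sizeIdx - useDST evaluates below cu[0]; the DST case presumably still needs explicit routing to the new entry points.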
diff -r 1924c460d130 -r c6ca0fd54aa7 source/common/shortyuv.cpp
--- a/source/common/shortyuv.cpp Fri Jan 09 11:35:26 2015 +0530
+++ b/source/common/shortyuv.cpp Thu Jan 08 15:23:38 2015 -0600
@@ -74,9 +74,9 @@
void ShortYuv::subtract(const Yuv& srcYuv0, const Yuv& srcYuv1, uint32_t log2Size)
{
const int sizeIdx = log2Size - 2;
- primitives.luma_sub_ps[sizeIdx](m_buf[0], m_size, srcYuv0.m_buf[0], srcYuv1.m_buf[0], srcYuv0.m_size, srcYuv1.m_size);
- primitives.chroma[m_csp].sub_ps[sizeIdx](m_buf[1], m_csize, srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize, srcYuv1.m_csize);
- primitives.chroma[m_csp].sub_ps[sizeIdx](m_buf[2], m_csize, srcYuv0.m_buf[2], srcYuv1.m_buf[2], srcYuv0.m_csize, srcYuv1.m_csize);
+ primitives.pu[sizeIdx].luma_sub_ps(m_buf[0], m_size, srcYuv0.m_buf[0], srcYuv1.m_buf[0], srcYuv0.m_size, srcYuv1.m_size);
+ primitives.chroma[m_csp].cu[sizeIdx].sub_ps(m_buf[1], m_csize, srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize, srcYuv1.m_csize);
+ primitives.chroma[m_csp].cu[sizeIdx].sub_ps(m_buf[2], m_csize, srcYuv0.m_buf[2], srcYuv1.m_buf[2], srcYuv0.m_csize, srcYuv1.m_csize);
}
void ShortYuv::copyPartToPartLuma(ShortYuv& dstYuv, uint32_t absPartIdx, uint32_t log2Size) const
@@ -84,7 +84,7 @@
const int16_t* src = getLumaAddr(absPartIdx);
int16_t* dst = dstYuv.getLumaAddr(absPartIdx);
- primitives.luma_copy_ss[log2Size - 2](dst, dstYuv.m_size, src, m_size);
+ primitives.pu[log2Size - 2].luma_copy_ss(dst, dstYuv.m_size, src, m_size);
}
void ShortYuv::copyPartToPartLuma(Yuv& dstYuv, uint32_t absPartIdx, uint32_t log2Size) const
@@ -92,7 +92,7 @@
const int16_t* src = getLumaAddr(absPartIdx);
pixel* dst = dstYuv.getLumaAddr(absPartIdx);
- primitives.luma_copy_sp[log2Size - 2](dst, dstYuv.m_size, src, m_size);
+ primitives.pu[log2Size - 2].luma_copy_sp(dst, dstYuv.m_size, src, m_size);
}
void ShortYuv::copyPartToPartChroma(ShortYuv& dstYuv, uint32_t absPartIdx, uint32_t log2SizeL) const
@@ -103,8 +103,8 @@
int16_t* dstU = dstYuv.getCbAddr(absPartIdx);
int16_t* dstV = dstYuv.getCrAddr(absPartIdx);
- primitives.chroma[m_csp].copy_ss[part](dstU, dstYuv.m_csize, srcU, m_csize);
- primitives.chroma[m_csp].copy_ss[part](dstV, dstYuv.m_csize, srcV, m_csize);
+ primitives.chroma[m_csp].pu[part].copy_ss(dstU, dstYuv.m_csize, srcU, m_csize);
+ primitives.chroma[m_csp].pu[part].copy_ss(dstV, dstYuv.m_csize, srcV, m_csize);
}
void ShortYuv::copyPartToPartChroma(Yuv& dstYuv, uint32_t absPartIdx, uint32_t log2SizeL) const
@@ -115,6 +115,6 @@
pixel* dstU = dstYuv.getCbAddr(absPartIdx);
pixel* dstV = dstYuv.getCrAddr(absPartIdx);
- primitives.chroma[m_csp].copy_sp[part](dstU, dstYuv.m_csize, srcU, m_csize);
- primitives.chroma[m_csp].copy_sp[part](dstV, dstYuv.m_csize, srcV, m_csize);
+ primitives.chroma[m_csp].pu[part].copy_sp(dstU, dstYuv.m_csize, srcU, m_csize);
+ primitives.chroma[m_csp].pu[part].copy_sp(dstV, dstYuv.m_csize, srcV, m_csize);
}
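The shortyuv.cpp hunks show the matching chroma split: per-partition block copies go through chroma[csp].pu[part], while square-block subtract/add go through chroma[csp].cu[sizeIdx], so the two lookups live in different index spaces (a CHROMA_/CHROMA422_ partition versus a plain size index). A sketch, where the helper deriving part is an assumption and not part of the patch:

    int part = partitionFromLog2Size(log2SizeL); // assumed helper
    primitives.chroma[m_csp].pu[part].copy_sp(dstU, dstYuv.m_csize, srcU, m_csize);
    primitives.chroma[m_csp].cu[log2Size - 2].sub_ps(m_buf[1], m_csize,
        srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize, srcYuv1.m_csize);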
diff -r 1924c460d130 -r c6ca0fd54aa7 source/common/vec/dct-sse3.cpp
--- a/source/common/vec/dct-sse3.cpp Fri Jan 09 11:35:26 2015 +0530
+++ b/source/common/vec/dct-sse3.cpp Thu Jan 08 15:23:38 2015 -0600
@@ -1402,9 +1402,9 @@
* still somewhat rare on end-user PCs we still compile and link these SSE3
* intrinsic SIMD functions */
#if !HIGH_BIT_DEPTH
- p.idct[IDCT_8x8] = idct8;
- p.idct[IDCT_16x16] = idct16;
- p.idct[IDCT_32x32] = idct32;
+ p.cu[BLOCK_8x8].idct = idct8;
+ p.cu[BLOCK_16x16].idct = idct16;
+ p.cu[BLOCK_32x32].idct = idct32;
#endif
}
}
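For orientation: these intrinsic files only re-point slots that the C setup has already filled, so dispatch is layered -- C primitives first, then intrinsics, then the assembly kernels below, with the strongest enabled ISA winning. Sketch of the flow (the setup-function names follow x265's primitives.cpp, which this patch does not touch, so treat them as assumed):

    Setup_C_Primitives(p);                  // e.g. p.cu[BLOCK_8x8].idct = idct8_c
    Setup_Vector_Primitives(p, cpuMask);    // dct-sse3/ssse3 overrides, as above
    Setup_Assembly_Primitives(p, cpuMask);  // asm-primitives.cpp, below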
diff -r 1924c460d130 -r c6ca0fd54aa7 source/common/vec/dct-ssse3.cpp
--- a/source/common/vec/dct-ssse3.cpp Fri Jan 09 11:35:26 2015 +0530
+++ b/source/common/vec/dct-ssse3.cpp Thu Jan 08 15:23:38 2015 -0600
@@ -1111,8 +1111,8 @@
* still somewhat rare on end-user PCs we still compile and link these SSSE3
* intrinsic SIMD functions */
#if !HIGH_BIT_DEPTH
- p.dct[DCT_16x16] = dct16;
- p.dct[DCT_32x32] = dct32;
+ p.cu[BLOCK_16x16].dct = dct16;
+ p.cu[BLOCK_32x32].dct = dct32;
#endif
}
}
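The asm-primitives.cpp rewrite below is large but mechanical: every p.name[PART] = fn assignment becomes p.pu[PART].name = fn (or the cu/chroma equivalent), including inside the token-pasting setup macros. One expansion shows the shape; this particular invocation is hypothetical:

    // SETUP_CHROMA_FUNC_DEF_420(4, 4, _sse4) now expands to:
    p.chroma[X265_CSP_I420].pu[CHROMA_4x4].filter_hpp = x265_interp_4tap_horiz_pp_4x4_sse4;
    p.chroma[X265_CSP_I420].pu[CHROMA_4x4].filter_hps = x265_interp_4tap_horiz_ps_4x4_sse4;
    p.chroma[X265_CSP_I420].pu[CHROMA_4x4].filter_vpp = x265_interp_4tap_vert_pp_4x4_sse4;
    p.chroma[X265_CSP_I420].pu[CHROMA_4x4].filter_vps = x265_interp_4tap_vert_ps_4x4_sse4;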
diff -r 1924c460d130 -r c6ca0fd54aa7 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Fri Jan 09 11:35:26 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp Thu Jan 08 15:23:38 2015 -0600
@@ -46,29 +46,29 @@
const int filterSize = NTAPS_LUMA;
const int halfFilterSize = filterSize >> 1;
- x265::primitives.luma_hps[size](src, srcStride, immed, MAX_CU_SIZE, idxX, 1);
- x265::primitives.luma_vsp[size](immed + (halfFilterSize - 1) * MAX_CU_SIZE, MAX_CU_SIZE, dst, dstStride, idxY);
+ x265::primitives.pu[size].luma_hps(src, srcStride, immed, MAX_CU_SIZE, idxX, 1);
+ x265::primitives.pu[size].luma_vsp(immed + (halfFilterSize - 1) * MAX_CU_SIZE, MAX_CU_SIZE, dst, dstStride, idxY);
}
#define INIT2_NAME(name1, name2, cpu) \
- p.name1[LUMA_16x16] = x265_pixel_ ## name2 ## _16x16 ## cpu; \
- p.name1[LUMA_16x8] = x265_pixel_ ## name2 ## _16x8 ## cpu;
+ p.pu[LUMA_16x16].name1 = x265_pixel_ ## name2 ## _16x16 ## cpu; \
+ p.pu[LUMA_16x8].name1 = x265_pixel_ ## name2 ## _16x8 ## cpu;
#define INIT4_NAME(name1, name2, cpu) \
INIT2_NAME(name1, name2, cpu) \
- p.name1[LUMA_8x16] = x265_pixel_ ## name2 ## _8x16 ## cpu; \
- p.name1[LUMA_8x8] = x265_pixel_ ## name2 ## _8x8 ## cpu;
+ p.pu[LUMA_8x16].name1 = x265_pixel_ ## name2 ## _8x16 ## cpu; \
+ p.pu[LUMA_8x8].name1 = x265_pixel_ ## name2 ## _8x8 ## cpu;
#define INIT5_NAME(name1, name2, cpu) \
INIT4_NAME(name1, name2, cpu) \
- p.name1[LUMA_8x4] = x265_pixel_ ## name2 ## _8x4 ## cpu;
+ p.pu[LUMA_8x4].name1 = x265_pixel_ ## name2 ## _8x4 ## cpu;
#define INIT6_NAME(name1, name2, cpu) \
INIT5_NAME(name1, name2, cpu) \
- p.name1[LUMA_4x8] = x265_pixel_ ## name2 ## _4x8 ## cpu;
+ p.pu[LUMA_4x8].name1 = x265_pixel_ ## name2 ## _4x8 ## cpu;
#define INIT7_NAME(name1, name2, cpu) \
INIT6_NAME(name1, name2, cpu) \
- p.name1[LUMA_4x4] = x265_pixel_ ## name2 ## _4x4 ## cpu;
+ p.pu[LUMA_4x4].name1 = x265_pixel_ ## name2 ## _4x4 ## cpu;
#define INIT8_NAME(name1, name2, cpu) \
INIT7_NAME(name1, name2, cpu) \
- p.name1[LUMA_4x16] = x265_pixel_ ## name2 ## _4x16 ## cpu;
+ p.pu[LUMA_4x16].name1 = x265_pixel_ ## name2 ## _4x16 ## cpu;
#define INIT2(name, cpu) INIT2_NAME(name, name, cpu)
#define INIT4(name, cpu) INIT4_NAME(name, name, cpu)
#define INIT5(name, cpu) INIT5_NAME(name, name, cpu)
@@ -77,220 +77,220 @@
#define INIT8(name, cpu) INIT8_NAME(name, name, cpu)
#define HEVC_SATD(cpu) \
- p.satd[LUMA_4x8] = x265_pixel_satd_4x8_ ## cpu; \
- p.satd[LUMA_4x16] = x265_pixel_satd_4x16_ ## cpu; \
- p.satd[LUMA_8x4] = x265_pixel_satd_8x4_ ## cpu; \
- p.satd[LUMA_8x8] = x265_pixel_satd_8x8_ ## cpu; \
- p.satd[LUMA_8x16] = x265_pixel_satd_8x16_ ## cpu; \
- p.satd[LUMA_8x32] = x265_pixel_satd_8x32_ ## cpu; \
- p.satd[LUMA_12x16] = x265_pixel_satd_12x16_ ## cpu; \
- p.satd[LUMA_16x4] = x265_pixel_satd_16x4_ ## cpu; \
- p.satd[LUMA_16x8] = x265_pixel_satd_16x8_ ## cpu; \
- p.satd[LUMA_16x12] = x265_pixel_satd_16x12_ ## cpu; \
- p.satd[LUMA_16x16] = x265_pixel_satd_16x16_ ## cpu; \
- p.satd[LUMA_16x32] = x265_pixel_satd_16x32_ ## cpu; \
- p.satd[LUMA_16x64] = x265_pixel_satd_16x64_ ## cpu; \
- p.satd[LUMA_24x32] = x265_pixel_satd_24x32_ ## cpu; \
- p.satd[LUMA_32x8] = x265_pixel_satd_32x8_ ## cpu; \
- p.satd[LUMA_32x16] = x265_pixel_satd_32x16_ ## cpu; \
- p.satd[LUMA_32x24] = x265_pixel_satd_32x24_ ## cpu; \
- p.satd[LUMA_32x32] = x265_pixel_satd_32x32_ ## cpu; \
- p.satd[LUMA_32x64] = x265_pixel_satd_32x64_ ## cpu; \
- p.satd[LUMA_48x64] = x265_pixel_satd_48x64_ ## cpu; \
- p.satd[LUMA_64x16] = x265_pixel_satd_64x16_ ## cpu; \
- p.satd[LUMA_64x32] = x265_pixel_satd_64x32_ ## cpu; \
- p.satd[LUMA_64x48] = x265_pixel_satd_64x48_ ## cpu; \
- p.satd[LUMA_64x64] = x265_pixel_satd_64x64_ ## cpu;
+ p.pu[LUMA_4x8].satd = x265_pixel_satd_4x8_ ## cpu; \
+ p.pu[LUMA_4x16].satd = x265_pixel_satd_4x16_ ## cpu; \
+ p.pu[LUMA_8x4].satd = x265_pixel_satd_8x4_ ## cpu; \
+ p.pu[LUMA_8x8].satd = x265_pixel_satd_8x8_ ## cpu; \
+ p.pu[LUMA_8x16].satd = x265_pixel_satd_8x16_ ## cpu; \
+ p.pu[LUMA_8x32].satd = x265_pixel_satd_8x32_ ## cpu; \
+ p.pu[LUMA_12x16].satd = x265_pixel_satd_12x16_ ## cpu; \
+ p.pu[LUMA_16x4].satd = x265_pixel_satd_16x4_ ## cpu; \
+ p.pu[LUMA_16x8].satd = x265_pixel_satd_16x8_ ## cpu; \
+ p.pu[LUMA_16x12].satd = x265_pixel_satd_16x12_ ## cpu; \
+ p.pu[LUMA_16x16].satd = x265_pixel_satd_16x16_ ## cpu; \
+ p.pu[LUMA_16x32].satd = x265_pixel_satd_16x32_ ## cpu; \
+ p.pu[LUMA_16x64].satd = x265_pixel_satd_16x64_ ## cpu; \
+ p.pu[LUMA_24x32].satd = x265_pixel_satd_24x32_ ## cpu; \
+ p.pu[LUMA_32x8].satd = x265_pixel_satd_32x8_ ## cpu; \
+ p.pu[LUMA_32x16].satd = x265_pixel_satd_32x16_ ## cpu; \
+ p.pu[LUMA_32x24].satd = x265_pixel_satd_32x24_ ## cpu; \
+ p.pu[LUMA_32x32].satd = x265_pixel_satd_32x32_ ## cpu; \
+ p.pu[LUMA_32x64].satd = x265_pixel_satd_32x64_ ## cpu; \
+ p.pu[LUMA_48x64].satd = x265_pixel_satd_48x64_ ## cpu; \
+ p.pu[LUMA_64x16].satd = x265_pixel_satd_64x16_ ## cpu; \
+ p.pu[LUMA_64x32].satd = x265_pixel_satd_64x32_ ## cpu; \
+ p.pu[LUMA_64x48].satd = x265_pixel_satd_64x48_ ## cpu; \
+ p.pu[LUMA_64x64].satd = x265_pixel_satd_64x64_ ## cpu;
#define SAD_X3(cpu) \
- p.sad_x3[LUMA_16x8] = x265_pixel_sad_x3_16x8_ ## cpu; \
- p.sad_x3[LUMA_16x12] = x265_pixel_sad_x3_16x12_ ## cpu; \
- p.sad_x3[LUMA_16x16] = x265_pixel_sad_x3_16x16_ ## cpu; \
- p.sad_x3[LUMA_16x32] = x265_pixel_sad_x3_16x32_ ## cpu; \
- p.sad_x3[LUMA_16x64] = x265_pixel_sad_x3_16x64_ ## cpu; \
- p.sad_x3[LUMA_32x8] = x265_pixel_sad_x3_32x8_ ## cpu; \
- p.sad_x3[LUMA_32x16] = x265_pixel_sad_x3_32x16_ ## cpu; \
- p.sad_x3[LUMA_32x24] = x265_pixel_sad_x3_32x24_ ## cpu; \
- p.sad_x3[LUMA_32x32] = x265_pixel_sad_x3_32x32_ ## cpu; \
- p.sad_x3[LUMA_32x64] = x265_pixel_sad_x3_32x64_ ## cpu; \
- p.sad_x3[LUMA_24x32] = x265_pixel_sad_x3_24x32_ ## cpu; \
- p.sad_x3[LUMA_48x64] = x265_pixel_sad_x3_48x64_ ## cpu; \
- p.sad_x3[LUMA_64x16] = x265_pixel_sad_x3_64x16_ ## cpu; \
- p.sad_x3[LUMA_64x32] = x265_pixel_sad_x3_64x32_ ## cpu; \
- p.sad_x3[LUMA_64x48] = x265_pixel_sad_x3_64x48_ ## cpu; \
- p.sad_x3[LUMA_64x64] = x265_pixel_sad_x3_64x64_ ## cpu
+ p.pu[LUMA_16x8].sad_x3 = x265_pixel_sad_x3_16x8_ ## cpu; \
+ p.pu[LUMA_16x12].sad_x3 = x265_pixel_sad_x3_16x12_ ## cpu; \
+ p.pu[LUMA_16x16].sad_x3 = x265_pixel_sad_x3_16x16_ ## cpu; \
+ p.pu[LUMA_16x32].sad_x3 = x265_pixel_sad_x3_16x32_ ## cpu; \
+ p.pu[LUMA_16x64].sad_x3 = x265_pixel_sad_x3_16x64_ ## cpu; \
+ p.pu[LUMA_32x8].sad_x3 = x265_pixel_sad_x3_32x8_ ## cpu; \
+ p.pu[LUMA_32x16].sad_x3 = x265_pixel_sad_x3_32x16_ ## cpu; \
+ p.pu[LUMA_32x24].sad_x3 = x265_pixel_sad_x3_32x24_ ## cpu; \
+ p.pu[LUMA_32x32].sad_x3 = x265_pixel_sad_x3_32x32_ ## cpu; \
+ p.pu[LUMA_32x64].sad_x3 = x265_pixel_sad_x3_32x64_ ## cpu; \
+ p.pu[LUMA_24x32].sad_x3 = x265_pixel_sad_x3_24x32_ ## cpu; \
+ p.pu[LUMA_48x64].sad_x3 = x265_pixel_sad_x3_48x64_ ## cpu; \
+ p.pu[LUMA_64x16].sad_x3 = x265_pixel_sad_x3_64x16_ ## cpu; \
+ p.pu[LUMA_64x32].sad_x3 = x265_pixel_sad_x3_64x32_ ## cpu; \
+ p.pu[LUMA_64x48].sad_x3 = x265_pixel_sad_x3_64x48_ ## cpu; \
+ p.pu[LUMA_64x64].sad_x3 = x265_pixel_sad_x3_64x64_ ## cpu
#define SAD_X4(cpu) \
- p.sad_x4[LUMA_16x8] = x265_pixel_sad_x4_16x8_ ## cpu; \
- p.sad_x4[LUMA_16x12] = x265_pixel_sad_x4_16x12_ ## cpu; \
- p.sad_x4[LUMA_16x16] = x265_pixel_sad_x4_16x16_ ## cpu; \
- p.sad_x4[LUMA_16x32] = x265_pixel_sad_x4_16x32_ ## cpu; \
- p.sad_x4[LUMA_16x64] = x265_pixel_sad_x4_16x64_ ## cpu; \
- p.sad_x4[LUMA_32x8] = x265_pixel_sad_x4_32x8_ ## cpu; \
- p.sad_x4[LUMA_32x16] = x265_pixel_sad_x4_32x16_ ## cpu; \
- p.sad_x4[LUMA_32x24] = x265_pixel_sad_x4_32x24_ ## cpu; \
- p.sad_x4[LUMA_32x32] = x265_pixel_sad_x4_32x32_ ## cpu; \
- p.sad_x4[LUMA_32x64] = x265_pixel_sad_x4_32x64_ ## cpu; \
- p.sad_x4[LUMA_24x32] = x265_pixel_sad_x4_24x32_ ## cpu; \
- p.sad_x4[LUMA_48x64] = x265_pixel_sad_x4_48x64_ ## cpu; \
- p.sad_x4[LUMA_64x16] = x265_pixel_sad_x4_64x16_ ## cpu; \
- p.sad_x4[LUMA_64x32] = x265_pixel_sad_x4_64x32_ ## cpu; \
- p.sad_x4[LUMA_64x48] = x265_pixel_sad_x4_64x48_ ## cpu; \
- p.sad_x4[LUMA_64x64] = x265_pixel_sad_x4_64x64_ ## cpu
+ p.pu[LUMA_16x8].sad_x4 = x265_pixel_sad_x4_16x8_ ## cpu; \
+ p.pu[LUMA_16x12].sad_x4 = x265_pixel_sad_x4_16x12_ ## cpu; \
+ p.pu[LUMA_16x16].sad_x4 = x265_pixel_sad_x4_16x16_ ## cpu; \
+ p.pu[LUMA_16x32].sad_x4 = x265_pixel_sad_x4_16x32_ ## cpu; \
+ p.pu[LUMA_16x64].sad_x4 = x265_pixel_sad_x4_16x64_ ## cpu; \
+ p.pu[LUMA_32x8].sad_x4 = x265_pixel_sad_x4_32x8_ ## cpu; \
+ p.pu[LUMA_32x16].sad_x4 = x265_pixel_sad_x4_32x16_ ## cpu; \
+ p.pu[LUMA_32x24].sad_x4 = x265_pixel_sad_x4_32x24_ ## cpu; \
+ p.pu[LUMA_32x32].sad_x4 = x265_pixel_sad_x4_32x32_ ## cpu; \
+ p.pu[LUMA_32x64].sad_x4 = x265_pixel_sad_x4_32x64_ ## cpu; \
+ p.pu[LUMA_24x32].sad_x4 = x265_pixel_sad_x4_24x32_ ## cpu; \
+ p.pu[LUMA_48x64].sad_x4 = x265_pixel_sad_x4_48x64_ ## cpu; \
+ p.pu[LUMA_64x16].sad_x4 = x265_pixel_sad_x4_64x16_ ## cpu; \
+ p.pu[LUMA_64x32].sad_x4 = x265_pixel_sad_x4_64x32_ ## cpu; \
+ p.pu[LUMA_64x48].sad_x4 = x265_pixel_sad_x4_64x48_ ## cpu; \
+ p.pu[LUMA_64x64].sad_x4 = x265_pixel_sad_x4_64x64_ ## cpu
#define SAD(cpu) \
- p.sad[LUMA_8x32] = x265_pixel_sad_8x32_ ## cpu; \
- p.sad[LUMA_16x4] = x265_pixel_sad_16x4_ ## cpu; \
- p.sad[LUMA_16x12] = x265_pixel_sad_16x12_ ## cpu; \
- p.sad[LUMA_16x32] = x265_pixel_sad_16x32_ ## cpu; \
- p.sad[LUMA_16x64] = x265_pixel_sad_16x64_ ## cpu; \
- p.sad[LUMA_32x8] = x265_pixel_sad_32x8_ ## cpu; \
- p.sad[LUMA_32x16] = x265_pixel_sad_32x16_ ## cpu; \
- p.sad[LUMA_32x24] = x265_pixel_sad_32x24_ ## cpu; \
- p.sad[LUMA_32x32] = x265_pixel_sad_32x32_ ## cpu; \
- p.sad[LUMA_32x64] = x265_pixel_sad_32x64_ ## cpu; \
- p.sad[LUMA_64x16] = x265_pixel_sad_64x16_ ## cpu; \
- p.sad[LUMA_64x32] = x265_pixel_sad_64x32_ ## cpu; \
- p.sad[LUMA_64x48] = x265_pixel_sad_64x48_ ## cpu; \
- p.sad[LUMA_64x64] = x265_pixel_sad_64x64_ ## cpu; \
- p.sad[LUMA_48x64] = x265_pixel_sad_48x64_ ## cpu; \
- p.sad[LUMA_24x32] = x265_pixel_sad_24x32_ ## cpu; \
- p.sad[LUMA_12x16] = x265_pixel_sad_12x16_ ## cpu
+ p.pu[LUMA_8x32].sad = x265_pixel_sad_8x32_ ## cpu; \
+ p.pu[LUMA_16x4].sad = x265_pixel_sad_16x4_ ## cpu; \
+ p.pu[LUMA_16x12].sad = x265_pixel_sad_16x12_ ## cpu; \
+ p.pu[LUMA_16x32].sad = x265_pixel_sad_16x32_ ## cpu; \
+ p.pu[LUMA_16x64].sad = x265_pixel_sad_16x64_ ## cpu; \
+ p.pu[LUMA_32x8].sad = x265_pixel_sad_32x8_ ## cpu; \
+ p.pu[LUMA_32x16].sad = x265_pixel_sad_32x16_ ## cpu; \
+ p.pu[LUMA_32x24].sad = x265_pixel_sad_32x24_ ## cpu; \
+ p.pu[LUMA_32x32].sad = x265_pixel_sad_32x32_ ## cpu; \
+ p.pu[LUMA_32x64].sad = x265_pixel_sad_32x64_ ## cpu; \
+ p.pu[LUMA_64x16].sad = x265_pixel_sad_64x16_ ## cpu; \
+ p.pu[LUMA_64x32].sad = x265_pixel_sad_64x32_ ## cpu; \
+ p.pu[LUMA_64x48].sad = x265_pixel_sad_64x48_ ## cpu; \
+ p.pu[LUMA_64x64].sad = x265_pixel_sad_64x64_ ## cpu; \
+ p.pu[LUMA_48x64].sad = x265_pixel_sad_48x64_ ## cpu; \
+ p.pu[LUMA_24x32].sad = x265_pixel_sad_24x32_ ## cpu; \
+ p.pu[LUMA_12x16].sad = x265_pixel_sad_12x16_ ## cpu
#define ASSGN_SSE(cpu) \
- p.sse_pp[LUMA_8x8] = x265_pixel_ssd_8x8_ ## cpu; \
- p.sse_pp[LUMA_8x4] = x265_pixel_ssd_8x4_ ## cpu; \
- p.sse_pp[LUMA_16x16] = x265_pixel_ssd_16x16_ ## cpu; \
- p.sse_pp[LUMA_16x4] = x265_pixel_ssd_16x4_ ## cpu; \
- p.sse_pp[LUMA_16x8] = x265_pixel_ssd_16x8_ ## cpu; \
- p.sse_pp[LUMA_8x16] = x265_pixel_ssd_8x16_ ## cpu; \
- p.sse_pp[LUMA_16x12] = x265_pixel_ssd_16x12_ ## cpu; \
- p.sse_pp[LUMA_32x32] = x265_pixel_ssd_32x32_ ## cpu; \
- p.sse_pp[LUMA_32x16] = x265_pixel_ssd_32x16_ ## cpu; \
- p.sse_pp[LUMA_16x32] = x265_pixel_ssd_16x32_ ## cpu; \
- p.sse_pp[LUMA_8x32] = x265_pixel_ssd_8x32_ ## cpu; \
- p.sse_pp[LUMA_32x8] = x265_pixel_ssd_32x8_ ## cpu; \
- p.sse_pp[LUMA_32x24] = x265_pixel_ssd_32x24_ ## cpu; \
- p.sse_pp[LUMA_32x64] = x265_pixel_ssd_32x64_ ## cpu; \
- p.sse_pp[LUMA_16x64] = x265_pixel_ssd_16x64_ ## cpu
+ p.pu[LUMA_8x8].sse_pp = x265_pixel_ssd_8x8_ ## cpu; \
+ p.pu[LUMA_8x4].sse_pp = x265_pixel_ssd_8x4_ ## cpu; \
+ p.pu[LUMA_16x16].sse_pp = x265_pixel_ssd_16x16_ ## cpu; \
+ p.pu[LUMA_16x4].sse_pp = x265_pixel_ssd_16x4_ ## cpu; \
+ p.pu[LUMA_16x8].sse_pp = x265_pixel_ssd_16x8_ ## cpu; \
+ p.pu[LUMA_8x16].sse_pp = x265_pixel_ssd_8x16_ ## cpu; \
+ p.pu[LUMA_16x12].sse_pp = x265_pixel_ssd_16x12_ ## cpu; \
+ p.pu[LUMA_32x32].sse_pp = x265_pixel_ssd_32x32_ ## cpu; \
+ p.pu[LUMA_32x16].sse_pp = x265_pixel_ssd_32x16_ ## cpu; \
+ p.pu[LUMA_16x32].sse_pp = x265_pixel_ssd_16x32_ ## cpu; \
+ p.pu[LUMA_8x32].sse_pp = x265_pixel_ssd_8x32_ ## cpu; \
+ p.pu[LUMA_32x8].sse_pp = x265_pixel_ssd_32x8_ ## cpu; \
+ p.pu[LUMA_32x24].sse_pp = x265_pixel_ssd_32x24_ ## cpu; \
+ p.pu[LUMA_32x64].sse_pp = x265_pixel_ssd_32x64_ ## cpu; \
+ p.pu[LUMA_16x64].sse_pp = x265_pixel_ssd_16x64_ ## cpu
#define ASSGN_SSE_SS(cpu) \
- p.sse_ss[LUMA_4x4] = x265_pixel_ssd_ss_4x4_ ## cpu; \
- p.sse_ss[LUMA_4x8] = x265_pixel_ssd_ss_4x8_ ## cpu; \
- p.sse_ss[LUMA_4x16] = x265_pixel_ssd_ss_4x16_ ## cpu; \
- p.sse_ss[LUMA_8x4] = x265_pixel_ssd_ss_8x4_ ## cpu; \
- p.sse_ss[LUMA_8x8] = x265_pixel_ssd_ss_8x8_ ## cpu; \
- p.sse_ss[LUMA_8x16] = x265_pixel_ssd_ss_8x16_ ## cpu; \
- p.sse_ss[LUMA_8x32] = x265_pixel_ssd_ss_8x32_ ## cpu; \
- p.sse_ss[LUMA_12x16] = x265_pixel_ssd_ss_12x16_ ## cpu; \
- p.sse_ss[LUMA_16x4] = x265_pixel_ssd_ss_16x4_ ## cpu; \
- p.sse_ss[LUMA_16x8] = x265_pixel_ssd_ss_16x8_ ## cpu; \
- p.sse_ss[LUMA_16x12] = x265_pixel_ssd_ss_16x12_ ## cpu; \
- p.sse_ss[LUMA_16x16] = x265_pixel_ssd_ss_16x16_ ## cpu; \
- p.sse_ss[LUMA_16x32] = x265_pixel_ssd_ss_16x32_ ## cpu; \
- p.sse_ss[LUMA_16x64] = x265_pixel_ssd_ss_16x64_ ## cpu; \
- p.sse_ss[LUMA_24x32] = x265_pixel_ssd_ss_24x32_ ## cpu; \
- p.sse_ss[LUMA_32x8] = x265_pixel_ssd_ss_32x8_ ## cpu; \
- p.sse_ss[LUMA_32x16] = x265_pixel_ssd_ss_32x16_ ## cpu; \
- p.sse_ss[LUMA_32x24] = x265_pixel_ssd_ss_32x24_ ## cpu; \
- p.sse_ss[LUMA_32x32] = x265_pixel_ssd_ss_32x32_ ## cpu; \
- p.sse_ss[LUMA_32x64] = x265_pixel_ssd_ss_32x64_ ## cpu; \
- p.sse_ss[LUMA_48x64] = x265_pixel_ssd_ss_48x64_ ## cpu; \
- p.sse_ss[LUMA_64x16] = x265_pixel_ssd_ss_64x16_ ## cpu; \
- p.sse_ss[LUMA_64x32] = x265_pixel_ssd_ss_64x32_ ## cpu; \
- p.sse_ss[LUMA_64x48] = x265_pixel_ssd_ss_64x48_ ## cpu; \
- p.sse_ss[LUMA_64x64] = x265_pixel_ssd_ss_64x64_ ## cpu;
+ p.pu[LUMA_4x4].sse_ss = x265_pixel_ssd_ss_4x4_ ## cpu; \
+ p.pu[LUMA_4x8].sse_ss = x265_pixel_ssd_ss_4x8_ ## cpu; \
+ p.pu[LUMA_4x16].sse_ss = x265_pixel_ssd_ss_4x16_ ## cpu; \
+ p.pu[LUMA_8x4].sse_ss = x265_pixel_ssd_ss_8x4_ ## cpu; \
+ p.pu[LUMA_8x8].sse_ss = x265_pixel_ssd_ss_8x8_ ## cpu; \
+ p.pu[LUMA_8x16].sse_ss = x265_pixel_ssd_ss_8x16_ ## cpu; \
+ p.pu[LUMA_8x32].sse_ss = x265_pixel_ssd_ss_8x32_ ## cpu; \
+ p.pu[LUMA_12x16].sse_ss = x265_pixel_ssd_ss_12x16_ ## cpu; \
+ p.pu[LUMA_16x4].sse_ss = x265_pixel_ssd_ss_16x4_ ## cpu; \
+ p.pu[LUMA_16x8].sse_ss = x265_pixel_ssd_ss_16x8_ ## cpu; \
+ p.pu[LUMA_16x12].sse_ss = x265_pixel_ssd_ss_16x12_ ## cpu; \
+ p.pu[LUMA_16x16].sse_ss = x265_pixel_ssd_ss_16x16_ ## cpu; \
+ p.pu[LUMA_16x32].sse_ss = x265_pixel_ssd_ss_16x32_ ## cpu; \
+ p.pu[LUMA_16x64].sse_ss = x265_pixel_ssd_ss_16x64_ ## cpu; \
+ p.pu[LUMA_24x32].sse_ss = x265_pixel_ssd_ss_24x32_ ## cpu; \
+ p.pu[LUMA_32x8].sse_ss = x265_pixel_ssd_ss_32x8_ ## cpu; \
+ p.pu[LUMA_32x16].sse_ss = x265_pixel_ssd_ss_32x16_ ## cpu; \
+ p.pu[LUMA_32x24].sse_ss = x265_pixel_ssd_ss_32x24_ ## cpu; \
+ p.pu[LUMA_32x32].sse_ss = x265_pixel_ssd_ss_32x32_ ## cpu; \
+ p.pu[LUMA_32x64].sse_ss = x265_pixel_ssd_ss_32x64_ ## cpu; \
+ p.pu[LUMA_48x64].sse_ss = x265_pixel_ssd_ss_48x64_ ## cpu; \
+ p.pu[LUMA_64x16].sse_ss = x265_pixel_ssd_ss_64x16_ ## cpu; \
+ p.pu[LUMA_64x32].sse_ss = x265_pixel_ssd_ss_64x32_ ## cpu; \
+ p.pu[LUMA_64x48].sse_ss = x265_pixel_ssd_ss_64x48_ ## cpu; \
+ p.pu[LUMA_64x64].sse_ss = x265_pixel_ssd_ss_64x64_ ## cpu;
#define SA8D_INTER_FROM_BLOCK(cpu) \
- p.sa8d_inter[LUMA_4x8] = x265_pixel_satd_4x8_ ## cpu; \
- p.sa8d_inter[LUMA_8x4] = x265_pixel_satd_8x4_ ## cpu; \
- p.sa8d_inter[LUMA_4x16] = x265_pixel_satd_4x16_ ## cpu; \
- p.sa8d_inter[LUMA_16x4] = x265_pixel_satd_16x4_ ## cpu; \
- p.sa8d_inter[LUMA_12x16] = x265_pixel_satd_12x16_ ## cpu; \
- p.sa8d_inter[LUMA_8x8] = x265_pixel_sa8d_8x8_ ## cpu; \
- p.sa8d_inter[LUMA_16x16] = x265_pixel_sa8d_16x16_ ## cpu; \
- p.sa8d_inter[LUMA_16x12] = x265_pixel_satd_16x12_ ## cpu; \
- p.sa8d_inter[LUMA_16x8] = x265_pixel_sa8d_16x8_ ## cpu; \
- p.sa8d_inter[LUMA_8x16] = x265_pixel_sa8d_8x16_ ## cpu; \
- p.sa8d_inter[LUMA_32x24] = x265_pixel_sa8d_32x24_ ## cpu; \
- p.sa8d_inter[LUMA_24x32] = x265_pixel_sa8d_24x32_ ## cpu; \
- p.sa8d_inter[LUMA_32x8] = x265_pixel_sa8d_32x8_ ## cpu; \
- p.sa8d_inter[LUMA_8x32] = x265_pixel_sa8d_8x32_ ## cpu; \
- p.sa8d_inter[LUMA_32x32] = x265_pixel_sa8d_32x32_ ## cpu; \
- p.sa8d_inter[LUMA_32x16] = x265_pixel_sa8d_32x16_ ## cpu; \
- p.sa8d_inter[LUMA_16x32] = x265_pixel_sa8d_16x32_ ## cpu; \
- p.sa8d_inter[LUMA_64x64] = x265_pixel_sa8d_64x64_ ## cpu; \
- p.sa8d_inter[LUMA_64x32] = x265_pixel_sa8d_64x32_ ## cpu; \
- p.sa8d_inter[LUMA_32x64] = x265_pixel_sa8d_32x64_ ## cpu; \
- p.sa8d_inter[LUMA_64x48] = x265_pixel_sa8d_64x48_ ## cpu; \
- p.sa8d_inter[LUMA_48x64] = x265_pixel_sa8d_48x64_ ## cpu; \
- p.sa8d_inter[LUMA_64x16] = x265_pixel_sa8d_64x16_ ## cpu; \
- p.sa8d_inter[LUMA_16x64] = x265_pixel_sa8d_16x64_ ## cpu;
+ p.pu[LUMA_4x8].sa8d_inter = x265_pixel_satd_4x8_ ## cpu; \
+ p.pu[LUMA_8x4].sa8d_inter = x265_pixel_satd_8x4_ ## cpu; \
+ p.pu[LUMA_4x16].sa8d_inter = x265_pixel_satd_4x16_ ## cpu; \
+ p.pu[LUMA_16x4].sa8d_inter = x265_pixel_satd_16x4_ ## cpu; \
+ p.pu[LUMA_12x16].sa8d_inter = x265_pixel_satd_12x16_ ## cpu; \
+ p.pu[LUMA_8x8].sa8d_inter = x265_pixel_sa8d_8x8_ ## cpu; \
+ p.pu[LUMA_16x16].sa8d_inter = x265_pixel_sa8d_16x16_ ## cpu; \
+ p.pu[LUMA_16x12].sa8d_inter = x265_pixel_satd_16x12_ ## cpu; \
+ p.pu[LUMA_16x8].sa8d_inter = x265_pixel_sa8d_16x8_ ## cpu; \
+ p.pu[LUMA_8x16].sa8d_inter = x265_pixel_sa8d_8x16_ ## cpu; \
+ p.pu[LUMA_32x24].sa8d_inter = x265_pixel_sa8d_32x24_ ## cpu; \
+ p.pu[LUMA_24x32].sa8d_inter = x265_pixel_sa8d_24x32_ ## cpu; \
+ p.pu[LUMA_32x8].sa8d_inter = x265_pixel_sa8d_32x8_ ## cpu; \
+ p.pu[LUMA_8x32].sa8d_inter = x265_pixel_sa8d_8x32_ ## cpu; \
+ p.pu[LUMA_32x32].sa8d_inter = x265_pixel_sa8d_32x32_ ## cpu; \
+ p.pu[LUMA_32x16].sa8d_inter = x265_pixel_sa8d_32x16_ ## cpu; \
+ p.pu[LUMA_16x32].sa8d_inter = x265_pixel_sa8d_16x32_ ## cpu; \
+ p.pu[LUMA_64x64].sa8d_inter = x265_pixel_sa8d_64x64_ ## cpu; \
+ p.pu[LUMA_64x32].sa8d_inter = x265_pixel_sa8d_64x32_ ## cpu; \
+ p.pu[LUMA_32x64].sa8d_inter = x265_pixel_sa8d_32x64_ ## cpu; \
+ p.pu[LUMA_64x48].sa8d_inter = x265_pixel_sa8d_64x48_ ## cpu; \
+ p.pu[LUMA_48x64].sa8d_inter = x265_pixel_sa8d_48x64_ ## cpu; \
+ p.pu[LUMA_64x16].sa8d_inter = x265_pixel_sa8d_64x16_ ## cpu; \
+ p.pu[LUMA_16x64].sa8d_inter = x265_pixel_sa8d_16x64_ ## cpu;
#define PIXEL_AVG(cpu) \
- p.pixelavg_pp[LUMA_64x64] = x265_pixel_avg_64x64_ ## cpu; \
- p.pixelavg_pp[LUMA_64x48] = x265_pixel_avg_64x48_ ## cpu; \
- p.pixelavg_pp[LUMA_64x32] = x265_pixel_avg_64x32_ ## cpu; \
- p.pixelavg_pp[LUMA_64x16] = x265_pixel_avg_64x16_ ## cpu; \
- p.pixelavg_pp[LUMA_48x64] = x265_pixel_avg_48x64_ ## cpu; \
- p.pixelavg_pp[LUMA_32x64] = x265_pixel_avg_32x64_ ## cpu; \
- p.pixelavg_pp[LUMA_32x32] = x265_pixel_avg_32x32_ ## cpu; \
- p.pixelavg_pp[LUMA_32x24] = x265_pixel_avg_32x24_ ## cpu; \
- p.pixelavg_pp[LUMA_32x16] = x265_pixel_avg_32x16_ ## cpu; \
- p.pixelavg_pp[LUMA_32x8] = x265_pixel_avg_32x8_ ## cpu; \
- p.pixelavg_pp[LUMA_24x32] = x265_pixel_avg_24x32_ ## cpu; \
- p.pixelavg_pp[LUMA_16x64] = x265_pixel_avg_16x64_ ## cpu; \
- p.pixelavg_pp[LUMA_16x32] = x265_pixel_avg_16x32_ ## cpu; \
- p.pixelavg_pp[LUMA_16x16] = x265_pixel_avg_16x16_ ## cpu; \
- p.pixelavg_pp[LUMA_16x12] = x265_pixel_avg_16x12_ ## cpu; \
- p.pixelavg_pp[LUMA_16x8] = x265_pixel_avg_16x8_ ## cpu; \
- p.pixelavg_pp[LUMA_16x4] = x265_pixel_avg_16x4_ ## cpu; \
- p.pixelavg_pp[LUMA_12x16] = x265_pixel_avg_12x16_ ## cpu; \
- p.pixelavg_pp[LUMA_8x32] = x265_pixel_avg_8x32_ ## cpu; \
- p.pixelavg_pp[LUMA_8x16] = x265_pixel_avg_8x16_ ## cpu; \
- p.pixelavg_pp[LUMA_8x8] = x265_pixel_avg_8x8_ ## cpu; \
- p.pixelavg_pp[LUMA_8x4] = x265_pixel_avg_8x4_ ## cpu;
+ p.pu[LUMA_64x64].pixelavg_pp = x265_pixel_avg_64x64_ ## cpu; \
+ p.pu[LUMA_64x48].pixelavg_pp = x265_pixel_avg_64x48_ ## cpu; \
+ p.pu[LUMA_64x32].pixelavg_pp = x265_pixel_avg_64x32_ ## cpu; \
+ p.pu[LUMA_64x16].pixelavg_pp = x265_pixel_avg_64x16_ ## cpu; \
+ p.pu[LUMA_48x64].pixelavg_pp = x265_pixel_avg_48x64_ ## cpu; \
+ p.pu[LUMA_32x64].pixelavg_pp = x265_pixel_avg_32x64_ ## cpu; \
+ p.pu[LUMA_32x32].pixelavg_pp = x265_pixel_avg_32x32_ ## cpu; \
+ p.pu[LUMA_32x24].pixelavg_pp = x265_pixel_avg_32x24_ ## cpu; \
+ p.pu[LUMA_32x16].pixelavg_pp = x265_pixel_avg_32x16_ ## cpu; \
+ p.pu[LUMA_32x8].pixelavg_pp = x265_pixel_avg_32x8_ ## cpu; \
+ p.pu[LUMA_24x32].pixelavg_pp = x265_pixel_avg_24x32_ ## cpu; \
+ p.pu[LUMA_16x64].pixelavg_pp = x265_pixel_avg_16x64_ ## cpu; \
+ p.pu[LUMA_16x32].pixelavg_pp = x265_pixel_avg_16x32_ ## cpu; \
+ p.pu[LUMA_16x16].pixelavg_pp = x265_pixel_avg_16x16_ ## cpu; \
+ p.pu[LUMA_16x12].pixelavg_pp = x265_pixel_avg_16x12_ ## cpu; \
+ p.pu[LUMA_16x8].pixelavg_pp = x265_pixel_avg_16x8_ ## cpu; \
+ p.pu[LUMA_16x4].pixelavg_pp = x265_pixel_avg_16x4_ ## cpu; \
+ p.pu[LUMA_12x16].pixelavg_pp = x265_pixel_avg_12x16_ ## cpu; \
+ p.pu[LUMA_8x32].pixelavg_pp = x265_pixel_avg_8x32_ ## cpu; \
+ p.pu[LUMA_8x16].pixelavg_pp = x265_pixel_avg_8x16_ ## cpu; \
+ p.pu[LUMA_8x8].pixelavg_pp = x265_pixel_avg_8x8_ ## cpu; \
+ p.pu[LUMA_8x4].pixelavg_pp = x265_pixel_avg_8x4_ ## cpu;
#define PIXEL_AVG_W4(cpu) \
- p.pixelavg_pp[LUMA_4x4] = x265_pixel_avg_4x4_ ## cpu; \
- p.pixelavg_pp[LUMA_4x8] = x265_pixel_avg_4x8_ ## cpu; \
- p.pixelavg_pp[LUMA_4x16] = x265_pixel_avg_4x16_ ## cpu;
+ p.pu[LUMA_4x4].pixelavg_pp = x265_pixel_avg_4x4_ ## cpu; \
+ p.pu[LUMA_4x8].pixelavg_pp = x265_pixel_avg_4x8_ ## cpu; \
+ p.pu[LUMA_4x16].pixelavg_pp = x265_pixel_avg_4x16_ ## cpu;
#define SETUP_CHROMA_FUNC_DEF_420(W, H, cpu) \
- p.chroma[X265_CSP_I420].filter_hpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \
- p.chroma[X265_CSP_I420].filter_hps[CHROMA_ ## W ## x ## H] = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu; \
- p.chroma[X265_CSP_I420].filter_vpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \
- p.chroma[X265_CSP_I420].filter_vps[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu;
+ p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].filter_hpp = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \
+ p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].filter_hps = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu; \
+ p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].filter_vpp = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \
+ p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].filter_vps = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu;
#define SETUP_CHROMA_FUNC_DEF_422(W, H, cpu) \
- p.chroma[X265_CSP_I422].filter_hpp[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \
- p.chroma[X265_CSP_I422].filter_hps[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu; \
- p.chroma[X265_CSP_I422].filter_vpp[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \
- p.chroma[X265_CSP_I422].filter_vps[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].filter_hpp = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \
+ p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].filter_hps = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu; \
+ p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].filter_vpp = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \
+ p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].filter_vps = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu;
#define SETUP_CHROMA_FUNC_DEF_444(W, H, cpu) \
- p.chroma[X265_CSP_I444].filter_hpp[LUMA_ ## W ## x ## H] = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \
- p.chroma[X265_CSP_I444].filter_hps[LUMA_ ## W ## x ## H] = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu; \
- p.chroma[X265_CSP_I444].filter_vpp[LUMA_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \
- p.chroma[X265_CSP_I444].filter_vps[LUMA_ ## W ## x ## H] = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu;
+ p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_hpp = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \
+ p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_hps = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu; \
+ p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vpp = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \
+ p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vps = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu;
#define SETUP_CHROMA_SP_FUNC_DEF_420(W, H, cpu) \
- p.chroma[X265_CSP_I420].filter_vsp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu;
+ p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].filter_vsp = x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu;
#define SETUP_CHROMA_SP_FUNC_DEF_422(W, H, cpu) \
- p.chroma[X265_CSP_I422].filter_vsp[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].filter_vsp = x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu;
#define SETUP_CHROMA_SP_FUNC_DEF_444(W, H, cpu) \
- p.chroma[X265_CSP_I444].filter_vsp[LUMA_ ## W ## x ## H] = x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu;
+ p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vsp = x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu;
#define SETUP_CHROMA_SS_FUNC_DEF_420(W, H, cpu) \
- p.chroma[X265_CSP_I420].filter_vss[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu;
+ p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].filter_vss = x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu;
#define SETUP_CHROMA_SS_FUNC_DEF_422(W, H, cpu) \
- p.chroma[X265_CSP_I422].filter_vss[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].filter_vss = x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu;
#define SETUP_CHROMA_SS_FUNC_DEF_444(W, H, cpu) \
- p.chroma[X265_CSP_I444].filter_vss[LUMA_ ## W ## x ## H] = x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu;
+ p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vss = x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu;
#define CHROMA_FILTERS_420(cpu) \
SETUP_CHROMA_FUNC_DEF_420(4, 4, cpu); \
@@ -538,37 +538,37 @@
#if HIGH_BIT_DEPTH // temporary, until all 10bit functions are completed
#define SETUP_LUMA_FUNC_DEF(W, H, cpu) \
- p.luma_hpp[LUMA_ ## W ## x ## H] = x265_interp_8tap_horiz_pp_ ## W ## x ## H ## cpu; \
- p.luma_hps[LUMA_ ## W ## x ## H] = x265_interp_8tap_horiz_ps_ ## W ## x ## H ## cpu; \
- p.luma_vpp[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_pp_ ## W ## x ## H ## cpu; \
- p.luma_vps[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_ps_ ## W ## x ## H ## cpu; \
- p.luma_vsp[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_sp_ ## W ## x ## H ## cpu; \
- p.luma_hvpp[LUMA_ ## W ## x ## H] = interp_8tap_hv_pp_cpu<LUMA_ ## W ## x ## H>;
+ p.pu[LUMA_ ## W ## x ## H].luma_hpp = x265_interp_8tap_horiz_pp_ ## W ## x ## H ## cpu; \
+ p.pu[LUMA_ ## W ## x ## H].luma_hps = x265_interp_8tap_horiz_ps_ ## W ## x ## H ## cpu; \
+ p.pu[LUMA_ ## W ## x ## H].luma_vpp = x265_interp_8tap_vert_pp_ ## W ## x ## H ## cpu; \
+ p.pu[LUMA_ ## W ## x ## H].luma_vps = x265_interp_8tap_vert_ps_ ## W ## x ## H ## cpu; \
+ p.pu[LUMA_ ## W ## x ## H].luma_vsp = x265_interp_8tap_vert_sp_ ## W ## x ## H ## cpu; \
+ p.pu[LUMA_ ## W ## x ## H].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_ ## W ## x ## H>;
#else
#define SETUP_LUMA_FUNC_DEF(W, H, cpu) \
- p.luma_hpp[LUMA_ ## W ## x ## H] = x265_interp_8tap_horiz_pp_ ## W ## x ## H ## cpu; \
- p.luma_hps[LUMA_ ## W ## x ## H] = x265_interp_8tap_horiz_ps_ ## W ## x ## H ## cpu; \
- p.luma_vpp[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_pp_ ## W ## x ## H ## cpu; \
- p.luma_vps[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_ps_ ## W ## x ## H ## cpu; \
- p.luma_vsp[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_sp_ ## W ## x ## H ## cpu; \
- p.luma_hvpp[LUMA_ ## W ## x ## H] = interp_8tap_hv_pp_cpu<LUMA_ ## W ## x ## H>;
+ p.pu[LUMA_ ## W ## x ## H].luma_hpp = x265_interp_8tap_horiz_pp_ ## W ## x ## H ## cpu; \
+ p.pu[LUMA_ ## W ## x ## H].luma_hps = x265_interp_8tap_horiz_ps_ ## W ## x ## H ## cpu; \
+ p.pu[LUMA_ ## W ## x ## H].luma_vpp = x265_interp_8tap_vert_pp_ ## W ## x ## H ## cpu; \
+ p.pu[LUMA_ ## W ## x ## H].luma_vps = x265_interp_8tap_vert_ps_ ## W ## x ## H ## cpu; \
+ p.pu[LUMA_ ## W ## x ## H].luma_vsp = x265_interp_8tap_vert_sp_ ## W ## x ## H ## cpu; \
+ p.pu[LUMA_ ## W ## x ## H].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_ ## W ## x ## H>;
#endif // if HIGH_BIT_DEPTH
#define SETUP_LUMA_SUB_FUNC_DEF(W, H, cpu) \
- p.luma_sub_ps[LUMA_ ## W ## x ## H] = x265_pixel_sub_ps_ ## W ## x ## H ## cpu; \
- p.luma_add_ps[LUMA_ ## W ## x ## H] = x265_pixel_add_ps_ ## W ## x ## H ## cpu;
+ p.pu[LUMA_ ## W ## x ## H].luma_sub_ps = x265_pixel_sub_ps_ ## W ## x ## H ## cpu; \
+ p.pu[LUMA_ ## W ## x ## H].luma_add_ps = x265_pixel_add_ps_ ## W ## x ## H ## cpu;
#define SETUP_LUMA_SP_FUNC_DEF(W, H, cpu) \
- p.luma_vsp[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_sp_ ## W ## x ## H ## cpu;
+ p.pu[LUMA_ ## W ## x ## H].luma_vsp = x265_interp_8tap_vert_sp_ ## W ## x ## H ## cpu;
#define SETUP_LUMA_SS_FUNC_DEF(W, H, cpu) \
- p.luma_vss[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_ss_ ## W ## x ## H ## cpu;
+ p.pu[LUMA_ ## W ## x ## H].luma_vss = x265_interp_8tap_vert_ss_ ## W ## x ## H ## cpu;
#define SETUP_LUMA_BLOCKCOPY(type, W, H, cpu) \
- p.luma_copy_ ## type[LUMA_ ## W ## x ## H] = x265_blockcopy_ ## type ## _ ## W ## x ## H ## cpu;
+ p.pu[LUMA_ ## W ## x ## H].luma_copy_ ## type = x265_blockcopy_ ## type ## _ ## W ## x ## H ## cpu;
#define SETUP_CHROMA_BLOCKCOPY(type, W, H, cpu) \
- p.chroma[X265_CSP_I420].copy_ ## type[CHROMA_ ## W ## x ## H] = x265_blockcopy_ ## type ## _ ## W ## x ## H ## cpu;
+ p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].copy_ ## type = x265_blockcopy_ ## type ## _ ## W ## x ## H ## cpu;
#define CHROMA_BLOCKCOPY(type, cpu) \
SETUP_CHROMA_BLOCKCOPY(type, 2, 4, cpu); \
@@ -597,7 +597,7 @@
SETUP_CHROMA_BLOCKCOPY(type, 32, 32, cpu);
#define SETUP_CHROMA_BLOCKCOPY_422(type, W, H, cpu) \
- p.chroma[X265_CSP_I422].copy_ ## type[CHROMA422_ ## W ## x ## H] = x265_blockcopy_ ## type ## _ ## W ## x ## H ## cpu;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].copy_ ## type = x265_blockcopy_ ## type ## _ ## W ## x ## H ## cpu;
#define CHROMA_BLOCKCOPY_422(type, cpu) \
SETUP_CHROMA_BLOCKCOPY_422(type, 2, 8, cpu); \
@@ -653,7 +653,7 @@
SETUP_LUMA_BLOCKCOPY(type, 16, 64, cpu);
#define SETUP_CHROMA_BLOCKCOPY_SP(W, H, cpu) \
- p.chroma[X265_CSP_I420].copy_sp[CHROMA_ ## W ## x ## H] = x265_blockcopy_sp_ ## W ## x ## H ## cpu;
+ p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].copy_sp = x265_blockcopy_sp_ ## W ## x ## H ## cpu;
#define CHROMA_BLOCKCOPY_SP(cpu) \
SETUP_CHROMA_BLOCKCOPY_SP(2, 4, cpu); \
@@ -682,7 +682,7 @@
SETUP_CHROMA_BLOCKCOPY_SP(32, 32, cpu);
#define SETUP_CHROMA_BLOCKCOPY_SP_422(W, H, cpu) \
- p.chroma[X265_CSP_I422].copy_sp[CHROMA422_ ## W ## x ## H] = x265_blockcopy_sp_ ## W ## x ## H ## cpu;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].copy_sp = x265_blockcopy_sp_ ## W ## x ## H ## cpu;
#define CHROMA_BLOCKCOPY_SP_422(cpu) \
SETUP_CHROMA_BLOCKCOPY_SP_422(2, 8, cpu); \
@@ -711,8 +711,8 @@
SETUP_CHROMA_BLOCKCOPY_SP_422(32, 64, cpu);
#define SETUP_CHROMA_PIXELSUB(W, H, cpu) \
- p.chroma[X265_CSP_I420].sub_ps[CHROMA_ ## W ## x ## H] = x265_pixel_sub_ps_ ## W ## x ## H ## cpu; \
- p.chroma[X265_CSP_I420].add_ps[CHROMA_ ## W ## x ## H] = x265_pixel_add_ps_ ## W ## x ## H ## cpu;
+ p.chroma[X265_CSP_I420].cu[CHROMA_ ## W ## x ## H].sub_ps = x265_pixel_sub_ps_ ## W ## x ## H ## cpu; \
+ p.chroma[X265_CSP_I420].cu[CHROMA_ ## W ## x ## H].add_ps = x265_pixel_add_ps_ ## W ## x ## H ## cpu;
#define CHROMA_PIXELSUB_PS(cpu) \
SETUP_CHROMA_PIXELSUB(4, 4, cpu); \
@@ -721,8 +721,8 @@
SETUP_CHROMA_PIXELSUB(32, 32, cpu);
#define SETUP_CHROMA_PIXELSUB_422(W, H, cpu) \
- p.chroma[X265_CSP_I422].sub_ps[CHROMA422_ ## W ## x ## H] = x265_pixel_sub_ps_ ## W ## x ## H ## cpu; \
- p.chroma[X265_CSP_I422].add_ps[CHROMA422_ ## W ## x ## H] = x265_pixel_add_ps_ ## W ## x ## H ## cpu;
+ p.chroma[X265_CSP_I422].cu[CHROMA422_ ## W ## x ## H].sub_ps = x265_pixel_sub_ps_ ## W ## x ## H ## cpu; \
+ p.chroma[X265_CSP_I422].cu[CHROMA422_ ## W ## x ## H].add_ps = x265_pixel_add_ps_ ## W ## x ## H ## cpu;
#define CHROMA_PIXELSUB_PS_422(cpu) \
SETUP_CHROMA_PIXELSUB_422(4, 8, cpu); \
@@ -819,7 +819,7 @@
SETUP_LUMA_SS_FUNC_DEF(16, 64, cpu);
#define SETUP_PIXEL_VAR_DEF(W, H, cpu) \
- p.var[BLOCK_ ## W ## x ## H] = x265_pixel_var_ ## W ## x ## H ## cpu;
+ p.cu[BLOCK_ ## W ## x ## H].var = x265_pixel_var_ ## W ## x ## H ## cpu;
#define LUMA_VAR(cpu) \
SETUP_PIXEL_VAR_DEF(8, 8, cpu); \
@@ -828,7 +828,7 @@
SETUP_PIXEL_VAR_DEF(64, 64, cpu);
#define SETUP_PIXEL_SSE_SP_DEF(W, H, cpu) \
- p.sse_sp[LUMA_ ## W ## x ## H] = x265_pixel_ssd_sp_ ## W ## x ## H ## cpu;
+ p.pu[LUMA_ ## W ## x ## H].sse_sp = x265_pixel_ssd_sp_ ## W ## x ## H ## cpu;
#define LUMA_SSE_SP(cpu) \
SETUP_PIXEL_SSE_SP_DEF(4, 4, cpu); \
@@ -858,7 +858,7 @@
SETUP_PIXEL_SSE_SP_DEF(16, 64, cpu);
#define SETUP_LUMA_ADDAVG_FUNC_DEF(W, H, cpu) \
- p.luma_addAvg[LUMA_ ## W ## x ## H] = x265_addAvg_ ## W ## x ## H ## cpu;
+ p.pu[LUMA_ ## W ## x ## H].luma_addAvg = x265_addAvg_ ## W ## x ## H ## cpu;
#define LUMA_ADDAVG(cpu) \
SETUP_LUMA_ADDAVG_FUNC_DEF(4, 4, cpu); \
@@ -888,7 +888,7 @@
SETUP_LUMA_ADDAVG_FUNC_DEF(64, 64, cpu); \
#define SETUP_CHROMA_ADDAVG_FUNC_DEF(W, H, cpu) \
- p.chroma[X265_CSP_I420].addAvg[CHROMA_ ## W ## x ## H] = x265_addAvg_ ## W ## x ## H ## cpu;
+ p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].addAvg = x265_addAvg_ ## W ## x ## H ## cpu;
#define CHROMA_ADDAVG(cpu) \
SETUP_CHROMA_ADDAVG_FUNC_DEF(2, 4, cpu); \
@@ -917,7 +917,7 @@
SETUP_CHROMA_ADDAVG_FUNC_DEF(32, 32, cpu);
#define SETUP_CHROMA_ADDAVG_FUNC_DEF_422(W, H, cpu) \
- p.chroma[X265_CSP_I422].addAvg[CHROMA422_ ## W ## x ## H] = x265_addAvg_ ## W ## x ## H ## cpu;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].addAvg = x265_addAvg_ ## W ## x ## H ## cpu;
#define CHROMA_ADDAVG_422(cpu) \
SETUP_CHROMA_ADDAVG_FUNC_DEF_422(2, 8, cpu); \
@@ -1054,10 +1054,10 @@
SETUP_INTRA_ANG16_32(33, 33, cpu);
#define SETUP_CHROMA_VERT_FUNC_DEF(W, H, cpu) \
- p.chroma[X265_CSP_I420].filter_vss[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu; \
- p.chroma[X265_CSP_I420].filter_vpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \
- p.chroma[X265_CSP_I420].filter_vps[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu; \
- p.chroma[X265_CSP_I420].filter_vsp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu;
+ p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].filter_vss = x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu; \
+ p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].filter_vpp = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \
+ p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].filter_vps = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu; \
+ p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].filter_vsp = x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu;
#define CHROMA_VERT_FILTERS(cpu) \
SETUP_CHROMA_VERT_FUNC_DEF(4, 4, cpu); \
@@ -1088,10 +1088,10 @@
SETUP_CHROMA_VERT_FUNC_DEF(6, 8, cpu);
#define SETUP_CHROMA_VERT_FUNC_DEF_422(W, H, cpu) \
- p.chroma[X265_CSP_I422].filter_vss[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu; \
- p.chroma[X265_CSP_I422].filter_vpp[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \
- p.chroma[X265_CSP_I422].filter_vps[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu; \
- p.chroma[X265_CSP_I422].filter_vsp[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].filter_vss = x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu; \
+ p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].filter_vpp = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \
+ p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].filter_vps = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu; \
+ p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].filter_vsp = x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu;
#define CHROMA_VERT_FILTERS_422(cpu) \
SETUP_CHROMA_VERT_FUNC_DEF_422(4, 8, cpu); \
@@ -1122,10 +1122,10 @@
SETUP_CHROMA_VERT_FUNC_DEF_422(6, 16, cpu);
#define SETUP_CHROMA_VERT_FUNC_DEF_444(W, H, cpu) \
- p.chroma[X265_CSP_I444].filter_vss[LUMA_ ## W ## x ## H] = x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu; \
- p.chroma[X265_CSP_I444].filter_vpp[LUMA_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \
- p.chroma[X265_CSP_I444].filter_vps[LUMA_ ## W ## x ## H] = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu; \
- p.chroma[X265_CSP_I444].filter_vsp[LUMA_ ## W ## x ## H] = x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu;
+ p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vss = x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu; \
+ p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vpp = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \
+ p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vps = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu; \
+ p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vsp = x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu;
#define CHROMA_VERT_FILTERS_444(cpu) \
SETUP_CHROMA_VERT_FUNC_DEF_444(8, 8, cpu); \
@@ -1154,8 +1154,8 @@
SETUP_CHROMA_VERT_FUNC_DEF_444(16, 64, cpu);
#define SETUP_CHROMA_HORIZ_FUNC_DEF(W, H, cpu) \
- p.chroma[X265_CSP_I420].filter_hpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \
- p.chroma[X265_CSP_I420].filter_hps[CHROMA_ ## W ## x ## H] = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu;
+ p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].filter_hpp = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \
+ p.chroma[X265_CSP_I420].pu[CHROMA_ ## W ## x ## H].filter_hps = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu;
#define CHROMA_HORIZ_FILTERS(cpu) \
SETUP_CHROMA_HORIZ_FUNC_DEF(4, 4, cpu); \
@@ -1184,8 +1184,8 @@
SETUP_CHROMA_HORIZ_FUNC_DEF(8, 32, cpu);
#define SETUP_CHROMA_HORIZ_FUNC_DEF_422(W, H, cpu) \
- p.chroma[X265_CSP_I422].filter_hpp[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \
- p.chroma[X265_CSP_I422].filter_hps[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].filter_hpp = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \
+ p.chroma[X265_CSP_I422].pu[CHROMA422_ ## W ## x ## H].filter_hps = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu;
#define CHROMA_HORIZ_FILTERS_422(cpu) \
SETUP_CHROMA_HORIZ_FUNC_DEF_422(4, 8, cpu); \
@@ -1214,8 +1214,8 @@
SETUP_CHROMA_HORIZ_FUNC_DEF_422(8, 64, cpu);
#define SETUP_CHROMA_HORIZ_FUNC_DEF_444(W, H, cpu) \
- p.chroma[X265_CSP_I444].filter_hpp[LUMA_ ## W ## x ## H] = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \
- p.chroma[X265_CSP_I444].filter_hps[LUMA_ ## W ## x ## H] = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu;
+ p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_hpp = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \
+ p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_hps = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu;
#define CHROMA_HORIZ_FILTERS_444(cpu) \
SETUP_CHROMA_HORIZ_FUNC_DEF_444(8, 8, cpu); \
@@ -1257,44 +1257,44 @@
INIT6(satd, _sse2);
HEVC_SATD(sse2);
- p.satd[LUMA_4x4] = x265_pixel_satd_4x4_mmx2;
+ p.pu[LUMA_4x4].satd = x265_pixel_satd_4x4_mmx2;
- p.sa8d_inter[LUMA_4x4] = x265_pixel_satd_4x4_mmx2;
+ p.pu[LUMA_4x4].sa8d_inter = x265_pixel_satd_4x4_mmx2;
SA8D_INTER_FROM_BLOCK(sse2);
- p.sa8d_inter[LUMA_8x8] = x265_pixel_sa8d_8x8_sse2;
- p.sa8d_inter[LUMA_16x16] = x265_pixel_sa8d_16x16_sse2;
+ p.pu[LUMA_8x8].sa8d_inter = x265_pixel_sa8d_8x8_sse2;
+ p.pu[LUMA_16x16].sa8d_inter = x265_pixel_sa8d_16x16_sse2;
- p.sse_ss[LUMA_4x4] = x265_pixel_ssd_ss_4x4_mmx2;
- p.sse_ss[LUMA_4x8] = x265_pixel_ssd_ss_4x8_mmx2;
- p.sse_ss[LUMA_4x16] = x265_pixel_ssd_ss_4x16_mmx2;
- p.sse_ss[LUMA_8x4] = x265_pixel_ssd_ss_8x4_sse2;
- p.sse_ss[LUMA_8x8] = x265_pixel_ssd_ss_8x8_sse2;
- p.sse_ss[LUMA_8x16] = x265_pixel_ssd_ss_8x16_sse2;
- p.sse_ss[LUMA_8x32] = x265_pixel_ssd_ss_8x32_sse2;
- p.sse_ss[LUMA_12x16] = x265_pixel_ssd_ss_12x16_sse2;
- p.sse_ss[LUMA_16x4] = x265_pixel_ssd_ss_16x4_sse2;
- p.sse_ss[LUMA_16x8] = x265_pixel_ssd_ss_16x8_sse2;
- p.sse_ss[LUMA_16x12] = x265_pixel_ssd_ss_16x12_sse2;
- p.sse_ss[LUMA_16x16] = x265_pixel_ssd_ss_16x16_sse2;
- p.sse_ss[LUMA_16x32] = x265_pixel_ssd_ss_16x32_sse2;
- p.sse_ss[LUMA_16x64] = x265_pixel_ssd_ss_16x64_sse2;
- p.sse_ss[LUMA_24x32] = x265_pixel_ssd_ss_24x32_sse2;
- p.sse_ss[LUMA_32x8] = x265_pixel_ssd_ss_32x8_sse2;
- p.sse_ss[LUMA_32x16] = x265_pixel_ssd_ss_32x16_sse2;
- p.sse_ss[LUMA_32x24] = x265_pixel_ssd_ss_32x24_sse2;
- p.sse_ss[LUMA_32x32] = x265_pixel_ssd_ss_32x32_sse2;
- p.sse_ss[LUMA_32x64] = x265_pixel_ssd_ss_32x64_sse2;
- p.sse_ss[LUMA_48x64] = x265_pixel_ssd_ss_48x64_sse2;
- p.sse_ss[LUMA_64x16] = x265_pixel_ssd_ss_64x16_sse2;
- p.sse_ss[LUMA_64x32] = x265_pixel_ssd_ss_64x32_sse2;
- p.sse_ss[LUMA_64x48] = x265_pixel_ssd_ss_64x48_sse2;
- p.sse_ss[LUMA_64x64] = x265_pixel_ssd_ss_64x64_sse2;
+ p.pu[LUMA_4x4].sse_ss = x265_pixel_ssd_ss_4x4_mmx2;
+ p.pu[LUMA_4x8].sse_ss = x265_pixel_ssd_ss_4x8_mmx2;
+ p.pu[LUMA_4x16].sse_ss = x265_pixel_ssd_ss_4x16_mmx2;
+ p.pu[LUMA_8x4].sse_ss = x265_pixel_ssd_ss_8x4_sse2;
+ p.pu[LUMA_8x8].sse_ss = x265_pixel_ssd_ss_8x8_sse2;
+ p.pu[LUMA_8x16].sse_ss = x265_pixel_ssd_ss_8x16_sse2;
+ p.pu[LUMA_8x32].sse_ss = x265_pixel_ssd_ss_8x32_sse2;
+ p.pu[LUMA_12x16].sse_ss = x265_pixel_ssd_ss_12x16_sse2;
+ p.pu[LUMA_16x4].sse_ss = x265_pixel_ssd_ss_16x4_sse2;
+ p.pu[LUMA_16x8].sse_ss = x265_pixel_ssd_ss_16x8_sse2;
+ p.pu[LUMA_16x12].sse_ss = x265_pixel_ssd_ss_16x12_sse2;
+ p.pu[LUMA_16x16].sse_ss = x265_pixel_ssd_ss_16x16_sse2;
+ p.pu[LUMA_16x32].sse_ss = x265_pixel_ssd_ss_16x32_sse2;
+ p.pu[LUMA_16x64].sse_ss = x265_pixel_ssd_ss_16x64_sse2;
+ p.pu[LUMA_24x32].sse_ss = x265_pixel_ssd_ss_24x32_sse2;
+ p.pu[LUMA_32x8].sse_ss = x265_pixel_ssd_ss_32x8_sse2;
+ p.pu[LUMA_32x16].sse_ss = x265_pixel_ssd_ss_32x16_sse2;
+ p.pu[LUMA_32x24].sse_ss = x265_pixel_ssd_ss_32x24_sse2;
+ p.pu[LUMA_32x32].sse_ss = x265_pixel_ssd_ss_32x32_sse2;
+ p.pu[LUMA_32x64].sse_ss = x265_pixel_ssd_ss_32x64_sse2;
+ p.pu[LUMA_48x64].sse_ss = x265_pixel_ssd_ss_48x64_sse2;
+ p.pu[LUMA_64x16].sse_ss = x265_pixel_ssd_ss_64x16_sse2;
+ p.pu[LUMA_64x32].sse_ss = x265_pixel_ssd_ss_64x32_sse2;
+ p.pu[LUMA_64x48].sse_ss = x265_pixel_ssd_ss_64x48_sse2;
+ p.pu[LUMA_64x64].sse_ss = x265_pixel_ssd_ss_64x64_sse2;
- p.transpose[BLOCK_4x4] = x265_transpose4_sse2;
- p.transpose[BLOCK_8x8] = x265_transpose8_sse2;
- p.transpose[BLOCK_16x16] = x265_transpose16_sse2;
- p.transpose[BLOCK_32x32] = x265_transpose32_sse2;
- p.transpose[BLOCK_64x64] = x265_transpose64_sse2;
+ p.cu[BLOCK_4x4].transpose = x265_transpose4_sse2;
+ p.cu[BLOCK_8x8].transpose = x265_transpose8_sse2;
+ p.cu[BLOCK_16x16].transpose = x265_transpose16_sse2;
+ p.cu[BLOCK_32x32].transpose = x265_transpose32_sse2;
+ p.cu[BLOCK_64x64].transpose = x265_transpose64_sse2;
p.ssim_4x4x2_core = x265_pixel_ssim_4x4x2_core_sse2;
p.ssim_end_4 = x265_pixel_ssim_end4_sse2;
@@ -1303,43 +1303,43 @@
LUMA_VAR(_sse2);
SAD_X3(sse2);
- p.sad_x3[LUMA_4x4] = x265_pixel_sad_x3_4x4_mmx2;
- p.sad_x3[LUMA_4x8] = x265_pixel_sad_x3_4x8_mmx2;
- p.sad_x3[LUMA_4x16] = x265_pixel_sad_x3_4x16_mmx2;
- p.sad_x3[LUMA_8x4] = x265_pixel_sad_x3_8x4_sse2;
- p.sad_x3[LUMA_8x8] = x265_pixel_sad_x3_8x8_sse2;
- p.sad_x3[LUMA_8x16] = x265_pixel_sad_x3_8x16_sse2;
- p.sad_x3[LUMA_8x32] = x265_pixel_sad_x3_8x32_sse2;
- p.sad_x3[LUMA_16x4] = x265_pixel_sad_x3_16x4_sse2;
- p.sad_x3[LUMA_12x16] = x265_pixel_sad_x3_12x16_mmx2;
+ p.pu[LUMA_4x4].sad_x3 = x265_pixel_sad_x3_4x4_mmx2;
+ p.pu[LUMA_4x8].sad_x3 = x265_pixel_sad_x3_4x8_mmx2;
+ p.pu[LUMA_4x16].sad_x3 = x265_pixel_sad_x3_4x16_mmx2;
+ p.pu[LUMA_8x4].sad_x3 = x265_pixel_sad_x3_8x4_sse2;
+ p.pu[LUMA_8x8].sad_x3 = x265_pixel_sad_x3_8x8_sse2;
+ p.pu[LUMA_8x16].sad_x3 = x265_pixel_sad_x3_8x16_sse2;
+ p.pu[LUMA_8x32].sad_x3 = x265_pixel_sad_x3_8x32_sse2;
+ p.pu[LUMA_16x4].sad_x3 = x265_pixel_sad_x3_16x4_sse2;
+ p.pu[LUMA_12x16].sad_x3 = x265_pixel_sad_x3_12x16_mmx2;
SAD_X4(sse2);
- p.sad_x4[LUMA_4x4] = x265_pixel_sad_x4_4x4_mmx2;
- p.sad_x4[LUMA_4x8] = x265_pixel_sad_x4_4x8_mmx2;
- p.sad_x4[LUMA_4x16] = x265_pixel_sad_x4_4x16_mmx2;
- p.sad_x4[LUMA_8x4] = x265_pixel_sad_x4_8x4_sse2;
- p.sad_x4[LUMA_8x8] = x265_pixel_sad_x4_8x8_sse2;
- p.sad_x4[LUMA_8x16] = x265_pixel_sad_x4_8x16_sse2;
- p.sad_x4[LUMA_8x32] = x265_pixel_sad_x4_8x32_sse2;
- p.sad_x4[LUMA_16x4] = x265_pixel_sad_x4_16x4_sse2;
- p.sad_x4[LUMA_12x16] = x265_pixel_sad_x4_12x16_mmx2;
+ p.pu[LUMA_4x4].sad_x4 = x265_pixel_sad_x4_4x4_mmx2;
+ p.pu[LUMA_4x8].sad_x4 = x265_pixel_sad_x4_4x8_mmx2;
+ p.pu[LUMA_4x16].sad_x4 = x265_pixel_sad_x4_4x16_mmx2;
+ p.pu[LUMA_8x4].sad_x4 = x265_pixel_sad_x4_8x4_sse2;
+ p.pu[LUMA_8x8].sad_x4 = x265_pixel_sad_x4_8x8_sse2;
+ p.pu[LUMA_8x16].sad_x4 = x265_pixel_sad_x4_8x16_sse2;
+ p.pu[LUMA_8x32].sad_x4 = x265_pixel_sad_x4_8x32_sse2;
+ p.pu[LUMA_16x4].sad_x4 = x265_pixel_sad_x4_16x4_sse2;
+ p.pu[LUMA_12x16].sad_x4 = x265_pixel_sad_x4_12x16_mmx2;
- p.cpy2Dto1D_shl[BLOCK_4x4] = x265_cpy2Dto1D_shl_4_sse2;
- p.cpy2Dto1D_shl[BLOCK_8x8] = x265_cpy2Dto1D_shl_8_sse2;
- p.cpy2Dto1D_shl[BLOCK_16x16] = x265_cpy2Dto1D_shl_16_sse2;
- p.cpy2Dto1D_shl[BLOCK_32x32] = x265_cpy2Dto1D_shl_32_sse2;
- p.cpy2Dto1D_shr[BLOCK_4x4] = x265_cpy2Dto1D_shr_4_sse2;
- p.cpy2Dto1D_shr[BLOCK_8x8] = x265_cpy2Dto1D_shr_8_sse2;
- p.cpy2Dto1D_shr[BLOCK_16x16] = x265_cpy2Dto1D_shr_16_sse2;
- p.cpy2Dto1D_shr[BLOCK_32x32] = x265_cpy2Dto1D_shr_32_sse2;
- p.cpy1Dto2D_shl[BLOCK_4x4] = x265_cpy1Dto2D_shl_4_sse2;
- p.cpy1Dto2D_shl[BLOCK_8x8] = x265_cpy1Dto2D_shl_8_sse2;
- p.cpy1Dto2D_shl[BLOCK_16x16] = x265_cpy1Dto2D_shl_16_sse2;
- p.cpy1Dto2D_shl[BLOCK_32x32] = x265_cpy1Dto2D_shl_32_sse2;
- p.cpy1Dto2D_shr[BLOCK_4x4] = x265_cpy1Dto2D_shr_4_sse2;
- p.cpy1Dto2D_shr[BLOCK_8x8] = x265_cpy1Dto2D_shr_8_sse2;
- p.cpy1Dto2D_shr[BLOCK_16x16] = x265_cpy1Dto2D_shr_16_sse2;
- p.cpy1Dto2D_shr[BLOCK_32x32] = x265_cpy1Dto2D_shr_32_sse2;
+ p.cu[BLOCK_4x4].cpy2Dto1D_shl = x265_cpy2Dto1D_shl_4_sse2;
+ p.cu[BLOCK_8x8].cpy2Dto1D_shl = x265_cpy2Dto1D_shl_8_sse2;
+ p.cu[BLOCK_16x16].cpy2Dto1D_shl = x265_cpy2Dto1D_shl_16_sse2;
+ p.cu[BLOCK_32x32].cpy2Dto1D_shl = x265_cpy2Dto1D_shl_32_sse2;
+ p.cu[BLOCK_4x4].cpy2Dto1D_shr = x265_cpy2Dto1D_shr_4_sse2;
+ p.cu[BLOCK_8x8].cpy2Dto1D_shr = x265_cpy2Dto1D_shr_8_sse2;
+ p.cu[BLOCK_16x16].cpy2Dto1D_shr = x265_cpy2Dto1D_shr_16_sse2;
+ p.cu[BLOCK_32x32].cpy2Dto1D_shr = x265_cpy2Dto1D_shr_32_sse2;
+ p.cu[BLOCK_4x4].cpy1Dto2D_shl = x265_cpy1Dto2D_shl_4_sse2;
+ p.cu[BLOCK_8x8].cpy1Dto2D_shl = x265_cpy1Dto2D_shl_8_sse2;
+ p.cu[BLOCK_16x16].cpy1Dto2D_shl = x265_cpy1Dto2D_shl_16_sse2;
+ p.cu[BLOCK_32x32].cpy1Dto2D_shl = x265_cpy1Dto2D_shl_32_sse2;
+ p.cu[BLOCK_4x4].cpy1Dto2D_shr = x265_cpy1Dto2D_shr_4_sse2;
+ p.cu[BLOCK_8x8].cpy1Dto2D_shr = x265_cpy1Dto2D_shr_8_sse2;
+ p.cu[BLOCK_16x16].cpy1Dto2D_shr = x265_cpy1Dto2D_shr_16_sse2;
+ p.cu[BLOCK_32x32].cpy1Dto2D_shr = x265_cpy1Dto2D_shr_32_sse2;
CHROMA_PIXELSUB_PS(_sse2);
CHROMA_PIXELSUB_PS_422(_sse2);
@@ -1357,28 +1357,28 @@
p.chroma[X265_CSP_I422].p2s = x265_chroma_p2s_sse2;
p.chroma[X265_CSP_I444].p2s = x265_luma_p2s_sse2; // for i444, chroma_p2s can be replaced by luma_p2s
- p.blockfill_s[BLOCK_4x4] = x265_blockfill_s_4x4_sse2;
- p.blockfill_s[BLOCK_8x8] = x265_blockfill_s_8x8_sse2;
- p.blockfill_s[BLOCK_16x16] = x265_blockfill_s_16x16_sse2;
- p.blockfill_s[BLOCK_32x32] = x265_blockfill_s_32x32_sse2;
+ p.cu[BLOCK_4x4].blockfill_s = x265_blockfill_s_4x4_sse2;
+ p.cu[BLOCK_8x8].blockfill_s = x265_blockfill_s_8x8_sse2;
+ p.cu[BLOCK_16x16].blockfill_s = x265_blockfill_s_16x16_sse2;
+ p.cu[BLOCK_32x32].blockfill_s = x265_blockfill_s_32x32_sse2;
// TODO: overflow in 12-bit mode!
- p.ssd_s[BLOCK_4x4] = x265_pixel_ssd_s_4_sse2;
- p.ssd_s[BLOCK_8x8] = x265_pixel_ssd_s_8_sse2;
- p.ssd_s[BLOCK_16x16] = x265_pixel_ssd_s_16_sse2;
- p.ssd_s[BLOCK_32x32] = x265_pixel_ssd_s_32_sse2;
+ p.cu[BLOCK_4x4].ssd_s = x265_pixel_ssd_s_4_sse2;
+ p.cu[BLOCK_8x8].ssd_s = x265_pixel_ssd_s_8_sse2;
+ p.cu[BLOCK_16x16].ssd_s = x265_pixel_ssd_s_16_sse2;
+ p.cu[BLOCK_32x32].ssd_s = x265_pixel_ssd_s_32_sse2;
- p.calcresidual[BLOCK_4x4] = x265_getResidual4_sse2;
- p.calcresidual[BLOCK_8x8] = x265_getResidual8_sse2;
- p.calcresidual[BLOCK_16x16] = x265_getResidual16_sse2;
- p.calcresidual[BLOCK_32x32] = x265_getResidual32_sse2;
+ p.cu[BLOCK_4x4].calcresidual = x265_getResidual4_sse2;
+ p.cu[BLOCK_8x8].calcresidual = x265_getResidual8_sse2;
+ p.cu[BLOCK_16x16].calcresidual = x265_getResidual16_sse2;
+ p.cu[BLOCK_32x32].calcresidual = x265_getResidual32_sse2;
- p.dct[DCT_4x4] = x265_dct4_sse2;
- p.idct[IDCT_4x4] = x265_idct4_sse2;
+ p.cu[BLOCK_4x4].dct = x265_dct4_sse2;
+ p.cu[BLOCK_4x4].idct = x265_idct4_sse2;
#if X86_64
- p.idct[IDCT_8x8] = x265_idct8_sse2;
+ p.cu[BLOCK_8x8].idct = x265_idct8_sse2;
#endif
- p.idct[IDST_4x4] = x265_idst4_sse2;
+ p.idst4x4 = x265_idst4_sse2;
LUMA_SS_FILTERS(_sse2);
}
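
For review purposes, the mechanical pattern behind all of these renames: the flat per-operation tables (p.dct[], p.copy_cnt[], ...), each keyed by its own enum, are folded into one struct per square block size, so every primitive for a given CU size is reached through a single p.cu[size] lookup; the lone DST transforms, which exist only at 4x4, move to top-level p.dst4x4 / p.idst4x4. A minimal compilable sketch of the before/after layout, with invented type and field names (the real declarations live in common/primitives.h):

    // Sketch only: names are illustrative, not the actual x265 declarations.
    #include <cstdint>
    #include <cstdio>

    typedef int16_t coeff_t;
    typedef void (*dct_t)(const int16_t* src, coeff_t* dst, intptr_t srcStride);

    enum { MY_BLOCK_4x4, MY_BLOCK_8x8, MY_BLOCK_16x16, MY_BLOCK_32x32, MY_NUM_BLOCKS };

    static void dct4_stub(const int16_t*, coeff_t*, intptr_t) { puts("dct 4x4"); }
    static void dct8_stub(const int16_t*, coeff_t*, intptr_t) { puts("dct 8x8"); }

    // Before: parallel flat arrays, each indexed by its own enum
    // (DCT_4x4, BLOCK_4x4, ...), kept in sync by hand.
    struct OldPrimitives { dct_t dct[MY_NUM_BLOCKS]; dct_t idct[MY_NUM_BLOCKS]; };

    // After: one struct per square block size bundles all per-size primitives.
    struct CUPrimitives  { dct_t dct; dct_t idct; };
    struct NewPrimitives { CUPrimitives cu[MY_NUM_BLOCKS]; };

    int main()
    {
        NewPrimitives p = {};
        p.cu[MY_BLOCK_4x4].dct = dct4_stub;  // was: p.dct[DCT_4x4] = dct4_c;
        p.cu[MY_BLOCK_8x8].dct = dct8_stub;  // was: p.dct[DCT_8x8] = dct8_c;
        p.cu[MY_BLOCK_4x4].dct(0, 0, 0);
        return 0;
    }
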
@@ -1389,8 +1389,8 @@
INTRA_ANG_SSSE3(ssse3);
- p.dct[DST_4x4] = x265_dst4_ssse3;
- p.idct[IDCT_8x8] = x265_idct8_ssse3;
+ p.dst4x4 = x265_dst4_ssse3;
+ p.cu[BLOCK_8x8].idct = x265_idct8_ssse3;
p.count_nonzero = x265_count_nonzero_ssse3;
}
if (cpuMask & X265_CPU_SSE4)
@@ -1405,7 +1405,7 @@
CHROMA_VERT_FILTERS_SSE4_422(_sse4);
CHROMA_HORIZ_FILTERS_444(_sse4);
- p.dct[DCT_8x8] = x265_dct8_sse4;
+ p.cu[BLOCK_8x8].dct = x265_dct8_sse4;
p.quant = x265_quant_sse4;
p.nquant = x265_nquant_sse4;
p.dequant_normal = x265_dequant_normal_sse4;
@@ -1423,12 +1423,12 @@
INTRA_ANG_SSE4_COMMON(sse4);
INTRA_ANG_SSE4_HIGH(sse4);
- p.psy_cost_pp[BLOCK_4x4] = x265_psyCost_pp_4x4_sse4;
+ p.cu[BLOCK_4x4].psy_cost_pp = x265_psyCost_pp_4x4_sse4;
#if X86_64
- p.psy_cost_pp[BLOCK_8x8] = x265_psyCost_pp_8x8_sse4;
- p.psy_cost_pp[BLOCK_16x16] = x265_psyCost_pp_16x16_sse4;
- p.psy_cost_pp[BLOCK_32x32] = x265_psyCost_pp_32x32_sse4;
- p.psy_cost_pp[BLOCK_64x64] = x265_psyCost_pp_64x64_sse4;
+ p.cu[BLOCK_8x8].psy_cost_pp = x265_psyCost_pp_8x8_sse4;
+ p.cu[BLOCK_16x16].psy_cost_pp = x265_psyCost_pp_16x16_sse4;
+ p.cu[BLOCK_32x32].psy_cost_pp = x265_psyCost_pp_32x32_sse4;
+ p.cu[BLOCK_64x64].psy_cost_pp = x265_psyCost_pp_64x64_sse4;
#endif
}
if (cpuMask & X265_CPU_XOP)
@@ -1440,59 +1440,59 @@
}
if (cpuMask & X265_CPU_AVX2)
{
- p.dct[DCT_4x4] = x265_dct4_avx2;
+ p.cu[BLOCK_4x4].dct = x265_dct4_avx2;
p.quant = x265_quant_avx2;
p.nquant = x265_nquant_avx2;
- p.dequant_normal = x265_dequant_normal_avx2;
+ p.dequant_normal = x265_dequant_normal_avx2;
p.scale1D_128to64 = x265_scale1D_128to64_avx2;
- p.cpy1Dto2D_shl[BLOCK_4x4] = x265_cpy1Dto2D_shl_4_avx2;
- p.cpy1Dto2D_shl[BLOCK_8x8] = x265_cpy1Dto2D_shl_8_avx2;
- p.cpy1Dto2D_shl[BLOCK_16x16] = x265_cpy1Dto2D_shl_16_avx2;
- p.cpy1Dto2D_shl[BLOCK_32x32] = x265_cpy1Dto2D_shl_32_avx2;
- p.cpy1Dto2D_shr[BLOCK_4x4] = x265_cpy1Dto2D_shr_4_avx2;
- p.cpy1Dto2D_shr[BLOCK_8x8] = x265_cpy1Dto2D_shr_8_avx2;
- p.cpy1Dto2D_shr[BLOCK_16x16] = x265_cpy1Dto2D_shr_16_avx2;
- p.cpy1Dto2D_shr[BLOCK_32x32] = x265_cpy1Dto2D_shr_32_avx2;
+ p.cu[BLOCK_4x4].cpy1Dto2D_shl = x265_cpy1Dto2D_shl_4_avx2;
+ p.cu[BLOCK_8x8].cpy1Dto2D_shl = x265_cpy1Dto2D_shl_8_avx2;
+ p.cu[BLOCK_16x16].cpy1Dto2D_shl = x265_cpy1Dto2D_shl_16_avx2;
+ p.cu[BLOCK_32x32].cpy1Dto2D_shl = x265_cpy1Dto2D_shl_32_avx2;
+ p.cu[BLOCK_4x4].cpy1Dto2D_shr = x265_cpy1Dto2D_shr_4_avx2;
+ p.cu[BLOCK_8x8].cpy1Dto2D_shr = x265_cpy1Dto2D_shr_8_avx2;
+ p.cu[BLOCK_16x16].cpy1Dto2D_shr = x265_cpy1Dto2D_shr_16_avx2;
+ p.cu[BLOCK_32x32].cpy1Dto2D_shr = x265_cpy1Dto2D_shr_32_avx2;
#if X86_64
- p.dct[DCT_8x8] = x265_dct8_avx2;
- p.dct[DCT_16x16] = x265_dct16_avx2;
- p.dct[DCT_32x32] = x265_dct32_avx2;
- p.idct[IDCT_4x4] = x265_idct4_avx2;
- p.idct[IDCT_8x8] = x265_idct8_avx2;
- p.idct[IDCT_16x16] = x265_idct16_avx2;
- p.idct[IDCT_32x32] = x265_idct32_avx2;
- p.transpose[BLOCK_8x8] = x265_transpose8_avx2;
- p.transpose[BLOCK_16x16] = x265_transpose16_avx2;
- p.transpose[BLOCK_32x32] = x265_transpose32_avx2;
- p.transpose[BLOCK_64x64] = x265_transpose64_avx2;
+ p.cu[BLOCK_8x8].dct = x265_dct8_avx2;
+ p.cu[BLOCK_16x16].dct = x265_dct16_avx2;
+ p.cu[BLOCK_32x32].dct = x265_dct32_avx2;
+ p.cu[BLOCK_4x4].idct = x265_idct4_avx2;
+ p.cu[BLOCK_8x8].idct = x265_idct8_avx2;
+ p.cu[BLOCK_16x16].idct = x265_idct16_avx2;
+ p.cu[BLOCK_32x32].idct = x265_idct32_avx2;
+ p.cu[BLOCK_8x8].transpose = x265_transpose8_avx2;
+ p.cu[BLOCK_16x16].transpose = x265_transpose16_avx2;
+ p.cu[BLOCK_32x32].transpose = x265_transpose32_avx2;
+ p.cu[BLOCK_64x64].transpose = x265_transpose64_avx2;
#endif
}
/* at HIGH_BIT_DEPTH, pixel == short so we can reuse a number of primitives */
for (int i = 0; i < NUM_LUMA_PARTITIONS; i++)
{
- p.sse_pp[i] = (pixelcmp_t)p.sse_ss[i];
- p.sse_sp[i] = (pixelcmp_sp_t)p.sse_ss[i];
+ p.pu[i].sse_pp = (pixelcmp_t)p.pu[i].sse_ss;
+ p.pu[i].sse_sp = (pixelcmp_sp_t)p.pu[i].sse_ss;
}
for (int i = 0; i < NUM_LUMA_PARTITIONS; i++)
{
- p.luma_copy_ps[i] = (copy_ps_t)p.luma_copy_ss[i];
- p.luma_copy_sp[i] = (copy_sp_t)p.luma_copy_ss[i];
- p.luma_copy_pp[i] = (copy_pp_t)p.luma_copy_ss[i];
+ p.pu[i].luma_copy_ps = (copy_ps_t)p.pu[i].luma_copy_ss;
+ p.pu[i].luma_copy_sp = (copy_sp_t)p.pu[i].luma_copy_ss;
+ p.pu[i].luma_copy_pp = (copy_pp_t)p.pu[i].luma_copy_ss;
}
for (int i = 0; i < NUM_CHROMA_PARTITIONS; i++)
{
- p.chroma[X265_CSP_I420].copy_ps[i] = (copy_ps_t)p.chroma[X265_CSP_I420].copy_ss[i];
- p.chroma[X265_CSP_I420].copy_sp[i] = (copy_sp_t)p.chroma[X265_CSP_I420].copy_ss[i];
- p.chroma[X265_CSP_I420].copy_pp[i] = (copy_pp_t)p.chroma[X265_CSP_I420].copy_ss[i];
+ p.chroma[X265_CSP_I420].pu[i].copy_ps = (copy_ps_t)p.chroma[X265_CSP_I420].pu[i].copy_ss;
+ p.chroma[X265_CSP_I420].pu[i].copy_sp = (copy_sp_t)p.chroma[X265_CSP_I420].pu[i].copy_ss;
+ p.chroma[X265_CSP_I420].pu[i].copy_pp = (copy_pp_t)p.chroma[X265_CSP_I420].pu[i].copy_ss;
}
for (int i = 0; i < NUM_CHROMA_PARTITIONS; i++)
{
- p.chroma[X265_CSP_I422].copy_ps[i] = (copy_ps_t)p.chroma[X265_CSP_I422].copy_ss[i];
- p.chroma[X265_CSP_I422].copy_sp[i] = (copy_sp_t)p.chroma[X265_CSP_I422].copy_ss[i];
- p.chroma[X265_CSP_I422].copy_pp[i] = (copy_pp_t)p.chroma[X265_CSP_I422].copy_ss[i];
+ p.chroma[X265_CSP_I422].pu[i].copy_ps = (copy_ps_t)p.chroma[X265_CSP_I422].pu[i].copy_ss;
+ p.chroma[X265_CSP_I422].pu[i].copy_sp = (copy_sp_t)p.chroma[X265_CSP_I422].pu[i].copy_ss;
+ p.chroma[X265_CSP_I422].pu[i].copy_pp = (copy_pp_t)p.chroma[X265_CSP_I422].pu[i].copy_ss;
}
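
The casts in these loops are only safe because at HIGH_BIT_DEPTH a pixel is a 16-bit value with the same size and layout as the int16_t used by the ss (short-to-short) primitives, so one implementation can stand in for the pp/ps/sp variants. A simplified model of what the aliasing relies on (typedefs invented for this sketch; calling through a cast function pointer is deliberate ABI-level reuse, exactly as the real tables do it):

    #define HIGH_BIT_DEPTH 1  // assumed for this sketch
    #include <cstdint>

    #if HIGH_BIT_DEPTH
    typedef uint16_t pixel;   // same size/representation as int16_t
    #else
    typedef uint8_t pixel;
    #endif

    typedef int (*pixelcmp_t)(const pixel*, intptr_t, const pixel*, intptr_t);
    typedef int (*pixelcmp_ss_t)(const int16_t*, intptr_t, const int16_t*, intptr_t);

    // One short-to-short SSE implementation (plain C here) ...
    static int sse_ss_4x4(const int16_t* a, intptr_t sa, const int16_t* b, intptr_t sb)
    {
        int sum = 0;
        for (int y = 0; y < 4; y++, a += sa, b += sb)
            for (int x = 0; x < 4; x++) { int d = a[x] - b[x]; sum += d * d; }
        return sum;
    }

    int main()
    {
        // ... doubles as the pixel-to-pixel version, because 10/12-bit
        // pixel values fit in int16_t without changing interpretation.
        pixelcmp_ss_t sse_ss = sse_ss_4x4;
        pixelcmp_t    sse_pp = (pixelcmp_t)sse_ss;  // mirrors the loops above

        pixel a[16] = {}, b[16] = {};
        b[0] = 3;
        return sse_pp(a, 4, b, 4) == 9 ? 0 : 1;  // exits 0 on success
    }
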
#else // if HIGH_BIT_DEPTH
@@ -1502,7 +1502,7 @@
INIT8(sad, _mmx2);
INIT8(sad_x3, _mmx2);
INIT8(sad_x4, _mmx2);
- p.satd[LUMA_4x4] = x265_pixel_satd_4x4_mmx2;
+ p.pu[LUMA_4x4].satd = x265_pixel_satd_4x4_mmx2;
p.frameInitLowres = x265_frame_init_lowres_core_mmx2;
PIXEL_AVG(sse2);
@@ -1541,52 +1541,52 @@
// until all partitions are coded and commit smaller patches, easier to
// review.
- p.blockfill_s[BLOCK_4x4] = x265_blockfill_s_4x4_sse2;
- p.blockfill_s[BLOCK_8x8] = x265_blockfill_s_8x8_sse2;
- p.blockfill_s[BLOCK_16x16] = x265_blockfill_s_16x16_sse2;
- p.blockfill_s[BLOCK_32x32] = x265_blockfill_s_32x32_sse2;
+ p.cu[BLOCK_4x4].blockfill_s = x265_blockfill_s_4x4_sse2;
+ p.cu[BLOCK_8x8].blockfill_s = x265_blockfill_s_8x8_sse2;
+ p.cu[BLOCK_16x16].blockfill_s = x265_blockfill_s_16x16_sse2;
+ p.cu[BLOCK_32x32].blockfill_s = x265_blockfill_s_32x32_sse2;
- p.ssd_s[BLOCK_4x4] = x265_pixel_ssd_s_4_sse2;
- p.ssd_s[BLOCK_8x8] = x265_pixel_ssd_s_8_sse2;
- p.ssd_s[BLOCK_16x16] = x265_pixel_ssd_s_16_sse2;
- p.ssd_s[BLOCK_32x32] = x265_pixel_ssd_s_32_sse2;
+ p.cu[BLOCK_4x4].ssd_s = x265_pixel_ssd_s_4_sse2;
+ p.cu[BLOCK_8x8].ssd_s = x265_pixel_ssd_s_8_sse2;
+ p.cu[BLOCK_16x16].ssd_s = x265_pixel_ssd_s_16_sse2;
+ p.cu[BLOCK_32x32].ssd_s = x265_pixel_ssd_s_32_sse2;
p.frameInitLowres = x265_frame_init_lowres_core_sse2;
SA8D_INTER_FROM_BLOCK(sse2);
- p.cpy2Dto1D_shl[BLOCK_4x4] = x265_cpy2Dto1D_shl_4_sse2;
- p.cpy2Dto1D_shl[BLOCK_8x8] = x265_cpy2Dto1D_shl_8_sse2;
- p.cpy2Dto1D_shl[BLOCK_16x16] = x265_cpy2Dto1D_shl_16_sse2;
- p.cpy2Dto1D_shl[BLOCK_32x32] = x265_cpy2Dto1D_shl_32_sse2;
- p.cpy2Dto1D_shr[BLOCK_4x4] = x265_cpy2Dto1D_shr_4_sse2;
- p.cpy2Dto1D_shr[BLOCK_8x8] = x265_cpy2Dto1D_shr_8_sse2;
- p.cpy2Dto1D_shr[BLOCK_16x16] = x265_cpy2Dto1D_shr_16_sse2;
- p.cpy2Dto1D_shr[BLOCK_32x32] = x265_cpy2Dto1D_shr_32_sse2;
- p.cpy1Dto2D_shl[BLOCK_4x4] = x265_cpy1Dto2D_shl_4_sse2;
- p.cpy1Dto2D_shl[BLOCK_8x8] = x265_cpy1Dto2D_shl_8_sse2;
- p.cpy1Dto2D_shl[BLOCK_16x16] = x265_cpy1Dto2D_shl_16_sse2;
- p.cpy1Dto2D_shl[BLOCK_32x32] = x265_cpy1Dto2D_shl_32_sse2;
- p.cpy1Dto2D_shr[BLOCK_4x4] = x265_cpy1Dto2D_shr_4_sse2;
- p.cpy1Dto2D_shr[BLOCK_8x8] = x265_cpy1Dto2D_shr_8_sse2;
- p.cpy1Dto2D_shr[BLOCK_16x16] = x265_cpy1Dto2D_shr_16_sse2;
- p.cpy1Dto2D_shr[BLOCK_32x32] = x265_cpy1Dto2D_shr_32_sse2;
+ p.cu[BLOCK_4x4].cpy2Dto1D_shl = x265_cpy2Dto1D_shl_4_sse2;
+ p.cu[BLOCK_8x8].cpy2Dto1D_shl = x265_cpy2Dto1D_shl_8_sse2;
+ p.cu[BLOCK_16x16].cpy2Dto1D_shl = x265_cpy2Dto1D_shl_16_sse2;
+ p.cu[BLOCK_32x32].cpy2Dto1D_shl = x265_cpy2Dto1D_shl_32_sse2;
+ p.cu[BLOCK_4x4].cpy2Dto1D_shr = x265_cpy2Dto1D_shr_4_sse2;
+ p.cu[BLOCK_8x8].cpy2Dto1D_shr = x265_cpy2Dto1D_shr_8_sse2;
+ p.cu[BLOCK_16x16].cpy2Dto1D_shr = x265_cpy2Dto1D_shr_16_sse2;
+ p.cu[BLOCK_32x32].cpy2Dto1D_shr = x265_cpy2Dto1D_shr_32_sse2;
+ p.cu[BLOCK_4x4].cpy1Dto2D_shl = x265_cpy1Dto2D_shl_4_sse2;
+ p.cu[BLOCK_8x8].cpy1Dto2D_shl = x265_cpy1Dto2D_shl_8_sse2;
+ p.cu[BLOCK_16x16].cpy1Dto2D_shl = x265_cpy1Dto2D_shl_16_sse2;
+ p.cu[BLOCK_32x32].cpy1Dto2D_shl = x265_cpy1Dto2D_shl_32_sse2;
+ p.cu[BLOCK_4x4].cpy1Dto2D_shr = x265_cpy1Dto2D_shr_4_sse2;
+ p.cu[BLOCK_8x8].cpy1Dto2D_shr = x265_cpy1Dto2D_shr_8_sse2;
+ p.cu[BLOCK_16x16].cpy1Dto2D_shr = x265_cpy1Dto2D_shr_16_sse2;
+ p.cu[BLOCK_32x32].cpy1Dto2D_shr = x265_cpy1Dto2D_shr_32_sse2;
- p.calcresidual[BLOCK_4x4] = x265_getResidual4_sse2;
- p.calcresidual[BLOCK_8x8] = x265_getResidual8_sse2;
- p.transpose[BLOCK_4x4] = x265_transpose4_sse2;
- p.transpose[BLOCK_8x8] = x265_transpose8_sse2;
- p.transpose[BLOCK_16x16] = x265_transpose16_sse2;
- p.transpose[BLOCK_32x32] = x265_transpose32_sse2;
- p.transpose[BLOCK_64x64] = x265_transpose64_sse2;
+ p.cu[BLOCK_4x4].calcresidual = x265_getResidual4_sse2;
+ p.cu[BLOCK_8x8].calcresidual = x265_getResidual8_sse2;
+ p.cu[BLOCK_4x4].transpose = x265_transpose4_sse2;
+ p.cu[BLOCK_8x8].transpose = x265_transpose8_sse2;
+ p.cu[BLOCK_16x16].transpose = x265_transpose16_sse2;
+ p.cu[BLOCK_32x32].transpose = x265_transpose32_sse2;
+ p.cu[BLOCK_64x64].transpose = x265_transpose64_sse2;
p.ssim_4x4x2_core = x265_pixel_ssim_4x4x2_core_sse2;
p.ssim_end_4 = x265_pixel_ssim_end4_sse2;
- p.dct[DCT_4x4] = x265_dct4_sse2;
- p.idct[IDCT_4x4] = x265_idct4_sse2;
+ p.cu[BLOCK_4x4].dct = x265_dct4_sse2;
+ p.cu[BLOCK_4x4].idct = x265_idct4_sse2;
#if X86_64
- p.idct[IDCT_8x8] = x265_idct8_sse2;
+ p.cu[BLOCK_8x8].idct = x265_idct8_sse2;
#endif
- p.idct[IDST_4x4] = x265_idst4_sse2;
+ p.idst4x4 = x265_idst4_sse2;
p.planecopy_sp = x265_downShift_16_sse2;
}
@@ -1594,7 +1594,7 @@
{
p.frameInitLowres = x265_frame_init_lowres_core_ssse3;
SA8D_INTER_FROM_BLOCK(ssse3);
- p.sse_pp[LUMA_4x4] = x265_pixel_ssd_4x4_ssse3;
+ p.pu[LUMA_4x4].sse_pp = x265_pixel_ssd_4x4_ssse3;
ASSGN_SSE(ssse3);
PIXEL_AVG(ssse3);
PIXEL_AVG_W4(ssse3);
@@ -1605,23 +1605,23 @@
p.scale2D_64to32 = x265_scale2D_64to32_ssse3;
SAD_X3(ssse3);
SAD_X4(ssse3);
- p.sad_x4[LUMA_8x4] = x265_pixel_sad_x4_8x4_ssse3;
- p.sad_x4[LUMA_8x8] = x265_pixel_sad_x4_8x8_ssse3;
- p.sad_x3[LUMA_8x16] = x265_pixel_sad_x3_8x16_ssse3;
- p.sad_x4[LUMA_8x16] = x265_pixel_sad_x4_8x16_ssse3;
- p.sad_x3[LUMA_8x32] = x265_pixel_sad_x3_8x32_ssse3;
- p.sad_x4[LUMA_8x32] = x265_pixel_sad_x4_8x32_ssse3;
+ p.pu[LUMA_8x4].sad_x4 = x265_pixel_sad_x4_8x4_ssse3;
+ p.pu[LUMA_8x8].sad_x4 = x265_pixel_sad_x4_8x8_ssse3;
+ p.pu[LUMA_8x16].sad_x3 = x265_pixel_sad_x3_8x16_ssse3;
+ p.pu[LUMA_8x16].sad_x4 = x265_pixel_sad_x4_8x16_ssse3;
+ p.pu[LUMA_8x32].sad_x3 = x265_pixel_sad_x3_8x32_ssse3;
+ p.pu[LUMA_8x32].sad_x4 = x265_pixel_sad_x4_8x32_ssse3;
- p.sad_x3[LUMA_12x16] = x265_pixel_sad_x3_12x16_ssse3;
- p.sad_x4[LUMA_12x16] = x265_pixel_sad_x4_12x16_ssse3;
+ p.pu[LUMA_12x16].sad_x3 = x265_pixel_sad_x3_12x16_ssse3;
+ p.pu[LUMA_12x16].sad_x4 = x265_pixel_sad_x4_12x16_ssse3;
p.luma_p2s = x265_luma_p2s_ssse3;
p.chroma[X265_CSP_I420].p2s = x265_chroma_p2s_ssse3;
p.chroma[X265_CSP_I422].p2s = x265_chroma_p2s_ssse3;
p.chroma[X265_CSP_I444].p2s = x265_luma_p2s_ssse3; // for i444, chroma_p2s can use luma_p2s
- p.dct[DST_4x4] = x265_dst4_ssse3;
- p.idct[IDCT_8x8] = x265_idct8_ssse3;
+ p.dst4x4 = x265_dst4_ssse3;
+ p.cu[BLOCK_8x8].idct = x265_idct8_ssse3;
p.count_nonzero = x265_count_nonzero_ssse3;
}
if (cpuMask & X265_CPU_SSE4)
@@ -1638,21 +1638,21 @@
CHROMA_ADDAVG_422(_sse4);
// TODO: check POPCNT flag!
- p.copy_cnt[BLOCK_4x4] = x265_copy_cnt_4_sse4;
- p.copy_cnt[BLOCK_8x8] = x265_copy_cnt_8_sse4;
- p.copy_cnt[BLOCK_16x16] = x265_copy_cnt_16_sse4;
- p.copy_cnt[BLOCK_32x32] = x265_copy_cnt_32_sse4;
+ p.cu[BLOCK_4x4].copy_cnt = x265_copy_cnt_4_sse4;
+ p.cu[BLOCK_8x8].copy_cnt = x265_copy_cnt_8_sse4;
+ p.cu[BLOCK_16x16].copy_cnt = x265_copy_cnt_16_sse4;
+ p.cu[BLOCK_32x32].copy_cnt = x265_copy_cnt_32_sse4;
HEVC_SATD(sse4);
SA8D_INTER_FROM_BLOCK(sse4);
- p.sse_pp[LUMA_12x16] = x265_pixel_ssd_12x16_sse4;
- p.sse_pp[LUMA_24x32] = x265_pixel_ssd_24x32_sse4;
- p.sse_pp[LUMA_48x64] = x265_pixel_ssd_48x64_sse4;
- p.sse_pp[LUMA_64x16] = x265_pixel_ssd_64x16_sse4;
- p.sse_pp[LUMA_64x32] = x265_pixel_ssd_64x32_sse4;
- p.sse_pp[LUMA_64x48] = x265_pixel_ssd_64x48_sse4;
- p.sse_pp[LUMA_64x64] = x265_pixel_ssd_64x64_sse4;
+ p.pu[LUMA_12x16].sse_pp = x265_pixel_ssd_12x16_sse4;
+ p.pu[LUMA_24x32].sse_pp = x265_pixel_ssd_24x32_sse4;
+ p.pu[LUMA_48x64].sse_pp = x265_pixel_ssd_48x64_sse4;
+ p.pu[LUMA_64x16].sse_pp = x265_pixel_ssd_64x16_sse4;
+ p.pu[LUMA_64x32].sse_pp = x265_pixel_ssd_64x32_sse4;
+ p.pu[LUMA_64x48].sse_pp = x265_pixel_ssd_64x48_sse4;
+ p.pu[LUMA_64x64].sse_pp = x265_pixel_ssd_64x64_sse4;
LUMA_SSE_SP(_sse4);
@@ -1673,17 +1673,17 @@
ASSGN_SSE_SS(sse4);
// MUST be done after LUMA_FILTERS() to overwrite default version
- p.luma_hvpp[LUMA_8x8] = x265_interp_8tap_hv_pp_8x8_sse4;
+ p.pu[LUMA_8x8].luma_hvpp = x265_interp_8tap_hv_pp_8x8_sse4;
- p.chroma[X265_CSP_I420].copy_sp[CHROMA_2x4] = x265_blockcopy_sp_2x4_sse4;
- p.chroma[X265_CSP_I420].copy_sp[CHROMA_2x8] = x265_blockcopy_sp_2x8_sse4;
- p.chroma[X265_CSP_I420].copy_sp[CHROMA_6x8] = x265_blockcopy_sp_6x8_sse4;
+ p.chroma[X265_CSP_I420].pu[CHROMA_2x4].copy_sp = x265_blockcopy_sp_2x4_sse4;
+ p.chroma[X265_CSP_I420].pu[CHROMA_2x8].copy_sp = x265_blockcopy_sp_2x8_sse4;
+ p.chroma[X265_CSP_I420].pu[CHROMA_6x8].copy_sp = x265_blockcopy_sp_6x8_sse4;
CHROMA_BLOCKCOPY(ps, _sse4);
CHROMA_BLOCKCOPY_422(ps, _sse4);
LUMA_BLOCKCOPY(ps, _sse4);
- p.calcresidual[BLOCK_16x16] = x265_getResidual16_sse4;
- p.calcresidual[BLOCK_32x32] = x265_getResidual32_sse4;
+ p.cu[BLOCK_16x16].calcresidual = x265_getResidual16_sse4;
+ p.cu[BLOCK_32x32].calcresidual = x265_getResidual32_sse4;
p.quant = x265_quant_sse4;
p.nquant = x265_nquant_sse4;
p.dequant_normal = x265_dequant_normal_sse4;
@@ -1707,14 +1707,14 @@
INTRA_ANG_SSE4_COMMON(sse4);
INTRA_ANG_SSE4(sse4);
- p.dct[DCT_8x8] = x265_dct8_sse4;
+ p.cu[BLOCK_8x8].dct = x265_dct8_sse4;
p.denoiseDct = x265_denoise_dct_sse4;
- p.psy_cost_pp[BLOCK_4x4] = x265_psyCost_pp_4x4_sse4;
+ p.cu[BLOCK_4x4].psy_cost_pp = x265_psyCost_pp_4x4_sse4;
#if X86_64
- p.psy_cost_pp[BLOCK_8x8] = x265_psyCost_pp_8x8_sse4;
- p.psy_cost_pp[BLOCK_16x16] = x265_psyCost_pp_16x16_sse4;
- p.psy_cost_pp[BLOCK_32x32] = x265_psyCost_pp_32x32_sse4;
- p.psy_cost_pp[BLOCK_64x64] = x265_psyCost_pp_64x64_sse4;
+ p.cu[BLOCK_8x8].psy_cost_pp = x265_psyCost_pp_8x8_sse4;
+ p.cu[BLOCK_16x16].psy_cost_pp = x265_psyCost_pp_16x16_sse4;
+ p.cu[BLOCK_32x32].psy_cost_pp = x265_psyCost_pp_32x32_sse4;
+ p.cu[BLOCK_64x64].psy_cost_pp = x265_psyCost_pp_64x64_sse4;
#endif
}
if (cpuMask & X265_CPU_AVX)
@@ -1727,36 +1727,36 @@
ASSGN_SSE_SS(avx);
SAD_X3(avx);
SAD_X4(avx);
- p.sad_x3[LUMA_12x16] = x265_pixel_sad_x3_12x16_avx;
- p.sad_x4[LUMA_12x16] = x265_pixel_sad_x4_12x16_avx;
- p.sad_x3[LUMA_16x4] = x265_pixel_sad_x3_16x4_avx;
- p.sad_x4[LUMA_16x4] = x265_pixel_sad_x4_16x4_avx;
+ p.pu[LUMA_12x16].sad_x3 = x265_pixel_sad_x3_12x16_avx;
+ p.pu[LUMA_12x16].sad_x4 = x265_pixel_sad_x4_12x16_avx;
+ p.pu[LUMA_16x4].sad_x3 = x265_pixel_sad_x3_16x4_avx;
+ p.pu[LUMA_16x4].sad_x4 = x265_pixel_sad_x4_16x4_avx;
p.ssim_4x4x2_core = x265_pixel_ssim_4x4x2_core_avx;
p.ssim_end_4 = x265_pixel_ssim_end4_avx;
- p.luma_copy_ss[LUMA_64x16] = x265_blockcopy_ss_64x16_avx;
- p.luma_copy_ss[LUMA_64x32] = x265_blockcopy_ss_64x32_avx;
- p.luma_copy_ss[LUMA_64x48] = x265_blockcopy_ss_64x48_avx;
- p.luma_copy_ss[LUMA_64x64] = x265_blockcopy_ss_64x64_avx;
+ p.pu[LUMA_64x16].luma_copy_ss = x265_blockcopy_ss_64x16_avx;
+ p.pu[LUMA_64x32].luma_copy_ss = x265_blockcopy_ss_64x32_avx;
+ p.pu[LUMA_64x48].luma_copy_ss = x265_blockcopy_ss_64x48_avx;
+ p.pu[LUMA_64x64].luma_copy_ss = x265_blockcopy_ss_64x64_avx;
- p.chroma[X265_CSP_I420].copy_pp[CHROMA_32x8] = x265_blockcopy_pp_32x8_avx;
- p.luma_copy_pp[LUMA_32x8] = x265_blockcopy_pp_32x8_avx;
+ p.chroma[X265_CSP_I420].pu[CHROMA_32x8].copy_pp = x265_blockcopy_pp_32x8_avx;
+ p.pu[LUMA_32x8].luma_copy_pp = x265_blockcopy_pp_32x8_avx;
- p.chroma[X265_CSP_I420].copy_pp[CHROMA_32x16] = x265_blockcopy_pp_32x16_avx;
- p.chroma[X265_CSP_I422].copy_pp[CHROMA422_32x16] = x265_blockcopy_pp_32x16_avx;
- p.luma_copy_pp[LUMA_32x16] = x265_blockcopy_pp_32x16_avx;
+ p.chroma[X265_CSP_I420].pu[CHROMA_32x16].copy_pp = x265_blockcopy_pp_32x16_avx;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_32x16].copy_pp = x265_blockcopy_pp_32x16_avx;
+ p.pu[LUMA_32x16].luma_copy_pp = x265_blockcopy_pp_32x16_avx;
- p.chroma[X265_CSP_I420].copy_pp[CHROMA_32x24] = x265_blockcopy_pp_32x24_avx;
- p.luma_copy_pp[LUMA_32x24] = x265_blockcopy_pp_32x24_avx;
+ p.chroma[X265_CSP_I420].pu[CHROMA_32x24].copy_pp = x265_blockcopy_pp_32x24_avx;
+ p.pu[LUMA_32x24].luma_copy_pp = x265_blockcopy_pp_32x24_avx;
- p.chroma[X265_CSP_I420].copy_pp[CHROMA_32x32] = x265_blockcopy_pp_32x32_avx;
- p.chroma[X265_CSP_I422].copy_pp[CHROMA422_32x32] = x265_blockcopy_pp_32x32_avx;
- p.luma_copy_pp[LUMA_32x32] = x265_blockcopy_pp_32x32_avx;
+ p.chroma[X265_CSP_I420].pu[CHROMA_32x32].copy_pp = x265_blockcopy_pp_32x32_avx;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_32x32].copy_pp = x265_blockcopy_pp_32x32_avx;
+ p.pu[LUMA_32x32].luma_copy_pp = x265_blockcopy_pp_32x32_avx;
- p.chroma[X265_CSP_I422].copy_pp[CHROMA422_32x48] = x265_blockcopy_pp_32x48_avx;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_32x48].copy_pp = x265_blockcopy_pp_32x48_avx;
- p.chroma[X265_CSP_I422].copy_pp[CHROMA422_32x64] = x265_blockcopy_pp_32x64_avx;
- p.luma_copy_pp[LUMA_32x64] = x265_blockcopy_pp_32x64_avx;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_32x64].copy_pp = x265_blockcopy_pp_32x64_avx;
+ p.pu[LUMA_32x64].luma_copy_pp = x265_blockcopy_pp_32x64_avx;
}
if (cpuMask & X265_CPU_XOP)
{
@@ -1771,139 +1771,139 @@
INIT2(sad_x4, _avx2);
INIT4(satd, _avx2);
INIT2_NAME(sse_pp, ssd, _avx2);
- p.sad_x4[LUMA_16x12] = x265_pixel_sad_x4_16x12_avx2;
- p.sad_x4[LUMA_16x32] = x265_pixel_sad_x4_16x32_avx2;
- p.ssd_s[BLOCK_32x32] = x265_pixel_ssd_s_32_avx2;
+ p.pu[LUMA_16x12].sad_x4 = x265_pixel_sad_x4_16x12_avx2;
+ p.pu[LUMA_16x32].sad_x4 = x265_pixel_sad_x4_16x32_avx2;
+ p.cu[BLOCK_32x32].ssd_s = x265_pixel_ssd_s_32_avx2;
/* Need to update the assembly code to match the changed interface of the
* copy_cnt primitive; once it is updated, the avx2 version will be enabled */
- p.copy_cnt[BLOCK_8x8] = x265_copy_cnt_8_avx2;
- p.copy_cnt[BLOCK_16x16] = x265_copy_cnt_16_avx2;
- p.copy_cnt[BLOCK_32x32] = x265_copy_cnt_32_avx2;
+ p.cu[BLOCK_8x8].copy_cnt = x265_copy_cnt_8_avx2;
+ p.cu[BLOCK_16x16].copy_cnt = x265_copy_cnt_16_avx2;
+ p.cu[BLOCK_32x32].copy_cnt = x265_copy_cnt_32_avx2;
- p.blockfill_s[BLOCK_16x16] = x265_blockfill_s_16x16_avx2;
- p.blockfill_s[BLOCK_32x32] = x265_blockfill_s_32x32_avx2;
+ p.cu[BLOCK_16x16].blockfill_s = x265_blockfill_s_16x16_avx2;
+ p.cu[BLOCK_32x32].blockfill_s = x265_blockfill_s_32x32_avx2;
- p.cpy1Dto2D_shl[BLOCK_4x4] = x265_cpy1Dto2D_shl_4_avx2;
- p.cpy1Dto2D_shl[BLOCK_8x8] = x265_cpy1Dto2D_shl_8_avx2;
- p.cpy1Dto2D_shl[BLOCK_16x16] = x265_cpy1Dto2D_shl_16_avx2;
- p.cpy1Dto2D_shl[BLOCK_32x32] = x265_cpy1Dto2D_shl_32_avx2;
- p.cpy1Dto2D_shr[BLOCK_4x4] = x265_cpy1Dto2D_shr_4_avx2;
- p.cpy1Dto2D_shr[BLOCK_8x8] = x265_cpy1Dto2D_shr_8_avx2;
- p.cpy1Dto2D_shr[BLOCK_16x16] = x265_cpy1Dto2D_shr_16_avx2;
- p.cpy1Dto2D_shr[BLOCK_32x32] = x265_cpy1Dto2D_shr_32_avx2;
+ p.cu[BLOCK_4x4].cpy1Dto2D_shl = x265_cpy1Dto2D_shl_4_avx2;
+ p.cu[BLOCK_8x8].cpy1Dto2D_shl = x265_cpy1Dto2D_shl_8_avx2;
+ p.cu[BLOCK_16x16].cpy1Dto2D_shl = x265_cpy1Dto2D_shl_16_avx2;
+ p.cu[BLOCK_32x32].cpy1Dto2D_shl = x265_cpy1Dto2D_shl_32_avx2;
+ p.cu[BLOCK_4x4].cpy1Dto2D_shr = x265_cpy1Dto2D_shr_4_avx2;
+ p.cu[BLOCK_8x8].cpy1Dto2D_shr = x265_cpy1Dto2D_shr_8_avx2;
+ p.cu[BLOCK_16x16].cpy1Dto2D_shr = x265_cpy1Dto2D_shr_16_avx2;
+ p.cu[BLOCK_32x32].cpy1Dto2D_shr = x265_cpy1Dto2D_shr_32_avx2;
p.denoiseDct = x265_denoise_dct_avx2;
- p.dct[DCT_4x4] = x265_dct4_avx2;
+ p.cu[BLOCK_4x4].dct = x265_dct4_avx2;
p.quant = x265_quant_avx2;
p.nquant = x265_nquant_avx2;
p.dequant_normal = x265_dequant_normal_avx2;
- p.chroma[X265_CSP_I420].copy_ss[CHROMA_16x4] = x265_blockcopy_ss_16x4_avx;
- p.chroma[X265_CSP_I420].copy_ss[CHROMA_16x12] = x265_blockcopy_ss_16x12_avx;
- p.chroma[X265_CSP_I420].copy_ss[CHROMA_16x8] = x265_blockcopy_ss_16x8_avx;
- p.chroma[X265_CSP_I420].copy_ss[CHROMA_16x16] = x265_blockcopy_ss_16x16_avx;
- p.chroma[X265_CSP_I420].copy_ss[CHROMA_16x32] = x265_blockcopy_ss_16x32_avx;
- p.chroma[X265_CSP_I422].copy_ss[CHROMA422_16x8] = x265_blockcopy_ss_16x8_avx;
- p.chroma[X265_CSP_I422].copy_ss[CHROMA422_16x16] = x265_blockcopy_ss_16x16_avx;
- p.chroma[X265_CSP_I422].copy_ss[CHROMA422_16x24] = x265_blockcopy_ss_16x24_avx;
- p.chroma[X265_CSP_I422].copy_ss[CHROMA422_16x32] = x265_blockcopy_ss_16x32_avx;
- p.chroma[X265_CSP_I422].copy_ss[CHROMA422_16x64] = x265_blockcopy_ss_16x64_avx;
+ p.chroma[X265_CSP_I420].pu[CHROMA_16x4].copy_ss = x265_blockcopy_ss_16x4_avx;
+ p.chroma[X265_CSP_I420].pu[CHROMA_16x12].copy_ss = x265_blockcopy_ss_16x12_avx;
+ p.chroma[X265_CSP_I420].pu[CHROMA_16x8].copy_ss = x265_blockcopy_ss_16x8_avx;
+ p.chroma[X265_CSP_I420].pu[CHROMA_16x16].copy_ss = x265_blockcopy_ss_16x16_avx;
+ p.chroma[X265_CSP_I420].pu[CHROMA_16x32].copy_ss = x265_blockcopy_ss_16x32_avx;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_16x8] .copy_ss = x265_blockcopy_ss_16x8_avx;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_16x16].copy_ss = x265_blockcopy_ss_16x16_avx;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_16x24].copy_ss = x265_blockcopy_ss_16x24_avx;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_16x32].copy_ss = x265_blockcopy_ss_16x32_avx;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_16x64].copy_ss = x265_blockcopy_ss_16x64_avx;
p.scale1D_128to64 = x265_scale1D_128to64_avx2;
p.weight_pp = x265_weight_pp_avx2;
#if X86_64
- p.dct[DCT_8x8] = x265_dct8_avx2;
- p.dct[DCT_16x16] = x265_dct16_avx2;
- p.dct[DCT_32x32] = x265_dct32_avx2;
- p.idct[IDCT_4x4] = x265_idct4_avx2;
- p.idct[IDCT_8x8] = x265_idct8_avx2;
- p.idct[IDCT_16x16] = x265_idct16_avx2;
- p.idct[IDCT_32x32] = x265_idct32_avx2;
+ p.cu[BLOCK_8x8].dct = x265_dct8_avx2;
+ p.cu[BLOCK_16x16].dct = x265_dct16_avx2;
+ p.cu[BLOCK_32x32].dct = x265_dct32_avx2;
+ p.cu[BLOCK_4x4].idct = x265_idct4_avx2;
+ p.cu[BLOCK_8x8].idct = x265_idct8_avx2;
+ p.cu[BLOCK_16x16].idct = x265_idct16_avx2;
+ p.cu[BLOCK_32x32].idct = x265_idct32_avx2;
- p.transpose[BLOCK_8x8] = x265_transpose8_avx2;
- p.transpose[BLOCK_16x16] = x265_transpose16_avx2;
- p.transpose[BLOCK_32x32] = x265_transpose32_avx2;
- p.transpose[BLOCK_64x64] = x265_transpose64_avx2;
+ p.cu[BLOCK_8x8].transpose = x265_transpose8_avx2;
+ p.cu[BLOCK_16x16].transpose = x265_transpose16_avx2;
+ p.cu[BLOCK_32x32].transpose = x265_transpose32_avx2;
+ p.cu[BLOCK_64x64].transpose = x265_transpose64_avx2;
- p.luma_vpp[LUMA_12x16] = x265_interp_8tap_vert_pp_12x16_avx2;
+ p.pu[LUMA_12x16].luma_vpp = x265_interp_8tap_vert_pp_12x16_avx2;
- p.luma_vpp[LUMA_16x4] = x265_interp_8tap_vert_pp_16x4_avx2;
- p.luma_vpp[LUMA_16x8] = x265_interp_8tap_vert_pp_16x8_avx2;
- p.luma_vpp[LUMA_16x12] = x265_interp_8tap_vert_pp_16x12_avx2;
- p.luma_vpp[LUMA_16x16] = x265_interp_8tap_vert_pp_16x16_avx2;
- p.luma_vpp[LUMA_16x32] = x265_interp_8tap_vert_pp_16x32_avx2;
- p.luma_vpp[LUMA_16x64] = x265_interp_8tap_vert_pp_16x64_avx2;
+ p.pu[LUMA_16x4].luma_vpp = x265_interp_8tap_vert_pp_16x4_avx2;
+ p.pu[LUMA_16x8].luma_vpp = x265_interp_8tap_vert_pp_16x8_avx2;
+ p.pu[LUMA_16x12].luma_vpp = x265_interp_8tap_vert_pp_16x12_avx2;
+ p.pu[LUMA_16x16].luma_vpp = x265_interp_8tap_vert_pp_16x16_avx2;
+ p.pu[LUMA_16x32].luma_vpp = x265_interp_8tap_vert_pp_16x32_avx2;
+ p.pu[LUMA_16x64].luma_vpp = x265_interp_8tap_vert_pp_16x64_avx2;
- p.luma_vpp[LUMA_24x32] = x265_interp_8tap_vert_pp_24x32_avx2;
+ p.pu[LUMA_24x32].luma_vpp = x265_interp_8tap_vert_pp_24x32_avx2;
- p.luma_vpp[LUMA_32x8] = x265_interp_8tap_vert_pp_32x8_avx2;
- p.luma_vpp[LUMA_32x16] = x265_interp_8tap_vert_pp_32x16_avx2;
- p.luma_vpp[LUMA_32x24] = x265_interp_8tap_vert_pp_32x24_avx2;
- p.luma_vpp[LUMA_32x32] = x265_interp_8tap_vert_pp_32x32_avx2;
- p.luma_vpp[LUMA_32x64] = x265_interp_8tap_vert_pp_32x64_avx2;
+ p.pu[LUMA_32x8].luma_vpp = x265_interp_8tap_vert_pp_32x8_avx2;
+ p.pu[LUMA_32x16].luma_vpp = x265_interp_8tap_vert_pp_32x16_avx2;
+ p.pu[LUMA_32x24].luma_vpp = x265_interp_8tap_vert_pp_32x24_avx2;
+ p.pu[LUMA_32x32].luma_vpp = x265_interp_8tap_vert_pp_32x32_avx2;
+ p.pu[LUMA_32x64].luma_vpp = x265_interp_8tap_vert_pp_32x64_avx2;
- p.luma_vpp[LUMA_48x64] = x265_interp_8tap_vert_pp_48x64_avx2;
+ p.pu[LUMA_48x64].luma_vpp = x265_interp_8tap_vert_pp_48x64_avx2;
- p.luma_vpp[LUMA_64x16] = x265_interp_8tap_vert_pp_64x16_avx2;
- p.luma_vpp[LUMA_64x32] = x265_interp_8tap_vert_pp_64x32_avx2;
- p.luma_vpp[LUMA_64x48] = x265_interp_8tap_vert_pp_64x48_avx2;
- p.luma_vpp[LUMA_64x64] = x265_interp_8tap_vert_pp_64x64_avx2;
+ p.pu[LUMA_64x16].luma_vpp = x265_interp_8tap_vert_pp_64x16_avx2;
+ p.pu[LUMA_64x32].luma_vpp = x265_interp_8tap_vert_pp_64x32_avx2;
+ p.pu[LUMA_64x48].luma_vpp = x265_interp_8tap_vert_pp_64x48_avx2;
+ p.pu[LUMA_64x64].luma_vpp = x265_interp_8tap_vert_pp_64x64_avx2;
#endif
- p.luma_hpp[LUMA_4x4] = x265_interp_8tap_horiz_pp_4x4_avx2;
+ p.pu[LUMA_4x4].luma_hpp = x265_interp_8tap_horiz_pp_4x4_avx2;
- p.luma_hpp[LUMA_8x4] = x265_interp_8tap_horiz_pp_8x4_avx2;
- p.luma_hpp[LUMA_8x8] = x265_interp_8tap_horiz_pp_8x8_avx2;
- p.luma_hpp[LUMA_8x16] = x265_interp_8tap_horiz_pp_8x16_avx2;
- p.luma_hpp[LUMA_8x32] = x265_interp_8tap_horiz_pp_8x32_avx2;
+ p.pu[LUMA_8x4].luma_hpp = x265_interp_8tap_horiz_pp_8x4_avx2;
+ p.pu[LUMA_8x8].luma_hpp = x265_interp_8tap_horiz_pp_8x8_avx2;
+ p.pu[LUMA_8x16].luma_hpp = x265_interp_8tap_horiz_pp_8x16_avx2;
+ p.pu[LUMA_8x32].luma_hpp = x265_interp_8tap_horiz_pp_8x32_avx2;
- p.luma_hpp[LUMA_16x4] = x265_interp_8tap_horiz_pp_16x4_avx2;
- p.luma_hpp[LUMA_16x8] = x265_interp_8tap_horiz_pp_16x8_avx2;
- p.luma_hpp[LUMA_16x12] = x265_interp_8tap_horiz_pp_16x12_avx2;
- p.luma_hpp[LUMA_16x16] = x265_interp_8tap_horiz_pp_16x16_avx2;
- p.luma_hpp[LUMA_16x32] = x265_interp_8tap_horiz_pp_16x32_avx2;
- p.luma_hpp[LUMA_16x64] = x265_interp_8tap_horiz_pp_16x64_avx2;
+ p.pu[LUMA_16x4].luma_hpp = x265_interp_8tap_horiz_pp_16x4_avx2;
+ p.pu[LUMA_16x8].luma_hpp = x265_interp_8tap_horiz_pp_16x8_avx2;
+ p.pu[LUMA_16x12].luma_hpp = x265_interp_8tap_horiz_pp_16x12_avx2;
+ p.pu[LUMA_16x16].luma_hpp = x265_interp_8tap_horiz_pp_16x16_avx2;
+ p.pu[LUMA_16x32].luma_hpp = x265_interp_8tap_horiz_pp_16x32_avx2;
+ p.pu[LUMA_16x64].luma_hpp = x265_interp_8tap_horiz_pp_16x64_avx2;
- p.luma_hpp[LUMA_32x8] = x265_interp_8tap_horiz_pp_32x8_avx2;
- p.luma_hpp[LUMA_32x16] = x265_interp_8tap_horiz_pp_32x16_avx2;
- p.luma_hpp[LUMA_32x24] = x265_interp_8tap_horiz_pp_32x24_avx2;
- p.luma_hpp[LUMA_32x32] = x265_interp_8tap_horiz_pp_32x32_avx2;
- p.luma_hpp[LUMA_32x64] = x265_interp_8tap_horiz_pp_32x64_avx2;
+ p.pu[LUMA_32x8].luma_hpp = x265_interp_8tap_horiz_pp_32x8_avx2;
+ p.pu[LUMA_32x16].luma_hpp = x265_interp_8tap_horiz_pp_32x16_avx2;
+ p.pu[LUMA_32x24].luma_hpp = x265_interp_8tap_horiz_pp_32x24_avx2;
+ p.pu[LUMA_32x32].luma_hpp = x265_interp_8tap_horiz_pp_32x32_avx2;
+ p.pu[LUMA_32x64].luma_hpp = x265_interp_8tap_horiz_pp_32x64_avx2;
- p.luma_hpp[LUMA_64x64] = x265_interp_8tap_horiz_pp_64x64_avx2;
- p.luma_hpp[LUMA_64x48] = x265_interp_8tap_horiz_pp_64x48_avx2;
- p.luma_hpp[LUMA_64x32] = x265_interp_8tap_horiz_pp_64x32_avx2;
- p.luma_hpp[LUMA_64x16] = x265_interp_8tap_horiz_pp_64x16_avx2;
+ p.pu[LUMA_64x64].luma_hpp = x265_interp_8tap_horiz_pp_64x64_avx2;
+ p.pu[LUMA_64x48].luma_hpp = x265_interp_8tap_horiz_pp_64x48_avx2;
+ p.pu[LUMA_64x32].luma_hpp = x265_interp_8tap_horiz_pp_64x32_avx2;
+ p.pu[LUMA_64x16].luma_hpp = x265_interp_8tap_horiz_pp_64x16_avx2;
- p.luma_hpp[LUMA_48x64] = x265_interp_8tap_horiz_pp_48x64_avx2;
+ p.pu[LUMA_48x64].luma_hpp = x265_interp_8tap_horiz_pp_48x64_avx2;
- p.chroma[X265_CSP_I420].filter_hpp[CHROMA_8x8] = x265_interp_4tap_horiz_pp_8x8_avx2;
- p.chroma[X265_CSP_I420].filter_hpp[CHROMA_4x4] = x265_interp_4tap_horiz_pp_4x4_avx2;
- p.chroma[X265_CSP_I420].filter_hpp[CHROMA_32x32] = x265_interp_4tap_horiz_pp_32x32_avx2;
- p.chroma[X265_CSP_I420].filter_hpp[CHROMA_16x16] = x265_interp_4tap_horiz_pp_16x16_avx2;
+ p.chroma[X265_CSP_I420].pu[CHROMA_8x8].filter_hpp = x265_interp_4tap_horiz_pp_8x8_avx2;
+ p.chroma[X265_CSP_I420].pu[CHROMA_4x4].filter_hpp = x265_interp_4tap_horiz_pp_4x4_avx2;
+ p.chroma[X265_CSP_I420].pu[CHROMA_32x32].filter_hpp = x265_interp_4tap_horiz_pp_32x32_avx2;
+ p.chroma[X265_CSP_I420].pu[CHROMA_16x16].filter_hpp = x265_interp_4tap_horiz_pp_16x16_avx2;
- p.luma_vpp[LUMA_4x4] = x265_interp_8tap_vert_pp_4x4_avx2;
+ p.pu[LUMA_4x4].luma_vpp = x265_interp_8tap_vert_pp_4x4_avx2;
- p.luma_vpp[LUMA_8x4] = x265_interp_8tap_vert_pp_8x4_avx2;
- p.luma_vpp[LUMA_8x8] = x265_interp_8tap_vert_pp_8x8_avx2;
- p.luma_vpp[LUMA_8x16] = x265_interp_8tap_vert_pp_8x16_avx2;
- p.luma_vpp[LUMA_8x32] = x265_interp_8tap_vert_pp_8x32_avx2;
+ p.pu[LUMA_8x4].luma_vpp = x265_interp_8tap_vert_pp_8x4_avx2;
+ p.pu[LUMA_8x8].luma_vpp = x265_interp_8tap_vert_pp_8x8_avx2;
+ p.pu[LUMA_8x16].luma_vpp = x265_interp_8tap_vert_pp_8x16_avx2;
+ p.pu[LUMA_8x32].luma_vpp = x265_interp_8tap_vert_pp_8x32_avx2;
// color space i420
- p.chroma[X265_CSP_I420].filter_vpp[CHROMA_4x4] = x265_interp_4tap_vert_pp_4x4_avx2;
- p.chroma[X265_CSP_I420].filter_vpp[CHROMA_8x8] = x265_interp_4tap_vert_pp_8x8_avx2;
+ p.chroma[X265_CSP_I420].pu[CHROMA_4x4].filter_vpp = x265_interp_4tap_vert_pp_4x4_avx2;
+ p.chroma[X265_CSP_I420].pu[CHROMA_8x8].filter_vpp = x265_interp_4tap_vert_pp_8x8_avx2;
// color space i422
- p.chroma[X265_CSP_I422].filter_vpp[CHROMA422_4x4] = x265_interp_4tap_vert_pp_4x4_avx2;
+ p.chroma[X265_CSP_I422].pu[CHROMA422_4x4].filter_vpp = x265_interp_4tap_vert_pp_4x4_avx2;
- p.luma_vps[LUMA_4x4] = x265_interp_8tap_vert_ps_4x4_avx2;
+ p.pu[LUMA_4x4].luma_vps = x265_interp_8tap_vert_ps_4x4_avx2;
#if X86_64
- p.chroma[X265_CSP_I420].filter_vpp[CHROMA_16x16] = x265_interp_4tap_vert_pp_16x16_avx2;
- p.chroma[X265_CSP_I420].filter_vpp[CHROMA_32x32] = x265_interp_4tap_vert_pp_32x32_avx2;
+ p.chroma[X265_CSP_I420].pu[CHROMA_16x16].filter_vpp = x265_interp_4tap_vert_pp_16x16_avx2;
+ p.chroma[X265_CSP_I420].pu[CHROMA_32x32].filter_vpp = x265_interp_4tap_vert_pp_32x32_avx2;
#endif
}
#endif // if HIGH_BIT_DEPTH
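
Everything above follows one dispatch pattern: the C reference primitives are installed first, then each if (cpuMask & X265_CPU_*) block overwrites only the entries that ISA accelerates, so later blocks override earlier ones and any gap falls back to the best previous version. A toy model of that pattern (flag values and function names invented):

    // Toy version of the cpuMask override pattern used throughout
    // asm-primitives.cpp; flag values and function names are invented.
    #include <cstdio>
    #include <cstdint>

    enum { CPU_SSE2 = 1 << 0, CPU_SSSE3 = 1 << 1, CPU_AVX2 = 1 << 2 };

    typedef void (*dct_t)();
    static void dct4_c()    { puts("C"); }
    static void dct4_sse2() { puts("SSE2"); }
    static void dct4_avx2() { puts("AVX2"); }

    struct Primitives { dct_t dct4; };

    void setupCPrimitives(Primitives& p) { p.dct4 = dct4_c; }

    void setupAsmPrimitives(Primitives& p, uint32_t cpuMask)
    {
        // Each block only overwrites what that ISA accelerates; anything it
        // does not touch keeps the best earlier version (ultimately the C one).
        if (cpuMask & CPU_SSE2) p.dct4 = dct4_sse2;
        if (cpuMask & CPU_AVX2) p.dct4 = dct4_avx2;
    }

    int main()
    {
        Primitives p;
        setupCPrimitives(p);
        setupAsmPrimitives(p, CPU_SSE2 | CPU_AVX2);
        p.dct4();  // prints "AVX2": the last matching block wins
        return 0;
    }
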
diff -r 1924c460d130 -r c6ca0fd54aa7 source/common/yuv.cpp
--- a/source/common/yuv.cpp Fri Jan 09 11:35:26 2015 +0530
+++ b/source/common/yuv.cpp Thu Jan 08 15:23:38 2015 -0600
@@ -81,32 +81,32 @@
void Yuv::copyToPicYuv(PicYuv& dstPic, uint32_t cuAddr, uint32_t absPartIdx) const
{
pixel* dstY = dstPic.getLumaAddr(cuAddr, absPartIdx);
- primitives.luma_copy_pp[m_part](dstY, dstPic.m_stride, m_buf[0], m_size);
+ primitives.pu[m_part].luma_copy_pp(dstY, dstPic.m_stride, m_buf[0], m_size);
pixel* dstU = dstPic.getCbAddr(cuAddr, absPartIdx);
pixel* dstV = dstPic.getCrAddr(cuAddr, absPartIdx);
- primitives.chroma[m_csp].copy_pp[m_part](dstU, dstPic.m_strideC, m_buf[1], m_csize);
- primitives.chroma[m_csp].copy_pp[m_part](dstV, dstPic.m_strideC, m_buf[2], m_csize);
+ primitives.chroma[m_csp].pu[m_part].copy_pp(dstU, dstPic.m_strideC, m_buf[1], m_csize);
+ primitives.chroma[m_csp].pu[m_part].copy_pp(dstV, dstPic.m_strideC, m_buf[2], m_csize);
}
void Yuv::copyFromPicYuv(const PicYuv& srcPic, uint32_t cuAddr, uint32_t absPartIdx)
{
const pixel* srcY = srcPic.getLumaAddr(cuAddr, absPartIdx);
- primitives.luma_copy_pp[m_part](m_buf[0], m_size, srcY, srcPic.m_stride);
+ primitives.pu[m_part].luma_copy_pp(m_buf[0], m_size, srcY, srcPic.m_stride);
const pixel* srcU = srcPic.getCbAddr(cuAddr, absPartIdx);
const pixel* srcV = srcPic.getCrAddr(cuAddr, absPartIdx);
- primitives.chroma[m_csp].copy_pp[m_part](m_buf[1], m_csize, srcU, srcPic.m_strideC);
- primitives.chroma[m_csp].copy_pp[m_part](m_buf[2], m_csize, srcV, srcPic.m_strideC);
+ primitives.chroma[m_csp].pu[m_part].copy_pp(m_buf[1], m_csize, srcU, srcPic.m_strideC);
+ primitives.chroma[m_csp].pu[m_part].copy_pp(m_buf[2], m_csize, srcV, srcPic.m_strideC);
}
void Yuv::copyFromYuv(const Yuv& srcYuv)
{
X265_CHECK(m_size >= srcYuv.m_size, "invalid size\n");
- primitives.luma_copy_pp[m_part](m_buf[0], m_size, srcYuv.m_buf[0], srcYuv.m_size);
- primitives.chroma[m_csp].copy_pp[m_part](m_buf[1], m_csize, srcYuv.m_buf[1], srcYuv.m_csize);
- primitives.chroma[m_csp].copy_pp[m_part](m_buf[2], m_csize, srcYuv.m_buf[2], srcYuv.m_csize);
+ primitives.pu[m_part].luma_copy_pp(m_buf[0], m_size, srcYuv.m_buf[0], srcYuv.m_size);
+ primitives.chroma[m_csp].pu[m_part].copy_pp(m_buf[1], m_csize, srcYuv.m_buf[1], srcYuv.m_csize);
+ primitives.chroma[m_csp].pu[m_part].copy_pp(m_buf[2], m_csize, srcYuv.m_buf[2], srcYuv.m_csize);
}
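
At these call sites only the table lookup changes shape; the primitive itself keeps its (dst, dstStride, src, srcStride) contract. For reference, a simplified C version of the copy_pp primitive being invoked (the real templated versions live in x265's common/pixel.cpp):

    #include <cstdint>
    #include <cstring>

    typedef uint8_t pixel;  // 8-bit build assumed for this sketch

    // Copy a WxH block of pixels between two strided planes.
    template<int W, int H>
    void blockcopy_pp_c(pixel* dst, intptr_t dstStride, const pixel* src, intptr_t srcStride)
    {
        for (int y = 0; y < H; y++)
        {
            memcpy(dst, src, W * sizeof(pixel));
            dst += dstStride;
            src += srcStride;
        }
    }

    int main()
    {
        pixel src[8 * 8] = { 42 };  // first pixel 42, rest zero
        pixel dst[8 * 8] = {};
        blockcopy_pp_c<8, 8>(dst, 8, src, 8);  // same shape as luma_copy_pp above
        return dst[0] == 42 ? 0 : 1;
    }
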
/* This version is intended for use by ME, which requires FENC_STRIDE for luma fenc pixels */
@@ -115,47 +115,47 @@
X265_CHECK(m_size == FENC_STRIDE && m_size >= srcYuv.m_size, "PU buffer size mismatch\n");
const pixel* srcY = srcYuv.m_buf[0] + getAddrOffset(absPartIdx, srcYuv.m_size);
- primitives.luma_copy_pp[partEnum](m_buf[0], m_size, srcY, srcYuv.m_size);
+ primitives.pu[partEnum].luma_copy_pp(m_buf[0], m_size, srcY, srcYuv.m_size);
if (bChroma)
{
const pixel* srcU = srcYuv.m_buf[1] + srcYuv.getChromaAddrOffset(absPartIdx);
const pixel* srcV = srcYuv.m_buf[2] + srcYuv.getChromaAddrOffset(absPartIdx);
- primitives.chroma[m_csp].copy_pp[partEnum](m_buf[1], m_csize, srcU, srcYuv.m_csize);
- primitives.chroma[m_csp].copy_pp[partEnum](m_buf[2], m_csize, srcV, srcYuv.m_csize);
+ primitives.chroma[m_csp].pu[partEnum].copy_pp(m_buf[1], m_csize, srcU, srcYuv.m_csize);
+ primitives.chroma[m_csp].pu[partEnum].copy_pp(m_buf[2], m_csize, srcV, srcYuv.m_csize);
}
}
void Yuv::copyToPartYuv(Yuv& dstYuv, uint32_t absPartIdx) const
{
pixel* dstY = dstYuv.getLumaAddr(absPartIdx);
- primitives.luma_copy_pp[m_part](dstY, dstYuv.m_size, m_buf[0], m_size);
+ primitives.pu[m_part].luma_copy_pp(dstY, dstYuv.m_size, m_buf[0], m_size);
pixel* dstU = dstYuv.getCbAddr(absPartIdx);
pixel* dstV = dstYuv.getCrAddr(absPartIdx);
- primitives.chroma[m_csp].copy_pp[m_part](dstU, dstYuv.m_csize, m_buf[1], m_csize);
- primitives.chroma[m_csp].copy_pp[m_part](dstV, dstYuv.m_csize, m_buf[2], m_csize);
+ primitives.chroma[m_csp].pu[m_part].copy_pp(dstU, dstYuv.m_csize, m_buf[1], m_csize);
+ primitives.chroma[m_csp].pu[m_part].copy_pp(dstV, dstYuv.m_csize, m_buf[2], m_csize);
}
void Yuv::copyPartToYuv(Yuv& dstYuv, uint32_t absPartIdx) const
{
pixel* srcY = m_buf[0] + getAddrOffset(absPartIdx, m_size);
pixel* dstY = dstYuv.m_buf[0];
- primitives.luma_copy_pp[dstYuv.m_part](dstY, dstYuv.m_size, srcY, m_size);
+ primitives.pu[dstYuv.m_part].luma_copy_pp(dstY, dstYuv.m_size, srcY, m_size);
pixel* srcU = m_buf[1] + getChromaAddrOffset(absPartIdx);
pixel* srcV = m_buf[2] + getChromaAddrOffset(absPartIdx);
pixel* dstU = dstYuv.m_buf[1];
pixel* dstV = dstYuv.m_buf[2];
- primitives.chroma[m_csp].copy_pp[dstYuv.m_part](dstU, dstYuv.m_csize, srcU, m_csize);
- primitives.chroma[m_csp].copy_pp[dstYuv.m_part](dstV, dstYuv.m_csize, srcV, m_csize);
+ primitives.chroma[m_csp].pu[dstYuv.m_part].copy_pp(dstU, dstYuv.m_csize, srcU, m_csize);
+ primitives.chroma[m_csp].pu[dstYuv.m_part].copy_pp(dstV, dstYuv.m_csize, srcV, m_csize);
}
void Yuv::addClip(const Yuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t log2SizeL)
{
- primitives.luma_add_ps[log2SizeL - 2](m_buf[0], m_size, srcYuv0.m_buf[0], srcYuv1.m_buf[0], srcYuv0.m_size, srcYuv1.m_size);
- primitives.chroma[m_csp].add_ps[log2SizeL - 2](m_buf[1], m_csize, srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize, srcYuv1.m_csize);
- primitives.chroma[m_csp].add_ps[log2SizeL - 2](m_buf[2], m_csize, srcYuv0.m_buf[2], srcYuv1.m_buf[2], srcYuv0.m_csize, srcYuv1.m_csize);
+ primitives.pu[log2SizeL - 2].luma_add_ps(m_buf[0], m_size, srcYuv0.m_buf[0], srcYuv1.m_buf[0], srcYuv0.m_size, srcYuv1.m_size);
+ primitives.chroma[m_csp].cu[log2SizeL - 2].add_ps(m_buf[1], m_csize, srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize, srcYuv1.m_csize);
+ primitives.chroma[m_csp].cu[log2SizeL - 2].add_ps(m_buf[2], m_csize, srcYuv0.m_buf[2], srcYuv1.m_buf[2], srcYuv0.m_csize, srcYuv1.m_csize);
}
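
The log2SizeL - 2 index works because BLOCK_4x4 is entry 0 of the square-block tables and each successive entry doubles the size: a block of width 1 << log2Size lands at slot log2Size - 2. A quick check:

    #include <cassert>

    enum { BLOCK_4x4, BLOCK_8x8, BLOCK_16x16, BLOCK_32x32, BLOCK_64x64 };

    // log2Size is 2..6 for 4x4..64x64 blocks
    int blockIndex(int log2Size) { return log2Size - 2; }

    int main()
    {
        assert(blockIndex(2) == BLOCK_4x4);    // 1 << 2 = 4
        assert(blockIndex(3) == BLOCK_8x8);    // 1 << 3 = 8
        assert(blockIndex(6) == BLOCK_64x64);  // 1 << 6 = 64
        return 0;
    }
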
void Yuv::addAvg(const ShortYuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t absPartIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma)
@@ -167,7 +167,7 @@
const int16_t* srcY0 = srcYuv0.getLumaAddr(absPartIdx);
const int16_t* srcY1 = srcYuv1.getLumaAddr(absPartIdx);
pixel* dstY = getLumaAddr(absPartIdx);
- primitives.luma_addAvg[part](srcY0, srcY1, dstY, srcYuv0.m_size, srcYuv1.m_size, m_size);
+ primitives.pu[part].luma_addAvg(srcY0, srcY1, dstY, srcYuv0.m_size, srcYuv1.m_size, m_size);
}
if (bChroma)
{
@@ -177,8 +177,8 @@
const int16_t* srcV1 = srcYuv1.getCrAddr(absPartIdx);
pixel* dstU = getCbAddr(absPartIdx);
pixel* dstV = getCrAddr(absPartIdx);
- primitives.chroma[m_csp].addAvg[part](srcU0, srcU1, dstU, srcYuv0.m_csize, srcYuv1.m_csize, m_csize);
- primitives.chroma[m_csp].addAvg[part](srcV0, srcV1, dstV, srcYuv0.m_csize, srcYuv1.m_csize, m_csize);
+ primitives.chroma[m_csp].pu[part].addAvg(srcU0, srcU1, dstU, srcYuv0.m_csize, srcYuv1.m_csize, m_csize);
+ primitives.chroma[m_csp].pu[part].addAvg(srcV0, srcV1, dstV, srcYuv0.m_csize, srcYuv1.m_csize, m_csize);
}
}
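
For context on what addAvg computes: it merges two 16-bit intermediate (bi-directional) predictions into final pixels with a rounded downshift and clip. A simplified scalar model, assuming 8-bit pixels with 14-bit intermediates and ignoring the IF_INTERNAL_OFFS bias that x265's real intermediates carry:

    #include <cstdint>
    #include <algorithm>

    typedef uint8_t pixel;

    static inline pixel clipPixel(int v) { return (pixel)std::min(255, std::max(0, v)); }

    // Average two strided 16-bit prediction blocks into 8-bit output.
    void addAvg_c(const int16_t* src0, const int16_t* src1, pixel* dst,
                  intptr_t s0, intptr_t s1, intptr_t d, int width, int height)
    {
        const int shift  = 7;                // (14-bit + 14-bit sum) -> 8-bit
        const int offset = 1 << (shift - 1); // round to nearest
        for (int y = 0; y < height; y++, src0 += s0, src1 += s1, dst += d)
            for (int x = 0; x < width; x++)
                dst[x] = clipPixel((src0[x] + src1[x] + offset) >> shift);
    }

    int main()
    {
        int16_t a[4] = { 100, 100, 100, 100 }, b[4] = { 156, 156, 156, 156 };
        pixel out[4];
        addAvg_c(a, b, out, 4, 4, 4, 4, 1);
        return out[0] == 2 ? 0 : 1;  // (100 + 156 + 64) >> 7 = 2
    }
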
@@ -186,7 +186,7 @@
{
const pixel* src = getLumaAddr(absPartIdx);
pixel* dst = dstYuv.getLumaAddr(absPartIdx);
- primitives.luma_copy_pp[log2Size - 2](dst, dstYuv.m_size, src, m_size);
+ primitives.pu[log2Size - 2].luma_copy_pp(dst, dstYuv.m_size, src, m_size);
}
void Yuv::copyPartToPartChroma(Yuv& dstYuv, uint32_t absPartIdx, uint32_t log2SizeL) const
@@ -196,6 +196,6 @@
const pixel* srcV = getCrAddr(absPartIdx);
pixel* dstU = dstYuv.getCbAddr(absPartIdx);
pixel* dstV = dstYuv.getCrAddr(absPartIdx);
- primitives.chroma[m_csp].copy_pp[part](dstU, dstYuv.m_csize, srcU, m_csize);
- primitives.chroma[m_csp].copy_pp[part](dstV, dstYuv.m_csize, srcV, m_csize);
+ primitives.chroma[m_csp].pu[part].copy_pp(dstU, dstYuv.m_csize, srcU, m_csize);
+ primitives.chroma[m_csp].pu[part].copy_pp(dstV, dstYuv.m_csize, srcV, m_csize);
}