[x264-devel] [PATCH 23/29] Templatize AARCH64 assembly code
Vittorio Giovara
vittorio.giovara at gmail.com
Fri Feb 10 22:18:58 CET 2017
---
common/aarch64/asm.S | 15 ++++++----
common/aarch64/cabac-a.S | 4 +--
common/aarch64/dct.h | 27 ++++++++++++++++++
common/aarch64/mc-c.c | 55 +++++++++++++++++++++++++++++++++++++
common/aarch64/mc.h | 1 +
common/aarch64/pixel.h | 55 +++++++++++++++++++++++++++++++++++++
common/aarch64/predict.h | 71 ++++++++++++++++++++++++++++++++++++++++++++++++
common/aarch64/quant-a.S | 4 +--
common/aarch64/quant.h | 21 ++++++++++++++
9 files changed, 243 insertions(+), 10 deletions(-)
diff --git a/common/aarch64/asm.S b/common/aarch64/asm.S
index 658a1dd..07f5719 100644
--- a/common/aarch64/asm.S
+++ b/common/aarch64/asm.S
@@ -27,12 +27,19 @@
#include "config.h"
+#define GLUE(a, b) a ## b
+#define JOIN(a, b) GLUE(a, b)
+
#ifdef PREFIX
-# define EXTERN_ASM _x264_
+# define BASE _x264_
#else
-# define EXTERN_ASM x264_
+# define BASE x264_
#endif
+#define EXTERN_ASM JOIN(JOIN(BASE, BIT_DEPTH), _)
+#define X(s) JOIN(EXTERN_ASM, s)
+#define X264(s) JOIN(BASE, s)
+
#ifdef __ELF__
# define ELF
#else
@@ -98,10 +105,6 @@ MACH .const_data
#endif
.endm
-#define GLUE(a, b) a ## b
-#define JOIN(a, b) GLUE(a, b)
-#define X(s) JOIN(EXTERN_ASM, s)
-
#define FDEC_STRIDE 32
#define FENC_STRIDE 16
diff --git a/common/aarch64/cabac-a.S b/common/aarch64/cabac-a.S
index 7f8fa84..c05f963 100644
--- a/common/aarch64/cabac-a.S
+++ b/common/aarch64/cabac-a.S
@@ -30,8 +30,8 @@
// w12 holds x264_cabac_t.i_range
function cabac_encode_decision_asm, export=1
- movrel x8, X(cabac_range_lps)
- movrel x9, X(cabac_transition)
+ movrel x8, X264(cabac_range_lps)
+ movrel x9, X264(cabac_transition)
add w10, w1, #CABAC_STATE
ldrb w3, [x0, x10] // i_state
ldr w12, [x0, #CABAC_I_RANGE]
diff --git a/common/aarch64/dct.h b/common/aarch64/dct.h
index 095f4ab..25395cc 100644
--- a/common/aarch64/dct.h
+++ b/common/aarch64/dct.h
@@ -27,13 +27,29 @@
#ifndef X264_AARCH64_DCT_H
#define X264_AARCH64_DCT_H
+#define x264_dct4x4dc_neon x264_template(dct4x4dc_neon)
void x264_dct4x4dc_neon( int16_t d[16] );
+#define x264_idct4x4dc_neon x264_template(idct4x4dc_neon)
void x264_idct4x4dc_neon( int16_t d[16] );
+#define x264_sub16x16_dct8_neon x264_template(sub16x16_dct8_neon)
+#define x264_sub16x16_dct_neon x264_template(sub16x16_dct_neon)
+#define x264_sub4x4_dct_neon x264_template(sub4x4_dct_neon)
+#define x264_sub8x16_dct_dc_neon x264_template(sub8x16_dct_dc_neon)
+#define x264_sub8x8_dct8_neon x264_template(sub8x8_dct8_neon)
+#define x264_sub8x8_dct_dc_neon x264_template(sub8x8_dct_dc_neon)
+#define x264_sub8x8_dct_neon x264_template(sub8x8_dct_neon)
void x264_sub4x4_dct_neon( int16_t dct[16], uint8_t *pix1, uint8_t *pix2 );
void x264_sub8x8_dct_neon( int16_t dct[4][16], uint8_t *pix1, uint8_t *pix2 );
void x264_sub16x16_dct_neon( int16_t dct[16][16], uint8_t *pix1, uint8_t *pix2 );
+#define x264_add16x16_idct8_neon x264_template(add16x16_idct8_neon)
+#define x264_add16x16_idct_dc_neon x264_template(add16x16_idct_dc_neon)
+#define x264_add16x16_idct_neon x264_template(add16x16_idct_neon)
+#define x264_add4x4_idct_neon x264_template(add4x4_idct_neon)
+#define x264_add8x8_idct8_neon x264_template(add8x8_idct8_neon)
+#define x264_add8x8_idct_dc_neon x264_template(add8x8_idct_dc_neon)
+#define x264_add8x8_idct_neon x264_template(add8x8_idct_neon)
void x264_add4x4_idct_neon( uint8_t *p_dst, int16_t dct[16] );
void x264_add8x8_idct_neon( uint8_t *p_dst, int16_t dct[4][16] );
void x264_add16x16_idct_neon( uint8_t *p_dst, int16_t dct[16][16] );
@@ -49,6 +65,17 @@ void x264_sub16x16_dct8_neon( int16_t dct[4][64], uint8_t *pix1, uint8_t *pix2 )
void x264_add8x8_idct8_neon( uint8_t *p_dst, int16_t dct[64] );
void x264_add16x16_idct8_neon( uint8_t *p_dst, int16_t dct[4][64] );
+#define x264_zigzag_interleave_8x8_cavlc_neon x264_template(zigzag_interleave_8x8_cavlc_neon)
+#define x264_zigzag_scan_4x4_field_neon x264_template(zigzag_scan_4x4_field_neon)
+#define x264_zigzag_scan_4x4_frame_neon x264_template(zigzag_scan_4x4_frame_neon)
+#define x264_zigzag_scan_8x8_field_neon x264_template(zigzag_scan_8x8_field_neon)
+#define x264_zigzag_scan_8x8_frame_neon x264_template(zigzag_scan_8x8_frame_neon)
+#define x264_zigzag_sub_4x4_field_neon x264_template(zigzag_sub_4x4_field_neon)
+#define x264_zigzag_sub_4x4_frame_neon x264_template(zigzag_sub_4x4_frame_neon)
+#define x264_zigzag_sub_4x4ac_field_neon x264_template(zigzag_sub_4x4ac_field_neon)
+#define x264_zigzag_sub_4x4ac_frame_neon x264_template(zigzag_sub_4x4ac_frame_neon)
+#define x264_zigzag_sub_8x8_field_neon x264_template(zigzag_sub_8x8_field_neon)
+#define x264_zigzag_sub_8x8_frame_neon x264_template(zigzag_sub_8x8_frame_neon)
void x264_zigzag_scan_4x4_frame_neon( int16_t level[16], int16_t dct[16] );
void x264_zigzag_scan_4x4_field_neon( int16_t level[16], int16_t dct[16] );
void x264_zigzag_scan_8x8_frame_neon( int16_t level[64], int16_t dct[64] );
diff --git a/common/aarch64/mc-c.c b/common/aarch64/mc-c.c
index 2cd548a..d73682a 100644
--- a/common/aarch64/mc-c.c
+++ b/common/aarch64/mc-c.c
@@ -27,13 +27,31 @@
#include "common/common.h"
#include "mc.h"
+#define x264_prefetch_fenc_420_aarch64 x264_template(prefetch_fenc_420_aarch64)
+#define x264_prefetch_fenc_422_aarch64 x264_template(prefetch_fenc_422_aarch64)
+#define x264_prefetch_ref_aarch64 x264_template(prefetch_ref_aarch64)
void x264_prefetch_ref_aarch64( uint8_t *, intptr_t, int );
void x264_prefetch_fenc_420_aarch64( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
void x264_prefetch_fenc_422_aarch64( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_memcpy_aligned_neon x264_template(memcpy_aligned_neon)
+#define x264_memzero_aligned_neon x264_template(memzero_aligned_neon)
void *x264_memcpy_aligned_neon( void *dst, const void *src, size_t n );
void x264_memzero_aligned_neon( void *dst, size_t n );
+#define x264_pixel_avg2_w16_neon x264_template(pixel_avg2_w16_neon)
+#define x264_pixel_avg2_w20_neon x264_template(pixel_avg2_w20_neon)
+#define x264_pixel_avg2_w4_neon x264_template(pixel_avg2_w4_neon)
+#define x264_pixel_avg2_w8_neon x264_template(pixel_avg2_w8_neon)
+#define x264_pixel_avg_16x16_neon x264_template(pixel_avg_16x16_neon)
+#define x264_pixel_avg_16x8_neon x264_template(pixel_avg_16x8_neon)
+#define x264_pixel_avg_4x16_neon x264_template(pixel_avg_4x16_neon)
+#define x264_pixel_avg_4x2_neon x264_template(pixel_avg_4x2_neon)
+#define x264_pixel_avg_4x4_neon x264_template(pixel_avg_4x4_neon)
+#define x264_pixel_avg_4x8_neon x264_template(pixel_avg_4x8_neon)
+#define x264_pixel_avg_8x16_neon x264_template(pixel_avg_8x16_neon)
+#define x264_pixel_avg_8x4_neon x264_template(pixel_avg_8x4_neon)
+#define x264_pixel_avg_8x8_neon x264_template(pixel_avg_8x8_neon)
void x264_pixel_avg_16x16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
void x264_pixel_avg_16x8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
void x264_pixel_avg_8x16_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
@@ -49,6 +67,11 @@ void x264_pixel_avg2_w8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t
void x264_pixel_avg2_w16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
void x264_pixel_avg2_w20_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
+#define x264_plane_copy_core_neon x264_template(plane_copy_core_neon)
+#define x264_plane_copy_deinterleave_neon x264_template(plane_copy_deinterleave_neon)
+#define x264_plane_copy_deinterleave_rgb_neon x264_template(plane_copy_deinterleave_rgb_neon)
+#define x264_plane_copy_interleave_core_neon x264_template(plane_copy_interleave_core_neon)
+#define x264_plane_copy_swap_core_neon x264_template(plane_copy_swap_core_neon)
void x264_plane_copy_core_neon( pixel *dst, intptr_t i_dst,
pixel *src, intptr_t i_src, int w, int h );
void x264_plane_copy_swap_core_neon( pixel *dst, intptr_t i_dst,
@@ -64,10 +87,29 @@ void x264_plane_copy_interleave_core_neon( pixel *dst, intptr_t i_dst,
pixel *srcu, intptr_t i_srcu,
pixel *srcv, intptr_t i_srcv, int w, int h );
+#define x264_store_interleave_chroma_neon x264_template(store_interleave_chroma_neon)
void x264_store_interleave_chroma_neon( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
+#define x264_load_deinterleave_chroma_fdec_neon x264_template(load_deinterleave_chroma_fdec_neon)
void x264_load_deinterleave_chroma_fdec_neon( pixel *dst, pixel *src, intptr_t i_src, int height );
+#define x264_load_deinterleave_chroma_fenc_neon x264_template(load_deinterleave_chroma_fenc_neon)
void x264_load_deinterleave_chroma_fenc_neon( pixel *dst, pixel *src, intptr_t i_src, int height );
+#define x264_mc_weight_w16_neon x264_template(mc_weight_w16_neon)
+#define x264_mc_weight_w16_nodenom_neon x264_template(mc_weight_w16_nodenom_neon)
+#define x264_mc_weight_w16_offsetadd_neon x264_template(mc_weight_w16_offsetadd_neon)
+#define x264_mc_weight_w16_offsetsub_neon x264_template(mc_weight_w16_offsetsub_neon)
+#define x264_mc_weight_w20_neon x264_template(mc_weight_w20_neon)
+#define x264_mc_weight_w20_nodenom_neon x264_template(mc_weight_w20_nodenom_neon)
+#define x264_mc_weight_w20_offsetadd_neon x264_template(mc_weight_w20_offsetadd_neon)
+#define x264_mc_weight_w20_offsetsub_neon x264_template(mc_weight_w20_offsetsub_neon)
+#define x264_mc_weight_w4_neon x264_template(mc_weight_w4_neon)
+#define x264_mc_weight_w4_nodenom_neon x264_template(mc_weight_w4_nodenom_neon)
+#define x264_mc_weight_w4_offsetadd_neon x264_template(mc_weight_w4_offsetadd_neon)
+#define x264_mc_weight_w4_offsetsub_neon x264_template(mc_weight_w4_offsetsub_neon)
+#define x264_mc_weight_w8_neon x264_template(mc_weight_w8_neon)
+#define x264_mc_weight_w8_nodenom_neon x264_template(mc_weight_w8_nodenom_neon)
+#define x264_mc_weight_w8_offsetadd_neon x264_template(mc_weight_w8_offsetadd_neon)
+#define x264_mc_weight_w8_offsetsub_neon x264_template(mc_weight_w8_offsetsub_neon)
#define MC_WEIGHT(func)\
void x264_mc_weight_w20##func##_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\
void x264_mc_weight_w16##func##_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\
@@ -89,20 +131,32 @@ MC_WEIGHT(_nodenom)
MC_WEIGHT(_offsetadd)
MC_WEIGHT(_offsetsub)
+#define x264_mc_copy_w16_neon x264_template(mc_copy_w16_neon)
+#define x264_mc_copy_w4_neon x264_template(mc_copy_w4_neon)
+#define x264_mc_copy_w8_neon x264_template(mc_copy_w8_neon)
void x264_mc_copy_w4_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
void x264_mc_copy_w8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
void x264_mc_copy_w16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_mc_chroma_neon x264_template(mc_chroma_neon)
void x264_mc_chroma_neon( uint8_t *, uint8_t *, intptr_t, uint8_t *, intptr_t, int, int, int, int );
+#define x264_integral_init4h_neon x264_template(integral_init4h_neon)
+#define x264_integral_init4v_neon x264_template(integral_init4v_neon)
+#define x264_integral_init8h_neon x264_template(integral_init8h_neon)
+#define x264_integral_init8v_neon x264_template(integral_init8v_neon)
void x264_integral_init4h_neon( uint16_t *, uint8_t *, intptr_t );
void x264_integral_init4v_neon( uint16_t *, uint16_t *, intptr_t );
void x264_integral_init8h_neon( uint16_t *, uint8_t *, intptr_t );
void x264_integral_init8v_neon( uint16_t *, intptr_t );
+#define x264_frame_init_lowres_core_neon x264_template(frame_init_lowres_core_neon)
void x264_frame_init_lowres_core_neon( uint8_t *, uint8_t *, uint8_t *, uint8_t *, uint8_t *, intptr_t, intptr_t, int, int );
+#define x264_mbtree_propagate_cost_neon x264_template(mbtree_propagate_cost_neon)
void x264_mbtree_propagate_cost_neon( int16_t *, uint16_t *, uint16_t *, uint16_t *, uint16_t *, float *, int );
+#define x264_mbtree_fix8_pack_neon x264_template(mbtree_fix8_pack_neon)
void x264_mbtree_fix8_pack_neon( uint16_t *dst, float *src, int count );
+#define x264_mbtree_fix8_unpack_neon x264_template(mbtree_fix8_unpack_neon)
void x264_mbtree_fix8_unpack_neon( float *dst, uint16_t *src, int count );
#if !HIGH_BIT_DEPTH
@@ -205,6 +259,7 @@ static uint8_t *get_ref_neon( uint8_t *dst, intptr_t *i_dst_stride,
}
}
+#define x264_hpel_filter_neon x264_template(hpel_filter_neon)
void x264_hpel_filter_neon( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc,
uint8_t *src, intptr_t stride, int width,
int height, int16_t *buf );
diff --git a/common/aarch64/mc.h b/common/aarch64/mc.h
index 33c311e..cda7379 100644
--- a/common/aarch64/mc.h
+++ b/common/aarch64/mc.h
@@ -26,6 +26,7 @@
#ifndef X264_AARCH64_MC_H
#define X264_AARCH64_MC_H
+#define x264_mc_init_aarch64 x264_template(mc_init_aarch64)
void x264_mc_init_aarch64( int cpu, x264_mc_functions_t *pf );
#endif
diff --git a/common/aarch64/pixel.h b/common/aarch64/pixel.h
index 8a7b83e..f224f7e 100644
--- a/common/aarch64/pixel.h
+++ b/common/aarch64/pixel.h
@@ -27,6 +27,60 @@
#ifndef X264_AARCH64_PIXEL_H
#define X264_AARCH64_PIXEL_H
+#define x264_pixel_hadamard_ac_16x16_neon x264_template(pixel_hadamard_ac_16x16_neon)
+#define x264_pixel_hadamard_ac_16x8_neon x264_template(pixel_hadamard_ac_16x8_neon)
+#define x264_pixel_hadamard_ac_8x16_neon x264_template(pixel_hadamard_ac_8x16_neon)
+#define x264_pixel_hadamard_ac_8x8_neon x264_template(pixel_hadamard_ac_8x8_neon)
+#define x264_pixel_sa8d_16x16_neon x264_template(pixel_sa8d_16x16_neon)
+#define x264_pixel_sa8d_8x8_neon x264_template(pixel_sa8d_8x8_neon)
+#define x264_pixel_sa8d_satd_16x16_neon x264_template(pixel_sa8d_satd_16x16_neon)
+#define x264_pixel_sad_16x16_neon x264_template(pixel_sad_16x16_neon)
+#define x264_pixel_sad_16x8_neon x264_template(pixel_sad_16x8_neon)
+#define x264_pixel_sad_4x16_neon x264_template(pixel_sad_4x16_neon)
+#define x264_pixel_sad_4x4_neon x264_template(pixel_sad_4x4_neon)
+#define x264_pixel_sad_4x8_neon x264_template(pixel_sad_4x8_neon)
+#define x264_pixel_sad_8x16_neon x264_template(pixel_sad_8x16_neon)
+#define x264_pixel_sad_8x4_neon x264_template(pixel_sad_8x4_neon)
+#define x264_pixel_sad_8x8_neon x264_template(pixel_sad_8x8_neon)
+#define x264_pixel_sad_x3_16x16_neon x264_template(pixel_sad_x3_16x16_neon)
+#define x264_pixel_sad_x3_16x8_neon x264_template(pixel_sad_x3_16x8_neon)
+#define x264_pixel_sad_x3_4x4_neon x264_template(pixel_sad_x3_4x4_neon)
+#define x264_pixel_sad_x3_4x8_neon x264_template(pixel_sad_x3_4x8_neon)
+#define x264_pixel_sad_x3_8x16_neon x264_template(pixel_sad_x3_8x16_neon)
+#define x264_pixel_sad_x3_8x4_neon x264_template(pixel_sad_x3_8x4_neon)
+#define x264_pixel_sad_x3_8x8_neon x264_template(pixel_sad_x3_8x8_neon)
+#define x264_pixel_sad_x4_16x16_neon x264_template(pixel_sad_x4_16x16_neon)
+#define x264_pixel_sad_x4_16x8_neon x264_template(pixel_sad_x4_16x8_neon)
+#define x264_pixel_sad_x4_4x4_neon x264_template(pixel_sad_x4_4x4_neon)
+#define x264_pixel_sad_x4_4x8_neon x264_template(pixel_sad_x4_4x8_neon)
+#define x264_pixel_sad_x4_8x16_neon x264_template(pixel_sad_x4_8x16_neon)
+#define x264_pixel_sad_x4_8x4_neon x264_template(pixel_sad_x4_8x4_neon)
+#define x264_pixel_sad_x4_8x8_neon x264_template(pixel_sad_x4_8x8_neon)
+#define x264_pixel_satd_16x16_neon x264_template(pixel_satd_16x16_neon)
+#define x264_pixel_satd_16x8_neon x264_template(pixel_satd_16x8_neon)
+#define x264_pixel_satd_4x16_neon x264_template(pixel_satd_4x16_neon)
+#define x264_pixel_satd_4x4_neon x264_template(pixel_satd_4x4_neon)
+#define x264_pixel_satd_4x8_neon x264_template(pixel_satd_4x8_neon)
+#define x264_pixel_satd_8x16_neon x264_template(pixel_satd_8x16_neon)
+#define x264_pixel_satd_8x4_neon x264_template(pixel_satd_8x4_neon)
+#define x264_pixel_satd_8x8_neon x264_template(pixel_satd_8x8_neon)
+#define x264_pixel_ssd_16x16_neon x264_template(pixel_ssd_16x16_neon)
+#define x264_pixel_ssd_16x8_neon x264_template(pixel_ssd_16x8_neon)
+#define x264_pixel_ssd_4x16_neon x264_template(pixel_ssd_4x16_neon)
+#define x264_pixel_ssd_4x4_neon x264_template(pixel_ssd_4x4_neon)
+#define x264_pixel_ssd_4x8_neon x264_template(pixel_ssd_4x8_neon)
+#define x264_pixel_ssd_8x16_neon x264_template(pixel_ssd_8x16_neon)
+#define x264_pixel_ssd_8x4_neon x264_template(pixel_ssd_8x4_neon)
+#define x264_pixel_ssd_8x8_neon x264_template(pixel_ssd_8x8_neon)
+#define x264_pixel_ssd_nv12_core_neon x264_template(pixel_ssd_nv12_core_neon)
+#define x264_pixel_ssim_4x4x2_core_neon x264_template(pixel_ssim_4x4x2_core_neon)
+#define x264_pixel_ssim_end4_neon x264_template(pixel_ssim_end4_neon)
+#define x264_pixel_var2_8x16_neon x264_template(pixel_var2_8x16_neon)
+#define x264_pixel_var2_8x8_neon x264_template(pixel_var2_8x8_neon)
+#define x264_pixel_var_16x16_neon x264_template(pixel_var_16x16_neon)
+#define x264_pixel_var_8x16_neon x264_template(pixel_var_8x16_neon)
+#define x264_pixel_var_8x8_neon x264_template(pixel_var_8x8_neon)
+#define x264_pixel_vsad_neon x264_template(pixel_vsad_neon)
#define DECL_PIXELS( ret, name, suffix, args ) \
ret x264_pixel_##name##_16x16_##suffix args;\
ret x264_pixel_##name##_16x8_##suffix args;\
@@ -74,6 +128,7 @@ void x264_pixel_ssim_4x4x2_core_neon( const uint8_t *, intptr_t,
int sums[2][4] );
float x264_pixel_ssim_end4_neon( int sum0[5][4], int sum1[5][4], int width );
+#define x264_pixel_asd8_neon x264_template(pixel_asd8_neon)
int x264_pixel_asd8_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
#endif
diff --git a/common/aarch64/predict.h b/common/aarch64/predict.h
index a8beada..d7866d3 100644
--- a/common/aarch64/predict.h
+++ b/common/aarch64/predict.h
@@ -27,6 +27,39 @@
#ifndef X264_AARCH64_PREDICT_H
#define X264_AARCH64_PREDICT_H
+#define x264_predict_16x16_dc_left_neon x264_template(predict_16x16_dc_left_neon)
+#define x264_predict_16x16_dc_neon x264_template(predict_16x16_dc_neon)
+#define x264_predict_16x16_dc_top_neon x264_template(predict_16x16_dc_top_neon)
+#define x264_predict_16x16_h_neon x264_template(predict_16x16_h_neon)
+#define x264_predict_16x16_p_neon x264_template(predict_16x16_p_neon)
+#define x264_predict_16x16_v_neon x264_template(predict_16x16_v_neon)
+#define x264_predict_4x4_dc_neon x264_template(predict_4x4_dc_neon)
+#define x264_predict_4x4_dc_top_neon x264_template(predict_4x4_dc_top_neon)
+#define x264_predict_4x4_ddl_neon x264_template(predict_4x4_ddl_neon)
+#define x264_predict_4x4_ddr_neon x264_template(predict_4x4_ddr_neon)
+#define x264_predict_4x4_h_aarch64 x264_template(predict_4x4_h_aarch64)
+#define x264_predict_4x4_v_aarch64 x264_template(predict_4x4_v_aarch64)
+#define x264_predict_8x16c_dc_left_neon x264_template(predict_8x16c_dc_left_neon)
+#define x264_predict_8x16c_dc_neon x264_template(predict_8x16c_dc_neon)
+#define x264_predict_8x16c_dc_top_neon x264_template(predict_8x16c_dc_top_neon)
+#define x264_predict_8x16c_h_neon x264_template(predict_8x16c_h_neon)
+#define x264_predict_8x16c_p_neon x264_template(predict_8x16c_p_neon)
+#define x264_predict_8x16c_v_neon x264_template(predict_8x16c_v_neon)
+#define x264_predict_8x8_dc_neon x264_template(predict_8x8_dc_neon)
+#define x264_predict_8x8_ddl_neon x264_template(predict_8x8_ddl_neon)
+#define x264_predict_8x8_ddr_neon x264_template(predict_8x8_ddr_neon)
+#define x264_predict_8x8_h_neon x264_template(predict_8x8_h_neon)
+#define x264_predict_8x8_hd_neon x264_template(predict_8x8_hd_neon)
+#define x264_predict_8x8_hu_neon x264_template(predict_8x8_hu_neon)
+#define x264_predict_8x8_v_neon x264_template(predict_8x8_v_neon)
+#define x264_predict_8x8_vl_neon x264_template(predict_8x8_vl_neon)
+#define x264_predict_8x8_vr_neon x264_template(predict_8x8_vr_neon)
+#define x264_predict_8x8c_dc_left_neon x264_template(predict_8x8c_dc_left_neon)
+#define x264_predict_8x8c_dc_neon x264_template(predict_8x8c_dc_neon)
+#define x264_predict_8x8c_dc_top_neon x264_template(predict_8x8c_dc_top_neon)
+#define x264_predict_8x8c_h_neon x264_template(predict_8x8c_h_neon)
+#define x264_predict_8x8c_p_neon x264_template(predict_8x8c_p_neon)
+#define x264_predict_8x8c_v_aarch64 x264_template(predict_8x8c_v_aarch64)
void x264_predict_4x4_h_aarch64( uint8_t *src );
void x264_predict_4x4_v_aarch64( uint8_t *src );
void x264_predict_8x8c_v_aarch64( uint8_t *src );
@@ -36,6 +69,39 @@ void x264_predict_8x8c_v_aarch64( uint8_t *src );
#define x264_predict_4x4_v_neon x264_predict_4x4_v_aarch64
#define x264_predict_8x8c_v_neon x264_predict_8x8c_v_aarch64
+#define x264_predict_16x16_dc_left_neon x264_template(predict_16x16_dc_left_neon)
+#define x264_predict_16x16_dc_neon x264_template(predict_16x16_dc_neon)
+#define x264_predict_16x16_dc_top_neon x264_template(predict_16x16_dc_top_neon)
+#define x264_predict_16x16_h_neon x264_template(predict_16x16_h_neon)
+#define x264_predict_16x16_p_neon x264_template(predict_16x16_p_neon)
+#define x264_predict_16x16_v_neon x264_template(predict_16x16_v_neon)
+#define x264_predict_4x4_dc_neon x264_template(predict_4x4_dc_neon)
+#define x264_predict_4x4_dc_top_neon x264_template(predict_4x4_dc_top_neon)
+#define x264_predict_4x4_ddl_neon x264_template(predict_4x4_ddl_neon)
+#define x264_predict_4x4_ddr_neon x264_template(predict_4x4_ddr_neon)
+#define x264_predict_4x4_h_aarch64 x264_template(predict_4x4_h_aarch64)
+#define x264_predict_4x4_v_aarch64 x264_template(predict_4x4_v_aarch64)
+#define x264_predict_8x16c_dc_left_neon x264_template(predict_8x16c_dc_left_neon)
+#define x264_predict_8x16c_dc_neon x264_template(predict_8x16c_dc_neon)
+#define x264_predict_8x16c_dc_top_neon x264_template(predict_8x16c_dc_top_neon)
+#define x264_predict_8x16c_h_neon x264_template(predict_8x16c_h_neon)
+#define x264_predict_8x16c_p_neon x264_template(predict_8x16c_p_neon)
+#define x264_predict_8x16c_v_neon x264_template(predict_8x16c_v_neon)
+#define x264_predict_8x8_dc_neon x264_template(predict_8x8_dc_neon)
+#define x264_predict_8x8_ddl_neon x264_template(predict_8x8_ddl_neon)
+#define x264_predict_8x8_ddr_neon x264_template(predict_8x8_ddr_neon)
+#define x264_predict_8x8_h_neon x264_template(predict_8x8_h_neon)
+#define x264_predict_8x8_hd_neon x264_template(predict_8x8_hd_neon)
+#define x264_predict_8x8_hu_neon x264_template(predict_8x8_hu_neon)
+#define x264_predict_8x8_v_neon x264_template(predict_8x8_v_neon)
+#define x264_predict_8x8_vl_neon x264_template(predict_8x8_vl_neon)
+#define x264_predict_8x8_vr_neon x264_template(predict_8x8_vr_neon)
+#define x264_predict_8x8c_dc_left_neon x264_template(predict_8x8c_dc_left_neon)
+#define x264_predict_8x8c_dc_neon x264_template(predict_8x8c_dc_neon)
+#define x264_predict_8x8c_dc_top_neon x264_template(predict_8x8c_dc_top_neon)
+#define x264_predict_8x8c_h_neon x264_template(predict_8x8c_h_neon)
+#define x264_predict_8x8c_p_neon x264_template(predict_8x8c_p_neon)
+#define x264_predict_8x8c_v_aarch64 x264_template(predict_8x8c_v_aarch64)
void x264_predict_4x4_dc_neon( uint8_t *src );
void x264_predict_8x8_v_neon( uint8_t *src, uint8_t edge[36] );
void x264_predict_8x8_h_neon( uint8_t *src, uint8_t edge[36] );
@@ -49,6 +115,11 @@ void x264_predict_16x16_v_neon( uint8_t *src );
void x264_predict_16x16_h_neon( uint8_t *src );
void x264_predict_16x16_dc_neon( uint8_t *src );
+#define x264_predict_4x4_init_aarch64 x264_template(predict_4x4_init_aarch64)
+#define x264_predict_8x8_init_aarch64 x264_template(predict_8x8_init_aarch64)
+#define x264_predict_8x8c_init_aarch64 x264_template(predict_8x8c_init_aarch64)
+#define x264_predict_8x16c_init_aarch64 x264_template(predict_8x16c_init_aarch64)
+#define x264_predict_16x16_init_aarch64 x264_template(predict_16x16_init_aarch64)
void x264_predict_4x4_init_aarch64( int cpu, x264_predict_t pf[12] );
void x264_predict_8x8_init_aarch64( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter );
void x264_predict_8x8c_init_aarch64( int cpu, x264_predict_t pf[7] );
diff --git a/common/aarch64/quant-a.S b/common/aarch64/quant-a.S
index 865e896..f94a0a0 100644
--- a/common/aarch64/quant-a.S
+++ b/common/aarch64/quant-a.S
@@ -305,7 +305,7 @@ endfunc
.macro decimate_score_1x size
function decimate_score\size\()_neon, export=1
ld1 {v0.8h,v1.8h}, [x0]
- movrel x5, X(decimate_table4)
+ movrel x5, X264(decimate_table4)
movi v3.16b, #0x01
sqxtn v0.8b, v0.8h
sqxtn2 v0.16b, v1.8h
@@ -391,7 +391,7 @@ function decimate_score64_neon, export=1
mvn x1, x1
mov w0, #0
cbz x1, 0f
- movrel x5, X(decimate_table8)
+ movrel x5, X264(decimate_table8)
1:
clz x3, x1
lsl x1, x1, x3
diff --git a/common/aarch64/quant.h b/common/aarch64/quant.h
index e0133e7..d885344 100644
--- a/common/aarch64/quant.h
+++ b/common/aarch64/quant.h
@@ -29,31 +29,52 @@
int x264_quant_2x2_dc_aarch64( int16_t dct[4], int mf, int bias );
+#define x264_quant_2x2_dc_neon x264_template(quant_2x2_dc_neon)
+#define x264_quant_4x4_dc_neon x264_template(quant_4x4_dc_neon)
+#define x264_quant_4x4_neon x264_template(quant_4x4_neon)
+#define x264_quant_4x4x4_neon x264_template(quant_4x4x4_neon)
+#define x264_quant_8x8_neon x264_template(quant_8x8_neon)
int x264_quant_2x2_dc_neon( int16_t dct[4], int mf, int bias );
int x264_quant_4x4_dc_neon( int16_t dct[16], int mf, int bias );
int x264_quant_4x4_neon( int16_t dct[16], uint16_t mf[16], uint16_t bias[16] );
int x264_quant_4x4x4_neon( int16_t dct[4][16], uint16_t mf[16], uint16_t bias[16] );
int x264_quant_8x8_neon( int16_t dct[64], uint16_t mf[64], uint16_t bias[64] );
+#define x264_dequant_4x4_dc_neon x264_template(dequant_4x4_dc_neon)
+#define x264_dequant_4x4_neon x264_template(dequant_4x4_neon)
+#define x264_dequant_8x8_neon x264_template(dequant_8x8_neon)
void x264_dequant_4x4_dc_neon( int16_t dct[16], int dequant_mf[6][16], int i_qp );
void x264_dequant_4x4_neon( int16_t dct[16], int dequant_mf[6][16], int i_qp );
void x264_dequant_8x8_neon( int16_t dct[64], int dequant_mf[6][64], int i_qp );
+#define x264_decimate_score15_neon x264_template(decimate_score15_neon)
+#define x264_decimate_score16_neon x264_template(decimate_score16_neon)
+#define x264_decimate_score64_neon x264_template(decimate_score64_neon)
int x264_decimate_score15_neon( int16_t * );
int x264_decimate_score16_neon( int16_t * );
int x264_decimate_score64_neon( int16_t * );
+#define x264_coeff_last15_neon x264_template(coeff_last15_neon)
+#define x264_coeff_last16_neon x264_template(coeff_last16_neon)
+#define x264_coeff_last4_aarch64 x264_template(coeff_last4_aarch64)
+#define x264_coeff_last64_neon x264_template(coeff_last64_neon)
+#define x264_coeff_last8_aarch64 x264_template(coeff_last8_aarch64)
int x264_coeff_last4_aarch64( int16_t * );
int x264_coeff_last8_aarch64( int16_t * );
int x264_coeff_last15_neon( int16_t * );
int x264_coeff_last16_neon( int16_t * );
int x264_coeff_last64_neon( int16_t * );
+#define x264_coeff_level_run15_neon x264_template(coeff_level_run15_neon)
+#define x264_coeff_level_run16_neon x264_template(coeff_level_run16_neon)
+#define x264_coeff_level_run4_aarch64 x264_template(coeff_level_run4_aarch64)
+#define x264_coeff_level_run8_neon x264_template(coeff_level_run8_neon)
int x264_coeff_level_run4_aarch64( int16_t *, x264_run_level_t * );
int x264_coeff_level_run8_neon( int16_t *, x264_run_level_t * );
int x264_coeff_level_run15_neon( int16_t *, x264_run_level_t * );
int x264_coeff_level_run16_neon( int16_t *, x264_run_level_t * );
+#define x264_denoise_dct_neon x264_template(denoise_dct_neon)
void x264_denoise_dct_neon( dctcoef *, uint32_t *, udctcoef *, int );
#endif
--
2.10.0
More information about the x264-devel mailing list