[x264-devel] [PATCH 22/29] Templatize ARM assembly code

Vittorio Giovara vittorio.giovara at gmail.com
Thu Feb 2 10:05:34 CET 2017


---
 common/arm/asm.S     | 15 ++++++----
 common/arm/dct.h     | 17 +++++++++++
 common/arm/mc-c.c    | 57 +++++++++++++++++++++++++++++++++++++
 common/arm/mc.h      |  1 +
 common/arm/pixel.h   | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 common/arm/predict.h | 36 ++++++++++++++++++++++++
 common/arm/quant-a.S |  4 +--
 common/arm/quant.h   | 18 ++++++++++++
 common/bitstream.c   |  1 +
 common/deblock.c     | 13 +++++++++
 common/mc.h          |  1 +
 11 files changed, 234 insertions(+), 8 deletions(-)

diff --git a/common/arm/asm.S b/common/arm/asm.S
index 1b9eaad..39c5c91 100644
--- a/common/arm/asm.S
+++ b/common/arm/asm.S
@@ -38,12 +38,19 @@
 
 .fpu neon
 
+#define GLUE(a, b) a ## b /* paste the two tokens exactly as given */
+#define JOIN(a, b) GLUE(a, b) /* extra level so a and b are macro-expanded before pasting */
+
 #ifdef PREFIX
-#   define EXTERN_ASM _x264_
+#   define BASE _x264_
 #else
-#   define EXTERN_ASM x264_
+#   define BASE x264_
 #endif
 
+#define EXTERN_ASM JOIN(JOIN(BASE, BIT_DEPTH), _) /* BASE + expansion of BIT_DEPTH + '_', e.g. x264_8_ if BIT_DEPTH expands to 8 */
+#define X(s) JOIN(EXTERN_ASM, s) /* symbol name that includes the BIT_DEPTH infix */
+#define X264(s) JOIN(BASE, s) /* symbol name with BASE only, no BIT_DEPTH infix */
+
 #ifdef __ELF__
 #   define ELF
 #else
@@ -168,10 +175,6 @@ ELF     .size   \name, . - \name
 #endif
 .endm
 
-#define GLUE(a, b) a ## b
-#define JOIN(a, b) GLUE(a, b)
-#define X(s) JOIN(EXTERN_ASM, s)
-
 #define FENC_STRIDE 16
 #define FDEC_STRIDE 32
 
diff --git a/common/arm/dct.h b/common/arm/dct.h
index 2b4210a..2af1b13 100644
--- a/common/arm/dct.h
+++ b/common/arm/dct.h
@@ -26,28 +26,45 @@
 #ifndef X264_ARM_DCT_H
 #define X264_ARM_DCT_H
 
+#define x264_dct4x4dc_neon x264_template(dct4x4dc_neon)
 void x264_dct4x4dc_neon( int16_t d[16] );
+#define x264_idct4x4dc_neon x264_template(idct4x4dc_neon)
 void x264_idct4x4dc_neon( int16_t d[16] );
 
+#define x264_sub4x4_dct_neon x264_template(sub4x4_dct_neon)
 void x264_sub4x4_dct_neon( int16_t dct[16], uint8_t *pix1, uint8_t *pix2 );
+#define x264_sub8x8_dct_neon x264_template(sub8x8_dct_neon)
 void x264_sub8x8_dct_neon( int16_t dct[4][16], uint8_t *pix1, uint8_t *pix2 );
+#define x264_sub16x16_dct_neon x264_template(sub16x16_dct_neon)
 void x264_sub16x16_dct_neon( int16_t dct[16][16], uint8_t *pix1, uint8_t *pix2 );
 
+#define x264_add4x4_idct_neon x264_template(add4x4_idct_neon)
 void x264_add4x4_idct_neon( uint8_t *p_dst, int16_t dct[16] );
+#define x264_add8x8_idct_neon x264_template(add8x8_idct_neon)
 void x264_add8x8_idct_neon( uint8_t *p_dst, int16_t dct[4][16] );
+#define x264_add16x16_idct_neon x264_template(add16x16_idct_neon)
 void x264_add16x16_idct_neon( uint8_t *p_dst, int16_t dct[16][16] );
 
+#define x264_add8x8_idct_dc_neon x264_template(add8x8_idct_dc_neon)
 void x264_add8x8_idct_dc_neon( uint8_t *p_dst, int16_t dct[4] );
+#define x264_add16x16_idct_dc_neon x264_template(add16x16_idct_dc_neon)
 void x264_add16x16_idct_dc_neon( uint8_t *p_dst, int16_t dct[16] );
+#define x264_sub8x8_dct_dc_neon x264_template(sub8x8_dct_dc_neon)
 void x264_sub8x8_dct_dc_neon( int16_t dct[4], uint8_t *pix1, uint8_t *pix2 );
+#define x264_sub8x16_dct_dc_neon x264_template(sub8x16_dct_dc_neon)
 void x264_sub8x16_dct_dc_neon( int16_t dct[8], uint8_t *pix1, uint8_t *pix2 );
 
+#define x264_sub8x8_dct8_neon x264_template(sub8x8_dct8_neon)
 void x264_sub8x8_dct8_neon( int16_t dct[64], uint8_t *pix1, uint8_t *pix2 );
+#define x264_sub16x16_dct8_neon x264_template(sub16x16_dct8_neon)
 void x264_sub16x16_dct8_neon( int16_t dct[4][64], uint8_t *pix1, uint8_t *pix2 );
 
+#define x264_add8x8_idct8_neon x264_template(add8x8_idct8_neon)
 void x264_add8x8_idct8_neon( uint8_t *p_dst, int16_t dct[64] );
+#define x264_add16x16_idct8_neon x264_template(add16x16_idct8_neon)
 void x264_add16x16_idct8_neon( uint8_t *p_dst, int16_t dct[4][64] );
 
+#define x264_zigzag_scan_4x4_frame_neon x264_template(zigzag_scan_4x4_frame_neon)
 void x264_zigzag_scan_4x4_frame_neon( int16_t level[16], int16_t dct[16] );
 
 #endif
diff --git a/common/arm/mc-c.c b/common/arm/mc-c.c
index 6df7443..3888d20 100644
--- a/common/arm/mc-c.c
+++ b/common/arm/mc-c.c
@@ -27,12 +27,25 @@
 #include "common/common.h"
 #include "mc.h"
 
+#define x264_prefetch_ref_arm x264_template(prefetch_ref_arm)
 void x264_prefetch_ref_arm( uint8_t *, intptr_t, int );
+#define x264_prefetch_fenc_arm x264_template(prefetch_fenc_arm)
 void x264_prefetch_fenc_arm( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
 
+#define x264_memcpy_aligned_neon x264_template(memcpy_aligned_neon)
 void *x264_memcpy_aligned_neon( void *dst, const void *src, size_t n );
+#define x264_memzero_aligned_neon x264_template(memzero_aligned_neon)
 void x264_memzero_aligned_neon( void *dst, size_t n );
 
+#define x264_pixel_avg_16x16_neon x264_template(pixel_avg_16x16_neon)
+#define x264_pixel_avg_16x8_neon x264_template(pixel_avg_16x8_neon)
+#define x264_pixel_avg_4x16_neon x264_template(pixel_avg_4x16_neon)
+#define x264_pixel_avg_4x2_neon x264_template(pixel_avg_4x2_neon)
+#define x264_pixel_avg_4x4_neon x264_template(pixel_avg_4x4_neon)
+#define x264_pixel_avg_4x8_neon x264_template(pixel_avg_4x8_neon)
+#define x264_pixel_avg_8x16_neon x264_template(pixel_avg_8x16_neon)
+#define x264_pixel_avg_8x4_neon x264_template(pixel_avg_8x4_neon)
+#define x264_pixel_avg_8x8_neon x264_template(pixel_avg_8x8_neon)
 void x264_pixel_avg_16x16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
 void x264_pixel_avg_16x8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
 void x264_pixel_avg_8x16_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
@@ -43,11 +56,20 @@ void x264_pixel_avg_4x8_neon  ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_
 void x264_pixel_avg_4x4_neon  ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
 void x264_pixel_avg_4x2_neon  ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
 
+#define x264_pixel_avg2_w16_neon x264_template(pixel_avg2_w16_neon)
+#define x264_pixel_avg2_w20_neon x264_template(pixel_avg2_w20_neon)
+#define x264_pixel_avg2_w4_neon x264_template(pixel_avg2_w4_neon)
+#define x264_pixel_avg2_w8_neon x264_template(pixel_avg2_w8_neon)
 void x264_pixel_avg2_w4_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
 void x264_pixel_avg2_w8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
 void x264_pixel_avg2_w16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
 void x264_pixel_avg2_w20_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
 
+#define x264_plane_copy_core_neon x264_template(plane_copy_core_neon)
+#define x264_plane_copy_deinterleave_neon x264_template(plane_copy_deinterleave_neon)
+#define x264_plane_copy_deinterleave_rgb_neon x264_template(plane_copy_deinterleave_rgb_neon)
+#define x264_plane_copy_interleave_core_neon x264_template(plane_copy_interleave_core_neon)
+#define x264_plane_copy_swap_core_neon x264_template(plane_copy_swap_core_neon)
 void x264_plane_copy_core_neon( pixel *dst, intptr_t i_dst,
                                 pixel *src, intptr_t i_src, int w, int h );
 void x264_plane_copy_deinterleave_neon(  pixel *dstu, intptr_t i_dstu,
@@ -63,10 +85,29 @@ void x264_plane_copy_interleave_core_neon( pixel *dst,  intptr_t i_dst,
 void x264_plane_copy_swap_core_neon( pixel *dst, intptr_t i_dst,
                                      pixel *src, intptr_t i_src, int w, int h );
 
+#define x264_store_interleave_chroma_neon x264_template(store_interleave_chroma_neon)
 void x264_store_interleave_chroma_neon( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
+#define x264_load_deinterleave_chroma_fdec_neon x264_template(load_deinterleave_chroma_fdec_neon)
 void x264_load_deinterleave_chroma_fdec_neon( pixel *dst, pixel *src, intptr_t i_src, int height );
+#define x264_load_deinterleave_chroma_fenc_neon x264_template(load_deinterleave_chroma_fenc_neon)
 void x264_load_deinterleave_chroma_fenc_neon( pixel *dst, pixel *src, intptr_t i_src, int height );
 
+#define x264_mc_weight_w16_neon x264_template(mc_weight_w16_neon)
+#define x264_mc_weight_w16_nodenom_neon x264_template(mc_weight_w16_nodenom_neon)
+#define x264_mc_weight_w16_offsetadd_neon x264_template(mc_weight_w16_offsetadd_neon)
+#define x264_mc_weight_w16_offsetsub_neon x264_template(mc_weight_w16_offsetsub_neon)
+#define x264_mc_weight_w20_neon x264_template(mc_weight_w20_neon)
+#define x264_mc_weight_w20_nodenom_neon x264_template(mc_weight_w20_nodenom_neon)
+#define x264_mc_weight_w20_offsetadd_neon x264_template(mc_weight_w20_offsetadd_neon)
+#define x264_mc_weight_w20_offsetsub_neon x264_template(mc_weight_w20_offsetsub_neon)
+#define x264_mc_weight_w4_neon x264_template(mc_weight_w4_neon)
+#define x264_mc_weight_w4_nodenom_neon x264_template(mc_weight_w4_nodenom_neon)
+#define x264_mc_weight_w4_offsetadd_neon x264_template(mc_weight_w4_offsetadd_neon)
+#define x264_mc_weight_w4_offsetsub_neon x264_template(mc_weight_w4_offsetsub_neon)
+#define x264_mc_weight_w8_neon x264_template(mc_weight_w8_neon)
+#define x264_mc_weight_w8_nodenom_neon x264_template(mc_weight_w8_nodenom_neon)
+#define x264_mc_weight_w8_offsetadd_neon x264_template(mc_weight_w8_offsetadd_neon)
+#define x264_mc_weight_w8_offsetsub_neon x264_template(mc_weight_w8_offsetsub_neon)
 #if !HIGH_BIT_DEPTH
 #define MC_WEIGHT(func)\
 void x264_mc_weight_w20##func##_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\
@@ -90,26 +131,42 @@ MC_WEIGHT(_offsetadd)
 MC_WEIGHT(_offsetsub)
 #endif
 
+#define x264_mc_copy_w16_aligned_neon x264_template(mc_copy_w16_aligned_neon)
+#define x264_mc_copy_w16_neon x264_template(mc_copy_w16_neon)
+#define x264_mc_copy_w4_neon x264_template(mc_copy_w4_neon)
+#define x264_mc_copy_w8_neon x264_template(mc_copy_w8_neon)
 void x264_mc_copy_w4_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
 void x264_mc_copy_w8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
 void x264_mc_copy_w16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
 void x264_mc_copy_w16_aligned_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
 
+#define x264_mc_chroma_neon x264_template(mc_chroma_neon)
 void x264_mc_chroma_neon( uint8_t *, uint8_t *, intptr_t, uint8_t *, intptr_t, int, int, int, int );
+#define x264_frame_init_lowres_core_neon x264_template(frame_init_lowres_core_neon)
 void x264_frame_init_lowres_core_neon( uint8_t *, uint8_t *, uint8_t *, uint8_t *, uint8_t *, intptr_t, intptr_t, int, int );
 
+#define x264_hpel_filter_c_neon x264_template(hpel_filter_c_neon)
+#define x264_hpel_filter_h_neon x264_template(hpel_filter_h_neon)
+#define x264_hpel_filter_v_neon x264_template(hpel_filter_v_neon)
 void x264_hpel_filter_v_neon( uint8_t *, uint8_t *, int16_t *, intptr_t, int );
 void x264_hpel_filter_c_neon( uint8_t *, int16_t *, int );
 void x264_hpel_filter_h_neon( uint8_t *, uint8_t *, int );
 
+#define x264_integral_init4h_neon x264_template(integral_init4h_neon)
+#define x264_integral_init4v_neon x264_template(integral_init4v_neon)
+#define x264_integral_init8h_neon x264_template(integral_init8h_neon)
+#define x264_integral_init8v_neon x264_template(integral_init8v_neon)
 void x264_integral_init4h_neon( uint16_t *, uint8_t *, intptr_t );
 void x264_integral_init4v_neon( uint16_t *, uint16_t *, intptr_t );
 void x264_integral_init8h_neon( uint16_t *, uint8_t *, intptr_t );
 void x264_integral_init8v_neon( uint16_t *, intptr_t );
 
+#define x264_mbtree_propagate_cost_neon x264_template(mbtree_propagate_cost_neon)
 void x264_mbtree_propagate_cost_neon( int16_t *, uint16_t *, uint16_t *, uint16_t *, uint16_t *, float *, int );
 
+#define x264_mbtree_fix8_pack_neon x264_template(mbtree_fix8_pack_neon)
 void x264_mbtree_fix8_pack_neon( uint16_t *dst, float *src, int count );
+#define x264_mbtree_fix8_unpack_neon x264_template(mbtree_fix8_unpack_neon)
 void x264_mbtree_fix8_unpack_neon( float *dst, uint16_t *src, int count );
 
 #if !HIGH_BIT_DEPTH
diff --git a/common/arm/mc.h b/common/arm/mc.h
index fc8802f..6ea7877 100644
--- a/common/arm/mc.h
+++ b/common/arm/mc.h
@@ -26,6 +26,7 @@
 #ifndef X264_ARM_MC_H
 #define X264_ARM_MC_H
 
+#define x264_mc_init_arm x264_template(mc_init_arm)
 void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf );
 
 #endif
diff --git a/common/arm/pixel.h b/common/arm/pixel.h
index 8a6751b..3f72791 100644
--- a/common/arm/pixel.h
+++ b/common/arm/pixel.h
@@ -26,6 +26,85 @@
 #ifndef X264_ARM_PIXEL_H
 #define X264_ARM_PIXEL_H
 
+#define x264_pixel_asd8_neon x264_template(pixel_asd8_neon)
+#define x264_pixel_avg2_w16_neon x264_template(pixel_avg2_w16_neon)
+#define x264_pixel_avg2_w20_neon x264_template(pixel_avg2_w20_neon)
+#define x264_pixel_avg2_w4_neon x264_template(pixel_avg2_w4_neon)
+#define x264_pixel_avg2_w8_neon x264_template(pixel_avg2_w8_neon)
+#define x264_pixel_avg_16x16_neon x264_template(pixel_avg_16x16_neon)
+#define x264_pixel_avg_16x8_neon x264_template(pixel_avg_16x8_neon)
+#define x264_pixel_avg_4x16_neon x264_template(pixel_avg_4x16_neon)
+#define x264_pixel_avg_4x2_neon x264_template(pixel_avg_4x2_neon)
+#define x264_pixel_avg_4x4_neon x264_template(pixel_avg_4x4_neon)
+#define x264_pixel_avg_4x8_neon x264_template(pixel_avg_4x8_neon)
+#define x264_pixel_avg_8x16_neon x264_template(pixel_avg_8x16_neon)
+#define x264_pixel_avg_8x4_neon x264_template(pixel_avg_8x4_neon)
+#define x264_pixel_avg_8x8_neon x264_template(pixel_avg_8x8_neon)
+#define x264_pixel_hadamard_ac_16x16_neon x264_template(pixel_hadamard_ac_16x16_neon)
+#define x264_pixel_hadamard_ac_16x8_neon x264_template(pixel_hadamard_ac_16x8_neon)
+#define x264_pixel_hadamard_ac_8x16_neon x264_template(pixel_hadamard_ac_8x16_neon)
+#define x264_pixel_hadamard_ac_8x8_neon x264_template(pixel_hadamard_ac_8x8_neon)
+#define x264_pixel_sa8d_16x16_neon x264_template(pixel_sa8d_16x16_neon)
+#define x264_pixel_sa8d_8x8_neon x264_template(pixel_sa8d_8x8_neon)
+#define x264_pixel_sa8d_satd_16x16_neon x264_template(pixel_sa8d_satd_16x16_neon)
+#define x264_pixel_sad_16x16_neon x264_template(pixel_sad_16x16_neon)
+#define x264_pixel_sad_16x8_neon x264_template(pixel_sad_16x8_neon)
+#define x264_pixel_sad_4x4_armv6 x264_template(pixel_sad_4x4_armv6)
+#define x264_pixel_sad_4x4_neon x264_template(pixel_sad_4x4_neon)
+#define x264_pixel_sad_4x8_armv6 x264_template(pixel_sad_4x8_armv6)
+#define x264_pixel_sad_4x8_neon x264_template(pixel_sad_4x8_neon)
+#define x264_pixel_sad_8x16_neon x264_template(pixel_sad_8x16_neon)
+#define x264_pixel_sad_8x4_neon x264_template(pixel_sad_8x4_neon)
+#define x264_pixel_sad_8x8_neon x264_template(pixel_sad_8x8_neon)
+#define x264_pixel_sad_aligned_16x16_neon x264_template(pixel_sad_aligned_16x16_neon)
+#define x264_pixel_sad_aligned_16x16_neon_dual x264_template(pixel_sad_aligned_16x16_neon_dual)
+#define x264_pixel_sad_aligned_16x8_neon x264_template(pixel_sad_aligned_16x8_neon)
+#define x264_pixel_sad_aligned_16x8_neon_dual x264_template(pixel_sad_aligned_16x8_neon_dual)
+#define x264_pixel_sad_aligned_4x4_neon x264_template(pixel_sad_aligned_4x4_neon)
+#define x264_pixel_sad_aligned_4x8_neon x264_template(pixel_sad_aligned_4x8_neon)
+#define x264_pixel_sad_aligned_8x16_neon x264_template(pixel_sad_aligned_8x16_neon)
+#define x264_pixel_sad_aligned_8x16_neon_dual x264_template(pixel_sad_aligned_8x16_neon_dual)
+#define x264_pixel_sad_aligned_8x4_neon x264_template(pixel_sad_aligned_8x4_neon)
+#define x264_pixel_sad_aligned_8x4_neon_dual x264_template(pixel_sad_aligned_8x4_neon_dual)
+#define x264_pixel_sad_aligned_8x8_neon x264_template(pixel_sad_aligned_8x8_neon)
+#define x264_pixel_sad_aligned_8x8_neon_dual x264_template(pixel_sad_aligned_8x8_neon_dual)
+#define x264_pixel_sad_x3_16x16_neon x264_template(pixel_sad_x3_16x16_neon)
+#define x264_pixel_sad_x3_16x8_neon x264_template(pixel_sad_x3_16x8_neon)
+#define x264_pixel_sad_x3_4x4_neon x264_template(pixel_sad_x3_4x4_neon)
+#define x264_pixel_sad_x3_4x8_neon x264_template(pixel_sad_x3_4x8_neon)
+#define x264_pixel_sad_x3_8x16_neon x264_template(pixel_sad_x3_8x16_neon)
+#define x264_pixel_sad_x3_8x4_neon x264_template(pixel_sad_x3_8x4_neon)
+#define x264_pixel_sad_x3_8x8_neon x264_template(pixel_sad_x3_8x8_neon)
+#define x264_pixel_sad_x4_16x16_neon x264_template(pixel_sad_x4_16x16_neon)
+#define x264_pixel_sad_x4_16x8_neon x264_template(pixel_sad_x4_16x8_neon)
+#define x264_pixel_sad_x4_4x4_neon x264_template(pixel_sad_x4_4x4_neon)
+#define x264_pixel_sad_x4_4x8_neon x264_template(pixel_sad_x4_4x8_neon)
+#define x264_pixel_sad_x4_8x16_neon x264_template(pixel_sad_x4_8x16_neon)
+#define x264_pixel_sad_x4_8x4_neon x264_template(pixel_sad_x4_8x4_neon)
+#define x264_pixel_sad_x4_8x8_neon x264_template(pixel_sad_x4_8x8_neon)
+#define x264_pixel_satd_16x16_neon x264_template(pixel_satd_16x16_neon)
+#define x264_pixel_satd_16x8_neon x264_template(pixel_satd_16x8_neon)
+#define x264_pixel_satd_4x4_neon x264_template(pixel_satd_4x4_neon)
+#define x264_pixel_satd_4x8_neon x264_template(pixel_satd_4x8_neon)
+#define x264_pixel_satd_8x16_neon x264_template(pixel_satd_8x16_neon)
+#define x264_pixel_satd_8x4_neon x264_template(pixel_satd_8x4_neon)
+#define x264_pixel_satd_8x8_neon x264_template(pixel_satd_8x8_neon)
+#define x264_pixel_ssd_16x16_neon x264_template(pixel_ssd_16x16_neon)
+#define x264_pixel_ssd_16x8_neon x264_template(pixel_ssd_16x8_neon)
+#define x264_pixel_ssd_4x4_neon x264_template(pixel_ssd_4x4_neon)
+#define x264_pixel_ssd_4x8_neon x264_template(pixel_ssd_4x8_neon)
+#define x264_pixel_ssd_8x16_neon x264_template(pixel_ssd_8x16_neon)
+#define x264_pixel_ssd_8x4_neon x264_template(pixel_ssd_8x4_neon)
+#define x264_pixel_ssd_8x8_neon x264_template(pixel_ssd_8x8_neon)
+#define x264_pixel_ssd_nv12_core_neon x264_template(pixel_ssd_nv12_core_neon)
+#define x264_pixel_ssim_4x4x2_core_neon x264_template(pixel_ssim_4x4x2_core_neon)
+#define x264_pixel_ssim_end4_neon x264_template(pixel_ssim_end4_neon)
+#define x264_pixel_var2_8x16_neon x264_template(pixel_var2_8x16_neon)
+#define x264_pixel_var2_8x8_neon x264_template(pixel_var2_8x8_neon)
+#define x264_pixel_var_16x16_neon x264_template(pixel_var_16x16_neon)
+#define x264_pixel_var_8x16_neon x264_template(pixel_var_8x16_neon)
+#define x264_pixel_var_8x8_neon x264_template(pixel_var_8x8_neon)
+#define x264_pixel_vsad_neon x264_template(pixel_vsad_neon)
 #define DECL_PIXELS( ret, name, suffix, args ) \
     ret x264_pixel_##name##_16x16_##suffix args;\
     ret x264_pixel_##name##_16x8_##suffix args;\
diff --git a/common/arm/predict.h b/common/arm/predict.h
index 35aeaaf..bd5ca9c 100644
--- a/common/arm/predict.h
+++ b/common/arm/predict.h
@@ -26,6 +26,36 @@
 #ifndef X264_ARM_PREDICT_H
 #define X264_ARM_PREDICT_H
 
+#define x264_predict_16x16_dc_left_neon x264_template(predict_16x16_dc_left_neon)
+#define x264_predict_16x16_dc_neon x264_template(predict_16x16_dc_neon)
+#define x264_predict_16x16_dc_top_neon x264_template(predict_16x16_dc_top_neon)
+#define x264_predict_16x16_h_neon x264_template(predict_16x16_h_neon)
+#define x264_predict_16x16_p_neon x264_template(predict_16x16_p_neon)
+#define x264_predict_16x16_v_neon x264_template(predict_16x16_v_neon)
+#define x264_predict_4x4_dc_armv6 x264_template(predict_4x4_dc_armv6)
+#define x264_predict_4x4_dc_top_neon x264_template(predict_4x4_dc_top_neon)
+#define x264_predict_4x4_ddl_neon x264_template(predict_4x4_ddl_neon)
+#define x264_predict_4x4_ddr_armv6 x264_template(predict_4x4_ddr_armv6)
+#define x264_predict_4x4_h_armv6 x264_template(predict_4x4_h_armv6)
+#define x264_predict_4x4_v_armv6 x264_template(predict_4x4_v_armv6)
+#define x264_predict_8x16c_dc_top_neon x264_template(predict_8x16c_dc_top_neon)
+#define x264_predict_8x16c_h_neon x264_template(predict_8x16c_h_neon)
+#define x264_predict_8x16c_p_neon x264_template(predict_8x16c_p_neon)
+#define x264_predict_8x8_dc_neon x264_template(predict_8x8_dc_neon)
+#define x264_predict_8x8_ddl_neon x264_template(predict_8x8_ddl_neon)
+#define x264_predict_8x8_ddr_neon x264_template(predict_8x8_ddr_neon)
+#define x264_predict_8x8_h_neon x264_template(predict_8x8_h_neon)
+#define x264_predict_8x8_hd_neon x264_template(predict_8x8_hd_neon)
+#define x264_predict_8x8_hu_neon x264_template(predict_8x8_hu_neon)
+#define x264_predict_8x8_v_neon x264_template(predict_8x8_v_neon)
+#define x264_predict_8x8_vl_neon x264_template(predict_8x8_vl_neon)
+#define x264_predict_8x8_vr_neon x264_template(predict_8x8_vr_neon)
+#define x264_predict_8x8c_dc_left_neon x264_template(predict_8x8c_dc_left_neon)
+#define x264_predict_8x8c_dc_neon x264_template(predict_8x8c_dc_neon)
+#define x264_predict_8x8c_dc_top_neon x264_template(predict_8x8c_dc_top_neon)
+#define x264_predict_8x8c_h_neon x264_template(predict_8x8c_h_neon)
+#define x264_predict_8x8c_p_neon x264_template(predict_8x8c_p_neon)
+#define x264_predict_8x8c_v_neon x264_template(predict_8x8c_v_neon)
 void x264_predict_4x4_dc_armv6( uint8_t *src );
 void x264_predict_4x4_dc_top_neon( uint8_t *src );
 void x264_predict_4x4_v_armv6( uint8_t *src );
@@ -61,6 +91,12 @@ void x264_predict_16x16_h_neon( uint8_t *src );
 void x264_predict_16x16_v_neon( uint8_t *src );
 void x264_predict_16x16_p_neon( uint8_t *src );
 
+
+#define x264_predict_4x4_init_arm x264_template(predict_4x4_init_arm)
+#define x264_predict_8x8_init_arm x264_template(predict_8x8_init_arm)
+#define x264_predict_8x8c_init_arm x264_template(predict_8x8c_init_arm)
+#define x264_predict_8x16c_init_arm x264_template(predict_8x16c_init_arm)
+#define x264_predict_16x16_init_arm x264_template(predict_16x16_init_arm)
 void x264_predict_4x4_init_arm( int cpu, x264_predict_t pf[12] );
 void x264_predict_8x8_init_arm( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter );
 void x264_predict_8x8c_init_arm( int cpu, x264_predict_t pf[7] );
diff --git a/common/arm/quant-a.S b/common/arm/quant-a.S
index 10282e7..2473e84 100644
--- a/common/arm/quant-a.S
+++ b/common/arm/quant-a.S
@@ -346,7 +346,7 @@ function decimate_score\size\()_neon
     lsr         r1,  r1,  #2
 .endif
     rbit        r1,  r1
-    movrelx     r3,  X(decimate_table4), r2
+    movrelx     r3,  X264(decimate_table4), r2
 1:
     clz         r2,  r1
     lsl         r1,  r1,  r2
@@ -415,7 +415,7 @@ function decimate_score64_neon
     mvn         r12, r12
     mov         r0,  #0
     mov         lr,  #32
-    movrelx     r3,  X(decimate_table8), r2
+    movrelx     r3,  X264(decimate_table8), r2
     beq         2f
 1:
     clz         r2,  r1
diff --git a/common/arm/quant.h b/common/arm/quant.h
index 2c71577..60c2e98 100644
--- a/common/arm/quant.h
+++ b/common/arm/quant.h
@@ -26,28 +26,46 @@
 #ifndef X264_ARM_QUANT_H
 #define X264_ARM_QUANT_H
 
+#define x264_quant_2x2_dc_armv6 x264_template(quant_2x2_dc_armv6)
 int x264_quant_2x2_dc_armv6( int16_t dct[4], int mf, int bias );
 
+#define x264_quant_2x2_dc_neon x264_template(quant_2x2_dc_neon)
+#define x264_quant_4x4_dc_neon x264_template(quant_4x4_dc_neon)
+#define x264_quant_4x4_neon x264_template(quant_4x4_neon)
+#define x264_quant_4x4x4_neon x264_template(quant_4x4x4_neon)
+#define x264_quant_8x8_neon x264_template(quant_8x8_neon)
 int x264_quant_2x2_dc_neon( int16_t dct[4], int mf, int bias );
 int x264_quant_4x4_dc_neon( int16_t dct[16], int mf, int bias );
 int x264_quant_4x4_neon( int16_t dct[16], uint16_t mf[16], uint16_t bias[16] );
 int x264_quant_4x4x4_neon( int16_t dct[4][16], uint16_t mf[16], uint16_t bias[16] );
 int x264_quant_8x8_neon( int16_t dct[64], uint16_t mf[64], uint16_t bias[64] );
 
+#define x264_dequant_4x4_dc_neon x264_template(dequant_4x4_dc_neon)
+#define x264_dequant_4x4_neon x264_template(dequant_4x4_neon)
+#define x264_dequant_8x8_neon x264_template(dequant_8x8_neon)
 void x264_dequant_4x4_dc_neon( int16_t dct[16], int dequant_mf[6][16], int i_qp );
 void x264_dequant_4x4_neon( int16_t dct[16], int dequant_mf[6][16], int i_qp );
 void x264_dequant_8x8_neon( int16_t dct[64], int dequant_mf[6][64], int i_qp );
 
+#define x264_decimate_score15_neon x264_template(decimate_score15_neon)
+#define x264_decimate_score16_neon x264_template(decimate_score16_neon)
+#define x264_decimate_score64_neon x264_template(decimate_score64_neon)
 int x264_decimate_score15_neon( int16_t * );
 int x264_decimate_score16_neon( int16_t * );
 int x264_decimate_score64_neon( int16_t * );
 
+#define x264_coeff_last15_neon x264_template(coeff_last15_neon)
+#define x264_coeff_last16_neon x264_template(coeff_last16_neon)
+#define x264_coeff_last4_arm x264_template(coeff_last4_arm)
+#define x264_coeff_last64_neon x264_template(coeff_last64_neon)
+#define x264_coeff_last8_arm x264_template(coeff_last8_arm)
 int x264_coeff_last4_arm( int16_t * );
 int x264_coeff_last8_arm( int16_t * );
 int x264_coeff_last15_neon( int16_t * );
 int x264_coeff_last16_neon( int16_t * );
 int x264_coeff_last64_neon( int16_t * );
 
+#define x264_denoise_dct_neon x264_template(denoise_dct_neon)
 void x264_denoise_dct_neon( dctcoef *, uint32_t *, udctcoef *, int );
 
 #endif
diff --git a/common/bitstream.c b/common/bitstream.c
index d52281b..458b26a 100644
--- a/common/bitstream.c
+++ b/common/bitstream.c
@@ -68,6 +68,7 @@ void x264_cabac_block_residual_internal_sse2      ( dctcoef *l, int b_interlaced
 void x264_cabac_block_residual_internal_sse2_lzcnt( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
 void x264_cabac_block_residual_internal_avx2_bmi2 ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
 
+#define x264_nal_escape_neon x264_template(nal_escape_neon)
 uint8_t *x264_nal_escape_neon( uint8_t *dst, uint8_t *src, uint8_t *end );
 
 /****************************************************************************
diff --git a/common/deblock.c b/common/deblock.c
index bad8db0..76afc2d 100644
--- a/common/deblock.c
+++ b/common/deblock.c
@@ -762,6 +762,19 @@ void x264_deblock_h_luma_altivec( uint8_t *pix, intptr_t stride, int alpha, int
 #endif // ARCH_PPC
 
 #if HAVE_ARMV6 || ARCH_AARCH64
+#define x264_deblock_h_chroma_422_intra_neon x264_template(deblock_h_chroma_422_intra_neon)
+#define x264_deblock_h_chroma_422_neon x264_template(deblock_h_chroma_422_neon)
+#define x264_deblock_h_chroma_intra_mbaff_neon x264_template(deblock_h_chroma_intra_mbaff_neon)
+#define x264_deblock_h_chroma_intra_neon x264_template(deblock_h_chroma_intra_neon)
+#define x264_deblock_h_chroma_mbaff_neon x264_template(deblock_h_chroma_mbaff_neon)
+#define x264_deblock_h_chroma_neon x264_template(deblock_h_chroma_neon)
+#define x264_deblock_h_luma_intra_neon x264_template(deblock_h_luma_intra_neon)
+#define x264_deblock_h_luma_neon x264_template(deblock_h_luma_neon)
+#define x264_deblock_strength_neon x264_template(deblock_strength_neon)
+#define x264_deblock_v_chroma_intra_neon x264_template(deblock_v_chroma_intra_neon)
+#define x264_deblock_v_chroma_neon x264_template(deblock_v_chroma_neon)
+#define x264_deblock_v_luma_intra_neon x264_template(deblock_v_luma_intra_neon)
+#define x264_deblock_v_luma_neon x264_template(deblock_v_luma_neon)
 void x264_deblock_v_luma_neon  ( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
 void x264_deblock_h_luma_neon  ( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
 void x264_deblock_v_chroma_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
diff --git a/common/mc.h b/common/mc.h
index d19f52b..a66d682 100644
--- a/common/mc.h
+++ b/common/mc.h
@@ -34,6 +34,7 @@ do\
     MC_CLIP_ADD((s)[1], (x)[1]);\
 } while( 0 )
 
+#define x264_mbtree_propagate_list_internal_neon x264_template(mbtree_propagate_list_internal_neon)
 #define PROPAGATE_LIST(cpu)\
 void x264_mbtree_propagate_list_internal_##cpu( int16_t (*mvs)[2], int16_t *propagate_amount,\
                                                 uint16_t *lowres_costs, int16_t *output,\
-- 
2.10.0



More information about the x264-devel mailing list