[x264-devel] [PATCH 01/29] arm: Set the function symbol prefix in a single location
Vittorio Giovara
vittorio.giovara at gmail.com
Thu Feb 2 10:05:13 CET 2017
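Fold the x264_ prefix into EXTERN_ASM so that it is defined in a single
place (common/arm/asm.S) instead of being spelled out in every function
name. Function declarations and branch targets drop the explicit x264_
prefix; the .size directive emitted by endfunc now uses EXTERN_ASM\name
for exported functions and the bare name for export=0 ones.
---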
common/arm/asm.S | 8 ++-
common/arm/bitstream-a.S | 2 +-
common/arm/cpu-a.S | 12 ++---
common/arm/dct-a.S | 94 +++++++++++++++++-----------------
common/arm/deblock-a.S | 30 +++++------
common/arm/mc-a.S | 129 +++++++++++++++++++++++------------------------
common/arm/pixel-a.S | 118 +++++++++++++++++++++----------------------
common/arm/predict-a.S | 60 +++++++++++-----------
common/arm/quant-a.S | 32 ++++++------
tools/checkasm-arm.S | 8 ++-
10 files changed, 250 insertions(+), 243 deletions(-)
diff --git a/common/arm/asm.S b/common/arm/asm.S
index 62a5e578..1b9eaad 100644
--- a/common/arm/asm.S
+++ b/common/arm/asm.S
@@ -39,9 +39,9 @@
.fpu neon
#ifdef PREFIX
-# define EXTERN_ASM _
+# define EXTERN_ASM _x264_
#else
-# define EXTERN_ASM
+# define EXTERN_ASM x264_
#endif
#ifdef __ELF__
@@ -72,7 +72,11 @@ ELF .eabi_attribute 25, \val
.macro function name, export=1
.macro endfunc
+.if \export
+ELF .size EXTERN_ASM\name, . - EXTERN_ASM\name
+.else
ELF .size \name, . - \name
+.endif
FUNC .endfunc
.purgem endfunc
.endm
diff --git a/common/arm/bitstream-a.S b/common/arm/bitstream-a.S
index 62be077..39b5068 100644
--- a/common/arm/bitstream-a.S
+++ b/common/arm/bitstream-a.S
@@ -25,7 +25,7 @@
#include "asm.S"
-function x264_nal_escape_neon
+function nal_escape_neon
push {r4-r5,lr}
vmov.u8 q0, #0xff
vmov.u8 q8, #4
diff --git a/common/arm/cpu-a.S b/common/arm/cpu-a.S
index a61a379..e5e20f2 100644
--- a/common/arm/cpu-a.S
+++ b/common/arm/cpu-a.S
@@ -29,7 +29,7 @@
// done in gas because .fpu neon overrides the refusal to assemble
// instructions the selected -march/-mcpu doesn't support
-function x264_cpu_neon_test
+function cpu_neon_test
vadd.i16 q0, q0, q0
bx lr
endfunc
@@ -37,7 +37,7 @@ endfunc
// return: 0 on success
// 1 if counters were already enabled
// 9 if lo-res counters were already enabled
-function x264_cpu_enable_armv7_counter, export=0
+function cpu_enable_armv7_counter, export=0
mrc p15, 0, r2, c9, c12, 0 // read PMNC
ands r0, r2, #1
andne r0, r2, #9
@@ -50,7 +50,7 @@ function x264_cpu_enable_armv7_counter, export=0
bx lr
endfunc
-function x264_cpu_disable_armv7_counter, export=0
+function cpu_disable_armv7_counter, export=0
mrc p15, 0, r0, c9, c12, 0 // read PMNC
bic r0, r0, #1 // disable counters
mcr p15, 0, r0, c9, c12, 0 // write PMNC
@@ -64,14 +64,14 @@ endfunc
// return: 0 if transfers neon -> arm transfers take more than 10 cycles
// nonzero otherwise
-function x264_cpu_fast_neon_mrc_test
+function cpu_fast_neon_mrc_test
// check for user access to performance counters
mrc p15, 0, r0, c9, c14, 0
cmp r0, #0
bxeq lr
push {r4-r6,lr}
- bl x264_cpu_enable_armv7_counter
+ bl cpu_enable_armv7_counter
ands r1, r0, #8
mov r3, #0
mov ip, #4
@@ -99,7 +99,7 @@ average_loop:
// disable counters if we enabled them
ands r0, r0, #1
- bleq x264_cpu_disable_armv7_counter
+ bleq cpu_disable_armv7_counter
lsr r0, r3, #5
cmp r0, #10
diff --git a/common/arm/dct-a.S b/common/arm/dct-a.S
index 13d5061..d260c04 100644
--- a/common/arm/dct-a.S
+++ b/common/arm/dct-a.S
@@ -64,7 +64,7 @@ scan4x4_frame:
.endm
-function x264_dct4x4dc_neon
+function dct4x4dc_neon
vld1.64 {d0-d3}, [r0,:128]
SUMSUB_ABCD d4, d5, d6, d7, d0, d1, d2, d3
SUMSUB_ABCD d0, d2, d3, d1, d4, d6, d5, d7
@@ -83,7 +83,7 @@ function x264_dct4x4dc_neon
bx lr
endfunc
-function x264_idct4x4dc_neon
+function idct4x4dc_neon
vld1.64 {d0-d3}, [r0,:128]
SUMSUB_ABCD d4, d5, d6, d7, d0, d1, d2, d3
SUMSUB_ABCD d0, d2, d3, d1, d4, d6, d5, d7
@@ -107,7 +107,7 @@ endfunc
vsub.s16 \d3, \d7, \d5
.endm
-function x264_sub4x4_dct_neon
+function sub4x4_dct_neon
mov r3, #FENC_STRIDE
mov ip, #FDEC_STRIDE
vld1.32 {d0[]}, [r1,:32], r3
@@ -130,7 +130,7 @@ function x264_sub4x4_dct_neon
bx lr
endfunc
-function x264_sub8x4_dct_neon, export=0
+function sub8x4_dct_neon, export=0
vld1.64 {d0}, [r1,:64], r3
vld1.64 {d1}, [r2,:64], ip
vsubl.u8 q8, d0, d1
@@ -166,34 +166,34 @@ function x264_sub8x4_dct_neon, export=0
bx lr
endfunc
-function x264_sub8x8_dct_neon
+function sub8x8_dct_neon
push {lr}
mov r3, #FENC_STRIDE
mov ip, #FDEC_STRIDE
- bl x264_sub8x4_dct_neon
+ bl sub8x4_dct_neon
pop {lr}
- b x264_sub8x4_dct_neon
+ b sub8x4_dct_neon
endfunc
-function x264_sub16x16_dct_neon
+function sub16x16_dct_neon
push {lr}
mov r3, #FENC_STRIDE
mov ip, #FDEC_STRIDE
- bl x264_sub8x4_dct_neon
- bl x264_sub8x4_dct_neon
+ bl sub8x4_dct_neon
+ bl sub8x4_dct_neon
sub r1, r1, #8*FENC_STRIDE-8
sub r2, r2, #8*FDEC_STRIDE-8
- bl x264_sub8x4_dct_neon
- bl x264_sub8x4_dct_neon
+ bl sub8x4_dct_neon
+ bl sub8x4_dct_neon
sub r1, r1, #8
sub r2, r2, #8
- bl x264_sub8x4_dct_neon
- bl x264_sub8x4_dct_neon
+ bl sub8x4_dct_neon
+ bl sub8x4_dct_neon
sub r1, r1, #8*FENC_STRIDE-8
sub r2, r2, #8*FDEC_STRIDE-8
- bl x264_sub8x4_dct_neon
+ bl sub8x4_dct_neon
pop {lr}
- b x264_sub8x4_dct_neon
+ b sub8x4_dct_neon
endfunc
@@ -228,7 +228,7 @@ endfunc
SUMSUB_SHR2 2, q11, q13, q3, q13, q0, q1
.endm
-function x264_sub8x8_dct8_neon
+function sub8x8_dct8_neon
mov r3, #FENC_STRIDE
mov ip, #FDEC_STRIDE
vld1.64 {d16}, [r1,:64], r3
@@ -280,19 +280,19 @@ function x264_sub8x8_dct8_neon
bx lr
endfunc
-function x264_sub16x16_dct8_neon
+function sub16x16_dct8_neon
push {lr}
- bl X(x264_sub8x8_dct8_neon)
+ bl X(sub8x8_dct8_neon)
sub r1, r1, #FENC_STRIDE*8 - 8
sub r2, r2, #FDEC_STRIDE*8 - 8
- bl X(x264_sub8x8_dct8_neon)
+ bl X(sub8x8_dct8_neon)
sub r1, r1, #8
sub r2, r2, #8
- bl X(x264_sub8x8_dct8_neon)
+ bl X(sub8x8_dct8_neon)
pop {lr}
sub r1, r1, #FENC_STRIDE*8 - 8
sub r2, r2, #FDEC_STRIDE*8 - 8
- b X(x264_sub8x8_dct8_neon)
+ b X(sub8x8_dct8_neon)
endfunc
@@ -305,7 +305,7 @@ endfunc
vadd.s16 \d6, \d6, \d1
.endm
-function x264_add4x4_idct_neon
+function add4x4_idct_neon
mov r2, #FDEC_STRIDE
vld1.64 {d0-d3}, [r1,:128]
@@ -337,7 +337,7 @@ function x264_add4x4_idct_neon
bx lr
endfunc
-function x264_add8x4_idct_neon, export=0
+function add8x4_idct_neon, export=0
vld1.64 {d0-d3}, [r1,:128]!
IDCT_1D d16, d18, d20, d22, d0, d1, d2, d3
vld1.64 {d4-d7}, [r1,:128]!
@@ -377,29 +377,29 @@ function x264_add8x4_idct_neon, export=0
bx lr
endfunc
-function x264_add8x8_idct_neon
+function add8x8_idct_neon
mov r2, #FDEC_STRIDE
mov ip, lr
- bl x264_add8x4_idct_neon
+ bl add8x4_idct_neon
mov lr, ip
- b x264_add8x4_idct_neon
+ b add8x4_idct_neon
endfunc
-function x264_add16x16_idct_neon
+function add16x16_idct_neon
mov r2, #FDEC_STRIDE
mov ip, lr
- bl x264_add8x4_idct_neon
- bl x264_add8x4_idct_neon
+ bl add8x4_idct_neon
+ bl add8x4_idct_neon
sub r0, r0, #8*FDEC_STRIDE-8
- bl x264_add8x4_idct_neon
- bl x264_add8x4_idct_neon
+ bl add8x4_idct_neon
+ bl add8x4_idct_neon
sub r0, r0, #8
- bl x264_add8x4_idct_neon
- bl x264_add8x4_idct_neon
+ bl add8x4_idct_neon
+ bl add8x4_idct_neon
sub r0, r0, #8*FDEC_STRIDE-8
- bl x264_add8x4_idct_neon
+ bl add8x4_idct_neon
mov lr, ip
- b x264_add8x4_idct_neon
+ b add8x4_idct_neon
endfunc
@@ -437,7 +437,7 @@ endfunc
SUMSUB_AB q11, q12, q2, q12
.endm
-function x264_add8x8_idct8_neon
+function add8x8_idct8_neon
mov r2, #FDEC_STRIDE
vld1.64 {d16-d19}, [r1,:128]!
vld1.64 {d20-d23}, [r1,:128]!
@@ -499,20 +499,20 @@ function x264_add8x8_idct8_neon
bx lr
endfunc
-function x264_add16x16_idct8_neon
+function add16x16_idct8_neon
mov ip, lr
- bl X(x264_add8x8_idct8_neon)
+ bl X(add8x8_idct8_neon)
sub r0, r0, #8*FDEC_STRIDE-8
- bl X(x264_add8x8_idct8_neon)
+ bl X(add8x8_idct8_neon)
sub r0, r0, #8
- bl X(x264_add8x8_idct8_neon)
+ bl X(add8x8_idct8_neon)
sub r0, r0, #8*FDEC_STRIDE-8
mov lr, ip
- b X(x264_add8x8_idct8_neon)
+ b X(add8x8_idct8_neon)
endfunc
-function x264_add8x8_idct_dc_neon
+function add8x8_idct_dc_neon
mov r2, #FDEC_STRIDE
vld1.64 {d16}, [r1,:64]
vrshr.s16 d16, d16, #6
@@ -595,7 +595,7 @@ endfunc
vst1.64 {d22-d23}, [r2,:128], r3
.endm
-function x264_add16x16_idct_dc_neon
+function add16x16_idct_dc_neon
mov r2, r0
mov r3, #FDEC_STRIDE
vmov.i16 q15, #0
@@ -611,7 +611,7 @@ function x264_add16x16_idct_dc_neon
bx lr
endfunc
-function x264_sub8x8_dct_dc_neon
+function sub8x8_dct_dc_neon
mov r3, #FENC_STRIDE
mov ip, #FDEC_STRIDE
vld1.64 {d16}, [r1,:64], r3
@@ -659,7 +659,7 @@ function x264_sub8x8_dct_dc_neon
bx lr
endfunc
-function x264_sub8x16_dct_dc_neon
+function sub8x16_dct_dc_neon
mov r3, #FENC_STRIDE
mov ip, #FDEC_STRIDE
vld1.64 {d16}, [r1,:64], r3
@@ -753,7 +753,7 @@ function x264_sub8x16_dct_dc_neon
endfunc
-function x264_zigzag_scan_4x4_frame_neon
+function zigzag_scan_4x4_frame_neon
movrel r2, scan4x4_frame
vld1.64 {d0-d3}, [r1,:128]
vld1.64 {d16-d19}, [r2,:128]
diff --git a/common/arm/deblock-a.S b/common/arm/deblock-a.S
index b0241d0..c997b3f 100644
--- a/common/arm/deblock-a.S
+++ b/common/arm/deblock-a.S
@@ -117,7 +117,7 @@
vqmovun.s16 d1, q12
.endm
-function x264_deblock_v_luma_neon
+function deblock_v_luma_neon
h264_loop_filter_start
vld1.64 {d0, d1}, [r0,:128], r1
@@ -143,7 +143,7 @@ function x264_deblock_v_luma_neon
bx lr
endfunc
-function x264_deblock_h_luma_neon
+function deblock_h_luma_neon
h264_loop_filter_start
sub r0, r0, #4
@@ -324,7 +324,7 @@ endfunc
.endm
-function x264_deblock_v_luma_intra_neon
+function deblock_v_luma_intra_neon
push {lr}
vld1.64 {d0, d1}, [r0,:128], r1
vld1.64 {d2, d3}, [r0,:128], r1
@@ -352,7 +352,7 @@ function x264_deblock_v_luma_intra_neon
pop {pc}
endfunc
-function x264_deblock_h_luma_intra_neon
+function deblock_h_luma_intra_neon
push {lr}
sub r0, r0, #4
vld1.64 {d22}, [r0], r1
@@ -447,7 +447,7 @@ endfunc
vqmovun.s16 d1, q12
.endm
-function x264_deblock_v_chroma_neon
+function deblock_v_chroma_neon
h264_loop_filter_start
sub r0, r0, r1, lsl #1
@@ -465,7 +465,7 @@ function x264_deblock_v_chroma_neon
bx lr
endfunc
-function x264_deblock_h_chroma_neon
+function deblock_h_chroma_neon
h264_loop_filter_start
sub r0, r0, #4
@@ -499,7 +499,7 @@ deblock_h_chroma:
bx lr
endfunc
-function x264_deblock_h_chroma_422_neon
+function deblock_h_chroma_422_neon
h264_loop_filter_start
push {lr}
sub r0, r0, #4
@@ -547,7 +547,7 @@ endfunc
vqmovun.s16 d0, q11
.endm
-function x264_deblock_h_chroma_mbaff_neon
+function deblock_h_chroma_mbaff_neon
h264_loop_filter_start
sub r0, r0, #4
@@ -610,7 +610,7 @@ endfunc
vbit q0, q2, q13
.endm
-function x264_deblock_v_chroma_intra_neon
+function deblock_v_chroma_intra_neon
sub r0, r0, r1, lsl #1
vld2.8 {d18,d19}, [r0,:128], r1
vld2.8 {d16,d17}, [r0,:128], r1
@@ -626,7 +626,7 @@ function x264_deblock_v_chroma_intra_neon
bx lr
endfunc
-function x264_deblock_h_chroma_intra_neon
+function deblock_h_chroma_intra_neon
sub r0, r0, #4
vld1.8 {d18}, [r0], r1
vld1.8 {d16}, [r0], r1
@@ -657,15 +657,15 @@ function x264_deblock_h_chroma_intra_neon
bx lr
endfunc
-function x264_deblock_h_chroma_422_intra_neon
+function deblock_h_chroma_422_intra_neon
push {lr}
- bl X(x264_deblock_h_chroma_intra_neon)
+ bl X(deblock_h_chroma_intra_neon)
add r0, r0, #2
pop {lr}
- b X(x264_deblock_h_chroma_intra_neon)
+ b X(deblock_h_chroma_intra_neon)
endfunc
-function x264_deblock_h_chroma_intra_mbaff_neon
+function deblock_h_chroma_intra_mbaff_neon
sub r0, r0, #4
vld1.8 {d18}, [r0], r1
vld1.8 {d16}, [r0], r1
@@ -688,7 +688,7 @@ function x264_deblock_h_chroma_intra_mbaff_neon
bx lr
endfunc
-function x264_deblock_strength_neon
+function deblock_strength_neon
ldr ip, [sp]
vmov.i8 q8, #0
lsl ip, ip, #8
diff --git a/common/arm/mc-a.S b/common/arm/mc-a.S
index e8d3d03..f5df805 100644
--- a/common/arm/mc-a.S
+++ b/common/arm/mc-a.S
@@ -39,7 +39,7 @@ pw_0to15:
// They also use nothing above armv5te, but we don't care about pre-armv6
// void prefetch_ref( uint8_t *pix, intptr_t stride, int parity )
-function x264_prefetch_ref_arm
+function prefetch_ref_arm
sub r2, r2, #1
add r0, r0, #64
and r2, r2, r1
@@ -59,7 +59,7 @@ endfunc
// void prefetch_fenc( uint8_t *pix_y, intptr_t stride_y,
// uint8_t *pix_uv, intptr_t stride_uv, int mb_x )
-function x264_prefetch_fenc_arm
+function prefetch_fenc_arm
ldr ip, [sp]
push {lr}
and lr, ip, #3
@@ -84,8 +84,8 @@ function x264_prefetch_fenc_arm
endfunc
-// void *x264_memcpy_aligned( void *dst, const void *src, size_t n )
-function x264_memcpy_aligned_neon
+// void *memcpy_aligned( void *dst, const void *src, size_t n )
+function memcpy_aligned_neon
orr r3, r0, r1, lsr #1
movrel ip, memcpy_table
and r3, r3, #0xc
@@ -151,8 +151,8 @@ endconst
.ltorg
-// void x264_memzero_aligned( void *dst, size_t n )
-function x264_memzero_aligned_neon
+// void memzero_aligned( void *dst, size_t n )
+function memzero_aligned_neon
vmov.i8 q0, #0
vmov.i8 q1, #0
memzero_loop:
@@ -169,18 +169,18 @@ endfunc
// uint8_t *src1, intptr_t src1_stride,
// uint8_t *src2, intptr_t src2_stride, int weight );
.macro AVGH w h
-function x264_pixel_avg_\w\()x\h\()_neon
+function pixel_avg_\w\()x\h\()_neon
ldr ip, [sp, #8]
push {r4-r6,lr}
cmp ip, #32
ldrd r4, r5, [sp, #16]
mov lr, #\h
- beq x264_pixel_avg_w\w\()_neon
+ beq pixel_avg_w\w\()_neon
rsbs r6, ip, #64
- blt x264_pixel_avg_weight_w\w\()_add_sub_neon // weight > 64
+ blt pixel_avg_weight_w\w\()_add_sub_neon // weight > 64
cmp ip, #0
- bge x264_pixel_avg_weight_w\w\()_add_add_neon
- b x264_pixel_avg_weight_w\w\()_sub_add_neon // weight < 0
+ bge pixel_avg_weight_w\w\()_add_add_neon
+ b pixel_avg_weight_w\w\()_sub_add_neon // weight < 0
endfunc
.endm
@@ -245,7 +245,7 @@ AVGH 16, 16
.endm
.macro AVG_WEIGHT ext
-function x264_pixel_avg_weight_w4_\ext\()_neon, export=0
+function pixel_avg_weight_w4_\ext\()_neon, export=0
load_weights_\ext
1: // height loop
subs lr, lr, #2
@@ -261,7 +261,7 @@ function x264_pixel_avg_weight_w4_\ext\()_neon, export=0
pop {r4-r6,pc}
endfunc
-function x264_pixel_avg_weight_w8_\ext\()_neon, export=0
+function pixel_avg_weight_w8_\ext\()_neon, export=0
load_weights_\ext
1: // height loop
subs lr, lr, #4
@@ -285,7 +285,7 @@ function x264_pixel_avg_weight_w8_\ext\()_neon, export=0
pop {r4-r6,pc}
endfunc
-function x264_pixel_avg_weight_w16_\ext\()_neon, export=0
+function pixel_avg_weight_w16_\ext\()_neon, export=0
load_weights_\ext
1: // height loop
subs lr, lr, #2
@@ -310,7 +310,7 @@ AVG_WEIGHT add_add
AVG_WEIGHT add_sub
AVG_WEIGHT sub_add
-function x264_pixel_avg_w4_neon, export=0
+function pixel_avg_w4_neon, export=0
subs lr, lr, #2
vld1.32 {d0[]}, [r2], r3
vld1.32 {d2[]}, [r4], r5
@@ -320,11 +320,11 @@ function x264_pixel_avg_w4_neon, export=0
vrhadd.u8 d1, d1, d3
vst1.32 {d0[0]}, [r0,:32], r1
vst1.32 {d1[0]}, [r0,:32], r1
- bgt x264_pixel_avg_w4_neon
+ bgt pixel_avg_w4_neon
pop {r4-r6,pc}
endfunc
-function x264_pixel_avg_w8_neon, export=0
+function pixel_avg_w8_neon, export=0
subs lr, lr, #4
vld1.64 {d0}, [r2], r3
vld1.64 {d2}, [r4], r5
@@ -342,11 +342,11 @@ function x264_pixel_avg_w8_neon, export=0
vrhadd.u8 d3, d3, d5
vst1.64 {d2}, [r0,:64], r1
vst1.64 {d3}, [r0,:64], r1
- bgt x264_pixel_avg_w8_neon
+ bgt pixel_avg_w8_neon
pop {r4-r6,pc}
endfunc
-function x264_pixel_avg_w16_neon, export=0
+function pixel_avg_w16_neon, export=0
subs lr, lr, #4
vld1.64 {d0-d1}, [r2], r3
vld1.64 {d2-d3}, [r4], r5
@@ -364,12 +364,12 @@ function x264_pixel_avg_w16_neon, export=0
vrhadd.u8 q3, q3, q0
vst1.64 {d4-d5}, [r0,:128], r1
vst1.64 {d6-d7}, [r0,:128], r1
- bgt x264_pixel_avg_w16_neon
+ bgt pixel_avg_w16_neon
pop {r4-r6,pc}
endfunc
-function x264_pixel_avg2_w4_neon
+function pixel_avg2_w4_neon
ldr ip, [sp, #4]
push {lr}
ldr lr, [sp, #4]
@@ -387,7 +387,7 @@ avg2_w4_loop:
pop {pc}
endfunc
-function x264_pixel_avg2_w8_neon
+function pixel_avg2_w8_neon
ldr ip, [sp, #4]
push {lr}
ldr lr, [sp, #4]
@@ -405,7 +405,7 @@ avg2_w8_loop:
pop {pc}
endfunc
-function x264_pixel_avg2_w16_neon
+function pixel_avg2_w16_neon
ldr ip, [sp, #4]
push {lr}
ldr lr, [sp, #4]
@@ -423,7 +423,7 @@ avg2_w16_loop:
pop {pc}
endfunc
-function x264_pixel_avg2_w20_neon
+function pixel_avg2_w20_neon
ldr ip, [sp, #4]
push {lr}
sub r1, r1, #16
@@ -465,7 +465,7 @@ endfunc
// void mc_weight( uint8_t *src, intptr_t src_stride, uint8_t *dst, intptr_t dst_stride,
// const x264_weight_t *weight, int height )
-function x264_mc_weight_w20_neon
+function mc_weight_w20_neon
weight_prologue full
sub r1, #16
weight20_loop:
@@ -501,7 +501,7 @@ weight20_loop:
pop {r4-r5,pc}
endfunc
-function x264_mc_weight_w16_neon
+function mc_weight_w16_neon
weight_prologue full
weight16_loop:
subs ip, #2
@@ -529,7 +529,7 @@ weight16_loop:
pop {r4-r5,pc}
endfunc
-function x264_mc_weight_w8_neon
+function mc_weight_w8_neon
weight_prologue full
weight8_loop:
subs ip, #2
@@ -549,7 +549,7 @@ weight8_loop:
pop {r4-r5,pc}
endfunc
-function x264_mc_weight_w4_neon
+function mc_weight_w4_neon
weight_prologue full
weight4_loop:
subs ip, #2
@@ -565,7 +565,7 @@ weight4_loop:
pop {r4-r5,pc}
endfunc
-function x264_mc_weight_w20_nodenom_neon
+function mc_weight_w20_nodenom_neon
weight_prologue nodenom
sub r1, #16
weight20_nodenom_loop:
@@ -596,7 +596,7 @@ weight20_nodenom_loop:
pop {r4-r5,pc}
endfunc
-function x264_mc_weight_w16_nodenom_neon
+function mc_weight_w16_nodenom_neon
weight_prologue nodenom
weight16_nodenom_loop:
subs ip, #2
@@ -620,7 +620,7 @@ weight16_nodenom_loop:
pop {r4-r5,pc}
endfunc
-function x264_mc_weight_w8_nodenom_neon
+function mc_weight_w8_nodenom_neon
weight_prologue nodenom
weight8_nodenom_loop:
subs ip, #2
@@ -638,7 +638,7 @@ weight8_nodenom_loop:
pop {r4-r5,pc}
endfunc
-function x264_mc_weight_w4_nodenom_neon
+function mc_weight_w4_nodenom_neon
weight_prologue nodenom
weight4_nodenom_loop:
subs ip, #2
@@ -662,7 +662,7 @@ endfunc
.endm
.macro weight_simple name op
-function x264_mc_weight_w20_\name\()_neon
+function mc_weight_w20_\name\()_neon
weight_simple_prologue
weight20_\name\()_loop:
subs ip, #2
@@ -677,7 +677,7 @@ weight20_\name\()_loop:
pop {pc}
endfunc
-function x264_mc_weight_w16_\name\()_neon
+function mc_weight_w16_\name\()_neon
weight_simple_prologue
weight16_\name\()_loop:
subs ip, #2
@@ -691,7 +691,7 @@ weight16_\name\()_loop:
pop {pc}
endfunc
-function x264_mc_weight_w8_\name\()_neon
+function mc_weight_w8_\name\()_neon
weight_simple_prologue
weight8_\name\()_loop:
subs ip, #2
@@ -704,7 +704,7 @@ weight8_\name\()_loop:
pop {pc}
endfunc
-function x264_mc_weight_w4_\name\()_neon
+function mc_weight_w4_\name\()_neon
weight_simple_prologue
weight4_\name\()_loop:
subs ip, #2
@@ -723,7 +723,7 @@ weight_simple offsetsub, vqsub.u8
// void mc_copy( uint8_t *dst, intptr_t dst_stride, uint8_t *src, intptr_t src_stride, int height )
-function x264_mc_copy_w4_neon
+function mc_copy_w4_neon
ldr ip, [sp]
copy_w4_loop:
subs ip, ip, #4
@@ -739,7 +739,7 @@ copy_w4_loop:
bx lr
endfunc
-function x264_mc_copy_w8_neon
+function mc_copy_w8_neon
ldr ip, [sp]
copy_w8_loop:
subs ip, ip, #4
@@ -755,7 +755,7 @@ copy_w8_loop:
bx lr
endfunc
-function x264_mc_copy_w16_neon
+function mc_copy_w16_neon
ldr ip, [sp]
copy_w16_loop:
subs ip, ip, #4
@@ -771,7 +771,7 @@ copy_w16_loop:
bx lr
endfunc
-function x264_mc_copy_w16_aligned_neon
+function mc_copy_w16_aligned_neon
ldr ip, [sp]
copy_w16_aligned_loop:
subs ip, ip, #4
@@ -788,11 +788,10 @@ copy_w16_aligned_loop:
endfunc
-// void x264_mc_chroma_neon( uint8_t *dst, intptr_t i_dst_stride,
-// uint8_t *src, intptr_t i_src_stride,
-// int dx, int dy, int i_width, int i_height );
-
-function x264_mc_chroma_neon
+// void mc_chroma( uint8_t *dst, intptr_t i_dst_stride,
+// uint8_t *src, intptr_t i_src_stride,
+// int dx, int dy, int i_width, int i_height );
+function mc_chroma_neon
push {r4-r8, lr}
vpush {d8-d11}
ldrd r4, r5, [sp, #56]
@@ -1139,7 +1138,7 @@ endfunc
// hpel_filter_v( uint8_t *dst, uint8_t *src, int16_t *buf, intptr_t stride, int width )
-function x264_hpel_filter_v_neon
+function hpel_filter_v_neon
ldr ip, [sp]
sub r1, r1, r3, lsl #1
push {lr}
@@ -1179,7 +1178,7 @@ filter_v_loop:
endfunc
// hpel_filter_c( uint8_t *dst, int16_t *buf, int width );
-function x264_hpel_filter_c_neon
+function hpel_filter_c_neon
sub r1, #16
vld1.64 {d0-d3}, [r1,:128]!
@@ -1264,7 +1263,7 @@ filter_c_loop:
endfunc
// hpel_filter_h( uint8_t *dst, uint8_t *src, int width );
-function x264_hpel_filter_h_neon
+function hpel_filter_h_neon
sub r1, #16
vmov.u8 d30, #5
vld1.64 {d0-d3}, [r1,:128]!
@@ -1354,7 +1353,7 @@ endfunc
// frame_init_lowres_core( uint8_t *src0, uint8_t *dst0, uint8_t *dsth, uint8_t *dstv,
// uint8_t *dstc, intptr_t src_stride, intptr_t dst_stride, int width,
// int height )
-function x264_frame_init_lowres_core_neon
+function frame_init_lowres_core_neon
push {r4-r10,lr}
vpush {d8-d15}
ldrd r4, r5, [sp, #96]
@@ -1442,7 +1441,7 @@ lowres_xloop_end:
pop {r4-r10,pc}
endfunc
-function x264_load_deinterleave_chroma_fdec_neon
+function load_deinterleave_chroma_fdec_neon
mov ip, #FDEC_STRIDE/2
1:
vld2.8 {d0-d1}, [r1,:128], r2
@@ -1455,7 +1454,7 @@ function x264_load_deinterleave_chroma_fdec_neon
bx lr
endfunc
-function x264_load_deinterleave_chroma_fenc_neon
+function load_deinterleave_chroma_fenc_neon
mov ip, #FENC_STRIDE/2
1:
vld2.8 {d0-d1}, [r1,:128], r2
@@ -1468,7 +1467,7 @@ function x264_load_deinterleave_chroma_fenc_neon
bx lr
endfunc
-function x264_plane_copy_core_neon
+function plane_copy_core_neon
push {r4,lr}
ldr r4, [sp, #8]
ldr lr, [sp, #12]
@@ -1499,7 +1498,7 @@ function x264_plane_copy_core_neon
pop {r4,pc}
endfunc
-function x264_plane_copy_deinterleave_neon
+function plane_copy_deinterleave_neon
push {r4-r7, lr}
ldrd r6, r7, [sp, #28]
ldrd r4, r5, [sp, #20]
@@ -1525,7 +1524,7 @@ block:
pop {r4-r7, pc}
endfunc
-function x264_plane_copy_deinterleave_rgb_neon
+function plane_copy_deinterleave_rgb_neon
push {r4-r8, r10, r11, lr}
ldrd r4, r5, [sp, #32]
ldrd r6, r7, [sp, #40]
@@ -1577,7 +1576,7 @@ block4:
pop {r4-r8, r10, r11, pc}
endfunc
-function x264_plane_copy_interleave_core_neon
+function plane_copy_interleave_core_neon
push {r4-r7, lr}
ldrd r6, r7, [sp, #28]
ldrd r4, r5, [sp, #20]
@@ -1604,7 +1603,7 @@ blocki:
pop {r4-r7, pc}
endfunc
-function x264_plane_copy_swap_core_neon
+function plane_copy_swap_core_neon
push {r4-r5, lr}
ldrd r4, r5, [sp, #12]
add lr, r4, #15
@@ -1628,7 +1627,7 @@ function x264_plane_copy_swap_core_neon
pop {r4-r5, pc}
endfunc
-function x264_store_interleave_chroma_neon
+function store_interleave_chroma_neon
push {lr}
ldr lr, [sp, #4]
mov ip, #FDEC_STRIDE
@@ -1652,7 +1651,7 @@ endfunc
vadd.u16 q0, q0, q2
.endm
-function x264_integral_init4h_neon
+function integral_init4h_neon
sub r3, r0, r2, lsl #1
vld1.8 {d6, d7}, [r1, :128]!
1:
@@ -1687,7 +1686,7 @@ endfunc
vadd.u16 q0, q0, \s
.endm
-function x264_integral_init8h_neon
+function integral_init8h_neon
sub r3, r0, r2, lsl #1
vld1.8 {d16, d17}, [r1, :128]!
1:
@@ -1704,7 +1703,7 @@ function x264_integral_init8h_neon
bx lr
endfunc
-function x264_integral_init4v_neon
+function integral_init4v_neon
push {r4-r5}
mov r3, r0
add r4, r0, r2, lsl #3
@@ -1743,7 +1742,7 @@ function x264_integral_init4v_neon
bx lr
endfunc
-function x264_integral_init8v_neon
+function integral_init8v_neon
add r2, r0, r1, lsl #4
sub r1, r1, #8
ands r3, r1, #16 - 1
@@ -1767,7 +1766,7 @@ function x264_integral_init8v_neon
bx lr
endfunc
-function x264_mbtree_propagate_cost_neon
+function mbtree_propagate_cost_neon
push {r4-r5,lr}
ldrd r4, r5, [sp, #12]
ldr lr, [sp, #20]
@@ -1817,7 +1816,7 @@ function x264_mbtree_propagate_cost_neon
pop {r4-r5,pc}
endfunc
-function x264_mbtree_propagate_list_internal_neon
+function mbtree_propagate_list_internal_neon
vld1.16 {d4[]}, [sp] @ bipred_weight
movrel r12, pw_0to15
vmov.u16 q10, #0xc000
@@ -1883,7 +1882,7 @@ function x264_mbtree_propagate_list_internal_neon
endfunc
@ void mbtree_fix8_pack( int16_t *dst, float *src, int count )
-function x264_mbtree_fix8_pack_neon, export=1
+function mbtree_fix8_pack_neon, export=1
subs r3, r2, #8
blt 2f
1:
@@ -1911,7 +1910,7 @@ function x264_mbtree_fix8_pack_neon, export=1
endfunc
@ void mbtree_fix8_unpack( float *dst, int16_t *src, int count )
-function x264_mbtree_fix8_unpack_neon, export=1
+function mbtree_fix8_unpack_neon, export=1
subs r3, r2, #8
blt 2f
1:
diff --git a/common/arm/pixel-a.S b/common/arm/pixel-a.S
index a1a0673..3a4732d 100644
--- a/common/arm/pixel-a.S
+++ b/common/arm/pixel-a.S
@@ -45,7 +45,7 @@ mask_ac8:
.text
.macro SAD4_ARMV6 h
-function x264_pixel_sad_4x\h\()_armv6
+function pixel_sad_4x\h\()_armv6
push {r4-r6,lr}
ldr r4, [r2], r3
ldr r5, [r0], r1
@@ -114,7 +114,7 @@ SAD4_ARMV6 8
.endm
.macro SAD_FUNC w, h, name, align:vararg
-function x264_pixel_sad\name\()_\w\()x\h\()_neon
+function pixel_sad\name\()_\w\()x\h\()_neon
SAD_START_\w \align
.if \w == 16
@@ -205,7 +205,7 @@ SAD_FUNC 16, 16, _aligned, ,:128
.endm
.macro SAD_FUNC_DUAL w, h
-function x264_pixel_sad_aligned_\w\()x\h\()_neon_dual
+function pixel_sad_aligned_\w\()x\h\()_neon_dual
SAD_DUAL_START_\w
.rept \h / 2 - \w / 8
SAD_DUAL_\w
@@ -327,7 +327,7 @@ SAD_FUNC_DUAL 16, 16
.endm
.macro SAD_X_FUNC x, w, h
-function x264_pixel_sad_x\x\()_\w\()x\h\()_neon
+function pixel_sad_x\x\()_\w\()x\h\()_neon
push {r6-r7,lr}
.if \x == 3
ldrd r6, r7, [sp, #12]
@@ -389,7 +389,7 @@ SAD_X_FUNC 4, 8, 16
SAD_X_FUNC 4, 16, 8
SAD_X_FUNC 4, 16, 16
-function x264_pixel_vsad_neon
+function pixel_vsad_neon
subs r2, r2, #2
vld1.8 {q0}, [r0], r1
vld1.8 {q1}, [r0], r1
@@ -413,7 +413,7 @@ function x264_pixel_vsad_neon
bx lr
endfunc
-function x264_pixel_asd8_neon
+function pixel_asd8_neon
ldr r12, [sp, #0]
sub r12, r12, #2
vld1.8 {d0}, [r0], r1
@@ -522,7 +522,7 @@ endfunc
.endm
.macro SSD_FUNC w h
-function x264_pixel_ssd_\w\()x\h\()_neon
+function pixel_ssd_\w\()x\h\()_neon
SSD_START_\w
.rept \h-2
SSD_\w
@@ -543,7 +543,7 @@ SSD_FUNC 8, 16
SSD_FUNC 16, 8
SSD_FUNC 16, 16
-function x264_pixel_ssd_nv12_core_neon
+function pixel_ssd_nv12_core_neon
push {r4-r5}
ldrd r4, r5, [sp, #8]
add r12, r4, #8
@@ -623,7 +623,7 @@ endfunc
\vpadal \qsqr_sum, \qsqr_last
.endm
-function x264_pixel_var_8x8_neon
+function pixel_var_8x8_neon
vld1.64 {d16}, [r0,:64], r1
vmull.u8 q1, d16, d16
vmovl.u8 q0, d16
@@ -644,10 +644,10 @@ function x264_pixel_var_8x8_neon
VAR_SQR_SUM q1, q9, q14, d24
vld1.64 {d26}, [r0,:64], r1
VAR_SQR_SUM q2, q10, q15, d26
- b x264_var_end
+ b var_end
endfunc
-function x264_pixel_var_8x16_neon
+function pixel_var_8x16_neon
vld1.64 {d16}, [r0,:64], r1
vld1.64 {d18}, [r0,:64], r1
vmull.u8 q1, d16, d16
@@ -676,10 +676,10 @@ function x264_pixel_var_8x16_neon
b 1b
2:
VAR_SQR_SUM q2, q13, q15, d22
- b x264_var_end
+ b var_end
endfunc
-function x264_pixel_var_16x16_neon
+function pixel_var_16x16_neon
vld1.64 {d16-d17}, [r0,:128], r1
vmull.u8 q12, d16, d16
vmovl.u8 q0, d16
@@ -703,7 +703,7 @@ var16_loop:
bgt var16_loop
endfunc
-function x264_var_end, export=0
+function var_end, export=0
vpaddl.u16 q8, q14
vpaddl.u16 q9, q15
vadd.u32 q1, q1, q8
@@ -732,7 +732,7 @@ endfunc
vmlal.s16 \acc, \d1, \d1
.endm
-function x264_pixel_var2_8x8_neon
+function pixel_var2_8x8_neon
DIFF_SUM q0, d0, d1
DIFF_SUM q8, d16, d17
SQR_ACC q1, d0, d1, vmull.s16
@@ -763,7 +763,7 @@ function x264_pixel_var2_8x8_neon
bx lr
endfunc
-function x264_pixel_var2_8x16_neon
+function pixel_var2_8x16_neon
vld1.64 {d16}, [r0,:64], r1
vld1.64 {d17}, [r2,:64], r3
vld1.64 {d18}, [r0,:64], r1
@@ -822,7 +822,7 @@ endfunc
vsubl.u8 \q3, d6, d7
.endm
-function x264_pixel_satd_4x4_neon
+function pixel_satd_4x4_neon
vld1.32 {d1[]}, [r2], r3
vld1.32 {d0[]}, [r0,:32], r1
vld1.32 {d3[]}, [r2], r3
@@ -844,7 +844,7 @@ function x264_pixel_satd_4x4_neon
bx lr
endfunc
-function x264_pixel_satd_4x8_neon
+function pixel_satd_4x8_neon
vld1.32 {d1[]}, [r2], r3
vld1.32 {d0[]}, [r0,:32], r1
vld1.32 {d3[]}, [r2], r3
@@ -868,10 +868,10 @@ function x264_pixel_satd_4x8_neon
vld1.32 {d6[1]}, [r0,:32], r1
vsubl.u8 q3, d6, d7
SUMSUB_AB q10, q11, q2, q3
- b x264_satd_4x8_8x4_end_neon
+ b satd_4x8_8x4_end_neon
endfunc
-function x264_pixel_satd_8x4_neon
+function pixel_satd_8x4_neon
vld1.64 {d1}, [r2], r3
vld1.64 {d0}, [r0,:64], r1
vsubl.u8 q0, d0, d1
@@ -888,7 +888,7 @@ function x264_pixel_satd_8x4_neon
SUMSUB_AB q10, q11, q2, q3
endfunc
-function x264_satd_4x8_8x4_end_neon, export=0
+function satd_4x8_8x4_end_neon, export=0
vadd.s16 q0, q8, q10
vadd.s16 q1, q9, q11
vsub.s16 q2, q8, q10
@@ -915,10 +915,10 @@ function x264_satd_4x8_8x4_end_neon, export=0
bx lr
endfunc
-function x264_pixel_satd_8x8_neon
+function pixel_satd_8x8_neon
mov ip, lr
- bl x264_satd_8x8_neon
+ bl satd_8x8_neon
vadd.u16 q0, q12, q13
vadd.u16 q1, q14, q15
@@ -929,15 +929,15 @@ function x264_pixel_satd_8x8_neon
bx lr
endfunc
-function x264_pixel_satd_8x16_neon
+function pixel_satd_8x16_neon
vpush {d8-d11}
mov ip, lr
- bl x264_satd_8x8_neon
+ bl satd_8x8_neon
vadd.u16 q4, q12, q13
vadd.u16 q5, q14, q15
- bl x264_satd_8x8_neon
+ bl satd_8x8_neon
vadd.u16 q4, q4, q12
vadd.u16 q5, q5, q13
vadd.u16 q4, q4, q14
@@ -951,7 +951,7 @@ function x264_pixel_satd_8x16_neon
bx lr
endfunc
-function x264_satd_8x8_neon, export=0
+function satd_8x8_neon, export=0
LOAD_DIFF_8x4 q8, q9, q10, q11
vld1.64 {d7}, [r2], r3
SUMSUB_AB q0, q1, q8, q9
@@ -972,7 +972,7 @@ function x264_satd_8x8_neon, export=0
endfunc
// one vertical hadamard pass and two horizontal
-function x264_satd_8x4v_8x8h_neon, export=0
+function satd_8x4v_8x8h_neon, export=0
SUMSUB_ABCD q0, q1, q2, q3, q12, q13, q14, q15
vtrn.16 q8, q9
SUMSUB_AB q12, q14, q0, q2
@@ -1000,15 +1000,15 @@ function x264_satd_8x4v_8x8h_neon, export=0
bx lr
endfunc
-function x264_pixel_satd_16x8_neon
+function pixel_satd_16x8_neon
vpush {d8-d11}
mov ip, lr
- bl x264_satd_16x4_neon
+ bl satd_16x4_neon
vadd.u16 q4, q12, q13
vadd.u16 q5, q14, q15
- bl x264_satd_16x4_neon
+ bl satd_16x4_neon
vadd.u16 q4, q4, q12
vadd.u16 q5, q5, q13
vadd.u16 q4, q4, q14
@@ -1022,27 +1022,27 @@ function x264_pixel_satd_16x8_neon
bx lr
endfunc
-function x264_pixel_satd_16x16_neon
+function pixel_satd_16x16_neon
vpush {d8-d11}
mov ip, lr
- bl x264_satd_16x4_neon
+ bl satd_16x4_neon
vadd.u16 q4, q12, q13
vadd.u16 q5, q14, q15
- bl x264_satd_16x4_neon
+ bl satd_16x4_neon
vadd.u16 q4, q4, q12
vadd.u16 q5, q5, q13
vadd.u16 q4, q4, q14
vadd.u16 q5, q5, q15
- bl x264_satd_16x4_neon
+ bl satd_16x4_neon
vadd.u16 q4, q4, q12
vadd.u16 q5, q5, q13
vadd.u16 q4, q4, q14
vadd.u16 q5, q5, q15
- bl x264_satd_16x4_neon
+ bl satd_16x4_neon
vadd.u16 q4, q4, q12
vadd.u16 q5, q5, q13
vadd.u16 q4, q4, q14
@@ -1056,7 +1056,7 @@ function x264_pixel_satd_16x16_neon
bx lr
endfunc
-function x264_satd_16x4_neon, export=0
+function satd_16x4_neon, export=0
vld1.64 {d2-d3}, [r2], r3
vld1.64 {d0-d1}, [r0,:128], r1
vsubl.u8 q8, d0, d2
@@ -1077,13 +1077,13 @@ function x264_satd_16x4_neon, export=0
vsubl.u8 q15, d5, d7
SUMSUB_AB q2, q3, q10, q11
SUMSUB_ABCD q8, q10, q9, q11, q0, q2, q1, q3
- b x264_satd_8x4v_8x8h_neon
+ b satd_8x4v_8x8h_neon
endfunc
-function x264_pixel_sa8d_8x8_neon
+function pixel_sa8d_8x8_neon
mov ip, lr
- bl x264_sa8d_8x8_neon
+ bl sa8d_8x8_neon
vadd.u16 q0, q8, q9
HORIZ_ADD d0, d0, d1
mov lr, ip
@@ -1093,23 +1093,23 @@ function x264_pixel_sa8d_8x8_neon
bx lr
endfunc
-function x264_pixel_sa8d_16x16_neon
+function pixel_sa8d_16x16_neon
vpush {d8-d11}
mov ip, lr
- bl x264_sa8d_8x8_neon
+ bl sa8d_8x8_neon
vpaddl.u16 q4, q8
vpaddl.u16 q5, q9
- bl x264_sa8d_8x8_neon
+ bl sa8d_8x8_neon
vpadal.u16 q4, q8
vpadal.u16 q5, q9
sub r0, r0, r1, lsl #4
sub r2, r2, r3, lsl #4
add r0, r0, #8
add r2, r2, #8
- bl x264_sa8d_8x8_neon
+ bl sa8d_8x8_neon
vpadal.u16 q4, q8
vpadal.u16 q5, q9
- bl x264_sa8d_8x8_neon
+ bl sa8d_8x8_neon
vpaddl.u16 q8, q8
vpaddl.u16 q9, q9
vadd.u32 q0, q4, q8
@@ -1158,7 +1158,7 @@ endfunc
.endm
.macro sa8d_satd_8x8 satd=
-function x264_sa8d_\satd\()8x8_neon, export=0
+function sa8d_\satd\()8x8_neon, export=0
LOAD_DIFF_8x4 q8, q9, q10, q11
vld1.64 {d7}, [r2], r3
SUMSUB_AB q0, q1, q8, q9
@@ -1230,19 +1230,19 @@ endfunc
sa8d_satd_8x8
sa8d_satd_8x8 satd_
-function x264_pixel_sa8d_satd_16x16_neon
+function pixel_sa8d_satd_16x16_neon
push {lr}
vpush {q4-q7}
vmov.u32 q4, #0
vmov.u32 q5, #0
- bl x264_sa8d_satd_8x8_neon
- bl x264_sa8d_satd_8x8_neon
+ bl sa8d_satd_8x8_neon
+ bl sa8d_satd_8x8_neon
sub r0, r0, r1, lsl #4
sub r2, r2, r3, lsl #4
add r0, r0, #8
add r2, r2, #8
- bl x264_sa8d_satd_8x8_neon
- bl x264_sa8d_satd_8x8_neon
+ bl sa8d_satd_8x8_neon
+ bl sa8d_satd_8x8_neon
vadd.u32 d1, d10, d11
vadd.u32 d0, d8, d9
vpadd.u32 d1, d1, d1
@@ -1256,7 +1256,7 @@ endfunc
.macro HADAMARD_AC w h
-function x264_pixel_hadamard_ac_\w\()x\h\()_neon
+function pixel_hadamard_ac_\w\()x\h\()_neon
vpush {d8-d15}
movrel ip, mask_ac4
vmov.i8 q4, #0
@@ -1265,18 +1265,18 @@ function x264_pixel_hadamard_ac_\w\()x\h\()_neon
vmov.i8 q5, #0
mov ip, lr
- bl x264_hadamard_ac_8x8_neon
+ bl hadamard_ac_8x8_neon
.if \h > 8
- bl x264_hadamard_ac_8x8_neon
+ bl hadamard_ac_8x8_neon
.endif
.if \w > 8
sub r0, r0, r1, lsl #3
add r0, r0, #8
- bl x264_hadamard_ac_8x8_neon
+ bl hadamard_ac_8x8_neon
.endif
.if \w * \h == 256
sub r0, r0, r1, lsl #4
- bl x264_hadamard_ac_8x8_neon
+ bl hadamard_ac_8x8_neon
.endif
vadd.s32 d8, d8, d9
@@ -1297,7 +1297,7 @@ HADAMARD_AC 16, 8
HADAMARD_AC 16, 16
// q4: satd q5: sa8d q6: mask_ac4 q7: mask_ac8
-function x264_hadamard_ac_8x8_neon, export=0
+function hadamard_ac_8x8_neon, export=0
vld1.64 {d2}, [r0,:64], r1
vld1.64 {d3}, [r0,:64], r1
vaddl.u8 q0, d2, d3
@@ -1411,7 +1411,7 @@ endfunc
vmull.u8 \ssb, \db, \db
.endm
-function x264_pixel_ssim_4x4x2_core_neon
+function pixel_ssim_4x4x2_core_neon
ldr ip, [sp]
vld1.64 {d0}, [r0], r1
vld1.64 {d2}, [r2], r3
@@ -1440,7 +1440,7 @@ function x264_pixel_ssim_4x4x2_core_neon
endfunc
// FIXME: see about doing 16x16 -> 32 bit multiplies for s1/s2
-function x264_pixel_ssim_end4_neon
+function pixel_ssim_end4_neon
vld1.32 {d16-d19}, [r0,:128]!
vld1.32 {d20-d23}, [r1,:128]!
vadd.s32 q0, q8, q10
diff --git a/common/arm/predict-a.S b/common/arm/predict-a.S
index a7d9f10..67f1ea9 100644
--- a/common/arm/predict-a.S
+++ b/common/arm/predict-a.S
@@ -78,7 +78,7 @@ p16weight: .short 1,2,3,4,5,6,7,8
// because gcc doesn't believe in using the free shift in add
-function x264_predict_4x4_h_armv6
+function predict_4x4_h_armv6
ldrb r1, [r0, #0*FDEC_STRIDE-1]
ldrb r2, [r0, #1*FDEC_STRIDE-1]
ldrb r3, [r0, #2*FDEC_STRIDE-1]
@@ -98,7 +98,7 @@ function x264_predict_4x4_h_armv6
bx lr
endfunc
-function x264_predict_4x4_v_armv6
+function predict_4x4_v_armv6
ldr r1, [r0, #0 - 1 * FDEC_STRIDE]
str r1, [r0, #0 + 0 * FDEC_STRIDE]
str r1, [r0, #0 + 1 * FDEC_STRIDE]
@@ -107,7 +107,7 @@ function x264_predict_4x4_v_armv6
bx lr
endfunc
-function x264_predict_4x4_dc_armv6
+function predict_4x4_dc_armv6
mov ip, #0
ldr r1, [r0, #-FDEC_STRIDE]
ldrb r2, [r0, #0*FDEC_STRIDE-1]
@@ -130,7 +130,7 @@ function x264_predict_4x4_dc_armv6
bx lr
endfunc
-function x264_predict_4x4_dc_top_neon
+function predict_4x4_dc_top_neon
mov r12, #FDEC_STRIDE
sub r1, r0, #FDEC_STRIDE
vld1.32 d1[], [r1,:32]
@@ -159,7 +159,7 @@ endfunc
uadd8 \a2, \a2, \c2
.endm
-function x264_predict_4x4_ddr_armv6
+function predict_4x4_ddr_armv6
ldr r1, [r0, # -FDEC_STRIDE]
ldrb r2, [r0, # -FDEC_STRIDE-1]
ldrb r3, [r0, #0*FDEC_STRIDE-1]
@@ -188,7 +188,7 @@ function x264_predict_4x4_ddr_armv6
pop {r4-r6,pc}
endfunc
-function x264_predict_4x4_ddl_neon
+function predict_4x4_ddl_neon
sub r0, #FDEC_STRIDE
mov ip, #FDEC_STRIDE
vld1.64 {d0}, [r0], ip
@@ -207,7 +207,7 @@ function x264_predict_4x4_ddl_neon
bx lr
endfunc
-function x264_predict_8x8_dc_neon
+function predict_8x8_dc_neon
mov ip, #0
ldrd r2, r3, [r1, #8]
push {r4-r5,lr}
@@ -231,7 +231,7 @@ function x264_predict_8x8_dc_neon
pop {r4-r5,pc}
endfunc
-function x264_predict_8x8_h_neon
+function predict_8x8_h_neon
add r1, r1, #7
mov ip, #FDEC_STRIDE
vld1.64 {d16}, [r1]
@@ -254,7 +254,7 @@ function x264_predict_8x8_h_neon
bx lr
endfunc
-function x264_predict_8x8_v_neon
+function predict_8x8_v_neon
add r1, r1, #16
mov r12, #FDEC_STRIDE
vld1.8 {d0}, [r1,:64]
@@ -264,7 +264,7 @@ function x264_predict_8x8_v_neon
bx lr
endfunc
-function x264_predict_8x8_ddl_neon
+function predict_8x8_ddl_neon
add r1, #16
vld1.8 {d0, d1}, [r1,:128]
vmov.i8 q3, #0
@@ -292,7 +292,7 @@ function x264_predict_8x8_ddl_neon
bx lr
endfunc
-function x264_predict_8x8_ddr_neon
+function predict_8x8_ddr_neon
vld1.8 {d0-d3}, [r1,:128]
vext.8 q2, q0, q1, #7
vext.8 q3, q0, q1, #9
@@ -322,7 +322,7 @@ function x264_predict_8x8_ddr_neon
bx lr
endfunc
-function x264_predict_8x8_vl_neon
+function predict_8x8_vl_neon
add r1, #16
mov r12, #FDEC_STRIDE
@@ -353,7 +353,7 @@ function x264_predict_8x8_vl_neon
bx lr
endfunc
-function x264_predict_8x8_vr_neon
+function predict_8x8_vr_neon
add r1, #8
mov r12, #FDEC_STRIDE
vld1.8 {d4,d5}, [r1,:64]
@@ -385,7 +385,7 @@ function x264_predict_8x8_vr_neon
bx lr
endfunc
-function x264_predict_8x8_hd_neon
+function predict_8x8_hd_neon
mov r12, #FDEC_STRIDE
add r1, #7
@@ -418,7 +418,7 @@ function x264_predict_8x8_hd_neon
bx lr
endfunc
-function x264_predict_8x8_hu_neon
+function predict_8x8_hu_neon
mov r12, #FDEC_STRIDE
add r1, #7
vld1.8 {d7}, [r1]
@@ -451,7 +451,7 @@ function x264_predict_8x8_hu_neon
bx lr
endfunc
-function x264_predict_8x8c_dc_top_neon
+function predict_8x8c_dc_top_neon
sub r2, r0, #FDEC_STRIDE
mov r1, #FDEC_STRIDE
vld1.8 {d0}, [r2,:64]
@@ -464,7 +464,7 @@ function x264_predict_8x8c_dc_top_neon
b pred8x8_dc_end
endfunc
-function x264_predict_8x8c_dc_left_neon
+function predict_8x8c_dc_left_neon
mov r1, #FDEC_STRIDE
sub r2, r0, #1
ldcol.8 d0, r2, r1
@@ -476,7 +476,7 @@ function x264_predict_8x8c_dc_left_neon
b pred8x8_dc_end
endfunc
-function x264_predict_8x8c_dc_neon
+function predict_8x8c_dc_neon
sub r2, r0, #FDEC_STRIDE
mov r1, #FDEC_STRIDE
vld1.8 {d0}, [r2,:64]
@@ -502,7 +502,7 @@ pred8x8_dc_end:
bx lr
endfunc
-function x264_predict_8x8c_h_neon
+function predict_8x8c_h_neon
sub r1, r0, #1
mov ip, #FDEC_STRIDE
.rept 4
@@ -514,7 +514,7 @@ function x264_predict_8x8c_h_neon
bx lr
endfunc
-function x264_predict_8x8c_v_neon
+function predict_8x8c_v_neon
sub r0, r0, #FDEC_STRIDE
mov ip, #FDEC_STRIDE
vld1.64 {d0}, [r0,:64], ip
@@ -524,7 +524,7 @@ function x264_predict_8x8c_v_neon
bx lr
endfunc
-function x264_predict_8x8c_p_neon
+function predict_8x8c_p_neon
sub r3, r0, #FDEC_STRIDE
mov r1, #FDEC_STRIDE
add r2, r3, #4
@@ -573,7 +573,7 @@ function x264_predict_8x8c_p_neon
endfunc
-function x264_predict_8x16c_dc_top_neon
+function predict_8x16c_dc_top_neon
sub r2, r0, #FDEC_STRIDE
mov r1, #FDEC_STRIDE
vld1.8 {d0}, [r2,:64]
@@ -598,7 +598,7 @@ function x264_predict_8x16c_dc_top_neon
bx lr
endfunc
-function x264_predict_8x16c_h_neon
+function predict_8x16c_h_neon
sub r1, r0, #1
mov ip, #FDEC_STRIDE
.rept 8
@@ -610,7 +610,7 @@ function x264_predict_8x16c_h_neon
bx lr
endfunc
-function x264_predict_8x16c_p_neon
+function predict_8x16c_p_neon
sub r3, r0, #FDEC_STRIDE
mov r1, #FDEC_STRIDE
add r2, r3, #4
@@ -668,7 +668,7 @@ function x264_predict_8x16c_p_neon
endfunc
-function x264_predict_16x16_dc_top_neon
+function predict_16x16_dc_top_neon
sub r2, r0, #FDEC_STRIDE
mov r1, #FDEC_STRIDE
vld1.8 {q0}, [r2,:128]
@@ -678,7 +678,7 @@ function x264_predict_16x16_dc_top_neon
b pred16x16_dc_end
endfunc
-function x264_predict_16x16_dc_left_neon
+function predict_16x16_dc_left_neon
mov r1, #FDEC_STRIDE
sub r2, r0, #1
ldcol.8 d0, r2, r1
@@ -689,7 +689,7 @@ function x264_predict_16x16_dc_left_neon
b pred16x16_dc_end
endfunc
-function x264_predict_16x16_dc_neon
+function predict_16x16_dc_neon
sub r3, r0, #FDEC_STRIDE
sub r0, r0, #1
vld1.64 {d0-d1}, [r3,:128]
@@ -727,7 +727,7 @@ pred16x16_dc_end:
bx lr
endfunc
-function x264_predict_16x16_h_neon
+function predict_16x16_h_neon
sub r1, r0, #1
mov ip, #FDEC_STRIDE
.rept 8
@@ -741,7 +741,7 @@ function x264_predict_16x16_h_neon
bx lr
endfunc
-function x264_predict_16x16_v_neon
+function predict_16x16_v_neon
sub r0, r0, #FDEC_STRIDE
mov ip, #FDEC_STRIDE
vld1.64 {d0-d1}, [r0,:128], ip
@@ -751,7 +751,7 @@ function x264_predict_16x16_v_neon
bx lr
endfunc
-function x264_predict_16x16_p_neon
+function predict_16x16_p_neon
sub r3, r0, #FDEC_STRIDE
mov r1, #FDEC_STRIDE
add r2, r3, #8
diff --git a/common/arm/quant-a.S b/common/arm/quant-a.S
index eb3fd36..10282e7 100644
--- a/common/arm/quant-a.S
+++ b/common/arm/quant-a.S
@@ -74,7 +74,7 @@ mask_1bit:
.endm
// quant_2x2_dc( int16_t dct[4], int mf, int bias )
-function x264_quant_2x2_dc_neon
+function quant_2x2_dc_neon
vld1.64 {d0}, [r0,:64]
vabs.s16 d3, d0
vdup.16 d2, r2
@@ -90,7 +90,7 @@ function x264_quant_2x2_dc_neon
endfunc
// quant_4x4_dc( int16_t dct[16], int mf, int bias )
-function x264_quant_4x4_dc_neon
+function quant_4x4_dc_neon
vld1.64 {d28-d31}, [r0,:128]
vabs.s16 q8, q14
vabs.s16 q9, q15
@@ -102,7 +102,7 @@ function x264_quant_4x4_dc_neon
endfunc
// quant_4x4( int16_t dct[16], uint16_t mf[16], uint16_t bias[16] )
-function x264_quant_4x4_neon
+function quant_4x4_neon
vld1.64 {d28-d31}, [r0,:128]
vabs.s16 q8, q14
vabs.s16 q9, q15
@@ -114,7 +114,7 @@ function x264_quant_4x4_neon
endfunc
// quant_4x4x4( int16_t dct[4][16], uint16_t mf[16], uint16_t bias[16] )
-function x264_quant_4x4x4_neon
+function quant_4x4x4_neon
vpush {d8-d15}
vld1.64 {d28-d31}, [r0,:128]
vabs.s16 q8, q14
@@ -155,7 +155,7 @@ function x264_quant_4x4x4_neon
endfunc
// quant_8x8( int16_t dct[64], uint16_t mf[64], uint16_t bias[64] )
-function x264_quant_8x8_neon
+function quant_8x8_neon
vld1.64 {d28-d31}, [r0,:128]
vabs.s16 q8, q14
vabs.s16 q9, q15
@@ -190,7 +190,7 @@ endfunc
// dequant_4x4( int16_t dct[16], int dequant_mf[6][16], int i_qp )
.macro DEQUANT size bits
-function x264_dequant_\size\()_neon
+function dequant_\size\()_neon
DEQUANT_START \bits+2, \bits
.ifc \size, 8x8
mov r2, #4
@@ -271,7 +271,7 @@ DEQUANT 4x4, 4
DEQUANT 8x8, 6
// dequant_4x4_dc( int16_t dct[16], int dequant_mf[6][16], int i_qp )
-function x264_dequant_4x4_dc_neon
+function dequant_4x4_dc_neon
DEQUANT_START 6, 6, yes
blt dequant_4x4_dc_rshift
@@ -317,7 +317,7 @@ dequant_4x4_dc_rshift:
endfunc
.macro decimate_score_1x size
-function x264_decimate_score\size\()_neon
+function decimate_score\size\()_neon
vld1.16 {q0, q1}, [r0, :128]
movrel r3, mask_2bit
vmov.s8 q3, #0x01
@@ -346,7 +346,7 @@ function x264_decimate_score\size\()_neon
lsr r1, r1, #2
.endif
rbit r1, r1
- movrelx r3, X(x264_decimate_table4), r2
+ movrelx r3, X(decimate_table4), r2
1:
clz r2, r1
lsl r1, r1, r2
@@ -362,7 +362,7 @@ endfunc
decimate_score_1x 15
decimate_score_1x 16
-function x264_decimate_score64_neon
+function decimate_score64_neon
push {lr}
vld1.16 {q8, q9}, [r0, :128]!
vld1.16 {q10, q11}, [r0, :128]!
@@ -415,7 +415,7 @@ function x264_decimate_score64_neon
mvn r12, r12
mov r0, #0
mov lr, #32
- movrelx r3, X(x264_decimate_table8), r2
+ movrelx r3, X(decimate_table8), r2
beq 2f
1:
clz r2, r1
@@ -448,7 +448,7 @@ function x264_decimate_score64_neon
endfunc
// int coeff_last( int16_t *l )
-function x264_coeff_last4_arm
+function coeff_last4_arm
ldrd r2, r3, [r0]
subs r0, r3, #0
movne r0, #2
@@ -458,7 +458,7 @@ function x264_coeff_last4_arm
bx lr
endfunc
-function x264_coeff_last8_arm
+function coeff_last8_arm
ldrd r2, r3, [r0, #8]
orrs ip, r2, r3
movne r0, #4
@@ -473,7 +473,7 @@ function x264_coeff_last8_arm
endfunc
.macro COEFF_LAST_1x size
-function x264_coeff_last\size\()_neon
+function coeff_last\size\()_neon
.if \size == 15
sub r0, r0, #2
.endif
@@ -499,7 +499,7 @@ endfunc
COEFF_LAST_1x 15
COEFF_LAST_1x 16
-function x264_coeff_last64_neon
+function coeff_last64_neon
vld1.64 {d16-d19}, [r0,:128]!
vqmovn.u16 d16, q8
vqmovn.u16 d17, q9
@@ -544,7 +544,7 @@ function x264_coeff_last64_neon
bx lr
endfunc
-function x264_denoise_dct_neon
+function denoise_dct_neon
1: subs r3, r3, #16
vld1.16 {q0, q1}, [r0]
vld1.32 {q12, q13}, [r1]!
diff --git a/tools/checkasm-arm.S b/tools/checkasm-arm.S
index 433ac53..97e308d 100644
--- a/tools/checkasm-arm.S
+++ b/tools/checkasm-arm.S
@@ -52,7 +52,7 @@ error_message:
.macro clobbercheck variant
.equ pushed, 4*10
-function x264_checkasm_call_\variant
+function checkasm_call_\variant
push {r4-r11, lr}
.ifc \variant, neon
vpush {q4-q7}
@@ -128,7 +128,11 @@ function x264_checkasm_call_\variant
mov r12, #0
str r12, [r2]
movrel r0, error_message
- blx X(puts)
+#ifdef PREFIX
+ blx _puts
+#else
+ blx puts
+#endif
0:
pop {r0, r1}
.ifc \variant, neon
--
2.10.0
More information about the x264-devel mailing list