[x264-devel] [PATCH 2/4] arm: do not export every asm function
Janne Grunau
janne-x264 at jannau.net
Tue Apr 1 22:11:43 CEST 2014
Based on Libav's libavutil/arm/asm.S. This also avoids emitting the same
label twice for every function on systems where EXTERN_ASM expands to
nothing, which Clang's integrated assembler rejects.
---
common/arm/asm.S | 14 ++++++++++++--
common/arm/cpu-a.S | 4 ++--
common/arm/dct-a.S | 20 ++++++++++----------
common/arm/mc-a.S | 14 +++++++-------
common/arm/pixel-a.S | 14 +++++++-------
5 files changed, 38 insertions(+), 28 deletions(-)
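For illustration, here is roughly what the reworked macro emits on an ELF
target where EXTERN_ASM expands to nothing. This is a sketch, not part of
the patch, and the function names below are made up:

// "function my_public_func" now expands to a single label,
// globally visible but with hidden ELF visibility, so it is not
// exported from the shared library:
        .align  2
        .global my_public_func
        .hidden my_public_func          // ELF only
        .type   my_public_func, %function
        .func   my_public_func
my_public_func:
        bx      lr                      // function body as usual
        .endfunc

// "function my_local_func, export=0" omits the .global, keeping
// the symbol local to the object file:
        .align  2
        .hidden my_local_func           // ELF only
        .type   my_local_func, %function
        .func   my_local_func
my_local_func:
        bx      lr
        .endfunc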
diff --git a/common/arm/asm.S b/common/arm/asm.S
index 3a6f621..273a79c 100644
--- a/common/arm/asm.S
+++ b/common/arm/asm.S
@@ -48,14 +48,20 @@ ELF .eabi_attribute 24, \val
ELF .eabi_attribute 25, \val
.endm
-.macro function name
- .global EXTERN_ASM\name
+.macro function name, export=1
.align 2
+.if \export == 1
+ .global EXTERN_ASM\name
+ELF .hidden EXTERN_ASM\name
+ELF .type EXTERN_ASM\name, %function
+ .func EXTERN_ASM\name
EXTERN_ASM\name:
+.else
ELF .hidden \name
ELF .type \name, %function
.func \name
\name:
+.endif
.endm
.macro movrel rd, val
@@ -78,6 +84,10 @@ ELF .type \name, %function
#endif
.endm
+#define GLUE(a, b) a ## b
+#define JOIN(a, b) GLUE(a, b)
+#define X(s) JOIN(EXTERN_ASM, s)
+
#define FENC_STRIDE 16
#define FDEC_STRIDE 32
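The GLUE/JOIN indirection makes the preprocessor expand EXTERN_ASM before
the ## paste; GLUE(EXTERN_ASM, s) alone would paste the unexpanded token,
yielding e.g. EXTERN_ASMx264_sub8x8_dct8_neon. A rough sketch of how a
caller uses it, assuming a Mach-O style underscore prefix (EXTERN_ASM
defined as _):

#define EXTERN_ASM _
#define GLUE(a, b) a ## b
#define JOIN(a, b) GLUE(a, b)
#define X(s) JOIN(EXTERN_ASM, s)

        bl      X(x264_sub8x8_dct8_neon)  // becomes: bl _x264_sub8x8_dct8_neon

Calls and tail calls between asm functions go through X() because exported
functions now define only the EXTERN_ASM-prefixed label, whatever the
platform prefix happens to be.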
diff --git a/common/arm/cpu-a.S b/common/arm/cpu-a.S
index a254551..9ae6b14 100644
--- a/common/arm/cpu-a.S
+++ b/common/arm/cpu-a.S
@@ -38,7 +38,7 @@ function x264_cpu_neon_test
// return: 0 on success
// 1 if counters were already enabled
// 9 if lo-res counters were already enabled
-function x264_cpu_enable_armv7_counter
+function x264_cpu_enable_armv7_counter, export=0
mrc p15, 0, r2, c9, c12, 0 // read PMNC
ands r0, r2, #1
andne r0, r2, #9
@@ -51,7 +51,7 @@ function x264_cpu_enable_armv7_counter
bx lr
.endfunc
-function x264_cpu_disable_armv7_counter
+function x264_cpu_disable_armv7_counter, export=0
mrc p15, 0, r0, c9, c12, 0 // read PMNC
bic r0, r0, #1 // disable counters
mcr p15, 0, r0, c9, c12, 0 // write PMNC
diff --git a/common/arm/dct-a.S b/common/arm/dct-a.S
index df12aeb..c5490bd 100644
--- a/common/arm/dct-a.S
+++ b/common/arm/dct-a.S
@@ -131,7 +131,7 @@ function x264_sub4x4_dct_neon
bx lr
.endfunc
-function x264_sub8x4_dct_neon
+function x264_sub8x4_dct_neon, export=0
vld1.64 {d0}, [r1,:64], r3
vld1.64 {d1}, [r2,:64], ip
vsubl.u8 q8, d0, d1
@@ -283,17 +283,17 @@ function x264_sub8x8_dct8_neon
function x264_sub16x16_dct8_neon
push {lr}
- bl x264_sub8x8_dct8_neon
+ bl X(x264_sub8x8_dct8_neon)
sub r1, r1, #FENC_STRIDE*8 - 8
sub r2, r2, #FDEC_STRIDE*8 - 8
- bl x264_sub8x8_dct8_neon
+ bl X(x264_sub8x8_dct8_neon)
sub r1, r1, #8
sub r2, r2, #8
- bl x264_sub8x8_dct8_neon
+ bl X(x264_sub8x8_dct8_neon)
pop {lr}
sub r1, r1, #FENC_STRIDE*8 - 8
sub r2, r2, #FDEC_STRIDE*8 - 8
- b x264_sub8x8_dct8_neon
+ b X(x264_sub8x8_dct8_neon)
.endfunc
@@ -338,7 +338,7 @@ function x264_add4x4_idct_neon
bx lr
.endfunc
-function x264_add8x4_idct_neon
+function x264_add8x4_idct_neon, export=0
vld1.64 {d0-d3}, [r1,:128]!
IDCT_1D d16, d18, d20, d22, d0, d1, d2, d3
vld1.64 {d4-d7}, [r1,:128]!
@@ -502,14 +502,14 @@ function x264_add8x8_idct8_neon
function x264_add16x16_idct8_neon
mov ip, lr
- bl x264_add8x8_idct8_neon
+ bl X(x264_add8x8_idct8_neon)
sub r0, r0, #8*FDEC_STRIDE-8
- bl x264_add8x8_idct8_neon
+ bl X(x264_add8x8_idct8_neon)
sub r0, r0, #8
- bl x264_add8x8_idct8_neon
+ bl X(x264_add8x8_idct8_neon)
sub r0, r0, #8*FDEC_STRIDE-8
mov lr, ip
- b x264_add8x8_idct8_neon
+ b X(x264_add8x8_idct8_neon)
.endfunc
diff --git a/common/arm/mc-a.S b/common/arm/mc-a.S
index 58cf542..330b852 100644
--- a/common/arm/mc-a.S
+++ b/common/arm/mc-a.S
@@ -88,7 +88,7 @@ function x264_memcpy_aligned_neon
.endfunc
.macro MEMCPY_ALIGNED srcalign dstalign
-function memcpy_aligned_\dstalign\()_\srcalign\()_neon
+function memcpy_aligned_\dstalign\()_\srcalign\()_neon, export=0
mov r3, r0
.if \srcalign == 8 && \dstalign == 8
sub r2, #16
@@ -239,7 +239,7 @@ AVGH 16, 16
.endm
.macro AVG_WEIGHT ext
-function x264_pixel_avg_weight_w4_\ext\()_neon
+function x264_pixel_avg_weight_w4_\ext\()_neon, export=0
load_weights_\ext
1: // height loop
subs lr, lr, #2
@@ -255,7 +255,7 @@ function x264_pixel_avg_weight_w4_\ext\()_neon
pop {r4-r6,pc}
.endfunc
-function x264_pixel_avg_weight_w8_\ext\()_neon
+function x264_pixel_avg_weight_w8_\ext\()_neon, export=0
load_weights_\ext
1: // height loop
subs lr, lr, #4
@@ -279,7 +279,7 @@ function x264_pixel_avg_weight_w8_\ext\()_neon
pop {r4-r6,pc}
.endfunc
-function x264_pixel_avg_weight_w16_\ext\()_neon
+function x264_pixel_avg_weight_w16_\ext\()_neon, export=0
load_weights_\ext
1: // height loop
subs lr, lr, #2
@@ -304,7 +304,7 @@ AVG_WEIGHT add_add
AVG_WEIGHT add_sub
AVG_WEIGHT sub_add
-function x264_pixel_avg_w4_neon
+function x264_pixel_avg_w4_neon, export=0
subs lr, lr, #2
vld1.32 {d0[]}, [r2], r3
vld1.32 {d2[]}, [r4], r5
@@ -318,7 +318,7 @@ function x264_pixel_avg_w4_neon
pop {r4-r6,pc}
.endfunc
-function x264_pixel_avg_w8_neon
+function x264_pixel_avg_w8_neon, export=0
subs lr, lr, #4
vld1.64 {d0}, [r2], r3
vld1.64 {d2}, [r4], r5
@@ -340,7 +340,7 @@ function x264_pixel_avg_w8_neon
pop {r4-r6,pc}
.endfunc
-function x264_pixel_avg_w16_neon
+function x264_pixel_avg_w16_neon, export=0
subs lr, lr, #4
vld1.64 {d0-d1}, [r2], r3
vld1.64 {d2-d3}, [r4], r5
diff --git a/common/arm/pixel-a.S b/common/arm/pixel-a.S
index 0b996a8..ddf396d 100644
--- a/common/arm/pixel-a.S
+++ b/common/arm/pixel-a.S
@@ -575,7 +575,7 @@ var16_loop:
bgt var16_loop
.endfunc
-function x264_var_end
+function x264_var_end, export=0
vpaddl.u16 q8, q14
vpaddl.u16 q9, q15
vadd.u32 q1, q1, q8
@@ -760,7 +760,7 @@ function x264_pixel_satd_8x4_neon
SUMSUB_AB q10, q11, q2, q3
.endfunc
-function x264_satd_4x8_8x4_end_neon
+function x264_satd_4x8_8x4_end_neon, export=0
vadd.s16 q0, q8, q10
vadd.s16 q1, q9, q11
vsub.s16 q2, q8, q10
@@ -823,7 +823,7 @@ function x264_pixel_satd_8x16_neon
bx lr
.endfunc
-function x264_satd_8x8_neon
+function x264_satd_8x8_neon, export=0
LOAD_DIFF_8x4 q8, q9, q10, q11
vld1.64 {d7}, [r2], r3
SUMSUB_AB q0, q1, q8, q9
@@ -844,7 +844,7 @@ function x264_satd_8x8_neon
.endfunc
// one vertical hadamard pass and two horizontal
-function x264_satd_8x4v_8x8h_neon
+function x264_satd_8x4v_8x8h_neon, export=0
SUMSUB_ABCD q0, q1, q2, q3, q12, q13, q14, q15
vtrn.16 q8, q9
SUMSUB_AB q12, q14, q0, q2
@@ -928,7 +928,7 @@ function x264_pixel_satd_16x16_neon
bx lr
.endfunc
-function x264_satd_16x4_neon
+function x264_satd_16x4_neon, export=0
vld1.64 {d2-d3}, [r2], r3
vld1.64 {d0-d1}, [r0,:128], r1
vsubl.u8 q8, d0, d2
@@ -1002,7 +1002,7 @@ function x264_pixel_sa8d_16x16_neon
SUMSUB_ABCD \r1, \r3, \r2, \r4, \t1, \t3, \t2, \t4
.endm
-function x264_sa8d_8x8_neon
+function x264_sa8d_8x8_neon, export=0
LOAD_DIFF_8x4 q8, q9, q10, q11
vld1.64 {d7}, [r2], r3
SUMSUB_AB q0, q1, q8, q9
@@ -1103,7 +1103,7 @@ HADAMARD_AC 16, 8
HADAMARD_AC 16, 16
// q4: satd q5: sa8d q6: mask_ac4 q7: mask_ac8
-function x264_hadamard_ac_8x8_neon
+function x264_hadamard_ac_8x8_neon, export=0
vld1.64 {d2}, [r0,:64], r1
vld1.64 {d3}, [r0,:64], r1
vaddl.u8 q0, d2, d3
--
1.9.1