[x264-devel] [PATCH 4/8] arm: check if the assembler supports the '.func' directive
Janne Grunau
janne-x264 at jannau.net
Sun Jul 20 18:48:28 CEST 2014
The integrated assembler in LLVM trunk (to be released as 3.5) does not
support the '.func' directive, but is otherwise capable of assembling the
ARM asm correctly.
---
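The mechanism mirrors the existing ELF prefix macro: FUNC expands to nothing
when the assembler supports '.func'/'.endfunc' and to the comment character
'@' when it does not, and the 'function' macro now defines a matching
'endfunc' macro (purged after each use) so the bare '.endfunc' at every call
site can be replaced. As a sketch, on an ELF target (where EXTERN_ASM is
empty) with HAVE_AS_FUNC=1, 'function x264_cpu_neon_test' ... 'endfunc'
expands roughly to:

        .align  2
        .global x264_cpu_neon_test
        .hidden x264_cpu_neon_test
        .type   x264_cpu_neon_test, %function
        .func   x264_cpu_neon_test
x264_cpu_neon_test:
        vadd.i16 q0, q0, q0             @ function body is unchanged
        bx      lr
        .size   x264_cpu_neon_test, . - x264_cpu_neon_test
        .endfunc

With HAVE_AS_FUNC=0, FUNC comments out the '.func'/'.endfunc' pair and only
the '.size' annotation remains, which is all the integrated assembler needs.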
common/arm/asm.S | 15 +++++++--
common/arm/cpu-a.S | 8 ++---
common/arm/dct-a.S | 36 ++++++++++-----------
common/arm/deblock-a.S | 10 +++---
common/arm/mc-a.S | 86 +++++++++++++++++++++++++-------------------------
common/arm/pixel-a.S | 58 +++++++++++++++++-----------------
common/arm/predict-a.S | 54 +++++++++++++++----------------
common/arm/quant-a.S | 22 ++++++-------
configure | 3 ++
9 files changed, 153 insertions(+), 139 deletions(-)
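The probe added to configure at the end of this patch assembles the
following two lines; GNU as accepts them, while the clang 3.5 integrated
assembler does not implement the directives and errors out, so as_check
fails and HAVE_AS_FUNC is defined to 0:

        .func   t
        .endfunc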
diff --git a/common/arm/asm.S b/common/arm/asm.S
index 6656858..3fb11b8 100644
--- a/common/arm/asm.S
+++ b/common/arm/asm.S
@@ -50,6 +50,12 @@
# define ELF @
#endif
+#if HAVE_AS_FUNC
+# define FUNC
+#else
+# define FUNC @
+#endif
+
.macro require8, val=1
ELF .eabi_attribute 24, \val
.endm
@@ -59,17 +65,22 @@ ELF .eabi_attribute 25, \val
.endm
.macro function name, export=1
+ .macro endfunc
+ELF .size \name, . - \name
+FUNC .endfunc
+ .purgem endfunc
+ .endm
.align 2
.if \export == 1
.global EXTERN_ASM\name
ELF .hidden EXTERN_ASM\name
ELF .type EXTERN_ASM\name, %function
- .func EXTERN_ASM\name
+FUNC .func EXTERN_ASM\name
EXTERN_ASM\name:
.else
ELF .hidden \name
ELF .type \name, %function
- .func \name
+FUNC .func \name
\name:
.endif
.endm
diff --git a/common/arm/cpu-a.S b/common/arm/cpu-a.S
index 7fc273a..9285219 100644
--- a/common/arm/cpu-a.S
+++ b/common/arm/cpu-a.S
@@ -32,7 +32,7 @@
function x264_cpu_neon_test
vadd.i16 q0, q0, q0
bx lr
-.endfunc
+endfunc
// return: 0 on success
// 1 if counters were already enabled
@@ -48,14 +48,14 @@ function x264_cpu_enable_armv7_counter, export=0
mov r2, #1 << 31 // enable cycle counter
mcr p15, 0, r2, c9, c12, 1 // write CNTENS
bx lr
-.endfunc
+endfunc
function x264_cpu_disable_armv7_counter, export=0
mrc p15, 0, r0, c9, c12, 0 // read PMNC
bic r0, r0, #1 // disable counters
mcr p15, 0, r0, c9, c12, 0 // write PMNC
bx lr
-.endfunc
+endfunc
.macro READ_TIME r
@@ -105,4 +105,4 @@ average_loop:
cmp r0, #10
movgt r0, #0
pop {r4-r6,pc}
-.endfunc
+endfunc
diff --git a/common/arm/dct-a.S b/common/arm/dct-a.S
index 9e7d75f..f8d1ccf 100644
--- a/common/arm/dct-a.S
+++ b/common/arm/dct-a.S
@@ -80,7 +80,7 @@ function x264_dct4x4dc_neon
vrhadd.s16 d3, d6, d7
vst1.64 {d0-d3}, [r0,:128]
bx lr
-.endfunc
+endfunc
function x264_idct4x4dc_neon
vld1.64 {d0-d3}, [r0,:128]
@@ -92,7 +92,7 @@ function x264_idct4x4dc_neon
HADAMARD 2, sumsub, d3, d2, d6, d7
vst1.64 {d0-d3}, [r0,:128]
bx lr
-.endfunc
+endfunc
.macro DCT_1D d0 d1 d2 d3 d4 d5 d6 d7
@@ -127,7 +127,7 @@ function x264_sub4x4_dct_neon
DCT_1D d4, d5, d6, d7, d0, d1, d2, d3
vst1.64 {d4-d7}, [r0,:128]
bx lr
-.endfunc
+endfunc
function x264_sub8x4_dct_neon, export=0
vld1.64 {d0}, [r1,:64], r3
@@ -163,7 +163,7 @@ function x264_sub8x4_dct_neon, export=0
vst1.64 {d4-d5}, [r0,:128]!
vst1.64 {d6-d7}, [r0,:128]!
bx lr
-.endfunc
+endfunc
function x264_sub8x8_dct_neon
push {lr}
@@ -172,7 +172,7 @@ function x264_sub8x8_dct_neon
bl x264_sub8x4_dct_neon
pop {lr}
b x264_sub8x4_dct_neon
-.endfunc
+endfunc
function x264_sub16x16_dct_neon
push {lr}
@@ -193,7 +193,7 @@ function x264_sub16x16_dct_neon
bl x264_sub8x4_dct_neon
pop {lr}
b x264_sub8x4_dct_neon
-.endfunc
+endfunc
.macro DCT8_1D type
@@ -277,7 +277,7 @@ function x264_sub8x8_dct8_neon
vst1.64 {d24-d27}, [r0,:128]!
vst1.64 {d28-d31}, [r0,:128]!
bx lr
-.endfunc
+endfunc
function x264_sub16x16_dct8_neon
push {lr}
@@ -292,7 +292,7 @@ function x264_sub16x16_dct8_neon
sub r1, r1, #FENC_STRIDE*8 - 8
sub r2, r2, #FDEC_STRIDE*8 - 8
b X(x264_sub8x8_dct8_neon)
-.endfunc
+endfunc
// First part of IDCT (minus final SUMSUB_BA)
@@ -334,7 +334,7 @@ function x264_add4x4_idct_neon
vst1.32 {d2[1]}, [r0,:32], r2
vst1.32 {d2[0]}, [r0,:32], r2
bx lr
-.endfunc
+endfunc
function x264_add8x4_idct_neon, export=0
vld1.64 {d0-d3}, [r1,:128]!
@@ -374,7 +374,7 @@ function x264_add8x4_idct_neon, export=0
vst1.32 {d2}, [r0,:64], r2
vst1.32 {d3}, [r0,:64], r2
bx lr
-.endfunc
+endfunc
function x264_add8x8_idct_neon
mov r2, #FDEC_STRIDE
@@ -382,7 +382,7 @@ function x264_add8x8_idct_neon
bl x264_add8x4_idct_neon
mov lr, ip
b x264_add8x4_idct_neon
-.endfunc
+endfunc
function x264_add16x16_idct_neon
mov r2, #FDEC_STRIDE
@@ -399,7 +399,7 @@ function x264_add16x16_idct_neon
bl x264_add8x4_idct_neon
mov lr, ip
b x264_add8x4_idct_neon
-.endfunc
+endfunc
.macro IDCT8_1D type
@@ -496,7 +496,7 @@ function x264_add8x8_idct8_neon
vst1.64 {d6}, [r0,:64], r2
vst1.64 {d7}, [r0,:64], r2
bx lr
-.endfunc
+endfunc
function x264_add16x16_idct8_neon
mov ip, lr
@@ -508,7 +508,7 @@ function x264_add16x16_idct8_neon
sub r0, r0, #8*FDEC_STRIDE-8
mov lr, ip
b X(x264_add8x8_idct8_neon)
-.endfunc
+endfunc
function x264_add8x8_idct_dc_neon
@@ -560,7 +560,7 @@ function x264_add8x8_idct_dc_neon
vst1.64 {d6}, [r0,:64], r2
vst1.64 {d7}, [r0,:64], r2
bx lr
-.endfunc
+endfunc
.macro ADD16x4_IDCT_DC dc
vld1.64 {d16-d17}, [r0,:128], r3
@@ -608,7 +608,7 @@ function x264_add16x16_idct_dc_neon
ADD16x4_IDCT_DC d2
ADD16x4_IDCT_DC d3
bx lr
-.endfunc
+endfunc
function x264_sub8x8_dct_dc_neon
mov r3, #FENC_STRIDE
@@ -656,7 +656,7 @@ function x264_sub8x8_dct_dc_neon
vpadd.s16 d0, d0, d1
vst1.64 {d0}, [r0,:64]
bx lr
-.endfunc
+endfunc
function x264_zigzag_scan_4x4_frame_neon
@@ -669,4 +669,4 @@ function x264_zigzag_scan_4x4_frame_neon
vtbl.8 d7, {d2-d3}, d19
vst1.64 {d4-d7}, [r0,:128]
bx lr
-.endfunc
+endfunc
diff --git a/common/arm/deblock-a.S b/common/arm/deblock-a.S
index 2d0ef9f..59977b4 100644
--- a/common/arm/deblock-a.S
+++ b/common/arm/deblock-a.S
@@ -140,7 +140,7 @@ function x264_deblock_v_luma_neon
align_pop_regs
bx lr
-.endfunc
+endfunc
function x264_deblock_h_luma_neon
h264_loop_filter_start
@@ -192,7 +192,7 @@ function x264_deblock_h_luma_neon
align_pop_regs
bx lr
-.endfunc
+endfunc
.macro h264_loop_filter_chroma
vdup.8 q11, r2 // alpha
@@ -253,7 +253,7 @@ function x264_deblock_v_chroma_neon
vst2.8 {d0, d1}, [r0,:128], r1
bx lr
-.endfunc
+endfunc
function x264_deblock_h_chroma_neon
h264_loop_filter_start
@@ -301,7 +301,7 @@ function x264_deblock_h_chroma_neon
vst1.8 {d3}, [r0], r1
bx lr
-.endfunc
+endfunc
function x264_deblock_strength_neon
ldr ip, [sp]
@@ -407,4 +407,4 @@ lists:
vst1.8 {q8}, [r3,:128] @ bs[0]
bx lr
-.endfunc
+endfunc
diff --git a/common/arm/mc-a.S b/common/arm/mc-a.S
index 3a16d0d..cd57920 100644
--- a/common/arm/mc-a.S
+++ b/common/arm/mc-a.S
@@ -49,7 +49,7 @@ function x264_prefetch_ref_arm
pld [r3, r1, lsl #1]
pld [r3, r2]
bx lr
-.endfunc
+endfunc
// void prefetch_fenc( uint8_t *pix_y, intptr_t stride_y,
// uint8_t *pix_uv, intptr_t stride_uv, int mb_x )
@@ -75,7 +75,7 @@ function x264_prefetch_fenc_arm
pld [ip]
pld [ip, r3]
pop {pc}
-.endfunc
+endfunc
// void *x264_memcpy_aligned( void *dst, const void *src, size_t n )
@@ -84,7 +84,7 @@ function x264_memcpy_aligned_neon
movrel ip, memcpy_table
and r3, r3, #0xc
ldr pc, [ip, r3]
-.endfunc
+endfunc
.macro MEMCPY_ALIGNED srcalign dstalign
function memcpy_aligned_\dstalign\()_\srcalign\()_neon, export=0
@@ -126,7 +126,7 @@ function memcpy_aligned_\dstalign\()_\srcalign\()_neon, export=0
vst1.64 {d0}, [r3,:64]!
.endif
bx lr
-.endfunc
+endfunc
.endm
MEMCPY_ALIGNED 16, 16
@@ -155,7 +155,7 @@ memzero_loop:
.endr
bgt memzero_loop
bx lr
-.endfunc
+endfunc
// void pixel_avg( uint8_t *dst, intptr_t dst_stride,
@@ -174,7 +174,7 @@ function x264_pixel_avg_\w\()x\h\()_neon
cmp ip, #0
bge x264_pixel_avg_weight_w\w\()_add_add_neon
b x264_pixel_avg_weight_w\w\()_sub_add_neon // weight < 0
-.endfunc
+endfunc
.endm
AVGH 4, 2
@@ -252,7 +252,7 @@ function x264_pixel_avg_weight_w4_\ext\()_neon, export=0
vst1.32 {d1[0]}, [r0,:32], r1
bgt 1b
pop {r4-r6,pc}
-.endfunc
+endfunc
function x264_pixel_avg_weight_w8_\ext\()_neon, export=0
load_weights_\ext
@@ -276,7 +276,7 @@ function x264_pixel_avg_weight_w8_\ext\()_neon, export=0
vst1.64 {d3}, [r0,:64], r1
bgt 1b
pop {r4-r6,pc}
-.endfunc
+endfunc
function x264_pixel_avg_weight_w16_\ext\()_neon, export=0
load_weights_\ext
@@ -296,7 +296,7 @@ function x264_pixel_avg_weight_w16_\ext\()_neon, export=0
vst1.64 {d2-d3}, [r0,:128], r1
bgt 1b
pop {r4-r6,pc}
-.endfunc
+endfunc
.endm
AVG_WEIGHT add_add
@@ -315,7 +315,7 @@ function x264_pixel_avg_w4_neon, export=0
vst1.32 {d1[0]}, [r0,:32], r1
bgt x264_pixel_avg_w4_neon
pop {r4-r6,pc}
-.endfunc
+endfunc
function x264_pixel_avg_w8_neon, export=0
subs lr, lr, #4
@@ -337,7 +337,7 @@ function x264_pixel_avg_w8_neon, export=0
vst1.64 {d3}, [r0,:64], r1
bgt x264_pixel_avg_w8_neon
pop {r4-r6,pc}
-.endfunc
+endfunc
function x264_pixel_avg_w16_neon, export=0
subs lr, lr, #4
@@ -359,7 +359,7 @@ function x264_pixel_avg_w16_neon, export=0
vst1.64 {d6-d7}, [r0,:128], r1
bgt x264_pixel_avg_w16_neon
pop {r4-r6,pc}
-.endfunc
+endfunc
function x264_pixel_avg2_w4_neon
@@ -378,7 +378,7 @@ avg2_w4_loop:
vst1.32 {d1[0]}, [r0,:32], r1
bgt avg2_w4_loop
pop {pc}
-.endfunc
+endfunc
function x264_pixel_avg2_w8_neon
ldr ip, [sp, #4]
@@ -396,7 +396,7 @@ avg2_w8_loop:
vst1.64 {d1}, [r0,:64], r1
bgt avg2_w8_loop
pop {pc}
-.endfunc
+endfunc
function x264_pixel_avg2_w16_neon
ldr ip, [sp, #4]
@@ -414,7 +414,7 @@ avg2_w16_loop:
vst1.64 {d4-d5}, [r0,:128], r1
bgt avg2_w16_loop
pop {pc}
-.endfunc
+endfunc
function x264_pixel_avg2_w20_neon
ldr ip, [sp, #4]
@@ -437,7 +437,7 @@ avg2_w20_loop:
vst1.32 {d6[0]}, [r0,:32], r1
bgt avg2_w20_loop
pop {pc}
-.endfunc
+endfunc
.macro weight_prologue type
@@ -498,7 +498,7 @@ weight20_loop:
vst1.32 {d20[1]}, [r0,:32], r1
bgt weight20_loop
pop {r4-r5,pc}
-.endfunc
+endfunc
function x264_mc_weight_w16_neon
weight_prologue full
@@ -530,7 +530,7 @@ weight16_loop:
vst1.8 {d18-d19}, [r0,:128], r1
bgt weight16_loop
pop {r4-r5,pc}
-.endfunc
+endfunc
function x264_mc_weight_w8_neon
weight_prologue full
@@ -552,7 +552,7 @@ weight8_loop:
vst1.8 {d18}, [r0,:64], r1
bgt weight8_loop
pop {r4-r5,pc}
-.endfunc
+endfunc
function x264_mc_weight_w4_neon
weight_prologue full
@@ -571,7 +571,7 @@ weight4_loop:
vst1.32 {d16[1]}, [r0,:32], r1
bgt weight4_loop
pop {r4-r5,pc}
-.endfunc
+endfunc
function x264_mc_weight_w20_nodenom_neon
weight_prologue nodenom
@@ -608,7 +608,7 @@ weight20_nodenom_loop:
vst1.32 {d20[1]}, [r0,:32], r1
bgt weight20_nodenom_loop
pop {r4-r5,pc}
-.endfunc
+endfunc
function x264_mc_weight_w16_nodenom_neon
weight_prologue nodenom
@@ -636,7 +636,7 @@ weight16_nodenom_loop:
vst1.8 {d18-d19}, [r0,:128], r1
bgt weight16_nodenom_loop
pop {r4-r5,pc}
-.endfunc
+endfunc
function x264_mc_weight_w8_nodenom_neon
weight_prologue nodenom
@@ -656,7 +656,7 @@ weight8_nodenom_loop:
vst1.8 {d17}, [r0,:64], r1
bgt weight8_nodenom_loop
pop {r4-r5,pc}
-.endfunc
+endfunc
function x264_mc_weight_w4_nodenom_neon
weight_prologue nodenom
@@ -674,7 +674,7 @@ weight4_nodenom_loop:
vst1.32 {d16[1]}, [r0,:32], r1
bgt weight4_nodenom_loop
pop {r4-r5,pc}
-.endfunc
+endfunc
.macro weight_simple_prologue
push {lr}
@@ -698,7 +698,7 @@ weight20_\name\()_loop:
vst1.8 {d19-d21}, [r0,:64], r1
bgt weight20_\name\()_loop
pop {pc}
-.endfunc
+endfunc
function x264_mc_weight_w16_\name\()_neon
weight_simple_prologue
@@ -712,7 +712,7 @@ weight16_\name\()_loop:
vst1.8 {d18-d19}, [r0,:128], r1
bgt weight16_\name\()_loop
pop {pc}
-.endfunc
+endfunc
function x264_mc_weight_w8_\name\()_neon
weight_simple_prologue
@@ -725,7 +725,7 @@ weight8_\name\()_loop:
vst1.8 {d17}, [r0,:64], r1
bgt weight8_\name\()_loop
pop {pc}
-.endfunc
+endfunc
function x264_mc_weight_w4_\name\()_neon
weight_simple_prologue
@@ -738,7 +738,7 @@ weight4_\name\()_loop:
vst1.32 {d17[0]}, [r0,:32], r1
bgt weight4_\name\()_loop
pop {pc}
-.endfunc
+endfunc
.endm
weight_simple offsetadd, vqadd.u8
@@ -760,7 +760,7 @@ copy_w4_loop:
vst1.32 {d3[0]}, [r0,:32], r1
bgt copy_w4_loop
bx lr
-.endfunc
+endfunc
function x264_mc_copy_w8_neon
ldr ip, [sp]
@@ -776,7 +776,7 @@ copy_w8_loop:
vst1.32 {d3}, [r0,:64], r1
bgt copy_w8_loop
bx lr
-.endfunc
+endfunc
function x264_mc_copy_w16_neon
ldr ip, [sp]
@@ -792,7 +792,7 @@ copy_w16_loop:
vst1.32 {d6-d7}, [r0,:128], r1
bgt copy_w16_loop
bx lr
-.endfunc
+endfunc
function x264_mc_copy_w16_aligned_neon
ldr ip, [sp]
@@ -808,7 +808,7 @@ copy_w16_aligned_loop:
vst1.32 {d6-d7}, [r0,:128], r1
bgt copy_w16_aligned_loop
bx lr
-.endfunc
+endfunc
// void x264_mc_chroma_neon( uint8_t *dst, intptr_t i_dst_stride,
@@ -1158,7 +1158,7 @@ mc_chroma_w8:
vpop {d8-d11}
pop {r4-r8, pc}
-.endfunc
+endfunc
// hpel_filter_v( uint8_t *dst, uint8_t *src, int16_t *buf, intptr_t stride, int width )
@@ -1199,7 +1199,7 @@ filter_v_loop:
vst1.64 {d0-d1}, [r0,:128]!
bgt filter_v_loop
pop {pc}
-.endfunc
+endfunc
// hpel_filter_c( uint8_t *dst, int16_t *buf, int width );
function x264_hpel_filter_c_neon
@@ -1284,7 +1284,7 @@ filter_c_loop:
vst1.64 {d30-d31}, [r0,:128]!
bgt filter_c_loop
bx lr
-.endfunc
+endfunc
// hpel_filter_h( uint8_t *dst, uint8_t *src, int width );
function x264_hpel_filter_h_neon
@@ -1371,7 +1371,7 @@ filter_h_loop:
vst1.64 {d6-d7}, [r0,:128]!
bgt filter_h_loop
bx lr
-.endfunc
+endfunc
// frame_init_lowres_core( uint8_t *src0, uint8_t *dst0, uint8_t *dsth, uint8_t *dstv,
@@ -1463,7 +1463,7 @@ lowres_xloop_end:
vpop {d8-d15}
pop {r4-r10,pc}
-.endfunc
+endfunc
function x264_load_deinterleave_chroma_fdec_neon
mov ip, #FDEC_STRIDE/2
@@ -1476,7 +1476,7 @@ function x264_load_deinterleave_chroma_fdec_neon
bgt 1b
bx lr
-.endfunc
+endfunc
function x264_load_deinterleave_chroma_fenc_neon
mov ip, #FENC_STRIDE/2
@@ -1489,7 +1489,7 @@ function x264_load_deinterleave_chroma_fenc_neon
bgt 1b
bx lr
-.endfunc
+endfunc
function x264_plane_copy_deinterleave_neon
push {r4-r7, lr}
@@ -1515,7 +1515,7 @@ block:
bgt block
pop {r4-r7, pc}
-.endfunc
+endfunc
function x264_plane_copy_deinterleave_rgb_neon
push {r4-r8, r10, r11, lr}
@@ -1567,7 +1567,7 @@ block4:
bgt block4
pop {r4-r8, r10, r11, pc}
-.endfunc
+endfunc
function x264_plane_copy_interleave_neon
push {r4-r7, lr}
@@ -1594,7 +1594,7 @@ blocki:
bgt blocki
pop {r4-r7, pc}
-.endfunc
+endfunc
function x264_store_interleave_chroma_neon
push {lr}
@@ -1608,4 +1608,4 @@ function x264_store_interleave_chroma_neon
bgt 1b
pop {pc}
-.endfunc
+endfunc
diff --git a/common/arm/pixel-a.S b/common/arm/pixel-a.S
index e288bcf..80b8b70 100644
--- a/common/arm/pixel-a.S
+++ b/common/arm/pixel-a.S
@@ -61,7 +61,7 @@ function x264_pixel_sad_4x\h\()_armv6
.endr
usada8 r0, r6, lr, ip
pop {r4-r6,pc}
-.endfunc
+endfunc
.endm
SAD4_ARMV6 4
@@ -138,7 +138,7 @@ function x264_pixel_sad\name\()_\w\()x\h\()_neon
vpaddl.u16 d0, d0
vmov.u32 r0, d0[0]
bx lr
-.endfunc
+endfunc
.endm
SAD_FUNC 4, 4
@@ -223,7 +223,7 @@ function x264_pixel_sad_aligned_\w\()x\h\()_neon_dual
vpaddl.u16 d0, d0
vmov.u32 r0, d0[0]
bx lr
-.endfunc
+endfunc
.endm
SAD_FUNC_DUAL 8, 4
@@ -369,7 +369,7 @@ function x264_pixel_sad_x\x\()_\w\()x\h\()_neon
vst1.32 {d0-d1}, [r7]
.endif
pop {r6-r7,pc}
-.endfunc
+endfunc
.endm
SAD_X_FUNC 3, 4, 4
@@ -478,7 +478,7 @@ function x264_pixel_ssd_\w\()x\h\()_neon
vpadd.s32 d0, d0, d0
vmov.32 r0, d0[0]
bx lr
-.endfunc
+endfunc
.endm
SSD_FUNC 4, 4
@@ -518,7 +518,7 @@ function x264_pixel_var_8x8_neon
vld1.64 {d26}, [r0,:64], r1
VAR_SQR_SUM q2, q10, q15, d26
b x264_var_end
-.endfunc
+endfunc
function x264_pixel_var_8x16_neon
vld1.64 {d16}, [r0,:64], r1
@@ -550,7 +550,7 @@ function x264_pixel_var_8x16_neon
2:
VAR_SQR_SUM q2, q13, q15, d22
b x264_var_end
-.endfunc
+endfunc
function x264_pixel_var_16x16_neon
vld1.64 {d16-d17}, [r0,:128], r1
@@ -574,7 +574,7 @@ var16_loop:
VAR_SQR_SUM q1, q12, q14, d18
VAR_SQR_SUM q2, q13, q15, d19
bgt var16_loop
-.endfunc
+endfunc
function x264_var_end, export=0
vpaddl.u16 q8, q14
@@ -589,7 +589,7 @@ function x264_var_end, export=0
vmov r0, r1, d0
bx lr
-.endfunc
+endfunc
.macro DIFF_SUM diff da db lastdiff
vld1.64 {\da}, [r0,:64], r1
@@ -634,7 +634,7 @@ function x264_pixel_var2_8x8_neon
mul r0, r0, r0
sub r0, r1, r0, lsr #6
bx lr
-.endfunc
+endfunc
function x264_pixel_var2_8x16_neon
vld1.64 {d16}, [r0,:64], r1
@@ -678,7 +678,7 @@ function x264_pixel_var2_8x16_neon
mul r0, r0, r0
sub r0, r1, r0, lsr #7
bx lr
-.endfunc
+endfunc
.macro LOAD_DIFF_8x4 q0 q1 q2 q3
vld1.32 {d1}, [r2], r3
@@ -715,7 +715,7 @@ function x264_pixel_satd_4x4_neon
HORIZ_ADD d0, d0, d1
vmov.32 r0, d0[0]
bx lr
-.endfunc
+endfunc
function x264_pixel_satd_4x8_neon
vld1.32 {d1[]}, [r2], r3
@@ -742,7 +742,7 @@ function x264_pixel_satd_4x8_neon
vsubl.u8 q3, d6, d7
SUMSUB_AB q10, q11, q2, q3
b x264_satd_4x8_8x4_end_neon
-.endfunc
+endfunc
function x264_pixel_satd_8x4_neon
vld1.64 {d1}, [r2], r3
@@ -759,7 +759,7 @@ function x264_pixel_satd_8x4_neon
vld1.64 {d6}, [r0,:64], r1
vsubl.u8 q3, d6, d7
SUMSUB_AB q10, q11, q2, q3
-.endfunc
+endfunc
function x264_satd_4x8_8x4_end_neon, export=0
vadd.s16 q0, q8, q10
@@ -786,7 +786,7 @@ function x264_satd_4x8_8x4_end_neon, export=0
HORIZ_ADD d0, d0, d1
vmov.32 r0, d0[0]
bx lr
-.endfunc
+endfunc
function x264_pixel_satd_8x8_neon
mov ip, lr
@@ -800,7 +800,7 @@ function x264_pixel_satd_8x8_neon
mov lr, ip
vmov.32 r0, d0[0]
bx lr
-.endfunc
+endfunc
function x264_pixel_satd_8x16_neon
vpush {d8-d11}
@@ -822,7 +822,7 @@ function x264_pixel_satd_8x16_neon
mov lr, ip
vmov.32 r0, d0[0]
bx lr
-.endfunc
+endfunc
function x264_satd_8x8_neon, export=0
LOAD_DIFF_8x4 q8, q9, q10, q11
@@ -842,7 +842,7 @@ function x264_satd_8x8_neon, export=0
SUMSUB_AB q9, q11, q1, q3
vld1.64 {d0}, [r0,:64], r1
vsubl.u8 q15, d0, d1
-.endfunc
+endfunc
// one vertical hadamard pass and two horizontal
function x264_satd_8x4v_8x8h_neon, export=0
@@ -871,7 +871,7 @@ function x264_satd_8x4v_8x8h_neon, export=0
vmax.s16 q14, q8, q10
vmax.s16 q15, q9, q11
bx lr
-.endfunc
+endfunc
function x264_pixel_satd_16x8_neon
vpush {d8-d11}
@@ -893,7 +893,7 @@ function x264_pixel_satd_16x8_neon
mov lr, ip
vmov.32 r0, d0[0]
bx lr
-.endfunc
+endfunc
function x264_pixel_satd_16x16_neon
vpush {d8-d11}
@@ -927,7 +927,7 @@ function x264_pixel_satd_16x16_neon
mov lr, ip
vmov.32 r0, d0[0]
bx lr
-.endfunc
+endfunc
function x264_satd_16x4_neon, export=0
vld1.64 {d2-d3}, [r2], r3
@@ -951,7 +951,7 @@ function x264_satd_16x4_neon, export=0
SUMSUB_AB q2, q3, q10, q11
SUMSUB_ABCD q8, q10, q9, q11, q0, q2, q1, q3
b x264_satd_8x4v_8x8h_neon
-.endfunc
+endfunc
function x264_pixel_sa8d_8x8_neon
@@ -964,7 +964,7 @@ function x264_pixel_sa8d_8x8_neon
add r0, r0, #1
lsr r0, r0, #1
bx lr
-.endfunc
+endfunc
function x264_pixel_sa8d_16x16_neon
vpush {d8-d11}
@@ -996,7 +996,7 @@ function x264_pixel_sa8d_16x16_neon
add r0, r0, #1
lsr r0, r0, #1
bx lr
-.endfunc
+endfunc
.macro HADAMARD4_V r1, r2, r3, r4, t1, t2, t3, t4
SUMSUB_ABCD \t1, \t2, \t3, \t4, \r1, \r2, \r3, \r4
@@ -1059,7 +1059,7 @@ function x264_sa8d_8x8_neon, export=0
vadd.i16 q8, q8, q9
vadd.i16 q9, q10, q11
bx lr
-.endfunc
+endfunc
.macro HADAMARD_AC w h
@@ -1095,7 +1095,7 @@ function x264_pixel_hadamard_ac_\w\()x\h\()_neon
lsr r0, r0, #1
lsr r1, r1, #2
bx lr
-.endfunc
+endfunc
.endm
HADAMARD_AC 8, 8
@@ -1190,7 +1190,7 @@ function x264_hadamard_ac_8x8_neon, export=0
vadd.s16 q2, q2, q14
vpadal.u16 q5, q2
bx lr
-.endfunc
+endfunc
.macro SSIM_ITER n ssa s12 ssb lastssa lasts12 lastssb da db dnext
@@ -1244,7 +1244,7 @@ function x264_pixel_ssim_4x4x2_core_neon
vst4.32 {d0-d3}, [ip]
bx lr
-.endfunc
+endfunc
// FIXME: see about doing 16x16 -> 32 bit multiplies for s1/s2
function x264_pixel_ssim_end4_neon
@@ -1315,4 +1315,4 @@ ssim_skip:
vpadd.f32 d0, d0, d0
vmov.32 r0, d0[0]
bx lr
-.endfunc
+endfunc
diff --git a/common/arm/predict-a.S b/common/arm/predict-a.S
index 8af861b..593d403 100644
--- a/common/arm/predict-a.S
+++ b/common/arm/predict-a.S
@@ -75,7 +75,7 @@ function x264_predict_4x4_h_armv6
add ip, ip, ip, lsl #16
str ip, [r0, #3*FDEC_STRIDE]
bx lr
-.endfunc
+endfunc
function x264_predict_4x4_v_armv6
ldr r1, [r0, #0 - 1 * FDEC_STRIDE]
@@ -84,7 +84,7 @@ function x264_predict_4x4_v_armv6
str r1, [r0, #0 + 2 * FDEC_STRIDE]
str r1, [r0, #0 + 3 * FDEC_STRIDE]
bx lr
-.endfunc
+endfunc
function x264_predict_4x4_dc_armv6
mov ip, #0
@@ -107,7 +107,7 @@ function x264_predict_4x4_dc_armv6
str r1, [r0, #2*FDEC_STRIDE]
str r1, [r0, #3*FDEC_STRIDE]
bx lr
-.endfunc
+endfunc
function x264_predict_4x4_dc_top_neon
mov r12, #FDEC_STRIDE
@@ -122,7 +122,7 @@ function x264_predict_4x4_dc_top_neon
vst1.32 d1[0], [r0,:32], r12
vst1.32 d1[0], [r0,:32], r12
bx lr
-.endfunc
+endfunc
// return a1 = (a1+2*b1+c1+2)>>2 a2 = (a2+2*b2+c2+2)>>2
.macro PRED4x4_LOWPASS a1 b1 c1 a2 b2 c2 pb_1
@@ -165,7 +165,7 @@ function x264_predict_4x4_ddr_armv6
add r5, r5, r4, lsr #8
str r5, [r0, #3*FDEC_STRIDE]
pop {r4-r6,pc}
-.endfunc
+endfunc
function x264_predict_4x4_ddl_neon
sub r0, #FDEC_STRIDE
@@ -184,7 +184,7 @@ function x264_predict_4x4_ddl_neon
vst1.32 {d2[0]}, [r0,:32], ip
vst1.32 {d3[0]}, [r0,:32], ip
bx lr
-.endfunc
+endfunc
function x264_predict_8x8_dc_neon
mov ip, #0
@@ -208,7 +208,7 @@ function x264_predict_8x8_dc_neon
vst1.64 {d0}, [r0,:64], ip
.endr
pop {r4-r5,pc}
-.endfunc
+endfunc
function x264_predict_8x8_h_neon
add r1, r1, #7
@@ -231,7 +231,7 @@ function x264_predict_8x8_h_neon
vst1.64 {d6}, [r0,:64], ip
vst1.64 {d7}, [r0,:64], ip
bx lr
-.endfunc
+endfunc
function x264_predict_8x8_v_neon
add r1, r1, #16
@@ -241,7 +241,7 @@ function x264_predict_8x8_v_neon
vst1.8 {d0}, [r0,:64], r12
.endr
bx lr
-.endfunc
+endfunc
function x264_predict_8x8_ddl_neon
add r1, #16
@@ -269,7 +269,7 @@ function x264_predict_8x8_ddl_neon
vst1.8 d2, [r0,:64], r12
vst1.8 d1, [r0,:64], r12
bx lr
-.endfunc
+endfunc
function x264_predict_8x8_ddr_neon
vld1.8 {d0-d3}, [r1,:128]
@@ -299,7 +299,7 @@ function x264_predict_8x8_ddr_neon
vst1.8 {d4}, [r0,:64], r12
vst1.8 {d5}, [r0,:64], r12
bx lr
-.endfunc
+endfunc
function x264_predict_8x8_vl_neon
add r1, #16
@@ -330,7 +330,7 @@ function x264_predict_8x8_vl_neon
vst1.8 {d3}, [r0,:64], r12
vst1.8 {d2}, [r0,:64], r12
bx lr
-.endfunc
+endfunc
function x264_predict_8x8_vr_neon
add r1, #8
@@ -362,7 +362,7 @@ function x264_predict_8x8_vr_neon
vst1.8 {d6}, [r0,:64], r12
vst1.8 {d3}, [r0,:64], r12
bx lr
-.endfunc
+endfunc
function x264_predict_8x8_hd_neon
mov r12, #FDEC_STRIDE
@@ -395,7 +395,7 @@ function x264_predict_8x8_hd_neon
vst1.8 {d16}, [r0,:64], r12
bx lr
-.endfunc
+endfunc
function x264_predict_8x8_hu_neon
mov r12, #FDEC_STRIDE
@@ -428,7 +428,7 @@ function x264_predict_8x8_hu_neon
vst1.8 {d7}, [r0,:64], r12
vst1.8 {d17}, [r0,:64]
bx lr
-.endfunc
+endfunc
function x264_predict_8x8c_dc_top_neon
sub r2, r0, #FDEC_STRIDE
@@ -441,7 +441,7 @@ function x264_predict_8x8c_dc_top_neon
vdup.8 d0, d0[0]
vtrn.32 d0, d1
b pred8x8_dc_end
-.endfunc
+endfunc
function x264_predict_8x8c_dc_left_neon
mov r1, #FDEC_STRIDE
@@ -453,7 +453,7 @@ function x264_predict_8x8c_dc_left_neon
vdup.8 d1, d0[1]
vdup.8 d0, d0[0]
b pred8x8_dc_end
-.endfunc
+endfunc
function x264_predict_8x8c_dc_neon
sub r2, r0, #FDEC_STRIDE
@@ -479,7 +479,7 @@ pred8x8_dc_end:
vst1.8 {d1}, [r2,:64], r1
.endr
bx lr
-.endfunc
+endfunc
function x264_predict_8x8c_h_neon
sub r1, r0, #1
@@ -491,7 +491,7 @@ function x264_predict_8x8c_h_neon
vst1.64 {d2}, [r0,:64], ip
.endr
bx lr
-.endfunc
+endfunc
function x264_predict_8x8c_v_neon
sub r0, r0, #FDEC_STRIDE
@@ -501,7 +501,7 @@ function x264_predict_8x8c_v_neon
vst1.64 {d0}, [r0,:64], ip
.endr
bx lr
-.endfunc
+endfunc
function x264_predict_8x8c_p_neon
sub r3, r0, #FDEC_STRIDE
@@ -554,7 +554,7 @@ function x264_predict_8x8c_p_neon
subs r3, r3, #1
bne 1b
bx lr
-.endfunc
+endfunc
function x264_predict_16x16_dc_top_neon
@@ -565,7 +565,7 @@ function x264_predict_16x16_dc_top_neon
vrshrn.u16 d0, q0, #4
vdup.8 q0, d0[0]
b pred16x16_dc_end
-.endfunc
+endfunc
function x264_predict_16x16_dc_left_neon
mov r1, #FDEC_STRIDE
@@ -576,7 +576,7 @@ function x264_predict_16x16_dc_left_neon
vrshrn.u16 d0, q0, #4
vdup.8 q0, d0[0]
b pred16x16_dc_end
-.endfunc
+endfunc
function x264_predict_16x16_dc_neon
sub r3, r0, #FDEC_STRIDE
@@ -614,7 +614,7 @@ pred16x16_dc_end:
vst1.64 {d0-d1}, [r0,:128], r1
.endr
bx lr
-.endfunc
+endfunc
function x264_predict_16x16_h_neon
sub r1, r0, #1
@@ -628,7 +628,7 @@ function x264_predict_16x16_h_neon
vst1.64 {d2-d3}, [r0,:128], ip
.endr
bx lr
-.endfunc
+endfunc
function x264_predict_16x16_v_neon
sub r0, r0, #FDEC_STRIDE
@@ -638,7 +638,7 @@ function x264_predict_16x16_v_neon
vst1.64 {d0-d1}, [r0,:128], ip
.endr
bx lr
-.endfunc
+endfunc
function x264_predict_16x16_p_neon
sub r3, r0, #FDEC_STRIDE
@@ -695,4 +695,4 @@ function x264_predict_16x16_p_neon
subs r3, r3, #1
bne 1b
bx lr
-.endfunc
+endfunc
diff --git a/common/arm/quant-a.S b/common/arm/quant-a.S
index 374796c..d22a10e 100644
--- a/common/arm/quant-a.S
+++ b/common/arm/quant-a.S
@@ -78,7 +78,7 @@ function x264_quant_2x2_dc_neon
vsub.s16 d3, d3, d0
vst1.64 {d3}, [r0,:64]
QUANT_END d3
-.endfunc
+endfunc
// quant_4x4_dc( int16_t dct[16], int mf, int bias )
function x264_quant_4x4_dc_neon
@@ -90,7 +90,7 @@ function x264_quant_4x4_dc_neon
QUANT_TWO q0, q0, d4, d5, d4, d5, q0
vorr d0, d0, d1
QUANT_END d0
-.endfunc
+endfunc
// quant_4x4( int16_t dct[16], uint16_t mf[16], uint16_t bias[16] )
function x264_quant_4x4_neon
@@ -102,7 +102,7 @@ function x264_quant_4x4_neon
QUANT_TWO q0, q1, d4, d5, d6, d7, q0
vorr d0, d0, d1
QUANT_END d0
-.endfunc
+endfunc
// quant_4x4x4( int16_t dct[4][16], uint16_t mf[16], uint16_t bias[16] )
function x264_quant_4x4x4_neon
@@ -143,7 +143,7 @@ function x264_quant_4x4x4_neon
orrne r0, #8
vpop {d8-d15}
bx lr
-.endfunc
+endfunc
// quant_8x8( int16_t dct[64], uint16_t mf[64], uint16_t bias[64] )
function x264_quant_8x8_neon
@@ -163,7 +163,7 @@ function x264_quant_8x8_neon
.endr
vorr d0, d0, d1
QUANT_END d0
-.endfunc
+endfunc
.macro DEQUANT_START mf_size offset dc=no
mov r3, #0x2b
@@ -255,7 +255,7 @@ dequant_\size\()_rshift_loop:
bgt dequant_\size\()_rshift_loop
.endif
bx lr
-.endfunc
+endfunc
.endm
DEQUANT 4x4, 4
@@ -305,7 +305,7 @@ dequant_4x4_dc_rshift:
vmovn.s32 d3, q13
vst1.16 {d0-d3}, [r0,:128]
bx lr
-.endfunc
+endfunc
// int coeff_last( int16_t *l )
@@ -317,7 +317,7 @@ function x264_coeff_last4_arm
lsrs r2, r2, #16
addne r0, r0, #1
bx lr
-.endfunc
+endfunc
function x264_coeff_last8_arm
ldrd r2, r3, [r0, #8]
@@ -331,7 +331,7 @@ function x264_coeff_last8_arm
lsrs r2, r2, #16
addne r0, r0, #1
bx lr
-.endfunc
+endfunc
.macro COEFF_LAST_1x size
function x264_coeff_last\size\()_neon
@@ -356,7 +356,7 @@ function x264_coeff_last\size\()_neon
subslt r0, r3, r0, lsr #2
movlt r0, #0
bx lr
-.endfunc
+endfunc
.endm
COEFF_LAST_1x 15
@@ -405,4 +405,4 @@ function x264_coeff_last64_neon
subslt r0, ip, r0
movlt r0, #0
bx lr
-.endfunc
+endfunc
diff --git a/configure b/configure
index a50f8d5..8d6a425 100755
--- a/configure
+++ b/configure
@@ -694,6 +694,9 @@ case $host_cpu in
;;
esac
+# check if the assembler supports '.func' (clang 3.5 does not)
+as_check ".func t\n.endfunc" && define HAVE_AS_FUNC 1 || define HAVE_AS_FUNC 0
+
if [ $SYS = WINDOWS ]; then
if ! rc_check "0 RCDATA {0}" ; then
RC=""
--
2.0.1