[x264-devel] Fix ARM asm compilation with Apple assembler
Steve Clark
git at videolan.org
Tue Jan 21 22:51:24 CET 2014
x264 | branch: master | Steve Clark <sclark at vgocom.com> | Wed Nov 20 21:40:23 2013 +0400| [1c8e4725e446317b435cc131f07e53a1fbdd00dd] | committer: Jason Garrett-Glaser
Fix ARM asm compilation with Apple assembler
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=1c8e4725e446317b435cc131f07e53a1fbdd00dd
---
common/arm/asm.S | 2 ++
common/arm/cpu-a.S | 4 ++--
common/arm/mc-a.S | 12 ++++++------
common/arm/pixel-a.S | 6 +++---
common/arm/predict-a.S | 4 ++--
common/arm/quant-a.S | 6 +++---
6 files changed, 18 insertions(+), 16 deletions(-)
diff --git a/common/arm/asm.S b/common/arm/asm.S
index 053328a..43d7259 100644
--- a/common/arm/asm.S
+++ b/common/arm/asm.S
@@ -26,6 +26,8 @@
#include "config.h"
+.syntax unified
+
#ifdef PREFIX
# define EXTERN_ASM _
#else
diff --git a/common/arm/cpu-a.S b/common/arm/cpu-a.S
index 7a39290..3d9cca9 100644
--- a/common/arm/cpu-a.S
+++ b/common/arm/cpu-a.S
@@ -26,7 +26,7 @@
#include "asm.S"
.fpu neon
-.align
+.align 2
// done in gas because .fpu neon overrides the refusal to assemble
// instructions the selected -march/-mcpu doesn't support
@@ -95,7 +95,7 @@ average_loop:
sub r2, r2, r1
cmpgt r2, #30 << 3 // assume context switch if it took over 30 cycles
addle r3, r3, r2
- subles ip, ip, #1
+ subsle ip, ip, #1
bgt average_loop
// disable counters if we enabled them
diff --git a/common/arm/mc-a.S b/common/arm/mc-a.S
index 8a37e95..6e84e3c 100644
--- a/common/arm/mc-a.S
+++ b/common/arm/mc-a.S
@@ -167,7 +167,7 @@ function x264_pixel_avg_\w\()x\h\()_neon
ldr ip, [sp, #8]
push {r4-r6,lr}
cmp ip, #32
- ldrd r4, [sp, #16]
+ ldrd r4, r5, [sp, #16]
mov lr, #\h
beq x264_pixel_avg_w\w\()_neon
rsbs r6, ip, #64
@@ -447,7 +447,7 @@ avg2_w20_loop:
.ifc \type, full
ldr lr, [r4, #32] // denom
.endif
- ldrd r4, [r4, #32+4] // scale, offset
+ ldrd r4, r5, [r4, #32+4] // scale, offset
vdup.16 q0, r4
vdup.16 q1, r5
.ifc \type, full
@@ -818,8 +818,8 @@ copy_w16_aligned_loop:
function x264_mc_chroma_neon
push {r4-r8, lr}
vpush {d8-d11}
- ldrd r4, [sp, #56]
- ldrd r6, [sp, #64]
+ ldrd r4, r5, [sp, #56]
+ ldrd r6, r7, [sp, #64]
asr lr, r6, #3
mul lr, r4, lr
@@ -1380,8 +1380,8 @@ filter_h_loop:
function x264_frame_init_lowres_core_neon
push {r4-r10,lr}
vpush {d8-d15}
- ldrd r4, [sp, #96]
- ldrd r6, [sp, #104]
+ ldrd r4, r5, [sp, #96]
+ ldrd r6, r7, [sp, #104]
ldr lr, [sp, #112]
sub r10, r6, r7 // dst_stride - width
and r10, r10, #~15
diff --git a/common/arm/pixel-a.S b/common/arm/pixel-a.S
index 7294ad0..717a771 100644
--- a/common/arm/pixel-a.S
+++ b/common/arm/pixel-a.S
@@ -328,9 +328,9 @@ SAD_FUNC_DUAL 16, 16
function x264_pixel_sad_x\x\()_\w\()x\h\()_neon
push {r6-r7,lr}
.if \x == 3
- ldrd r6, [sp, #12]
+ ldrd r6, r7, [sp, #12]
.else
- ldrd r6, [sp, #16]
+ ldrd r6, r7, [sp, #16]
ldr r12, [sp, #12]
.endif
mov lr, #FENC_STRIDE
@@ -596,7 +596,7 @@ function x264_pixel_var2_8x8_neon
vadd.s32 d1, d2, d3
vpadd.s32 d0, d0, d1
- vmov.32 r0, r1, d0
+ vmov r0, r1, d0
vst1.32 {d0[1]}, [ip,:32]
mul r0, r0, r0
sub r0, r1, r0, lsr #6
diff --git a/common/arm/predict-a.S b/common/arm/predict-a.S
index 6d652f2..c2bf081 100644
--- a/common/arm/predict-a.S
+++ b/common/arm/predict-a.S
@@ -181,9 +181,9 @@ function x264_predict_4x4_ddl_neon
function x264_predict_8x8_dc_neon
mov ip, #0
- ldrd r2, [r1, #8]
+ ldrd r2, r3, [r1, #8]
push {r4-r5,lr}
- ldrd r4, [r1, #16]
+ ldrd r4, r5, [r1, #16]
lsl r3, r3, #8
ldrb lr, [r1, #7]
usad8 r2, r2, ip
diff --git a/common/arm/quant-a.S b/common/arm/quant-a.S
index 0ccf112..38045bc 100644
--- a/common/arm/quant-a.S
+++ b/common/arm/quant-a.S
@@ -312,7 +312,7 @@ dequant_4x4_dc_rshift:
// int coeff_last( int16_t *l )
function x264_coeff_last4_arm
- ldrd r2, [r0]
+ ldrd r2, r3, [r0]
subs r0, r3, #0
movne r0, #2
movne r2, r3
@@ -341,7 +341,7 @@ function x264_coeff_last\size\()_neon
subs r1, ip, r1, lsr #2
addge r0, r1, #\size - 8
- sublts r0, r3, r0, lsr #2
+ subslt r0, r3, r0, lsr #2
movlt r0, #0
bx lr
.endfunc
@@ -390,7 +390,7 @@ function x264_coeff_last64_neon
subs r1, ip, r1
addge r0, r1, #32
- sublts r0, ip, r0
+ subslt r0, ip, r0
movlt r0, #0
bx lr
.endfunc
More information about the x264-devel mailing list