[x264-devel] Fix ARM asm compilation with Apple assembler

Tue Jan 21 22:51:24 CET 2014

x264 | branch: master | Steve Clark <sclark at vgocom.com> | Wed Nov 20 21:40:23 2013 +0400| [1c8e4725e446317b435cc131f07e53a1fbdd00dd] | committer: Jason Garrett-Glaser

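Fix ARM asm compilation with Apple assembler

asm.S now selects unified syntax (`.syntax unified`), and the sources are
adjusted to match: `ldrd` names both destination registers explicitly, the
flag-setting suffix precedes the condition code (`subsle`/`subslt` instead of
`subles`/`sublts`), `.align` is given an explicit argument (`.align 2`), and
the `.32` size suffix is dropped from the two-register `vmov`.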

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=1c8e4725e446317b435cc131f07e53a1fbdd00dd
---

 common/arm/asm.S       |    2 ++
 common/arm/cpu-a.S     |    4 ++--
 common/arm/mc-a.S      |   12 ++++++------
 common/arm/pixel-a.S   |    6 +++---
 common/arm/predict-a.S |    4 ++--
 common/arm/quant-a.S   |    6 +++---
 6 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/common/arm/asm.S b/common/arm/asm.S
index 053328a..43d7259 100644
--- a/common/arm/asm.S
+++ b/common/arm/asm.S
@@ -26,6 +26,8 @@
 
 #include "config.h"
 
+.syntax unified
+
 #ifdef PREFIX
 #   define EXTERN_ASM _
 #else
diff --git a/common/arm/cpu-a.S b/common/arm/cpu-a.S
index 7a39290..3d9cca9 100644
--- a/common/arm/cpu-a.S
+++ b/common/arm/cpu-a.S
@@ -26,7 +26,7 @@
 #include "asm.S"
 
 .fpu neon
-.align
+.align 2
 
 // done in gas because .fpu neon overrides the refusal to assemble
 // instructions the selected -march/-mcpu doesn't support
@@ -95,7 +95,7 @@ average_loop:
     sub         r2, r2, r1
     cmpgt       r2, #30 << 3    // assume context switch if it took over 30 cycles
     addle       r3, r3, r2
-    subles      ip, ip, #1
+    subsle      ip, ip, #1
     bgt         average_loop
 
     // disable counters if we enabled them
diff --git a/common/arm/mc-a.S b/common/arm/mc-a.S
index 8a37e95..6e84e3c 100644
--- a/common/arm/mc-a.S
+++ b/common/arm/mc-a.S
@@ -167,7 +167,7 @@ function x264_pixel_avg_\w\()x\h\()_neon
     ldr         ip, [sp, #8]
     push        {r4-r6,lr}
     cmp         ip, #32
-    ldrd        r4, [sp, #16]
+    ldrd        r4, r5, [sp, #16]
     mov         lr, #\h
     beq         x264_pixel_avg_w\w\()_neon
     rsbs        r6,  ip,  #64
@@ -447,7 +447,7 @@ avg2_w20_loop:
 .ifc \type, full
     ldr         lr,  [r4, #32]      // denom
 .endif
-    ldrd        r4,  [r4, #32+4]    // scale, offset
+    ldrd        r4,  r5,  [r4, #32+4]    // scale, offset
     vdup.16     q0,  r4
     vdup.16     q1,  r5
 .ifc \type, full
@@ -818,8 +818,8 @@ copy_w16_aligned_loop:
 function x264_mc_chroma_neon
     push            {r4-r8, lr}
     vpush           {d8-d11}
-    ldrd            r4, [sp, #56]
-    ldrd            r6, [sp, #64]
+    ldrd            r4, r5, [sp, #56]
+    ldrd            r6, r7, [sp, #64]
 
     asr             lr, r6, #3
     mul             lr, r4, lr
@@ -1380,8 +1380,8 @@ filter_h_loop:
 function x264_frame_init_lowres_core_neon
     push            {r4-r10,lr}
     vpush           {d8-d15}
-    ldrd            r4,  [sp, #96]
-    ldrd            r6,  [sp, #104]
+    ldrd            r4,  r5,  [sp, #96]
+    ldrd            r6,  r7,  [sp, #104]
     ldr             lr,  [sp, #112]
     sub             r10, r6,  r7            // dst_stride - width
     and             r10, r10, #~15
diff --git a/common/arm/pixel-a.S b/common/arm/pixel-a.S
index 7294ad0..717a771 100644
--- a/common/arm/pixel-a.S
+++ b/common/arm/pixel-a.S
@@ -328,9 +328,9 @@ SAD_FUNC_DUAL  16, 16
 function x264_pixel_sad_x\x\()_\w\()x\h\()_neon
     push        {r6-r7,lr}
 .if \x == 3
-    ldrd        r6,  [sp, #12]
+    ldrd        r6,  r7,  [sp, #12]
 .else
-    ldrd        r6,  [sp, #16]
+    ldrd        r6,  r7,  [sp, #16]
     ldr         r12, [sp, #12]
 .endif
     mov         lr,  #FENC_STRIDE
@@ -596,7 +596,7 @@ function x264_pixel_var2_8x8_neon
     vadd.s32        d1,  d2,  d3
     vpadd.s32       d0,  d0,  d1
 
-    vmov.32         r0,  r1,  d0
+    vmov            r0,  r1,  d0
     vst1.32         {d0[1]}, [ip,:32]
     mul             r0,  r0,  r0
     sub             r0,  r1,  r0,  lsr #6
diff --git a/common/arm/predict-a.S b/common/arm/predict-a.S
index 6d652f2..c2bf081 100644
--- a/common/arm/predict-a.S
+++ b/common/arm/predict-a.S
@@ -181,9 +181,9 @@ function x264_predict_4x4_ddl_neon
 
 function x264_predict_8x8_dc_neon
     mov     ip, #0
-    ldrd    r2, [r1, #8]
+    ldrd    r2, r3, [r1, #8]
     push    {r4-r5,lr}
-    ldrd    r4, [r1, #16]
+    ldrd    r4, r5, [r1, #16]
     lsl     r3, r3, #8
     ldrb    lr, [r1, #7]
     usad8   r2, r2, ip
diff --git a/common/arm/quant-a.S b/common/arm/quant-a.S
index 0ccf112..38045bc 100644
--- a/common/arm/quant-a.S
+++ b/common/arm/quant-a.S
@@ -312,7 +312,7 @@ dequant_4x4_dc_rshift:
 
 // int coeff_last( int16_t *l )
 function x264_coeff_last4_arm
-    ldrd        r2,  [r0]
+    ldrd        r2,  r3,  [r0]
     subs        r0,  r3,  #0
     movne       r0,  #2
     movne       r2,  r3
@@ -341,7 +341,7 @@ function x264_coeff_last\size\()_neon
 
     subs        r1,  ip,  r1,  lsr #2
     addge       r0,  r1,  #\size - 8
-    sublts      r0,  r3,  r0,  lsr #2
+    subslt      r0,  r3,  r0,  lsr #2
     movlt       r0,  #0
     bx          lr
 .endfunc
@@ -390,7 +390,7 @@ function x264_coeff_last64_neon
 
     subs        r1,  ip,  r1
     addge       r0,  r1,  #32
-    sublts      r0,  ip,  r0
+    subslt      r0,  ip,  r0
     movlt       r0,  #0
     bx          lr
 .endfunc