[vlc-devel] [PATCH] define/use a function macro to handle the underscore prefix for some ARM ABI
David Geldreich
david.geldreich at free.fr
Fri Apr 20 16:41:19 CEST 2012
This corrects a compilation/link problem with the arm_neon plugins on iOS.
Inspired by the asm.S from ffmpeg, we define a "function" macro.
EXTERN_ASM is hardcoded for the iOS vs. Android case.
Ideally, it should be tested and defined in VLC's config.h by configure.
I am not sure of the effect of the endfunc macro in the case of the fall-through
happening in s32_s16.S.
---
modules/arm_neon/asm.S | 49 ++++++++++++++++++++++++++++++++++++++++++
modules/arm_neon/i420_rgb.S | 8 +++---
modules/arm_neon/i420_yuyv.S | 13 +++++------
modules/arm_neon/i422_yuyv.S | 14 ++++++------
modules/arm_neon/nv12_rgb.S | 9 ++++---
modules/arm_neon/nv21_rgb.S | 9 ++++---
modules/arm_neon/s32_s16.S | 14 ++++++------
modules/arm_neon/yuyv_i422.S | 13 +++++------
8 files changed, 89 insertions(+), 40 deletions(-)
create mode 100644 modules/arm_neon/asm.S
diff --git a/modules/arm_neon/asm.S b/modules/arm_neon/asm.S
new file mode 100644
index 0000000..ed0b99d
--- /dev/null
+++ b/modules/arm_neon/asm.S
@@ -0,0 +1,49 @@
+@*****************************************************************************
+@ asm.S : defines and macros
+@*****************************************************************************
+@ Copyright (C) 2012 David Geldreich <david.geldreich at free.fr>
+@
+@ This program is free software; you can redistribute it and/or modify
+@ it under the terms of the GNU General Public License as published by
+@ the Free Software Foundation; either version 2 of the License, or
+@ (at your option) any later version.
+@
+@ This program is distributed in the hope that it will be useful,
+@ but WITHOUT ANY WARRANTY; without even the implied warranty of
+@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+@ GNU General Public License for more details.
+@
+@ You should have received a copy of the GNU General Public License
+@ along with this program; if not, write to the Free Software Foundation,
+@ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+@****************************************************************************/
+
+#ifdef __APPLE__ /* exported symbol names get a leading underscore here */
+# define EXTERN_ASM _
+#else /* any other target: exported names are emitted without a prefix */
+# define EXTERN_ASM
+#endif /* __APPLE__ */
+
+#ifdef __ELF__ /* ELF output: lines tagged with the ELF macro are kept as-is */
+# define ELF
+#else /* otherwise the tag becomes the @ comment character, disabling the line */
+# define ELF @
+#endif /* __ELF__ */
+
+.macro function name, export=0 @ begin a function; non-zero export also makes it global
+ .macro endfunc @ (re)defined by every use of function, so it refers to the current name
+ELF .size \name, . - \name @ symbol size = bytes from the function label to this point
+ .endfunc @ closes the .func opened at the end of the enclosing macro
+ .purgem endfunc @ drop this definition so the next function can define its own
+ .endm
+ .text
+ .align 2 @ NOTE(review): presumably 2^2 = 4-byte alignment as in ARM gas; confirm
+ .if \export
+ .global EXTERN_ASM\name @ exported name, carrying the platform prefix if one is set
+EXTERN_ASM\name:
+ .endif
+ELF .type \name, %function @ tagged line: only assembled when the leading tag expands to nothing
+ .func \name
+\name: @ unprefixed label, defined whether or not the symbol is exported
+.endm
+
diff --git a/modules/arm_neon/i420_rgb.S b/modules/arm_neon/i420_rgb.S
index 1e9600e..f83c4a0 100644
--- a/modules/arm_neon/i420_rgb.S
+++ b/modules/arm_neon/i420_rgb.S
@@ -19,6 +19,8 @@
@ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
@****************************************************************************/
+#include "asm.S"
+
.fpu neon
.text
@@ -74,10 +76,7 @@ coefficients:
.short 4992
.short -18432
- .align
- .global i420_rgb_neon
- .type i420_rgb_neon, %function
-i420_rgb_neon:
+function i420_rgb_neon, export=1
push {r4-r8,r10-r11,lr}
vpush {q4-q7}
@@ -207,3 +206,4 @@ loop_col:
add U, U, YPAD, lsr #1
add V, V, YPAD, lsr #1
b loop_row
+endfunc
diff --git a/modules/arm_neon/i420_yuyv.S b/modules/arm_neon/i420_yuyv.S
index 8f4026a..93d7b5c 100644
--- a/modules/arm_neon/i420_yuyv.S
+++ b/modules/arm_neon/i420_yuyv.S
@@ -18,6 +18,8 @@
@ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
@****************************************************************************/
+#include "asm.S"
+
.fpu neon
.text
@@ -35,10 +37,7 @@
#define COUNT ip
#define OPITCH lr
- .align
- .global i420_yuyv_neon
- .type i420_yuyv_neon, %function
-i420_yuyv_neon:
+function i420_yuyv_neon, export=1
push {r4-r8,r10-r11,lr}
ldmia r0, {O1, OPITCH}
ldmia r1, {Y1, U, V, YPITCH}
@@ -74,10 +73,9 @@ i420_yuyv_neon:
add U, U, YPAD, lsr #1
add V, V, YPAD, lsr #1
b 1b
+endfunc
- .global i420_uyvy_neon
- .type i420_uyvy_neon, %function
-i420_uyvy_neon:
+function i420_uyvy_neon, export=1
push {r4-r8,r10-r11,lr}
ldmia r0, {O1, OPITCH}
ldmia r1, {Y1, U, V, YPITCH}
@@ -113,3 +111,4 @@ i420_uyvy_neon:
add U, U, YPAD, lsr #1
add V, V, YPAD, lsr #1
b 1b
+endfunc
diff --git a/modules/arm_neon/i422_yuyv.S b/modules/arm_neon/i422_yuyv.S
index 2c9f5c8..5fa5e3b 100644
--- a/modules/arm_neon/i422_yuyv.S
+++ b/modules/arm_neon/i422_yuyv.S
@@ -18,6 +18,8 @@
@ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
@****************************************************************************/
+#include "asm.S"
+
.fpu neon
.text
@@ -31,10 +33,7 @@
#define COUNT ip
#define YPAD lr
- .align
- .global i422_yuyv_neon
- .type i422_yuyv_neon, %function
-i422_yuyv_neon:
+function i422_yuyv_neon, export=1
push {r4-r6,lr}
ldmia r1, {Y, U, V, YPAD}
ldmia r0, {O, OPAD}
@@ -64,10 +63,9 @@ i422_yuyv_neon:
add Y, Y, YPAD
add O, O, OPAD
b 1b
+endfunc
- .global i422_uyvy_neon
- .type i422_uyvy_neon, %function
-i422_uyvy_neon:
+function i422_uyvy_neon, export=1
push {r4-r6,lr}
ldmia r1, {Y, U, V, YPAD}
ldmia r0, {O, OPAD}
@@ -96,3 +94,5 @@ i422_uyvy_neon:
add Y, Y, YPAD
add O, O, OPAD
b 1b
+endfunc
+
diff --git a/modules/arm_neon/nv12_rgb.S b/modules/arm_neon/nv12_rgb.S
index 64a2d76..2bada54 100644
--- a/modules/arm_neon/nv12_rgb.S
+++ b/modules/arm_neon/nv12_rgb.S
@@ -19,6 +19,8 @@
@ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
@****************************************************************************/
+#include "asm.S"
+
.fpu neon
.text
@@ -74,10 +76,7 @@ coefficients:
.short 4992
.short -18432
- .align
- .global nv12_rgb_neon
- .type nv12_rgb_neon, %function
-nv12_rgb_neon:
+function nv12_rgb_neon, export=1
push {r4-r8,r10-r11,lr}
vpush {q4-q7}
@@ -204,3 +203,5 @@ loop_col:
add Y1, Y2, YPAD
add U, U, YPAD
b loop_row
+endfunc
+
diff --git a/modules/arm_neon/nv21_rgb.S b/modules/arm_neon/nv21_rgb.S
index b7f9745..8d82d68 100644
--- a/modules/arm_neon/nv21_rgb.S
+++ b/modules/arm_neon/nv21_rgb.S
@@ -19,6 +19,8 @@
@ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
@****************************************************************************/
+#include "asm.S"
+
.fpu neon
.text
@@ -74,10 +76,7 @@ coefficients:
.short 4992
.short -18432
- .align
- .global nv21_rgb_neon
- .type nv21_rgb_neon, %function
-nv21_rgb_neon:
+function nv21_rgb_neon, export=1
push {r4-r8,r10-r11,lr}
vpush {q4-q7}
@@ -204,3 +203,5 @@ loop_col:
add Y1, Y2, YPAD
add U, U, YPAD
b loop_row
+endfunc
+
diff --git a/modules/arm_neon/s32_s16.S b/modules/arm_neon/s32_s16.S
index 88effca..fd2b657 100644
--- a/modules/arm_neon/s32_s16.S
+++ b/modules/arm_neon/s32_s16.S
@@ -18,6 +18,8 @@
@ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
@****************************************************************************/
+#include "asm.S"
+
.fpu neon
.text
@@ -27,12 +29,9 @@
#define BUF r3
#define HALF ip
- .align
- .global s32_s16_neon
- .type s32_s16_neon, %function
@ Converts fixed-point 32-bits to signed 16-bits
@ Input and output must be on 128-bits boundary
-s32_s16_neon:
+function s32_s16_neon, export=1
pld [IN]
2:
cmp N, #8
@@ -77,14 +76,13 @@ s32_s16_neon:
sub N, #4
vqrshrn.s32 d16, q8, #13
vst1.s16 {d16}, [OUT,:64]!
+endfunc
@ Fall through for last 0-3 samples
- .global s32_s16_neon_unaligned
- .type s32_s16_neon_unaligned, %function
@ Converts fixed-point 32-bits to signed 16-bits
@ Input must be on 32-bits boundary, output on 16-bits
-s32_s16_neon_unaligned:
+function s32_s16_neon_unaligned, export=1
mov HALF, #4096
1:
cmp N, #0
@@ -98,3 +96,5 @@ s32_s16_neon_unaligned:
ssat BUF, #16, BUF, asr #13
strh BUF, [OUT, #-2]
b 1b
+endfunc
+
diff --git a/modules/arm_neon/yuyv_i422.S b/modules/arm_neon/yuyv_i422.S
index 9a0dd82..46deace 100644
--- a/modules/arm_neon/yuyv_i422.S
+++ b/modules/arm_neon/yuyv_i422.S
@@ -18,6 +18,8 @@
@ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
@****************************************************************************/
+#include "asm.S"
+
.fpu neon
.text
@@ -31,10 +33,7 @@
#define COUNT ip
#define YPAD lr
- .align
- .global yuyv_i422_neon
- .type yuyv_i422_neon, %function
-yuyv_i422_neon:
+function yuyv_i422_neon, export=1
push {r4-r6,lr}
ldmia r0, {Y, U, V, YPAD}
ldmia r1, {I, IPAD}
@@ -62,10 +61,9 @@ yuyv_i422_neon:
add U, U, YPAD, lsr #1
add V, V, YPAD, lsr #1
b 1b
+endfunc
- .global uyvy_i422_neon
- .type uyvy_i422_neon, %function
-uyvy_i422_neon:
+function uyvy_i422_neon, export=1
push {r4-r6,lr}
ldmia r0, {Y, U, V, YPAD}
ldmia r1, {I, IPAD}
@@ -92,3 +90,4 @@ uyvy_i422_neon:
add U, U, YPAD, lsr #1
add V, V, YPAD, lsr #1
b 1b
+endfunc
--
1.7.7.5 (Apple Git-26)
More information about the vlc-devel
mailing list