[vlc-devel] [PATCHv3] arm: make the assembler functions compatible with non ELF/gas platforms

Martin Storsjö martin at martin.st
Mon Feb 19 11:19:00 CET 2018


From: Janne Grunau <janne-vlc at jannau.net>

Allow assembling the ARM NEON functions for iOS and ARM Windows.
---
Changed to only add a symbol prefix (a leading underscore) on Apple
platforms, not on arm/aarch64 Windows.

Will push in a day or so unless there's anything further to add/change.
---
 modules/arm_neon/amplify.S                     |  8 ++++--
 modules/arm_neon/asm.S                         | 39 ++++++++++++++++++++++++++
 modules/arm_neon/deinterleave_chroma.S         | 10 ++++---
 modules/arm_neon/i420_rgb.S                    | 10 ++++---
 modules/arm_neon/i420_rv16.S                   | 10 ++++---
 modules/arm_neon/i420_yuyv.S                   | 14 ++++-----
 modules/arm_neon/i422_yuyv.S                   | 14 ++++-----
 modules/arm_neon/nv12_rgb.S                    | 10 ++++---
 modules/arm_neon/nv21_rgb.S                    | 10 ++++---
 modules/arm_neon/simple_channel_mixer.S        | 38 +++++++++----------------
 modules/arm_neon/yuyv_i422.S                   | 14 ++++-----
 modules/video_filter/deinterlace/merge_arm.S   | 22 +++++++--------
 modules/video_filter/deinterlace/merge_arm64.S | 10 +++----
 13 files changed, 122 insertions(+), 87 deletions(-)
 create mode 100644 modules/arm_neon/asm.S

diff --git a/modules/arm_neon/amplify.S b/modules/arm_neon/amplify.S
index 5938118..9e655af 100644
--- a/modules/arm_neon/amplify.S
+++ b/modules/arm_neon/amplify.S
@@ -18,18 +18,20 @@
  @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  @****************************************************************************/
 
+#include "asm.S"
+
 	.syntax	unified
 	.arm
+#if HAVE_AS_FPU_DIRECTIVE
 	.fpu	neon
+#endif
 	.text
 
 #define	DST	r0
 #define	SRC	r1
 #define	SIZE	r2
 	.align 2
-	.global amplify_float_arm_neon
-	.type	amplify_float_arm_neon, %function
-amplify_float_arm_neon:
+function amplify_float_arm_neon
 	cmp		SIZE,	#0
 	bxeq		lr
 #ifdef __ARM_PCS
diff --git a/modules/arm_neon/asm.S b/modules/arm_neon/asm.S
new file mode 100644
index 0000000..728391e
--- /dev/null
+++ b/modules/arm_neon/asm.S
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Janne Grunau <janne-libav at jannau.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifdef __APPLE__                          /* EXTERN_ASM: prefix put in front of every exported symbol name */
+#   define EXTERN_ASM _                   /* Apple builds: symbols get a leading underscore */
+#else
+#   define EXTERN_ASM                     /* everywhere else: no prefix */
+#endif
+
+#if defined(__APPLE__) || defined(_WIN32) /* HAVE_AS_*_DIRECTIVE: gate the .arch / .fpu lines in the including files */
+#   define HAVE_AS_ARCH_DIRECTIVE 0       /* Apple and Windows builds: including files skip .arch */
+#   define HAVE_AS_FPU_DIRECTIVE  0       /* Apple and Windows builds: including files skip .fpu */
+#else
+#   define HAVE_AS_ARCH_DIRECTIVE 1
+#   define HAVE_AS_FPU_DIRECTIVE  1
+#endif
+
+.macro  function name                     /* open an exported function: replaces the .global/.type/label triple */
+	.globl  EXTERN_ASM\name           /* make the (possibly prefixed) symbol global */
+#ifdef __ELF__                            /* the .type directive is only emitted for ELF targets */
+	.type   EXTERN_ASM\name, %function
+#endif
+EXTERN_ASM\name:                          /* entry label; the function body follows the macro invocation */
+.endm
diff --git a/modules/arm_neon/deinterleave_chroma.S b/modules/arm_neon/deinterleave_chroma.S
index 019d647..9cd01c7 100644
--- a/modules/arm_neon/deinterleave_chroma.S
+++ b/modules/arm_neon/deinterleave_chroma.S
@@ -19,8 +19,12 @@
  @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  @****************************************************************************/
 
+#include "asm.S"
+
 	.syntax unified
-	.fpu neon
+#if HAVE_AS_FPU_DIRECTIVE
+	.fpu	neon
+#endif
 	.text
 
 #define UV	r0
@@ -35,9 +39,7 @@
 #define OPAD	lr
 
 	.align 2
-	.global deinterleave_chroma_neon
-	.type	deinterleave_chroma_neon, %function
-deinterleave_chroma_neon:
+function deinterleave_chroma_neon
 	push		{r4-r6,lr}
 	ldmia		r0,	{U, V, OPITCH}
 	ldmia		r1,	{UV, IPITCH}
diff --git a/modules/arm_neon/i420_rgb.S b/modules/arm_neon/i420_rgb.S
index a512b5f..54fb387 100644
--- a/modules/arm_neon/i420_rgb.S
+++ b/modules/arm_neon/i420_rgb.S
@@ -19,8 +19,12 @@
  @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  @****************************************************************************/
 
+#include "asm.S"
+
 	.syntax unified
-	.fpu neon
+#if HAVE_AS_FPU_DIRECTIVE
+	.fpu	neon
+#endif
 	.text
 
 /* ARM */
@@ -80,9 +84,7 @@ coefficients:
     .short  -18432
 
 	.align 2
-	.global i420_rgb_neon
-	.type	i420_rgb_neon, %function
-i420_rgb_neon:
+function i420_rgb_neon
 	push		{r4-r8,r10-r11,lr}
 	vpush		{q4-q7}
 
diff --git a/modules/arm_neon/i420_rv16.S b/modules/arm_neon/i420_rv16.S
index cd6d269..15d1e7b 100644
--- a/modules/arm_neon/i420_rv16.S
+++ b/modules/arm_neon/i420_rv16.S
@@ -19,8 +19,12 @@
  @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  @****************************************************************************/
 
+#include "asm.S"
+
 	.syntax unified
-	.fpu neon
+#if HAVE_AS_FPU_DIRECTIVE
+	.fpu	neon
+#endif
 	.text
 
 /* ARM */
@@ -83,9 +87,7 @@ coefficients:
     .short  -18432
 
 	.align 2
-	.global i420_rv16_neon
-	.type	i420_rv16_neon, %function
-i420_rv16_neon:
+function i420_rv16_neon
 	push		{r4-r8,r10-r11,lr}
 	vpush		{q4-q7}
 
diff --git a/modules/arm_neon/i420_yuyv.S b/modules/arm_neon/i420_yuyv.S
index 0dd04de..29668e4 100644
--- a/modules/arm_neon/i420_yuyv.S
+++ b/modules/arm_neon/i420_yuyv.S
@@ -18,8 +18,12 @@
  @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  @****************************************************************************/
 
+#include "asm.S"
+
 	.syntax unified
-	.fpu neon
+#if HAVE_AS_FPU_DIRECTIVE
+	.fpu	neon
+#endif
 	.text
 
 #define O1	r0
@@ -37,9 +41,7 @@
 #define OPITCH	lr
 
 	.align 2
-	.global i420_yuyv_neon
-	.type	i420_yuyv_neon, %function
-i420_yuyv_neon:
+function i420_yuyv_neon
 	push		{r4-r8,r10-r11,lr}
 	ldmia		r0,	{O1, OPITCH}
 	ldmia		r1,	{Y1, U, V, YPITCH}
@@ -76,9 +78,7 @@ i420_yuyv_neon:
 	add		V,	V,	YPAD,	lsr #1
 	b		1b
 
-	.global i420_uyvy_neon
-	.type	i420_uyvy_neon, %function
-i420_uyvy_neon:
+function i420_uyvy_neon
 	push		{r4-r8,r10-r11,lr}
 	ldmia		r0,	{O1, OPITCH}
 	ldmia		r1,	{Y1, U, V, YPITCH}
diff --git a/modules/arm_neon/i422_yuyv.S b/modules/arm_neon/i422_yuyv.S
index 0960267..9119839 100644
--- a/modules/arm_neon/i422_yuyv.S
+++ b/modules/arm_neon/i422_yuyv.S
@@ -18,8 +18,12 @@
  @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  @****************************************************************************/
 
+#include "asm.S"
+
 	.syntax unified
-	.fpu neon
+#if HAVE_AS_FPU_DIRECTIVE
+	.fpu	neon
+#endif
 	.text
 
 #define O	r0
@@ -33,9 +37,7 @@
 #define YPAD	lr
 
 	.align 2
-	.global i422_yuyv_neon
-	.type	i422_yuyv_neon, %function
-i422_yuyv_neon:
+function i422_yuyv_neon
 	push		{r4-r6,lr}
 	ldmia		r1,	{Y, U, V, YPAD}
 	ldmia		r0,	{O, OPAD}
@@ -66,9 +68,7 @@ i422_yuyv_neon:
 	add		O,	O,	OPAD
 	b		1b
 
-	.global i422_uyvy_neon
-	.type	i422_uyvy_neon, %function
-i422_uyvy_neon:
+function i422_uyvy_neon
 	push		{r4-r6,lr}
 	ldmia		r1,	{Y, U, V, YPAD}
 	ldmia		r0,	{O, OPAD}
diff --git a/modules/arm_neon/nv12_rgb.S b/modules/arm_neon/nv12_rgb.S
index f514c43..1bb924f 100644
--- a/modules/arm_neon/nv12_rgb.S
+++ b/modules/arm_neon/nv12_rgb.S
@@ -19,8 +19,12 @@
  @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  @****************************************************************************/
 
+#include "asm.S"
+
 	.syntax unified
-	.fpu neon
+#if HAVE_AS_FPU_DIRECTIVE
+	.fpu	neon
+#endif
 	.text
 
 /* ARM */
@@ -76,9 +80,7 @@ coefficients:
     .short  -18432
 
 	.align 2
-	.global nv12_rgb_neon
-	.type	nv12_rgb_neon, %function
-nv12_rgb_neon:
+function nv12_rgb_neon
 	push		{r4-r8,r10-r11,lr}
 	vpush		{q4-q7}
 
diff --git a/modules/arm_neon/nv21_rgb.S b/modules/arm_neon/nv21_rgb.S
index 599112e..f775b5a 100644
--- a/modules/arm_neon/nv21_rgb.S
+++ b/modules/arm_neon/nv21_rgb.S
@@ -19,8 +19,12 @@
  @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  @****************************************************************************/
 
+#include "asm.S"
+
 	.syntax unified
-	.fpu neon
+#if HAVE_AS_FPU_DIRECTIVE
+	.fpu	neon
+#endif
 	.text
 
 /* ARM */
@@ -76,9 +80,7 @@ coefficients:
     .short  -18432
 
 	.align 2
-	.global nv21_rgb_neon
-	.type	nv21_rgb_neon, %function
-nv21_rgb_neon:
+function nv21_rgb_neon
 	push		{r4-r8,r10-r11,lr}
 	vpush		{q4-q7}
 
diff --git a/modules/arm_neon/simple_channel_mixer.S b/modules/arm_neon/simple_channel_mixer.S
index a94ae85..cf9b1b5 100644
--- a/modules/arm_neon/simple_channel_mixer.S
+++ b/modules/arm_neon/simple_channel_mixer.S
@@ -19,7 +19,11 @@
  @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  @****************************************************************************/
 
-	.fpu neon
+#include "asm.S"
+
+#if HAVE_AS_FPU_DIRECTIVE
+	.fpu	neon
+#endif
 	.text
 	.align 2
 
@@ -34,9 +38,7 @@ coeff_7to2:
 	.float 0.5
 	.float 0.25
 	.float 0.25
-	.global convert_7_x_to_2_0_neon_asm
-	.type	convert_7_x_to_2_0_neon_asm, %function
-convert_7_x_to_2_0_neon_asm:
+function convert_7_x_to_2_0_neon_asm
 	push {r4,lr}
 
 	adr COEFF, coeff_7to2
@@ -70,9 +72,7 @@ coeff_5to2:
 	.float 0.5
 	.float 0.33
 	.float 0.33
-	.global convert_5_x_to_2_0_neon_asm
-	.type	convert_5_x_to_2_0_neon_asm, %function
-convert_5_x_to_2_0_neon_asm:
+function convert_5_x_to_2_0_neon_asm
 	push {r4,lr}
 
 	adr COEFF, coeff_5to2
@@ -100,9 +100,7 @@ convert_5_x_to_2_0_neon_asm:
 coeff_4to2:
 	.float 0.5
 	.float 0.5
-	.global convert_4_0_to_2_0_neon_asm
-	.type	convert_4_0_to_2_0_neon_asm, %function
-convert_4_0_to_2_0_neon_asm:
+function convert_4_0_to_2_0_neon_asm
 	push {r4,lr}
 
 	adr COEFF, coeff_4to2
@@ -124,9 +122,7 @@ convert_4_0_to_2_0_neon_asm:
 coeff_3to2:
 	.float 0.5
 	.float 0.5
-	.global convert_3_x_to_2_0_neon_asm
-	.type	convert_3_x_to_2_0_neon_asm, %function
-convert_3_x_to_2_0_neon_asm:
+function convert_3_x_to_2_0_neon_asm
 	push {r4,lr}
 
 	adr COEFF, coeff_3to2
@@ -154,9 +150,7 @@ coeff_7to1:
 	.float 0.25
 	.float 0.125
 	.float 0.125
-	.global convert_7_x_to_1_0_neon_asm
-	.type	convert_7_x_to_1_0_neon_asm, %function
-convert_7_x_to_1_0_neon_asm:
+function convert_7_x_to_1_0_neon_asm
 	push {r4,lr}
 
 	adr COEFF, coeff_7to1
@@ -188,9 +182,7 @@ coeff_5to1:
 	.float 0.25
 	.float 0.16666667
 	.float 0.16666667
-	.global convert_5_x_to_1_0_neon_asm
-	.type	convert_5_x_to_1_0_neon_asm, %function
-convert_5_x_to_1_0_neon_asm:
+function convert_5_x_to_1_0_neon_asm
 	push {r4,lr}
 
 	adr COEFF, coeff_5to1
@@ -219,9 +211,7 @@ coeff_7to4:
 	.float 0.5
 	.float 0.16666667
 	.float 0.16666667
-	.global convert_7_x_to_4_0_neon_asm
-	.type	convert_7_x_to_4_0_neon_asm, %function
-convert_7_x_to_4_0_neon_asm:
+function convert_7_x_to_4_0_neon_asm
 	push {r4,lr}
 
 	adr COEFF, coeff_7to4
@@ -252,9 +242,7 @@ convert_7_x_to_4_0_neon_asm:
 coeff_5to4:
 	.float 0.5
 	.float 0.5
-	.global convert_5_x_to_4_0_neon_asm
-	.type	convert_5_x_to_4_0_neon_asm, %function
-convert_5_x_to_4_0_neon_asm:
+function convert_5_x_to_4_0_neon_asm
 	push {r4,lr}
 
 	adr COEFF, coeff_5to4
diff --git a/modules/arm_neon/yuyv_i422.S b/modules/arm_neon/yuyv_i422.S
index c3774f5..637effe 100644
--- a/modules/arm_neon/yuyv_i422.S
+++ b/modules/arm_neon/yuyv_i422.S
@@ -18,8 +18,12 @@
  @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  @****************************************************************************/
 
+#include "asm.S"
+
 	.syntax unified
-	.fpu neon
+#if HAVE_AS_FPU_DIRECTIVE
+	.fpu	neon
+#endif
 	.text
 
 #define I	r0
@@ -33,9 +37,7 @@
 #define YPAD	lr
 
 	.align 2
-	.global yuyv_i422_neon
-	.type	yuyv_i422_neon, %function
-yuyv_i422_neon:
+function yuyv_i422_neon
 	push		{r4-r6,lr}
 	ldmia		r0,	{Y, U, V, YPAD}
 	ldmia		r1,	{I, IPAD}
@@ -64,9 +66,7 @@ yuyv_i422_neon:
 	add		V,	V,	YPAD,	lsr #1
 	b		1b
 
-	.global uyvy_i422_neon
-	.type	uyvy_i422_neon, %function
-uyvy_i422_neon:
+function uyvy_i422_neon
 	push		{r4-r6,lr}
 	ldmia		r0,	{Y, U, V, YPAD}
 	ldmia		r1,	{I, IPAD}
diff --git a/modules/video_filter/deinterlace/merge_arm.S b/modules/video_filter/deinterlace/merge_arm.S
index dd77902..d3f32c5 100644
--- a/modules/video_filter/deinterlace/merge_arm.S
+++ b/modules/video_filter/deinterlace/merge_arm.S
@@ -18,10 +18,16 @@
  @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  @****************************************************************************/
 
+#include "../../arm_neon/asm.S"
+
 	.syntax	unified
 	.arm
+#if HAVE_AS_ARCH_DIRECTIVE
 	.arch	armv6
+#endif
+#if HAVE_AS_FPU_DIRECTIVE
 	.fpu	neon
+#endif
 	.text
 
 #define	DEST	r0
@@ -30,10 +36,8 @@
 #define	SIZE	r3
 
 	.align 2
-	.global merge8_arm_neon
-	.type	merge8_arm_neon, %function
 	@ NOTE: Offset and pitch must be multiple of 16-bytes in VLC.
-merge8_arm_neon:
+function merge8_arm_neon
 	cmp		SIZE,	#64
 	blo		2f
 1:
@@ -72,9 +76,7 @@ merge8_arm_neon:
 	bx		lr
 
 	.align 2
-	.global merge16_arm_neon
-	.type	merge16_arm_neon, %function
-merge16_arm_neon:
+function merge16_arm_neon
 	cmp		SIZE,	#64
 	blo		2f
 1:
@@ -113,9 +115,7 @@ merge16_arm_neon:
 	bx		lr
 
 	.align 2
-	.global merge8_armv6
-	.type	merge8_armv6, %function
-merge8_armv6:
+function merge8_armv6
 	push		{r4-r9,lr}
 1:
 	pld		[SRC1, #64]
@@ -135,9 +135,7 @@ merge8_armv6:
 	b		1b
 
 	.align 2
-	.global merge16_armv6
-	.type	merge16_armv6, %function
-merge16_armv6:
+function merge16_armv6
 	push		{r4-r9,lr}
 1:
 	pld		[SRC1, #64]
diff --git a/modules/video_filter/deinterlace/merge_arm64.S b/modules/video_filter/deinterlace/merge_arm64.S
index db19e54..7b70678 100644
--- a/modules/video_filter/deinterlace/merge_arm64.S
+++ b/modules/video_filter/deinterlace/merge_arm64.S
@@ -19,6 +19,8 @@
  // Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  //****************************************************************************/
 
+#include "../../arm_neon/asm.S"
+
 	.text
 
 #define	DEST	x0
@@ -27,10 +29,8 @@
 #define	SIZE	x3
 
 	.align 2
-	.global merge8_arm64_neon
-	.type	merge8_arm64_neon, %function
 	// NOTE: Offset and pitch must be multiple of 16-bytes in VLC.
-merge8_arm64_neon:
+function merge8_arm64_neon
 	ands		x5, SIZE, #~63
 	b.eq		2f
 	mov		x10, #64
@@ -66,9 +66,7 @@ merge8_arm64_neon:
 	ret
 
 	.align 2
-	.global merge16_arm64_neon
-	.type	merge16_arm64_neon, %function
-merge16_arm64_neon:
+function merge16_arm64_neon
 	ands		x5, SIZE, #~63
 	b.eq		2f
 1:
-- 
2.7.4



More information about the vlc-devel mailing list