[vlc-commits] Run-time CPU detection for ARM (meaning NEON)
Rémi Denis-Courmont
git at videolan.org
Mon Jul 4 20:19:14 CEST 2011
vlc | branch: master | Rémi Denis-Courmont <remi at remlab.net> | Mon Jul 4 21:14:18 2011 +0300| [f25efcaf2a7d3c2d9aefeb4974d225a25525fc21] | committer: Rémi Denis-Courmont
Run-time CPU detection for ARM (meaning NEON)
Unfortunately, we cannot emit NEON opcodes when NEON is not explicitly
enabled (-mfpu=neon), unlike MMX & SSE on x86. As a consequence,
run-time detection will not work for inline assembler in a non-optimized
plugin, namely the deinterlacer.
There is also a (hopefully theoretical) bug whereby the compiler would
emit NEON instructions in the descriptor or activation callback of a
NEON plugin. This could then crash if NEON is not supported, even
before the NEON run-time check is reached.
> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=f25efcaf2a7d3c2d9aefeb4974d225a25525fc21
---
configure.ac | 10 ++++--
modules/Makefile.am | 2 +-
modules/arm_neon/Modules.am | 2 +-
modules/video_filter/deinterlace/deinterlace.c | 2 +-
src/misc/cpu.c | 36 ++++++++++++++++++++++-
5 files changed, 44 insertions(+), 8 deletions(-)
diff --git a/configure.ac b/configure.ac
index 14c729c..831f94f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1489,12 +1489,16 @@ asm volatile("ssat r0, #1, r0":::"r0"); /* assume ARMv6 */
])
CFLAGS="${CFLAGS_save}"
])
- ARM_NEON_CFLAGS="$ac_cv_neon_inline"
+ AS_IF([test "$ac_cv_neon_inline" != "no"], [
+ NEON_CFLAGS="$ac_cv_neon_inline"
+ AC_DEFINE([CAN_COMPILE_NEON], 1,
+ [Define to 1 if NEON (and ARMv6) assembly is available with NEON_CFLAGS.])
+ ])
], [
ac_cv_neon_inline="no"
])
-AC_SUBST(ARM_NEON_CFLAGS)
-AM_CONDITIONAL(HAVE_ARM_NEON, [test "${ac_cv_neon_inline}" != "no"])
+AC_SUBST(NEON_CFLAGS)
+AM_CONDITIONAL(HAVE_NEON, [test "${ac_cv_neon_inline}" != "no"])
AC_ARG_ENABLE(altivec,
diff --git a/modules/Makefile.am b/modules/Makefile.am
index b84f2d9..4473820 100644
--- a/modules/Makefile.am
+++ b/modules/Makefile.am
@@ -51,7 +51,7 @@ endif
if HAVE_ALTIVEC
SUBDIRS += altivec
endif
-if HAVE_ARM_NEON
+if HAVE_NEON
SUBDIRS += arm_neon
endif
if BUILD_LUA
diff --git a/modules/arm_neon/Modules.am b/modules/arm_neon/Modules.am
index 1842561..8fa5b04 100644
--- a/modules/arm_neon/Modules.am
+++ b/modules/arm_neon/Modules.am
@@ -2,7 +2,7 @@
# without this. (This is the case with iOS).
LIBTOOL=@LIBTOOL@ --tag=CC
-AM_CFLAGS += $(ARM_NEON_CFLAGS)
+AM_CFLAGS += $(NEON_CFLAGS)
libaudio_format_neon_plugin_la_SOURCES = \
s32_s16.S \
diff --git a/modules/video_filter/deinterlace/deinterlace.c b/modules/video_filter/deinterlace/deinterlace.c
index fa1a00b..60b5f45 100644
--- a/modules/video_filter/deinterlace/deinterlace.c
+++ b/modules/video_filter/deinterlace/deinterlace.c
@@ -682,7 +682,7 @@ int Open( vlc_object_t *p_this )
}
else
#endif
-#if defined __ARM_NEON__
+#if defined __ARM_NEON__ // FIXME: runtime detect support
if( vlc_CPU() & CPU_CAPABILITY_NEON )
{
p_sys->pf_merge = MergeNEON;
diff --git a/src/misc/cpu.c b/src/misc/cpu.c
index ad0c0b6..81f6948 100644
--- a/src/misc/cpu.c
+++ b/src/misc/cpu.c
@@ -247,10 +247,42 @@ uint32_t CPUCapabilities( void )
}
out:
-#elif defined( __arm__ )
-# if defined( __ARM_NEON__ )
+#elif defined (__arm__)
+
+# if defined (__ARM_NEON__)
i_capabilities |= CPU_CAPABILITY_NEON;
+# elif defined (CAN_COMPILE_NEON)
+# define NEED_RUNTIME_CPU_CHECK 1
+# endif
+
+# ifdef NEED_RUNTIME_CPU_CHECK
+# if defined (__linux__)
+ FILE *info = fopen ("/proc/cpuinfo", "rt");
+ if (info != NULL)
+ {
+ char *line = NULL;
+ size_t linelen = 0;
+
+ while (getline (&line, &linelen, info) != -1)
+ {
+ const char *cap;
+
+ if (strncmp (line, "Features\t:", 10))
+ continue;
+# if defined (CAN_COMPILE_NEON) && !defined (__ARM_NEON__)
+ cap = strstr (line + 10, " neon");
+ if (cap != NULL && (cap[5] == '\0' || cap[5] == ' '))
+ i_capabilities |= CPU_CAPABILITY_NEON;
# endif
+ break;
+ }
+ fclose (info);
+ free (line);
+ }
+# else
+# warning Run-time CPU detection missing: optimizations disabled!
+# endif
+# endif
#elif defined( __powerpc__ ) || defined( __ppc__ ) || defined( __powerpc64__ ) \
|| defined( __ppc64__ )
More information about the vlc-commits
mailing list