[vlc-commits] deinterlace: initial ARM SVE merge function
Rémi Denis-Courmont
git at videolan.org
Thu Dec 20 21:18:33 CET 2018
vlc | branch: master | Rémi Denis-Courmont <remi at remlab.net> | Tue Dec 11 21:16:49 2018 +0200| [3985c874e0b71f73911aef084391a3a838eca877] | committer: Rémi Denis-Courmont
deinterlace: initial ARM SVE merge function
> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=3985c874e0b71f73911aef084391a3a838eca877
---
modules/video_filter/Makefile.am | 4 ++
modules/video_filter/deinterlace/deinterlace.c | 5 +++
modules/video_filter/deinterlace/merge.h | 3 ++
modules/video_filter/deinterlace/merge_sve.S | 54 ++++++++++++++++++++++++++
4 files changed, 66 insertions(+)
diff --git a/modules/video_filter/Makefile.am b/modules/video_filter/Makefile.am
index af190d16e3..d3db0c0d0a 100644
--- a/modules/video_filter/Makefile.am
+++ b/modules/video_filter/Makefile.am
@@ -144,6 +144,10 @@ if HAVE_ARM64
libdeinterlace_plugin_la_SOURCES += video_filter/deinterlace/merge_arm64.S
libdeinterlace_plugin_la_CFLAGS += -DCAN_COMPILE_ARM64
endif
+if HAVE_SVE
+libdeinterlace_plugin_la_SOURCES += video_filter/deinterlace/merge_sve.S
+libdeinterlace_plugin_la_CFLAGS += -DCAN_COMPILE_SVE
+endif
libdeinterlace_plugin_la_LIBADD = libdeinterlace_common.la
video_filter_LTLIBRARIES += libdeinterlace_plugin.la
diff --git a/modules/video_filter/deinterlace/deinterlace.c b/modules/video_filter/deinterlace/deinterlace.c
index 6bcba0f3bb..b8d0586123 100644
--- a/modules/video_filter/deinterlace/deinterlace.c
+++ b/modules/video_filter/deinterlace/deinterlace.c
@@ -575,6 +575,11 @@ notsupp:
p_sys->pf_merge = pixel_size == 1 ? merge8_armv6 : merge16_armv6;
else
#endif
+#if defined(CAN_COMPILE_SVE)
+ if( vlc_CPU_ARM_SVE() )
+ p_sys->pf_merge = pixel_size == 1 ? merge8_arm_sve : merge16_arm_sve;
+ else
+#endif
#if defined(CAN_COMPILE_ARM64)
if( vlc_CPU_ARM_NEON() )
p_sys->pf_merge = pixel_size == 1 ? merge8_arm64_neon : merge16_arm64_neon;
diff --git a/modules/video_filter/deinterlace/merge.h b/modules/video_filter/deinterlace/merge.h
index 70dcbef096..1a54b32db9 100644
--- a/modules/video_filter/deinterlace/merge.h
+++ b/modules/video_filter/deinterlace/merge.h
@@ -181,6 +181,9 @@ void merge16_arm64_neon (void *, const void *, const void *, size_t);
#endif
+void merge8_arm_sve(void *, const void *, const void *, size_t); /* dst, src1, src2, count of bytes (per merge_sve.S) */
+void merge16_arm_sve(void *, const void *, const void *, size_t); /* dst, src1, src2, count of 16-bit elements as implemented in merge_sve.S - TODO confirm against callers */
+
/*****************************************************************************
* EndMerge routines
*****************************************************************************/
diff --git a/modules/video_filter/deinterlace/merge_sve.S b/modules/video_filter/deinterlace/merge_sve.S
new file mode 100644
index 0000000000..9213d8b50f
--- /dev/null
+++ b/modules/video_filter/deinterlace/merge_sve.S
@@ -0,0 +1,54 @@
+/******************************************************************************
+ * merge_sve.S : ARM SVE mean
+ ******************************************************************************
+ * Copyright (C) 2018 Rémi Denis-Courmont
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+ .arch_extension sve
+
+ /* TODO: prefetch, unroll */
+
+ .text
+/*
+ * void merge8_arm_sve(void *dst, const void *src1, const void *src2,
+ *                     size_t count)
+ *
+ * Writes the truncating mean, (a + b) >> 1, of each pair of bytes read
+ * from src1 and src2 into dst.
+ *
+ * ABI:   AAPCS64
+ * In:    x0 = dst, x1 = src1, x2 = src2, x3 = count (bytes)
+ * Clobb: x4, z0, z1, p0, condition flags
+ */
+	.globl	merge8_arm_sve
+	.type	merge8_arm_sve, %function
+merge8_arm_sve:
+	mov	x4, #0			/* x4 = index of the next byte */
+	b	2f			/* evaluate the bound first: count may be 0 */
+	/* Bytes are zero-extended into 16-bit lanes so that the sum of two
+	 * 8-bit values cannot wrap before the shift. */
+1:	ld1b	{z0.h}, p0/z, [x1, x4]
+	ld1b	{z1.h}, p0/z, [x2, x4]
+	add	z0.h, z0.h, z1.h
+	lsr	z0.h, z0.h, #1
+	st1b	{z0.h}, p0, [x0, x4]	/* stores the low byte of each active lane */
+	inch	x4			/* advance by the number of 16-bit lanes */
+	/* The count is a size_t, hence the unsigned comparison (whilelo rather
+	 * than the signed whilelt). Lanes at or beyond the count stay inactive,
+	 * so the tail needs no separate scalar loop. */
+2:	whilelo	p0.h, x4, x3
+	b.first	1b			/* iterate while the first lane is active */
+	ret
+	.size	merge8_arm_sve, . - merge8_arm_sve
+
+/*
+ * void merge16_arm_sve(void *dst, const void *src1, const void *src2,
+ *                      size_t count)
+ *
+ * Writes the truncating mean, (a + b) >> 1, of each pair of 16-bit values
+ * read from src1 and src2 into dst.
+ *
+ * ABI:   AAPCS64
+ * In:    x0 = dst, x1 = src1, x2 = src2, x3 = count
+ * Clobb: x4, z0, z1, p0, condition flags
+ *
+ * NOTE(review): x3 is compared against an index that is scaled by two in
+ * the addressing modes, i.e. it is treated as a number of 16-bit elements,
+ * not a number of bytes. Confirm that callers pass an element count.
+ */
+	.globl	merge16_arm_sve
+	.type	merge16_arm_sve, %function
+merge16_arm_sve:
+	mov	x4, #0			/* x4 = index of the next 16-bit element */
+	b	2f			/* evaluate the bound first: count may be 0 */
+	/* Halfwords are zero-extended into 32-bit lanes so that the sum of two
+	 * 16-bit values cannot wrap before the shift. */
+1:	ld1h	{z0.s}, p0/z, [x1, x4, lsl #1]
+	ld1h	{z1.s}, p0/z, [x2, x4, lsl #1]
+	add	z0.s, z0.s, z1.s
+	lsr	z0.s, z0.s, #1
+	st1h	{z0.s}, p0, [x0, x4, lsl #1]	/* stores the low half of each active lane */
+	incw	x4			/* advance by the number of 32-bit lanes */
+	/* The count is a size_t, hence the unsigned comparison (whilelo rather
+	 * than the signed whilelt). Lanes at or beyond the count stay inactive,
+	 * so the tail needs no separate scalar loop. */
+2:	whilelo	p0.s, x4, x3
+	b.first	1b			/* iterate while the first lane is active */
+	ret
+	.size	merge16_arm_sve, . - merge16_arm_sve
More information about the vlc-commits
mailing list