[vlc-commits] [Git][videolan/vlc][master] 9 commits: cpu: probe by descending priorities

Rémi Denis-Courmont (@Courmisch) gitlab at videolan.org
Sat Feb 26 19:58:34 UTC 2022



Rémi Denis-Courmont pushed to branch master at VideoLAN / VLC


Commits:
ba5a1f48 by Rémi Denis-Courmont at 2022-02-26T21:58:03+02:00
cpu: probe by descending priorities

The callbacks are initially for default C implementation, and are
updated by the "worst" optimisation first, with the best last.
(We do not stop at the first supported optimisation as not all
modules necessarily provide all optimised callbacks.)

This probes modules by descending score, so that we can assign highest
score to the best optimisation.

This makes no functional difference; it merely provides a more intuitive
module scoring.

- - - - -
6e06e0fc by Rémi Denis-Courmont at 2022-02-26T21:58:03+02:00
deinterlace: hook vlc_CPU_functions_init()

- - - - -
85f29856 by Rémi Denis-Courmont at 2022-02-26T21:58:03+02:00
deinterlace: move RISC-V V code to backend module

- - - - -
8e197aa3 by Rémi Denis-Courmont at 2022-02-26T21:58:03+02:00
rvv: add missing .size directives

- - - - -
dccbcb9b by Rémi Denis-Courmont at 2022-02-26T21:58:03+02:00
deinterlace: move AArch64 AdvSIMD code to backend module

- - - - -
d53396b8 by Rémi Denis-Courmont at 2022-02-26T21:58:03+02:00
deinterlace: move AArch64 SVE code to backend module

- - - - -
7f648830 by Rémi Denis-Courmont at 2022-02-26T21:58:03+02:00
sve: missing alignment

- - - - -
73672513 by Rémi Denis-Courmont at 2022-02-26T21:58:03+02:00
deinterlace: move ARM SIMD code to backend module

- - - - -
1dae40e6 by Rémi Denis-Courmont at 2022-02-26T21:58:03+02:00
deinterlace: move ARM NEON code to backend module

- - - - -


18 changed files:

- modules/Makefile.am
- + modules/isa/aarch64/Makefile.am
- + modules/isa/aarch64/simd/deinterlace.c
- modules/video_filter/deinterlace/merge_arm64.S → modules/isa/aarch64/simd/merge.S
- + modules/isa/aarch64/sve/deinterlace.c
- modules/video_filter/deinterlace/merge_sve.S → modules/isa/aarch64/sve/merge.S
- modules/isa/arm/Makefile.am
- + modules/isa/arm/neon/deinterlace.c
- modules/video_filter/deinterlace/merge_arm.S → modules/isa/arm/neon/merge.S
- + modules/isa/arm/simd/deinterlace.c
- + modules/isa/arm/simd/merge.S
- modules/isa/riscv/Makefile.am
- + modules/isa/riscv/deinterlace.c
- modules/video_filter/deinterlace/merge_rvv.S → modules/isa/riscv/rvv_merge.S
- modules/video_filter/Makefile.am
- modules/video_filter/deinterlace/deinterlace.c
- modules/video_filter/deinterlace/merge.h
- src/misc/cpu.c


Changes:

=====================================
modules/Makefile.am
=====================================
@@ -31,6 +31,7 @@ include hw/d3d11/Makefile.am
 include hw/vaapi/Makefile.am
 include hw/vdpau/Makefile.am
 include hw/mmal/Makefile.am
+include isa/aarch64/Makefile.am
 include isa/arm/Makefile.am
 include isa/riscv/Makefile.am
 include keystore/Makefile.am


=====================================
modules/isa/aarch64/Makefile.am
=====================================
@@ -0,0 +1,18 @@
+aarch64dir = $(pluginsdir)/aarch64
+aarch64_LTLIBRARIES =
+
+libdeinterlace_aarch64_plugin_la_SOURCES = \
+	isa/aarch64/simd/deinterlace.c isa/aarch64/simd/merge.S
+
+if HAVE_ARM64
+aarch64_LTLIBRARIES += \
+	libdeinterlace_aarch64_plugin.la
+endif
+
+libdeinterlace_sve_plugin_la_SOURCES = \
+	isa/aarch64/sve/deinterlace.c isa/aarch64/sve/merge.S
+
+if HAVE_SVE
+aarch64_LTLIBRARIES += \
+	libdeinterlace_sve_plugin.la
+endif


=====================================
modules/isa/aarch64/simd/deinterlace.c
=====================================
@@ -0,0 +1,46 @@
+/*****************************************************************************
+ * deinterlace.c: AArch64 AdvSIMD deinterlacing functions
+ *****************************************************************************
+ * Copyright (C) 2022 Rémi Denis-Courmont
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <vlc_common.h>
+#include <vlc_cpu.h>
+#include <vlc_plugin.h>
+#include "../../../video_filter/deinterlace/merge.h"
+
+void merge8_arm64(void *, const void *, const void *, size_t);
+void merge16_arm64(void *, const void *, const void *, size_t);
+
+static void Probe(void *data)
+{
+    if (vlc_CPU_ARM_NEON()) {
+        struct deinterlace_functions *const f = data;
+
+        f->merges[0] = merge8_arm64;
+        f->merges[1] = merge16_arm64;
+    }
+}
+
+vlc_module_begin()
+    set_description("AArch64 AvdSIMD optimisation for deinterlacing")
+    set_cpu_funcs("deinterlace functions", Probe, 10)
+vlc_module_end()


=====================================
modules/video_filter/deinterlace/merge_arm64.S → modules/isa/aarch64/simd/merge.S
=====================================
@@ -1,5 +1,5 @@
  //*****************************************************************************
- // merge_arm64.S : ARM64 NEON mean
+ // merge.S : AArch64 Advanced SIMD mean
  //*****************************************************************************
  // Copyright (C) 2009-2012 Rémi Denis-Courmont
  // Copyright (C) 2016-	   Janne Grunau
@@ -19,7 +19,7 @@
  // Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  //****************************************************************************/
 
-#include "../../isa/arm/asm.S"
+#include "../../arm/asm.S"
 
 	.arch armv8-a+simd
 	.text
@@ -32,7 +32,7 @@
 
 	.align 2
 	// NOTE: Offset and pitch must be multiple of 16-bytes in VLC.
-function merge8_arm64_neon
+function merge8_arm64
 	bti		c
 	ands		x5, SIZE, #~63
 	b.eq		2f
@@ -69,7 +69,7 @@ function merge8_arm64_neon
 	ret
 
 	.align 2
-function merge16_arm64_neon
+function merge16_arm64
 	bti		c
 	ands		x5, SIZE, #~63
 	b.eq		2f


=====================================
modules/isa/aarch64/sve/deinterlace.c
=====================================
@@ -0,0 +1,46 @@
+/*****************************************************************************
+ * deinterlace.c: AArch64 Scalable Vector Extension deinterlacing functions
+ *****************************************************************************
+ * Copyright (C) 2022 Rémi Denis-Courmont
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <vlc_common.h>
+#include <vlc_cpu.h>
+#include <vlc_plugin.h>
+#include "../../../video_filter/deinterlace/merge.h"
+
+void merge8_arm_sve(void *, const void *, const void *, size_t);
+void merge16_arm_sve(void *, const void *, const void *, size_t);
+
+static void Probe(void *data)
+{
+    if (vlc_CPU_ARM_SVE()) {
+        struct deinterlace_functions *const f = data;
+
+        f->merges[0] = merge8_arm_sve;
+        f->merges[1] = merge16_arm_sve;
+    }
+}
+
+vlc_module_begin()
+    set_description("AArch64 SVE optimisation for deinterlacing")
+    set_cpu_funcs("deinterlace functions", Probe, 20)
+vlc_module_end()


=====================================
modules/video_filter/deinterlace/merge_sve.S → modules/isa/aarch64/sve/merge.S
=====================================
@@ -1,5 +1,5 @@
 /******************************************************************************
- * merge_sve.S : ARM SVE mean
+ * merge.S : ARM SVE mean
  ******************************************************************************
  * Copyright (C) 2018 Rémi Denis-Courmont
  *
@@ -18,13 +18,14 @@
  * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  *****************************************************************************/
 
-#include "../../isa/arm/asm.S"
+#include "../../arm/asm.S"
 
-	.arch armv8-a+sve
+	.arch	armv8-a+sve
 
 	/* TODO: prefetch, unroll */
 
 	.text
+	.align	2
 	bti_advertise
 function merge8_arm_sve
 	bti	c


=====================================
modules/isa/arm/Makefile.am
=====================================
@@ -1,5 +1,15 @@
+armdir = $(pluginsdir)/arm
 neondir = $(pluginsdir)/arm_neon
 
+libdeinterlace_arm_simd_plugin_la_SOURCES = \
+	isa/arm/simd/deinterlace.c isa/arm/simd/merge.S
+
+# TODO? use dedicated conditional, or purge
+if HAVE_NEON
+arm_LTLIBRARIES = \
+	libdeinterlace_arm_simd_plugin.la
+endif
+
 libchroma_yuv_neon_plugin_la_SOURCES = \
 	isa/arm/neon/deinterleave_chroma.S \
 	isa/arm/neon/i420_yuyv.S \
@@ -9,6 +19,9 @@ libchroma_yuv_neon_plugin_la_SOURCES = \
 libchroma_yuv_neon_plugin_la_CFLAGS = $(AM_CFLAGS)
 libchroma_yuv_neon_plugin_LIBTOOLFLAGS = --tag=CC
 
+libdeinterlace_neon_plugin_la_SOURCES = \
+	isa/arm/neon/deinterlace.c isa/arm/neon/merge.S
+
 libvolume_neon_plugin_la_SOURCES = isa/arm/neon/volume.c isa/arm/neon/amplify.S
 libvolume_neon_plugin_la_CFLAGS = $(AM_CFLAGS)
 libvolume_neon_plugin_LIBTOOLFLAGS = --tag=CC
@@ -25,6 +38,7 @@ libyuv_rgb_neon_plugin_LIBTOOLFLAGS = --tag=CC
 if HAVE_NEON
 neon_LTLIBRARIES = \
 	libchroma_yuv_neon_plugin.la \
+	libdeinterlace_neon_plugin.la \
 	libvolume_neon_plugin.la \
 	libyuv_rgb_neon_plugin.la
 endif


=====================================
modules/isa/arm/neon/deinterlace.c
=====================================
@@ -0,0 +1,46 @@
+/*****************************************************************************
+ * deinterlace.c: ARM NEON deinterlacing functions
+ *****************************************************************************
+ * Copyright (C) 2022 Rémi Denis-Courmont
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <vlc_common.h>
+#include <vlc_cpu.h>
+#include <vlc_plugin.h>
+#include "../../../video_filter/deinterlace/merge.h"
+
+void merge8_arm_neon(void *, const void *, const void *, size_t);
+void merge16_arm_neon(void *, const void *, const void *, size_t);
+
+static void Probe(void *data)
+{
+    if (vlc_CPU_ARM_NEON()) {
+        struct deinterlace_functions *const f = data;
+
+        f->merges[0] = merge8_arm_neon;
+        f->merges[1] = merge16_arm_neon;
+    }
+}
+
+vlc_module_begin()
+    set_description("ARM NEON optimisation for deinterlacing")
+    set_cpu_funcs("deinterlace functions", Probe, 20)
+vlc_module_end()


=====================================
modules/video_filter/deinterlace/merge_arm.S → modules/isa/arm/neon/merge.S
=====================================
@@ -1,5 +1,5 @@
  @*****************************************************************************
- @ merge_arm.S : ARM NEON mean
+ @ merge.S : ARM NEON mean
  @*****************************************************************************
  @ Copyright (C) 2009-2012 Rémi Denis-Courmont
  @
@@ -18,11 +18,11 @@
  @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  @****************************************************************************/
 
-#include "../../isa/arm/asm.S"
+#include "../asm.S"
 
 	.syntax	unified
 #if HAVE_AS_ARCH_DIRECTIVE
-	.arch	armv6
+	.arch	armv7-a
 #endif
 #if HAVE_AS_FPU_DIRECTIVE
 	.fpu	neon
@@ -114,45 +114,3 @@ function merge16_arm_neon
 	vhadd.u16	q0,	q0,	q8
 	vst1.u16	{q0},		[DEST,:128]!
 	bx		lr
-
-	.align 2
-function merge8_armv6
-	push		{r4-r9,lr}
-1:
-	pld		[SRC1, #64]
-	ldm		SRC1!,	{r4-r5}
-	pld		[SRC2, #64]
-	ldm		SRC2!,	{r8-r9}
-	subs		SIZE,	SIZE,	#16
-	uhadd8		r4,	r4,	r8
-	ldm		SRC1!,	{r6-r7}
-	uhadd8		r5,	r5,	r9
-	ldm		SRC2!,	{ip,lr}
-	uhadd8		r6,	r6,	ip
-	stm		DEST!,	{r4-r5}
-	uhadd8		r7,	r7,	lr
-	stm		DEST!,	{r6-r7}
-	it		eq
-	popeq		{r4-r9,pc}
-	b		1b
-
-	.align 2
-function merge16_armv6
-	push		{r4-r9,lr}
-1:
-	pld		[SRC1, #64]
-	ldm		SRC1!,	{r4-r5}
-	pld		[SRC2, #64]
-	ldm		SRC2!,	{r8-r9}
-	subs		SIZE,	SIZE,	#16
-	uhadd16		r4,	r4,	r8
-	ldm		SRC1!,	{r6-r7}
-	uhadd16		r5,	r5,	r9
-	ldm		SRC2!,	{ip,lr}
-	uhadd16		r6,	r6,	ip
-	stm		DEST!,	{r4-r5}
-	uhadd16		r7,	r7,	lr
-	stm		DEST!,	{r6-r7}
-	it		eq
-	popeq		{r4-r9,pc}
-	b		1b


=====================================
modules/isa/arm/simd/deinterlace.c
=====================================
@@ -0,0 +1,46 @@
+/*****************************************************************************
+ * deinterlace.c: ARMv6 SIMD deinterlacing functions
+ *****************************************************************************
+ * Copyright (C) 2022 Rémi Denis-Courmont
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <vlc_common.h>
+#include <vlc_cpu.h>
+#include <vlc_plugin.h>
+#include "../../../video_filter/deinterlace/merge.h"
+
+void merge8_armv6(void *, const void *, const void *, size_t);
+void merge16_armv6(void *, const void *, const void *, size_t);
+
+static void Probe(void *data)
+{
+    if (vlc_CPU_ARMv6()) {
+        struct deinterlace_functions *const f = data;
+
+        f->merges[0] = merge8_armv6;
+        f->merges[1] = merge16_armv6;
+    }
+}
+
+vlc_module_begin()
+    set_description("ARM SIMD optimisation for deinterlacing")
+    set_cpu_funcs("deinterlace functions", Probe, 10)
+vlc_module_end()


=====================================
modules/isa/arm/simd/merge.S
=====================================
@@ -0,0 +1,74 @@
+ @*****************************************************************************
+ @ merge.S: ARMv6 SIMD mean
+ @*****************************************************************************
+ @ Copyright (C) 2009-2012 Rémi Denis-Courmont
+ @
+ @ This program is free software; you can redistribute it and/or modify
+ @ it under the terms of the GNU Lesser General Public License as published by
+ @ the Free Software Foundation; either version 2.1 of the License, or
+ @ (at your option) any later version.
+ @
+ @ This program is distributed in the hope that it will be useful,
+ @ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ @ GNU Lesser General Public License for more details.
+ @
+ @ You should have received a copy of the GNU Lesser General Public License
+ @ along with this program; if not, write to the Free Software Foundation,
+ @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ @****************************************************************************/
+
+#include "../asm.S"
+
+	.syntax	unified
+#if HAVE_AS_ARCH_DIRECTIVE
+	.arch	armv6
+#endif
+	.text
+
+#define	DEST	r0
+#define	SRC1	r1
+#define	SRC2	r2
+#define	SIZE	r3
+
+	.align 2
+function merge8_armv6
+	push		{r4-r9,lr}
+1:
+	pld		[SRC1, #64]
+	ldm		SRC1!,	{r4-r5}
+	pld		[SRC2, #64]
+	ldm		SRC2!,	{r8-r9}
+	subs		SIZE,	SIZE,	#16
+	uhadd8		r4,	r4,	r8
+	ldm		SRC1!,	{r6-r7}
+	uhadd8		r5,	r5,	r9
+	ldm		SRC2!,	{ip,lr}
+	uhadd8		r6,	r6,	ip
+	stm		DEST!,	{r4-r5}
+	uhadd8		r7,	r7,	lr
+	stm		DEST!,	{r6-r7}
+	it		eq
+	popeq		{r4-r9,pc}
+	b		1b
+
+	.align 2
+function merge16_armv6
+	push		{r4-r9,lr}
+1:
+	pld		[SRC1, #64]
+	ldm		SRC1!,	{r4-r5}
+	pld		[SRC2, #64]
+	ldm		SRC2!,	{r8-r9}
+	subs		SIZE,	SIZE,	#16
+	uhadd16		r4,	r4,	r8
+	ldm		SRC1!,	{r6-r7}
+	uhadd16		r5,	r5,	r9
+	ldm		SRC2!,	{ip,lr}
+	uhadd16		r6,	r6,	ip
+	stm		DEST!,	{r4-r5}
+	uhadd16		r7,	r7,	lr
+	stm		DEST!,	{r6-r7}
+	it		eq
+	popeq		{r4-r9,pc}
+	b		1b


=====================================
modules/isa/riscv/Makefile.am
=====================================
@@ -1,5 +1,7 @@
 riscvdir = $(pluginsdir)/riscv
 
+libdeinterlace_rvv_plugin_la_SOURCES = \
+	isa/riscv/deinterlace.c isa/riscv/rvv_merge.S
 libtransform_rvv_plugin_la_SOURCES = \
 	isa/riscv/transform.c isa/riscv/rvv_transform.S
 
@@ -8,6 +10,7 @@ libvolume_rvv_plugin_la_LIBADD = $(AM_LIBADD) $(LIBM)
 
 if HAVE_RVV
 riscv_LTLIBRARIES = \
+	libdeinterlace_rvv_plugin.la \
 	libtransform_rvv_plugin.la \
 	libvolume_rvv_plugin.la
 endif


=====================================
modules/isa/riscv/deinterlace.c
=====================================
@@ -0,0 +1,46 @@
+/*****************************************************************************
+ * deinterlace.c: RISC-V V deinterlacing functions
+ *****************************************************************************
+ * Copyright (C) 2022 Rémi Denis-Courmont
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <vlc_common.h>
+#include <vlc_cpu.h>
+#include <vlc_plugin.h>
+#include "../../video_filter/deinterlace/merge.h"
+
+void merge8_rvv(void *, const void *, const void *, size_t);
+void merge16_rvv(void *, const void *, const void *, size_t);
+
+static void Probe(void *data)
+{
+    if (vlc_CPU_RV_V()) {
+        struct deinterlace_functions *const f = data;
+
+        f->merges[0] = merge8_rvv;
+        f->merges[1] = merge16_rvv;
+    }
+}
+
+vlc_module_begin()
+    set_description("RISC-V V optimisation for deinterlacing")
+    set_cpu_funcs("deinterlace functions", Probe, 10)
+vlc_module_end()


=====================================
modules/video_filter/deinterlace/merge_rvv.S → modules/isa/riscv/rvv_merge.S
=====================================
@@ -1,5 +1,5 @@
 /******************************************************************************
- * merge_rvv.S: RISC-V Vector mean
+ * rvv_merge.S: RISC-V Vector mean
  ******************************************************************************
  * Copyright (C) 2022 Rémi Denis-Courmont
  *
@@ -37,6 +37,7 @@ merge8_rvv:
 	add	a0, a0, t0
 	bnez	a3, 1b
 	ret
+	.size	merge8_rvv, . - merge8_rvv
 
 	.globl	merge16_rvv
 	.type	merge16_rvv, %function
@@ -55,4 +56,5 @@ merge16_rvv:
 	add	a0, a0, t1
 	bnez	a3, 1b
 	ret
+	.size	merge16_rvv, . - merge16_rvv
 


=====================================
modules/video_filter/Makefile.am
=====================================
@@ -179,22 +179,6 @@ libdeinterlace_plugin_la_SOURCES += video_filter/deinterlace/yadif_x86.asm
 # inline ASM doesn't build with -O0
 libdeinterlace_plugin_la_CFLAGS += -O2
 endif
-if HAVE_NEON
-libdeinterlace_plugin_la_SOURCES += video_filter/deinterlace/merge_arm.S
-libdeinterlace_plugin_la_CPPFLAGS += -DCAN_COMPILE_ARM
-endif
-if HAVE_ARM64
-libdeinterlace_plugin_la_SOURCES += video_filter/deinterlace/merge_arm64.S
-libdeinterlace_plugin_la_CPPFLAGS += -DCAN_COMPILE_ARM64
-endif
-if HAVE_SVE
-libdeinterlace_plugin_la_SOURCES += video_filter/deinterlace/merge_sve.S
-libdeinterlace_plugin_la_CPPFLAGS += -DCAN_COMPILE_SVE
-endif
-if HAVE_RVV
-libdeinterlace_plugin_la_SOURCES += video_filter/deinterlace/merge_rvv.S
-libdeinterlace_plugin_la_CPPFLAGS += -DCAN_COMPILE_RVV
-endif
 libdeinterlace_plugin_la_LIBADD = libdeinterlace_common.la
 video_filter_LTLIBRARIES += libdeinterlace_plugin.la
 


=====================================
modules/video_filter/deinterlace/deinterlace.c
=====================================
@@ -488,6 +488,10 @@ static const struct vlc_filter_operations filter_ops = {
     .close = Close,
 };
 
+static struct deinterlace_functions funcs = {
+    { Merge8BitGeneric, Merge16BitGeneric, },
+};
+
 /*****************************************************************************
  * Open
  *****************************************************************************/
@@ -560,32 +564,10 @@ notsupp:
         p_sys->pf_end_merge = EndSSE;
     }
     else
-#endif
-#if defined(CAN_COMPILE_ARM)
-    if( vlc_CPU_ARM_NEON() )
-        p_sys->pf_merge = pixel_size == 1 ? merge8_arm_neon : merge16_arm_neon;
-    else
-    if( vlc_CPU_ARMv6() )
-        p_sys->pf_merge = pixel_size == 1 ? merge8_armv6 : merge16_armv6;
-    else
-#endif
-#if defined(CAN_COMPILE_SVE)
-    if( vlc_CPU_ARM_SVE() )
-        p_sys->pf_merge = pixel_size == 1 ? merge8_arm_sve : merge16_arm_sve;
-    else
-#endif
-#if defined(CAN_COMPILE_ARM64)
-    if( vlc_CPU_ARM_NEON() )
-        p_sys->pf_merge = pixel_size == 1 ? merge8_arm64_neon : merge16_arm64_neon;
-    else
-#endif
-#if defined(CAN_COMPILE_RVV)
-    if( vlc_CPU_RV_V() )
-        p_sys->pf_merge = pixel_size == 1 ? merge8_rvv : merge16_rvv;
-    else
 #endif
     {
-        p_sys->pf_merge = pixel_size == 1 ? Merge8BitGeneric : Merge16BitGeneric;
+        vlc_CPU_functions_init_once("deinterlace functions", &funcs);
+        p_sys->pf_merge = funcs.merges[vlc_ctz(pixel_size)];
 #if defined(__i386__) || defined(__x86_64__)
         p_sys->pf_end_merge = NULL;
 #endif


=====================================
modules/video_filter/deinterlace/merge.h
=====================================
@@ -31,6 +31,35 @@
  * Merge (line blending) routines for the VLC deinterlacer.
  */
 
+/**
+ * Average two vectors.
+ *
+ * This callback shall compute the element-wise rounded average of two vectors.
+ * This is used for blending scan lines of two fields for deinterlacing.
+ *
+ * The size of element is specified by the context,
+ * namely \see deinterlace_functions.
+ * Currently 8-bit and 16-bit elements are supported.
+ *
+ * \param d Output vector
+ * \param s1 First source vector
+ * \param s2 Second source vector
+ * \param len size of vectors in bytes
+ */
+
+typedef void (*merge_cb)(void *d, const void *s1, const void *s2, size_t len);
+
+/**
+ * Deinterlacing optimisation callbacks.
+ */
+struct deinterlace_functions {
+    /** Element-wise vector average
+     *
+     * The first array entries are indexed by the binary order of magnitude
+     * of the element size in bytes: 0 for 8-bit, 1 for 16-bit. */
+    merge_cb merges[2];
+};
+
 /*****************************************************************************
  * Macros
  *****************************************************************************/
@@ -133,35 +162,6 @@ void Merge8BitSSE2( void *, const void *, const void *, size_t );
 void Merge16BitSSE2( void *, const void *, const void *, size_t );
 #endif
 
-#if defined(CAN_COMPILE_ARM)
-/**
- * ARM NEON routine to blend pixels from two picture lines.
- */
-void merge8_arm_neon (void *, const void *, const void *, size_t);
-void merge16_arm_neon (void *, const void *, const void *, size_t);
-
-/**
- * ARMv6 SIMD routine to blend pixels from two picture lines.
- */
-void merge8_armv6 (void *, const void *, const void *, size_t);
-void merge16_armv6 (void *, const void *, const void *, size_t);
-#endif
-
-#if defined(CAN_COMPILE_ARM64)
-/**
- * ARM64 NEON routine to blend pixels from two picture lines.
- */
-void merge8_arm64_neon (void *, const void *, const void *, size_t);
-void merge16_arm64_neon (void *, const void *, const void *, size_t);
-
-#endif
-
-void merge8_arm_sve(void *, const void *, const void *, size_t);
-void merge16_arm_sve(void *, const void *, const void *, size_t);
-
-void merge8_rvv(void *, const void *, const void *, size_t);
-void merge16_rvv(void *, const void *, const void *, size_t);
-
 /*****************************************************************************
  * EndMerge routines
  *****************************************************************************/


=====================================
src/misc/cpu.c
=====================================
@@ -299,7 +299,8 @@ void vlc_CPU_functions_init(const char *capability, void *restrict funcs)
     module_t **mods;
     ssize_t n = vlc_module_match(capability, NULL, false, &mods, NULL);
 
-    for (ssize_t i = 0; i < n; i++) {
+    /* Descending order so higher priorities override the lower ones */
+    for (ssize_t i = n - 1; i >= 0; i--) {
         void (*init)(void *) = vlc_module_map(NULL, mods[i]);
         if (likely(init != NULL))
             init(funcs);



View it on GitLab: https://code.videolan.org/videolan/vlc/-/compare/ca084e43d29cc315099e2013650a32ae1fdb3f86...1dae40e6605a8cd547f3d1ecb29b785400e1c737

-- 
View it on GitLab: https://code.videolan.org/videolan/vlc/-/compare/ca084e43d29cc315099e2013650a32ae1fdb3f86...1dae40e6605a8cd547f3d1ecb29b785400e1c737
You're receiving this email because of your account on code.videolan.org.


VideoLAN code repository instance


More information about the vlc-commits mailing list