[vlc-commits] [Git][videolan/vlc][master] 9 commits: cpu: probe by descending priorities
Rémi Denis-Courmont (@Courmisch)
gitlab at videolan.org
Sat Feb 26 19:58:34 UTC 2022
Rémi Denis-Courmont pushed to branch master at VideoLAN / VLC
Commits:
ba5a1f48 by Rémi Denis-Courmont at 2022-02-26T21:58:03+02:00
cpu: probe by descending priorities
The callbacks initially point to the default C implementation, and are
updated by the "worst" optimisation first, with the best last.
(We do not stop at the first supported optimisation as not all
modules necessarily provide all optimised callbacks.)
This probes modules by descending score, so that we can assign highest
score to the best optimisation.
This makes no functional difference; it merely provides a more intuitive
module scoring.
- - - - -
6e06e0fc by Rémi Denis-Courmont at 2022-02-26T21:58:03+02:00
deinterlace: hook vlc_CPU_functions_init()
- - - - -
85f29856 by Rémi Denis-Courmont at 2022-02-26T21:58:03+02:00
deinterlace: move RISC-V V code to backend module
- - - - -
8e197aa3 by Rémi Denis-Courmont at 2022-02-26T21:58:03+02:00
rvv: add missing .size directives
- - - - -
dccbcb9b by Rémi Denis-Courmont at 2022-02-26T21:58:03+02:00
deinterlace: move AArch64 AdvSIMD code to backend module
- - - - -
d53396b8 by Rémi Denis-Courmont at 2022-02-26T21:58:03+02:00
deinterlace: move AArch64 SVE code to backend module
- - - - -
7f648830 by Rémi Denis-Courmont at 2022-02-26T21:58:03+02:00
sve: missing alignment
- - - - -
73672513 by Rémi Denis-Courmont at 2022-02-26T21:58:03+02:00
deinterlace: move ARM SIMD code to backend module
- - - - -
1dae40e6 by Rémi Denis-Courmont at 2022-02-26T21:58:03+02:00
deinterlace: move ARM NEON code to backend module
- - - - -
18 changed files:
- modules/Makefile.am
- + modules/isa/aarch64/Makefile.am
- + modules/isa/aarch64/simd/deinterlace.c
- modules/video_filter/deinterlace/merge_arm64.S → modules/isa/aarch64/simd/merge.S
- + modules/isa/aarch64/sve/deinterlace.c
- modules/video_filter/deinterlace/merge_sve.S → modules/isa/aarch64/sve/merge.S
- modules/isa/arm/Makefile.am
- + modules/isa/arm/neon/deinterlace.c
- modules/video_filter/deinterlace/merge_arm.S → modules/isa/arm/neon/merge.S
- + modules/isa/arm/simd/deinterlace.c
- + modules/isa/arm/simd/merge.S
- modules/isa/riscv/Makefile.am
- + modules/isa/riscv/deinterlace.c
- modules/video_filter/deinterlace/merge_rvv.S → modules/isa/riscv/rvv_merge.S
- modules/video_filter/Makefile.am
- modules/video_filter/deinterlace/deinterlace.c
- modules/video_filter/deinterlace/merge.h
- src/misc/cpu.c
Changes:
=====================================
modules/Makefile.am
=====================================
@@ -31,6 +31,7 @@ include hw/d3d11/Makefile.am
include hw/vaapi/Makefile.am
include hw/vdpau/Makefile.am
include hw/mmal/Makefile.am
+include isa/aarch64/Makefile.am
include isa/arm/Makefile.am
include isa/riscv/Makefile.am
include keystore/Makefile.am
=====================================
modules/isa/aarch64/Makefile.am
=====================================
@@ -0,0 +1,18 @@
+# Plugins holding AArch64-specific optimisations get their own
+# sub-directory of the plugins directory.
+aarch64dir = $(pluginsdir)/aarch64
+# Starts empty; each plugin is appended below under its own conditional.
+aarch64_LTLIBRARIES =
+
+# Deinterlacer line merging implemented with Advanced SIMD.
+libdeinterlace_aarch64_plugin_la_SOURCES = \
+ isa/aarch64/simd/deinterlace.c isa/aarch64/simd/merge.S
+
+# NOTE(review): HAVE_ARM64 is presumably an Automake conditional set by
+# configure when targeting AArch64 - confirm.
+if HAVE_ARM64
+aarch64_LTLIBRARIES += \
+ libdeinterlace_aarch64_plugin.la
+endif
+
+# Deinterlacer line merging implemented with the Scalable Vector Extension.
+libdeinterlace_sve_plugin_la_SOURCES = \
+ isa/aarch64/sve/deinterlace.c isa/aarch64/sve/merge.S
+
+# NOTE(review): HAVE_SVE is presumably set by configure when the assembler
+# can build SVE code - confirm.
+if HAVE_SVE
+aarch64_LTLIBRARIES += \
+ libdeinterlace_sve_plugin.la
+endif
=====================================
modules/isa/aarch64/simd/deinterlace.c
=====================================
@@ -0,0 +1,46 @@
+/*****************************************************************************
+ * deinterlace.c: AArch64 AdvSIMD deinterlacing functions
+ *****************************************************************************
+ * Copyright (C) 2022 Rémi Denis-Courmont
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <vlc_common.h>
+#include <vlc_cpu.h>
+#include <vlc_plugin.h>
+#include "../../../video_filter/deinterlace/merge.h"
+
+void merge8_arm64(void *, const void *, const void *, size_t);
+void merge16_arm64(void *, const void *, const void *, size_t);
+
+/**
+ * Installs the AArch64 Advanced SIMD line-merging routines.
+ *
+ * Does nothing unless vlc_CPU_ARM_NEON() reports support, so callbacks
+ * that were already present in the table are preserved in that case.
+ *
+ * \param data pointer to the struct deinterlace_functions table to update
+ */
+static void Probe(void *data)
+{
+    struct deinterlace_functions *const funcs = data;
+
+    if (!vlc_CPU_ARM_NEON())
+        return; /* unsupported: keep the callbacks already installed */
+
+    funcs->merges[0] = merge8_arm64;  /* 8-bit elements */
+    funcs->merges[1] = merge16_arm64; /* 16-bit elements */
+}
+
+/*
+ * Module descriptor: registers Probe() as a provider of the
+ * "deinterlace functions" CPU capability with score 10.
+ * Higher-scored providers of the same capability take precedence.
+ * The description previously misspelt "AdvSIMD" as "AvdSIMD".
+ */
+vlc_module_begin()
+    set_description("AArch64 AdvSIMD optimisation for deinterlacing")
+    set_cpu_funcs("deinterlace functions", Probe, 10)
+vlc_module_end()
=====================================
modules/video_filter/deinterlace/merge_arm64.S → modules/isa/aarch64/simd/merge.S
=====================================
@@ -1,5 +1,5 @@
//*****************************************************************************
- // merge_arm64.S : ARM64 NEON mean
+ // merge.S : AArch64 Advanced SIMD mean
//*****************************************************************************
// Copyright (C) 2009-2012 Rémi Denis-Courmont
// Copyright (C) 2016- Janne Grunau
@@ -19,7 +19,7 @@
// Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
//****************************************************************************/
-#include "../../isa/arm/asm.S"
+#include "../../arm/asm.S"
.arch armv8-a+simd
.text
@@ -32,7 +32,7 @@
.align 2
// NOTE: Offset and pitch must be multiple of 16-bytes in VLC.
-function merge8_arm64_neon
+function merge8_arm64
bti c
ands x5, SIZE, #~63
b.eq 2f
@@ -69,7 +69,7 @@ function merge8_arm64_neon
ret
.align 2
-function merge16_arm64_neon
+function merge16_arm64
bti c
ands x5, SIZE, #~63
b.eq 2f
=====================================
modules/isa/aarch64/sve/deinterlace.c
=====================================
@@ -0,0 +1,46 @@
+/*****************************************************************************
+ * deinterlace.c: AArch64 Scalable Vector Extension deinterlacing functions
+ *****************************************************************************
+ * Copyright (C) 2022 Rémi Denis-Courmont
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <vlc_common.h>
+#include <vlc_cpu.h>
+#include <vlc_plugin.h>
+#include "../../../video_filter/deinterlace/merge.h"
+
+void merge8_arm_sve(void *, const void *, const void *, size_t);
+void merge16_arm_sve(void *, const void *, const void *, size_t);
+
+/**
+ * Installs the AArch64 SVE line-merging routines.
+ *
+ * Does nothing unless vlc_CPU_ARM_SVE() reports support, so callbacks
+ * that were already present in the table are preserved in that case.
+ *
+ * \param data pointer to the struct deinterlace_functions table to update
+ */
+static void Probe(void *data)
+{
+    struct deinterlace_functions *const funcs = data;
+
+    if (!vlc_CPU_ARM_SVE())
+        return; /* unsupported: keep the callbacks already installed */
+
+    funcs->merges[0] = merge8_arm_sve;  /* 8-bit elements */
+    funcs->merges[1] = merge16_arm_sve; /* 16-bit elements */
+}
+
+/*
+ * Module descriptor: registers Probe() as a provider of the
+ * "deinterlace functions" CPU capability with score 20, i.e. above the
+ * score of 10 used by the AArch64 Advanced SIMD deinterlace module.
+ */
+vlc_module_begin()
+ set_description("AArch64 SVE optimisation for deinterlacing")
+ set_cpu_funcs("deinterlace functions", Probe, 20)
+vlc_module_end()
=====================================
modules/video_filter/deinterlace/merge_sve.S → modules/isa/aarch64/sve/merge.S
=====================================
@@ -1,5 +1,5 @@
/******************************************************************************
- * merge_sve.S : ARM SVE mean
+ * merge.S : ARM SVE mean
******************************************************************************
* Copyright (C) 2018 Rémi Denis-Courmont
*
@@ -18,13 +18,14 @@
* Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
*****************************************************************************/
-#include "../../isa/arm/asm.S"
+#include "../../arm/asm.S"
- .arch armv8-a+sve
+ .arch armv8-a+sve
/* TODO: prefetch, unroll */
.text
+ .align 2
bti_advertise
function merge8_arm_sve
bti c
=====================================
modules/isa/arm/Makefile.am
=====================================
@@ -1,5 +1,15 @@
+armdir = $(pluginsdir)/arm
neondir = $(pluginsdir)/arm_neon
+libdeinterlace_arm_simd_plugin_la_SOURCES = \
+ isa/arm/simd/deinterlace.c isa/arm/simd/merge.S
+
+# TODO? use dedicated conditional, or purge
+if HAVE_NEON
+arm_LTLIBRARIES = \
+ libdeinterlace_arm_simd_plugin.la
+endif
+
libchroma_yuv_neon_plugin_la_SOURCES = \
isa/arm/neon/deinterleave_chroma.S \
isa/arm/neon/i420_yuyv.S \
@@ -9,6 +19,9 @@ libchroma_yuv_neon_plugin_la_SOURCES = \
libchroma_yuv_neon_plugin_la_CFLAGS = $(AM_CFLAGS)
libchroma_yuv_neon_plugin_LIBTOOLFLAGS = --tag=CC
+libdeinterlace_neon_plugin_la_SOURCES = \
+ isa/arm/neon/deinterlace.c isa/arm/neon/merge.S
+
libvolume_neon_plugin_la_SOURCES = isa/arm/neon/volume.c isa/arm/neon/amplify.S
libvolume_neon_plugin_la_CFLAGS = $(AM_CFLAGS)
libvolume_neon_plugin_LIBTOOLFLAGS = --tag=CC
@@ -25,6 +38,7 @@ libyuv_rgb_neon_plugin_LIBTOOLFLAGS = --tag=CC
if HAVE_NEON
neon_LTLIBRARIES = \
libchroma_yuv_neon_plugin.la \
+ libdeinterlace_neon_plugin.la \
libvolume_neon_plugin.la \
libyuv_rgb_neon_plugin.la
endif
=====================================
modules/isa/arm/neon/deinterlace.c
=====================================
@@ -0,0 +1,46 @@
+/*****************************************************************************
+ * deinterlace.c: ARM NEON deinterlacing functions
+ *****************************************************************************
+ * Copyright (C) 2022 Rémi Denis-Courmont
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <vlc_common.h>
+#include <vlc_cpu.h>
+#include <vlc_plugin.h>
+#include "../../../video_filter/deinterlace/merge.h"
+
+void merge8_arm_neon(void *, const void *, const void *, size_t);
+void merge16_arm_neon(void *, const void *, const void *, size_t);
+
+/**
+ * Installs the ARM NEON line-merging routines.
+ *
+ * Does nothing unless vlc_CPU_ARM_NEON() reports support, so callbacks
+ * that were already present in the table are preserved in that case.
+ *
+ * \param data pointer to the struct deinterlace_functions table to update
+ */
+static void Probe(void *data)
+{
+    struct deinterlace_functions *const funcs = data;
+
+    if (!vlc_CPU_ARM_NEON())
+        return; /* unsupported: keep the callbacks already installed */
+
+    funcs->merges[0] = merge8_arm_neon;  /* 8-bit elements */
+    funcs->merges[1] = merge16_arm_neon; /* 16-bit elements */
+}
+
+/*
+ * Module descriptor: registers Probe() as a provider of the
+ * "deinterlace functions" CPU capability with score 20, i.e. above the
+ * score of 10 used by the ARMv6 SIMD deinterlace module.
+ */
+vlc_module_begin()
+ set_description("ARM NEON optimisation for deinterlacing")
+ set_cpu_funcs("deinterlace functions", Probe, 20)
+vlc_module_end()
=====================================
modules/video_filter/deinterlace/merge_arm.S → modules/isa/arm/neon/merge.S
=====================================
@@ -1,5 +1,5 @@
@*****************************************************************************
- @ merge_arm.S : ARM NEON mean
+ @ merge.S : ARM NEON mean
@*****************************************************************************
@ Copyright (C) 2009-2012 Rémi Denis-Courmont
@
@@ -18,11 +18,11 @@
@ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
@****************************************************************************/
-#include "../../isa/arm/asm.S"
+#include "../asm.S"
.syntax unified
#if HAVE_AS_ARCH_DIRECTIVE
- .arch armv6
+ .arch armv7-a
#endif
#if HAVE_AS_FPU_DIRECTIVE
.fpu neon
@@ -114,45 +114,3 @@ function merge16_arm_neon
vhadd.u16 q0, q0, q8
vst1.u16 {q0}, [DEST,:128]!
bx lr
-
- .align 2
-function merge8_armv6
- push {r4-r9,lr}
-1:
- pld [SRC1, #64]
- ldm SRC1!, {r4-r5}
- pld [SRC2, #64]
- ldm SRC2!, {r8-r9}
- subs SIZE, SIZE, #16
- uhadd8 r4, r4, r8
- ldm SRC1!, {r6-r7}
- uhadd8 r5, r5, r9
- ldm SRC2!, {ip,lr}
- uhadd8 r6, r6, ip
- stm DEST!, {r4-r5}
- uhadd8 r7, r7, lr
- stm DEST!, {r6-r7}
- it eq
- popeq {r4-r9,pc}
- b 1b
-
- .align 2
-function merge16_armv6
- push {r4-r9,lr}
-1:
- pld [SRC1, #64]
- ldm SRC1!, {r4-r5}
- pld [SRC2, #64]
- ldm SRC2!, {r8-r9}
- subs SIZE, SIZE, #16
- uhadd16 r4, r4, r8
- ldm SRC1!, {r6-r7}
- uhadd16 r5, r5, r9
- ldm SRC2!, {ip,lr}
- uhadd16 r6, r6, ip
- stm DEST!, {r4-r5}
- uhadd16 r7, r7, lr
- stm DEST!, {r6-r7}
- it eq
- popeq {r4-r9,pc}
- b 1b
=====================================
modules/isa/arm/simd/deinterlace.c
=====================================
@@ -0,0 +1,46 @@
+/*****************************************************************************
+ * deinterlace.c: ARMv6 SIMD deinterlacing functions
+ *****************************************************************************
+ * Copyright (C) 2022 Rémi Denis-Courmont
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <vlc_common.h>
+#include <vlc_cpu.h>
+#include <vlc_plugin.h>
+#include "../../../video_filter/deinterlace/merge.h"
+
+void merge8_armv6(void *, const void *, const void *, size_t);
+void merge16_armv6(void *, const void *, const void *, size_t);
+
+/**
+ * Installs the ARMv6 SIMD line-merging routines.
+ *
+ * Does nothing unless vlc_CPU_ARMv6() reports support, so callbacks
+ * that were already present in the table are preserved in that case.
+ *
+ * \param data pointer to the struct deinterlace_functions table to update
+ */
+static void Probe(void *data)
+{
+    struct deinterlace_functions *const funcs = data;
+
+    if (!vlc_CPU_ARMv6())
+        return; /* unsupported: keep the callbacks already installed */
+
+    funcs->merges[0] = merge8_armv6;  /* 8-bit elements */
+    funcs->merges[1] = merge16_armv6; /* 16-bit elements */
+}
+
+/*
+ * Module descriptor: registers Probe() as a provider of the
+ * "deinterlace functions" CPU capability with score 10, i.e. below the
+ * score of 20 used by the ARM NEON deinterlace module.
+ */
+vlc_module_begin()
+ set_description("ARM SIMD optimisation for deinterlacing")
+ set_cpu_funcs("deinterlace functions", Probe, 10)
+vlc_module_end()
=====================================
modules/isa/arm/simd/merge.S
=====================================
@@ -0,0 +1,74 @@
+ @*****************************************************************************
+ @ merge.S: ARMv6 SIMD mean
+ @*****************************************************************************
+ @ Copyright (C) 2009-2012 Rémi Denis-Courmont
+ @
+ @ This program is free software; you can redistribute it and/or modify
+ @ it under the terms of the GNU Lesser General Public License as published by
+ @ the Free Software Foundation; either version 2.1 of the License, or
+ @ (at your option) any later version.
+ @
+ @ This program is distributed in the hope that it will be useful,
+ @ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ @ GNU Lesser General Public License for more details.
+ @
+ @ You should have received a copy of the GNU Lesser General Public License
+ @ along with this program; if not, write to the Free Software Foundation,
+ @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ @****************************************************************************/
+
+#include "../asm.S"
+
+ .syntax unified
+#if HAVE_AS_ARCH_DIRECTIVE
+ .arch armv6
+#endif
+ .text
+
+#define DEST r0
+#define SRC1 r1
+#define SRC2 r2
+#define SIZE r3
+
+ @ merge8_armv6(DEST, SRC1, SRC2, SIZE)
+ @ Writes to DEST the per-byte unsigned halving add (UHADD8) of the bytes
+ @ at SRC1 and SRC2, 16 bytes per loop iteration.
+ @ NOTE(review): the loop only exits when SIZE reaches exactly zero, so
+ @ SIZE is presumably a non-zero multiple of 16 - confirm against callers.
+ .align 2
+function merge8_armv6
+ push {r4-r9,lr} @ r4-r9 and lr are used as scratch registers below
+1:
+ pld [SRC1, #64] @ preload hint, 64 bytes ahead of the first source
+ ldm SRC1!, {r4-r5} @ first 8 bytes of SRC1, pointer advanced
+ pld [SRC2, #64] @ preload hint, 64 bytes ahead of the second source
+ ldm SRC2!, {r8-r9} @ first 8 bytes of SRC2, pointer advanced
+ subs SIZE, SIZE, #16 @ sets the flags tested by the conditional pop below
+ uhadd8 r4, r4, r8
+ ldm SRC1!, {r6-r7} @ next 8 bytes of SRC1
+ uhadd8 r5, r5, r9
+ ldm SRC2!, {ip,lr} @ next 8 bytes of SRC2
+ uhadd8 r6, r6, ip
+ stm DEST!, {r4-r5} @ store first 8 result bytes, pointer advanced
+ uhadd8 r7, r7, lr
+ stm DEST!, {r6-r7} @ store last 8 result bytes
+ it eq @ makes the following pop conditional on SIZE == 0
+ popeq {r4-r9,pc} @ restore registers and return once SIZE is zero
+ b 1b @ otherwise process the next 16 bytes
+
+ @ merge16_armv6(DEST, SRC1, SRC2, SIZE)
+ @ Writes to DEST the per-halfword unsigned halving add (UHADD16) of the
+ @ 16-bit elements at SRC1 and SRC2, 16 bytes per loop iteration.
+ @ NOTE(review): the loop only exits when SIZE reaches exactly zero, so
+ @ SIZE is presumably a non-zero multiple of 16 - confirm against callers.
+ .align 2
+function merge16_armv6
+ push {r4-r9,lr} @ r4-r9 and lr are used as scratch registers below
+1:
+ pld [SRC1, #64] @ preload hint, 64 bytes ahead of the first source
+ ldm SRC1!, {r4-r5} @ first 8 bytes of SRC1, pointer advanced
+ pld [SRC2, #64] @ preload hint, 64 bytes ahead of the second source
+ ldm SRC2!, {r8-r9} @ first 8 bytes of SRC2, pointer advanced
+ subs SIZE, SIZE, #16 @ sets the flags tested by the conditional pop below
+ uhadd16 r4, r4, r8
+ ldm SRC1!, {r6-r7} @ next 8 bytes of SRC1
+ uhadd16 r5, r5, r9
+ ldm SRC2!, {ip,lr} @ next 8 bytes of SRC2
+ uhadd16 r6, r6, ip
+ stm DEST!, {r4-r5} @ store first 8 result bytes, pointer advanced
+ uhadd16 r7, r7, lr
+ stm DEST!, {r6-r7} @ store last 8 result bytes
+ it eq @ makes the following pop conditional on SIZE == 0
+ popeq {r4-r9,pc} @ restore registers and return once SIZE is zero
+ b 1b @ otherwise process the next 16 bytes
=====================================
modules/isa/riscv/Makefile.am
=====================================
@@ -1,5 +1,7 @@
riscvdir = $(pluginsdir)/riscv
+libdeinterlace_rvv_plugin_la_SOURCES = \
+ isa/riscv/deinterlace.c isa/riscv/rvv_merge.S
libtransform_rvv_plugin_la_SOURCES = \
isa/riscv/transform.c isa/riscv/rvv_transform.S
@@ -8,6 +10,7 @@ libvolume_rvv_plugin_la_LIBADD = $(AM_LIBADD) $(LIBM)
if HAVE_RVV
riscv_LTLIBRARIES = \
+ libdeinterlace_rvv_plugin.la \
libtransform_rvv_plugin.la \
libvolume_rvv_plugin.la
endif
=====================================
modules/isa/riscv/deinterlace.c
=====================================
@@ -0,0 +1,46 @@
+/*****************************************************************************
+ * deinterlace.c: RISC-V V deinterlacing functions
+ *****************************************************************************
+ * Copyright (C) 2022 Rémi Denis-Courmont
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <vlc_common.h>
+#include <vlc_cpu.h>
+#include <vlc_plugin.h>
+#include "../../video_filter/deinterlace/merge.h"
+
+void merge8_rvv(void *, const void *, const void *, size_t);
+void merge16_rvv(void *, const void *, const void *, size_t);
+
+/**
+ * Installs the RISC-V Vector line-merging routines.
+ *
+ * Does nothing unless vlc_CPU_RV_V() reports support, so callbacks
+ * that were already present in the table are preserved in that case.
+ *
+ * \param data pointer to the struct deinterlace_functions table to update
+ */
+static void Probe(void *data)
+{
+    struct deinterlace_functions *const funcs = data;
+
+    if (!vlc_CPU_RV_V())
+        return; /* unsupported: keep the callbacks already installed */
+
+    funcs->merges[0] = merge8_rvv;  /* 8-bit elements */
+    funcs->merges[1] = merge16_rvv; /* 16-bit elements */
+}
+
+/*
+ * Module descriptor: registers Probe() as a provider of the
+ * "deinterlace functions" CPU capability with score 10.
+ */
+vlc_module_begin()
+ set_description("RISC-V V optimisation for deinterlacing")
+ set_cpu_funcs("deinterlace functions", Probe, 10)
+vlc_module_end()
=====================================
modules/video_filter/deinterlace/merge_rvv.S → modules/isa/riscv/rvv_merge.S
=====================================
@@ -1,5 +1,5 @@
/******************************************************************************
- * merge_rvv.S: RISC-V Vector mean
+ * rvv_merge.S: RISC-V Vector mean
******************************************************************************
* Copyright (C) 2022 Rémi Denis-Courmont
*
@@ -37,6 +37,7 @@ merge8_rvv:
add a0, a0, t0
bnez a3, 1b
ret
+ .size merge8_rvv, . - merge8_rvv
.globl merge16_rvv
.type merge16_rvv, %function
@@ -55,4 +56,5 @@ merge16_rvv:
add a0, a0, t1
bnez a3, 1b
ret
+ .size merge16_rvv, . - merge16_rvv
=====================================
modules/video_filter/Makefile.am
=====================================
@@ -179,22 +179,6 @@ libdeinterlace_plugin_la_SOURCES += video_filter/deinterlace/yadif_x86.asm
# inline ASM doesn't build with -O0
libdeinterlace_plugin_la_CFLAGS += -O2
endif
-if HAVE_NEON
-libdeinterlace_plugin_la_SOURCES += video_filter/deinterlace/merge_arm.S
-libdeinterlace_plugin_la_CPPFLAGS += -DCAN_COMPILE_ARM
-endif
-if HAVE_ARM64
-libdeinterlace_plugin_la_SOURCES += video_filter/deinterlace/merge_arm64.S
-libdeinterlace_plugin_la_CPPFLAGS += -DCAN_COMPILE_ARM64
-endif
-if HAVE_SVE
-libdeinterlace_plugin_la_SOURCES += video_filter/deinterlace/merge_sve.S
-libdeinterlace_plugin_la_CPPFLAGS += -DCAN_COMPILE_SVE
-endif
-if HAVE_RVV
-libdeinterlace_plugin_la_SOURCES += video_filter/deinterlace/merge_rvv.S
-libdeinterlace_plugin_la_CPPFLAGS += -DCAN_COMPILE_RVV
-endif
libdeinterlace_plugin_la_LIBADD = libdeinterlace_common.la
video_filter_LTLIBRARIES += libdeinterlace_plugin.la
=====================================
modules/video_filter/deinterlace/deinterlace.c
=====================================
@@ -488,6 +488,10 @@ static const struct vlc_filter_operations filter_ops = {
.close = Close,
};
+static struct deinterlace_functions funcs = {
+ { Merge8BitGeneric, Merge16BitGeneric, },
+};
+
/*****************************************************************************
* Open
*****************************************************************************/
@@ -560,32 +564,10 @@ notsupp:
p_sys->pf_end_merge = EndSSE;
}
else
-#endif
-#if defined(CAN_COMPILE_ARM)
- if( vlc_CPU_ARM_NEON() )
- p_sys->pf_merge = pixel_size == 1 ? merge8_arm_neon : merge16_arm_neon;
- else
- if( vlc_CPU_ARMv6() )
- p_sys->pf_merge = pixel_size == 1 ? merge8_armv6 : merge16_armv6;
- else
-#endif
-#if defined(CAN_COMPILE_SVE)
- if( vlc_CPU_ARM_SVE() )
- p_sys->pf_merge = pixel_size == 1 ? merge8_arm_sve : merge16_arm_sve;
- else
-#endif
-#if defined(CAN_COMPILE_ARM64)
- if( vlc_CPU_ARM_NEON() )
- p_sys->pf_merge = pixel_size == 1 ? merge8_arm64_neon : merge16_arm64_neon;
- else
-#endif
-#if defined(CAN_COMPILE_RVV)
- if( vlc_CPU_RV_V() )
- p_sys->pf_merge = pixel_size == 1 ? merge8_rvv : merge16_rvv;
- else
#endif
{
- p_sys->pf_merge = pixel_size == 1 ? Merge8BitGeneric : Merge16BitGeneric;
+ vlc_CPU_functions_init_once("deinterlace functions", &funcs);
+ p_sys->pf_merge = funcs.merges[vlc_ctz(pixel_size)];
#if defined(__i386__) || defined(__x86_64__)
p_sys->pf_end_merge = NULL;
#endif
=====================================
modules/video_filter/deinterlace/merge.h
=====================================
@@ -31,6 +31,35 @@
* Merge (line blending) routines for the VLC deinterlacer.
*/
+/**
+ * Average two vectors.
+ *
+ * This callback shall compute the element-wise rounded average of two vectors.
+ * This is used for blending scan lines of two fields for deinterlacing.
+ *
+ * The element size is not passed to the callback; it is implied by the slot
+ * that the callback occupies in struct deinterlace_functions.
+ * Currently 8-bit and 16-bit elements are supported.
+ *
+ * \param d Output vector
+ * \param s1 First source vector
+ * \param s2 Second source vector
+ * \param len size of vectors in bytes
+ */
+
+typedef void (*merge_cb)(void *d, const void *s1, const void *s2, size_t len);
+
+/**
+ * Deinterlacing optimisation callbacks.
+ *
+ * The deinterlace filter initialises this table with its generic C
+ * routines, then hands it to the CPU optimisation modules registered for
+ * the "deinterlace functions" capability, which may overwrite entries.
+ */
+struct deinterlace_functions {
+ /** Element-wise vector average
+ *
+ * Entries are indexed by the base-2 logarithm of the element size in
+ * bytes: index 0 for 8-bit elements, index 1 for 16-bit elements. */
+ merge_cb merges[2];
+};
+
/*****************************************************************************
* Macros
*****************************************************************************/
@@ -133,35 +162,6 @@ void Merge8BitSSE2( void *, const void *, const void *, size_t );
void Merge16BitSSE2( void *, const void *, const void *, size_t );
#endif
-#if defined(CAN_COMPILE_ARM)
-/**
- * ARM NEON routine to blend pixels from two picture lines.
- */
-void merge8_arm_neon (void *, const void *, const void *, size_t);
-void merge16_arm_neon (void *, const void *, const void *, size_t);
-
-/**
- * ARMv6 SIMD routine to blend pixels from two picture lines.
- */
-void merge8_armv6 (void *, const void *, const void *, size_t);
-void merge16_armv6 (void *, const void *, const void *, size_t);
-#endif
-
-#if defined(CAN_COMPILE_ARM64)
-/**
- * ARM64 NEON routine to blend pixels from two picture lines.
- */
-void merge8_arm64_neon (void *, const void *, const void *, size_t);
-void merge16_arm64_neon (void *, const void *, const void *, size_t);
-
-#endif
-
-void merge8_arm_sve(void *, const void *, const void *, size_t);
-void merge16_arm_sve(void *, const void *, const void *, size_t);
-
-void merge8_rvv(void *, const void *, const void *, size_t);
-void merge16_rvv(void *, const void *, const void *, size_t);
-
/*****************************************************************************
* EndMerge routines
*****************************************************************************/
=====================================
src/misc/cpu.c
=====================================
@@ -299,7 +299,8 @@ void vlc_CPU_functions_init(const char *capability, void *restrict funcs)
module_t **mods;
ssize_t n = vlc_module_match(capability, NULL, false, &mods, NULL);
- for (ssize_t i = 0; i < n; i++) {
+ /* Descending order so higher priorities override the lower ones */
+ for (ssize_t i = n - 1; i >= 0; i--) {
void (*init)(void *) = vlc_module_map(NULL, mods[i]);
if (likely(init != NULL))
init(funcs);
View it on GitLab: https://code.videolan.org/videolan/vlc/-/compare/ca084e43d29cc315099e2013650a32ae1fdb3f86...1dae40e6605a8cd547f3d1ecb29b785400e1c737
--
View it on GitLab: https://code.videolan.org/videolan/vlc/-/compare/ca084e43d29cc315099e2013650a32ae1fdb3f86...1dae40e6605a8cd547f3d1ecb29b785400e1c737
You're receiving this email because of your account on code.videolan.org.
VideoLAN code repository instance
More information about the vlc-commits
mailing list