[vlc-commits] [Git][videolan/vlc][master] 5 commits: riscv: add macros for function boilerplate
Steve Lhomme (@robUx4)
gitlab at videolan.org
Wed Oct 15 05:53:28 UTC 2025
Steve Lhomme pushed to branch master at VideoLAN / VLC
Commits:
6cccfb3a by Rémi Denis-Courmont at 2025-10-15T05:28:28+00:00
riscv: add macros for function boilerplate
- - - - -
f74d450a by Rémi Denis-Courmont at 2025-10-15T05:28:28+00:00
riscv: add forward-edge CFI landing pads
- - - - -
67e1e0d3 by Rémi Denis-Courmont at 2025-10-15T05:28:28+00:00
cpu: add RISC-V B Bit manipulation extension
- - - - -
6055c11b by Rémi Denis-Courmont at 2025-10-15T05:28:28+00:00
cpu: run-time detection for RISC-V B
- - - - -
5e5007a9 by Rémi Denis-Courmont at 2025-10-15T05:28:28+00:00
rvv: use Zba SHxADD where applicable
I don't know of any hardware that would support the Vector extension and
yet would not support the Bit-manip extension (Zba + Zbb + Zbs), so this
should be fine.
- - - - -
12 changed files:
- include/vlc_cpu.h
- modules/isa/riscv/Makefile.am
- modules/isa/riscv/deinterlace.c
- + modules/isa/riscv/macros.S
- modules/isa/riscv/mixer.c
- modules/isa/riscv/rvv_amplify.S
- modules/isa/riscv/rvv_merge.S
- modules/isa/riscv/rvv_transform.S
- src/freebsd/cpu.c
- src/linux/cpu.c
- src/misc/cpu.c
- src/openbsd/cpu.c
Changes:
=====================================
include/vlc_cpu.h
=====================================
@@ -163,6 +163,7 @@ unsigned vlc_CPU_raw(void);
# define HAVE_FPU 1
# endif
# define VLC_CPU_RV_V 0x1
+# define VLC_CPU_RV_B 0x2
# ifdef __riscv_v
# define vlc_CPU_RV_V() (1)
@@ -170,6 +171,13 @@ unsigned vlc_CPU_raw(void);
# define vlc_CPU_RV_V() ((vlc_CPU() & VLC_CPU_RV_V) != 0)
# endif
+# if (defined (__riscv_b) || (defined (__riscv_zba) && defined (__riscv_zbb) \
+ && defined (__riscv_zbs)))
+# define vlc_CPU_RV_B() (1)
+# else
+# define vlc_CPU_RV_B() ((vlc_CPU() & VLC_CPU_RV_B) != 0)
+# endif
+
# else
/**
* Are single precision floating point operations "fast"?
=====================================
modules/isa/riscv/Makefile.am
=====================================
@@ -8,6 +8,8 @@ libtransform_rvv_plugin_la_SOURCES = \
libvolume_rvv_plugin_la_SOURCES = isa/riscv/mixer.c isa/riscv/rvv_amplify.S
libvolume_rvv_plugin_la_LIBADD = $(AM_LIBADD) $(LIBM)
+EXTRA_DIST += isa/riscv/macros.S
+
if HAVE_RVV
riscv_LTLIBRARIES = \
libdeinterlace_rvv_plugin.la \
=====================================
modules/isa/riscv/deinterlace.c
=====================================
@@ -36,7 +36,9 @@ static void Probe(void *data)
struct deinterlace_functions *const f = data;
f->merges[0] = merge8_rvv;
- f->merges[1] = merge16_rvv;
+
+ if (vlc_CPU_RV_B())
+ f->merges[1] = merge16_rvv;
}
}
=====================================
modules/isa/riscv/macros.S
=====================================
@@ -0,0 +1,40 @@
+/*****************************************************************************
+ * macros.S: RISC-V common assembler macros
+ ******************************************************************************
+ * Copyright (C) 2022 Rémi Denis-Courmont
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+.macro func sym /* Function prologue boilerplate: opens the definition of symbol \sym; close it with endfunc. */
+ .text /* emit the function into the text section */
+ .global \sym /* make the symbol visible to the linker... */
+ .hidden \sym /* ...but with hidden ELF visibility */
+ .type \sym, %function /* mark the symbol as a function */
+ .align 2 /* NOTE(review): presumably a power-of-two exponent with RISC-V gas, i.e. 4-byte alignment - confirm */
+\sym: /* the entry point label itself */
+
+ .macro endfunc /* defined per function, so that \sym is captured for the matching epilogue */
+ .size \sym, . - \sym /* symbol size = bytes from the entry label up to here */
+ .previous /* switch back to the section that was active before func */
+ .purgem endfunc /* remove this macro so that the next func can define it again */
+ .endm
+.endm
+
+#if !defined (__riscv_zicfilp) /* fallback: only provide lpad as a macro when __riscv_zicfilp is not predefined */
+.macro lpad lpl /* landing pad taking a label value \lpl */
+ auipc zero, \lpl /* destination is the zero register, so the result is discarded; NOTE(review): presumably the same encoding as the Zicfilp lpad instruction - confirm against the spec */
+.endm
+#endif /* !__riscv_zicfilp */
=====================================
modules/isa/riscv/mixer.c
=====================================
@@ -95,7 +95,7 @@ static int Probe(vlc_object_t *obj)
{
audio_volume_t *volume = (audio_volume_t *)obj;
- if (!vlc_CPU_RV_V())
+ if (!vlc_CPU_RV_V() || !vlc_CPU_RV_B())
return VLC_ENOTSUP;
switch (volume->format) {
=====================================
modules/isa/riscv/rvv_amplify.S
=====================================
@@ -18,80 +18,84 @@
* Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
*****************************************************************************/
- .option arch, +v
- .text
- .align 2
+#include "macros.S"
- .globl rvv_amplify_f32
- .type rvv_amplify_f32, %function
-rvv_amplify_f32:
+ .option arch, +b, +v
+
+func rvv_amplify_f32
+ lpad 0
srli a2, a2, 2
#if defined (__riscv_float_abi_soft)
fmv.w.x fa0, a3
#endif
-1: vsetvli t0, a2, e32, m8, ta, ma
- slli t1, t0, 2
+1:
+ vsetvli t0, a2, e32, m8, ta, ma
vle32.v v16, (a1)
- add a1, a1, t1
+ sh2add a1, t0, a1
vfmul.vf v16, v16, fa0
sub a2, a2, t0
vse32.v v16, (a0)
- add a0, a0, t1
+ sh2add a0, t0, a0
bnez a2, 1b
+
ret
+endfunc
- .globl rvv_amplify_f64
- .type rvv_amplify_f64, %function
-rvv_amplify_f64:
+func rvv_amplify_f64
+ lpad 0
srli a2, a2, 3
#if defined (__riscv_float_abi_soft) || defined (__riscv_float_abi_single)
fmv.d.x fa0, a3
#endif
-1: vsetvli t0, a2, e64, m8, ta, ma
- slli t1, t0, 3
+1:
+ vsetvli t0, a2, e64, m8, ta, ma
vle64.v v16, (a1)
- add a1, a1, t1
+ sh3add a1, t0, a1
vfmul.vf v16, v16, fa0
sub a2, a2, t0
vse64.v v16, (a0)
- add a0, a0, t1
+ sh3add a0, t0, a0
bnez a2, 1b
+
ret
+endfunc
- .globl rvv_amplify_i16
- .type rvv_amplify_i16, %function
-rvv_amplify_i16:
+func rvv_amplify_i16
+ lpad 0
srli a2, a2, 1
-1: vsetvli t0, a2, e16, m8, ta, ma
- slli t1, t0, 1
+1:
+ vsetvli t0, a2, e16, m8, ta, ma
vle16.v v16, (a1)
- add a1, a1, t1
+ sh1add a1, t0, a1
vmulhsu.vx v16, v16, a3
sub a2, a2, t0
vse16.v v16, (a0)
- add a0, a0, t1
+ sh1add a0, t0, a0
bnez a2, 1b
+
ret
+endfunc
- .globl rvv_amplify_i32
- .type rvv_amplify_i32, %function
-rvv_amplify_i32:
+func rvv_amplify_i32
+ lpad 0
srli a2, a2, 2
-1: vsetvli t0, a2, e32, m8, ta, ma
- slli t1, t0, 2
+1:
+ vsetvli t0, a2, e32, m8, ta, ma
vle32.v v16, (a1)
- add a1, a1, t1
+ sh2add a1, t0, a1
vmulhsu.vx v16, v16, a3
sub a2, a2, t0
vse32.v v16, (a0)
- add a0, a0, t1
+ sh2add a0, t0, a0
bnez a2, 1b
+
ret
+endfunc
- .globl rvv_amplify_u8
- .type rvv_amplify_u8, %function
-rvv_amplify_u8:
-1: vsetvli t0, a2, e8, m8, ta, ma
+func rvv_amplify_u8
+ lpad 0
+1:
+ vsetvli t0, a2, e8, m8, ta, ma
vle8.v v16, (a1)
add a1, a1, t0
vmulhu.vx v16, v16, a3
@@ -99,5 +103,6 @@ rvv_amplify_u8:
vse8.v v16, (a0)
add a0, a0, t0
bnez a2, 1b
- ret
+ ret
+endfunc
=====================================
modules/isa/riscv/rvv_merge.S
=====================================
@@ -18,15 +18,15 @@
* Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
*****************************************************************************/
- .option arch, +v
- .text
- .align 2
+#include "macros.S"
- .globl merge8_rvv
- .type merge8_rvv, %function
-merge8_rvv:
+ .option arch, +b, +v
+
+func merge8_rvv
+ lpad 0
csrwi vxrm, 0
-1: vsetvli t0, a3, e8, m8, ta, ma
+1:
+ vsetvli t0, a3, e8, m8, ta, ma
vle8.v v16, (a1)
add a1, a1, t0
vle8.v v24, (a2)
@@ -36,25 +36,25 @@ merge8_rvv:
vse8.v v16, (a0)
add a0, a0, t0
bnez a3, 1b
+
ret
- .size merge8_rvv, . - merge8_rvv
+endfunc
- .globl merge16_rvv
- .type merge16_rvv, %function
-merge16_rvv:
+func merge16_rvv
+ lpad 0
csrwi vxrm, 0
srli a3, a3, 1
-1: vsetvli t0, a3, e16, m8, ta, ma
- slli t1, t0, 1
+1:
+ vsetvli t0, a3, e16, m8, ta, ma
vle16.v v16, (a1)
- add a1, a1, t1
+ sh1add a1, t0, a1
vle16.v v24, (a2)
- add a2, a2, t1
+ sh1add a2, t0, a2
vaaddu.vv v16, v16, v24
sub a3, a3, t0
vse16.v v16, (a0)
- add a0, a0, t1
+ sh1add a0, t0, a0
bnez a3, 1b
- ret
- .size merge16_rvv, . - merge16_rvv
+ ret
+endfunc
=====================================
modules/isa/riscv/rvv_transform.S
=====================================
@@ -18,20 +18,19 @@
* Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
*****************************************************************************/
+#include "macros.S"
+
.option arch, +v
- .text
- .align 2
.macro transforms, bits, order
.if \bits - (8 << \order)
.error "Mismatched parameters"
.endif
- .globl rvv_hflip_\bits
- .type rvv_hflip_\bits, %function
+func rvv_hflip_\bits
// a0:out_base, a1:out_stride, a2:in_base, a3:in_stride
// a4:width, a5:height
-rvv_hflip_\bits :
+ lpad 0
.if \order
slli t4, a4, \order
add a2, a2, t4
@@ -40,11 +39,12 @@ rvv_hflip_\bits :
.endif
li t6, -(1 << \order)
add a2, a2, t6
-
-1: mv t0, a0
+1:
+ mv t0, a0
mv t2, a2
mv t4, a4
-2: vsetvli t5, t4, e\bits, m8, ta, ma
+2:
+ vsetvli t5, t4, e\bits, m8, ta, ma
sub t4, t4, t5
vlse\bits\().v v0, (t2), t6
.if \order
@@ -59,20 +59,22 @@ rvv_hflip_\bits :
add a0, a0, a1
add a2, a2, a3
bnez a5, 1b
+
ret
- .size rvv_hflip_\bits, . - rvv_hflip_\bits
+endfunc
- .globl rvv_transpose_\bits
- .type rvv_transpose_\bits, %function
+func rvv_transpose_\bits
// a0:out_base, a1:out_stride, a2:in_base, a3:in_stride
// a4:in_width/out_height, a5:in_height/out_width
-rvv_transpose_\bits :
-1: mv t0, a0
+ lpad 0
+1:
+ mv t0, a0
mv t2, a2
mv t4, a4
/* For the sake of locality, the inner loop transposes VL rows at once
* rather than one column. */
-2: vsetvli t5, a5, e\bits, m8, ta, ma
+2:
+ vsetvli t5, a5, e\bits, m8, ta, ma
vlse\bits\().v v0, (t2), a3
addi t2, t2, (1 << \order)
vse\bits\().v v0, (t0)
@@ -88,9 +90,9 @@ rvv_transpose_\bits :
add a0, a0, t5 // VL output columns done
add a2, a2, t3 // VL input rows done
bnez a5, 1b
- ret
- .size rvv_transpose_\bits, . - rvv_transpose_\bits
+ ret
+endfunc
.endm // transforms
transforms 8, 0
=====================================
src/freebsd/cpu.c
=====================================
@@ -90,6 +90,8 @@ unsigned vlc_CPU_raw(void)
elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap));
+ if (hwcap & HWCAP_RV('B'))
+ flags |= VLC_CPU_RV_B;
if (hwcap & HWCAP_RV('V'))
flags |= VLC_CPU_RV_V;
=====================================
src/linux/cpu.c
=====================================
@@ -90,6 +90,8 @@ unsigned vlc_CPU_raw(void)
const unsigned long hwcap = getauxval(AT_HWCAP);
unsigned int flags = 0;
+ if (hwcap & HWCAP_RV('B'))
+ flags |= VLC_CPU_RV_B;
if (hwcap & HWCAP_RV('V'))
flags |= VLC_CPU_RV_V;
=====================================
src/misc/cpu.c
=====================================
@@ -186,6 +186,8 @@ void vlc_CPU_dump (vlc_object_t *obj)
vlc_memstream_puts(&stream, "ARM_NEON ");
#elif defined (__riscv)
+ if (vlc_CPU_RV_B())
+ vlc_memstream_puts(&stream, "B ");
if (vlc_CPU_RV_V())
vlc_memstream_puts(&stream, "V ");
=====================================
src/openbsd/cpu.c
=====================================
@@ -87,6 +87,8 @@ unsigned vlc_CPU_raw(void)
elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap));
+ if (hwcap & HWCAP_RV('B'))
+ flags |= VLC_CPU_RV_B;
if (hwcap & HWCAP_RV('V'))
flags |= VLC_CPU_RV_V;
View it on GitLab: https://code.videolan.org/videolan/vlc/-/compare/851e8e217ae157130e8d30b5e1d68f28baf5b4b4...5e5007a9c31af2a8a81073f972abb7601c253424
--
View it on GitLab: https://code.videolan.org/videolan/vlc/-/compare/851e8e217ae157130e8d30b5e1d68f28baf5b4b4...5e5007a9c31af2a8a81073f972abb7601c253424
You're receiving this email because of your account on code.videolan.org.
VideoLAN code repository instance
More information about the vlc-commits mailing list