[vlc-devel] [PATCH] add ARM/NEON version of simple channel mixer
Rafaël Carré
funman at videolan.org
Thu Oct 4 20:17:03 CEST 2012
From: David Geldreich <david.geldreich at free.fr>
Signed-off-by: Jean-Baptiste Kempf <jb at videolan.org>
Signed-off-by: Rafaël Carré <funman at videolan.org>
---
modules/LIST | 1 +
modules/arm_neon/Modules.am | 8 +
modules/arm_neon/simple_channel_mixer.S | 279 +++++++++++++++++++++++++++++++
modules/arm_neon/simple_channel_mixer.c | 175 +++++++++++++++++++
4 files changed, 463 insertions(+)
create mode 100644 modules/arm_neon/simple_channel_mixer.S
create mode 100644 modules/arm_neon/simple_channel_mixer.c
diff --git a/modules/LIST b/modules/LIST
index 100375f..33050b5 100644
--- a/modules/LIST
+++ b/modules/LIST
@@ -289,6 +289,7 @@ $Id$
* shm: Shared memory framebuffer access module
* sid: Sidplay demuxer
* simple_channel_mixer: channel mixer
+ * simple_channel_mixer_neon: channel mixer using NEON assembly
* skins2: Skinnable interface, new generation
* smf: Standard MIDI file demuxer
* smooth: Microsoft Smooth Streaming input
diff --git a/modules/arm_neon/Modules.am b/modules/arm_neon/Modules.am
index 498368a..4f2af9e 100644
--- a/modules/arm_neon/Modules.am
+++ b/modules/arm_neon/Modules.am
@@ -8,6 +8,13 @@ libaudio_format_neon_plugin_la_SOURCES = \
libaudio_format_neon_plugin_la_CFLAGS = $(AM_CFLAGS)
libaudio_format_neon_plugin_la_LIBADD = $(AM_LIBADD)
+libsimple_channel_mixer_neon_plugin_la_SOURCES = \
+ simple_channel_mixer.S \
+ simple_channel_mixer.c
+libsimple_channel_mixer_neon_plugin_la_CFLAGS = $(AM_CFLAGS)
+libsimple_channel_mixer_neon_plugin_la_LIBADD = $(AM_LIBADD)
+libsimple_channel_mixer_neon_plugin_la_DEPENDENCIES =
+
libchroma_yuv_neon_plugin_la_SOURCES = \
i420_yuyv.S \
i422_yuyv.S \
@@ -30,6 +37,7 @@ libyuv_rgb_neon_plugin_la_LIBADD = $(AM_LIBADD)
libvlc_LTLIBRARIES += \
libaudio_format_neon_plugin.la \
+ libsimple_channel_mixer_neon_plugin.la \
libchroma_yuv_neon_plugin.la \
libvolume_neon_plugin.la \
libyuv_rgb_neon_plugin.la \
diff --git a/modules/arm_neon/simple_channel_mixer.S b/modules/arm_neon/simple_channel_mixer.S
new file mode 100644
index 0000000..16d6702
--- /dev/null
+++ b/modules/arm_neon/simple_channel_mixer.S
@@ -0,0 +1,279 @@
+ @*****************************************************************************
+ @ simple_channel_mixer.S : ARM NEON channel mixer
+ @*****************************************************************************
+ @ Copyright (C) 2012 David Geldreich <david.geldreich at free.fr>
+ @ Sébastien Toque
+ @
+ @ This program is free software; you can redistribute it and/or modify
+ @ it under the terms of the GNU General Public License as published by
+ @ the Free Software Foundation; either version 2 of the License, or
+ @ (at your option) any later version.
+ @
+ @ This program is distributed in the hope that it will be useful,
+ @ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ @ GNU General Public License for more details.
+ @
+ @ You should have received a copy of the GNU General Public License
+ @ along with this program; if not, write to the Free Software Foundation,
+ @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ @****************************************************************************/
+
+ .fpu neon @ let the assembler accept the NEON/VFP instructions used below
+ .text
+ .align
+
+#define DST r0 /* 1st argument of the C prototype: float *dst (output samples) */
+#define SRC r1 /* 2nd argument: const float *src (input samples) */
+#define NUM r2 /* 3rd argument: int num, decremented once per loop iteration */
+#define LFE r3 /* 4th argument: bool lfeChannel, compared against 0 by the routines */
+#define COEFF r4 /* scratch pointer to the coefficient table; saved/restored by every routine */
+
+coeff_7to2: @ gains for src[0..3]; the upper pair is reused for src[4..5]
+ .float 0.5
+ .float 0.5
+ .float 0.25
+ .float 0.25
+ .global convert_7to2_neon_asm
+ .type convert_7to2_neon_asm, %function
+convert_7to2_neon_asm: @ each iteration reads 7 floats (8 if the lfe flag is set) and writes 2
+ push {r4,lr} @ r4 is overwritten with the table address below
+
+ adr COEFF, coeff_7to2
+ vld1.32 {q0},[COEFF] @ q0 = 0.5 0.5 0.25 0.25, so d1 = 0.25 0.25
+0: @ use local label
+ vld1.32 {q2},[SRC]! @ load 0,1,2,3
+ vmul.f32 q2,q2,q0 @ 0.5*src[0] 0.5*src[1] 0.25*src[2] 0.25*src[3]
+ vld1.32 {d6},[SRC]! @ load 4,5
+ vmul.f32 d6,d6,d1 @ 0.25*src[4] 0.25*src[5]
+ vadd.f32 d4,d4,d5 @ 0.5*src[0] + 0.25*src[2]
+ @ 0.5*src[1] + 0.25*src[3]
+ vadd.f32 d4,d4,d6 @ 0.5*src[0] + 0.25*src[2] + 0.25*src[4]
+ @ 0.5*src[1] + 0.25*src[3] + 0.25*src[5]
+ flds s14,[SRC] @ load 6
+ vdup.32 d7,d7[0] @ s14 is lane 0 of d7: copy src[6] into both lanes
+ teq LFE,#0
+ ite eq @ pick one of the two adds from the teq result
+ addeq SRC,SRC,#4 @ no lfe: step over src[6] only
+ addne SRC,SRC,#8 @ skip the lfe channel
+ vadd.f32 d4,d4,d7 @ 0.5*src[0] + 0.25*src[2] + 0.25*src[4] + src[6]
+ @ 0.5*src[1] + 0.25*src[3] + 0.25*src[5] + src[6]
+ vst1.32 d4, [DST]! @ write both output samples, advance dst by 8 bytes
+ subs NUM,NUM,#1
+ bne 0b @ count is decremented before the test, so it must be non-zero on entry
+
+ pop {r4,pc} @ restore r4 and return
+
+
+coeff_5to2: @ gains for src[0..3]
+ .float 0.5
+ .float 0.5
+ .float 0.33
+ .float 0.33
+ .global convert_5to2_neon_asm
+ .type convert_5to2_neon_asm, %function
+convert_5to2_neon_asm: @ each iteration reads 5 floats (6 if the lfe flag is set) and writes 2
+ push {r4,lr} @ r4 is overwritten with the table address below
+
+ adr COEFF, coeff_5to2
+ vld1.32 {q0},[COEFF] @ load constants
+0: @ use local label
+ vld1.32 {q1},[SRC]! @ load 0,1,2,3
+ flds s8,[SRC] @ load 4
+ vdup.32 d4,d4[0] @ s8 is lane 0 of d4: copy src[4] into both lanes
+ teq LFE,#0
+ ite eq @ pick one of the two adds from the teq result
+ addeq SRC,SRC,#4 @ no lfe: step over src[4] only
+ addne SRC,SRC,#8 @ skip the lfe channel
+ vmul.f32 q1,q1,q0 @ 0.5*src[0] 0.5*src[1] 0.33*src[2] 0.33*src[3]
+ vadd.f32 d2,d2,d3 @ 0.5*src[0] + 0.33*src[2]
+ @ 0.5*src[1] + 0.33*src[3]
+ vadd.f32 d2,d2,d4 @ 0.5*src[0] + 0.33*src[2] + src[4]
+ @ 0.5*src[1] + 0.33*src[3] + src[4]
+ vst1.32 d2,[DST]! @ write both output samples, advance dst by 8 bytes
+ subs NUM,NUM,#1
+ bne 0b @ count is decremented before the test, so it must be non-zero on entry
+
+ pop {r4,pc} @ restore r4 and return
+
+
+coeff_4to2: @ gains for src[0..1]; src[2] and src[3] are added unscaled
+ .float 0.5
+ .float 0.5
+ .global convert_4to2_neon_asm
+ .type convert_4to2_neon_asm, %function
+convert_4to2_neon_asm: @ each iteration reads exactly 4 floats and writes 2; the lfe flag is never tested here
+ push {r4,lr} @ r4 is overwritten with the table address below
+
+ adr COEFF, coeff_4to2
+ vld1.32 {d0},[COEFF] @ load constants
+0: @ use local label
+ vld1.32 {q1},[SRC]! @ load 0,1,2,3 and advance src by 16 bytes
+ vmul.f32 d2,d2,d0 @ 0.5*src[0] 0.5*src[1]
+ vdup.32 d4,d3[0] @ dup src[2]
+ vdup.32 d3,d3[1] @ dup src[3]
+ vadd.f32 d2,d2,d3 @ +src[3]
+ vadd.f32 d2,d2,d4 @ +src[2]
+ vst1.32 d2,[DST]! @ write both output samples, advance dst by 8 bytes
+ subs NUM,NUM,#1
+ bne 0b @ count is decremented before the test, so it must be non-zero on entry
+
+ pop {r4,pc} @ restore r4 and return
+
+
+coeff_3to2: @ gains for src[0..1]; src[2] is added unscaled
+ .float 0.5
+ .float 0.5
+ .global convert_3to2_neon_asm
+ .type convert_3to2_neon_asm, %function
+convert_3to2_neon_asm: @ each iteration reads 3 floats (4 if the lfe flag is set) and writes 2
+ push {r4,lr} @ r4 is overwritten with the table address below
+
+ adr COEFF, coeff_3to2
+ vld1.32 {d0},[COEFF] @ load constants
+0: @ use local label
+ vld1.32 {d1},[SRC]! @ load 0,1
+ flds s4,[SRC] @ load 2
+ vdup.32 d2,d2[0] @ s4 is lane 0 of d2: copy src[2] into both lanes
+ teq LFE,#0
+ ite eq @ pick one of the two adds from the teq result
+ addeq SRC,SRC,#4 @ no lfe: step over src[2] only
+ addne SRC,SRC,#8 @ skip the lfe channel
+ vmul.f32 d1,d1,d0 @ 0.5*src[0] 0.5*src[1]
+ vadd.f32 d1,d1,d2 @ 0.5*src[0] + src[2]
+ @ 0.5*src[1] + src[2]
+ vst1.32 d1,[DST]! @ write both output samples, advance dst by 8 bytes
+ subs NUM,NUM,#1
+ bne 0b @ count is decremented before the test, so it must be non-zero on entry
+
+ pop {r4,pc} @ restore r4 and return
+
+
+coeff_7to1: @ gains for src[0..3]; the upper pair is reused for src[4..5]
+ .float 0.25
+ .float 0.25
+ .float 0.125
+ .float 0.125
+ .global convert_7to1_neon_asm
+ .type convert_7to1_neon_asm, %function
+convert_7to1_neon_asm: @ each iteration reads 7 floats (8 if the lfe flag is set) and writes 1
+ push {r4,lr} @ r4 is overwritten with the table address below
+
+ adr COEFF, coeff_7to1
+ vld1.32 {q0},[COEFF] @ q0 = 0.25 0.25 0.125 0.125, so d1 = 0.125 0.125
+0: @ use local label
+ vld1.32 {q1},[SRC]! @ load 0,1,2,3
+ vmul.f32 q1,q1,q0 @ 0.25*src[0] 0.25*src[1] 0.125*src[2] 0.125*src[3]
+ vld1.32 {d4},[SRC]! @ load 4,5
+ vmul.f32 d4,d4,d1 @ 0.125*src[4] 0.125*src[5]
+ vadd.f32 d2,d2,d3 @ 0.25*src[0] + 0.125*src[2] | 0.25*src[1] + 0.125*src[3]
+ vadd.f32 d2,d2,d4 @ add 0.125*src[4] to lane 0 and 0.125*src[5] to lane 1
+ flds s10,[SRC] @ load 6
+ teq LFE,#0
+ ite eq @ pick one of the two adds from the teq result
+ addeq SRC,SRC,#4 @ no lfe: step over src[6] only
+ addne SRC,SRC,#8 @ skip the lfe channel
+ vadd.f32 s4,s4,s5 @ s4 and s5 are the two lanes of d2: fold them into one sum
+ vadd.f32 s4,s4,s10 @ + src[6]
+ fsts s4,[DST] @ write the single output sample
+ add DST,DST,#4 @ advance dst by one float
+ subs NUM,NUM,#1
+ bne 0b @ count is decremented before the test, so it must be non-zero on entry
+
+ pop {r4,pc} @ restore r4 and return
+
+
+coeff_5to1: @ gains for src[0..3]; src[4] is added unscaled
+ .float 0.25
+ .float 0.25
+ .float 0.16666667
+ .float 0.16666667
+ .global convert_5to1_neon_asm
+ .type convert_5to1_neon_asm, %function
+convert_5to1_neon_asm: @ each iteration reads 5 floats (6 if the lfe flag is set) and writes 1
+ push {r4,lr} @ r4 is overwritten with the table address below
+
+ adr COEFF, coeff_5to1
+ vld1.32 {q0},[COEFF] @ load constants
+0: @ use local label
+ vld1.32 {q1},[SRC]! @ load 0,1,2,3
+ vmul.f32 q1,q1,q0 @ 0.25*src[0] 0.25*src[1] src[2]/6 src[3]/6
+ vadd.f32 d2,d2,d3 @ 0.25*src[0] + src[2]/6 | 0.25*src[1] + src[3]/6
+ flds s10,[SRC] @ load 4
+ teq LFE,#0
+ ite eq @ pick one of the two adds from the teq result
+ addeq SRC,SRC,#4 @ no lfe: step over src[4] only
+ addne SRC,SRC,#8 @ skip the lfe channel
+ vadd.f32 s4,s4,s5 @ s4 and s5 are the two lanes of d2: fold them into one sum
+ vadd.f32 s4,s4,s10 @ + src[4]
+ fsts s4,[DST] @ write the single output sample
+ add DST,DST,#4 @ advance dst by one float
+ subs NUM,NUM,#1
+ bne 0b @ count is decremented before the test, so it must be non-zero on entry
+
+ pop {r4,pc} @ restore r4 and return
+
+
+coeff_7to4: @ gains for src[0..3]; src[4..6] are added unscaled
+ .float 0.5
+ .float 0.5
+ .float 0.16666667
+ .float 0.16666667
+ .global convert_7to4_neon_asm
+ .type convert_7to4_neon_asm, %function
+convert_7to4_neon_asm: @ each iteration reads 7 floats (8 if the lfe flag is set) and writes 4
+ push {r4,lr} @ r4 is overwritten with the table address below
+
+ adr COEFF, coeff_7to4
+ vld1.32 {q0},[COEFF] @ load constants
+0: @ use local label
+ vld1.32 {q1},[SRC]! @ load 0,1,2,3
+ vmul.f32 q1,q1,q0 @ 0.5*src[0] 0.5*src[1] src[2]/6 src[3]/6
+ vld1.32 {d5},[SRC]! @ load 4,5
+ flds s14,[SRC] @ load 6
+ vadd.f32 d2,d2,d3 @ 0.5*src[0] + src[2]/6
+ @ 0.5*src[1] + src[3]/6
+ vdup.32 d4,d7[0] @ so q2 : src[6] src[6] src[4] src[5]
+ vadd.f32 q2,q2,q1 @ src[6] + 0.5*src[0] + src[2]/6
+ @ src[6] + 0.5*src[1] + src[3]/6
+ @ src[4] + src[2]/6
+ @ src[5] + src[3]/6
+ teq LFE,#0
+ ite eq @ pick one of the two adds from the teq result
+ addeq SRC,SRC,#4 @ no lfe: step over src[6] only
+ addne SRC,SRC,#8 @ skip the lfe channel
+ vst1.32 {q2}, [DST]! @ write the four output samples, advance dst by 16 bytes
+ subs NUM,NUM,#1
+ bne 0b @ count is decremented before the test, so it must be non-zero on entry
+
+ pop {r4,pc} @ restore r4 and return
+
+
+coeff_5to4: @ gains for src[0..1]; src[2..3] pass through, src[4] is added unscaled
+ .float 0.5
+ .float 0.5
+ .global convert_5to4_neon_asm
+ .type convert_5to4_neon_asm, %function
+convert_5to4_neon_asm: @ each iteration reads 5 floats (6 if the lfe flag is set) and writes 4
+ push {r4,lr} @ r4 is overwritten with the table address below
+
+ adr COEFF, coeff_5to4
+ vld1.32 {d0},[COEFF] @ load constants
+0: @ use local label
+ vld1.32 {q1},[SRC]! @ load 0,1,2,3
+ vmul.f32 d2,d2,d0 @ 0.5*src[0] 0.5*src[1]
+ flds s8,[SRC] @ load 4
+ vdup.32 d4,d4[0] @ s8 is lane 0 of d4: copy src[4] into both lanes
+ vadd.f32 d2,d2,d4 @ 0.5*src[0] + src[4]
+ @ 0.5*src[1] + src[4]
+ @ src[2]
+ @ src[3]
+ teq LFE,#0
+ ite eq @ pick one of the two adds from the teq result
+ addeq SRC,SRC,#4 @ no lfe: step over src[4] only
+ addne SRC,SRC,#8 @ skip the lfe channel
+ vst1.32 {q1}, [DST]! @ q1 = d2 (mixed pair) then d3 (untouched src[2], src[3]); advance dst by 16 bytes
+ subs NUM,NUM,#1
+ bne 0b @ count is decremented before the test, so it must be non-zero on entry
+
+ pop {r4,pc} @ restore r4 and return
diff --git a/modules/arm_neon/simple_channel_mixer.c b/modules/arm_neon/simple_channel_mixer.c
new file mode 100644
index 0000000..687ae2b
--- /dev/null
+++ b/modules/arm_neon/simple_channel_mixer.c
@@ -0,0 +1,175 @@
+/*****************************************************************************
+ * simple_channel_mixer.c : simple channel mixer plug-in using NEON assembly
+ *****************************************************************************
+ * Copyright (C) 2002, 2004, 2006-2009, 2012 the VideoLAN team
+ * $Id$
+ *
+ * Authors: Gildas Bazin <gbazin at videolan.org>
+ * David Geldreich <david.geldreich at free.fr>
+ * Sébastien Toque
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <vlc_common.h>
+#include <vlc_plugin.h>
+#include <vlc_aout.h>
+#include <vlc_filter.h>
+#include <vlc_block.h>
+#include <vlc_cpu.h>
+#include <assert.h>
+
+/*****************************************************************************
+ * Module descriptor
+ *****************************************************************************/
+static int OpenFilter( vlc_object_t * ); /* forward declaration; defined at the end of this file */
+
+vlc_module_begin ()
+ set_description( N_("Audio filter for simple channel mixing using NEON assembly") )
+ set_category( CAT_AUDIO )
+ set_subcategory( SUBCAT_AUDIO_MISC )
+ set_capability( "audio filter", 20 ) /* NOTE(review): 20 is presumably the module score - confirm against vlc_plugin.h */
+ set_callbacks( OpenFilter, NULL ) /* OpenFilter as first callback, NULL as second (no state to tear down is allocated here) */
+vlc_module_end ()
+
+#define FILTER_WRAPPER(in, out) /* declares convert_<in>to<out>_neon_asm and defines Filter_<in>to<out>, which runs it on one block */ \
+ void convert_##in##to##out##_neon_asm(float *dst, const float *src, int num, bool lfeChannel); \
+ static block_t *Filter_##in##to##out (filter_t *p_filter, block_t *p_block) \
+ { \
+ block_t *p_out; \
+ if (!FilterInit( p_filter, p_block, &p_out )) \
+ return NULL; /* FilterInit has already released p_block on its failure paths */ \
+ const float *p_src = (const float *)p_block->p_buffer; \
+ float *p_dest = (float *)p_out->p_buffer; \
+ convert_##in##to##out##_neon_asm( p_dest, p_src, p_block->i_nb_samples, /* one loop iteration per sample frame */ \
+ p_filter->fmt_in.audio.i_physical_channels & AOUT_CHAN_LFE ); /* converted to bool: true when the input has an LFE channel */ \
+ block_Release( p_block ); /* input is consumed; the caller receives the new output block */ \
+ return p_out; \
+ }
+
+#define TRY_FILTER(in, out) /* expects locals b_input_<in>, b_output_<out> and p_filter in scope; returns from the enclosing function on a match */ \
+ if ( b_input_##in && b_output_##out ) \
+ { \
+ p_filter->pf_audio_filter = Filter_##in##to##out ; \
+ return VLC_SUCCESS; \
+ }
+
+/*****************************************************************************
+ * FilterInit: check the input block and allocate the matching output block
+ *****************************************************************************/
+static bool FilterInit( filter_t *p_filter, block_t *p_block, block_t **pp_out )
+{
+ if( !p_block || !p_block->i_nb_samples ) /* nothing to convert */
+ {
+ if( p_block )
+ block_Release( p_block ); /* an empty block is still consumed */
+ return false;
+ }
+
+ size_t i_out_size = p_block->i_nb_samples * /* bytes = samples * bits per sample * channels / 8 */
+ p_filter->fmt_out.audio.i_bitspersample *
+ p_filter->fmt_out.audio.i_channels / 8;
+
+ block_t *p_out = filter_NewAudioBuffer( p_filter, i_out_size );
+ if( !p_out )
+ {
+ msg_Warn( p_filter, "can't get output buffer" );
+ block_Release( p_block ); /* input is dropped when no output can be allocated */
+ return false;
+ }
+
+ p_out->i_nb_samples = p_block->i_nb_samples; /* frame count and timing are copied unchanged */
+ p_out->i_dts = p_block->i_dts;
+ p_out->i_pts = p_block->i_pts;
+ p_out->i_length = p_block->i_length;
+
+ int i_input_nb = aout_FormatNbChannels( &p_filter->fmt_in.audio );
+ int i_output_nb = aout_FormatNbChannels( &p_filter->fmt_out.audio );
+ p_out->i_buffer = p_block->i_buffer * i_output_nb / i_input_nb; /* payload size scaled by the channel-count ratio */
+
+ *pp_out = p_out; /* on success p_block is NOT released here; the caller still reads from it */
+ return true;
+}
+
+FILTER_WRAPPER(7,2) /* Filter_7to2 -> convert_7to2_neon_asm */
+FILTER_WRAPPER(5,2) /* Filter_5to2 -> convert_5to2_neon_asm */
+FILTER_WRAPPER(4,2) /* Filter_4to2 -> convert_4to2_neon_asm */
+FILTER_WRAPPER(3,2) /* Filter_3to2 -> convert_3to2_neon_asm */
+FILTER_WRAPPER(7,1) /* Filter_7to1 -> convert_7to1_neon_asm */
+FILTER_WRAPPER(5,1) /* Filter_5to1 -> convert_5to1_neon_asm */
+FILTER_WRAPPER(7,4) /* Filter_7to4 -> convert_7to4_neon_asm */
+FILTER_WRAPPER(5,4) /* Filter_5to4 -> convert_5to4_neon_asm */
+
+/*****************************************************************************
+ * OpenFilter: select the NEON downmix routine for an FL32 layout conversion
+ *****************************************************************************/
+static int OpenFilter( vlc_object_t *p_this )
+{
+    filter_t *p_filter = (filter_t *)p_this;
+
+    if (!vlc_CPU_ARM_NEON()) /* the assembly routines need NEON at run time */
+        return VLC_EGENERIC;
+
+    audio_format_t fmt_in = p_filter->fmt_in.audio;
+    audio_format_t fmt_out = p_filter->fmt_out.audio;
+
+    fmt_in.i_format = p_filter->fmt_in.i_codec;
+    fmt_out.i_format = p_filter->fmt_out.i_codec;
+
+    if( fmt_in.i_format != VLC_CODEC_FL32 ||      /* float samples only */
+        fmt_in.i_format != fmt_out.i_format ||    /* no sample format change */
+        fmt_in.i_rate != fmt_out.i_rate )         /* no resampling */
+    {
+        return VLC_EGENERIC;
+    }
+
+    if( fmt_in.i_physical_channels == fmt_out.i_physical_channels &&
+        fmt_in.i_original_channels == fmt_out.i_original_channels )
+    {
+        return VLC_EGENERIC; /* same layout on both sides: nothing to mix */
+    }
+
+    const bool b_input_7 = (fmt_in.i_physical_channels & ~AOUT_CHAN_LFE) == AOUT_CHANS_7_0;
+    const bool b_input_5 = (fmt_in.i_physical_channels & ~AOUT_CHAN_LFE) == AOUT_CHANS_5_0 || /* exact match: a superset */
+                           (fmt_in.i_physical_channels & ~AOUT_CHAN_LFE) == AOUT_CHANS_5_0_MIDDLE; /* has more floats per frame than the 5.x routines read */
+    const bool b_input_4 = fmt_in.i_physical_channels == AOUT_CHANS_4_CENTER_REAR; /* no LFE: the 4to2 routine always reads 4 floats per frame */
+    const bool b_input_3 = (fmt_in.i_physical_channels & ~AOUT_CHAN_LFE) == AOUT_CHANS_3_0;
+
+    const bool b_output_1 = fmt_out.i_physical_channels == AOUT_CHAN_CENTER;
+    const bool b_output_2 = fmt_out.i_physical_channels == AOUT_CHANS_2_0;
+    const bool b_output_4 = fmt_out.i_physical_channels == AOUT_CHANS_4_0;
+
+    /* Only conversion to Mono, Stereo and 4.0 right now */
+    /* Only from 7/7.1/5/5.1/4.0 (center + rear center)/3/3.1
+     * XXX 5.X rear and middle are handled the same way */
+
+    TRY_FILTER(7,2)
+    TRY_FILTER(5,2)
+    TRY_FILTER(4,2)
+    TRY_FILTER(3,2)
+    TRY_FILTER(7,1)
+    TRY_FILTER(5,1)
+    TRY_FILTER(7,4)
+    TRY_FILTER(5,4)
+
+    return VLC_EGENERIC; /* no routine for this input/output pair */
+}
--
1.7.10.4
More information about the vlc-devel
mailing list