[vlc-devel] commit: NEON accelerated I420/YV12 -> YUYV/UYVY chroma conversion ( Rémi Denis-Courmont )
git version control
git at videolan.org
Sun Sep 20 10:47:04 CEST 2009
vlc | branch: master | Rémi Denis-Courmont <remi at remlab.net> | Sun Sep 20 11:29:47 2009 +0300| [d4a730bbabc16f80392ae36995865c92e36ac66e] | committer: Rémi Denis-Courmont
NEON accelerated I420/YV12 -> YUYV/UYVY chroma conversion
> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=d4a730bbabc16f80392ae36995865c92e36ac66e
---
modules/video_chroma/Modules.am | 10 +++
modules/video_chroma/i420_yuyv_neon.S | 108 +++++++++++++++++++++++++++++++++
modules/video_chroma/neon.c | 97 +++++++++++++++++++++++++++++
3 files changed, 215 insertions(+), 0 deletions(-)
diff --git a/modules/video_chroma/Modules.am b/modules/video_chroma/Modules.am
index f886bec..eb0298f 100644
--- a/modules/video_chroma/Modules.am
+++ b/modules/video_chroma/Modules.am
@@ -83,3 +83,13 @@ libvlc_LTLIBRARIES += \
libi420_rgb_plugin.la \
libgrey_yuv_plugin.la \
$(NULL)
+# NEON chroma conversion plugin: always declared, but only added to libvlc_LTLIBRARIES when HAVE_NEON is set.
+libchroma_neon_plugin_la_SOURCES = \
+ i420_yuyv_neon.S \
+ neon.c
+libchroma_neon_plugin_la_CFLAGS = $(AM_CFLAGS)
+libchroma_neon_plugin_la_LIBADD = $(AM_LIBADD)
+libchroma_neon_plugin_la_DEPENDENCIES =
+if HAVE_NEON
+libvlc_LTLIBRARIES += libchroma_neon_plugin.la
+endif
diff --git a/modules/video_chroma/i420_yuyv_neon.S b/modules/video_chroma/i420_yuyv_neon.S
new file mode 100644
index 0000000..c9be91a
--- /dev/null
+++ b/modules/video_chroma/i420_yuyv_neon.S
@@ -0,0 +1,108 @@
+ @****************************************************************************
+ @ i420_yuyv_neon.S : ARM NEONv1 I420 to YUYV/UYVY chroma conversion
+ @*****************************************************************************
+ @ Copyright (C) 2009 Rémi Denis-Courmont
+ @
+ @ This program is free software; you can redistribute it and/or modify
+ @ it under the terms of the GNU General Public License as published by
+ @ the Free Software Foundation; either version 2 of the License, or
+ @ (at your option) any later version.
+ @
+ @ This program is distributed in the hope that it will be useful,
+ @ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ @ GNU General Public License for more details.
+ @
+ @ You should have received a copy of the GNU General Public License
+ @ along with this program; if not, write to the Free Software
+ @ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ @****************************************************************************/
+
+ .fpu neon
+ .text
+ @ Register roles: O1/O2 = output rows, Y1/Y2 = luma rows, U/V = chroma rows, END_O1 = end of first output row
+#define O1 r0
+#define O2 r1
+#define PITCH r2
+#define HEIGHT r3
+#define Y1 r4
+#define Y2 r5
+#define U r6
+#define V r7
+#define END_O1 r8
+
+ .align
+ .global i420_uyvy_neon
+ .type i420_uyvy_neon, %function
+i420_uyvy_neon: @ UYVY entry point, same arguments as i420_yuyv_neon
+ push {r4-r8} @ save the registers this routine overwrites
+ add r8, pc, #(indexes+64-.-8) @ r8 = indexes + 64, i.e. the UYVY1 table
+ b i420_pack_neon @ join the shared conversion loop
+
+ .global i420_yuyv_neon
+ .type i420_yuyv_neon, %function
+i420_yuyv_neon: @ YUYV entry point: r0 = output, r1 = array of three plane pointers, r2 = pitch, r3 = height
+ push {r4-r8} @ save the registers this routine overwrites
+ add r8, pc, #(indexes-.-8) @ r8 = indexes, i.e. the YUYV1 table
+ .hidden i420_pack_neon
+i420_pack_neon: @ shared body, r8 selects the pair of lookup tables
+ vld1.u8 {d24-d27}, [r8]! @ d24-d27 = 32 index bytes for the first line, r8 advances to the second table
+ ldmia r1, {r4, r6, r7} @ fetch the three plane pointers from the array in r1, before r1 is reused as the second output row
+ vld1.u8 {d28-d31}, [r8] @ d28-d31 = 32 index bytes for the second line
+ add O2, O1, PITCH, lsl #1 @ second output line starts 2 * pitch bytes after the first
+ add Y2, Y1, PITCH @ second luma line starts pitch bytes after the first
+1: @ outer loop, once per pair of lines
+ mov END_O1, O2 @ the first output line ends where the second one begins
+2: @ inner loop, once per 16 pixels of both lines
+ vld1.u8 {d0-d1}, [Y1,:128]! @ 16 luma bytes of the first line, 128-bit alignment hint
+ vld1.u8 {d2}, [U,:64]! @ 8 bytes from the second plane pointer, used for both lines
+ vld1.u8 {d3}, [V,:64]! @ 8 bytes from the third plane pointer, used for both lines
+ vld1.u8 {d4-d5}, [Y2,:128]! @ 16 luma bytes of the second line
+ vtbl.u8 d16, {d0-d3}, d24 @ first line lookup: 0x00-0x0F = luma, 0x10-0x17 = d2 chroma, 0x18-0x1F = d3 chroma
+ vtbl.u8 d17, {d0-d3}, d25
+ vtbl.u8 d18, {d0-d3}, d26
+ vtbl.u8 d19, {d0-d3}, d27
+ vtbl.u8 d20, {d2-d5}, d28 @ second line lookup: 0x00-0x07 = d2 chroma, 0x08-0x0F = d3 chroma, 0x10-0x1F = luma
+ vtbl.u8 d21, {d2-d5}, d29
+ vtbl.u8 d22, {d2-d5}, d30
+ vtbl.u8 d23, {d2-d5}, d31
+ vst1.u8 {d16-d19}, [O1,:128]! @ store 32 packed bytes of the first line
+ vst1.u8 {d20-d23}, [O2,:128]! @ store 32 packed bytes of the second line
+
+ cmp O1, END_O1
+ bne 2b @ exact compare: repeat until the first output line is full
+
+ sub HEIGHT, #2 @ two lines have been converted
+ mov O1, O2 @ the second row pointer now sits at the start of the next pair of output lines
+ add O2, PITCH, lsl #1
+ mov Y1, Y2 @ likewise for the luma rows
+ add Y2, PITCH @ chroma pointers are left as is, so chroma rows are assumed to be pitch / 2 bytes apart - TODO confirm
+
+ cmp HEIGHT, #0
+ bne 1b @ NOTE(review): exact compare after the first pass, so height is expected to be even and non-zero - confirm callers
+
+ pop {r4-r8} @ restore the saved registers
+ bx lr
+
+ .hidden indexes
+indexes: @ four 32-byte vtbl index tables, 128 bytes in total
+ @ YUYV1
+ .byte 0x00, 0x10, 0x01, 0x18, 0x02, 0x11, 0x03, 0x19
+ .byte 0x04, 0x12, 0x05, 0x1A, 0x06, 0x13, 0x07, 0x1B
+ .byte 0x08, 0x14, 0x09, 0x1C, 0x0A, 0x15, 0x0B, 0x1D
+ .byte 0x0C, 0x16, 0x0D, 0x1E, 0x0E, 0x17, 0x0F, 0x1F
+ @ YUYV2
+ .byte 0x10, 0x00, 0x11, 0x08, 0x12, 0x01, 0x13, 0x09
+ .byte 0x14, 0x02, 0x15, 0x0A, 0x16, 0x03, 0x17, 0x0B
+ .byte 0x18, 0x04, 0x19, 0x0C, 0x1A, 0x05, 0x1B, 0x0D
+ .byte 0x1C, 0x06, 0x1D, 0x0E, 0x1E, 0x07, 0x1F, 0x0F
+ @ UYVY1
+ .byte 0x10, 0x00, 0x18, 0x01, 0x11, 0x02, 0x19, 0x03
+ .byte 0x12, 0x04, 0x1A, 0x05, 0x13, 0x06, 0x1B, 0x07
+ .byte 0x14, 0x08, 0x1C, 0x09, 0x15, 0x0A, 0x1D, 0x0B
+ .byte 0x16, 0x0C, 0x1E, 0x0D, 0x17, 0x0E, 0x1F, 0x0F
+ @ UYVY2
+ .byte 0x00, 0x10, 0x08, 0x11, 0x01, 0x12, 0x09, 0x13
+ .byte 0x02, 0x14, 0x0A, 0x15, 0x03, 0x16, 0x0B, 0x17
+ .byte 0x04, 0x18, 0x0C, 0x19, 0x05, 0x1A, 0x0D, 0x1B
+ .byte 0x06, 0x1C, 0x0E, 0x1D, 0x07, 0x1E, 0x0F, 0x1F
diff --git a/modules/video_chroma/neon.c b/modules/video_chroma/neon.c
new file mode 100644
index 0000000..b8a1785
--- /dev/null
+++ b/modules/video_chroma/neon.c
@@ -0,0 +1,97 @@
+/*****************************************************************************
+ * neon.c : ARM NEONv1 chroma conversion module for VLC
+ *****************************************************************************
+ * Copyright (C) 2009 Rémi Denis-Courmont
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <vlc_common.h>
+#include <vlc_plugin.h>
+#include <vlc_filter.h>
+
+static int Open (vlc_object_t *);
+/* Plugin descriptor: declares the "video filter2" capability with value 250,
+ * passes Open and NULL to set_callbacks, and adds a NEON requirement.
+ * NOTE(review): 250 is presumably the module priority - confirm. */
+vlc_module_begin ()
+ set_description (N_("ARM NEON video chroma conversions"))
+ set_capability ("video filter2", 250)
+ set_callbacks (Open, NULL)
+ add_requirement (NEON)
+vlc_module_end ()
+
+/* NEON routine exported by i420_yuyv_neon.S. `in` points to three plane
+ * pointers; `pitch` and `height` drive its line and line-pair loops. */
+void i420_yuyv_neon (uint8_t *out, const uint8_t **in,
+                     uintptr_t pitch, uintptr_t height);
+
+/*
+ * Packs the three planes of `src` into the first plane of `dst` in YUYV
+ * order by delegating to the NEON routine.
+ *
+ * NOTE(review): the pitch handed to the routine is derived from the input
+ * width (rounded up to a multiple of 16), not read from the pictures;
+ * confirm the picture planes are laid out with that pitch.
+ */
+static void I420_YUYV (filter_t *filter, picture_t *src, picture_t *dst)
+{
+    const uint8_t *planes[3];
+    size_t lines = filter->fmt_in.video.i_height;
+    /* Round the width up to a whole number of 16-pixel groups. */
+    size_t stride = (filter->fmt_in.video.i_width + 15) & ~15;
+
+    planes[0] = src->Y_PIXELS;
+    planes[1] = src->U_PIXELS;
+    planes[2] = src->V_PIXELS;
+
+    i420_yuyv_neon (dst->p->p_pixels, planes, stride, lines);
+}
+
+/* NEON routine exported by i420_yuyv_neon.S (UYVY entry point). `in` points
+ * to three plane pointers; `pitch` and `height` drive its loops. */
+void i420_uyvy_neon (uint8_t *out, const uint8_t **in,
+                     uintptr_t pitch, uintptr_t height);
+
+/*
+ * Packs the three planes of `src` into the first plane of `dst` in UYVY
+ * order by delegating to the NEON routine.
+ *
+ * NOTE(review): the pitch handed to the routine is derived from the input
+ * width (rounded up to a multiple of 16), not read from the pictures;
+ * confirm the picture planes are laid out with that pitch.
+ */
+static void I420_UYVY (filter_t *filter, picture_t *src, picture_t *dst)
+{
+    uint8_t *out = dst->p->p_pixels;
+    const uint8_t *yuv[3] = { src->Y_PIXELS, src->U_PIXELS, src->V_PIXELS, };
+    size_t pitch = (filter->fmt_in.video.i_width + 15) & ~15;
+    size_t height = filter->fmt_in.video.i_height;
+
+    /* Must be the UYVY entry point: calling i420_yuyv_neon here would emit
+     * YUYV byte order for a UYVY output format. */
+    i420_uyvy_neon (out, yuv, pitch, height);
+}
+/* NOTE(review): presumably expands to the I420_YUYV_Filter and I420_UYVY_Filter functions that Open installs; the macro is defined outside this file - confirm. */
+VIDEO_FILTER_WRAPPER (I420_YUYV)
+VIDEO_FILTER_WRAPPER (I420_UYVY)
+
+/*
+ * Module callback: accepts the filter only for I420/YV12 input and
+ * YUYV/UYVY output of identical, even, non-zero dimensions, and installs
+ * the matching conversion function in filter->pf_video_filter.
+ *
+ * Returns VLC_SUCCESS when a converter was installed, VLC_EGENERIC otherwise.
+ */
+static int Open (vlc_object_t *obj)
+{
+    filter_t *filter = (filter_t *)obj;
+
+    /* The NEON loops run their body once before testing their counters and
+     * only stop on exact equality, so a zero width or height would make them
+     * run past the picture buffers instead of terminating. */
+    if (filter->fmt_in.video.i_width == 0
+     || filter->fmt_in.video.i_height == 0)
+        return VLC_EGENERIC;
+
+    /* Lines are converted in pairs sharing one chroma line, and no scaling
+     * is performed: reject odd sizes and any size change. */
+    if (((filter->fmt_in.video.i_width | filter->fmt_in.video.i_height) & 1)
+     || (filter->fmt_in.video.i_width != filter->fmt_out.video.i_width)
+     || (filter->fmt_in.video.i_height != filter->fmt_out.video.i_height))
+        return VLC_EGENERIC;
+
+    switch (filter->fmt_in.video.i_chroma)
+    {
+        case VLC_CODEC_YV12:
+        case VLC_CODEC_I420:
+            switch (filter->fmt_out.video.i_chroma)
+            {
+                case VLC_CODEC_YUYV:
+                    filter->pf_video_filter = I420_YUYV_Filter;
+                    break;
+                case VLC_CODEC_UYVY:
+                    filter->pf_video_filter = I420_UYVY_Filter;
+                    break;
+                default:
+                    /* Unsupported output chroma. */
+                    return VLC_EGENERIC;
+            }
+            break;
+
+        default:
+            /* Unsupported input chroma. */
+            return VLC_EGENERIC;
+    }
+    return VLC_SUCCESS;
+}
More information about the vlc-devel
mailing list