[vlc-commits] arm_neon: Add an optimized routine for deinterleaving chroma

Martin Storsjö git at videolan.org
Tue Oct 8 11:01:31 CEST 2013


vlc | branch: master | Martin Storsjö <martin at martin.st> | Sun Sep 29 22:50:48 2013 +0300| [5d15f59a0e726adad5bc86b1594d5443780b16b9] | committer: Martin Storsjö

arm_neon: Add an optimized routine for deinterleaving chroma

This supports conversion from NV12/21/16/24 to I420/YV12/I422/I444.

This avoids hitting swscale for the NV12->I420 conversion, for hw
decoders that return NV12/21 in combination with the android vout
in YUV mode.

Signed-off-by: Martin Storsjö <martin at martin.st>

> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=5d15f59a0e726adad5bc86b1594d5443780b16b9
---

 modules/arm_neon/Makefile.am           |    1 +
 modules/arm_neon/chroma_neon.h         |   13 ++++
 modules/arm_neon/chroma_yuv.c          |  106 ++++++++++++++++++++++++++++++++
 modules/arm_neon/deinterleave_chroma.S |   67 ++++++++++++++++++++
 4 files changed, 187 insertions(+)

diff --git a/modules/arm_neon/Makefile.am b/modules/arm_neon/Makefile.am
index 212605f..4e73a4f 100644
--- a/modules/arm_neon/Makefile.am
+++ b/modules/arm_neon/Makefile.am
@@ -7,6 +7,7 @@ libsimple_channel_mixer_neon_plugin_la_CFLAGS = $(AM_CFLAGS)
 libsimple_channel_mixer_neon_plugin_LIBTOOLFLAGS = --tag=CC
 
 libchroma_yuv_neon_plugin_la_SOURCES = \
+	arm_neon/deinterleave_chroma.S \
 	arm_neon/i420_yuyv.S \
 	arm_neon/i422_yuyv.S \
 	arm_neon/yuyv_i422.S \
diff --git a/modules/arm_neon/chroma_neon.h b/modules/arm_neon/chroma_neon.h
index 865315a..e516179 100644
--- a/modules/arm_neon/chroma_neon.h
+++ b/modules/arm_neon/chroma_neon.h
@@ -30,6 +30,14 @@ struct yuv_planes
     size_t pitch;
 };
 
+/* Planar chroma buffers: one pointer per chroma plane and a single shared pitch.
+ * Pitch is in bytes. Field order matters: deinterleave_chroma_neon loads u, v, pitch with one ldmia. */
+struct uv_planes
+{
+    void *u, *v;
+    size_t pitch;
+};
+
 /* Packed picture buffer. Pitch is in bytes (_not_ pixels). */
 struct yuv_pack
 {
@@ -67,6 +75,11 @@ void uyvy_i422_neon (struct yuv_planes *const out,
                      const struct yuv_pack *const in,
                      int width, int height) asm("uyvy_i422_neon");
 
+/* Semiplanar to planar conversion: even bytes of each input row go to out->u, odd bytes to out->v. */
+void deinterleave_chroma_neon (struct uv_planes *const out,
+                               const struct yuv_pack *const in,
+                               int width, int height) asm("deinterleave_chroma_neon"); /* width is rounded up to a multiple of 8 */
+
 /* I420 to RGBA conversion. */
 void i420_rgb_neon (struct yuv_pack *const out,
                     const struct yuv_planes *const in,
diff --git a/modules/arm_neon/chroma_yuv.c b/modules/arm_neon/chroma_yuv.c
index b54732e..34adcac 100644
--- a/modules/arm_neon/chroma_yuv.c
+++ b/modules/arm_neon/chroma_yuv.c
@@ -45,6 +45,15 @@ vlc_module_end ()
     struct yuv_planes planes = { \
         (pict)->Y_PIXELS, (pict)->V_PIXELS, (pict)->U_PIXELS, (pict)->Y_PITCH }
 
+#define DEFINE_UV_PLANES(planes, pict) /* declares planes from the U and V pointers of pict */ \
+    struct uv_planes planes = { \
+        (pict)->U_PIXELS, (pict)->V_PIXELS, (pict)->U_PITCH } /* U_PITCH is used for both planes */
+#define DEFINE_UV_PLANES_SWAP(planes, pict) /* same as DEFINE_UV_PLANES with the two pointers exchanged */ \
+    struct uv_planes planes = { \
+        (pict)->V_PIXELS, (pict)->U_PIXELS, (pict)->U_PITCH }
+#define DEFINE_UV_PACK(pack, pict) /* declares pack over the U_PIXELS buffer of pict (the interleaved source) */ \
+    struct yuv_pack pack = { (pict)->U_PIXELS, (pict)->U_PITCH }
+
 /* Planar YUV420 to packed YUV422 */
 static void I420_YUYV (filter_t *filter, picture_t *src, picture_t *dst)
 {
@@ -83,6 +92,52 @@ static void I420_VYUY (filter_t *filter, picture_t *src, picture_t *dst)
 VIDEO_FILTER_WRAPPER (I420_VYUY)
 
 
+/* Semiplanar NV12/21/16/24 to planar I420/YV12/I422/I444 */
+static void copy_y_plane(filter_t *filter, picture_t *src, picture_t *dst)
+{ /* Copies the Y plane of src into dst; dimensions come from filter->fmt_in.video. */
+    uint8_t *src_y = src->Y_PIXELS;
+    uint8_t *dst_y = dst->Y_PIXELS;
+    if (src->Y_PITCH == dst->Y_PITCH) { /* equal pitches: a single memcpy covers every row, padding included */
+        memcpy(dst_y, src_y, dst->Y_PITCH * filter->fmt_in.video.i_height);
+    } else { /* differing pitches: copy row by row */
+        for (unsigned y = 0; y < filter->fmt_in.video.i_height;
+                y++, dst_y += dst->Y_PITCH, src_y += src->Y_PITCH) /* each pointer advances by its own pitch */
+            memcpy(dst_y, src_y, filter->fmt_in.video.i_width); /* only i_width bytes per row */
+    }
+}
+
+#define SEMIPLANAR_FILTERS(name, h_subsamp, v_subsamp) /* defines filter function name plus its VIDEO_FILTER_WRAPPER */ \
+static void name (filter_t *filter, picture_t *src,                       \
+                  picture_t *dst)                                         \
+{                                                                         \
+    DEFINE_UV_PLANES(out, dst);                                           \
+    DEFINE_UV_PACK(in, src);                                              \
+    copy_y_plane (filter, src, dst);                                      \
+    deinterleave_chroma_neon (&out, &in, /* chroma size = input size / subsampling factors */ \
+                              filter->fmt_in.video.i_width  / h_subsamp,  \
+                              filter->fmt_in.video.i_height / v_subsamp); /* NOTE(review): integer division truncates odd sizes - confirm this is intended */ \
+}                                                                         \
+VIDEO_FILTER_WRAPPER (name)                                               \
+
+#define SEMIPLANAR_FILTERS_SWAP(name, h_subsamp, v_subsamp) /* like SEMIPLANAR_FILTERS, but the two output planes are exchanged */ \
+static void name (filter_t *filter, picture_t *src,                       \
+                  picture_t *dst)                                         \
+{                                                                         \
+    DEFINE_UV_PLANES_SWAP(out, dst); /* the only difference from SEMIPLANAR_FILTERS */ \
+    DEFINE_UV_PACK(in, src);                                              \
+    copy_y_plane (filter, src, dst);                                      \
+    deinterleave_chroma_neon (&out, &in, /* chroma size = input size / subsampling factors */ \
+                              filter->fmt_in.video.i_width  / h_subsamp,  \
+                              filter->fmt_in.video.i_height / v_subsamp); \
+}                                                                         \
+VIDEO_FILTER_WRAPPER (name)                                               \
+
+SEMIPLANAR_FILTERS (Semiplanar_Planar_420, 2, 2) /* selected in Open for NV12->I420 and NV21->YV12 */
+SEMIPLANAR_FILTERS_SWAP (Semiplanar_Planar_420_Swap, 2, 2) /* selected in Open for NV12->YV12 and NV21->I420 */
+SEMIPLANAR_FILTERS (Semiplanar_Planar_422, 2, 1) /* selected in Open for NV16->I422 */
+SEMIPLANAR_FILTERS (Semiplanar_Planar_444, 1, 1) /* selected in Open for NV24->I444 */
+
+
 /* Planar YUV422 to packed YUV422 */
 static void I422_YUYV (filter_t *filter, picture_t *src, picture_t *dst)
 {
@@ -231,6 +286,57 @@ static int Open (vlc_object_t *obj)
             }
             break;
 
+        /* Semiplanar to planar */
+        case VLC_CODEC_NV12:
+            switch (filter->fmt_out.video.i_chroma)
+            {
+                case VLC_CODEC_I420:
+                    filter->pf_video_filter = Semiplanar_Planar_420_Filter;
+                    break;
+                case VLC_CODEC_YV12:
+                    filter->pf_video_filter = Semiplanar_Planar_420_Swap_Filter;
+                    break;
+                default:
+                    return VLC_EGENERIC;
+            }
+            break;
+
+        case VLC_CODEC_NV21:
+            switch (filter->fmt_out.video.i_chroma)
+            {
+                case VLC_CODEC_I420:
+                    filter->pf_video_filter = Semiplanar_Planar_420_Swap_Filter;
+                    break;
+                case VLC_CODEC_YV12:
+                    filter->pf_video_filter = Semiplanar_Planar_420_Filter;
+                    break;
+                default:
+                    return VLC_EGENERIC;
+            }
+            break;
+
+        case VLC_CODEC_NV16:
+            switch (filter->fmt_out.video.i_chroma)
+            {
+                case VLC_CODEC_I422:
+                    filter->pf_video_filter = Semiplanar_Planar_422_Filter;
+                    break;
+                default:
+                    return VLC_EGENERIC;
+            }
+            break;
+
+        case VLC_CODEC_NV24:
+            switch (filter->fmt_out.video.i_chroma)
+            {
+                case VLC_CODEC_I444:
+                    filter->pf_video_filter = Semiplanar_Planar_444_Filter;
+                    break;
+                default:
+                    return VLC_EGENERIC;
+            }
+            break;
+
         /* Packed to planar */
         case VLC_CODEC_YUYV:
             switch (filter->fmt_out.video.i_chroma)
diff --git a/modules/arm_neon/deinterleave_chroma.S b/modules/arm_neon/deinterleave_chroma.S
new file mode 100644
index 0000000..019d647
--- /dev/null
+++ b/modules/arm_neon/deinterleave_chroma.S
@@ -0,0 +1,67 @@
+ @*****************************************************************************
+ @ deinterleave_chroma.S : ARM NEONv1 conversion of interleaved to planar chroma
+ @*****************************************************************************
+ @ Copyright (C) 2009-2011 Rémi Denis-Courmont
+ @ Copyright (C) 2013 Martin Storsjö
+ @
+ @ This program is free software; you can redistribute it and/or modify
+ @ it under the terms of the GNU Lesser General Public License as published by
+ @ the Free Software Foundation; either version 2.1 of the License, or
+ @ (at your option) any later version.
+ @
+ @ This program is distributed in the hope that it will be useful,
+ @ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ @ GNU Lesser General Public License for more details.
+ @
+ @ You should have received a copy of the GNU Lesser General Public License
+ @ along with this program; if not, write to the Free Software Foundation,
+ @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ @****************************************************************************/
+
+	.syntax unified
+	.fpu neon
+	.text
+
+#define UV	r0	/* r0: out on entry, then the read pointer into the interleaved input */
+#define COUNT	r1	/* r1: in on entry, then samples left in the current row */
+#define WIDTH	r2	/* row width, rounded up to a multiple of 8 */
+#define HEIGHT	r3	/* rows left to process */
+#define IPITCH	r4	/* input pitch as loaded from in */
+#define IPAD	r4	/* reuses r4: input pitch minus bytes read per row */
+#define U	r5	/* write pointer loaded from out->u */
+#define V	r6	/* write pointer loaded from out->v */
+#define OPITCH	lr	/* output pitch as loaded from out */
+#define OPAD	lr	/* reuses lr: output pitch minus bytes written per row */
+
+	.align 2
+	.global deinterleave_chroma_neon
+	.type	deinterleave_chroma_neon, %function
+deinterleave_chroma_neon:	@ entry: r0 = out, r1 = in, r2 = width, r3 = height
+	push		{r4-r6,lr}
+	ldmia		r0,	{U, V, OPITCH}	@ load out->u, out->v, out->pitch
+	ldmia		r1,	{UV, IPITCH}	@ load the pixel pointer and pitch from in
+	cmp		HEIGHT,	#0	@ flags are consumed at label 1; nothing in between sets them
+
+	@ round the width up to a multiple of 8
+	add		WIDTH,	WIDTH, #7
+	bic		WIDTH,	WIDTH, #7
+
+	sub		IPAD,	IPITCH,	WIDTH, lsl #1	@ input padding = pitch - 2 * rounded width
+	sub		OPAD,	OPITCH,	WIDTH	@ output padding = pitch - rounded width
+1:
+	movsgt		COUNT,	WIDTH	@ rows remain: reset the row counter (also updates the flags)
+	pople		{r4-r6,pc}	@ no rows left, or rounded width <= 0: restore registers and return
+2:
+	pld		[UV, #64]	@ prefetch ahead of the read pointer
+	vld2.u8		{d0, d1},	[UV,:128]!	@ read 16 bytes: even bytes to d0, odd bytes to d1
+	subs		COUNT,	COUNT,	#8	@ 8 samples per plane handled; sets the flags for bgt
+	vst1.u8		{d0},		[U,:64]!	@ 8 bytes to the first output plane
+	vst1.u8		{d1},		[V,:64]!	@ 8 bytes to the second output plane
+	bgt		2b	@ loop while samples remain in this row
+
+	subs		HEIGHT,	#1	@ sets the flags tested at label 1; the adds below keep them
+	add		UV,	UV,	IPAD	@ skip row padding on the input
+	add		U,	U,	OPAD	@ skip row padding on both outputs
+	add		V,	V,	OPAD
+	b		1b



More information about the vlc-commits mailing list