[vlc-commits] Clean up NEON chroma converter

Rémi Denis-Courmont git at videolan.org
Thu Jul 7 19:25:18 CEST 2011


vlc | branch: master | Rémi Denis-Courmont <remi at remlab.net> | Thu Jul  7 20:21:09 2011 +0300| [d7472f3aa1234089237fed9ae52e37b5c92d4133] | committer: Rémi Denis-Courmont

Clean up NEON chroma converter

 - do not assume output pitch equals (double) pixel width
 - improve function prototypes
 - handle zero-width or zero-height corner cases in ASM (totally useless)
 - use ARM condition flag (opS) as appropriate

> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=d7472f3aa1234089237fed9ae52e37b5c92d4133
---

 modules/arm_neon/Modules.am                    |    2 +-
 modules/arm_neon/chroma_neon.h                 |   48 ++++++++++++
 modules/arm_neon/{i420_yuy2.c => chroma_yuv.c} |   59 ++++++--------
 modules/arm_neon/i420_yuyv.S                   |   98 +++++++++++-------------
 4 files changed, 118 insertions(+), 89 deletions(-)

diff --git a/modules/arm_neon/Modules.am b/modules/arm_neon/Modules.am
index 5b0748c..93500b7 100644
--- a/modules/arm_neon/Modules.am
+++ b/modules/arm_neon/Modules.am
@@ -11,7 +11,7 @@ libaudio_format_neon_plugin_la_DEPENDENCIES =
 
 libi420_yuy2_neon_plugin_la_SOURCES = \
 	i420_yuyv.S \
-	i420_yuy2.c
+	chroma_yuv.c chroma_neon.h
 libi420_yuy2_neon_plugin_la_CFLAGS = $(AM_CFLAGS)
 libi420_yuy2_neon_plugin_la_LIBADD = $(AM_LIBADD)
 libi420_yuy2_neon_plugin_la_DEPENDENCIES =
diff --git a/modules/arm_neon/chroma_neon.h b/modules/arm_neon/chroma_neon.h
new file mode 100644
index 0000000..40bfbcc
--- /dev/null
+++ b/modules/arm_neon/chroma_neon.h
@@ -0,0 +1,48 @@
+/*****************************************************************************
+ * chroma_neon.h
+ *****************************************************************************
+ * Copyright (C) 2011 Rémi Denis-Courmont
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+/* Planes must start on a 16-bytes boundary. Pitches must be multiples of 16
+ * bytes even for subsampled components. */
+
+/* Planar picture buffer.
+ * Pitch corresponds to luminance component in bytes. Chrominance pitches are
+ * inferred from the color subsampling ratio. */
+struct yuv_planes
+{
+	void *y, *u, *v;
+	size_t pitch;
+};
+
+/* Packed picture buffer. Pitch is in bytes (_not_ pixels). */
+struct yuv_pack
+{
+	void *yuv;
+	size_t pitch;
+};
+
+/* I420 to YUYV conversion. */
+void i420_yuyv_neon (struct yuv_pack *const out,
+                     const struct yuv_planes *const in,
+                     int width, int height);
+
+/* I420 to UYVY conversion. */
+void i420_uyvy_neon (struct yuv_pack *const out,
+                     const struct yuv_planes *const in,
+                     int width, int height);
diff --git a/modules/arm_neon/i420_yuy2.c b/modules/arm_neon/chroma_yuv.c
similarity index 67%
rename from modules/arm_neon/i420_yuy2.c
rename to modules/arm_neon/chroma_yuv.c
index 5cc9907..0dc66ed 100644
--- a/modules/arm_neon/i420_yuy2.c
+++ b/modules/arm_neon/chroma_yuv.c
@@ -26,6 +26,7 @@
 #include <vlc_plugin.h>
 #include <vlc_filter.h>
 #include <vlc_cpu.h>
+#include "chroma_neon.h"
 
 static int Open (vlc_object_t *);
 
@@ -35,58 +36,48 @@ vlc_module_begin ()
     set_callbacks (Open, NULL)
 vlc_module_end ()
 
-void i420_yuyv_neon (uint8_t *out, const uint8_t **in,
-                     unsigned int pitch, unsigned int s_off,
-                     unsigned int height);
+#define DEFINE_PACK(pack, pict) \
+    struct yuv_pack pack = { (pict)->Y_PIXELS, (pict)->Y_PITCH }
+#define DEFINE_PLANES(planes, pict) \
+    struct yuv_planes planes = { \
+        (pict)->Y_PIXELS, (pict)->U_PIXELS, (pict)->V_PIXELS, (pict)->Y_PITCH }
+#define DEFINE_PLANES_SWAP(planes, pict) \
+    struct yuv_planes planes = { \
+        (pict)->Y_PIXELS, (pict)->V_PIXELS, (pict)->U_PIXELS, (pict)->Y_PITCH }
 
 static void I420_YUYV (filter_t *filter, picture_t *src, picture_t *dst)
 {
-    uint8_t *out = dst->p->p_pixels;
-    const uint8_t *yuv[3] = { src->Y_PIXELS, src->U_PIXELS, src->V_PIXELS, };
-    size_t height = filter->fmt_in.video.i_height;
-    int i_pitch = (dst->p->i_pitch >> 1) & ~0xF;
-    int s_offset = src->p->i_pitch - i_pitch;
-
-    i420_yuyv_neon (out, yuv, i_pitch, s_offset, height);
+    DEFINE_PACK(out, dst);
+    DEFINE_PLANES(in, src);
+    i420_yuyv_neon (&out, &in, filter->fmt_in.video.i_width,
+                    filter->fmt_in.video.i_height);
 }
 VIDEO_FILTER_WRAPPER (I420_YUYV)
 
 static void YV12_YUYV (filter_t *filter, picture_t *src, picture_t *dst)
 {
-    uint8_t *out = dst->p->p_pixels;
-    const uint8_t *yuv[3] = { src->Y_PIXELS, src->V_PIXELS, src->U_PIXELS, };
-    size_t height = filter->fmt_in.video.i_height;
-    int i_pitch = (dst->p->i_pitch >> 1) & ~0xF;
-    int s_offset = src->p->i_pitch - i_pitch;
-
-    i420_yuyv_neon (out, yuv, i_pitch, s_offset, height);
+    DEFINE_PACK(out, dst);
+    DEFINE_PLANES_SWAP(in, src);
+    i420_yuyv_neon (&out, &in, filter->fmt_in.video.i_width,
+                    filter->fmt_in.video.i_height);
 }
 VIDEO_FILTER_WRAPPER (YV12_YUYV)
 
-void i420_uyvy_neon (uint8_t *out, const uint8_t **in,
-                     uintptr_t pitch, uintptr_t s_off, uintptr_t height);
-
 static void I420_UYVY (filter_t *filter, picture_t *src, picture_t *dst)
 {
-    uint8_t *out = dst->p->p_pixels;
-    const uint8_t *yuv[3] = { src->Y_PIXELS, src->U_PIXELS, src->V_PIXELS, };
-    size_t height = filter->fmt_in.video.i_height;
-    int i_pitch = (dst->p->i_pitch >> 1) & ~0xF;
-    int s_offset = src->p->i_pitch - i_pitch;
-
-    i420_uyvy_neon (out, yuv, i_pitch, s_offset, height);
+    DEFINE_PACK(out, dst);
+    DEFINE_PLANES(in, src);
+    i420_uyvy_neon (&out, &in, filter->fmt_in.video.i_width,
+                    filter->fmt_in.video.i_height);
 }
 VIDEO_FILTER_WRAPPER (I420_UYVY)
 
 static void YV12_UYVY (filter_t *filter, picture_t *src, picture_t *dst)
 {
-    uint8_t *out = dst->p->p_pixels;
-    const uint8_t *yuv[3] = { src->Y_PIXELS, src->V_PIXELS, src->U_PIXELS, };
-    size_t height = filter->fmt_in.video.i_height;
-    int i_pitch = (dst->p->i_pitch >> 1) & ~0xF;
-    int s_offset = src->p->i_pitch - i_pitch;
-
-    i420_uyvy_neon (out, yuv, i_pitch, s_offset, height);
+    DEFINE_PACK(out, dst);
+    DEFINE_PLANES_SWAP(in, src);
+    i420_uyvy_neon (&out, &in, filter->fmt_in.video.i_width,
+                    filter->fmt_in.video.i_height);
 }
 VIDEO_FILTER_WRAPPER (YV12_UYVY)
 
diff --git a/modules/arm_neon/i420_yuyv.S b/modules/arm_neon/i420_yuyv.S
index 556680f..67c3043 100644
--- a/modules/arm_neon/i420_yuyv.S
+++ b/modules/arm_neon/i420_yuyv.S
@@ -1,7 +1,7 @@
  @*****************************************************************************
  @ i420_yuyv_neon.S : ARM NEONv1 I420 to YUYV chroma conversion
  @*****************************************************************************
- @ Copyright (C) 2009 Rémi Denis-Courmont
+ @ Copyright (C) 2009-2011 Rémi Denis-Courmont
  @
  @ This program is free software; you can redistribute it and/or modify
  @ it under the terms of the GNU General Public License as published by
@@ -23,28 +23,33 @@
 
 #define O1	r0
 #define O2	r1
-#define PITCH	r2
-#define S_OFF	r3
+#define WIDTH	r2
+#define HEIGHT	r3
 #define Y1	r4
 #define Y2	r5
 #define U	r6
 #define V	r7
-#define HEIGHT	r8
-#define END_O1	r12
+#define YPITCH	r8
+#define OPAD	r10
+#define YPAD	r11
+#define COUNT	ip
+#define OPITCH	lr
 
 	.align
 	.global i420_yuyv_neon
 	.type	i420_yuyv_neon, %function
 i420_yuyv_neon:
-	push		{r4-r8, lr}
-	ldr		HEIGHT, [sp, #(4*6)]
-	ldmia		r1,	{Y1, U, V}
-	add		O2,	O1,	PITCH, lsl #1
-	add		Y2,	Y1,	PITCH
-	add		Y2,	S_OFF
+	push		{r4-r8,r10-r11,lr}
+	ldmia		r0,	{O1, OPITCH}
+	ldmia		r1,	{Y1, U, V, YPITCH}
+	cmp		HEIGHT,	#0
+	sub		OPAD,	OPITCH,	WIDTH,	lsl #1
+	sub		YPAD,	YPITCH,	WIDTH
 1:
-	mov		END_O1,	O2
-	pld		[Y2]
+	movgts		COUNT,	WIDTH
+	add		O2,	O1,	OPITCH
+	add		Y2,	Y1,	YPITCH
+	pople		{r4-r8,r10-r11,pc}
 2:
 	pld		[U, #64]
 	vld1.u8		{d2},		[U,:64]!
@@ -52,6 +57,7 @@ i420_yuyv_neon:
 	vld1.u8		{d3},		[V,:64]!
 	pld		[Y1, #64]
 	vzip.u8		d2,	d3
+	subs		COUNT,	COUNT,	#16
 	vld1.u8		{q0},		[Y1,:128]!
 	pld		[Y2, #64]
 	vmov		q3,	q1
@@ -60,36 +66,29 @@ i420_yuyv_neon:
 	vzip.u8		q2,	q3
 	vst1.u8		{q0-q1},	[O1,:128]!
 	vst1.u8		{q2-q3},	[O2,:128]!
+	bgt		2b
 
-	cmp		O1,	END_O1
-	bne		2b
-
-	sub		HEIGHT,	#2
-	mov		O1,	O2
-	add		O2,	PITCH,	lsl #1
-	add		Y2,	S_OFF
-	mov		Y1,	Y2
-	add		Y2,	PITCH
-	add		Y2,	S_OFF
-	add		U,	S_OFF,	lsr #1
-	add		V,	S_OFF,	lsr #1
-
-	cmp		HEIGHT,	#0
-	bne		1b
-
-	pop		{r4-r8, pc}
+	subs		HEIGHT,	#2
+	add		O1,	O2,	OPAD
+	add		Y1,	Y2,	YPAD
+	add		U,	U,	YPAD,	lsr #1
+	add		V,	V,	YPAD,	lsr #1
+	b		1b
 
 	.global i420_uyvy_neon
 	.type	i420_uyvy_neon, %function
 i420_uyvy_neon:
-	push		{r4-r8, lr}
-	ldr		HEIGHT, [sp, #(4*6)]
-	ldmia		r1,	{Y1, U, V}
-	add		O2,	O1,	PITCH, lsl #1
-	add		Y2,	Y1,	PITCH
-	add		Y2,	S_OFF
+	push		{r4-r8,r10-r11,lr}
+	ldmia		r0,	{O1, OPITCH}
+	ldmia		r1,	{Y1, U, V, YPITCH}
+	cmp		HEIGHT,	#0
+	sub		OPAD,	OPITCH,	WIDTH,	lsl #1
+	sub		YPAD,	YPITCH,	WIDTH
 1:
-	mov		END_O1,	O2
+	movgts		COUNT,	WIDTH
+	add		O2,	O1,	OPITCH
+	add		Y2,	Y1,	YPITCH
+	pople		{r4-r8,r10-r11,pc}
 2:
 	pld		[U, #64]
 	vld1.u8		{d0},		[U,:64]!
@@ -97,6 +96,7 @@ i420_uyvy_neon:
 	vld1.u8		{d1},		[V,:64]!
 	pld		[Y1, #64]
 	vzip.u8		d0,	d1
+	subs		COUNT,	COUNT,	#16
 	vld1.u8		{q1},		[Y1,:128]!
 	pld		[Y2, #64]
 	vmov		q2,	q0
@@ -105,21 +105,11 @@ i420_uyvy_neon:
 	vzip.u8		q2,	q3
 	vst1.u8		{q0-q1},	[O1,:128]!
 	vst1.u8		{q2-q3},	[O2,:128]!
+	bgt		2b
 
-	cmp		O1,	END_O1
-	bne		2b
-
-	sub		HEIGHT,	#2
-	mov		O1,	O2
-	add		O2,	PITCH,	lsl #1
-	add		Y2,	S_OFF
-	mov		Y1,	Y2
-	add		Y2,	PITCH
-	add		Y2,	S_OFF
-	add		U,	S_OFF,	lsr #1
-	add		V,	S_OFF,	lsr #1
-
-	cmp		HEIGHT,	#0
-	bne		1b
-
-	pop		{r4-r8, pc}
+	subs		HEIGHT,	#2
+	add		O1,	O2,	OPAD
+	add		Y1,	Y2,	YPAD
+	add		U,	U,	YPAD,	lsr #1
+	add		V,	V,	YPAD,	lsr #1
+	b		1b



More information about the vlc-commits mailing list