[vlc-commits] chroma_yuv_neon: packed to planar YUV422
Rémi Denis-Courmont
git at videolan.org
Thu Jul 7 22:13:17 CEST 2011
vlc | branch: master | Rémi Denis-Courmont <remi at remlab.net> | Thu Jul 7 23:12:58 2011 +0300| [d063eece351c0f0da7023c511143e95c6bd1eac8] | committer: Rémi Denis-Courmont
chroma_yuv_neon: packed to planar YUV422
> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=d063eece351c0f0da7023c511143e95c6bd1eac8
---
modules/arm_neon/chroma_neon.h | 8 +++
modules/arm_neon/chroma_yuv.c | 80 ++++++++++++++++++++++++++++++++++
modules/arm_neon/yuyv_i422.S | 94 ++++++++++++++++++++++++++++++++++++++++
3 files changed, 182 insertions(+), 0 deletions(-)
diff --git a/modules/arm_neon/chroma_neon.h b/modules/arm_neon/chroma_neon.h
index df8730e..204c5f1 100644
--- a/modules/arm_neon/chroma_neon.h
+++ b/modules/arm_neon/chroma_neon.h
@@ -56,3 +56,11 @@ void i422_yuyv_neon (struct yuv_pack *const out,
void i422_uyvy_neon (struct yuv_pack *const out,
const struct yuv_planes *const in,
int width, int height);
+
+/* YUYV to I422 conversion.
+ * Splits the packed picture described by in into the separate Y, U and V
+ * planes described by out. The NEON implementation converts 16 pixels per
+ * inner loop iteration and returns without writing anything when width or
+ * height is not strictly positive. */
+void yuyv_i422_neon (struct yuv_planes *const out,
+ const struct yuv_pack *const in, int width, int height);
+
+/* UYVY to I422 conversion.
+ * Same contract as the YUYV variant, except that the chroma samples occupy
+ * the even bytes of the packed input and the luma samples the odd bytes.
+ * The NEON implementation converts 16 pixels per inner loop iteration and
+ * returns without writing anything when width or height is not strictly
+ * positive. */
+void uyvy_i422_neon (struct yuv_planes *const out,
+ const struct yuv_pack *const in, int width, int height);
diff --git a/modules/arm_neon/chroma_yuv.c b/modules/arm_neon/chroma_yuv.c
index 3ac30fb..fc9ca0f 100644
--- a/modules/arm_neon/chroma_yuv.c
+++ b/modules/arm_neon/chroma_yuv.c
@@ -121,6 +121,43 @@ static void I422_VYUY (filter_t *filter, picture_t *src, picture_t *dst)
VIDEO_FILTER_WRAPPER (I422_VYUY)
+/* Packed YUV422 to planar YUV422 */
+/* YUYV (packed) to I422 (planar), delegated to the NEON routine. */
+static void YUYV_I422 (filter_t *filter, picture_t *src, picture_t *dst)
+{
+ /* The picture dimensions are taken from the filter input format. */
+ const int width = filter->fmt_in.video.i_width;
+ const int height = filter->fmt_in.video.i_height;
+ DEFINE_PACK(in, src);
+ DEFINE_PLANES(out, dst);
+
+ yuyv_i422_neon (&out, &in, width, height);
+}
+VIDEO_FILTER_WRAPPER (YUYV_I422)
+
+/* YVYU (packed) to I422 (planar). Reuses the YUYV routine on an output
+ * descriptor built with DEFINE_PLANES_SWAP (presumably U and V exchanged;
+ * confirm against the macro definition). */
+static void YVYU_I422 (filter_t *filter, picture_t *src, picture_t *dst)
+{
+ /* The picture dimensions are taken from the filter input format. */
+ const int width = filter->fmt_in.video.i_width;
+ const int height = filter->fmt_in.video.i_height;
+ DEFINE_PACK(in, src);
+ DEFINE_PLANES_SWAP(out, dst);
+
+ yuyv_i422_neon (&out, &in, width, height);
+}
+VIDEO_FILTER_WRAPPER (YVYU_I422)
+
+/* UYVY (packed) to I422 (planar), delegated to the NEON routine. */
+static void UYVY_I422 (filter_t *filter, picture_t *src, picture_t *dst)
+{
+ /* The picture dimensions are taken from the filter input format. */
+ const int width = filter->fmt_in.video.i_width;
+ const int height = filter->fmt_in.video.i_height;
+ DEFINE_PACK(in, src);
+ DEFINE_PLANES(out, dst);
+
+ uyvy_i422_neon (&out, &in, width, height);
+}
+VIDEO_FILTER_WRAPPER (UYVY_I422)
+
+/* VYUY (packed) to I422 (planar). Reuses the UYVY routine on an output
+ * descriptor built with DEFINE_PLANES_SWAP (presumably U and V exchanged;
+ * confirm against the macro definition). */
+static void VYUY_I422 (filter_t *filter, picture_t *src, picture_t *dst)
+{
+ /* The picture dimensions are taken from the filter input format. */
+ const int width = filter->fmt_in.video.i_width;
+ const int height = filter->fmt_in.video.i_height;
+ DEFINE_PACK(in, src);
+ DEFINE_PLANES_SWAP(out, dst);
+
+ uyvy_i422_neon (&out, &in, width, height);
+}
+VIDEO_FILTER_WRAPPER (VYUY_I422)
+
static int Open (vlc_object_t *obj)
{
filter_t *filter = (filter_t *)obj;
@@ -133,6 +170,7 @@ static int Open (vlc_object_t *obj)
switch (filter->fmt_in.video.i_chroma)
{
+ /* Planar to packed */
case VLC_CODEC_I420:
switch (filter->fmt_out.video.i_chroma)
{
@@ -193,6 +231,48 @@ static int Open (vlc_object_t *obj)
}
break;
+ /* Packed to planar.
+ * Each input chroma selects its converter with an inner switch on the
+ * output chroma. The break after every inner switch leaves the outer
+ * switch: without it control would fall through all following cases,
+ * overwrite pf_video_filter and finally hit the outer default, which
+ * rejects the conversion with VLC_EGENERIC. */
+ case VLC_CODEC_YUYV:
+ switch (filter->fmt_out.video.i_chroma)
+ {
+ case VLC_CODEC_I422:
+ filter->pf_video_filter = YUYV_I422_Filter;
+ break;
+ default:
+ return VLC_EGENERIC;
+ }
+ break; /* outer switch: do not fall into VLC_CODEC_UYVY */
+
+ case VLC_CODEC_UYVY:
+ switch (filter->fmt_out.video.i_chroma)
+ {
+ case VLC_CODEC_I422:
+ filter->pf_video_filter = UYVY_I422_Filter;
+ break;
+ default:
+ return VLC_EGENERIC;
+ }
+ break; /* outer switch: do not fall into VLC_CODEC_YVYU */
+
+ case VLC_CODEC_YVYU:
+ switch (filter->fmt_out.video.i_chroma)
+ {
+ case VLC_CODEC_I422:
+ filter->pf_video_filter = YVYU_I422_Filter;
+ break;
+ default:
+ return VLC_EGENERIC;
+ }
+ break; /* outer switch: do not fall into VLC_CODEC_VYUY */
+
+
+ case VLC_CODEC_VYUY:
+ switch (filter->fmt_out.video.i_chroma)
+ {
+ case VLC_CODEC_I422:
+ filter->pf_video_filter = VYUY_I422_Filter;
+ break;
+ default:
+ return VLC_EGENERIC;
+ }
+ break; /* outer switch: do not fall into the default rejection */
+
default:
return VLC_EGENERIC;
}
diff --git a/modules/arm_neon/yuyv_i422.S b/modules/arm_neon/yuyv_i422.S
new file mode 100644
index 0000000..41e4708
--- /dev/null
+++ b/modules/arm_neon/yuyv_i422.S
@@ -0,0 +1,94 @@
+ @*****************************************************************************
+ @ yuyv_i422.S : ARM NEONv1 packed to planar YUV422 conversion
+ @*****************************************************************************
+ @ Copyright (C) 2011 Rémi Denis-Courmont
+ @
+ @ This program is free software; you can redistribute it and/or modify
+ @ it under the terms of the GNU Lesser General Public License as published by
+ @ the Free Software Foundation; either version 2.1 of the License, or
+ @ (at your option) any later version.
+ @
+ @ This program is distributed in the hope that it will be useful,
+ @ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ @ GNU Lesser General Public License for more details.
+ @
+ @ You should have received a copy of the GNU Lesser General Public License
+ @ along with this program; if not, write to the Free Software Foundation,
+ @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ @****************************************************************************/
+
+ .fpu neon
+ .text
+
+ @ Register aliases shared by both conversion routines in this file.
+ @ On entry r0-r3 hold the four C arguments (out, in, width, height).
+ @ Both routines first read the output descriptor through r0 and the input
+ @ descriptor through r1, after which r0/r1 are reused for the input
+ @ pointer and the input end-of-row padding.
+ @ r4-r6 receive the three output plane pointers, ip is the per-row pixel
+ @ counter and lr holds the output pitch, later the output padding.
+ @ No trailing comments on the define lines: they would become part of
+ @ the macro replacement text.
+#define I r0
+#define IPAD r1
+#define WIDTH r2
+#define HEIGHT r3
+#define Y r4
+#define U r5
+#define V r6
+#define COUNT ip
+#define YPAD lr
+
+ .align
+ .global yuyv_i422_neon
+ .type yuyv_i422_neon, %function
+ @ Packed YUYV to planar I422.
+ @ Input bytes are ordered Y0 U0 Y1 V0 ...; every row is split into one
+ @ luma plane and two chroma planes.
+ @ On entry: r0 = output planes descriptor, r1 = input pack descriptor,
+ @ r2 = width, r3 = height.
+yuyv_i422_neon:
+ push {r4-r6,lr}
+ @ Output descriptor: three plane pointers followed by one pitch word.
+ ldmia r0, {Y, U, V, YPAD}
+ @ Input descriptor: pixel pointer followed by the pitch.
+ ldmia r1, {I, IPAD}
+ @ The flags of this compare are consumed by the conditional pair at
+ @ label 1; the two sub instructions in between leave them untouched.
+ cmp HEIGHT, #0
+ @ Convert pitches into end-of-row padding: one luma byte per pixel on
+ @ output, two packed bytes per pixel on input.
+ @ NOTE(review): the inner loop always advances by whole groups of 16
+ @ pixels and uses 128-bit alignment hints, while the padding is computed
+ @ from the exact width; presumably callers guarantee a width that is a
+ @ multiple of 16 and 16-byte aligned rows. To be confirmed.
+ sub YPAD, YPAD, WIDTH
+ sub IPAD, IPAD, WIDTH, lsl #1
+1:
+ @ Rows left: reload the pixel counter and set the flags from the width.
+ @ No rows left, or width zero or negative: restore registers and return.
+ movgts COUNT, WIDTH
+ pople {r4-r6,pc}
+2:
+ @ One iteration converts 16 pixels, i.e. 32 input bytes.
+ pld [I, #64]
+ subs COUNT, COUNT, #16
+ vld1.u8 {q0-q1}, [I,:128]!
+ @ Even bytes (luma) go to q0, odd bytes (interleaved chroma) to q1.
+ vuzp.u8 q0, q1
+ @ TODO: unroll (1 cycle stall)
+ @ q1 is d2:d3; split it again into the U samples (d2) and V samples (d3).
+ vuzp.u8 d2, d3
+ vst1.u8 {q0}, [Y,:128]!
+ vst1.u8 {d2}, [U,:64]!
+ vst1.u8 {d3}, [V,:64]!
+ @ The branch tests the flags set by the subs on the pixel counter.
+ bgt 2b
+
+ @ Next row. The flags set here drive the conditional pair at label 1.
+ subs HEIGHT, #1
+ add I, I, IPAD
+ add Y, Y, YPAD
+ @ The chroma pointers skip half of the luma padding.
+ add U, U, YPAD, lsr #1
+ add V, V, YPAD, lsr #1
+ b 1b
+
+ .global uyvy_i422_neon
+ .type uyvy_i422_neon, %function
+ @ Packed UYVY to planar I422.
+ @ Input bytes are ordered U0 Y0 V0 Y1 ...; every row is split into one
+ @ luma plane and two chroma planes.
+ @ On entry: r0 = output planes descriptor, r1 = input pack descriptor,
+ @ r2 = width, r3 = height.
+uyvy_i422_neon:
+ push {r4-r6,lr}
+ @ Output descriptor: three plane pointers followed by one pitch word.
+ ldmia r0, {Y, U, V, YPAD}
+ @ Input descriptor: pixel pointer followed by the pitch.
+ ldmia r1, {I, IPAD}
+ @ The flags of this compare are consumed by the conditional pair at
+ @ label 1; the two sub instructions in between leave them untouched.
+ cmp HEIGHT, #0
+ @ Convert pitches into end-of-row padding: one luma byte per pixel on
+ @ output, two packed bytes per pixel on input.
+ @ NOTE(review): the inner loop always advances by whole groups of 16
+ @ pixels and uses 128-bit alignment hints, while the padding is computed
+ @ from the exact width; presumably callers guarantee a width that is a
+ @ multiple of 16 and 16-byte aligned rows. To be confirmed.
+ sub YPAD, YPAD, WIDTH
+ sub IPAD, IPAD, WIDTH, lsl #1
+1:
+ @ Rows left: reload the pixel counter and set the flags from the width.
+ @ No rows left, or width zero or negative: restore registers and return.
+ movgts COUNT, WIDTH
+ pople {r4-r6,pc}
+2:
+ @ One iteration converts 16 pixels, i.e. 32 input bytes.
+ pld [I, #64]
+ subs COUNT, COUNT, #16
+ vld1.u8 {q0-q1}, [I,:128]!
+ @ Even bytes (interleaved chroma) go to q0, odd bytes (luma) to q1.
+ vuzp.u8 q0, q1
+ @ q0 is d0:d1; split it again into the U samples (d0) and V samples (d1).
+ vuzp.u8 d0, d1
+ vst1.u8 {q1}, [Y,:128]!
+ vst1.u8 {d0}, [U,:64]!
+ vst1.u8 {d1}, [V,:64]!
+ @ The branch tests the flags set by the subs on the pixel counter.
+ bgt 2b
+
+ @ Next row. The flags set here drive the conditional pair at label 1.
+ subs HEIGHT, #1
+ add I, I, IPAD
+ add Y, Y, YPAD
+ @ The chroma pointers skip half of the luma padding.
+ add U, U, YPAD, lsr #1
+ add V, V, YPAD, lsr #1
+ b 1b
More information about the vlc-commits
mailing list