[vlc-commits] [Git][videolan/vlc][master] 2 commits: i42{0, 2}_yuy2: remove dead chroma conversions
Steve Lhomme (@robUx4)
gitlab at videolan.org
Wed Nov 12 09:50:05 UTC 2025
Steve Lhomme pushed to branch master at VideoLAN / VLC
Commits:
a3c54b58 by Rémi Denis-Courmont at 2025-11-12T09:35:26+00:00
i42{0,2}_yuy2: remove dead chroma conversions
With the removal of XVideo, there is not a single video output supporting
any format in the YUY2 family of Packed 4:2:2 YUV.
Note that we still need conversion *from* YUY2 for some capture devices
(mostly for capture at resolutions small enough to go over USB 2).
- - - - -
2101c125 by Rémi Denis-Courmont at 2025-11-12T09:35:26+00:00
chroma_yuv: remove conversions to YUY2
These were added for XVideo output on TI OMAP3xxx devices, notably the
Nokia N900 and N9. With the removal of the XVideo video output,
they have become useless.
- - - - -
12 changed files:
- modules/isa/arm/Makefile.am
- modules/isa/arm/neon/chroma_neon.h
- modules/isa/arm/neon/chroma_yuv.c
- − modules/isa/arm/neon/i420_yuyv.S
- − modules/isa/arm/neon/i422_yuyv.S
- modules/video_chroma/Makefile.am
- − modules/video_chroma/i420_yuy2.c
- − modules/video_chroma/i420_yuy2.h
- − modules/video_chroma/i422_yuy2.c
- − modules/video_chroma/i422_yuy2.h
- modules/video_chroma/meson.build
- po/POTFILES.in
Changes:
=====================================
modules/isa/arm/Makefile.am
=====================================
@@ -12,8 +12,6 @@ endif
libchroma_yuv_neon_plugin_la_SOURCES = \
isa/arm/neon/deinterleave_chroma.S \
- isa/arm/neon/i420_yuyv.S \
- isa/arm/neon/i422_yuyv.S \
isa/arm/neon/yuyv_i422.S \
isa/arm/neon/chroma_yuv.c isa/arm/neon/chroma_neon.h
libchroma_yuv_neon_plugin_LIBTOOLFLAGS = --tag=CC
=====================================
modules/isa/arm/neon/chroma_neon.h
=====================================
@@ -45,26 +45,6 @@ struct yuv_pack
size_t pitch;
};
-/* I420 to YUYV conversion. */
-void i420_yuyv_neon (struct yuv_pack *const out,
- const struct yuv_planes *const in,
- int width, int height);
-
-/* I420 to UYVY conversion. */
-void i420_uyvy_neon (struct yuv_pack *const out,
- const struct yuv_planes *const in,
- int width, int height);
-
-/* I422 to YUYV conversion. */
-void i422_yuyv_neon (struct yuv_pack *const out,
- const struct yuv_planes *const in,
- int width, int height);
-
-/* I422 to UYVY conversion. */
-void i422_uyvy_neon (struct yuv_pack *const out,
- const struct yuv_planes *const in,
- int width, int height);
-
/* YUYV to I422 conversion. */
void yuyv_i422_neon (struct yuv_planes *const out,
const struct yuv_pack *const in,
=====================================
modules/isa/arm/neon/chroma_yuv.c
=====================================
@@ -36,19 +36,12 @@ static void ProbeChroma(vlc_chroma_conv_vec *vec)
{
#define PACKED_CHROMAS VLC_CODEC_YUYV, VLC_CODEC_UYVY, VLC_CODEC_YVYU, VLC_CODEC_VYUY
- vlc_chroma_conv_add_in_outlist(vec, 0.75, VLC_CODEC_I420, PACKED_CHROMAS);
- vlc_chroma_conv_add_in_outlist(vec, 0.75, VLC_CODEC_YV12, PACKED_CHROMAS);
- vlc_chroma_conv_add_in_outlist(vec, 0.75, VLC_CODEC_I422, PACKED_CHROMAS);
-
vlc_chroma_conv_add_in_outlist(vec, 0.75, VLC_CODEC_NV12, VLC_CODEC_I420,
VLC_CODEC_YV12);
vlc_chroma_conv_add_in_outlist(vec, 0.75, VLC_CODEC_NV21, VLC_CODEC_I420,
VLC_CODEC_YV12);
vlc_chroma_conv_add(vec, 0.75, VLC_CODEC_NV24, VLC_CODEC_I444, false);
-
- vlc_chroma_conv_add_out_inlist(vec, 0.75, VLC_CODEC_I422, VLC_CODEC_NV16,
- PACKED_CHROMAS);
}
vlc_module_begin ()
set_description (N_("ARM NEON video chroma conversions"))
@@ -75,44 +68,6 @@ vlc_module_end ()
#define DEFINE_UV_PACK(pack, pict) \
struct yuv_pack pack = { (pict)->U_PIXELS, (pict)->U_PITCH }
-/* Planar YUV420 to packed YUV422 */
-static void I420_YUYV (filter_t *filter, picture_t *src, picture_t *dst)
-{
- DEFINE_PACK(out, dst);
- DEFINE_PLANES(in, src);
- i420_yuyv_neon (&out, &in, filter->fmt_in.video.i_width,
- filter->fmt_in.video.i_height);
-}
-VIDEO_FILTER_WRAPPER (I420_YUYV)
-
-static void I420_YVYU (filter_t *filter, picture_t *src, picture_t *dst)
-{
- DEFINE_PACK(out, dst);
- DEFINE_PLANES_SWAP(in, src);
- i420_yuyv_neon (&out, &in, filter->fmt_in.video.i_width,
- filter->fmt_in.video.i_height);
-}
-VIDEO_FILTER_WRAPPER (I420_YVYU)
-
-static void I420_UYVY (filter_t *filter, picture_t *src, picture_t *dst)
-{
- DEFINE_PACK(out, dst);
- DEFINE_PLANES(in, src);
- i420_uyvy_neon (&out, &in, filter->fmt_in.video.i_width,
- filter->fmt_in.video.i_height);
-}
-VIDEO_FILTER_WRAPPER (I420_UYVY)
-
-static void I420_VYUY (filter_t *filter, picture_t *src, picture_t *dst)
-{
- DEFINE_PACK(out, dst);
- DEFINE_PLANES_SWAP(in, src);
- i420_uyvy_neon (&out, &in, filter->fmt_in.video.i_width,
- filter->fmt_in.video.i_height);
-}
-VIDEO_FILTER_WRAPPER (I420_VYUY)
-
-
/* Semiplanar NV12/21/16/24 to planar I420/YV12/I422/I444 */
static void copy_y_plane(filter_t *filter, picture_t *src, picture_t *dst)
{
@@ -159,44 +114,6 @@ SEMIPLANAR_FILTERS (Semiplanar_Planar_422, 2, 1)
SEMIPLANAR_FILTERS (Semiplanar_Planar_444, 1, 1)
-/* Planar YUV422 to packed YUV422 */
-static void I422_YUYV (filter_t *filter, picture_t *src, picture_t *dst)
-{
- DEFINE_PACK(out, dst);
- DEFINE_PLANES(in, src);
- i422_yuyv_neon (&out, &in, filter->fmt_in.video.i_width,
- filter->fmt_in.video.i_height);
-}
-VIDEO_FILTER_WRAPPER (I422_YUYV)
-
-static void I422_YVYU (filter_t *filter, picture_t *src, picture_t *dst)
-{
- DEFINE_PACK(out, dst);
- DEFINE_PLANES_SWAP(in, src);
- i422_yuyv_neon (&out, &in, filter->fmt_in.video.i_width,
- filter->fmt_in.video.i_height);
-}
-VIDEO_FILTER_WRAPPER (I422_YVYU)
-
-static void I422_UYVY (filter_t *filter, picture_t *src, picture_t *dst)
-{
- DEFINE_PACK(out, dst);
- DEFINE_PLANES(in, src);
- i422_uyvy_neon (&out, &in, filter->fmt_in.video.i_width,
- filter->fmt_in.video.i_height);
-}
-VIDEO_FILTER_WRAPPER (I422_UYVY)
-
-static void I422_VYUY (filter_t *filter, picture_t *src, picture_t *dst)
-{
- DEFINE_PACK(out, dst);
- DEFINE_PLANES_SWAP(in, src);
- i422_uyvy_neon (&out, &in, filter->fmt_in.video.i_width,
- filter->fmt_in.video.i_height);
-}
-VIDEO_FILTER_WRAPPER (I422_VYUY)
-
-
/* Packed YUV422 to planar YUV422 */
static void YUYV_I422 (filter_t *filter, picture_t *src, picture_t *dst)
{
@@ -244,67 +161,6 @@ static int Open (filter_t *filter)
switch (filter->fmt_in.video.i_chroma)
{
- /* Planar to packed */
- case VLC_CODEC_I420:
- switch (filter->fmt_out.video.i_chroma)
- {
- case VLC_CODEC_YUYV:
- filter->ops = &I420_YUYV_ops;
- break;
- case VLC_CODEC_UYVY:
- filter->ops = &I420_UYVY_ops;
- break;
- case VLC_CODEC_YVYU:
- filter->ops = &I420_YVYU_ops;
- break;
- case VLC_CODEC_VYUY:
- filter->ops = &I420_VYUY_ops;
- break;
- default:
- return VLC_EGENERIC;
- }
- break;
-
- case VLC_CODEC_YV12:
- switch (filter->fmt_out.video.i_chroma)
- {
- case VLC_CODEC_YUYV:
- filter->ops = &I420_YVYU_ops;
- break;
- case VLC_CODEC_UYVY:
- filter->ops = &I420_VYUY_ops;
- break;
- case VLC_CODEC_YVYU:
- filter->ops = &I420_YUYV_ops;
- break;
- case VLC_CODEC_VYUY:
- filter->ops = &I420_UYVY_ops;
- break;
- default:
- return VLC_EGENERIC;
- }
- break;
-
- case VLC_CODEC_I422:
- switch (filter->fmt_out.video.i_chroma)
- {
- case VLC_CODEC_YUYV:
- filter->ops = &I422_YUYV_ops;
- break;
- case VLC_CODEC_UYVY:
- filter->ops = &I422_UYVY_ops;
- break;
- case VLC_CODEC_YVYU:
- filter->ops = &I422_YVYU_ops;
- break;
- case VLC_CODEC_VYUY:
- filter->ops = &I422_VYUY_ops;
- break;
- default:
- return VLC_EGENERIC;
- }
- break;
-
/* Semiplanar to planar */
case VLC_CODEC_NV12:
switch (filter->fmt_out.video.i_chroma)
=====================================
modules/isa/arm/neon/i420_yuyv.S deleted
=====================================
@@ -1,120 +0,0 @@
- @*****************************************************************************
- @ i420_yuyv.S : ARM NEONv1 I420 to YUYV chroma conversion
- @*****************************************************************************
- @ Copyright (C) 2009-2011 Rémi Denis-Courmont
- @
- @ This program is free software; you can redistribute it and/or modify
- @ it under the terms of the GNU Lesser General Public License as published by
- @ the Free Software Foundation; either version 2.1 of the License, or
- @ (at your option) any later version.
- @
- @ This program is distributed in the hope that it will be useful,
- @ but WITHOUT ANY WARRANTY; without even the implied warranty of
- @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- @ GNU Lesser General Public License for more details.
- @
- @ You should have received a copy of the GNU Lesser General Public License
- @ along with this program; if not, write to the Free Software Foundation,
- @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
- @****************************************************************************/
-
-#include "../asm.S"
-
- .syntax unified
-#if HAVE_AS_FPU_DIRECTIVE
- .fpu neon
-#endif
- .text
-
-#define O1 r0
-#define O2 r1
-#define WIDTH r2
-#define HEIGHT r3
-#define Y1 r4
-#define Y2 r5
-#define U r6
-#define V r7
-#define YPITCH r8
-#define OPAD r10
-#define YPAD r11
-#define COUNT ip
-#define OPITCH lr
-
- .align 2
-function i420_yuyv_neon
- push {r4-r8,r10-r11,lr}
- ldmia r0, {O1, OPITCH}
- ldmia r1, {Y1, U, V, YPITCH}
- cmp HEIGHT, #0
- sub OPAD, OPITCH, WIDTH, lsl #1
- sub YPAD, YPITCH, WIDTH
-1:
- it gt
- movsgt COUNT, WIDTH
- add O2, O1, OPITCH
- add Y2, Y1, YPITCH
- it le
- pople {r4-r8,r10-r11,pc}
-2:
- pld [U, #64]
- vld1.u8 {d2}, [U,:64]!
- pld [V, #64]
- vld1.u8 {d3}, [V,:64]!
- pld [Y1, #64]
- vzip.u8 d2, d3
- subs COUNT, COUNT, #16
- vld1.u8 {q0}, [Y1,:128]!
- pld [Y2, #64]
- vmov q3, q1
- vzip.u8 q0, q1
- vld1.u8 {q2}, [Y2,:128]!
- vzip.u8 q2, q3
- vst1.u8 {q0-q1}, [O1,:128]!
- vst1.u8 {q2-q3}, [O2,:128]!
- bgt 2b
-
- subs HEIGHT, #2
- add O1, O2, OPAD
- add Y1, Y2, YPAD
- add U, U, YPAD, lsr #1
- add V, V, YPAD, lsr #1
- b 1b
-
-function i420_uyvy_neon
- push {r4-r8,r10-r11,lr}
- ldmia r0, {O1, OPITCH}
- ldmia r1, {Y1, U, V, YPITCH}
- cmp HEIGHT, #0
- sub OPAD, OPITCH, WIDTH, lsl #1
- sub YPAD, YPITCH, WIDTH
-1:
- it gt
- movsgt COUNT, WIDTH
- add O2, O1, OPITCH
- add Y2, Y1, YPITCH
- it le
- pople {r4-r8,r10-r11,pc}
-2:
- pld [U, #64]
- vld1.u8 {d0}, [U,:64]!
- pld [V, #64]
- vld1.u8 {d1}, [V,:64]!
- pld [Y1, #64]
- vzip.u8 d0, d1
- subs COUNT, COUNT, #16
- vld1.u8 {q1}, [Y1,:128]!
- pld [Y2, #64]
- vmov q2, q0
- vzip.u8 q0, q1
- vld1.u8 {q3}, [Y2,:128]!
- vzip.u8 q2, q3
- vst1.u8 {q0-q1}, [O1,:128]!
- vst1.u8 {q2-q3}, [O2,:128]!
- bgt 2b
-
- subs HEIGHT, #2
- add O1, O2, OPAD
- add Y1, Y2, YPAD
- add U, U, YPAD, lsr #1
- add V, V, YPAD, lsr #1
- b 1b
=====================================
modules/isa/arm/neon/i422_yuyv.S deleted
=====================================
@@ -1,101 +0,0 @@
- @*****************************************************************************
- @ i422_yuyv.S : ARM NEONv1 I422 to YUYV chroma conversion
- @*****************************************************************************
- @ Copyright (C) 2011 Rémi Denis-Courmont
- @
- @ This program is free software; you can redistribute it and/or modify
- @ it under the terms of the GNU Lesser General Public License as published by
- @ the Free Software Foundation; either version 2.1 of the License, or
- @ (at your option) any later version.
- @
- @ This program is distributed in the hope that it will be useful,
- @ but WITHOUT ANY WARRANTY; without even the implied warranty of
- @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- @ GNU Lesser General Public License for more details.
- @
- @ You should have received a copy of the GNU Lesser General Public License
- @ along with this program; if not, write to the Free Software Foundation,
- @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
- @****************************************************************************/
-
-#include "../asm.S"
-
- .syntax unified
-#if HAVE_AS_FPU_DIRECTIVE
- .fpu neon
-#endif
- .text
-
-#define O r0
-#define OPAD r1
-#define WIDTH r2
-#define HEIGHT r3
-#define Y r4
-#define U r5
-#define V r6
-#define COUNT ip
-#define YPAD lr
-
- .align 2
-function i422_yuyv_neon
- push {r4-r6,lr}
- ldmia r1, {Y, U, V, YPAD}
- ldmia r0, {O, OPAD}
- cmp HEIGHT, #0
- sub OPAD, OPAD, WIDTH, lsl #1
- sub YPAD, YPAD, WIDTH
-1:
- ite gt
- movsgt COUNT, WIDTH
- pople {r4-r6,pc}
-2:
- pld [U, #64]
- vld1.u8 {d2}, [U,:64]!
- pld [V, #64]
- vld1.u8 {d3}, [V,:64]!
- pld [Y, #64]
- vzip.u8 d2, d3
- subs COUNT, COUNT, #16
- vld1.u8 {q0}, [Y,:128]!
- vzip.u8 q0, q1
- @ TODO: unroll (1 cycle stall)
- vst1.u8 {q0-q1}, [O,:128]!
- bgt 2b
-
- subs HEIGHT, #1
- add U, U, YPAD, lsr #1
- add V, V, YPAD, lsr #1
- add Y, Y, YPAD
- add O, O, OPAD
- b 1b
-
-function i422_uyvy_neon
- push {r4-r6,lr}
- ldmia r1, {Y, U, V, YPAD}
- ldmia r0, {O, OPAD}
- cmp HEIGHT, #0
- sub OPAD, OPAD, WIDTH, lsl #1
- sub YPAD, YPAD, WIDTH
-1:
- ite gt
- movsgt COUNT, WIDTH
- pople {r4-r6,pc}
-2:
- pld [U, #64]
- vld1.u8 {d0}, [U,:64]!
- pld [V, #64]
- vld1.u8 {d1}, [V,:64]!
- pld [Y, #64]
- vzip.u8 d0, d1
- subs COUNT, COUNT, #16
- vld1.u8 {q1}, [Y,:128]!
- vzip.u8 q0, q1
- vst1.u8 {q0-q1}, [O,:128]!
- bgt 2b
-
- subs HEIGHT, #1
- add U, U, YPAD, lsr #1
- add V, V, YPAD, lsr #1
- add Y, Y, YPAD
- add O, O, OPAD
- b 1b
=====================================
modules/video_chroma/Makefile.am
=====================================
@@ -16,16 +16,12 @@ libgrey_yuv_plugin_la_SOURCES = video_chroma/grey_yuv.c
libi420_rgb_plugin_la_SOURCES = video_chroma/i420_rgb.c video_chroma/i420_rgb.h \
video_chroma/i420_rgb8.c video_chroma/i420_rgb16.c video_chroma/i420_rgb_c.h
-libi420_yuy2_plugin_la_SOURCES = video_chroma/i420_yuy2.c video_chroma/i420_yuy2.h
-
libi420_nv12_plugin_la_SOURCES = video_chroma/i420_nv12.c
libi420_nv12_plugin_la_CPPFLAGS = $(AM_CPPFLAGS)
libi420_nv12_plugin_la_LIBADD = libchroma_copy.la
libi422_i420_plugin_la_SOURCES = video_chroma/i422_i420.c
-libi422_yuy2_plugin_la_SOURCES = video_chroma/i422_yuy2.c video_chroma/i422_yuy2.h
-
librv32_plugin_la_SOURCES = video_chroma/rv32.c
libyuy2_i420_plugin_la_SOURCES = video_chroma/yuy2_i420.c
@@ -38,10 +34,8 @@ liborient_plugin_la_SOURCES = video_chroma/orient.c video_chroma/orient.h
chroma_LTLIBRARIES = \
libi420_rgb_plugin.la \
- libi420_yuy2_plugin.la \
libi420_nv12_plugin.la \
libi422_i420_plugin.la \
- libi422_yuy2_plugin.la \
libgrey_yuv_plugin.la \
libyuy2_i420_plugin.la \
libyuy2_i422_plugin.la \
@@ -53,31 +47,14 @@ chroma_LTLIBRARIES = \
EXTRA_LTLIBRARIES += libswscale_plugin.la
-# AltiVec
-libi420_yuy2_altivec_plugin_la_SOURCES = video_chroma/i420_yuy2.c video_chroma/i420_yuy2.h
-libi420_yuy2_altivec_plugin_la_CPPFLAGS = $(AM_CPPFLAGS) -DPLUGIN_ALTIVEC
-
-if HAVE_ALTIVEC
-chroma_LTLIBRARIES += \
- libi420_yuy2_altivec_plugin.la
-endif
-
# SSE2
libi420_rgb_sse2_plugin_la_SOURCES = video_chroma/i420_rgb.c video_chroma/i420_rgb.h \
video_chroma/i420_rgb16_x86.c video_chroma/i420_rgb_sse2.h
libi420_rgb_sse2_plugin_la_CPPFLAGS = $(AM_CPPFLAGS) -DPLUGIN_SSE2
-libi420_yuy2_sse2_plugin_la_SOURCES = video_chroma/i420_yuy2.c video_chroma/i420_yuy2.h
-libi420_yuy2_sse2_plugin_la_CPPFLAGS = $(AM_CPPFLAGS) -DPLUGIN_SSE2
-
-libi422_yuy2_sse2_plugin_la_SOURCES = video_chroma/i422_yuy2.c video_chroma/i422_yuy2.h
-libi422_yuy2_sse2_plugin_la_CPPFLAGS = $(AM_CPPFLAGS) -DPLUGIN_SSE2
-
if HAVE_SSE2
chroma_LTLIBRARIES += \
- libi420_rgb_sse2_plugin.la \
- libi420_yuy2_sse2_plugin.la \
- libi422_yuy2_sse2_plugin.la
+ libi420_rgb_sse2_plugin.la
endif
libcvpx_plugin_la_SOURCES = codec/vt_utils.c codec/vt_utils.h video_chroma/cvpx.c
=====================================
modules/video_chroma/i420_yuy2.c deleted
=====================================
@@ -1,815 +0,0 @@
-/*****************************************************************************
- * i420_yuy2.c : YUV to YUV conversion module for vlc
- *****************************************************************************
- * Copyright (C) 2000, 2001 VLC authors and VideoLAN
- *
- * Authors: Samuel Hocevar <sam at zoy.org>
- * Damien Fouilleul <damien at videolan.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
- *****************************************************************************/
-
-/*****************************************************************************
- * Preamble
- *****************************************************************************/
-
-#ifdef HAVE_CONFIG_H
-# include "config.h"
-#endif
-
-#include <vlc_common.h>
-#include <vlc_plugin.h>
-#include <vlc_filter.h>
-#include <vlc_picture.h>
-#include <vlc_chroma_probe.h>
-#include <vlc_cpu.h>
-
-#if defined (PLUGIN_ALTIVEC) && defined(HAVE_ALTIVEC_H)
-# include <altivec.h>
-#endif
-
-#include "i420_yuy2.h"
-
-#define SRC_FOURCC "I420,IYUV,YV12"
-
-#if defined (PLUGIN_SSE2)
-# define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422"
-# define VLC_TARGET VLC_SSE
-# define COST 0.75
-#elif defined (PLUGIN_ALTIVEC)
-# define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422"
-# define VLC_TARGET VLC_ALTIVEC
-# define COST 0.75
-#else
-# define PLUGIN_PLAIN
-# define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,Y211"
-# define VLC_TARGET
-# define COST 1
-#endif
-
-/*****************************************************************************
- * Local and extern prototypes.
- *****************************************************************************/
-static int Activate ( filter_t * );
-
-/*****************************************************************************
- * Module descriptor.
- *****************************************************************************/
-static void ProbeChroma(vlc_chroma_conv_vec *vec)
-{
- vlc_chroma_conv_add_in_outlist(vec, COST, VLC_CODEC_I420, VLC_CODEC_YUYV,
- VLC_CODEC_YVYU, VLC_CODEC_UYVY);
-#ifdef PLUGIN_PLAIN
- vlc_chroma_conv_add(vec, COST, VLC_CODEC_I420, VLC_CODEC_Y211, false);
-#endif
-}
-
-vlc_module_begin ()
-#if defined (PLUGIN_PLAIN)
- set_description( N_("Conversions from " SRC_FOURCC " to " DEST_FOURCC) )
- set_callback_video_converter( Activate, 80 )
-# define vlc_CPU_capable() (true)
-#elif defined (PLUGIN_SSE2)
- set_description( N_("SSE2 conversions from " SRC_FOURCC " to " DEST_FOURCC) )
- set_callback_video_converter( Activate, 250 )
-# define vlc_CPU_capable() vlc_CPU_SSE2()
-#elif defined (PLUGIN_ALTIVEC)
- set_description( N_("AltiVec conversions from " SRC_FOURCC " to " DEST_FOURCC) )
- set_callback_video_converter( Activate, 250 )
-# define vlc_CPU_capable() vlc_CPU_ALTIVEC()
-#endif
- add_submodule()
- set_callback_chroma_conv_probe(ProbeChroma)
-vlc_module_end ()
-
-VIDEO_FILTER_WRAPPER( I420_YUY2 )
-VIDEO_FILTER_WRAPPER( I420_YVYU )
-VIDEO_FILTER_WRAPPER( I420_UYVY )
-#if defined (PLUGIN_PLAIN)
-VIDEO_FILTER_WRAPPER( I420_Y211 )
-#endif
-
-static const struct vlc_filter_operations *
-GetFilterOperations( filter_t *p_filter )
-{
- switch( p_filter->fmt_out.video.i_chroma )
- {
- case VLC_CODEC_YUYV:
- return &I420_YUY2_ops;
-
- case VLC_CODEC_YVYU:
- return &I420_YVYU_ops;
-
- case VLC_CODEC_UYVY:
- return &I420_UYVY_ops;
-
-#if defined (PLUGIN_PLAIN)
- case VLC_CODEC_Y211:
- return &I420_Y211_ops;
-#endif
- default:
- return NULL;
- }
-}
-
-/*****************************************************************************
- * Activate: allocate a chroma function
- *****************************************************************************
- * This function allocates and initializes a chroma function
- *****************************************************************************/
-static int Activate( filter_t *p_filter )
-{
- if( !vlc_CPU_capable() )
- return VLC_EGENERIC;
-
- if( (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) & 1
- || (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height) & 1 )
- {
- return VLC_EGENERIC;
- }
-
- if( p_filter->fmt_in.video.i_width != p_filter->fmt_out.video.i_width
- || p_filter->fmt_in.video.i_height != p_filter->fmt_out.video.i_height
- || p_filter->fmt_in.video.orientation != p_filter->fmt_out.video.orientation )
- return VLC_EGENERIC;
-
- // VLC_CODEC_YV12: FIXME invert U and V in the filters :)
- if( p_filter->fmt_in.video.i_chroma != VLC_CODEC_I420)
- return VLC_EGENERIC;
-
- /* Find the adequate filter function depending on the output format. */
- p_filter->ops = GetFilterOperations( p_filter );
- if( p_filter->ops == NULL )
- return VLC_EGENERIC;
-
- return VLC_SUCCESS;
-}
-
-#if 0
-static inline unsigned long long read_cycles(void)
-{
- unsigned long long v;
- __asm__ __volatile__("rdtsc" : "=A" (v): );
-
- return v;
-}
-#endif
-
-/* Following functions are local */
-
-/*****************************************************************************
- * I420_YUY2: planar YUV 4:2:0 to packed YUYV 4:2:2
- *****************************************************************************/
-VLC_TARGET
-static void I420_YUY2( filter_t *p_filter, picture_t *p_source,
- picture_t *p_dest )
-{
- uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
- uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
- uint8_t *p_u = p_source->U_PIXELS;
- uint8_t *p_v = p_source->V_PIXELS;
-
- int i_x, i_y;
-
-#if defined (PLUGIN_ALTIVEC)
-#define VEC_NEXT_LINES( ) \
- p_line1 = p_line2; \
- p_line2 += p_dest->p->i_pitch; \
- p_y1 = p_y2; \
- p_y2 += p_source->p[Y_PLANE].i_pitch;
-
-#define VEC_LOAD_UV( ) \
- u_vec = vec_ld( 0, p_u ); p_u += 16; \
- v_vec = vec_ld( 0, p_v ); p_v += 16;
-
-#define VEC_MERGE( a ) \
- uv_vec = a( u_vec, v_vec ); \
- y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
- vec_st( vec_mergeh( y_vec, uv_vec ), 0, p_line1 ); p_line1 += 16; \
- vec_st( vec_mergel( y_vec, uv_vec ), 0, p_line1 ); p_line1 += 16; \
- y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
- vec_st( vec_mergeh( y_vec, uv_vec ), 0, p_line2 ); p_line2 += 16; \
- vec_st( vec_mergel( y_vec, uv_vec ), 0, p_line2 ); p_line2 += 16;
-
- vector unsigned char u_vec;
- vector unsigned char v_vec;
- vector unsigned char uv_vec;
- vector unsigned char y_vec;
-
- if( !( ( (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) % 32 ) |
- ( (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height) % 2 ) ) )
- {
- /* Width is a multiple of 32, we take 2 lines at a time */
- for( i_y = (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height) / 2 ; i_y-- ; )
- {
- VEC_NEXT_LINES( );
- for( i_x = (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) / 32 ; i_x-- ; )
- {
- VEC_LOAD_UV( );
- VEC_MERGE( vec_mergeh );
- VEC_MERGE( vec_mergel );
- }
- }
- }
-#warning FIXME: converting widths % 16 but !widths % 32 is broken on altivec
-#if 0
- else if( !( ( (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) % 16 ) |
- ( (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height) % 4 ) ) )
- {
- /* Width is only a multiple of 16, we take 4 lines at a time */
- for( i_y = (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height) / 4 ; i_y-- ; )
- {
- /* Line 1 and 2, pixels 0 to ( width - 16 ) */
- VEC_NEXT_LINES( );
- for( i_x = (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) / 32 ; i_x-- ; )
- {
- VEC_LOAD_UV( );
- VEC_MERGE( vec_mergeh );
- VEC_MERGE( vec_mergel );
- }
-
- /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
- VEC_LOAD_UV( );
- VEC_MERGE( vec_mergeh );
-
- /* Line 3 and 4, pixels 0 to 16 */
- VEC_NEXT_LINES( );
- VEC_MERGE( vec_mergel );
-
- /* Line 3 and 4, pixels 16 to ( width ) */
- for( i_x = (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) / 32 ; i_x-- ; )
- {
- VEC_LOAD_UV( );
- VEC_MERGE( vec_mergeh );
- VEC_MERGE( vec_mergel );
- }
- }
- }
-#endif
- else
- {
- /* Crap, use the C version */
-#undef VEC_NEXT_LINES
-#undef VEC_LOAD_UV
-#undef VEC_MERGE
-#endif
-
- const int i_source_margin = p_source->p[0].i_pitch
- - p_source->p[0].i_visible_pitch
- - p_filter->fmt_in.video.i_x_offset;
- const int i_source_margin_c = p_source->p[1].i_pitch
- - p_source->p[1].i_visible_pitch
- - ( p_filter->fmt_in.video.i_x_offset / 2 );
- const int i_dest_margin = p_dest->p->i_pitch
- - p_dest->p->i_visible_pitch
- - ( p_filter->fmt_out.video.i_x_offset * 2 );
-
-#if !defined(PLUGIN_SSE2)
- for( i_y = (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height) / 2 ; i_y-- ; )
- {
- p_line1 = p_line2;
- p_line2 += p_dest->p->i_pitch;
-
- p_y1 = p_y2;
- p_y2 += p_source->p[Y_PLANE].i_pitch;
-
- for( i_x = (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) / 8; i_x-- ; )
- {
- C_YUV420_YUYV( );
- C_YUV420_YUYV( );
- C_YUV420_YUYV( );
- C_YUV420_YUYV( );
- }
- for( i_x = ( (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) % 8 ) / 2; i_x-- ; )
- {
- C_YUV420_YUYV( );
- }
-
- p_y2 += i_source_margin;
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- p_line2 += i_dest_margin;
- }
-
-#if defined (PLUGIN_ALTIVEC)
- }
-#endif
-
-#elif defined(PLUGIN_SSE2)
- /*
- ** SSE2 128 bits fetch/store instructions are faster
- ** if memory access is 16 bytes aligned
- */
-
- if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
- ((intptr_t)p_line2|(intptr_t)p_y2))) )
- {
- /* use faster SSE2 aligned fetch and store */
- for( i_y = (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height) / 2 ; i_y-- ; )
- {
- p_line1 = p_line2;
- p_line2 += p_dest->p->i_pitch;
-
- p_y1 = p_y2;
- p_y2 += p_source->p[Y_PLANE].i_pitch;
-
- for( i_x = (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) / 16 ; i_x-- ; )
- {
- SSE2_CALL( SSE2_YUV420_YUYV_ALIGNED );
- }
- for( i_x = ( (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) % 16 ) / 2; i_x-- ; )
- {
- C_YUV420_YUYV( );
- }
-
- p_y2 += i_source_margin;
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- p_line2 += i_dest_margin;
- }
- }
- else
- {
- /* use slower SSE2 unaligned fetch and store */
- for( i_y = (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height) / 2 ; i_y-- ; )
- {
- p_line1 = p_line2;
- p_line2 += p_dest->p->i_pitch;
-
- p_y1 = p_y2;
- p_y2 += p_source->p[Y_PLANE].i_pitch;
-
- for( i_x = (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) / 16 ; i_x-- ; )
- {
- SSE2_CALL( SSE2_YUV420_YUYV_UNALIGNED );
- }
- for( i_x = ( (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) % 16 ) / 2; i_x-- ; )
- {
- C_YUV420_YUYV( );
- }
-
- p_y2 += i_source_margin;
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- p_line2 += i_dest_margin;
- }
- }
- /* make sure all SSE2 stores are visible thereafter */
- SSE2_END;
-
-#endif // defined(PLUGIN_SSE2)
-}
-
-/*****************************************************************************
- * I420_YVYU: planar YUV 4:2:0 to packed YVYU 4:2:2
- *****************************************************************************/
-VLC_TARGET
-static void I420_YVYU( filter_t *p_filter, picture_t *p_source,
- picture_t *p_dest )
-{
- uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
- uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
- uint8_t *p_u = p_source->U_PIXELS;
- uint8_t *p_v = p_source->V_PIXELS;
-
- int i_x, i_y;
-
-#if defined (PLUGIN_ALTIVEC)
-#define VEC_NEXT_LINES( ) \
- p_line1 = p_line2; \
- p_line2 += p_dest->p->i_pitch; \
- p_y1 = p_y2; \
- p_y2 += p_source->p[Y_PLANE].i_pitch;
-
-#define VEC_LOAD_UV( ) \
- u_vec = vec_ld( 0, p_u ); p_u += 16; \
- v_vec = vec_ld( 0, p_v ); p_v += 16;
-
-#define VEC_MERGE( a ) \
- vu_vec = a( v_vec, u_vec ); \
- y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
- vec_st( vec_mergeh( y_vec, vu_vec ), 0, p_line1 ); p_line1 += 16; \
- vec_st( vec_mergel( y_vec, vu_vec ), 0, p_line1 ); p_line1 += 16; \
- y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
- vec_st( vec_mergeh( y_vec, vu_vec ), 0, p_line2 ); p_line2 += 16; \
- vec_st( vec_mergel( y_vec, vu_vec ), 0, p_line2 ); p_line2 += 16;
-
- vector unsigned char u_vec;
- vector unsigned char v_vec;
- vector unsigned char vu_vec;
- vector unsigned char y_vec;
-
- if( !( ( (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) % 32 ) |
- ( (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height) % 2 ) ) )
- {
- /* Width is a multiple of 32, we take 2 lines at a time */
- for( i_y = (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height) / 2 ; i_y-- ; )
- {
- VEC_NEXT_LINES( );
- for( i_x = (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) / 32 ; i_x-- ; )
- {
- VEC_LOAD_UV( );
- VEC_MERGE( vec_mergeh );
- VEC_MERGE( vec_mergel );
- }
- }
- }
- else if( !( ( (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) % 16 ) |
- ( (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height) % 4 ) ) )
- {
- /* Width is only a multiple of 16, we take 4 lines at a time */
- for( i_y = (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height) / 4 ; i_y-- ; )
- {
- /* Line 1 and 2, pixels 0 to ( width - 16 ) */
- VEC_NEXT_LINES( );
- for( i_x = (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) / 32 ; i_x-- ; )
- {
- VEC_LOAD_UV( );
- VEC_MERGE( vec_mergeh );
- VEC_MERGE( vec_mergel );
- }
-
- /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
- VEC_LOAD_UV( );
- VEC_MERGE( vec_mergeh );
-
- /* Line 3 and 4, pixels 0 to 16 */
- VEC_NEXT_LINES( );
- VEC_MERGE( vec_mergel );
-
- /* Line 3 and 4, pixels 16 to ( width ) */
- for( i_x = (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) / 32 ; i_x-- ; )
- {
- VEC_LOAD_UV( );
- VEC_MERGE( vec_mergeh );
- VEC_MERGE( vec_mergel );
- }
- }
- }
- else
- {
- /* Crap, use the C version */
-#undef VEC_NEXT_LINES
-#undef VEC_LOAD_UV
-#undef VEC_MERGE
-#endif
-
- const int i_source_margin = p_source->p[0].i_pitch
- - p_source->p[0].i_visible_pitch
- - p_filter->fmt_in.video.i_x_offset;
- const int i_source_margin_c = p_source->p[1].i_pitch
- - p_source->p[1].i_visible_pitch
- - ( p_filter->fmt_in.video.i_x_offset / 2 );
- const int i_dest_margin = p_dest->p->i_pitch
- - p_dest->p->i_visible_pitch
- - ( p_filter->fmt_out.video.i_x_offset * 2 );
-
-#if !defined(PLUGIN_SSE2)
- for( i_y = (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height) / 2 ; i_y-- ; )
- {
- p_line1 = p_line2;
- p_line2 += p_dest->p->i_pitch;
-
- p_y1 = p_y2;
- p_y2 += p_source->p[Y_PLANE].i_pitch;
-
- for( i_x = (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) / 8 ; i_x-- ; )
- {
- C_YUV420_YVYU( );
- C_YUV420_YVYU( );
- C_YUV420_YVYU( );
- C_YUV420_YVYU( );
- }
- for( i_x = ( (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) % 8 ) / 2; i_x-- ; )
- {
- C_YUV420_YVYU( );
- }
-
- p_y2 += i_source_margin;
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- p_line2 += i_dest_margin;
- }
-
-#if defined (PLUGIN_ALTIVEC)
- }
-#endif
-
-#elif defined(PLUGIN_SSE2)
- /*
- ** SSE2 128 bits fetch/store instructions are faster
- ** if memory access is 16 bytes aligned
- */
- if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
- ((intptr_t)p_line2|(intptr_t)p_y2))) )
- {
- /* use faster SSE2 aligned fetch and store */
- for( i_y = (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height) / 2 ; i_y-- ; )
- {
- p_line1 = p_line2;
- p_line2 += p_dest->p->i_pitch;
-
- p_y1 = p_y2;
- p_y2 += p_source->p[Y_PLANE].i_pitch;
-
- for( i_x = (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) / 16 ; i_x-- ; )
- {
- SSE2_CALL( SSE2_YUV420_YVYU_ALIGNED );
- }
- for( i_x = ( (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) % 16 ) / 2; i_x-- ; )
- {
- C_YUV420_YVYU( );
- }
-
- p_y2 += i_source_margin;
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- p_line2 += i_dest_margin;
- }
- }
- else
- {
- /* use slower SSE2 unaligned fetch and store */
- for( i_y = (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height) / 2 ; i_y-- ; )
- {
- p_line1 = p_line2;
- p_line2 += p_dest->p->i_pitch;
-
- p_y1 = p_y2;
- p_y2 += p_source->p[Y_PLANE].i_pitch;
-
- for( i_x = (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) / 16 ; i_x-- ; )
- {
- SSE2_CALL( SSE2_YUV420_YVYU_UNALIGNED );
- }
- for( i_x = ( (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) % 16 ) / 2; i_x-- ; )
- {
- C_YUV420_YVYU( );
- }
-
- p_y2 += i_source_margin;
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- p_line2 += i_dest_margin;
- }
- }
- /* make sure all SSE2 stores are visible thereafter */
- SSE2_END;
-#endif // defined(PLUGIN_SSE2)
-}
-
-/*****************************************************************************
- * I420_UYVY: planar YUV 4:2:0 to packed UYVY 4:2:2
- *****************************************************************************/
-VLC_TARGET
-static void I420_UYVY( filter_t *p_filter, picture_t *p_source,
- picture_t *p_dest )
-{
- uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
- uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
- uint8_t *p_u = p_source->U_PIXELS;
- uint8_t *p_v = p_source->V_PIXELS;
-
- int i_x, i_y;
-
-#if defined (PLUGIN_ALTIVEC)
-#define VEC_NEXT_LINES( ) \
- p_line1 = p_line2; \
- p_line2 += p_dest->p->i_pitch; \
- p_y1 = p_y2; \
- p_y2 += p_source->p[Y_PLANE].i_pitch;
-
-#define VEC_LOAD_UV( ) \
- u_vec = vec_ld( 0, p_u ); p_u += 16; \
- v_vec = vec_ld( 0, p_v ); p_v += 16;
-
-#define VEC_MERGE( a ) \
- uv_vec = a( u_vec, v_vec ); \
- y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
- vec_st( vec_mergeh( uv_vec, y_vec ), 0, p_line1 ); p_line1 += 16; \
- vec_st( vec_mergel( uv_vec, y_vec ), 0, p_line1 ); p_line1 += 16; \
- y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
- vec_st( vec_mergeh( uv_vec, y_vec ), 0, p_line2 ); p_line2 += 16; \
- vec_st( vec_mergel( uv_vec, y_vec ), 0, p_line2 ); p_line2 += 16;
-
- vector unsigned char u_vec;
- vector unsigned char v_vec;
- vector unsigned char uv_vec;
- vector unsigned char y_vec;
-
- if( !( ( (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) % 32 ) |
- ( (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height) % 2 ) ) )
- {
- /* Width is a multiple of 32, we take 2 lines at a time */
- for( i_y = (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height) / 2 ; i_y-- ; )
- {
- VEC_NEXT_LINES( );
- for( i_x = (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) / 32 ; i_x-- ; )
- {
- VEC_LOAD_UV( );
- VEC_MERGE( vec_mergeh );
- VEC_MERGE( vec_mergel );
- }
- }
- }
- else if( !( ( (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) % 16 ) |
- ( (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height) % 4 ) ) )
- {
- /* Width is only a multiple of 16, we take 4 lines at a time */
- for( i_y = (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height) / 4 ; i_y-- ; )
- {
- /* Line 1 and 2, pixels 0 to ( width - 16 ) */
- VEC_NEXT_LINES( );
- for( i_x = (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) / 32 ; i_x-- ; )
- {
- VEC_LOAD_UV( );
- VEC_MERGE( vec_mergeh );
- VEC_MERGE( vec_mergel );
- }
-
- /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
- VEC_LOAD_UV( );
- VEC_MERGE( vec_mergeh );
-
- /* Line 3 and 4, pixels 0 to 16 */
- VEC_NEXT_LINES( );
- VEC_MERGE( vec_mergel );
-
- /* Line 3 and 4, pixels 16 to ( width ) */
- for( i_x = (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) / 32 ; i_x-- ; )
- {
- VEC_LOAD_UV( );
- VEC_MERGE( vec_mergeh );
- VEC_MERGE( vec_mergel );
- }
- }
- }
- else
- {
- /* Crap, use the C version */
-#undef VEC_NEXT_LINES
-#undef VEC_LOAD_UV
-#undef VEC_MERGE
-#endif
-
- const int i_source_margin = p_source->p[0].i_pitch
- - p_source->p[0].i_visible_pitch
- - p_filter->fmt_in.video.i_x_offset;
- const int i_source_margin_c = p_source->p[1].i_pitch
- - p_source->p[1].i_visible_pitch
- - ( p_filter->fmt_in.video.i_x_offset / 2 );
- const int i_dest_margin = p_dest->p->i_pitch
- - p_dest->p->i_visible_pitch
- - ( p_filter->fmt_out.video.i_x_offset * 2 );
-
-#if !defined(PLUGIN_SSE2)
- for( i_y = (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height) / 2 ; i_y-- ; )
- {
- p_line1 = p_line2;
- p_line2 += p_dest->p->i_pitch;
-
- p_y1 = p_y2;
- p_y2 += p_source->p[Y_PLANE].i_pitch;
-
- for( i_x = (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) / 8 ; i_x-- ; )
- {
- C_YUV420_UYVY( );
- C_YUV420_UYVY( );
- C_YUV420_UYVY( );
- C_YUV420_UYVY( );
- }
- for( i_x = ( (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) % 8 ) / 2; i_x--; )
- {
- C_YUV420_UYVY( );
- }
-
- p_y2 += i_source_margin;
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- p_line2 += i_dest_margin;
- }
-
-#if defined (PLUGIN_ALTIVEC)
- }
-#endif
-
-#elif defined(PLUGIN_SSE2)
- /*
- ** SSE2 128 bits fetch/store instructions are faster
- ** if memory access is 16 bytes aligned
- */
- if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
- ((intptr_t)p_line2|(intptr_t)p_y2))) )
- {
- /* use faster SSE2 aligned fetch and store */
- for( i_y = (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height) / 2 ; i_y-- ; )
- {
- p_line1 = p_line2;
- p_line2 += p_dest->p->i_pitch;
-
- p_y1 = p_y2;
- p_y2 += p_source->p[Y_PLANE].i_pitch;
-
- for( i_x = (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) / 16 ; i_x-- ; )
- {
- SSE2_CALL( SSE2_YUV420_UYVY_ALIGNED );
- }
- for( i_x = ( (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) % 16 ) / 2; i_x-- ; )
- {
- C_YUV420_UYVY( );
- }
-
- p_y2 += i_source_margin;
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- p_line2 += i_dest_margin;
- }
- }
- else
- {
- /* use slower SSE2 unaligned fetch and store */
- for( i_y = (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height) / 2 ; i_y-- ; )
- {
- p_line1 = p_line2;
- p_line2 += p_dest->p->i_pitch;
-
- p_y1 = p_y2;
- p_y2 += p_source->p[Y_PLANE].i_pitch;
-
- for( i_x = (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) / 16 ; i_x-- ; )
- {
- SSE2_CALL( SSE2_YUV420_UYVY_UNALIGNED );
- }
- for( i_x = ( (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) % 16 ) / 2; i_x-- ; )
- {
- C_YUV420_UYVY( );
- }
-
- p_y2 += i_source_margin;
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- p_line2 += i_dest_margin;
- }
- }
- /* make sure all SSE2 stores are visible thereafter */
- SSE2_END;
-#endif // defined(PLUGIN_SSE2)
-}
-
-/*****************************************************************************
- * I420_Y211: planar YUV 4:2:0 to packed YUYV 2:1:1
- *****************************************************************************/
-#if defined (PLUGIN_PLAIN)
-static void I420_Y211( filter_t *p_filter, picture_t *p_source,
- picture_t *p_dest )
-{
- uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
- uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
- uint8_t *p_u = p_source->U_PIXELS;
- uint8_t *p_v = p_source->V_PIXELS;
-
- int i_x, i_y;
-
- const int i_source_margin = p_source->p[0].i_pitch
- - p_source->p[0].i_visible_pitch
- - p_filter->fmt_in.video.i_x_offset;
- const int i_source_margin_c = p_source->p[1].i_pitch
- - p_source->p[1].i_visible_pitch
- - ( p_filter->fmt_in.video.i_x_offset / 2 );
- const int i_dest_margin = p_dest->p->i_pitch
- - p_dest->p->i_visible_pitch
- - ( p_filter->fmt_out.video.i_x_offset * 2 );
-
- for( i_y = (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height) / 2 ; i_y-- ; )
- {
- p_line1 = p_line2;
- p_line2 += p_dest->p->i_pitch;
-
- p_y1 = p_y2;
- p_y2 += p_source->p[Y_PLANE].i_pitch;
-
- for( i_x = (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) / 8 ; i_x-- ; )
- {
- C_YUV420_Y211( );
- C_YUV420_Y211( );
- }
-
- p_y2 += i_source_margin;
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- p_line2 += i_dest_margin;
- }
-}
-#endif
=====================================
modules/video_chroma/i420_yuy2.h deleted
=====================================
@@ -1,317 +0,0 @@
-/*****************************************************************************
- * i420_yuy2.h : YUV to YUV conversion module for vlc
- *****************************************************************************
- * Copyright (C) 2000, 2001 VLC authors and VideoLAN
- *
- * Authors: Samuel Hocevar <sam at zoy.org>
- * Damien Fouilleul <damien at videolan.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
- *****************************************************************************/
-
-#if defined( PLUGIN_SSE2 )
-
-#if defined(CAN_COMPILE_SSE2)
-
-/* SSE2 assembly */
-
-#define SSE2_CALL(SSE2_INSTRUCTIONS) \
- do { \
- __asm__ __volatile__( \
- ".p2align 3 \n\t \
-movq (%0), %%xmm1 # Load 8 Cb 00 00 00 ... u2 u1 u0 \n\
-movq (%1), %%xmm2 # Load 8 Cr 00 00 00 ... v2 v1 v0 \n\
-" \
- : \
- : "r" (p_u), "r" (p_v) \
- : "xmm1", "xmm2"); \
- __asm__ __volatile__( \
- ".p2align 3 \n\t" \
- SSE2_INSTRUCTIONS \
- : \
- : "r" (p_line1), "r" (p_line2), \
- "r" (p_y1), "r" (p_y2) \
- : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"); \
- p_line1 += 32; p_line2 += 32; \
- p_y1 += 16; p_y2 += 16; \
- p_u += 8; p_v += 8; \
- } while(0)
-
-#define SSE2_END __asm__ __volatile__ ( "sfence" ::: "memory" )
-
-#define SSE2_YUV420_YUYV_ALIGNED " \n\
-movdqa (%2), %%xmm0 # Load 16 Y yF yE yD ... y2 y1 y0 \n\
-movdqa (%3), %%xmm3 # Load 16 Y YF YE YD ... Y2 Y1 Y0 \n\
-punpcklbw %%xmm2, %%xmm1 # 00 00 ... v1 u1 v0 u0 \n\
-movdqa %%xmm0, %%xmm2 # yF yE yD ... y2 y1 y0 \n\
-punpcklbw %%xmm1, %%xmm2 # v3 y7 ... v0 y1 u0 y0 \n\
-movntdq %%xmm2, (%0) # Store low YUYV \n\
-punpckhbw %%xmm1, %%xmm0 # v7 yF ... v4 y9 u4 y8 \n\
-movntdq %%xmm0, 16(%0) # Store high YUYV \n\
-movdqa %%xmm3, %%xmm4 # YF YE YD ... Y2 Y1 Y0 \n\
-punpcklbw %%xmm1, %%xmm4 # v3 Y7 ... v0 Y1 u0 Y0 \n\
-movntdq %%xmm4, (%1) # Store low YUYV \n\
-punpckhbw %%xmm1, %%xmm3 # v7 YF ... v4 Y9 u4 Y8 \n\
-movntdq %%xmm3, 16(%1) # Store high YUYV \n\
-"
-
-#define SSE2_YUV420_YUYV_UNALIGNED " \n\
-movdqu (%2), %%xmm0 # Load 16 Y yF yE yD ... y2 y1 y0 \n\
-movdqu (%3), %%xmm3 # Load 16 Y YF YE YD ... Y2 Y1 Y0 \n\
-prefetchnta (%0) # Tell CPU not to cache output YUYV data \n\
-prefetchnta (%1) # Tell CPU not to cache output YUYV data \n\
-punpcklbw %%xmm2, %%xmm1 # 00 00 ... v1 u1 v0 u0 \n\
-movdqa %%xmm0, %%xmm2 # yF yE yD ... y2 y1 y0 \n\
-punpcklbw %%xmm1, %%xmm2 # v3 y7 ... v0 y1 u0 y0 \n\
-movdqu %%xmm2, (%0) # Store low YUYV \n\
-punpckhbw %%xmm1, %%xmm0 # v7 yF ... v4 y9 u4 y8 \n\
-movdqu %%xmm0, 16(%0) # Store high YUYV \n\
-movdqa %%xmm3, %%xmm4 # YF YE YD ... Y2 Y1 Y0 \n\
-punpcklbw %%xmm1, %%xmm4 # v3 Y7 ... v0 Y1 u0 Y0 \n\
-movdqu %%xmm4, (%1) # Store low YUYV \n\
-punpckhbw %%xmm1, %%xmm3 # v7 YF ... v4 Y9 u4 Y8 \n\
-movdqu %%xmm3, 16(%1) # Store high YUYV \n\
-"
-
-#define SSE2_YUV420_YVYU_ALIGNED " \n\
-movdqa (%2), %%xmm0 # Load 16 Y yF yE yD ... y2 y1 y0 \n\
-movdqa (%3), %%xmm3 # Load 16 Y YF YE YD ... Y2 Y1 Y0 \n\
-punpcklbw %%xmm1, %%xmm2 # u7 v7 ... u1 v1 u0 v0 \n\
-movdqa %%xmm0, %%xmm1 # yF yE yD ... y2 y1 y0 \n\
-punpcklbw %%xmm2, %%xmm1 # u3 y7 ... u0 y1 v0 y0 \n\
-movntdq %%xmm1, (%0) # Store low YUYV \n\
-punpckhbw %%xmm2, %%xmm0 # u7 yF ... u4 y9 v4 y8 \n\
-movntdq %%xmm0, 16(%0) # Store high YUYV \n\
-movdqa %%xmm3, %%xmm4 # YF YE YD ... Y2 Y1 Y0 \n\
-punpcklbw %%xmm2, %%xmm4 # u3 Y7 ... u0 Y1 v0 Y0 \n\
-movntdq %%xmm4, (%1) # Store low YUYV \n\
-punpckhbw %%xmm2, %%xmm3 # u7 YF ... u4 Y9 v4 Y8 \n\
-movntdq %%xmm3, 16(%1) # Store high YUYV \n\
-"
-
-#define SSE2_YUV420_YVYU_UNALIGNED " \n\
-movdqu (%2), %%xmm0 # Load 16 Y yF yE yD ... y2 y1 y0 \n\
-movdqu (%3), %%xmm3 # Load 16 Y YF YE YD ... Y2 Y1 Y0 \n\
-prefetchnta (%0) # Tell CPU not to cache output YVYU data \n\
-prefetchnta (%1) # Tell CPU not to cache output YVYU data \n\
-punpcklbw %%xmm1, %%xmm2 # u7 v7 ... u1 v1 u0 v0 \n\
-movdqu %%xmm0, %%xmm1 # yF yE yD ... y2 y1 y0 \n\
-punpcklbw %%xmm2, %%xmm1 # u3 y7 ... u0 y1 v0 y0 \n\
-movdqu %%xmm1, (%0) # Store low YUYV \n\
-punpckhbw %%xmm2, %%xmm0 # u7 yF ... u4 y9 v4 y8 \n\
-movdqu %%xmm0, 16(%0) # Store high YUYV \n\
-movdqu %%xmm3, %%xmm4 # YF YE YD ... Y2 Y1 Y0 \n\
-punpcklbw %%xmm2, %%xmm4 # u3 Y7 ... u0 Y1 v0 Y0 \n\
-movdqu %%xmm4, (%1) # Store low YUYV \n\
-punpckhbw %%xmm2, %%xmm3 # u7 YF ... u4 Y9 v4 Y8 \n\
-movdqu %%xmm3, 16(%1) # Store high YUYV \n\
-"
-
-#define SSE2_YUV420_UYVY_ALIGNED " \n\
-movdqa (%2), %%xmm0 # Load 16 Y yF yE yD ... y2 y1 y0 \n\
-movdqa (%3), %%xmm3 # Load 16 Y YF YE YD ... Y2 Y1 Y0 \n\
-punpcklbw %%xmm2, %%xmm1 # v7 u7 ... v1 u1 v0 u0 \n\
-movdqa %%xmm1, %%xmm2 # v7 u7 ... v1 u1 v0 u0 \n\
-punpcklbw %%xmm0, %%xmm2 # y7 v3 ... y1 v0 y0 u0 \n\
-movntdq %%xmm2, (%0) # Store low UYVY \n\
-movdqa %%xmm1, %%xmm2 # v7 u7 ... v1 u1 v0 u0 \n\
-punpckhbw %%xmm0, %%xmm2 # yF v7 ... y9 v4 y8 u4 \n\
-movntdq %%xmm2, 16(%0) # Store high UYVY \n\
-movdqa %%xmm1, %%xmm4 # v7 u7 ... v1 u1 v0 u0 \n\
-punpcklbw %%xmm3, %%xmm4 # Y7 v3 ... Y1 v0 Y0 u0 \n\
-movntdq %%xmm4, (%1) # Store low UYVY \n\
-punpckhbw %%xmm3, %%xmm1 # YF v7 ... Y9 v4 Y8 u4 \n\
-movntdq %%xmm1, 16(%1) # Store high UYVY \n\
-"
-
-#define SSE2_YUV420_UYVY_UNALIGNED " \n\
-movdqu (%2), %%xmm0 # Load 16 Y yF yE yD ... y2 y1 y0 \n\
-movdqu (%3), %%xmm3 # Load 16 Y YF YE YD ... Y2 Y1 Y0 \n\
-prefetchnta (%0) # Tell CPU not to cache output UYVY data \n\
-prefetchnta (%1) # Tell CPU not to cache output UYVY data \n\
-punpcklbw %%xmm2, %%xmm1 # v7 u7 ... v1 u1 v0 u0 \n\
-movdqu %%xmm1, %%xmm2 # v7 u7 ... v1 u1 v0 u0 \n\
-punpcklbw %%xmm0, %%xmm2 # y7 v3 ... y1 v0 y0 u0 \n\
-movdqu %%xmm2, (%0) # Store low UYVY \n\
-movdqu %%xmm1, %%xmm2 # v7 u7 ... v1 u1 v0 u0 \n\
-punpckhbw %%xmm0, %%xmm2 # yF v7 ... y9 v4 y8 u4 \n\
-movdqu %%xmm2, 16(%0) # Store high UYVY \n\
-movdqu %%xmm1, %%xmm4 # v7 u7 ... v1 u1 v0 u0 \n\
-punpcklbw %%xmm3, %%xmm4 # Y7 v3 ... Y1 v0 Y0 u0 \n\
-movdqu %%xmm4, (%1) # Store low UYVY \n\
-punpckhbw %%xmm3, %%xmm1 # YF v7 ... Y9 v4 Y8 u4 \n\
-movdqu %%xmm1, 16(%1) # Store high UYVY \n\
-"
-
-#elif defined(HAVE_SSE2_INTRINSICS)
-
-/* SSE2 intrinsics */
-
-#include <emmintrin.h>
-
-#define SSE2_CALL(SSE2_INSTRUCTIONS) \
- do { \
- __m128i xmm0, xmm1, xmm2, xmm3, xmm4; \
- SSE2_INSTRUCTIONS \
- p_line1 += 32; p_line2 += 32; \
- p_y1 += 16; p_y2 += 16; \
- p_u += 8; p_v += 8; \
- } while(0)
-
-#define SSE2_END _mm_sfence()
-
-#define SSE2_YUV420_YUYV_ALIGNED \
- xmm1 = _mm_loadl_epi64((__m128i *)p_u); \
- xmm2 = _mm_loadl_epi64((__m128i *)p_v); \
- xmm0 = _mm_load_si128((__m128i *)p_y1); \
- xmm3 = _mm_load_si128((__m128i *)p_y2); \
- xmm1 = _mm_unpacklo_epi8(xmm1, xmm2); \
- xmm2 = xmm0; \
- xmm2 = _mm_unpacklo_epi8(xmm2, xmm1); \
- _mm_stream_si128((__m128i*)(p_line1), xmm2); \
- xmm0 = _mm_unpackhi_epi8(xmm0, xmm1); \
- _mm_stream_si128((__m128i*)(p_line1+16), xmm0); \
- xmm4 = xmm3; \
- xmm4 = _mm_unpacklo_epi8(xmm4, xmm1); \
- _mm_stream_si128((__m128i*)(p_line2), xmm4); \
- xmm3 = _mm_unpackhi_epi8(xmm3, xmm1); \
- _mm_stream_si128((__m128i*)(p_line1+16), xmm3);
-
-#define SSE2_YUV420_YUYV_UNALIGNED \
- xmm1 = _mm_loadl_epi64((__m128i *)p_u); \
- xmm2 = _mm_loadl_epi64((__m128i *)p_v); \
- xmm0 = _mm_loadu_si128((__m128i *)p_y1); \
- xmm3 = _mm_loadu_si128((__m128i *)p_y2); \
- _mm_prefetch(p_line1, _MM_HINT_NTA); \
- _mm_prefetch(p_line2, _MM_HINT_NTA); \
- xmm1 = _mm_unpacklo_epi8(xmm1, xmm2); \
- xmm2 = xmm0; \
- xmm2 = _mm_unpacklo_epi8(xmm2, xmm1); \
- _mm_storeu_si128((__m128i*)(p_line1), xmm2); \
- xmm0 = _mm_unpackhi_epi8(xmm0, xmm1); \
- _mm_storeu_si128((__m128i*)(p_line1+16), xmm0); \
- xmm4 = xmm3; \
- xmm4 = _mm_unpacklo_epi8(xmm4, xmm1); \
- _mm_storeu_si128((__m128i*)(p_line2), xmm4); \
- xmm3 = _mm_unpackhi_epi8(xmm3, xmm1); \
- _mm_storeu_si128((__m128i*)(p_line1+16), xmm3);
-
-#define SSE2_YUV420_YVYU_ALIGNED \
- xmm1 = _mm_loadl_epi64((__m128i *)p_v); \
- xmm2 = _mm_loadl_epi64((__m128i *)p_u); \
- xmm0 = _mm_load_si128((__m128i *)p_y1); \
- xmm3 = _mm_load_si128((__m128i *)p_y2); \
- xmm1 = _mm_unpacklo_epi8(xmm1, xmm2); \
- xmm2 = xmm0; \
- xmm2 = _mm_unpacklo_epi8(xmm2, xmm1); \
- _mm_stream_si128((__m128i*)(p_line1), xmm2); \
- xmm0 = _mm_unpackhi_epi8(xmm0, xmm1); \
- _mm_stream_si128((__m128i*)(p_line1+16), xmm0); \
- xmm4 = xmm3; \
- xmm4 = _mm_unpacklo_epi8(xmm4, xmm1); \
- _mm_stream_si128((__m128i*)(p_line2), xmm4); \
- xmm3 = _mm_unpackhi_epi8(xmm3, xmm1); \
- _mm_stream_si128((__m128i*)(p_line1+16), xmm3);
-
-#define SSE2_YUV420_YVYU_UNALIGNED \
- xmm1 = _mm_loadl_epi64((__m128i *)p_v); \
- xmm2 = _mm_loadl_epi64((__m128i *)p_u); \
- xmm0 = _mm_loadu_si128((__m128i *)p_y1); \
- xmm3 = _mm_loadu_si128((__m128i *)p_y2); \
- _mm_prefetch(p_line1, _MM_HINT_NTA); \
- _mm_prefetch(p_line2, _MM_HINT_NTA); \
- xmm1 = _mm_unpacklo_epi8(xmm1, xmm2); \
- xmm2 = xmm0; \
- xmm2 = _mm_unpacklo_epi8(xmm2, xmm1); \
- _mm_storeu_si128((__m128i*)(p_line1), xmm2); \
- xmm0 = _mm_unpackhi_epi8(xmm0, xmm1); \
- _mm_storeu_si128((__m128i*)(p_line1+16), xmm0); \
- xmm4 = xmm3; \
- xmm4 = _mm_unpacklo_epi8(xmm4, xmm1); \
- _mm_storeu_si128((__m128i*)(p_line2), xmm4); \
- xmm3 = _mm_unpackhi_epi8(xmm3, xmm1); \
- _mm_storeu_si128((__m128i*)(p_line1+16), xmm3);
-
-#define SSE2_YUV420_UYVY_ALIGNED \
- xmm1 = _mm_loadl_epi64((__m128i *)p_u); \
- xmm2 = _mm_loadl_epi64((__m128i *)p_v); \
- xmm0 = _mm_load_si128((__m128i *)p_y1); \
- xmm3 = _mm_load_si128((__m128i *)p_y2); \
- xmm1 = _mm_unpacklo_epi8(xmm1, xmm2); \
- xmm2 = xmm1; \
- xmm2 = _mm_unpacklo_epi8(xmm2, xmm0); \
- _mm_stream_si128((__m128i*)(p_line1), xmm2); \
- xmm2 = xmm1; \
- xmm2 = _mm_unpackhi_epi8(xmm2, xmm0); \
- _mm_stream_si128((__m128i*)(p_line1+16), xmm2); \
- xmm4 = xmm1; \
- xmm4 = _mm_unpacklo_epi8(xmm4, xmm3); \
- _mm_stream_si128((__m128i*)(p_line2), xmm4); \
- xmm1 = _mm_unpackhi_epi8(xmm1, xmm3); \
- _mm_stream_si128((__m128i*)(p_line1+16), xmm1);
-
-#define SSE2_YUV420_UYVY_UNALIGNED \
- xmm1 = _mm_loadl_epi64((__m128i *)p_u); \
- xmm2 = _mm_loadl_epi64((__m128i *)p_v); \
- xmm0 = _mm_loadu_si128((__m128i *)p_y1); \
- xmm3 = _mm_loadu_si128((__m128i *)p_y2); \
- _mm_prefetch(p_line1, _MM_HINT_NTA); \
- _mm_prefetch(p_line2, _MM_HINT_NTA); \
- xmm1 = _mm_unpacklo_epi8(xmm1, xmm2); \
- xmm2 = xmm1; \
- xmm2 = _mm_unpacklo_epi8(xmm2, xmm0); \
- _mm_storeu_si128((__m128i*)(p_line1), xmm2); \
- xmm2 = xmm1; \
- xmm2 = _mm_unpackhi_epi8(xmm2, xmm0); \
- _mm_storeu_si128((__m128i*)(p_line1+16), xmm2); \
- xmm4 = xmm1; \
- xmm4 = _mm_unpacklo_epi8(xmm4, xmm3); \
- _mm_storeu_si128((__m128i*)(p_line2), xmm4); \
- xmm1 = _mm_unpackhi_epi8(xmm1, xmm3); \
- _mm_storeu_si128((__m128i*)(p_line1+16), xmm1);
-
-#endif
-
-#endif
-
-/* Used in both accelerated and C modules */
-
-#define C_YUV420_YVYU( ) \
- *(p_line1)++ = *(p_y1)++; *(p_line2)++ = *(p_y2)++; \
- *(p_line1)++ = *(p_line2)++ = *(p_v)++; \
- *(p_line1)++ = *(p_y1)++; *(p_line2)++ = *(p_y2)++; \
- *(p_line1)++ = *(p_line2)++ = *(p_u)++; \
-
-#define C_YUV420_Y211( ) \
- *(p_line1)++ = *(p_y1); p_y1 += 2; \
- *(p_line2)++ = *(p_y2); p_y2 += 2; \
- *(p_line1)++ = *(p_line2)++ = *(p_u) - 0x80; p_u += 2; \
- *(p_line1)++ = *(p_y1); p_y1 += 2; \
- *(p_line2)++ = *(p_y2); p_y2 += 2; \
- *(p_line1)++ = *(p_line2)++ = *(p_v) - 0x80; p_v += 2; \
-
-
-#define C_YUV420_YUYV( ) \
- *(p_line1)++ = *(p_y1)++; *(p_line2)++ = *(p_y2)++; \
- *(p_line1)++ = *(p_line2)++ = *(p_u)++; \
- *(p_line1)++ = *(p_y1)++; *(p_line2)++ = *(p_y2)++; \
- *(p_line1)++ = *(p_line2)++ = *(p_v)++; \
-
-#define C_YUV420_UYVY( ) \
- *(p_line1)++ = *(p_line2)++ = *(p_u)++; \
- *(p_line1)++ = *(p_y1)++; *(p_line2)++ = *(p_y2)++; \
- *(p_line1)++ = *(p_line2)++ = *(p_v)++; \
- *(p_line1)++ = *(p_y1)++; *(p_line2)++ = *(p_y2)++; \
-
=====================================
modules/video_chroma/i422_yuy2.c deleted
=====================================
@@ -1,445 +0,0 @@
-/*****************************************************************************
- * i422_yuy2.c : Planar YUV 4:2:2 to Packed YUV conversion module for vlc
- *****************************************************************************
- * Copyright (C) 2000, 2001 VLC authors and VideoLAN
- *
- * Authors: Samuel Hocevar <sam at zoy.org>
- * Damien Fouilleul <damienf at videolan.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
- *****************************************************************************/
-
-/*****************************************************************************
- * Preamble
- *****************************************************************************/
-
-#ifdef HAVE_CONFIG_H
-# include "config.h"
-#endif
-
-#include <vlc_common.h>
-#include <vlc_plugin.h>
-#include <vlc_filter.h>
-#include <vlc_picture.h>
-#include <vlc_chroma_probe.h>
-#include <vlc_cpu.h>
-
-#include "i422_yuy2.h"
-
-#define SRC_FOURCC "I422"
-#if !defined (PLUGIN_SSE2)
-# define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,Y211"
-# define COST 0.75
-#else
-# define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422"
-# define COST 1
-#endif
-
-/*****************************************************************************
- * Local and extern prototypes.
- *****************************************************************************/
-static int Activate ( filter_t * );
-
-/*****************************************************************************
- * Module descriptor
- *****************************************************************************/
-static void ProbeChroma(vlc_chroma_conv_vec *vec)
-{
- vlc_chroma_conv_add_in_outlist(vec, COST, VLC_CODEC_I422, VLC_CODEC_YUYV,
- VLC_CODEC_YVYU, VLC_CODEC_UYVY);
-#ifdef PLUGIN_PLAIN
- vlc_chroma_conv_add(vec, COST, VLC_CODEC_I422, VLC_CODEC_Y211, false);
-#endif
-}
-
-vlc_module_begin ()
-#if defined (PLUGIN_SSE2)
- set_description( N_("SSE2 conversions from " SRC_FOURCC " to " DEST_FOURCC) )
- set_callback_video_converter( Activate, 120 )
-# define vlc_CPU_capable() vlc_CPU_SSE2()
-# define VLC_TARGET VLC_SSE
-#else
- set_description( N_("Conversions from " SRC_FOURCC " to " DEST_FOURCC) )
- set_callback_video_converter( Activate, 80 )
-# define PLUGIN_PLAIN
-# define vlc_CPU_capable() (true)
-# define VLC_TARGET
-#endif
- add_submodule()
- set_callback_chroma_conv_probe(ProbeChroma)
-vlc_module_end ()
-
-
-VIDEO_FILTER_WRAPPER( I422_YUY2 )
-VIDEO_FILTER_WRAPPER( I422_YVYU )
-VIDEO_FILTER_WRAPPER( I422_UYVY )
-#if defined (PLUGIN_PLAIN)
-VIDEO_FILTER_WRAPPER( I422_Y211 )
-#endif
-
-
-static const struct vlc_filter_operations*
-GetFilterOperations(filter_t *filter)
-{
- switch( filter->fmt_out.video.i_chroma )
- {
- case VLC_CODEC_YUYV:
- return &I422_YUY2_ops;
-
- case VLC_CODEC_YVYU:
- return &I422_YVYU_ops;
-
- case VLC_CODEC_UYVY:
- return &I422_UYVY_ops;
-
-#if defined (PLUGIN_PLAIN)
- case VLC_CODEC_Y211:
- return &I422_Y211_ops;
-#endif
-
- default:
- return NULL;
- }
-
-}
-
-/*****************************************************************************
- * Activate: allocate a chroma function
- *****************************************************************************
- * This function allocates and initializes a chroma function
- *****************************************************************************/
-static int Activate( filter_t *p_filter )
-{
- if( !vlc_CPU_capable() )
- return VLC_EGENERIC;
- if( (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) & 1
- || (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height) & 1 )
- {
- return VLC_EGENERIC;
- }
-
- if( p_filter->fmt_in.video.orientation != p_filter->fmt_out.video.orientation )
- {
- return VLC_EGENERIC;
- }
-
- /* This is a i422 -> * converter. */
- if( p_filter->fmt_in.video.i_chroma != VLC_CODEC_I422 )
- return VLC_EGENERIC;
-
-
- p_filter->ops = GetFilterOperations( p_filter );
- if( p_filter->ops == NULL)
- return VLC_EGENERIC;
-
- return VLC_SUCCESS;
-}
-
-/* Following functions are local */
-
-/*****************************************************************************
- * I422_YUY2: planar YUV 4:2:2 to packed YUY2 4:2:2
- *****************************************************************************/
-VLC_TARGET
-static void I422_YUY2( filter_t *p_filter, picture_t *p_source,
- picture_t *p_dest )
-{
- uint8_t *p_line = p_dest->p->p_pixels;
- uint8_t *p_y = p_source->Y_PIXELS;
- uint8_t *p_u = p_source->U_PIXELS;
- uint8_t *p_v = p_source->V_PIXELS;
-
- int i_x, i_y;
-
- const int i_source_margin = p_source->p[0].i_pitch
- - p_source->p[0].i_visible_pitch
- - p_filter->fmt_in.video.i_x_offset;
- const int i_source_margin_c = p_source->p[1].i_pitch
- - p_source->p[1].i_visible_pitch
- - ( p_filter->fmt_in.video.i_x_offset );
- const int i_dest_margin = p_dest->p->i_pitch
- - p_dest->p->i_visible_pitch
- - ( p_filter->fmt_out.video.i_x_offset * 2 );
-
-#if defined (PLUGIN_SSE2)
-
- if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
- ((intptr_t)p_line|(intptr_t)p_y))) )
- {
- /* use faster SSE2 aligned fetch and store */
- for( i_y = (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height) ; i_y-- ; )
- {
- for( i_x = (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) / 16 ; i_x-- ; )
- {
- SSE2_CALL( SSE2_YUV422_YUYV_ALIGNED );
- }
- for( i_x = ( (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) % 16 ) / 2; i_x-- ; )
- {
- C_YUV422_YUYV( p_line, p_y, p_u, p_v );
- }
- p_y += i_source_margin;
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- p_line += i_dest_margin;
- }
- }
- else {
- /* use slower SSE2 unaligned fetch and store */
- for( i_y = (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height) ; i_y-- ; )
- {
- for( i_x = (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) / 16 ; i_x-- ; )
- {
- SSE2_CALL( SSE2_YUV422_YUYV_UNALIGNED );
- }
- for( i_x = ( (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) % 16 ) / 2; i_x-- ; )
- {
- C_YUV422_YUYV( p_line, p_y, p_u, p_v );
- }
- p_y += i_source_margin;
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- p_line += i_dest_margin;
- }
- }
- SSE2_END;
-
-#else
-
- for( i_y = (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height) ; i_y-- ; )
- {
- for( i_x = (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) / 8 ; i_x-- ; )
- {
- C_YUV422_YUYV( p_line, p_y, p_u, p_v );
- C_YUV422_YUYV( p_line, p_y, p_u, p_v );
- C_YUV422_YUYV( p_line, p_y, p_u, p_v );
- C_YUV422_YUYV( p_line, p_y, p_u, p_v );
- }
- for( i_x = ( (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) % 8 ) / 2; i_x-- ; )
- {
- C_YUV422_YUYV( p_line, p_y, p_u, p_v );
- }
- p_y += i_source_margin;
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- p_line += i_dest_margin;
- }
-
-#endif
-}
-
-/*****************************************************************************
- * I422_YVYU: planar YUV 4:2:2 to packed YVYU 4:2:2
- *****************************************************************************/
-VLC_TARGET
-static void I422_YVYU( filter_t *p_filter, picture_t *p_source,
- picture_t *p_dest )
-{
- uint8_t *p_line = p_dest->p->p_pixels;
- uint8_t *p_y = p_source->Y_PIXELS;
- uint8_t *p_u = p_source->U_PIXELS;
- uint8_t *p_v = p_source->V_PIXELS;
-
- int i_x, i_y;
-
- const int i_source_margin = p_source->p[0].i_pitch
- - p_source->p[0].i_visible_pitch
- - p_filter->fmt_in.video.i_x_offset;
- const int i_source_margin_c = p_source->p[1].i_pitch
- - p_source->p[1].i_visible_pitch
- - ( p_filter->fmt_in.video.i_x_offset );
- const int i_dest_margin = p_dest->p->i_pitch
- - p_dest->p->i_visible_pitch
- - ( p_filter->fmt_out.video.i_x_offset * 2 );
-
-#if defined (PLUGIN_SSE2)
-
- if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
- ((intptr_t)p_line|(intptr_t)p_y))) )
- {
- /* use faster SSE2 aligned fetch and store */
- for( i_y = (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height) ; i_y-- ; )
- {
- for( i_x = (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) / 16 ; i_x-- ; )
- {
- SSE2_CALL( SSE2_YUV422_YVYU_ALIGNED );
- }
- for( i_x = ( (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) % 16 ) / 2; i_x-- ; )
- {
- C_YUV422_YVYU( p_line, p_y, p_u, p_v );
- }
- p_y += i_source_margin;
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- p_line += i_dest_margin;
- }
- }
- else {
- /* use slower SSE2 unaligned fetch and store */
- for( i_y = (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height) ; i_y-- ; )
- {
- for( i_x = (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) / 16 ; i_x-- ; )
- {
- SSE2_CALL( SSE2_YUV422_YVYU_UNALIGNED );
- }
- for( i_x = ( (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) % 16 ) / 2; i_x-- ; )
- {
- C_YUV422_YVYU( p_line, p_y, p_u, p_v );
- }
- p_y += i_source_margin;
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- p_line += i_dest_margin;
- }
- }
- SSE2_END;
-
-#else
-
- for( i_y = (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height) ; i_y-- ; )
- {
- for( i_x = (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) / 8 ; i_x-- ; )
- {
- C_YUV422_YVYU( p_line, p_y, p_u, p_v );
- C_YUV422_YVYU( p_line, p_y, p_u, p_v );
- C_YUV422_YVYU( p_line, p_y, p_u, p_v );
- C_YUV422_YVYU( p_line, p_y, p_u, p_v );
- }
- for( i_x = ( (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) % 8 ) / 2; i_x-- ; )
- {
- C_YUV422_YVYU( p_line, p_y, p_u, p_v );
- }
- p_y += i_source_margin;
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- p_line += i_dest_margin;
- }
-
-#endif
-}
-
-/*****************************************************************************
- * I422_UYVY: planar YUV 4:2:2 to packed UYVY 4:2:2
- *****************************************************************************/
-VLC_TARGET
-static void I422_UYVY( filter_t *p_filter, picture_t *p_source,
- picture_t *p_dest )
-{
- uint8_t *p_line = p_dest->p->p_pixels;
- uint8_t *p_y = p_source->Y_PIXELS;
- uint8_t *p_u = p_source->U_PIXELS;
- uint8_t *p_v = p_source->V_PIXELS;
-
- int i_x, i_y;
-
- const int i_source_margin = p_source->p[0].i_pitch
- - p_source->p[0].i_visible_pitch
- - p_filter->fmt_in.video.i_x_offset;
- const int i_source_margin_c = p_source->p[1].i_pitch
- - p_source->p[1].i_visible_pitch
- - ( p_filter->fmt_in.video.i_x_offset );
- const int i_dest_margin = p_dest->p->i_pitch
- - p_dest->p->i_visible_pitch
- - ( p_filter->fmt_out.video.i_x_offset * 2 );
-
-#if defined (PLUGIN_SSE2)
-
- if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
- ((intptr_t)p_line|(intptr_t)p_y))) )
- {
- /* use faster SSE2 aligned fetch and store */
- for( i_y = (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height) ; i_y-- ; )
- {
- for( i_x = (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) / 16 ; i_x-- ; )
- {
- SSE2_CALL( SSE2_YUV422_UYVY_ALIGNED );
- }
- for( i_x = ( (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) % 16 ) / 2; i_x-- ; )
- {
- C_YUV422_UYVY( p_line, p_y, p_u, p_v );
- }
- p_y += i_source_margin;
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- p_line += i_dest_margin;
- }
- }
- else {
- /* use slower SSE2 unaligned fetch and store */
- for( i_y = (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height) ; i_y-- ; )
- {
- for( i_x = (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) / 16 ; i_x-- ; )
- {
- SSE2_CALL( SSE2_YUV422_UYVY_UNALIGNED );
- }
- for( i_x = ( (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) % 16 ) / 2; i_x-- ; )
- {
- C_YUV422_UYVY( p_line, p_y, p_u, p_v );
- }
- p_y += i_source_margin;
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- p_line += i_dest_margin;
- }
- }
- SSE2_END;
-
-#else
-
- for( i_y = (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height) ; i_y-- ; )
- {
- for( i_x = (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) / 8 ; i_x-- ; )
- {
- C_YUV422_UYVY( p_line, p_y, p_u, p_v );
- C_YUV422_UYVY( p_line, p_y, p_u, p_v );
- C_YUV422_UYVY( p_line, p_y, p_u, p_v );
- C_YUV422_UYVY( p_line, p_y, p_u, p_v );
- }
- for( i_x = ( (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) % 8 ) / 2; i_x-- ; )
- {
- C_YUV422_UYVY( p_line, p_y, p_u, p_v );
- }
- p_y += i_source_margin;
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- p_line += i_dest_margin;
- }
-
-#endif
-}
-
-/*****************************************************************************
- * I422_Y211: planar YUV 4:2:2 to packed YUYV 2:1:1
- *****************************************************************************/
-#if defined (PLUGIN_PLAIN)
-static void I422_Y211( filter_t *p_filter, picture_t *p_source,
- picture_t *p_dest )
-{
- uint8_t *p_line = p_dest->p->p_pixels + p_dest->p->i_visible_lines * p_dest->p->i_pitch;
- uint8_t *p_y = p_source->Y_PIXELS;
- uint8_t *p_u = p_source->U_PIXELS;
- uint8_t *p_v = p_source->V_PIXELS;
-
- int i_x, i_y;
-
- for( i_y = (p_filter->fmt_in.video.i_y_offset + p_filter->fmt_in.video.i_visible_height) ; i_y-- ; )
- {
- for( i_x = (p_filter->fmt_in.video.i_x_offset + p_filter->fmt_in.video.i_visible_width) / 8 ; i_x-- ; )
- {
- C_YUV422_Y211( p_line, p_y, p_u, p_v );
- C_YUV422_Y211( p_line, p_y, p_u, p_v );
- }
- }
-}
-#endif
=====================================
modules/video_chroma/i422_yuy2.h deleted
=====================================
@@ -1,229 +0,0 @@
-/*****************************************************************************
- * i422_yuy2.h : YUV to YUV conversion module for vlc
- *****************************************************************************
- * Copyright (C) 2002 VLC authors and VideoLAN
- *
- * Authors: Samuel Hocevar <sam at zoy.org>
- * Damien Fouilleul <damienf at videolan.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
- *****************************************************************************/
-
-#if defined( PLUGIN_SSE2 )
-
-#if defined(CAN_COMPILE_SSE2)
-
-/* SSE2 assembly */
-
-#define SSE2_CALL(SSE2_INSTRUCTIONS) \
- do { \
- __asm__ __volatile__( \
- ".p2align 3 \n\t" \
- SSE2_INSTRUCTIONS \
- : \
- : "r" (p_line), "r" (p_y), \
- "r" (p_u), "r" (p_v) \
- : "xmm0", "xmm1", "xmm2" ); \
- p_line += 32; p_y += 16; \
- p_u += 8; p_v += 8; \
- } while(0)
-
-#define SSE2_END __asm__ __volatile__ ( "sfence" ::: "memory" )
-
-#define SSE2_YUV422_YUYV_ALIGNED " \n\
-movdqa (%1), %%xmm0 # Load 16 Y yF yE yD ... y2 y1 y0 \n\
-movq (%2), %%xmm1 # Load 8 Cb 00 00 00 ... u2 u1 u0 \n\
-movq (%3), %%xmm2 # Load 8 Cr 00 00 00 ... v2 v1 v0 \n\
-punpcklbw %%xmm2, %%xmm1 # v7 u7 ... v1 u1 v0 u0 \n\
-movdqa %%xmm0, %%xmm2 # yF yE yD ... y2 y1 y0 \n\
-punpcklbw %%xmm1, %%xmm2 # v3 y7 ... v0 y1 u0 y0 \n\
-movntdq %%xmm2, (%0) # Store low YUYV \n\
-punpckhbw %%xmm1, %%xmm0 # v7 yF ... v4 y9 u4 y8 \n\
-movntdq %%xmm0, 16(%0) # Store high YUYV \n\
-"
-
-#define SSE2_YUV422_YUYV_UNALIGNED " \n\
-movdqu (%1), %%xmm0 # Load 16 Y yF yE yD ... y2 y1 y0 \n\
-movq (%2), %%xmm1 # Load 8 Cb 00 00 00 ... u2 u1 u0 \n\
-movq (%3), %%xmm2 # Load 8 Cr 00 00 00 ... v2 v1 v0 \n\
-prefetchnta (%0) # Tell CPU not to cache output YUYV data \n\
-punpcklbw %%xmm2, %%xmm1 # v7 u7 ... v1 u1 v0 u0 \n\
-movdqa %%xmm0, %%xmm2 # yF yE yD ... y2 y1 y0 \n\
-punpcklbw %%xmm1, %%xmm2 # v3 y7 ... v0 y1 u0 y0 \n\
-movdqu %%xmm2, (%0) # Store low YUYV \n\
-punpckhbw %%xmm1, %%xmm0 # v7 yF ... v4 y9 u4 y8 \n\
-movdqu %%xmm0, 16(%0) # Store high YUYV \n\
-"
-
-#define SSE2_YUV422_YVYU_ALIGNED " \n\
-movdqa (%1), %%xmm0 # Load 16 Y yF yE yD ... y2 y1 y0 \n\
-movq (%2), %%xmm2 # Load 8 Cb 00 00 00 ... u2 u1 u0 \n\
-movq (%3), %%xmm1 # Load 8 Cr 00 00 00 ... v2 v1 v0 \n\
-punpcklbw %%xmm2, %%xmm1 # u7 v7 ... u1 v1 u0 v0 \n\
-movdqa %%xmm0, %%xmm2 # yF yE yD ... y2 y1 y0 \n\
-punpcklbw %%xmm1, %%xmm2 # u3 y7 ... u0 y1 v0 y0 \n\
-movntdq %%xmm2, (%0) # Store low YUYV \n\
-punpckhbw %%xmm1, %%xmm0 # u7 yF ... u4 y9 v4 y8 \n\
-movntdq %%xmm0, 16(%0) # Store high YUYV \n\
-"
-
-#define SSE2_YUV422_YVYU_UNALIGNED " \n\
-movdqu (%1), %%xmm0 # Load 16 Y yF yE yD ... y2 y1 y0 \n\
-movq (%2), %%xmm2 # Load 8 Cb 00 00 00 ... u2 u1 u0 \n\
-movq (%3), %%xmm1 # Load 8 Cr 00 00 00 ... v2 v1 v0 \n\
-prefetchnta (%0) # Tell CPU not to cache output YUYV data \n\
-punpcklbw %%xmm2, %%xmm1 # u7 v7 ... u1 v1 u0 v0 \n\
-movdqa %%xmm0, %%xmm2 # yF yE yD ... y2 y1 y0 \n\
-punpcklbw %%xmm1, %%xmm2 # u3 y7 ... u0 y1 v0 y0 \n\
-movdqu %%xmm2, (%0) # Store low YUYV \n\
-punpckhbw %%xmm1, %%xmm0 # u7 yF ... u4 y9 v4 y8 \n\
-movdqu %%xmm0, 16(%0) # Store high YUYV \n\
-"
-
-#define SSE2_YUV422_UYVY_ALIGNED " \n\
-movdqa (%1), %%xmm0 # Load 16 Y yF yE yD ... y2 y1 y0 \n\
-movq (%2), %%xmm1 # Load 8 Cb 00 00 00 ... u2 u1 u0 \n\
-movq (%3), %%xmm2 # Load 8 Cr 00 00 00 ... v2 v1 v0 \n\
-punpcklbw %%xmm2, %%xmm1 # v7 u7 ... v1 u1 v0 u0 \n\
-movdqa %%xmm1, %%xmm2 # v7 u7 ... v1 u1 v0 u0 \n\
-punpcklbw %%xmm0, %%xmm2 # y7 v3 ... y1 v0 y0 u0 \n\
-movntdq %%xmm2, (%0) # Store low UYVY \n\
-punpckhbw %%xmm0, %%xmm1 # yF v7 ... y9 v4 y8 u4 \n\
-movntdq %%xmm1, 16(%0) # Store high UYVY \n\
-"
-
-#define SSE2_YUV422_UYVY_UNALIGNED " \n\
-movdqu (%1), %%xmm0 # Load 16 Y yF yE yD ... y2 y1 y0 \n\
-movq (%2), %%xmm1 # Load 8 Cb 00 00 00 ... u2 u1 u0 \n\
-movq (%3), %%xmm2 # Load 8 Cr 00 00 00 ... v2 v1 v0 \n\
-prefetchnta (%0) # Tell CPU not to cache output YUYV data \n\
-punpcklbw %%xmm2, %%xmm1 # v7 u7 ... v1 u1 v0 u0 \n\
-movdqa %%xmm1, %%xmm2 # v7 u7 ... v1 u1 v0 u0 \n\
-punpcklbw %%xmm0, %%xmm2 # y7 v3 ... y1 v0 y0 u0 \n\
-movdqu %%xmm2, (%0) # Store low UYVY \n\
-punpckhbw %%xmm0, %%xmm1 # yF v7 ... y9 v4 y8 u4 \n\
-movdqu %%xmm1, 16(%0) # Store high UYVY \n\
-"
-
-#elif defined(HAVE_SSE2_INTRINSICS)
-
-/* SSE2 intrinsics */
-
-#include <emmintrin.h>
-
-#define SSE2_CALL(SSE2_INSTRUCTIONS) \
- do { \
- __m128i xmm0, xmm1, xmm2; \
- SSE2_INSTRUCTIONS \
- p_line += 32; p_y += 16; \
- p_u += 8; p_v += 8; \
- } while(0)
-
-#define SSE2_END _mm_sfence()
-
-#define SSE2_YUV422_YUYV_ALIGNED \
- xmm0 = _mm_load_si128((__m128i *)p_y); \
- xmm1 = _mm_loadl_epi64((__m128i *)p_u); \
- xmm2 = _mm_loadl_epi64((__m128i *)p_v); \
- xmm1 = _mm_unpacklo_epi8(xmm1, xmm2); \
- xmm2 = xmm0; \
- xmm2 = _mm_unpacklo_epi8(xmm2, xmm1); \
- _mm_stream_si128((__m128i*)(p_line), xmm2); \
- xmm0 = _mm_unpackhi_epi8(xmm0, xmm1); \
- _mm_stream_si128((__m128i*)(p_line+16), xmm0);
-
-#define SSE2_YUV422_YUYV_UNALIGNED \
- xmm0 = _mm_loadu_si128((__m128i *)p_y); \
- xmm1 = _mm_loadl_epi64((__m128i *)p_u); \
- xmm2 = _mm_loadl_epi64((__m128i *)p_v); \
- xmm1 = _mm_unpacklo_epi8(xmm1, xmm2); \
- xmm2 = xmm0; \
- xmm2 = _mm_unpacklo_epi8(xmm2, xmm1); \
- _mm_storeu_si128((__m128i*)(p_line), xmm2); \
- xmm0 = _mm_unpackhi_epi8(xmm0, xmm1); \
- _mm_storeu_si128((__m128i*)(p_line+16), xmm0);
-
-#define SSE2_YUV422_YVYU_ALIGNED \
- xmm0 = _mm_load_si128((__m128i *)p_y); \
- xmm2 = _mm_loadl_epi64((__m128i *)p_u); \
- xmm1 = _mm_loadl_epi64((__m128i *)p_v); \
- xmm1 = _mm_unpacklo_epi8(xmm1, xmm2); \
- xmm2 = xmm0; \
- xmm2 = _mm_unpacklo_epi8(xmm2, xmm1); \
- _mm_stream_si128((__m128i*)(p_line), xmm2); \
- xmm0 = _mm_unpackhi_epi8(xmm0, xmm1); \
- _mm_stream_si128((__m128i*)(p_line+16), xmm0);
-
-#define SSE2_YUV422_YVYU_UNALIGNED \
- xmm0 = _mm_loadu_si128((__m128i *)p_y); \
- xmm2 = _mm_loadl_epi64((__m128i *)p_u); \
- xmm1 = _mm_loadl_epi64((__m128i *)p_v); \
- xmm1 = _mm_unpacklo_epi8(xmm1, xmm2); \
- xmm2 = xmm0; \
- xmm2 = _mm_unpacklo_epi8(xmm2, xmm1); \
- _mm_storeu_si128((__m128i*)(p_line), xmm2); \
- xmm0 = _mm_unpackhi_epi8(xmm0, xmm1); \
- _mm_storeu_si128((__m128i*)(p_line+16), xmm0);
-
-#define SSE2_YUV422_UYVY_ALIGNED \
- xmm0 = _mm_load_si128((__m128i *)p_y); \
- xmm1 = _mm_loadl_epi64((__m128i *)p_u); \
- xmm2 = _mm_loadl_epi64((__m128i *)p_v); \
- xmm1 = _mm_unpacklo_epi8(xmm1, xmm2); \
- xmm2 = xmm1; \
- xmm2 = _mm_unpacklo_epi8(xmm2, xmm0); \
- _mm_stream_si128((__m128i*)(p_line), xmm2); \
- xmm1 = _mm_unpackhi_epi8(xmm1, xmm0); \
- _mm_stream_si128((__m128i*)(p_line+16), xmm1);
-
-#define SSE2_YUV422_UYVY_UNALIGNED \
- xmm0 = _mm_loadu_si128((__m128i *)p_y); \
- xmm1 = _mm_loadl_epi64((__m128i *)p_u); \
- xmm2 = _mm_loadl_epi64((__m128i *)p_v); \
- xmm1 = _mm_unpacklo_epi8(xmm1, xmm2); \
- xmm2 = xmm1; \
- xmm2 = _mm_unpacklo_epi8(xmm2, xmm0); \
- _mm_storeu_si128((__m128i*)(p_line), xmm2); \
- xmm1 = _mm_unpackhi_epi8(xmm1, xmm0); \
- _mm_storeu_si128((__m128i*)(p_line+16), xmm1);
-
-#endif
-
-#endif
-
-#define C_YUV422_YUYV( p_line, p_y, p_u, p_v ) \
- *(p_line)++ = *(p_y)++; \
- *(p_line)++ = *(p_u)++; \
- *(p_line)++ = *(p_y)++; \
- *(p_line)++ = *(p_v)++; \
-
-#define C_YUV422_YVYU( p_line, p_y, p_u, p_v ) \
- *(p_line)++ = *(p_y)++; \
- *(p_line)++ = *(p_v)++; \
- *(p_line)++ = *(p_y)++; \
- *(p_line)++ = *(p_u)++; \
-
-#define C_YUV422_UYVY( p_line, p_y, p_u, p_v ) \
- *(p_line)++ = *(p_u)++; \
- *(p_line)++ = *(p_y)++; \
- *(p_line)++ = *(p_v)++; \
- *(p_line)++ = *(p_y)++; \
-
-#define C_YUV422_Y211( p_line, p_y, p_u, p_v ) \
- *(p_line)++ = *(p_y); p_y += 2; \
- *(p_line)++ = *(p_u) - 0x80; p_u += 2; \
- *(p_line)++ = *(p_y); p_y += 2; \
- *(p_line)++ = *(p_v) - 0x80; p_v += 2; \
-
=====================================
modules/video_chroma/meson.build
=====================================
@@ -39,11 +39,6 @@ vlc_modules += {
)
}
-vlc_modules += {
- 'name' : 'i420_yuy2',
- 'sources' : files('i420_yuy2.c'),
-}
-
vlc_modules += {
'name' : 'i420_nv12',
'sources' : files('i420_nv12.c'),
@@ -55,11 +50,6 @@ vlc_modules += {
'sources' : files('i422_i420.c')
}
-vlc_modules += {
- 'name' : 'i422_yuy2',
- 'sources' : files('i422_yuy2.c'),
-}
-
vlc_modules += {
'name' : 'rv32',
'sources' : files('rv32.c')
@@ -90,20 +80,6 @@ vlc_modules += {
'enabled' : have_sse2,
}
-vlc_modules += {
- 'name' : 'i420_yuy2_sse2',
- 'sources' : files('i420_yuy2.c'),
- 'c_args' : ['-DPLUGIN_SSE2'],
- 'enabled' : have_sse2,
-}
-
-vlc_modules += {
- 'name' : 'i422_yuy2_sse2',
- 'sources' : files('i422_yuy2.c'),
- 'c_args' : ['-DPLUGIN_SSE2'],
- 'enabled' : have_sse2,
-}
-
vlc_modules += {
'name' : 'orient',
'sources' : files('orient.c'),
=====================================
po/POTFILES.in
=====================================
@@ -1277,11 +1277,7 @@ modules/video_chroma/i420_rgb.h
modules/video_chroma/i420_rgb16.c
modules/video_chroma/i420_rgb8.c
modules/video_chroma/i420_rgb_c.h
-modules/video_chroma/i420_yuy2.c
-modules/video_chroma/i420_yuy2.h
modules/video_chroma/i422_i420.c
-modules/video_chroma/i422_yuy2.c
-modules/video_chroma/i422_yuy2.h
modules/video_chroma/rv32.c
modules/video_chroma/swscale.c
modules/video_chroma/yuvp.c
View it on GitLab: https://code.videolan.org/videolan/vlc/-/compare/005d0e95de53e51a6c37a5d44f2783e19e642a15...2101c1258b171a75178e19a3f8bc1c6bab4000cf
--
View it on GitLab: https://code.videolan.org/videolan/vlc/-/compare/005d0e95de53e51a6c37a5d44f2783e19e642a15...2101c1258b171a75178e19a3f8bc1c6bab4000cf
You're receiving this email because of your account on code.videolan.org.
VideoLAN code repository instance
More information about the vlc-commits
mailing list