[vlc-devel] [PATCH] chroma: add a converter from I420/YV12 to NV12

Steve Lhomme robux4 at videolabs.io
Wed May 11 17:29:39 CEST 2016


--
replaces https://patches.videolan.org/patch/13320/ with code factorization
and SSE optimization on the Y plane copy
---
 modules/video_chroma/Makefile.am |   6 ++
 modules/video_chroma/copy.c      |  76 ++++++++++++++++++
 modules/video_chroma/copy.h      |   4 +
 modules/video_chroma/i420_nv12.c | 165 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 251 insertions(+)
 create mode 100644 modules/video_chroma/i420_nv12.c

diff --git a/modules/video_chroma/Makefile.am b/modules/video_chroma/Makefile.am
index 7bcbf5a..2c5265e 100644
--- a/modules/video_chroma/Makefile.am
+++ b/modules/video_chroma/Makefile.am
@@ -21,6 +21,11 @@ libi420_yuy2_plugin_la_SOURCES = video_chroma/i420_yuy2.c video_chroma/i420_yuy2
 libi420_yuy2_plugin_la_CPPFLAGS = $(AM_CPPFLAGS) \
 	-DMODULE_NAME_IS_i420_yuy2
 
+libi420_nv12_plugin_la_SOURCES = video_chroma/i420_nv12.c \
+	video_chroma/copy.c video_chroma/copy.h
+libi420_nv12_plugin_la_CPPFLAGS = $(AM_CPPFLAGS) \
+	-DMODULE_NAME_IS_i420_nv12
+
 libi422_i420_plugin_la_SOURCES = video_chroma/i422_i420.c
 
 libi422_yuy2_plugin_la_SOURCES = video_chroma/i422_yuy2.c video_chroma/i422_yuy2.h
@@ -38,6 +43,7 @@ libyuvp_plugin_la_SOURCES = video_chroma/yuvp.c
 chroma_LTLIBRARIES = \
 	libi420_rgb_plugin.la \
 	libi420_yuy2_plugin.la \
+	libi420_nv12_plugin.la \
 	libi422_i420_plugin.la \
 	libi422_yuy2_plugin.la \
 	libgrey_yuv_plugin.la \
diff --git a/modules/video_chroma/copy.c b/modules/video_chroma/copy.c
index 653c8db..cdeb96c 100644
--- a/modules/video_chroma/copy.c
+++ b/modules/video_chroma/copy.c
@@ -365,6 +365,41 @@ static void SSE_CopyFromNv12ToNv12(picture_t *dst,
                   width, height/2, cpu);
     asm volatile ("emms");
 }
+
+/* SSE2 flavour of CopyFromI420ToNv12(): the Y plane goes through
+ * SSE_CopyPlane() using the cache buffer, then the half-size U and V planes
+ * are interleaved byte by byte into dst->p[1] (U first, then V).
+ * src and src_pitch carry three entries, Y, U and V; V_PLANE is read below. */
+static void SSE_CopyFromI420ToNv12(picture_t *dst,
+                             uint8_t *src[3], size_t src_pitch[3],
+                             unsigned width, unsigned height,
+                             copy_cache_t *cache, unsigned cpu)
+{
+    SSE_CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
+                  src[0], src_pitch[0],
+                  cache->buffer, cache->size,
+                  width, height, cpu);
+
+    /* TODO optimise the plane merging */
+    const unsigned chroma_lines = height / 2;
+    const unsigned chroma_width = width / 2;
+
+    for ( unsigned line = 0; line < chroma_lines; line++ )
+    {
+        /* each row starts at its plane base plus line * pitch */
+        uint8_t *dstUV = dst->p[1].p_pixels + (size_t)line * dst->p[1].i_pitch;
+        const uint8_t *srcU = src[U_PLANE] + line * src_pitch[U_PLANE];
+        const uint8_t *srcV = src[V_PLANE] + line * src_pitch[V_PLANE];
+
+        for ( unsigned col = 0; col < chroma_width; col++ )
+        {
+            *dstUV++ = srcU[col];
+            *dstUV++ = srcV[col];
+        }
+    }
+    /* emms after the SSE copy, like the SSE helper just above this one */
+    asm volatile ("emms");
+}
 #undef COPY64
 #endif /* CAN_COMPILE_SSE2 */
 
@@ -450,6 +485,47 @@ void CopyFromNv12ToI420(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
                 width/2, height/2);
 }
 
+/* Converts planar 4:2:0 (src[] = Y, U, V planes) to NV12 in dst: the Y plane
+ * is copied unchanged and U/V are interleaved into dst->p[1]. width and height
+ * are those of the Y plane; cache is only touched by the SSE2 code path. */
+void CopyFromI420ToNv12(picture_t *dst, uint8_t *src[3], size_t src_pitch[3],
+                        unsigned width, unsigned height,
+                        copy_cache_t *cache)
+{
+#ifdef CAN_COMPILE_SSE2
+    unsigned cpu = vlc_CPU();
+    if (vlc_CPU_SSE2())
+    {   /* no "return expr;" here: the function returns void (C11 6.8.6.4) */
+        SSE_CopyFromI420ToNv12(dst, src, src_pitch, width, height,
+                               cache, cpu);
+        return;
+    }
+#else
+    (void) cache;
+#endif
+
+    CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
+              src[0], src_pitch[0],
+              width, height);
+
+    const unsigned chroma_lines = height / 2;
+    const unsigned chroma_width = width / 2;
+
+    for ( unsigned line = 0; line < chroma_lines; line++ )
+    {
+        uint8_t *dstUV = dst->p[1].p_pixels + (size_t)line * dst->p[1].i_pitch;
+        const uint8_t *srcU = src[U_PLANE] + line * src_pitch[U_PLANE];
+        const uint8_t *srcV = src[V_PLANE] + line * src_pitch[V_PLANE];
+
+        for ( unsigned col = 0; col < chroma_width; col++ )
+        {
+            *dstUV++ = srcU[col];
+            *dstUV++ = srcV[col];
+        }
+    }
+}
+
+
 void CopyFromYv12(picture_t *dst, uint8_t *src[3], size_t src_pitch[3],
                   unsigned width, unsigned height,
                   copy_cache_t *cache)
diff --git a/modules/video_chroma/copy.h b/modules/video_chroma/copy.h
index 06ac44d..533e2fa 100644
--- a/modules/video_chroma/copy.h
+++ b/modules/video_chroma/copy.h
@@ -50,4 +50,8 @@ void CopyFromNv12ToNv12(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
 void CopyFromNv12ToI420(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
                         unsigned width, unsigned height);
 
+void CopyFromI420ToNv12(picture_t *dst, uint8_t *src[3], size_t src_pitch[3],
+                        unsigned width, unsigned height, /* Y plane size; chroma uses half */
+                        copy_cache_t *cache); /* only used by the SSE2 path */
+
 #endif
diff --git a/modules/video_chroma/i420_nv12.c b/modules/video_chroma/i420_nv12.c
new file mode 100644
index 0000000..25d9473
--- /dev/null
+++ b/modules/video_chroma/i420_nv12.c
@@ -0,0 +1,165 @@
+/*****************************************************************************
+ * i420_nv12.c : Planar YUV 4:2:0 to NV12 4:2:0 conversion module for vlc
+ *****************************************************************************
+ * Copyright (C) 2016 VLC authors and VideoLAN
+ *
+ * Authors: Steve Lhomme <robux4 at videolabs.io>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <vlc_common.h>
+#include <vlc_plugin.h>
+#include <vlc_filter.h>
+#include "copy.h"
+
+#include <assert.h>
+
+#define SRC_FOURCC  "I420,YV12" /* input chromas, as shown in the module description */
+#define DEST_FOURCC "NV12"      /* output chroma, as shown in the module description */
+
+/*****************************************************************************
+ * Local and extern prototypes.
+ *****************************************************************************/
+static void I420_NV12( filter_t *, picture_t *, picture_t * ); /* filter, source, destination */
+static void YV12_NV12( filter_t *, picture_t *, picture_t * ); /* filter, source, destination */
+static picture_t *I420_NV12_Filter( filter_t *, picture_t * ); /* pf_video_filter for I420/J420 input */
+static picture_t *YV12_NV12_Filter( filter_t *, picture_t * ); /* pf_video_filter for YV12 input */
+
+struct filter_sys_t /* per-filter private state, allocated in Create() */
+{
+    copy_cache_t cache; /* set up by CopyInitCache(), passed to CopyFromI420ToNv12() */
+};
+
+/*****************************************************************************
+ * Create: validate the formats and set up the I420/J420/YV12 to NV12 filter
+ *****************************************************************************
+ * Returns 0 on success, -1 for unsupported formats, VLC_ENOMEM if out of memory
+ *****************************************************************************/
+static int Create( vlc_object_t *p_this )
+{
+    filter_t *p_filter = (filter_t *)p_this;
+
+    /* the chroma planes are copied at half width and half height */
+    if( p_filter->fmt_in.video.i_width & 1
+     || p_filter->fmt_in.video.i_height & 1 )
+        return -1;
+
+    if( p_filter->fmt_in.video.i_width != p_filter->fmt_out.video.i_width
+       || p_filter->fmt_in.video.i_height != p_filter->fmt_out.video.i_height
+       || p_filter->fmt_in.video.orientation != p_filter->fmt_out.video.orientation )
+        return -1;
+
+    switch( p_filter->fmt_in.video.i_chroma )
+    {
+        case VLC_CODEC_I420:
+        case VLC_CODEC_J420:
+            p_filter->pf_video_filter = I420_NV12_Filter;
+            break;
+        case VLC_CODEC_YV12:
+            p_filter->pf_video_filter = YV12_NV12_Filter;
+            break;
+        default:
+            return -1;
+    }
+
+    filter_sys_t *p_sys = calloc(1, sizeof(*p_sys));
+    if (!p_sys)
+         return VLC_ENOMEM;
+
+    if( CopyInitCache( &p_sys->cache, p_filter->fmt_in.video.i_x_offset +
+                                      p_filter->fmt_in.video.i_visible_width ) )
+    {   /* the state must not leak when the cache cannot be set up */
+        free( p_sys );
+        return VLC_ENOMEM;
+    }
+    p_filter->p_sys = p_sys;
+    return 0;
+}
+
+static void Delete(vlc_object_t *p_this) /* releases what Create() set up */
+{
+    filter_sys_t *p_sys = ((filter_t *)p_this)->p_sys;
+    CopyCleanCache( &p_sys->cache );
+    free( p_sys ); /* allocated with calloc() in Create() */
+}
+
+/* Following functions are local */
+VIDEO_FILTER_WRAPPER( I420_NV12 ) /* presumably defines I420_NV12_Filter() - confirm */
+VIDEO_FILTER_WRAPPER( YV12_NV12 ) /* presumably defines YV12_NV12_Filter() - confirm */
+
+#define UVPLANE 1
+
+/* Shared worker for both filters: propagates the crop offsets to p_dst, lists
+ * the source planes in Y, U, V order (the two chroma planes are swapped when
+ * invertUV is set) and lets CopyFromI420ToNv12() do the conversion. */
+static void I420_YUV( filter_sys_t *p_sys, picture_t *p_src, picture_t *p_dst, bool invertUV )
+{
+    /* order[i] is the source plane index holding Y, U and V respectively */
+    const size_t order[3] = { Y_PLANE, invertUV ? V_PLANE : U_PLANE,
+                                       invertUV ? U_PLANE : V_PLANE };
+    uint8_t *plane[3];
+    size_t pitch[3];
+
+    for( size_t i = 0; i < 3; i++ )
+    {
+        plane[i] = (uint8_t*)p_src->p[order[i]].p_pixels;
+        pitch[i] = p_src->p[order[i]].i_pitch;
+    }
+
+    p_dst->format.i_x_offset = p_src->format.i_x_offset;
+    p_dst->format.i_y_offset = p_src->format.i_y_offset;
+
+    CopyFromI420ToNv12( p_dst, plane, pitch,
+                        p_src->format.i_x_offset + p_src->format.i_visible_width,
+                        p_src->format.i_y_offset + p_src->format.i_visible_height,
+                        &p_sys->cache );
+}
+
+/*****************************************************************************
+ * I420_NV12: planar 4:2:0 Y:U:V to NV12 4:2:0 (Y plane + interleaved UV plane)
+ *****************************************************************************/
+static void I420_NV12( filter_t *p_filter, picture_t *p_src, picture_t *p_dst )
+{
+    filter_sys_t *p_sys = p_filter->p_sys;
+    I420_YUV( p_sys, p_src, p_dst, false ); /* planes are already in Y, U, V order */
+}
+
+/*****************************************************************************
+ * YV12_NV12: planar 4:2:0 Y:V:U to NV12 4:2:0 (Y plane + interleaved UV plane)
+ *****************************************************************************/
+static void YV12_NV12( filter_t *p_filter, picture_t *p_src, picture_t *p_dst )
+{
+    filter_sys_t *p_sys = p_filter->p_sys;
+    I420_YUV( p_sys, p_src, p_dst, true ); /* swap the chroma planes: V is stored first */
+}
+
+
+/*****************************************************************************
+ * Module descriptor
+ *****************************************************************************/
+vlc_module_begin ()
+    set_description( N_("Conversions from " SRC_FOURCC " to " DEST_FOURCC) ) /* literals are concatenated at compile time */
+    set_capability( "video filter2", 160 ) /* NOTE(review): 160 is presumably the module priority - confirm */
+    set_callbacks( Create, Delete ) /* presumably the open and close hooks - confirm */
+vlc_module_end ()
-- 
2.8.1



More information about the vlc-devel mailing list