[vlc-devel] [PATCH] copy: add conversions to I420 and NV12

Jean-Baptiste Kempf jb at videolan.org
Tue Apr 28 15:20:27 CEST 2015


On 28 Apr, Steve Lhomme wrote :
> ---
>  modules/video_chroma/copy.c | 117 ++++++++++++++++++++++++++++++++++++++++++++
>  modules/video_chroma/copy.h |  12 +++++
>  2 files changed, 129 insertions(+)
> 
> diff --git a/modules/video_chroma/copy.c b/modules/video_chroma/copy.c
> index cc98c92..8481d37 100644
> --- a/modules/video_chroma/copy.c
> +++ b/modules/video_chroma/copy.c
> @@ -348,6 +348,59 @@ static void SSE_CopyFromYv12(picture_t *dst,
>      }
>      asm volatile ("emms");
>  }
> +static void SSE_CopyFromNv12ToI420(picture_t *dst,
> +                             uint8_t *src[2], size_t src_pitch[2],
> +                             unsigned width, unsigned height,
> +                             copy_cache_t *cache, unsigned cpu)
> +{
> +    SSE_CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
> +                  src[0], src_pitch[0],
> +                  cache->buffer, cache->size,
> +                  width, height, cpu);
> +    SSE_SplitPlanes(dst->p[1].p_pixels, dst->p[1].i_pitch,
> +                    dst->p[2].p_pixels, dst->p[2].i_pitch,
> +                    src[1], src_pitch[1],
> +                    cache->buffer, cache->size,
> +                    (width+1)/2, (height+1)/2, cpu);
> +    asm volatile ("emms");
> +}

This one is exactly SSE_CopyFromNv12 with p[1] and p[2] swapped.
I fear it is wrong, since Nv12 and I420 are inverted with respect to U and V order.



> +static void SSE_CopyFromYv12ToI420(picture_t *dst,
> +                             uint8_t *src[3], size_t src_pitch[3],
> +                             unsigned width, unsigned height,
> +                             copy_cache_t *cache, unsigned cpu)
> +{
> +    SSE_CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
> +                  src[0], src_pitch[0],
> +                  cache->buffer, cache->size,
> +                  width, height, cpu);
> +    SSE_CopyPlane(dst->p[1].p_pixels, dst->p[1].i_pitch,
> +                  src[2], src_pitch[2],
> +                  cache->buffer, cache->size,
> +                  width / 2, height / 2, cpu);
> +    SSE_CopyPlane(dst->p[2].p_pixels, dst->p[2].i_pitch,
> +                  src[1], src_pitch[1],
> +                  cache->buffer, cache->size,
> +                  width / 2, height / 2, cpu);
> +    asm volatile ("emms");
> +}

This is not useful as a separate function.
First, a video decoder that outputs Yv12 is rare; second, copying from
Yv12 to Yv12 is trivial, and converting from Yv12 to I420 is just a
pointer swap.

> +static void SSE_CopyFromNv12ToNv12(picture_t *dst,
> +                             uint8_t *src[2], size_t src_pitch[2],
> +                             unsigned width, unsigned height,
> +                             copy_cache_t *cache, unsigned cpu)
> +{
> +    SSE_CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
> +                  src[0], src_pitch[0],
> +                  cache->buffer, cache->size,
> +                  width, height, cpu);
> +    SSE_CopyPlane(dst->p[1].p_pixels, dst->p[1].i_pitch,
> +                  src[1], src_pitch[1],
> +                  cache->buffer, cache->size,
> +                  width, height/2, cpu);
> +    asm volatile ("emms");
> +}

This looks very similar to SSE_CopyFromYv12, with n = 2.

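Moreover, please be careful about (width+1) and (height+1): the SSE path of
CopyFromNv12ToI420 uses (width+1)/2 and (height+1)/2 for the chroma planes,
while its plain C fallback uses width/2 and height/2, so the two paths
disagree for odd dimensions.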

> +void CopyFromNv12ToI420(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
> +                  unsigned width, unsigned height,
> +                  copy_cache_t *cache)
> +{
> +#ifdef CAN_COMPILE_SSE2
> +    unsigned cpu = vlc_CPU();
> +    if (vlc_CPU_SSE2())
> +        return SSE_CopyFromNv12ToI420(dst, src, src_pitch, width, height,
> +                                cache, cpu);
> +#else
> +    (void) cache;
> +#endif
> +
> +    CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
> +              src[0], src_pitch[0],
> +              width, height);
> +    SplitPlanes(dst->p[1].p_pixels, dst->p[1].i_pitch,
> +                dst->p[2].p_pixels, dst->p[2].i_pitch,
> +                src[1], src_pitch[1],
> +                width/2, height/2);
> +}

Same as above: CopyFromNv12 already does the same thing, and a swap is trivial.

> +void CopyFromNv12ToNv12(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
> +                  unsigned width, unsigned height,
> +                  copy_cache_t *cache)
> +{
> +#ifdef CAN_COMPILE_SSE2
> +    unsigned cpu = vlc_CPU();
> +    if (vlc_CPU_SSE2())
> +        return SSE_CopyFromNv12ToNv12(dst, src, src_pitch, width, height,
> +                                cache, cpu);
> +#else
> +    (void) cache;
> +#endif
> +
> +    CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
> +              src[0], src_pitch[0],
> +              width, height);
> +    CopyPlane(dst->p[1].p_pixels, dst->p[1].i_pitch,
> +              src[1], src_pitch[1],
> +              width, height/2);
> +}

This is quite similar to CopyFromYv12

>  void CopyFromYv12(picture_t *dst, uint8_t *src[3], size_t src_pitch[3],
>                    unsigned width, unsigned height,
>                    copy_cache_t *cache)
> @@ -420,3 +516,24 @@ void CopyFromYv12(picture_t *dst, uint8_t *src[3], size_t src_pitch[3],
>       CopyPlane(dst->p[2].p_pixels, dst->p[2].i_pitch,
>                 src[2], src_pitch[2], width / 2, height / 2);
>  }
> +
> +void CopyFromYv12ToI420(picture_t *dst, uint8_t *src[3], size_t src_pitch[3],
> +                  unsigned width, unsigned height,
> +                  copy_cache_t *cache)
> +{
> +#ifdef CAN_COMPILE_SSE2
> +    unsigned cpu = vlc_CPU();
> +    if (vlc_CPU_SSE2())
> +        return SSE_CopyFromYv12ToI420(dst, src, src_pitch, width, height,
> +                                cache, cpu);
> +#else
> +    (void) cache;
> +#endif
> +
> +     CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
> +               src[0], src_pitch[0], width, height);
> +     CopyPlane(dst->p[1].p_pixels, dst->p[1].i_pitch,
> +               src[2], src_pitch[2], width / 2, height / 2);
> +     CopyPlane(dst->p[2].p_pixels, dst->p[2].i_pitch,
> +               src[1], src_pitch[1], width / 2, height / 2);
> +}

CopyFromYv12 is nearly identical.

I think we can do way better than this.

-- 
Jean-Baptiste Kempf
http://www.jbkempf.com/ - +33 672 704 734
Sent from my Electronic Device



More information about the vlc-devel mailing list