[vlc-devel] [PATCH] copy: add conversions to I420 and NV12
Jean-Baptiste Kempf
jb at videolan.org
Tue Apr 28 15:20:27 CEST 2015
On 28 Apr, Steve Lhomme wrote :
> ---
> modules/video_chroma/copy.c | 117 ++++++++++++++++++++++++++++++++++++++++++++
> modules/video_chroma/copy.h | 12 +++++
> 2 files changed, 129 insertions(+)
>
> diff --git a/modules/video_chroma/copy.c b/modules/video_chroma/copy.c
> index cc98c92..8481d37 100644
> --- a/modules/video_chroma/copy.c
> +++ b/modules/video_chroma/copy.c
> @@ -348,6 +348,59 @@ static void SSE_CopyFromYv12(picture_t *dst,
> }
> asm volatile ("emms");
> }
> +static void SSE_CopyFromNv12ToI420(picture_t *dst,
> + uint8_t *src[2], size_t src_pitch[2],
> + unsigned width, unsigned height,
> + copy_cache_t *cache, unsigned cpu)
> +{
> + SSE_CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
> + src[0], src_pitch[0],
> + cache->buffer, cache->size,
> + width, height, cpu);
> + SSE_SplitPlanes(dst->p[1].p_pixels, dst->p[1].i_pitch,
> + dst->p[2].p_pixels, dst->p[2].i_pitch,
> + src[1], src_pitch[1],
> + cache->buffer, cache->size,
> + (width+1)/2, (height+1)/2, cpu);
> + asm volatile ("emms");
> +}
This one is exactly SSE_CopyFromNv12 with p[1] and p[2] swapped.
I fear it is wrong, since Nv12 and I420 are inverted with respect to U and V order.
> +static void SSE_CopyFromYv12ToI420(picture_t *dst,
> + uint8_t *src[3], size_t src_pitch[3],
> + unsigned width, unsigned height,
> + copy_cache_t *cache, unsigned cpu)
> +{
> + SSE_CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
> + src[0], src_pitch[0],
> + cache->buffer, cache->size,
> + width, height, cpu);
> + SSE_CopyPlane(dst->p[1].p_pixels, dst->p[1].i_pitch,
> + src[2], src_pitch[2],
> + cache->buffer, cache->size,
> + width / 2, height / 2, cpu);
> + SSE_CopyPlane(dst->p[2].p_pixels, dst->p[2].i_pitch,
> + src[1], src_pitch[1],
> + cache->buffer, cache->size,
> + width / 2, height / 2, cpu);
> + asm volatile ("emms");
> +}
This is not useful as a separate function.
First, a video decoder that outputs Yv12 is rare. Second,
copying from Yv12 to Yv12 is trivial, and converting from Yv12 to I420 is
just a pointer swap.
> +static void SSE_CopyFromNv12ToNv12(picture_t *dst,
> + uint8_t *src[2], size_t src_pitch[2],
> + unsigned width, unsigned height,
> + copy_cache_t *cache, unsigned cpu)
> +{
> + SSE_CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
> + src[0], src_pitch[0],
> + cache->buffer, cache->size,
> + width, height, cpu);
> + SSE_CopyPlane(dst->p[1].p_pixels, dst->p[1].i_pitch,
> + src[1], src_pitch[1],
> + cache->buffer, cache->size,
> + width, height/2, cpu);
> + asm volatile ("emms");
> +}
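This looks very similar to SSE_CopyFromYv12, with n = 2.
Moreover, please be careful about (width+1) and (height+1): the chroma dimensions must be rounded consistently for odd sizes (this function uses height/2, while SSE_CopyFromNv12ToI420 uses (height+1)/2).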
> +void CopyFromNv12ToI420(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
> + unsigned width, unsigned height,
> + copy_cache_t *cache)
> +{
> +#ifdef CAN_COMPILE_SSE2
> + unsigned cpu = vlc_CPU();
> + if (vlc_CPU_SSE2())
> + return SSE_CopyFromNv12ToI420(dst, src, src_pitch, width, height,
> + cache, cpu);
> +#else
> + (void) cache;
> +#endif
> +
> + CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
> + src[0], src_pitch[0],
> + width, height);
> + SplitPlanes(dst->p[1].p_pixels, dst->p[1].i_pitch,
> + dst->p[2].p_pixels, dst->p[2].i_pitch,
> + src[1], src_pitch[1],
> + width/2, height/2);
> +}
Same as above: CopyFromNv12 already does the same thing, and swapping the two chroma planes is trivial.
> +void CopyFromNv12ToNv12(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
> + unsigned width, unsigned height,
> + copy_cache_t *cache)
> +{
> +#ifdef CAN_COMPILE_SSE2
> + unsigned cpu = vlc_CPU();
> + if (vlc_CPU_SSE2())
> + return SSE_CopyFromNv12ToNv12(dst, src, src_pitch, width, height,
> + cache, cpu);
> +#else
> + (void) cache;
> +#endif
> +
> + CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
> + src[0], src_pitch[0],
> + width, height);
> + CopyPlane(dst->p[1].p_pixels, dst->p[1].i_pitch,
> + src[1], src_pitch[1],
> + width, height/2);
> +}
This is quite similar to CopyFromYv12.
> void CopyFromYv12(picture_t *dst, uint8_t *src[3], size_t src_pitch[3],
> unsigned width, unsigned height,
> copy_cache_t *cache)
> @@ -420,3 +516,24 @@ void CopyFromYv12(picture_t *dst, uint8_t *src[3], size_t src_pitch[3],
> CopyPlane(dst->p[2].p_pixels, dst->p[2].i_pitch,
> src[2], src_pitch[2], width / 2, height / 2);
> }
> +
> +void CopyFromYv12ToI420(picture_t *dst, uint8_t *src[3], size_t src_pitch[3],
> + unsigned width, unsigned height,
> + copy_cache_t *cache)
> +{
> +#ifdef CAN_COMPILE_SSE2
> + unsigned cpu = vlc_CPU();
> + if (vlc_CPU_SSE2())
> + return SSE_CopyFromYv12ToI420(dst, src, src_pitch, width, height,
> + cache, cpu);
> +#else
> + (void) cache;
> +#endif
> +
> + CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
> + src[0], src_pitch[0], width, height);
> + CopyPlane(dst->p[1].p_pixels, dst->p[1].i_pitch,
> + src[2], src_pitch[2], width / 2, height / 2);
> + CopyPlane(dst->p[2].p_pixels, dst->p[2].i_pitch,
> + src[1], src_pitch[1], width / 2, height / 2);
> +}
CopyFromYv12 is nearly identical.
I think we can do much better than this.
--
Jean-Baptiste Kempf
http://www.jbkempf.com/ - +33 672 704 734
Sent from my Electronic Device
More information about the vlc-devel
mailing list