[vlc-devel] [PATCH 2/2] copy: only copy the src_pitch pixels on each line not the whole width
Jean-Baptiste Kempf
jb at videolan.org
Thu Jul 28 14:58:18 CEST 2016
How does that work if you have an offset?
On 28 Jul, Steve Lhomme wrote :
> ---
> modules/codec/avcodec/vaapi.c | 6 +--
> modules/codec/avcodec/vda.c | 2 +-
> modules/codec/omxil/utils.c | 2 +-
> modules/video_chroma/copy.c | 101 ++++++++++++++++-------------------
> modules/video_chroma/copy.h | 14 ++---
> modules/video_chroma/cvpx_i420.c | 1 -
> modules/video_chroma/d3d11_surface.c | 9 ++--
> modules/video_chroma/dxa9.c | 9 ++--
> modules/video_chroma/i420_nv12.c | 1 -
> 9 files changed, 60 insertions(+), 85 deletions(-)
>
> diff --git a/modules/codec/avcodec/vaapi.c b/modules/codec/avcodec/vaapi.c
> index d66047d..937a3ef 100644
> --- a/modules/codec/avcodec/vaapi.c
> +++ b/modules/codec/avcodec/vaapi.c
> @@ -121,8 +121,7 @@ static int Extract( vlc_va_t *va, picture_t *p_picture, uint8_t *data )
> pp_plane[i] = (uint8_t*)p_base + image.offsets[i_src_plane];
> pi_pitch[i] = image.pitches[i_src_plane];
> }
> - CopyFromYv12( p_picture, pp_plane, pi_pitch, sys->width, sys->height,
> - &sys->image_cache );
> + CopyFromYv12( p_picture, pp_plane, pi_pitch, sys->height, &sys->image_cache );
> }
> else
> {
> @@ -135,8 +134,7 @@ static int Extract( vlc_va_t *va, picture_t *p_picture, uint8_t *data )
> pp_plane[i] = (uint8_t*)p_base + image.offsets[i];
> pi_pitch[i] = image.pitches[i];
> }
> - CopyFromNv12( p_picture, pp_plane, pi_pitch, sys->width, sys->height,
> - &sys->image_cache );
> + CopyFromNv12( p_picture, pp_plane, pi_pitch, sys->height, &sys->image_cache );
> }
>
> vaUnmapBuffer(sys->hw_ctx.display, image.buf);
> diff --git a/modules/codec/avcodec/vda.c b/modules/codec/avcodec/vda.c
> index e5ff1b5..cb4070e 100644
> --- a/modules/codec/avcodec/vda.c
> +++ b/modules/codec/avcodec/vda.c
> @@ -69,7 +69,7 @@ static void copy420YpCbCr8Planar(picture_t *p_pic,
> pi_pitch[i] = CVPixelBufferGetBytesPerRowOfPlane(buffer, i);
> }
>
> - CopyFromNv12ToI420(p_pic, pp_plane, pi_pitch, i_width, i_height);
> + CopyFromNv12ToI420(p_pic, pp_plane, pi_pitch, i_height);
>
> CVPixelBufferUnlockBaseAddress(buffer, 0);
> }
> diff --git a/modules/codec/omxil/utils.c b/modules/codec/omxil/utils.c
> index aa2cb5f..abb5b0c 100644
> --- a/modules/codec/omxil/utils.c
> +++ b/modules/codec/omxil/utils.c
> @@ -223,7 +223,7 @@ void CopyOmxPicture( int i_color_format, picture_t *p_pic,
> copy_cache_t *p_surface_cache = (copy_cache_t*)p_architecture_specific->data;
> uint8_t *ppi_src_pointers[2] = { p_src, p_src + i_src_stride * i_slice_height };
> size_t pi_src_strides[2] = { i_src_stride, i_src_stride };
> - CopyFromNv12( p_pic, ppi_src_pointers, pi_src_strides, i_src_stride, i_slice_height, p_surface_cache );
> + CopyFromNv12( p_pic, ppi_src_pointers, pi_src_strides, i_slice_height, p_surface_cache );
> return;
> }
> #endif
> diff --git a/modules/video_chroma/copy.c b/modules/video_chroma/copy.c
> index fa0a212..c053282 100644
> --- a/modules/video_chroma/copy.c
> +++ b/modules/video_chroma/copy.c
> @@ -264,14 +264,14 @@ static void SSE_SplitUV(uint8_t *dstu, size_t dstu_pitch,
> static void SSE_CopyPlane(uint8_t *dst, size_t dst_pitch,
> const uint8_t *src, size_t src_pitch,
> uint8_t *cache, size_t cache_size,
> - unsigned width, unsigned height, unsigned cpu)
> + unsigned height, unsigned cpu)
> {
> - const unsigned w16 = (width+15) & ~15;
> + const unsigned w16 = (src_pitch+15) & ~15;
> const unsigned hstep = cache_size / w16;
> assert(hstep > 0);
>
> if (src_pitch == dst_pitch)
> - memcpy(dst, src, width * height);
> + memcpy(dst, src, src_pitch * height);
> else
> for (unsigned y = 0; y < height; y += hstep) {
> const unsigned hblock = __MIN(hstep, height - y);
> @@ -279,12 +279,12 @@ static void SSE_CopyPlane(uint8_t *dst, size_t dst_pitch,
> /* Copy a bunch of line into our cache */
> CopyFromUswc(cache, w16,
> src, src_pitch,
> - width, hblock, cpu);
> + src_pitch, hblock, cpu);
>
> /* Copy from our cache to the destination */
> Copy2d(dst, dst_pitch,
> cache, w16,
> - width, hblock);
> + src_pitch, hblock);
>
> /* */
> src += src_pitch * hblock;
> @@ -296,9 +296,9 @@ static void SSE_SplitPlanes(uint8_t *dstu, size_t dstu_pitch,
> uint8_t *dstv, size_t dstv_pitch,
> const uint8_t *src, size_t src_pitch,
> uint8_t *cache, size_t cache_size,
> - unsigned width, unsigned height, unsigned cpu)
> + unsigned height, unsigned cpu)
> {
> - const unsigned w16 = (2*width+15) & ~15;
> + const unsigned w16 = (2*src_pitch+15) & ~15;
> const unsigned hstep = cache_size / w16;
> assert(hstep > 0);
>
> @@ -307,11 +307,11 @@ static void SSE_SplitPlanes(uint8_t *dstu, size_t dstu_pitch,
>
> /* Copy a bunch of line into our cache */
> CopyFromUswc(cache, w16, src, src_pitch,
> - 2*width, hblock, cpu);
> + 2*src_pitch, hblock, cpu);
>
> /* Copy from our cache to the destination */
> SSE_SplitUV(dstu, dstu_pitch, dstv, dstv_pitch,
> - cache, w16, width, hblock, cpu);
> + cache, w16, src_pitch, hblock, cpu);
>
> /* */
> src += src_pitch * hblock;
> @@ -322,24 +322,24 @@ static void SSE_SplitPlanes(uint8_t *dstu, size_t dstu_pitch,
>
> static void SSE_CopyFromNv12(picture_t *dst,
> uint8_t *src[2], size_t src_pitch[2],
> - unsigned width, unsigned height,
> + unsigned height,
> copy_cache_t *cache, unsigned cpu)
> {
> SSE_CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
> src[0], src_pitch[0],
> cache->buffer, cache->size,
> - width, height, cpu);
> + height, cpu);
> SSE_SplitPlanes(dst->p[2].p_pixels, dst->p[2].i_pitch,
> dst->p[1].p_pixels, dst->p[1].i_pitch,
> src[1], src_pitch[1],
> cache->buffer, cache->size,
> - (width+1)/2, (height+1)/2, cpu);
> + (height+1)/2, cpu);
> asm volatile ("emms");
> }
>
> static void SSE_CopyFromYv12(picture_t *dst,
> uint8_t *src[3], size_t src_pitch[3],
> - unsigned width, unsigned height,
> + unsigned height,
> copy_cache_t *cache, unsigned cpu)
> {
> for (unsigned n = 0; n < 3; n++) {
> @@ -347,7 +347,7 @@ static void SSE_CopyFromYv12(picture_t *dst,
> SSE_CopyPlane(dst->p[n].p_pixels, dst->p[n].i_pitch,
> src[n], src_pitch[n],
> cache->buffer, cache->size,
> - (width+d-1)/d, (height+d-1)/d, cpu);
> + (height+d-1)/d, cpu);
> }
> asm volatile ("emms");
> }
> @@ -355,33 +355,33 @@ static void SSE_CopyFromYv12(picture_t *dst,
>
> static void SSE_CopyFromNv12ToNv12(picture_t *dst,
> uint8_t *src[2], size_t src_pitch[2],
> - unsigned width, unsigned height,
> + unsigned height,
> copy_cache_t *cache, unsigned cpu)
> {
> SSE_CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
> src[0], src_pitch[0],
> cache->buffer, cache->size,
> - width, height, cpu);
> + height, cpu);
> SSE_CopyPlane(dst->p[1].p_pixels, dst->p[1].i_pitch,
> src[1], src_pitch[1],
> cache->buffer, cache->size,
> - width, height/2, cpu);
> + height/2, cpu);
> asm volatile ("emms");
> }
>
> static void SSE_CopyFromI420ToNv12(picture_t *dst,
> uint8_t *src[2], size_t src_pitch[2],
> - unsigned width, unsigned height,
> + unsigned height,
> copy_cache_t *cache, unsigned cpu)
> {
> SSE_CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
> src[0], src_pitch[0],
> cache->buffer, cache->size,
> - width, height, cpu);
> + height, cpu);
>
> /* TODO optimise the plane merging */
> const unsigned copy_lines = height / 2;
> - const unsigned copy_pitch = width / 2;
> + const unsigned copy_pitch = src_pitch[1];
>
> const int i_extra_pitch_uv = dst->p[1].i_pitch - 2 * copy_pitch;
> const int i_extra_pitch_u = src_pitch[U_PLANE] - copy_pitch;
> @@ -408,13 +408,13 @@ static void SSE_CopyFromI420ToNv12(picture_t *dst,
>
> static void CopyPlane(uint8_t *dst, size_t dst_pitch,
> const uint8_t *src, size_t src_pitch,
> - unsigned width, unsigned height)
> + unsigned height)
> {
> if (src_pitch == dst_pitch)
> - memcpy(dst, src, width * height);
> + memcpy(dst, src, src_pitch * height);
> else
> for (unsigned y = 0; y < height; y++) {
> - memcpy(dst, src, width);
> + memcpy(dst, src, src_pitch);
> src += src_pitch;
> dst += dst_pitch;
> }
> @@ -423,10 +423,10 @@ static void CopyPlane(uint8_t *dst, size_t dst_pitch,
> static void SplitPlanes(uint8_t *dstu, size_t dstu_pitch,
> uint8_t *dstv, size_t dstv_pitch,
> const uint8_t *src, size_t src_pitch,
> - unsigned width, unsigned height)
> + unsigned height)
> {
> for (unsigned y = 0; y < height; y++) {
> - for (unsigned x = 0; x < width; x++) {
> + for (unsigned x = 0; x < src_pitch; x++) {
> dstu[x] = src[2*x+0];
> dstv[x] = src[2*x+1];
> }
> @@ -437,79 +437,69 @@ static void SplitPlanes(uint8_t *dstu, size_t dstu_pitch,
> }
>
> void CopyFromNv12(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
> - unsigned width, unsigned height,
> - copy_cache_t *cache)
> + unsigned height, copy_cache_t *cache)
> {
> #ifdef CAN_COMPILE_SSE2
> unsigned cpu = vlc_CPU();
> if (vlc_CPU_SSE2())
> - return SSE_CopyFromNv12(dst, src, src_pitch, width, height,
> + return SSE_CopyFromNv12(dst, src, src_pitch, height,
> cache, cpu);
> #else
> (void) cache;
> #endif
>
> CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
> - src[0], src_pitch[0],
> - width, height);
> + src[0], src_pitch[0], height);
> SplitPlanes(dst->p[2].p_pixels, dst->p[2].i_pitch,
> dst->p[1].p_pixels, dst->p[1].i_pitch,
> - src[1], src_pitch[1],
> - width/2, height/2);
> + src[1], src_pitch[1], height/2);
> }
>
> void CopyFromNv12ToNv12(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
> - unsigned width, unsigned height,
> - copy_cache_t *cache)
> + unsigned height, copy_cache_t *cache)
> {
> #ifdef CAN_COMPILE_SSE2
> unsigned cpu = vlc_CPU();
> if (vlc_CPU_SSE2())
> - return SSE_CopyFromNv12ToNv12(dst, src, src_pitch, width, height,
> + return SSE_CopyFromNv12ToNv12(dst, src, src_pitch, height,
> cache, cpu);
> #else
> (void) cache;
> #endif
>
> CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
> - src[0], src_pitch[0],
> - width, height);
> + src[0], src_pitch[0], height);
> CopyPlane(dst->p[1].p_pixels, dst->p[1].i_pitch,
> - src[1], src_pitch[1],
> - width, height/2);
> + src[1], src_pitch[1], height/2);
> }
>
> void CopyFromNv12ToI420(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
> - unsigned width, unsigned height)
> + unsigned height)
> {
> CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
> - src[0], src_pitch[0],
> - width, height);
> + src[0], src_pitch[0], height);
> SplitPlanes(dst->p[1].p_pixels, dst->p[1].i_pitch,
> dst->p[2].p_pixels, dst->p[2].i_pitch,
> - src[1], src_pitch[1],
> - width/2, height/2);
> + src[1], src_pitch[1], height/2);
> }
>
> void CopyFromI420ToNv12(picture_t *dst, uint8_t *src[3], size_t src_pitch[3],
> - unsigned width, unsigned height,
> - copy_cache_t *cache)
> + unsigned height, copy_cache_t *cache)
> {
> #ifdef CAN_COMPILE_SSE2
> unsigned cpu = vlc_CPU();
> if (vlc_CPU_SSE2())
> - return SSE_CopyFromI420ToNv12(dst, src, src_pitch, width, height,
> + return SSE_CopyFromI420ToNv12(dst, src, src_pitch, height,
> cache, cpu);
> #else
> (void) cache;
> #endif
>
> CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
> - src[0], src_pitch[0],
> - width, height);
> + src[0], src_pitch[0], height);
>
> const unsigned copy_lines = height / 2;
> - const unsigned copy_pitch = width / 2;
> + const unsigned copy_pitch = src_pitch[1];
>
> const int i_extra_pitch_uv = dst->p[1].i_pitch - 2 * copy_pitch;
> const int i_extra_pitch_u = src_pitch[U_PLANE] - copy_pitch;
> @@ -533,22 +523,21 @@ void CopyFromI420ToNv12(picture_t *dst, uint8_t *src[3], size_t src_pitch[3],
>
>
> void CopyFromYv12(picture_t *dst, uint8_t *src[3], size_t src_pitch[3],
> - unsigned width, unsigned height,
> - copy_cache_t *cache)
> + unsigned height, copy_cache_t *cache)
> {
> #ifdef CAN_COMPILE_SSE2
> unsigned cpu = vlc_CPU();
> if (vlc_CPU_SSE2())
> - return SSE_CopyFromYv12(dst, src, src_pitch, width, height,
> + return SSE_CopyFromYv12(dst, src, src_pitch, height,
> cache, cpu);
> #else
> (void) cache;
> #endif
>
> CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
> - src[0], src_pitch[0], width, height);
> + src[0], src_pitch[0], height);
> CopyPlane(dst->p[1].p_pixels, dst->p[1].i_pitch,
> - src[1], src_pitch[1], width / 2, height / 2);
> + src[1], src_pitch[1], height / 2);
> CopyPlane(dst->p[2].p_pixels, dst->p[2].i_pitch,
> - src[2], src_pitch[2], width / 2, height / 2);
> + src[2], src_pitch[2], height / 2);
> }
> diff --git a/modules/video_chroma/copy.h b/modules/video_chroma/copy.h
> index 533e2fa..8776f95 100644
> --- a/modules/video_chroma/copy.h
> +++ b/modules/video_chroma/copy.h
> @@ -36,22 +36,18 @@ void CopyCleanCache(copy_cache_t *cache);
>
> /* Copy planes from NV12 to YV12 */
> void CopyFromNv12(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
> - unsigned width, unsigned height,
> - copy_cache_t *cache);
> + unsigned height, copy_cache_t *cache);
> /* Copy planes from YV12 to YV12 */
> void CopyFromYv12(picture_t *dst, uint8_t *src[3], size_t src_pitch[3],
> - unsigned width, unsigned height,
> - copy_cache_t *cache);
> + unsigned height, copy_cache_t *cache);
>
> void CopyFromNv12ToNv12(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
> - unsigned width, unsigned height,
> - copy_cache_t *cache);
> + unsigned height, copy_cache_t *cache);
>
> void CopyFromNv12ToI420(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
> - unsigned width, unsigned height);
> + unsigned height);
>
> void CopyFromI420ToNv12(picture_t *dst, uint8_t *src[3], size_t src_pitch[3],
> - unsigned width, unsigned height,
> - copy_cache_t *cache);
> + unsigned height, copy_cache_t *cache);
>
> #endif
> diff --git a/modules/video_chroma/cvpx_i420.c b/modules/video_chroma/cvpx_i420.c
> index 18ed3cf..471f867 100644
> --- a/modules/video_chroma/cvpx_i420.c
> +++ b/modules/video_chroma/cvpx_i420.c
> @@ -89,7 +89,6 @@ static void CVPX_I420(filter_t *p_filter, picture_t *sourcePicture, picture_t *d
> }
>
> CopyFromNv12ToI420(destinationPicture, pp_plane, pi_pitch,
> - sourcePicture->format.i_width,
> sourcePicture->format.i_height);
>
> CVPixelBufferUnlockBaseAddress(picsys->pixelBuffer, 0);
> diff --git a/modules/video_chroma/d3d11_surface.c b/modules/video_chroma/d3d11_surface.c
> index 3fbf4bf..76eb284 100644
> --- a/modules/video_chroma/d3d11_surface.c
> +++ b/modules/video_chroma/d3d11_surface.c
> @@ -153,8 +153,7 @@ static void D3D11_YUY2(filter_t *p_filter, picture_t *src, picture_t *dst)
> + pitch[1] * src->format.i_height / 2,
> };
>
> - CopyFromYv12(dst, plane, pitch, src->format.i_width,
> - src->format.i_height, &sys->cache);
> + CopyFromYv12(dst, plane, pitch, src->format.i_height, &sys->cache);
> } else if (desc.Format == DXGI_FORMAT_NV12) {
> uint8_t *plane[2] = {
> lock.pData,
> @@ -164,8 +163,7 @@ static void D3D11_YUY2(filter_t *p_filter, picture_t *src, picture_t *dst)
> lock.RowPitch,
> lock.RowPitch,
> };
> - CopyFromNv12(dst, plane, pitch, src->format.i_width,
> - src->format.i_height, &sys->cache);
> + CopyFromNv12(dst, plane, pitch, src->format.i_height, &sys->cache);
> } else {
> msg_Err(p_filter, "Unsupported D3D11VA conversion from 0x%08X to YV12", desc.Format);
> }
> @@ -223,8 +221,7 @@ static void D3D11_NV12(filter_t *p_filter, picture_t *src, picture_t *dst)
> lock.RowPitch,
> lock.RowPitch,
> };
> - CopyFromNv12ToNv12(dst, plane, pitch, src->format.i_width,
> - src->format.i_height, &sys->cache);
> + CopyFromNv12ToNv12(dst, plane, pitch, src->format.i_height, &sys->cache);
> } else {
> msg_Err(p_filter, "Unsupported D3D11VA conversion from 0x%08X to NV12", desc.Format);
> }
> diff --git a/modules/video_chroma/dxa9.c b/modules/video_chroma/dxa9.c
> index 5a99ac9..9cb7cf3 100644
> --- a/modules/video_chroma/dxa9.c
> +++ b/modules/video_chroma/dxa9.c
> @@ -108,8 +108,7 @@ static void DXA9_YV12(filter_t *p_filter, picture_t *src, picture_t *dst)
> plane[1] = plane[2];
> plane[2] = V;
> }
> - CopyFromYv12(dst, plane, pitch, src->format.i_width,
> - src->format.i_height, p_copy_cache);
> + CopyFromYv12(dst, plane, pitch, src->format.i_height, p_copy_cache);
> } else if (desc.Format == MAKEFOURCC('N','V','1','2')) {
> uint8_t *plane[2] = {
> lock.pBits,
> @@ -119,8 +118,7 @@ static void DXA9_YV12(filter_t *p_filter, picture_t *src, picture_t *dst)
> lock.Pitch,
> lock.Pitch,
> };
> - CopyFromNv12(dst, plane, pitch, src->format.i_width,
> - src->format.i_height, p_copy_cache);
> + CopyFromNv12(dst, plane, pitch, src->format.i_height, p_copy_cache);
> } else {
> msg_Err(p_filter, "Unsupported DXA9 conversion from 0x%08X to YV12", desc.Format);
> }
> @@ -153,8 +151,7 @@ static void DXA9_NV12(filter_t *p_filter, picture_t *src, picture_t *dst)
> lock.Pitch,
> lock.Pitch,
> };
> - CopyFromNv12ToNv12(dst, plane, pitch, src->format.i_width,
> - src->format.i_height, p_copy_cache);
> + CopyFromNv12ToNv12(dst, plane, pitch, src->format.i_height, p_copy_cache);
> } else {
> msg_Err(p_filter, "Unsupported DXA9 conversion from 0x%08X to NV12", desc.Format);
> }
> diff --git a/modules/video_chroma/i420_nv12.c b/modules/video_chroma/i420_nv12.c
> index 5e0a767..16f4a5b 100644
> --- a/modules/video_chroma/i420_nv12.c
> +++ b/modules/video_chroma/i420_nv12.c
> @@ -131,7 +131,6 @@ static void I420_YUV( filter_sys_t *p_sys, picture_t *p_src, picture_t *p_dst, b
> };
>
> CopyFromI420ToNv12( p_dst, plane, pitch,
> - p_src->format.i_x_offset + p_src->format.i_visible_width,
> p_src->format.i_y_offset + p_src->format.i_visible_height,
> &p_sys->cache );
> }
> --
> 2.8.2
>
> _______________________________________________
> vlc-devel mailing list
> To unsubscribe or modify your subscription options:
> https://mailman.videolan.org/listinfo/vlc-devel
--
With my kindest regards,
--
Jean-Baptiste Kempf
http://www.jbkempf.com/ - +33 672 704 734
Sent from my Electronic Device
More information about the vlc-devel mailing list