[vlc-devel] [PATCH 2/2] copy: only copy the src_pitch pixels on each line, not the whole width
Steve Lhomme
robux4 at videolabs.io
Thu Jul 28 14:45:40 CEST 2016
---
modules/codec/avcodec/vaapi.c | 6 +--
modules/codec/avcodec/vda.c | 2 +-
modules/codec/omxil/utils.c | 2 +-
modules/video_chroma/copy.c | 101 ++++++++++++++++-------------------
modules/video_chroma/copy.h | 14 ++---
modules/video_chroma/cvpx_i420.c | 1 -
modules/video_chroma/d3d11_surface.c | 9 ++--
modules/video_chroma/dxa9.c | 9 ++--
modules/video_chroma/i420_nv12.c | 1 -
9 files changed, 60 insertions(+), 85 deletions(-)
diff --git a/modules/codec/avcodec/vaapi.c b/modules/codec/avcodec/vaapi.c
index d66047d..937a3ef 100644
--- a/modules/codec/avcodec/vaapi.c
+++ b/modules/codec/avcodec/vaapi.c
@@ -121,8 +121,7 @@ static int Extract( vlc_va_t *va, picture_t *p_picture, uint8_t *data )
pp_plane[i] = (uint8_t*)p_base + image.offsets[i_src_plane];
pi_pitch[i] = image.pitches[i_src_plane];
}
- CopyFromYv12( p_picture, pp_plane, pi_pitch, sys->width, sys->height,
- &sys->image_cache );
+ CopyFromYv12( p_picture, pp_plane, pi_pitch, sys->height, &sys->image_cache );
}
else
{
@@ -135,8 +134,7 @@ static int Extract( vlc_va_t *va, picture_t *p_picture, uint8_t *data )
pp_plane[i] = (uint8_t*)p_base + image.offsets[i];
pi_pitch[i] = image.pitches[i];
}
- CopyFromNv12( p_picture, pp_plane, pi_pitch, sys->width, sys->height,
- &sys->image_cache );
+ CopyFromNv12( p_picture, pp_plane, pi_pitch, sys->height, &sys->image_cache );
}
vaUnmapBuffer(sys->hw_ctx.display, image.buf);
diff --git a/modules/codec/avcodec/vda.c b/modules/codec/avcodec/vda.c
index e5ff1b5..cb4070e 100644
--- a/modules/codec/avcodec/vda.c
+++ b/modules/codec/avcodec/vda.c
@@ -69,7 +69,7 @@ static void copy420YpCbCr8Planar(picture_t *p_pic,
pi_pitch[i] = CVPixelBufferGetBytesPerRowOfPlane(buffer, i);
}
- CopyFromNv12ToI420(p_pic, pp_plane, pi_pitch, i_width, i_height);
+ CopyFromNv12ToI420(p_pic, pp_plane, pi_pitch, i_height);
CVPixelBufferUnlockBaseAddress(buffer, 0);
}
diff --git a/modules/codec/omxil/utils.c b/modules/codec/omxil/utils.c
index aa2cb5f..abb5b0c 100644
--- a/modules/codec/omxil/utils.c
+++ b/modules/codec/omxil/utils.c
@@ -223,7 +223,7 @@ void CopyOmxPicture( int i_color_format, picture_t *p_pic,
copy_cache_t *p_surface_cache = (copy_cache_t*)p_architecture_specific->data;
uint8_t *ppi_src_pointers[2] = { p_src, p_src + i_src_stride * i_slice_height };
size_t pi_src_strides[2] = { i_src_stride, i_src_stride };
- CopyFromNv12( p_pic, ppi_src_pointers, pi_src_strides, i_src_stride, i_slice_height, p_surface_cache );
+ CopyFromNv12( p_pic, ppi_src_pointers, pi_src_strides, i_slice_height, p_surface_cache );
return;
}
#endif
diff --git a/modules/video_chroma/copy.c b/modules/video_chroma/copy.c
index fa0a212..c053282 100644
--- a/modules/video_chroma/copy.c
+++ b/modules/video_chroma/copy.c
@@ -264,14 +264,14 @@ static void SSE_SplitUV(uint8_t *dstu, size_t dstu_pitch,
static void SSE_CopyPlane(uint8_t *dst, size_t dst_pitch,
const uint8_t *src, size_t src_pitch,
uint8_t *cache, size_t cache_size,
- unsigned width, unsigned height, unsigned cpu)
+ unsigned height, unsigned cpu)
{
- const unsigned w16 = (width+15) & ~15;
+ const unsigned w16 = (src_pitch+15) & ~15;
const unsigned hstep = cache_size / w16;
assert(hstep > 0);
if (src_pitch == dst_pitch)
- memcpy(dst, src, width * height);
+ memcpy(dst, src, src_pitch * height);
else
for (unsigned y = 0; y < height; y += hstep) {
const unsigned hblock = __MIN(hstep, height - y);
@@ -279,12 +279,12 @@ static void SSE_CopyPlane(uint8_t *dst, size_t dst_pitch,
/* Copy a bunch of line into our cache */
CopyFromUswc(cache, w16,
src, src_pitch,
- width, hblock, cpu);
+ src_pitch, hblock, cpu);
/* Copy from our cache to the destination */
Copy2d(dst, dst_pitch,
cache, w16,
- width, hblock);
+ src_pitch, hblock);
/* */
src += src_pitch * hblock;
@@ -296,9 +296,9 @@ static void SSE_SplitPlanes(uint8_t *dstu, size_t dstu_pitch,
uint8_t *dstv, size_t dstv_pitch,
const uint8_t *src, size_t src_pitch,
uint8_t *cache, size_t cache_size,
- unsigned width, unsigned height, unsigned cpu)
+ unsigned height, unsigned cpu)
{
- const unsigned w16 = (2*width+15) & ~15;
+ const unsigned w16 = (2*src_pitch+15) & ~15;
const unsigned hstep = cache_size / w16;
assert(hstep > 0);
@@ -307,11 +307,11 @@ static void SSE_SplitPlanes(uint8_t *dstu, size_t dstu_pitch,
/* Copy a bunch of line into our cache */
CopyFromUswc(cache, w16, src, src_pitch,
- 2*width, hblock, cpu);
+ 2*src_pitch, hblock, cpu);
/* Copy from our cache to the destination */
SSE_SplitUV(dstu, dstu_pitch, dstv, dstv_pitch,
- cache, w16, width, hblock, cpu);
+ cache, w16, src_pitch, hblock, cpu);
/* */
src += src_pitch * hblock;
@@ -322,24 +322,24 @@ static void SSE_SplitPlanes(uint8_t *dstu, size_t dstu_pitch,
static void SSE_CopyFromNv12(picture_t *dst,
uint8_t *src[2], size_t src_pitch[2],
- unsigned width, unsigned height,
+ unsigned height,
copy_cache_t *cache, unsigned cpu)
{
SSE_CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
src[0], src_pitch[0],
cache->buffer, cache->size,
- width, height, cpu);
+ height, cpu);
SSE_SplitPlanes(dst->p[2].p_pixels, dst->p[2].i_pitch,
dst->p[1].p_pixels, dst->p[1].i_pitch,
src[1], src_pitch[1],
cache->buffer, cache->size,
- (width+1)/2, (height+1)/2, cpu);
+ (height+1)/2, cpu);
asm volatile ("emms");
}
static void SSE_CopyFromYv12(picture_t *dst,
uint8_t *src[3], size_t src_pitch[3],
- unsigned width, unsigned height,
+ unsigned height,
copy_cache_t *cache, unsigned cpu)
{
for (unsigned n = 0; n < 3; n++) {
@@ -347,7 +347,7 @@ static void SSE_CopyFromYv12(picture_t *dst,
SSE_CopyPlane(dst->p[n].p_pixels, dst->p[n].i_pitch,
src[n], src_pitch[n],
cache->buffer, cache->size,
- (width+d-1)/d, (height+d-1)/d, cpu);
+ (height+d-1)/d, cpu);
}
asm volatile ("emms");
}
@@ -355,33 +355,33 @@ static void SSE_CopyFromYv12(picture_t *dst,
static void SSE_CopyFromNv12ToNv12(picture_t *dst,
uint8_t *src[2], size_t src_pitch[2],
- unsigned width, unsigned height,
+ unsigned height,
copy_cache_t *cache, unsigned cpu)
{
SSE_CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
src[0], src_pitch[0],
cache->buffer, cache->size,
- width, height, cpu);
+ height, cpu);
SSE_CopyPlane(dst->p[1].p_pixels, dst->p[1].i_pitch,
src[1], src_pitch[1],
cache->buffer, cache->size,
- width, height/2, cpu);
+ height/2, cpu);
asm volatile ("emms");
}
static void SSE_CopyFromI420ToNv12(picture_t *dst,
uint8_t *src[2], size_t src_pitch[2],
- unsigned width, unsigned height,
+ unsigned height,
copy_cache_t *cache, unsigned cpu)
{
SSE_CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
src[0], src_pitch[0],
cache->buffer, cache->size,
- width, height, cpu);
+ height, cpu);
/* TODO optimise the plane merging */
const unsigned copy_lines = height / 2;
- const unsigned copy_pitch = width / 2;
+ const unsigned copy_pitch = src_pitch[1];
const int i_extra_pitch_uv = dst->p[1].i_pitch - 2 * copy_pitch;
const int i_extra_pitch_u = src_pitch[U_PLANE] - copy_pitch;
@@ -408,13 +408,13 @@ static void SSE_CopyFromI420ToNv12(picture_t *dst,
static void CopyPlane(uint8_t *dst, size_t dst_pitch,
const uint8_t *src, size_t src_pitch,
- unsigned width, unsigned height)
+ unsigned height)
{
if (src_pitch == dst_pitch)
- memcpy(dst, src, width * height);
+ memcpy(dst, src, src_pitch * height);
else
for (unsigned y = 0; y < height; y++) {
- memcpy(dst, src, width);
+ memcpy(dst, src, src_pitch);
src += src_pitch;
dst += dst_pitch;
}
@@ -423,10 +423,10 @@ static void CopyPlane(uint8_t *dst, size_t dst_pitch,
static void SplitPlanes(uint8_t *dstu, size_t dstu_pitch,
uint8_t *dstv, size_t dstv_pitch,
const uint8_t *src, size_t src_pitch,
- unsigned width, unsigned height)
+ unsigned height)
{
for (unsigned y = 0; y < height; y++) {
- for (unsigned x = 0; x < width; x++) {
+ for (unsigned x = 0; x < src_pitch; x++) {
dstu[x] = src[2*x+0];
dstv[x] = src[2*x+1];
}
@@ -437,79 +437,69 @@ static void SplitPlanes(uint8_t *dstu, size_t dstu_pitch,
}
void CopyFromNv12(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
- unsigned width, unsigned height,
- copy_cache_t *cache)
+ unsigned height, copy_cache_t *cache)
{
#ifdef CAN_COMPILE_SSE2
unsigned cpu = vlc_CPU();
if (vlc_CPU_SSE2())
- return SSE_CopyFromNv12(dst, src, src_pitch, width, height,
+ return SSE_CopyFromNv12(dst, src, src_pitch, height,
cache, cpu);
#else
(void) cache;
#endif
CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
- src[0], src_pitch[0],
- width, height);
+ src[0], src_pitch[0], height);
SplitPlanes(dst->p[2].p_pixels, dst->p[2].i_pitch,
dst->p[1].p_pixels, dst->p[1].i_pitch,
- src[1], src_pitch[1],
- width/2, height/2);
+ src[1], src_pitch[1], height/2);
}
void CopyFromNv12ToNv12(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
- unsigned width, unsigned height,
- copy_cache_t *cache)
+ unsigned height, copy_cache_t *cache)
{
#ifdef CAN_COMPILE_SSE2
unsigned cpu = vlc_CPU();
if (vlc_CPU_SSE2())
- return SSE_CopyFromNv12ToNv12(dst, src, src_pitch, width, height,
+ return SSE_CopyFromNv12ToNv12(dst, src, src_pitch, height,
cache, cpu);
#else
(void) cache;
#endif
CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
- src[0], src_pitch[0],
- width, height);
+ src[0], src_pitch[0], height);
CopyPlane(dst->p[1].p_pixels, dst->p[1].i_pitch,
- src[1], src_pitch[1],
- width, height/2);
+ src[1], src_pitch[1], height/2);
}
void CopyFromNv12ToI420(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
- unsigned width, unsigned height)
+ unsigned height)
{
CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
- src[0], src_pitch[0],
- width, height);
+ src[0], src_pitch[0], height);
SplitPlanes(dst->p[1].p_pixels, dst->p[1].i_pitch,
dst->p[2].p_pixels, dst->p[2].i_pitch,
- src[1], src_pitch[1],
- width/2, height/2);
+ src[1], src_pitch[1], height/2);
}
void CopyFromI420ToNv12(picture_t *dst, uint8_t *src[3], size_t src_pitch[3],
- unsigned width, unsigned height,
- copy_cache_t *cache)
+ unsigned height, copy_cache_t *cache)
{
#ifdef CAN_COMPILE_SSE2
unsigned cpu = vlc_CPU();
if (vlc_CPU_SSE2())
- return SSE_CopyFromI420ToNv12(dst, src, src_pitch, width, height,
+ return SSE_CopyFromI420ToNv12(dst, src, src_pitch, height,
cache, cpu);
#else
(void) cache;
#endif
CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
- src[0], src_pitch[0],
- width, height);
+ src[0], src_pitch[0], height);
const unsigned copy_lines = height / 2;
- const unsigned copy_pitch = width / 2;
+ const unsigned copy_pitch = src_pitch[1];
const int i_extra_pitch_uv = dst->p[1].i_pitch - 2 * copy_pitch;
const int i_extra_pitch_u = src_pitch[U_PLANE] - copy_pitch;
@@ -533,22 +523,21 @@ void CopyFromI420ToNv12(picture_t *dst, uint8_t *src[3], size_t src_pitch[3],
void CopyFromYv12(picture_t *dst, uint8_t *src[3], size_t src_pitch[3],
- unsigned width, unsigned height,
- copy_cache_t *cache)
+ unsigned height, copy_cache_t *cache)
{
#ifdef CAN_COMPILE_SSE2
unsigned cpu = vlc_CPU();
if (vlc_CPU_SSE2())
- return SSE_CopyFromYv12(dst, src, src_pitch, width, height,
+ return SSE_CopyFromYv12(dst, src, src_pitch, height,
cache, cpu);
#else
(void) cache;
#endif
CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
- src[0], src_pitch[0], width, height);
+ src[0], src_pitch[0], height);
CopyPlane(dst->p[1].p_pixels, dst->p[1].i_pitch,
- src[1], src_pitch[1], width / 2, height / 2);
+ src[1], src_pitch[1], height / 2);
CopyPlane(dst->p[2].p_pixels, dst->p[2].i_pitch,
- src[2], src_pitch[2], width / 2, height / 2);
+ src[2], src_pitch[2], height / 2);
}
diff --git a/modules/video_chroma/copy.h b/modules/video_chroma/copy.h
index 533e2fa..8776f95 100644
--- a/modules/video_chroma/copy.h
+++ b/modules/video_chroma/copy.h
@@ -36,22 +36,18 @@ void CopyCleanCache(copy_cache_t *cache);
/* Copy planes from NV12 to YV12 */
void CopyFromNv12(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
- unsigned width, unsigned height,
- copy_cache_t *cache);
+ unsigned height, copy_cache_t *cache);
/* Copy planes from YV12 to YV12 */
void CopyFromYv12(picture_t *dst, uint8_t *src[3], size_t src_pitch[3],
- unsigned width, unsigned height,
- copy_cache_t *cache);
+ unsigned height, copy_cache_t *cache);
void CopyFromNv12ToNv12(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
- unsigned width, unsigned height,
- copy_cache_t *cache);
+ unsigned height, copy_cache_t *cache);
void CopyFromNv12ToI420(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
- unsigned width, unsigned height);
+ unsigned height);
void CopyFromI420ToNv12(picture_t *dst, uint8_t *src[3], size_t src_pitch[3],
- unsigned width, unsigned height,
- copy_cache_t *cache);
+ unsigned height, copy_cache_t *cache);
#endif
diff --git a/modules/video_chroma/cvpx_i420.c b/modules/video_chroma/cvpx_i420.c
index 18ed3cf..471f867 100644
--- a/modules/video_chroma/cvpx_i420.c
+++ b/modules/video_chroma/cvpx_i420.c
@@ -89,7 +89,6 @@ static void CVPX_I420(filter_t *p_filter, picture_t *sourcePicture, picture_t *d
}
CopyFromNv12ToI420(destinationPicture, pp_plane, pi_pitch,
- sourcePicture->format.i_width,
sourcePicture->format.i_height);
CVPixelBufferUnlockBaseAddress(picsys->pixelBuffer, 0);
diff --git a/modules/video_chroma/d3d11_surface.c b/modules/video_chroma/d3d11_surface.c
index 3fbf4bf..76eb284 100644
--- a/modules/video_chroma/d3d11_surface.c
+++ b/modules/video_chroma/d3d11_surface.c
@@ -153,8 +153,7 @@ static void D3D11_YUY2(filter_t *p_filter, picture_t *src, picture_t *dst)
+ pitch[1] * src->format.i_height / 2,
};
- CopyFromYv12(dst, plane, pitch, src->format.i_width,
- src->format.i_height, &sys->cache);
+ CopyFromYv12(dst, plane, pitch, src->format.i_height, &sys->cache);
} else if (desc.Format == DXGI_FORMAT_NV12) {
uint8_t *plane[2] = {
lock.pData,
@@ -164,8 +163,7 @@ static void D3D11_YUY2(filter_t *p_filter, picture_t *src, picture_t *dst)
lock.RowPitch,
lock.RowPitch,
};
- CopyFromNv12(dst, plane, pitch, src->format.i_width,
- src->format.i_height, &sys->cache);
+ CopyFromNv12(dst, plane, pitch, src->format.i_height, &sys->cache);
} else {
msg_Err(p_filter, "Unsupported D3D11VA conversion from 0x%08X to YV12", desc.Format);
}
@@ -223,8 +221,7 @@ static void D3D11_NV12(filter_t *p_filter, picture_t *src, picture_t *dst)
lock.RowPitch,
lock.RowPitch,
};
- CopyFromNv12ToNv12(dst, plane, pitch, src->format.i_width,
- src->format.i_height, &sys->cache);
+ CopyFromNv12ToNv12(dst, plane, pitch, src->format.i_height, &sys->cache);
} else {
msg_Err(p_filter, "Unsupported D3D11VA conversion from 0x%08X to NV12", desc.Format);
}
diff --git a/modules/video_chroma/dxa9.c b/modules/video_chroma/dxa9.c
index 5a99ac9..9cb7cf3 100644
--- a/modules/video_chroma/dxa9.c
+++ b/modules/video_chroma/dxa9.c
@@ -108,8 +108,7 @@ static void DXA9_YV12(filter_t *p_filter, picture_t *src, picture_t *dst)
plane[1] = plane[2];
plane[2] = V;
}
- CopyFromYv12(dst, plane, pitch, src->format.i_width,
- src->format.i_height, p_copy_cache);
+ CopyFromYv12(dst, plane, pitch, src->format.i_height, p_copy_cache);
} else if (desc.Format == MAKEFOURCC('N','V','1','2')) {
uint8_t *plane[2] = {
lock.pBits,
@@ -119,8 +118,7 @@ static void DXA9_YV12(filter_t *p_filter, picture_t *src, picture_t *dst)
lock.Pitch,
lock.Pitch,
};
- CopyFromNv12(dst, plane, pitch, src->format.i_width,
- src->format.i_height, p_copy_cache);
+ CopyFromNv12(dst, plane, pitch, src->format.i_height, p_copy_cache);
} else {
msg_Err(p_filter, "Unsupported DXA9 conversion from 0x%08X to YV12", desc.Format);
}
@@ -153,8 +151,7 @@ static void DXA9_NV12(filter_t *p_filter, picture_t *src, picture_t *dst)
lock.Pitch,
lock.Pitch,
};
- CopyFromNv12ToNv12(dst, plane, pitch, src->format.i_width,
- src->format.i_height, p_copy_cache);
+ CopyFromNv12ToNv12(dst, plane, pitch, src->format.i_height, p_copy_cache);
} else {
msg_Err(p_filter, "Unsupported DXA9 conversion from 0x%08X to NV12", desc.Format);
}
diff --git a/modules/video_chroma/i420_nv12.c b/modules/video_chroma/i420_nv12.c
index 5e0a767..16f4a5b 100644
--- a/modules/video_chroma/i420_nv12.c
+++ b/modules/video_chroma/i420_nv12.c
@@ -131,7 +131,6 @@ static void I420_YUV( filter_sys_t *p_sys, picture_t *p_src, picture_t *p_dst, b
};
CopyFromI420ToNv12( p_dst, plane, pitch,
- p_src->format.i_x_offset + p_src->format.i_visible_width,
p_src->format.i_y_offset + p_src->format.i_visible_height,
&p_sys->cache );
}
--
2.8.2
More information about the vlc-devel mailing list