[vlc-commits] copy: add SSE optimization for NV12 to I420 conversion
Victorien Le Couviour--Tuffet
git at videolan.org
Fri May 19 09:40:05 CEST 2017
vlc | branch: master | Victorien Le Couviour--Tuffet <victorien.lecouviour.tuffet at gmail.com> | Fri May 12 20:55:02 2017 +0200| [4a30c258e0285b9c8525a3677ef628d425bcdb2c] | committer: Thomas Guillem
copy: add SSE optimization for NV12 to I420 conversion
Signed-off-by: Thomas Guillem <thomas at gllm.fr>
> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=4a30c258e0285b9c8525a3677ef628d425bcdb2c
---
modules/codec/videotoolbox.m | 8 +++++++-
modules/video_chroma/copy.c | 26 +++++++++++++++++++++++++-
modules/video_chroma/copy.h | 2 +-
modules/video_chroma/cvpx_i420.c | 9 ++++++++-
4 files changed, 41 insertions(+), 4 deletions(-)
diff --git a/modules/codec/videotoolbox.m b/modules/codec/videotoolbox.m
index 965262ebd9..0dccf8c956 100644
--- a/modules/codec/videotoolbox.m
+++ b/modules/codec/videotoolbox.m
@@ -1160,6 +1160,7 @@ static void copy420YpCbCr8Planar(picture_t *p_pic,
{
uint8_t *pp_plane[2];
size_t pi_pitch[2];
+ copy_cache_t cache;
if (!buffer || i_width == 0 || i_height == 0)
return;
@@ -1171,7 +1172,12 @@ static void copy420YpCbCr8Planar(picture_t *p_pic,
pi_pitch[i] = CVPixelBufferGetBytesPerRowOfPlane(buffer, i);
}
- CopyFromNv12ToI420(p_pic, pp_plane, pi_pitch, i_height);
+ if (CopyInitCache(&cache, i_width))
+ return;
+
+ CopyFromNv12ToI420(p_pic, pp_plane, pi_pitch, i_height, &cache);
+
+ CopyCleanCache(&cache);
CVPixelBufferUnlockBaseAddress(buffer, kCVPixelBufferLock_ReadOnly);
}
diff --git a/modules/video_chroma/copy.c b/modules/video_chroma/copy.c
index 61e31494d1..03918fd44c 100644
--- a/modules/video_chroma/copy.c
+++ b/modules/video_chroma/copy.c
@@ -369,6 +369,21 @@ static void SSE_CopyFromNv12ToNv12(picture_t *dst,
asm volatile ("emms");
}
+static void
+SSE_CopyFromNv12ToI420(picture_t *dest, uint8_t *src[2],
+ size_t src_pitch[2], unsigned int height,
+ copy_cache_t *cache, unsigned int cpu)
+{
+ SSE_CopyPlane(dest->p[0].p_pixels, dest->p[0].i_pitch,
+ src[0], src_pitch[0], cache->buffer, cache->size,
+ height, cpu);
+ SSE_SplitPlanes(dest->p[1].p_pixels, dest->p[1].i_pitch,
+ dest->p[2].p_pixels, dest->p[2].i_pitch,
+ src[1], src_pitch[1], cache->buffer, cache->size,
+ height / 2, cpu);
+ asm volatile ("emms");
+}
+
static void SSE_CopyFromI420ToNv12(picture_t *dst,
uint8_t *src[3], size_t src_pitch[3],
unsigned height,
@@ -473,8 +488,17 @@ void CopyFromNv12ToNv12(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
}
void CopyFromNv12ToI420(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
- unsigned height)
+ unsigned height, copy_cache_t *cache)
{
+#ifdef CAN_COMPILE_SSE2
+ unsigned cpu = vlc_CPU();
+
+ if (vlc_CPU_SSE2())
+ return SSE_CopyFromNv12ToI420(dst, src, src_pitch, height, cache, cpu);
+#else
+ VLC_UNUSED(cache);
+#endif
+
CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
src[0], src_pitch[0], height);
SplitPlanes(dst->p[1].p_pixels, dst->p[1].i_pitch,
diff --git a/modules/video_chroma/copy.h b/modules/video_chroma/copy.h
index 02b015c101..f32bd24c60 100644
--- a/modules/video_chroma/copy.h
+++ b/modules/video_chroma/copy.h
@@ -45,7 +45,7 @@ void CopyFromNv12ToNv12(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
unsigned height, copy_cache_t *cache);
void CopyFromNv12ToI420(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
- unsigned height);
+ unsigned height, copy_cache_t *cache);
void CopyFromI420ToNv12(picture_t *dst, uint8_t *src[3], size_t src_pitch[3],
unsigned height, copy_cache_t *cache);
diff --git a/modules/video_chroma/cvpx_i420.c b/modules/video_chroma/cvpx_i420.c
index e5f08cb234..5d489922b8 100644
--- a/modules/video_chroma/cvpx_i420.c
+++ b/modules/video_chroma/cvpx_i420.c
@@ -95,7 +95,14 @@ static void CVPX_I420(filter_t *p_filter, picture_t *sourcePicture, picture_t *d
pi_pitch[i] = CVPixelBufferGetBytesPerRowOfPlane(picsys->pixelBuffer, i);
}
- CopyFromNv12ToI420(destinationPicture, pp_plane, pi_pitch, height);
+ copy_cache_t cache;
+
+ if (CopyInitCache(&cache, width))
+ return;
+
+ CopyFromNv12ToI420(destinationPicture, pp_plane, pi_pitch, height, &cache);
+
+ CopyCleanCache(&cache);
CVPixelBufferUnlockBaseAddress(picsys->pixelBuffer, kCVPixelBufferLock_ReadOnly);
}
More information about the vlc-commits mailing list