[vlc-devel] [PATCH] video-chroma: copy: add SSE optimization for NV12 to I420 conversion

Victorien Le Couviour--Tuffet victorien.lecouviour.tuffet at gmail.com
Sat May 13 00:43:38 CEST 2017


---
This revision also updates the call sites of CopyFromNv12ToI420, which I forgot to modify in the previous version of the patch.

 modules/codec/videotoolbox.m     |  8 +++++++-
 modules/video_chroma/copy.c      | 26 +++++++++++++++++++++++++-
 modules/video_chroma/copy.h      |  2 +-
 modules/video_chroma/cvpx_i420.c |  9 ++++++++-
 4 files changed, 41 insertions(+), 4 deletions(-)

diff --git a/modules/codec/videotoolbox.m b/modules/codec/videotoolbox.m
index 965262ebd9..0dccf8c956 100644
--- a/modules/codec/videotoolbox.m
+++ b/modules/codec/videotoolbox.m
@@ -1160,6 +1160,7 @@ static void copy420YpCbCr8Planar(picture_t *p_pic,
 {
     uint8_t *pp_plane[2];
     size_t pi_pitch[2];
+    copy_cache_t cache;
 
     if (!buffer || i_width == 0 || i_height == 0)
         return;
@@ -1171,7 +1172,12 @@ static void copy420YpCbCr8Planar(picture_t *p_pic,
         pi_pitch[i] = CVPixelBufferGetBytesPerRowOfPlane(buffer, i);
     }
 
-    CopyFromNv12ToI420(p_pic, pp_plane, pi_pitch, i_height);
+    /* If CopyInitCache() fails, skip the copy but do not return early:
+     * the pixel buffer must still be unlocked below. */
+    if (CopyInitCache(&cache, i_width) == 0) {
+        CopyFromNv12ToI420(p_pic, pp_plane, pi_pitch, i_height, &cache);
+        CopyCleanCache(&cache);
+    }
 
     CVPixelBufferUnlockBaseAddress(buffer, kCVPixelBufferLock_ReadOnly);
 }
diff --git a/modules/video_chroma/copy.c b/modules/video_chroma/copy.c
index 98816d4686..a0e3c7b812 100644
--- a/modules/video_chroma/copy.c
+++ b/modules/video_chroma/copy.c
@@ -369,6 +369,21 @@ static void SSE_CopyFromNv12ToNv12(picture_t *dst,
     asm volatile ("emms");
 }
 
+static void
+SSE_CopyFromNv12ToI420(picture_t *dest, uint8_t *src[2],
+                       size_t src_pitch[2], unsigned int height,
+                       copy_cache_t *cache, unsigned int cpu)
+{
+    /* First source plane goes to destination plane 0, full height. */
+    SSE_CopyPlane(dest->p[0].p_pixels, dest->p[0].i_pitch, src[0],
+                  src_pitch[0], cache->buffer, cache->size, height, cpu);
+    /* Second source plane is split into planes 1 and 2, half height. */
+    SSE_SplitPlanes(dest->p[1].p_pixels, dest->p[1].i_pitch,
+                    dest->p[2].p_pixels, dest->p[2].i_pitch, src[1],
+                    src_pitch[1], cache->buffer, cache->size, height / 2, cpu);
+    asm volatile ("emms");
+}
+
 static void SSE_CopyFromI420ToNv12(picture_t *dst,
                              uint8_t *src[3], size_t src_pitch[3],
                              unsigned height,
@@ -474,8 +489,17 @@ void CopyFromNv12ToNv12(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
 }
 
 void CopyFromNv12ToI420(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
-                        unsigned height)
+                        unsigned height, copy_cache_t *cache)
 {
+#ifdef CAN_COMPILE_SSE2
+    /* Take the SSE path when vlc_CPU_SSE2() reports support. */
+    if (vlc_CPU_SSE2()) {
+        SSE_CopyFromNv12ToI420(dst, src, src_pitch, height, cache, vlc_CPU());
+        return;
+    }
+#else
+    VLC_UNUSED(cache);
+#endif
+
     CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
               src[0], src_pitch[0], height);
     SplitPlanes(dst->p[1].p_pixels, dst->p[1].i_pitch,
diff --git a/modules/video_chroma/copy.h b/modules/video_chroma/copy.h
index f5a56cc75c..937773cb84 100644
--- a/modules/video_chroma/copy.h
+++ b/modules/video_chroma/copy.h
@@ -45,7 +45,7 @@ void CopyFromNv12ToNv12(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
                         unsigned height, copy_cache_t *cache);
 
 void CopyFromNv12ToI420(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
-                        unsigned height);
+                        unsigned height, copy_cache_t *cache);
 
 void CopyFromI420ToNv12(picture_t *dst, uint8_t *src[3], size_t src_pitch[3],
                         unsigned height, copy_cache_t *cache);
diff --git a/modules/video_chroma/cvpx_i420.c b/modules/video_chroma/cvpx_i420.c
index e5f08cb234..5d489922b8 100644
--- a/modules/video_chroma/cvpx_i420.c
+++ b/modules/video_chroma/cvpx_i420.c
@@ -95,7 +95,14 @@ static void CVPX_I420(filter_t *p_filter, picture_t *sourcePicture, picture_t *d
         pi_pitch[i] = CVPixelBufferGetBytesPerRowOfPlane(picsys->pixelBuffer, i);
     }
 
-    CopyFromNv12ToI420(destinationPicture, pp_plane, pi_pitch, height);
+    copy_cache_t cache;
+
+    /* No early return on failure: the buffer must still be unlocked below. */
+    if (CopyInitCache(&cache, width) == 0) {
+        CopyFromNv12ToI420(destinationPicture, pp_plane, pi_pitch, height,
+                           &cache);
+        CopyCleanCache(&cache);
+    }
 
     CVPixelBufferUnlockBaseAddress(picsys->pixelBuffer, kCVPixelBufferLock_ReadOnly);
 }
-- 
2.12.0



More information about the vlc-devel mailing list