[vlc-devel] [PATCH 2/2] copy: only copy the src_pitch pixels on each line, not the whole width

Thu Jul 28 14:45:40 CEST 2016

---
 modules/codec/avcodec/vaapi.c        |   6 +--
 modules/codec/avcodec/vda.c          |   2 +-
 modules/codec/omxil/utils.c          |   2 +-
 modules/video_chroma/copy.c          | 101 ++++++++++++++++-------------------
 modules/video_chroma/copy.h          |  14 ++---
 modules/video_chroma/cvpx_i420.c     |   1 -
 modules/video_chroma/d3d11_surface.c |   9 ++--
 modules/video_chroma/dxa9.c          |   9 ++--
 modules/video_chroma/i420_nv12.c     |   1 -
 9 files changed, 60 insertions(+), 85 deletions(-)

diff --git a/modules/codec/avcodec/vaapi.c b/modules/codec/avcodec/vaapi.c
index d66047d..937a3ef 100644
--- a/modules/codec/avcodec/vaapi.c
+++ b/modules/codec/avcodec/vaapi.c
@@ -121,8 +121,7 @@ static int Extract( vlc_va_t *va, picture_t *p_picture, uint8_t *data )
             pp_plane[i] = (uint8_t*)p_base + image.offsets[i_src_plane];
             pi_pitch[i] = image.pitches[i_src_plane];
         }
-        CopyFromYv12( p_picture, pp_plane, pi_pitch, sys->width, sys->height,
-                      &sys->image_cache );
+        CopyFromYv12( p_picture, pp_plane, pi_pitch, sys->height, &sys->image_cache );
     }
     else
     {
@@ -135,8 +134,7 @@ static int Extract( vlc_va_t *va, picture_t *p_picture, uint8_t *data )
             pp_plane[i] = (uint8_t*)p_base + image.offsets[i];
             pi_pitch[i] = image.pitches[i];
         }
-        CopyFromNv12( p_picture, pp_plane, pi_pitch, sys->width, sys->height,
-                      &sys->image_cache );
+        CopyFromNv12( p_picture, pp_plane, pi_pitch, sys->height, &sys->image_cache );
     }
 
     vaUnmapBuffer(sys->hw_ctx.display, image.buf);
diff --git a/modules/codec/avcodec/vda.c b/modules/codec/avcodec/vda.c
index e5ff1b5..cb4070e 100644
--- a/modules/codec/avcodec/vda.c
+++ b/modules/codec/avcodec/vda.c
@@ -69,7 +69,7 @@ static void copy420YpCbCr8Planar(picture_t *p_pic,
         pi_pitch[i] = CVPixelBufferGetBytesPerRowOfPlane(buffer, i);
     }
 
-    CopyFromNv12ToI420(p_pic, pp_plane, pi_pitch, i_width, i_height);
+    CopyFromNv12ToI420(p_pic, pp_plane, pi_pitch, i_height);
 
     CVPixelBufferUnlockBaseAddress(buffer, 0);
 }
diff --git a/modules/codec/omxil/utils.c b/modules/codec/omxil/utils.c
index aa2cb5f..abb5b0c 100644
--- a/modules/codec/omxil/utils.c
+++ b/modules/codec/omxil/utils.c
@@ -223,7 +223,7 @@ void CopyOmxPicture( int i_color_format, picture_t *p_pic,
         copy_cache_t *p_surface_cache = (copy_cache_t*)p_architecture_specific->data;
         uint8_t *ppi_src_pointers[2] = { p_src, p_src + i_src_stride * i_slice_height };
         size_t pi_src_strides[2] = { i_src_stride, i_src_stride };
-        CopyFromNv12( p_pic, ppi_src_pointers, pi_src_strides, i_src_stride, i_slice_height, p_surface_cache );
+        CopyFromNv12( p_pic, ppi_src_pointers, pi_src_strides, i_slice_height, p_surface_cache );
         return;
     }
 #endif
diff --git a/modules/video_chroma/copy.c b/modules/video_chroma/copy.c
index fa0a212..c053282 100644
--- a/modules/video_chroma/copy.c
+++ b/modules/video_chroma/copy.c
@@ -264,14 +264,14 @@ static void SSE_SplitUV(uint8_t *dstu, size_t dstu_pitch,
 static void SSE_CopyPlane(uint8_t *dst, size_t dst_pitch,
                           const uint8_t *src, size_t src_pitch,
                           uint8_t *cache, size_t cache_size,
-                          unsigned width, unsigned height, unsigned cpu)
+                          unsigned height, unsigned cpu)
 {
-    const unsigned w16 = (width+15) & ~15;
+    const unsigned w16 = (src_pitch+15) & ~15;
     const unsigned hstep = cache_size / w16;
     assert(hstep > 0);
 
     if (src_pitch == dst_pitch)
-        memcpy(dst, src, width * height);
+        memcpy(dst, src, src_pitch * height);
     else
     for (unsigned y = 0; y < height; y += hstep) {
         const unsigned hblock =  __MIN(hstep, height - y);
@@ -279,12 +279,12 @@ static void SSE_CopyPlane(uint8_t *dst, size_t dst_pitch,
         /* Copy a bunch of line into our cache */
         CopyFromUswc(cache, w16,
                      src, src_pitch,
-                     width, hblock, cpu);
+                     src_pitch, hblock, cpu);
 
         /* Copy from our cache to the destination */
         Copy2d(dst, dst_pitch,
                cache, w16,
-               width, hblock);
+               src_pitch, hblock);
 
         /* */
         src += src_pitch * hblock;
@@ -296,9 +296,9 @@ static void SSE_SplitPlanes(uint8_t *dstu, size_t dstu_pitch,
                             uint8_t *dstv, size_t dstv_pitch,
                             const uint8_t *src, size_t src_pitch,
                             uint8_t *cache, size_t cache_size,
-                            unsigned width, unsigned height, unsigned cpu)
+                            unsigned height, unsigned cpu)
 {
-    const unsigned w16 = (2*width+15) & ~15;
+    const unsigned w16 = (2*src_pitch+15) & ~15;
     const unsigned hstep = cache_size / w16;
     assert(hstep > 0);
 
@@ -307,11 +307,11 @@ static void SSE_SplitPlanes(uint8_t *dstu, size_t dstu_pitch,
 
         /* Copy a bunch of line into our cache */
         CopyFromUswc(cache, w16, src, src_pitch,
-                     2*width, hblock, cpu);
+                     2*src_pitch, hblock, cpu);
 
         /* Copy from our cache to the destination */
         SSE_SplitUV(dstu, dstu_pitch, dstv, dstv_pitch,
-                    cache, w16, width, hblock, cpu);
+                    cache, w16, src_pitch, hblock, cpu);
 
         /* */
         src  += src_pitch  * hblock;
@@ -322,24 +322,24 @@ static void SSE_SplitPlanes(uint8_t *dstu, size_t dstu_pitch,
 
 static void SSE_CopyFromNv12(picture_t *dst,
                              uint8_t *src[2], size_t src_pitch[2],
-                             unsigned width, unsigned height,
+                             unsigned height,
                              copy_cache_t *cache, unsigned cpu)
 {
     SSE_CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
                   src[0], src_pitch[0],
                   cache->buffer, cache->size,
-                  width, height, cpu);
+                  height, cpu);
     SSE_SplitPlanes(dst->p[2].p_pixels, dst->p[2].i_pitch,
                     dst->p[1].p_pixels, dst->p[1].i_pitch,
                     src[1], src_pitch[1],
                     cache->buffer, cache->size,
-                    (width+1)/2, (height+1)/2, cpu);
+                    (height+1)/2, cpu);
     asm volatile ("emms");
 }
 
 static void SSE_CopyFromYv12(picture_t *dst,
                              uint8_t *src[3], size_t src_pitch[3],
-                             unsigned width, unsigned height,
+                             unsigned height,
                              copy_cache_t *cache, unsigned cpu)
 {
     for (unsigned n = 0; n < 3; n++) {
@@ -347,7 +347,7 @@ static void SSE_CopyFromYv12(picture_t *dst,
         SSE_CopyPlane(dst->p[n].p_pixels, dst->p[n].i_pitch,
                       src[n], src_pitch[n],
                       cache->buffer, cache->size,
-                      (width+d-1)/d, (height+d-1)/d, cpu);
+                      (height+d-1)/d, cpu);
     }
     asm volatile ("emms");
 }
@@ -355,33 +355,33 @@ static void SSE_CopyFromYv12(picture_t *dst,
 
 static void SSE_CopyFromNv12ToNv12(picture_t *dst,
                              uint8_t *src[2], size_t src_pitch[2],
-                             unsigned width, unsigned height,
+                             unsigned height,
                              copy_cache_t *cache, unsigned cpu)
 {
     SSE_CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
                   src[0], src_pitch[0],
                   cache->buffer, cache->size,
-                  width, height, cpu);
+                  height, cpu);
     SSE_CopyPlane(dst->p[1].p_pixels, dst->p[1].i_pitch,
                   src[1], src_pitch[1],
                   cache->buffer, cache->size,
-                  width, height/2, cpu);
+                  height/2, cpu);
     asm volatile ("emms");
 }
 
 static void SSE_CopyFromI420ToNv12(picture_t *dst,
                              uint8_t *src[2], size_t src_pitch[2],
-                             unsigned width, unsigned height,
+                             unsigned height,
                              copy_cache_t *cache, unsigned cpu)
 {
     SSE_CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
                   src[0], src_pitch[0],
                   cache->buffer, cache->size,
-                  width, height, cpu);
+                  height, cpu);
 
     /* TODO optimise the plane merging */
     const unsigned copy_lines = height / 2;
-    const unsigned copy_pitch = width / 2;
+    const unsigned copy_pitch = src_pitch[1];
 
     const int i_extra_pitch_uv = dst->p[1].i_pitch - 2 * copy_pitch;
     const int i_extra_pitch_u  = src_pitch[U_PLANE] - copy_pitch;
@@ -408,13 +408,13 @@ static void SSE_CopyFromI420ToNv12(picture_t *dst,
 
 static void CopyPlane(uint8_t *dst, size_t dst_pitch,
                       const uint8_t *src, size_t src_pitch,
-                      unsigned width, unsigned height)
+                      unsigned height)
 {
     if (src_pitch == dst_pitch)
-        memcpy(dst, src, width * height);
+        memcpy(dst, src, src_pitch * height);
     else
     for (unsigned y = 0; y < height; y++) {
-        memcpy(dst, src, width);
+        memcpy(dst, src, src_pitch);
         src += src_pitch;
         dst += dst_pitch;
     }
@@ -423,10 +423,10 @@ static void CopyPlane(uint8_t *dst, size_t dst_pitch,
 static void SplitPlanes(uint8_t *dstu, size_t dstu_pitch,
                         uint8_t *dstv, size_t dstv_pitch,
                         const uint8_t *src, size_t src_pitch,
-                        unsigned width, unsigned height)
+                        unsigned height)
 {
     for (unsigned y = 0; y < height; y++) {
-        for (unsigned x = 0; x < width; x++) {
+        for (unsigned x = 0; x < src_pitch; x++) {
             dstu[x] = src[2*x+0];
             dstv[x] = src[2*x+1];
         }
@@ -437,79 +437,69 @@ static void SplitPlanes(uint8_t *dstu, size_t dstu_pitch,
 }
 
 void CopyFromNv12(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
-                  unsigned width, unsigned height,
-                  copy_cache_t *cache)
+                  unsigned height, copy_cache_t *cache)
 {
 #ifdef CAN_COMPILE_SSE2
     unsigned cpu = vlc_CPU();
     if (vlc_CPU_SSE2())
-        return SSE_CopyFromNv12(dst, src, src_pitch, width, height,
+        return SSE_CopyFromNv12(dst, src, src_pitch, height,
                                 cache, cpu);
 #else
     (void) cache;
 #endif
 
     CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
-              src[0], src_pitch[0],
-              width, height);
+              src[0], src_pitch[0], height);
     SplitPlanes(dst->p[2].p_pixels, dst->p[2].i_pitch,
                 dst->p[1].p_pixels, dst->p[1].i_pitch,
-                src[1], src_pitch[1],
-                width/2, height/2);
+                src[1], src_pitch[1], height/2);
 }
 
 void CopyFromNv12ToNv12(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
-                  unsigned width, unsigned height,
-                  copy_cache_t *cache)
+                  unsigned height, copy_cache_t *cache)
 {
 #ifdef CAN_COMPILE_SSE2
     unsigned cpu = vlc_CPU();
     if (vlc_CPU_SSE2())
-        return SSE_CopyFromNv12ToNv12(dst, src, src_pitch, width, height,
+        return SSE_CopyFromNv12ToNv12(dst, src, src_pitch, height,
                                 cache, cpu);
 #else
     (void) cache;
 #endif
 
     CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
-              src[0], src_pitch[0],
-              width, height);
+              src[0], src_pitch[0], height);
     CopyPlane(dst->p[1].p_pixels, dst->p[1].i_pitch,
-              src[1], src_pitch[1],
-              width, height/2);
+              src[1], src_pitch[1], height/2);
 }
 
 void CopyFromNv12ToI420(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
-                        unsigned width, unsigned height)
+                        unsigned height)
 {
     CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
-              src[0], src_pitch[0],
-              width, height);
+              src[0], src_pitch[0], height);
     SplitPlanes(dst->p[1].p_pixels, dst->p[1].i_pitch,
                 dst->p[2].p_pixels, dst->p[2].i_pitch,
-                src[1], src_pitch[1],
-                width/2, height/2);
+                src[1], src_pitch[1], height/2);
 }
 
 void CopyFromI420ToNv12(picture_t *dst, uint8_t *src[3], size_t src_pitch[3],
-                        unsigned width, unsigned height,
-                        copy_cache_t *cache)
+                        unsigned height, copy_cache_t *cache)
 {
 #ifdef CAN_COMPILE_SSE2
     unsigned cpu = vlc_CPU();
     if (vlc_CPU_SSE2())
-        return SSE_CopyFromI420ToNv12(dst, src, src_pitch, width, height,
+        return SSE_CopyFromI420ToNv12(dst, src, src_pitch, height,
                                 cache, cpu);
 #else
     (void) cache;
 #endif
 
     CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
-              src[0], src_pitch[0],
-              width, height);
+              src[0], src_pitch[0], height);
 
     const unsigned copy_lines = height / 2;
-    const unsigned copy_pitch = width / 2;
+    const unsigned copy_pitch = src_pitch[1];
 
     const int i_extra_pitch_uv = dst->p[1].i_pitch - 2 * copy_pitch;
     const int i_extra_pitch_u  = src_pitch[U_PLANE] - copy_pitch;
@@ -533,22 +523,21 @@ void CopyFromI420ToNv12(picture_t *dst, uint8_t *src[3], size_t src_pitch[3],
 
 
 void CopyFromYv12(picture_t *dst, uint8_t *src[3], size_t src_pitch[3],
-                  unsigned width, unsigned height,
-                  copy_cache_t *cache)
+                  unsigned height, copy_cache_t *cache)
 {
 #ifdef CAN_COMPILE_SSE2
     unsigned cpu = vlc_CPU();
     if (vlc_CPU_SSE2())
-        return SSE_CopyFromYv12(dst, src, src_pitch, width, height,
+        return SSE_CopyFromYv12(dst, src, src_pitch, height,
                                 cache, cpu);
 #else
     (void) cache;
 #endif
 
      CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
-               src[0], src_pitch[0], width, height);
+               src[0], src_pitch[0], height);
      CopyPlane(dst->p[1].p_pixels, dst->p[1].i_pitch,
-               src[1], src_pitch[1], width / 2, height / 2);
+               src[1], src_pitch[1], height / 2);
      CopyPlane(dst->p[2].p_pixels, dst->p[2].i_pitch,
-               src[2], src_pitch[2], width / 2, height / 2);
+               src[2], src_pitch[2], height / 2);
 }
diff --git a/modules/video_chroma/copy.h b/modules/video_chroma/copy.h
index 533e2fa..8776f95 100644
--- a/modules/video_chroma/copy.h
+++ b/modules/video_chroma/copy.h
@@ -36,22 +36,18 @@ void CopyCleanCache(copy_cache_t *cache);
 
 /* Copy planes from NV12 to YV12 */
 void CopyFromNv12(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
-                  unsigned width, unsigned height,
-                  copy_cache_t *cache);
+                  unsigned height, copy_cache_t *cache);
 /* Copy planes from YV12 to YV12 */
 void CopyFromYv12(picture_t *dst, uint8_t *src[3], size_t src_pitch[3],
-                  unsigned width, unsigned height,
-                  copy_cache_t *cache);
+                  unsigned height, copy_cache_t *cache);
 
 void CopyFromNv12ToNv12(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
-                        unsigned width, unsigned height,
-                        copy_cache_t *cache);
+                        unsigned height, copy_cache_t *cache);
 
 void CopyFromNv12ToI420(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
-                        unsigned width, unsigned height);
+                        unsigned height);
 
 void CopyFromI420ToNv12(picture_t *dst, uint8_t *src[3], size_t src_pitch[3],
-                        unsigned width, unsigned height,
-                        copy_cache_t *cache);
+                        unsigned height, copy_cache_t *cache);
 
 #endif
diff --git a/modules/video_chroma/cvpx_i420.c b/modules/video_chroma/cvpx_i420.c
index 18ed3cf..471f867 100644
--- a/modules/video_chroma/cvpx_i420.c
+++ b/modules/video_chroma/cvpx_i420.c
@@ -89,7 +89,6 @@ static void CVPX_I420(filter_t *p_filter, picture_t *sourcePicture, picture_t *d
     }
 
     CopyFromNv12ToI420(destinationPicture, pp_plane, pi_pitch,
-                       sourcePicture->format.i_width,
                        sourcePicture->format.i_height);
 
     CVPixelBufferUnlockBaseAddress(picsys->pixelBuffer, 0);
diff --git a/modules/video_chroma/d3d11_surface.c b/modules/video_chroma/d3d11_surface.c
index 3fbf4bf..76eb284 100644
--- a/modules/video_chroma/d3d11_surface.c
+++ b/modules/video_chroma/d3d11_surface.c
@@ -153,8 +153,7 @@ static void D3D11_YUY2(filter_t *p_filter, picture_t *src, picture_t *dst)
                                  + pitch[1] * src->format.i_height / 2,
         };
 
-        CopyFromYv12(dst, plane, pitch, src->format.i_width,
-                     src->format.i_height, &sys->cache);
+        CopyFromYv12(dst, plane, pitch, src->format.i_height, &sys->cache);
     } else if (desc.Format == DXGI_FORMAT_NV12) {
         uint8_t *plane[2] = {
             lock.pData,
@@ -164,8 +163,7 @@ static void D3D11_YUY2(filter_t *p_filter, picture_t *src, picture_t *dst)
             lock.RowPitch,
             lock.RowPitch,
         };
-        CopyFromNv12(dst, plane, pitch, src->format.i_width,
-                     src->format.i_height, &sys->cache);
+        CopyFromNv12(dst, plane, pitch, src->format.i_height, &sys->cache);
     } else {
         msg_Err(p_filter, "Unsupported D3D11VA conversion from 0x%08X to YV12", desc.Format);
     }
@@ -223,8 +221,7 @@ static void D3D11_NV12(filter_t *p_filter, picture_t *src, picture_t *dst)
             lock.RowPitch,
             lock.RowPitch,
         };
-        CopyFromNv12ToNv12(dst, plane, pitch, src->format.i_width,
-                           src->format.i_height, &sys->cache);
+        CopyFromNv12ToNv12(dst, plane, pitch, src->format.i_height, &sys->cache);
     } else {
         msg_Err(p_filter, "Unsupported D3D11VA conversion from 0x%08X to NV12", desc.Format);
     }
diff --git a/modules/video_chroma/dxa9.c b/modules/video_chroma/dxa9.c
index 5a99ac9..9cb7cf3 100644
--- a/modules/video_chroma/dxa9.c
+++ b/modules/video_chroma/dxa9.c
@@ -108,8 +108,7 @@ static void DXA9_YV12(filter_t *p_filter, picture_t *src, picture_t *dst)
             plane[1] = plane[2];
             plane[2] = V;
         }
-        CopyFromYv12(dst, plane, pitch, src->format.i_width,
-                     src->format.i_height, p_copy_cache);
+        CopyFromYv12(dst, plane, pitch, src->format.i_height, p_copy_cache);
     } else if (desc.Format == MAKEFOURCC('N','V','1','2')) {
         uint8_t *plane[2] = {
             lock.pBits,
@@ -119,8 +118,7 @@ static void DXA9_YV12(filter_t *p_filter, picture_t *src, picture_t *dst)
             lock.Pitch,
             lock.Pitch,
         };
-        CopyFromNv12(dst, plane, pitch, src->format.i_width,
-                     src->format.i_height, p_copy_cache);
+        CopyFromNv12(dst, plane, pitch, src->format.i_height, p_copy_cache);
     } else {
         msg_Err(p_filter, "Unsupported DXA9 conversion from 0x%08X to YV12", desc.Format);
     }
@@ -153,8 +151,7 @@ static void DXA9_NV12(filter_t *p_filter, picture_t *src, picture_t *dst)
             lock.Pitch,
             lock.Pitch,
         };
-        CopyFromNv12ToNv12(dst, plane, pitch, src->format.i_width,
-                           src->format.i_height, p_copy_cache);
+        CopyFromNv12ToNv12(dst, plane, pitch, src->format.i_height, p_copy_cache);
     } else {
         msg_Err(p_filter, "Unsupported DXA9 conversion from 0x%08X to NV12", desc.Format);
     }
diff --git a/modules/video_chroma/i420_nv12.c b/modules/video_chroma/i420_nv12.c
index 5e0a767..16f4a5b 100644
--- a/modules/video_chroma/i420_nv12.c
+++ b/modules/video_chroma/i420_nv12.c
@@ -131,7 +131,6 @@ static void I420_YUV( filter_sys_t *p_sys, picture_t *p_src, picture_t *p_dst, b
     };
 
     CopyFromI420ToNv12( p_dst, plane, pitch,
-                        p_src->format.i_x_offset + p_src->format.i_visible_width,
                         p_src->format.i_y_offset + p_src->format.i_visible_height,
                         &p_sys->cache );
 }
-- 
2.8.2