[vlc-devel] [PATCH 2/2] copy: only copy the src_pitch pixels on each line not the whole width

Thu Jul 28 14:58:18 CEST 2016

How does that work if the picture has an offset (e.g. a non-zero x offset)?

On 28 Jul, Steve Lhomme wrote:
> ---
>  modules/codec/avcodec/vaapi.c        |   6 +--
>  modules/codec/avcodec/vda.c          |   2 +-
>  modules/codec/omxil/utils.c          |   2 +-
>  modules/video_chroma/copy.c          | 101 ++++++++++++++++-------------------
>  modules/video_chroma/copy.h          |  14 ++---
>  modules/video_chroma/cvpx_i420.c     |   1 -
>  modules/video_chroma/d3d11_surface.c |   9 ++--
>  modules/video_chroma/dxa9.c          |   9 ++--
>  modules/video_chroma/i420_nv12.c     |   1 -
>  9 files changed, 60 insertions(+), 85 deletions(-)
> 
> diff --git a/modules/codec/avcodec/vaapi.c b/modules/codec/avcodec/vaapi.c
> index d66047d..937a3ef 100644
> --- a/modules/codec/avcodec/vaapi.c
> +++ b/modules/codec/avcodec/vaapi.c
> @@ -121,8 +121,7 @@ static int Extract( vlc_va_t *va, picture_t *p_picture, uint8_t *data )
>              pp_plane[i] = (uint8_t*)p_base + image.offsets[i_src_plane];
>              pi_pitch[i] = image.pitches[i_src_plane];
>          }
> -        CopyFromYv12( p_picture, pp_plane, pi_pitch, sys->width, sys->height,
> -                      &sys->image_cache );
> +        CopyFromYv12( p_picture, pp_plane, pi_pitch, sys->height, &sys->image_cache );
>      }
>      else
>      {
> @@ -135,8 +134,7 @@ static int Extract( vlc_va_t *va, picture_t *p_picture, uint8_t *data )
>              pp_plane[i] = (uint8_t*)p_base + image.offsets[i];
>              pi_pitch[i] = image.pitches[i];
>          }
> -        CopyFromNv12( p_picture, pp_plane, pi_pitch, sys->width, sys->height,
> -                      &sys->image_cache );
> +        CopyFromNv12( p_picture, pp_plane, pi_pitch, sys->height, &sys->image_cache );
>      }
>  
>      vaUnmapBuffer(sys->hw_ctx.display, image.buf);
> diff --git a/modules/codec/avcodec/vda.c b/modules/codec/avcodec/vda.c
> index e5ff1b5..cb4070e 100644
> --- a/modules/codec/avcodec/vda.c
> +++ b/modules/codec/avcodec/vda.c
> @@ -69,7 +69,7 @@ static void copy420YpCbCr8Planar(picture_t *p_pic,
>          pi_pitch[i] = CVPixelBufferGetBytesPerRowOfPlane(buffer, i);
>      }
>  
> -    CopyFromNv12ToI420(p_pic, pp_plane, pi_pitch, i_width, i_height);
> +    CopyFromNv12ToI420(p_pic, pp_plane, pi_pitch, i_height);
>  
>      CVPixelBufferUnlockBaseAddress(buffer, 0);
>  }
> diff --git a/modules/codec/omxil/utils.c b/modules/codec/omxil/utils.c
> index aa2cb5f..abb5b0c 100644
> --- a/modules/codec/omxil/utils.c
> +++ b/modules/codec/omxil/utils.c
> @@ -223,7 +223,7 @@ void CopyOmxPicture( int i_color_format, picture_t *p_pic,
>          copy_cache_t *p_surface_cache = (copy_cache_t*)p_architecture_specific->data;
>          uint8_t *ppi_src_pointers[2] = { p_src, p_src + i_src_stride * i_slice_height };
>          size_t pi_src_strides[2] = { i_src_stride, i_src_stride };
> -        CopyFromNv12( p_pic, ppi_src_pointers, pi_src_strides, i_src_stride, i_slice_height, p_surface_cache );
> +        CopyFromNv12( p_pic, ppi_src_pointers, pi_src_strides, i_slice_height, p_surface_cache );
>          return;
>      }
>  #endif
> diff --git a/modules/video_chroma/copy.c b/modules/video_chroma/copy.c
> index fa0a212..c053282 100644
> --- a/modules/video_chroma/copy.c
> +++ b/modules/video_chroma/copy.c
> @@ -264,14 +264,14 @@ static void SSE_SplitUV(uint8_t *dstu, size_t dstu_pitch,
>  static void SSE_CopyPlane(uint8_t *dst, size_t dst_pitch,
>                            const uint8_t *src, size_t src_pitch,
>                            uint8_t *cache, size_t cache_size,
> -                          unsigned width, unsigned height, unsigned cpu)
> +                          unsigned height, unsigned cpu)
>  {
> -    const unsigned w16 = (width+15) & ~15;
> +    const unsigned w16 = (src_pitch+15) & ~15;
>      const unsigned hstep = cache_size / w16;
>      assert(hstep > 0);
>  
>      if (src_pitch == dst_pitch)
> -        memcpy(dst, src, width * height);
> +        memcpy(dst, src, src_pitch * height);
>      else
>      for (unsigned y = 0; y < height; y += hstep) {
>          const unsigned hblock =  __MIN(hstep, height - y);
> @@ -279,12 +279,12 @@ static void SSE_CopyPlane(uint8_t *dst, size_t dst_pitch,
>          /* Copy a bunch of line into our cache */
>          CopyFromUswc(cache, w16,
>                       src, src_pitch,
> -                     width, hblock, cpu);
> +                     src_pitch, hblock, cpu);
>  
>          /* Copy from our cache to the destination */
>          Copy2d(dst, dst_pitch,
>                 cache, w16,
> -               width, hblock);
> +               src_pitch, hblock);
>  
>          /* */
>          src += src_pitch * hblock;
> @@ -296,9 +296,9 @@ static void SSE_SplitPlanes(uint8_t *dstu, size_t dstu_pitch,
>                              uint8_t *dstv, size_t dstv_pitch,
>                              const uint8_t *src, size_t src_pitch,
>                              uint8_t *cache, size_t cache_size,
> -                            unsigned width, unsigned height, unsigned cpu)
> +                            unsigned height, unsigned cpu)
>  {
> -    const unsigned w16 = (2*width+15) & ~15;
> +    const unsigned w16 = (2*src_pitch+15) & ~15;
>      const unsigned hstep = cache_size / w16;
>      assert(hstep > 0);
>  
> @@ -307,11 +307,11 @@ static void SSE_SplitPlanes(uint8_t *dstu, size_t dstu_pitch,
>  
>          /* Copy a bunch of line into our cache */
>          CopyFromUswc(cache, w16, src, src_pitch,
> -                     2*width, hblock, cpu);
> +                     2*src_pitch, hblock, cpu);
>  
>          /* Copy from our cache to the destination */
>          SSE_SplitUV(dstu, dstu_pitch, dstv, dstv_pitch,
> -                    cache, w16, width, hblock, cpu);
> +                    cache, w16, src_pitch, hblock, cpu);
>  
>          /* */
>          src  += src_pitch  * hblock;
> @@ -322,24 +322,24 @@ static void SSE_SplitPlanes(uint8_t *dstu, size_t dstu_pitch,
>  
>  static void SSE_CopyFromNv12(picture_t *dst,
>                               uint8_t *src[2], size_t src_pitch[2],
> -                             unsigned width, unsigned height,
> +                             unsigned height,
>                               copy_cache_t *cache, unsigned cpu)
>  {
>      SSE_CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
>                    src[0], src_pitch[0],
>                    cache->buffer, cache->size,
> -                  width, height, cpu);
> +                  height, cpu);
>      SSE_SplitPlanes(dst->p[2].p_pixels, dst->p[2].i_pitch,
>                      dst->p[1].p_pixels, dst->p[1].i_pitch,
>                      src[1], src_pitch[1],
>                      cache->buffer, cache->size,
> -                    (width+1)/2, (height+1)/2, cpu);
> +                    (height+1)/2, cpu);
>      asm volatile ("emms");
>  }
>  
>  static void SSE_CopyFromYv12(picture_t *dst,
>                               uint8_t *src[3], size_t src_pitch[3],
> -                             unsigned width, unsigned height,
> +                             unsigned height,
>                               copy_cache_t *cache, unsigned cpu)
>  {
>      for (unsigned n = 0; n < 3; n++) {
> @@ -347,7 +347,7 @@ static void SSE_CopyFromYv12(picture_t *dst,
>          SSE_CopyPlane(dst->p[n].p_pixels, dst->p[n].i_pitch,
>                        src[n], src_pitch[n],
>                        cache->buffer, cache->size,
> -                      (width+d-1)/d, (height+d-1)/d, cpu);
> +                      (height+d-1)/d, cpu);
>      }
>      asm volatile ("emms");
>  }
> @@ -355,33 +355,33 @@ static void SSE_CopyFromYv12(picture_t *dst,
>  
>  static void SSE_CopyFromNv12ToNv12(picture_t *dst,
>                               uint8_t *src[2], size_t src_pitch[2],
> -                             unsigned width, unsigned height,
> +                             unsigned height,
>                               copy_cache_t *cache, unsigned cpu)
>  {
>      SSE_CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
>                    src[0], src_pitch[0],
>                    cache->buffer, cache->size,
> -                  width, height, cpu);
> +                  height, cpu);
>      SSE_CopyPlane(dst->p[1].p_pixels, dst->p[1].i_pitch,
>                    src[1], src_pitch[1],
>                    cache->buffer, cache->size,
> -                  width, height/2, cpu);
> +                  height/2, cpu);
>      asm volatile ("emms");
>  }
>  
>  static void SSE_CopyFromI420ToNv12(picture_t *dst,
>                               uint8_t *src[2], size_t src_pitch[2],
> -                             unsigned width, unsigned height,
> +                             unsigned height,
>                               copy_cache_t *cache, unsigned cpu)
>  {
>      SSE_CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
>                    src[0], src_pitch[0],
>                    cache->buffer, cache->size,
> -                  width, height, cpu);
> +                  height, cpu);
>  
>      /* TODO optimise the plane merging */
>      const unsigned copy_lines = height / 2;
> -    const unsigned copy_pitch = width / 2;
> +    const unsigned copy_pitch = src_pitch[1];
>  
>      const int i_extra_pitch_uv = dst->p[1].i_pitch - 2 * copy_pitch;
>      const int i_extra_pitch_u  = src_pitch[U_PLANE] - copy_pitch;
> @@ -408,13 +408,13 @@ static void SSE_CopyFromI420ToNv12(picture_t *dst,
>  
>  static void CopyPlane(uint8_t *dst, size_t dst_pitch,
>                        const uint8_t *src, size_t src_pitch,
> -                      unsigned width, unsigned height)
> +                      unsigned height)
>  {
>      if (src_pitch == dst_pitch)
> -        memcpy(dst, src, width * height);
> +        memcpy(dst, src, src_pitch * height);
>      else
>      for (unsigned y = 0; y < height; y++) {
> -        memcpy(dst, src, width);
> +        memcpy(dst, src, src_pitch);
>          src += src_pitch;
>          dst += dst_pitch;
>      }
> @@ -423,10 +423,10 @@ static void CopyPlane(uint8_t *dst, size_t dst_pitch,
>  static void SplitPlanes(uint8_t *dstu, size_t dstu_pitch,
>                          uint8_t *dstv, size_t dstv_pitch,
>                          const uint8_t *src, size_t src_pitch,
> -                        unsigned width, unsigned height)
> +                        unsigned height)
>  {
>      for (unsigned y = 0; y < height; y++) {
> -        for (unsigned x = 0; x < width; x++) {
> +        for (unsigned x = 0; x < src_pitch; x++) {
>              dstu[x] = src[2*x+0];
>              dstv[x] = src[2*x+1];
>          }
> @@ -437,79 +437,69 @@ static void SplitPlanes(uint8_t *dstu, size_t dstu_pitch,
>  }
>  
>  void CopyFromNv12(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
> -                  unsigned width, unsigned height,
> -                  copy_cache_t *cache)
> +                  unsigned height, copy_cache_t *cache)
>  {
>  #ifdef CAN_COMPILE_SSE2
>      unsigned cpu = vlc_CPU();
>      if (vlc_CPU_SSE2())
> -        return SSE_CopyFromNv12(dst, src, src_pitch, width, height,
> +        return SSE_CopyFromNv12(dst, src, src_pitch, height,
>                                  cache, cpu);
>  #else
>      (void) cache;
>  #endif
>  
>      CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
> -              src[0], src_pitch[0],
> -              width, height);
> +              src[0], src_pitch[0], height);
>      SplitPlanes(dst->p[2].p_pixels, dst->p[2].i_pitch,
>                  dst->p[1].p_pixels, dst->p[1].i_pitch,
> -                src[1], src_pitch[1],
> -                width/2, height/2);
> +                src[1], src_pitch[1], height/2);
>  }
>  
>  void CopyFromNv12ToNv12(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
> -                  unsigned width, unsigned height,
> -                  copy_cache_t *cache)
> +                  unsigned height, copy_cache_t *cache)
>  {
>  #ifdef CAN_COMPILE_SSE2
>      unsigned cpu = vlc_CPU();
>      if (vlc_CPU_SSE2())
> -        return SSE_CopyFromNv12ToNv12(dst, src, src_pitch, width, height,
> +        return SSE_CopyFromNv12ToNv12(dst, src, src_pitch, height,
>                                  cache, cpu);
>  #else
>      (void) cache;
>  #endif
>  
>      CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
> -              src[0], src_pitch[0],
> -              width, height);
> +              src[0], src_pitch[0], height);
>      CopyPlane(dst->p[1].p_pixels, dst->p[1].i_pitch,
> -              src[1], src_pitch[1],
> -              width, height/2);
> +              src[1], src_pitch[1], height/2);
>  }
>  
>  void CopyFromNv12ToI420(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
> -                        unsigned width, unsigned height)
> +                        unsigned height)
>  {
>      CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
> -              src[0], src_pitch[0],
> -              width, height);
> +              src[0], src_pitch[0], height);
>      SplitPlanes(dst->p[1].p_pixels, dst->p[1].i_pitch,
>                  dst->p[2].p_pixels, dst->p[2].i_pitch,
> -                src[1], src_pitch[1],
> -                width/2, height/2);
> +                src[1], src_pitch[1], height/2);
>  }
>  
>  void CopyFromI420ToNv12(picture_t *dst, uint8_t *src[3], size_t src_pitch[3],
> -                        unsigned width, unsigned height,
> -                        copy_cache_t *cache)
> +                        unsigned height, copy_cache_t *cache)
>  {
>  #ifdef CAN_COMPILE_SSE2
>      unsigned cpu = vlc_CPU();
>      if (vlc_CPU_SSE2())
> -        return SSE_CopyFromI420ToNv12(dst, src, src_pitch, width, height,
> +        return SSE_CopyFromI420ToNv12(dst, src, src_pitch, height,
>                                  cache, cpu);
>  #else
>      (void) cache;
>  #endif
>  
>      CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
> -              src[0], src_pitch[0],
> -              width, height);
> +              src[0], src_pitch[0], height);
>  
>      const unsigned copy_lines = height / 2;
> -    const unsigned copy_pitch = width / 2;
> +    const unsigned copy_pitch = src_pitch[1];
>  
>      const int i_extra_pitch_uv = dst->p[1].i_pitch - 2 * copy_pitch;
>      const int i_extra_pitch_u  = src_pitch[U_PLANE] - copy_pitch;
> @@ -533,22 +523,21 @@ void CopyFromI420ToNv12(picture_t *dst, uint8_t *src[3], size_t src_pitch[3],
>  
>  
>  void CopyFromYv12(picture_t *dst, uint8_t *src[3], size_t src_pitch[3],
> -                  unsigned width, unsigned height,
> -                  copy_cache_t *cache)
> +                  unsigned height, copy_cache_t *cache)
>  {
>  #ifdef CAN_COMPILE_SSE2
>      unsigned cpu = vlc_CPU();
>      if (vlc_CPU_SSE2())
> -        return SSE_CopyFromYv12(dst, src, src_pitch, width, height,
> +        return SSE_CopyFromYv12(dst, src, src_pitch, height,
>                                  cache, cpu);
>  #else
>      (void) cache;
>  #endif
>  
>       CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
> -               src[0], src_pitch[0], width, height);
> +               src[0], src_pitch[0], height);
>       CopyPlane(dst->p[1].p_pixels, dst->p[1].i_pitch,
> -               src[1], src_pitch[1], width / 2, height / 2);
> +               src[1], src_pitch[1], height / 2);
>       CopyPlane(dst->p[2].p_pixels, dst->p[2].i_pitch,
> -               src[2], src_pitch[2], width / 2, height / 2);
> +               src[2], src_pitch[2], height / 2);
>  }
> diff --git a/modules/video_chroma/copy.h b/modules/video_chroma/copy.h
> index 533e2fa..8776f95 100644
> --- a/modules/video_chroma/copy.h
> +++ b/modules/video_chroma/copy.h
> @@ -36,22 +36,18 @@ void CopyCleanCache(copy_cache_t *cache);
>  
>  /* Copy planes from NV12 to YV12 */
>  void CopyFromNv12(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
> -                  unsigned width, unsigned height,
> -                  copy_cache_t *cache);
> +                  unsigned height, copy_cache_t *cache);
>  /* Copy planes from YV12 to YV12 */
>  void CopyFromYv12(picture_t *dst, uint8_t *src[3], size_t src_pitch[3],
> -                  unsigned width, unsigned height,
> -                  copy_cache_t *cache);
> +                  unsigned height, copy_cache_t *cache);
>  
>  void CopyFromNv12ToNv12(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
> -                        unsigned width, unsigned height,
> -                        copy_cache_t *cache);
> +                        unsigned height, copy_cache_t *cache);
>  
>  void CopyFromNv12ToI420(picture_t *dst, uint8_t *src[2], size_t src_pitch[2],
> -                        unsigned width, unsigned height);
> +                        unsigned height);
>  
>  void CopyFromI420ToNv12(picture_t *dst, uint8_t *src[3], size_t src_pitch[3],
> -                        unsigned width, unsigned height,
> -                        copy_cache_t *cache);
> +                        unsigned height, copy_cache_t *cache);
>  
>  #endif
> diff --git a/modules/video_chroma/cvpx_i420.c b/modules/video_chroma/cvpx_i420.c
> index 18ed3cf..471f867 100644
> --- a/modules/video_chroma/cvpx_i420.c
> +++ b/modules/video_chroma/cvpx_i420.c
> @@ -89,7 +89,6 @@ static void CVPX_I420(filter_t *p_filter, picture_t *sourcePicture, picture_t *d
>      }
>  
>      CopyFromNv12ToI420(destinationPicture, pp_plane, pi_pitch,
> -                       sourcePicture->format.i_width,
>                         sourcePicture->format.i_height);
>  
>      CVPixelBufferUnlockBaseAddress(picsys->pixelBuffer, 0);
> diff --git a/modules/video_chroma/d3d11_surface.c b/modules/video_chroma/d3d11_surface.c
> index 3fbf4bf..76eb284 100644
> --- a/modules/video_chroma/d3d11_surface.c
> +++ b/modules/video_chroma/d3d11_surface.c
> @@ -153,8 +153,7 @@ static void D3D11_YUY2(filter_t *p_filter, picture_t *src, picture_t *dst)
>                                   + pitch[1] * src->format.i_height / 2,
>          };
>  
> -        CopyFromYv12(dst, plane, pitch, src->format.i_width,
> -                     src->format.i_height, &sys->cache);
> +        CopyFromYv12(dst, plane, pitch, src->format.i_height, &sys->cache);
>      } else if (desc.Format == DXGI_FORMAT_NV12) {
>          uint8_t *plane[2] = {
>              lock.pData,
> @@ -164,8 +163,7 @@ static void D3D11_YUY2(filter_t *p_filter, picture_t *src, picture_t *dst)
>              lock.RowPitch,
>              lock.RowPitch,
>          };
> -        CopyFromNv12(dst, plane, pitch, src->format.i_width,
> -                     src->format.i_height, &sys->cache);
> +        CopyFromNv12(dst, plane, pitch, src->format.i_height, &sys->cache);
>      } else {
>          msg_Err(p_filter, "Unsupported D3D11VA conversion from 0x%08X to YV12", desc.Format);
>      }
> @@ -223,8 +221,7 @@ static void D3D11_NV12(filter_t *p_filter, picture_t *src, picture_t *dst)
>              lock.RowPitch,
>              lock.RowPitch,
>          };
> -        CopyFromNv12ToNv12(dst, plane, pitch, src->format.i_width,
> -                           src->format.i_height, &sys->cache);
> +        CopyFromNv12ToNv12(dst, plane, pitch, src->format.i_height, &sys->cache);
>      } else {
>          msg_Err(p_filter, "Unsupported D3D11VA conversion from 0x%08X to NV12", desc.Format);
>      }
> diff --git a/modules/video_chroma/dxa9.c b/modules/video_chroma/dxa9.c
> index 5a99ac9..9cb7cf3 100644
> --- a/modules/video_chroma/dxa9.c
> +++ b/modules/video_chroma/dxa9.c
> @@ -108,8 +108,7 @@ static void DXA9_YV12(filter_t *p_filter, picture_t *src, picture_t *dst)
>              plane[1] = plane[2];
>              plane[2] = V;
>          }
> -        CopyFromYv12(dst, plane, pitch, src->format.i_width,
> -                     src->format.i_height, p_copy_cache);
> +        CopyFromYv12(dst, plane, pitch, src->format.i_height, p_copy_cache);
>      } else if (desc.Format == MAKEFOURCC('N','V','1','2')) {
>          uint8_t *plane[2] = {
>              lock.pBits,
> @@ -119,8 +118,7 @@ static void DXA9_YV12(filter_t *p_filter, picture_t *src, picture_t *dst)
>              lock.Pitch,
>              lock.Pitch,
>          };
> -        CopyFromNv12(dst, plane, pitch, src->format.i_width,
> -                     src->format.i_height, p_copy_cache);
> +        CopyFromNv12(dst, plane, pitch, src->format.i_height, p_copy_cache);
>      } else {
>          msg_Err(p_filter, "Unsupported DXA9 conversion from 0x%08X to YV12", desc.Format);
>      }
> @@ -153,8 +151,7 @@ static void DXA9_NV12(filter_t *p_filter, picture_t *src, picture_t *dst)
>              lock.Pitch,
>              lock.Pitch,
>          };
> -        CopyFromNv12ToNv12(dst, plane, pitch, src->format.i_width,
> -                           src->format.i_height, p_copy_cache);
> +        CopyFromNv12ToNv12(dst, plane, pitch, src->format.i_height, p_copy_cache);
>      } else {
>          msg_Err(p_filter, "Unsupported DXA9 conversion from 0x%08X to NV12", desc.Format);
>      }
> diff --git a/modules/video_chroma/i420_nv12.c b/modules/video_chroma/i420_nv12.c
> index 5e0a767..16f4a5b 100644
> --- a/modules/video_chroma/i420_nv12.c
> +++ b/modules/video_chroma/i420_nv12.c
> @@ -131,7 +131,6 @@ static void I420_YUV( filter_sys_t *p_sys, picture_t *p_src, picture_t *p_dst, b
>      };
>  
>      CopyFromI420ToNv12( p_dst, plane, pitch,
> -                        p_src->format.i_x_offset + p_src->format.i_visible_width,
>                          p_src->format.i_y_offset + p_src->format.i_visible_height,
>                          &p_sys->cache );
>  }
> -- 
> 2.8.2
> 
> _______________________________________________
> vlc-devel mailing list
> To unsubscribe or modify your subscription options:
> https://mailman.videolan.org/listinfo/vlc-devel

-- 
With my kindest regards,

-- 
Jean-Baptiste Kempf
http://www.jbkempf.com/ - +33 672 704 734
Sent from my Electronic Device