[vlc-devel] [PATCH v1 1/1] picture: increase alignment for AVX2 on x86 to 32 bytes

Janne Grunau janne-vlc at jannau.net
Sun Nov 27 22:27:51 CET 2016


Required for direct rendering with an AVX2-enabled libavcodec and for
AVX2 optimizations in the blend deinterlacer.
---
 configure.ac                | 8 ++++++++
 modules/video_output/evas.c | 2 +-
 src/misc/picture.c          | 4 ++--
 3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/configure.ac b/configure.ac
index 1e1f2f25b1..ff284a38ee 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1565,6 +1565,14 @@ AS_IF([test "${enable_altivec}" = "yes"], [
 AC_SUBST(ALTIVEC_CFLAGS)
 AM_CONDITIONAL([HAVE_ALTIVEC], [test "$have_altivec" = "yes"])
 
+dnl Alignment for SIMD functions
+dnl AVX2 on x86 needs 32-byte alignment, everything else needs 16-byte alignment
+case "$host_cpu" in
+    i?86|x86_64) simd_align=32 ;;
+    *)           simd_align=16 ;;
+esac
+AC_DEFINE_UNQUOTED([SIMD_ALIGNMENT], [$simd_align], [required alignment for SIMD])
+
 dnl
 dnl  Memory usage
 dnl
diff --git a/modules/video_output/evas.c b/modules/video_output/evas.c
index b38c6b5784..aefc3622cc 100644
--- a/modules/video_output/evas.c
+++ b/modules/video_output/evas.c
@@ -1007,7 +1007,7 @@ EvasImageBuffersAlloc( vout_display_t *vd, video_format_t *p_fmt )
     {
         struct buffer *p_buffer = &sys->p_buffers[i];
 
-        p_buffer->p[0] = vlc_memalign( 16, i_bytes );
+        p_buffer->p[0] = vlc_memalign( SIMD_ALIGNMENT, i_bytes );
 
         if( !p_buffer->p[0] )
         {
diff --git a/src/misc/picture.c b/src/misc/picture.c
index 43207ce58d..f17ef3241f 100644
--- a/src/misc/picture.c
+++ b/src/misc/picture.c
@@ -62,7 +62,7 @@ static int AllocatePicture( picture_t *p_pic )
         i_bytes += p->i_pitch * p->i_lines;
     }
 
-    uint8_t *p_data = vlc_memalign( 16, i_bytes );
+    uint8_t *p_data = vlc_memalign( SIMD_ALIGNMENT, i_bytes );
     if( i_bytes > 0 && p_data == NULL )
     {
         p_pic->i_planes = 0;
@@ -170,7 +170,7 @@ int picture_Setup( picture_t *p_picture, const video_format_t *restrict fmt )
     unsigned int i_ratio_h  = 1;
     for( unsigned i = 0; i < p_dsc->plane_count; i++ )
     {
-        i_modulo_w = LCM( i_modulo_w, 16 * p_dsc->p[i].w.den );
+        i_modulo_w = LCM( i_modulo_w, SIMD_ALIGNMENT * p_dsc->p[i].w.den );
         i_modulo_h = LCM( i_modulo_h, 16 * p_dsc->p[i].h.den );
         if( i_ratio_h < p_dsc->p[i].h.den )
             i_ratio_h = p_dsc->p[i].h.den;
-- 
2.11.0.rc2



More information about the vlc-devel mailing list