[vlc-commits] sharpen: help compiler auto-vectorization

Felix Abecassis git at videolan.org
Fri May 22 22:23:02 CEST 2015


vlc | branch: master | Felix Abecassis <felix.abecassis at gmail.com> | Fri May 22 15:32:39 2015 -0400 | [32466e668505f25097e2811a563a19d16de5fbb7] | committer: Tristan Matthews

sharpen: help compiler auto-vectorization

Refs #9458

Signed-off-by: Tristan Matthews <tmatth at videolan.org>

> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=32466e668505f25097e2811a563a19d16de5fbb7
---

 modules/video_filter/sharpen.c |   38 ++++++++++++++++++++------------------
 1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/modules/video_filter/sharpen.c b/modules/video_filter/sharpen.c
index 499358e..0edf928 100644
--- a/modules/video_filter/sharpen.c
+++ b/modules/video_filter/sharpen.c
@@ -171,13 +171,16 @@ static picture_t *Filter( filter_t *p_filter, picture_t *p_pic )
 {
     picture_t *p_outpic;
     int i, j;
-    uint8_t *p_src = NULL;
-    uint8_t *p_out = NULL;
+    uint8_t *restrict p_src = NULL;
+    uint8_t *restrict p_out = NULL;
     int i_src_pitch;
     int i_out_pitch;
     int pix;
     const int v1 = -1;
     const int v2 = 3; /* 2^3 = 8 */
+    const unsigned i_visible_lines = p_pic->p[Y_PLANE].i_visible_lines;
+    const unsigned i_visible_pitch = p_pic->p[Y_PLANE].i_visible_pitch;
+    const int sigma = var_GetFloat( p_filter, FILTER_PREFIX "sigma" ) * (1 << 20);
 
     if( !p_pic ) return NULL;
 
@@ -196,22 +199,15 @@ static picture_t *Filter( filter_t *p_filter, picture_t *p_pic )
 
     /* perform convolution only on Y plane. Avoid border line. */
     vlc_mutex_lock( &p_filter->p_sys->lock );
-    for( i = 0; i < p_pic->p[Y_PLANE].i_visible_lines; i++ )
+
+    memcpy(p_out, p_src, i_visible_pitch);
+
+    for( i = 1; i < i_visible_lines - 1; i++ )
     {
-        if( (i == 0) || (i == p_pic->p[Y_PLANE].i_visible_lines - 1) )
-        {
-            for( j = 0; j < p_pic->p[Y_PLANE].i_visible_pitch; j++ )
-                p_out[i * i_out_pitch + j] = clip( p_src[i * i_src_pitch + j] );
-            continue ;
-        }
-        for( j = 0; j < p_pic->p[Y_PLANE].i_visible_pitch; j++ )
-        {
-            if( (j == 0) || (j == p_pic->p[Y_PLANE].i_visible_pitch - 1) )
-            {
-                p_out[i * i_out_pitch + j] = p_src[i * i_src_pitch + j];
-                continue ;
-            }
+        p_out[i * i_out_pitch] = p_src[i * i_src_pitch];
 
+        for( j = 1; j < i_visible_pitch - 1; j++ )
+        {
             pix = (p_src[(i - 1) * i_src_pitch + j - 1] * v1) +
                   (p_src[(i - 1) * i_src_pitch + j    ] * v1) +
                   (p_src[(i - 1) * i_src_pitch + j + 1] * v1) +
@@ -223,10 +219,16 @@ static picture_t *Filter( filter_t *p_filter, picture_t *p_pic )
                   (p_src[(i + 1) * i_src_pitch + j + 1] * v1);
 
            pix = pix >= 0 ? clip(pix) : -clip(pix * -1);
-           p_out[i * i_out_pitch + j] = clip( p_src[i * i_src_pitch + j] +
-               p_filter->p_sys->tab_precalc[pix + 256] );
+           p_out[i * i_out_pitch + j] = clip( p_src[i * i_src_pitch + j]
+                                              + ((pix * sigma) >> 20));
         }
+
+        p_out[i * i_out_pitch + i_visible_pitch - 1] =
+            p_src[i * i_src_pitch + i_visible_pitch - 1];
     }
+    memcpy(&p_out[(i_visible_lines - 1) * i_out_pitch],
+           &p_src[(i_visible_lines - 1) * i_src_pitch], i_visible_pitch);
+
     vlc_mutex_unlock( &p_filter->p_sys->lock );
 
     plane_CopyPixels( &p_outpic->p[U_PLANE], &p_pic->p[U_PLANE] );



More information about the vlc-commits mailing list