[vlc-commits] sharpen: help compiler auto-vectorization
Felix Abecassis
git at videolan.org
Fri May 22 22:23:02 CEST 2015
vlc | branch: master | Felix Abecassis <felix.abecassis at gmail.com> | Fri May 22 15:32:39 2015 -0400| [32466e668505f25097e2811a563a19d16de5fbb7] | committer: Tristan Matthews
sharpen: help compiler auto-vectorization
Refs #9458
Signed-off-by: Tristan Matthews <tmatth at videolan.org>
> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=32466e668505f25097e2811a563a19d16de5fbb7
---
modules/video_filter/sharpen.c | 38 ++++++++++++++++++++------------------
1 file changed, 20 insertions(+), 18 deletions(-)
diff --git a/modules/video_filter/sharpen.c b/modules/video_filter/sharpen.c
index 499358e..0edf928 100644
--- a/modules/video_filter/sharpen.c
+++ b/modules/video_filter/sharpen.c
@@ -171,13 +171,16 @@ static picture_t *Filter( filter_t *p_filter, picture_t *p_pic )
{
picture_t *p_outpic;
int i, j;
- uint8_t *p_src = NULL;
- uint8_t *p_out = NULL;
+ uint8_t *restrict p_src = NULL;
+ uint8_t *restrict p_out = NULL;
int i_src_pitch;
int i_out_pitch;
int pix;
const int v1 = -1;
const int v2 = 3; /* 2^3 = 8 */
+ const unsigned i_visible_lines = p_pic->p[Y_PLANE].i_visible_lines;
+ const unsigned i_visible_pitch = p_pic->p[Y_PLANE].i_visible_pitch;
+ const int sigma = var_GetFloat( p_filter, FILTER_PREFIX "sigma" ) * (1 << 20);
if( !p_pic ) return NULL;
@@ -196,22 +199,15 @@ static picture_t *Filter( filter_t *p_filter, picture_t *p_pic )
/* perform convolution only on Y plane. Avoid border line. */
vlc_mutex_lock( &p_filter->p_sys->lock );
- for( i = 0; i < p_pic->p[Y_PLANE].i_visible_lines; i++ )
+
+ memcpy(p_out, p_src, i_visible_pitch);
+
+ for( i = 1; i < i_visible_lines - 1; i++ )
{
- if( (i == 0) || (i == p_pic->p[Y_PLANE].i_visible_lines - 1) )
- {
- for( j = 0; j < p_pic->p[Y_PLANE].i_visible_pitch; j++ )
- p_out[i * i_out_pitch + j] = clip( p_src[i * i_src_pitch + j] );
- continue ;
- }
- for( j = 0; j < p_pic->p[Y_PLANE].i_visible_pitch; j++ )
- {
- if( (j == 0) || (j == p_pic->p[Y_PLANE].i_visible_pitch - 1) )
- {
- p_out[i * i_out_pitch + j] = p_src[i * i_src_pitch + j];
- continue ;
- }
+ p_out[i * i_out_pitch] = p_src[i * i_src_pitch];
+ for( j = 1; j < i_visible_pitch - 1; j++ )
+ {
pix = (p_src[(i - 1) * i_src_pitch + j - 1] * v1) +
(p_src[(i - 1) * i_src_pitch + j ] * v1) +
(p_src[(i - 1) * i_src_pitch + j + 1] * v1) +
@@ -223,10 +219,16 @@ static picture_t *Filter( filter_t *p_filter, picture_t *p_pic )
(p_src[(i + 1) * i_src_pitch + j + 1] * v1);
pix = pix >= 0 ? clip(pix) : -clip(pix * -1);
- p_out[i * i_out_pitch + j] = clip( p_src[i * i_src_pitch + j] +
- p_filter->p_sys->tab_precalc[pix + 256] );
+ p_out[i * i_out_pitch + j] = clip( p_src[i * i_src_pitch + j]
+ + ((pix * sigma) >> 20));
}
+
+ p_out[i * i_out_pitch + i_visible_pitch - 1] =
+ p_src[i * i_src_pitch + i_visible_pitch - 1];
}
+ memcpy(&p_out[(i_visible_lines - 1) * i_out_pitch],
+ &p_src[(i_visible_lines - 1) * i_src_pitch], i_visible_pitch);
+
vlc_mutex_unlock( &p_filter->p_sys->lock );
plane_CopyPixels( &p_outpic->p[U_PLANE], &p_pic->p[U_PLANE] );
More information about the vlc-commits mailing list