[vlc-commits] sepia: fix lack of Y adjustment
Lyndon Brown
git at videolan.org
Wed Mar 20 15:34:48 CET 2019
vlc | branch: master | Lyndon Brown <jnqnfe at gmail.com> | Sun Mar 17 00:55:19 2019 +0000| [464e2d440348d74904c53c42aeb366dce78e3394] | committer: Thomas Guillem
sepia: fix lack of Y adjustment
The SIMD-accelerated PlanarI420 implementation contained mistakes that
meant Y was actually being written untouched.
Signed-off-by: Thomas Guillem <thomas at gllm.fr>
> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=464e2d440348d74904c53c42aeb366dce78e3394
---
modules/video_filter/sepia.c | 36 ++++++++++++++++--------------------
1 file changed, 16 insertions(+), 20 deletions(-)
diff --git a/modules/video_filter/sepia.c b/modules/video_filter/sepia.c
index 5f4a4c61fb..9ca567c614 100644
--- a/modules/video_filter/sepia.c
+++ b/modules/video_filter/sepia.c
@@ -201,24 +201,22 @@ static picture_t *Filter( filter_t *p_filter, picture_t *p_pic )
*****************************************************************************/
VLC_SSE
static inline void Sepia8ySSE2(uint8_t * dst, const uint8_t * src,
- int i_intensity_spread)
+ int i_intensity_shifted_pair)
{
__asm__ volatile (
// y = y - y / 4 + i_intensity / 4
"movq (%1), %%xmm1\n"
- "punpcklbw %%xmm7, %%xmm1\n"
- "movq (%1), %%xmm2\n" // store bytes as words with 0s in between
- "punpcklbw %%xmm7, %%xmm2\n"
+ "punpcklbw %%xmm7, %%xmm1\n" // zero-extend bytes to words
+ "movdqa %%xmm1, %%xmm2\n" // copy it
"movd %2, %%xmm3\n"
"pshufd $0, %%xmm3, %%xmm3\n"
- "psrlw $2, %%xmm2\n" // rotate right 2
- "psubusb %%xmm1, %%xmm2\n" // subtract
- "psrlw $2, %%xmm3\n"
- "paddsb %%xmm1, %%xmm3\n" // add
- "packuswb %%xmm2, %%xmm1\n" // pack back to bytes
- "movq %%xmm1, (%0) \n" // load to dest
+ "psrlw $2, %%xmm2\n" // get 1/4 of it
+ "psubusb %%xmm2, %%xmm1\n"
+ "paddusb %%xmm3, %%xmm1\n"
+ "packuswb %%xmm1, %%xmm1\n" // pack back to bytes
+ "movq %%xmm1, (%0) \n"
:
- :"r" (dst), "r"(src), "r"(i_intensity_spread)
+ :"r" (dst), "r"(src), "r"(i_intensity_shifted_pair)
:"memory", "xmm1", "xmm2", "xmm3");
}
@@ -230,11 +228,9 @@ static void PlanarI420SepiaSSE( picture_t *p_pic, picture_t *p_outpic,
const uint8_t filling_const_8u = 128 - i_intensity / 6;
const uint8_t filling_const_8v = 128 + i_intensity / 14;
/* prepared value for faster broadcasting in xmm register */
- int i_intensity_spread = 0x10001 * (uint8_t) i_intensity;
+ int i_intensity_shifted_pair = 0x10001 * (((uint8_t) i_intensity) >> 2);
- __asm__ volatile(
- "pxor %%xmm7, %%xmm7\n"
- ::: "xmm7");
+ __asm__ volatile("pxor %%xmm7, %%xmm7\n" ::: "xmm7");
/* iterate for every two visible line in the frame */
for (int y = 0; y < p_pic->p[Y_PLANE].i_visible_lines - 1; y += 2)
@@ -250,16 +246,16 @@ static void PlanarI420SepiaSSE( picture_t *p_pic, picture_t *p_outpic,
/* Compute yellow channel values with asm function */
Sepia8ySSE2(&p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x],
&p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x],
- i_intensity_spread );
+ i_intensity_shifted_pair );
Sepia8ySSE2(&p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x],
&p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x],
- i_intensity_spread );
+ i_intensity_shifted_pair );
Sepia8ySSE2(&p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 8],
&p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 8],
- i_intensity_spread );
+ i_intensity_shifted_pair );
Sepia8ySSE2(&p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 8],
&p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 8],
- i_intensity_spread );
+ i_intensity_shifted_pair );
/* Copy precomputed values to destination memory location */
memset(&p_outpic->p[U_PLANE].p_pixels[i_du_line_start + (x / 2)],
filling_const_8u, 8 );
@@ -363,7 +359,7 @@ static void PlanarI420Sepia( picture_t *p_pic, picture_t *p_outpic,
/*****************************************************************************
* PackedYUVSepia: Applies sepia to one frame of the packed YUV video
*****************************************************************************
- * This function applies sepia effext to one frame of the video by iterating
+ * This function applies sepia effect to one frame of the video by iterating
* through video lines. In every pass, we calculate new values for pixels
* (UYVY, VYUY, YUYV and YVYU formats are supported)
*****************************************************************************/
More information about the vlc-commits mailing list