[vlc-devel] [PATCH-fixed] Added assembly (SSE2, SSE4.1) processing functions to adjust filter

Rémi Denis-Courmont remi at remlab.net
Thu Aug 25 08:35:38 CEST 2011


On Thu, 25 Aug 2011 00:11:59 +0200, "Martin Briza" <Gamajun at seznam.cz> wrote:
> From: Martin Briza <xbriza00 at stud.fit.vutbr.cz>
> 
> ---
>   modules/video_filter/adjust.c         |   35 ++-
>   modules/video_filter/adjust_sat_hue.c |  926 +++++++++++++++++++++++++++++++++
>   modules/video_filter/adjust_sat_hue.h |   34 ++
>   3 files changed, 991 insertions(+), 4 deletions(-)
> 
> diff --git a/modules/video_filter/adjust.c b/modules/video_filter/adjust.c
> index c2f6649..a741d24 100644
> --- a/modules/video_filter/adjust.c
> +++ b/modules/video_filter/adjust.c
> @@ -166,15 +166,42 @@ static int Create( vlc_object_t *p_this )
>           CASE_PLANAR_YUV
>               /* Planar YUV */
>               p_filter->pf_video_filter = FilterPlanar;
> -            p_sys->pf_process_sat_hue_clip = planar_sat_hue_clip_C;
> -            p_sys->pf_process_sat_hue = planar_sat_hue_C;
> +#ifdef CAN_COMPILE_SSE4_1
> +            if (vlc_CPU() & CPU_CAPABILITY_SSE4_1)
> +            {
> +                p_sys->pf_process_sat_hue_clip = planar_sat_hue_clip_SSE41;
> +                p_sys->pf_process_sat_hue = planar_sat_hue_SSE2;
> +            }
> +            else
> +#elif defined( CAN_COMPILE_SSE4_1 )
> +            if (vlc_CPU() & CPU_CAPABILITY_SSE2)
> +            {
> +                p_sys->pf_process_sat_hue_clip = planar_sat_hue_clip_C;
> +                p_sys->pf_process_sat_hue = planar_sat_hue_SSE2;
> +            }
> +            else
> +#endif
> +            {
> +                p_sys->pf_process_sat_hue_clip = planar_sat_hue_clip_C;
> +                p_sys->pf_process_sat_hue = planar_sat_hue_C;
> +            }
>               break;
> 
>           CASE_PACKED_YUV_422
>               /* Packed YUV 4:2:2 */
>               p_filter->pf_video_filter = FilterPacked;
> -            p_sys->pf_process_sat_hue_clip = packed_sat_hue_clip_C;
> -            p_sys->pf_process_sat_hue = packed_sat_hue_C;
> +#ifdef CAN_COMPILE_SSE4_1
> +            if (vlc_CPU() & CPU_CAPABILITY_SSE4_1)
> +            {
> +                p_sys->pf_process_sat_hue_clip = packed_sat_hue_clip_SSE41;
> +                p_sys->pf_process_sat_hue = packed_sat_hue_SSE41;
> +            }
> +            else
> +#endif
> +            {
> +                p_sys->pf_process_sat_hue_clip = packed_sat_hue_clip_C;
> +                p_sys->pf_process_sat_hue = packed_sat_hue_C;
> +            }
>               break;
> 
>           default:
> diff --git a/modules/video_filter/adjust_sat_hue.c b/modules/video_filter/adjust_sat_hue.c
> index cbc6f13..75c783f 100644
> --- a/modules/video_filter/adjust_sat_hue.c
> +++ b/modules/video_filter/adjust_sat_hue.c
> @@ -66,10 +66,936 @@
>   #define ADJUST_4_TIMES(x) x; x; x; x
>   #define ADJUST_8_TIMES(x) x; x; x; x; x; x; x; x
> 
> +#ifdef _WIN64
> +#define STORE_XMM_REGISTERS \
> +    static uint64_t xmm_temporary_storage[32]; \
> +    __asm__ volatile( \
> +        "movdqa              %%xmm0,    (%[x])\n" \
> +        "movdqa              %%xmm1,  16(%[x])\n" \
> +        "movdqa              %%xmm2,  32(%[x])\n" \
> +        "movdqa              %%xmm3,  48(%[x])\n" \
> +        "movdqa              %%xmm4,  64(%[x])\n" \
> +        "movdqa              %%xmm5,  80(%[x])\n" \
> +        "movdqa              %%xmm6,  96(%[x])\n" \
> +        "movdqa              %%xmm7, 112(%[x])\n" \
> +        "movdqa              %%xmm8, 128(%[x])\n" \
> +        "movdqa              %%xmm9, 144(%[x])\n" \
> +        "movdqa             %%xmm10, 160(%[x])\n" \
> +        "movdqa             %%xmm11, 176(%[x])\n" \
> +        "movdqa             %%xmm12, 192(%[x])\n" \
> +        "movdqa             %%xmm13, 208(%[x])\n" \
> +        "movdqa             %%xmm14, 224(%[x])\n" \
> +        "movdqa             %%xmm15, 240(%[x])\n" \
> +    : \
> +    : [x] "r" (xmm_temporary_storage) \
> +    : "memory" \
> +    )
> +#else
> +#define STORE_XMM_REGISTERS
> +#endif

Isn't preserving the XMM registers supposed to be dealt with by the clobber
list of the inline assembly, rather than by saving them manually?


-- 
Rémi Denis-Courmont
http://www.remlab.net/



More information about the vlc-devel mailing list