[vlc-devel] [PATCH-fixed] Added assembly (SSE2, SSE4.1) processing functions to adjust filter
Rémi Denis-Courmont
remi at remlab.net
Thu Aug 25 08:35:38 CEST 2011
On Thu, 25 Aug 2011 00:11:59 +0200, "Martin Briza" <Gamajun at seznam.cz>
wrote:
> From: Martin Briza <xbriza00 at stud.fit.vutbr.cz>
>
> ---
> modules/video_filter/adjust.c | 35 ++-
> modules/video_filter/adjust_sat_hue.c | 926
> +++++++++++++++++++++++++++++++++
> modules/video_filter/adjust_sat_hue.h | 34 ++
> 3 files changed, 991 insertions(+), 4 deletions(-)
>
> diff --git a/modules/video_filter/adjust.c b/modules/video_filter/adjust.c
> index c2f6649..a741d24 100644
> --- a/modules/video_filter/adjust.c
> +++ b/modules/video_filter/adjust.c
> @@ -166,15 +166,42 @@ static int Create( vlc_object_t *p_this )
> CASE_PLANAR_YUV
> /* Planar YUV */
> p_filter->pf_video_filter = FilterPlanar;
> - p_sys->pf_process_sat_hue_clip = planar_sat_hue_clip_C;
> - p_sys->pf_process_sat_hue = planar_sat_hue_C;
> +#ifdef CAN_COMPILE_SSE4_1
> + if (vlc_CPU() & CPU_CAPABILITY_SSE4_1)
> + {
> + p_sys->pf_process_sat_hue_clip =
> planar_sat_hue_clip_SSE41;
> + p_sys->pf_process_sat_hue = planar_sat_hue_SSE2;
> + }
> + else
> +#elif defined( CAN_COMPILE_SSE4_1 )
> + if (vlc_CPU() & CPU_CAPABILITY_SSE2)
> + {
> + p_sys->pf_process_sat_hue_clip = planar_sat_hue_clip_C;
> + p_sys->pf_process_sat_hue = planar_sat_hue_SSE2;
> + }
> + else
> +#endif
> + {
> + p_sys->pf_process_sat_hue_clip = planar_sat_hue_clip_C;
> + p_sys->pf_process_sat_hue = planar_sat_hue_C;
> + }
> break;
>
> CASE_PACKED_YUV_422
> /* Packed YUV 4:2:2 */
> p_filter->pf_video_filter = FilterPacked;
> - p_sys->pf_process_sat_hue_clip = packed_sat_hue_clip_C;
> - p_sys->pf_process_sat_hue = packed_sat_hue_C;
> +#ifdef CAN_COMPILE_SSE4_1
> + if (vlc_CPU() & CPU_CAPABILITY_SSE4_1)
> + {
> + p_sys->pf_process_sat_hue_clip =
> packed_sat_hue_clip_SSE41;
> + p_sys->pf_process_sat_hue = packed_sat_hue_SSE41;
> + }
> + else
> +#endif
> + {
> + p_sys->pf_process_sat_hue_clip = packed_sat_hue_clip_C;
> + p_sys->pf_process_sat_hue = packed_sat_hue_C;
> + }
> break;
>
> default:
> diff --git a/modules/video_filter/adjust_sat_hue.c
> b/modules/video_filter/adjust_sat_hue.c
> index cbc6f13..75c783f 100644
> --- a/modules/video_filter/adjust_sat_hue.c
> +++ b/modules/video_filter/adjust_sat_hue.c
> @@ -66,10 +66,936 @@
> #define ADJUST_4_TIMES(x) x; x; x; x
> #define ADJUST_8_TIMES(x) x; x; x; x; x; x; x; x
>
> +#ifdef _WIN64
> +#define STORE_XMM_REGISTERS \
> + static uint64_t xmm_temporary_storage[32]; \
> + __asm__ volatile( \
> + "movdqa %%xmm0, (%[x])\n" \
> + "movdqa %%xmm1, 16(%[x])\n" \
> + "movdqa %%xmm2, 32(%[x])\n" \
> + "movdqa %%xmm3, 48(%[x])\n" \
> + "movdqa %%xmm4, 64(%[x])\n" \
> + "movdqa %%xmm5, 80(%[x])\n" \
> + "movdqa %%xmm6, 96(%[x])\n" \
> + "movdqa %%xmm7, 112(%[x])\n" \
> + "movdqa %%xmm8, 128(%[x])\n" \
> + "movdqa %%xmm9, 144(%[x])\n" \
> + "movdqa %%xmm10, 160(%[x])\n" \
> + "movdqa %%xmm11, 176(%[x])\n" \
> + "movdqa %%xmm12, 192(%[x])\n" \
> + "movdqa %%xmm13, 208(%[x])\n" \
> + "movdqa %%xmm14, 224(%[x])\n" \
> + "movdqa %%xmm15, 240(%[x])\n" \
> + : \
> + : [x] "r" (xmm_temporary_storage) \
> + : "memory" \
> + )
> +#else
> +#define STORE_XMM_REGISTERS
> +#endif
Isn't saving the XMM registers by hand like this supposed to be handled by
the clobber list of the inline assembly instead?
--
Rémi Denis-Courmont
http://www.remlab.net/
More information about the vlc-devel mailing list