[vlc-devel] [PATCH 23/25] deinterlace: add render_linear AVX2 asm
Victorien Le Couviour--Tuffet
victorien.lecouviour.tuffet at gmail.com
Tue Apr 14 12:40:34 CEST 2020
8-bit:
    SSE2: 47257 cycles
    AVX2: 40519 cycles
16-bit:
    SSE2: 95914 cycles
    AVX2: 77356 cycles
---
modules/video_filter/deinterlace/algo_basic.c | 8 ++++++++
modules/video_filter/deinterlace/algo_basic_x86.asm | 8 ++++++++
2 files changed, 16 insertions(+)
diff --git a/modules/video_filter/deinterlace/algo_basic.c b/modules/video_filter/deinterlace/algo_basic.c
index 2ed07079eb..3e075bbeda 100644
--- a/modules/video_filter/deinterlace/algo_basic.c
+++ b/modules/video_filter/deinterlace/algo_basic.c
@@ -236,6 +236,10 @@ RENDER_LINEAR(Merge16BitGeneric, 16, c)
RENDER_LINEAR_SIMD(8, sse2)
RENDER_LINEAR_SIMD(16, sse2)
#endif
+#ifdef __x86_64__
+RENDER_LINEAR_SIMD(8, avx2)
+RENDER_LINEAR_SIMD(16, avx2)
+#endif
#if defined(CAN_COMPILE_ARM)
RENDER_LINEAR_ARM(8, arm_neon)
RENDER_LINEAR_ARM(16, arm_neon)
@@ -253,6 +257,10 @@ RENDER_LINEAR_ARM(16, arm64_neon)
ordered_renderer_t LinearRenderer(unsigned pixel_size)
{
+#ifdef __x86_64__
+ if (vlc_CPU_AVX2())
+ return pixel_size & 1 ? RenderLinear8Bit_avx2 : RenderLinear16Bit_avx2;
+#endif
#if defined(__i386__) || defined(__x86_64__)
if (vlc_CPU_SSE2())
return pixel_size & 1 ? RenderLinear8Bit_sse2 : RenderLinear16Bit_sse2;
diff --git a/modules/video_filter/deinterlace/algo_basic_x86.asm b/modules/video_filter/deinterlace/algo_basic_x86.asm
index 19db8ba08d..199d19e7a2 100644
--- a/modules/video_filter/deinterlace/algo_basic_x86.asm
+++ b/modules/video_filter/deinterlace/algo_basic_x86.asm
@@ -139,3 +139,11 @@ DEINT_BLEND 8
DEINT_LINEAR 16
DEINT_MEAN 16
DEINT_BLEND 16
+
+%if ARCH_X86_64
+INIT_YMM avx2
+
+DEINT_LINEAR 8
+
+DEINT_LINEAR 16
+%endif
--
2.24.1
More information about the vlc-devel mailing list