[vlc-devel] [PATCH 25/25] deinterlace: add render_blend AVX2 asm

Victorien Le Couviour--Tuffet victorien.lecouviour.tuffet at gmail.com
Tue Apr 14 12:40:36 CEST 2020


Benchmark results for render_blend (cycle counts, lower is better):

8-bit:
   SSE2: 57423 cycles
   AVX2: 45560 cycles
16-bit:
   SSE2: 111841 cycles
   AVX2:  91020 cycles
---
 modules/video_filter/deinterlace/algo_basic.c       | 9 +++++++++
 modules/video_filter/deinterlace/algo_basic_x86.asm | 2 ++
 2 files changed, 11 insertions(+)

diff --git a/modules/video_filter/deinterlace/algo_basic.c b/modules/video_filter/deinterlace/algo_basic.c
index 17434a66a7..bdfc43cbdc 100644
--- a/modules/video_filter/deinterlace/algo_basic.c
+++ b/modules/video_filter/deinterlace/algo_basic.c
@@ -488,6 +488,10 @@ RENDER_BLEND(Merge16BitGeneric, 16, c)
 RENDER_BLEND_SIMD(8, sse2)
 RENDER_BLEND_SIMD(16, sse2)
 #endif
+#ifdef __x86_64__
+RENDER_BLEND_SIMD(8, avx2)
+RENDER_BLEND_SIMD(16, avx2)
+#endif
 #if defined(CAN_COMPILE_ARM)
 RENDER_BLEND_ARM(8, arm_neon)
 RENDER_BLEND_ARM(16, arm_neon)
@@ -505,6 +509,11 @@ RENDER_BLEND_ARM(16, arm64_neon)
 
 single_pic_renderer_t BlendRenderer(unsigned pixel_size)
 {
+#ifdef __x86_64__
+    if (vlc_CPU_AVX2())
+        return pixel_size & 1 ? RenderBlend8Bit_avx2 : RenderBlend16Bit_avx2;
+    else
+#endif
 #if defined(__i386__) || defined(__x86_64__)
     if (vlc_CPU_SSE2())
         return pixel_size & 1 ? RenderBlend8Bit_sse2 : RenderBlend16Bit_sse2;
diff --git a/modules/video_filter/deinterlace/algo_basic_x86.asm b/modules/video_filter/deinterlace/algo_basic_x86.asm
index 3bb773eac3..c86ca7d4e7 100644
--- a/modules/video_filter/deinterlace/algo_basic_x86.asm
+++ b/modules/video_filter/deinterlace/algo_basic_x86.asm
@@ -145,7 +145,9 @@ INIT_YMM avx2
 
 DEINT_LINEAR 8
 DEINT_MEAN 8
+DEINT_BLEND 8
 
 DEINT_LINEAR 16
 DEINT_MEAN 16
+DEINT_BLEND 16
 %endif
-- 
2.24.1



More information about the vlc-devel mailing list