[x265] [PATCH 285 of 307] x86: AVX512 optimise cpy1Dto2D_shr

mythreyi at multicorewareinc.com mythreyi at multicorewareinc.com
Sat Apr 7 04:34:43 CEST 2018


# HG changeset patch
# User Vignesh Vijayakumar <vignesh at multicorewareinc.com>
# Date 1515559539 -19800
#      Wed Jan 10 10:15:39 2018 +0530
# Node ID a4d60c45fdce6797486f25f5f319615b25bd86f0
# Parent  a5d29083237f28a944143862f980960c3f2b15ff
x86: AVX512 optimise cpy1Dto2D_shr
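Perform the shifts on xmm registers where possible: build the constant in xm1
(pcmpeqw/psllw/psraw) and fill m1 with vpbroadcastw, replacing the ymm shifts
followed by vinserti32x8.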

diff -r a5d29083237f -r a4d60c45fdce source/common/x86/blockcopy8.asm
--- a/source/common/x86/blockcopy8.asm	Mon Jan 08 16:26:59 2018 +0530
+++ b/source/common/x86/blockcopy8.asm	Wed Jan 10 10:15:39 2018 +0530
@@ -6781,10 +6781,10 @@
 cglobal cpy1Dto2D_shr_16, 3, 5, 4
     shl                 r2d,             1
     movd                xm0,             r3m
-    pcmpeqw             ymm1,            ymm1
-    psllw               ym1,             ymm1,       xm0
-    psraw               ym1,             1
-    vinserti32x8        m1,              ym1,        1
+    pcmpeqw             xmm1,            xmm1
+    psllw               xm1,             xmm1,       xm0
+    psraw               xm1,             1
+    vpbroadcastw         m1,              xm1
     mov                 r3d,             4
     lea                 r4,              [r2 * 3]
 
@@ -6903,10 +6903,10 @@
 cglobal cpy1Dto2D_shr_32, 3, 4, 6
     shl                 r2d,             1
     movd                xm0,             r3m
-    pcmpeqw             ymm1,            ymm1
-    psllw               ym1,             ymm1,       xm0
-    psraw               ym1,             1
-    vinserti32x8        m1,              ym1,        1
+    pcmpeqw             xmm1,            xmm1
+    psllw               xm1,             xmm1,       xm0
+    psraw               xm1,             1
+    vpbroadcastw        m1,              xm1
     mov                 r3d,             16
 
 .loop:


More information about the x265-devel mailing list