[x265] [PATCH 285 of 307] x86: AVX512 optimise cpy1Dto2D_shr
mythreyi at multicorewareinc.com
mythreyi at multicorewareinc.com
Sat Apr 7 04:34:43 CEST 2018
# HG changeset patch
# User Vignesh Vijayakumar <vignesh at multicorewareinc.com>
# Date 1515559539 -19800
# Wed Jan 10 10:15:39 2018 +0530
# Node ID a4d60c45fdce6797486f25f5f319615b25bd86f0
# Parent a5d29083237f28a944143862f980960c3f2b15ff
x86: AVX512 optimise cpy1Dto2D_shr
Perform the shift in xmm registers where possible, then broadcast the result to the full zmm register with vpbroadcastw
diff -r a5d29083237f -r a4d60c45fdce source/common/x86/blockcopy8.asm
--- a/source/common/x86/blockcopy8.asm Mon Jan 08 16:26:59 2018 +0530
+++ b/source/common/x86/blockcopy8.asm Wed Jan 10 10:15:39 2018 +0530
@@ -6781,10 +6781,10 @@
cglobal cpy1Dto2D_shr_16, 3, 5, 4
shl r2d, 1
movd xm0, r3m
- pcmpeqw ymm1, ymm1
- psllw ym1, ymm1, xm0
- psraw ym1, 1
- vinserti32x8 m1, ym1, 1
+ pcmpeqw xmm1, xmm1
+ psllw xm1, xmm1, xm0
+ psraw xm1, 1
+ vpbroadcastw m1, xm1
mov r3d, 4
lea r4, [r2 * 3]
@@ -6903,10 +6903,10 @@
cglobal cpy1Dto2D_shr_32, 3, 4, 6
shl r2d, 1
movd xm0, r3m
- pcmpeqw ymm1, ymm1
- psllw ym1, ymm1, xm0
- psraw ym1, 1
- vinserti32x8 m1, ym1, 1
+ pcmpeqw xmm1, xmm1
+ psllw xm1, xmm1, xm0
+ psraw xm1, 1
+ vpbroadcastw m1, xm1
mov r3d, 16
.loop:
More information about the x265-devel mailing list