[x265] [PATCH 254 of 307] x86: AVX512 pixel_satd_48x64 for high bit depth
mythreyi at multicorewareinc.com
mythreyi at multicorewareinc.com
Sat Apr 7 04:34:12 CEST 2018
# HG changeset patch
# User Vignesh Vijayakumar<vignesh at multicorewareinc.com>
# Date 1513073695 -19800
# Tue Dec 12 15:44:55 2017 +0530
# Node ID b858f80e3ff03118abb1ef3e4ea56059f9ec5af4
# Parent 75d5a01d97daad790cecd35b40ff4b0e4cc34cac
x86: AVX512 pixel_satd_48x64 for high bit depth
AVX2 performance : 13.40x
AVX512 performance : 18.26x
diff -r 75d5a01d97da -r b858f80e3ff0 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Thu Apr 05 19:25:07 2018 -0700
+++ b/source/common/x86/asm-primitives.cpp Tue Dec 12 15:44:55 2017 +0530
@@ -3039,6 +3039,7 @@
p.pu[LUMA_64x32].satd = PFX(pixel_satd_64x32_avx512);
p.pu[LUMA_64x48].satd = PFX(pixel_satd_64x48_avx512);
p.pu[LUMA_64x64].satd = PFX(pixel_satd_64x64_avx512);
+ p.pu[LUMA_48x64].satd = PFX(pixel_satd_48x64_avx512);
p.chroma[X265_CSP_I420].pu[CHROMA_420_16x32].satd = PFX(pixel_satd_16x32_avx512);
p.chroma[X265_CSP_I420].pu[CHROMA_420_16x16].satd = PFX(pixel_satd_16x16_avx512);
p.chroma[X265_CSP_I420].pu[CHROMA_420_16x8].satd = PFX(pixel_satd_16x8_avx512);
diff -r 75d5a01d97da -r b858f80e3ff0 source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm Thu Apr 05 19:25:07 2018 -0700
+++ b/source/common/x86/pixel-a.asm Tue Dec 12 15:44:55 2017 +0530
@@ -14150,6 +14150,32 @@
SATD_32xN_HBD_AVX512 24
SATD_32xN_HBD_AVX512 32
SATD_32xN_HBD_AVX512 64
+INIT_ZMM avx512 ; x86inc: use ZMM registers and the _avx512 name suffix for what follows
+cglobal pixel_satd_48x64, 4,10,8 ; 48x64 SATD, high bit depth per the patch title; x86inc counts: 4 args, 10 GPRs (r0-r9), 8 vector regs
+ add r1d, r1d ; double the stride paired with r0 -- presumably pixels -> bytes for 2-byte samples (TODO confirm pixel size)
+ add r3d, r3d ; same doubling for the stride paired with r2
+ lea r4, [3 * r1] ; r4 = 3 * stride(r1); not read in this body, presumably consumed by the PROCESS_SATD_* macros
+ lea r5, [3 * r3] ; r5 = 3 * stride(r3); same assumption as r4
+ pxor m6, m6 ; zero m6 -- presumably the running sum the macros accumulate into (verify against macro bodies)
+ mov r8, r0 ; keep the original r0 so the second column pass can restart from the top row
+ mov r9, r2 ; keep the original r2 for the same reason
+
+%rep 15 ; first pass: 15 unrolled 4-row steps here + 1 final step below = 16 * 4 = 64 rows
+ PROCESS_SATD_32x4_HBD_AVX512 ; macro defined elsewhere; by its name handles 32 columns x 4 rows at r0/r2
+ lea r0, [r0 + 4 * r1] ; advance the r0 pointer by 4 rows
+ lea r2, [r2 + 4 * r3] ; advance the r2 pointer by 4 rows
+%endrep
+ PROCESS_SATD_32x4_HBD_AVX512 ; 16th and last 4-row step; no pointer advance needed afterwards
+ lea r0, [r8 + mmsize] ; back to the top row, mmsize bytes to the right (64 under INIT_ZMM = 32 samples if pixels are 2 bytes -- TODO confirm)
+ lea r2, [r9 + mmsize] ; same column offset for the second block pointer
+%rep 7 ; second pass: 7 unrolled 8-row steps here + 1 final step below = 8 * 8 = 64 rows
+ PROCESS_SATD_16x8_HBD_AVX512 ; macro defined elsewhere; by its name handles the remaining 16 columns x 8 rows
+ lea r0, [r6 + 4 * r1] ; r6 is never written in this body: presumably the macro leaves r6 = r0 + 4*stride, making this an 8-row advance (verify)
+ lea r2, [r7 + 4 * r3] ; likewise relies on r7 being set by the macro (verify)
+%endrep
+ PROCESS_SATD_16x8_HBD_AVX512 ; 8th and last 8-row step
+ SATD_HBD_AVX512_END ; macro defined elsewhere; presumably reduces the accumulator into the return register (TODO confirm)
+ RET
%macro SATD_64xN_HBD_AVX512 1
INIT_ZMM avx512
More information about the x265-devel
mailing list