[x265] [PATCH 291 of 307] x86: AVX512 pixel_satd_64xN
mythreyi at multicorewareinc.com
mythreyi at multicorewareinc.com
Sat Apr 7 04:34:49 CEST 2018
# HG changeset patch
# User Vignesh Vijayakumar<vignesh at multicorewareinc.com>
# Date 1515750476 -19800
# Fri Jan 12 15:17:56 2018 +0530
# Node ID e4983d90f403d968d6760ae044f86a7a2e1865a2
# Parent 1c2875198a213a5f8d84bff57fcec15727f94a4f
x86: AVX512 pixel_satd_64xN
Size | AVX2 performance | AVX512 performance
-----------------------------------------------
64x16 | 10.73x | 13.02x
64x32 | 11.13x | 13.21x
64x48 | 11.13x | 13.19x
64x64 | 11.36x | 13.78x
diff -r 1c2875198a21 -r e4983d90f403 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Fri Jan 12 13:52:25 2018 +0530
+++ b/source/common/x86/asm-primitives.cpp Fri Jan 12 15:17:56 2018 +0530
@@ -5347,7 +5347,10 @@
p.pu[LUMA_32x24].satd = PFX(pixel_satd_32x24_avx512);
p.pu[LUMA_32x32].satd = PFX(pixel_satd_32x32_avx512);
p.pu[LUMA_32x64].satd = PFX(pixel_satd_32x64_avx512);
-
+ p.pu[LUMA_64x16].satd = PFX(pixel_satd_64x16_avx512);
+ p.pu[LUMA_64x32].satd = PFX(pixel_satd_64x32_avx512);
+ p.pu[LUMA_64x48].satd = PFX(pixel_satd_64x48_avx512);
+ p.pu[LUMA_64x64].satd = PFX(pixel_satd_64x64_avx512);
p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].satd = PFX(pixel_satd_32x32_avx512);
p.chroma[X265_CSP_I420].pu[CHROMA_420_32x16].satd = PFX(pixel_satd_32x16_avx512);
p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].satd = PFX(pixel_satd_32x24_avx512);
diff -r 1c2875198a21 -r e4983d90f403 source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm Fri Jan 12 13:52:25 2018 +0530
+++ b/source/common/x86/pixel-a.asm Fri Jan 12 15:17:56 2018 +0530
@@ -14163,6 +14163,38 @@
SATD_32xN_AVX512 32
SATD_32xN_AVX512 48
SATD_32xN_AVX512 64
+
+%macro SATD_64xN_AVX512 1 ; arg 1 = N, the height suffix of the generated pixel_satd_64xN function
+INIT_ZMM avx512
+cglobal pixel_satd_64x%1, 4,8,8 ; x86inc cglobal: 4 arguments (r0-r3), 8 GPRs, 8 vector registers
+ lea r4, [3 * r1] ; r4 = 3 * r1 (r1 is presumably the stride paired with r0 -- confirm against the C prototype)
+ lea r5, [3 * r3] ; r5 = 3 * r3 (r3 is presumably the stride paired with r2 -- confirm)
+ pxor m6, m6 ; zero m6 before the first helper call (presumably the running accumulator -- confirm in PROCESS_SATD_32x4_AVX512)
+ mov r6, r0 ; keep the original r0 so the second pass can restart from it
+ mov r7, r2 ; keep the original r2 for the same reason
+
+%rep %1/4 - 1 ; first pass: every group of the block except the last one
+ PROCESS_SATD_32x4_AVX512
+ lea r0, [r0 + 4 * r1] ; step r0 forward by 4 * r1 for the next group
+ lea r2, [r2 + 4 * r3] ; step r2 forward by 4 * r3 for the next group
+%endrep
+ PROCESS_SATD_32x4_AVX512 ; last group of the first pass; pointers are re-based next, so no advance
+ lea r0, [r6 + mmsize/2] ; restart from the saved r0, offset by mmsize/2 bytes (32 when mmsize is 64 under INIT_ZMM)
+ lea r2, [r7 + mmsize/2] ; same offset applied to the saved r2
+%rep %1/4 - 1 ; second pass: same group sequence over the offset columns
+ PROCESS_SATD_32x4_AVX512
+ lea r0, [r0 + 4 * r1]
+ lea r2, [r2 + 4 * r3]
+%endrep
+ PROCESS_SATD_32x4_AVX512 ; last group of the second pass
+ SATD_MAIN_AVX512_END ; defined outside this hunk; presumably reduces the accumulator into the return value -- confirm
+ RET
+%endmacro
+
+SATD_64xN_AVX512 16 ; instantiate the macro for height 16
+SATD_64xN_AVX512 32 ; instantiate the macro for height 32
+SATD_64xN_AVX512 48 ; instantiate the macro for height 48
+SATD_64xN_AVX512 64 ; instantiate the macro for height 64
%endif ; ARCH_X86_64 == 1 && HIGH_BIT_DEPTH == 0
%if ARCH_X86_64 == 1 && HIGH_BIT_DEPTH == 1
INIT_YMM avx2
More information about the x265-devel
mailing list