[x265] [PATCH 291 of 307] x86: AVX512 pixel_satd_64xN

mythreyi at multicorewareinc.com mythreyi at multicorewareinc.com
Sat Apr 7 04:34:49 CEST 2018


# HG changeset patch
# User Vignesh Vijayakumar<vignesh at multicorewareinc.com>
# Date 1515750476 -19800
#      Fri Jan 12 15:17:56 2018 +0530
# Node ID e4983d90f403d968d6760ae044f86a7a2e1865a2
# Parent  1c2875198a213a5f8d84bff57fcec15727f94a4f
x86: AVX512 pixel_satd_64xN

Size   | AVX2 performance | AVX512 performance
-----------------------------------------------
64x16  |    10.73x         |    13.02x
64x32  |    11.13x         |    13.21x
64x48  |    11.13x         |    13.19x
64x64  |    11.36x         |    13.78x

diff -r 1c2875198a21 -r e4983d90f403 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Fri Jan 12 13:52:25 2018 +0530
+++ b/source/common/x86/asm-primitives.cpp	Fri Jan 12 15:17:56 2018 +0530
@@ -5347,7 +5347,10 @@
         p.pu[LUMA_32x24].satd = PFX(pixel_satd_32x24_avx512);
         p.pu[LUMA_32x32].satd = PFX(pixel_satd_32x32_avx512);
         p.pu[LUMA_32x64].satd = PFX(pixel_satd_32x64_avx512);
-
+        p.pu[LUMA_64x16].satd = PFX(pixel_satd_64x16_avx512);
+        p.pu[LUMA_64x32].satd = PFX(pixel_satd_64x32_avx512);
+        p.pu[LUMA_64x48].satd = PFX(pixel_satd_64x48_avx512);
+        p.pu[LUMA_64x64].satd = PFX(pixel_satd_64x64_avx512);
         p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].satd = PFX(pixel_satd_32x32_avx512);
         p.chroma[X265_CSP_I420].pu[CHROMA_420_32x16].satd = PFX(pixel_satd_32x16_avx512);
         p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].satd = PFX(pixel_satd_32x24_avx512);
diff -r 1c2875198a21 -r e4983d90f403 source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm	Fri Jan 12 13:52:25 2018 +0530
+++ b/source/common/x86/pixel-a.asm	Fri Jan 12 15:17:56 2018 +0530
@@ -14163,6 +14163,38 @@
 SATD_32xN_AVX512 32
 SATD_32xN_AVX512 48
 SATD_32xN_AVX512 64
+
+%macro SATD_64xN_AVX512 1                  ; %1 = block height in rows; emits pixel_satd_64x%1 built from two 32-column passes
+INIT_ZMM avx512
+cglobal pixel_satd_64x%1, 4,8,8            ; x86inc cglobal: 4 args, 8 GPRs (r0-r7), 8 vector regs declared
+    lea             r4, [3 * r1]           ; r4 = 3 * r1 (presumably 3x the stride paired with r0 - confirm against caller)
+    lea             r5, [3 * r3]           ; r5 = 3 * r3 (presumably 3x the stride paired with r2 - confirm against caller)
+    pxor            m6, m6                 ; m6 = 0; presumably the running sum used by the helper macros (defined outside this hunk)
+    mov             r6, r0                 ; keep the original r0 so the second pass can restart from the top row
+    mov             r7, r2                 ; keep the original r2 for the same reason
+
+%rep %1/4 - 1                              ; first pass: every 4-row group except the last
+    PROCESS_SATD_32x4_AVX512               ; helper defined elsewhere; by its name handles a 32x4 tile at r0/r2
+    lea             r0, [r0 + 4 * r1]      ; step r0 down 4 rows
+    lea             r2, [r2 + 4 * r3]      ; step r2 down 4 rows
+%endrep
+    PROCESS_SATD_32x4_AVX512               ; last 4-row group of the first pass; no pointer step needed afterwards
+    lea             r0, [r6 + mmsize/2]    ; restart at the saved base + mmsize/2 bytes (32 if mmsize is 64 under INIT_ZMM)
+    lea             r2, [r7 + mmsize/2]    ; same offset applied to the second pointer
+%rep %1/4 - 1                              ; second pass: same row walk over the offset columns
+    PROCESS_SATD_32x4_AVX512
+    lea             r0, [r0 + 4 * r1]      ; step r0 down 4 rows
+    lea             r2, [r2 + 4 * r3]      ; step r2 down 4 rows
+%endrep
+    PROCESS_SATD_32x4_AVX512               ; last 4-row group of the second pass
+    SATD_MAIN_AVX512_END                   ; helper defined elsewhere; presumably reduces the sum into the return value - confirm
+    RET
+%endmacro
+
+SATD_64xN_AVX512 16                        ; instantiates pixel_satd_64x16
+SATD_64xN_AVX512 32                        ; instantiates pixel_satd_64x32
+SATD_64xN_AVX512 48                        ; instantiates pixel_satd_64x48
+SATD_64xN_AVX512 64                        ; instantiates pixel_satd_64x64
 %endif ; ARCH_X86_64 == 1 && HIGH_BIT_DEPTH == 0
 %if ARCH_X86_64 == 1 && HIGH_BIT_DEPTH == 1
 INIT_YMM avx2


More information about the x265-devel mailing list