[x265] [PATCH 254 of 307] x86: AVX512 pixel_satd_48x64 for high bit depth

mythreyi at multicorewareinc.com mythreyi at multicorewareinc.com
Sat Apr 7 04:34:12 CEST 2018


# HG changeset patch
# User Vignesh Vijayakumar<vignesh at multicorewareinc.com>
# Date 1513073695 -19800
#      Tue Dec 12 15:44:55 2017 +0530
# Node ID b858f80e3ff03118abb1ef3e4ea56059f9ec5af4
# Parent  75d5a01d97daad790cecd35b40ff4b0e4cc34cac
x86: AVX512 pixel_satd_48x64 for high bit depth

AVX2 performance  :  13.40x
AVX512 performance : 18.26x

diff -r 75d5a01d97da -r b858f80e3ff0 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Thu Apr 05 19:25:07 2018 -0700
+++ b/source/common/x86/asm-primitives.cpp	Tue Dec 12 15:44:55 2017 +0530
@@ -3039,6 +3039,7 @@
         p.pu[LUMA_64x32].satd = PFX(pixel_satd_64x32_avx512);
         p.pu[LUMA_64x48].satd = PFX(pixel_satd_64x48_avx512);
         p.pu[LUMA_64x64].satd = PFX(pixel_satd_64x64_avx512);
+        p.pu[LUMA_48x64].satd = PFX(pixel_satd_48x64_avx512);
         p.chroma[X265_CSP_I420].pu[CHROMA_420_16x32].satd = PFX(pixel_satd_16x32_avx512);
         p.chroma[X265_CSP_I420].pu[CHROMA_420_16x16].satd = PFX(pixel_satd_16x16_avx512);
         p.chroma[X265_CSP_I420].pu[CHROMA_420_16x8].satd = PFX(pixel_satd_16x8_avx512);
diff -r 75d5a01d97da -r b858f80e3ff0 source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm	Thu Apr 05 19:25:07 2018 -0700
+++ b/source/common/x86/pixel-a.asm	Tue Dec 12 15:44:55 2017 +0530
@@ -14150,6 +14150,32 @@
 SATD_32xN_HBD_AVX512 24
 SATD_32xN_HBD_AVX512 32
 SATD_32xN_HBD_AVX512 64
+INIT_ZMM avx512                                 ; build the following function for 512-bit ZMM registers / AVX-512
+cglobal pixel_satd_48x64, 4,10,8                ; 48x64 SATD, high bit depth; per x86inc convention: 4 args, 10 GPRs (r0-r9), 8 vector regs
+    add             r1d, r1d                    ; double r1 (presumably pixel stride -> byte stride for 16-bit samples; confirm)
+    add             r3d, r3d                    ; same doubling for r3, the stride paired with r2
+    lea             r4, [3 * r1]                ; r4 = 3 * r1 (not read in this function; presumably used by the PROCESS_* macros)
+    lea             r5, [3 * r3]                ; r5 = 3 * r3 (likewise for the r2 side)
+    pxor            m6, m6                      ; zero m6 (presumably the running sum the macros add into; confirm)
+    mov             r8, r0                      ; remember the original r0 so the second pass can restart from row 0
+    mov             r9, r2                      ; remember the original r2 for the same reason

+%rep 15                                         ; first pass: 15 + 1 macro calls, 4 rows each = 64 rows of the 32-wide part
+    PROCESS_SATD_32x4_HBD_AVX512                ; macro defined outside this hunk; name suggests a 32x4 SATD step
+    lea             r0, [r0 + 4 * r1]           ; move r0 down 4 rows
+    lea             r2, [r2 + 4 * r3]           ; move r2 down 4 rows
+%endrep
+    PROCESS_SATD_32x4_HBD_AVX512                ; 16th and last 32x4 step; no pointer advance needed afterwards
+    lea             r0, [r8 + mmsize]           ; back to row 0, mmsize bytes to the right (64 bytes = 32 16-bit pixels if mmsize is 64)
+    lea             r2, [r9 + mmsize]           ; same column offset for the r2 side
+%rep 7                                          ; second pass: 7 + 1 macro calls for the remaining 16-wide column
+    PROCESS_SATD_16x8_HBD_AVX512                ; macro defined outside this hunk; name suggests a 16x8 SATD step
+    lea             r0, [r6 + 4 * r1]           ; NOTE(review): r6 is not written here; presumably the macro leaves it 4 rows below r0 - confirm
+    lea             r2, [r7 + 4 * r3]           ; NOTE(review): same assumption for r7 relative to r2
+%endrep
+    PROCESS_SATD_16x8_HBD_AVX512                ; 8th and last 16x8 step
+    SATD_HBD_AVX512_END                         ; macro defined elsewhere; presumably reduces the accumulated sum into the return value
+    RET
 
 %macro SATD_64xN_HBD_AVX512 1
 INIT_ZMM avx512


More information about the x265-devel mailing list