[x265] [PATCH 265 of 307] x86: AVX512 intra_pred_ang32x32 mode 2 and 34 for high bit depth
mythreyi at multicorewareinc.com
mythreyi at multicorewareinc.com
Sat Apr 7 04:34:23 CEST 2018
# HG changeset patch
# User Vignesh Vijayakumar<vignesh at multicorewareinc.com>
# Date 1513084512 -19800
# Tue Dec 12 18:45:12 2017 +0530
# Node ID 3d780e0d48827cd1cc4e664c3bf96dce6f515810
# Parent 0b38182facb66543adfbf9664cda085f7e641327
x86: AVX512 intra_pred_ang32x32 mode 2 and 34 for high bit depth
Primitive | AVX2 performance | AVX512 performance
-------------------------------------------------------------
intra_ang_32x32[ 2] | 24.45x | 39.65x
intra_ang_32x32[34] | 3.10x | 5.26x
diff -r 0b38182facb6 -r 3d780e0d4882 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Thu Dec 14 11:36:42 2017 +0530
+++ b/source/common/x86/asm-primitives.cpp Tue Dec 12 18:45:12 2017 +0530
@@ -3059,8 +3059,10 @@
p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].satd = PFX(pixel_satd_32x64_avx512);
p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].satd = PFX(pixel_satd_32x32_avx512);
p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].satd = PFX(pixel_satd_32x16_avx512);
+
p.cu[BLOCK_32x32].intra_pred[DC_IDX] = PFX(intra_pred_dc32_avx512);
-
+ p.cu[BLOCK_32x32].intra_pred[2] = PFX(intra_pred_ang32_2_avx512);
+ p.cu[BLOCK_32x32].intra_pred[34] = PFX(intra_pred_ang32_2_avx512);
p.pu[LUMA_64x64].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x64>;
p.pu[LUMA_64x48].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x48>;
p.pu[LUMA_64x32].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x32>;
diff -r 0b38182facb6 -r 3d780e0d4882 source/common/x86/intrapred.h
--- a/source/common/x86/intrapred.h Thu Dec 14 11:36:42 2017 +0530
+++ b/source/common/x86/intrapred.h Tue Dec 12 18:45:12 2017 +0530
@@ -85,7 +85,7 @@
DECL_ALL(ssse3);
DECL_ALL(sse4);
DECL_ALL(avx2);
-
+DECL_ALL(avx512);
#undef DECL_ALL
#undef DECL_ANGS
#undef DECL_ANG
diff -r 0b38182facb6 -r 3d780e0d4882 source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm Thu Dec 14 11:36:42 2017 +0530
+++ b/source/common/x86/intrapred16.asm Tue Dec 12 18:45:12 2017 +0530
@@ -18296,7 +18296,102 @@
;-------------------------------------------------------------------------------------------------------
; end of avx2 code for intra_pred_ang32 mode 2 to 34
;-------------------------------------------------------------------------------------------------------
-
+;-------------------------------------------------------------------------------------------------------
+; avx512 code for intra_pred_ang32 mode 2 and 34 start (32 rows of 64 bytes; row k = 64 bytes at r2 + 4 + 2*k)
+;-------------------------------------------------------------------------------------------------------
+INIT_ZMM avx512
+cglobal intra_pred_ang32_2, 3,5,3 ; x86inc: 3 args loaded (r0-r2), 5 GPRs (r0-r4), 3 vector regs (m0-m2)
+ lea r4, [r2] ; r4 = incoming r2, kept for the mode-34 case
+ add r2, 128 ; default: read from r2 + 128 bytes
+ cmp r3m, byte 34 ; r3m = 4th argument - NOTE(review): presumably dirMode, confirm against the C prototype
+ cmove r2, r4 ; 4th argument == 34: undo the +128 and read from the incoming r2
+ add r1d, r1d ; r1 *= 2 - presumably stride in 16-bit pixels -> bytes; TODO confirm
+ lea r3, [r1 * 3] ; r3 = 3 * r1 (r3 is free: flags from cmp were already consumed by cmove)
+ movu m0, [r2 + 4] ; m0 = 64 bytes at r2 + 4
+ movu m1, [r2 + 20] ; m1 = 64 bytes at r2 + 20, i.e. m0's window advanced by one 128-bit lane
+
+ movu [r0], m0 ; row 0 <- bytes at r2 + 4
+ palignr m2, m1, m0, 2 ; per lane: (m1:m0) >> 2 bytes; lanes join up because m1 = m0 + 16 bytes
+ movu [r0 + r1], m2 ; row 1 <- bytes at r2 + 6
+ palignr m2, m1, m0, 4
+ movu [r0 + r1 * 2], m2 ; row 2 <- bytes at r2 + 8
+ palignr m2, m1, m0, 6
+ movu [r0 + r3], m2 ; row 3 <- bytes at r2 + 10
+
+ lea r0, [r0 + r1 * 4] ; r0 advances by 4 rows
+ palignr m2, m1, m0, 8
+ movu [r0], m2 ; row 4 <- bytes at r2 + 12
+ palignr m2, m1, m0, 10
+ movu [r0 + r1], m2 ; row 5 <- bytes at r2 + 14
+ palignr m2, m1, m0, 12
+ movu [r0 + r1 * 2], m2 ; row 6 <- bytes at r2 + 16
+ palignr m2, m1, m0, 14
+ movu [r0 + r3], m2 ; row 7 <- bytes at r2 + 18
+
+ movu m0, [r2 + 36] ; m0 = m1's window advanced by 16 bytes; m1 is now the low operand
+ lea r0, [r0 + r1 * 4] ; r0 advances by 4 rows
+ movu [r0], m1 ; row 8 <- bytes at r2 + 20
+ palignr m2, m0, m1, 2
+ movu [r0 + r1], m2 ; row 9 <- bytes at r2 + 22
+ palignr m2, m0, m1, 4
+ movu [r0 + r1 * 2], m2 ; row 10 <- bytes at r2 + 24
+ palignr m2, m0, m1, 6
+ movu [r0 + r3], m2 ; row 11 <- bytes at r2 + 26
+
+ lea r0, [r0 + r1 * 4] ; r0 advances by 4 rows
+ palignr m2, m0, m1, 8
+ movu [r0], m2 ; row 12 <- bytes at r2 + 28
+ palignr m2, m0, m1, 10
+ movu [r0 + r1], m2 ; row 13 <- bytes at r2 + 30
+ palignr m2, m0, m1, 12
+ movu [r0 + r1 * 2], m2 ; row 14 <- bytes at r2 + 32
+ palignr m2, m0, m1, 14
+ movu [r0 + r3], m2 ; row 15 <- bytes at r2 + 34
+
+ lea r0, [r0 + r1 * 4] ; r0 advances by 4 rows
+ movu m1, [r2 + 52] ; m1 = m0's window advanced by 16 bytes; m0 is the low operand again
+
+ movu [r0], m0 ; row 16 <- bytes at r2 + 36
+ palignr m2, m1, m0, 2
+ movu [r0 + r1], m2 ; row 17 <- bytes at r2 + 38
+ palignr m2, m1, m0, 4
+ movu [r0 + r1 * 2], m2 ; row 18 <- bytes at r2 + 40
+ palignr m2, m1, m0, 6
+ movu [r0 + r3], m2 ; row 19 <- bytes at r2 + 42
+
+ lea r0, [r0 + r1 * 4] ; r0 advances by 4 rows
+ palignr m2, m1, m0, 8
+ movu [r0], m2 ; row 20 <- bytes at r2 + 44
+ palignr m2, m1, m0, 10
+ movu [r0 + r1], m2 ; row 21 <- bytes at r2 + 46
+ palignr m2, m1, m0, 12
+ movu [r0 + r1 * 2], m2 ; row 22 <- bytes at r2 + 48
+ palignr m2, m1, m0, 14
+ movu [r0 + r3], m2 ; row 23 <- bytes at r2 + 50
+
+ movu m0, [r2 + 68] ; last load: covers bytes r2 + 68 .. r2 + 131
+ lea r0, [r0 + r1 * 4] ; r0 advances by 4 rows
+ movu [r0], m1 ; row 24 <- bytes at r2 + 52
+ palignr m2, m0, m1, 2
+ movu [r0 + r1], m2 ; row 25 <- bytes at r2 + 54
+ palignr m2, m0, m1, 4
+ movu [r0 + r1 * 2], m2 ; row 26 <- bytes at r2 + 56
+ palignr m2, m0, m1, 6
+ movu [r0 + r3], m2 ; row 27 <- bytes at r2 + 58
+
+ lea r0, [r0 + r1 * 4] ; r0 advances by 4 rows
+ palignr m2, m0, m1, 8
+ movu [r0], m2 ; row 28 <- bytes at r2 + 60
+ palignr m2, m0, m1, 10
+ movu [r0 + r1], m2 ; row 29 <- bytes at r2 + 62
+ palignr m2, m0, m1, 12
+ movu [r0 + r1 * 2], m2 ; row 30 <- bytes at r2 + 64
+ palignr m2, m0, m1, 14
+ movu [r0 + r3], m2 ; row 31 <- bytes at r2 + 66
+ RET
+;-------------------------------------------------------------------------------------------------------
+; avx512 code for intra_pred_ang32 mode 2 and 34 end
+;-------------------------------------------------------------------------------------------------------
%macro MODE_2_34 0
movu m0, [r2 + 4]
movu m1, [r2 + 20]
More information about the x265-devel
mailing list