[x265] [PATCH 273 of 307] x86: AVX512 intra_pred_ang32x32 mode 10 for high bit depth
mythreyi at multicorewareinc.com
mythreyi at multicorewareinc.com
Sat Apr 7 04:34:31 CEST 2018
# HG changeset patch
# User Jayashri Murugan <jayashri at multicorewareinc.com>
# Date 1514366500 -19800
# Wed Dec 27 14:51:40 2017 +0530
# Node ID 8036bbce3d26fbebd3408a7e17a76206275fbde9
# Parent ca3c04bd0a71bb263b8084283acce012f0cc397c
x86: AVX512 intra_pred_ang32x32 mode 10 for high bit depth
Primitive           | AVX2 performance | AVX512 performance
--------------------|------------------|-------------------
intra_ang_32x32[10] | 18.99x           | 29.11x
diff -r ca3c04bd0a71 -r 8036bbce3d26 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Fri Dec 29 09:30:36 2017 +0530
+++ b/source/common/x86/asm-primitives.cpp Wed Dec 27 14:51:40 2017 +0530
@@ -3091,6 +3091,8 @@
p.cu[BLOCK_32x32].intra_pred[DC_IDX] = PFX(intra_pred_dc32_avx512);
p.cu[BLOCK_32x32].intra_pred[2] = PFX(intra_pred_ang32_2_avx512);
p.cu[BLOCK_32x32].intra_pred[34] = PFX(intra_pred_ang32_2_avx512);
+ p.cu[BLOCK_32x32].intra_pred[10] = PFX(intra_pred_ang32_10_avx512);
+
p.pu[LUMA_64x64].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x64>;
p.pu[LUMA_64x48].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x48>;
p.pu[LUMA_64x32].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x32>;
diff -r ca3c04bd0a71 -r 8036bbce3d26 source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm Fri Dec 29 09:30:36 2017 +0530
+++ b/source/common/x86/intrapred16.asm Wed Dec 27 14:51:40 2017 +0530
@@ -18389,6 +18389,100 @@
palignr m2, m0, m1, 14
movu [r0 + r3], m2
RET
+
+cglobal intra_pred_ang32_10, 3,4,2 ; fills 32 rows at r0: row i (0..31) = the 16-bit word at [r2 + mmsize*2 + 2 + 2*i] copied to every lane
+ add r2, mmsize*2 ; move the source base forward by 2*mmsize bytes (128 if mmsize = 64; INIT_ZMM is not visible in this hunk - TODO confirm)
+ add r1d, r1d ; r1 = 2 * r1; presumably converts a stride in samples to bytes for 16-bit pixels (high bit depth) - verify against caller
+ lea r3, [r1 * 3] ; r3 = 3 * stride, used to address the 4th row of each 4-row group
+
+ vpbroadcastw m0, [r2 + 2] ; [1...] row 0 value: one word loaded and replicated across all word lanes of m0
+ vpbroadcastw m1, [r2 + 2 + 2] ; [2...] row 1 value
+ movu [r0], m0 ; store row 0
+ movu [r0 + r1], m1 ; store row 1
+
+ vpbroadcastw m0, [r2 + 2 + 4] ; [3...] row 2 value
+ vpbroadcastw m1, [r2 + 2 + 6] ; [4...] row 3 value
+ movu [r0 + r1 * 2], m0 ; store row 2
+ movu [r0 + r3], m1 ; store row 3
+ lea r0, [r0 + r1 * 4] ; advance dst by 4 rows (r0 now points at row 4)
+
+ vpbroadcastw m0, [r2 + 2 + 8] ; [5...] row 4 value
+ vpbroadcastw m1, [r2 + 2 + 10] ; [6...] row 5 value
+ movu [r0], m0 ; store row 4
+ movu [r0 + r1], m1 ; store row 5
+
+ vpbroadcastw m0, [r2 + 2 + 12] ; [7...] row 6 value
+ vpbroadcastw m1, [r2 + 2 + 14] ; [8...] row 7 value
+ movu [r0 + r1 * 2], m0 ; store row 6
+ movu [r0 + r3], m1 ; store row 7
+ lea r0, [r0 + r1 *4] ; advance dst by 4 rows (row 8)
+
+ vpbroadcastw m0, [r2 + 2 + 16] ; [9...] row 8 value
+ vpbroadcastw m1, [r2 + 2 + 18] ; [10...] row 9 value
+ movu [r0], m0 ; store row 8
+ movu [r0 + r1], m1 ; store row 9
+
+ vpbroadcastw m0, [r2 + 2 + 20] ; [11...] row 10 value
+ vpbroadcastw m1, [r2 + 2 + 22] ; [12...] row 11 value
+ movu [r0 + r1 * 2], m0 ; store row 10
+ movu [r0 + r3], m1 ; store row 11
+ lea r0, [r0 + r1 *4] ; advance dst by 4 rows (row 12)
+
+ vpbroadcastw m0, [r2 + 2 + 24] ; [13...] row 12 value
+ vpbroadcastw m1, [r2 + 2 + 26] ; [14...] row 13 value
+ movu [r0], m0 ; store row 12
+ movu [r0 + r1], m1 ; store row 13
+
+ vpbroadcastw m0, [r2 + 2 + 28] ; [15...] row 14 value
+ vpbroadcastw m1, [r2 + 2 + 30] ; [16...] row 15 value
+ movu [r0 + r1 * 2], m0 ; store row 14
+ movu [r0 + r3], m1 ; store row 15
+ lea r0, [r0 + r1 *4] ; advance dst by 4 rows (row 16)
+
+ vpbroadcastw m0, [r2 + 2 + 32] ; [17...] row 16 value
+ vpbroadcastw m1, [r2 + 2 + 34] ; [18...] row 17 value
+ movu [r0], m0 ; store row 16
+ movu [r0 + r1], m1 ; store row 17
+
+ vpbroadcastw m0, [r2 + 2 + 36] ; [19...] row 18 value
+ vpbroadcastw m1, [r2 + 2 + 38] ; [20...] row 19 value
+ movu [r0 + r1 * 2], m0 ; store row 18
+ movu [r0 + r3], m1 ; store row 19
+ lea r0, [r0 + r1 *4] ; advance dst by 4 rows (row 20)
+
+ vpbroadcastw m0, [r2 + 2 + 40] ; [21...] row 20 value
+ vpbroadcastw m1, [r2 + 2 + 42] ; [22...] row 21 value
+ movu [r0], m0 ; store row 20
+ movu [r0 + r1], m1 ; store row 21
+
+ vpbroadcastw m0, [r2 + 2 + 44] ; [23...] row 22 value
+ vpbroadcastw m1, [r2 + 2 + 46] ; [24...] row 23 value
+ movu [r0 + r1 * 2], m0 ; store row 22
+ movu [r0 + r3], m1 ; store row 23
+ lea r0, [r0 + r1 *4] ; advance dst by 4 rows (row 24)
+
+ vpbroadcastw m0, [r2 + 2 + 48] ; [25...] row 24 value
+ vpbroadcastw m1, [r2 + 2 + 50] ; [26...] row 25 value
+ movu [r0], m0 ; store row 24
+ movu [r0 + r1], m1 ; store row 25
+
+ vpbroadcastw m0, [r2 + 2 + 52] ; [27...] row 26 value
+ vpbroadcastw m1, [r2 + 2 + 54] ; [28...] row 27 value
+ movu [r0 + r1 * 2], m0 ; store row 26
+ movu [r0 + r3], m1 ; store row 27
+ lea r0, [r0 + r1 *4] ; advance dst by 4 rows (row 28)
+
+ vpbroadcastw m0, [r2 + 2 + 56] ; [29...] row 28 value
+ vpbroadcastw m1, [r2 + 2 + 58] ; [30...] row 29 value
+ movu [r0], m0 ; store row 28
+ movu [r0 + r1], m1 ; store row 29
+
+ vpbroadcastw m0, [r2 + 2 + 60] ; [31...] row 30 value
+ vpbroadcastw m1, [r2 + 2 + 62] ; [32...] row 31 value
+ movu [r0 + r1 * 2], m0 ; store row 30
+ movu [r0 + r3], m1 ; store row 31 (last row, so r0 is not advanced again)
+
+ RET
;-------------------------------------------------------------------------------------------------------
; avx512 code for intra_pred_ang32 mode 2 to 34 end
;-------------------------------------------------------------------------------------------------------
More information about the x265-devel
mailing list