[x265] [PATCH 2 of 5] asm: AVX2 asm for intra_ang_32 mode 10, 816c->452c
dnyaneshwar at multicorewareinc.com
dnyaneshwar at multicorewareinc.com
Tue Aug 18 06:11:36 CEST 2015
# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1439557064 -19800
# Fri Aug 14 18:27:44 2015 +0530
# Node ID 8752daab2f07711c556dfffa9a733b7278484479
# Parent 5ed23f786ea8f98e003189a537f960e4ff16201f
asm: AVX2 asm for intra_ang_32 mode 10, 816c->452c
diff -r 5ed23f786ea8 -r 8752daab2f07 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Fri Aug 14 11:28:37 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp Fri Aug 14 18:27:44 2015 +0530
@@ -3026,6 +3026,7 @@
p.cu[BLOCK_32x32].intra_pred[7] = PFX(intra_pred_ang32_7_avx2);
p.cu[BLOCK_32x32].intra_pred[8] = PFX(intra_pred_ang32_8_avx2);
p.cu[BLOCK_32x32].intra_pred[9] = PFX(intra_pred_ang32_9_avx2);
+ p.cu[BLOCK_32x32].intra_pred[10] = PFX(intra_pred_ang32_10_avx2);
p.cu[BLOCK_32x32].intra_pred[34] = PFX(intra_pred_ang32_34_avx2);
p.cu[BLOCK_32x32].intra_pred[2] = PFX(intra_pred_ang32_2_avx2);
p.cu[BLOCK_32x32].intra_pred[26] = PFX(intra_pred_ang32_26_avx2);
diff -r 5ed23f786ea8 -r 8752daab2f07 source/common/x86/intrapred8.asm
--- a/source/common/x86/intrapred8.asm Fri Aug 14 11:28:37 2015 +0530
+++ b/source/common/x86/intrapred8.asm Fri Aug 14 18:27:44 2015 +0530
@@ -462,6 +462,7 @@
%endrep
SECTION .text
+cextern pb_1
cextern pw_2
cextern pw_3
cextern pw_4
@@ -13500,6 +13501,132 @@
call ang32_mode_9_27_avx2
RET
+cglobal intra_pred_ang32_10, 5,5,4 ; fills 32 rows at r0 (step r1): every byte of row i is source byte i read from [r2 + mmsize*2 + 1 + i] (assumes ymm, mmsize = 32 - confirm INIT_YMM is in effect above)
+ pxor m0, m0 ; m0 = pshufb control; all zero selects byte 0 of each 128-bit lane
+ mova m1, [pb_1] ; per-row increment for the control bytes (pb_1 is external; presumably every byte = 1 - confirm)
+ lea r4, [r1 * 3] ; r4 = 3 * r1, offset of the 4th row in each group; the incoming r4 argument is never read
+
+ vbroadcasti128 m2, [r2 + mmsize*2 + 1] ; first 16 source bytes, duplicated into both 128-bit lanes
+
+ pshufb m3, m2, m0 ; replicate the currently selected source byte across all of m3
+ movu [r0], m3 ; row 0
+ paddb m0, m1 ; select the next source byte for the next row
+ pshufb m3, m2, m0
+ movu [r0 + r1], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r1 * 2], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r4], m3 ; row 3
+
+ lea r0, [r0 + r1 * 4] ; advance the destination pointer by 4 rows
+
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0], m3 ; row 4
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r1], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r1 * 2], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r4], m3 ; row 7
+
+ lea r0, [r0 + r1 * 4]
+
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0], m3 ; row 8
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r1], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r1 * 2], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r4], m3 ; row 11
+
+ lea r0, [r0 + r1 * 4]
+
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0], m3 ; row 12
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r1], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r1 * 2], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r4], m3 ; row 15 (control bytes are now 15, the last byte of the 16 loaded)
+
+ lea r0, [r0 + r1 * 4]
+ pxor m0, m0 ; restart the selection at byte 0 for the second group of source bytes
+ vbroadcasti128 m2, [r2 + mmsize*2 + mmsize/2 + 1] ; next 16 source bytes (mmsize/2 = 16 assuming ymm registers)
+
+ pshufb m3, m2, m0
+ movu [r0], m3 ; row 16
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r1], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r1 * 2], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r4], m3 ; row 19
+
+ lea r0, [r0 + r1 * 4]
+
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0], m3 ; row 20
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r1], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r1 * 2], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r4], m3 ; row 23
+
+ lea r0, [r0 + r1 * 4]
+
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0], m3 ; row 24
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r1], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r1 * 2], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r4], m3 ; row 27
+
+ lea r0, [r0 + r1 * 4]
+
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0], m3 ; row 28
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r1], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r1 * 2], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r4], m3 ; row 31, last row written
+ RET
+
%endif ; ARCH_X86_64
;-----------------------------------------------------------------------------------------
; end of intra_pred_ang32 angular modes avx2 asm
More information about the x265-devel
mailing list