[x265] [PATCH 2 of 7] asm: AVX2 asm for intra_ang_32 mode 10, 816c->452c
dnyaneshwar at multicorewareinc.com
dnyaneshwar at multicorewareinc.com
Wed Aug 26 12:24:31 CEST 2015
# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1440480212 -19800
# Tue Aug 25 10:53:32 2015 +0530
# Node ID 0409b136c208cb944fb76bfd400e76ba43e330a8
# Parent 38a0e6b5f22302fb076913077d464b902c9cf63e
asm: AVX2 asm for intra_ang_32 mode 10, 816c->452c
diff -r 38a0e6b5f223 -r 0409b136c208 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Mon Aug 24 18:25:53 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp Tue Aug 25 10:53:32 2015 +0530
@@ -2998,6 +2998,7 @@
p.cu[BLOCK_32x32].intra_pred[7] = PFX(intra_pred_ang32_7_avx2);
p.cu[BLOCK_32x32].intra_pred[8] = PFX(intra_pred_ang32_8_avx2);
p.cu[BLOCK_32x32].intra_pred[9] = PFX(intra_pred_ang32_9_avx2);
+ p.cu[BLOCK_32x32].intra_pred[10] = PFX(intra_pred_ang32_10_avx2);
p.cu[BLOCK_32x32].intra_pred[34] = PFX(intra_pred_ang32_34_avx2);
p.cu[BLOCK_32x32].intra_pred[2] = PFX(intra_pred_ang32_2_avx2);
p.cu[BLOCK_32x32].intra_pred[26] = PFX(intra_pred_ang32_26_avx2);
diff -r 38a0e6b5f223 -r 0409b136c208 source/common/x86/intrapred8.asm
--- a/source/common/x86/intrapred8.asm Mon Aug 24 18:25:53 2015 +0530
+++ b/source/common/x86/intrapred8.asm Tue Aug 25 10:53:32 2015 +0530
@@ -531,6 +531,7 @@
%endrep
SECTION .text
+cextern pb_1 ; external byte constant; intra_pred_ang32_10 loads it as the per-row pshufb index increment
cextern pw_2
cextern pw_3
cextern pw_4
@@ -13893,6 +13894,132 @@
movu [r0 + r4], m1
RET
+cglobal intra_pred_ang32_10, 5,5,4 ; angular mode 10 for 32x32 blocks: each of the 32 output rows is one source byte replicated across the whole row
+ pxor m0, m0 ; m0 = pshufb index vector, all bytes 0 -> selects byte 0 of each 128-bit lane
+ mova m1, [pb_1] ; m1 = per-byte step added to the index before every new row (pb_1 is external; presumably every byte = 1)
+ lea r4, [r1 * 3] ; r4 = 3 * row pitch, addresses the 4th row of each group; the incoming r4 value is overwritten without being read
+; Register roles as used below: r0 = output pointer, r1 = output row pitch in bytes, r2 = source buffer base; r3 is never referenced.
+ vbroadcasti128 m2, [r2 + mmsize*2 + 1] ; m2 = 16 source bytes copied into both lanes (byte offset 65 if mmsize = 32, i.e. ymm; the INIT line is outside this hunk)
+; rows 0-3: pshufb with a uniform index k fills m3 with source byte k, then one full-register store per row
+ pshufb m3, m2, m0 ; both lanes of m2 are identical, so every byte of m3 becomes the same source byte
+ movu [r0], m3 ; unaligned store: no alignment is assumed for the output rows
+ paddb m0, m1 ; advance the index to the next source byte
+ pshufb m3, m2, m0
+ movu [r0 + r1], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r1 * 2], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r4], m3
+
+ lea r0, [r0 + r1 * 4] ; move the output pointer down four rows
+; rows 4-7 (index continues from the previous group)
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r1], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r1 * 2], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r4], m3
+
+ lea r0, [r0 + r1 * 4]
+; rows 8-11
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r1], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r1 * 2], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r4], m3
+
+ lea r0, [r0 + r1 * 4]
+; rows 12-15 (last rows served by the first 16-byte load)
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r1], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r1 * 2], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r4], m3
+
+ lea r0, [r0 + r1 * 4]
+ pxor m0, m0 ; restart the index at 0: pshufb only selects within a 16-byte lane, so the next 16 source bytes need a fresh load
+ vbroadcasti128 m2, [r2 + mmsize*2 + mmsize/2 + 1] ; m2 = the following 16 source bytes (previous load offset + mmsize/2), again in both lanes
+; rows 16-19: same pattern as rows 0-3, now reading the second 16-byte source block
+ pshufb m3, m2, m0
+ movu [r0], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r1], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r1 * 2], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r4], m3
+
+ lea r0, [r0 + r1 * 4]
+; rows 20-23
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r1], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r1 * 2], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r4], m3
+
+ lea r0, [r0 + r1 * 4]
+; rows 24-27
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r1], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r1 * 2], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r4], m3
+
+ lea r0, [r0 + r1 * 4]
+; rows 28-31 (final group; r0 is not advanced afterwards)
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r1], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r1 * 2], m3
+ paddb m0, m1
+ pshufb m3, m2, m0
+ movu [r0 + r4], m3
+ RET
+
%endif ; ARCH_X86_64
;-----------------------------------------------------------------------------------------
; end of intra_pred_ang32 angular modes avx2 asm
More information about the x265-devel
mailing list