[x265] [PATCH 1 of 6] asm: avx2 code for intra_pred_ang16x16 mode 2 and 34
dnyaneshwar at multicorewareinc.com
dnyaneshwar at multicorewareinc.com
Thu May 28 08:36:18 CEST 2015
# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1432791818 -19800
# Thu May 28 11:13:38 2015 +0530
# Node ID 1af2413e19a29f28872386ff97b041360745daea
# Parent 18939c0e321f08207fa0a383939bc44485773013
asm: avx2 code for intra_pred_ang16x16 mode 2 and 34
performance improvement over SSE:
intra_ang_16x16[ 2] 356c-> 192c, 46%
intra_ang_16x16[34] 351c-> 201c, 43%
diff -r 18939c0e321f -r 1af2413e19a2 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Tue May 26 13:17:55 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp Thu May 28 11:13:38 2015 +0530
@@ -1196,6 +1196,9 @@
}
if (cpuMask & X265_CPU_AVX2)
{
+ p.cu[BLOCK_16x16].intra_pred[2] = x265_intra_pred_ang16_2_avx2;
+ p.cu[BLOCK_16x16].intra_pred[34] = x265_intra_pred_ang16_2_avx2;
+
p.pu[LUMA_8x4].addAvg = x265_addAvg_8x4_avx2;
p.pu[LUMA_8x8].addAvg = x265_addAvg_8x8_avx2;
p.pu[LUMA_8x16].addAvg = x265_addAvg_8x16_avx2;
diff -r 18939c0e321f -r 1af2413e19a2 source/common/x86/intrapred.h
--- a/source/common/x86/intrapred.h Tue May 26 13:17:55 2015 +0530
+++ b/source/common/x86/intrapred.h Thu May 28 11:13:38 2015 +0530
@@ -242,6 +242,7 @@
void x265_intra_pred_ang8_21_avx2(pixel* dst, intptr_t dstStride, const pixel* srcPix, int dirMode, int bFilter);
void x265_intra_pred_ang8_22_avx2(pixel* dst, intptr_t dstStride, const pixel* srcPix, int dirMode, int bFilter);
void x265_intra_pred_ang8_23_avx2(pixel* dst, intptr_t dstStride, const pixel* srcPix, int dirMode, int bFilter);
+void x265_intra_pred_ang16_2_avx2(pixel* dst, intptr_t dstStride, const pixel* srcPix, int dirMode, int bFilter); // one entry point for dirMode 2 and 34; the asm picks the reference base by comparing dirMode with 34
void x265_intra_pred_ang16_3_avx2(pixel* dst, intptr_t dstStride, const pixel* srcPix, int dirMode, int bFilter);
void x265_intra_pred_ang16_4_avx2(pixel* dst, intptr_t dstStride, const pixel* srcPix, int dirMode, int bFilter);
void x265_intra_pred_ang16_5_avx2(pixel* dst, intptr_t dstStride, const pixel* srcPix, int dirMode, int bFilter);
diff -r 18939c0e321f -r 1af2413e19a2 source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm Tue May 26 13:17:55 2015 +0530
+++ b/source/common/x86/intrapred16.asm Thu May 28 11:13:38 2015 +0530
@@ -10182,6 +10182,63 @@
.quit:
RET
+;-------------------------------------------------------------------------------------------------------
+; avx2 code for intra_pred_ang16 mode 2 and 34 start
+;-------------------------------------------------------------------------------------------------------
+INIT_YMM avx2
+cglobal intra_pred_ang16_2, 3,5,3               ; r0 = dst, r1 = dstStride, r2 = srcPix, r3m = dirMode (see prototype in intrapred.h)
+    mov         r4,                 r2          ; r4 = srcPix: reference base when dirMode == 34
+    add         r2,                 64          ; r2 = srcPix + 64 bytes: reference base otherwise (mode 2)
+    cmp         r3m,                byte 34
+    cmove       r2,                 r4
+    add         r1,                 r1          ; r1 = 2 * dstStride; full-width add so the intptr_t stride is not truncated to 32 bits
+    lea         r3,                 [r1 * 3]    ; r3 = 3 * r1 (dirMode has already been consumed by the cmp above)
+    movu        m0,                 [r2 + 4]
+    movu        m1,                 [r2 + 20]
+    ; m1 starts 16 bytes after m0, so the per-128-bit-lane "palignr m2, m1, m0, n" yields the 32 bytes at [r2 + 4 + n]; row k uses n = 2 * k
+    movu        [r0],               m0          ; row 0
+    palignr     m2,                 m1,     m0,     2
+    movu        [r0 + r1],          m2          ; row 1
+    palignr     m2,                 m1,     m0,     4
+    movu        [r0 + r1 * 2],      m2          ; row 2
+    palignr     m2,                 m1,     m0,     6
+    movu        [r0 + r3],          m2          ; row 3
+    ; rows 4 - 7
+    lea         r0,                 [r0 + r1 * 4]
+    palignr     m2,                 m1,     m0,     8
+    movu        [r0],               m2
+    palignr     m2,                 m1,     m0,     10
+    movu        [r0 + r1],          m2
+    palignr     m2,                 m1,     m0,     12
+    movu        [r0 + r1 * 2],      m2
+    palignr     m2,                 m1,     m0,     14
+    movu        [r0 + r3],          m2
+    ; rows 8 - 11: slide the window by 16 bytes, m1 becomes the low source and m0 is reloaded 16 bytes after it
+    movu        m0,                 [r2 + 36]
+    lea         r0,                 [r0 + r1 * 4]
+    movu        [r0],               m1          ; row 8 = the 32 bytes at [r2 + 20]
+    palignr     m2,                 m0,     m1,     2
+    movu        [r0 + r1],          m2
+    palignr     m2,                 m0,     m1,     4
+    movu        [r0 + r1 * 2],      m2
+    palignr     m2,                 m0,     m1,     6
+    movu        [r0 + r3],          m2
+    ; rows 12 - 15
+    lea         r0,                 [r0 + r1 * 4]
+    palignr     m2,                 m0,     m1,     8
+    movu        [r0],               m2
+    palignr     m2,                 m0,     m1,     10
+    movu        [r0 + r1],          m2
+    palignr     m2,                 m0,     m1,     12
+    movu        [r0 + r1 * 2],      m2
+    palignr     m2,                 m0,     m1,     14
+    movu        [r0 + r3],          m2          ; row 15
+    RET
+
+;-------------------------------------------------------------------------------------------------------
+; end of avx2 code for intra_pred_ang16 mode 2 and 34
+;-------------------------------------------------------------------------------------------------------
+
%macro MODE_2_34 0
movu m0, [r2 + 4]
movu m1, [r2 + 20]
More information about the x265-devel
mailing list