[x265] [PATCH] asm: 10bpp avx2 code for intra_pred_ang32x32 mode 11 & 25
dnyaneshwar at multicorewareinc.com
dnyaneshwar at multicorewareinc.com
Fri Jun 12 06:22:35 CEST 2015
# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1434000876 -19800
# Thu Jun 11 11:04:36 2015 +0530
# Node ID 091940c84ef56d1373d8aa476e8d3f1618436567
# Parent 4d2da861ec98105cfa4bf118235678b6491a1c93
asm: 10bpp avx2 code for intra_pred_ang32x32 mode 11 & 25
performance improvement over SSE:
intra_ang_32x32[11] 8256c->4236c, 48%
intra_ang_32x32[25] 5646c->2755c, 51%
diff -r 4d2da861ec98 -r 091940c84ef5 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Wed Jun 10 07:55:00 2015 -0700
+++ b/source/common/x86/asm-primitives.cpp Thu Jun 11 11:04:36 2015 +0530
@@ -1263,6 +1263,8 @@
p.cu[BLOCK_32x32].intra_pred[8] = x265_intra_pred_ang32_8_avx2;
p.cu[BLOCK_32x32].intra_pred[9] = x265_intra_pred_ang32_9_avx2;
p.cu[BLOCK_32x32].intra_pred[10] = x265_intra_pred_ang32_10_avx2;
+ p.cu[BLOCK_32x32].intra_pred[11] = x265_intra_pred_ang32_11_avx2;
+ p.cu[BLOCK_32x32].intra_pred[25] = x265_intra_pred_ang32_25_avx2;
p.cu[BLOCK_32x32].intra_pred[26] = x265_intra_pred_ang32_26_avx2;
p.cu[BLOCK_32x32].intra_pred[27] = x265_intra_pred_ang32_27_avx2;
p.cu[BLOCK_32x32].intra_pred[28] = x265_intra_pred_ang32_28_avx2;
diff -r 4d2da861ec98 -r 091940c84ef5 source/common/x86/intrapred.h
--- a/source/common/x86/intrapred.h Wed Jun 10 07:55:00 2015 -0700
+++ b/source/common/x86/intrapred.h Thu Jun 11 11:04:36 2015 +0530
@@ -284,6 +284,7 @@
void x265_intra_pred_ang32_8_avx2(pixel* dst, intptr_t dstStride, const pixel* srcPix, int dirMode, int bFilter);
void x265_intra_pred_ang32_9_avx2(pixel* dst, intptr_t dstStride, const pixel* srcPix, int dirMode, int bFilter);
void x265_intra_pred_ang32_10_avx2(pixel* dst, intptr_t dstStride, const pixel* srcPix, int dirMode, int bFilter);
+void x265_intra_pred_ang32_11_avx2(pixel* dst, intptr_t dstStride, const pixel* srcPix, int dirMode, int bFilter);
void x265_intra_pred_ang32_26_avx2(pixel* dst, intptr_t dstStride, const pixel* srcPix, int dirMode, int bFilter);
void x265_intra_pred_ang32_27_avx2(pixel* dst, intptr_t dstStride, const pixel* srcPix, int dirMode, int bFilter);
void x265_intra_pred_ang32_28_avx2(pixel* dst, intptr_t dstStride, const pixel* srcPix, int dirMode, int bFilter);
diff -r 4d2da861ec98 -r 091940c84ef5 source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm Wed Jun 10 07:55:00 2015 -0700
+++ b/source/common/x86/intrapred16.asm Thu Jun 11 11:04:36 2015 +0530
@@ -14643,6 +14643,80 @@
movu [r0 + r2], m0
movu [r0 + r2 + 32], m1
RET
+; intra_pred_ang32_11: 32x32 angular intra prediction, mode 11 (10bpp AVX2 per this patch), done as four calls to ang16_mode_11_25.
+cglobal intra_pred_ang32_11, 3,8,12, 0-8 ; r0 = dst, r1 = dstStride, r2 = srcPix; 8 GPRs, 12 vector regs, 8 bytes of stack scratch
+ movzx r5d, word [r2 + 128] ; [0] r5 = original word at byte offset 128 (about to be replaced by srcPix[0])
+ movzx r6d, word [r2] ; r6 = srcPix[0]
+ mov [rsp], r5w ; stash the original word in stack slot 0
+ mov [r2 + 128], r6w ; temporarily patch offset 128 with srcPix[0] (restored before RET)
+
+ movzx r5d, word [r2 + 126] ; [16] r5 = original word at byte offset 126 (about to be replaced by srcPix[16])
+ movzx r6d, word [r2 + 32] ; r6 = word at byte offset 32, i.e. 16-bit element 16
+ mov [rsp + 4], r5w ; stash the original word in stack slot 4
+ mov [r2 + 126], r6w ; temporarily patch offset 126 (restored before RET)
+
+ add r2, 128 ; r2 -> the patched word at byte offset 128
+ xor r6d, r6d ; r6d = 0 here (intra_pred_ang32_25 passes 1); presumably a mode flag read by ang16_mode_11_25 -- TODO confirm
+ lea r3, [ang_table_avx2 + 16 * 32] ; r3 -> ang_table_avx2 + 512 bytes
+ add r1d, r1d ; r1 = 2 * dstStride = stride in bytes for 16-bit pixels; 32-bit op zero-extends, so stride must be non-negative
+ lea r4, [r1 * 3] ; r4 = 3 * stride in bytes
+ lea r7, [r0 + 8 * r1] ; r7 = dst + 8 rows
+
+ call ang16_mode_11_25 ; call 1: r0 = dst, r2 = srcPix + 128 (helper body is outside this patch)
+
+ sub r2, 2 ; reference pointer back by one pixel (2 bytes)
+ lea r0, [r0 + 32] ; r0 += 32 bytes = 16 pixels to the right of wherever the helper left r0
+
+ call ang16_mode_11_25 ; call 2
+
+ add r2, 34 ; 32 bytes (16 pixels) past call 1's reference pointer, if the helper preserves r2
+ lea r0, [r7 + 8 * r1] ; r0 = r7 + 8 rows; equals dst + 16 rows if the helper leaves r7 and r1 unchanged -- TODO confirm
+
+ call ang16_mode_11_25 ; call 3
+
+ sub r2, 2 ; reference pointer back by one pixel (2 bytes)
+ lea r0, [r0 + 32] ; r0 += 32 bytes = 16 pixels to the right
+
+ call ang16_mode_11_25 ; call 4
+
+ mov r6d, [rsp] ; reload stack slot 0 (32-bit load; only r6w is used)
+ mov [r2 - 30], r6w ; restore byte offset 128: r2 = srcPix + 158 here, assuming the helper preserves r2
+ mov r6d, [rsp + 4] ; reload stack slot 4 (only r6w is used)
+ mov [r2 - 32], r6w ; restore byte offset 126
+ RET
+; intra_pred_ang32_25: 32x32 angular intra prediction, mode 25 (10bpp AVX2 per this patch), done as four calls to ang16_mode_11_25.
+cglobal intra_pred_ang32_25, 3,7,12, 0-4 ; r0 = dst, r1 = dstStride, r2 = srcPix (presumably the same prototype as the other intra_pred_ang32_*); 7 GPRs, 12 vector regs, 4 bytes of stack
+ xor r6d, r6d ; r6d = 0 ...
+ inc r6d ; ... then 1 (intra_pred_ang32_11 passes 0); presumably a mode flag read by ang16_mode_11_25 -- TODO confirm
+ lea r3, [ang_table_avx2 + 16 * 32] ; r3 -> ang_table_avx2 + 512 bytes
+ add r1d, r1d ; r1 = 2 * dstStride = stride in bytes for 16-bit pixels; 32-bit op zero-extends, so stride must be non-negative
+
+ movzx r4d, word [r2 - 2] ; r4 = original word just below srcPix (about to be replaced)
+ movzx r5d, word [r2 + 160] ; [16] r5 = word at byte offset 160, i.e. 16-bit element 80
+ mov [rsp], r4w ; stash the original word on the stack
+ mov [r2 - 2], r5w ; temporarily patch srcPix - 2 (restored before RET). NOTE(review): writes below srcPix; confirm the caller's buffer allows it
+
+ lea r4, [r1 * 3] ; r4 = 3 * stride in bytes (r4 no longer holds the saved word)
+ lea r5, [r0 + 32] ; r5 = dst + 32 bytes = 16 pixels to the right; used as r0 for call 3
+
+ call ang16_mode_11_25 ; call 1: r0 = dst, r2 = srcPix (helper body is outside this patch)
+
+ sub r2, 2 ; reference pointer back by one pixel (2 bytes); r0 is not reset, so the helper presumably advances it -- TODO confirm
+
+ call ang16_mode_11_25 ; call 2
+
+ add r2, 34 ; 32 bytes (16 pixels) past call 1's reference pointer, if the helper preserves r2
+ mov r0, r5 ; r0 = dst + 16 pixels, assuming the helper leaves r5 unchanged -- TODO confirm
+
+ call ang16_mode_11_25 ; call 3
+
+ sub r2, 2 ; reference pointer back by one pixel (2 bytes)
+
+ call ang16_mode_11_25 ; call 4
+
+ mov r5d, [rsp] ; reload the stashed word (32-bit load; only r5w is used)
+ mov [r2 - 32], r5w ; restore srcPix - 2: r2 = srcPix + 30 here, assuming the helper preserves r2
+ RET
;-------------------------------------------------------------------------------------------------------
; end of avx2 code for intra_pred_ang32 mode 2 to 34
;-------------------------------------------------------------------------------------------------------
More information about the x265-devel
mailing list