[x265] [PATCH 09 of 18] asm: intra_pred_ang4_10_sse2 16-bit
dtyx265 at gmail.com
dtyx265 at gmail.com
Fri Apr 3 18:24:40 CEST 2015
# HG changeset patch
# User David T Yuen <dtyx265 at gmail.com>
# Date 1428075968 25200
# Node ID 95c50ede466fa24a1ff13e5203305b35392a5f64
# Parent 47dcaffb0a2cbf71efa8ca6eabe45c610901513b
asm: intra_pred_ang4_10_sse2 16-bit
This is backported from the SSE4 code and replaces the C code.
./test/TestBench --testbench intrapred | grep "intra_ang_4x4\[10\]"
intra_ang_4x4[10] 6.40x 197.60 1264.07
diff -r 47dcaffb0a2c -r 95c50ede466f source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Fri Apr 03 08:40:26 2015 -0700
+++ b/source/common/x86/asm-primitives.cpp Fri Apr 03 08:46:08 2015 -0700
@@ -890,6 +890,7 @@
p.cu[BLOCK_4x4].intra_pred[7] = x265_intra_pred_ang4_7_sse2;
p.cu[BLOCK_4x4].intra_pred[8] = x265_intra_pred_ang4_8_sse2;
p.cu[BLOCK_4x4].intra_pred[9] = x265_intra_pred_ang4_9_sse2;
+ p.cu[BLOCK_4x4].intra_pred[10] = x265_intra_pred_ang4_10_sse2;
p.cu[BLOCK_4x4].intra_pred[27] = x265_intra_pred_ang4_9_sse2;
p.cu[BLOCK_4x4].intra_pred[28] = x265_intra_pred_ang4_8_sse2;
p.cu[BLOCK_4x4].intra_pred[29] = x265_intra_pred_ang4_7_sse2;
diff -r 47dcaffb0a2c -r 95c50ede466f source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm Fri Apr 03 08:40:26 2015 -0700
+++ b/source/common/x86/intrapred16.asm Fri Apr 03 08:46:08 2015 -0700
@@ -917,6 +917,35 @@
mova m7, [r3 + 4 * 16] ; [ 8]
jmp mangle(private_prefix %+ _ %+ intra_pred_ang4_3 %+ SUFFIX %+ .do_filter4x4)
+cglobal intra_pred_ang4_10, 3,3,3 ; 4x4 block of 16-bit words at r0: row y = word y of [r2 + 18] repeated 4x; row 0 optionally filtered
+ movh m0, [r2 + 18] ; [4 3 2 1] = the four words at byte offset 18 (labels, highest lane first; not r2 indices)
+ ; replicate every word 4x so that each xmm register carries two complete rows
+ punpcklwd m0, m0 ;[4 4 3 3 2 2 1 1]
+ pshufd m1, m0, 0xFA ; [4 4 4 4 3 3 3 3]
+ add r1, r1 ; r1 *= 2 (presumably element stride -> byte stride for 16-bit pixels)
+ pshufd m0, m0, 0x50 ; [2 2 2 2 1 1 1 1]
+ movhps [r0 + r1], m0 ; row 1 = [2 2 2 2] (high half of m0)
+ movh [r0 + r1 * 2], m1 ; row 2 = [3 3 3 3] (low half of m1)
+ lea r1, [r1 * 3] ; r1 = byte offset of row 3
+ movhps [r0 + r1], m1 ; row 3 = [4 4 4 4] (high half of m1)
+ ; row 0 (low half of m0) is stored last, at .quit, so the filter below can modify it first
+ cmp r4m, byte 0 ; r4m = argument #4 (zero-based, x86inc naming); zero skips the filter
+ jz .quit
+
+ ; filter
+ movd m2, [r2] ; only words [1 0] of the r2 array are loaded (movd reads 32 bits)
+ pshuflw m2, m2, 0x00 ; broadcast word 0 across the four low lanes
+ movh m1, [r2 + 2] ; words 1..4 of the r2 array
+ psubw m1, m2 ; word[x + 1] - word[0] for x = 0..3
+ psraw m1, 1 ; arithmetic shift right by 1 (keeps the sign of the difference)
+ paddw m0, m1 ; add the halved difference to row 0; only the low half of m0 is stored afterwards
+ pxor m1, m1 ; m1 = 0, lower clamp bound
+ pmaxsw m0, m1 ; clamp below at 0
+ pminsw m0, [pw_1023] ; clamp above; pw_1023 is defined outside this hunk, presumably 1023 in every word
+.quit:
+ movh [r0], m0 ; row 0 = low 64 bits of m0
+ RET
+
;-----------------------------------------------------------------------------------
; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel* above, int, int filter)
;-----------------------------------------------------------------------------------
More information about the x265-devel
mailing list