<div style="line-height:1.7;color:#000000;font-size:14px;font-family:arial"><div> </div><pre><br>At 2015-03-28 10:42:42,dtyx265@gmail.com wrote:
># HG changeset patch
># User David T Yuen <dtyx265@gmail.com>
># Date 1427504802 25200
># Node ID c5fa8f49bf15818007fe6ab25eb6aec65bb203cb
># Parent d7fee58a8466d6862e72cced5a4aa12e37294d4e
>asm: intra_pred_ang4_26_sse2
>
>This is backported from sse4 code and replaces c code.
>
>64-bit
>
>./test/TestBench --testbench intrapred | grep "intra_ang_4x4\[26\]"
>intra_ang_4x4[26] 3.87x 217.84 843.32
>
>32-bit
>
>./test/TestBench --testbench intrapred | grep "intra_ang_4x4\[26\]"
>intra_ang_4x4[26] 2.82x 320.03 903.85
>
>diff -r d7fee58a8466 -r c5fa8f49bf15 source/common/x86/asm-primitives.cpp
>--- a/source/common/x86/asm-primitives.cpp Fri Mar 27 17:51:06 2015 -0700
>+++ b/source/common/x86/asm-primitives.cpp Fri Mar 27 18:06:42 2015 -0700
>@@ -1205,6 +1205,7 @@
> p.cu[BLOCK_4x4].intra_pred[8] = x265_intra_pred_ang4_8_sse2;
> p.cu[BLOCK_4x4].intra_pred[9] = x265_intra_pred_ang4_9_sse2;
> p.cu[BLOCK_4x4].intra_pred[10] = x265_intra_pred_ang4_10_sse2;
>+ p.cu[BLOCK_4x4].intra_pred[26] = x265_intra_pred_ang4_26_sse2;
>
> p.cu[BLOCK_4x4].calcresidual = x265_getResidual4_sse2;
> p.cu[BLOCK_8x8].calcresidual = x265_getResidual8_sse2;
>diff -r d7fee58a8466 -r c5fa8f49bf15 source/common/x86/intrapred.h
>--- a/source/common/x86/intrapred.h Fri Mar 27 17:51:06 2015 -0700
>+++ b/source/common/x86/intrapred.h Fri Mar 27 18:06:42 2015 -0700
>@@ -56,6 +56,7 @@
> DECL_ANG(4, 8, sse2);
> DECL_ANG(4, 9, sse2);
> DECL_ANG(4, 10, sse2);
>+DECL_ANG(4, 26, sse2);
>
> DECL_ANG(4, 2, ssse3);
> DECL_ANG(4, 3, sse4);
>diff -r d7fee58a8466 -r c5fa8f49bf15 source/common/x86/intrapred8.asm
>--- a/source/common/x86/intrapred8.asm Fri Mar 27 17:51:06 2015 -0700
>+++ b/source/common/x86/intrapred8.asm Fri Mar 27 18:06:42 2015 -0700
>@@ -1512,6 +1512,46 @@
> movd [r0], m0
> RET
>
>+INIT_XMM sse2
>+cglobal intra_pred_ang4_26, 5,5,4
>+ movd m0, [r2 + 1] ; [8 7 6 5 4 3 2 1]
>+
>+ ; store
>+ movd [r0], m0
>+ movd [r0 + r1], m0
>+ movd [r0 + r1 * 2], m0
>+ lea r3, [r1 * 3]
>+ movd [r0 + r3], m0
>+
>+ ; filter
>+ cmp r4m, byte 0
>+ jz .quit
>+
>+ pxor m3, m3
>+ punpcklbw m0, m3
>+ pshuflw m0, m0, 0x00
>+ movd m2, [r2]
>+ punpcklbw m2, m3
>+ pshuflw m2, m2, 0x00
>+ movd m1, [r2 + 9]
>+ punpcklbw m1, m3
>+ psubw m1, m2
>+ psraw m1, 1
>+ paddw m0, m1
>+ packuswb m0, m0
>+
>+ movd r2, m0
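</pre><pre>r2d? (Should the destination of the movd above be r2d rather than r2? movd transfers only 32 bits, and only the low four bytes are stored below.)</pre><pre>>+ mov [r0], r2b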
>+ shr r2, 8
>+ mov [r0 + r1], r2b
>+ shr r2, 8
>+ mov [r0 + r1 * 2], r2b
>+ shr r2, 8
>+ mov [r0 + r3], r2b
>+
>+.quit:
>+ RET
>+
> ;---------------------------------------------------------------------------------------------
> ; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel *srcPix, int dirMode, int bFilter)
> ;---------------------------------------------------------------------------------------------
>_______________________________________________
>x265-devel mailing list
>x265-devel@videolan.org
>https://mailman.videolan.org/listinfo/x265-devel
</pre></div>