<div dir="ltr"><div>Replaced.</div><div><br></div><div>Regards,</div><div>Praveen Tiwari</div></div><div class="gmail_extra"><br><br><div class="gmail_quote">2013/12/4 chen <span dir="ltr">&lt;<a href="mailto:chenm003@163.com" target="_blank">chenm003@163.com</a>&gt;</span><br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div style="line-height:1.7;font-size:14px;font-family:arial"><div>mova m3, [tab_Zero]<br>pxor is faster<br>
<br>At 2013-12-04 21:57:09,<a href="mailto:praveen@multicorewareinc.com" target="_blank">praveen@multicorewareinc.com</a> wrote:<div><div class="h5"><br>># HG changeset patch<br>># User Praveen Tiwari<br>># Date 1386165418 -19800<br>
># Node ID 1e130f825cbbfa805602d4eb381b38e62ff82458<br>># Parent 1d2d60f4eb81882fa0f3ba6c4e7aa9a220968f7a<br>>all_angs_pred_4x4, asm code for all modes<br>><br>>diff -r 1d2d60f4eb81 -r 1e130f825cbb source/common/x86/asm-primitives.cpp<br>
>--- a/source/common/x86/asm-primitives.cpp Tue Dec 03 23:56:22 2013 -0600<br>>+++ b/source/common/x86/asm-primitives.cpp Wed Dec 04 19:26:58 2013 +0530<br>>@@ -780,6 +780,8 @@<br>> p.intra_pred_planar[BLOCK_8x8] = x265_intra_pred_planar8_sse4;<br>
> p.intra_pred_planar[BLOCK_16x16] = x265_intra_pred_planar16_sse4;<br>> p.intra_pred_planar[BLOCK_32x32] = x265_intra_pred_planar32_sse4;<br>>+<br>>+ p.intra_pred_allangs[BLOCK_4x4] = x265_all_angs_pred_4x4_sse4;<br>
> }<br>> if (cpuMask & X265_CPU_AVX)<br>> {<br>>diff -r 1d2d60f4eb81 -r 1e130f825cbb source/common/x86/intrapred.h<br>>--- a/source/common/x86/intrapred.h Tue Dec 03 23:56:22 2013 -0600<br>>+++ b/source/common/x86/intrapred.h Wed Dec 04 19:26:58 2013 +0530<br>
>@@ -39,4 +39,6 @@<br>> void x265_intra_pred_ang4_2_ssse3(pixel* dst, intptr_t dstStride, pixel *refLeft, pixel *refAbove, int dirMode, int bFilter);<br>> void x265_intra_pred_ang4_3_ssse3(pixel* dst, intptr_t dstStride, pixel *refLeft, pixel *refAbove, int dirMode, int bFilter);<br>
> <br>>+void x265_all_angs_pred_4x4_sse4(pixel *dest, pixel *above0, pixel *left0, pixel *above1, pixel *left1, bool bLuma);<br>>+<br>> #endif // ifndef X265_INTRAPRED_H<br>>diff -r 1d2d60f4eb81 -r 1e130f825cbb source/common/x86/intrapred8.asm<br>
>--- a/source/common/x86/intrapred8.asm Tue Dec 03 23:56:22 2013 -0600<br>>+++ b/source/common/x86/intrapred8.asm Wed Dec 04 19:26:58 2013 +0530<br>>@@ -34,6 +34,8 @@<br>> <br>> c_trans_4x4 db 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15<br>
> <br>>+tab_Zero: db 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0<br>>+<br>> const ang_table<br>> %assign x 0<br>> %rep 32<br>>@@ -750,3 +752,793 @@<br>> lea r1, [r1 * 3]<br>> movd [r0 + r1], m3<br>
> RET<br>>+<br>>+;-----------------------------------------------------------------------------<br>>+; void all_angs_pred_4x4(pixel *dest, pixel *above0, pixel *left0, pixel *above1, pixel *left1, bool bLuma)<br>
>+;-----------------------------------------------------------------------------<br>>+INIT_XMM sse4<br>>+cglobal all_angs_pred_4x4, 6, 6, 8 dest, above0, left0, above1, left1, bLuma<br>>+<br>>+; mode 2<br>>+<br>
>+movh m0, [r2 + 2]<br>>+movd [r0], m0<br>>+<br>>+palignr m1, m0, 1<br>>+movd [r0 + 4], m1<br>>+<br>>+palignr m1, m0, 2<br>>+movd [r0 + 8], m1<br>
>+<br>>+psrldq m0, 3<br>>+movd [r0 + 12], m0<br>>+<br>>+; mode 3<br>>+<br>>+mova m0, [pw_1024]<br>>+<br>>+movh m1, [r2 + 1]<br>>+<br>>+palignr m2, m1, 1<br>
>+punpcklbw m1, m2<br>>+<br>>+lea r5, [ang_table]<br>>+<br>>+pmaddubsw m5, m1, [r5 + 26 * 16]<br>>+pmulhrsw m5, m0<br>>+packuswb m5, m5<br>
>+movd [r0 + 16], m5<br>>+<br>>+palignr m2, m1, 2<br>>+<br>>+pmaddubsw m6, m2, [r5 + 20 * 16]<br>>+pmulhrsw m6, m0<br>>+packuswb m6, m6<br>
>+movd [r0 + 20], m6<br>>+<br>>+palignr m3, m1, 4<br>>+<br>>+pmaddubsw m4, m3, [r5 + 14 * 16]<br>>+pmulhrsw m4, m0<br>>+packuswb m4, m4<br>
>+movd [r0 + 24], m4<br>>+<br>>+palignr m4, m1, 6<br>>+<br>>+pmaddubsw m4, [r5 + 8 * 16]<br>>+pmulhrsw m4, m0<br>>+packuswb m4, m4<br>>+movd [r0 + 28], m4<br>
>+<br>>+; mode 4<br>>+<br>>+pmaddubsw m4, m1, [r5 + 21 * 16]<br>>+pmulhrsw m4, m0<br>>+packuswb m4, m4<br>>+movd [r0 + 32], m4<br>>+<br>>+pmaddubsw m4, m2, [r5 + 10 * 16]<br>
>+pmulhrsw m4, m0<br>>+packuswb m4, m4<br>>+movd [r0 + 36], m4<br>>+<br>>+pmaddubsw m4, m2, [r5 + 31 * 16]<br>>+pmulhrsw m4, m0<br>>+packuswb m4, m4<br>
>+movd [r0 + 40], m4<br>>+<br>>+pmaddubsw m4, m3, [r5 + 20 * 16]<br>>+pmulhrsw m4, m0<br>>+packuswb m4, m4<br>>+movd [r0 + 44], m4<br>>+<br>
>+; mode 5<br>>+<br>>+pmaddubsw m4, m1, [r5 + 17 * 16]<br>>+pmulhrsw m4, m0<br>>+packuswb m4, m4<br>>+movd [r0 + 48], m4<br>>+<br>>+pmaddubsw m4, m2, [r5 + 2 * 16]<br>
>+pmulhrsw m4, m0<br>>+packuswb m4, m4<br>>+movd [r0 + 52], m4<br>>+<br>>+pmaddubsw m4, m2, [r5 + 19 * 16]<br>>+pmulhrsw m4, m0<br>>+packuswb m4, m4<br>
>+movd [r0 + 56], m4<br>>+<br>>+pmaddubsw m3, [r5 + 4 * 16]<br>>+pmulhrsw m3, m0<br>>+packuswb m3, m3<br>>+movd [r0 + 60], m3<br>>+<br>>+; mode 6<br>
>+<br>>+pmaddubsw m3, m1, [r5 + 13 * 16]<br>>+pmulhrsw m3, m0<br>>+packuswb m3, m3<br>>+movd [r0 + 64], m3<br>>+<br>>+movd [r0 + 68], m5<br>
>+<br>>+pmaddubsw m3, m2, [r5 + 7 * 16]<br>>+pmulhrsw m3, m0<br>>+packuswb m3, m3<br>>+movd [r0 + 72], m3<br>>+<br>>+movd [r0 + 76], m6<br>>+<br>
>+; mode 7<br>>+<br>>+pmaddubsw m3, m1, [r5 + 9 * 16]<br>>+pmulhrsw m3, m0<br>>+packuswb m3, m3<br>>+movd [r0 + 80], m3<br>>+<br>>+pmaddubsw m3, m1, [r5 + 18 * 16]<br>
>+pmulhrsw m3, m0<br>>+packuswb m3, m3<br>>+movd [r0 + 84], m3<br>>+<br>>+pmaddubsw m3, m1, [r5 + 27 * 16]<br>>+pmulhrsw m3, m0<br>>+packuswb m3, m3<br>
>+movd [r0 + 88], m3<br>>+<br>>+pmaddubsw m2, [r5 + 4 * 16]<br>>+pmulhrsw m2, m0<br>>+packuswb m2, m2<br>>+movd [r0 + 92], m2<br>>+<br>>+; mode 8<br>
>+<br>>+pmaddubsw m2, m1, [r5 + 5 * 16]<br>>+pmulhrsw m2, m0<br>>+packuswb m2, m2<br>>+movd [r0 + 96], m2<br>>+<br>>+pmaddubsw m2, m1, [r5 + 10 * 16]<br>
>+pmulhrsw m2, m0<br>>+packuswb m2, m2<br>>+movd [r0 + 100], m2<br>>+<br>>+pmaddubsw m2, m1, [r5 + 15 * 16]<br>>+pmulhrsw m2, m0<br>>+packuswb m2, m2<br>
>+movd [r0 + 104], m2<br>>+<br>>+pmaddubsw m2, m1, [r5 + 20 * 16]<br>>+pmulhrsw m2, m0<br>>+packuswb m2, m2<br>>+movd [r0 + 108], m2<br>>+<br>
>+; mode 9<br>>+<br>>+pmaddubsw m2, m1, [r5 + 2 * 16]<br>>+pmulhrsw m2, m0<br>>+packuswb m2, m2<br>>+movd [r0 + 112], m2<br>>+<br>>+pmaddubsw m2, m1, [r5 + 4 * 16]<br>
>+pmulhrsw m2, m0<br>>+packuswb m2, m2<br>>+movd [r0 + 116], m2<br>>+<br>>+pmaddubsw m2, m1, [r5 + 6 * 16]<br>>+pmulhrsw m2, m0<br>>+packuswb m2, m2<br>
>+movd [r0 + 120], m2<br>>+<br>>+pmaddubsw m1, [r5 + 8 * 16]<br>>+pmulhrsw m1, m0<br>>+packuswb m1, m1<br>>+movd [r0 + 124], m1<br>>+<br>>+; mode 10<br>
>+<br>>+movh m1, [r2]<br>>+palignr m2, m1, 1<br>>+pshufd m3, m2, 0<br>>+movu [r0 + 128], m3<br>>+<br>>+mova m3, [tab_Zero]<br>
>+<br>>+pshufb m4, m2, m3<br>>+punpcklbw m4, m3<br>>+<br>>+movh m5, [r1]<br>>+<br>>+pshufb m6, m5, m3<br>>+punpcklbw m6, m3<br>
>+<br>>+psrldq m5, 1<br>>+punpcklbw m5, m3<br>>+<br>>+psubw m5, m6<br>>+psraw m5, 1<br>>+<br>>+paddw m4, m5<br>>+<br>>+packuswb m4, m3<br>
>+<br>>+pextrb [r0 + 128], m4, 0<br>>+pextrb [r0 + 132], m4, 1<br>>+pextrb [r0 + 136], m4, 2<br>>+pextrb [r0 + 140], m4, 3<br>>+<br>>+; mode 11<br>>+<br>>+palignr m2, m1, 1<br>
>+punpcklbw m1, m2<br>>+<br>>+pmaddubsw m2, m1, [r5 + 30 * 16]<br>>+pmulhrsw m2, m0<br>>+packuswb m2, m2<br>>+movd [r0 + 144], m2<br>>+<br>
>+pmaddubsw m2, m1, [r5 + 28 * 16]<br>>+pmulhrsw m2, m0<br>>+packuswb m2, m2<br>>+movd [r0 + 148], m2<br>>+<br>>+pmaddubsw m2, m1, [r5 + 26 * 16]<br>
>+pmulhrsw m2, m0<br>>+packuswb m2, m2<br>>+movd [r0 + 152], m2<br>>+<br>>+pmaddubsw m2, m1, [r5 + 24 * 16]<br>>+pmulhrsw m2, m0<br>>+packuswb m2, m2<br>
>+movd [r0 + 156], m2<br>>+<br>>+; mode 12<br>>+<br>>+pmaddubsw m2, m1, [r5 + 27 * 16]<br>>+pmulhrsw m2, m0<br>>+packuswb m2, m2<br>>+movd [r0 + 160], m2<br>
>+<br>>+pmaddubsw m2, m1, [r5 + 22 * 16]<br>>+pmulhrsw m2, m0<br>>+packuswb m2, m2<br>>+movd [r0 + 164], m2<br>>+<br>>+pmaddubsw m2, m1, [r5 + 17 * 16]<br>
>+pmulhrsw m2, m0<br>>+packuswb m2, m2<br>>+movd [r0 + 168], m2<br>>+<br>>+pmaddubsw m2, m1, [r5 + 12 * 16]<br>>+pmulhrsw m2, m0<br>>+packuswb m2, m2<br>
>+movd [r0 + 172], m2<br>>+<br>>+; mode 13<br>>+<br>>+pmaddubsw m2, m1, [r5 + 23 * 16]<br>>+pmulhrsw m2, m0<br>>+packuswb m2, m2<br>>+movd [r0 + 176], m2<br>
>+<br>>+pmaddubsw m2, m1, [r5 + 14 * 16]<br>>+pmulhrsw m2, m0<br>>+packuswb m2, m2<br>>+movd [r0 + 180], m2<br>>+<br>>+pmaddubsw m2, m1, [r5 + 5 * 16]<br>
>+pmulhrsw m2, m0<br>>+packuswb m2, m2<br>>+movd [r0 + 184], m2<br>>+<br>>+pslldq m2, m1, 2<br>>+pinsrb m2, [r1 + 0], 1<br>>+pinsrb m2, [r1 + 4], 0<br>
>+<br>>+pmaddubsw m3, m2, [r5 + 28 * 16]<br>>+pmulhrsw m3, m0<br>>+packuswb m3, m3<br>>+movd [r0 + 188], m3<br>>+<br>>+; mode 14<br>>+<br>>+pmaddubsw m3, m1, [r5 + 19 * 16]<br>
>+pmulhrsw m3, m0<br>>+packuswb m3, m3<br>>+movd [r0 + 192], m3<br>>+<br>>+pmaddubsw m5, m1, [r5 + 6 * 16]<br>>+pmulhrsw m5, m0<br>>+packuswb m5, m5<br>
>+movd [r0 + 196], m5<br>>+<br>>+pinsrb m2, [r1 + 2], 0<br>>+<br>>+pmaddubsw m3, m2, [r5 + 25 * 16]<br>>+pmulhrsw m3, m0<br>>+packuswb m3, m3<br>
>+movd [r0 + 200], m3<br>>+<br>>+pmaddubsw m3, m2, [r5 + 12 * 16]<br>>+pmulhrsw m3, m0<br>>+packuswb m3, m3<br>>+movd [r0 + 204], m3<br>>+<br>
>+; mode 15<br>>+<br>>+pmaddubsw m3, m1, [r5 + 15 * 16]<br>>+pmulhrsw m3, m0<br>>+packuswb m3, m3<br>>+movd [r0 + 208], m3<br>>+<br>>+pmaddubsw m3, m2, [r5 + 30 * 16]<br>
>+pmulhrsw m3, m0<br>>+packuswb m3, m3<br>>+movd [r0 + 212], m3<br>>+<br>>+pmaddubsw m3, m2, [r5 + 13 * 16]<br>>+pmulhrsw m3, m0<br>>+packuswb m3, m3<br>
>+movd [r0 + 216], m3<br>>+<br>>+pslldq m3, m2, 2<br>>+pinsrb m3, [r1 + 2], 1<br>>+pinsrb m3, [r1 + 4], 0<br>>+<br>>+pmaddubsw m4, m3, [r5 + 28 * 16]<br>
>+pmulhrsw m4, m0<br>>+packuswb m4, m4<br>>+movd [r0 + 220], m4<br>>+<br>>+; mode 16<br>>+<br>>+pmaddubsw m4, m1, [r5 + 11 * 16]<br>>+pmulhrsw m4, m0<br>
>+packuswb m4, m4<br>>+movd [r0 + 224], m4<br>>+<br>>+pmaddubsw m4, m2, [r5 + 22 * 16]<br>>+pmulhrsw m4, m0<br>>+packuswb m4, m4<br>>+movd [r0 + 228], m4<br>
>+<br>>+pmaddubsw m4, m2, [r5 + 1 * 16]<br>>+pmulhrsw m4, m0<br>>+packuswb m4, m4<br>>+movd [r0 + 232], m4<br>>+<br>>+pinsrb m3, [r1 + 3], 0<br>
>+<br>>+pmaddubsw m3, [r5 + 12 * 16]<br>>+pmulhrsw m3, m0<br>>+packuswb m3, m3<br>>+movd [r0 + 236], m3<br>>+<br>>+; mode 17<br>>+<br>>+movd [r0 + 240], m5<br>
>+<br>>+pslldq m1, 2<br>>+pinsrb m1, [r1 + 1], 0<br>>+pinsrb m1, [r1 + 0], 1<br>>+<br>>+pmaddubsw m2, m1, [r5 + 12 * 16]<br>>+pmulhrsw m2, m0<br>
>+packuswb m2, m2<br>>+movd [r0 + 244], m2<br>>+<br>>+pslldq m1, 2<br>>+pinsrb m1, [r1 + 2], 0<br>>+pinsrb m1, [r1 + 1], 1<br>>+<br>
>+pmaddubsw m2, m1, [r5 + 18 * 16]<br>>+pmulhrsw m2, m0<br>>+packuswb m2, m2<br>>+movd [r0 + 248], m2<br>>+<br>>+pslldq m1, 2<br>>+pinsrb m1, [r1 + 4], 0<br>
>+pinsrb m1, [r1 + 2], 1<br>>+<br>>+pmaddubsw m1, [r5 + 24 * 16]<br>>+pmulhrsw m1, m0<br>>+packuswb m1, m1<br>>+movd [r0 + 252], m1<br>>+<br>
>+; mode 18<br>>+<br>>+movh m1, [r1]<br>>+movd [r0 + 256], m1<br>>+<br>>+pslldq m2, m1, 1<br>>+pinsrb m2, [r2 + 1], 0<br>>+movd [r0 + 260], m2<br>
>+<br>>+pslldq m3, m2, 1<br>>+pinsrb m3, [r2 + 2], 0<br>>+movd [r0 + 264], m3<br>>+<br>>+pslldq m4, m3, 1<br>>+pinsrb m4, [r2 + 3], 0<br>
>+movd [r0 + 268], m4<br>>+<br>>+; mode 19<br>>+<br>>+palignr m4, m1, 1<br>>+punpcklbw m1, m4<br>>+<br>>+pmaddubsw m5, m1, [r5 + 6 * 16]<br>
>+pmulhrsw m5, m0<br>>+packuswb m5, m5<br>>+movd [r0 + 272], m5<br>>+<br>>+pslldq m2, m1, 2<br>>+pinsrb m2, [r2 + 1], 0<br>>+pinsrb m2, [r2], 1<br>
>+<br>>+pmaddubsw m3, m2, [r5 + 12 * 16]<br>>+pmulhrsw m3, m0<br>>+packuswb m3, m3<br>>+movd [r0 + 276], m3<br>>+<br>>+pslldq m3, m2, 2<br>
>+pinsrb m3, [r2 + 1], 1<br>>+pinsrb m3, [r2 + 2], 0<br>>+<br>>+pmaddubsw m4, m3, [r5 + 18 * 16]<br>>+pmulhrsw m4, m0<br>>+packuswb m4, m4<br>
>+movd [r0 + 280], m4<br>>+<br>>+pslldq m3, 2<br>>+pinsrb m3, [r2 + 2], 1<br>>+pinsrb m3, [r2 + 4], 0<br>>+<br>>+pmaddubsw m3, [r5 + 24 * 16]<br>
>+pmulhrsw m3, m0<br>>+packuswb m3, m3<br>>+movd [r0 + 284], m3<br>>+<br>>+; mode 20<br>>+<br>>+pmaddubsw m3, m1, [r5 + 11 * 16]<br>>+pmulhrsw m3, m0<br>
>+packuswb m3, m3<br>>+movd [r0 + 288], m3<br>>+<br>>+pinsrb m2, [r2 + 2], 0<br>>+<br>>+pmaddubsw m3, m2, [r5 + 22 * 16]<br>>+pmulhrsw m3, m0<br>
>+packuswb m3, m3<br>>+movd [r0 + 292], m3<br>>+<br>>+pmaddubsw m3, m2, [r5 + 1 * 16]<br>>+pmulhrsw m3, m0<br>>+packuswb m3, m3<br>>+movd [r0 + 296], m3<br>
>+<br>>+pslldq m3, m2, 2<br>>+pinsrb m3, [r2 + 2], 1<br>>+pinsrb m3, [r2 + 3], 0<br>>+<br>>+pmaddubsw m4, m3, [r5 + 12 * 16]<br>>+pmulhrsw m4, m0<br>
>+packuswb m4, m4<br>>+movd [r0 + 300], m4<br>>+<br>>+; mode 21<br>>+<br>>+pmaddubsw m4, m1, [r5 + 15 * 16]<br>>+pmulhrsw m4, m0<br>>+packuswb m4, m4<br>
>+movd [r0 + 304], m4<br>>+<br>>+pmaddubsw m4, m2, [r5 + 30 * 16]<br>>+pmulhrsw m4, m0<br>>+packuswb m4, m4<br>>+movd [r0 + 308], m4<br>>+<br>
>+pmaddubsw m4, m2, [r5 + 13 * 16]<br>>+pmulhrsw m4, m0<br>>+packuswb m4, m4<br>>+movd [r0 + 312], m4<br>>+<br>>+pinsrb m3, [r2 + 4], 0<br>
>+<br>>+pmaddubsw m3, [r5 + 28 * 16]<br>>+pmulhrsw m3, m0<br>>+packuswb m3, m3<br>>+movd [r0 + 316], m3<br>>+<br>>+; mode 22<br>>+<br>>+pmaddubsw m3, m1, [r5 + 19 * 16]<br>
>+pmulhrsw m3, m0<br>>+packuswb m3, m3<br>>+movd [r0 + 320], m3<br>>+<br>>+movd [r0 + 324], m5<br>>+<br>>+pmaddubsw m3, m2, [r5 + 25 * 16]<br>
>+pmulhrsw m3, m0<br>>+packuswb m3, m3<br>>+movd [r0 + 328], m3<br>>+<br>>+pmaddubsw m3, m2, [r5 + 12 * 16]<br>>+pmulhrsw m3, m0<br>>+packuswb m3, m3<br>
>+movd [r0 + 332], m3<br>>+<br>>+; mode 23<br>>+<br>>+pmaddubsw m3, m1, [r5 + 23 * 16]<br>>+pmulhrsw m3, m0<br>>+packuswb m3, m3<br>>+movd [r0 + 336], m3<br>
>+<br>>+pmaddubsw m3, m1, [r5 + 14 * 16]<br>>+pmulhrsw m3, m0<br>>+packuswb m3, m3<br>>+movd [r0 + 340], m3<br>>+<br>>+pmaddubsw m3, m1, [r5 + 5 * 16]<br>
>+pmulhrsw m3, m0<br>>+packuswb m3, m3<br>>+movd [r0 + 344], m3<br>>+<br>>+pinsrb m2, [r2 + 4], 0<br>>+<br>>+pmaddubsw m2, [r5 + 28 * 16]<br>
>+pmulhrsw m2, m0<br>>+packuswb m2, m2<br>>+movd [r0 + 348], m2<br>>+<br>>+; mode 24<br>>+<br>>+pmaddubsw m2, m1, [r5 + 27 * 16]<br>>+pmulhrsw m2, m0<br>
>+packuswb m2, m2<br>>+movd [r0 + 352], m2<br>>+<br>>+pmaddubsw m2, m1, [r5 + 22 * 16]<br>>+pmulhrsw m2, m0<br>>+packuswb m2, m2<br>>+movd [r0 + 356], m2<br>
>+<br>>+pmaddubsw m2, m1, [r5 + 17 * 16]<br>>+pmulhrsw m2, m0<br>>+packuswb m2, m2<br>>+movd [r0 + 360], m2<br>>+<br>>+pmaddubsw m2, m1, [r5 + 12 * 16]<br>
>+pmulhrsw m2, m0<br>>+packuswb m2, m2<br>>+movd [r0 + 364], m2<br>>+<br>>+; mode 25<br>>+<br>>+pmaddubsw m2, m1, [r5 + 30 * 16]<br>>+pmulhrsw m2, m0<br>
>+packuswb m2, m2<br>>+movd [r0 + 368], m2<br>>+<br>>+pmaddubsw m2, m1, [r5 + 28 * 16]<br>>+pmulhrsw m2, m0<br>>+packuswb m2, m2<br>>+movd [r0 + 372], m2<br>
>+<br>>+pmaddubsw m2, m1, [r5 + 26 * 16]<br>>+pmulhrsw m2, m0<br>>+packuswb m2, m2<br>>+movd [r0 + 376], m2<br>>+<br>>+pmaddubsw m2, m1, [r5 + 24 * 16]<br>
>+pmulhrsw m2, m0<br>>+packuswb m2, m2<br>>+movd [r0 + 380], m2<br>>+<br>>+; mode 26<br>>+<br>>+movh m1, [r1 + 1]<br>>+pshufd m2, m1, 0<br>
>+movu [r0 + 384], m2<br>>+<br>>+mova m2, [tab_Zero]<br>>+<br>>+pshufb m3, m1, m2<br>>+punpcklbw m3, m2<br>>+<br>>+movh m4, [r2]<br>
>+<br>>+pshufb m5, m4, m2<br>>+punpcklbw m5, m2<br>>+<br>>+psrldq m4, 1<br>>+punpcklbw m4, m2<br>>+<br>>+psubw m4, m5<br>
>+psraw m4, 1<br>>+<br>>+paddw m3, m4<br>>+<br>>+packuswb m3, m2<br>>+<br>>+pextrb [r0 + 384], m3, 0<br>>+pextrb [r0 + 388], m3, 1<br>
>+pextrb [r0 + 392], m3, 2<br>>+pextrb [r0 + 396], m3, 3<br>>+<br>>+; mode 27<br>>+<br>>+palignr m2, m1, 1<br>>+punpcklbw m1, m2<br>>+<br>>+pmaddubsw m2, m1, [r5 + 2 * 16]<br>
>+pmulhrsw m2, m0<br>>+packuswb m2, m2<br>>+movd [r0 + 400], m2<br>>+<br>>+pmaddubsw m2, m1, [r5 + 4 * 16]<br>>+pmulhrsw m2, m0<br>>+packuswb m2, m2<br>
>+movd [r0 + 404], m2<br>>+<br>>+pmaddubsw m2, m1, [r5 + 6 * 16]<br>>+pmulhrsw m2, m0<br>>+packuswb m2, m2<br>>+movd [r0 + 408], m2<br>>+<br>
>+pmaddubsw m2, m1, [r5 + 8 * 16]<br>>+pmulhrsw m2, m0<br>>+packuswb m2, m2<br>>+movd [r0 + 412], m2<br>>+<br>>+; mode 28<br>>+<br>>+pmaddubsw m2, m1, [r5 + 5 * 16]<br>
>+pmulhrsw m2, m0<br>>+packuswb m2, m2<br>>+movd [r0 + 416], m2<br>>+<br>>+pmaddubsw m2, m1, [r5 + 10 * 16]<br>>+pmulhrsw m2, m0<br>>+packuswb m2, m2<br>
>+movd [r0 + 420], m2<br>>+<br>>+pmaddubsw m2, m1, [r5 + 15 * 16]<br>>+pmulhrsw m2, m0<br>>+packuswb m2, m2<br>>+movd [r0 + 424], m2<br>>+<br>
>+pmaddubsw m2, m1, [r5 + 20 * 16]<br>>+pmulhrsw m2, m0<br>>+packuswb m2, m2<br>>+movd [r0 + 428], m2<br>>+<br>>+; mode 29<br>>+<br>>+pmaddubsw m2, m1, [r5 + 9 * 16]<br>
>+pmulhrsw m2, m0<br>>+packuswb m2, m2<br>>+movd [r0 + 432], m2<br>>+<br>>+pmaddubsw m2, m1, [r5 + 18 * 16]<br>>+pmulhrsw m2, m0<br>>+packuswb m2, m2<br>
>+movd [r0 + 436], m2<br>>+<br>>+pmaddubsw m2, m1, [r5 + 27 * 16]<br>>+pmulhrsw m2, m0<br>>+packuswb m2, m2<br>>+movd [r0 + 440], m2<br>>+<br>
>+palignr m2, m1, 2<br>>+<br>>+pmaddubsw m3, m2, [r5 + 4 * 16]<br>>+pmulhrsw m3, m0<br>>+packuswb m3, m3<br>>+movd [r0 + 444], m3<br>>+<br>
>+; mode 30<br>>+<br>>+pmaddubsw m3, m1, [r5 + 13 * 16]<br>>+pmulhrsw m3, m0<br>>+packuswb m3, m3<br>>+movd [r0 + 448], m3<br>>+<br>>+pmaddubsw m6, m1, [r5 + 26 * 16]<br>
>+pmulhrsw m6, m0<br>>+packuswb m6, m6<br>>+movd [r0 + 452], m6<br>>+<br>>+pmaddubsw m3, m2, [r5 + 7 * 16]<br>>+pmulhrsw m3, m0<br>>+packuswb m3, m3<br>
>+movd [r0 + 456], m3<br>>+<br>>+pmaddubsw m5, m2, [r5 + 20 * 16]<br>>+pmulhrsw m5, m0<br>>+packuswb m5, m5<br>>+movd [r0 + 460], m5<br>>+<br>
>+; mode 31<br>>+<br>>+pmaddubsw m3, m1, [r5 + 17 * 16]<br>>+pmulhrsw m3, m0<br>>+packuswb m3, m3<br>>+movd [r0 + 464], m3<br>>+<br>>+pmaddubsw m3, m2, [r5 + 2 * 16]<br>
>+pmulhrsw m3, m0<br>>+packuswb m3, m3<br>>+movd [r0 + 468], m3<br>>+<br>>+pmaddubsw m3, m2, [r5 + 19 * 16]<br>>+pmulhrsw m3, m0<br>>+packuswb m3, m3<br>
>+movd [r0 + 472], m3<br>>+<br>>+palignr m3, m2, 2<br>>+<br>>+pmaddubsw m4, m3, [r5 + 4 * 16]<br>>+pmulhrsw m4, m0<br>>+packuswb m4, m4<br>
>+movd [r0 + 476], m4<br>>+<br>>+; mode 32<br>>+<br>>+pmaddubsw m4, m1, [r5 + 21 * 16]<br>>+pmulhrsw m4, m0<br>>+packuswb m4, m4<br>>+movd [r0 + 480], m4<br>
>+<br>>+pmaddubsw m4, m2, [r5 + 10 * 16]<br>>+pmulhrsw m4, m0<br>>+packuswb m4, m4<br>>+movd [r0 + 484], m4<br>>+<br>>+pmaddubsw m4, m2, [r5 + 31 * 16]<br>
>+pmulhrsw m4, m0<br>>+packuswb m4, m4<br>>+movd [r0 + 488], m4<br>>+<br>>+pmaddubsw m4, m3, [r5 + 20 * 16]<br>>+pmulhrsw m4, m0<br>>+packuswb m4, m4<br>
>+movd [r0 + 492], m4<br>>+<br>>+; mode 33<br>>+<br>>+movd [r0 + 496], m6<br>>+<br>>+movd [r0 + 500], m5<br>>+<br>>+pmaddubsw m4, m3, [r5 + 14 * 16]<br>
>+pmulhrsw m4, m0<br>>+packuswb m4, m4<br>>+movd [r0 + 504], m4<br>>+<br>>+psrldq m3, 2<br>>+<br>>+pmaddubsw m3, [r5 + 8 * 16]<br>>+pmulhrsw m3, m0<br>
>+packuswb m3, m3<br>>+movd [r0 + 508], m3<br>>+<br>>+; mode 34<br>>+<br>>+movh m0, [r1 + 2]<br>>+movd [r0 + 512], m0<br>>+<br>>+palignr m1, m0, 1<br>
>+movd [r0 + 516], m1<br>>+<br>>+palignr m1, m0, 2<br>>+movd [r0 + 520], m1<br>>+<br>>+palignr m1, m0, 3<br>>+movd [r0 + 524], m1<br>>+<br>
>+RET<br>>diff -r 1d2d60f4eb81 -r 1e130f825cbb source/test/intrapredharness.cpp<br>>--- a/source/test/intrapredharness.cpp Tue Dec 03 23:56:22 2013 -0600<br>>+++ b/source/test/intrapredharness.cpp Wed Dec 04 19:26:58 2013 +0530<br>
>@@ -210,7 +210,7 @@<br>> <br>> for (int i = 0; i <= 100; i++)<br>> {<br>>- isLuma = (width <= 16) && (rand() % 2);<br>>+ isLuma = (width <= 16) ? true : false; // bFilter is true for 4x4, 8x8, 16x16 and false for 32x32<br>
> <br>> pixel * refAbove0 = pixel_buff + j;<br>> pixel * refLeft0 = refAbove0 + 3 * width;<br></div></div>>_______________________________________________<br>>x265-devel mailing list<br>
><a href="mailto:x265-devel@videolan.org" target="_blank">x265-devel@videolan.org</a><br>><a href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</div></div><br>_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
<br></blockquote></div><br></div>