<div dir="ltr"><div class="gmail_default" style="font-family:trebuchet ms,sans-serif;font-size:small">Ok. I will resend this patch on the latest tip.<br></div></div><div class="gmail_extra"><br><div class="gmail_quote">On Tue, May 19, 2015 at 8:47 PM, Steve Borho <span dir="ltr">&lt;<a href="mailto:steve@borho.org" target="_blank">steve@borho.org</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><span class="">On 05/19, <a href="mailto:dnyaneshwar@multicorewareinc.com">dnyaneshwar@multicorewareinc.com</a> wrote:<br>
> # HG changeset patch<br>
> # User Dnyaneshwar G &lt;<a href="mailto:dnyaneshwar@multicorewareinc.com">dnyaneshwar@multicorewareinc.com</a>&gt;<br>
> # Date 1432028888 -19800<br>
> # Tue May 19 15:18:08 2015 +0530<br>
> # Node ID b44cdf8dc08c77e84b8707992cd0006bbf23d864<br>
> # Parent ac32faec79be9c6a60d267086b4563bd884537c0<br>
> asm: removed some duplicate constants and moved others into const-a.asm<br>
<br>
</span>looks fine, but is not applying on the tip<br>
<div><div class="h5"><br>
> diff -r ac32faec79be -r b44cdf8dc08c source/common/x86/const-a.asm<br>
> --- a/source/common/x86/const-a.asm Mon May 18 18:03:19 2015 +0530<br>
> +++ b/source/common/x86/const-a.asm Tue May 19 15:18:08 2015 +0530<br>
> @@ -63,6 +63,8 @@<br>
><br>
> const pw_1, times 16 dw 1<br>
> const pw_2, times 16 dw 2<br>
> +const pw_3, times 16 dw 3<br>
> +const pw_7, times 16 dw 7<br>
> const pw_m2, times 8 dw -2<br>
> const pw_4, times 8 dw 4<br>
> const pw_8, times 8 dw 8<br>
> @@ -110,6 +112,7 @@<br>
> const pd_4, times 4 dd 4<br>
> const pd_8, times 4 dd 8<br>
> const pd_16, times 4 dd 16<br>
> +const pd_31, times 4 dd 31<br>
> const pd_32, times 4 dd 32<br>
> const pd_64, times 4 dd 64<br>
> const pd_128, times 4 dd 128<br>
> diff -r ac32faec79be -r b44cdf8dc08c source/common/x86/intrapred16.asm<br>
> --- a/source/common/x86/intrapred16.asm Mon May 18 18:03:19 2015 +0530<br>
> +++ b/source/common/x86/intrapred16.asm Tue May 19 15:18:08 2015 +0530<br>
> @@ -44,7 +44,6 @@<br>
> const pw_punpcklwd, db 0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8, 9<br>
> const c_mode32_10_0, db 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1<br>
><br>
> -const pw_unpackwdq, times 8 db 0,1<br>
> const pw_ang8_12, db 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 13, 0, 1<br>
> const pw_ang8_13, db 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 15, 8, 9, 0, 1<br>
> const pw_ang8_14, db 0, 0, 0, 0, 0, 0, 0, 0, 14, 15, 10, 11, 4, 5, 0, 1<br>
> @@ -58,16 +57,6 @@<br>
><br>
> ;; (blkSize - 1 - x)<br>
> pw_planar4_0: dw 3, 2, 1, 0, 3, 2, 1, 0<br>
> -pw_planar4_1: dw 3, 3, 3, 3, 3, 3, 3, 3<br>
> -pw_planar8_0: dw 7, 6, 5, 4, 3, 2, 1, 0<br>
> -pw_planar8_1: dw 7, 7, 7, 7, 7, 7, 7, 7<br>
> -pw_planar16_0: dw 15, 14, 13, 12, 11, 10, 9, 8<br>
> -pw_planar16_1: dw 15, 15, 15, 15, 15, 15, 15, 15<br>
> -pd_planar32_1: dd 31, 31, 31, 31<br>
> -<br>
> -pw_planar32_1: dw 31, 31, 31, 31, 31, 31, 31, 31<br>
> -pw_planar32_L: dw 31, 30, 29, 28, 27, 26, 25, 24<br>
> -pw_planar32_H: dw 23, 22, 21, 20, 19, 18, 17, 16<br>
><br>
> const planar32_table<br>
> %assign x 31<br>
> @@ -85,8 +74,11 @@<br>
><br>
> SECTION .text<br>
><br>
> +cextern pb_01<br>
> cextern pw_1<br>
> cextern pw_2<br>
> +cextern pw_3<br>
> +cextern pw_7<br>
> cextern pw_4<br>
> cextern pw_8<br>
> cextern pw_15<br>
> @@ -95,6 +87,7 @@<br>
> cextern pw_32<br>
> cextern pw_1023<br>
> cextern pd_16<br>
> +cextern pd_31<br>
> cextern pd_32<br>
> cextern pw_4096<br>
> cextern multiL<br>
> @@ -681,7 +674,7 @@<br>
> pshufd m4, m4, 0 ; v_bottomLeft<br>
><br>
> pmullw m3, [multiL] ; (x + 1) * topRight<br>
> - pmullw m0, m1, [pw_planar8_1] ; (blkSize - 1 - y) * above[x]<br>
> + pmullw m0, m1, [pw_7] ; (blkSize - 1 - y) * above[x]<br>
> paddw m3, [pw_8]<br>
> paddw m3, m4<br>
> paddw m3, m0<br>
> @@ -695,7 +688,7 @@<br>
> pshufhw m1, m2, 0x55 * (%1 - 4)<br>
> pshufd m1, m1, 0xAA<br>
> %endif<br>
> - pmullw m1, [pw_planar8_0]<br>
> + pmullw m1, [pw_planar16_mul + mmsize]<br>
> paddw m1, m3<br>
> psraw m1, 4<br>
> movu [r0], m1<br>
> @@ -733,8 +726,8 @@<br>
><br>
> pmullw m4, m3, [multiH] ; (x + 1) * topRight<br>
> pmullw m3, [multiL] ; (x + 1) * topRight<br>
> - pmullw m1, m2, [pw_planar16_1] ; (blkSize - 1 - y) * above[x]<br>
> - pmullw m5, m7, [pw_planar16_1] ; (blkSize - 1 - y) * above[x]<br>
> + pmullw m1, m2, [pw_15] ; (blkSize - 1 - y) * above[x]<br>
> + pmullw m5, m7, [pw_15] ; (blkSize - 1 - y) * above[x]<br>
> paddw m4, [pw_16]<br>
> paddw m3, [pw_16]<br>
> paddw m4, m6<br>
> @@ -770,8 +763,8 @@<br>
> paddw m4, m1<br>
> lea r0, [r0 + r1 * 2]<br>
> %endif<br>
> - pmullw m0, m5, [pw_planar8_0]<br>
> - pmullw m5, [pw_planar16_0]<br>
> + pmullw m0, m5, [pw_planar16_mul + mmsize]<br>
> + pmullw m5, [pw_planar16_mul]<br>
> paddw m0, m4<br>
> paddw m5, m3<br>
> psraw m5, 5<br>
> @@ -827,7 +820,7 @@<br>
> mova m9, m6<br>
> mova m10, m6<br>
><br>
> - mova m12, [pw_planar32_1]<br>
> + mova m12, [pw_31]<br>
> movu m4, [r2 + 2]<br>
> psubw m8, m4<br>
> pmullw m4, m12<br>
> @@ -848,10 +841,10 @@<br>
> pmullw m5, m12<br>
> paddw m3, m5<br>
><br>
> - mova m12, [pw_planar32_L]<br>
> - mova m13, [pw_planar32_H]<br>
> - mova m14, [pw_planar16_0]<br>
> - mova m15, [pw_planar8_0]<br>
> + mova m12, [pw_planar32_mul]<br>
> + mova m13, [pw_planar32_mul + mmsize]<br>
> + mova m14, [pw_planar16_mul]<br>
> + mova m15, [pw_planar16_mul + mmsize]<br>
> add r1, r1<br>
><br>
> %macro PROCESS 1<br>
> @@ -1596,7 +1589,7 @@<br>
> pshufd m4, m4, 0xAA<br>
><br>
> pmullw m3, [multi_2Row] ; (x + 1) * topRight<br>
> - pmullw m0, m1, [pw_planar4_1] ; (blkSize - 1 - y) * above[x]<br>
> + pmullw m0, m1, [pw_3] ; (blkSize - 1 - y) * above[x]<br>
><br>
> paddw m3, [pw_4]<br>
> paddw m3, m4<br>
> @@ -1934,7 +1927,7 @@<br>
> pshufd m4, m4, 0xAA<br>
><br>
> pmullw m3, [multi_2Row] ; (x + 1) * topRight<br>
> - pmullw m0, m1, [pw_planar4_1] ; (blkSize - 1 - y) * above[x]<br>
> + pmullw m0, m1, [pw_3] ; (blkSize - 1 - y) * above[x]<br>
><br>
> paddw m3, [pw_4]<br>
> paddw m3, m4<br>
> @@ -1990,12 +1983,12 @@<br>
> pshufd m4, m4, 0 ; v_bottomLeft<br>
><br>
> pmullw m3, [multiL] ; (x + 1) * topRight<br>
> - pmullw m0, m1, [pw_planar8_1] ; (blkSize - 1 - y) * above[x]<br>
> + pmullw m0, m1, [pw_7] ; (blkSize - 1 - y) * above[x]<br>
> paddw m3, [pw_8]<br>
> paddw m3, m4<br>
> paddw m3, m0<br>
> psubw m4, m1<br>
> - mova m0, [pw_planar8_0]<br>
> + mova m0, [pw_planar16_mul + mmsize]<br>
><br>
> %macro INTRA_PRED_PLANAR8 1<br>
> %if (%1 &lt; 4)<br>
> @@ -2042,8 +2035,8 @@<br>
><br>
> pmullw m4, m3, [multiH] ; (x + 1) * topRight<br>
> pmullw m3, [multiL] ; (x + 1) * topRight<br>
> - pmullw m1, m2, [pw_planar16_1] ; (blkSize - 1 - y) * above[x]<br>
> - pmullw m5, m7, [pw_planar16_1] ; (blkSize - 1 - y) * above[x]<br>
> + pmullw m1, m2, [pw_15] ; (blkSize - 1 - y) * above[x]<br>
> + pmullw m5, m7, [pw_15] ; (blkSize - 1 - y) * above[x]<br>
> paddw m4, [pw_16]<br>
> paddw m3, [pw_16]<br>
> paddw m4, m6<br>
> @@ -2074,8 +2067,8 @@<br>
> %endif<br>
> %endif<br>
> %endif<br>
> - pmullw m0, m5, [pw_planar8_0]<br>
> - pmullw m5, [pw_planar16_0]<br>
> + pmullw m0, m5, [pw_planar16_mul + mmsize]<br>
> + pmullw m5, [pw_planar16_mul]<br>
> paddw m0, m4<br>
> paddw m5, m3<br>
> paddw m3, m6<br>
> @@ -2192,28 +2185,28 @@<br>
><br>
> ; above[0-3] * (blkSize - 1 - y)<br>
> pmovzxwd m4, [r2 + 2]<br>
> - pmulld m5, m4, [pd_planar32_1]<br>
> + pmulld m5, m4, [pd_31]<br>
> paddd m0, m5<br>
> psubd m5, m6, m4<br>
> mova m8, m5<br>
><br>
> ; above[4-7] * (blkSize - 1 - y)<br>
> pmovzxwd m4, [r2 + 10]<br>
> - pmulld m5, m4, [pd_planar32_1]<br>
> + pmulld m5, m4, [pd_31]<br>
> paddd m1, m5<br>
> psubd m5, m6, m4<br>
> mova m9, m5<br>
><br>
> ; above[8-11] * (blkSize - 1 - y)<br>
> pmovzxwd m4, [r2 + 18]<br>
> - pmulld m5, m4, [pd_planar32_1]<br>
> + pmulld m5, m4, [pd_31]<br>
> paddd m2, m5<br>
> psubd m5, m6, m4<br>
> mova m10, m5<br>
><br>
> ; above[12-15] * (blkSize - 1 - y)<br>
> pmovzxwd m4, [r2 + 26]<br>
> - pmulld m5, m4, [pd_planar32_1]<br>
> + pmulld m5, m4, [pd_31]<br>
> paddd m3, m5<br>
> psubd m5, m6, m4<br>
> mova m11, m5<br>
> @@ -2221,7 +2214,7 @@<br>
> ; above[16-19] * (blkSize - 1 - y)<br>
> pmovzxwd m4, [r2 + 34]<br>
> mova m7, m12<br>
> - pmulld m5, m4, [pd_planar32_1]<br>
> + pmulld m5, m4, [pd_31]<br>
> paddd m7, m5<br>
> mova m12, m7<br>
> psubd m5, m6, m4<br>
> @@ -2230,7 +2223,7 @@<br>
> ; above[20-23] * (blkSize - 1 - y)<br>
> pmovzxwd m4, [r2 + 42]<br>
> mova m7, m13<br>
> - pmulld m5, m4, [pd_planar32_1]<br>
> + pmulld m5, m4, [pd_31]<br>
> paddd m7, m5<br>
> mova m13, m7<br>
> psubd m5, m6, m4<br>
> @@ -2239,7 +2232,7 @@<br>
> ; above[24-27] * (blkSize - 1 - y)<br>
> pmovzxwd m4, [r2 + 50]<br>
> mova m7, m14<br>
> - pmulld m5, m4, [pd_planar32_1]<br>
> + pmulld m5, m4, [pd_31]<br>
> paddd m7, m5<br>
> mova m14, m7<br>
> psubd m5, m6, m4<br>
> @@ -2248,7 +2241,7 @@<br>
> ; above[28-31] * (blkSize - 1 - y)<br>
> pmovzxwd m4, [r2 + 58]<br>
> mova m7, m15<br>
> - pmulld m5, m4, [pd_planar32_1]<br>
> + pmulld m5, m4, [pd_31]<br>
> paddd m7, m5<br>
> mova m15, m7<br>
> psubd m5, m6, m4<br>
> @@ -3766,33 +3759,33 @@<br>
> RET<br>
><br>
> cglobal intra_pred_ang8_10, 3,6,3<br>
> - movu m1, [r2 + 34] ; [8 7 6 5 4 3 2 1]<br>
> - pshufb m0, m1, [pw_unpackwdq] ; [1 1 1 1 1 1 1 1]<br>
> + movu m1, [r2 + 34] ; [8 7 6 5 4 3 2 1]<br>
> + pshufb m0, m1, [pb_01] ; [1 1 1 1 1 1 1 1]<br>
> add r1, r1<br>
> lea r3, [r1 * 3]<br>
><br>
> psrldq m1, 2<br>
> - pshufb m2, m1, [pw_unpackwdq] ; [2 2 2 2 2 2 2 2]<br>
> + pshufb m2, m1, [pb_01] ; [2 2 2 2 2 2 2 2]<br>
> movu [r0 + r1], m2<br>
> psrldq m1, 2<br>
> - pshufb m2, m1, [pw_unpackwdq] ; [3 3 3 3 3 3 3 3]<br>
> + pshufb m2, m1, [pb_01] ; [3 3 3 3 3 3 3 3]<br>
> movu [r0 + r1 * 2], m2<br>
> psrldq m1, 2<br>
> - pshufb m2, m1, [pw_unpackwdq] ; [4 4 4 4 4 4 4 4]<br>
> + pshufb m2, m1, [pb_01] ; [4 4 4 4 4 4 4 4]<br>
> movu [r0 + r3], m2<br>
><br>
> lea r5, [r0 + r1 *4]<br>
> psrldq m1, 2<br>
> - pshufb m2, m1, [pw_unpackwdq] ; [5 5 5 5 5 5 5 5]<br>
> + pshufb m2, m1, [pb_01] ; [5 5 5 5 5 5 5 5]<br>
> movu [r5], m2<br>
> psrldq m1, 2<br>
> - pshufb m2, m1, [pw_unpackwdq] ; [6 6 6 6 6 6 6 6]<br>
> + pshufb m2, m1, [pb_01] ; [6 6 6 6 6 6 6 6]<br>
> movu [r5 + r1], m2<br>
> psrldq m1, 2<br>
> - pshufb m2, m1, [pw_unpackwdq] ; [7 7 7 7 7 7 7 7]<br>
> + pshufb m2, m1, [pb_01] ; [7 7 7 7 7 7 7 7]<br>
> movu [r5 + r1 * 2], m2<br>
> psrldq m1, 2<br>
> - pshufb m2, m1, [pw_unpackwdq] ; [8 8 8 8 8 8 8 8]<br>
> + pshufb m2, m1, [pb_01] ; [8 8 8 8 8 8 8 8]<br>
> movu [r5 + r3], m2<br>
><br>
> cmp r4m, byte 0<br>
> @@ -3801,7 +3794,7 @@<br>
> ; filter<br>
><br>
> movh m1, [r2] ; [3 2 1 0]<br>
> - pshufb m2, m1, [pw_unpackwdq] ; [0 0 0 0 0 0 0 0]<br>
> + pshufb m2, m1, [pb_01] ; [0 0 0 0 0 0 0 0]<br>
> movu m1, [r2 + 2] ; [8 7 6 5 4 3 2 1]<br>
> psubw m1, m2<br>
> psraw m1, 1<br>
> @@ -5671,9 +5664,9 @@<br>
> jz .quit<br>
><br>
> ; filter<br>
> - pshufb m0, [pw_unpackwdq]<br>
> + pshufb m0, [pb_01]<br>
> pinsrw m1, [r2], 0 ; [3 2 1 0]<br>
> - pshufb m2, m1, [pw_unpackwdq] ; [0 0 0 0 0 0 0 0]<br>
> + pshufb m2, m1, [pb_01] ; [0 0 0 0 0 0 0 0]<br>
> movu m1, [r2 + 2 + 32] ; [8 7 6 5 4 3 2 1]<br>
> psubw m1, m2<br>
> psraw m1, 1<br>
> @@ -10006,73 +9999,73 @@<br>
> mov r5d, r4m<br>
> movu m1, [r2 + 2 + 64] ; [8 7 6 5 4 3 2 1]<br>
> movu m3, [r2 + 18 + 64] ; [16 15 14 13 12 11 10 9]<br>
> - pshufb m0, m1, [pw_unpackwdq] ; [1 1 1 1 1 1 1 1]<br>
> + pshufb m0, m1, [pb_01] ; [1 1 1 1 1 1 1 1]<br>
> add r1, r1<br>
> lea r4, [r1 * 3]<br>
><br>
> psrldq m1, 2<br>
> - pshufb m2, m1, [pw_unpackwdq] ; [2 2 2 2 2 2 2 2]<br>
> + pshufb m2, m1, [pb_01] ; [2 2 2 2 2 2 2 2]<br>
> movu [r0 + r1], m2<br>
> movu [r0 + r1 + 16], m2<br>
> psrldq m1, 2<br>
> - pshufb m2, m1, [pw_unpackwdq] ; [3 3 3 3 3 3 3 3]<br>
> + pshufb m2, m1, [pb_01] ; [3 3 3 3 3 3 3 3]<br>
> movu [r0 + r1 * 2], m2<br>
> movu [r0 + r1 * 2 + 16], m2<br>
> psrldq m1, 2<br>
> - pshufb m2, m1, [pw_unpackwdq] ; [4 4 4 4 4 4 4 4]<br>
> + pshufb m2, m1, [pb_01] ; [4 4 4 4 4 4 4 4]<br>
> movu [r0 + r4], m2<br>
> movu [r0 + r4 + 16], m2<br>
><br>
> lea r3, [r0 + r1 *4]<br>
> psrldq m1, 2<br>
> - pshufb m2, m1, [pw_unpackwdq] ; [5 5 5 5 5 5 5 5]<br>
> + pshufb m2, m1, [pb_01] ; [5 5 5 5 5 5 5 5]<br>
> movu [r3], m2<br>
> movu [r3 + 16], m2<br>
> psrldq m1, 2<br>
> - pshufb m2, m1, [pw_unpackwdq] ; [6 6 6 6 6 6 6 6]<br>
> + pshufb m2, m1, [pb_01] ; [6 6 6 6 6 6 6 6]<br>
> movu [r3 + r1], m2<br>
> movu [r3 + r1 + 16], m2<br>
> psrldq m1, 2<br>
> - pshufb m2, m1, [pw_unpackwdq] ; [7 7 7 7 7 7 7 7]<br>
> + pshufb m2, m1, [pb_01] ; [7 7 7 7 7 7 7 7]<br>
> movu [r3 + r1 * 2], m2<br>
> movu [r3 + r1 * 2 + 16], m2<br>
> psrldq m1, 2<br>
> - pshufb m2, m1, [pw_unpackwdq] ; [8 8 8 8 8 8 8 8]<br>
> + pshufb m2, m1, [pb_01] ; [8 8 8 8 8 8 8 8]<br>
> movu [r3 + r4], m2<br>
> movu [r3 + r4 + 16], m2<br>
><br>
> lea r3, [r3 + r1 *4]<br>
> - pshufb m2, m3, [pw_unpackwdq] ; [9 9 9 9 9 9 9 9]<br>
> + pshufb m2, m3, [pb_01] ; [9 9 9 9 9 9 9 9]<br>
> movu [r3], m2<br>
> movu [r3 + 16], m2<br>
> psrldq m3, 2<br>
> - pshufb m2, m3, [pw_unpackwdq] ; [10 10 10 10 10 10 10 10]<br>
> + pshufb m2, m3, [pb_01] ; [10 10 10 10 10 10 10 10]<br>
> movu [r3 + r1], m2<br>
> movu [r3 + r1 + 16], m2<br>
> psrldq m3, 2<br>
> - pshufb m2, m3, [pw_unpackwdq] ; [11 11 11 11 11 11 11 11]<br>
> + pshufb m2, m3, [pb_01] ; [11 11 11 11 11 11 11 11]<br>
> movu [r3 + r1 * 2], m2<br>
> movu [r3 + r1 * 2 + 16], m2<br>
> psrldq m3, 2<br>
> - pshufb m2, m3, [pw_unpackwdq] ; [12 12 12 12 12 12 12 12]<br>
> + pshufb m2, m3, [pb_01] ; [12 12 12 12 12 12 12 12]<br>
> movu [r3 + r4], m2<br>
> movu [r3 + r4 + 16], m2<br>
><br>
> lea r3, [r3 + r1 *4]<br>
> psrldq m3, 2<br>
> - pshufb m2, m3, [pw_unpackwdq] ; [13 13 13 13 13 13 13 13]<br>
> + pshufb m2, m3, [pb_01] ; [13 13 13 13 13 13 13 13]<br>
> movu [r3], m2<br>
> movu [r3 + 16], m2<br>
> psrldq m3, 2<br>
> - pshufb m2, m3, [pw_unpackwdq] ; [14 14 14 14 14 14 14 14]<br>
> + pshufb m2, m3, [pb_01] ; [14 14 14 14 14 14 14 14]<br>
> movu [r3 + r1], m2<br>
> movu [r3 + r1 + 16], m2<br>
> psrldq m3, 2<br>
> - pshufb m2, m3, [pw_unpackwdq] ; [15 15 15 15 15 15 15 15]<br>
> + pshufb m2, m3, [pb_01] ; [15 15 15 15 15 15 15 15]<br>
> movu [r3 + r1 * 2], m2<br>
> movu [r3 + r1 * 2 + 16], m2<br>
> psrldq m3, 2<br>
> - pshufb m2, m3, [pw_unpackwdq] ; [16 16 16 16 16 16 16 16]<br>
> + pshufb m2, m3, [pb_01] ; [16 16 16 16 16 16 16 16]<br>
> movu [r3 + r4], m2<br>
> movu [r3 + r4 + 16], m2<br>
> mova m3, m0<br>
> @@ -10082,7 +10075,7 @@<br>
><br>
> ; filter<br>
> pinsrw m1, [r2], 0 ; [3 2 1 0]<br>
> - pshufb m2, m1, [pw_unpackwdq] ; [0 0 0 0 0 0 0 0]<br>
> + pshufb m2, m1, [pb_01] ; [0 0 0 0 0 0 0 0]<br>
> movu m1, [r2 + 2] ; [8 7 6 5 4 3 2 1]<br>
> movu m3, [r2 + 18] ; [16 15 14 13 12 11 10 9]<br>
> psubw m1, m2<br>
> @@ -10152,9 +10145,9 @@<br>
><br>
> ; filter<br>
><br>
> - pshufb m0, [pw_unpackwdq]<br>
> + pshufb m0, [pb_01]<br>
> pinsrw m1, [r2], 0 ; [3 2 1 0]<br>
> - pshufb m2, m1, [pw_unpackwdq] ; [0 0 0 0 0 0 0 0]<br>
> + pshufb m2, m1, [pb_01] ; [0 0 0 0 0 0 0 0]<br>
> movu m1, [r2 + 2 + 64] ; [8 7 6 5 4 3 2 1]<br>
> movu m3, [r2 + 18 + 64] ; [16 15 14 13 12 11 10 9]<br>
> psubw m1, m2<br>
</div></div>> _______________________________________________<br>
> x265-devel mailing list<br>
> <a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
> <a href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
<span class="HOEnZb"><font color="#888888"><br>
--<br>
Steve Borho<br>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</font></span></blockquote></div><br></div>