<div style="line-height:1.7;color:#000000;font-size:14px;font-family:arial"><div>Most parts are fine now; only the use of r5 needs a change, see the comment below.</div>
<div></div>
<div id="divNeteaseMailCard"></div>
<div><br></div><pre><br>At 2015-04-29 06:27:27,dtyx265@gmail.com wrote:
># HG changeset patch
># User David T Yuen &lt;<a href="mailto:dtyx265@gmail.com">dtyx265@gmail.com</a>&gt;
># Date 1430259967 25200
># Node ID 6108fbda1be654a481a78f7ef593518033919674
># Parent e9df93f380664932e7d6c7e85b2cae16cd5e1dcd
>asm: interp_8tap_horiz pp and ps sse2
>
>This replaces c code and covers
>
>+;----------------------------------------------------------------------------------------------------------------------------
>+; void interp_8tap_horiz_%3_%1x%2(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx, int isRowExt)
>+;----------------------------------------------------------------------------------------------------------------------------
>+%macro IPFILTER_LUMA_sse2 3
>+INIT_XMM sse2
>+cglobal interp_8tap_horiz_%3_%1x%2, 4,7,8
>+
>+ mov r4d, r4m
>+ add r4d, r4d
>+ pxor m6, m6
>+%ifdef PIC
>+ lea r6, [tabw_LumaCoeff]
>+ movu m3, [r6 + r4 * 8]
>+%else
>+ movu m3, [tabw_LumaCoeff + r4 * 8]
>+%endif
>+
>+ mov r4d, %2
>+%ifidn %3, pp
>+ mova m2, [pw_32]
>+%else
>+ mova m2, [pw_2000]
>+ add r3d, r3d
>+ cmp r5m, byte 0<br>If we move the above 2 lines up, we can eliminate r6 and reuse r5 instead.<br>
>+ je .loopH
>+ lea r6, [r1 + 2 * r1]
>+ sub r0d, r6d
>+ add r4d, 7
>+%endif
>+
>+.loopH:
>+%assign x 0
>+%rep %1 / 8
>+ FILTER_H8_W8_sse2
>+ %ifidn %3, pp
>+ paddw m1, m2
>+ psraw m1, 6
>+ packuswb m1, m1
>+ movh [r2 + x], m1
>+ %else
>+ psubw m1, m2
>+ movu [r2 + 2 * x], m1
>+ %endif
>+%assign x x+8
>+%endrep
>+
>+%rep (%1 % 8) / 4
>+ FILTER_H8_W4_sse2
>+ %ifidn %3, pp
>+ paddw m1, m2
>+ psraw m1, 6
>+ packuswb m1, m1
>+ movd [r2 + x], m1
>+ %else
>+ psubw m1, m2
>+ movh [r2 + 2 * x], m1
>+ %endif
>+%endrep
>+
>+ add r0d, r1d
>+ add r2d, r3d
>+
>+ dec r4d
>+ jnz .loopH
>+ RET
>+
>+%endmacro
</pre></div>