[x265] [PATCH] asm: routines for chroma vps filter functions for 2x4 and 2x8 block sizes
chen
chenm003 at 163.com
Wed Nov 13 12:55:38 CET 2013
>+;------------------------------------------------------------------------------------------------------------
>+;void interp_4tap_vert_ps_2x4(pixel *src, intptr_t srcStride, int16_t *dst, intptr_t dstStride, int coeffIdx)
>+;------------------------------------------------------------------------------------------------------------
>+INIT_XMM sse4
>+cglobal interp_4tap_vert_ps_2x4, 4, 7, 8
>+
>+ mov r4d, r4m
>+ sub r0, r1
>+ add r3d, r3d
>+
>+%ifdef PIC
>+ lea r5, [tab_ChromaCoeff]
>+ movd m0, [r5 + r4 * 4]
>+%else
>+ movd m0, [tab_ChromaCoeff + r4 * 4]
>+%endif
>+
>+ pshufb m0, [tab_Cm]
>+
>+ mova m1, [tab_c_8192]
>+
>+ movd m2, [r0]
>+ movd m3, [r0 + r1]
>+ movd m4, [r0 + 2 * r1]
>+ lea r5, [r0 + 2 * r1]
>+ movd m5, [r5 + r1]
>+
>+ punpcklbw m2, m3
>+ punpcklbw m6, m4, m5
>+ punpcklbw m2, m6
>+
>+ pmaddubsw m2, m0
>+
>+ movd m6, [r0 + 4 * r1]
>+
>+ punpcklbw m3, m4
>+ punpcklbw m7, m5, m6
>+ punpcklbw m3, m7
>+
>+ pmaddubsw m3, m0
>+ phaddw m2, m3
>+ psubw m2, m1
>+
>+ movd [r2], m2
>+ pshufd m2, m2 , 2
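This pshufd may be replaced by the shorter instruction movhlps (e.g. movhlps m2, m2): both bring dword 2 of m2 down into dword 0, which is all the following store needs.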
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131113/e6130620/attachment.html>
More information about the x265-devel mailing list