[x265] [PATCH] asm: routines for chroma vps filter functions for 2x4 and 2x8 block sizes

chen chenm003 at 163.com
Wed Nov 13 12:55:38 CET 2013


>+;------------------------------------------------------------------------------------------------------------
>+;void interp_4tap_vert_ps_2x4(pixel *src, intptr_t srcStride, int16_t *dst, intptr_t dstStride, int coeffIdx)
>+;------------------------------------------------------------------------------------------------------------
>+INIT_XMM sse4
>+cglobal interp_4tap_vert_ps_2x4, 4, 7, 8
>+
>+    mov         r4d, r4m
>+    sub         r0, r1
>+    add         r3d, r3d
>+
>+%ifdef PIC
>+    lea         r5, [tab_ChromaCoeff]
>+    movd        m0, [r5 + r4 * 4]
>+%else
>+    movd        m0, [tab_ChromaCoeff + r4 * 4]
>+%endif
>+
>+    pshufb      m0, [tab_Cm]
>+
>+    mova        m1, [tab_c_8192]
>+
>+    movd        m2, [r0]
>+    movd        m3, [r0 + r1]
>+    movd        m4, [r0 + 2 * r1]
>+    lea         r5, [r0 + 2 * r1]
>+    movd        m5, [r5 + r1]
>+
>+    punpcklbw   m2, m3
>+    punpcklbw   m6, m4, m5
>+    punpcklbw   m2, m6
>+
>+    pmaddubsw   m2, m0
>+
>+    movd        m6, [r0 + 4 * r1]
>+
>+    punpcklbw   m3, m4
>+    punpcklbw   m7, m5, m6
>+    punpcklbw   m3, m7
>+
>+    pmaddubsw   m3, m0
>+    phaddw      m2, m3
>+    psubw       m2, m1
>+
>+    movd        [r2], m2
>+    pshufd      m2, m2 , 2
This pshufd may be replaced by the shorter instruction movhlps.
 
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131113/e6130620/attachment.html>


More information about the x265-devel mailing list