[x265] [PATCH] asm: 10bpp code for blockcopy_pp_2xN and 4xN

chen chenm003 at 163.com
Mon Dec 9 13:56:20 CET 2013


> ;-----------------------------------------------------------------------------
> ; void blockcopy_pp_4x2(pixel *dest, intptr_t deststride, pixel *src, intptr_t srcstride)
> ;-----------------------------------------------------------------------------
> INIT_XMM sse2
>-cglobal blockcopy_pp_4x2, 4, 6, 2, dest, deststride, src, srcstride
>-
>-mov     r4d,     [r2]
>-mov     r5d,     [r2 + r3]
>-
>-mov     [r0],            r4d
>-mov     [r0 + r1],       r5d
>-
>-RET
>+cglobal blockcopy_pp_4x2, 4, 6, 0, dest, deststride, src, srcstride
>+%if HIGH_BIT_DEPTH
>+    add    r1,           r1
>+    add    r3,           r3
>+    mov    r4,           [r2]
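On 32-bit x86 builds, r4 is only 32 bits wide, so with HIGH_BIT_DEPTH (16-bit pixels) this load copies just 2 pixels per row. That makes it the 2xN case, not 4x2.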

>+    mov    r5,           [r2 + r3]
>+    mov    [r0],         r4
>+    mov    [r0 + r1],    r5
>+%else
>+    mov     r4d,     [r2]
>+    mov     r5d,     [r2 + r3]
>+
>+    mov     [r0],            r4d
>+    mov     [r0 + r1],       r5d
>+%endif
>+    RET
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131209/09dd302d/attachment.html>


More information about the x265-devel mailing list