[x265] [PATCH] asm: 10bpp code for blockcopy_pp_2xN and 4xN
chen
chenm003 at 163.com
Mon Dec 9 13:56:20 CET 2013
> ;-----------------------------------------------------------------------------
> ; void blockcopy_pp_4x2(pixel *dest, intptr_t deststride, pixel *src, intptr_t srcstride)
> ;-----------------------------------------------------------------------------
> INIT_XMM sse2
>-cglobal blockcopy_pp_4x2, 4, 6, 2, dest, deststride, src, srcstride
>-
>-mov r4d, [r2]
>-mov r5d, [r2 + r3]
>-
>-mov [r0], r4d
>-mov [r0 + r1], r5d
>-
>-RET
>+cglobal blockcopy_pp_4x2, 4, 6, 0, dest, deststride, src, srcstride
>+%if HIGH_BIT_DEPTH
>+ add r1, r1
>+ add r3, r3
>+ mov r4, [r2]
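On 32-bit x86, r4 and r5 are 32-bit registers, so each mov here copies only 4 bytes (two 16-bit pixels) per row. This is effectively a 2xN copy, not 4x2.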
>+ mov r5, [r2 + r3]
>+ mov [r0], r4
>+ mov [r0 + r1], r5
>+%else
>+ mov r4d, [r2]
>+ mov r5d, [r2 + r3]
>+
>+ mov [r0], r4d
>+ mov [r0 + r1], r5d
>+%endif
>+ RET
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131209/09dd302d/attachment.html>
More information about the x265-devel mailing list