[x265] [PATCH] asm: assembly code for x265_pixel_satd_32x16
chen
chenm003 at 163.com
Mon Nov 11 15:51:40 CET 2013
>+cglobal pixel_satd_32x16, 4,8,8 ;if WIN64 && notcpuflag(avx)
>+ SATD_START_SSE2 m10, m7
>+ mov r6, r0
>+ mov r7, r2
>+%if vertical
>+ mova m7, [pw_00ff]
>+%endif
>+ call pixel_satd_16x4_internal
>+ call pixel_satd_16x4_internal
>+ call pixel_satd_16x4_internal
>+ call pixel_satd_16x4_internal
>+ lea r0, [r6 + 16]
>+ lea r2, [r7 + 16]
>+ call pixel_satd_16x4_internal
>+ call pixel_satd_16x4_internal
>+ call pixel_satd_16x4_internal
>+ call pixel_satd_16x4_internal
>+ SATD_END_SSE2 m10
>+
> %else
>+cglobal pixel_satd_32x16, 4,8,8
You can't use 8 general-purpose registers on 32-bit x86.
>+%if WIN64 ;if WIN64 && cpuflag(avx)
>+ SATD_START_SSE2 m6, m7
>+ mov r6, r0
>+ mov r7, r2
>+ call pixel_satd_8x8_internal
>+ call pixel_satd_8x8_internal
>+ lea r0, [r6 + 8]
>+ lea r2, [r7 + 8]
>+ call pixel_satd_8x8_internal
>+ call pixel_satd_8x8_internal
>+ lea r0, [r6 + 16]
>+ lea r2, [r7 + 16]
>+ call pixel_satd_8x8_internal
>+ call pixel_satd_8x8_internal
>+ lea r0, [r6 + 24]
>+ lea r2, [r7 + 24]
>+ call pixel_satd_8x8_internal
>+ call pixel_satd_8x8_internal
>+ SATD_END_SSE2 m6
>+%else ;if !WIN64
>+ SATD_START_SSE2 m6, m7
>+ call pixel_satd_8x8_internal
>+ call pixel_satd_8x8_internal
>+ mov r0, r0mp
>+ mov r2, r2mp
>+ add r0, 8
>+ add r2, 8
>+ call pixel_satd_8x8_internal
>+ call pixel_satd_8x8_internal
>+ mov r0, r0mp
>+ mov r2, r2mp
>+ add r0, 16
>+ add r2, 16
>+ call pixel_satd_8x8_internal
>+ call pixel_satd_8x8_internal
>+ mov r0, r0mp
>+ mov r2, r2mp
>+ add r0, 24
>+ add r2, 24
>+ call pixel_satd_8x8_internal
>+ call pixel_satd_8x8_internal
>+ SATD_END_SSE2 m6
>+%endif
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131111/006b1703/attachment.html>
More information about the x265-devel mailing list