[x265] [PATCH] asm code for pixel_add_ps, 4x8 and 4x16
Praveen Tiwari
praveen at multicorewareinc.com
Wed Nov 20 14:18:53 CET 2013
Replaced in both the C and asm code; fix sent.
Regards,
Praveen Tiwari
On Wed, Nov 20, 2013 at 6:11 PM, chen <chenm003 at 163.com> wrote:
>
> >+;-----------------------------------------------------------------------------
>
> >+; void pixel_add_ps_%1x%2(pixel *dest, int destride, pixel *src0, int16_t *scr1, int srcStride0, int srcStride1)
>
> >+;-----------------------------------------------------------------------------
> using the intptr_t type for the strides is better
>
> >+%macro PIXEL_ADD_PS_W4_H4 2 ; %1, %2 form the "%1x%2" suffix of the function name; %2 also sets the loop count
> >+INIT_XMM sse4
>
> >+cglobal pixel_add_ps_%1x%2, 6, 7, 2, dest, destride, src0, scr1, srcStride0, srcStride1
> >+; per x86inc argument order: r0 = dest, r1 = destride, r2 = src0, r3 = scr1, r4 = srcStride0, r5 = srcStride1; r6 = loop counter
> >+add r5, r5 ; double srcStride1: scr1 is read as 16-bit words, so presumably the stride arrives in elements and is needed in bytes -- confirm with caller
> >+; NOTE(review): the header comment declares the strides as int, yet r1/r4/r5 are used as full-width address registers -- confirm their upper bits are well-defined (e.g. intptr_t strides)
> >+mov r6d, %2/4 ; iteration count: each pass of the loop below processes four rows
> >+; NOTE(review): the label below has no trailing colon; NASM can warn about such "orphan" labels -- consider ".loop:"
> >+.loop
> >+ movd m0, [r2] ; row 0: load 4 bytes from src0
> >+ pmovzxbw m0, m0 ; zero-extend the bytes to 16-bit words
> >+ movh m1, [r3] ; load 8 bytes (four 16-bit values) from scr1
> >+
> >+ paddw m0, m1 ; 16-bit add of widened src0 and scr1
> >+ packuswb m0, m0 ; pack words back to bytes with unsigned saturation (clamps to 0..255)
> >+
> >+ movd [r0], m0 ; store the 4 result bytes to dest row 0
> >+; row 1: same load / widen / add / pack / store sequence, one stride further on
> >+ movd m0, [r2 + r4]
> >+ pmovzxbw m0, m0
> >+ movh m1, [r3 + r5]
> >+
> >+ paddw m0, m1
> >+ packuswb m0, m0
> >+
> >+ movd [r0 + r1], m0
> >+; row 2: two strides from the current base pointers
> >+ movd m0, [r2 + 2 * r4]
> >+ pmovzxbw m0, m0
> >+ movh m1, [r3 + 2 * r5]
> >+
> >+ paddw m0, m1
> >+ packuswb m0, m0
> >+
> >+ movd [r0 + 2 * r1], m0
> >+; advance all three pointers by two rows, so they now point at row 2
> >+ lea r0, [r0 + 2 * r1]
> >+ lea r2, [r2 + 2 * r4]
> >+ lea r3, [r3 + 2 * r5]
> >+; row 3: addressed as (row 2 base + one stride) after the advance above
> >+ movd m0, [r2 + r4]
> >+ pmovzxbw m0, m0
> >+ movh m1, [r3 + r5]
> >+
> >+ paddw m0, m1
> >+ packuswb m0, m0
> >+
> >+ movd [r0 + r1], m0
> >+; advance two more rows: pointers now sit at row 4, the first row of the next group
> >+ lea r0, [r0 + 2 * r1]
> >+ lea r2, [r2 + 2 * r4]
> >+ lea r3, [r3 + 2 * r5]
> >+; lea leaves flags untouched; the dec below sets ZF for the loop branch
> >+ dec r6d
> >+ jnz .loop ; repeat until all %2/4 four-row groups are done
> >+
> >+RET
> >+%endmacro
> >+
> >+PIXEL_ADD_PS_W4_H4 4, 8 ; instantiate pixel_add_ps_4x8 (loop count 8/4 = 2)
> >+PIXEL_ADD_PS_W4_H4 4, 16 ; instantiate pixel_add_ps_4x16 (loop count 16/4 = 4)
> >_______________________________________________
> >x265-devel mailing list
> >x265-devel at videolan.org
> >https://mailman.videolan.org/listinfo/x265-devel
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131120/fa9e11e6/attachment-0001.html>
More information about the x265-devel
mailing list