[x265] [PATCH] asm code for pixel_add_ps, 4x8 and 4x16

Praveen Tiwari praveen at multicorewareinc.com
Wed Nov 20 14:18:53 CET 2013


Replaced in both the C and asm code; fix sent.

Regards,
Praveen Tiwari


On Wed, Nov 20, 2013 at 6:11 PM, chen <chenm003 at 163.com> wrote:

>
> >+;-----------------------------------------------------------------------------
>
> >+; void pixel_add_ps_%1x%2(pixel *dest, int destride, pixel *src0, int16_t *scr1, int srcStride0, int srcStride1)
>
> >+;-----------------------------------------------------------------------------
> Using the intptr_t type for the stride parameters would be better.
>
> >+; PIXEL_ADD_PS_W4_H4 width, height
> >+;   Emits pixel_add_ps_%1x%2. Each loop iteration handles four rows; a row
> >+;   loads 4 bytes from src0 and 4 words from scr1, adds them as 16-bit
> >+;   words, packs the sums back to bytes with unsigned saturation and stores
> >+;   4 bytes to dest. The loop runs %2/4 times, so %2 must be a non-zero
> >+;   multiple of 4.
> >+;
> >+;   Registers (from the cglobal argument list):
> >+;     r0 = dest         r1 = destride
> >+;     r2 = src0         r4 = srcStride0
> >+;     r3 = scr1         r5 = srcStride1 (doubled on entry)
> >+;     r6d = remaining groups of four rows
> >+;   Clobbers: r0, r2, r3, r5, r6, m0, m1, flags.
> >+;
> >+;   NOTE(review): r1, r4 and r5 are used as full-width address registers, so
> >+;   the strides presumably have to arrive pointer-sized (intptr_t) from the
> >+;   C side -- confirm the prototype matches.
> >+%macro PIXEL_ADD_PS_W4_H4 2
> >+INIT_XMM sse4
>
> >+cglobal pixel_add_ps_%1x%2, 6, 7, 2, dest, destride, src0, scr1, srcStride0, srcStride1
> >+
> >+add         r5,            r5               ; srcStride1 *= 2 (presumably int16_t elements -> bytes)
> >+
> >+mov         r6d,           %2/4             ; loop count: four rows per iteration
> >+
> >+.loop:
> >+      ; row 0
> >+      movd        m0,            [r2]             ; 4 bytes from src0
> >+      pmovzxbw    m0,            m0               ; zero-extend bytes to words
> >+      movh        m1,            [r3]             ; 4 words from scr1
> >+
> >+      paddw       m0,            m1
> >+      packuswb    m0,            m0               ; saturate sums to unsigned bytes
> >+
> >+      movd        [r0],          m0
> >+
> >+      ; row 1
> >+      movd        m0,            [r2 + r4]
> >+      pmovzxbw    m0,            m0
> >+      movh        m1,            [r3 + r5]
> >+
> >+      paddw       m0,            m1
> >+      packuswb    m0,            m0
> >+
> >+      movd        [r0 + r1],     m0
> >+
> >+      ; row 2
> >+      movd        m0,            [r2 + 2 * r4]
> >+      pmovzxbw    m0,            m0
> >+      movh        m1,            [r3 + 2 * r5]
> >+
> >+      paddw       m0,            m1
> >+      packuswb    m0,            m0
> >+
> >+      movd        [r0 + 2 * r1], m0
> >+
> >+      ; step all three pointers down two rows so that row 3 is reachable
> >+      ; as base + stride
> >+      lea         r0,            [r0 + 2 * r1]
> >+      lea         r2,            [r2 + 2 * r4]
> >+      lea         r3,            [r3 + 2 * r5]
> >+
> >+      ; row 3
> >+      movd        m0,            [r2 + r4]
> >+      pmovzxbw    m0,            m0
> >+      movh        m1,            [r3 + r5]
> >+
> >+      paddw       m0,            m1
> >+      packuswb    m0,            m0
> >+
> >+      movd        [r0 + r1],     m0
> >+
> >+      ; step two more rows: pointers now sit on row 0 of the next group
> >+      lea         r0,            [r0 + 2 * r1]
> >+      lea         r2,            [r2 + 2 * r4]
> >+      lea         r3,            [r3 + 2 * r5]
> >+
> >+      dec         r6d
> >+      jnz         .loop
> >+
> >+RET
> >+%endmacro
> >+
> >+PIXEL_ADD_PS_W4_H4   4,  8
> >+PIXEL_ADD_PS_W4_H4   4, 16
> >_______________________________________________
> >x265-devel mailing list
> >x265-devel at videolan.org
> >https://mailman.videolan.org/listinfo/x265-devel
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131120/fa9e11e6/attachment-0001.html>


More information about the x265-devel mailing list