[x265] [PATCH Review only] asm: code for scale2D_64to32 routine
chen
chenm003 at 163.com
Thu Nov 14 16:06:08 CET 2013
I give you some algorithm details here:
In:
A B
C D
Out:
(A + B + C + D + 2) / 4
This is standard MPEG4 interpolateHV, you may reference Xvid's code
or use pmaddubsw + pmulhrsw
>+;-----------------------------------------------------------------
>+; void scale2D_64to32(pixel *dst, pixel *src, intptr_t stride)
>+;-----------------------------------------------------------------
>+INIT_XMM ssse3
>+cglobal scale2D_64to32, 3, 7, 8, dest, src, stride
>+
>+ mova m7, [pw_00ff]
>+ mova m6, [pw_2]
>+ xor r3, r3
>+ mov r6d, 32
>+.loop
>+
>+ mov r4, r3
>+ imul r4, r2
>+
>+ mov r5, r3
>+ inc r5
>+ imul r5, r2
>+
>+ movu m0, [r1 + r4]
>+ palignr m1, m0, 1
>+ movu m2, [r1 + r5]
>+ palignr m3, m2, 1
>+
>+ pand m0, m7
>+ pand m1, m7
>+ pand m2, m7
>+ pand m3, m7
>+
>+ paddusw m0, m1
>+ paddusw m0, m2
>+ paddusw m0, m3
>+ paddusw m0, m6
>+
>+ psrlw m0, 2
>+
>+ movu m4, [r1 + r4 + 16]
>+ palignr m5, m4, 1
>+ movu m1, [r1 + r5 + 16]
>+ palignr m2, m1, 1
>+
>+ pand m4, m7
>+ pand m5, m7
>+ pand m1, m7
>+ pand m2, m7
>+
>+ paddusw m4, m5
>+ paddusw m4, m1
>+ paddusw m4, m2
>+ paddusw m4, m6
>+ psrlw m4, 2
>+
>+ packuswb m0, m4
>+ movu [r0], m0
>+
>+ movu m0, [r1 + r4 + 32]
>+ palignr m1, m0, 1
>+ movu m2, [r1 + r5 + 32]
>+ palignr m3, m2, 1
>+
>+ pand m0, m7
>+ pand m1, m7
>+ pand m2, m7
>+ pand m3, m7
>+
>+ paddusw m0, m1
>+ paddusw m0, m2
>+ paddusw m0, m3
>+ paddusw m0, m6
>+
>+ psrlw m0, 2
>+
>+ movu m4, [r1 + r4 + 48]
>+ palignr m5, m4, 1
>+ movu m1, [r1 + r5 + 48]
>+ palignr m2, m1, 1
>+
>+ pand m4, m7
>+ pand m5, m7
>+ pand m1, m7
>+ pand m2, m7
>+
>+ paddusw m4, m5
>+ paddusw m4, m1
>+ paddusw m4, m2
>+ paddusw m4, m6
>+ psrlw m4, 2
>+
>+ packuswb m0, m4
>+ movu [r0 + 16], m0
>+
>+ lea r0, [r0 + 32]
>+ add r3, 2
>+ dec r6d
>+
>+ jnz .loop
>+
>+RET
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131114/25b9ad70/attachment-0001.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: interpolate8x8_xmm.asm
Type: application/octet-stream
Size: 19010 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131114/25b9ad70/attachment-0001.obj>
More information about the x265-devel
mailing list