[x265] [PATCH] weighted prediction (pixel), interface simplification
chen
chenm003 at 163.com
Fri Oct 17 18:16:59 CEST 2014
At 2014-10-17 18:29:49,praveen at multicorewareinc.com wrote:
># HG changeset patch
># User Praveen Tiwari
># Date 1413541750 -19800
># Node ID 61051f5a16b387120b17be2024543d14aea61f16
># Parent b7eeae24aae63495bcad1570ecd76cae988f0f6e
>weighted prediction (pixel), interface simplification
>
> void x265_pixel_ssim_4x4x2_core_mmx2(const uint8_t * pix1, intptr_t stride1,
>diff -r b7eeae24aae6 -r 61051f5a16b3 source/common/x86/pixel-util8.asm
>--- a/source/common/x86/pixel-util8.asm Thu Oct 16 21:57:30 2014 +0530
>+++ b/source/common/x86/pixel-util8.asm Fri Oct 17 15:59:10 2014 +0530
>@@ -1298,35 +1298,32 @@
>
>
> ;-----------------------------------------------------------------------------------------------------------------------------------------------
>-;void weight_pp(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset)
>+;void weight_pp(pixel *src, pixel *dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset)
> ;-----------------------------------------------------------------------------------------------------------------------------------------------
> INIT_XMM sse4
> cglobal weight_pp, 6, 7, 6
>
>+ shl r5d, 6 ; m0 = [w0<<6]
> mov r6d, r6m
>- shl r6d, 6
>- movd m0, r6d ; m0 = [w0<<6]
>-
>- movd m1, r7m ; m1 = [round]
>- punpcklwd m0, m1 ; assuming both (w0<<6) and round are using maximum of 16 bits each.
>- pshufd m0, m0, 0 ; m0 = [w0<<6 round]
>-
>- movd m1, r8m
>-
>- movd m2, r9m
>+ shl r6d, 16
>+ or r6d, r5d ; assuming both (w0<<6) and round are using maximum of 16 bits each.
Please also add an assert() to the C model that checks this assumption (both (w0<<6) and round fit in 16 bits).
>+ movd m0, r6d
>+ pshufd m0, m0, 0 ; m0 = [w0<<6, round]
>+
>+ movd m1, r7m
>+
>+ movd m2, r8m
> pshufd m2, m2, 0
>
> mova m5, [pw_1]
>
>- sub r2d, r4d
>- sub r3d, r4d
>-
>+ sub r2d, r3d
>+ shr r3d, 4
> .loopH:
>- mov r6d, r4d
>- shr r6d, 4
>+ mov r5d, r3d
>+
> .loopW:
>- movh m4, [r0]
>- pmovzxbw m4, m4
>+ pmovzxbw m4, [r0]
>
> punpcklwd m3, m4, m5
> pmaddwd m3, m0
>@@ -1364,13 +1361,13 @@
> add r0, 16
> add r1, 16
>
>- dec r6d
>+ dec r5d
> jnz .loopW
>
> lea r0, [r0 + r2]
>- lea r1, [r1 + r3]
>-
>- dec r5d
>+ lea r1, [r1 + r2]
>+
>+ dec r4d
> jnz .loopH
>
> RET
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20141018/97f499d0/attachment.html>
More information about the x265-devel mailing list