[x265] (no subject)
chen
chenm003 at 163.com
Fri Feb 21 03:58:40 CET 2014
At 2014-02-21 10:13:56,"Satoshi Nakagawa" <nakagawa424 at oki.com> wrote:
># HG changeset patch
># User Satoshi Nakagawa <nakagawa424 at oki.com>
># Date 1392948676 -32400
># Fri Feb 21 11:11:16 2014 +0900
># Node ID 66d8cb6573f27b29a9dc92ec480c635f0de48c03
># Parent 894bde574bc1678471e0c23ceb381a806768ea95
>asm: update count_nonzero, add testbench
>
>diff -r 894bde574bc1 -r 66d8cb6573f2 source/common/x86/pixel-util8.asm
>--- a/source/common/x86/pixel-util8.asm Thu Feb 20 17:18:42 2014 -0600
>+++ b/source/common/x86/pixel-util8.asm Fri Feb 21 11:11:16 2014 +0900
>@@ -1240,11 +1240,12 @@
> ; int count_nonzero(const int32_t *quantCoeff, int numCoeff);
> ;-----------------------------------------------------------------------------
> INIT_XMM sse2
>-cglobal count_nonzero, 2,3,4
>+cglobal count_nonzero, 2,2,4
> pxor m0, m0
>- pxor m1, m1
>- mov r2d, r1d
> shr r1d, 3
>+ movd m1, r1d
>+ pshufd m1, m1, 0
>+ packssdw m1, m1
packssdw is an expensive instruction; pshuflw + punpcklqdq is better.
>
> .loop
> mova m2, [r0]
>@@ -1252,16 +1253,13 @@
> add r0, 32
> packssdw m2, m3
> pcmpeqw m2, m0
>- psrlw m2, 15
>- packsswb m2, m2
>- psadbw m2, m0
>- paddd m1, m2
>+ paddw m1, m2
> dec r1d
>- jnz .loop
>-
>- movd r1d, m1
>- sub r2d, r1d
>- mov eax, r2d
>+ jnz .loop
>+
>+ packuswb m1, m1
>+ psadbw m1, m0
>+ movd eax, m1
>
> RET
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20140221/b9eea3c0/attachment.html>
More information about the x265-devel
mailing list