[x265] (no subject)

chen chenm003 at 163.com
Fri Feb 21 03:58:40 CET 2014


At 2014-02-21 10:13:56,"Satoshi Nakagawa" <nakagawa424 at oki.com> wrote:
># HG changeset patch
># User Satoshi Nakagawa <nakagawa424 at oki.com>
># Date 1392948676 -32400
>#      Fri Feb 21 11:11:16 2014 +0900
># Node ID 66d8cb6573f27b29a9dc92ec480c635f0de48c03
># Parent  894bde574bc1678471e0c23ceb381a806768ea95
>asm: update count_nonzero, add testbench
>
>diff -r 894bde574bc1 -r 66d8cb6573f2 source/common/x86/pixel-util8.asm
>--- a/source/common/x86/pixel-util8.asm Thu Feb 20 17:18:42 2014 -0600
>+++ b/source/common/x86/pixel-util8.asm Fri Feb 21 11:11:16 2014 +0900
>@@ -1240,11 +1240,12 @@
> ; int count_nonzero(const int32_t *quantCoeff, int numCoeff);
> ;-----------------------------------------------------------------------------
> INIT_XMM sse2
>-cglobal count_nonzero, 2,3,4
>+cglobal count_nonzero, 2,2,4
>     pxor        m0, m0
>-    pxor        m1, m1
>-    mov         r2d, r1d
>     shr         r1d, 3
>+    movd        m1, r1d
>+    pshufd      m1, m1, 0
>+    packssdw    m1, m1
packssdw is an expensive instruction; pshuflw + punpcklqdq is better.
> 
> .loop
>     mova        m2, [r0]
>@@ -1252,16 +1253,13 @@
>     add         r0, 32
>     packssdw    m2, m3
>     pcmpeqw     m2, m0
>-    psrlw       m2, 15
>-    packsswb    m2, m2
>-    psadbw      m2, m0
>-    paddd       m1, m2
>+    paddw       m1, m2
>     dec         r1d
>-    jnz        .loop
>-
>-    movd        r1d, m1
>-    sub         r2d, r1d
>-    mov         eax, r2d
>+    jnz         .loop
>+
>+    packuswb    m1, m1
>+    psadbw      m1, m0
>+    movd        eax, m1
> 
>     RET
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20140221/b9eea3c0/attachment.html>


More information about the x265-devel mailing list