[x265] primitives: add count_nonzero

chen chenm003 at 163.com
Wed Feb 19 19:16:06 CET 2014


+INIT_XMM sse2
+cglobal count_nonzero, 2,3,4
+    pxor        m0, m0
+    pxor        m1, m1
+    mov         r2d, r1d
+    shr         r1d, 3
+
+.loop

+    mova        m2, [r0]
+    mova        m3, [r0 + 16]

+    add         r0, 32

+    packssdw    m2, m3,
We only need a count here, so this is not needed.
 
+    pcmpeqw     m2, m0
+    psrlw       m2, 15
pcmp generates a mask, which is 0xFFFF, so there is no need to shift right.
 
+    packsswb    m2, m2
+    psadbw      m2, m0
psad has low performance; why do you need the exact number in the inner loop?
Of course, abs(-1) = abs(1).

+    paddd       m1, m2
+    dec         r1d
+    jnz        .loop
+
+    movd        r1d, m1
+    sub         r2d, r1d
+    mov         eax, r2d
+
+    RET
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20140220/dfb5cc0c/attachment.html>


More information about the x265-devel mailing list