[x265] primitives: add count_nonzero
chen
chenm003 at 163.com
Wed Feb 19 19:16:06 CET 2014
+INIT_XMM sse2
+cglobal count_nonzero, 2,3,4
+ pxor m0, m0
+ pxor m1, m1
+ mov r2d, r1d
+ shr r1d, 3
+
+.loop
+ mova m2, [r0]
+ mova m3, [r0 + 16]
+ add r0, 32
+ packssdw m2, m3,
We only need a count here, so this pack is not needed.
+ pcmpeqw m2, m0
+ psrlw m2, 15
pcmpeqw generates a mask that is 0xFFFF for each matching word, so there is no need to shift right.
+ packsswb m2, m2
+ psadbw m2, m0
psadbw has low performance; why do you need the exact number inside the inner loop?
Of course, abs(-1) = abs(1).
+ paddd m1, m2
+ dec r1d
+ jnz .loop
+
+ movd r1d, m1
+ sub r2d, r1d
+ mov eax, r2d
+
+ RET
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20140220/dfb5cc0c/attachment.html>
More information about the x265-devel
mailing list