[x265] [PATCH] count_nonzero asm code, reduced code size by combining mova and packsswb

praveen at multicorewareinc.com praveen at multicorewareinc.com
Fri Sep 5 07:51:22 CEST 2014


# HG changeset patch
# User Praveen Tiwari
# Date 1409896229 -19800
# Node ID 0b3f68d5f1699540c71ed7b75e2b0ca965fad82f
# Parent  93db2f53fe573537bcd4eb53ca3cdb69af557eb5
count_nonzero asm code, reduced code size by combining mova and packsswb

diff -r 93db2f53fe57 -r 0b3f68d5f169 source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm	Thu Sep 04 16:42:24 2014 -0700
+++ b/source/common/x86/pixel-util8.asm	Fri Sep 05 11:20:29 2014 +0530
@@ -1045,7 +1045,7 @@
 ; int count_nonzero(const int16_t *quantCoeff, int numCoeff);
 ;-----------------------------------------------------------------------------
 INIT_XMM ssse3
-cglobal count_nonzero, 2,2,4
+cglobal count_nonzero, 2,2,3
     pxor        m0, m0
     shr         r1d, 4
     movd        m1, r1d
@@ -1053,8 +1053,7 @@
 
 .loop:
     mova        m2, [r0 +  0]
-    mova        m3, [r0 + 16]
-    packsswb    m2, m3
+    packsswb    m2, [r0 + 16]
     add         r0, 32
     pcmpeqb     m2, m0
     paddb       m1, m2


More information about the x265-devel mailing list