[x265] [PATCH] asm: new algorithm x265_count_nonzero_4x4_avx2, Issue #152

Min Chen chenm003 at 163.com
Mon Jun 29 21:53:57 CEST 2015


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1435605518 25200
# Node ID e332f1657e95a8f3ef9dcc66a6e0e22ce00a23f6
# Parent  9feee64efa440c25f016d15ae982789e5393a77e
asm: new algorithm x265_count_nonzero_4x4_avx2, Issue #152
---
 source/common/x86/pixel-util8.asm |   19 +++++++------------
 1 files changed, 7 insertions(+), 12 deletions(-)

diff -r 9feee64efa44 -r e332f1657e95 source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm	Fri Jun 26 15:29:51 2015 +0530
+++ b/source/common/x86/pixel-util8.asm	Mon Jun 29 12:18:38 2015 -0700
@@ -1079,7 +1079,7 @@
     jnz            .loop
     RET
 
-z
+
 ;-----------------------------------------------------------------------------
 ; int x265_count_nonzero_4x4_sse2(const int16_t *quantCoeff);
 ;-----------------------------------------------------------------------------
@@ -1105,19 +1105,14 @@
 INIT_YMM avx2
 cglobal count_nonzero_4x4, 1,1,2
     pxor            m0, m0
-
-    mova            m1, [r0 + 0]
-    packsswb        m1, [r0 + 16]
-    pcmpeqb         m1, m0
-    paddb           m1, [pb_1]
-
-    psadbw          m1, m0
-    pshufd          m0, m1, 2
-    paddd           m1, m0
-    movd            eax, xm1
+    movu            m1, [r0]
+    pcmpeqw         m1, m0
+    pmovmskb        eax, m1
+    not             eax
+    popcnt          eax, eax
+    shr             eax, 1
     RET
 
-
 ;-----------------------------------------------------------------------------
 ; int x265_count_nonzero_8x8_sse2(const int16_t *quantCoeff);
 ;-----------------------------------------------------------------------------



More information about the x265-devel mailing list