[x265] primitives: add count_nonzero

Satoshi Nakagawa nakagawa424 at oki.com
Sat Feb 15 06:23:56 CET 2014


# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1392440540 -32400
#      Sat Feb 15 14:02:20 2014 +0900
# Node ID dcdb5c276c7827fa540348018ddd72618b558feb
# Parent  d77a549b8061f038b2d263c4508e256b4a335208
primitives: add count_nonzero

diff -r d77a549b8061 -r dcdb5c276c78 source/Lib/TLibEncoder/TEncEntropy.cpp
--- a/source/Lib/TLibEncoder/TEncEntropy.cpp	Fri Feb 14 15:51:09 2014 -0600
+++ b/source/Lib/TLibEncoder/TEncEntropy.cpp	Sat Feb 15 14:02:20 2014 +0900
@@ -703,18 +703,6 @@
     }
 }
 
-int TEncEntropy::countNonZeroCoeffs(TCoeff* coeff, uint32_t size)
-{
-    int count = 0;
-
-    for (int i = 0; i < size; i++)
-    {
-        count += coeff[i] != 0;
-    }
-
-    return count;
-}
-
 /** encode quantization matrix
  * \param scalingList quantization matrix information
  */
diff -r d77a549b8061 -r dcdb5c276c78 source/Lib/TLibEncoder/TEncEntropy.h
--- a/source/Lib/TLibEncoder/TEncEntropy.h	Fri Feb 14 15:51:09 2014 -0600
+++ b/source/Lib/TLibEncoder/TEncEntropy.h	Sat Feb 15 14:02:20 2014 +0900
@@ -189,7 +189,6 @@
     void estimateBit(estBitsSbacStruct* estBitsSbac, int width, int height, TextType ttype);
     void encodeSaoOffset(SaoLcuParam* saoLcuParam, uint32_t compIdx);
     void encodeSaoUnitInterleaving(int compIdx, bool saoFlag, int rx, int ry, SaoLcuParam* saoLcuParam, int cuAddrInSlice, int cuAddrUpInSlice, int allowMergeLeft, int allowMergeUp);
-    static int countNonZeroCoeffs(TCoeff* pcCoef, uint32_t uiSize);
 }; // END CLASS DEFINITION TEncEntropy
 }
 //! \}
diff -r d77a549b8061 -r dcdb5c276c78 source/Lib/TLibEncoder/TEncSbac.cpp
--- a/source/Lib/TLibEncoder/TEncSbac.cpp	Fri Feb 14 15:51:09 2014 -0600
+++ b/source/Lib/TLibEncoder/TEncSbac.cpp	Sat Feb 15 14:02:20 2014 +0900
@@ -36,6 +36,7 @@
 */
 
 #include "TEncSbac.h"
+#include "primitives.h"
 
 namespace x265 {
 //! \ingroup TLibEncoder
@@ -2103,7 +2104,7 @@
     assert(width <= m_slice->getSPS()->getMaxTrSize());
 
     // compute number of significant coefficients
-    uint32_t numSig = TEncEntropy::countNonZeroCoeffs(coeff, width * height);
+    uint32_t numSig = primitives.count_nonzero(coeff, width * height);
 
     if (numSig == 0)
         return;
diff -r d77a549b8061 -r dcdb5c276c78 source/common/dct.cpp
--- a/source/common/dct.cpp	Fri Feb 14 15:51:09 2014 -0600
+++ b/source/common/dct.cpp	Sat Feb 15 14:02:20 2014 +0900
@@ -797,6 +797,20 @@
 
     return acSum;
 }
+
+// Returns how many of the numCoeff entries of quantCoeff are non-zero; numCoeff must be a positive multiple of 16.
+int  count_nonzero_c(const int32_t *quantCoeff, int numCoeff)
+{
+    assert(numCoeff > 0 && (numCoeff & 15) == 0);
+
+    int nonZero = 0;
+    for (int idx = 0; idx < numCoeff; ++idx)
+    {
+        nonZero += quantCoeff[idx] ? 1 : 0;
+    }
+
+    return nonZero;
+}
 }  // closing - anonymous file-static namespace
 
 namespace x265 {
@@ -817,5 +831,6 @@
     p.idct[IDCT_8x8] = idct8_c;
     p.idct[IDCT_16x16] = idct16_c;
     p.idct[IDCT_32x32] = idct32_c;
+    p.count_nonzero = count_nonzero_c;
 }
 }
diff -r d77a549b8061 -r dcdb5c276c78 source/common/primitives.h
--- a/source/common/primitives.h	Fri Feb 14 15:51:09 2014 -0600
+++ b/source/common/primitives.h	Sat Feb 15 14:02:20 2014 +0900
@@ -157,6 +157,7 @@
 typedef uint32_t (*quant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff, int32_t* lastPos);
 typedef void (*dequant_scaling_t)(const int32_t* src, const int32_t *dequantCoef, int32_t* dst, int num, int mcqp_miper, int shift);
 typedef void (*dequant_normal_t)(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift);
+typedef int  (*count_nonzero_t)(const int32_t *quantCoeff, int numCoeff);
 
 typedef void (*weightp_pp_t)(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
 typedef void (*weightp_sp_t)(int16_t *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
@@ -239,6 +240,7 @@
     quant_t         quant;
     dequant_scaling_t dequant_scaling;
     dequant_normal_t dequant_normal;
+    count_nonzero_t count_nonzero;
 
     calcresidual_t  calcresidual[NUM_SQUARE_BLOCKS];
     calcrecon_t     calcrecon[NUM_SQUARE_BLOCKS];
diff -r d77a549b8061 -r dcdb5c276c78 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Fri Feb 14 15:51:09 2014 -0600
+++ b/source/common/x86/asm-primitives.cpp	Sat Feb 15 14:02:20 2014 +0900
@@ -982,6 +982,9 @@
         p.dct[DCT_4x4] = x265_dct4_sse2;
         p.idct[IDCT_4x4] = x265_idct4_sse2;
         p.idct[IDST_4x4] = x265_idst4_sse2;
+
+        p.count_nonzero = x265_count_nonzero_sse2;
+
     }
     if (cpuMask & X265_CPU_SSSE3)
     {
diff -r d77a549b8061 -r dcdb5c276c78 source/common/x86/pixel-util.h
--- a/source/common/x86/pixel-util.h	Fri Feb 14 15:51:09 2014 -0600
+++ b/source/common/x86/pixel-util.h	Sat Feb 15 14:02:20 2014 +0900
@@ -46,6 +46,7 @@
 
 uint32_t x265_quant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff, int32_t* lastPos);
 void x265_dequant_normal_sse4(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift);
+int x265_count_nonzero_sse2(const int32_t *quantCoeff, int numCoeff);
 
 void x265_weight_pp_sse4(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
 void x265_weight_sp_sse4(int16_t *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
diff -r d77a549b8061 -r dcdb5c276c78 source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm	Fri Feb 14 15:51:09 2014 -0600
+++ b/source/common/x86/pixel-util8.asm	Sat Feb 15 14:02:20 2014 +0900
@@ -1194,6 +1194,37 @@
     jnz        .loop
     RET
 
+
+;-----------------------------------------------------------------------------
+; int  count_nonzero(const int32_t *quantCoeff, int numCoeff);
+;-----------------------------------------------------------------------------
+INIT_XMM sse2
+cglobal count_nonzero, 2,3,4
+    pxor        m0, m0              ; m0 = all zero, used as the compare reference
+    pxor        m1, m1              ; m1 = running count of ZERO coefficients
+    mov         r2d, r1d            ; r2d = numCoeff, kept for the final subtraction
+    shr         r1d, 3              ; r1d = loop trips of 8 coefficients; numCoeff must be a non-zero multiple of 8
+
+.loop:
+    movdqu      m2, [r0]            ; TODO: movdqa
+    movdqu      m3, [r0 + 16]       ; TODO: movdqa
+    add         r0, 32              ; advance past the 8 dwords just loaded
+    packssdw    m2, m3              ; 8 dwords -> 8 words; signed saturation keeps non-zero values non-zero
+    pcmpeqw     m2, m0              ; 0xFFFF in every word lane whose coefficient is zero
+    psrlw       m2, 15              ; reduce the mask to 1 per zero coefficient
+    packsswb    m2, m2              ; 8 words -> 8 bytes (same bytes in both halves)
+    psadbw      m2, m0              ; add up the 8 bytes of each half into its qword
+    paddd       m1, m2              ; accumulate the zero count
+    dec         r1d
+    jnz        .loop
+
+    movd        r1d, m1             ; low dword = number of zero coefficients
+    sub         r2d, r1d            ; non-zero count = numCoeff - zero count
+    mov         eax, r2d            ; return value
+
+    RET
+
+
 ;-----------------------------------------------------------------------------------------------------------------------------------------------
 ;void weight_pp(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset)
 ;-----------------------------------------------------------------------------------------------------------------------------------------------


More information about the x265-devel mailing list