[x265] primitives: add count_nonzero

Satoshi Nakagawa nakagawa424 at oki.com
Mon Feb 17 07:08:36 CET 2014


# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1392617016 -32400
#      Mon Feb 17 15:03:36 2014 +0900
# Node ID 8dc1c9646b23a0e1110bef8a10ebfe3fee5d4250
# Parent  ce96cdb390fe26aee6effa731e51303c1d9056b0
primitives: add count_nonzero

diff -r ce96cdb390fe -r 8dc1c9646b23 source/Lib/TLibEncoder/TEncEntropy.cpp
--- a/source/Lib/TLibEncoder/TEncEntropy.cpp	Sun Feb 16 22:47:32 2014 -0600
+++ b/source/Lib/TLibEncoder/TEncEntropy.cpp	Mon Feb 17 15:03:36 2014 +0900
@@ -724,18 +724,6 @@
     }
 }
 
-int TEncEntropy::countNonZeroCoeffs(TCoeff* coeff, uint32_t size)
-{
-    int count = 0;
-
-    for (int i = 0; i < size; i++)
-    {
-        count += coeff[i] != 0;
-    }
-
-    return count;
-}
-
 /** encode quantization matrix
  * \param scalingList quantization matrix information
  */
diff -r ce96cdb390fe -r 8dc1c9646b23 source/Lib/TLibEncoder/TEncEntropy.h
--- a/source/Lib/TLibEncoder/TEncEntropy.h	Sun Feb 16 22:47:32 2014 -0600
+++ b/source/Lib/TLibEncoder/TEncEntropy.h	Mon Feb 17 15:03:36 2014 +0900
@@ -189,7 +189,6 @@
     void estimateBit(estBitsSbacStruct* estBitsSbac, int width, int height, TextType ttype);
     void encodeSaoOffset(SaoLcuParam* saoLcuParam, uint32_t compIdx);
     void encodeSaoUnitInterleaving(int compIdx, bool saoFlag, int rx, int ry, SaoLcuParam* saoLcuParam, int cuAddrInSlice, int cuAddrUpInSlice, int allowMergeLeft, int allowMergeUp);
-    static int countNonZeroCoeffs(TCoeff* pcCoef, uint32_t uiSize);
 }; // END CLASS DEFINITION TEncEntropy
 }
 //! \}
diff -r ce96cdb390fe -r 8dc1c9646b23 source/Lib/TLibEncoder/TEncSbac.cpp
--- a/source/Lib/TLibEncoder/TEncSbac.cpp	Sun Feb 16 22:47:32 2014 -0600
+++ b/source/Lib/TLibEncoder/TEncSbac.cpp	Mon Feb 17 15:03:36 2014 +0900
@@ -36,6 +36,7 @@
 */
 
 #include "TEncSbac.h"
+#include "primitives.h"
 
 namespace x265 {
 //! \ingroup TLibEncoder
@@ -2106,7 +2107,7 @@
     assert(width <= m_slice->getSPS()->getMaxTrSize());
 
     // compute number of significant coefficients
-    uint32_t numSig = TEncEntropy::countNonZeroCoeffs(coeff, width * height);
+    uint32_t numSig = primitives.count_nonzero(coeff, width * height);
 
     if (numSig == 0)
         return;
diff -r ce96cdb390fe -r 8dc1c9646b23 source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp	Sun Feb 16 22:47:32 2014 -0600
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp	Mon Feb 17 15:03:36 2014 +0900
@@ -87,9 +87,9 @@
         const uint32_t numLayersToAllocate = m_cfg->getQuadtreeTULog2MaxSize() - m_cfg->getQuadtreeTULog2MinSize() + 1;
         for (uint32_t i = 0; i < numLayersToAllocate; ++i)
         {
-            delete[] m_qtTempCoeffY[i];
-            delete[] m_qtTempCoeffCb[i];
-            delete[] m_qtTempCoeffCr[i];
+            X265_FREE(m_qtTempCoeffY[i]);
+            X265_FREE(m_qtTempCoeffCb[i]);
+            X265_FREE(m_qtTempCoeffCr[i]);
             m_qtTempTComYuv[i].destroy();
         }
     }
@@ -98,9 +98,9 @@
     delete[] m_qtTempCoeffCr;
     delete[] m_qtTempTrIdx;
     delete[] m_qtTempTComYuv;
-    delete[] m_qtTempTUCoeffY;
-    delete[] m_qtTempTUCoeffCb;
-    delete[] m_qtTempTUCoeffCr;
+    X265_FREE(m_qtTempTUCoeffY);
+    X265_FREE(m_qtTempTUCoeffCb);
+    X265_FREE(m_qtTempTUCoeffCr);
     for (uint32_t i = 0; i < 3; ++i)
     {
         delete[] m_qtTempCbf[i];
@@ -155,19 +155,18 @@
 
     for (uint32_t i = 0; i < numLayersToAllocate; ++i)
     {
-        m_qtTempCoeffY[i]  = new TCoeff[g_maxCUWidth * g_maxCUHeight];
-
-        m_qtTempCoeffCb[i] = new TCoeff[(g_maxCUWidth >> m_hChromaShift) * (g_maxCUHeight >> m_vChromaShift)];
-        m_qtTempCoeffCr[i] = new TCoeff[(g_maxCUWidth >> m_hChromaShift) * (g_maxCUHeight >> m_vChromaShift)];
+        m_qtTempCoeffY[i]  = X265_MALLOC(TCoeff, g_maxCUWidth * g_maxCUHeight);
+        m_qtTempCoeffCb[i] = X265_MALLOC(TCoeff, (g_maxCUWidth >> m_hChromaShift) * (g_maxCUHeight >> m_vChromaShift));
+        m_qtTempCoeffCr[i] = X265_MALLOC(TCoeff, (g_maxCUWidth >> m_hChromaShift) * (g_maxCUHeight >> m_vChromaShift));
         m_qtTempTComYuv[i].create(MAX_CU_SIZE, MAX_CU_SIZE, cfg->param.internalCsp);
     }
 
     m_sharedPredTransformSkip[0] = new Pel[MAX_TS_WIDTH * MAX_TS_HEIGHT];
     m_sharedPredTransformSkip[1] = new Pel[MAX_TS_WIDTH * MAX_TS_HEIGHT];
     m_sharedPredTransformSkip[2] = new Pel[MAX_TS_WIDTH * MAX_TS_HEIGHT];
-    m_qtTempTUCoeffY  = new TCoeff[MAX_TS_WIDTH * MAX_TS_HEIGHT];
-    m_qtTempTUCoeffCb = new TCoeff[MAX_TS_WIDTH * MAX_TS_HEIGHT];
-    m_qtTempTUCoeffCr = new TCoeff[MAX_TS_WIDTH * MAX_TS_HEIGHT];
+    m_qtTempTUCoeffY  = X265_MALLOC(TCoeff, MAX_TS_WIDTH * MAX_TS_HEIGHT);
+    m_qtTempTUCoeffCb = X265_MALLOC(TCoeff, MAX_TS_WIDTH * MAX_TS_HEIGHT);
+    m_qtTempTUCoeffCr = X265_MALLOC(TCoeff, MAX_TS_WIDTH * MAX_TS_HEIGHT);
 
     m_qtTempTransformSkipTComYuv.create(g_maxCUWidth, g_maxCUHeight, cfg->param.internalCsp);
 
diff -r ce96cdb390fe -r 8dc1c9646b23 source/common/dct.cpp
--- a/source/common/dct.cpp	Sun Feb 16 22:47:32 2014 -0600
+++ b/source/common/dct.cpp	Mon Feb 17 15:03:36 2014 +0900
@@ -797,6 +797,21 @@
 
     return acSum;
 }
+
+int  count_nonzero_c(const int32_t *quantCoeff, int numCoeff) // returns how many of the numCoeff entries of quantCoeff are nonzero
+{
+    assert(((intptr_t)quantCoeff & 15) == 0); // the buffer address must be a multiple of 16
+    assert(numCoeff > 0 && (numCoeff & 15) == 0); // the count must be a positive multiple of 16
+
+    int count = 0;
+
+    for (int i = 0; i < numCoeff; i++)
+    {
+        count += quantCoeff[i] != 0; // the comparison contributes 1 for a nonzero coefficient and 0 otherwise
+    }
+
+    return count;
+}
 }  // closing - anonymous file-static namespace
 
 namespace x265 {
@@ -817,5 +832,6 @@
     p.idct[IDCT_8x8] = idct8_c;
     p.idct[IDCT_16x16] = idct16_c;
     p.idct[IDCT_32x32] = idct32_c;
+    p.count_nonzero = count_nonzero_c;
 }
 }
diff -r ce96cdb390fe -r 8dc1c9646b23 source/common/primitives.h
--- a/source/common/primitives.h	Sun Feb 16 22:47:32 2014 -0600
+++ b/source/common/primitives.h	Mon Feb 17 15:03:36 2014 +0900
@@ -158,6 +158,7 @@
 typedef uint32_t (*quant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff, int32_t* lastPos);
 typedef void (*dequant_scaling_t)(const int32_t* src, const int32_t *dequantCoef, int32_t* dst, int num, int mcqp_miper, int shift);
 typedef void (*dequant_normal_t)(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift);
+typedef int  (*count_nonzero_t)(const int32_t *quantCoeff, int numCoeff);
 
 typedef void (*weightp_pp_t)(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
 typedef void (*weightp_sp_t)(int16_t *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
@@ -240,6 +241,7 @@
     quant_t         quant;
     dequant_scaling_t dequant_scaling;
     dequant_normal_t dequant_normal;
+    count_nonzero_t count_nonzero;
 
     calcresidual_t  calcresidual[NUM_SQUARE_BLOCKS];
     calcrecon_t     calcrecon[NUM_SQUARE_BLOCKS];
diff -r ce96cdb390fe -r 8dc1c9646b23 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Sun Feb 16 22:47:32 2014 -0600
+++ b/source/common/x86/asm-primitives.cpp	Mon Feb 17 15:03:36 2014 +0900
@@ -1084,6 +1084,7 @@
         p.dct[DCT_4x4] = x265_dct4_sse2;
         p.idct[IDCT_4x4] = x265_idct4_sse2;
         p.idct[IDST_4x4] = x265_idst4_sse2;
+        p.count_nonzero = x265_count_nonzero_sse2;
     }
     if (cpuMask & X265_CPU_SSSE3)
     {
diff -r ce96cdb390fe -r 8dc1c9646b23 source/common/x86/pixel-util.h
--- a/source/common/x86/pixel-util.h	Sun Feb 16 22:47:32 2014 -0600
+++ b/source/common/x86/pixel-util.h	Mon Feb 17 15:03:36 2014 +0900
@@ -46,6 +46,7 @@
 
 uint32_t x265_quant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff, int32_t* lastPos);
 void x265_dequant_normal_sse4(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift);
+int x265_count_nonzero_sse2(const int32_t *quantCoeff, int numCoeff);
 
 void x265_weight_pp_sse4(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
 void x265_weight_sp_sse4(int16_t *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
diff -r ce96cdb390fe -r 8dc1c9646b23 source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm	Sun Feb 16 22:47:32 2014 -0600
+++ b/source/common/x86/pixel-util8.asm	Mon Feb 17 15:03:36 2014 +0900
@@ -1194,6 +1194,37 @@
     jnz        .loop
     RET
 
+
+;-----------------------------------------------------------------------------
+; int count_nonzero(const int32_t *quantCoeff, int numCoeff);
+;-----------------------------------------------------------------------------
+INIT_XMM sse2
+cglobal count_nonzero, 2,3,4 ; r0 = quantCoeff, r1d = numCoeff; presumably x86inc's "2 args, 3 GPRs, 4 XMM regs" - confirm
+    pxor        m0, m0 ; m0 = all zeros (compare operand and SAD reference)
+    pxor        m1, m1 ; m1 = running count of ZERO coefficients
+    mov         r2d, r1d ; keep the original numCoeff for the final subtraction
+    shr         r1d, 3 ; loop counter = numCoeff / 8 (each pass consumes 8 int32 values)
+
+.loop ; NOTE(review): label has no trailing ':' - some assemblers warn about this; confirm it is accepted
+    mova        m2, [r0] ; coefficients 0..3 of this group; presumably an aligned load - confirm
+    mova        m3, [r0 + 16] ; coefficients 4..7 of this group
+    add         r0, 32 ; advance past the 8 dwords just loaded
+    packssdw    m2, m3 ; saturating pack to 8 words; a nonzero dword stays nonzero
+    pcmpeqw     m2, m0 ; 0xFFFF in every word lane that is zero
+    psrlw       m2, 15 ; reduce each mask to 1 (zero coefficient) or 0 (nonzero)
+    packsswb    m2, m2 ; pack the 8 flags to bytes (same 8 bytes in both halves)
+    psadbw      m2, m0 ; sum the flag bytes of each 64-bit half = zeros in this group
+    paddd       m1, m2 ; accumulate the zero count
+    dec         r1d ; NOTE(review): a numCoeff below 8 would start at 0 and wrap here - callers presumably never pass that
+    jnz        .loop
+
+    movd        r1d, m1 ; low dword of the accumulator = total zero coefficients
+    sub         r2d, r1d ; nonzero count = numCoeff - zero count
+    mov         eax, r2d ; return value
+
+    RET
+
+
 ;-----------------------------------------------------------------------------------------------------------------------------------------------
 ;void weight_pp(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset)
 ;-----------------------------------------------------------------------------------------------------------------------------------------------


More information about the x265-devel mailing list