<div dir="ltr"><br><div class="gmail_extra"><br><br><div class="gmail_quote">On Mon, Feb 17, 2014 at 12:08 AM, Satoshi Nakagawa <span dir="ltr">&lt;<a href="mailto:nakagawa424@oki.com" target="_blank">nakagawa424@oki.com</a>&gt;</span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class=""># HG changeset patch<br>
# User Satoshi Nakagawa &lt;<a href="mailto:nakagawa424@oki.com">nakagawa424@oki.com</a>&gt;<br>
</div># Date 1392617016 -32400<br>
#      Mon Feb 17 15:03:36 2014 +0900<br>
# Node ID 8dc1c9646b23a0e1110bef8a10ebfe3fee5d4250<br>
# Parent  ce96cdb390fe26aee6effa731e51303c1d9056b0<br>
primitives: add count_nonzero<br></blockquote><div><br></div><div>Queued.  Please add a unit test for this primitive to one of the existing test benches.</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

<br>
diff -r ce96cdb390fe -r 8dc1c9646b23 source/Lib/TLibEncoder/TEncEntropy.cpp<br>
--- a/source/Lib/TLibEncoder/TEncEntropy.cpp    Sun Feb 16 22:47:32 2014 -0600<br>
+++ b/source/Lib/TLibEncoder/TEncEntropy.cpp    Mon Feb 17 15:03:36 2014 +0900<br>
@@ -724,18 +724,6 @@<br>
<div class="">     }<br>
 }<br>
<br>
-int TEncEntropy::countNonZeroCoeffs(TCoeff* coeff, uint32_t size)<br>
-{<br>
-    int count = 0;<br>
-<br>
-    for (int i = 0; i < size; i++)<br>
-    {<br>
-        count += coeff[i] != 0;<br>
-    }<br>
-<br>
-    return count;<br>
-}<br>
-<br>
 /** encode quantization matrix<br>
  * \param scalingList quantization matrix information<br>
  */<br>
</div>diff -r ce96cdb390fe -r 8dc1c9646b23 source/Lib/TLibEncoder/TEncEntropy.h<br>
--- a/source/Lib/TLibEncoder/TEncEntropy.h      Sun Feb 16 22:47:32 2014 -0600<br>
+++ b/source/Lib/TLibEncoder/TEncEntropy.h      Mon Feb 17 15:03:36 2014 +0900<br>
<div class="">@@ -189,7 +189,6 @@<br>
     void estimateBit(estBitsSbacStruct* estBitsSbac, int width, int height, TextType ttype);<br>
     void encodeSaoOffset(SaoLcuParam* saoLcuParam, uint32_t compIdx);<br>
     void encodeSaoUnitInterleaving(int compIdx, bool saoFlag, int rx, int ry, SaoLcuParam* saoLcuParam, int cuAddrInSlice, int cuAddrUpInSlice, int allowMergeLeft, int allowMergeUp);<br>
-    static int countNonZeroCoeffs(TCoeff* pcCoef, uint32_t uiSize);<br>
 }; // END CLASS DEFINITION TEncEntropy<br>
 }<br>
 //! \}<br>
</div>diff -r ce96cdb390fe -r 8dc1c9646b23 source/Lib/TLibEncoder/TEncSbac.cpp<br>
--- a/source/Lib/TLibEncoder/TEncSbac.cpp       Sun Feb 16 22:47:32 2014 -0600<br>
+++ b/source/Lib/TLibEncoder/TEncSbac.cpp       Mon Feb 17 15:03:36 2014 +0900<br>
<div class="">@@ -36,6 +36,7 @@<br>
 */<br>
<br>
 #include "TEncSbac.h"<br>
+#include "primitives.h"<br>
<br>
 namespace x265 {<br>
 //! \ingroup TLibEncoder<br>
</div>@@ -2106,7 +2107,7 @@<br>
<div class="">     assert(width <= m_slice->getSPS()->getMaxTrSize());<br>
<br>
     // compute number of significant coefficients<br>
-    uint32_t numSig = TEncEntropy::countNonZeroCoeffs(coeff, width * height);<br>
+    uint32_t numSig = primitives.count_nonzero(coeff, width * height);<br>
<br>
     if (numSig == 0)<br>
         return;<br>
</div>diff -r ce96cdb390fe -r 8dc1c9646b23 source/Lib/TLibEncoder/TEncSearch.cpp<br>
--- a/source/Lib/TLibEncoder/TEncSearch.cpp     Sun Feb 16 22:47:32 2014 -0600<br>
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp     Mon Feb 17 15:03:36 2014 +0900<br>
@@ -87,9 +87,9 @@<br>
         const uint32_t numLayersToAllocate = m_cfg->getQuadtreeTULog2MaxSize() - m_cfg->getQuadtreeTULog2MinSize() + 1;<br>
         for (uint32_t i = 0; i < numLayersToAllocate; ++i)<br>
         {<br>
-            delete[] m_qtTempCoeffY[i];<br>
-            delete[] m_qtTempCoeffCb[i];<br>
-            delete[] m_qtTempCoeffCr[i];<br>
+            X265_FREE(m_qtTempCoeffY[i]);<br>
+            X265_FREE(m_qtTempCoeffCb[i]);<br>
+            X265_FREE(m_qtTempCoeffCr[i]);<br>
             m_qtTempTComYuv[i].destroy();<br>
         }<br>
     }<br>
@@ -98,9 +98,9 @@<br>
     delete[] m_qtTempCoeffCr;<br>
     delete[] m_qtTempTrIdx;<br>
     delete[] m_qtTempTComYuv;<br>
-    delete[] m_qtTempTUCoeffY;<br>
-    delete[] m_qtTempTUCoeffCb;<br>
-    delete[] m_qtTempTUCoeffCr;<br>
+    X265_FREE(m_qtTempTUCoeffY);<br>
+    X265_FREE(m_qtTempTUCoeffCb);<br>
+    X265_FREE(m_qtTempTUCoeffCr);<br>
     for (uint32_t i = 0; i < 3; ++i)<br>
     {<br>
         delete[] m_qtTempCbf[i];<br>
@@ -155,19 +155,18 @@<br>
<br>
     for (uint32_t i = 0; i < numLayersToAllocate; ++i)<br>
     {<br>
-        m_qtTempCoeffY[i]  = new TCoeff[g_maxCUWidth * g_maxCUHeight];<br>
-<br>
-        m_qtTempCoeffCb[i] = new TCoeff[(g_maxCUWidth >> m_hChromaShift) * (g_maxCUHeight >> m_vChromaShift)];<br>
-        m_qtTempCoeffCr[i] = new TCoeff[(g_maxCUWidth >> m_hChromaShift) * (g_maxCUHeight >> m_vChromaShift)];<br>
+        m_qtTempCoeffY[i]  = X265_MALLOC(TCoeff, g_maxCUWidth * g_maxCUHeight);<br>
+        m_qtTempCoeffCb[i] = X265_MALLOC(TCoeff, (g_maxCUWidth >> m_hChromaShift) * (g_maxCUHeight >> m_vChromaShift));<br>
+        m_qtTempCoeffCr[i] = X265_MALLOC(TCoeff, (g_maxCUWidth >> m_hChromaShift) * (g_maxCUHeight >> m_vChromaShift));<br>
         m_qtTempTComYuv[i].create(MAX_CU_SIZE, MAX_CU_SIZE, cfg->param.internalCsp);<br>
     }<br>
<br>
     m_sharedPredTransformSkip[0] = new Pel[MAX_TS_WIDTH * MAX_TS_HEIGHT];<br>
     m_sharedPredTransformSkip[1] = new Pel[MAX_TS_WIDTH * MAX_TS_HEIGHT];<br>
     m_sharedPredTransformSkip[2] = new Pel[MAX_TS_WIDTH * MAX_TS_HEIGHT];<br>
-    m_qtTempTUCoeffY  = new TCoeff[MAX_TS_WIDTH * MAX_TS_HEIGHT];<br>
-    m_qtTempTUCoeffCb = new TCoeff[MAX_TS_WIDTH * MAX_TS_HEIGHT];<br>
-    m_qtTempTUCoeffCr = new TCoeff[MAX_TS_WIDTH * MAX_TS_HEIGHT];<br>
+    m_qtTempTUCoeffY  = X265_MALLOC(TCoeff, MAX_TS_WIDTH * MAX_TS_HEIGHT);<br>
+    m_qtTempTUCoeffCb = X265_MALLOC(TCoeff, MAX_TS_WIDTH * MAX_TS_HEIGHT);<br>
+    m_qtTempTUCoeffCr = X265_MALLOC(TCoeff, MAX_TS_WIDTH * MAX_TS_HEIGHT);<br>
<br>
     m_qtTempTransformSkipTComYuv.create(g_maxCUWidth, g_maxCUHeight, cfg->param.internalCsp);<br>
<br>
diff -r ce96cdb390fe -r 8dc1c9646b23 source/common/dct.cpp<br>
--- a/source/common/dct.cpp     Sun Feb 16 22:47:32 2014 -0600<br>
+++ b/source/common/dct.cpp     Mon Feb 17 15:03:36 2014 +0900<br>
@@ -797,6 +797,21 @@<br>
<div class=""><br>
     return acSum;<br>
 }<br>
+<br>
+int  count_nonzero_c(const int32_t *quantCoeff, int numCoeff)<br>
+{<br>
</div>+    assert(((intptr_t)quantCoeff & 15) == 0);<br>
<div class="">+    assert(numCoeff > 0 && (numCoeff & 15) == 0);<br>
+<br>
+    int count = 0;<br>
+<br>
+    for (int i = 0; i < numCoeff; i++)<br>
+    {<br>
+        count += quantCoeff[i] != 0;<br>
+    }<br>
+<br>
+    return count;<br>
+}<br>
 }  // closing - anonymous file-static namespace<br>
<br>
 namespace x265 {<br>
</div>@@ -817,5 +832,6 @@<br>
<div class="">     p.idct[IDCT_8x8] = idct8_c;<br>
     p.idct[IDCT_16x16] = idct16_c;<br>
     p.idct[IDCT_32x32] = idct32_c;<br>
+    p.count_nonzero = count_nonzero_c;<br>
 }<br>
 }<br>
</div>diff -r ce96cdb390fe -r 8dc1c9646b23 source/common/primitives.h<br>
--- a/source/common/primitives.h        Sun Feb 16 22:47:32 2014 -0600<br>
+++ b/source/common/primitives.h        Mon Feb 17 15:03:36 2014 +0900<br>
<div class="">@@ -158,6 +158,7 @@<br>
 typedef uint32_t (*quant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff, int32_t* lastPos);<br>
 typedef void (*dequant_scaling_t)(const int32_t* src, const int32_t *dequantCoef, int32_t* dst, int num, int mcqp_miper, int shift);<br>
 typedef void (*dequant_normal_t)(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift);<br>
+typedef int  (*count_nonzero_t)(const int32_t *quantCoeff, int numCoeff);<br>
<br>
 typedef void (*weightp_pp_t)(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);<br>
 typedef void (*weightp_sp_t)(int16_t *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);<br>
@@ -240,6 +241,7 @@<br>
     quant_t         quant;<br>
     dequant_scaling_t dequant_scaling;<br>
     dequant_normal_t dequant_normal;<br>
+    count_nonzero_t count_nonzero;<br>
<br>
     calcresidual_t  calcresidual[NUM_SQUARE_BLOCKS];<br>
     calcrecon_t     calcrecon[NUM_SQUARE_BLOCKS];<br>
</div>diff -r ce96cdb390fe -r 8dc1c9646b23 source/common/x86/asm-primitives.cpp<br>
--- a/source/common/x86/asm-primitives.cpp      Sun Feb 16 22:47:32 2014 -0600<br>
+++ b/source/common/x86/asm-primitives.cpp      Mon Feb 17 15:03:36 2014 +0900<br>
<div class="">@@ -1084,6 +1084,7 @@<br>
         p.dct[DCT_4x4] = x265_dct4_sse2;<br>
         p.idct[IDCT_4x4] = x265_idct4_sse2;<br>
         p.idct[IDST_4x4] = x265_idst4_sse2;<br>
+        p.count_nonzero = x265_count_nonzero_sse2;<br>
     }<br>
     if (cpuMask & X265_CPU_SSSE3)<br>
     {<br>
</div>diff -r ce96cdb390fe -r 8dc1c9646b23 source/common/x86/pixel-util.h<br>
--- a/source/common/x86/pixel-util.h    Sun Feb 16 22:47:32 2014 -0600<br>
+++ b/source/common/x86/pixel-util.h    Mon Feb 17 15:03:36 2014 +0900<br>
<div class="">@@ -46,6 +46,7 @@<br>
<br>
 uint32_t x265_quant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff, int32_t* lastPos);<br>
 void x265_dequant_normal_sse4(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift);<br>
+int x265_count_nonzero_sse2(const int32_t *quantCoeff, int numCoeff);<br>
<br>
 void x265_weight_pp_sse4(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);<br>
 void x265_weight_sp_sse4(int16_t *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);<br>
</div>diff -r ce96cdb390fe -r 8dc1c9646b23 source/common/x86/pixel-util8.asm<br>
--- a/source/common/x86/pixel-util8.asm Sun Feb 16 22:47:32 2014 -0600<br>
+++ b/source/common/x86/pixel-util8.asm Mon Feb 17 15:03:36 2014 +0900<br>
<div class="">@@ -1194,6 +1194,37 @@<br>
     jnz        .loop<br>
     RET<br>
<br>
+<br>
+;-----------------------------------------------------------------------------<br>
+; int count_nonzero(const int32_t *quantCoeff, int numCoeff);<br>
+;-----------------------------------------------------------------------------<br>
+INIT_XMM sse2<br>
+cglobal count_nonzero, 2,3,4<br>
+    pxor        m0, m0<br>
+    pxor        m1, m1<br>
+    mov         r2d, r1d<br>
+    shr         r1d, 3<br>
+<br>
+.loop<br>
</div>+    mova        m2, [r0]<br>
+    mova        m3, [r0 + 16]<br>
<div class="HOEnZb"><div class="h5">+    add         r0, 32<br>
+    packssdw    m2, m3<br>
+    pcmpeqw     m2, m0<br>
+    psrlw       m2, 15<br>
+    packsswb    m2, m2<br>
+    psadbw      m2, m0<br>
+    paddd       m1, m2<br>
+    dec         r1d<br>
+    jnz        .loop<br>
+<br>
+    movd        r1d, m1<br>
+    sub         r2d, r1d<br>
+    mov         eax, r2d<br>
+<br>
+    RET<br>
+<br>
+<br>
 ;-----------------------------------------------------------------------------------------------------------------------------------------------<br>
 ;void weight_pp(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset)<br>
 ;-----------------------------------------------------------------------------------------------------------------------------------------------<br>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</div></div></blockquote></div><br><br clear="all"><div><br></div>-- <br>Steve Borho
</div></div>