[x265] primitives: add count_nonzero

Mon Feb 17 19:27:04 CET 2014

On Mon, Feb 17, 2014 at 12:08 AM, Satoshi Nakagawa <nakagawa424 at oki.com> wrote:

> # HG changeset patch
> # User Satoshi Nakagawa <nakagawa424 at oki.com>
> # Date 1392617016 -32400
> #      Mon Feb 17 15:03:36 2014 +0900
> # Node ID 8dc1c9646b23a0e1110bef8a10ebfe3fee5d4250
> # Parent  ce96cdb390fe26aee6effa731e51303c1d9056b0
> primitives: add count_nonzero
>

Queued.  Please add a unit test for this primitive to one of the existing
test benches.

>
> diff -r ce96cdb390fe -r 8dc1c9646b23 source/Lib/TLibEncoder/TEncEntropy.cpp
> --- a/source/Lib/TLibEncoder/TEncEntropy.cpp    Sun Feb 16 22:47:32 2014
> -0600
> +++ b/source/Lib/TLibEncoder/TEncEntropy.cpp    Mon Feb 17 15:03:36 2014
> +0900
> @@ -724,18 +724,6 @@
>      }
>  }
>
> -int TEncEntropy::countNonZeroCoeffs(TCoeff* coeff, uint32_t size)
> -{
> -    int count = 0;
> -
> -    for (int i = 0; i < size; i++)
> -    {
> -        count += coeff[i] != 0;
> -    }
> -
> -    return count;
> -}
> -
>  /** encode quantization matrix
>   * \param scalingList quantization matrix information
>   */
> diff -r ce96cdb390fe -r 8dc1c9646b23 source/Lib/TLibEncoder/TEncEntropy.h
> --- a/source/Lib/TLibEncoder/TEncEntropy.h      Sun Feb 16 22:47:32 2014
> -0600
> +++ b/source/Lib/TLibEncoder/TEncEntropy.h      Mon Feb 17 15:03:36 2014
> +0900
> @@ -189,7 +189,6 @@
>      void estimateBit(estBitsSbacStruct* estBitsSbac, int width, int
> height, TextType ttype);
>      void encodeSaoOffset(SaoLcuParam* saoLcuParam, uint32_t compIdx);
>      void encodeSaoUnitInterleaving(int compIdx, bool saoFlag, int rx, int
> ry, SaoLcuParam* saoLcuParam, int cuAddrInSlice, int cuAddrUpInSlice, int
> allowMergeLeft, int allowMergeUp);
> -    static int countNonZeroCoeffs(TCoeff* pcCoef, uint32_t uiSize);
>  }; // END CLASS DEFINITION TEncEntropy
>  }
>  //! \}
> diff -r ce96cdb390fe -r 8dc1c9646b23 source/Lib/TLibEncoder/TEncSbac.cpp
> --- a/source/Lib/TLibEncoder/TEncSbac.cpp       Sun Feb 16 22:47:32 2014
> -0600
> +++ b/source/Lib/TLibEncoder/TEncSbac.cpp       Mon Feb 17 15:03:36 2014
> +0900
> @@ -36,6 +36,7 @@
>  */
>
>  #include "TEncSbac.h"
> +#include "primitives.h"
>
>  namespace x265 {
>  //! \ingroup TLibEncoder
> @@ -2106,7 +2107,7 @@
>      assert(width <= m_slice->getSPS()->getMaxTrSize());
>
>      // compute number of significant coefficients
> -    uint32_t numSig = TEncEntropy::countNonZeroCoeffs(coeff, width *
> height);
> +    uint32_t numSig = primitives.count_nonzero(coeff, width * height);
>
>      if (numSig == 0)
>          return;
> diff -r ce96cdb390fe -r 8dc1c9646b23 source/Lib/TLibEncoder/TEncSearch.cpp
> --- a/source/Lib/TLibEncoder/TEncSearch.cpp     Sun Feb 16 22:47:32 2014
> -0600
> +++ b/source/Lib/TLibEncoder/TEncSearch.cpp     Mon Feb 17 15:03:36 2014
> +0900
> @@ -87,9 +87,9 @@
>          const uint32_t numLayersToAllocate =
> m_cfg->getQuadtreeTULog2MaxSize() - m_cfg->getQuadtreeTULog2MinSize() + 1;
>          for (uint32_t i = 0; i < numLayersToAllocate; ++i)
>          {
> -            delete[] m_qtTempCoeffY[i];
> -            delete[] m_qtTempCoeffCb[i];
> -            delete[] m_qtTempCoeffCr[i];
> +            X265_FREE(m_qtTempCoeffY[i]);
> +            X265_FREE(m_qtTempCoeffCb[i]);
> +            X265_FREE(m_qtTempCoeffCr[i]);
>              m_qtTempTComYuv[i].destroy();
>          }
>      }
> @@ -98,9 +98,9 @@
>      delete[] m_qtTempCoeffCr;
>      delete[] m_qtTempTrIdx;
>      delete[] m_qtTempTComYuv;
> -    delete[] m_qtTempTUCoeffY;
> -    delete[] m_qtTempTUCoeffCb;
> -    delete[] m_qtTempTUCoeffCr;
> +    X265_FREE(m_qtTempTUCoeffY);
> +    X265_FREE(m_qtTempTUCoeffCb);
> +    X265_FREE(m_qtTempTUCoeffCr);
>      for (uint32_t i = 0; i < 3; ++i)
>      {
>          delete[] m_qtTempCbf[i];
> @@ -155,19 +155,18 @@
>
>      for (uint32_t i = 0; i < numLayersToAllocate; ++i)
>      {
> -        m_qtTempCoeffY[i]  = new TCoeff[g_maxCUWidth * g_maxCUHeight];
> -
> -        m_qtTempCoeffCb[i] = new TCoeff[(g_maxCUWidth >> m_hChromaShift)
> * (g_maxCUHeight >> m_vChromaShift)];
> -        m_qtTempCoeffCr[i] = new TCoeff[(g_maxCUWidth >> m_hChromaShift)
> * (g_maxCUHeight >> m_vChromaShift)];
> +        m_qtTempCoeffY[i]  = X265_MALLOC(TCoeff, g_maxCUWidth *
> g_maxCUHeight);
> +        m_qtTempCoeffCb[i] = X265_MALLOC(TCoeff, (g_maxCUWidth >>
> m_hChromaShift) * (g_maxCUHeight >> m_vChromaShift));
> +        m_qtTempCoeffCr[i] = X265_MALLOC(TCoeff, (g_maxCUWidth >>
> m_hChromaShift) * (g_maxCUHeight >> m_vChromaShift));
>          m_qtTempTComYuv[i].create(MAX_CU_SIZE, MAX_CU_SIZE,
> cfg->param.internalCsp);
>      }
>
>      m_sharedPredTransformSkip[0] = new Pel[MAX_TS_WIDTH * MAX_TS_HEIGHT];
>      m_sharedPredTransformSkip[1] = new Pel[MAX_TS_WIDTH * MAX_TS_HEIGHT];
>      m_sharedPredTransformSkip[2] = new Pel[MAX_TS_WIDTH * MAX_TS_HEIGHT];
> -    m_qtTempTUCoeffY  = new TCoeff[MAX_TS_WIDTH * MAX_TS_HEIGHT];
> -    m_qtTempTUCoeffCb = new TCoeff[MAX_TS_WIDTH * MAX_TS_HEIGHT];
> -    m_qtTempTUCoeffCr = new TCoeff[MAX_TS_WIDTH * MAX_TS_HEIGHT];
> +    m_qtTempTUCoeffY  = X265_MALLOC(TCoeff, MAX_TS_WIDTH * MAX_TS_HEIGHT);
> +    m_qtTempTUCoeffCb = X265_MALLOC(TCoeff, MAX_TS_WIDTH * MAX_TS_HEIGHT);
> +    m_qtTempTUCoeffCr = X265_MALLOC(TCoeff, MAX_TS_WIDTH * MAX_TS_HEIGHT);
>
>      m_qtTempTransformSkipTComYuv.create(g_maxCUWidth, g_maxCUHeight,
> cfg->param.internalCsp);
>
> diff -r ce96cdb390fe -r 8dc1c9646b23 source/common/dct.cpp
> --- a/source/common/dct.cpp     Sun Feb 16 22:47:32 2014 -0600
> +++ b/source/common/dct.cpp     Mon Feb 17 15:03:36 2014 +0900
> @@ -797,6 +797,21 @@
>
>      return acSum;
>  }
> +
> +int  count_nonzero_c(const int32_t *quantCoeff, int numCoeff)
> +{
> +    assert(((intptr_t)quantCoeff & 15) == 0);
> +    assert(numCoeff > 0 && (numCoeff & 15) == 0);
> +
> +    int count = 0;
> +
> +    for (int i = 0; i < numCoeff; i++)
> +    {
> +        count += quantCoeff[i] != 0;
> +    }
> +
> +    return count;
> +}
>  }  // closing - anonymous file-static namespace
>
>  namespace x265 {
> @@ -817,5 +832,6 @@
>      p.idct[IDCT_8x8] = idct8_c;
>      p.idct[IDCT_16x16] = idct16_c;
>      p.idct[IDCT_32x32] = idct32_c;
> +    p.count_nonzero = count_nonzero_c;
>  }
>  }
> diff -r ce96cdb390fe -r 8dc1c9646b23 source/common/primitives.h
> --- a/source/common/primitives.h        Sun Feb 16 22:47:32 2014 -0600
> +++ b/source/common/primitives.h        Mon Feb 17 15:03:36 2014 +0900
> @@ -158,6 +158,7 @@
>  typedef uint32_t (*quant_t)(int32_t *coef, int32_t *quantCoeff, int32_t
> *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff, int32_t*
> lastPos);
>  typedef void (*dequant_scaling_t)(const int32_t* src, const int32_t
> *dequantCoef, int32_t* dst, int num, int mcqp_miper, int shift);
>  typedef void (*dequant_normal_t)(const int32_t* quantCoef, int32_t* coef,
> int num, int scale, int shift);
> +typedef int  (*count_nonzero_t)(const int32_t *quantCoeff, int numCoeff);
>
>  typedef void (*weightp_pp_t)(pixel *src, pixel *dst, intptr_t srcStride,
> intptr_t dstStride, int width, int height, int w0, int round, int shift,
> int offset);
>  typedef void (*weightp_sp_t)(int16_t *src, pixel *dst, intptr_t
> srcStride, intptr_t dstStride, int width, int height, int w0, int round,
> int shift, int offset);
> @@ -240,6 +241,7 @@
>      quant_t         quant;
>      dequant_scaling_t dequant_scaling;
>      dequant_normal_t dequant_normal;
> +    count_nonzero_t count_nonzero;
>
>      calcresidual_t  calcresidual[NUM_SQUARE_BLOCKS];
>      calcrecon_t     calcrecon[NUM_SQUARE_BLOCKS];
> diff -r ce96cdb390fe -r 8dc1c9646b23 source/common/x86/asm-primitives.cpp
> --- a/source/common/x86/asm-primitives.cpp      Sun Feb 16 22:47:32 2014
> -0600
> +++ b/source/common/x86/asm-primitives.cpp      Mon Feb 17 15:03:36 2014
> +0900
> @@ -1084,6 +1084,7 @@
>          p.dct[DCT_4x4] = x265_dct4_sse2;
>          p.idct[IDCT_4x4] = x265_idct4_sse2;
>          p.idct[IDST_4x4] = x265_idst4_sse2;
> +        p.count_nonzero = x265_count_nonzero_sse2;
>      }
>      if (cpuMask & X265_CPU_SSSE3)
>      {
> diff -r ce96cdb390fe -r 8dc1c9646b23 source/common/x86/pixel-util.h
> --- a/source/common/x86/pixel-util.h    Sun Feb 16 22:47:32 2014 -0600
> +++ b/source/common/x86/pixel-util.h    Mon Feb 17 15:03:36 2014 +0900
> @@ -46,6 +46,7 @@
>
>  uint32_t x265_quant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t
> *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff, int32_t*
> lastPos);
>  void x265_dequant_normal_sse4(const int32_t* quantCoef, int32_t* coef,
> int num, int scale, int shift);
> +int x265_count_nonzero_sse2(const int32_t *quantCoeff, int numCoeff);
>
>  void x265_weight_pp_sse4(pixel *src, pixel *dst, intptr_t srcStride,
> intptr_t dstStride, int width, int height, int w0, int round, int shift,
> int offset);
>  void x265_weight_sp_sse4(int16_t *src, pixel *dst, intptr_t srcStride,
> intptr_t dstStride, int width, int height, int w0, int round, int shift,
> int offset);
> diff -r ce96cdb390fe -r 8dc1c9646b23 source/common/x86/pixel-util8.asm
> --- a/source/common/x86/pixel-util8.asm Sun Feb 16 22:47:32 2014 -0600
> +++ b/source/common/x86/pixel-util8.asm Mon Feb 17 15:03:36 2014 +0900
> @@ -1194,6 +1194,37 @@
>      jnz        .loop
>      RET
>
> +
>
> +;-----------------------------------------------------------------------------
> +; int count_nonzero(const int32_t *quantCoeff, int numCoeff);
>
> +;-----------------------------------------------------------------------------
> +INIT_XMM sse2
> +cglobal count_nonzero, 2,3,4
> +    pxor        m0, m0
> +    pxor        m1, m1
> +    mov         r2d, r1d
> +    shr         r1d, 3
> +
> +.loop
> +    mova        m2, [r0]
> +    mova        m3, [r0 + 16]
> +    add         r0, 32
> +    packssdw    m2, m3
> +    pcmpeqw     m2, m0
> +    psrlw       m2, 15
> +    packsswb    m2, m2
> +    psadbw      m2, m0
> +    paddd       m1, m2
> +    dec         r1d
> +    jnz        .loop
> +
> +    movd        r1d, m1
> +    sub         r2d, r1d
> +    mov         eax, r2d
> +
> +    RET
> +
> +
>
>  ;-----------------------------------------------------------------------------------------------------------------------------------------------
>  ;void weight_pp(pixel *src, pixel *dst, intptr_t srcStride, intptr_t
> dstStride, int width, int height, int w0, int round, int shift, int offset)
>
>  ;-----------------------------------------------------------------------------------------------------------------------------------------------
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>

-- 
Steve Borho
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20140217/78fcf237/attachment-0001.html>