[x265] primitives: add count_nonzero
Steve Borho
steve at borho.org
Mon Feb 17 19:27:04 CET 2014
On Mon, Feb 17, 2014 at 12:08 AM, Satoshi Nakagawa <nakagawa424 at oki.com> wrote:
> # HG changeset patch
> # User Satoshi Nakagawa <nakagawa424 at oki.com>
> # Date 1392617016 -32400
> # Mon Feb 17 15:03:36 2014 +0900
> # Node ID 8dc1c9646b23a0e1110bef8a10ebfe3fee5d4250
> # Parent ce96cdb390fe26aee6effa731e51303c1d9056b0
> primitives: add count_nonzero
>
Queued. Please add a unit test for this primitive to one of the existing
test benches.
>
> diff -r ce96cdb390fe -r 8dc1c9646b23 source/Lib/TLibEncoder/TEncEntropy.cpp
> --- a/source/Lib/TLibEncoder/TEncEntropy.cpp Sun Feb 16 22:47:32 2014
> -0600
> +++ b/source/Lib/TLibEncoder/TEncEntropy.cpp Mon Feb 17 15:03:36 2014
> +0900
> @@ -724,18 +724,6 @@
> }
> }
>
> -int TEncEntropy::countNonZeroCoeffs(TCoeff* coeff, uint32_t size)
> -{
> - int count = 0;
> -
> - for (int i = 0; i < size; i++)
> - {
> - count += coeff[i] != 0;
> - }
> -
> - return count;
> -}
> -
> /** encode quantization matrix
> * \param scalingList quantization matrix information
> */
> diff -r ce96cdb390fe -r 8dc1c9646b23 source/Lib/TLibEncoder/TEncEntropy.h
> --- a/source/Lib/TLibEncoder/TEncEntropy.h Sun Feb 16 22:47:32 2014
> -0600
> +++ b/source/Lib/TLibEncoder/TEncEntropy.h Mon Feb 17 15:03:36 2014
> +0900
> @@ -189,7 +189,6 @@
> void estimateBit(estBitsSbacStruct* estBitsSbac, int width, int
> height, TextType ttype);
> void encodeSaoOffset(SaoLcuParam* saoLcuParam, uint32_t compIdx);
> void encodeSaoUnitInterleaving(int compIdx, bool saoFlag, int rx, int
> ry, SaoLcuParam* saoLcuParam, int cuAddrInSlice, int cuAddrUpInSlice, int
> allowMergeLeft, int allowMergeUp);
> - static int countNonZeroCoeffs(TCoeff* pcCoef, uint32_t uiSize);
> }; // END CLASS DEFINITION TEncEntropy
> }
> //! \}
> diff -r ce96cdb390fe -r 8dc1c9646b23 source/Lib/TLibEncoder/TEncSbac.cpp
> --- a/source/Lib/TLibEncoder/TEncSbac.cpp Sun Feb 16 22:47:32 2014
> -0600
> +++ b/source/Lib/TLibEncoder/TEncSbac.cpp Mon Feb 17 15:03:36 2014
> +0900
> @@ -36,6 +36,7 @@
> */
>
> #include "TEncSbac.h"
> +#include "primitives.h"
>
> namespace x265 {
> //! \ingroup TLibEncoder
> @@ -2106,7 +2107,7 @@
> assert(width <= m_slice->getSPS()->getMaxTrSize());
>
> // compute number of significant coefficients
> - uint32_t numSig = TEncEntropy::countNonZeroCoeffs(coeff, width *
> height);
> + uint32_t numSig = primitives.count_nonzero(coeff, width * height);
>
> if (numSig == 0)
> return;
> diff -r ce96cdb390fe -r 8dc1c9646b23 source/Lib/TLibEncoder/TEncSearch.cpp
> --- a/source/Lib/TLibEncoder/TEncSearch.cpp Sun Feb 16 22:47:32 2014
> -0600
> +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Feb 17 15:03:36 2014
> +0900
> @@ -87,9 +87,9 @@
> const uint32_t numLayersToAllocate =
> m_cfg->getQuadtreeTULog2MaxSize() - m_cfg->getQuadtreeTULog2MinSize() + 1;
> for (uint32_t i = 0; i < numLayersToAllocate; ++i)
> {
> - delete[] m_qtTempCoeffY[i];
> - delete[] m_qtTempCoeffCb[i];
> - delete[] m_qtTempCoeffCr[i];
> + X265_FREE(m_qtTempCoeffY[i]);
> + X265_FREE(m_qtTempCoeffCb[i]);
> + X265_FREE(m_qtTempCoeffCr[i]);
> m_qtTempTComYuv[i].destroy();
> }
> }
> @@ -98,9 +98,9 @@
> delete[] m_qtTempCoeffCr;
> delete[] m_qtTempTrIdx;
> delete[] m_qtTempTComYuv;
> - delete[] m_qtTempTUCoeffY;
> - delete[] m_qtTempTUCoeffCb;
> - delete[] m_qtTempTUCoeffCr;
> + X265_FREE(m_qtTempTUCoeffY);
> + X265_FREE(m_qtTempTUCoeffCb);
> + X265_FREE(m_qtTempTUCoeffCr);
> for (uint32_t i = 0; i < 3; ++i)
> {
> delete[] m_qtTempCbf[i];
> @@ -155,19 +155,18 @@
>
> for (uint32_t i = 0; i < numLayersToAllocate; ++i)
> {
> - m_qtTempCoeffY[i] = new TCoeff[g_maxCUWidth * g_maxCUHeight];
> -
> - m_qtTempCoeffCb[i] = new TCoeff[(g_maxCUWidth >> m_hChromaShift)
> * (g_maxCUHeight >> m_vChromaShift)];
> - m_qtTempCoeffCr[i] = new TCoeff[(g_maxCUWidth >> m_hChromaShift)
> * (g_maxCUHeight >> m_vChromaShift)];
> + m_qtTempCoeffY[i] = X265_MALLOC(TCoeff, g_maxCUWidth *
> g_maxCUHeight);
> + m_qtTempCoeffCb[i] = X265_MALLOC(TCoeff, (g_maxCUWidth >>
> m_hChromaShift) * (g_maxCUHeight >> m_vChromaShift));
> + m_qtTempCoeffCr[i] = X265_MALLOC(TCoeff, (g_maxCUWidth >>
> m_hChromaShift) * (g_maxCUHeight >> m_vChromaShift));
> m_qtTempTComYuv[i].create(MAX_CU_SIZE, MAX_CU_SIZE,
> cfg->param.internalCsp);
> }
>
> m_sharedPredTransformSkip[0] = new Pel[MAX_TS_WIDTH * MAX_TS_HEIGHT];
> m_sharedPredTransformSkip[1] = new Pel[MAX_TS_WIDTH * MAX_TS_HEIGHT];
> m_sharedPredTransformSkip[2] = new Pel[MAX_TS_WIDTH * MAX_TS_HEIGHT];
> - m_qtTempTUCoeffY = new TCoeff[MAX_TS_WIDTH * MAX_TS_HEIGHT];
> - m_qtTempTUCoeffCb = new TCoeff[MAX_TS_WIDTH * MAX_TS_HEIGHT];
> - m_qtTempTUCoeffCr = new TCoeff[MAX_TS_WIDTH * MAX_TS_HEIGHT];
> + m_qtTempTUCoeffY = X265_MALLOC(TCoeff, MAX_TS_WIDTH * MAX_TS_HEIGHT);
> + m_qtTempTUCoeffCb = X265_MALLOC(TCoeff, MAX_TS_WIDTH * MAX_TS_HEIGHT);
> + m_qtTempTUCoeffCr = X265_MALLOC(TCoeff, MAX_TS_WIDTH * MAX_TS_HEIGHT);
>
> m_qtTempTransformSkipTComYuv.create(g_maxCUWidth, g_maxCUHeight,
> cfg->param.internalCsp);
>
> diff -r ce96cdb390fe -r 8dc1c9646b23 source/common/dct.cpp
> --- a/source/common/dct.cpp Sun Feb 16 22:47:32 2014 -0600
> +++ b/source/common/dct.cpp Mon Feb 17 15:03:36 2014 +0900
> @@ -797,6 +797,21 @@
>
> return acSum;
> }
> +
> +int count_nonzero_c(const int32_t *quantCoeff, int numCoeff)
> +{
> + assert(((intptr_t)quantCoeff & 15) == 0);
> + assert(numCoeff > 0 && (numCoeff & 15) == 0);
> +
> + int count = 0;
> +
> + for (int i = 0; i < numCoeff; i++)
> + {
> + count += quantCoeff[i] != 0;
> + }
> +
> + return count;
> +}
> } // closing - anonymous file-static namespace
>
> namespace x265 {
> @@ -817,5 +832,6 @@
> p.idct[IDCT_8x8] = idct8_c;
> p.idct[IDCT_16x16] = idct16_c;
> p.idct[IDCT_32x32] = idct32_c;
> + p.count_nonzero = count_nonzero_c;
> }
> }
> diff -r ce96cdb390fe -r 8dc1c9646b23 source/common/primitives.h
> --- a/source/common/primitives.h Sun Feb 16 22:47:32 2014 -0600
> +++ b/source/common/primitives.h Mon Feb 17 15:03:36 2014 +0900
> @@ -158,6 +158,7 @@
> typedef uint32_t (*quant_t)(int32_t *coef, int32_t *quantCoeff, int32_t
> *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff, int32_t*
> lastPos);
> typedef void (*dequant_scaling_t)(const int32_t* src, const int32_t
> *dequantCoef, int32_t* dst, int num, int mcqp_miper, int shift);
> typedef void (*dequant_normal_t)(const int32_t* quantCoef, int32_t* coef,
> int num, int scale, int shift);
> +typedef int (*count_nonzero_t)(const int32_t *quantCoeff, int numCoeff);
>
> typedef void (*weightp_pp_t)(pixel *src, pixel *dst, intptr_t srcStride,
> intptr_t dstStride, int width, int height, int w0, int round, int shift,
> int offset);
> typedef void (*weightp_sp_t)(int16_t *src, pixel *dst, intptr_t
> srcStride, intptr_t dstStride, int width, int height, int w0, int round,
> int shift, int offset);
> @@ -240,6 +241,7 @@
> quant_t quant;
> dequant_scaling_t dequant_scaling;
> dequant_normal_t dequant_normal;
> + count_nonzero_t count_nonzero;
>
> calcresidual_t calcresidual[NUM_SQUARE_BLOCKS];
> calcrecon_t calcrecon[NUM_SQUARE_BLOCKS];
> diff -r ce96cdb390fe -r 8dc1c9646b23 source/common/x86/asm-primitives.cpp
> --- a/source/common/x86/asm-primitives.cpp Sun Feb 16 22:47:32 2014
> -0600
> +++ b/source/common/x86/asm-primitives.cpp Mon Feb 17 15:03:36 2014
> +0900
> @@ -1084,6 +1084,7 @@
> p.dct[DCT_4x4] = x265_dct4_sse2;
> p.idct[IDCT_4x4] = x265_idct4_sse2;
> p.idct[IDST_4x4] = x265_idst4_sse2;
> + p.count_nonzero = x265_count_nonzero_sse2;
> }
> if (cpuMask & X265_CPU_SSSE3)
> {
> diff -r ce96cdb390fe -r 8dc1c9646b23 source/common/x86/pixel-util.h
> --- a/source/common/x86/pixel-util.h Sun Feb 16 22:47:32 2014 -0600
> +++ b/source/common/x86/pixel-util.h Mon Feb 17 15:03:36 2014 +0900
> @@ -46,6 +46,7 @@
>
> uint32_t x265_quant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t
> *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff, int32_t*
> lastPos);
> void x265_dequant_normal_sse4(const int32_t* quantCoef, int32_t* coef,
> int num, int scale, int shift);
> +int x265_count_nonzero_sse2(const int32_t *quantCoeff, int numCoeff);
>
> void x265_weight_pp_sse4(pixel *src, pixel *dst, intptr_t srcStride,
> intptr_t dstStride, int width, int height, int w0, int round, int shift,
> int offset);
> void x265_weight_sp_sse4(int16_t *src, pixel *dst, intptr_t srcStride,
> intptr_t dstStride, int width, int height, int w0, int round, int shift,
> int offset);
> diff -r ce96cdb390fe -r 8dc1c9646b23 source/common/x86/pixel-util8.asm
> --- a/source/common/x86/pixel-util8.asm Sun Feb 16 22:47:32 2014 -0600
> +++ b/source/common/x86/pixel-util8.asm Mon Feb 17 15:03:36 2014 +0900
> @@ -1194,6 +1194,37 @@
> jnz .loop
> RET
>
> +
>
> +;-----------------------------------------------------------------------------
> +; int count_nonzero(const int32_t *quantCoeff, int numCoeff);
>
> +;-----------------------------------------------------------------------------
> +INIT_XMM sse2
> +cglobal count_nonzero, 2,3,4
> + pxor m0, m0
> + pxor m1, m1
> + mov r2d, r1d
> + shr r1d, 3
> +
> +.loop
> + mova m2, [r0]
> + mova m3, [r0 + 16]
> + add r0, 32
> + packssdw m2, m3
> + pcmpeqw m2, m0
> + psrlw m2, 15
> + packsswb m2, m2
> + psadbw m2, m0
> + paddd m1, m2
> + dec r1d
> + jnz .loop
> +
> + movd r1d, m1
> + sub r2d, r1d
> + mov eax, r2d
> +
> + RET
> +
> +
>
> ;-----------------------------------------------------------------------------------------------------------------------------------------------
> ;void weight_pp(pixel *src, pixel *dst, intptr_t srcStride, intptr_t
> dstStride, int width, int height, int w0, int round, int shift, int offset)
>
> ;-----------------------------------------------------------------------------------------------------------------------------------------------
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
--
Steve Borho
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20140217/78fcf237/attachment-0001.html>
More information about the x265-devel mailing list