[x265] primitives: add count_nonzero
Satoshi Nakagawa
nakagawa424 at oki.com
Mon Feb 17 07:08:36 CET 2014
# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1392617016 -32400
# Mon Feb 17 15:03:36 2014 +0900
# Node ID 8dc1c9646b23a0e1110bef8a10ebfe3fee5d4250
# Parent ce96cdb390fe26aee6effa731e51303c1d9056b0
primitives: add count_nonzero
diff -r ce96cdb390fe -r 8dc1c9646b23 source/Lib/TLibEncoder/TEncEntropy.cpp
--- a/source/Lib/TLibEncoder/TEncEntropy.cpp Sun Feb 16 22:47:32 2014 -0600
+++ b/source/Lib/TLibEncoder/TEncEntropy.cpp Mon Feb 17 15:03:36 2014 +0900
@@ -724,18 +724,6 @@
}
}
-int TEncEntropy::countNonZeroCoeffs(TCoeff* coeff, uint32_t size)
-{
- int count = 0;
-
- for (int i = 0; i < size; i++)
- {
- count += coeff[i] != 0;
- }
-
- return count;
-}
-
/** encode quantization matrix
* \param scalingList quantization matrix information
*/
diff -r ce96cdb390fe -r 8dc1c9646b23 source/Lib/TLibEncoder/TEncEntropy.h
--- a/source/Lib/TLibEncoder/TEncEntropy.h Sun Feb 16 22:47:32 2014 -0600
+++ b/source/Lib/TLibEncoder/TEncEntropy.h Mon Feb 17 15:03:36 2014 +0900
@@ -189,7 +189,6 @@
void estimateBit(estBitsSbacStruct* estBitsSbac, int width, int height, TextType ttype);
void encodeSaoOffset(SaoLcuParam* saoLcuParam, uint32_t compIdx);
void encodeSaoUnitInterleaving(int compIdx, bool saoFlag, int rx, int ry, SaoLcuParam* saoLcuParam, int cuAddrInSlice, int cuAddrUpInSlice, int allowMergeLeft, int allowMergeUp);
- static int countNonZeroCoeffs(TCoeff* pcCoef, uint32_t uiSize);
}; // END CLASS DEFINITION TEncEntropy
}
//! \}
diff -r ce96cdb390fe -r 8dc1c9646b23 source/Lib/TLibEncoder/TEncSbac.cpp
--- a/source/Lib/TLibEncoder/TEncSbac.cpp Sun Feb 16 22:47:32 2014 -0600
+++ b/source/Lib/TLibEncoder/TEncSbac.cpp Mon Feb 17 15:03:36 2014 +0900
@@ -36,6 +36,7 @@
*/
#include "TEncSbac.h"
+#include "primitives.h"
namespace x265 {
//! \ingroup TLibEncoder
@@ -2106,7 +2107,7 @@
assert(width <= m_slice->getSPS()->getMaxTrSize());
// compute number of significant coefficients
- uint32_t numSig = TEncEntropy::countNonZeroCoeffs(coeff, width * height);
+ uint32_t numSig = primitives.count_nonzero(coeff, width * height);
if (numSig == 0)
return;
diff -r ce96cdb390fe -r 8dc1c9646b23 source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Sun Feb 16 22:47:32 2014 -0600
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Feb 17 15:03:36 2014 +0900
@@ -87,9 +87,9 @@
const uint32_t numLayersToAllocate = m_cfg->getQuadtreeTULog2MaxSize() - m_cfg->getQuadtreeTULog2MinSize() + 1;
for (uint32_t i = 0; i < numLayersToAllocate; ++i)
{
- delete[] m_qtTempCoeffY[i];
- delete[] m_qtTempCoeffCb[i];
- delete[] m_qtTempCoeffCr[i];
+ X265_FREE(m_qtTempCoeffY[i]);
+ X265_FREE(m_qtTempCoeffCb[i]);
+ X265_FREE(m_qtTempCoeffCr[i]);
m_qtTempTComYuv[i].destroy();
}
}
@@ -98,9 +98,9 @@
delete[] m_qtTempCoeffCr;
delete[] m_qtTempTrIdx;
delete[] m_qtTempTComYuv;
- delete[] m_qtTempTUCoeffY;
- delete[] m_qtTempTUCoeffCb;
- delete[] m_qtTempTUCoeffCr;
+ X265_FREE(m_qtTempTUCoeffY);
+ X265_FREE(m_qtTempTUCoeffCb);
+ X265_FREE(m_qtTempTUCoeffCr);
for (uint32_t i = 0; i < 3; ++i)
{
delete[] m_qtTempCbf[i];
@@ -155,19 +155,18 @@
for (uint32_t i = 0; i < numLayersToAllocate; ++i)
{
- m_qtTempCoeffY[i] = new TCoeff[g_maxCUWidth * g_maxCUHeight];
-
- m_qtTempCoeffCb[i] = new TCoeff[(g_maxCUWidth >> m_hChromaShift) * (g_maxCUHeight >> m_vChromaShift)];
- m_qtTempCoeffCr[i] = new TCoeff[(g_maxCUWidth >> m_hChromaShift) * (g_maxCUHeight >> m_vChromaShift)];
+ m_qtTempCoeffY[i] = X265_MALLOC(TCoeff, g_maxCUWidth * g_maxCUHeight);
+ m_qtTempCoeffCb[i] = X265_MALLOC(TCoeff, (g_maxCUWidth >> m_hChromaShift) * (g_maxCUHeight >> m_vChromaShift));
+ m_qtTempCoeffCr[i] = X265_MALLOC(TCoeff, (g_maxCUWidth >> m_hChromaShift) * (g_maxCUHeight >> m_vChromaShift));
m_qtTempTComYuv[i].create(MAX_CU_SIZE, MAX_CU_SIZE, cfg->param.internalCsp);
}
m_sharedPredTransformSkip[0] = new Pel[MAX_TS_WIDTH * MAX_TS_HEIGHT];
m_sharedPredTransformSkip[1] = new Pel[MAX_TS_WIDTH * MAX_TS_HEIGHT];
m_sharedPredTransformSkip[2] = new Pel[MAX_TS_WIDTH * MAX_TS_HEIGHT];
- m_qtTempTUCoeffY = new TCoeff[MAX_TS_WIDTH * MAX_TS_HEIGHT];
- m_qtTempTUCoeffCb = new TCoeff[MAX_TS_WIDTH * MAX_TS_HEIGHT];
- m_qtTempTUCoeffCr = new TCoeff[MAX_TS_WIDTH * MAX_TS_HEIGHT];
+ m_qtTempTUCoeffY = X265_MALLOC(TCoeff, MAX_TS_WIDTH * MAX_TS_HEIGHT);
+ m_qtTempTUCoeffCb = X265_MALLOC(TCoeff, MAX_TS_WIDTH * MAX_TS_HEIGHT);
+ m_qtTempTUCoeffCr = X265_MALLOC(TCoeff, MAX_TS_WIDTH * MAX_TS_HEIGHT);
m_qtTempTransformSkipTComYuv.create(g_maxCUWidth, g_maxCUHeight, cfg->param.internalCsp);
diff -r ce96cdb390fe -r 8dc1c9646b23 source/common/dct.cpp
--- a/source/common/dct.cpp Sun Feb 16 22:47:32 2014 -0600
+++ b/source/common/dct.cpp Mon Feb 17 15:03:36 2014 +0900
@@ -797,6 +797,21 @@
return acSum;
}
+
+int count_nonzero_c(const int32_t *quantCoeff, int numCoeff) // C reference: returns how many of the numCoeff entries of quantCoeff are non-zero
+{
+ assert(((intptr_t)quantCoeff & 15) == 0); // quantCoeff must be 16-byte aligned
+ assert(numCoeff > 0 && (numCoeff & 15) == 0); // numCoeff must be a positive multiple of 16
+
+ int count = 0;
+
+ for (int i = 0; i < numCoeff; i++)
+ {
+ count += quantCoeff[i] != 0; // the comparison yields 0 or 1, so this adds one per non-zero coefficient
+ }
+
+ return count;
+}
} // closing - anonymous file-static namespace
namespace x265 {
@@ -817,5 +832,6 @@
p.idct[IDCT_8x8] = idct8_c;
p.idct[IDCT_16x16] = idct16_c;
p.idct[IDCT_32x32] = idct32_c;
+ p.count_nonzero = count_nonzero_c;
}
}
diff -r ce96cdb390fe -r 8dc1c9646b23 source/common/primitives.h
--- a/source/common/primitives.h Sun Feb 16 22:47:32 2014 -0600
+++ b/source/common/primitives.h Mon Feb 17 15:03:36 2014 +0900
@@ -158,6 +158,7 @@
typedef uint32_t (*quant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff, int32_t* lastPos);
typedef void (*dequant_scaling_t)(const int32_t* src, const int32_t *dequantCoef, int32_t* dst, int num, int mcqp_miper, int shift);
typedef void (*dequant_normal_t)(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift);
+typedef int (*count_nonzero_t)(const int32_t *quantCoeff, int numCoeff);
typedef void (*weightp_pp_t)(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
typedef void (*weightp_sp_t)(int16_t *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
@@ -240,6 +241,7 @@
quant_t quant;
dequant_scaling_t dequant_scaling;
dequant_normal_t dequant_normal;
+ count_nonzero_t count_nonzero;
calcresidual_t calcresidual[NUM_SQUARE_BLOCKS];
calcrecon_t calcrecon[NUM_SQUARE_BLOCKS];
diff -r ce96cdb390fe -r 8dc1c9646b23 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Sun Feb 16 22:47:32 2014 -0600
+++ b/source/common/x86/asm-primitives.cpp Mon Feb 17 15:03:36 2014 +0900
@@ -1084,6 +1084,7 @@
p.dct[DCT_4x4] = x265_dct4_sse2;
p.idct[IDCT_4x4] = x265_idct4_sse2;
p.idct[IDST_4x4] = x265_idst4_sse2;
+ p.count_nonzero = x265_count_nonzero_sse2;
}
if (cpuMask & X265_CPU_SSSE3)
{
diff -r ce96cdb390fe -r 8dc1c9646b23 source/common/x86/pixel-util.h
--- a/source/common/x86/pixel-util.h Sun Feb 16 22:47:32 2014 -0600
+++ b/source/common/x86/pixel-util.h Mon Feb 17 15:03:36 2014 +0900
@@ -46,6 +46,7 @@
uint32_t x265_quant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff, int32_t* lastPos);
void x265_dequant_normal_sse4(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift);
+int x265_count_nonzero_sse2(const int32_t *quantCoeff, int numCoeff);
void x265_weight_pp_sse4(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
void x265_weight_sp_sse4(int16_t *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
diff -r ce96cdb390fe -r 8dc1c9646b23 source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm Sun Feb 16 22:47:32 2014 -0600
+++ b/source/common/x86/pixel-util8.asm Mon Feb 17 15:03:36 2014 +0900
@@ -1194,6 +1194,37 @@
jnz .loop
RET
+
+;-----------------------------------------------------------------------------
+; int count_nonzero(const int32_t *quantCoeff, int numCoeff);
+;-----------------------------------------------------------------------------
+INIT_XMM sse2 ; SSE2 variant working on XMM registers
+cglobal count_nonzero, 2,3,4 ; presumably x86inc order: 2 args, 3 GPRs, 4 vector regs (matches r0-r2 / m0-m3 used below) — TODO confirm
+ pxor m0, m0 ; m0 = all zeros: compare operand and psadbw reference
+ pxor m1, m1 ; m1 = running count of ZERO coefficients
+ mov r2d, r1d ; keep numCoeff for the final subtraction
+ shr r1d, 3 ; iterations = numCoeff / 8; NOTE(review): assumes numCoeff is a non-zero multiple of 8 (the C version asserts a positive multiple of 16)
+
+.loop ; NOTE(review): label has no trailing colon — confirm the assembler accepts this without a warning
+ mova m2, [r0] ; load coefficients 0-3 (int32)
+ mova m3, [r0 + 16] ; load coefficients 4-7 (int32)
+ add r0, 32 ; advance by 8 * sizeof(int32_t)
+ packssdw m2, m3 ; narrow to 8 signed-saturated words; non-zero values stay non-zero
+ pcmpeqw m2, m0 ; 0xFFFF in every word whose coefficient is zero
+ psrlw m2, 15 ; reduce each mask to 1 (zero coefficient) or 0
+ packsswb m2, m2 ; narrow the eight 0/1 words to bytes (same 8 bytes in both halves)
+ psadbw m2, m0 ; sum of bytes per 64-bit half = number of zeros among these 8 coefficients
+ paddd m1, m2 ; accumulate into the running zero count
+ dec r1d
+ jnz .loop ; repeat until every group of 8 has been processed
+
+ movd r1d, m1 ; low dword of m1 = total number of zero coefficients
+ sub r2d, r1d ; non-zero count = numCoeff - zero count
+ mov eax, r2d ; place the result in eax as the return value
+
+ RET
+
+
;-----------------------------------------------------------------------------------------------------------------------------------------------
;void weight_pp(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset)
;-----------------------------------------------------------------------------------------------------------------------------------------------
More information about the x265-devel
mailing list