<div dir="ltr"><br><div class="gmail_extra"><br><br><div class="gmail_quote">On Mon, Feb 17, 2014 at 12:08 AM, Satoshi Nakagawa <span dir="ltr"><<a href="mailto:nakagawa424@oki.com" target="_blank">nakagawa424@oki.com</a>></span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class=""># HG changeset patch<br>
# User Satoshi Nakagawa <<a href="mailto:nakagawa424@oki.com">nakagawa424@oki.com</a>><br>
</div># Date 1392617016 -32400<br>
# Mon Feb 17 15:03:36 2014 +0900<br>
# Node ID 8dc1c9646b23a0e1110bef8a10ebfe3fee5d4250<br>
# Parent ce96cdb390fe26aee6effa731e51303c1d9056b0<br>
primitives: add count_nonzero<br></blockquote><div><br></div><div>Queued. Please add a unit test for this primitive to one of the existing test benches.</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<br>
diff -r ce96cdb390fe -r 8dc1c9646b23 source/Lib/TLibEncoder/TEncEntropy.cpp<br>
--- a/source/Lib/TLibEncoder/TEncEntropy.cpp Sun Feb 16 22:47:32 2014 -0600<br>
+++ b/source/Lib/TLibEncoder/TEncEntropy.cpp Mon Feb 17 15:03:36 2014 +0900<br>
@@ -724,18 +724,6 @@<br>
<div class=""> }<br>
}<br>
<br>
-int TEncEntropy::countNonZeroCoeffs(TCoeff* coeff, uint32_t size)<br>
-{<br>
- int count = 0;<br>
-<br>
- for (int i = 0; i < size; i++)<br>
- {<br>
- count += coeff[i] != 0;<br>
- }<br>
-<br>
- return count;<br>
-}<br>
-<br>
/** encode quantization matrix<br>
* \param scalingList quantization matrix information<br>
*/<br>
</div>diff -r ce96cdb390fe -r 8dc1c9646b23 source/Lib/TLibEncoder/TEncEntropy.h<br>
--- a/source/Lib/TLibEncoder/TEncEntropy.h Sun Feb 16 22:47:32 2014 -0600<br>
+++ b/source/Lib/TLibEncoder/TEncEntropy.h Mon Feb 17 15:03:36 2014 +0900<br>
<div class="">@@ -189,7 +189,6 @@<br>
void estimateBit(estBitsSbacStruct* estBitsSbac, int width, int height, TextType ttype);<br>
void encodeSaoOffset(SaoLcuParam* saoLcuParam, uint32_t compIdx);<br>
void encodeSaoUnitInterleaving(int compIdx, bool saoFlag, int rx, int ry, SaoLcuParam* saoLcuParam, int cuAddrInSlice, int cuAddrUpInSlice, int allowMergeLeft, int allowMergeUp);<br>
- static int countNonZeroCoeffs(TCoeff* pcCoef, uint32_t uiSize);<br>
}; // END CLASS DEFINITION TEncEntropy<br>
}<br>
//! \}<br>
</div>diff -r ce96cdb390fe -r 8dc1c9646b23 source/Lib/TLibEncoder/TEncSbac.cpp<br>
--- a/source/Lib/TLibEncoder/TEncSbac.cpp Sun Feb 16 22:47:32 2014 -0600<br>
+++ b/source/Lib/TLibEncoder/TEncSbac.cpp Mon Feb 17 15:03:36 2014 +0900<br>
<div class="">@@ -36,6 +36,7 @@<br>
*/<br>
<br>
#include "TEncSbac.h"<br>
+#include "primitives.h"<br>
<br>
namespace x265 {<br>
//! \ingroup TLibEncoder<br>
</div>@@ -2106,7 +2107,7 @@<br>
<div class=""> assert(width <= m_slice->getSPS()->getMaxTrSize());<br>
<br>
// compute number of significant coefficients<br>
- uint32_t numSig = TEncEntropy::countNonZeroCoeffs(coeff, width * height);<br>
+ uint32_t numSig = primitives.count_nonzero(coeff, width * height);<br>
<br>
if (numSig == 0)<br>
return;<br>
</div>diff -r ce96cdb390fe -r 8dc1c9646b23 source/Lib/TLibEncoder/TEncSearch.cpp<br>
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Sun Feb 16 22:47:32 2014 -0600<br>
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Feb 17 15:03:36 2014 +0900<br>
@@ -87,9 +87,9 @@<br>
const uint32_t numLayersToAllocate = m_cfg->getQuadtreeTULog2MaxSize() - m_cfg->getQuadtreeTULog2MinSize() + 1;<br>
for (uint32_t i = 0; i < numLayersToAllocate; ++i)<br>
{<br>
- delete[] m_qtTempCoeffY[i];<br>
- delete[] m_qtTempCoeffCb[i];<br>
- delete[] m_qtTempCoeffCr[i];<br>
+ X265_FREE(m_qtTempCoeffY[i]);<br>
+ X265_FREE(m_qtTempCoeffCb[i]);<br>
+ X265_FREE(m_qtTempCoeffCr[i]);<br>
m_qtTempTComYuv[i].destroy();<br>
}<br>
}<br>
@@ -98,9 +98,9 @@<br>
delete[] m_qtTempCoeffCr;<br>
delete[] m_qtTempTrIdx;<br>
delete[] m_qtTempTComYuv;<br>
- delete[] m_qtTempTUCoeffY;<br>
- delete[] m_qtTempTUCoeffCb;<br>
- delete[] m_qtTempTUCoeffCr;<br>
+ X265_FREE(m_qtTempTUCoeffY);<br>
+ X265_FREE(m_qtTempTUCoeffCb);<br>
+ X265_FREE(m_qtTempTUCoeffCr);<br>
for (uint32_t i = 0; i < 3; ++i)<br>
{<br>
delete[] m_qtTempCbf[i];<br>
@@ -155,19 +155,18 @@<br>
<br>
for (uint32_t i = 0; i < numLayersToAllocate; ++i)<br>
{<br>
- m_qtTempCoeffY[i] = new TCoeff[g_maxCUWidth * g_maxCUHeight];<br>
-<br>
- m_qtTempCoeffCb[i] = new TCoeff[(g_maxCUWidth >> m_hChromaShift) * (g_maxCUHeight >> m_vChromaShift)];<br>
- m_qtTempCoeffCr[i] = new TCoeff[(g_maxCUWidth >> m_hChromaShift) * (g_maxCUHeight >> m_vChromaShift)];<br>
+ m_qtTempCoeffY[i] = X265_MALLOC(TCoeff, g_maxCUWidth * g_maxCUHeight);<br>
+ m_qtTempCoeffCb[i] = X265_MALLOC(TCoeff, (g_maxCUWidth >> m_hChromaShift) * (g_maxCUHeight >> m_vChromaShift));<br>
+ m_qtTempCoeffCr[i] = X265_MALLOC(TCoeff, (g_maxCUWidth >> m_hChromaShift) * (g_maxCUHeight >> m_vChromaShift));<br>
m_qtTempTComYuv[i].create(MAX_CU_SIZE, MAX_CU_SIZE, cfg->param.internalCsp);<br>
}<br>
<br>
m_sharedPredTransformSkip[0] = new Pel[MAX_TS_WIDTH * MAX_TS_HEIGHT];<br>
m_sharedPredTransformSkip[1] = new Pel[MAX_TS_WIDTH * MAX_TS_HEIGHT];<br>
m_sharedPredTransformSkip[2] = new Pel[MAX_TS_WIDTH * MAX_TS_HEIGHT];<br>
- m_qtTempTUCoeffY = new TCoeff[MAX_TS_WIDTH * MAX_TS_HEIGHT];<br>
- m_qtTempTUCoeffCb = new TCoeff[MAX_TS_WIDTH * MAX_TS_HEIGHT];<br>
- m_qtTempTUCoeffCr = new TCoeff[MAX_TS_WIDTH * MAX_TS_HEIGHT];<br>
+ m_qtTempTUCoeffY = X265_MALLOC(TCoeff, MAX_TS_WIDTH * MAX_TS_HEIGHT);<br>
+ m_qtTempTUCoeffCb = X265_MALLOC(TCoeff, MAX_TS_WIDTH * MAX_TS_HEIGHT);<br>
+ m_qtTempTUCoeffCr = X265_MALLOC(TCoeff, MAX_TS_WIDTH * MAX_TS_HEIGHT);<br>
<br>
m_qtTempTransformSkipTComYuv.create(g_maxCUWidth, g_maxCUHeight, cfg->param.internalCsp);<br>
<br>
diff -r ce96cdb390fe -r 8dc1c9646b23 source/common/dct.cpp<br>
--- a/source/common/dct.cpp Sun Feb 16 22:47:32 2014 -0600<br>
+++ b/source/common/dct.cpp Mon Feb 17 15:03:36 2014 +0900<br>
@@ -797,6 +797,21 @@<br>
<div class=""><br>
return acSum;<br>
}<br>
+<br>
+int count_nonzero_c(const int32_t *quantCoeff, int numCoeff)<br>
+{<br>
</div>+ assert(((intptr_t)quantCoeff & 15) == 0);<br>
<div class="">+ assert(numCoeff > 0 && (numCoeff & 15) == 0);<br>
+<br>
+ int count = 0;<br>
+<br>
+ for (int i = 0; i < numCoeff; i++)<br>
+ {<br>
+ count += quantCoeff[i] != 0;<br>
+ }<br>
+<br>
+ return count;<br>
+}<br>
} // closing - anonymous file-static namespace<br>
<br>
namespace x265 {<br>
</div>@@ -817,5 +832,6 @@<br>
<div class=""> p.idct[IDCT_8x8] = idct8_c;<br>
p.idct[IDCT_16x16] = idct16_c;<br>
p.idct[IDCT_32x32] = idct32_c;<br>
+ p.count_nonzero = count_nonzero_c;<br>
}<br>
}<br>
</div>diff -r ce96cdb390fe -r 8dc1c9646b23 source/common/primitives.h<br>
--- a/source/common/primitives.h Sun Feb 16 22:47:32 2014 -0600<br>
+++ b/source/common/primitives.h Mon Feb 17 15:03:36 2014 +0900<br>
<div class="">@@ -158,6 +158,7 @@<br>
typedef uint32_t (*quant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff, int32_t* lastPos);<br>
typedef void (*dequant_scaling_t)(const int32_t* src, const int32_t *dequantCoef, int32_t* dst, int num, int mcqp_miper, int shift);<br>
typedef void (*dequant_normal_t)(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift);<br>
+typedef int (*count_nonzero_t)(const int32_t *quantCoeff, int numCoeff);<br>
<br>
typedef void (*weightp_pp_t)(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);<br>
typedef void (*weightp_sp_t)(int16_t *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);<br>
@@ -240,6 +241,7 @@<br>
quant_t quant;<br>
dequant_scaling_t dequant_scaling;<br>
dequant_normal_t dequant_normal;<br>
+ count_nonzero_t count_nonzero;<br>
<br>
calcresidual_t calcresidual[NUM_SQUARE_BLOCKS];<br>
calcrecon_t calcrecon[NUM_SQUARE_BLOCKS];<br>
</div>diff -r ce96cdb390fe -r 8dc1c9646b23 source/common/x86/asm-primitives.cpp<br>
--- a/source/common/x86/asm-primitives.cpp Sun Feb 16 22:47:32 2014 -0600<br>
+++ b/source/common/x86/asm-primitives.cpp Mon Feb 17 15:03:36 2014 +0900<br>
<div class="">@@ -1084,6 +1084,7 @@<br>
p.dct[DCT_4x4] = x265_dct4_sse2;<br>
p.idct[IDCT_4x4] = x265_idct4_sse2;<br>
p.idct[IDST_4x4] = x265_idst4_sse2;<br>
+ p.count_nonzero = x265_count_nonzero_sse2;<br>
}<br>
if (cpuMask & X265_CPU_SSSE3)<br>
{<br>
</div>diff -r ce96cdb390fe -r 8dc1c9646b23 source/common/x86/pixel-util.h<br>
--- a/source/common/x86/pixel-util.h Sun Feb 16 22:47:32 2014 -0600<br>
+++ b/source/common/x86/pixel-util.h Mon Feb 17 15:03:36 2014 +0900<br>
<div class="">@@ -46,6 +46,7 @@<br>
<br>
uint32_t x265_quant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff, int32_t* lastPos);<br>
void x265_dequant_normal_sse4(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift);<br>
+int x265_count_nonzero_sse2(const int32_t *quantCoeff, int numCoeff);<br>
<br>
void x265_weight_pp_sse4(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);<br>
void x265_weight_sp_sse4(int16_t *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);<br>
</div>diff -r ce96cdb390fe -r 8dc1c9646b23 source/common/x86/pixel-util8.asm<br>
--- a/source/common/x86/pixel-util8.asm Sun Feb 16 22:47:32 2014 -0600<br>
+++ b/source/common/x86/pixel-util8.asm Mon Feb 17 15:03:36 2014 +0900<br>
<div class="">@@ -1194,6 +1194,37 @@<br>
jnz .loop<br>
RET<br>
<br>
+<br>
+;-----------------------------------------------------------------------------<br>
+; int count_nonzero(const int32_t *quantCoeff, int numCoeff);<br>
+;-----------------------------------------------------------------------------<br>
+INIT_XMM sse2<br>
+cglobal count_nonzero, 2,3,4<br>
+ pxor m0, m0<br>
+ pxor m1, m1<br>
+ mov r2d, r1d<br>
+ shr r1d, 3<br>
+<br>
+.loop<br>
</div>+ mova m2, [r0]<br>
+ mova m3, [r0 + 16]<br>
<div class="HOEnZb"><div class="h5">+ add r0, 32<br>
+ packssdw m2, m3<br>
+ pcmpeqw m2, m0<br>
+ psrlw m2, 15<br>
+ packsswb m2, m2<br>
+ psadbw m2, m0<br>
+ paddd m1, m2<br>
+ dec r1d<br>
+ jnz .loop<br>
+<br>
+ movd r1d, m1<br>
+ sub r2d, r1d<br>
+ mov eax, r2d<br>
+<br>
+ RET<br>
+<br>
+<br>
;-----------------------------------------------------------------------------------------------------------------------------------------------<br>
;void weight_pp(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset)<br>
;-----------------------------------------------------------------------------------------------------------------------------------------------<br>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</div></div></blockquote></div><br><br clear="all"><div><br></div>-- <br>Steve Borho
</div></div>