[x265] primitives: add count_nonzero
Satoshi Nakagawa
nakagawa424 at oki.com
Sat Feb 15 10:53:56 CET 2014
> For x264 code style, we need to write this as movu here.
Code style fixed, thanks.
# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1392457490 -32400
# Sat Feb 15 18:44:50 2014 +0900
# Node ID 5093f4b038285cdd2466fac540884b8be93d2c87
# Parent 289b4ef4ecee345f6640d1602c6caf2c5b215d2d
primitives: add count_nonzero
diff -r 289b4ef4ecee -r 5093f4b03828 source/Lib/TLibEncoder/TEncEntropy.cpp
--- a/source/Lib/TLibEncoder/TEncEntropy.cpp Sat Feb 15 08:12:30 2014 +0530
+++ b/source/Lib/TLibEncoder/TEncEntropy.cpp Sat Feb 15 18:44:50 2014 +0900
@@ -722,18 +722,6 @@
}
}
-int TEncEntropy::countNonZeroCoeffs(TCoeff* coeff, uint32_t size)
-{
- int count = 0;
-
- for (int i = 0; i < size; i++)
- {
- count += coeff[i] != 0;
- }
-
- return count;
-}
-
/** encode quantization matrix
* \param scalingList quantization matrix information
*/
diff -r 289b4ef4ecee -r 5093f4b03828 source/Lib/TLibEncoder/TEncEntropy.h
--- a/source/Lib/TLibEncoder/TEncEntropy.h Sat Feb 15 08:12:30 2014 +0530
+++ b/source/Lib/TLibEncoder/TEncEntropy.h Sat Feb 15 18:44:50 2014 +0900
@@ -189,7 +189,6 @@
void estimateBit(estBitsSbacStruct* estBitsSbac, int width, int height, TextType ttype);
void encodeSaoOffset(SaoLcuParam* saoLcuParam, uint32_t compIdx);
void encodeSaoUnitInterleaving(int compIdx, bool saoFlag, int rx, int ry, SaoLcuParam* saoLcuParam, int cuAddrInSlice, int cuAddrUpInSlice, int allowMergeLeft, int allowMergeUp);
- static int countNonZeroCoeffs(TCoeff* pcCoef, uint32_t uiSize);
}; // END CLASS DEFINITION TEncEntropy
}
//! \}
diff -r 289b4ef4ecee -r 5093f4b03828 source/Lib/TLibEncoder/TEncSbac.cpp
--- a/source/Lib/TLibEncoder/TEncSbac.cpp Sat Feb 15 08:12:30 2014 +0530
+++ b/source/Lib/TLibEncoder/TEncSbac.cpp Sat Feb 15 18:44:50 2014 +0900
@@ -36,6 +36,7 @@
*/
#include "TEncSbac.h"
+#include "primitives.h"
namespace x265 {
//! \ingroup TLibEncoder
@@ -2105,7 +2106,7 @@
assert(width <= m_slice->getSPS()->getMaxTrSize());
// compute number of significant coefficients
- uint32_t numSig = TEncEntropy::countNonZeroCoeffs(coeff, width * height);
+ uint32_t numSig = primitives.count_nonzero(coeff, width * height);
if (numSig == 0)
return;
diff -r 289b4ef4ecee -r 5093f4b03828 source/common/dct.cpp
--- a/source/common/dct.cpp Sat Feb 15 08:12:30 2014 +0530
+++ b/source/common/dct.cpp Sat Feb 15 18:44:50 2014 +0900
@@ -797,6 +797,20 @@
return acSum;
}
+
+int count_nonzero_c(const int32_t *quantCoeff, int numCoeff) // portable C version of the count_nonzero primitive: returns how many of quantCoeff[0..numCoeff) are != 0
+{
+ assert(numCoeff > 0 && (numCoeff & 15) == 0); // contract: numCoeff is a positive multiple of 16 (presumably so SIMD versions need no tail loop - TODO confirm)
+
+ int count = 0;
+
+ for (int i = 0; i < numCoeff; i++)
+ {
+ count += quantCoeff[i] != 0; // the comparison evaluates to 0 or 1, so this adds one per nonzero coefficient
+ }
+
+ return count;
+}
} // closing - anonymous file-static namespace
namespace x265 {
@@ -817,5 +831,6 @@
p.idct[IDCT_8x8] = idct8_c;
p.idct[IDCT_16x16] = idct16_c;
p.idct[IDCT_32x32] = idct32_c;
+ p.count_nonzero = count_nonzero_c;
}
}
diff -r 289b4ef4ecee -r 5093f4b03828 source/common/primitives.h
--- a/source/common/primitives.h Sat Feb 15 08:12:30 2014 +0530
+++ b/source/common/primitives.h Sat Feb 15 18:44:50 2014 +0900
@@ -158,6 +158,7 @@
typedef uint32_t (*quant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff, int32_t* lastPos);
typedef void (*dequant_scaling_t)(const int32_t* src, const int32_t *dequantCoef, int32_t* dst, int num, int mcqp_miper, int shift);
typedef void (*dequant_normal_t)(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift);
+typedef int (*count_nonzero_t)(const int32_t *quantCoeff, int numCoeff);
typedef void (*weightp_pp_t)(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
typedef void (*weightp_sp_t)(int16_t *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
@@ -240,6 +241,7 @@
quant_t quant;
dequant_scaling_t dequant_scaling;
dequant_normal_t dequant_normal;
+ count_nonzero_t count_nonzero;
calcresidual_t calcresidual[NUM_SQUARE_BLOCKS];
calcrecon_t calcrecon[NUM_SQUARE_BLOCKS];
diff -r 289b4ef4ecee -r 5093f4b03828 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Sat Feb 15 08:12:30 2014 +0530
+++ b/source/common/x86/asm-primitives.cpp Sat Feb 15 18:44:50 2014 +0900
@@ -1084,6 +1084,7 @@
p.dct[DCT_4x4] = x265_dct4_sse2;
p.idct[IDCT_4x4] = x265_idct4_sse2;
p.idct[IDST_4x4] = x265_idst4_sse2;
+ p.count_nonzero = x265_count_nonzero_sse2;
}
if (cpuMask & X265_CPU_SSSE3)
{
diff -r 289b4ef4ecee -r 5093f4b03828 source/common/x86/pixel-util.h
--- a/source/common/x86/pixel-util.h Sat Feb 15 08:12:30 2014 +0530
+++ b/source/common/x86/pixel-util.h Sat Feb 15 18:44:50 2014 +0900
@@ -46,6 +46,7 @@
uint32_t x265_quant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff, int32_t* lastPos);
void x265_dequant_normal_sse4(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift);
+int x265_count_nonzero_sse2(const int32_t *quantCoeff, int numCoeff);
void x265_weight_pp_sse4(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
void x265_weight_sp_sse4(int16_t *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
diff -r 289b4ef4ecee -r 5093f4b03828 source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm Sat Feb 15 08:12:30 2014 +0530
+++ b/source/common/x86/pixel-util8.asm Sat Feb 15 18:44:50 2014 +0900
@@ -1194,6 +1194,37 @@
jnz .loop
RET
+
+;-----------------------------------------------------------------------------
+; int count_nonzero(const int32_t *quantCoeff, int numCoeff);
+;-----------------------------------------------------------------------------
+INIT_XMM sse2
+cglobal count_nonzero, 2,3,4 ; r0 = quantCoeff (int32_t*), r1d = numCoeff; returns the number of nonzero coefficients in eax
+ pxor m0, m0 ; m0 = all zeros: compare operand and psadbw reference
+ pxor m1, m1 ; m1 = running count of ZERO coefficients
+ mov r2d, r1d ; r2d = numCoeff, kept for the final subtraction
+ shr r1d, 3 ; r1d = loop trips, 8 coefficients each; numCoeff must be a nonzero multiple of 8 (no tail or zero-trip handling)
+
+.loop:
+ movu m2, [r0] ; coefficients 0-3 (unaligned load); TODO: mova
+ movu m3, [r0 + 16] ; coefficients 4-7 (unaligned load); TODO: mova
+ add r0, 32 ; advance past 8 int32 values
+ packssdw m2, m3 ; 8 dwords -> 8 words; signed saturation keeps every nonzero value nonzero
+ pcmpeqw m2, m0 ; word = 0xFFFF where the coefficient is zero, else 0
+ psrlw m2, 15 ; word = 1 where the coefficient is zero, else 0
+ packsswb m2, m2 ; 8 flags as bytes, duplicated in both qword halves
+ psadbw m2, m0 ; sum of the 8 flag bytes in each qword
+ paddd m1, m2 ; accumulate the zero count (low dword is the one used below)
+ dec r1d
+ jnz .loop
+
+ movd r1d, m1 ; r1d = total number of zero coefficients
+ sub r2d, r1d ; nonzero count = numCoeff - zero count
+ mov eax, r2d ; return value
+
+ RET
+
+
;-----------------------------------------------------------------------------------------------------------------------------------------------
;void weight_pp(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset)
;-----------------------------------------------------------------------------------------------------------------------------------------------
More information about the x265-devel mailing list