[x265] asm: update count_nonzero, add testbench
Satoshi Nakagawa
nakagawa424 at oki.com
Fri Feb 21 03:35:08 CET 2014
# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1392948676 -32400
# Fri Feb 21 11:11:16 2014 +0900
# Node ID 66d8cb6573f27b29a9dc92ec480c635f0de48c03
# Parent 894bde574bc1678471e0c23ceb381a806768ea95
asm: update count_nonzero, add testbench
diff -r 894bde574bc1 -r 66d8cb6573f2 source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm Thu Feb 20 17:18:42 2014 -0600
+++ b/source/common/x86/pixel-util8.asm Fri Feb 21 11:11:16 2014 +0900
@@ -1240,11 +1240,12 @@
; int count_nonzero(const int32_t *quantCoeff, int numCoeff);
;-----------------------------------------------------------------------------
INIT_XMM sse2
-cglobal count_nonzero, 2,3,4
+cglobal count_nonzero, 2,2,4 ; r2 is no longer referenced below, so the second count drops from 3 to 2 (presumably the x86inc GPR count - confirm)
pxor m0, m0
- pxor m1, m1
- mov r2d, r1d
shr r1d, 3
+ movd m1, r1d ; low dword of m1 = r1d, which the shr above already turned into the loop iteration count
+ pshufd m1, m1, 0 ; broadcast that dword to all four dword lanes
+ packssdw m1, m1 ; narrow to words: every 16-bit lane of m1 now starts at the iteration count
.loop
mova m2, [r0]
@@ -1252,16 +1253,13 @@
add r0, 32
packssdw m2, m3
pcmpeqw m2, m0
- psrlw m2, 15
- packsswb m2, m2
- psadbw m2, m0
- paddd m1, m2
+ paddw m1, m2 ; m2 lanes are 0 or -1 after the compare with zero, so each zero coefficient decrements its lane; lanes end as per-lane nonzero counts
dec r1d
- jnz .loop
-
- movd r1d, m1
- sub r2d, r1d
- mov eax, r2d
+ jnz .loop ; repeat until r1d reaches zero; each pass advances r0 by 32 bytes
+
+ packuswb m1, m1 ; pack the word counters to bytes (unsigned saturation); both 64-bit halves hold the same 8 bytes
+ psadbw m1, m0 ; sum of absolute differences against zero (m0) = sum of the 8 byte counters per half
+ movd eax, m1 ; low dword of m1 (the total) goes to eax; presumably the return value per the prototype comment - confirm
RET
diff -r 894bde574bc1 -r 66d8cb6573f2 source/test/mbdstharness.cpp
--- a/source/test/mbdstharness.cpp Thu Feb 20 17:18:42 2014 -0600
+++ b/source/test/mbdstharness.cpp Fri Feb 21 11:11:16 2014 +0900
@@ -380,6 +380,41 @@
return true;
}
+bool MBDstHarness::check_count_nonzero_primitive(count_nonzero_t ref, count_nonzero_t opt)
+{ // Returns true only if ref and opt return the same value for every generated coefficient buffer.
+ ALIGN_VAR_32(int32_t, qcoeff[32 * 32]); // scratch buffer sized for the largest case (1024 entries); presumably 32-byte aligned by the macro - confirm
+
+ for (int i = 0; i < 4; i++) // four sizes: log2TrSize = 2..5
+ {
+ int log2TrSize = i + 2;
+ int num = 1 << (log2TrSize * 2); // 16, 64, 256, 1024 coefficients
+ int mask = num - 1; // num is a power of two, so "& mask" wraps an index into [0, num)
+
+ for (int n = 0; n <= num; n++) // n = number of slots to fill, from none up to all of them
+ {
+ memset(qcoeff, 0, num * sizeof(int32_t)); // start each case from an all-zero buffer
+
+ for (int j = 0; j < n; j++)
+ {
+ int k = rand() & mask; // random starting slot
+ while (qcoeff[k]) // slot already holds a nonzero value: probe for another
+ {
+ k = (k + 11) & mask; // odd stride is coprime with the power-of-two size, so probing can reach every slot
+ }
+ qcoeff[k] = rand() - RAND_MAX / 2; // signed random value; it can be 0, so the true nonzero count may be below n
+ }
+
+ int refval = ref(qcoeff, num);
+ int optval = opt(qcoeff, num);
+
+ if (refval != optval) // stop at the first disagreement
+ return false;
+ }
+ }
+
+ return true;
+}
+
bool MBDstHarness::testCorrectness(const EncoderPrimitives& ref, const EncoderPrimitives& opt)
{
for (int i = 0; i < NUM_DCTS; i++)
@@ -424,6 +459,15 @@
}
}
+ if (opt.count_nonzero)
+ {
+ if (!check_count_nonzero_primitive(ref.count_nonzero, opt.count_nonzero))
+ {
+ printf("count_nonzero: Failed!\n");
+ return false;
+ }
+ }
+
return true;
}
@@ -465,4 +509,13 @@
int dummy = -1;
REPORT_SPEEDUP(opt.quant, ref.quant, mintbuf1, mintbuf2, mintbuf3, mintbuf4, 23, 23785, 32 * 32, &dummy);
}
+
+ if (opt.count_nonzero)
+ {
+ for (int i = 4; i <= 32; i <<= 1)
+ {
+ printf("count_nonzero[%dx%d]", i, i);
+ REPORT_SPEEDUP(opt.count_nonzero, ref.count_nonzero, mbufidct, i * i)
+ }
+ }
}
diff -r 894bde574bc1 -r 66d8cb6573f2 source/test/mbdstharness.h
--- a/source/test/mbdstharness.h Thu Feb 20 17:18:42 2014 -0600
+++ b/source/test/mbdstharness.h Fri Feb 21 11:11:16 2014 +0900
@@ -43,6 +43,7 @@
bool check_quant_primitive(quant_t ref, quant_t opt);
bool check_dct_primitive(dct_t ref, dct_t opt, int width);
bool check_idct_primitive(idct_t ref, idct_t opt, int width);
+ bool check_count_nonzero_primitive(count_nonzero_t ref, count_nonzero_t opt);
public:
More information about the x265-devel mailing list