[x265] [PATCH 1 of 2] asm: modify API on findPosFirstLast to support all zeros block
Min Chen
chenm003 at 163.com
Fri May 15 02:11:59 CEST 2015
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1431647427 25200
# Node ID c3549eeb4b595e129d18bc75bb4f03272f1624d3
# Parent 8592bf81d0848279fa79cd1487406cb516dffe99
asm: modify API on findPosFirstLast to support all zeros block
---
source/common/dct.cpp | 5 +++--
source/common/x86/pixel-util8.asm | 5 ++---
source/test/pixelharness.cpp | 32 +++++++++++++++++---------------
3 files changed, 22 insertions(+), 20 deletions(-)
diff -r 8592bf81d084 -r c3549eeb4b59 source/common/dct.cpp
--- a/source/common/dct.cpp Thu May 14 17:12:14 2015 +0530
+++ b/source/common/dct.cpp Thu May 14 16:50:27 2015 -0700
@@ -798,11 +798,11 @@
break;
}
- X265_CHECK(n >= 0, "non-zero coeff scan failuare!\n");
+ X265_CHECK(n >= -1, "non-zero coeff scan failuare!\n");
uint32_t lastNZPosInCG = (uint32_t)n;
- for (n = 0;; n++)
+ for (n = 0; n < SCAN_SET_SIZE; n++)
{
const uint32_t idx = scanTbl[n];
const uint32_t idxY = idx / MLS_CG_SIZE;
@@ -813,6 +813,7 @@
uint32_t firstNZPosInCG = (uint32_t)n;
+ // NOTE: when the coeff block is all zero, lastNZPosInCG is undefined and firstNZPosInCG is 16
return ((lastNZPosInCG << 16) | firstNZPosInCG);
}
diff -r 8592bf81d084 -r c3549eeb4b59 source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm Thu May 14 17:12:14 2015 +0530
+++ b/source/common/x86/pixel-util8.asm Thu May 14 16:50:27 2015 -0700
@@ -6046,11 +6046,10 @@
pshufb m1, m0
; get First and Last pos
- xor eax, eax
pmovmskb r0d, m1
- not r0w
+ not r0d
bsr r1w, r0w
- bsf ax, r0w
+ bsf eax, r0d ; side effect: clear AH to Zero
shl r1d, 16
or eax, r1d
RET
diff -r 8592bf81d084 -r c3549eeb4b59 source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp Thu May 14 17:12:14 2015 +0530
+++ b/source/test/pixelharness.cpp Thu May 14 16:50:27 2015 -0700
@@ -1293,23 +1293,22 @@
bool PixelHarness::check_findPosFirstLast(findPosFirstLast_t ref, findPosFirstLast_t opt)
{
- ALIGN_VAR_16(coeff_t, ref_src[32 * 32 + ITERS * 2]);
+ ALIGN_VAR_16(coeff_t, ref_src[4 * 32 + ITERS * 2]);
+ memset(ref_src, 0, sizeof(ref_src));
- for (int i = 0; i < 32 * 32; i++)
+ // fill ITERS fewer entries so the zeroed tail keeps some probability of generating all-zero blocks
+ for (int i = 0; i < 4 * 32 - ITERS; i++)
{
ref_src[i] = rand() & SHORT_MAX;
}
- // extra test area all of 0x1234
- for (int i = 0; i < ITERS * 2; i++)
- {
- ref_src[32 * 32 + i] = 0x1234;
- }
+ // extra test area all of Zeros
for (int i = 0; i < ITERS; i++)
{
int rand_scan_type = rand() % NUM_SCAN_TYPE;
int rand_scan_size = (rand() % NUM_SCAN_SIZE) + 2;
+ const int trSize = (1 << rand_scan_size);
coeff_t *rand_src = ref_src + i;
const uint16_t* const scanTbl = g_scan4x4[rand_scan_type];
@@ -1319,17 +1318,20 @@
{
const uint32_t idxY = j / MLS_CG_SIZE;
const uint32_t idxX = j % MLS_CG_SIZE;
- if (rand_src[idxY * rand_scan_size + idxX]) break;
+ if (rand_src[idxY * trSize + idxX]) break;
}
- // fill one coeff when all coeff group are zero
+ uint32_t ref_scanPos = ref(rand_src, trSize, scanTbl);
+ uint32_t opt_scanPos = (int)checked(opt, rand_src, trSize, scanTbl);
+
+ // special case: all coefficients in the group are zero
if (j >= SCAN_SET_SIZE)
- rand_src[0] = 0x0BAD;
-
- uint32_t ref_scanPos = ref(rand_src, (1 << rand_scan_size), scanTbl);
- uint32_t opt_scanPos = (int)checked(opt, rand_src, (1 << rand_scan_size), scanTbl);
-
- if (ref_scanPos != opt_scanPos)
+ {
+ // for an all-zero block the high 16 bits are undefined, so compare only the low 16 bits
+ if ((uint16_t)ref_scanPos != (uint16_t)opt_scanPos)
+ return false;
+ }
+ else if (ref_scanPos != opt_scanPos)
return false;
reportfail();
More information about the x265-devel
mailing list