[x265] [PATCH 1 of 2] asm: modify API of findPosFirstLast to support all-zero blocks

Fri May 15 02:11:59 CEST 2015

# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1431647427 25200
# Node ID c3549eeb4b595e129d18bc75bb4f03272f1624d3
# Parent  8592bf81d0848279fa79cd1487406cb516dffe99
asm: modify API of findPosFirstLast to support all-zero blocks
---
 source/common/dct.cpp             |    5 +++--
 source/common/x86/pixel-util8.asm |    5 ++---
 source/test/pixelharness.cpp      |   32 +++++++++++++++++---------------
 3 files changed, 22 insertions(+), 20 deletions(-)

diff -r 8592bf81d084 -r c3549eeb4b59 source/common/dct.cpp

--- a/source/common/dct.cpp	Thu May 14 17:12:14 2015 +0530
+++ b/source/common/dct.cpp	Thu May 14 16:50:27 2015 -0700
@@ -798,11 +798,11 @@
             break;
     }
 
-    X265_CHECK(n >= 0, "non-zero coeff scan failuare!\n");
+    X265_CHECK(n >= -1, "non-zero coeff scan failuare!\n");
 
     uint32_t lastNZPosInCG = (uint32_t)n;
 
-    for (n = 0;; n++)
+    for (n = 0; n < SCAN_SET_SIZE; n++)
     {
         const uint32_t idx = scanTbl[n];
         const uint32_t idxY = idx / MLS_CG_SIZE;
@@ -813,6 +813,7 @@
 
     uint32_t firstNZPosInCG = (uint32_t)n;
 
+    // NOTE: when the coeff block is all zero, lastNZPosInCG is undefined and firstNZPosInCG is 16
     return ((lastNZPosInCG << 16) | firstNZPosInCG);
 }
 
diff -r 8592bf81d084 -r c3549eeb4b59 source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm	Thu May 14 17:12:14 2015 +0530
+++ b/source/common/x86/pixel-util8.asm	Thu May 14 16:50:27 2015 -0700
@@ -6046,11 +6046,10 @@
     pshufb      m1, m0
 
     ; get First and Last pos
-    xor         eax, eax
     pmovmskb    r0d, m1
-    not         r0w
+    not         r0d
     bsr         r1w, r0w
-    bsf          ax, r0w
+    bsf         eax, r0d    ; side effect: the 32-bit write also zeroes the upper bits of EAX (incl. AH)
     shl         r1d, 16
     or          eax, r1d
     RET
diff -r 8592bf81d084 -r c3549eeb4b59 source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp	Thu May 14 17:12:14 2015 +0530
+++ b/source/test/pixelharness.cpp	Thu May 14 16:50:27 2015 -0700
@@ -1293,23 +1293,22 @@
 
 bool PixelHarness::check_findPosFirstLast(findPosFirstLast_t ref, findPosFirstLast_t opt)
 {
-    ALIGN_VAR_16(coeff_t, ref_src[32 * 32 + ITERS * 2]);
+    ALIGN_VAR_16(coeff_t, ref_src[4 * 32 + ITERS * 2]);
+    memset(ref_src, 0, sizeof(ref_src));
 
-    for (int i = 0; i < 32 * 32; i++)
+    // stop ITERS short of the end so the zeroed tail keeps a chance of producing all-zero blocks
+    for (int i = 0; i < 4 * 32 - ITERS; i++)
     {
         ref_src[i] = rand() & SHORT_MAX;
     }
 
-    // extra test area all of 0x1234
-    for (int i = 0; i < ITERS * 2; i++)
-    {
-        ref_src[32 * 32 + i] = 0x1234;
-    }
+    // the extra test area is left as all zeros
 
     for (int i = 0; i < ITERS; i++)
     {
         int rand_scan_type = rand() % NUM_SCAN_TYPE;
         int rand_scan_size = (rand() % NUM_SCAN_SIZE) + 2;
+        const int trSize = (1 << rand_scan_size);
         coeff_t *rand_src = ref_src + i;
 
         const uint16_t* const scanTbl = g_scan4x4[rand_scan_type];
@@ -1319,17 +1318,20 @@
         {
             const uint32_t idxY = j / MLS_CG_SIZE;
             const uint32_t idxX = j % MLS_CG_SIZE;
-            if (rand_src[idxY * rand_scan_size + idxX]) break;
+            if (rand_src[idxY * trSize + idxX]) break;
         }
 
-        // fill one coeff when all coeff group are zero
+        uint32_t ref_scanPos = ref(rand_src, trSize, scanTbl);
+        uint32_t opt_scanPos = (int)checked(opt, rand_src, trSize, scanTbl);
+
+        // special case: all coefficients in the group are zero
         if (j >= SCAN_SET_SIZE)
-            rand_src[0] = 0x0BAD;
-
-        uint32_t ref_scanPos = ref(rand_src, (1 << rand_scan_size), scanTbl);
-        uint32_t opt_scanPos = (int)checked(opt, rand_src, (1 << rand_scan_size), scanTbl);
-
-        if (ref_scanPos != opt_scanPos)
+        {
+            // for an all-zero block the high 16 bits are undefined, so compare only the low 16 bits
+            if ((uint16_t)ref_scanPos != (uint16_t)opt_scanPos)
+                return false;
+        }
+        else if (ref_scanPos != opt_scanPos)
             return false;
 
         reportfail();