[x265] [PATCH] testbench: costCoeffNxN and enable asm code (based on Sumalatha's patch)

Sat Jun 20 02:45:03 CEST 2015

# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1434761096 25200
# Node ID 114cabc91eb889ab311a1e1497e8e1d0facfe9da
# Parent  44b6b2df7016f0129e66d91e9aab03261d02758a
testbench: costCoeffNxN and enable asm code (based on Sumalatha's patch)
---
 source/common/x86/asm-primitives.cpp |    2 +-
 source/test/pixelharness.cpp         |  164 ++++++++++++++++++++++++++++++++++
 source/test/pixelharness.h           |    2 +
 3 files changed, 167 insertions(+), 1 deletions(-)

diff -r 44b6b2df7016 -r 114cabc91eb8 source/common/x86/asm-primitives.cpp

--- a/source/common/x86/asm-primitives.cpp	Fri Jun 19 16:43:29 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp	Fri Jun 19 17:44:56 2015 -0700
@@ -2451,7 +2451,7 @@
         ALL_LUMA_CU(psy_cost_ss, psyCost_ss, sse4);
 
         // TODO: it is passed smoke test, but we need testbench, so temporary disable
-        //p.costCoeffNxN = PFX(costCoeffNxN_sse4);
+        p.costCoeffNxN = PFX(costCoeffNxN_sse4);
 #endif
         // TODO: it is passed smoke test, but we need testbench to active it, so temporary disable
         //p.costCoeffRemain = x265_costCoeffRemain_sse4;
diff -r 44b6b2df7016 -r 114cabc91eb8 source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp	Fri Jun 19 16:43:29 2015 +0530
+++ b/source/test/pixelharness.cpp	Fri Jun 19 17:44:56 2015 -0700
@@ -23,6 +23,7 @@
 
 #include "pixelharness.h"
 #include "primitives.h"
+#include "entropy.h"
 
 using namespace X265_NS;
 
@@ -1443,6 +1444,143 @@
     return true;
 }
 
+bool PixelHarness::check_costCoeffNxN(costCoeffNxN_t ref, costCoeffNxN_t opt)
+{   // Runs ref and opt on identical random inputs; returns false on the first mismatch in sum, context state or absCoeff output
+    ALIGN_VAR_16(coeff_t, ref_src[32 * 32 + ITERS * 3]);
+    ALIGN_VAR_32(uint16_t, ref_absCoeff[1 << MLS_CG_SIZE]);
+    ALIGN_VAR_32(uint16_t, opt_absCoeff[1 << MLS_CG_SIZE]);
+
+    memset(ref_absCoeff, 0xCD, sizeof(ref_absCoeff)); // same fill pattern in both buffers so the full-buffer memcmp below is meaningful
+    memset(opt_absCoeff, 0xCD, sizeof(opt_absCoeff));
+
+    int totalCoeffs = 0;
+    for (int i = 0; i < 32 * 32; i++)
+    {
+        ref_src[i] = rand() & SHORT_MAX;
+
+        // bias toward zero coefficients: values below 2/3 of SHORT_MAX are cleared
+        if (ref_src[i] < SHORT_MAX * 2 / 3)
+            ref_src[i] = 0;
+
+        // bias toward negative coefficients: flip the sign when rand() % 10 < 8
+        if ((rand() % 10) < 8)
+            ref_src[i] *= -1;
+        totalCoeffs += (ref_src[i] != 0);
+    }
+
+    // extra test area past the 32x32 coefficients, all set to 0x1234
+    for (int i = 0; i < ITERS * 3; i++)
+    {
+        ref_src[32 * 32 + i] = 0x1234;
+    }
+
+    // generate identical random CABAC context tables for ref and opt
+    uint8_t m_contextState_ref[OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA];
+    uint8_t m_contextState_opt[OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA];
+    for (int k = 0; k < (OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA); k++)
+    {
+        m_contextState_ref[k] = (rand() % (125 - 2)) + 2;
+        m_contextState_opt[k] = m_contextState_ref[k];
+    }
+    uint8_t *const ref_baseCtx = m_contextState_ref;
+    uint8_t *const opt_baseCtx = m_contextState_opt;
+
+    for (int i = 0; i < ITERS * 2; i++)
+    {
+        int rand_scan_type = rand() % NUM_SCAN_TYPE;
+        int rand_scanPosSigOff = rand() % 16; // rand_scanPosSigOff range is [0,15]
+        int rand_patternSigCtx = rand() % 4; // range [0,3]
+        int rand_scan_size = rand() % NUM_SCAN_SIZE;
+        int offset; // the value has an exact range, details in CoeffNxN()
+        if (rand_scan_size == 2)
+            offset = 0;
+        else if (rand_scan_size == 3)
+            offset = 9;
+        else
+            offset = 12;
+
+        const int trSize = (1 << (rand_scan_size + 2));
+        ALIGN_VAR_32(static const uint8_t, table_cnt[5][SCAN_SET_SIZE]) =
+        {
+            // patternSigCtx = 0
+            {
+                2, 1, 1, 0,
+                1, 1, 0, 0,
+                1, 0, 0, 0,
+                0, 0, 0, 0,
+            },
+            // patternSigCtx = 1
+            {
+                2, 2, 2, 2,
+                1, 1, 1, 1,
+                0, 0, 0, 0,
+                0, 0, 0, 0,
+            },
+            // patternSigCtx = 2
+            {
+                2, 1, 0, 0,
+                2, 1, 0, 0,
+                2, 1, 0, 0,
+                2, 1, 0, 0,
+            },
+            // patternSigCtx = 3
+            {
+                2, 2, 2, 2,
+                2, 2, 2, 2,
+                2, 2, 2, 2,
+                2, 2, 2, 2,
+            },
+            // 4x4 (NOTE(review): this row is selected below when rand_scan_size == 2, i.e. trSize == 16 - confirm intended)
+            {
+                0, 1, 4, 5,
+                2, 3, 4, 5,
+                6, 6, 8, 8,
+                7, 7, 8, 8
+            }
+        };
+        const uint8_t *rand_tabSigCtx = table_cnt[(rand_scan_size == 2) ? 4 : (uint32_t)rand_patternSigCtx];
+        const uint16_t* const scanTbl = g_scanOrder[rand_scan_type][rand_scan_size];
+        const uint16_t* const scanTblCG4x4 = g_scan4x4[rand_scan_size <= (MDCS_LOG2_MAX_SIZE - 2) ? rand_scan_type : SCAN_DIAG];
+
+        int rand_scanPosCG = rand() % (trSize * trSize / MLS_CG_BLK_SIZE);
+        int subPosBase = rand_scanPosCG * MLS_CG_BLK_SIZE;
+        int rand_numCoeff = 0;
+        uint32_t scanFlagMask = 0;
+        const int numNonZero = (rand_scanPosSigOff < (MLS_CG_BLK_SIZE - 1)) ? 1 : 0; // passed below as the start offset into the absCoeff buffers
+
+        for(int k = 0; k <= rand_scanPosSigOff; k++)
+        {
+            uint32_t pos = scanTbl[subPosBase + k];
+            coeff_t tmp_coeff = ref_src[i + pos];
+            if (tmp_coeff != 0)
+            {
+                rand_numCoeff++;
+            }
+            scanFlagMask = scanFlagMask * 2 + (tmp_coeff != 0); // shift in one non-zero flag bit per scanned position
+        }
+
+        // an all-zero block can't be processed, skip this iteration
+        if (rand_numCoeff == 0)
+            continue;
+
+        const uint32_t blkPosBase = scanTbl[subPosBase];
+        uint32_t ref_sum = ref(scanTblCG4x4, &ref_src[blkPosBase + i], trSize, ref_absCoeff + numNonZero, rand_tabSigCtx, scanFlagMask, (uint8_t*)ref_baseCtx, offset, rand_scanPosSigOff, subPosBase);
+        uint32_t opt_sum = (uint32_t)checked(opt, scanTblCG4x4, &ref_src[blkPosBase + i], trSize, opt_absCoeff + numNonZero, rand_tabSigCtx, scanFlagMask, (uint8_t*)opt_baseCtx, offset, rand_scanPosSigOff, subPosBase);
+
+        if (ref_sum != opt_sum)
+            return false;
+        if (memcmp(ref_baseCtx, opt_baseCtx, sizeof(m_contextState_ref)))
+            return false;
+
+        // NOTE: only the first rand_numCoeff entries are valid, but the full buffer is compared to confirm there is no overwrite bug
+        if (memcmp(ref_absCoeff, opt_absCoeff, sizeof(ref_absCoeff)))
+            return false;
+
+        reportfail();
+    }
+    return true;
+}
+
 bool PixelHarness::testPU(int part, const EncoderPrimitives& ref, const EncoderPrimitives& opt)
 {
     if (opt.pu[part].satd)
@@ -2018,6 +2156,14 @@
             return false;
         }
     }
+    if (opt.costCoeffNxN)
+    {
+        if (!check_costCoeffNxN(ref.costCoeffNxN, opt.costCoeffNxN))
+        {
+            printf("costCoeffNxN failed!\n");
+            return false;
+        }
+    }
 
     return true;
 }
@@ -2439,4 +2585,22 @@
         coefBuf[3 + 3 * 32] = 0x0BAD;
         REPORT_SPEEDUP(opt.findPosFirstLast, ref.findPosFirstLast, coefBuf, 32, g_scan4x4[SCAN_DIAG]);
     }
+    if (opt.costCoeffNxN)
+    {
+        HEADER0("costCoeffNxN");
+        coeff_t coefBuf[32 * 32];
+        uint16_t tmpOut[16];
+        memset(coefBuf, 1, sizeof(coefBuf));
+        ALIGN_VAR_32(static uint8_t const, ctxSig[]) =
+        {
+            0, 1, 4, 5,
+            2, 3, 4, 5,
+            6, 6, 8, 8,
+            7, 7, 8, 8
+        };
+        uint8_t ctx[OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA];
+        memset(ctx, 120, sizeof(ctx));
+
+        REPORT_SPEEDUP(opt.costCoeffNxN, ref.costCoeffNxN, g_scan4x4[SCAN_DIAG], coefBuf, 32, tmpOut, ctxSig, 0xFFFF, ctx, 1, 15, 32);
+    }
 }
diff -r 44b6b2df7016 -r 114cabc91eb8 source/test/pixelharness.h
--- a/source/test/pixelharness.h	Fri Jun 19 16:43:29 2015 +0530
+++ b/source/test/pixelharness.h	Fri Jun 19 17:44:56 2015 -0700
@@ -110,6 +110,8 @@
     bool check_calSign(sign_t ref, sign_t opt);
     bool check_scanPosLast(scanPosLast_t ref, scanPosLast_t opt);
     bool check_findPosFirstLast(findPosFirstLast_t ref, findPosFirstLast_t opt);
+    bool check_costCoeffNxN(costCoeffNxN_t ref, costCoeffNxN_t opt);
+
 
 public: