[x265] [PATCH] testbench for costCoeffRemain()

sumalatha at multicorewareinc.com sumalatha at multicorewareinc.com
Fri Jun 26 12:50:59 CEST 2015


# HG changeset patch
# User Sumalatha Polureddy<sumalatha at multicorewareinc.com>
# Date 1435315850 -19800
#      Fri Jun 26 16:20:50 2015 +0530
# Node ID 83e1acab3578bcd77aef6d8d3d42f134893751bd
# Parent  1e5c4d155ab85e8e8dd199bb3515801766ea9e88
testbench for costCoeffRemain()

diff -r 1e5c4d155ab8 -r 83e1acab3578 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Thu Jun 25 13:42:29 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp	Fri Jun 26 16:20:50 2015 +0530
@@ -2491,7 +2491,7 @@
         p.costCoeffNxN = PFX(costCoeffNxN_sse4);
 #endif
         // TODO: it is passed smoke test, but we need testbench to active it, so temporary disable
-        //p.costCoeffRemain = x265_costCoeffRemain_sse4;
+        p.costCoeffRemain = x265_costCoeffRemain_sse4;
     }
     if (cpuMask & X265_CPU_AVX)
     {
diff -r 1e5c4d155ab8 -r 83e1acab3578 source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp	Thu Jun 25 13:42:29 2015 +0530
+++ b/source/test/pixelharness.cpp	Fri Jun 26 16:20:50 2015 +0530
@@ -1581,6 +1581,184 @@
     return true;
 }
 
+/* Testbench for the costCoeffRemain primitive.
+ *
+ * Each iteration picks a random scan type, scan size and coding group in a
+ * random coefficient buffer, walks the group from rand_scanPosSigOff down to
+ * 0 while updating a CABAC context table, and stores the absolute levels it
+ * visits in absCoeff[].  ref and opt are then called with the same
+ * (absCoeff, numNonZero, firstC2Idx) arguments and their results compared.
+ *
+ * Returns true if ref and opt agree on every iteration, false on the first
+ * mismatch. */
+bool PixelHarness::check_costCoeffRemain(costCoeffRemain_t ref, costCoeffRemain_t opt)
+{
+    ALIGN_VAR_16(coeff_t, ref_src[32 * 32 + ITERS * 2]);
+    ALIGN_VAR_32(uint16_t, ref_absCoeff[1 << MLS_CG_SIZE]);
+
+    // slot 0 is skipped whenever numNonZero starts at 1, so clear the buffer
+    // to keep ref/opt from reading indeterminate stack contents
+    memset(ref_absCoeff, 0, sizeof(ref_absCoeff));
+
+    for (int i = 0; i < 32 * 32; i++)
+    {
+        ref_src[i] = rand() & SHORT_MAX;
+        // more zero coeff
+        if (ref_src[i] < SHORT_MAX * 2 / 3)
+            ref_src[i] = 0;
+        // more negative
+        if ((rand() % 10) < 8)
+            ref_src[i] *= -1;
+    }
+    // extra test area all of 0x1234
+    for (int i = 0; i < ITERS * 2; i++)
+        ref_src[32 * 32 + i] = 0x1234;
+
+    // generate CABAC context table
+    uint8_t m_contextState_ref[OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA];
+    for (int k = 0; k < (OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA); k++)
+        m_contextState_ref[k] = (rand() % (125 - 2)) + 2;
+    uint8_t *const ref_baseCtx = m_contextState_ref;
+
+    for (int i = 0; i < ITERS; i++)
+    {
+        int rand_scan_type = rand() % NUM_SCAN_TYPE;
+        int rand_scan_size = rand() % NUM_SCAN_SIZE;
+        int rand_scanPosSigOff = rand() % 16; // rand_scanPosSigOff range is [0,15]
+        int numNonZero = (rand_scanPosSigOff < (MLS_CG_BLK_SIZE - 1)) ? 1 : 0;
+
+        int rand_patternSigCtx = rand() % 4; //range [0,3]
+        int offset; // the value have a exact range, details in CoeffNxN()
+        if (rand_scan_size == 2)
+            offset = 0;
+        else if (rand_scan_size == 3)
+            offset = 9;
+        else
+            offset = 12;
+
+        const int trSize = (1 << (rand_scan_size + 2));
+        ALIGN_VAR_32(static const uint8_t, table_cnt[5][SCAN_SET_SIZE]) =
+        {
+            // patternSigCtx = 0
+            {
+                2, 1, 1, 0,
+                1, 1, 0, 0,
+                1, 0, 0, 0,
+                0, 0, 0, 0,
+            },
+            // patternSigCtx = 1
+            {
+                2, 2, 2, 2,
+                1, 1, 1, 1,
+                0, 0, 0, 0,
+                0, 0, 0, 0,
+            },
+            // patternSigCtx = 2
+            {
+                2, 1, 0, 0,
+                2, 1, 0, 0,
+                2, 1, 0, 0,
+                2, 1, 0, 0,
+            },
+            // patternSigCtx = 3
+            {
+                2, 2, 2, 2,
+                2, 2, 2, 2,
+                2, 2, 2, 2,
+                2, 2, 2, 2,
+            },
+            // 4x4
+            {
+                0, 1, 4, 5,
+                2, 3, 4, 5,
+                6, 6, 8, 8,
+                7, 7, 8, 8
+            }
+        };
+        const uint8_t *rand_tabSigCtx = table_cnt[(rand_scan_size == 2) ? 4 : (uint32_t)rand_patternSigCtx];
+        const uint16_t* const scanTbl = g_scanOrder[rand_scan_type][rand_scan_size];
+        const uint16_t* const scanTblCG4x4 = g_scan4x4[rand_scan_size <= (MDCS_LOG2_MAX_SIZE - 2) ? rand_scan_type : SCAN_DIAG];
+
+        int rand_scanPosCG = rand() % (trSize * trSize / MLS_CG_BLK_SIZE);
+        int subPosBase = rand_scanPosCG * MLS_CG_BLK_SIZE;
+        int rand_numCoeff = 0;
+        uint32_t scanFlagMask = 0;
+
+        // pack the significance flags of positions [0, rand_scanPosSigOff];
+        // the last position scanned lands in bit 0
+        for (int k = 0; k <= rand_scanPosSigOff; k++)
+        {
+            uint32_t pos = scanTbl[subPosBase + k];
+            coeff_t tmp_coeff = ref_src[i + pos];
+            if (tmp_coeff != 0)
+                rand_numCoeff++;
+            scanFlagMask = scanFlagMask * 2 + (tmp_coeff != 0);
+        }
+
+        // can't process all zeros block
+        if (rand_numCoeff == 0)
+            continue;
+
+        const uint32_t blkPosBase = scanTbl[subPosBase];
+
+        ALIGN_VAR_32(uint16_t, tmpCoeff[SCAN_SET_SIZE]);
+        uint16_t *absCoeff = ref_absCoeff;
+        coeff_t *coeff = &ref_src[blkPosBase + i];
+
+        // absolute values of the 4x4 group, read with a row stride of trSize
+        for (int row = 0; row < MLS_CG_SIZE; row++)
+        {
+            tmpCoeff[row * MLS_CG_SIZE + 0] = (uint16_t)abs(coeff[row * trSize + 0]);
+            tmpCoeff[row * MLS_CG_SIZE + 1] = (uint16_t)abs(coeff[row * trSize + 1]);
+            tmpCoeff[row * MLS_CG_SIZE + 2] = (uint16_t)abs(coeff[row * trSize + 2]);
+            tmpCoeff[row * MLS_CG_SIZE + 3] = (uint16_t)abs(coeff[row * trSize + 3]);
+        }
+
+        do
+        {
+            uint32_t blkPos, sig, ctxSig;
+            blkPos = scanTblCG4x4[rand_scanPosSigOff];
+            const uint32_t posZeroMask = (subPosBase + rand_scanPosSigOff) ? ~0 : 0;
+            sig = scanFlagMask & 1;
+            scanFlagMask >>= 1;
+            X265_CHECK((uint32_t)(tmpCoeff[blkPos] != 0) == sig, "sign bit mistake\n");
+            if ((rand_scanPosSigOff != 0) || (subPosBase == 0) || numNonZero)
+            {
+                const uint32_t cnt = rand_tabSigCtx[blkPos] + offset;
+                ctxSig = cnt & posZeroMask;
+
+                //X265_CHECK(ctxSig == Quant::getSigCtxInc(patternSigCtx, log2TrSize, trSize, codingParameters.scan[subPosBase + scanPosSigOff], bIsLuma, codingParameters.firstSignificanceMapContext), "sigCtx mistake!\n");;
+                //encodeBin(sig, baseCtx[ctxSig]);
+                const uint32_t mstate = ref_baseCtx[ctxSig];
+                const uint32_t mps = mstate & 1;
+                const uint32_t stateBits = PFX(entropyStateBits)[mstate ^ sig];
+                uint32_t nextState = (stateBits >> 24) + mps;
+                if ((mstate ^ sig) == 1)
+                    nextState = sig;
+                X265_CHECK(sbacNext(mstate, sig) == nextState, "nextState check failure\n");
+                X265_CHECK(sbacGetEntropyBits(mstate, sig) == (stateBits & 0xFFFFFF), "entropyBits check failure\n");
+                ref_baseCtx[ctxSig] = (uint8_t)nextState;
+            }
+            assert(numNonZero <= 15);
+            assert(blkPos <= 15);
+            absCoeff[numNonZero] = tmpCoeff[blkPos];
+            numNonZero += sig;
+            rand_scanPosSigOff--;
+        } while (rand_scanPosSigOff >= 0);
+
+        // low three bits give [0,7] on every platform; "rand() >> 28" is
+        // always 0 where RAND_MAX is only 0x7FFF
+        uint32_t firstC2Idx = (uint32_t)(rand() & 7);
+        int ref_sum = ref(absCoeff, numNonZero, firstC2Idx);
+        int opt_sum = (int)checked(opt, absCoeff, numNonZero, firstC2Idx);
+        if (ref_sum != opt_sum)
+            return false;
+    }
+
+    return true;
+}
+
+
 bool PixelHarness::testPU(int part, const EncoderPrimitives& ref, const EncoderPrimitives& opt)
 {
     if (opt.pu[part].satd)
@@ -2164,6 +2342,14 @@
             return false;
         }
     }
+    if (opt.costCoeffRemain)
+    {
+        if (!check_costCoeffRemain(ref.costCoeffRemain, opt.costCoeffRemain))
+        {
+            printf("costCoeffRemain failed!\n");
+            return false;
+        }
+    }
 
     return true;
 }
@@ -2603,4 +2789,13 @@
 
         REPORT_SPEEDUP(opt.costCoeffNxN, ref.costCoeffNxN, g_scan4x4[SCAN_DIAG], coefBuf, 32, tmpOut, ctxSig, 0xFFFF, ctx, 1, 15, 32);
     }
+
+    if (opt.costCoeffRemain)
+    {
+        HEADER0("costCoeffRemain");
+        uint16_t abscoefBuf[32 * 32];
+        memset(abscoefBuf, 0, sizeof(abscoefBuf));
+        memset(abscoefBuf + 32 * 31, 1, 32 * sizeof(uint16_t));
+        REPORT_SPEEDUP(opt.costCoeffRemain, ref.costCoeffRemain, abscoefBuf, 16, 32);
+    }
 }
diff -r 1e5c4d155ab8 -r 83e1acab3578 source/test/pixelharness.h
--- a/source/test/pixelharness.h	Thu Jun 25 13:42:29 2015 +0530
+++ b/source/test/pixelharness.h	Fri Jun 26 16:20:50 2015 +0530
@@ -111,7 +111,7 @@
     bool check_scanPosLast(scanPosLast_t ref, scanPosLast_t opt);
     bool check_findPosFirstLast(findPosFirstLast_t ref, findPosFirstLast_t opt);
     bool check_costCoeffNxN(costCoeffNxN_t ref, costCoeffNxN_t opt);
-
+    bool check_costCoeffRemain(costCoeffRemain_t ref, costCoeffRemain_t opt);
 
 public:
 


More information about the x265-devel mailing list