[x265] [PATCH] testbench for costCoeffRemain()

Fri Jun 26 12:52:06 CEST 2015

Please ignore this patch

Regards
Sumalatha

On Fri, Jun 26, 2015 at 4:20 PM, <sumalatha at multicorewareinc.com> wrote:

> # HG changeset patch
> # User Sumalatha Polureddy<sumalatha at multicorewareinc.com>
> # Date 1435315850 -19800
> #      Fri Jun 26 16:20:50 2015 +0530
> # Node ID 83e1acab3578bcd77aef6d8d3d42f134893751bd
> # Parent  1e5c4d155ab85e8e8dd199bb3515801766ea9e88
> testbench for costCoeffRemain()
>
> diff -r 1e5c4d155ab8 -r 83e1acab3578 source/common/x86/asm-primitives.cpp
> --- a/source/common/x86/asm-primitives.cpp      Thu Jun 25 13:42:29 2015
> +0530
> +++ b/source/common/x86/asm-primitives.cpp      Fri Jun 26 16:20:50 2015
> +0530
> @@ -2491,7 +2491,7 @@
>          p.costCoeffNxN = PFX(costCoeffNxN_sse4);
>  #endif
>          // TODO: it is passed smoke test, but we need testbench to active
> it, so temporary disable
> -        //p.costCoeffRemain = x265_costCoeffRemain_sse4;
> +        p.costCoeffRemain = x265_costCoeffRemain_sse4;
>      }
>      if (cpuMask & X265_CPU_AVX)
>      {
> diff -r 1e5c4d155ab8 -r 83e1acab3578 source/test/pixelharness.cpp
> --- a/source/test/pixelharness.cpp      Thu Jun 25 13:42:29 2015 +0530
> +++ b/source/test/pixelharness.cpp      Fri Jun 26 16:20:50 2015 +0530
> @@ -1581,6 +1581,184 @@
>      return true;
>  }
>
> +bool PixelHarness::check_costCoeffRemain(costCoeffRemain_t ref,
> costCoeffRemain_t opt)
> +{
> +    ALIGN_VAR_16(coeff_t, ref_src[32 * 32 + ITERS * 2]);
> +    ALIGN_VAR_32(uint16_t, ref_absCoeff[1 << MLS_CG_SIZE]);
> +
> +    int totalCoeffs = 0;
> +    for (int i = 0; i < 32 * 32; i++)
> +    {
> +        ref_src[i] = rand() & SHORT_MAX;
> +        // more zero coeff
> +        if (ref_src[i] < SHORT_MAX * 2 / 3)
> +            ref_src[i] = 0;
> +        // more negative
> +        if ((rand() % 10) < 8)
> +            ref_src[i] *= -1;
> +        totalCoeffs += (ref_src[i] != 0);
> +    }
> +    // extra test area all of 0x1234
> +    for (int i = 0; i < ITERS * 2; i++)
> +    {
> +        ref_src[32 * 32 + i] = 0x1234;
> +    }
> +
> +    // generate CABAC context table
> +    uint8_t m_contextState_ref[OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA];
> +    uint8_t m_contextState_opt[OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA];
> +    for (int k = 0; k < (OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA); k++)
> +    {
> +        m_contextState_ref[k] = (rand() % (125 - 2)) + 2;
> +        m_contextState_opt[k] = m_contextState_ref[k];
> +    }
> +    uint8_t *const ref_baseCtx = m_contextState_ref;
> +    for (int i = 0; i < ITERS; i++)
> +    {
> +        int rand_scan_type = rand() % NUM_SCAN_TYPE;
> +        int rand_scan_size = rand() % NUM_SCAN_SIZE;
> +        int rand_scanPosSigOff = rand() % 16; //rand_scanPosSigOff range
> is [0,15]
> +        int numNonZero = (rand_scanPosSigOff < (MLS_CG_BLK_SIZE - 1)) ? 1
> : 0;
> +
> +        int rand_patternSigCtx = rand() % 4; //range [0,3]
> +        int offset; // the value has an exact range, details in CoeffNxN()
> +        if (rand_scan_size == 2)
> +            offset = 0;
> +        else if (rand_scan_size == 3)
> +            offset = 9;
> +        else
> +            offset = 12;
> +
> +        const int trSize = (1 << (rand_scan_size + 2));
> +        ALIGN_VAR_32(static const uint8_t, table_cnt[5][SCAN_SET_SIZE]) =
> +        {
> +            // patternSigCtx = 0
> +            {
> +                2, 1, 1, 0,
> +                1, 1, 0, 0,
> +                1, 0, 0, 0,
> +                0, 0, 0, 0,
> +            },
> +            // patternSigCtx = 1
> +            {
> +                2, 2, 2, 2,
> +                1, 1, 1, 1,
> +                0, 0, 0, 0,
> +                0, 0, 0, 0,
> +            },
> +            // patternSigCtx = 2
> +            {
> +                2, 1, 0, 0,
> +                2, 1, 0, 0,
> +                2, 1, 0, 0,
> +                2, 1, 0, 0,
> +            },
> +            // patternSigCtx = 3
> +            {
> +                2, 2, 2, 2,
> +                2, 2, 2, 2,
> +                2, 2, 2, 2,
> +                2, 2, 2, 2,
> +            },
> +            // 4x4
> +            {
> +                0, 1, 4, 5,
> +                2, 3, 4, 5,
> +                6, 6, 8, 8,
> +                7, 7, 8, 8
> +            }
> +        };
> +        const uint8_t *rand_tabSigCtx = table_cnt[(rand_scan_size == 2) ?
> 4 : (uint32_t)rand_patternSigCtx];
> +        const uint16_t* const scanTbl =
> g_scanOrder[rand_scan_type][rand_scan_size];
> +        const uint16_t* const scanTblCG4x4 = g_scan4x4[rand_scan_size <=
> (MDCS_LOG2_MAX_SIZE - 2) ? rand_scan_type : SCAN_DIAG];
> +
> +        int rand_scanPosCG = rand() % (trSize * trSize / MLS_CG_BLK_SIZE);
> +        int subPosBase = rand_scanPosCG * MLS_CG_BLK_SIZE;
> +        int rand_numCoeff = 0;
> +        uint32_t scanFlagMask = 0;
> +
> +
> +        for (int k = 0; k <= rand_scanPosSigOff; k++)
> +        {
> +            uint32_t pos = scanTbl[subPosBase + k];
> +            coeff_t tmp_coeff = ref_src[i + pos];
> +            if (tmp_coeff != 0)
> +            {
> +                rand_numCoeff++;
> +            }
> +            scanFlagMask = scanFlagMask * 2 + (tmp_coeff != 0);
> +        }
> +
> +        // can't process all zeros block
> +        if (rand_numCoeff == 0)
> +            continue;
> +
> +        const uint32_t blkPosBase = scanTbl[subPosBase];
> +
> +        ALIGN_VAR_32(uint16_t, tmpCoeff[SCAN_SET_SIZE]);
> +        uint32_t sum = 0;
> +
> +        // correct offset to match assembly
> +
> +        uint16_t *absCoeff = ref_absCoeff + numNonZero;
> +
> +        coeff_t *coeff = &ref_src[blkPosBase + i];
> +
> +        absCoeff -= numNonZero;
> +
> +        for (int i = 0; i < MLS_CG_SIZE; i++)
> +        {
> +            tmpCoeff[i * MLS_CG_SIZE + 0] = (uint16_t)abs(coeff[i *
> trSize + 0]);
> +            tmpCoeff[i * MLS_CG_SIZE + 1] = (uint16_t)abs(coeff[i *
> trSize + 1]);
> +            tmpCoeff[i * MLS_CG_SIZE + 2] = (uint16_t)abs(coeff[i *
> trSize + 2]);
> +            tmpCoeff[i * MLS_CG_SIZE + 3] = (uint16_t)abs(coeff[i *
> trSize + 3]);
> +        }
> +
> +        do
> +        {
> +            uint32_t blkPos, sig, ctxSig;
> +            blkPos = scanTblCG4x4[rand_scanPosSigOff];
> +            const uint32_t posZeroMask = (subPosBase +
> rand_scanPosSigOff) ? ~0 : 0;
> +            sig = scanFlagMask & 1;
> +            scanFlagMask >>= 1;
> +            X265_CHECK((uint32_t)(tmpCoeff[blkPos] != 0) == sig, "sign
> bit mistake\n");
> +            if ((rand_scanPosSigOff != 0) || (subPosBase == 0) ||
> numNonZero)
> +            {
> +                const uint32_t cnt = rand_tabSigCtx[blkPos] + offset;
> +                ctxSig = cnt & posZeroMask;
> +
> +                //X265_CHECK(ctxSig == Quant::getSigCtxInc(patternSigCtx,
> log2TrSize, trSize, codingParameters.scan[subPosBase + scanPosSigOff],
> bIsLuma, codingParameters.firstSignificanceMapContext), "sigCtx
> mistake!\n");;
> +                //encodeBin(sig, baseCtx[ctxSig]);
> +                const uint32_t mstate = ref_baseCtx[ctxSig];
> +                const uint32_t mps = mstate & 1;
> +                const uint32_t stateBits = PFX(entropyStateBits)[mstate ^
> sig];
> +                uint32_t nextState = (stateBits >> 24) + mps;
> +                if ((mstate ^ sig) == 1)
> +                    nextState = sig;
> +                X265_CHECK(sbacNext(mstate, sig) == nextState, "nextState
> check failure\n");
> +                X265_CHECK(sbacGetEntropyBits(mstate, sig) == (stateBits
> & 0xFFFFFF), "entropyBits check failure\n");
> +                ref_baseCtx[ctxSig] = (uint8_t)nextState;
> +                sum += stateBits;
> +            }
> +            assert(numNonZero <= 15);
> +            assert(blkPos <= 15);
> +            absCoeff[numNonZero] = tmpCoeff[blkPos];
> +            numNonZero += sig;
> +            rand_scanPosSigOff--;
> +        } while (rand_scanPosSigOff >= 0);
> +
> +        //numNonZero = coeffNum[lastScanSet];
> +        //memset(absCoeff, 0xCD, sizeof(absCoeff));
> +        uint32_t firstC2Idx = (rand() >> 28);
> +        int ref_sum = ref(absCoeff, numNonZero, firstC2Idx);
> +        //int ref_sum1 = ref(absCoeff, numNonZero, firstC2Idx); // when C
> function is called instead of sse4 asm, ref_sum and ref_sum1 are same
> +        int opt_sum = (int)checked(opt, absCoeff, numNonZero, firstC2Idx);
> +        if (ref_sum != opt_sum)
> +            return false;
> +    }
> +}
> +
> +
>  bool PixelHarness::testPU(int part, const EncoderPrimitives& ref, const
> EncoderPrimitives& opt)
>  {
>      if (opt.pu[part].satd)
> @@ -2164,6 +2342,14 @@
>              return false;
>          }
>      }
> +    if (opt.costCoeffRemain)
> +    {
> +        if (!check_costCoeffRemain(ref.costCoeffRemain,
> opt.costCoeffRemain))
> +        {
> +            printf("costCoeffRemain failed!\n");
> +            return false;
> +        }
> +    }
>
>      return true;
>  }
> @@ -2603,4 +2789,13 @@
>
>          REPORT_SPEEDUP(opt.costCoeffNxN, ref.costCoeffNxN,
> g_scan4x4[SCAN_DIAG], coefBuf, 32, tmpOut, ctxSig, 0xFFFF, ctx, 1, 15, 32);
>      }
> +
> +    if (opt.costCoeffRemain)
> +    {
> +        HEADER0("costCoeffRemain");
> +        uint16_t abscoefBuf[32 * 32];
> +        memset(abscoefBuf, 0, sizeof(abscoefBuf));
> +        memset(abscoefBuf + 32 * 31, 1, 32 * sizeof(uint16_t));
> +        REPORT_SPEEDUP(opt.costCoeffRemain, ref.costCoeffRemain,
> abscoefBuf, 16, 32);
> +    }
>  }
> diff -r 1e5c4d155ab8 -r 83e1acab3578 source/test/pixelharness.h
> --- a/source/test/pixelharness.h        Thu Jun 25 13:42:29 2015 +0530
> +++ b/source/test/pixelharness.h        Fri Jun 26 16:20:50 2015 +0530
> @@ -111,7 +111,7 @@
>      bool check_scanPosLast(scanPosLast_t ref, scanPosLast_t opt);
>      bool check_findPosFirstLast(findPosFirstLast_t ref,
> findPosFirstLast_t opt);
>      bool check_costCoeffNxN(costCoeffNxN_t ref, costCoeffNxN_t opt);
> -
> +    bool check_costCoeffRemain(costCoeffRemain_t ref, costCoeffRemain_t
> opt);
>
>  public:
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150626/7d0aeda9/attachment.html>