[x265] [PATCH] testbench for costCoeffRemain()
Sumalatha Polureddy
sumalatha at multicorewareinc.com
Fri Jun 26 12:52:06 CEST 2015
Please ignore this patch
Regards
Sumalatha
On Fri, Jun 26, 2015 at 4:20 PM, <sumalatha at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Sumalatha Polureddy<sumalatha at multicorewareinc.com>
> # Date 1435315850 -19800
> # Fri Jun 26 16:20:50 2015 +0530
> # Node ID 83e1acab3578bcd77aef6d8d3d42f134893751bd
> # Parent 1e5c4d155ab85e8e8dd199bb3515801766ea9e88
> testbench for costCoeffRemain()
>
> diff -r 1e5c4d155ab8 -r 83e1acab3578 source/common/x86/asm-primitives.cpp
> --- a/source/common/x86/asm-primitives.cpp Thu Jun 25 13:42:29 2015
> +0530
> +++ b/source/common/x86/asm-primitives.cpp Fri Jun 26 16:20:50 2015
> +0530
> @@ -2491,7 +2491,7 @@
> p.costCoeffNxN = PFX(costCoeffNxN_sse4);
> #endif
> // TODO: it is passed smoke test, but we need testbench to active
> it, so temporary disable
> - //p.costCoeffRemain = x265_costCoeffRemain_sse4;
> + p.costCoeffRemain = x265_costCoeffRemain_sse4;
> }
> if (cpuMask & X265_CPU_AVX)
> {
> diff -r 1e5c4d155ab8 -r 83e1acab3578 source/test/pixelharness.cpp
> --- a/source/test/pixelharness.cpp Thu Jun 25 13:42:29 2015 +0530
> +++ b/source/test/pixelharness.cpp Fri Jun 26 16:20:50 2015 +0530
> @@ -1581,6 +1581,184 @@
> return true;
> }
>
> +bool PixelHarness::check_costCoeffRemain(costCoeffRemain_t ref,
> costCoeffRemain_t opt)
> +{
> + ALIGN_VAR_16(coeff_t, ref_src[32 * 32 + ITERS * 2]);
> + ALIGN_VAR_32(uint16_t, ref_absCoeff[1 << MLS_CG_SIZE]);
> +
> + int totalCoeffs = 0;
> + for (int i = 0; i < 32 * 32; i++)
> + {
> + ref_src[i] = rand() & SHORT_MAX;
> + // more zero coeff
> + if (ref_src[i] < SHORT_MAX * 2 / 3)
> + ref_src[i] = 0;
> + // more negative
> + if ((rand() % 10) < 8)
> + ref_src[i] *= -1;
> + totalCoeffs += (ref_src[i] != 0);
> + }
> + // extra test area all of 0x1234
> + for (int i = 0; i < ITERS * 2; i++)
> + {
> + ref_src[32 * 32 + i] = 0x1234;
> + }
> +
> + // generate CABAC context table
> + uint8_t m_contextState_ref[OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA];
> + uint8_t m_contextState_opt[OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA];
> + for (int k = 0; k < (OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA); k++)
> + {
> + m_contextState_ref[k] = (rand() % (125 - 2)) + 2;
> + m_contextState_opt[k] = m_contextState_ref[k];
> + }
> + uint8_t *const ref_baseCtx = m_contextState_ref;
> + for (int i = 0; i < ITERS; i++)
> + {
> + int rand_scan_type = rand() % NUM_SCAN_TYPE;
> + int rand_scan_size = rand() % NUM_SCAN_SIZE;
> + int rand_scanPosSigOff = rand() % 16; //rand_scanPosSigOff range
> is [0,15]
> + int numNonZero = (rand_scanPosSigOff < (MLS_CG_BLK_SIZE - 1)) ? 1
> : 0;
> +
> + int rand_patternSigCtx = rand() % 4; //range [0,3]
> + int offset; // the value has an exact range; details in CoeffNxN()
> + if (rand_scan_size == 2)
> + offset = 0;
> + else if (rand_scan_size == 3)
> + offset = 9;
> + else
> + offset = 12;
> +
> + const int trSize = (1 << (rand_scan_size + 2));
> + ALIGN_VAR_32(static const uint8_t, table_cnt[5][SCAN_SET_SIZE]) =
> + {
> + // patternSigCtx = 0
> + {
> + 2, 1, 1, 0,
> + 1, 1, 0, 0,
> + 1, 0, 0, 0,
> + 0, 0, 0, 0,
> + },
> + // patternSigCtx = 1
> + {
> + 2, 2, 2, 2,
> + 1, 1, 1, 1,
> + 0, 0, 0, 0,
> + 0, 0, 0, 0,
> + },
> + // patternSigCtx = 2
> + {
> + 2, 1, 0, 0,
> + 2, 1, 0, 0,
> + 2, 1, 0, 0,
> + 2, 1, 0, 0,
> + },
> + // patternSigCtx = 3
> + {
> + 2, 2, 2, 2,
> + 2, 2, 2, 2,
> + 2, 2, 2, 2,
> + 2, 2, 2, 2,
> + },
> + // 4x4
> + {
> + 0, 1, 4, 5,
> + 2, 3, 4, 5,
> + 6, 6, 8, 8,
> + 7, 7, 8, 8
> + }
> + };
> + const uint8_t *rand_tabSigCtx = table_cnt[(rand_scan_size == 2) ?
> 4 : (uint32_t)rand_patternSigCtx];
> + const uint16_t* const scanTbl =
> g_scanOrder[rand_scan_type][rand_scan_size];
> + const uint16_t* const scanTblCG4x4 = g_scan4x4[rand_scan_size <=
> (MDCS_LOG2_MAX_SIZE - 2) ? rand_scan_type : SCAN_DIAG];
> +
> + int rand_scanPosCG = rand() % (trSize * trSize / MLS_CG_BLK_SIZE);
> + int subPosBase = rand_scanPosCG * MLS_CG_BLK_SIZE;
> + int rand_numCoeff = 0;
> + uint32_t scanFlagMask = 0;
> +
> +
> + for (int k = 0; k <= rand_scanPosSigOff; k++)
> + {
> + uint32_t pos = scanTbl[subPosBase + k];
> + coeff_t tmp_coeff = ref_src[i + pos];
> + if (tmp_coeff != 0)
> + {
> + rand_numCoeff++;
> + }
> + scanFlagMask = scanFlagMask * 2 + (tmp_coeff != 0);
> + }
> +
> + // can't process all zeros block
> + if (rand_numCoeff == 0)
> + continue;
> +
> + const uint32_t blkPosBase = scanTbl[subPosBase];
> +
> + ALIGN_VAR_32(uint16_t, tmpCoeff[SCAN_SET_SIZE]);
> + uint32_t sum = 0;
> +
> + // correct offset to match assembly
> +
> + uint16_t *absCoeff = ref_absCoeff + numNonZero;
> +
> + coeff_t *coeff = &ref_src[blkPosBase + i];
> +
> + absCoeff -= numNonZero;
> +
> + for (int i = 0; i < MLS_CG_SIZE; i++)
> + {
> + tmpCoeff[i * MLS_CG_SIZE + 0] = (uint16_t)abs(coeff[i *
> trSize + 0]);
> + tmpCoeff[i * MLS_CG_SIZE + 1] = (uint16_t)abs(coeff[i *
> trSize + 1]);
> + tmpCoeff[i * MLS_CG_SIZE + 2] = (uint16_t)abs(coeff[i *
> trSize + 2]);
> + tmpCoeff[i * MLS_CG_SIZE + 3] = (uint16_t)abs(coeff[i *
> trSize + 3]);
> + }
> +
> + do
> + {
> + uint32_t blkPos, sig, ctxSig;
> + blkPos = scanTblCG4x4[rand_scanPosSigOff];
> + const uint32_t posZeroMask = (subPosBase +
> rand_scanPosSigOff) ? ~0 : 0;
> + sig = scanFlagMask & 1;
> + scanFlagMask >>= 1;
> + X265_CHECK((uint32_t)(tmpCoeff[blkPos] != 0) == sig, "sign
> bit mistake\n");
> + if ((rand_scanPosSigOff != 0) || (subPosBase == 0) ||
> numNonZero)
> + {
> + const uint32_t cnt = rand_tabSigCtx[blkPos] + offset;
> + ctxSig = cnt & posZeroMask;
> +
> + //X265_CHECK(ctxSig == Quant::getSigCtxInc(patternSigCtx,
> log2TrSize, trSize, codingParameters.scan[subPosBase + scanPosSigOff],
> bIsLuma, codingParameters.firstSignificanceMapContext), "sigCtx
> mistake!\n");;
> + //encodeBin(sig, baseCtx[ctxSig]);
> + const uint32_t mstate = ref_baseCtx[ctxSig];
> + const uint32_t mps = mstate & 1;
> + const uint32_t stateBits = PFX(entropyStateBits)[mstate ^
> sig];
> + uint32_t nextState = (stateBits >> 24) + mps;
> + if ((mstate ^ sig) == 1)
> + nextState = sig;
> + X265_CHECK(sbacNext(mstate, sig) == nextState, "nextState
> check failure\n");
> + X265_CHECK(sbacGetEntropyBits(mstate, sig) == (stateBits
> & 0xFFFFFF), "entropyBits check failure\n");
> + ref_baseCtx[ctxSig] = (uint8_t)nextState;
> + sum += stateBits;
> + }
> + assert(numNonZero <= 15);
> + assert(blkPos <= 15);
> + absCoeff[numNonZero] = tmpCoeff[blkPos];
> + numNonZero += sig;
> + rand_scanPosSigOff--;
> + } while (rand_scanPosSigOff >= 0);
> +
> + //numNonZero = coeffNum[lastScanSet];
> + //memset(absCoeff, 0xCD, sizeof(absCoeff));
> + uint32_t firstC2Idx = (rand() >> 28);
> + int ref_sum = ref(absCoeff, numNonZero, firstC2Idx);
> + //int ref_sum1 = ref(absCoeff, numNonZero, firstC2Idx); // when C
> function is called instead of sse4 asm, ref_sum and ref_sum1 are same
> + int opt_sum = (int)checked(opt, absCoeff, numNonZero, firstC2Idx);
> + if (ref_sum != opt_sum)
> + return false;
> + }
> +}
> +
> +
> bool PixelHarness::testPU(int part, const EncoderPrimitives& ref, const
> EncoderPrimitives& opt)
> {
> if (opt.pu[part].satd)
> @@ -2164,6 +2342,14 @@
> return false;
> }
> }
> + if (opt.costCoeffRemain)
> + {
> + if (!check_costCoeffRemain(ref.costCoeffRemain,
> opt.costCoeffRemain))
> + {
> + printf("costCoeffRemain failed!\n");
> + return false;
> + }
> + }
>
> return true;
> }
> @@ -2603,4 +2789,13 @@
>
> REPORT_SPEEDUP(opt.costCoeffNxN, ref.costCoeffNxN,
> g_scan4x4[SCAN_DIAG], coefBuf, 32, tmpOut, ctxSig, 0xFFFF, ctx, 1, 15, 32);
> }
> +
> + if (opt.costCoeffRemain)
> + {
> + HEADER0("costCoeffRemain");
> + uint16_t abscoefBuf[32 * 32];
> + memset(abscoefBuf, 0, sizeof(abscoefBuf));
> + memset(abscoefBuf + 32 * 31, 1, 32 * sizeof(uint16_t));
> + REPORT_SPEEDUP(opt.costCoeffRemain, ref.costCoeffRemain,
> abscoefBuf, 16, 32);
> + }
> }
> diff -r 1e5c4d155ab8 -r 83e1acab3578 source/test/pixelharness.h
> --- a/source/test/pixelharness.h Thu Jun 25 13:42:29 2015 +0530
> +++ b/source/test/pixelharness.h Fri Jun 26 16:20:50 2015 +0530
> @@ -111,7 +111,7 @@
> bool check_scanPosLast(scanPosLast_t ref, scanPosLast_t opt);
> bool check_findPosFirstLast(findPosFirstLast_t ref,
> findPosFirstLast_t opt);
> bool check_costCoeffNxN(costCoeffNxN_t ref, costCoeffNxN_t opt);
> -
> + bool check_costCoeffRemain(costCoeffRemain_t ref, costCoeffRemain_t
> opt);
>
> public:
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150626/7d0aeda9/attachment.html>
More information about the x265-devel
mailing list