[x265] [PATCH] testbench for costCoeffRemain()
sumalatha at multicorewareinc.com
sumalatha at multicorewareinc.com
Fri Jun 26 12:50:59 CEST 2015
# HG changeset patch
# User Sumalatha Polureddy<sumalatha at multicorewareinc.com>
# Date 1435315850 -19800
# Fri Jun 26 16:20:50 2015 +0530
# Node ID 83e1acab3578bcd77aef6d8d3d42f134893751bd
# Parent 1e5c4d155ab85e8e8dd199bb3515801766ea9e88
testbench for costCoeffRemain()
diff -r 1e5c4d155ab8 -r 83e1acab3578 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Thu Jun 25 13:42:29 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp Fri Jun 26 16:20:50 2015 +0530
@@ -2491,7 +2491,7 @@
p.costCoeffNxN = PFX(costCoeffNxN_sse4);
#endif
// TODO: it is passed smoke test, but we need testbench to active it, so temporary disable
- //p.costCoeffRemain = x265_costCoeffRemain_sse4;
+ p.costCoeffRemain = x265_costCoeffRemain_sse4;
}
if (cpuMask & X265_CPU_AVX)
{
diff -r 1e5c4d155ab8 -r 83e1acab3578 source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp Thu Jun 25 13:42:29 2015 +0530
+++ b/source/test/pixelharness.cpp Fri Jun 26 16:20:50 2015 +0530
@@ -1581,6 +1581,184 @@
return true;
}
+// Testbench for the costCoeffRemain primitive. Each of the ITERS rounds picks a
+// random scan type, block size and 4x4 coefficient group, gathers the absolute
+// levels of that group in reverse scan order and passes them to ref and opt.
+// Returns false at the first round whose two sums differ, true otherwise.
+bool PixelHarness::check_costCoeffRemain(costCoeffRemain_t ref, costCoeffRemain_t opt)
+{
+    ALIGN_VAR_16(coeff_t, ref_src[32 * 32 + ITERS * 2]);
+    ALIGN_VAR_32(uint16_t, ref_absCoeff[1 << MLS_CG_SIZE]);
+
+    // slot 0 is skipped whenever numNonZero starts at 1, so clear the buffer to
+    // keep the primitives from ever being handed indeterminate values
+    for (int i = 0; i < (1 << MLS_CG_SIZE); i++)
+        ref_absCoeff[i] = 0;
+
+    for (int i = 0; i < 32 * 32; i++)
+    {
+        ref_src[i] = rand() & SHORT_MAX;
+        // more zero coeff
+        if (ref_src[i] < SHORT_MAX * 2 / 3)
+            ref_src[i] = 0;
+        // more negative
+        if ((rand() % 10) < 8)
+            ref_src[i] *= -1;
+    }
+    // extra test area all of 0x1234
+    for (int i = 0; i < ITERS * 2; i++)
+    {
+        ref_src[32 * 32 + i] = 0x1234;
+    }
+
+    // generate CABAC context table
+    uint8_t m_contextState_ref[OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA];
+    for (int k = 0; k < (OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA); k++)
+    {
+        m_contextState_ref[k] = (rand() % (125 - 2)) + 2;
+    }
+    uint8_t *const ref_baseCtx = m_contextState_ref;
+    for (int i = 0; i < ITERS; i++)
+    {
+        int rand_scan_type = rand() % NUM_SCAN_TYPE;
+        int rand_scan_size = rand() % NUM_SCAN_SIZE;
+        int rand_scanPosSigOff = rand() % 16; // rand_scanPosSigOff range is [0,15]
+        int numNonZero = (rand_scanPosSigOff < (MLS_CG_BLK_SIZE - 1)) ? 1 : 0;
+
+        int rand_patternSigCtx = rand() % 4; //range [0,3]
+        // NOTE(review): rand_scan_size is log2(trSize) - 2 (see trSize below), yet it is
+        // compared with 2 and 3 here and when selecting the "4x4" table row -- confirm
+        int offset; // the value have a exact range, details in CoeffNxN()
+        if (rand_scan_size == 2)
+            offset = 0;
+        else if (rand_scan_size == 3)
+            offset = 9;
+        else
+            offset = 12;
+
+        const int trSize = (1 << (rand_scan_size + 2));
+        ALIGN_VAR_32(static const uint8_t, table_cnt[5][SCAN_SET_SIZE]) =
+        {
+            // patternSigCtx = 0
+            {
+                2, 1, 1, 0,
+                1, 1, 0, 0,
+                1, 0, 0, 0,
+                0, 0, 0, 0,
+            },
+            // patternSigCtx = 1
+            {
+                2, 2, 2, 2,
+                1, 1, 1, 1,
+                0, 0, 0, 0,
+                0, 0, 0, 0,
+            },
+            // patternSigCtx = 2
+            {
+                2, 1, 0, 0,
+                2, 1, 0, 0,
+                2, 1, 0, 0,
+                2, 1, 0, 0,
+            },
+            // patternSigCtx = 3
+            {
+                2, 2, 2, 2,
+                2, 2, 2, 2,
+                2, 2, 2, 2,
+                2, 2, 2, 2,
+            },
+            // 4x4
+            {
+                0, 1, 4, 5,
+                2, 3, 4, 5,
+                6, 6, 8, 8,
+                7, 7, 8, 8
+            }
+        };
+        const uint8_t *rand_tabSigCtx = table_cnt[(rand_scan_size == 2) ? 4 : (uint32_t)rand_patternSigCtx];
+        const uint16_t* const scanTbl = g_scanOrder[rand_scan_type][rand_scan_size];
+        const uint16_t* const scanTblCG4x4 = g_scan4x4[rand_scan_size <= (MDCS_LOG2_MAX_SIZE - 2) ? rand_scan_type : SCAN_DIAG];
+
+        int rand_scanPosCG = rand() % (trSize * trSize / MLS_CG_BLK_SIZE);
+        int subPosBase = rand_scanPosCG * MLS_CG_BLK_SIZE;
+        int rand_numCoeff = 0;
+        uint32_t scanFlagMask = 0;
+
+        // one flag bit per scanned position; the last position scanned lands in bit 0
+        for (int k = 0; k <= rand_scanPosSigOff; k++)
+        {
+            uint32_t pos = scanTbl[subPosBase + k];
+            coeff_t tmp_coeff = ref_src[i + pos];
+            if (tmp_coeff != 0)
+            {
+                rand_numCoeff++;
+            }
+            scanFlagMask = scanFlagMask * 2 + (tmp_coeff != 0);
+        }
+
+        // can't process all zeros block
+        if (rand_numCoeff == 0)
+            continue;
+
+        const uint32_t blkPosBase = scanTbl[subPosBase];
+
+        ALIGN_VAR_32(uint16_t, tmpCoeff[SCAN_SET_SIZE]);
+        uint16_t *absCoeff = ref_absCoeff;
+        coeff_t *coeff = &ref_src[blkPosBase + i];
+
+        // tmpCoeff[] receives the absolute values of the 4x4 group; its rows are trSize apart
+        for (int row = 0; row < MLS_CG_SIZE; row++)
+        {
+            tmpCoeff[row * MLS_CG_SIZE + 0] = (uint16_t)abs(coeff[row * trSize + 0]);
+            tmpCoeff[row * MLS_CG_SIZE + 1] = (uint16_t)abs(coeff[row * trSize + 1]);
+            tmpCoeff[row * MLS_CG_SIZE + 2] = (uint16_t)abs(coeff[row * trSize + 2]);
+            tmpCoeff[row * MLS_CG_SIZE + 3] = (uint16_t)abs(coeff[row * trSize + 3]);
+        }
+
+        do
+        {
+            uint32_t blkPos, sig, ctxSig;
+            blkPos = scanTblCG4x4[rand_scanPosSigOff];
+            const uint32_t posZeroMask = (subPosBase + rand_scanPosSigOff) ? ~0 : 0;
+            sig = scanFlagMask & 1;
+            scanFlagMask >>= 1;
+            X265_CHECK((uint32_t)(tmpCoeff[blkPos] != 0) == sig, "sign bit mistake\n");
+            if ((rand_scanPosSigOff != 0) || (subPosBase == 0) || numNonZero)
+            {
+                const uint32_t cnt = rand_tabSigCtx[blkPos] + offset;
+                ctxSig = cnt & posZeroMask;
+
+                // context state transition, cross-checked against sbacNext()/sbacGetEntropyBits()
+                const uint32_t mstate = ref_baseCtx[ctxSig];
+                const uint32_t mps = mstate & 1;
+                const uint32_t stateBits = PFX(entropyStateBits)[mstate ^ sig];
+                uint32_t nextState = (stateBits >> 24) + mps;
+                if ((mstate ^ sig) == 1)
+                    nextState = sig;
+                X265_CHECK(sbacNext(mstate, sig) == nextState, "nextState check failure\n");
+                X265_CHECK(sbacGetEntropyBits(mstate, sig) == (stateBits & 0xFFFFFF), "entropyBits check failure\n");
+                ref_baseCtx[ctxSig] = (uint8_t)nextState;
+            }
+            assert(numNonZero <= 15);
+            assert(blkPos <= 15);
+            absCoeff[numNonZero] = tmpCoeff[blkPos];
+            numNonZero += sig;
+            rand_scanPosSigOff--;
+        } while (rand_scanPosSigOff >= 0);
+
+        // rand() >> 28 is always 0 when RAND_MAX is only 15 bits wide; masking keeps
+        // the [0,7] range it yields with a 31-bit RAND_MAX on every platform
+        uint32_t firstC2Idx = (uint32_t)(rand() & 7);
+        int ref_sum = ref(absCoeff, numNonZero, firstC2Idx);
+        int opt_sum = (int)checked(opt, absCoeff, numNonZero, firstC2Idx);
+        if (ref_sum != opt_sum)
+            return false;
+    }
+
+    return true;
+}
+
+
bool PixelHarness::testPU(int part, const EncoderPrimitives& ref, const EncoderPrimitives& opt)
{
if (opt.pu[part].satd)
@@ -2164,6 +2342,14 @@
return false;
}
}
+ if (opt.costCoeffRemain)
+ {
+ if (!check_costCoeffRemain(ref.costCoeffRemain, opt.costCoeffRemain))
+ {
+ printf("costCoeffRemain failed!\n");
+ return false;
+ }
+ }
return true;
}
@@ -2603,4 +2789,13 @@
REPORT_SPEEDUP(opt.costCoeffNxN, ref.costCoeffNxN, g_scan4x4[SCAN_DIAG], coefBuf, 32, tmpOut, ctxSig, 0xFFFF, ctx, 1, 15, 32);
}
+
+ if (opt.costCoeffRemain)
+ {
+ HEADER0("costCoeffRemain");
+ uint16_t abscoefBuf[32 * 32];
+ memset(abscoefBuf, 0, sizeof(abscoefBuf));
+ memset(abscoefBuf + 32 * 31, 1, 32 * sizeof(uint16_t));
+ REPORT_SPEEDUP(opt.costCoeffRemain, ref.costCoeffRemain, abscoefBuf, 16, 32);
+ }
}
diff -r 1e5c4d155ab8 -r 83e1acab3578 source/test/pixelharness.h
--- a/source/test/pixelharness.h Thu Jun 25 13:42:29 2015 +0530
+++ b/source/test/pixelharness.h Fri Jun 26 16:20:50 2015 +0530
@@ -111,7 +111,7 @@
bool check_scanPosLast(scanPosLast_t ref, scanPosLast_t opt);
bool check_findPosFirstLast(findPosFirstLast_t ref, findPosFirstLast_t opt);
bool check_costCoeffNxN(costCoeffNxN_t ref, costCoeffNxN_t opt);
-
+ bool check_costCoeffRemain(costCoeffRemain_t ref, costCoeffRemain_t opt);
public:
More information about the x265-devel
mailing list