<div dir="ltr">Please ignore this patch<div><br></div><div>Regards</div><div>Sumalatha</div></div><div class="gmail_extra"><br><div class="gmail_quote">On Fri, Jun 26, 2015 at 4:20 PM,  <span dir="ltr"><<a href="mailto:sumalatha@multicorewareinc.com" target="_blank">sumalatha@multicorewareinc.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Sumalatha Polureddy<<a href="mailto:sumalatha@multicorewareinc.com">sumalatha@multicorewareinc.com</a>><br>
# Date 1435315850 -19800<br>
#      Fri Jun 26 16:20:50 2015 +0530<br>
# Node ID 83e1acab3578bcd77aef6d8d3d42f134893751bd<br>
# Parent  1e5c4d155ab85e8e8dd199bb3515801766ea9e88<br>
testbench for costCoeffRemain()<br>
<br>
diff -r 1e5c4d155ab8 -r 83e1acab3578 source/common/x86/asm-primitives.cpp<br>
--- a/source/common/x86/asm-primitives.cpp      Thu Jun 25 13:42:29 2015 +0530<br>
+++ b/source/common/x86/asm-primitives.cpp      Fri Jun 26 16:20:50 2015 +0530<br>
@@ -2491,7 +2491,7 @@<br>
         p.costCoeffNxN = PFX(costCoeffNxN_sse4);<br>
 #endif<br>
         // TODO: it is passed smoke test, but we need testbench to active it, so temporary disable<br>
-        //p.costCoeffRemain = x265_costCoeffRemain_sse4;<br>
+        p.costCoeffRemain = x265_costCoeffRemain_sse4;<br>
     }<br>
     if (cpuMask & X265_CPU_AVX)<br>
     {<br>
diff -r 1e5c4d155ab8 -r 83e1acab3578 source/test/pixelharness.cpp<br>
--- a/source/test/pixelharness.cpp      Thu Jun 25 13:42:29 2015 +0530<br>
+++ b/source/test/pixelharness.cpp      Fri Jun 26 16:20:50 2015 +0530<br>
@@ -1581,6 +1581,184 @@<br>
     return true;<br>
 }<br>
<br>
+// Verifies an optimized costCoeffRemain primitive against the reference one.<br>
+// Each of the ITERS rounds picks a random coefficient group from a random<br>
+// buffer, gathers its absolute levels while walking the scan backwards (also<br>
+// updating a scratch CABAC context table along the way), and hands the same<br>
+// list to both primitives. Returns false on the first mismatch, else true.<br>
+bool PixelHarness::check_costCoeffRemain(costCoeffRemain_t ref, costCoeffRemain_t opt)<br>
+{<br>
+    ALIGN_VAR_16(coeff_t, ref_src[32 * 32 + ITERS * 2]);<br>
+    ALIGN_VAR_32(uint16_t, ref_absCoeff[1 << MLS_CG_SIZE]);<br>
+    // slot 0 is never written when numNonZero starts at 1, so clear the buffer up front<br>
+    for (int k = 0; k < (1 << MLS_CG_SIZE); k++)<br>
+        ref_absCoeff[k] = 0;<br>
+<br>
+    for (int i = 0; i < 32 * 32; i++)<br>
+    {<br>
+        ref_src[i] = rand() & SHORT_MAX;<br>
+        // more zero coeff<br>
+        if (ref_src[i] < SHORT_MAX * 2 / 3)<br>
+            ref_src[i] = 0;<br>
+        // more negative<br>
+        if ((rand() % 10) < 8)<br>
+            ref_src[i] *= -1;<br>
+    }<br>
+    // extra test area all of 0x1234<br>
+    for (int i = 0; i < ITERS * 2; i++)<br>
+    {<br>
+        ref_src[32 * 32 + i] = 0x1234;<br>
+    }<br>
+<br>
+    // generate CABAC context table<br>
+    uint8_t m_contextState_ref[OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA];<br>
+    for (int k = 0; k < (OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA); k++)<br>
+    {<br>
+        m_contextState_ref[k] = (rand() % (125 - 2)) + 2;<br>
+    }<br>
+    uint8_t *const ref_baseCtx = m_contextState_ref;<br>
+    for (int i = 0; i < ITERS; i++)<br>
+    {<br>
+        int rand_scan_type = rand() % NUM_SCAN_TYPE;<br>
+        int rand_scan_size = rand() % NUM_SCAN_SIZE;<br>
+        int rand_scanPosSigOff = rand() % 16; // rand_scanPosSigOff range is [0,15]<br>
+        int numNonZero = (rand_scanPosSigOff < (MLS_CG_BLK_SIZE - 1)) ? 1 : 0;<br>
+<br>
+        int rand_patternSigCtx = rand() % 4; //range [0,3]<br>
+        int offset; // the value has an exact range, details in CoeffNxN()<br>
+        if (rand_scan_size == 2)<br>
+            offset = 0;<br>
+        else if (rand_scan_size == 3)<br>
+            offset = 9;<br>
+        else<br>
+            offset = 12;<br>
+<br>
+        const int trSize = (1 << (rand_scan_size + 2));<br>
+        ALIGN_VAR_32(static const uint8_t, table_cnt[5][SCAN_SET_SIZE]) =<br>
+        {<br>
+            // patternSigCtx = 0<br>
+            {<br>
+                2, 1, 1, 0,<br>
+                1, 1, 0, 0,<br>
+                1, 0, 0, 0,<br>
+                0, 0, 0, 0,<br>
+            },<br>
+            // patternSigCtx = 1<br>
+            {<br>
+                2, 2, 2, 2,<br>
+                1, 1, 1, 1,<br>
+                0, 0, 0, 0,<br>
+                0, 0, 0, 0,<br>
+            },<br>
+            // patternSigCtx = 2<br>
+            {<br>
+                2, 1, 0, 0,<br>
+                2, 1, 0, 0,<br>
+                2, 1, 0, 0,<br>
+                2, 1, 0, 0,<br>
+            },<br>
+            // patternSigCtx = 3<br>
+            {<br>
+                2, 2, 2, 2,<br>
+                2, 2, 2, 2,<br>
+                2, 2, 2, 2,<br>
+                2, 2, 2, 2,<br>
+            },<br>
+            // 4x4<br>
+            {<br>
+                0, 1, 4, 5,<br>
+                2, 3, 4, 5,<br>
+                6, 6, 8, 8,<br>
+                7, 7, 8, 8<br>
+            }<br>
+        };<br>
+        const uint8_t *rand_tabSigCtx = table_cnt[(rand_scan_size == 2) ? 4 : (uint32_t)rand_patternSigCtx];<br>
+        const uint16_t* const scanTbl = g_scanOrder[rand_scan_type][rand_scan_size];<br>
+        const uint16_t* const scanTblCG4x4 = g_scan4x4[rand_scan_size <= (MDCS_LOG2_MAX_SIZE - 2) ? rand_scan_type : SCAN_DIAG];<br>
+<br>
+        int rand_scanPosCG = rand() % (trSize * trSize / MLS_CG_BLK_SIZE);<br>
+        int subPosBase = rand_scanPosCG * MLS_CG_BLK_SIZE;<br>
+        int rand_numCoeff = 0;<br>
+        uint32_t scanFlagMask = 0;<br>
+<br>
+        // build the significance bitmask; bit 0 ends up holding the flag of the highest scan position<br>
+        for (int k = 0; k <= rand_scanPosSigOff; k++)<br>
+        {<br>
+            uint32_t pos = scanTbl[subPosBase + k];<br>
+            coeff_t tmp_coeff = ref_src[i + pos];<br>
+            if (tmp_coeff != 0)<br>
+            {<br>
+                rand_numCoeff++;<br>
+            }<br>
+            scanFlagMask = scanFlagMask * 2 + (tmp_coeff != 0);<br>
+        }<br>
+<br>
+        // can't process all zeros block<br>
+        if (rand_numCoeff == 0)<br>
+            continue;<br>
+<br>
+        const uint32_t blkPosBase = scanTbl[subPosBase];<br>
+<br>
+        ALIGN_VAR_32(uint16_t, tmpCoeff[SCAN_SET_SIZE]);<br>
+        uint16_t *absCoeff = ref_absCoeff;<br>
+        coeff_t *coeff = &ref_src[blkPosBase + i];<br>
+<br>
+        // gather the group's absolute values row by row into a compact buffer<br>
+        for (int row = 0; row < MLS_CG_SIZE; row++)<br>
+        {<br>
+            tmpCoeff[row * MLS_CG_SIZE + 0] = (uint16_t)abs(coeff[row * trSize + 0]);<br>
+            tmpCoeff[row * MLS_CG_SIZE + 1] = (uint16_t)abs(coeff[row * trSize + 1]);<br>
+            tmpCoeff[row * MLS_CG_SIZE + 2] = (uint16_t)abs(coeff[row * trSize + 2]);<br>
+            tmpCoeff[row * MLS_CG_SIZE + 3] = (uint16_t)abs(coeff[row * trSize + 3]);<br>
+        }<br>
+<br>
+        // walk the group from rand_scanPosSigOff down to 0, consuming one mask bit<br>
+        // per position and appending the visited levels to absCoeff<br>
+        do<br>
+        {<br>
+            uint32_t blkPos, sig, ctxSig;<br>
+            blkPos = scanTblCG4x4[rand_scanPosSigOff];<br>
+            const uint32_t posZeroMask = (subPosBase + rand_scanPosSigOff) ? ~0 : 0;<br>
+            sig = scanFlagMask & 1;<br>
+            scanFlagMask >>= 1;<br>
+            X265_CHECK((uint32_t)(tmpCoeff[blkPos] != 0) == sig, "sign bit mistake\n");<br>
+            if ((rand_scanPosSigOff != 0) || (subPosBase == 0) || numNonZero)<br>
+            {<br>
+                const uint32_t cnt = rand_tabSigCtx[blkPos] + offset;<br>
+                ctxSig = cnt & posZeroMask;<br>
+<br>
+                // stands in for encodeBin(sig, baseCtx[ctxSig]); checked against sbacNext/sbacGetEntropyBits<br>
+                const uint32_t mstate = ref_baseCtx[ctxSig];<br>
+                const uint32_t mps = mstate & 1;<br>
+                const uint32_t stateBits = PFX(entropyStateBits)[mstate ^ sig];<br>
+                uint32_t nextState = (stateBits >> 24) + mps;<br>
+                if ((mstate ^ sig) == 1)<br>
+                    nextState = sig;<br>
+                X265_CHECK(sbacNext(mstate, sig) == nextState, "nextState check failure\n");<br>
+                X265_CHECK(sbacGetEntropyBits(mstate, sig) == (stateBits & 0xFFFFFF), "entropyBits check failure\n");<br>
+                ref_baseCtx[ctxSig] = (uint8_t)nextState;<br>
+            }<br>
+            assert(numNonZero <= 15);<br>
+            assert(blkPos <= 15);<br>
+            // every visited level is stored, but the write index only advances past non-zero ones<br>
+            absCoeff[numNonZero] = tmpCoeff[blkPos];<br>
+            numNonZero += sig;<br>
+            rand_scanPosSigOff--;<br>
+        } while (rand_scanPosSigOff >= 0);<br>
+<br>
+        // NOTE(review): rand() >> 28 spans 0..7 only where RAND_MAX is 2^31-1;<br>
+        // where RAND_MAX is 32767 it is always 0 - confirm the intended firstC2Idx range<br>
+        uint32_t firstC2Idx = (rand() >> 28);<br>
+        int ref_sum = ref(absCoeff, numNonZero, firstC2Idx);<br>
+        int opt_sum = (int)checked(opt, absCoeff, numNonZero, firstC2Idx);<br>
+        if (ref_sum != opt_sum)<br>
+            return false;<br>
+    }<br>
+<br>
+    return true;<br>
+}<br>
+<br>
+<br>
 bool PixelHarness::testPU(int part, const EncoderPrimitives& ref, const EncoderPrimitives& opt)<br>
 {<br>
     if (opt.pu[part].satd)<br>
@@ -2164,6 +2342,14 @@<br>
             return false;<br>
         }<br>
     }<br>
+    if (opt.costCoeffRemain)<br>
+    {<br>
+        if (!check_costCoeffRemain(ref.costCoeffRemain, opt.costCoeffRemain))<br>
+        {<br>
+            printf("costCoeffRemain failed!\n");<br>
+            return false;<br>
+        }<br>
+    }<br>
<br>
     return true;<br>
 }<br>
@@ -2603,4 +2789,13 @@<br>
<br>
         REPORT_SPEEDUP(opt.costCoeffNxN, ref.costCoeffNxN, g_scan4x4[SCAN_DIAG], coefBuf, 32, tmpOut, ctxSig, 0xFFFF, ctx, 1, 15, 32);<br>
     }<br>
+<br>
+    if (opt.costCoeffRemain)<br>
+    {<br>
+        HEADER0("costCoeffRemain");<br>
+        uint16_t abscoefBuf[32 * 32];<br>
+        memset(abscoefBuf, 0, sizeof(abscoefBuf));<br>
+        memset(abscoefBuf + 32 * 31, 1, 32 * sizeof(uint16_t));<br>
+        REPORT_SPEEDUP(opt.costCoeffRemain, ref.costCoeffRemain, abscoefBuf, 16, 32);<br>
+    }<br>
 }<br>
diff -r 1e5c4d155ab8 -r 83e1acab3578 source/test/pixelharness.h<br>
--- a/source/test/pixelharness.h        Thu Jun 25 13:42:29 2015 +0530<br>
+++ b/source/test/pixelharness.h        Fri Jun 26 16:20:50 2015 +0530<br>
@@ -111,7 +111,7 @@<br>
     bool check_scanPosLast(scanPosLast_t ref, scanPosLast_t opt);<br>
     bool check_findPosFirstLast(findPosFirstLast_t ref, findPosFirstLast_t opt);<br>
     bool check_costCoeffNxN(costCoeffNxN_t ref, costCoeffNxN_t opt);<br>
-<br>
+    bool check_costCoeffRemain(costCoeffRemain_t ref, costCoeffRemain_t opt);<br>
<br>
 public:<br>
<br>
</blockquote></div><br></div>