[x265-commits] [x265] search: split generateCoeffRecon() into its two callers, ...

Wed Oct 29 17:08:57 CET 2014

details:   http://hg.videolan.org/x265/rev/7c2b831e52fb
branches:  
changeset: 8736:7c2b831e52fb
user:      Steve Borho <steve at borho.org>
date:      Wed Oct 29 10:45:23 2014 -0500
description:
search: split generateCoeffRecon() into its two callers, improve comments

The function consisted of a big if (inter) else (intra) expression and it had
only two callers, one which knew it was inter and one which knew it was intra.
Subject: [x265] analysis: fix for --rd 0, do not assume a CU is only inter or intra

details:   http://hg.videolan.org/x265/rev/3aec7242d0be
branches:  stable
changeset: 8737:3aec7242d0be
user:      Steve Borho <steve at borho.org>
date:      Wed Oct 29 10:53:33 2014 -0500
description:
analysis: fix for --rd 0, do not assume a CU is only inter or intra
Subject: [x265] Merge with stable

details:   http://hg.videolan.org/x265/rev/f5603998be03
branches:  
changeset: 8738:f5603998be03
user:      Steve Borho <steve at borho.org>
date:      Wed Oct 29 10:53:51 2014 -0500
description:
Merge with stable
Subject: [x265] search: relocate intra in inter functions to search.cpp

details:   http://hg.videolan.org/x265/rev/fe3b88e11f44
branches:  
changeset: 8739:fe3b88e11f44
user:      Steve Borho <steve at borho.org>
date:      Wed Oct 29 11:07:44 2014 -0500
description:
search: relocate intra in inter functions to search.cpp

diffstat:

 source/encoder/analysis.cpp |  264 ++++++-------------------------------------
 source/encoder/analysis.h   |    4 -
 source/encoder/search.cpp   |  255 ++++++++++++++++++++++++++++++++++++------
 source/encoder/search.h     |   19 ++-
 4 files changed, 271 insertions(+), 271 deletions(-)

diffs (truncated from 696 to 300 lines):

diff -r 7cfc1edb083f -r fe3b88e11f44 source/encoder/analysis.cpp

--- a/source/encoder/analysis.cpp	Tue Oct 28 16:31:31 2014 -0500
+++ b/source/encoder/analysis.cpp	Wed Oct 29 11:07:44 2014 -0500
@@ -389,7 +389,7 @@ void Analysis::parallelModeAnalysis(int 
         case 0:
             if (slave != this)
                 slave->m_rqt[m_curGeom->depth].cur.load(m_rqt[m_curGeom->depth].cur);
-            slave->checkIntraInInter_rd0_4(md.pred[PRED_INTRA], *m_curGeom);
+            slave->checkIntraInInter(md.pred[PRED_INTRA], *m_curGeom);
             if (m_param->rdLevel > 2)
                 slave->encodeIntraInInter(md.pred[PRED_INTRA], *m_curGeom);
             break;
@@ -554,8 +554,8 @@ void Analysis::compressInterCU_dist(cons
             {
                 if (md.pred[PRED_Nx2N].sa8dCost < bestInter->sa8dCost)
                     bestInter = &md.pred[PRED_Nx2N];
-                if (md.pred[PRED_Nx2N].sa8dCost < bestInter->sa8dCost)
-                    bestInter = &md.pred[PRED_Nx2N];
+                if (md.pred[PRED_2NxN].sa8dCost < bestInter->sa8dCost)
+                    bestInter = &md.pred[PRED_2NxN];
             }
 
             if (bTryAmp)
@@ -640,7 +640,7 @@ void Analysis::compressInterCU_dist(cons
         if (md.bestMode->rdCost == MAX_INT64 && !bTryIntra)
         {
             md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
-            checkIntraInInter_rd0_4(md.pred[PRED_INTRA], cuGeom);
+            checkIntraInInter(md.pred[PRED_INTRA], cuGeom);
             encodeIntraInInter(md.pred[PRED_INTRA], cuGeom);
             checkBestMode(md.pred[PRED_INTRA], depth);
         }
@@ -825,7 +825,7 @@ void Analysis::compressInterCU_rd0_4(con
                     md.bestMode->sa8dCost == MAX_INT64)
                 {
                     md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
-                    checkIntraInInter_rd0_4(md.pred[PRED_INTRA], cuGeom);
+                    checkIntraInInter(md.pred[PRED_INTRA], cuGeom);
                     encodeIntraInInter(md.pred[PRED_INTRA], cuGeom);
                     if (md.pred[PRED_INTRA].rdCost < md.bestMode->rdCost)
                         md.bestMode = &md.pred[PRED_INTRA];
@@ -840,7 +840,7 @@ void Analysis::compressInterCU_rd0_4(con
                 if (bTryIntra || md.bestMode->sa8dCost == MAX_INT64)
                 {
                     md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
-                    checkIntraInInter_rd0_4(md.pred[PRED_INTRA], cuGeom);
+                    checkIntraInInter(md.pred[PRED_INTRA], cuGeom);
                     if (md.pred[PRED_INTRA].sa8dCost < md.bestMode->sa8dCost)
                         md.bestMode = &md.pred[PRED_INTRA];
                 }
@@ -865,8 +865,23 @@ void Analysis::compressInterCU_rd0_4(con
                         encodeResAndCalcRdInterCU(*md.bestMode, cuGeom);
                     else if (m_param->rdLevel == 1)
                     {
-                        m_rqt[cuGeom.depth].tmpResiYuv.subtract(md.fencYuv, md.bestMode->predYuv, cuGeom.log2CUSize);
-                        generateCoeffRecon(*md.bestMode, cuGeom);
+                        /* generate recon pixels with no rate distortion considerations */
+                        CUData& cu = md.bestMode->cu;
+                        m_quant.setQPforQuant(cu);
+
+                        uint32_t tuDepthRange[2];
+                        cu.getInterTUQtDepthRange(tuDepthRange, 0);
+
+                        m_rqt[cuGeom.depth].tmpResiYuv.subtract(*md.bestMode->fencYuv, md.bestMode->predYuv, cuGeom.log2CUSize);
+                        residualTransformQuantInter(*md.bestMode, cuGeom, 0, cuGeom.depth, tuDepthRange);
+                        if (cu.getQtRootCbf(0))
+                            md.bestMode->reconYuv.addClip(md.bestMode->predYuv, m_rqt[cuGeom.depth].tmpResiYuv, cu.m_log2CUSize[0]);
+                        else
+                        {
+                            md.bestMode->reconYuv.copyFromYuv(md.bestMode->predYuv);
+                            if (cu.m_mergeFlag[0] && cu.m_partSize[0] == SIZE_2Nx2N)
+                                cu.setSkipFlagSubParts(true);
+                        }
                     }
                 }
                 else
@@ -874,7 +889,19 @@ void Analysis::compressInterCU_rd0_4(con
                     if (m_param->rdLevel == 2)
                         encodeIntraInInter(*md.bestMode, cuGeom);
                     else if (m_param->rdLevel == 1)
-                        generateCoeffRecon(*md.bestMode, cuGeom);
+                    {
+                        /* generate recon pixels with no rate distortion considerations */
+                        CUData& cu = md.bestMode->cu;
+                        m_quant.setQPforQuant(cu);
+
+                        uint32_t tuDepthRange[2];
+                        cu.getIntraTUQtDepthRange(tuDepthRange, 0);
+
+                        uint32_t initTrDepth = cu.m_partSize[0] == SIZE_NxN;
+                        residualTransformQuantIntra(*md.bestMode, cuGeom, initTrDepth, 0, tuDepthRange);
+                        getBestIntraModeChroma(*md.bestMode, cuGeom);
+                        residualQTIntraChroma(*md.bestMode, cuGeom, 0, 0);
+                    }
                 }
             }
         } // !earlyskip
@@ -1369,222 +1396,6 @@ void Analysis::checkInter_rd5_6(Mode& in
     }
 }
 
-/* Note that this function does not save the best intra prediction, it must
- * be generated later. It records the best mode in the cu */
-void Analysis::checkIntraInInter_rd0_4(Mode& intraMode, const CUGeom& cuGeom)
-{
-    CUData& cu = intraMode.cu;
-    uint32_t depth = cu.m_cuDepth[0];
-
-    cu.setPartSizeSubParts(SIZE_2Nx2N);
-    cu.setPredModeSubParts(MODE_INTRA);
-
-    uint32_t initTrDepth = 0;
-    uint32_t log2TrSize  = cu.m_log2CUSize[0] - initTrDepth;
-    uint32_t tuSize      = 1 << log2TrSize;
-    const uint32_t absPartIdx  = 0;
-
-    // Reference sample smoothing
-    initAdiPattern(cu, cuGeom, absPartIdx, initTrDepth, ALL_IDX);
-
-    pixel* fenc = m_modeDepth[depth].fencYuv.m_buf[0];
-    uint32_t stride = m_modeDepth[depth].fencYuv.m_size;
-
-    pixel *above         = m_refAbove    + tuSize - 1;
-    pixel *aboveFiltered = m_refAboveFlt + tuSize - 1;
-    pixel *left          = m_refLeft     + tuSize - 1;
-    pixel *leftFiltered  = m_refLeftFlt  + tuSize - 1;
-    int sad, bsad;
-    uint32_t bits, bbits, mode, bmode;
-    uint64_t cost, bcost;
-
-    // 33 Angle modes once
-    ALIGN_VAR_32(pixel, bufScale[32 * 32]);
-    ALIGN_VAR_32(pixel, bufTrans[32 * 32]);
-    ALIGN_VAR_32(pixel, tmp[33 * 32 * 32]);
-    int scaleTuSize = tuSize;
-    int scaleStride = stride;
-    int costShift = 0;
-    int sizeIdx = log2TrSize - 2;
-
-    if (tuSize > 32)
-    {
-        // origin is 64x64, we scale to 32x32 and setup required parameters
-        primitives.scale2D_64to32(bufScale, fenc, stride);
-        fenc = bufScale;
-
-        // reserve space in case primitives need to store data in above
-        // or left buffers
-        pixel _above[4 * 32 + 1];
-        pixel _left[4 * 32 + 1];
-        pixel *aboveScale  = _above + 2 * 32;
-        pixel *leftScale   = _left + 2 * 32;
-        aboveScale[0] = leftScale[0] = above[0];
-        primitives.scale1D_128to64(aboveScale + 1, above + 1, 0);
-        primitives.scale1D_128to64(leftScale + 1, left + 1, 0);
-
-        scaleTuSize = 32;
-        scaleStride = 32;
-        costShift = 2;
-        sizeIdx = 5 - 2; // log2(scaleTuSize) - 2
-
-        // Filtered and Unfiltered refAbove and refLeft pointing to above and left.
-        above         = aboveScale;
-        left          = leftScale;
-        aboveFiltered = aboveScale;
-        leftFiltered  = leftScale;
-    }
-
-    pixelcmp_t sa8d = primitives.sa8d[sizeIdx];
-    int predsize = scaleTuSize * scaleTuSize;
-
-    m_entropyCoder.loadIntraDirModeLuma(m_rqt[depth].cur);
-
-    /* there are three cost tiers for intra modes:
-     *  pred[0]          - mode probable, least cost
-     *  pred[1], pred[2] - less probable, slightly more cost
-     *  non-mpm modes    - all cost the same (rbits) */
-    uint64_t mpms;
-    uint32_t preds[3];
-    uint32_t rbits = getIntraRemModeBits(cu, absPartIdx, preds, mpms);
-
-    // DC
-    primitives.intra_pred[DC_IDX][sizeIdx](tmp, scaleStride, left, above, 0, (scaleTuSize <= 16));
-    bsad = sa8d(fenc, scaleStride, tmp, scaleStride) << costShift;
-    bmode = mode = DC_IDX;
-    bbits = (mpms & ((uint64_t)1 << mode)) ? m_entropyCoder.bitsIntraModeMPM(preds, mode) : rbits;
-    bcost = m_rdCost.calcRdSADCost(bsad, bbits);
-
-    pixel *abovePlanar = above;
-    pixel *leftPlanar  = left;
-
-    if (tuSize & (8 | 16 | 32))
-    {
-        abovePlanar = aboveFiltered;
-        leftPlanar  = leftFiltered;
-    }
-
-    // PLANAR
-    primitives.intra_pred[PLANAR_IDX][sizeIdx](tmp, scaleStride, leftPlanar, abovePlanar, 0, 0);
-    sad = sa8d(fenc, scaleStride, tmp, scaleStride) << costShift;
-    mode = PLANAR_IDX;
-    bits = (mpms & ((uint64_t)1 << mode)) ? m_entropyCoder.bitsIntraModeMPM(preds, mode) : rbits;
-    cost = m_rdCost.calcRdSADCost(sad, bits);
-    COPY4_IF_LT(bcost, cost, bmode, mode, bsad, sad, bbits, bits);
-
-    // Transpose NxN
-    primitives.transpose[sizeIdx](bufTrans, fenc, scaleStride);
-
-    primitives.intra_pred_allangs[sizeIdx](tmp, above, left, aboveFiltered, leftFiltered, (scaleTuSize <= 16));
-
-    bool modeHor;
-    pixel *cmp;
-    intptr_t srcStride;
-
-#define TRY_ANGLE(angle) \
-    modeHor = angle < 18; \
-    cmp = modeHor ? bufTrans : fenc; \
-    srcStride = modeHor ? scaleTuSize : scaleStride; \
-    sad = sa8d(cmp, srcStride, &tmp[(angle - 2) * predsize], scaleTuSize) << costShift; \
-    bits = (mpms & ((uint64_t)1 << angle)) ? m_entropyCoder.bitsIntraModeMPM(preds, angle) : rbits; \
-    cost = m_rdCost.calcRdSADCost(sad, bits)
-
-    if (m_param->bEnableFastIntra)
-    {
-        int asad = 0;
-        uint32_t lowmode, highmode, amode = 5, abits = 0;
-        uint64_t acost = MAX_INT64;
-
-        /* pick the best angle, sampling at distance of 5 */
-        for (mode = 5; mode < 35; mode += 5)
-        {
-            TRY_ANGLE(mode);
-            COPY4_IF_LT(acost, cost, amode, mode, asad, sad, abits, bits);
-        }
-
-        /* refine best angle at distance 2, then distance 1 */
-        for (uint32_t dist = 2; dist >= 1; dist--)
-        {
-            lowmode = amode - dist;
-            highmode = amode + dist;
-
-            X265_CHECK(lowmode >= 2 && lowmode <= 34, "low intra mode out of range\n");
-            TRY_ANGLE(lowmode);
-            COPY4_IF_LT(acost, cost, amode, lowmode, asad, sad, abits, bits);
-
-            X265_CHECK(highmode >= 2 && highmode <= 34, "high intra mode out of range\n");
-            TRY_ANGLE(highmode);
-            COPY4_IF_LT(acost, cost, amode, highmode, asad, sad, abits, bits);
-        }
-
-        if (amode == 33)
-        {
-            TRY_ANGLE(34);
-            COPY4_IF_LT(acost, cost, amode, 34, asad, sad, abits, bits);
-        }
-
-        COPY4_IF_LT(bcost, acost, bmode, amode, bsad, asad, bbits, abits);
-    }
-    else // calculate and search all intra prediction angles for lowest cost
-    {
-        for (mode = 2; mode < 35; mode++)
-        {
-            TRY_ANGLE(mode);
-            COPY4_IF_LT(bcost, cost, bmode, mode, bsad, sad, bbits, bits);
-        }
-    }
-
-    cu.setLumaIntraDirSubParts((uint8_t)bmode, absPartIdx, depth + initTrDepth);
-    intraMode.initCosts();
-    intraMode.totalBits = bbits;
-    intraMode.distortion = bsad;
-    intraMode.sa8dCost = bcost;
-}
-
-void Analysis::encodeIntraInInter(Mode& intraMode, const CUGeom& cuGeom)
-{
-    CUData& cu = intraMode.cu;
-    Yuv* reconYuv = &intraMode.reconYuv;
-    Yuv* fencYuv = &m_modeDepth[cuGeom.depth].fencYuv;
-
-    X265_CHECK(cu.m_partSize[0] == SIZE_2Nx2N, "encodeIntraInInter does not expect NxN intra\n");
-    X265_CHECK(!m_slice->isIntra(), "encodeIntraInInter does not expect to be used in I slices\n");
-
-    m_quant.setQPforQuant(cu);
-
-    uint32_t tuDepthRange[2];
-    cu.getIntraTUQtDepthRange(tuDepthRange, 0);
-
-    m_entropyCoder.load(m_rqt[cuGeom.depth].cur);
-
-    Cost icosts;
-    codeIntraLumaQT(intraMode, cuGeom, 0, 0, false, icosts, tuDepthRange);
-    extractIntraResultQT(cu, *reconYuv, 0, 0);
-
-    intraMode.distortion  = icosts.distortion;
-    intraMode.distortion += estIntraPredChromaQT(intraMode, cuGeom);
-
-    m_entropyCoder.resetBits();
-    if (m_slice->m_pps->bTransquantBypassEnabled)
-        m_entropyCoder.codeCUTransquantBypassFlag(cu.m_tqBypass[0]);
-    m_entropyCoder.codeSkipFlag(cu, 0);