[x265] [PATCH] analysis: RDO based BIDIR decisions

Steve Borho steve at borho.org
Sun Nov 9 07:44:16 CET 2014


# HG changeset patch
# User Steve Borho <steve at borho.org>
# Date 1415484653 21600
#      Sat Nov 08 16:10:53 2014 -0600
# Node ID 3bb94141e18be23d86a35b9fd3a3f800b9a2fb54
# Parent  1e04e178a349ff3a27ed0207cca7bdd9f0db4ff8
analysis: RDO based BIDIR decisions

diff -r 1e04e178a349 -r 3bb94141e18b source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Sun Nov 09 00:30:09 2014 -0600
+++ b/source/encoder/analysis.cpp	Sat Nov 08 16:10:53 2014 -0600
@@ -399,6 +399,8 @@
 
         case 1:
             slave->checkInter_rd0_4(md.pred[PRED_2Nx2N], *m_curGeom, SIZE_2Nx2N);
+            if (m_slice->m_sliceType == B_SLICE)
+                slave->checkBidir2Nx2N(md.pred[PRED_2Nx2N], md.pred[PRED_BIDIR], *m_curGeom);
             break;
 
         case 2:
@@ -449,6 +451,13 @@
 
         case 1:
             slave->checkInter_rd5_6(md.pred[PRED_2Nx2N], *m_curGeom, SIZE_2Nx2N, false);
+            md.pred[PRED_BIDIR].rdCost = MAX_INT64;
+            if (m_slice->m_sliceType == B_SLICE)
+            {
+                slave->checkBidir2Nx2N(md.pred[PRED_2Nx2N], md.pred[PRED_BIDIR], *m_curGeom);
+                if (md.pred[PRED_BIDIR].sa8dCost < MAX_INT64)
+                    encodeResAndCalcRdInterCU(md.pred[PRED_BIDIR], *m_curGeom);
+            }
             break;
 
         case 2:
@@ -504,6 +513,7 @@
 
         /* Initialize all prediction CUs based on parentCTU */
         md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom);
+        md.pred[PRED_BIDIR].cu.initSubCU(parentCTU, cuGeom);
         md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom);
         md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom);
         if (m_param->bEnableRectInter)
@@ -595,16 +605,22 @@
 
             if (m_param->rdLevel > 2)
             {
-                /* encode best inter */
+                /* RD selection between merge, inter, bidir and intra */
                 for (uint32_t puIdx = 0; puIdx < bestInter->cu.getNumPartInter(); puIdx++)
                 {
                     prepMotionCompensation(bestInter->cu, cuGeom, puIdx);
                     motionCompensation(bestInter->predYuv, false, true);
                 }
                 encodeResAndCalcRdInterCU(*bestInter, cuGeom);
+                checkBestMode(*bestInter, depth);
 
-                /* RD selection between merge, inter and intra */
-                checkBestMode(*bestInter, depth);
+                /* If BIDIR is available and within 17/16 of best inter option, choose by RDO */
+                if (m_slice->m_sliceType == B_SLICE &&
+                    md.pred[PRED_BIDIR].sa8dCost * 16 <= bestInter->sa8dCost * 17)
+                {
+                    encodeResAndCalcRdInterCU(md.pred[PRED_BIDIR], cuGeom);
+                    checkBestMode(md.pred[PRED_BIDIR], depth);
+                }
 
 #if MATCH_NON_PMODE
                 if ((bTryIntra && md.bestMode->cu.getQtRootCbf(0)) || md.bestMode->sa8dCost == MAX_INT64)
@@ -618,6 +634,9 @@
                 if (!md.bestMode || bestInter->sa8dCost < md.bestMode->sa8dCost)
                     md.bestMode = bestInter;
 
+                if (m_slice->m_sliceType == B_SLICE && md.pred[PRED_BIDIR].sa8dCost < md.bestMode->sa8dCost)
+                    md.bestMode = &md.pred[PRED_BIDIR];
+
                 if (bTryIntra && md.pred[PRED_INTRA].sa8dCost < md.bestMode->sa8dCost)
                 {
                     md.bestMode = &md.pred[PRED_INTRA];
@@ -641,6 +660,7 @@
             m_modeCompletionEvent.wait();
 
             checkBestMode(md.pred[PRED_2Nx2N], depth);
+            checkBestMode(md.pred[PRED_BIDIR], depth);
 
             if (m_param->bEnableRectInter)
             {
@@ -790,8 +810,14 @@
         {
             md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom);
             checkInter_rd0_4(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N);
+
+            if (m_slice->m_sliceType == B_SLICE)
+            {
+                md.pred[PRED_BIDIR].cu.initSubCU(parentCTU, cuGeom);
+                checkBidir2Nx2N(md.pred[PRED_2Nx2N], md.pred[PRED_BIDIR], cuGeom);
+            }
+
             Mode *bestInter = &md.pred[PRED_2Nx2N];
-
             if (m_param->bEnableRectInter)
             {
                 md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom);
@@ -853,11 +879,18 @@
                     prepMotionCompensation(bestInter->cu, cuGeom, puIdx);
                     motionCompensation(bestInter->predYuv, false, true);
                 }
+                encodeResAndCalcRdInterCU(*bestInter, cuGeom);
+                checkBestMode(*bestInter, depth);
 
-                encodeResAndCalcRdInterCU(*bestInter, cuGeom);
-
-                if (!md.bestMode || bestInter->rdCost < md.bestMode->rdCost)
-                    md.bestMode = bestInter;
+                /* If BIDIR is available and within 17/16 of best inter option, choose by RDO */
+                if (m_slice->m_sliceType == B_SLICE)
+                {
+                    if (md.pred[PRED_BIDIR].sa8dCost * 16 <= bestInter->sa8dCost * 17)
+                    {
+                        encodeResAndCalcRdInterCU(md.pred[PRED_BIDIR], cuGeom);
+                        checkBestMode(md.pred[PRED_BIDIR], depth);
+                    }
+                }
 
                 if ((bTryIntra && md.bestMode->cu.getQtRootCbf(0)) ||
                     md.bestMode->sa8dCost == MAX_INT64)
@@ -865,16 +898,19 @@
                     md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
                     checkIntraInInter(md.pred[PRED_INTRA], cuGeom);
                     encodeIntraInInter(md.pred[PRED_INTRA], cuGeom);
-                    if (md.pred[PRED_INTRA].rdCost < md.bestMode->rdCost)
-                        md.bestMode = &md.pred[PRED_INTRA];
+                    checkBestMode(md.pred[PRED_INTRA], depth);
                 }
             }
             else
             {
-                /* SA8D choice between merge/skip, inter, and intra */
+                /* SA8D choice between merge/skip, inter, bidir, and intra */
                 if (!md.bestMode || bestInter->sa8dCost < md.bestMode->sa8dCost)
                     md.bestMode = bestInter;
 
+                if (m_slice->m_sliceType == B_SLICE &&
+                    md.pred[PRED_BIDIR].sa8dCost < md.bestMode->sa8dCost)
+                    md.bestMode = &md.pred[PRED_BIDIR];
+
                 if (bTryIntra || md.bestMode->sa8dCost == MAX_INT64)
                 {
                     md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
@@ -1052,9 +1088,19 @@
             checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, false);
             checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth);
 
+            if (m_slice->m_sliceType == B_SLICE)
+            {
+                md.pred[PRED_BIDIR].cu.initSubCU(parentCTU, cuGeom);
+                checkBidir2Nx2N(md.pred[PRED_2Nx2N], md.pred[PRED_BIDIR], cuGeom);
+                if (md.pred[PRED_BIDIR].sa8dCost < MAX_INT64)
+                {
+                    encodeResAndCalcRdInterCU(md.pred[PRED_BIDIR], cuGeom);
+                    checkBestMode(md.pred[PRED_BIDIR], cuGeom.depth);
+                }
+            }
+
             if (m_param->bEnableRectInter)
             {
-                // Nx2N rect
                 if (!m_param->bEnableCbfFastMode || md.bestMode->cu.getQtRootCbf(0))
                 {
                     md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom);
@@ -1486,6 +1532,110 @@
     }
 }
 
+void Analysis::checkBidir2Nx2N(Mode& inter2Nx2N, Mode& bidir2Nx2N, const CUGeom& cuGeom)
+{ // Builds a bi-predicted 2Nx2N candidate in bidir2Nx2N from the list-0/list-1 ME results held in inter2Nx2N and scores it (sa8dBits, sa8dCost)
+    CUData& cu = bidir2Nx2N.cu;
+
+    if (cu.isBipredRestriction() || inter2Nx2N.bestME[0].cost == MAX_UINT || inter2Nx2N.bestME[1].cost == MAX_UINT)
+    { // no candidate: the CU reports a bipred restriction, or one of the lists still has bestME cost == MAX_UINT
+        bidir2Nx2N.sa8dCost = MAX_INT64; // sentinel; callers test "sa8dCost < MAX_INT64" before encoding this mode
+        bidir2Nx2N.rdCost = MAX_INT64;
+        return;
+    }
+
+    const Yuv& fencYuv = *bidir2Nx2N.fencYuv;
+    MV   mvzero(0, 0);
+    int  partEnum = cuGeom.log2CUSize - 2; // index into the sa8d[] and pixelavg_pp[] primitive tables used below
+
+    bidir2Nx2N.bestME[0] = inter2Nx2N.bestME[0]; // start from the unidirectional 2Nx2N results of each list
+    bidir2Nx2N.bestME[1] = inter2Nx2N.bestME[1];
+    int ref0    = bidir2Nx2N.bestME[0].ref;
+    MV  mvp0    = bidir2Nx2N.bestME[0].mvp;
+    int mvpIdx0 = bidir2Nx2N.bestME[0].mvpIdx;
+    int ref1    = bidir2Nx2N.bestME[1].ref;
+    MV  mvp1    = bidir2Nx2N.bestME[1].mvp;
+    int mvpIdx1 = bidir2Nx2N.bestME[1].mvpIdx;
+
+    bidir2Nx2N.initCosts();
+    cu.setPartSizeSubParts(SIZE_2Nx2N);
+    cu.setPredModeSubParts(MODE_INTER);
+    cu.setPUInterDir(3, 0, 0); // NOTE(review): 3 presumably means "both lists" - confirm against setPUInterDir
+    cu.setPURefIdx(0, (char)ref0, 0, 0);
+    cu.setPURefIdx(1, (char)ref1, 0, 0);
+    cu.m_mvpIdx[0][0] = (uint8_t)mvpIdx0;
+    cu.m_mvpIdx[1][0] = (uint8_t)mvpIdx1;
+    cu.m_mergeFlag[0] = 0;
+
+    /* Estimate cost of BIDIR using best 2Nx2N L0 and L1 motion vectors */
+    cu.setPUMv(0, bidir2Nx2N.bestME[0].mv, 0, 0);
+    cu.m_mvd[0][0] = bidir2Nx2N.bestME[0].mv - mvp0;
+
+    cu.setPUMv(1, bidir2Nx2N.bestME[1].mv, 0, 0);
+    cu.m_mvd[1][0] = bidir2Nx2N.bestME[1].mv - mvp1;
+
+    prepMotionCompensation(cu, cuGeom, 0);
+    motionCompensation(bidir2Nx2N.predYuv, true, true); // NOTE(review): the two flags presumably select luma and chroma - confirm
+
+    int sa8d = primitives.sa8d[partEnum](fencYuv.m_buf[0], fencYuv.m_size, bidir2Nx2N.predYuv.m_buf[0], bidir2Nx2N.predYuv.m_size);
+    bidir2Nx2N.sa8dBits = bidir2Nx2N.bestME[0].bits + bidir2Nx2N.bestME[1].bits + m_listSelBits[2] - (m_listSelBits[0] + m_listSelBits[1]);
+    bidir2Nx2N.sa8dCost = sa8d + m_rdCost.getCost(bidir2Nx2N.sa8dBits); // plane-0 SA8D plus cost of sa8dBits (m_listSelBits[2] replaces the two single-list terms)
+
+    bool bTryZero = bidir2Nx2N.bestME[0].mv.notZero() || bidir2Nx2N.bestME[1].mv.notZero(); // zero-MV candidate only differs if some ME vector is non-zero
+    if (bTryZero)
+    {
+        /* Do not try zero MV if unidir motion predictors are beyond
+         * valid search area */
+        MV mvmin, mvmax;
+        int merange = X265_MAX(m_param->sourceWidth, m_param->sourceHeight);
+        setSearchRange(cu, mvzero, merange, mvmin, mvmax);
+        mvmax.y += 2; // there is some pad for subpel refine
+        mvmin <<= 2; // NOTE(review): presumably full-pel to quarter-pel scaling - confirm
+        mvmax <<= 2;
+
+        bTryZero &= bidir2Nx2N.bestME[0].mvp.checkRange(mvmin, mvmax);
+        bTryZero &= bidir2Nx2N.bestME[1].mvp.checkRange(mvmin, mvmax);
+    }
+    if (bTryZero)
+    {
+        /* Estimate cost of BIDIR using coincident blocks (zero motion vector in both lists) */
+        Yuv& tmpPredYuv = m_rqt[cuGeom.depth].tmpPredYuv; // scratch buffer; predYuv is only overwritten if the zero-MV candidate wins
+        pixel *fref0 = m_slice->m_mref[0][ref0].getLumaAddr(cu.m_cuAddr, cuGeom.encodeIdx);
+        pixel *fref1 = m_slice->m_mref[1][ref1].getLumaAddr(cu.m_cuAddr, cuGeom.encodeIdx);
+        intptr_t refStride = m_slice->m_mref[0][0].lumaStride; // NOTE(review): one stride, read from [0][0], serves both fref0 and fref1 - assumes equal luma strides
+
+        primitives.pixelavg_pp[partEnum](tmpPredYuv.m_buf[0], tmpPredYuv.m_size, fref0, refStride, fref1, refStride, 32); // NOTE(review): 32 presumably means equal weighting - confirm
+        int sa8dCost = primitives.sa8d[partEnum](fencYuv.m_buf[0], fencYuv.m_size, tmpPredYuv.m_buf[0], tmpPredYuv.m_size);
+
+        uint32_t bits0 = bidir2Nx2N.bestME[0].bits - m_me.bitcost(bidir2Nx2N.bestME[0].mv, mvp0) + m_me.bitcost(mvzero, mvp0);
+        uint32_t bits1 = bidir2Nx2N.bestME[1].bits - m_me.bitcost(bidir2Nx2N.bestME[1].mv, mvp1) + m_me.bitcost(mvzero, mvp1);
+        uint32_t zcost = sa8dCost + m_rdCost.getCost(bits0) + m_rdCost.getCost(bits1); // provisional value for checkBestMVP; recomputed from zbits below
+
+        /* refine MVP selection for zero mv, updates: mvp, mvpidx, bits, cost */
+        checkBestMVP(inter2Nx2N.amvpCand[0][ref0], mvzero, mvp0, mvpIdx0, bits0, zcost);
+        checkBestMVP(inter2Nx2N.amvpCand[1][ref1], mvzero, mvp1, mvpIdx1, bits1, zcost);
+
+        uint32_t zbits = bits0 + bits1 + m_listSelBits[2] - (m_listSelBits[0] + m_listSelBits[1]);
+        zcost = sa8dCost + m_rdCost.getCost(zbits);
+
+        if (zcost < bidir2Nx2N.sa8dCost) // ties keep the ME-vector candidate
+        {
+            bidir2Nx2N.sa8dBits = zbits;
+            bidir2Nx2N.sa8dCost = zcost; // NOTE(review): bidir2Nx2N.bestME[] is not updated in this branch and still holds the ME vectors
+
+            cu.setPUMv(0, mvzero, 0, 0);
+            cu.m_mvd[0][0] = mvzero - mvp0;
+            cu.m_mvpIdx[0][0] = (uint8_t)mvpIdx0;
+
+            cu.setPUMv(1, mvzero, 0, 0);
+            cu.m_mvd[1][0] = mvzero - mvp1;
+            cu.m_mvpIdx[1][0] = (uint8_t)mvpIdx1;
+
+            prepMotionCompensation(cu, cuGeom, 0); // redo MC so predYuv matches the zero vectors just stored
+            motionCompensation(bidir2Nx2N.predYuv, true, true);
+        }
+    }
+}
+
 void Analysis::encodeResidue(const CUData& ctu, const CUGeom& cuGeom)
 {
     if (cuGeom.depth < ctu.m_cuDepth[cuGeom.encodeIdx] && cuGeom.depth < g_maxCUDepth)
diff -r 1e04e178a349 -r 3bb94141e18b source/encoder/analysis.h
--- a/source/encoder/analysis.h	Sun Nov 09 00:30:09 2014 -0600
+++ b/source/encoder/analysis.h	Sat Nov 08 16:10:53 2014 -0600
@@ -49,6 +49,7 @@
         PRED_SKIP,
         PRED_INTRA,
         PRED_2Nx2N,
+        PRED_BIDIR,
         PRED_Nx2N,
         PRED_2NxN,
         PRED_SPLIT,
@@ -105,6 +106,8 @@
     void checkInter_rd0_4(Mode& interMode, const CUGeom& cuGeom, PartSize partSize);
     void checkInter_rd5_6(Mode& interMode, const CUGeom& cuGeom, PartSize partSize, bool bMergeOnly);
 
+    void checkBidir2Nx2N(Mode& inter2Nx2N, Mode& bidir2Nx2N, const CUGeom& cuGeom);
+
     /* encode current bestMode losslessly, pick best RD cost */
     void tryLossless(const CUGeom& cuGeom);
 
diff -r 1e04e178a349 -r 3bb94141e18b source/encoder/search.cpp
--- a/source/encoder/search.cpp	Sun Nov 09 00:30:09 2014 -0600
+++ b/source/encoder/search.cpp	Sat Nov 08 16:10:53 2014 -0600
@@ -1984,10 +1984,6 @@
             }
         }
 
-        MotionData bidir[2];
-        uint32_t bidirCost = MAX_UINT;
-        int bidirBits = 0;
-
         interMode.bestME[0].cost = MAX_UINT;
         interMode.bestME[1].cost = MAX_UINT;
 
@@ -2162,12 +2158,18 @@
         }
 
         /* Bi-directional prediction */
-        if (slice->isInterB() && !cu.isBipredRestriction() && interMode.bestME[0].cost != MAX_UINT && interMode.bestME[1].cost != MAX_UINT)
+        MotionData bidir[2];
+        uint32_t bidirCost = MAX_UINT;
+        int bidirBits = 0;
+
+        if (slice->isInterB() && !cu.isBipredRestriction() &&  /* biprediction is possible for this PU */
+            cu.m_partSize[m_puAbsPartIdx] != SIZE_2Nx2N &&     /* 2Nx2N biprediction is handled elsewhere */
+            interMode.bestME[0].cost != MAX_UINT && interMode.bestME[1].cost != MAX_UINT)
         {
             bidir[0] = interMode.bestME[0];
             bidir[1] = interMode.bestME[1];
 
-            /* Generate reference subpels */
+            /* Generate reference subpels. TODO: This ignores reference slice weights */
             PicYuv* refPic0  = slice->m_refPicList[0][interMode.bestME[0].ref]->m_reconPic;
             PicYuv* refPic1  = slice->m_refPicList[1][interMode.bestME[1].ref]->m_reconPic;
             Yuv*    bidirYuv = m_rqt[cuGeom.depth].bidirPredYuv;
@@ -2235,11 +2237,6 @@
                     bidirBits = bits0 + bits1 + m_listSelBits[2] - (m_listSelBits[0] + m_listSelBits[1]);
                 }
             }
-
-            /* Ugly hack - since BIDIR is not yet an RD decision, add a penalty
-             * if psy-rd is enabled */
-            if (m_rdCost.m_psyRd)
-                bidirCost += (m_rdCost.m_psyRd * bidirCost) >> 8;
         }
 
         /* select best option and store into CU */


More information about the x265-devel mailing list