[x265] [PATCH 02 of 10 RFC] analysis: at RD 5/6 avoid motion references if not used by split blocks

Steve Borho steve at borho.org
Tue Mar 31 03:29:38 CEST 2015


# HG changeset patch
# User Steve Borho <steve at borho.org>
# Date 1426555173 18000
#      Mon Mar 16 20:19:33 2015 -0500
# Node ID 7e7bb565c9776e5c46dcbcd25df27e0e0dbfe071
# Parent  af259ac3d304009043f95f72d6d5b7d1519a1838
analysis: at RD 5/6 avoid motion references if not used by split blocks

diff -r af259ac3d304 -r 7e7bb565c977 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Mon Mar 16 20:19:33 2015 -0500
+++ b/source/encoder/analysis.cpp	Mon Mar 16 20:19:33 2015 -0500
@@ -432,6 +432,8 @@
         }
         else
         {
+            uint32_t refMasks[2] = { 0, 0 };
+
             switch (pmode.modes[task])
             {
             case PRED_INTRA:
@@ -441,7 +443,7 @@
                 break;
 
             case PRED_2Nx2N:
-                slave.checkInter_rd5_6(md.pred[PRED_2Nx2N], pmode.cuGeom, SIZE_2Nx2N, false);
+                slave.checkInter_rd5_6(md.pred[PRED_2Nx2N], pmode.cuGeom, SIZE_2Nx2N, false, refMasks);
                 md.pred[PRED_BIDIR].rdCost = MAX_INT64;
                 if (m_slice->m_sliceType == B_SLICE)
                 {
@@ -452,27 +454,27 @@
                 break;
 
             case PRED_Nx2N:
-                slave.checkInter_rd5_6(md.pred[PRED_Nx2N], pmode.cuGeom, SIZE_Nx2N, false);
+                slave.checkInter_rd5_6(md.pred[PRED_Nx2N], pmode.cuGeom, SIZE_Nx2N, false, refMasks);
                 break;
 
             case PRED_2NxN:
-                slave.checkInter_rd5_6(md.pred[PRED_2NxN], pmode.cuGeom, SIZE_2NxN, false);
+                slave.checkInter_rd5_6(md.pred[PRED_2NxN], pmode.cuGeom, SIZE_2NxN, false, refMasks);
                 break;
 
             case PRED_2NxnU:
-                slave.checkInter_rd5_6(md.pred[PRED_2NxnU], pmode.cuGeom, SIZE_2NxnU, bMergeOnly);
+                slave.checkInter_rd5_6(md.pred[PRED_2NxnU], pmode.cuGeom, SIZE_2NxnU, bMergeOnly, refMasks);
                 break;
 
             case PRED_2NxnD:
-                slave.checkInter_rd5_6(md.pred[PRED_2NxnD], pmode.cuGeom, SIZE_2NxnD, bMergeOnly);
+                slave.checkInter_rd5_6(md.pred[PRED_2NxnD], pmode.cuGeom, SIZE_2NxnD, bMergeOnly, refMasks);
                 break;
 
             case PRED_nLx2N:
-                slave.checkInter_rd5_6(md.pred[PRED_nLx2N], pmode.cuGeom, SIZE_nLx2N, bMergeOnly);
+                slave.checkInter_rd5_6(md.pred[PRED_nLx2N], pmode.cuGeom, SIZE_nLx2N, bMergeOnly, refMasks);
                 break;
 
             case PRED_nRx2N:
-                slave.checkInter_rd5_6(md.pred[PRED_nRx2N], pmode.cuGeom, SIZE_nRx2N, bMergeOnly);
+                slave.checkInter_rd5_6(md.pred[PRED_nRx2N], pmode.cuGeom, SIZE_nRx2N, bMergeOnly, refMasks);
                 break;
 
             default:
@@ -1025,7 +1027,7 @@
         md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr, cuGeom.absPartIdx);
 }
 
-void Analysis::compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder)
+uint32_t Analysis::compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder)
 {
     uint32_t depth = cuGeom.depth;
     ModeDepth& md = m_modeDepth[depth];
@@ -1058,6 +1060,7 @@
         }
     }
 
+    uint32_t splitRefs[4] = { 0, 0, 0, 0 };
     bool foundSkip = false;
 
     /* Step 1. Evaluate Merge/Skip candidates for likely early-outs */
@@ -1089,7 +1092,7 @@
             {
                 m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childGeom.absPartIdx);
                 m_rqt[nextDepth].cur.load(*nextContext);
-                compressInterCU_rd5_6(parentCTU, childGeom, zOrder);
+                splitRefs[subPartIdx] = compressInterCU_rd5_6(parentCTU, childGeom, zOrder);
 
                 // Save best CU and pred data for this sub CU
                 splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
@@ -1112,11 +1115,19 @@
 	    checkDQPForSplitPred(splitPred->cu, cuGeom);
     }
 
+    /* Split CU layout (each number is an index into splitRefs[]):
+     *   0  1
+     *   2  3 */
+    uint32_t allSplitRefs = splitRefs[0] | splitRefs[1] | splitRefs[2] | splitRefs[3];
+
     /* Step 3. Evaluate ME (2Nx2N, rect, amp) and intra modes at current depth */
     if (mightNotSplit && !(foundSkip && m_param->bEnableEarlySkip))
     {
+        uint32_t refMasks[2];
+
+        refMasks[0] = allSplitRefs;
         md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom);
-        checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, false);
+        checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, false, refMasks);
         checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth);
 
         if (m_slice->m_sliceType == B_SLICE)
@@ -1133,12 +1144,16 @@
 
         if (m_param->bEnableRectInter)
         {
+            refMasks[0] = splitRefs[0] | splitRefs[2]; /* left */
+            refMasks[1] = splitRefs[1] | splitRefs[3]; /* right */
             md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom);
-            checkInter_rd5_6(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N, false);
+            checkInter_rd5_6(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N, false, refMasks);
             checkBestMode(md.pred[PRED_Nx2N], cuGeom.depth);
 
+            refMasks[0] = splitRefs[0] | splitRefs[1]; /* top */
+            refMasks[1] = splitRefs[2] | splitRefs[3]; /* bot */
             md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom);
-            checkInter_rd5_6(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, false);
+            checkInter_rd5_6(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, false, refMasks);
             checkBestMode(md.pred[PRED_2NxN], cuGeom.depth);
         }
 
@@ -1160,22 +1175,30 @@
 
             if (bHor)
             {
+                refMasks[0] = splitRefs[0] | splitRefs[1]; /* 25% top */
+                refMasks[1] = allSplitRefs;                /* 75% bot */
                 md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom);
-                checkInter_rd5_6(md.pred[PRED_2NxnU], cuGeom, SIZE_2NxnU, bMergeOnly);
+                checkInter_rd5_6(md.pred[PRED_2NxnU], cuGeom, SIZE_2NxnU, bMergeOnly, refMasks);
                 checkBestMode(md.pred[PRED_2NxnU], cuGeom.depth);
 
+                refMasks[0] = allSplitRefs;                /* 75% top */
+                refMasks[1] = splitRefs[2] | splitRefs[3]; /* 25% bot */
                 md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom);
-                checkInter_rd5_6(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, bMergeOnly);
+                checkInter_rd5_6(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, bMergeOnly, refMasks);
                 checkBestMode(md.pred[PRED_2NxnD], cuGeom.depth);
             }
             if (bVer)
             {
+                refMasks[0] = splitRefs[0] | splitRefs[2]; /* 25% left */
+                refMasks[1] = allSplitRefs;                /* 75% right */
                 md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom);
-                checkInter_rd5_6(md.pred[PRED_nLx2N], cuGeom, SIZE_nLx2N, bMergeOnly);
+                checkInter_rd5_6(md.pred[PRED_nLx2N], cuGeom, SIZE_nLx2N, bMergeOnly, refMasks);
                 checkBestMode(md.pred[PRED_nLx2N], cuGeom.depth);
 
+                refMasks[0] = allSplitRefs;                /* 75% left */
+                refMasks[1] = splitRefs[1] | splitRefs[3]; /* 25% right */
                 md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom);
-                checkInter_rd5_6(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, bMergeOnly);
+                checkInter_rd5_6(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, bMergeOnly, refMasks);
                 checkBestMode(md.pred[PRED_nRx2N], cuGeom.depth);
             }
         }
@@ -1206,9 +1229,63 @@
         
         checkBestMode(md.pred[PRED_SPLIT], depth);
 
+    /* determine which motion references the parent CU should search */
+    uint32_t refMask;
+    if (md.bestMode == &md.pred[PRED_SPLIT])
+        refMask = allSplitRefs;
+    else if (md.bestMode->cu.isIntra(0))
+    {
+        /* use 2Nx2N inter references */
+        CUData& cu = md.pred[PRED_2Nx2N].cu;
+        switch (cu.m_interDir[0])
+        {
+        case 1:
+            refMask = 1 << cu.m_refIdx[0][0];
+            break;
+        case 2:
+            refMask = 1 << (cu.m_refIdx[1][0] + 16);
+            break;
+        case 3:
+            refMask = 1 << cu.m_refIdx[0][0];
+            refMask |= 1 << (cu.m_refIdx[1][0] + 16);
+            break;
+        }
+    }
+    else if (md.bestMode->cu.m_partSize[0] == SIZE_2Nx2N)
+    {
+        /* use best merge/inter 2Nx2N mode */
+        CUData& cu = md.bestMode->cu;
+        switch (cu.m_interDir[0])
+        {
+        case 1:
+            refMask = 1 << cu.m_refIdx[0][0];
+            break;
+        case 2:
+            refMask = 1 << (cu.m_refIdx[1][0] + 16);
+            break;
+        case 3:
+            refMask = 1 << cu.m_refIdx[0][0];
+            refMask |= 1 << (cu.m_refIdx[1][0] + 16);
+            break;
+        }
+    }
+    else
+    {
+        /* Else this CU has two inter parts */
+        Mode& m = *md.bestMode;
+        refMask = (1 << m.bestME[0][0].ref) | (1 << m.bestME[1][0].ref);
+        if (m_slice->m_sliceType == B_SLICE)
+        {
+            refMask |= 1 << (m.bestME[0][1].ref + 16);
+            refMask |= 1 << (m.bestME[1][1].ref + 16);
+        }
+    }
+
     /* Copy best data to encData CTU and recon */
     md.bestMode->cu.copyToPic(depth);
     md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, parentCTU.m_cuAddr, cuGeom.absPartIdx);
+
+    return refMask;
 }
 
 /* sets md.bestMode if a valid merge candidate is found, else leaves it NULL */
@@ -1479,8 +1556,8 @@
             }
         }
     }
-
-    predInterSearch(interMode, cuGeom, false, m_bChromaSa8d);
+    uint32_t refMask[2] = { 0, 0 };
+    predInterSearch(interMode, cuGeom, false, m_bChromaSa8d, refMask);
 
     /* predInterSearch sets interMode.sa8dBits */
     const Yuv& fencYuv = *interMode.fencYuv;
@@ -1508,7 +1585,7 @@
     }
 }
 
-void Analysis::checkInter_rd5_6(Mode& interMode, const CUGeom& cuGeom, PartSize partSize, bool bMergeOnly)
+void Analysis::checkInter_rd5_6(Mode& interMode, const CUGeom& cuGeom, PartSize partSize, bool bMergeOnly, uint32_t refMask[2])
 {
     interMode.initCosts();
     interMode.cu.setPartSizeSubParts(partSize);
@@ -1528,7 +1605,7 @@
         }
     }
 
-    predInterSearch(interMode, cuGeom, bMergeOnly, true);
+    predInterSearch(interMode, cuGeom, bMergeOnly, true, refMask);
 
     /* predInterSearch sets interMode.sa8dBits, but this is ignored */
     encodeResAndCalcRdInterCU(interMode, cuGeom);
diff -r af259ac3d304 -r 7e7bb565c977 source/encoder/analysis.h
--- a/source/encoder/analysis.h	Mon Mar 16 20:19:33 2015 -0500
+++ b/source/encoder/analysis.h	Mon Mar 16 20:19:33 2015 -0500
@@ -114,7 +114,7 @@
     /* full analysis for a P or B slice CU */
     void compressInterCU_dist(const CUData& parentCTU, const CUGeom& cuGeom);
     void compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom);
-    void compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder);
+    uint32_t compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder);
 
     /* measure merge and skip */
     void checkMerge2Nx2N_rd0_4(Mode& skip, Mode& merge, const CUGeom& cuGeom);
@@ -122,7 +122,7 @@
 
     /* measure inter options */
     void checkInter_rd0_4(Mode& interMode, const CUGeom& cuGeom, PartSize partSize);
-    void checkInter_rd5_6(Mode& interMode, const CUGeom& cuGeom, PartSize partSize, bool bMergeOnly);
+    void checkInter_rd5_6(Mode& interMode, const CUGeom& cuGeom, PartSize partSize, bool bMergeOnly, uint32_t refmask[2]);
 
     void checkBidir2Nx2N(Mode& inter2Nx2N, Mode& bidir2Nx2N, const CUGeom& cuGeom);
 
diff -r af259ac3d304 -r 7e7bb565c977 source/encoder/search.cpp
--- a/source/encoder/search.cpp	Mon Mar 16 20:19:33 2015 -0500
+++ b/source/encoder/search.cpp	Mon Mar 16 20:19:33 2015 -0500
@@ -1977,7 +1977,7 @@
 }
 
 /* find the best inter prediction for each PU of specified mode */
-void Search::predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bMergeOnly, bool bChromaSA8D)
+void Search::predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bMergeOnly, bool bChromaSA8D, uint32_t refMasks[2])
 {
     ProfileCUScope(interMode.cu, motionEstimationElapsedTime, countMotionEstimate);
 
@@ -2124,10 +2124,15 @@
         }
         if (bDoUnidir)
         {
+            uint32_t refMask = refMasks[puIdx] ? refMasks[puIdx] : (uint32_t)-1;
+
             for (int list = 0; list < numPredDir; list++)
             {
                 for (int ref = 0; ref < numRefIdx[list]; ref++)
                 {
+                    if (!(refMask & (1 << ref)))
+                        continue;
+
                     uint32_t bits = m_listSelBits[list] + MVP_IDX_BITS;
                     bits += getTUBits(ref, numRefIdx[list]);
 
@@ -2182,6 +2187,9 @@
                         bestME[list].bits = bits;
                     }
                 }
+
+                /* list 1 reference bits start at bit 16; shift them down for the next list */
+                refMask >>= 16;
             }
         }
 
diff -r af259ac3d304 -r 7e7bb565c977 source/encoder/search.h
--- a/source/encoder/search.h	Mon Mar 16 20:19:33 2015 -0500
+++ b/source/encoder/search.h	Mon Mar 16 20:19:33 2015 -0500
@@ -301,7 +301,7 @@
     void     encodeIntraInInter(Mode& intraMode, const CUGeom& cuGeom);
 
     // estimation inter prediction (non-skip)
-    void     predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bMergeOnly, bool bChroma);
+    void     predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bMergeOnly, bool bChroma, uint32_t masks[2]);
 
     // encode residual and compute rd-cost for inter mode
     void     encodeResAndCalcRdInterCU(Mode& interMode, const CUGeom& cuGeom);


More information about the x265-devel mailing list