[x265] [PATCH 2 of 2] search: drop bMergeOnly argument to predInterSearch()

Steve Borho steve at borho.org
Sat Apr 25 07:45:29 CEST 2015


# HG changeset patch
# User Steve Borho <steve at borho.org>
# Date 1429940485 18000
#      Sat Apr 25 00:41:25 2015 -0500
# Node ID 318aa6a4eaf2d58cf7d223bf92448d94cc439a5b
# Parent  6a0a37c01cff03cadd44691a0fe447d17ec0b14f
search: drop bMergeOnly argument to predInterSearch()

This was a hold-over from the HM, which never wanted to perform motion searches
for AMP PUs for 64x64 CUs, presumably because they were never optimized.
Because of the way the rd-levels were developed, RD levels 0..4 would always
hard-disable bMergeOnly, but those presets never used --amp by default. This
left us in the odd situation of --preset medium --rect --amp doing more ME work
than --preset slower --rect --amp.

This patch makes AMP partitions always perform motion estimation, regardless of
CU size and RD level. It should give a small improvement to compression
efficiency at slow presets for a minimal performance cost.

diff -r 6a0a37c01cff -r 318aa6a4eaf2 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Sat Apr 25 00:39:48 2015 -0500
+++ b/source/encoder/analysis.cpp	Sat Apr 25 00:41:25 2015 -0500
@@ -394,7 +394,6 @@
     }
 
     ModeDepth& md = m_modeDepth[pmode.cuGeom.depth];
-    bool bMergeOnly = pmode.cuGeom.log2CUSize == 6;
 
     /* setup slave Analysis */
     if (&slave != this)
@@ -475,7 +474,7 @@
                 break;
 
             case PRED_2Nx2N:
-                slave.checkInter_rd5_6(md.pred[PRED_2Nx2N], pmode.cuGeom, SIZE_2Nx2N, false);
+                slave.checkInter_rd5_6(md.pred[PRED_2Nx2N], pmode.cuGeom, SIZE_2Nx2N);
                 md.pred[PRED_BIDIR].rdCost = MAX_INT64;
                 if (m_slice->m_sliceType == B_SLICE)
                 {
@@ -486,27 +485,27 @@
                 break;
 
             case PRED_Nx2N:
-                slave.checkInter_rd5_6(md.pred[PRED_Nx2N], pmode.cuGeom, SIZE_Nx2N, false);
+                slave.checkInter_rd5_6(md.pred[PRED_Nx2N], pmode.cuGeom, SIZE_Nx2N);
                 break;
 
             case PRED_2NxN:
-                slave.checkInter_rd5_6(md.pred[PRED_2NxN], pmode.cuGeom, SIZE_2NxN, false);
+                slave.checkInter_rd5_6(md.pred[PRED_2NxN], pmode.cuGeom, SIZE_2NxN);
                 break;
 
             case PRED_2NxnU:
-                slave.checkInter_rd5_6(md.pred[PRED_2NxnU], pmode.cuGeom, SIZE_2NxnU, bMergeOnly);
+                slave.checkInter_rd5_6(md.pred[PRED_2NxnU], pmode.cuGeom, SIZE_2NxnU);
                 break;
 
             case PRED_2NxnD:
-                slave.checkInter_rd5_6(md.pred[PRED_2NxnD], pmode.cuGeom, SIZE_2NxnD, bMergeOnly);
+                slave.checkInter_rd5_6(md.pred[PRED_2NxnD], pmode.cuGeom, SIZE_2NxnD);
                 break;
 
             case PRED_nLx2N:
-                slave.checkInter_rd5_6(md.pred[PRED_nLx2N], pmode.cuGeom, SIZE_nLx2N, bMergeOnly);
+                slave.checkInter_rd5_6(md.pred[PRED_nLx2N], pmode.cuGeom, SIZE_nLx2N);
                 break;
 
             case PRED_nRx2N:
-                slave.checkInter_rd5_6(md.pred[PRED_nRx2N], pmode.cuGeom, SIZE_nRx2N, bMergeOnly);
+                slave.checkInter_rd5_6(md.pred[PRED_nRx2N], pmode.cuGeom, SIZE_nRx2N);
                 break;
 
             default:
@@ -1120,7 +1119,7 @@
         if (!earlySkip)
         {
             md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
-            checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, false);
+            checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N);
             checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth);
 
             if (m_slice->m_sliceType == B_SLICE)
@@ -1137,19 +1136,17 @@
             if (m_param->bEnableRectInter)
             {
                 md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
-                checkInter_rd5_6(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N, false);
+                checkInter_rd5_6(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N);
                 checkBestMode(md.pred[PRED_Nx2N], cuGeom.depth);
 
                 md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp);
-                checkInter_rd5_6(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, false);
+                checkInter_rd5_6(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN);
                 checkBestMode(md.pred[PRED_2NxN], cuGeom.depth);
             }
 
             // Try AMP (SIZE_2NxnU, SIZE_2NxnD, SIZE_nLx2N, SIZE_nRx2N)
             if (m_slice->m_sps->maxAMPDepth > depth)
             {
-                bool bMergeOnly = cuGeom.log2CUSize == 6;
-
                 bool bHor = false, bVer = false;
                 if (md.bestMode->cu.m_partSize[0] == SIZE_2NxN)
                     bHor = true;
@@ -1164,21 +1161,21 @@
                 if (bHor)
                 {
                     md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom, qp);
-                    checkInter_rd5_6(md.pred[PRED_2NxnU], cuGeom, SIZE_2NxnU, bMergeOnly);
+                    checkInter_rd5_6(md.pred[PRED_2NxnU], cuGeom, SIZE_2NxnU);
                     checkBestMode(md.pred[PRED_2NxnU], cuGeom.depth);
 
                     md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp);
-                    checkInter_rd5_6(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, bMergeOnly);
+                    checkInter_rd5_6(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD);
                     checkBestMode(md.pred[PRED_2NxnD], cuGeom.depth);
                 }
                 if (bVer)
                 {
                     md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom, qp);
-                    checkInter_rd5_6(md.pred[PRED_nLx2N], cuGeom, SIZE_nLx2N, bMergeOnly);
+                    checkInter_rd5_6(md.pred[PRED_nLx2N], cuGeom, SIZE_nLx2N);
                     checkBestMode(md.pred[PRED_nLx2N], cuGeom.depth);
 
                     md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp);
-                    checkInter_rd5_6(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, bMergeOnly);
+                    checkInter_rd5_6(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N);
                     checkBestMode(md.pred[PRED_nRx2N], cuGeom.depth);
                 }
             }
@@ -1533,7 +1530,7 @@
         }
     }
 
-    predInterSearch(interMode, cuGeom, false, m_bChromaSa8d);
+    predInterSearch(interMode, cuGeom, m_bChromaSa8d);
 
     /* predInterSearch sets interMode.sa8dBits */
     const Yuv& fencYuv = *interMode.fencYuv;
@@ -1561,7 +1558,7 @@
     }
 }
 
-void Analysis::checkInter_rd5_6(Mode& interMode, const CUGeom& cuGeom, PartSize partSize, bool bMergeOnly)
+void Analysis::checkInter_rd5_6(Mode& interMode, const CUGeom& cuGeom, PartSize partSize)
 {
     interMode.initCosts();
     interMode.cu.setPartSizeSubParts(partSize);
@@ -1581,7 +1578,7 @@
         }
     }
 
-    predInterSearch(interMode, cuGeom, bMergeOnly, true);
+    predInterSearch(interMode, cuGeom, true);
 
     /* predInterSearch sets interMode.sa8dBits, but this is ignored */
     encodeResAndCalcRdInterCU(interMode, cuGeom);
diff -r 6a0a37c01cff -r 318aa6a4eaf2 source/encoder/analysis.h
--- a/source/encoder/analysis.h	Sat Apr 25 00:39:48 2015 -0500
+++ b/source/encoder/analysis.h	Sat Apr 25 00:41:25 2015 -0500
@@ -125,7 +125,7 @@
 
     /* measure inter options */
     void checkInter_rd0_4(Mode& interMode, const CUGeom& cuGeom, PartSize partSize);
-    void checkInter_rd5_6(Mode& interMode, const CUGeom& cuGeom, PartSize partSize, bool bMergeOnly);
+    void checkInter_rd5_6(Mode& interMode, const CUGeom& cuGeom, PartSize partSize);
 
     void checkBidir2Nx2N(Mode& inter2Nx2N, Mode& bidir2Nx2N, const CUGeom& cuGeom);
 
diff -r 6a0a37c01cff -r 318aa6a4eaf2 source/encoder/search.cpp
--- a/source/encoder/search.cpp	Sat Apr 25 00:39:48 2015 -0500
+++ b/source/encoder/search.cpp	Sat Apr 25 00:41:25 2015 -0500
@@ -1977,7 +1977,7 @@
 }
 
 /* find the best inter prediction for each PU of specified mode */
-void Search::predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bMergeOnly, bool bChromaSA8D)
+void Search::predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bChromaMC)
 {
     ProfileCUScope(interMode.cu, motionEstimationElapsedTime, countMotionEstimate);
 
@@ -1998,7 +1998,6 @@
     Yuv&     tmpPredYuv = m_rqt[cuGeom.depth].tmpPredYuv;
 
     MergeData merge;
-    uint32_t mrgCost;
     memset(&merge, 0, sizeof(merge));
 
     for (int puIdx = 0; puIdx < numPart; puIdx++)
@@ -2009,27 +2008,7 @@
         m_me.setSourcePU(*interMode.fencYuv, pu.ctuAddr, pu.cuAbsPartIdx, pu.puAbsPartIdx, pu.width, pu.height);
 
         /* find best cost merge candidate. note: 2Nx2N merge and bidir are handled as separate modes */
-        if (cu.m_partSize[0] != SIZE_2Nx2N)
-        {
-            mrgCost = mergeEstimation(cu, cuGeom, pu, puIdx, merge);
-
-            if (bMergeOnly && mrgCost != MAX_UINT)
-            {
-                cu.m_mergeFlag[pu.puAbsPartIdx] = true;
-                cu.m_mvpIdx[0][pu.puAbsPartIdx] = merge.index; // merge candidate ID is stored in L0 MVP idx
-                cu.setPUInterDir(merge.dir, pu.puAbsPartIdx, puIdx);
-                cu.setPUMv(0, merge.mvField[0].mv, pu.puAbsPartIdx, puIdx);
-                cu.setPURefIdx(0, merge.mvField[0].refIdx, pu.puAbsPartIdx, puIdx);
-                cu.setPUMv(1, merge.mvField[1].mv, pu.puAbsPartIdx, puIdx);
-                cu.setPURefIdx(1, merge.mvField[1].refIdx, pu.puAbsPartIdx, puIdx);
-                totalmebits += merge.bits;
-
-                motionCompensation(cu, pu, *predYuv, true, bChromaSA8D);
-                continue;
-            }
-        }
-        else
-            mrgCost = MAX_UINT;
+        uint32_t mrgCost = numPart == 1 ? MAX_UINT : mergeEstimation(cu, cuGeom, pu, puIdx, merge);
 
         bestME[0].cost = MAX_UINT;
         bestME[1].cost = MAX_UINT;
@@ -2359,7 +2338,7 @@
             totalmebits += bestME[1].bits;
         }
 
-        motionCompensation(cu, pu, *predYuv, true, bChromaSA8D);
+        motionCompensation(cu, pu, *predYuv, true, bChromaMC);
     }
     X265_CHECK(interMode.ok(), "inter mode is not ok");
     interMode.sa8dBits += totalmebits;
diff -r 6a0a37c01cff -r 318aa6a4eaf2 source/encoder/search.h
--- a/source/encoder/search.h	Sat Apr 25 00:39:48 2015 -0500
+++ b/source/encoder/search.h	Sat Apr 25 00:41:25 2015 -0500
@@ -301,7 +301,7 @@
     void     encodeIntraInInter(Mode& intraMode, const CUGeom& cuGeom);
 
     // estimation inter prediction (non-skip)
-    void     predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bMergeOnly, bool bChroma);
+    void     predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bChromaMC);
 
     // encode residual and compute rd-cost for inter mode
     void     encodeResAndCalcRdInterCU(Mode& interMode, const CUGeom& cuGeom);


More information about the x265-devel mailing list