[x265] [PATCH 4 of 4] search: allow AMP to use motion estimation for 64x64 CUs

Steve Borho steve at borho.org
Thu Apr 30 03:09:50 CEST 2015


# HG changeset patch
# User Steve Borho <steve at borho.org>
# Date 1429940485 18000
#      Sat Apr 25 00:41:25 2015 -0500
# Node ID b75e05d2a9a7b95513d8665dfc19070335aa3a11
# Parent  cbc2004d147bf9566efba942c2a2b3ab35b824f7
search: allow AMP to use motion estimation for 64x64 CUs

The merge-only restriction was a hold-over from the HM, which never wanted to
perform motion searches for AMP PUs for 64x64 CUs, presumably because they were
never optimized.
Because of the way the rd-levels were developed, RD levels 0..4 always
hard-coded bMergeOnly to false, but to compensate they never attempted AMP
modes at 64x64 CUs.

This patch makes AMP partitions always perform motion estimation, regardless of
CU size and RD level, and it removes the bMergeOnly argument from
predInterSearch and checkInter_rd5_6.
It should give a small improvement to compression efficiency at slower presets
for a minimal performance cost (since 64x64 inter analysis is relatively rare).

diff -r cbc2004d147b -r b75e05d2a9a7 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Wed Apr 29 14:50:26 2015 -0500
+++ b/source/encoder/analysis.cpp	Sat Apr 25 00:41:25 2015 -0500
@@ -370,7 +370,6 @@
     }
 
     ModeDepth& md = m_modeDepth[pmode.cuGeom.depth];
-    bool bMergeOnly = pmode.cuGeom.log2CUSize == 6;
 
     /* setup slave Analysis */
     if (&slave != this)
@@ -441,7 +440,7 @@
                 break;
 
             case PRED_2Nx2N:
-                slave.checkInter_rd5_6(md.pred[PRED_2Nx2N], pmode.cuGeom, SIZE_2Nx2N, false);
+                slave.checkInter_rd5_6(md.pred[PRED_2Nx2N], pmode.cuGeom, SIZE_2Nx2N);
                 md.pred[PRED_BIDIR].rdCost = MAX_INT64;
                 if (m_slice->m_sliceType == B_SLICE)
                 {
@@ -452,27 +451,27 @@
                 break;
 
             case PRED_Nx2N:
-                slave.checkInter_rd5_6(md.pred[PRED_Nx2N], pmode.cuGeom, SIZE_Nx2N, false);
+                slave.checkInter_rd5_6(md.pred[PRED_Nx2N], pmode.cuGeom, SIZE_Nx2N);
                 break;
 
             case PRED_2NxN:
-                slave.checkInter_rd5_6(md.pred[PRED_2NxN], pmode.cuGeom, SIZE_2NxN, false);
+                slave.checkInter_rd5_6(md.pred[PRED_2NxN], pmode.cuGeom, SIZE_2NxN);
                 break;
 
             case PRED_2NxnU:
-                slave.checkInter_rd5_6(md.pred[PRED_2NxnU], pmode.cuGeom, SIZE_2NxnU, bMergeOnly);
+                slave.checkInter_rd5_6(md.pred[PRED_2NxnU], pmode.cuGeom, SIZE_2NxnU);
                 break;
 
             case PRED_2NxnD:
-                slave.checkInter_rd5_6(md.pred[PRED_2NxnD], pmode.cuGeom, SIZE_2NxnD, bMergeOnly);
+                slave.checkInter_rd5_6(md.pred[PRED_2NxnD], pmode.cuGeom, SIZE_2NxnD);
                 break;
 
             case PRED_nLx2N:
-                slave.checkInter_rd5_6(md.pred[PRED_nLx2N], pmode.cuGeom, SIZE_nLx2N, bMergeOnly);
+                slave.checkInter_rd5_6(md.pred[PRED_nLx2N], pmode.cuGeom, SIZE_nLx2N);
                 break;
 
             case PRED_nRx2N:
-                slave.checkInter_rd5_6(md.pred[PRED_nRx2N], pmode.cuGeom, SIZE_nRx2N, bMergeOnly);
+                slave.checkInter_rd5_6(md.pred[PRED_nRx2N], pmode.cuGeom, SIZE_nRx2N);
                 break;
 
             default:
@@ -505,7 +504,7 @@
 
     if (mightNotSplit && depth >= minDepth)
     {
-        int bTryAmp = m_slice->m_sps->maxAMPDepth > depth && (cuGeom.log2CUSize < 6 || m_param->rdLevel > 4);
+        int bTryAmp = m_slice->m_sps->maxAMPDepth > depth;
         int bTryIntra = m_slice->m_sliceType != B_SLICE || m_param->bIntraInBFrames;
 
         PMODE pmode(*this, cuGeom);
@@ -795,7 +794,7 @@
                     bestInter = &md.pred[PRED_2NxN];
             }
 
-            if (m_slice->m_sps->maxAMPDepth > depth && cuGeom.log2CUSize < 6)
+            if (m_slice->m_sps->maxAMPDepth > depth)
             {
                 bool bHor = false, bVer = false;
                 if (bestInter->cu.m_partSize[0] == SIZE_2NxN)
@@ -1076,7 +1075,7 @@
         if (!earlySkip)
         {
             md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
-            checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, false);
+            checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N);
             checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth);
 
             if (m_slice->m_sliceType == B_SLICE)
@@ -1093,19 +1092,17 @@
             if (m_param->bEnableRectInter)
             {
                 md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
-                checkInter_rd5_6(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N, false);
+                checkInter_rd5_6(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N);
                 checkBestMode(md.pred[PRED_Nx2N], cuGeom.depth);
 
                 md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp);
-                checkInter_rd5_6(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, false);
+                checkInter_rd5_6(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN);
                 checkBestMode(md.pred[PRED_2NxN], cuGeom.depth);
             }
 
             // Try AMP (SIZE_2NxnU, SIZE_2NxnD, SIZE_nLx2N, SIZE_nRx2N)
             if (m_slice->m_sps->maxAMPDepth > depth)
             {
-                bool bMergeOnly = cuGeom.log2CUSize == 6;
-
                 bool bHor = false, bVer = false;
                 if (md.bestMode->cu.m_partSize[0] == SIZE_2NxN)
                     bHor = true;
@@ -1120,21 +1117,21 @@
                 if (bHor)
                 {
                     md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom, qp);
-                    checkInter_rd5_6(md.pred[PRED_2NxnU], cuGeom, SIZE_2NxnU, bMergeOnly);
+                    checkInter_rd5_6(md.pred[PRED_2NxnU], cuGeom, SIZE_2NxnU);
                     checkBestMode(md.pred[PRED_2NxnU], cuGeom.depth);
 
                     md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp);
-                    checkInter_rd5_6(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, bMergeOnly);
+                    checkInter_rd5_6(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD);
                     checkBestMode(md.pred[PRED_2NxnD], cuGeom.depth);
                 }
                 if (bVer)
                 {
                     md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom, qp);
-                    checkInter_rd5_6(md.pred[PRED_nLx2N], cuGeom, SIZE_nLx2N, bMergeOnly);
+                    checkInter_rd5_6(md.pred[PRED_nLx2N], cuGeom, SIZE_nLx2N);
                     checkBestMode(md.pred[PRED_nLx2N], cuGeom.depth);
 
                     md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp);
-                    checkInter_rd5_6(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, bMergeOnly);
+                    checkInter_rd5_6(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N);
                     checkBestMode(md.pred[PRED_nRx2N], cuGeom.depth);
                 }
             }
@@ -1485,7 +1482,7 @@
         }
     }
 
-    predInterSearch(interMode, cuGeom, false, m_bChromaSa8d);
+    predInterSearch(interMode, cuGeom, m_bChromaSa8d);
 
     /* predInterSearch sets interMode.sa8dBits */
     const Yuv& fencYuv = *interMode.fencYuv;
@@ -1513,7 +1510,7 @@
     }
 }
 
-void Analysis::checkInter_rd5_6(Mode& interMode, const CUGeom& cuGeom, PartSize partSize, bool bMergeOnly)
+void Analysis::checkInter_rd5_6(Mode& interMode, const CUGeom& cuGeom, PartSize partSize)
 {
     interMode.initCosts();
     interMode.cu.setPartSizeSubParts(partSize);
@@ -1533,7 +1530,7 @@
         }
     }
 
-    predInterSearch(interMode, cuGeom, bMergeOnly, true);
+    predInterSearch(interMode, cuGeom, true);
 
     /* predInterSearch sets interMode.sa8dBits, but this is ignored */
     encodeResAndCalcRdInterCU(interMode, cuGeom);
diff -r cbc2004d147b -r b75e05d2a9a7 source/encoder/analysis.h
--- a/source/encoder/analysis.h	Wed Apr 29 14:50:26 2015 -0500
+++ b/source/encoder/analysis.h	Sat Apr 25 00:41:25 2015 -0500
@@ -122,7 +122,7 @@
 
     /* measure inter options */
     void checkInter_rd0_4(Mode& interMode, const CUGeom& cuGeom, PartSize partSize);
-    void checkInter_rd5_6(Mode& interMode, const CUGeom& cuGeom, PartSize partSize, bool bMergeOnly);
+    void checkInter_rd5_6(Mode& interMode, const CUGeom& cuGeom, PartSize partSize);
 
     void checkBidir2Nx2N(Mode& inter2Nx2N, Mode& bidir2Nx2N, const CUGeom& cuGeom);
 
diff -r cbc2004d147b -r b75e05d2a9a7 source/encoder/search.cpp
--- a/source/encoder/search.cpp	Wed Apr 29 14:50:26 2015 -0500
+++ b/source/encoder/search.cpp	Sat Apr 25 00:41:25 2015 -0500
@@ -2006,7 +2006,7 @@
 }
 
 /* find the best inter prediction for each PU of specified mode */
-void Search::predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bMergeOnly, bool bChromaSA8D)
+void Search::predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bChromaMC)
 {
     ProfileCUScope(interMode.cu, motionEstimationElapsedTime, countMotionEstimate);
 
@@ -2027,7 +2027,6 @@
     Yuv&     tmpPredYuv = m_rqt[cuGeom.depth].tmpPredYuv;
 
     MergeData merge;
-    uint32_t mrgCost;
     memset(&merge, 0, sizeof(merge));
 
     for (int puIdx = 0; puIdx < numPart; puIdx++)
@@ -2038,27 +2037,7 @@
         m_me.setSourcePU(*interMode.fencYuv, pu.ctuAddr, pu.cuAbsPartIdx, pu.puAbsPartIdx, pu.width, pu.height);
 
         /* find best cost merge candidate. note: 2Nx2N merge and bidir are handled as separate modes */
-        if (cu.m_partSize[0] != SIZE_2Nx2N)
-        {
-            mrgCost = mergeEstimation(cu, cuGeom, pu, puIdx, merge);
-
-            if (bMergeOnly && mrgCost != MAX_UINT)
-            {
-                cu.m_mergeFlag[pu.puAbsPartIdx] = true;
-                cu.m_mvpIdx[0][pu.puAbsPartIdx] = merge.index; // merge candidate ID is stored in L0 MVP idx
-                cu.setPUInterDir(merge.dir, pu.puAbsPartIdx, puIdx);
-                cu.setPUMv(0, merge.mvField[0].mv, pu.puAbsPartIdx, puIdx);
-                cu.setPURefIdx(0, merge.mvField[0].refIdx, pu.puAbsPartIdx, puIdx);
-                cu.setPUMv(1, merge.mvField[1].mv, pu.puAbsPartIdx, puIdx);
-                cu.setPURefIdx(1, merge.mvField[1].refIdx, pu.puAbsPartIdx, puIdx);
-                totalmebits += merge.bits;
-
-                motionCompensation(cu, pu, *predYuv, true, bChromaSA8D);
-                continue;
-            }
-        }
-        else
-            mrgCost = MAX_UINT;
+        uint32_t mrgCost = numPart == 1 ? MAX_UINT : mergeEstimation(cu, cuGeom, pu, puIdx, merge);
 
         bestME[0].cost = MAX_UINT;
         bestME[1].cost = MAX_UINT;
@@ -2337,7 +2316,7 @@
             totalmebits += bestME[1].bits;
         }
 
-        motionCompensation(cu, pu, *predYuv, true, bChromaSA8D);
+        motionCompensation(cu, pu, *predYuv, true, bChromaMC);
     }
     X265_CHECK(interMode.ok(), "inter mode is not ok");
     interMode.sa8dBits += totalmebits;
diff -r cbc2004d147b -r b75e05d2a9a7 source/encoder/search.h
--- a/source/encoder/search.h	Wed Apr 29 14:50:26 2015 -0500
+++ b/source/encoder/search.h	Sat Apr 25 00:41:25 2015 -0500
@@ -301,7 +301,7 @@
     void     encodeIntraInInter(Mode& intraMode, const CUGeom& cuGeom);
 
     // estimation inter prediction (non-skip)
-    void     predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bMergeOnly, bool bChroma);
+    void     predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bChromaMC);
 
     // encode residual and compute rd-cost for inter mode
     void     encodeResAndCalcRdInterCU(Mode& interMode, const CUGeom& cuGeom);


More information about the x265-devel mailing list