[x265] [PATCH] analysis: avoid motion references if not used by split blocks in pmode

ashok at multicorewareinc.com ashok at multicorewareinc.com
Fri Aug 14 10:40:09 CEST 2015


# HG changeset patch
# User Ashok Kumar Mishra<ashok at multicorewareinc.com>
# Date 1439540228 -19800
#      Fri Aug 14 13:47:08 2015 +0530
# Node ID e4293d18467ea5603d6a39a37f89ab0cb94e28e1
# Parent  9e26bef14543025908ed979b3d217417baf1ac8f
analysis: avoid motion references if not used by split blocks in pmode

diff -r 9e26bef14543 -r e4293d18467e source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Fri Aug 14 13:47:08 2015 +0530
+++ b/source/encoder/analysis.cpp	Fri Aug 14 13:47:08 2015 +0530
@@ -398,32 +398,52 @@
                 break;
 
             case PRED_2Nx2N:
+                refMasks[0] = m_splitRefIdx[0] | m_splitRefIdx[1] | m_splitRefIdx[2] | m_splitRefIdx[3];
+
                 slave.checkInter_rd0_4(md.pred[PRED_2Nx2N], pmode.cuGeom, SIZE_2Nx2N, refMasks);
                 if (m_slice->m_sliceType == B_SLICE)
                     slave.checkBidir2Nx2N(md.pred[PRED_2Nx2N], md.pred[PRED_BIDIR], pmode.cuGeom);
                 break;
 
             case PRED_Nx2N:
+                refMasks[0] = m_splitRefIdx[0] | m_splitRefIdx[2]; /* left */
+                refMasks[1] = m_splitRefIdx[1] | m_splitRefIdx[3]; /* right */
+
                 slave.checkInter_rd0_4(md.pred[PRED_Nx2N], pmode.cuGeom, SIZE_Nx2N, refMasks);
                 break;
 
             case PRED_2NxN:
+                refMasks[0] = m_splitRefIdx[0] | m_splitRefIdx[1]; /* top */
+                refMasks[1] = m_splitRefIdx[2] | m_splitRefIdx[3]; /* bot */
+
                 slave.checkInter_rd0_4(md.pred[PRED_2NxN], pmode.cuGeom, SIZE_2NxN, refMasks);
                 break;
 
             case PRED_2NxnU:
+                refMasks[0] = m_splitRefIdx[0] | m_splitRefIdx[1]; /* 25% top */
+                refMasks[1] = m_splitRefIdx[0] | m_splitRefIdx[1] | m_splitRefIdx[2] | m_splitRefIdx[3]; /* 75% bot */
+
                 slave.checkInter_rd0_4(md.pred[PRED_2NxnU], pmode.cuGeom, SIZE_2NxnU, refMasks);
                 break;
 
             case PRED_2NxnD:
+                refMasks[0] = m_splitRefIdx[0] | m_splitRefIdx[1] | m_splitRefIdx[2] | m_splitRefIdx[3]; /* 75% top */
+                refMasks[1] = m_splitRefIdx[2] | m_splitRefIdx[3]; /* 25% bot */
+
                 slave.checkInter_rd0_4(md.pred[PRED_2NxnD], pmode.cuGeom, SIZE_2NxnD, refMasks);
                 break;
 
             case PRED_nLx2N:
+                refMasks[0] = m_splitRefIdx[0] | m_splitRefIdx[2]; /* 25% left */
+                refMasks[1] = m_splitRefIdx[0] | m_splitRefIdx[1] | m_splitRefIdx[2] | m_splitRefIdx[3]; /* 75% right */
+
                 slave.checkInter_rd0_4(md.pred[PRED_nLx2N], pmode.cuGeom, SIZE_nLx2N, refMasks);
                 break;
 
             case PRED_nRx2N:
+                refMasks[0] = m_splitRefIdx[0] | m_splitRefIdx[1] | m_splitRefIdx[2] | m_splitRefIdx[3]; /* 75% left */
+                refMasks[1] = m_splitRefIdx[1] | m_splitRefIdx[3]; /* 25% right */
+
                 slave.checkInter_rd0_4(md.pred[PRED_nRx2N], pmode.cuGeom, SIZE_nRx2N, refMasks);
                 break;
 
@@ -443,6 +463,8 @@
                 break;
 
             case PRED_2Nx2N:
+                refMasks[0] = m_splitRefIdx[0] | m_splitRefIdx[1] | m_splitRefIdx[2] | m_splitRefIdx[3];
+
                 slave.checkInter_rd5_6(md.pred[PRED_2Nx2N], pmode.cuGeom, SIZE_2Nx2N, refMasks);
                 md.pred[PRED_BIDIR].rdCost = MAX_INT64;
                 if (m_slice->m_sliceType == B_SLICE)
@@ -454,26 +476,42 @@
                 break;
 
             case PRED_Nx2N:
+                refMasks[0] = m_splitRefIdx[0] | m_splitRefIdx[2]; /* left */
+                refMasks[1] = m_splitRefIdx[1] | m_splitRefIdx[3]; /* right */
+
                 slave.checkInter_rd5_6(md.pred[PRED_Nx2N], pmode.cuGeom, SIZE_Nx2N, refMasks);
                 break;
 
             case PRED_2NxN:
+                refMasks[0] = m_splitRefIdx[0] | m_splitRefIdx[1]; /* top */
+                refMasks[1] = m_splitRefIdx[2] | m_splitRefIdx[3]; /* bot */
+
                 slave.checkInter_rd5_6(md.pred[PRED_2NxN], pmode.cuGeom, SIZE_2NxN, refMasks);
                 break;
 
             case PRED_2NxnU:
+                refMasks[0] = m_splitRefIdx[0] | m_splitRefIdx[1]; /* 25% top */
+                refMasks[1] = m_splitRefIdx[0] | m_splitRefIdx[1] | m_splitRefIdx[2] | m_splitRefIdx[3]; /* 75% bot */
+
                 slave.checkInter_rd5_6(md.pred[PRED_2NxnU], pmode.cuGeom, SIZE_2NxnU, refMasks);
                 break;
 
             case PRED_2NxnD:
+                refMasks[0] = m_splitRefIdx[0] | m_splitRefIdx[1] | m_splitRefIdx[2] | m_splitRefIdx[3]; /* 75% top */
+                refMasks[1] = m_splitRefIdx[2] | m_splitRefIdx[3]; /* 25% bot */
                 slave.checkInter_rd5_6(md.pred[PRED_2NxnD], pmode.cuGeom, SIZE_2NxnD, refMasks);
                 break;
 
             case PRED_nLx2N:
+                refMasks[0] = m_splitRefIdx[0] | m_splitRefIdx[2]; /* 25% left */
+                refMasks[1] = m_splitRefIdx[0] | m_splitRefIdx[1] | m_splitRefIdx[2] | m_splitRefIdx[3]; /* 75% right */
+
                 slave.checkInter_rd5_6(md.pred[PRED_nLx2N], pmode.cuGeom, SIZE_nLx2N, refMasks);
                 break;
 
             case PRED_nRx2N:
+                refMasks[0] = m_splitRefIdx[0] | m_splitRefIdx[1] | m_splitRefIdx[2] | m_splitRefIdx[3]; /* 75% left */
+                refMasks[1] = m_splitRefIdx[1] | m_splitRefIdx[3]; /* 25% right */
                 slave.checkInter_rd5_6(md.pred[PRED_nRx2N], pmode.cuGeom, SIZE_nRx2N, refMasks);
                 break;
 
@@ -492,7 +530,7 @@
     while (task >= 0);
 }
 
-void Analysis::compressInterCU_dist(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp)
+uint32_t Analysis::compressInterCU_dist(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp)
 {
     uint32_t depth = cuGeom.depth;
     uint32_t cuAddr = parentCTU.m_cuAddr;
@@ -502,6 +540,7 @@
     bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
     bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
     uint32_t minDepth = m_param->rdLevel <= 4 ? topSkipMinDepth(parentCTU, cuGeom) : 0;
+    uint32_t splitRefs[4] = { 0, 0, 0, 0 };
 
     X265_CHECK(m_param->rdLevel >= 2, "compressInterCU_dist does not support RD 0 or 1\n");
 
@@ -551,7 +590,7 @@
                 if (m_slice->m_pps->bUseDQP && nextDepth <= m_slice->m_pps->maxCuDQPDepth)
                     nextQP = setLambdaFromQP(parentCTU, calculateQpforCuSize(parentCTU, childGeom));
 
-                compressInterCU_dist(parentCTU, childGeom, nextQP);
+                splitRefs[subPartIdx] = compressInterCU_dist(parentCTU, childGeom, nextQP);
 
                 // Save best CU and pred data for this sub CU
                 splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
@@ -603,6 +642,8 @@
             md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp); pmode.modes[pmode.m_jobTotal++] = PRED_nRx2N;
         }
 
+        m_splitRefIdx[0] = splitRefs[0]; m_splitRefIdx[1] = splitRefs[1]; m_splitRefIdx[2] = splitRefs[2]; m_splitRefIdx[3] = splitRefs[3];
+
         pmode.tryBondPeers(*m_frame->m_encData->m_jobProvider, pmode.m_jobTotal);
 
         /* participate in processing jobs, until all are distributed */
@@ -738,6 +779,22 @@
     if (mightSplit && !bNoSplit)
         checkBestMode(md.pred[PRED_SPLIT], depth);
 
+    /* determine which motion references the parent CU should search */
+    uint32_t refMask;
+    if (!(m_param->limitReferences & X265_REF_LIMIT_DEPTH))
+        refMask = 0;
+    else if (md.bestMode == &md.pred[PRED_SPLIT])
+        refMask = splitRefs[0] | splitRefs[1] | splitRefs[2] | splitRefs[3];
+    else
+    {
+        /* use the best merge/inter mode; if the best mode is intra, use the 2Nx2N inter references instead */
+        CUData& cu = md.bestMode->cu.isIntra(0) ? md.pred[PRED_2Nx2N].cu : md.bestMode->cu;
+        uint32_t numPU = cu.getNumPartInter(0);
+        refMask = 0;
+        for (uint32_t puIdx = 0, subPartIdx = 0; puIdx < numPU; puIdx++, subPartIdx += cu.getPUOffset(puIdx, 0))
+            refMask |= cu.getBestRefIdx(subPartIdx);
+    }
+
     if (mightNotSplit)
     {
         /* early-out statistics */
@@ -749,8 +806,11 @@
     }
 
     /* Copy best data to encData CTU and recon */
+    X265_CHECK(md.bestMode->ok(), "best mode is not ok");
     md.bestMode->cu.copyToPic(depth);
     md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr, cuGeom.absPartIdx);
+
+    return refMask;
 }
 
 uint32_t Analysis::compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp)
diff -r 9e26bef14543 -r e4293d18467e source/encoder/analysis.h
--- a/source/encoder/analysis.h	Fri Aug 14 13:47:08 2015 +0530
+++ b/source/encoder/analysis.h	Fri Aug 14 13:47:08 2015 +0530
@@ -108,11 +108,13 @@
     int32_t*             m_reuseRef;
     uint32_t*            m_reuseBestMergeCand;
 
+    uint32_t m_splitRefIdx[4];
+
     /* full analysis for an I-slice CU */
     void compressIntraCU(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder, int32_t qp);
 
     /* full analysis for a P or B slice CU */
-    void compressInterCU_dist(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp);
+    uint32_t compressInterCU_dist(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp);
     uint32_t compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp);
     uint32_t compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder, int32_t qp);
 
diff -r 9e26bef14543 -r e4293d18467e source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Fri Aug 14 13:47:08 2015 +0530
+++ b/source/encoder/encoder.cpp	Fri Aug 14 13:47:08 2015 +0530
@@ -1525,6 +1525,12 @@
         p->bDistributeMotionEstimation = p->bDistributeModeAnalysis = 0;
     }
 
+    if (p->bDistributeModeAnalysis && (p->limitReferences >> 1) && 1)
+    {
+        x265_log(p, X265_LOG_WARNING, "Limit reference options 2 and 3 are not supported with pmode. Disabling limit reference\n");
+        p->limitReferences = 0;
+    }
+
     if (p->bEnableTemporalSubLayers && !p->bframes)
     {
         x265_log(p, X265_LOG_WARNING, "B frames not enabled, temporal sublayer disabled\n");


More information about the x265-devel mailing list