[x265] [PATCH] reuse analysis information from pass 1 to effectively reduce computation in pass 2

Mon Dec 26 17:00:10 CET 2016

# HG changeset patch
# User Santhoshini Sekar <santhoshini at multicorewareinc.com>
# Date 1482321849 -19800
#      Wed Dec 21 17:34:09 2016 +0530
# Node ID 9216f2375f1b26d96b6023f734be5f6bb8e888a0
# Parent  82e4e3b0bb460c0fe140953342e12a0a1b3da004
reuse analysis information from pass 1 to effectively reduce computation in pass 2

diff --git a/source/encoder/analysis.cpp b/source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp
+++ b/source/encoder/analysis.cpp
@@ -146,6 +146,23 @@
     m_modeDepth[0].fencYuv.copyFromPicYuv(*m_frame->m_fencPic, ctu.m_cuAddr, 0);
 
     uint32_t numPartition = ctu.m_numPartitions;
+    if (m_param->analysisMultiPassRefine && m_param->rc.bStatRead)
+    {
+        m_multipassAnalysis = (analysis2PassFrameData*)m_frame->m_analysis2Pass.analysisFramedata;
+        m_multipassDepth = &m_multipassAnalysis->depth[ctu.m_cuAddr * ctu.m_numPartitions];
+        if (m_slice->m_sliceType != I_SLICE)
+        {
+            int numPredDir = m_slice->isInterP() ? 1 : 2;
+            for (int dir = 0; dir < numPredDir; dir++)
+            {
+                m_multipassMv[dir] = &m_multipassAnalysis->m_mv[dir][ctu.m_cuAddr * ctu.m_numPartitions];
+                m_multipassMvpIdx[dir] = &m_multipassAnalysis->mvpIdx[dir][ctu.m_cuAddr * ctu.m_numPartitions];
+                m_multipassRef[dir] = &m_multipassAnalysis->ref[dir][ctu.m_cuAddr * ctu.m_numPartitions];
+            }
+            m_multipassModes = &m_multipassAnalysis->modes[ctu.m_cuAddr * ctu.m_numPartitions];
+        }
+    }
+
     if (m_param->analysisMode && m_slice->m_sliceType != I_SLICE)
     {
         int numPredDir = m_slice->isInterP() ? 1 : 2;
@@ -1015,6 +1032,22 @@
             }
         }
     }
+    if (m_param->analysisMultiPassRefine && m_param->rc.bStatRead && m_multipassAnalysis)
+    {
+        if (mightNotSplit && depth == m_multipassDepth[cuGeom.absPartIdx])
+        {
+            if (m_multipassModes[cuGeom.absPartIdx] == MODE_SKIP)
+            {
+                md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
+                md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
+                checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
+
+                skipRecursion = !!m_param->bEnableRecursionSkip && md.bestMode;
+                if (m_param->rdLevel)
+                    skipModes = m_param->bEnableEarlySkip && md.bestMode;
+            }
+        }
+    }
 
     /* Step 1. Evaluate Merge/Skip candidates for likely early-outs, if skip mode was not set above */
     if (mightNotSplit && depth >= minDepth && !md.bestMode) /* TODO: Re-evaluate if analysis load/save still works */
@@ -1562,6 +1595,28 @@
         }
     }
 
+    if (m_param->analysisMultiPassRefine && m_param->rc.bStatRead && m_multipassAnalysis)
+    {
+        if (mightNotSplit && depth == m_multipassDepth[cuGeom.absPartIdx])
+        {
+            if (m_multipassModes[cuGeom.absPartIdx] == MODE_SKIP)
+            {
+                md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
+                md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
+                checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
+
+                skipModes = !!m_param->bEnableEarlySkip && md.bestMode;
+                refMasks[0] = allSplitRefs;
+                md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
+                checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks);
+                checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth);
+
+                if (m_param->bEnableRecursionSkip && depth && m_modeDepth[depth - 1].bestMode)
+                    skipRecursion = md.bestMode && !md.bestMode->cu.getQtRootCbf(0);
+            }
+        }
+    }
+
     /* Step 1. Evaluate Merge/Skip candidates for likely early-outs */
     if (mightNotSplit && !md.bestMode)
     {
@@ -2310,6 +2365,21 @@
                 bestME[i].ref = m_reuseRef[refOffset + index++];
         }
     }
+
+    if (m_param->analysisMultiPassRefine && m_param->rc.bStatRead && m_multipassAnalysis)
+    {
+        uint32_t numPU = interMode.cu.getNumPartInter(0);
+        for (uint32_t part = 0; part < numPU; part++)
+        {
+            MotionData* bestME = interMode.bestME[part];
+            for (int32_t i = 0; i < numPredDir; i++)
+            {
+                bestME[i].ref = m_multipassRef[i][cuGeom.absPartIdx];
+                bestME[i].mv = m_multipassMv[i][cuGeom.absPartIdx];
+                bestME[i].mvpIdx = m_multipassMvpIdx[i][cuGeom.absPartIdx];
+            }
+        }
+    }
     predInterSearch(interMode, cuGeom, m_bChromaSa8d && (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400), refMask);
 
     /* predInterSearch sets interMode.sa8dBits */
@@ -2359,6 +2429,22 @@
                 bestME[i].ref = m_reuseRef[refOffset + index++];
         }
     }
+
+    if (m_param->analysisMultiPassRefine && m_param->rc.bStatRead && m_multipassAnalysis)
+    {
+        uint32_t numPU = interMode.cu.getNumPartInter(0);
+        for (uint32_t part = 0; part < numPU; part++)
+        {
+            MotionData* bestME = interMode.bestME[part];
+            for (int32_t i = 0; i < numPredDir; i++)
+            {
+                bestME[i].ref = m_multipassRef[i][cuGeom.absPartIdx];
+                bestME[i].mv = m_multipassMv[i][cuGeom.absPartIdx];
+                bestME[i].mvpIdx = m_multipassMvpIdx[i][cuGeom.absPartIdx];
+            }
+        }
+    }
+
     predInterSearch(interMode, cuGeom, m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400, refMask);
 
     /* predInterSearch sets interMode.sa8dBits, but this is ignored */
diff --git a/source/encoder/analysis.h b/source/encoder/analysis.h
--- a/source/encoder/analysis.h
+++ b/source/encoder/analysis.h
@@ -130,6 +130,13 @@
     uint32_t             m_splitRefIdx[4];
     uint64_t*            cacheCost;
 
+
+    analysis2PassFrameData* m_multipassAnalysis;
+    uint8_t*                m_multipassDepth;
+    MV*                     m_multipassMv[2];
+    int*                    m_multipassMvpIdx[2];
+    int32_t*                m_multipassRef[2];
+    uint8_t*                m_multipassModes;
     /* refine RD based on QP for rd-levels 5 and 6 */
     void qprdRefine(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp, int32_t lqp);
 
diff --git a/source/encoder/search.cpp b/source/encoder/search.cpp
--- a/source/encoder/search.cpp
+++ b/source/encoder/search.cpp
@@ -2128,7 +2128,7 @@
         cu.getNeighbourMV(puIdx, pu.puAbsPartIdx, interMode.interNeighbours);
 
         /* Uni-directional prediction */
-        if (m_param->analysisMode == X265_ANALYSIS_LOAD)
+        if (m_param->analysisMode == X265_ANALYSIS_LOAD || (m_param->analysisMultiPassRefine && m_param->rc.bStatRead))
         {
             for (int list = 0; list < numPredDir; list++)
             {
@@ -2153,7 +2153,11 @@
                         m_me.integral[planes] = interMode.fencYuv->m_integral[list][ref][planes] + puX * pu.width + puY * pu.height * m_slice->m_refFrameList[list][ref]->m_reconPic->m_stride;
                 }
                 setSearchRange(cu, mvp, m_param->searchRange, mvmin, mvmax);
-                int satdCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvp, numMvc, mvc, m_param->searchRange, outmv,
+                MV mvpIn = mvp;
+                if (m_param->analysisMultiPassRefine && m_param->rc.bStatRead && mvpIdx == bestME[list].mvpIdx)
+                    mvpIn = bestME[list].mv;
+                    
+                int satdCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvpIn, numMvc, mvc, m_param->searchRange, outmv,
                   m_param->bSourceReferenceEstimation ? m_slice->m_refFrameList[list][ref]->m_fencPic->getLumaAddr(0) : 0);
 
                 /* Get total cost of partition, but only include MV bit cost once */
@@ -2162,7 +2166,22 @@
                 uint32_t cost = (satdCost - mvCost) + m_rdCost.getCost(bits);
 
                 /* Refine MVP selection, updates: mvpIdx, bits, cost */
-                mvp = checkBestMVP(amvp, outmv, mvpIdx, bits, cost);
+                if (!m_param->analysisMultiPassRefine)
+                    mvp = checkBestMVP(amvp, outmv, mvpIdx, bits, cost);
+                else
+                {
+                    /* It is more accurate to compare against the actual MVP that was passed to motionEstimate() (mvpIn)
+                     * than against amvp[mvpIdx]. When mvpIdx matches the pass 1 mvpIdx, mvpIn is the bestME MV from pass 1. */
+                    int diffBits = m_me.bitcost(outmv, amvp[!mvpIdx]) - m_me.bitcost(outmv, mvpIn);
+                    if (diffBits < 0)
+                    {
+                        mvpIdx = !mvpIdx;
+                        uint32_t origOutBits = bits;
+                        bits = origOutBits + diffBits;
+                        cost = (cost - m_rdCost.getCost(origOutBits)) + m_rdCost.getCost(bits);
+                    }
+                    mvp = amvp[mvpIdx];
+                }
 
                 if (cost < bestME[list].cost)
                 {
diff --git a/source/test/rate-control-tests.txt b/source/test/rate-control-tests.txt
--- a/source/test/rate-control-tests.txt
+++ b/source/test/rate-control-tests.txt
@@ -43,3 +43,9 @@
 RaceHorses_416x240_30_10bit.yuv,--preset medium --crf 26 --vbv-maxrate 1000 --vbv-bufsize 1000 --pass 1,--preset fast --bitrate 1000  --vbv-maxrate 1000 --vbv-bufsize 700 --pass 3 -F4,--preset slow --bitrate 500 --vbv-maxrate 500  --vbv-bufsize 700 --pass 2 -F4
 sita_1920x1080_30.yuv, --preset ultrafast --crf 20 --no-cutree --keyint 50 --min-keyint 50 --no-open-gop --pass 1 --vbv-bufsize 7000 --vbv-maxrate 5000, --preset ultrafast --crf 20 --no-cutree --keyint 50 --min-keyint 50 --no-open-gop --pass 2 --vbv-bufsize 7000 --vbv-maxrate 5000 --repeat-headers
 sita_1920x1080_30.yuv, --preset medium --crf 20 --no-cutree --keyint 50 --min-keyint 50 --no-open-gop --pass 1 --vbv-bufsize 7000 --vbv-maxrate 5000 --repeat-headers --multi-pass-opt-rps, --preset medium --crf 20 --no-cutree --keyint 50 --min-keyint 50 --no-open-gop --pass 2 --vbv-bufsize 7000 --vbv-maxrate 5000 --repeat-headers --multi-pass-opt-rps
+
+# multi-pass rate control and analysis
+ducks_take_off_1080p50.y4m,--bitrate 6000 --pass 1  --multi-pass-opt-analysis  --hash 1 --ssim --psnr
+ducks_take_off_1080p50.y4m,--bitrate 6000 --pass 2  --multi-pass-opt-analysis  --hash 1 --ssim --psnr
+big_buck_bunny_360p24.y4m,--preset veryslow --bitrate 600 --pass 1  --multi-pass-opt-analysis  --hash 1 --ssim --psnr
+big_buck_bunny_360p24.y4m,--preset veryslow --bitrate 600 --pass 2  --multi-pass-opt-analysis  --hash 1 --ssim --psnr
-------------- next part --------------
A non-text attachment was scrubbed...
Name: x265.patch
Type: text/x-patch
Size: 10740 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20161226/281a0c9a/attachment.bin>