[x265] [PATCH] reuse analysis information from pass 1 to effectively reduce computation in pass 2
santhoshini at multicorewareinc.com
santhoshini at multicorewareinc.com
Mon Dec 26 17:00:10 CET 2016
# HG changeset patch
# User Santhoshini Sekar <santhoshini at multicorewareinc.com>
# Date 1482321849 -19800
# Wed Dec 21 17:34:09 2016 +0530
# Node ID 9216f2375f1b26d96b6023f734be5f6bb8e888a0
# Parent 82e4e3b0bb460c0fe140953342e12a0a1b3da004
reuse analysis information from pass 1 to effectively reduce computation in pass 2
diff --git a/source/encoder/analysis.cpp b/source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp
+++ b/source/encoder/analysis.cpp
@@ -146,6 +146,23 @@
m_modeDepth[0].fencYuv.copyFromPicYuv(*m_frame->m_fencPic, ctu.m_cuAddr, 0);
uint32_t numPartition = ctu.m_numPartitions;
+ if (m_param->analysisMultiPassRefine && m_param->rc.bStatRead)
+ {
+ m_multipassAnalysis = (analysis2PassFrameData*)m_frame->m_analysis2Pass.analysisFramedata;
+ m_multipassDepth = &m_multipassAnalysis->depth[ctu.m_cuAddr * ctu.m_numPartitions];
+ if (m_slice->m_sliceType != I_SLICE)
+ {
+ int numPredDir = m_slice->isInterP() ? 1 : 2;
+ for (int dir = 0; dir < numPredDir; dir++)
+ {
+ m_multipassMv[dir] = &m_multipassAnalysis->m_mv[dir][ctu.m_cuAddr * ctu.m_numPartitions];
+ m_multipassMvpIdx[dir] = &m_multipassAnalysis->mvpIdx[dir][ctu.m_cuAddr * ctu.m_numPartitions];
+ m_multipassRef[dir] = &m_multipassAnalysis->ref[dir][ctu.m_cuAddr * ctu.m_numPartitions];
+ }
+ m_multipassModes = &m_multipassAnalysis->modes[ctu.m_cuAddr * ctu.m_numPartitions];
+ }
+ }
+
if (m_param->analysisMode && m_slice->m_sliceType != I_SLICE)
{
int numPredDir = m_slice->isInterP() ? 1 : 2;
@@ -1015,6 +1032,22 @@
}
}
}
+ if (m_param->analysisMultiPassRefine && m_param->rc.bStatRead && m_multipassAnalysis)
+ {
+ if (mightNotSplit && depth == m_multipassDepth[cuGeom.absPartIdx])
+ {
+ if (m_multipassModes[cuGeom.absPartIdx] == MODE_SKIP)
+ {
+ md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
+ md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
+
+ skipRecursion = !!m_param->bEnableRecursionSkip && md.bestMode;
+ if (m_param->rdLevel)
+ skipModes = m_param->bEnableEarlySkip && md.bestMode;
+ }
+ }
+ }
/* Step 1. Evaluate Merge/Skip candidates for likely early-outs, if skip mode was not set above */
if (mightNotSplit && depth >= minDepth && !md.bestMode) /* TODO: Re-evaluate if analysis load/save still works */
@@ -1562,6 +1595,28 @@
}
}
+ if (m_param->analysisMultiPassRefine && m_param->rc.bStatRead && m_multipassAnalysis)
+ {
+ if (mightNotSplit && depth == m_multipassDepth[cuGeom.absPartIdx])
+ {
+ if (m_multipassModes[cuGeom.absPartIdx] == MODE_SKIP)
+ {
+ md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
+ md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
+
+ skipModes = !!m_param->bEnableEarlySkip && md.bestMode;
+ refMasks[0] = allSplitRefs;
+ md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks);
+ checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth);
+
+ if (m_param->bEnableRecursionSkip && depth && m_modeDepth[depth - 1].bestMode)
+ skipRecursion = md.bestMode && !md.bestMode->cu.getQtRootCbf(0);
+ }
+ }
+ }
+
/* Step 1. Evaluate Merge/Skip candidates for likely early-outs */
if (mightNotSplit && !md.bestMode)
{
@@ -2310,6 +2365,21 @@
bestME[i].ref = m_reuseRef[refOffset + index++];
}
}
+
+ if (m_param->analysisMultiPassRefine && m_param->rc.bStatRead && m_multipassAnalysis)
+ {
+ uint32_t numPU = interMode.cu.getNumPartInter(0);
+ for (uint32_t part = 0; part < numPU; part++)
+ {
+ MotionData* bestME = interMode.bestME[part];
+ for (int32_t i = 0; i < numPredDir; i++)
+ {
+ bestME[i].ref = m_multipassRef[i][cuGeom.absPartIdx];
+ bestME[i].mv = m_multipassMv[i][cuGeom.absPartIdx];
+ bestME[i].mvpIdx = m_multipassMvpIdx[i][cuGeom.absPartIdx];
+ }
+ }
+ }
predInterSearch(interMode, cuGeom, m_bChromaSa8d && (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400), refMask);
/* predInterSearch sets interMode.sa8dBits */
@@ -2359,6 +2429,22 @@
bestME[i].ref = m_reuseRef[refOffset + index++];
}
}
+
+ if (m_param->analysisMultiPassRefine && m_param->rc.bStatRead && m_multipassAnalysis)
+ {
+ uint32_t numPU = interMode.cu.getNumPartInter(0);
+ for (uint32_t part = 0; part < numPU; part++)
+ {
+ MotionData* bestME = interMode.bestME[part];
+ for (int32_t i = 0; i < numPredDir; i++)
+ {
+ bestME[i].ref = m_multipassRef[i][cuGeom.absPartIdx];
+ bestME[i].mv = m_multipassMv[i][cuGeom.absPartIdx];
+ bestME[i].mvpIdx = m_multipassMvpIdx[i][cuGeom.absPartIdx];
+ }
+ }
+ }
+
predInterSearch(interMode, cuGeom, m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400, refMask);
/* predInterSearch sets interMode.sa8dBits, but this is ignored */
diff --git a/source/encoder/analysis.h b/source/encoder/analysis.h
--- a/source/encoder/analysis.h
+++ b/source/encoder/analysis.h
@@ -130,6 +130,13 @@
uint32_t m_splitRefIdx[4];
uint64_t* cacheCost;
+
+ analysis2PassFrameData* m_multipassAnalysis;
+ uint8_t* m_multipassDepth;
+ MV* m_multipassMv[2];
+ int* m_multipassMvpIdx[2];
+ int32_t* m_multipassRef[2];
+ uint8_t* m_multipassModes;
/* refine RD based on QP for rd-levels 5 and 6 */
void qprdRefine(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp, int32_t lqp);
diff --git a/source/encoder/search.cpp b/source/encoder/search.cpp
--- a/source/encoder/search.cpp
+++ b/source/encoder/search.cpp
@@ -2128,7 +2128,7 @@
cu.getNeighbourMV(puIdx, pu.puAbsPartIdx, interMode.interNeighbours);
/* Uni-directional prediction */
- if (m_param->analysisMode == X265_ANALYSIS_LOAD)
+ if (m_param->analysisMode == X265_ANALYSIS_LOAD || (m_param->analysisMultiPassRefine && m_param->rc.bStatRead))
{
for (int list = 0; list < numPredDir; list++)
{
@@ -2153,7 +2153,11 @@
m_me.integral[planes] = interMode.fencYuv->m_integral[list][ref][planes] + puX * pu.width + puY * pu.height * m_slice->m_refFrameList[list][ref]->m_reconPic->m_stride;
}
setSearchRange(cu, mvp, m_param->searchRange, mvmin, mvmax);
- int satdCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvp, numMvc, mvc, m_param->searchRange, outmv,
+ MV mvpIn = mvp;
+ if (m_param->analysisMultiPassRefine && m_param->rc.bStatRead && mvpIdx == bestME[list].mvpIdx)
+ mvpIn = bestME[list].mv;
+
+ int satdCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvpIn, numMvc, mvc, m_param->searchRange, outmv,
m_param->bSourceReferenceEstimation ? m_slice->m_refFrameList[list][ref]->m_fencPic->getLumaAddr(0) : 0);
/* Get total cost of partition, but only include MV bit cost once */
@@ -2162,7 +2166,22 @@
uint32_t cost = (satdCost - mvCost) + m_rdCost.getCost(bits);
/* Refine MVP selection, updates: mvpIdx, bits, cost */
- mvp = checkBestMVP(amvp, outmv, mvpIdx, bits, cost);
+ if (!m_param->analysisMultiPassRefine)
+ mvp = checkBestMVP(amvp, outmv, mvpIdx, bits, cost);
+ else
+ {
+ /* Compare MV bit cost against mvpIn, the predictor actually passed to motionEstimate, rather than amvp[mvpIdx].
+ mvpIn is the pass 1 bestME mv when pass 2 refinement is on and mvpIdx matches bestME's mvpIdx; otherwise it is mvp */
+ int diffBits = m_me.bitcost(outmv, amvp[!mvpIdx]) - m_me.bitcost(outmv, mvpIn);
+ if (diffBits < 0)
+ {
+ mvpIdx = !mvpIdx;
+ uint32_t origOutBits = bits;
+ bits = origOutBits + diffBits;
+ cost = (cost - m_rdCost.getCost(origOutBits)) + m_rdCost.getCost(bits);
+ }
+ mvp = amvp[mvpIdx];
+ }
if (cost < bestME[list].cost)
{
diff --git a/source/test/rate-control-tests.txt b/source/test/rate-control-tests.txt
--- a/source/test/rate-control-tests.txt
+++ b/source/test/rate-control-tests.txt
@@ -43,3 +43,9 @@
RaceHorses_416x240_30_10bit.yuv,--preset medium --crf 26 --vbv-maxrate 1000 --vbv-bufsize 1000 --pass 1,--preset fast --bitrate 1000 --vbv-maxrate 1000 --vbv-bufsize 700 --pass 3 -F4,--preset slow --bitrate 500 --vbv-maxrate 500 --vbv-bufsize 700 --pass 2 -F4
sita_1920x1080_30.yuv, --preset ultrafast --crf 20 --no-cutree --keyint 50 --min-keyint 50 --no-open-gop --pass 1 --vbv-bufsize 7000 --vbv-maxrate 5000, --preset ultrafast --crf 20 --no-cutree --keyint 50 --min-keyint 50 --no-open-gop --pass 2 --vbv-bufsize 7000 --vbv-maxrate 5000 --repeat-headers
sita_1920x1080_30.yuv, --preset medium --crf 20 --no-cutree --keyint 50 --min-keyint 50 --no-open-gop --pass 1 --vbv-bufsize 7000 --vbv-maxrate 5000 --repeat-headers --multi-pass-opt-rps, --preset medium --crf 20 --no-cutree --keyint 50 --min-keyint 50 --no-open-gop --pass 2 --vbv-bufsize 7000 --vbv-maxrate 5000 --repeat-headers --multi-pass-opt-rps
+
+# multi-pass rate control and analysis
+ducks_take_off_1080p50.y4m,--bitrate 6000 --pass 1 --multi-pass-opt-analysis --hash 1 --ssim --psnr
+ducks_take_off_1080p50.y4m,--bitrate 6000 --pass 2 --multi-pass-opt-analysis --hash 1 --ssim --psnr
+big_buck_bunny_360p24.y4m,--preset veryslow --bitrate 600 --pass 1 --multi-pass-opt-analysis --hash 1 --ssim --psnr
+big_buck_bunny_360p24.y4m,--preset veryslow --bitrate 600 --pass 2 --multi-pass-opt-analysis --hash 1 --ssim --psnr
-------------- next part --------------
A non-text attachment was scrubbed...
Name: x265.patch
Type: text/x-patch
Size: 10740 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20161226/281a0c9a/attachment.bin>
More information about the x265-devel mailing list