[x265-commits] [x265] MV refinement for multipass encoding
Ashok Kumar Mishra
ashok at multicorewareinc.com
Wed Jun 7 01:03:03 CEST 2017
details: http://hg.videolan.org/x265/rev/e75d5f5eeae3
branches:
changeset: 11789:e75d5f5eeae3
user: Ashok Kumar Mishra <ashok at multicorewareinc.com>
date: Mon Jun 05 15:20:44 2017 +0530
description:
MV refinement for multipass encoding
diffstat:
doc/reST/cli.rst | 6 +
source/CMakeLists.txt | 2 +-
source/common/param.cpp | 3 +
source/encoder/analysis.cpp | 17 +++-
source/encoder/encoder.cpp | 9 ++
source/encoder/motion.cpp | 133 ++++++++++++++++++++++++++++++++++++++++++++
source/encoder/motion.h | 1 +
source/encoder/search.cpp | 11 +++
source/encoder/search.h | 1 +
source/x265.h | 3 +
source/x265cli.h | 3 +
11 files changed, 183 insertions(+), 6 deletions(-)
diffs (truncated from 338 to 300 lines):
diff -r de49a722b256 -r e75d5f5eeae3 doc/reST/cli.rst
--- a/doc/reST/cli.rst Wed May 24 20:01:59 2017 +0530
+++ b/doc/reST/cli.rst Mon Jun 05 15:20:44 2017 +0530
@@ -911,6 +911,12 @@ not match.
inter modes for blocks of size one smaller than the min-cu-size of the
incoming analysis data from the previous encode. Default disabled.
+.. option:: --refine-mv
+
+ Enables refinement of motion vectors for scaled video. Evaluates the best
+ motion vector by searching the eight surrounding integer-pel and sub-pel
+ positions. Requires analysis load, refine-level 10 and a scale factor. Default disabled.
+
Options which affect the transform unit quad-tree, sometimes referred to
as the residual quad-tree (RQT).
diff -r de49a722b256 -r e75d5f5eeae3 source/CMakeLists.txt
--- a/source/CMakeLists.txt Wed May 24 20:01:59 2017 +0530
+++ b/source/CMakeLists.txt Mon Jun 05 15:20:44 2017 +0530
@@ -29,7 +29,7 @@ option(NATIVE_BUILD "Target the build CP
option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF)
mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
# X265_BUILD must be incremented each time the public API is changed
-set(X265_BUILD 120)
+set(X265_BUILD 121)
configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
"${PROJECT_BINARY_DIR}/x265.def")
configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
diff -r de49a722b256 -r e75d5f5eeae3 source/common/param.cpp
--- a/source/common/param.cpp Wed May 24 20:01:59 2017 +0530
+++ b/source/common/param.cpp Mon Jun 05 15:20:44 2017 +0530
@@ -280,6 +280,7 @@ void x265_param_default(x265_param* para
param->scaleFactor = 0;
param->intraRefine = 0;
param->interRefine = 0;
+ param->mvRefine = 0;
}
int x265_param_default_preset(x265_param* param, const char* preset, const char* tune)
@@ -963,6 +964,7 @@ int x265_param_parse(x265_param* p, cons
OPT("scale-factor") p->scaleFactor = atoi(value);
OPT("refine-intra")p->intraRefine = atobool(value);
OPT("refine-inter")p->interRefine = atobool(value);
+ OPT("refine-mv")p->mvRefine = atobool(value);
else
return X265_PARAM_BAD_NAME;
}
@@ -1685,6 +1687,7 @@ char *x265_param2string(x265_param* p, i
s += sprintf(s, " scale-factor=%d", p->scaleFactor);
s += sprintf(s, " refine-intra=%d", p->intraRefine);
s += sprintf(s, " refine-inter=%d", p->interRefine);
+ s += sprintf(s, " refine-mv=%d", p->mvRefine);
BOOL(p->bLimitSAO, "limit-sao");
s += sprintf(s, " ctu-info=%d", p->bCTUInfo);
#undef BOOL
diff -r de49a722b256 -r e75d5f5eeae3 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Wed May 24 20:01:59 2017 +0530
+++ b/source/encoder/analysis.cpp Mon Jun 05 15:20:44 2017 +0530
@@ -2267,14 +2267,16 @@ void Analysis::recodeCU(const CUData& pa
int cuIdx = (mode.cu.m_cuAddr * parentCTU.m_numPartitions) + cuGeom.absPartIdx;
mode.cu.m_mergeFlag[pu.puAbsPartIdx] = interDataCTU->mergeFlag[cuIdx + part];
mode.cu.setPUInterDir(interDataCTU->interDir[cuIdx + part], pu.puAbsPartIdx, part);
- for (int dir = 0; dir < m_slice->isInterB() + 1; dir++)
+ for (int list = 0; list < m_slice->isInterB() + 1; list++)
{
- mode.cu.setPUMv(dir, interDataCTU->mv[dir][cuIdx + part], pu.puAbsPartIdx, part);
- mode.cu.setPURefIdx(dir, interDataCTU->refIdx[dir][cuIdx + part], pu.puAbsPartIdx, part);
- mode.cu.m_mvpIdx[dir][pu.puAbsPartIdx] = interDataCTU->mvpIdx[dir][cuIdx + part];
+ mode.cu.setPUMv(list, interDataCTU->mv[list][cuIdx + part], pu.puAbsPartIdx, part);
+ mode.cu.setPURefIdx(list, interDataCTU->refIdx[list][cuIdx + part], pu.puAbsPartIdx, part);
+ mode.cu.m_mvpIdx[list][pu.puAbsPartIdx] = interDataCTU->mvpIdx[list][cuIdx + part];
}
if (!mode.cu.m_mergeFlag[pu.puAbsPartIdx])
{
+ if (m_param->mvRefine)
+ m_me.setSourcePU(*mode.fencYuv, pu.ctuAddr, pu.cuAbsPartIdx, pu.puAbsPartIdx, pu.width, pu.height, m_param->searchMethod, m_param->subpelRefine, false);
//AMVP
MV mvc[(MD_ABOVE_LEFT + 1) * 2 + 2];
mode.cu.getNeighbourMV(part, pu.puAbsPartIdx, mode.interNeighbours);
@@ -2285,6 +2287,12 @@ void Analysis::recodeCU(const CUData& pa
continue;
mode.cu.getPMV(mode.interNeighbours, list, ref, mode.amvpCand[list][ref], mvc);
MV mvp = mode.amvpCand[list][ref][mode.cu.m_mvpIdx[list][pu.puAbsPartIdx]];
+ if (m_param->mvRefine)
+ {
+ MV outmv;
+ searchMV(mode, pu, list, ref, outmv);
+ mode.cu.setPUMv(list, outmv, pu.puAbsPartIdx, part);
+ }
mode.cu.m_mvd[list][pu.puAbsPartIdx] = mode.cu.m_mv[list][pu.puAbsPartIdx] - mvp;
}
}
@@ -2293,7 +2301,6 @@ void Analysis::recodeCU(const CUData& pa
MVField candMvField[MRG_MAX_NUM_CANDS][2]; // double length for mv of both lists
uint8_t candDir[MRG_MAX_NUM_CANDS];
mode.cu.getInterMergeCandidates(pu.puAbsPartIdx, part, candMvField, candDir);
- mode.cu.m_mvpIdx[0][pu.puAbsPartIdx] = interDataCTU->mvpIdx[0][cuIdx + part];
uint8_t mvpIdx = mode.cu.m_mvpIdx[0][pu.puAbsPartIdx];
mode.cu.setPUInterDir(candDir[mvpIdx], pu.puAbsPartIdx, part);
mode.cu.setPUMv(0, candMvField[mvpIdx][0].mv, pu.puAbsPartIdx, part);
diff -r de49a722b256 -r e75d5f5eeae3 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Wed May 24 20:01:59 2017 +0530
+++ b/source/encoder/encoder.cpp Mon Jun 05 15:20:44 2017 +0530
@@ -2310,6 +2310,15 @@ void Encoder::configure(x265_param *p)
x265_log(p, X265_LOG_WARNING, "Inter refinement does not support limitTU. Disabling limitTU.\n");
p->limitTU = 0;
}
+
+ if (p->mvRefine) // validate the prerequisites of --refine-mv before the encoder uses it
+ {
+ if (p->analysisMode != X265_ANALYSIS_LOAD || p->analysisRefineLevel < 10 || !p->scaleFactor)
+ {
+ x265_log(p, X265_LOG_WARNING, "MV refinement requires analysis load, refine-level 10, scale factor. Disabling MV refine.\n");
+ p->mvRefine = 0; // prerequisites not met: switch MV refinement off instead of failing
+ }
+ }
if ((p->analysisMultiPassRefine || p->analysisMultiPassDistortion) && (p->bDistributeModeAnalysis || p->bDistributeMotionEstimation))
{
diff -r de49a722b256 -r e75d5f5eeae3 source/encoder/motion.cpp
--- a/source/encoder/motion.cpp Wed May 24 20:01:59 2017 +0530
+++ b/source/encoder/motion.cpp Mon Jun 05 15:20:44 2017 +0530
@@ -598,6 +598,139 @@ void MotionEstimate::StarPatternSearch(R
}
}
+void MotionEstimate::refineMV(ReferencePlanes* ref, // Refines qmvp on ref: tests the 8 full-pel neighbours, then runs hpel/qpel refinement; result goes to outQMv.
+ const MV& mvmin, // lower search bound; converted with toQPel() below, so given in full-pel units
+ const MV& mvmax, // upper search bound; converted with toQPel() below, so given in full-pel units
+ const MV& qmvp, // starting MV in quarter-pel units; also installed as the predictor via setMVP()
+ MV& outQMv) // receives the refined quarter-pel MV
+{
+ ALIGN_VAR_16(int, costs[16]); // scratch array handed to COST_MV_X4_DIR, read back as costs[0..3]
+ if (ctuAddr >= 0) // presumably set by setSourcePU(); recompute the PU offset inside the recon picture
+ blockOffset = ref->reconPic->getLumaAddr(ctuAddr, absPartIdx) - ref->reconPic->getLumaAddr(0);
+ intptr_t stride = ref->lumaStride;
+ pixel* fenc = fencPUYuv.m_buf[0];
+ pixel* fref = ref->fpelPlane[0] + blockOffset;
+
+ setMVP(qmvp);
+
+ MV qmvmin = mvmin.toQPel();
+ MV qmvmax = mvmax.toQPel();
+
+ /* The term cost used here means satd/sad values for that particular search.
+ * The costs used in ME integer search only includes the SAD cost of motion
+ * residual and sqrtLambda times MVD bits. The subpel refine steps use SATD
+ * cost of residual and sqrtLambda * MVD bits.
+ */
+
+ // measure cost at clipped QPEL MVP (the comparison function passed here is sad, not satd)
+ MV pmv = qmvp.clipped(qmvmin, qmvmax);
+ MV bestpre = pmv;
+ int bprecost;
+
+ bprecost = subpelCompare(ref, pmv, sad); // NOTE(review): no mvcost() term is added here, unlike bcost below - confirm intended
+
+ /* re-measure full pel rounded MVP with SAD as search start point */
+ MV bmv = pmv.roundToFPel();
+ int bcost = bprecost;
+ if (pmv.isSubpel())
+ bcost = sad(fenc, FENC_STRIDE, fref + bmv.x + bmv.y * stride, stride) + mvcost(bmv << 2);
+
+ /* square refine: try the eight full-pel neighbours of bmv; dir records the winner (0 = none) */
+ int dir = 0;
+ COST_MV_X4_DIR(0, -1, 0, 1, -1, 0, 1, 0, costs); // presumably fills costs[0..3] for the four (x, y) offsets listed - confirm against the macro
+ if ((bmv.y - 1 >= mvmin.y) & (bmv.y - 1 <= mvmax.y)) // candidates with a vertical step are accepted only inside [mvmin.y, mvmax.y]
+ COPY2_IF_LT(bcost, costs[0], dir, 1);
+ if ((bmv.y + 1 >= mvmin.y) & (bmv.y + 1 <= mvmax.y))
+ COPY2_IF_LT(bcost, costs[1], dir, 2);
+ COPY2_IF_LT(bcost, costs[2], dir, 3); // costs[2] and costs[3] of this batch are taken without a range check
+ COPY2_IF_LT(bcost, costs[3], dir, 4);
+ COST_MV_X4_DIR(-1, -1, -1, 1, 1, -1, 1, 1, costs); // second batch: the four diagonal offsets
+ if ((bmv.y - 1 >= mvmin.y) & (bmv.y - 1 <= mvmax.y))
+ COPY2_IF_LT(bcost, costs[0], dir, 5);
+ if ((bmv.y + 1 >= mvmin.y) & (bmv.y + 1 <= mvmax.y))
+ COPY2_IF_LT(bcost, costs[1], dir, 6);
+ if ((bmv.y - 1 >= mvmin.y) & (bmv.y - 1 <= mvmax.y))
+ COPY2_IF_LT(bcost, costs[2], dir, 7);
+ if ((bmv.y + 1 >= mvmin.y) & (bmv.y + 1 <= mvmax.y))
+ COPY2_IF_LT(bcost, costs[3], dir, 8);
+ bmv += square1[dir]; // move to the winning neighbour; square1[0] is presumably the zero offset - confirm
+
+ if (bprecost < bcost) // the clipped qpel start point was cheaper than every full-pel position tried
+ {
+ bmv = bestpre;
+ bcost = bprecost;
+ }
+ else
+ bmv = bmv.toQPel(); // promote search bmv to qpel
+
+ // TO DO: Change SubpelWorkload to fine tune MV
+ // Now it is set to 5 for experiment.
+ // const SubpelWorkload& wl = workload[this->subpelRefine];
+ const SubpelWorkload& wl = workload[5];
+
+ pixelcmp_t hpelcomp; // comparison function used by the half-pel loop below
+
+ if (wl.hpel_satd)
+ {
+ bcost = subpelCompare(ref, bmv, satd) + mvcost(bmv); // re-measure the start point with satd so costs are comparable
+ hpelcomp = satd;
+ }
+ else
+ hpelcomp = sad;
+
+ for (int iter = 0; iter < wl.hpel_iters; iter++) // half-pel refinement: steps of 2 in quarter-pel units
+ {
+ int bdir = 0;
+ for (int i = 1; i <= wl.hpel_dirs; i++)
+ {
+ MV qmv = bmv + square1[i] * 2;
+
+ // check mv range for slice bound (only the y component is tested)
+ if ((qmv.y < qmvmin.y) | (qmv.y > qmvmax.y))
+ continue;
+
+ int cost = subpelCompare(ref, qmv, hpelcomp) + mvcost(qmv);
+ COPY2_IF_LT(bcost, cost, bdir, i);
+ }
+
+ if (bdir)
+ bmv += square1[bdir] * 2;
+ else
+ break; // no direction improved the cost: stop iterating
+ }
+
+ /* if HPEL search used SAD, remeasure with SATD before QPEL */
+ if (!wl.hpel_satd)
+ bcost = subpelCompare(ref, bmv, satd) + mvcost(bmv);
+
+ for (int iter = 0; iter < wl.qpel_iters; iter++) // quarter-pel refinement: steps of 1 in quarter-pel units
+ {
+ int bdir = 0;
+ for (int i = 1; i <= wl.qpel_dirs; i++)
+ {
+ MV qmv = bmv + square1[i];
+
+ // check mv range for slice bound (only the y component is tested)
+ if ((qmv.y < qmvmin.y) | (qmv.y > qmvmax.y))
+ continue;
+
+ int cost = subpelCompare(ref, qmv, satd) + mvcost(qmv);
+ COPY2_IF_LT(bcost, cost, bdir, i);
+ }
+
+ if (bdir)
+ bmv += square1[bdir];
+ else
+ break; // no direction improved the cost: stop iterating
+ }
+
+ // check mv range for slice bound
+ X265_CHECK(((pmv.y >= qmvmin.y) & (pmv.y <= qmvmax.y)), "mv beyond range!"); // NOTE(review): tests pmv (the clipped start point), not the final bmv - confirm intended
+
+ x265_emms();
+ outQMv = bmv;
+}
+
int MotionEstimate::motionEstimate(ReferencePlanes *ref,
const MV & mvmin,
const MV & mvmax,
diff -r de49a722b256 -r e75d5f5eeae3 source/encoder/motion.h
--- a/source/encoder/motion.h Wed May 24 20:01:59 2017 +0530
+++ b/source/encoder/motion.h Mon Jun 05 15:20:44 2017 +0530
@@ -92,6 +92,7 @@ public:
chromaSatd(refYuv.getCrAddr(puPartIdx), refYuv.m_csize, fencPUYuv.m_buf[2], fencPUYuv.m_csize);
}
+ void refineMV(ReferencePlanes* ref, const MV& mvmin, const MV& mvmax, const MV& qmvp, MV& outQMv);
int motionEstimate(ReferencePlanes* ref, const MV & mvmin, const MV & mvmax, const MV & qmvp, int numCandidates, const MV * mvc, int merange, MV & outQMv, pixel *srcReferencePlane = 0);
int subpelCompare(ReferencePlanes* ref, const MV &qmv, pixelcmp_t);
diff -r de49a722b256 -r e75d5f5eeae3 source/encoder/search.cpp
--- a/source/encoder/search.cpp Wed May 24 20:01:59 2017 +0530
+++ b/source/encoder/search.cpp Mon Jun 05 15:20:44 2017 +0530
@@ -2108,6 +2108,17 @@ void Search::singleMotionEstimation(Sear
}
}
+void Search::searchMV(Mode& interMode, const PredictionUnit& pu, int list, int ref, MV& outmv) // Refines the MV already stored for this PU in `list` against reference `ref`; the result is written to outmv.
+{
+ CUData& cu = interMode.cu;
+ const Slice *slice = m_slice;
+ MV mv = cu.m_mv[list][pu.puAbsPartIdx]; // starting point: the MV currently stored in the CU for this PU
+ cu.clipMv(mv); // presumably clips mv in place (return value unused) - confirm against CUData::clipMv
+ MV mvmin, mvmax; // filled in by setSearchRange() below
+ setSearchRange(cu, mv, m_param->searchRange, mvmin, mvmax); // search window derived from mv and the configured search range
+ m_me.refineMV(&slice->m_mref[list][ref], mvmin, mvmax, mv, outmv); // mv serves as the start MV passed in refineMV's qmvp argument
+}
+
/* find the best inter prediction for each PU of specified mode */
void Search::predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bChromaMC, uint32_t refMasks[2])
{
diff -r de49a722b256 -r e75d5f5eeae3 source/encoder/search.h
--- a/source/encoder/search.h Wed May 24 20:01:59 2017 +0530
+++ b/source/encoder/search.h Mon Jun 05 15:20:44 2017 +0530
@@ -311,6 +311,7 @@ public:
// estimation inter prediction (non-skip)
void predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bChromaMC, uint32_t masks[2]);
More information about the x265-commits
mailing list