[x265] [PATCH] analysis: add an additional round of sub-pel refinement for inter 2Nx2N in rd 5 and 6
santhoshini at multicorewareinc.com
santhoshini at multicorewareinc.com
Thu May 21 06:45:10 CEST 2015
# HG changeset patch
# User Santhoshini Sekar<santhoshini at multicorewareinc.com>
# Date 1432182660 -19800
# Thu May 21 10:01:00 2015 +0530
# Node ID 630b378b744f4bf442839680f5120d7d299d2acd
# Parent dc4fcfc574ade14ecc841797ad08be9753fad58e
analysis: add an additional round of sub-pel refinement for inter 2Nx2N in rd 5 and 6
diff -r dc4fcfc574ad -r 630b378b744f source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Wed May 20 12:17:44 2015 -0500
+++ b/source/encoder/analysis.cpp Thu May 21 10:01:00 2015 +0530
@@ -742,6 +742,24 @@
cuStat.avgCost[depth] = (temp + md.bestMode->rdCost) / cuStat.count[depth];
}
+ /* Only run the extra sub-pel refinement when the best mode is a non-merge inter 2Nx2N CU coded at this depth */
+ bool subpelRefine = !!(md.bestMode->cu.m_predMode[0] & MODE_INTER) && !(md.bestMode->cu.m_mergeFlag[0]) && (md.bestMode->cu.m_partSize[0] == SIZE_2Nx2N) && (md.bestMode->cu.m_cuDepth[0] == depth);
+ if (subpelRefine && m_param->rdLevel > 4)
+ {
+ int hpelDirs = MotionEstimate::hpelDirCount(m_param->subpelRefine);
+ if (m_slice->m_pps->bUseDQP && depth <= m_slice->m_pps->maxCuDQPDepth)
+ setLambdaFromQP(parentCTU, calculateQpforCuSize(parentCTU, cuGeom));
+ uint64_t bcost = md.bestMode->rdCost;
+ int bdir = 0;
+ for (int i = 1; i <= hpelDirs; i++)
+ {
+ qPelRefine(*md.bestMode, cuGeom, true, i);
+ encodeResAndCalcRdInterCU(*md.bestMode, cuGeom);
+ COPY2_IF_LT(bcost, md.bestMode->rdCost, bdir, i);
+ }
+ qPelRefine(*md.bestMode, cuGeom, true, bdir);
+ encodeResAndCalcRdInterCU(*md.bestMode, cuGeom);
+ }
/* Copy best data to encData CTU and recon */
md.bestMode->cu.copyToPic(depth);
if (md.bestMode != &md.pred[PRED_SPLIT])
@@ -1312,6 +1330,24 @@
checkBestMode(*splitPred, depth);
}
+ /* Only run the extra sub-pel refinement when the best mode is a non-merge inter 2Nx2N CU coded at this depth */
+ bool subpelRefine = !!(md.bestMode->cu.m_predMode[0] & MODE_INTER) && !(md.bestMode->cu.m_mergeFlag[0]) && (md.bestMode->cu.m_partSize[0] == SIZE_2Nx2N) && (md.bestMode->cu.m_cuDepth[0] == depth);
+ if (subpelRefine)
+ {
+ int hpelDirs = MotionEstimate::hpelDirCount(m_param->subpelRefine);
+ uint64_t bcost = md.bestMode->rdCost;
+ int bdir = 0;
+ if (m_slice->m_pps->bUseDQP && depth <= m_slice->m_pps->maxCuDQPDepth)
+ setLambdaFromQP(parentCTU, calculateQpforCuSize(parentCTU, cuGeom));
+ for (int i = 1; i <= hpelDirs; i++)
+ {
+ qPelRefine(*md.bestMode, cuGeom, true, i);
+ encodeResAndCalcRdInterCU(*md.bestMode, cuGeom);
+ COPY2_IF_LT(bcost, md.bestMode->rdCost, bdir, i);
+ }
+ qPelRefine(*md.bestMode, cuGeom, true, bdir);
+ encodeResAndCalcRdInterCU(*md.bestMode, cuGeom);
+ }
/* Copy best data to encData CTU and recon */
md.bestMode->cu.copyToPic(depth);
if (md.bestMode != &md.pred[PRED_SPLIT])
diff -r dc4fcfc574ad -r 630b378b744f source/encoder/motion.cpp
--- a/source/encoder/motion.cpp Wed May 20 12:17:44 2015 -0500
+++ b/source/encoder/motion.cpp Thu May 21 10:01:00 2015 +0530
@@ -155,6 +155,11 @@
workload[subme].qpel_iters / 2;
}
+/* Return the hpel_dirs field of the workload table entry selected by the
+ * given subme level. No range check is performed on subme. */
+int MotionEstimate::hpelDirCount(int subme)
+{
+    const SubpelWorkload& wl = workload[subme];
+    return wl.hpel_dirs;
+}
+
MotionEstimate::~MotionEstimate()
{
fencPUYuv.destroy();
@@ -1205,6 +1210,49 @@
return bcost;
}
+/* Evaluate a single half-pel candidate around the full-pel rounding of 'mv',
+ * then refine that candidate at quarter-pel precision.
+ *
+ * ref        - reference planes handed to subpelCompare()
+ * mvmin/max  - search bounds; treated as full-pel units here (the previous
+ *              code promoted them with toQPel() before use)
+ * mvp        - motion vector predictor, installed with setMVP() so that
+ *              mvcost() is measured against it
+ * mv         - starting motion vector; presumably quarter-pel units since it
+ *              is rounded with roundToFPel() - TODO confirm against callers
+ * outQMv     - receives the best motion vector found
+ * halfPelIdx - index into square1[] selecting the half-pel offset to test
+ *
+ * Returns subpelCompare() SATD cost plus mvcost() for outQMv. */
+int MotionEstimate::qPelCompare(ReferencePlanes *ref,
+                                const MV & mvmin,
+                                const MV & mvmax,
+                                const MV& mvp,
+                                const MV& mv,
+                                MV & outQMv,
+                                int halfPelIdx)
+{
+    setMVP(mvp);
+
+    /* Clip while still in full-pel units, so the bounds and the vector share
+     * the same scale; the old code clipped a full-pel vector against
+     * quarter-pel bounds. */
+    MV fmv = mv.roundToFPel().clipped(mvmin, mvmax);
+    const SubpelWorkload& wl = workload[this->subpelRefine];
+
+    /* Promote to quarter-pel before adding sub-pel offsets: square1[] * 2 is
+     * a half-pel step and square1[] a quarter-pel step on the qpel grid. */
+    MV bmv = fmv.toQPel() + square1[halfPelIdx] * 2;
+    int bcost = subpelCompare(ref, bmv, satd) + mvcost(bmv);
+
+    for (int iter = 0; iter < wl.qpel_iters; iter++)
+    {
+        int bdir = 0;
+        for (int i = 1; i <= wl.qpel_dirs; i++)
+        {
+            /* Probe around the current best, not the initial half-pel point,
+             * so that later iterations can actually make progress. */
+            MV qmv = bmv + square1[i];
+            int cost = subpelCompare(ref, qmv, satd) + mvcost(qmv);
+            COPY2_IF_LT(bcost, cost, bdir, i);
+        }
+
+        if (bdir)
+            bmv += square1[bdir];
+        else
+            break; /* no neighbour improved the cost; refinement converged */
+    }
+
+    x265_emms();
+    outQMv = bmv;
+    return bcost;
+}
+
int MotionEstimate::subpelCompare(ReferencePlanes *ref, const MV& qmv, pixelcmp_t cmp)
{
intptr_t refStride = ref->lumaStride;
diff -r dc4fcfc574ad -r 630b378b744f source/encoder/motion.h
--- a/source/encoder/motion.h Wed May 20 12:17:44 2015 -0500
+++ b/source/encoder/motion.h Thu May 21 10:01:00 2015 +0530
@@ -69,6 +69,7 @@
static void initScales();
static int hpelIterationCount(int subme);
+ static int hpelDirCount(int subme);
void init(int method, int refine, int csp);
/* Methods called at slice setup */
@@ -90,6 +91,7 @@
}
int motionEstimate(ReferencePlanes* ref, const MV & mvmin, const MV & mvmax, const MV & qmvp, int numCandidates, const MV * mvc, int merange, MV & outQMv);
+ int qPelCompare(ReferencePlanes* ref, const MV & mvmin, const MV & mvmax, const MV & mvp, const MV & mv, MV & outQMv, int halfPelIdx);
int subpelCompare(ReferencePlanes* ref, const MV &qmv, pixelcmp_t);
diff -r dc4fcfc574ad -r 630b378b744f source/encoder/search.cpp
--- a/source/encoder/search.cpp Wed May 20 12:17:44 2015 -0500
+++ b/source/encoder/search.cpp Thu May 21 10:01:00 2015 +0530
@@ -2348,6 +2348,54 @@
interMode.sa8dBits += totalmebits;
}
+/* Re-evaluate the motion of PU 0 of an inter mode at one half-pel direction.
+ *
+ * For every reference list enabled in cu.m_interDir[0], qPelCompare() is run
+ * from that list's current best vector, and bestME[list] (mv, cost, bits) is
+ * replaced when the new cost is lower. The prediction in interMode.predYuv is
+ * then regenerated with motionCompensation().
+ *
+ * interMode   - mode whose bestME[0][] entries and predYuv are updated
+ * cuGeom      - geometry of the CU, used to build the PredictionUnit
+ * bChromaSA8D - forwarded as the last argument of motionCompensation()
+ * halfpelIdx  - half-pel direction index forwarded to qPelCompare() */
+void Search::qPelRefine(Mode& interMode, const CUGeom& cuGeom, bool bChromaSA8D, int halfpelIdx)
+{
+    CUData& cu = interMode.cu;
+    Yuv* predYuv = &interMode.predYuv;
+
+    const Slice *slice = m_slice;
+    uint32_t interDir = cu.m_interDir[0];
+
+    const int* numRefIdx = slice->m_numRefIdx;
+
+    MotionData* bestME = interMode.bestME[0];
+    PredictionUnit pu(cu, cuGeom, 0);
+
+    for (uint32_t list = 0; list < 2; list++)
+    {
+        if (!(interDir & (1 << list)))
+            continue; /* this reference list is not used by the PU */
+
+        int ref = bestME[list].ref;
+        uint32_t bits = m_listSelBits[list] + MVP_IDX_BITS;
+        bits += getTUBits(ref, numRefIdx[list]);
+
+        int merange = m_param->searchRange;
+
+        /* Start from this list's own vector and predictor; the previous code
+         * always read list 0, which is wrong when refining list 1. */
+        MV mvmin, mvmax, outmv;
+        MV mvp = bestME[list].mvp;
+        MV mv = bestME[list].mv;
+
+        setSearchRange(cu, mv, merange, mvmin, mvmax);
+        int satdCost = m_me.qPelCompare(&slice->m_mref[list][ref], mvmin, mvmax, mvp, mv, outmv, halfpelIdx);
+
+        /* Get total cost of partition, but only include MV bit cost once */
+        bits += m_me.bitcost(outmv);
+        uint32_t cost = (satdCost - m_me.mvcost(outmv)) + m_rdCost.getCost(bits);
+
+        if (cost < bestME[list].cost)
+        {
+            bestME[list].mv = outmv;
+            bestME[list].cost = cost;
+            bestME[list].bits = bits;
+        }
+    }
+
+    /* NOTE(review): nothing visible in this function copies bestME[list].mv
+     * back into 'cu' before the prediction is rebuilt - confirm whether
+     * motionCompensation() picks up the refined vector. */
+    motionCompensation(cu, pu, *predYuv, true, bChromaSA8D);
+
+    X265_CHECK(interMode.ok(), "inter mode is not ok");
+}
+
void Search::getBlkBits(PartSize cuMode, bool bPSlice, int partIdx, uint32_t lastMode, uint32_t blockBit[3])
{
if (cuMode == SIZE_2Nx2N)
diff -r dc4fcfc574ad -r 630b378b744f source/encoder/search.h
--- a/source/encoder/search.h Wed May 20 12:17:44 2015 -0500
+++ b/source/encoder/search.h Thu May 21 10:01:00 2015 +0530
@@ -311,6 +311,7 @@
// estimation inter prediction (non-skip)
void predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bChromaMC, uint32_t masks[2]);
+ void qPelRefine(Mode& interMode, const CUGeom& cuGeom, bool bChroma, int halfpelIdx);
// encode residual and compute rd-cost for inter mode
void encodeResAndCalcRdInterCU(Mode& interMode, const CUGeom& cuGeom);
More information about the x265-devel
mailing list