[x265] [PATCH] search: move MVP Index selection to motion estimate to avoid code duplication
gopu at multicorewareinc.com
gopu at multicorewareinc.com
Wed Apr 29 09:43:43 CEST 2015
# HG changeset patch
# User Gopu Govindaswamy <gopu at multicorewareinc.com>
# Date 1430293410 -19800
# Wed Apr 29 13:13:30 2015 +0530
# Node ID cc6da5218a188ded2d239bb8dbf6c3399978c93f
# Parent c4d9ee2cef03ef74f5623784d514ffcdf725bec4
search: move MVP Index selection to motion estimate to avoid code duplication
diff -r c4d9ee2cef03 -r cc6da5218a18 source/encoder/motion.cpp
--- a/source/encoder/motion.cpp Tue Apr 28 14:34:45 2015 -0500
+++ b/source/encoder/motion.cpp Wed Apr 29 13:13:30 2015 +0530
@@ -1280,3 +1280,45 @@
return cost;
}
+
+/* Return the index of the AMVP candidate whose luma prediction from refPic has the
+ * lowest bufSAD() cost (lowest index wins ties); dstYuv is scratch for the prediction. */
+int MotionEstimate::getBestmvpIdx(const PredictionUnit& pu, Yuv& dstYuv, const PicYuv& refPic, const MV* amvp, const int merange, const bool bFrameParallel)
+{
+    // identical candidates cannot be told apart, so skip the interpolation work entirely
+    if (amvp[0] == amvp[1])
+        return 0;
+
+    pixel* dst = dstYuv.getLumaAddr(pu.puAbsPartIdx);
+    const intptr_t dstStride = dstYuv.m_size;
+    const intptr_t srcStride = refPic.m_stride;
+    int mvpidx = 0; // also the result when every candidate is skipped below
+    int mvpcost = MotionEstimate::COST_MAX;
+    for (int i = 0; i < AMVP_NUM_CANDS; i++)
+    {
+        // NOTE: skip mvCand if Y is > merange and -FN>1
+        if (bFrameParallel && (amvp[i].y >= (merange + 1) * 4))
+            continue;
+
+        // NOTE(review): the search.cpp code this replaces ran clipMv() on the candidate first; confirm unclipped reads are safe
+        const int xFrac = amvp[i].x & 0x3; // low two bits select the horizontal sub-pel filter
+        const int yFrac = amvp[i].y & 0x3; // low two bits select the vertical sub-pel filter
+        const intptr_t srcOffset = (amvp[i].x >> 2) + (amvp[i].y >> 2) * srcStride;
+        const pixel* src = refPic.getLumaAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + srcOffset;
+        if (!(yFrac | xFrac))
+            primitives.pu[partEnum].copy_pp(dst, dstStride, src, srcStride);
+        else if (!yFrac)
+            primitives.pu[partEnum].luma_hpp(src, srcStride, dst, dstStride, xFrac);
+        else if (!xFrac)
+            primitives.pu[partEnum].luma_vpp(src, srcStride, dst, dstStride, yFrac);
+        else
+            primitives.pu[partEnum].luma_hvpp(src, srcStride, dst, dstStride, xFrac, yFrac);
+        const int cost = bufSAD(dst, dstStride);
+        if (cost < mvpcost)
+        {
+            mvpcost = cost;
+            mvpidx = i;
+        }
+    }
+    return mvpidx;
+}
diff -r c4d9ee2cef03 -r cc6da5218a18 source/encoder/motion.h
--- a/source/encoder/motion.h Tue Apr 28 14:34:45 2015 -0500
+++ b/source/encoder/motion.h Wed Apr 29 13:13:30 2015 +0530
@@ -29,6 +29,7 @@
#include "mv.h"
#include "bitcost.h"
#include "yuv.h"
+#include "predict.h"
namespace x265 {
// private x265 namespace
@@ -93,6 +94,8 @@
int subpelCompare(ReferencePlanes* ref, const MV &qmv, pixelcmp_t);
+ int getBestmvpIdx(const PredictionUnit& pu, Yuv& dstYuv, const PicYuv& refPic, const MV* amvp, const int merange, const bool bFrameParallel);
+
protected:
inline void StarPatternSearch(ReferencePlanes *ref,
diff -r c4d9ee2cef03 -r cc6da5218a18 source/encoder/search.cpp
--- a/source/encoder/search.cpp Tue Apr 28 14:34:45 2015 -0500
+++ b/source/encoder/search.cpp Wed Apr 29 13:13:30 2015 +0530
@@ -1923,38 +1923,17 @@
MV mvc[(MD_ABOVE_LEFT + 1) * 2 + 1];
int numMvc = interMode.cu.getPMV(interMode.interNeighbours, list, ref, interMode.amvpCand[list][ref], mvc);
- int mvpIdx = 0;
int merange = m_param->searchRange;
MotionData* bestME = interMode.bestME[part];
-
- if (interMode.amvpCand[list][ref][0] != interMode.amvpCand[list][ref][1])
- {
- uint32_t bestCost = MAX_INT;
- for (int i = 0; i < AMVP_NUM_CANDS; i++)
- {
- MV mvCand = interMode.amvpCand[list][ref][i];
-
- // NOTE: skip mvCand if Y is > merange and -FN>1
- if (m_bFrameParallel && (mvCand.y >= (merange + 1) * 4))
- continue;
-
- interMode.cu.clipMv(mvCand);
-
- Yuv& tmpPredYuv = m_rqt[cuGeom.depth].tmpPredYuv;
- predInterLumaPixel(pu, tmpPredYuv, *m_slice->m_refPicList[list][ref]->m_reconPic, mvCand);
- uint32_t cost = m_me.bufSAD(tmpPredYuv.getLumaAddr(pu.puAbsPartIdx), tmpPredYuv.m_size);
-
- if (bestCost > cost)
- {
- bestCost = cost;
- mvpIdx = i;
- }
- }
- }
-
- MV mvmin, mvmax, outmv, mvp = interMode.amvpCand[list][ref][mvpIdx];
+ MV mvmin, mvmax, outmv, mvp;
+ MV *amvpCand = interMode.amvpCand[list][ref];
+
+ // Pick the best possible MVP IDX from AMVP candidates based on least residual
+ Yuv& tmpPredYuv = m_rqt[cuGeom.depth].tmpPredYuv;
+ int mvpIdx = m_me.getBestmvpIdx(pu, tmpPredYuv, *m_slice->m_refPicList[list][ref]->m_reconPic, amvpCand, merange, m_bFrameParallel);
+ mvp = amvpCand[mvpIdx];
+
setSearchRange(interMode.cu, mvp, merange, mvmin, mvmax);
-
int satdCost = m_me.motionEstimate(&m_slice->m_mref[list][ref], mvmin, mvmax, mvp, numMvc, mvc, merange, outmv);
/* Get total cost of partition, but only include MV bit cost once */
@@ -1962,7 +1941,7 @@
uint32_t cost = (satdCost - m_me.mvcost(outmv)) + m_rdCost.getCost(bits);
/* Refine MVP selection, updates: mvp, mvpIdx, bits, cost */
- checkBestMVP(interMode.amvpCand[list][ref], outmv, mvp, mvpIdx, bits, cost);
+ checkBestMVP(amvpCand, outmv, mvp, mvpIdx, bits, cost);
/* tie goes to the smallest ref ID, just like --no-pme */
ScopedLock _lock(master.m_meLock);
@@ -2051,46 +2030,23 @@
bits += getTUBits(ref, numRefIdx[list]);
int numMvc = cu.getPMV(interMode.interNeighbours, list, ref, interMode.amvpCand[list][ref], mvc);
-
- // Pick the best possible MVP from AMVP candidates based on least residual
- int mvpIdx = 0;
int merange = m_param->searchRange;
-
- if (interMode.amvpCand[list][ref][0] != interMode.amvpCand[list][ref][1])
- {
- uint32_t bestCost = MAX_INT;
- for (int i = 0; i < AMVP_NUM_CANDS; i++)
- {
- MV mvCand = interMode.amvpCand[list][ref][i];
-
- // NOTE: skip mvCand if Y is > merange and -FN>1
- if (m_bFrameParallel && (mvCand.y >= (merange + 1) * 4))
- continue;
-
- cu.clipMv(mvCand);
- predInterLumaPixel(pu, tmpPredYuv, *slice->m_refPicList[list][ref]->m_reconPic, mvCand);
- uint32_t cost = m_me.bufSAD(tmpPredYuv.getLumaAddr(pu.puAbsPartIdx), tmpPredYuv.m_size);
-
- if (bestCost > cost)
- {
- bestCost = cost;
- mvpIdx = i;
- }
- }
- }
-
- MV mvmin, mvmax, outmv, mvp = interMode.amvpCand[list][ref][mvpIdx];
-
- int satdCost;
+ MV mvmin, mvmax, outmv, mvp;
+ MV *amvpCand = interMode.amvpCand[list][ref];
+
+ // Pick the best possible MVP IDX from AMVP candidates based on least residual
+ int mvpIdx = m_me.getBestmvpIdx(pu, tmpPredYuv, *slice->m_refPicList[list][ref]->m_reconPic, amvpCand, merange, m_bFrameParallel);
+ mvp = amvpCand[mvpIdx];
+
setSearchRange(cu, mvp, merange, mvmin, mvmax);
- satdCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvp, numMvc, mvc, merange, outmv);
+ int satdCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvp, numMvc, mvc, merange, outmv);
/* Get total cost of partition, but only include MV bit cost once */
bits += m_me.bitcost(outmv);
uint32_t cost = (satdCost - m_me.mvcost(outmv)) + m_rdCost.getCost(bits);
/* Refine MVP selection, updates: mvp, mvpIdx, bits, cost */
- checkBestMVP(interMode.amvpCand[list][ref], outmv, mvp, mvpIdx, bits, cost);
+ checkBestMVP(amvpCand, outmv, mvp, mvpIdx, bits, cost);
if (cost < bestME[list].cost)
{
@@ -2134,35 +2090,13 @@
bits += getTUBits(ref, numRefIdx[list]);
int numMvc = cu.getPMV(interMode.interNeighbours, list, ref, interMode.amvpCand[list][ref], mvc);
-
- // Pick the best possible MVP from AMVP candidates based on least residual
- int mvpIdx = 0;
int merange = m_param->searchRange;
-
- if (interMode.amvpCand[list][ref][0] != interMode.amvpCand[list][ref][1])
- {
- uint32_t bestCost = MAX_INT;
- for (int i = 0; i < AMVP_NUM_CANDS; i++)
- {
- MV mvCand = interMode.amvpCand[list][ref][i];
-
- // NOTE: skip mvCand if Y is > merange and -FN>1
- if (m_bFrameParallel && (mvCand.y >= (merange + 1) * 4))
- continue;
-
- cu.clipMv(mvCand);
- predInterLumaPixel(pu, tmpPredYuv, *slice->m_refPicList[list][ref]->m_reconPic, mvCand);
- uint32_t cost = m_me.bufSAD(tmpPredYuv.getLumaAddr(pu.puAbsPartIdx), tmpPredYuv.m_size);
-
- if (bestCost > cost)
- {
- bestCost = cost;
- mvpIdx = i;
- }
- }
- }
-
- MV mvmin, mvmax, outmv, mvp = interMode.amvpCand[list][ref][mvpIdx];
+ MV mvmin, mvmax, outmv, mvp;
+ MV *amvpCand = interMode.amvpCand[list][ref];
+
+ // Pick the best possible MVP IDX from AMVP candidates based on least residual
+ int mvpIdx = m_me.getBestmvpIdx(pu, tmpPredYuv, *slice->m_refPicList[list][ref]->m_reconPic, amvpCand, merange, m_bFrameParallel);
+ mvp = amvpCand[mvpIdx];
setSearchRange(cu, mvp, merange, mvmin, mvmax);
int satdCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvp, numMvc, mvc, merange, outmv);
@@ -2172,7 +2106,7 @@
uint32_t cost = (satdCost - m_me.mvcost(outmv)) + m_rdCost.getCost(bits);
/* Refine MVP selection, updates: mvp, mvpIdx, bits, cost */
- checkBestMVP(interMode.amvpCand[list][ref], outmv, mvp, mvpIdx, bits, cost);
+ checkBestMVP(amvpCand, outmv, mvp, mvpIdx, bits, cost);
if (cost < bestME[list].cost)
{
More information about the x265-devel
mailing list