[x265] [PATCH] cleanup: reduce data size and dependency on MotionEstimate
Steve Borho
steve at borho.org
Tue Apr 15 20:26:50 CEST 2014
On Tue, Apr 15, 2014 at 6:30 AM, Min Chen <chenm003 at 163.com> wrote:
> # HG changeset patch
> # User Min Chen <chenm003 at 163.com>
> # Date 1397561438 -28800
> # Node ID dd78d554f78dd785cb8b16a6606b5fe6b6e87e2a
> # Parent 1cf67a7b362d24d292d7cca574cbcfe88a8eb1cb
> cleanup: reduce data size and dependency on MotionEstimate
>
> diff -r 1cf67a7b362d -r dd78d554f78d source/Lib/TLibEncoder/TEncSearch.cpp
> --- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Apr 14 21:26:37 2014 -0500
> +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Tue Apr 15 19:30:38 2014 +0800
> @@ -111,8 +111,6 @@
> m_rdCost = rdCost;
>
> initTempBuff(cfg->param->internalCsp);
> - m_me.setSearchMethod(cfg->param->searchMethod);
> - m_me.setSubpelRefine(cfg->param->subpelRefine);
>
> /* When frame parallelism is active, only 'refLagPixels' of reference frames will be guaranteed
> * available for motion reference. See refLagRows in FrameEncoder::compressCTURows() */
> @@ -2191,7 +2189,7 @@
> cu->getCUMvField(REF_PIC_LIST_1)->m_refIdx[m.absPartIdx] = m.mvFieldNeighbours[1 + 2 * mergeCand].refIdx;
>
> motionCompensation(cu, &m_predTempYuv, REF_PIC_LIST_X, puIdx, true, false);
> - uint32_t costCand = m_me.bufSATD(m_predTempYuv.getLumaAddr(m.absPartIdx), m_predTempYuv.getStride());
> + uint32_t costCand = m_me.satd(m_me.fenc, FENC_STRIDE, m_predTempYuv.getLumaAddr(m.absPartIdx), m_predTempYuv.getStride());
> uint32_t bitsCand = mergeCand + 1;
> if (mergeCand == m_cfg->param->maxNumMergeCand - 1)
> {
> @@ -2314,7 +2312,7 @@
> cu->clipMv(mvCand);
>
> xPredInterLumaBlk(cu, cu->getSlice()->getRefPic(l, ref)->getPicYuvRec(), partAddr, &mvCand, roiWidth, roiHeight, &m_predTempYuv);
> - uint32_t cost = m_me.bufSAD(m_predTempYuv.getLumaAddr(partAddr), m_predTempYuv.getStride());
> + uint32_t cost = m_me.sad(m_me.fenc, FENC_STRIDE, m_predTempYuv.getLumaAddr(partAddr), m_predTempYuv.getStride());
> cost = m_rdCost->calcRdSADCost(cost, MVP_IDX_BITS);
>
> if (bestCost > cost)
> @@ -2328,11 +2326,11 @@
>
> int merange = m_cfg->param->searchRange;
> xSetSearchRange(cu, mvp, merange, mvmin, mvmax);
> - int satdCost = m_me.motionEstimate(m_mref[l][ref], mvmin, mvmax, mvp, numMvc, mvc, merange, outmv);
> + int satdCost = m_me.motionEstimate(m_mref[l][ref], mvmin, mvmax, mvp, numMvc, mvc, merange, outmv, m_cfg->param->searchMethod, m_cfg->param->subpelRefine);
>
> /* Get total cost of partition, but only include MV bit cost once */
> - bits += m_me.bitcost(outmv);
> - uint32_t cost = (satdCost - m_me.mvcost(outmv)) + m_rdCost->getCost(bits);
> + bits += m_me.bitcost(outmv, mvp);
> + uint32_t cost = (satdCost - m_me.mvcost(outmv, mvp)) + m_rdCost->getCost(bits);
>
> /* Refine MVP selection, updates: mvp, mvpIdx, bits, cost */
> xCheckBestMVP(&amvpInfo[l][ref], outmv, mvp, mvpIdx, bits, cost);
> @@ -2368,7 +2366,7 @@
>
> int partEnum = partitionFromSizes(roiWidth, roiHeight);
> primitives.pixelavg_pp[partEnum](avg, roiWidth, pred0, m_predYuv[0].getStride(), pred1, m_predYuv[1].getStride(), 32);
> - int satdCost = m_me.bufSATD(avg, roiWidth);
> + int satdCost = m_me.satd(m_me.fenc, FENC_STRIDE, avg, roiWidth);
>
> bidirBits = list[0].bits + list[1].bits + listSelBits[2] - (listSelBits[0] + listSelBits[1]);
> bidirCost = satdCost + m_rdCost->getCost(bidirBits);
> @@ -2397,17 +2395,15 @@
> intptr_t refStride = m_mref[0][0]->lumaStride;
>
> primitives.pixelavg_pp[partEnum](avg, roiWidth, ref0, refStride, ref1, refStride, 32);
> - satdCost = m_me.bufSATD(avg, roiWidth);
> + satdCost = m_me.satd(m_me.fenc, FENC_STRIDE, avg, roiWidth);
>
> MV mvp0 = list[0].mvp;
> int mvpIdx0 = list[0].mvpIdx;
> - m_me.setMVP(mvp0);
> - uint32_t bits0 = list[0].bits - m_me.bitcost(list[0].mv) + m_me.bitcost(mvzero);
> + uint32_t bits0 = list[0].bits - m_me.bitcost(list[0].mv, mvp0) + m_me.bitcost(mvzero, mvp0);
>
> MV mvp1 = list[1].mvp;
> int mvpIdx1 = list[1].mvpIdx;
> - m_me.setMVP(mvp1);
> - uint32_t bits1 = list[1].bits - m_me.bitcost(list[1].mv) + m_me.bitcost(mvzero);
> + uint32_t bits1 = list[1].bits - m_me.bitcost(list[1].mv, mvp1) + m_me.bitcost(mvzero, mvp1);
>
> uint32_t cost = satdCost + m_rdCost->getCost(bits0) + m_rdCost->getCost(bits1);
>
> @@ -2556,9 +2552,8 @@
> {
> assert(amvpInfo->m_mvCand[outMvpIdx] == mvPred);
>
> - m_me.setMVP(mvPred);
> int bestMvpIdx = outMvpIdx;
> - int mvBitsOrig = m_me.bitcost(mv) + MVP_IDX_BITS;
> + int mvBitsOrig = m_me.bitcost(mv, mvPred) + MVP_IDX_BITS;
> int bestMvBits = mvBitsOrig;
>
> for (int mvpIdx = 0; mvpIdx < AMVP_MAX_NUM_CANDS; mvpIdx++)
> @@ -2566,8 +2561,7 @@
> if (mvpIdx == outMvpIdx)
> continue;
>
> - m_me.setMVP(amvpInfo->m_mvCand[mvpIdx]);
> - int mvbits = m_me.bitcost(mv) + MVP_IDX_BITS;
> + int mvbits = m_me.bitcost(mv, amvpInfo->m_mvCand[mvpIdx]) + MVP_IDX_BITS;
>
> if (mvbits < bestMvBits)
> {
> diff -r 1cf67a7b362d -r dd78d554f78d source/encoder/bitcost.h
> --- a/source/encoder/bitcost.h Mon Apr 14 21:26:37 2014 -0500
> +++ b/source/encoder/bitcost.h Tue Apr 15 19:30:38 2014 +0800
> @@ -35,36 +35,26 @@
> {
> public:
>
> - BitCost() : m_cost_mvx(0), m_cost_mvy(0), m_cost(0) {}
> + BitCost() : m_cost(NULL) {}
>
> void setQP(unsigned int qp);
>
> - void setMVP(const MV& mvp) { m_mvp = mvp; m_cost_mvx = m_cost - mvp.x; m_cost_mvy = m_cost - mvp.y; }
> -
> // return bit cost of motion vector difference, multiplied by lambda
> - inline uint16_t mvcost(const MV& mv) const { return m_cost_mvx[mv.x] + m_cost_mvy[mv.y]; }
> + inline uint16_t mvcost(const MV mv, const MV mvp) const { return m_cost[mv.x - mvp.x] + m_cost[mv.y - mvp.y]; }
>
> // return bit cost of motion vector difference, without lambda
> - inline uint16_t bitcost(const MV& mv) const
> + inline uint16_t bitcost(const MV mv, const MV mvp) const
> {
> - return (uint16_t)(s_bitsizes[(abs(mv.x - m_mvp.x) << 1) + !!(mv.x < m_mvp.x)] +
> - s_bitsizes[(abs(mv.y - m_mvp.y) << 1) + !!(mv.y < m_mvp.y)] + 0.5f);
> + return (uint16_t)(s_bitsizes[(abs(mv.x - mvp.x) << 1) + !!(mv.x < mvp.x)] +
> + s_bitsizes[(abs(mv.y - mvp.y) << 1) + !!(mv.y < mvp.y)] + 0.5f);
> }
>
> static void destroy();
>
> protected:
>
> - uint16_t *m_cost_mvx;
> -
> - uint16_t *m_cost_mvy;
> -
This seems like an odd trade-off to me: it saves 16 bytes at the cost of
two additional subtract operations in every MV cost evaluation.
Does this change measure well as a performance improvement?
--
Steve Borho
More information about the x265-devel mailing list