[x265] [PATCH For REVIEW] Bidir ME: new logic adapted from x264
Steve Borho
steve at borho.org
Mon Sep 30 22:18:22 CEST 2013
On Mon, Sep 30, 2013 at 2:05 AM, <deepthidevaki at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Deepthi Devaki <deepthidevaki at multicorewareinc.com>
> # Date 1380523193 -19800
> # Node ID bb238e8b36007aad896884009c720d26df8775c5
> # Parent 55edc34e253c14d3eccb83a7d1db43774349ff9a
> Bidir ME: new logic adapted from x264
>
> The L0 and L1 MVs from unidirectional ME are used as the bidir MVs. The
> bidir cost is calculated from the average of the two references.
> Performance/PSNR with the new bidir logic, using the command line:
> x265.exe FourPeople_1280x720_60.y4m -f 100 --b-adapt 0 -b 3 --ref 1
> --hash 1 -o four.hevc -r recon.yuv
> New: (2.24 fps), 515.16 kb/s, Global PSNR: 39.704
> Orig: (2.05 fps), 519.47 kb/s, Global PSNR: 39.711
>
> diff -r 55edc34e253c -r bb238e8b3600 source/Lib/TLibEncoder/TEncSearch.cpp
> --- a/source/Lib/TLibEncoder/TEncSearch.cpp Sat Sep 28 22:54:44 2013
> -0500
> +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Sep 30 12:09:53 2013
> +0530
> @@ -2289,7 +2289,7 @@
> * \param bUseRes
> * \returns void
> */
> -void TEncSearch::predInterSearch(TComDataCU* cu, TComYuv* fencYuv,
> TComYuv* predYuv, bool bUseMRG)
> +void TEncSearch::predInterSearch(TComDataCU* cu, TComYuv* /*fencYuv*/,
> TComYuv* predYuv, bool bUseMRG)
> {
> m_predYuv[0].clear();
> m_predYuv[1].clear();
> @@ -2317,7 +2317,6 @@
>
> UInt partAddr;
> int roiWidth, roiHeight;
> - int refStart, refEnd;
>
> PartSize partSize = cu->getPartitionSize(0);
> int bestBiPRefIdxL1 = 0;
> @@ -2483,8 +2482,6 @@
> // Bi-directional prediction
> if ((cu->getSlice()->isInterB()) &&
> (cu->isBipredRestriction(partIdx) == false))
> {
> - UInt motBits[2];
> -
> mvBidir[0] = mv[0];
> mvBidir[1] = mv[1];
> refIdxBidir[0] = refIdx[0];
> @@ -2492,112 +2489,17 @@
>
> ::memcpy(mvPredBi, mvPred, sizeof(mvPred));
> ::memcpy(mvpIdxBi, mvpIdx, sizeof(mvpIdx));
> -
> - if (cu->getSlice()->getMvdL1ZeroFlag())
> - {
> - xCopyAMVPInfo(&amvpInfo[1][bestBiPRefIdxL1],
> cu->getCUMvField(REF_PIC_LIST_1)->getAMVPInfo());
> - cu->setMVPIdxSubParts(bestBiPMvpL1, REF_PIC_LIST_1,
> partAddr, partIdx, cu->getDepth(partAddr));
> - mvpIdxBi[1][bestBiPRefIdxL1] = bestBiPMvpL1;
> - mvPredBi[1][bestBiPRefIdxL1] =
> cu->getCUMvField(REF_PIC_LIST_1)->getAMVPInfo()->m_mvCand[bestBiPMvpL1];
> -
> - mvBidir[1] = mvPredBi[1][bestBiPRefIdxL1];
> - refIdxBidir[1] = bestBiPRefIdxL1;
> -
> cu->getCUMvField(REF_PIC_LIST_1)->setAllMv(mvBidir[1], partSize, partAddr,
> 0, partIdx);
> -
> cu->getCUMvField(REF_PIC_LIST_1)->setAllRefIdx(refIdxBidir[1], partSize,
> partAddr, 0, partIdx);
> - motionCompensation(cu, &m_predYuv[1], REF_PIC_LIST_1,
> partIdx);
> -
> - motBits[0] = bits[0] - mbBits[0];
> - motBits[1] = mbBits[1];
> -
> - if (cu->getSlice()->getNumRefIdx(REF_PIC_LIST_1) > 1)
> - {
> - motBits[1] += bestBiPRefIdxL1 + 1;
> - if (bestBiPRefIdxL1 ==
> cu->getSlice()->getNumRefIdx(REF_PIC_LIST_1) - 1) motBits[1]--;
> - }
> -
> - motBits[1] +=
> m_mvpIdxCost[mvpIdxBi[1][bestBiPRefIdxL1]][AMVP_MAX_NUM_CANDS];
> -
> - bits[2] = mbBits[2] + motBits[0] + motBits[1];
> -
> - mvTemp[1][bestBiPRefIdxL1] = mvBidir[1];
> - }
> - else
> - {
> - motBits[0] = bits[0] - mbBits[0];
> - motBits[1] = bits[1] - mbBits[1];
> - bits[2] = mbBits[2] + motBits[0] + motBits[1];
> - }
> -
> - int refList = 0;
> - if (listCost[0] <= listCost[1])
> - {
> - refList = 1;
> - }
> - else
> - {
> - refList = 0;
> - }
> - if (!cu->getSlice()->getMvdL1ZeroFlag())
> - {
> - cu->getCUMvField(RefPicList(1 -
> refList))->setAllMv(mv[1 - refList], partSize, partAddr, 0, partIdx);
> - cu->getCUMvField(RefPicList(1 -
> refList))->setAllRefIdx(refIdx[1 - refList], partSize, partAddr, 0,
> partIdx);
> - motionCompensation(cu, &m_predYuv[1 - refList],
> RefPicList(1 - refList), partIdx);
> - }
> - RefPicList picList = (refList ? REF_PIC_LIST_1 :
> REF_PIC_LIST_0);
> -
> - if (cu->getSlice()->getMvdL1ZeroFlag())
> - {
> - refList = 0;
> - picList = REF_PIC_LIST_0;
> - }
> -
> - bool bChanged = false;
> -
> - refStart = 0;
> - refEnd = cu->getSlice()->getNumRefIdx(picList) - 1;
> -
> - for (int refIdxTmp = refStart; refIdxTmp <= refEnd;
> refIdxTmp++)
> - {
> - bitsTemp = mbBits[2] + motBits[1 - refList];
> - if (cu->getSlice()->getNumRefIdx(picList) > 1)
> - {
> - bitsTemp += refIdxTmp + 1;
> - if (refIdxTmp ==
> cu->getSlice()->getNumRefIdx(picList) - 1) bitsTemp--;
> - }
> - bitsTemp +=
> m_mvpIdxCost[mvpIdxBi[refList][refIdxTmp]][AMVP_MAX_NUM_CANDS];
> - // call bidir ME
> - xMotionEstimation(cu, fencYuv, partIdx, picList,
> &mvPredBi[refList][refIdxTmp], refIdxTmp, mvTemp[refList][refIdxTmp],
> - bitsTemp, costTemp);
> - xCopyAMVPInfo(&amvpInfo[refList][refIdxTmp],
> cu->getCUMvField(picList)->getAMVPInfo());
> - xCheckBestMVP(cu, picList,
> mvTemp[refList][refIdxTmp], mvPredBi[refList][refIdxTmp],
> mvpIdxBi[refList][refIdxTmp],
> - bitsTemp, costTemp);
> -
> - if (costTemp < costbi)
> - {
> - bChanged = true;
> -
> - mvBidir[refList] = mvTemp[refList][refIdxTmp];
> - refIdxBidir[refList] = refIdxTmp;
> -
> - costbi = costTemp;
> - motBits[refList] = bitsTemp - mbBits[2] -
> motBits[1 - refList];
> - bits[2] = bitsTemp;
> - }
> - } // for loop-refIdxTmp
> -
> - if (!bChanged)
> - {
> - if (costbi <= listCost[0] && costbi <= listCost[1])
> - {
> - xCopyAMVPInfo(&amvpInfo[0][refIdxBidir[0]],
> cu->getCUMvField(REF_PIC_LIST_0)->getAMVPInfo());
> - xCheckBestMVP(cu, REF_PIC_LIST_0, mvBidir[0],
> mvPredBi[0][refIdxBidir[0]], mvpIdxBi[0][refIdxBidir[0]], bits[2], costbi);
> - if (!cu->getSlice()->getMvdL1ZeroFlag())
> - {
> - xCopyAMVPInfo(&amvpInfo[1][refIdxBidir[1]],
> cu->getCUMvField(REF_PIC_LIST_1)->getAMVPInfo());
> - xCheckBestMVP(cu, REF_PIC_LIST_1, mvBidir[1],
> mvPredBi[1][refIdxBidir[1]], mvpIdxBi[1][refIdxBidir[1]], bits[2], costbi);
> - }
> - }
> - }
> +
> + pixel *ref0,*ref1;
> + ref0 =
> cu->getSlice()->m_mref[REF_PIC_LIST_0][refIdx[0]]->fpelPlane + (pu -
> fenc->getLumaAddr()) + (mv[0].x >> 2) + (mv[0].y >> 2) * fenc->getStride();
> + ref1 =
> cu->getSlice()->m_mref[REF_PIC_LIST_1][refIdx[1]]->fpelPlane + (pu -
> fenc->getLumaAddr()) + (mv[1].x >> 2) + (mv[1].y >> 2) * fenc->getStride();
> +
> + pixel avg[MAX_CU_SIZE * MAX_CU_SIZE];
> +
> + int partEnum = PartitionFromSizes(roiWidth, roiHeight);
> + primitives.pixelavg_pp[partEnum](avg, roiWidth, ref0,
> ref1, fenc->getStride(), fenc->getStride());
> + int satdCost = primitives.satd[partEnum](pu,
> fenc->getStride(), avg, roiWidth);
> + costbi = satdCost + m_rdCost->getCost(bits[0]) +
> m_rdCost->getCost(bits[1]);
> } // if (B_SLICE)
> } //end if bTestNormalMC
>
> diff -r 55edc34e253c -r bb238e8b3600 source/common/pixel.cpp
> --- a/source/common/pixel.cpp Sat Sep 28 22:54:44 2013 -0500
> +++ b/source/common/pixel.cpp Mon Sep 30 12:09:53 2013 +0530
> @@ -578,6 +578,21 @@
> }
> }
>
> +template<int lx, int ly>
> +void pixelavg_pp(pixel* dst, intptr_t dstride, pixel* src0, pixel* src1,
> intptr_t sstride0, intptr_t sstride1)
> +{
> + for( int y = 0; y < ly; y++ )
>
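white-space: this does not match the `for (...)` spacing used in the rest of the code (no space padding inside the parentheses)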
> + {
> + for( int x = 0; x < lx; x++ )
> + {
> + dst[x] = ( src0[x] + src1[x] + 1 ) >> 1;
> + }
> + src0 += sstride0;
> + src1 += sstride1;
> + dst += dstride;
> + }
> +}
>
lowres QPEL should be modified to use this (once we have an ASM/SIMD
version)
> +
> void scale1D_128to64(pixel *dst, pixel *src, intptr_t /*stride*/)
> {
> int x;
> @@ -647,6 +662,7 @@
> SET_FUNC_PRIMITIVE_TABLE_C2(sad)
> SET_FUNC_PRIMITIVE_TABLE_C2(sad_x3)
> SET_FUNC_PRIMITIVE_TABLE_C2(sad_x4)
> + SET_FUNC_PRIMITIVE_TABLE_C2(pixelavg_pp)
>
> // satd
> p.satd[PARTITION_4x4] = satd_4x4;
> diff -r 55edc34e253c -r bb238e8b3600 source/common/primitives.h
> --- a/source/common/primitives.h Sat Sep 28 22:54:44 2013 -0500
> +++ b/source/common/primitives.h Mon Sep 30 12:09:53 2013 +0530
> @@ -196,6 +196,7 @@
> typedef void (*pixelsub_sp_t)(int bx, int by, short *dst, intptr_t
> dstride, pixel *src0, pixel *src1, intptr_t sstride0, intptr_t sstride1);
> typedef void (*pixeladd_ss_t)(int bx, int by, short *dst, intptr_t
> dstride, short *src0, short *src1, intptr_t sstride0, intptr_t sstride1);
> typedef void (*pixeladd_pp_t)(int bx, int by, pixel *dst, intptr_t
> dstride, pixel *src0, pixel *src1, intptr_t sstride0, intptr_t sstride1);
> +typedef void (*pixelavg_pp_t)(pixel *dst, intptr_t dstride, pixel *src0,
> pixel *src1, intptr_t sstride0, intptr_t sstride1);
> typedef void (*blockfil_s_t)(short *dst, intptr_t dstride, short val);
>
> typedef void (*intra_dc_t)(pixel* above, pixel* left, pixel* dst,
> intptr_t dstStride, int width, int bFilter);
> @@ -290,6 +291,7 @@
> pixelsub_sp_t pixelsub_sp;
> pixeladd_ss_t pixeladd_ss;
> pixeladd_pp_t pixeladd_pp;
> + pixelavg_pp_t pixelavg_pp[NUM_PARTITIONS];
>
> filterVwghtd_t filterVwghtd;
> filterHwghtd_t filterHwghtd;
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
--
Steve Borho
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20130930/5af4831f/attachment.html>
More information about the x265-devel mailing list