<div dir="ltr">Pushed to default.  <br></div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Wed, Jul 10, 2019 at 9:47 AM &lt;<a href="mailto:pooja@multicorewareinc.com">pooja@multicorewareinc.com</a>&gt; wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"># HG changeset patch<br>

# User Pooja Venkatesan &lt;<a href="mailto:pooja@multicorewareinc.com" target="_blank">pooja@multicorewareinc.com</a>&gt;<br>

# Date 1562562567 -19800<br>

#      Mon Jul 08 10:39:27 2019 +0530<br>

# Node ID 2dcff9aea06f0f1c396fd2a62104e4fd5029bf40<br>

# Parent  14a235657a2011aa28d45544f33b7186c33b9218<br>

motion: Perform ME on each HME level<br>

<br>

This patch does the following:<br>

1) Perform level-0 ME<br>

2) Use the resulting MVs as predictors for the next-level ME<br>

3) Restrict full search to within +/- merange when HME is enabled<br>

<br>

diff -r 14a235657a20 -r 2dcff9aea06f source/common/lowres.cpp<br>

--- a/source/common/lowres.cpp  Fri Jul 05 11:17:26 2019 +0530<br>

+++ b/source/common/lowres.cpp  Mon Jul 08 10:39:27 2019 +0530<br>

@@ -65,6 +65,7 @@<br>

     maxBlocksInColFullRes = maxBlocksInCol * 2;<br>

     int cuCount = maxBlocksInRow * maxBlocksInCol;<br>

     int cuCountFullRes = (qgSize > 8) ? cuCount : cuCount << 2;<br>

+    isHMELowres = param->bEnableHME ? 1 : 0;<br>

<br>

     /* rounding the width to multiple of lowres CU size */<br>

     width = maxBlocksInRow * X265_LOWRES_CU_SIZE;<br>

@@ -176,6 +177,16 @@<br>

         CHECKED_MALLOC(lowresMvs[1][i], MV, cuCount);<br>

         CHECKED_MALLOC(lowresMvCosts[0][i], int32_t, cuCount);<br>

         CHECKED_MALLOC(lowresMvCosts[1][i], int32_t, cuCount);<br>

+        if (bEnableHME)<br>

+        {<br>

+            int maxBlocksInRowLowerRes = ((width/2) + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;<br>

+            int maxBlocksInColLowerRes = ((lines/2) + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;<br>

+            int cuCountLowerRes = maxBlocksInRowLowerRes * maxBlocksInColLowerRes;<br>

+            CHECKED_MALLOC(lowerResMvs[0][i], MV, cuCountLowerRes);<br>

+            CHECKED_MALLOC(lowerResMvs[1][i], MV, cuCountLowerRes);<br>

+            CHECKED_MALLOC(lowerResMvCosts[0][i], int32_t, cuCountLowerRes);<br>

+            CHECKED_MALLOC(lowerResMvCosts[1][i], int32_t, cuCountLowerRes);<br>

+        }<br>

     }<br>

<br>

     return true;<br>

@@ -207,6 +218,13 @@<br>

         X265_FREE(lowresMvs[1][i]);<br>

         X265_FREE(lowresMvCosts[0][i]);<br>

         X265_FREE(lowresMvCosts[1][i]);<br>

+        if (bEnableHME)<br>

+        {<br>

+            X265_FREE(lowerResMvs[0][i]);<br>

+            X265_FREE(lowerResMvs[1][i]);<br>

+            X265_FREE(lowerResMvCosts[0][i]);<br>

+            X265_FREE(lowerResMvCosts[1][i]);<br>

+        }<br>

     }<br>

     X265_FREE(qpAqOffset);<br>

     X265_FREE(invQscaleFactor);<br>

diff -r 14a235657a20 -r 2dcff9aea06f source/common/lowres.h<br>

--- a/source/common/lowres.h    Fri Jul 05 11:17:26 2019 +0530<br>

+++ b/source/common/lowres.h    Mon Jul 08 10:39:27 2019 +0530<br>

@@ -46,6 +46,7 @@<br>

<br>

     bool     isWeighted;<br>

     bool     isLowres;<br>

+    bool     isHMELowres;<br>

<br>

     intptr_t lumaStride;<br>

     intptr_t chromaStride;<br>

@@ -63,46 +64,58 @@<br>

<br>

     /* lowres motion compensation, you must provide a buffer and stride for QPEL averaged pixels<br>

      * in case QPEL is required.  Else it returns a pointer to the HPEL pixels */<br>

-    inline pixel *lowresMC(intptr_t blockOffset, const MV& qmv, pixel *buf, intptr_t& outstride)<br>

+    inline pixel *lowresMC(intptr_t blockOffset, const MV& qmv, pixel *buf, intptr_t& outstride, bool hme)<br>

     {<br>

+        intptr_t YStride = hme ? lumaStride / 2 : lumaStride;<br>

+        pixel *plane[4];<br>

+        for (int i = 0; i < 4; i++)<br>

+        {<br>

+            plane[i] = hme ? lowerResPlane[i] : lowresPlane[i];<br>

+        }<br>

         if ((qmv.x | qmv.y) & 1)<br>

         {<br>

             int hpelA = (qmv.y & 2) | ((qmv.x & 2) >> 1);<br>

-            pixel *frefA = lowresPlane[hpelA] + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * lumaStride;<br>

+            pixel *frefA = plane[hpelA] + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * YStride;<br>

             int qmvx = qmv.x + (qmv.x & 1);<br>

             int qmvy = qmv.y + (qmv.y & 1);<br>

             int hpelB = (qmvy & 2) | ((qmvx & 2) >> 1);<br>

-            pixel *frefB = lowresPlane[hpelB] + blockOffset + (qmvx >> 2) + (qmvy >> 2) * lumaStride;<br>

-            primitives.pu[LUMA_8x8].pixelavg_pp[(outstride % 64 == 0) && (lumaStride % 64 == 0)](buf, outstride, frefA, lumaStride, frefB, lumaStride, 32);<br>

+            pixel *frefB = plane[hpelB] + blockOffset + (qmvx >> 2) + (qmvy >> 2) * YStride;<br>

+            primitives.pu[LUMA_8x8].pixelavg_pp[(outstride % 64 == 0) && (YStride % 64 == 0)](buf, outstride, frefA, YStride, frefB, YStride, 32);<br>

             return buf;<br>

         }<br>

         else<br>

         {<br>

-            outstride = lumaStride;<br>

+            outstride = YStride;<br>

             int hpel = (qmv.y & 2) | ((qmv.x & 2) >> 1);<br>

-            return lowresPlane[hpel] + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * lumaStride;<br>

+            return plane[hpel] + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * YStride;<br>

         }<br>

     }<br>

<br>

-    inline int lowresQPelCost(pixel *fenc, intptr_t blockOffset, const MV& qmv, pixelcmp_t comp)<br>

+    inline int lowresQPelCost(pixel *fenc, intptr_t blockOffset, const MV& qmv, pixelcmp_t comp, bool hme)<br>

     {<br>

+        intptr_t YStride = hme ? lumaStride / 2 : lumaStride;<br>

+        pixel *plane[4];<br>

+        for (int i = 0; i < 4; i++)<br>

+        {<br>

+            plane[i] = hme ? lowerResPlane[i] : lowresPlane[i];<br>

+        }<br>

         if ((qmv.x | qmv.y) & 1)<br>

         {<br>

             ALIGN_VAR_16(pixel, subpelbuf[8 * 8]);<br>

             int hpelA = (qmv.y & 2) | ((qmv.x & 2) >> 1);<br>

-            pixel *frefA = lowresPlane[hpelA] + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * lumaStride;<br>

+            pixel *frefA = plane[hpelA] + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * YStride;<br>

             int qmvx = qmv.x + (qmv.x & 1);<br>

             int qmvy = qmv.y + (qmv.y & 1);<br>

             int hpelB = (qmvy & 2) | ((qmvx & 2) >> 1);<br>

-            pixel *frefB = lowresPlane[hpelB] + blockOffset + (qmvx >> 2) + (qmvy >> 2) * lumaStride;<br>

-            primitives.pu[LUMA_8x8].pixelavg_pp[NONALIGNED](subpelbuf, 8, frefA, lumaStride, frefB, lumaStride, 32);<br>

+            pixel *frefB = plane[hpelB] + blockOffset + (qmvx >> 2) + (qmvy >> 2) * YStride;<br>

+            primitives.pu[LUMA_8x8].pixelavg_pp[NONALIGNED](subpelbuf, 8, frefA, YStride, frefB, YStride, 32);<br>

             return comp(fenc, FENC_STRIDE, subpelbuf, 8);<br>

         }<br>

         else<br>

         {<br>

             int hpel = (qmv.y & 2) | ((qmv.x & 2) >> 1);<br>

-            pixel *fref = lowresPlane[hpel] + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * lumaStride;<br>

-            return comp(fenc, FENC_STRIDE, fref, lumaStride);<br>

+            pixel *fref = plane[hpel] + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * YStride;<br>

+            return comp(fenc, FENC_STRIDE, fref, YStride);<br>

         }<br>

     }<br>

 };<br>

@@ -188,6 +201,8 @@<br>

<br>

     /* Hierarchical Motion Estimation */<br>

     bool      bEnableHME;<br>

+    int32_t*  lowerResMvCosts[2][X265_BFRAME_MAX + 2];<br>

+    MV*       lowerResMvs[2][X265_BFRAME_MAX + 2];<br>

<br>

     /* used for vbvLookahead */<br>

     int       plannedType[X265_LOOKAHEAD_MAX + 1];<br>

diff -r 14a235657a20 -r 2dcff9aea06f source/encoder/encoder.cpp<br>

--- a/source/encoder/encoder.cpp        Fri Jul 05 11:17:26 2019 +0530<br>

+++ b/source/encoder/encoder.cpp        Mon Jul 08 10:39:27 2019 +0530<br>

@@ -3387,6 +3387,10 @@<br>

             x265_log(p, X265_LOG_WARNING, "Source height < 540p is too low for HME. Disabling HME.\n");<br>

             p->bEnableHME = 0;<br>

         }<br>

+        if (m_param->bEnableHME && m_param->searchMethod != m_param->hmeSearchMethod[2])<br>

+        {<br>

+            m_param->searchMethod = m_param->hmeSearchMethod[2];<br>

+        }<br>

     }<br>

 }<br>

<br>

diff -r 14a235657a20 -r 2dcff9aea06f source/encoder/motion.cpp<br>

--- a/source/encoder/motion.cpp Fri Jul 05 11:17:26 2019 +0530<br>

+++ b/source/encoder/motion.cpp Mon Jul 08 10:39:27 2019 +0530<br>

@@ -104,6 +104,8 @@<br>

     ctuAddr = -1;<br>

     absPartIdx = -1;<br>

     searchMethod = X265_HEX_SEARCH;<br>

+    searchMethodL0 = X265_HEX_SEARCH;<br>

+    searchMethodL1 = X265_HEX_SEARCH;<br>

     subpelRefine = 2;<br>

     blockwidth = blockheight = 0;<br>

     blockOffset = 0;<br>

@@ -162,7 +164,7 @@<br>

 }<br>

<br>

 /* Called by lookahead, luma only, no use of PicYuv */<br>

-void MotionEstimate::setSourcePU(pixel *fencY, intptr_t stride, intptr_t offset, int pwidth, int pheight, const int method, const int refine)<br>

+void MotionEstimate::setSourcePU(pixel *fencY, intptr_t stride, intptr_t offset, int pwidth, int pheight, const int method, const int searchL0, const int searchL1, const int refine)<br>

 {<br>

     partEnum = partitionFromSizes(pwidth, pheight);<br>

     X265_CHECK(LUMA_4x4 != partEnum, "4x4 inter partition detected!\n");<br>

@@ -179,6 +181,8 @@<br>

<br>

     /* Search params */<br>

     searchMethod = method;<br>

+    searchMethodL0 = searchL0;<br>

+    searchMethodL1 = searchL1;<br>

     subpelRefine = refine;<br>

<br>

     /* copy PU block into cache */<br>

@@ -743,9 +747,10 @@<br>

                                    pixel *          srcReferencePlane)<br>

 {<br>

     ALIGN_VAR_16(int, costs[16]);<br>

+    bool hme = srcReferencePlane && srcReferencePlane == ref->fpelLowerResPlane[0];<br>

     if (ctuAddr >= 0)<br>

         blockOffset = ref->reconPic->getLumaAddr(ctuAddr, absPartIdx) - ref->reconPic->getLumaAddr(0);<br>

-    intptr_t stride = ref->lumaStride;<br>

+    intptr_t stride = hme ? ref->lumaStride / 2 : ref->lumaStride;<br>

     pixel* fenc = fencPUYuv.m_buf[0];<br>

     pixel* fref = srcReferencePlane == 0 ? ref->fpelPlane[0] + blockOffset : srcReferencePlane + blockOffset;<br>

<br>

@@ -767,7 +772,7 @@<br>

     int bprecost;<br>

<br>

     if (ref->isLowres)<br>

-        bprecost = ref->lowresQPelCost(fenc, blockOffset, pmv, sad);<br>

+        bprecost = ref->lowresQPelCost(fenc, blockOffset, pmv, sad, hme);<br>

     else<br>

         bprecost = subpelCompare(ref, pmv, sad);<br>

<br>

@@ -808,7 +813,8 @@<br>

     pmv = pmv.roundToFPel();<br>

     MV omv = bmv;  // current search origin or starting point<br>

<br>

-    switch (searchMethod)<br>

+    int search = ref->isHMELowres ? (hme ? searchMethodL0 : searchMethodL1) : searchMethod;<br>

+    switch (search)<br>

     {<br>

     case X265_DIA_SEARCH:<br>

     {<br>

@@ -1391,11 +1397,20 @@<br>

     {<br>

         // dead slow exhaustive search, but at least it uses sad_x4()<br>

         MV tmv;<br>

-        for (tmv.y = mvmin.y; tmv.y <= mvmax.y; tmv.y++)<br>

+        int32_t mvmin_y = mvmin.y, mvmin_x = mvmin.x, mvmax_y = mvmax.y, mvmax_x = mvmax.x;<br>

+        if (ref->isHMELowres)<br>

         {<br>

-            for (tmv.x = mvmin.x; tmv.x <= mvmax.x; tmv.x++)<br>

+            merange = (merange < 0 ? -merange : merange);<br>

+            mvmin_y = X265_MAX(mvmin.y, -merange);<br>

+            mvmin_x = X265_MAX(mvmin.x, -merange);<br>

+            mvmax_y = X265_MIN(mvmax.y, merange);<br>

+            mvmax_x = X265_MIN(mvmax.x, merange);<br>

+        }<br>

+        for (tmv.y = mvmin_y; tmv.y <= mvmax_y; tmv.y++)<br>

+        {<br>

+            for (tmv.x = mvmin_x; tmv.x <= mvmax_x; tmv.x++)<br>

             {<br>

-                if (tmv.x + 3 <= mvmax.x)<br>

+                if (tmv.x + 3 <= mvmax_x)<br>

                 {<br>

                     pixel *pix_base = fref + tmv.y * stride + tmv.x;<br>

                     sad_x4(fenc,<br>

@@ -1463,12 +1478,12 @@<br>

             if ((qmv.y < qmvmin.y) | (qmv.y > qmvmax.y))<br>

                 continue;<br>

<br>

-            int cost = ref->lowresQPelCost(fenc, blockOffset, qmv, sad) + mvcost(qmv);<br>

+            int cost = ref->lowresQPelCost(fenc, blockOffset, qmv, sad, hme) + mvcost(qmv);<br>

             COPY2_IF_LT(bcost, cost, bdir, i);<br>

         }<br>

<br>

         bmv += square1[bdir] * 2;<br>

-        bcost = ref->lowresQPelCost(fenc, blockOffset, bmv, satd) + mvcost(bmv);<br>

+        bcost = ref->lowresQPelCost(fenc, blockOffset, bmv, satd, hme) + mvcost(bmv);<br>

<br>

         bdir = 0;<br>

         for (int i = 1; i <= wl.qpel_dirs; i++)<br>

@@ -1479,7 +1494,7 @@<br>

             if ((qmv.y < qmvmin.y) | (qmv.y > qmvmax.y))<br>

                 continue;<br>

<br>

-            int cost = ref->lowresQPelCost(fenc, blockOffset, qmv, satd) + mvcost(qmv);<br>

+            int cost = ref->lowresQPelCost(fenc, blockOffset, qmv, satd, hme) + mvcost(qmv);<br>

             COPY2_IF_LT(bcost, cost, bdir, i);<br>

         }<br>

<br>

diff -r 14a235657a20 -r 2dcff9aea06f source/encoder/motion.h<br>

--- a/source/encoder/motion.h   Fri Jul 05 11:17:26 2019 +0530<br>

+++ b/source/encoder/motion.h   Mon Jul 08 10:39:27 2019 +0530<br>

@@ -44,6 +44,8 @@<br>

     int absPartIdx;  // part index of PU, including CU offset within CTU<br>

<br>

     int searchMethod;<br>

+    int searchMethodL0;<br>

+    int searchMethodL1;<br>

     int subpelRefine;<br>

<br>

     int blockwidth;<br>

@@ -76,7 +78,7 @@<br>

<br>

     /* Methods called at slice setup */<br>

<br>

-    void setSourcePU(pixel *fencY, intptr_t stride, intptr_t offset, int pwidth, int pheight, const int searchMethod, const int subpelRefine);<br>

+    void setSourcePU(pixel *fencY, intptr_t stride, intptr_t offset, int pwidth, int pheight, const int searchMethod, const int searchL0, const int searchL1, const int subpelRefine);<br>

     void setSourcePU(const Yuv& srcFencYuv, int ctuAddr, int cuPartIdx, int puPartIdx, int pwidth, int pheight, const int searchMethod, const int subpelRefine, bool bChroma);<br>

<br>

     /* buf*() and motionEstimate() methods all use cached fenc pixels and thus<br>

diff -r 14a235657a20 -r 2dcff9aea06f source/encoder/search.cpp<br>

--- a/source/encoder/search.cpp Fri Jul 05 11:17:26 2019 +0530<br>

+++ b/source/encoder/search.cpp Mon Jul 08 10:39:27 2019 +0530<br>

@@ -2096,13 +2096,16 @@<br>

<br>

     const MV* amvp = interMode.amvpCand[list][ref];<br>

     int mvpIdx = selectMVP(interMode.cu, pu, amvp, list, ref);<br>

-    MV mvmin, mvmax, outmv, mvp = amvp[mvpIdx];<br>

+    bool bLowresMVP = false;<br>

+    MV mvmin, mvmax, outmv, mvp = amvp[mvpIdx], mvp_lowres;<br>

<br>

     if (!m_param->analysisSave && !m_param->analysisLoad) /* Prevents load/save outputs from diverging if lowresMV is not available */<br>

     {<br>

         MV lmv = getLowresMV(interMode.cu, pu, list, ref);<br>

         if (lmv.notZero())<br>

             mvc[numMvc++] = lmv;<br>

+        if (m_param->bEnableHME)<br>

+            mvp_lowres = lmv;<br>

     }<br>

<br>

     setSearchRange(interMode.cu, mvp, m_param->searchRange, mvmin, mvmax);<br>

@@ -2110,11 +2113,28 @@<br>

     int satdCost = m_me.motionEstimate(&m_slice->m_mref[list][ref], mvmin, mvmax, mvp, numMvc, mvc, m_param->searchRange, outmv, m_param->maxSlices, <br>

       m_param->bSourceReferenceEstimation ? m_slice->m_refFrameList[list][ref]->m_fencPic->getLumaAddr(0) : 0);<br>

<br>

+    if (m_param->bEnableHME && mvp_lowres.notZero() && mvp_lowres != mvp)<br>

+    {<br>

+        MV outmv_lowres;<br>

+        setSearchRange(interMode.cu, mvp_lowres, m_param->searchRange, mvmin, mvmax);<br>

+        int lowresMvCost = m_me.motionEstimate(&m_slice->m_mref[list][ref], mvmin, mvmax, mvp_lowres, numMvc, mvc, m_param->searchRange, outmv_lowres, m_param->maxSlices,<br>

+            m_param->bSourceReferenceEstimation ? m_slice->m_refFrameList[list][ref]->m_fencPic->getLumaAddr(0) : 0);<br>

+        if (lowresMvCost < satdCost)<br>

+        {<br>

+            outmv = outmv_lowres;<br>

+            satdCost = lowresMvCost;<br>

+            bLowresMVP = true;<br>

+        }<br>

+    }<br>

     /* Get total cost of partition, but only include MV bit cost once */<br>

     bits += m_me.bitcost(outmv);<br>

     uint32_t mvCost = m_me.mvcost(outmv);<br>

     uint32_t cost = (satdCost - mvCost) + m_rdCost.getCost(bits);<br>

<br>

+    /* Update LowresMVP to best AMVP cand*/<br>

+    if (bLowresMVP)<br>

+        updateMVP(amvp[mvpIdx], outmv, bits, cost, mvp_lowres);<br>

+<br>

     /* Refine MVP selection, updates: mvpIdx, bits, cost */<br>

     mvp = checkBestMVP(amvp, outmv, mvpIdx, bits, cost);<br>

<br>

@@ -2346,13 +2366,16 @@<br>

<br>

                     const MV* amvp = interMode.amvpCand[list][ref];<br>

                     int mvpIdx = selectMVP(cu, pu, amvp, list, ref);<br>

-                    MV mvmin, mvmax, outmv, mvp = amvp[mvpIdx];<br>

+                    MV mvmin, mvmax, outmv, mvp = amvp[mvpIdx], mvp_lowres;<br>

+                    bool bLowresMVP = false;<br>

<br>

                     if (!m_param->analysisSave && !m_param->analysisLoad) /* Prevents load/save outputs from diverging when lowresMV is not available */<br>

                     {<br>

                         MV lmv = getLowresMV(cu, pu, list, ref);<br>

                         if (lmv.notZero())<br>

                             mvc[numMvc++] = lmv;<br>

+                        if (m_param->bEnableHME)<br>

+                            mvp_lowres = lmv;<br>

                     }<br>

                     if (m_param->searchMethod == X265_SEA)<br>

                     {<br>

@@ -2365,10 +2388,27 @@<br>

                     int satdCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvp, numMvc, mvc, m_param->searchRange, outmv, m_param->maxSlices, <br>

                       m_param->bSourceReferenceEstimation ? m_slice->m_refFrameList[list][ref]->m_fencPic->getLumaAddr(0) : 0);<br>

<br>

+                    if (m_param->bEnableHME && mvp_lowres.notZero() && mvp_lowres != mvp)<br>

+                    {<br>

+                        MV outmv_lowres;<br>

+                        setSearchRange(cu, mvp_lowres, m_param->searchRange, mvmin, mvmax);<br>

+                        int lowresMvCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvp_lowres, numMvc, mvc, m_param->searchRange, outmv_lowres, m_param->maxSlices,<br>

+                            m_param->bSourceReferenceEstimation ? m_slice->m_refFrameList[list][ref]->m_fencPic->getLumaAddr(0) : 0);<br>

+                        if (lowresMvCost < satdCost)<br>

+                        {<br>

+                            outmv = outmv_lowres;<br>

+                            satdCost = lowresMvCost;<br>

+                            bLowresMVP = true;<br>

+                        }<br>

+                    }<br>

+<br>

                     /* Get total cost of partition, but only include MV bit cost once */<br>

                     bits += m_me.bitcost(outmv);<br>

                     uint32_t mvCost = m_me.mvcost(outmv);<br>

                     uint32_t cost = (satdCost - mvCost) + m_rdCost.getCost(bits);<br>

+                    /* Update LowresMVP to best AMVP cand*/<br>

+                    if (bLowresMVP)<br>

+                        updateMVP(amvp[mvpIdx], outmv, bits, cost, mvp_lowres);<br>

<br>

                     /* Refine MVP selection, updates: mvpIdx, bits, cost */<br>

                     mvp = checkBestMVP(amvp, outmv, mvpIdx, bits, cost);<br>

@@ -2631,6 +2671,15 @@<br>

     return amvpCand[mvpIdx];<br>

 }<br>

<br>

+/* Update to default MVP when using an alternative mvp */<br>

+void Search::updateMVP(const MV amvp, const MV& mv, uint32_t& outBits, uint32_t& outCost, const MV& alterMVP)<br>

+{<br>

+    int diffBits = m_me.bitcost(mv, amvp) - m_me.bitcost(mv, alterMVP);<br>

+    uint32_t origOutBits = outBits;<br>

+    outBits = origOutBits + diffBits;<br>

+    outCost = (outCost - m_rdCost.getCost(origOutBits)) + m_rdCost.getCost(outBits);<br>

+}<br>

+<br>

 void Search::setSearchRange(const CUData& cu, const MV& mvp, int merange, MV& mvmin, MV& mvmax) const<br>

 {<br>

     MV dist((int32_t)merange << 2, (int32_t)merange << 2);<br>

diff -r 14a235657a20 -r 2dcff9aea06f source/encoder/search.h<br>

--- a/source/encoder/search.h   Fri Jul 05 11:17:26 2019 +0530<br>

+++ b/source/encoder/search.h   Mon Jul 08 10:39:27 2019 +0530<br>

@@ -425,6 +425,7 @@<br>

     void     setSearchRange(const CUData& cu, const MV& mvp, int merange, MV& mvmin, MV& mvmax) const;<br>

     uint32_t mergeEstimation(CUData& cu, const CUGeom& cuGeom, const PredictionUnit& pu, int puIdx, MergeData& m);<br>

     static void getBlkBits(PartSize cuMode, bool bPSlice, int puIdx, uint32_t lastMode, uint32_t blockBit[3]);<br>

+    void      updateMVP(const MV amvp, const MV& mv, uint32_t& outBits, uint32_t& outCost, const MV& alterMVP);<br>

<br>

     /* intra helper functions */<br>

     enum { MAX_RD_INTRA_MODES = 16 };<br>

diff -r 14a235657a20 -r 2dcff9aea06f source/encoder/slicetype.cpp<br>

--- a/source/encoder/slicetype.cpp      Fri Jul 05 11:17:26 2019 +0530<br>

+++ b/source/encoder/slicetype.cpp      Mon Jul 08 10:39:27 2019 +0530<br>

@@ -664,6 +664,7 @@<br>

     weightedRef.lumaStride = fenc.lumaStride;<br>

     weightedRef.isLowres = true;<br>

     weightedRef.isWeighted = false;<br>

+    weightedRef.isHMELowres = ref.bEnableHME;<br>

<br>

     /* epsilon is chosen to require at least a numerator of 127 (with denominator = 128) */<br>

     float guessScale, fencMean, refMean;<br>

@@ -759,6 +760,8 @@<br>

     m_extendGopBoundary = false;<br>

     m_8x8Height = ((m_param->sourceHeight / 2) + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;<br>

     m_8x8Width = ((m_param->sourceWidth / 2) + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;<br>

+    m_4x4Height = ((m_param->sourceHeight / 4) + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;<br>

+    m_4x4Width = ((m_param->sourceWidth / 4) + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;<br>

     m_cuCount = m_8x8Width * m_8x8Height;<br>

     m_8x8Blocks = m_8x8Width > 2 && m_8x8Height > 2 ? (m_cuCount + 4 - 2 * (m_8x8Width + m_8x8Height)) : m_cuCount;<br>

     m_isFadeIn = false;<br>

@@ -2782,16 +2785,32 @@<br>

<br>

             X265_CHECK(i < MAX_COOP_SLICES, "impossible number of coop slices\n");<br>

<br>

-            int firstY = m_lookahead.m_numRowsPerSlice * i;<br>

-            int lastY = (i == m_jobTotal - 1) ? m_lookahead.m_8x8Height - 1 : m_lookahead.m_numRowsPerSlice * (i + 1) - 1;<br>

-<br>

-            bool lastRow = true;<br>

+            int firstY, lastY;<br>

+            bool lastRow;<br>

+            if (m_lookahead.m_param->bEnableHME)<br>

+            {<br>

+                int numRowsPerSlice = m_lookahead.m_4x4Height / m_lookahead.m_param->lookaheadSlices;<br>

+                numRowsPerSlice = X265_MIN(X265_MAX(numRowsPerSlice, 5), m_lookahead.m_4x4Height);<br>

+                firstY = numRowsPerSlice * i;<br>

+                lastY = (i == m_jobTotal - 1) ? m_lookahead.m_4x4Height - 1 : numRowsPerSlice * (i + 1) - 1;<br>

+                lastRow = true;<br>

+                for (int cuY = lastY; cuY >= firstY; cuY--)<br>

+                {<br>

+                    for (int cuX = m_lookahead.m_4x4Width - 1; cuX >= 0; cuX--)<br>

+                        estimateCUCost(tld, cuX, cuY, m_coop.p0, m_coop.p1, m_coop.b, m_coop.bDoSearch, lastRow, i, 1);<br>

+                    lastRow = false;<br>

+                }<br>

+            }<br>

+<br>

+            firstY = m_lookahead.m_numRowsPerSlice * i;<br>

+            lastY = (i == m_jobTotal - 1) ? m_lookahead.m_8x8Height - 1 : m_lookahead.m_numRowsPerSlice * (i + 1) - 1;<br>

+            lastRow = true;<br>

             for (int cuY = lastY; cuY >= firstY; cuY--)<br>

             {<br>

                 m_frames[m_coop.b]->rowSatds[m_coop.b - m_coop.p0][m_coop.p1 - m_coop.b][cuY] = 0;<br>

<br>

                 for (int cuX = m_lookahead.m_8x8Width - 1; cuX >= 0; cuX--)<br>

-                    estimateCUCost(tld, cuX, cuY, m_coop.p0, m_coop.p1, m_coop.b, m_coop.bDoSearch, lastRow, i);<br>

+                    estimateCUCost(tld, cuX, cuY, m_coop.p0, m_coop.p1, m_coop.b, m_coop.bDoSearch, lastRow, i, 0);<br>

<br>

                 lastRow = false;<br>

             }<br>

@@ -2864,13 +2883,25 @@<br>

         }<br>

         else<br>

         {<br>

-            bool lastRow = true;<br>

+            /* Calculate MVs for 1/16th resolution*/<br>

+            bool lastRow;<br>

+            if (param->bEnableHME)<br>

+            {<br>

+                lastRow = true;<br>

+                for (int cuY = m_lookahead.m_4x4Height - 1; cuY >= 0; cuY--)<br>

+                {<br>

+                    for (int cuX = m_lookahead.m_4x4Width - 1; cuX >= 0; cuX--)<br>

+                        estimateCUCost(tld, cuX, cuY, p0, p1, b, bDoSearch, lastRow, -1, 1);<br>

+                    lastRow = false;<br>

+                }<br>

+            }<br>

+            lastRow = true;<br>

             for (int cuY = m_lookahead.m_8x8Height - 1; cuY >= 0; cuY--)<br>

             {<br>

                 fenc->rowSatds[b - p0][p1 - b][cuY] = 0;<br>

<br>

                 for (int cuX = m_lookahead.m_8x8Width - 1; cuX >= 0; cuX--)<br>

-                    estimateCUCost(tld, cuX, cuY, p0, p1, b, bDoSearch, lastRow, -1);<br>

+                    estimateCUCost(tld, cuX, cuY, p0, p1, b, bDoSearch, lastRow, -1, 0);<br>

<br>

                 lastRow = false;<br>

             }<br>

@@ -2891,23 +2922,27 @@<br>

     return score;<br>

 }<br>

<br>

-void CostEstimateGroup::estimateCUCost(LookaheadTLD& tld, int cuX, int cuY, int p0, int p1, int b, bool bDoSearch[2], bool lastRow, int slice)<br>

+void CostEstimateGroup::estimateCUCost(LookaheadTLD& tld, int cuX, int cuY, int p0, int p1, int b, bool bDoSearch[2], bool lastRow, int slice, bool hme)<br>

 {<br>

     Lowres *fref0 = m_frames[p0];<br>

     Lowres *fref1 = m_frames[p1];<br>

     Lowres *fenc  = m_frames[b];<br>

<br>

-    ReferencePlanes *wfref0 = fenc->weightedRef[b - p0].isWeighted ? &fenc->weightedRef[b - p0] : fref0;<br>

-<br>

-    const int widthInCU = m_lookahead.m_8x8Width;<br>

-    const int heightInCU = m_lookahead.m_8x8Height;<br>

+    ReferencePlanes *wfref0 = fenc->weightedRef[b - p0].isWeighted && !hme ? &fenc->weightedRef[b - p0] : fref0;<br>

+<br>

+    const int widthInCU = hme ? m_lookahead.m_4x4Width : m_lookahead.m_8x8Width;<br>

+    const int heightInCU = hme ? m_lookahead.m_4x4Height : m_lookahead.m_8x8Height;<br>

     const int bBidir = (b < p1);<br>

     const int cuXY = cuX + cuY * widthInCU;<br>

+    const int cuXY_4x4 = (cuX / 2) + (cuY / 2) * widthInCU / 2;<br>

     const int cuSize = X265_LOWRES_CU_SIZE;<br>

-    const intptr_t pelOffset = cuSize * cuX + cuSize * cuY * fenc->lumaStride;<br>

-<br>

-    if (bBidir || bDoSearch[0] || bDoSearch[1])<br>

-        tld.me.setSourcePU(fenc->lowresPlane[0], fenc->lumaStride, pelOffset, cuSize, cuSize, X265_HEX_SEARCH, 1);<br>

+    const intptr_t pelOffset = cuSize * cuX + cuSize * cuY * (hme ? fenc->lumaStride/2 : fenc->lumaStride);<br>

+<br>

+    if ((bBidir || bDoSearch[0] || bDoSearch[1]) && hme)<br>

+        tld.me.setSourcePU(fenc->lowerResPlane[0], fenc->lumaStride / 2, pelOffset, cuSize, cuSize, X265_HEX_SEARCH, m_lookahead.m_param->hmeSearchMethod[0], m_lookahead.m_param->hmeSearchMethod[1], 1);<br>

+    else if((bBidir || bDoSearch[0] || bDoSearch[1]) && !hme)<br>

+        tld.me.setSourcePU(fenc->lowresPlane[0], fenc->lumaStride, pelOffset, cuSize, cuSize, X265_HEX_SEARCH, m_lookahead.m_param->hmeSearchMethod[0], m_lookahead.m_param->hmeSearchMethod[1], 1);<br>

+<br>

<br>

     /* A small, arbitrary bias to avoid VBV problems caused by zero-residual lookahead blocks. */<br>

     int lowresPenalty = 4;<br>

@@ -2926,7 +2961,7 @@<br>

<br>

     for (int i = 0; i < 1 + bBidir; i++)<br>

     {<br>

-        int& fencCost = fenc->lowresMvCosts[i][listDist[i]][cuXY];<br>

+        int& fencCost = hme ? fenc->lowerResMvCosts[i][listDist[i]][cuXY] : fenc->lowresMvCosts[i][listDist[i]][cuXY];<br>

         int skipCost = INT_MAX;<br>

<br>

         if (!bDoSearch[i])<br>

@@ -2936,8 +2971,8 @@<br>

         }<br>

<br>

         int numc = 0;<br>

-        MV mvc[4], mvp;<br>

-        MV* fencMV = &fenc->lowresMvs[i][listDist[i]][cuXY];<br>

+        MV mvc[5], mvp;<br>

+        MV* fencMV = hme ? &fenc->lowerResMvs[i][listDist[i]][cuXY] : &fenc->lowresMvs[i][listDist[i]][cuXY];<br>

         ReferencePlanes* fref = i ? fref1 : wfref0;<br>

<br>

         /* Reverse-order MV prediction */<br>

@@ -2952,6 +2987,10 @@<br>

             if (cuX < widthInCU - 1)<br>

                 MVC(fencMV[widthInCU + 1]);<br>

         }<br>

+        if (fenc->lowerResMvs[0][0] && !hme && fenc->lowerResMvCosts[i][listDist[i]][cuXY_4x4] > 0)<br>

+        {<br>

+            MVC((fenc->lowerResMvs[i][listDist[i]][cuXY_4x4]) * 2);<br>

+        }<br>

 #undef MVC<br>

<br>

         if (!numc)<br>

@@ -2967,7 +3006,7 @@<br>

             for (int idx = 0; idx < numc; idx++)<br>

             {<br>

                 intptr_t stride = X265_LOWRES_CU_SIZE;<br>

-                pixel *src = fref->lowresMC(pelOffset, mvc[idx], subpelbuf, stride);<br>

+                pixel *src = fref->lowresMC(pelOffset, mvc[idx], subpelbuf, stride, hme);<br>

                 int cost = tld.me.bufSATD(src, stride);<br>

                 COPY2_IF_LT(mvpcost, cost, mvp, mvc[idx]);<br>

                 /* Except for mv0 case, everyting else is likely to have enough residual to not trigger the skip. */<br>

@@ -2978,7 +3017,10 @@<br>

<br>

         /* ME will never return a cost larger than the cost @MVP, so we do not<br>

          * have to check that ME cost is more than the estimated merge cost */<br>

-        fencCost = tld.me.motionEstimate(fref, mvmin, mvmax, mvp, 0, NULL, s_merange, *fencMV, m_lookahead.m_param->maxSlices);<br>

+        if(!hme)<br>

+            fencCost = tld.me.motionEstimate(fref, mvmin, mvmax, mvp, 0, NULL, s_merange, *fencMV, m_lookahead.m_param->maxSlices);<br>

+        else<br>

+            fencCost = tld.me.motionEstimate(fref, mvmin, mvmax, mvp, 0, NULL, s_merange, *fencMV, m_lookahead.m_param->maxSlices, fref->lowerResPlane[0]);<br>

         if (skipCost < 64 && skipCost < fencCost && bBidir)<br>

         {<br>

             fencCost = skipCost;<br>

@@ -2986,6 +3028,8 @@<br>

         }<br>

         COPY2_IF_LT(bcost, fencCost, listused, i + 1);<br>

     }<br>

+    if (hme)<br>

+        return;<br>

<br>

     if (bBidir) /* B, also consider bidir */<br>

     {<br>

@@ -2995,8 +3039,8 @@<br>

         ALIGN_VAR_32(pixel, subpelbuf0[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);<br>

         ALIGN_VAR_32(pixel, subpelbuf1[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);<br>

         intptr_t stride0 = X265_LOWRES_CU_SIZE, stride1 = X265_LOWRES_CU_SIZE;<br>

-        pixel *src0 = fref0->lowresMC(pelOffset, fenc->lowresMvs[0][listDist[0]][cuXY], subpelbuf0, stride0);<br>

-        pixel *src1 = fref1->lowresMC(pelOffset, fenc->lowresMvs[1][listDist[1]][cuXY], subpelbuf1, stride1);<br>

+        pixel *src0 = fref0->lowresMC(pelOffset, fenc->lowresMvs[0][listDist[0]][cuXY], subpelbuf0, stride0, 0);<br>

+        pixel *src1 = fref1->lowresMC(pelOffset, fenc->lowresMvs[1][listDist[1]][cuXY], subpelbuf1, stride1, 0);<br>

         ALIGN_VAR_32(pixel, ref[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);<br>

         primitives.pu[LUMA_8x8].pixelavg_pp[NONALIGNED](ref, X265_LOWRES_CU_SIZE, src0, stride0, src1, stride1, 32);<br>

         int bicost = tld.me.bufSATD(ref, X265_LOWRES_CU_SIZE);<br>

diff -r 14a235657a20 -r 2dcff9aea06f source/encoder/slicetype.h<br>

--- a/source/encoder/slicetype.h        Fri Jul 05 11:17:26 2019 +0530<br>

+++ b/source/encoder/slicetype.h        Mon Jul 08 10:39:27 2019 +0530<br>

@@ -124,6 +124,10 @@<br>

     int           m_inputCount;<br>

     double        m_cuTreeStrength;<br>

<br>

+    /* HME */<br>

+    int           m_4x4Width;<br>

+    int           m_4x4Height;<br>

+<br>

     bool          m_isActive;<br>

     bool          m_sliceTypeBusy;<br>

     bool          m_bAdaptiveQuant;<br>

@@ -246,7 +250,7 @@<br>

     void    processTasks(int workerThreadID);<br>

<br>

     int64_t estimateFrameCost(LookaheadTLD& tld, int p0, int p1, int b, bool intraPenalty);<br>

-    void    estimateCUCost(LookaheadTLD& tld, int cux, int cuy, int p0, int p1, int b, bool bDoSearch[2], bool lastRow, int slice);<br>

+    void    estimateCUCost(LookaheadTLD& tld, int cux, int cuy, int p0, int p1, int b, bool bDoSearch[2], bool lastRow, int slice, bool hme);<br>

<br>

     CostEstimateGroup& operator=(const CostEstimateGroup&);<br>

 };<br>

diff -r 14a235657a20 -r 2dcff9aea06f source/encoder/weightPrediction.cpp<br>

--- a/source/encoder/weightPrediction.cpp       Fri Jul 05 11:17:26 2019 +0530<br>

+++ b/source/encoder/weightPrediction.cpp       Mon Jul 08 10:39:27 2019 +0530<br>

@@ -82,7 +82,7 @@<br>

             /* clip MV to available pixels */<br>

             MV mv = mvs[cu];<br>

             mv = mv.clipped(mvmin, mvmax);<br>

-            pixel *tmp = ref.lowresMC(pixoff, mv, buf8x8, bstride);<br>

+            pixel *tmp = ref.lowresMC(pixoff, mv, buf8x8, bstride, 0);<br>

             <a href="http://primitives.cu" rel="noreferrer" target="_blank">primitives.cu</a>[BLOCK_8x8].copy_pp(mcout + pixoff, stride, tmp, bstride);<br>

         }<br>

     }<br>

_______________________________________________<br>

x265-devel mailing list<br>

<a href="mailto:x265-devel@videolan.org" target="_blank">x265-devel@videolan.org</a><br>

<a href="https://mailman.videolan.org/listinfo/x265-devel" rel="noreferrer" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>

</blockquote></div><br clear="all"><div><br></div>-- <br><div dir="ltr" class="gmail_signature"><div dir="ltr"><font face="georgia, serif">Regards,</font><div><font face="georgia, serif">Aruna</font></div></div></div>