[x265] [PATCH] search: move MVP Index selection to motion estimate to avoid code duplication

Wed Apr 29 09:43:43 CEST 2015

# HG changeset patch
# User Gopu Govindaswamy <gopu at multicorewareinc.com>
# Date 1430293410 -19800
#      Wed Apr 29 13:13:30 2015 +0530
# Node ID cc6da5218a188ded2d239bb8dbf6c3399978c93f
# Parent  c4d9ee2cef03ef74f5623784d514ffcdf725bec4
search: move MVP Index selection to motion estimate to avoid code duplication

diff -r c4d9ee2cef03 -r cc6da5218a18 source/encoder/motion.cpp

--- a/source/encoder/motion.cpp	Tue Apr 28 14:34:45 2015 -0500
+++ b/source/encoder/motion.cpp	Wed Apr 29 13:13:30 2015 +0530
@@ -1280,3 +1280,45 @@
 
     return cost;
 }
+
+/* Return the index of the AMVP candidate whose luma prediction has the lowest bufSAD() cost (index 0 wins ties).
+ * dstYuv is scratch space. NOTE(review): candidates are not clipped here; callers used to clipMv() them - verify */
+int MotionEstimate::getBestmvpIdx(const PredictionUnit& pu, Yuv& dstYuv, const PicYuv& refPic, const MV* amvp, const int merange, const bool bFrameParallel)
+{
+    if (amvp[0] == amvp[1])
+        return 0; // identical candidates cannot differ in cost, skip the interpolation work
+
+    pixel* dst = dstYuv.getLumaAddr(pu.puAbsPartIdx);
+    intptr_t dstStride = dstYuv.m_size;
+    intptr_t srcStride = refPic.m_stride;
+    const pixel* srcBase = refPic.getLumaAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx);
+    int mvpidx = 0;
+    int mvpcost = MotionEstimate::COST_MAX;
+    for (int i = 0; i < AMVP_NUM_CANDS; i++)
+    {
+        // NOTE: skip mvCand if Y is > merange and -FN>1
+        if (bFrameParallel && (amvp[i].y >= (merange + 1) * 4))
+            continue;
+
+        // low two bits of each component select the sub-pel filter phase, the remaining bits the integer offset
+        int xFrac = amvp[i].x & 0x3;
+        int yFrac = amvp[i].y & 0x3;
+        const pixel* src = srcBase + ((amvp[i].x >> 2) + (amvp[i].y >> 2) * srcStride);
+        if (!(yFrac | xFrac))
+            primitives.pu[partEnum].copy_pp(dst, dstStride, src, srcStride);
+        else if (!yFrac)
+            primitives.pu[partEnum].luma_hpp(src, srcStride, dst, dstStride, xFrac);
+        else if (!xFrac)
+            primitives.pu[partEnum].luma_vpp(src, srcStride, dst, dstStride, yFrac);
+        else
+            primitives.pu[partEnum].luma_hvpp(src, srcStride, dst, dstStride, xFrac, yFrac);
+
+        int cost = bufSAD(dst, dstStride);
+        if (cost < mvpcost)
+        {
+            mvpcost = cost;
+            mvpidx = i;
+        }
+    }
+    return mvpidx;
+}
diff -r c4d9ee2cef03 -r cc6da5218a18 source/encoder/motion.h
--- a/source/encoder/motion.h	Tue Apr 28 14:34:45 2015 -0500
+++ b/source/encoder/motion.h	Wed Apr 29 13:13:30 2015 +0530
@@ -29,6 +29,7 @@
 #include "mv.h"
 #include "bitcost.h"
 #include "yuv.h"
+#include "predict.h"
 
 namespace x265 {
 // private x265 namespace
@@ -93,6 +94,8 @@
 
     int subpelCompare(ReferencePlanes* ref, const MV &qmv, pixelcmp_t);
 
+    int getBestmvpIdx(const PredictionUnit& pu, Yuv& dstYuv, const PicYuv& refPic, const MV* amvp, const int merange, const bool bFrameParallel);
+
 protected:
 
     inline void StarPatternSearch(ReferencePlanes *ref,
diff -r c4d9ee2cef03 -r cc6da5218a18 source/encoder/search.cpp
--- a/source/encoder/search.cpp	Tue Apr 28 14:34:45 2015 -0500
+++ b/source/encoder/search.cpp	Wed Apr 29 13:13:30 2015 +0530
@@ -1923,38 +1923,17 @@
     MV mvc[(MD_ABOVE_LEFT + 1) * 2 + 1];
     int numMvc = interMode.cu.getPMV(interMode.interNeighbours, list, ref, interMode.amvpCand[list][ref], mvc);
 
-    int mvpIdx = 0;
     int merange = m_param->searchRange;
     MotionData* bestME = interMode.bestME[part];
-
-    if (interMode.amvpCand[list][ref][0] != interMode.amvpCand[list][ref][1])
-    {
-        uint32_t bestCost = MAX_INT;
-        for (int i = 0; i < AMVP_NUM_CANDS; i++)
-        {
-            MV mvCand = interMode.amvpCand[list][ref][i];
-
-            // NOTE: skip mvCand if Y is > merange and -FN>1
-            if (m_bFrameParallel && (mvCand.y >= (merange + 1) * 4))
-                continue;
-
-            interMode.cu.clipMv(mvCand);
-
-            Yuv& tmpPredYuv = m_rqt[cuGeom.depth].tmpPredYuv;
-            predInterLumaPixel(pu, tmpPredYuv, *m_slice->m_refPicList[list][ref]->m_reconPic, mvCand);
-            uint32_t cost = m_me.bufSAD(tmpPredYuv.getLumaAddr(pu.puAbsPartIdx), tmpPredYuv.m_size);
-
-            if (bestCost > cost)
-            {
-                bestCost = cost;
-                mvpIdx = i;
-            }
-        }
-    }
-
-    MV mvmin, mvmax, outmv, mvp = interMode.amvpCand[list][ref][mvpIdx];
+    MV mvmin, mvmax, outmv, mvp;
+    MV *amvpCand = interMode.amvpCand[list][ref];
+
+    // Pick the best possible MVP IDX from AMVP candidates based on least residual
+    Yuv& tmpPredYuv = m_rqt[cuGeom.depth].tmpPredYuv;
+    int mvpIdx = m_me.getBestmvpIdx(pu, tmpPredYuv, *m_slice->m_refPicList[list][ref]->m_reconPic, amvpCand, merange, m_bFrameParallel);
+    mvp = amvpCand[mvpIdx];
+
     setSearchRange(interMode.cu, mvp, merange, mvmin, mvmax);
-
     int satdCost = m_me.motionEstimate(&m_slice->m_mref[list][ref], mvmin, mvmax, mvp, numMvc, mvc, merange, outmv);
 
     /* Get total cost of partition, but only include MV bit cost once */
@@ -1962,7 +1941,7 @@
     uint32_t cost = (satdCost - m_me.mvcost(outmv)) + m_rdCost.getCost(bits);
 
     /* Refine MVP selection, updates: mvp, mvpIdx, bits, cost */
-    checkBestMVP(interMode.amvpCand[list][ref], outmv, mvp, mvpIdx, bits, cost);
+    checkBestMVP(amvpCand, outmv, mvp, mvpIdx, bits, cost);
 
     /* tie goes to the smallest ref ID, just like --no-pme */
     ScopedLock _lock(master.m_meLock);
@@ -2051,46 +2030,23 @@
                 bits += getTUBits(ref, numRefIdx[list]);
 
                 int numMvc = cu.getPMV(interMode.interNeighbours, list, ref, interMode.amvpCand[list][ref], mvc);
-
-                // Pick the best possible MVP from AMVP candidates based on least residual
-                int mvpIdx = 0;
                 int merange = m_param->searchRange;
-
-                if (interMode.amvpCand[list][ref][0] != interMode.amvpCand[list][ref][1])
-                {
-                    uint32_t bestCost = MAX_INT;
-                    for (int i = 0; i < AMVP_NUM_CANDS; i++)
-                    {
-                        MV mvCand = interMode.amvpCand[list][ref][i];
-
-                        // NOTE: skip mvCand if Y is > merange and -FN>1
-                        if (m_bFrameParallel && (mvCand.y >= (merange + 1) * 4))
-                            continue;
-
-                        cu.clipMv(mvCand);
-                        predInterLumaPixel(pu, tmpPredYuv, *slice->m_refPicList[list][ref]->m_reconPic, mvCand);
-                        uint32_t cost = m_me.bufSAD(tmpPredYuv.getLumaAddr(pu.puAbsPartIdx), tmpPredYuv.m_size);
-
-                        if (bestCost > cost)
-                        {
-                            bestCost = cost;
-                            mvpIdx = i;
-                        }
-                    }
-                }
-
-                MV mvmin, mvmax, outmv, mvp = interMode.amvpCand[list][ref][mvpIdx];
-
-                int satdCost;
+                MV mvmin, mvmax, outmv, mvp;
+                MV *amvpCand = interMode.amvpCand[list][ref];
+
+                // Pick the best possible MVP IDX from AMVP candidates based on least residual
+                int mvpIdx = m_me.getBestmvpIdx(pu, tmpPredYuv, *slice->m_refPicList[list][ref]->m_reconPic, amvpCand, merange, m_bFrameParallel);
+                mvp = amvpCand[mvpIdx];
+
                 setSearchRange(cu, mvp, merange, mvmin, mvmax);
-                satdCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvp, numMvc, mvc, merange, outmv);
+                int satdCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvp, numMvc, mvc, merange, outmv);
 
                 /* Get total cost of partition, but only include MV bit cost once */
                 bits += m_me.bitcost(outmv);
                 uint32_t cost = (satdCost - m_me.mvcost(outmv)) + m_rdCost.getCost(bits);
 
                 /* Refine MVP selection, updates: mvp, mvpIdx, bits, cost */
-                checkBestMVP(interMode.amvpCand[list][ref], outmv, mvp, mvpIdx, bits, cost);
+                checkBestMVP(amvpCand, outmv, mvp, mvpIdx, bits, cost);
 
                 if (cost < bestME[list].cost)
                 {
@@ -2134,35 +2090,13 @@
                     bits += getTUBits(ref, numRefIdx[list]);
 
                     int numMvc = cu.getPMV(interMode.interNeighbours, list, ref, interMode.amvpCand[list][ref], mvc);
-
-                    // Pick the best possible MVP from AMVP candidates based on least residual
-                    int mvpIdx = 0;
                     int merange = m_param->searchRange;
-
-                    if (interMode.amvpCand[list][ref][0] != interMode.amvpCand[list][ref][1])
-                    {
-                        uint32_t bestCost = MAX_INT;
-                        for (int i = 0; i < AMVP_NUM_CANDS; i++)
-                        {
-                            MV mvCand = interMode.amvpCand[list][ref][i];
-
-                            // NOTE: skip mvCand if Y is > merange and -FN>1
-                            if (m_bFrameParallel && (mvCand.y >= (merange + 1) * 4))
-                                continue;
-
-                            cu.clipMv(mvCand);
-                            predInterLumaPixel(pu, tmpPredYuv, *slice->m_refPicList[list][ref]->m_reconPic, mvCand);
-                            uint32_t cost = m_me.bufSAD(tmpPredYuv.getLumaAddr(pu.puAbsPartIdx), tmpPredYuv.m_size);
-
-                            if (bestCost > cost)
-                            {
-                                bestCost = cost;
-                                mvpIdx = i;
-                            }
-                        }
-                    }
-
-                    MV mvmin, mvmax, outmv, mvp = interMode.amvpCand[list][ref][mvpIdx];
+                    MV mvmin, mvmax, outmv, mvp;
+                    MV *amvpCand = interMode.amvpCand[list][ref];
+
+                    // Pick the best possible MVP IDX from AMVP candidates based on least residual
+                    int mvpIdx = m_me.getBestmvpIdx(pu, tmpPredYuv, *slice->m_refPicList[list][ref]->m_reconPic, amvpCand, merange, m_bFrameParallel);
+                    mvp = amvpCand[mvpIdx];
 
                     setSearchRange(cu, mvp, merange, mvmin, mvmax);
                     int satdCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvp, numMvc, mvc, merange, outmv);
@@ -2172,7 +2106,7 @@
                     uint32_t cost = (satdCost - m_me.mvcost(outmv)) + m_rdCost.getCost(bits);
 
                     /* Refine MVP selection, updates: mvp, mvpIdx, bits, cost */
-                    checkBestMVP(interMode.amvpCand[list][ref], outmv, mvp, mvpIdx, bits, cost);
+                    checkBestMVP(amvpCand, outmv, mvp, mvpIdx, bits, cost);
 
                     if (cost < bestME[list].cost)
                     {