[x265] [PATCH] Cache spatial and temporal PMVs before analyzing to find the best MV for each reference index

ashok at multicorewareinc.com ashok at multicorewareinc.com
Wed Mar 11 16:29:50 CET 2015


# HG changeset patch
# User Ashok Kumar Mishra<ashok at multicorewareinc.com>
# Date 1425891920 -19800
#      Mon Mar 09 14:35:20 2015 +0530
# Node ID 9008bd37535203fae6a6ff63b74b6778bdf229ea
# Parent  8f148ac8dbe4b68e88ceff84f40e33b29e888dc9
Cache spatial and temporal PMVs before analyzing to find the best MV for each reference index

diff -r 8f148ac8dbe4 -r 9008bd375352 source/common/cudata.cpp
--- a/source/common/cudata.cpp	Tue Mar 10 15:46:36 2015 +0530
+++ b/source/common/cudata.cpp	Mon Mar 09 14:35:20 2015 +0530
@@ -1632,87 +1632,122 @@
     return count;
 }
 
-/* Constructs a list of candidates for AMVP, and a larger list of motion candidates */
-int CUData::fillMvpCand(uint32_t puIdx, uint32_t absPartIdx, int picList, int refIdx, MV* amvpCand, MV* mvc) const
+// Build the AMVP candidates (amvpCand) and the larger list of motion candidates (pmv) from the cached neighbour MVs. Called for each reference index.
+int CUData::getPMV(InterNeighbourMV *neighbours, uint32_t picList, uint32_t refIdx, MV* amvpCand, MV* pmv) const
 {
+    MV directMV[MD_ABOVE_LEFT + 1];
+    MV indirectMV[MD_ABOVE_LEFT + 1];
+    bool validDirect[MD_ABOVE_LEFT + 1];
+    bool validIndirect[MD_ABOVE_LEFT + 1];
+
+    // Left candidate.
+    validDirect[MD_BELOW_LEFT]  = getDirectPMV(directMV[MD_BELOW_LEFT], neighbours + MD_BELOW_LEFT, picList, refIdx);
+    validDirect[MD_LEFT]        = getDirectPMV(directMV[MD_LEFT], neighbours + MD_LEFT, picList, refIdx);
+    // Top candidate.
+    validDirect[MD_ABOVE_RIGHT] = getDirectPMV(directMV[MD_ABOVE_RIGHT], neighbours + MD_ABOVE_RIGHT, picList, refIdx);
+    validDirect[MD_ABOVE]       = getDirectPMV(directMV[MD_ABOVE], neighbours + MD_ABOVE, picList, refIdx);
+    validDirect[MD_ABOVE_LEFT]  = getDirectPMV(directMV[MD_ABOVE_LEFT], neighbours + MD_ABOVE_LEFT, picList, refIdx);
+
+    // Left candidate.
+    validIndirect[MD_BELOW_LEFT]  = getIndirectPMV(indirectMV[MD_BELOW_LEFT], neighbours + MD_BELOW_LEFT, picList, refIdx);
+    validIndirect[MD_LEFT]        = getIndirectPMV(indirectMV[MD_LEFT], neighbours + MD_LEFT, picList, refIdx);
+    // Top candidate.
+    validIndirect[MD_ABOVE_RIGHT] = getIndirectPMV(indirectMV[MD_ABOVE_RIGHT], neighbours + MD_ABOVE_RIGHT, picList, refIdx);
+    validIndirect[MD_ABOVE]       = getIndirectPMV(indirectMV[MD_ABOVE], neighbours + MD_ABOVE, picList, refIdx);
+    validIndirect[MD_ABOVE_LEFT]  = getIndirectPMV(indirectMV[MD_ABOVE_LEFT], neighbours + MD_ABOVE_LEFT, picList, refIdx);
+
     int num = 0;
-
-    // spatial MV
-    uint32_t partIdxLT, partIdxRT, partIdxLB = deriveLeftBottomIdx(puIdx);
-
-    deriveLeftRightTopIdx(puIdx, partIdxLT, partIdxRT);
-
-    MV mv[MD_ABOVE_LEFT + 1];
-    MV mvOrder[MD_ABOVE_LEFT + 1];
-    bool valid[MD_ABOVE_LEFT + 1];
-    bool validOrder[MD_ABOVE_LEFT + 1];
-
-    valid[MD_BELOW_LEFT]  = addMVPCand(mv[MD_BELOW_LEFT], picList, refIdx, partIdxLB, MD_BELOW_LEFT);
-    valid[MD_LEFT]        = addMVPCand(mv[MD_LEFT], picList, refIdx, partIdxLB, MD_LEFT);
-    valid[MD_ABOVE_RIGHT] = addMVPCand(mv[MD_ABOVE_RIGHT], picList, refIdx, partIdxRT, MD_ABOVE_RIGHT);
-    valid[MD_ABOVE]       = addMVPCand(mv[MD_ABOVE], picList, refIdx, partIdxRT, MD_ABOVE);
-    valid[MD_ABOVE_LEFT]  = addMVPCand(mv[MD_ABOVE_LEFT], picList, refIdx, partIdxLT, MD_ABOVE_LEFT);
-
-    validOrder[MD_BELOW_LEFT]  = addMVPCandOrder(mvOrder[MD_BELOW_LEFT], picList, refIdx, partIdxLB, MD_BELOW_LEFT);
-    validOrder[MD_LEFT]        = addMVPCandOrder(mvOrder[MD_LEFT], picList, refIdx, partIdxLB, MD_LEFT);
-    validOrder[MD_ABOVE_RIGHT] = addMVPCandOrder(mvOrder[MD_ABOVE_RIGHT], picList, refIdx, partIdxRT, MD_ABOVE_RIGHT);
-    validOrder[MD_ABOVE]       = addMVPCandOrder(mvOrder[MD_ABOVE], picList, refIdx, partIdxRT, MD_ABOVE);
-    validOrder[MD_ABOVE_LEFT]  = addMVPCandOrder(mvOrder[MD_ABOVE_LEFT], picList, refIdx, partIdxLT, MD_ABOVE_LEFT);
-
     // Left predictor search
-    if (valid[MD_BELOW_LEFT])
-        amvpCand[num++] = mv[MD_BELOW_LEFT];
-    else if (valid[MD_LEFT])
-        amvpCand[num++] = mv[MD_LEFT];
-    else if (validOrder[MD_BELOW_LEFT])
-        amvpCand[num++] = mvOrder[MD_BELOW_LEFT];
-    else if (validOrder[MD_LEFT])
-        amvpCand[num++] = mvOrder[MD_LEFT];
+    if (validDirect[MD_BELOW_LEFT])
+        amvpCand[num++] = directMV[MD_BELOW_LEFT];
+    else if (validDirect[MD_LEFT])
+        amvpCand[num++] = directMV[MD_LEFT];
+    else if (validIndirect[MD_BELOW_LEFT])
+        amvpCand[num++] = indirectMV[MD_BELOW_LEFT];
+    else if (validIndirect[MD_LEFT])
+        amvpCand[num++] = indirectMV[MD_LEFT];
 
     bool bAddedSmvp = num > 0;
 
     // Above predictor search
-    if (valid[MD_ABOVE_RIGHT])
-        amvpCand[num++] = mv[MD_ABOVE_RIGHT];
-    else if (valid[MD_ABOVE])
-        amvpCand[num++] = mv[MD_ABOVE];
-    else if (valid[MD_ABOVE_LEFT])
-        amvpCand[num++] = mv[MD_ABOVE_LEFT];
+    if (validDirect[MD_ABOVE_RIGHT])
+        amvpCand[num++] = directMV[MD_ABOVE_RIGHT];
+    else if (validDirect[MD_ABOVE])
+        amvpCand[num++] = directMV[MD_ABOVE];
+    else if (validDirect[MD_ABOVE_LEFT])
+        amvpCand[num++] = directMV[MD_ABOVE_LEFT];
 
     if (!bAddedSmvp)
     {
-        if (validOrder[MD_ABOVE_RIGHT])
-            amvpCand[num++] = mvOrder[MD_ABOVE_RIGHT];
-        else if (validOrder[MD_ABOVE])
-            amvpCand[num++] = mvOrder[MD_ABOVE];
-        else if (validOrder[MD_ABOVE_LEFT])
-            amvpCand[num++] = mvOrder[MD_ABOVE_LEFT];
+        if (validIndirect[MD_ABOVE_RIGHT])
+            amvpCand[num++] = indirectMV[MD_ABOVE_RIGHT];
+        else if (validIndirect[MD_ABOVE])
+            amvpCand[num++] = indirectMV[MD_ABOVE];
+        else if (validIndirect[MD_ABOVE_LEFT])
+            amvpCand[num++] = indirectMV[MD_ABOVE_LEFT];
     }
 
     int numMvc = 0;
     for (int dir = MD_LEFT; dir <= MD_ABOVE_LEFT; dir++)
     {
-        if (valid[dir] && mv[dir].notZero())
-            mvc[numMvc++] = mv[dir];
+        if (validDirect[dir] && directMV[dir].notZero())
+            pmv[numMvc++] = directMV[dir];
 
-        if (validOrder[dir] && mvOrder[dir].notZero())
-            mvc[numMvc++] = mvOrder[dir];
+        if (validIndirect[dir] && indirectMV[dir].notZero())
+            pmv[numMvc++] = indirectMV[dir];
     }
 
     if (num == 2)
+        num -= amvpCand[0] == amvpCand[1];
+
+    // Get the collocated (temporal) candidate. It is only needed when fewer
+    // than two distinct spatial candidates were found above.
+    if (m_slice->m_sps->bTemporalMVPEnabled && num < 2)
     {
-        if (amvpCand[0] == amvpCand[1])
-            num = 1;
-        else
-            /* AMVP_NUM_CANDS = 2 */
-            return numMvc;
+        int tempRefIdx = neighbours[MD_COLLOCATED].refIdx[picList];
+        if (tempRefIdx != -1)
+        {
+            uint32_t cuAddr = neighbours[MD_COLLOCATED].cuAddr[picList];
+            const Frame* colPic = m_slice->m_refPicList[m_slice->isInterB() && !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx];
+            const CUData* colCU = colPic->m_encData->getPicCTU(cuAddr);
+
+            // Scale the vector
+            int colRefPOC = colCU->m_slice->m_refPOCList[tempRefIdx >> 4][tempRefIdx & 0xf];
+            int colPOC = colCU->m_slice->m_poc;
+
+            int curRefPOC = m_slice->m_refPOCList[picList][refIdx];
+            int curPOC = m_slice->m_poc;
+
+            pmv[numMvc++] = amvpCand[num++] = scaleMvByPOCDist(neighbours[MD_COLLOCATED].mv[picList], curPOC, curRefPOC, colPOC, colRefPOC);
+        }
     }
 
+    while (num < AMVP_NUM_CANDS)
+        amvpCand[num++] = 0;
+
+    return numMvc;
+}
+
+/* Caches the spatial and temporal neighbour MVs of a PU so that getPMV() can reuse them for every reference index */
+void CUData::getNeighbourMV(uint32_t puIdx, uint32_t absPartIdx, InterNeighbourMV* neighbours) const
+{
+    // Set the temporal neighbour to unavailable by default.
+    neighbours[MD_COLLOCATED].unifiedRef = -1;
+
+    uint32_t partIdxLT, partIdxRT, partIdxLB = deriveLeftBottomIdx(puIdx);
+    deriveLeftRightTopIdx(puIdx, partIdxLT, partIdxRT);
+
+    // Load the spatial MVs.
+    getInterNeighbourMV(neighbours + MD_BELOW_LEFT, partIdxLB, MD_BELOW_LEFT);
+    getInterNeighbourMV(neighbours + MD_LEFT,       partIdxLB, MD_LEFT);
+    getInterNeighbourMV(neighbours + MD_ABOVE_RIGHT,partIdxRT, MD_ABOVE_RIGHT);
+    getInterNeighbourMV(neighbours + MD_ABOVE,      partIdxRT, MD_ABOVE);
+    getInterNeighbourMV(neighbours + MD_ABOVE_LEFT, partIdxLT, MD_ABOVE_LEFT);
+
     if (m_slice->m_sps->bTemporalMVPEnabled)
     {
         uint32_t absPartAddr = m_absIdxInCTU + absPartIdx;
         uint32_t partIdxRB = deriveRightBottomIdx(puIdx);
-        MV colmv;
 
         // co-located RightBottom temporal predictor (H)
         int ctuIdx = -1;
@@ -1741,45 +1776,17 @@
             else // is the right bottom corner of CTU
                 absPartAddr = 0;
         }
-        if (ctuIdx >= 0 && getColMVP(colmv, refIdx, picList, ctuIdx, absPartAddr))
-        {
-            amvpCand[num++] = colmv;
-            mvc[numMvc++] = colmv;
-        }
-        else
+
+        if (!(ctuIdx >= 0 && getCollocatedMV(ctuIdx, absPartAddr, neighbours + MD_COLLOCATED)))
         {
             uint32_t partIdxCenter =  deriveCenterIdx(puIdx);
             uint32_t curCTUIdx = m_cuAddr;
-            if (getColMVP(colmv, refIdx, picList, curCTUIdx, partIdxCenter))
-            {
-                amvpCand[num++] = colmv;
-                mvc[numMvc++] = colmv;
-            }
+            getCollocatedMV(curCTUIdx, partIdxCenter, neighbours + MD_COLLOCATED);
         }
     }
-
-    while (num < AMVP_NUM_CANDS)
-        amvpCand[num++] = 0;
-
-    return numMvc;
 }
 
-void CUData::clipMv(MV& outMV) const
-{
-    const uint32_t mvshift = 2;
-    uint32_t offset = 8;
-
-    int16_t xmax = (int16_t)((m_slice->m_sps->picWidthInLumaSamples + offset - m_cuPelX - 1) << mvshift);
-    int16_t xmin = -(int16_t)((g_maxCUSize + offset + m_cuPelX - 1) << mvshift);
-
-    int16_t ymax = (int16_t)((m_slice->m_sps->picHeightInLumaSamples + offset - m_cuPelY - 1) << mvshift);
-    int16_t ymin = -(int16_t)((g_maxCUSize + offset + m_cuPelY - 1) << mvshift);
-
-    outMV.x = X265_MIN(xmax, X265_MAX(xmin, outMV.x));
-    outMV.y = X265_MIN(ymax, X265_MAX(ymin, outMV.y));
-}
-
-bool CUData::addMVPCand(MV& mvp, int picList, int refIdx, uint32_t partUnitIdx, MVP_DIR dir) const
+void CUData::getInterNeighbourMV(InterNeighbourMV *neighbour, uint32_t partUnitIdx, MVP_DIR dir) const
 {
     const CUData* tmpCU = NULL;
     uint32_t idx = 0;
@@ -1802,103 +1809,77 @@
         tmpCU = getPUAboveLeft(idx, partUnitIdx);
         break;
     default:
-        return false;
+        break;
     }
 
     if (!tmpCU)
-        return false;
-
-    int refPOC = m_slice->m_refPOCList[picList][refIdx];
-    int partRefIdx = tmpCU->m_refIdx[picList][idx];
-    if (partRefIdx >= 0 && refPOC == tmpCU->m_slice->m_refPOCList[picList][partRefIdx])
     {
-        mvp = tmpCU->m_mv[picList][idx];
-        return true;
+        // Mark the PMV as unavailable.
+        for (int i = 0; i < 2; i++)
+            neighbour->refIdx[i] = -1;
+        return;
     }
 
-    int refPicList2nd = 0;
-    if (picList == 0)
-        refPicList2nd = 1;
-    else if (picList == 1)
-        refPicList2nd = 0;
+    for (int i = 0; i < 2; i++)
+    {
+        // Get the MV.
+        neighbour->mv[i] = tmpCU->m_mv[i][idx];
 
+        // Get the reference idx.
+        neighbour->refIdx[i] = tmpCU->m_refIdx[i][idx];
+    }
+}
+
+void CUData::clipMv(MV& outMV) const
+{
+    const uint32_t mvshift = 2;
+    uint32_t offset = 8;
+
+    int16_t xmax = (int16_t)((m_slice->m_sps->picWidthInLumaSamples + offset - m_cuPelX - 1) << mvshift);
+    int16_t xmin = -(int16_t)((g_maxCUSize + offset + m_cuPelX - 1) << mvshift);
+
+    int16_t ymax = (int16_t)((m_slice->m_sps->picHeightInLumaSamples + offset - m_cuPelY - 1) << mvshift);
+    int16_t ymin = -(int16_t)((g_maxCUSize + offset + m_cuPelY - 1) << mvshift);
+
+    outMV.x = X265_MIN(xmax, X265_MAX(xmin, outMV.x));
+    outMV.y = X265_MIN(ymax, X265_MAX(ymin, outMV.y));
+}
+
+// Load direct spatial MV if available.
+bool CUData::getDirectPMV(MV& pmv, InterNeighbourMV *neighbours, uint32_t picList, uint32_t refIdx) const
+{
     int curRefPOC = m_slice->m_refPOCList[picList][refIdx];
-    int neibRefPOC;
-
-    partRefIdx = tmpCU->m_refIdx[refPicList2nd][idx];
-    if (partRefIdx >= 0)
+    for (int i = 0; i < 2; i++, picList = !picList)
     {
-        neibRefPOC = tmpCU->m_slice->m_refPOCList[refPicList2nd][partRefIdx];
-        if (neibRefPOC == curRefPOC)
+        int partRefIdx = neighbours->refIdx[picList];
+        if (partRefIdx >= 0 && curRefPOC == m_slice->m_refPOCList[picList][partRefIdx])
         {
-            // Same reference frame but different list
-            mvp = tmpCU->m_mv[refPicList2nd][idx];
+            pmv = neighbours->mv[picList];
             return true;
         }
     }
     return false;
 }
 
-bool CUData::addMVPCandOrder(MV& outMV, int picList, int refIdx, uint32_t partUnitIdx, MVP_DIR dir) const
+// Load indirect spatial MV if available. An indirect MV has to be scaled.
+bool CUData::getIndirectPMV(MV& outMV, InterNeighbourMV *neighbours, uint32_t picList, uint32_t refIdx) const
 {
-    const CUData* tmpCU = NULL;
-    uint32_t idx = 0;
+    int curPOC = m_slice->m_poc;
+    int neibPOC = curPOC;
+    int curRefPOC = m_slice->m_refPOCList[picList][refIdx];
 
-    switch (dir)
+    for (int i = 0; i < 2; i++, picList = !picList)
     {
-    case MD_LEFT:
-        tmpCU = getPULeft(idx, partUnitIdx);
-        break;
-    case MD_ABOVE:
-        tmpCU = getPUAbove(idx, partUnitIdx);
-        break;
-    case MD_ABOVE_RIGHT:
-        tmpCU = getPUAboveRight(idx, partUnitIdx);
-        break;
-    case MD_BELOW_LEFT:
-        tmpCU = getPUBelowLeft(idx, partUnitIdx);
-        break;
-    case MD_ABOVE_LEFT:
-        tmpCU = getPUAboveLeft(idx, partUnitIdx);
-        break;
-    default:
-        return false;
+        int partRefIdx = neighbours->refIdx[picList];
+        if (partRefIdx >= 0)
+        {
+            int neibRefPOC = m_slice->m_refPOCList[picList][partRefIdx];
+            MV mvp = neighbours->mv[picList];
+
+            outMV = scaleMvByPOCDist(mvp, curPOC, curRefPOC, neibPOC, neibRefPOC);
+            return true;
+        }
     }
-
-    if (!tmpCU)
-        return false;
-
-    int refPicList2nd = 0;
-    if (picList == 0)
-        refPicList2nd = 1;
-    else if (picList == 1)
-        refPicList2nd = 0;
-
-    int curPOC = m_slice->m_poc;
-    int curRefPOC = m_slice->m_refPOCList[picList][refIdx];
-    int neibPOC = curPOC;
-    int neibRefPOC;
-
-    int partRefIdx = tmpCU->m_refIdx[picList][idx];
-    if (partRefIdx >= 0)
-    {
-        neibRefPOC = tmpCU->m_slice->m_refPOCList[picList][partRefIdx];
-        MV mvp = tmpCU->m_mv[picList][idx];
-
-        scaleMvByPOCDist(outMV, mvp, curPOC, curRefPOC, neibPOC, neibRefPOC);
-        return true;
-    }
-
-    partRefIdx = tmpCU->m_refIdx[refPicList2nd][idx];
-    if (partRefIdx >= 0)
-    {
-        neibRefPOC = tmpCU->m_slice->m_refPOCList[refPicList2nd][partRefIdx];
-        MV mvp = tmpCU->m_mv[refPicList2nd][idx];
-
-        scaleMvByPOCDist(outMV, mvp, curPOC, curRefPOC, neibPOC, neibRefPOC);
-        return true;
-    }
-
     return false;
 }
 
@@ -1907,12 +1888,8 @@
     const Frame* colPic = m_slice->m_refPicList[m_slice->isInterB() && !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx];
     const CUData* colCU = colPic->m_encData->getPicCTU(cuAddr);
 
-    if (colCU->m_predMode[partUnitIdx] == MODE_NONE)
-        return false;
-
     uint32_t absPartAddr = partUnitIdx & TMVP_UNIT_MASK;
-
-    if (colCU->isIntra(absPartAddr))
+    if ((colCU->m_predMode[partUnitIdx] == MODE_NONE) || (colCU->isIntra(absPartAddr)))
         return false;
 
     int colRefPicList = m_slice->m_bCheckLDC ? picList : m_slice->m_colFromL0Flag;
@@ -1921,7 +1898,7 @@
 
     if (colRefIdx < 0)
     {
-        colRefPicList = 1 - colRefPicList;
+        colRefPicList = !colRefPicList;
         colRefIdx = colCU->m_refIdx[colRefPicList][absPartAddr];
 
         if (colRefIdx < 0)
@@ -1936,24 +1913,52 @@
     int curRefPOC = m_slice->m_refPOCList[picList][outRefIdx];
     int curPOC = m_slice->m_poc;
 
-    scaleMvByPOCDist(outMV, colmv, curPOC, curRefPOC, colPOC, colRefPOC);
+    outMV = scaleMvByPOCDist(colmv, curPOC, curRefPOC, colPOC, colRefPOC);
     return true;
 }
 
-void CUData::scaleMvByPOCDist(MV& outMV, const MV& inMV, int curPOC, int curRefPOC, int colPOC, int colRefPOC) const
+// Cache the collocated MV.
+bool CUData::getCollocatedMV(int cuAddr, int partUnitIdx, InterNeighbourMV *neighbour) const
+{
+    const Frame* colPic = m_slice->m_refPicList[m_slice->isInterB() && !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx];
+    const CUData* colCU = colPic->m_encData->getPicCTU(cuAddr);
+
+    uint32_t absPartAddr = partUnitIdx & TMVP_UNIT_MASK;
+    if (colCU->m_predMode[partUnitIdx] == MODE_NONE || colCU->isIntra(absPartAddr))
+        return false;
+
+    for (int list = 0; list < 2; list++)
+    {
+        neighbour->cuAddr[list] = cuAddr;
+        int colRefPicList = m_slice->m_bCheckLDC ? list : m_slice->m_colFromL0Flag;
+        int colRefIdx = colCU->m_refIdx[colRefPicList][absPartAddr];
+
+        if (colRefIdx < 0)
+            colRefPicList = !colRefPicList;
+
+        neighbour->refIdx[list] = colCU->m_refIdx[colRefPicList][absPartAddr];
+        neighbour->refIdx[list] |= colRefPicList << 4;
+
+        neighbour->mv[list] = colCU->m_mv[colRefPicList][absPartAddr];
+    }
+
+    return neighbour->unifiedRef != -1;
+}
+
+MV CUData::scaleMvByPOCDist(const MV& inMV, int curPOC, int curRefPOC, int colPOC, int colRefPOC) const
 {
     int diffPocD = colPOC - colRefPOC;
     int diffPocB = curPOC - curRefPOC;
 
     if (diffPocD == diffPocB)
-        outMV = inMV;
+        return inMV;
     else
     {
         int tdb   = x265_clip3(-128, 127, diffPocB);
         int tdd   = x265_clip3(-128, 127, diffPocD);
         int x     = (0x4000 + abs(tdd / 2)) / tdd;
         int scale = x265_clip3(-4096, 4095, (tdb * x + 32) >> 6);
-        outMV = scaleMv(inMV, scale);
+        return scaleMv(inMV, scale);
     }
 }
 
diff -r 8f148ac8dbe4 -r 9008bd375352 source/common/cudata.h
--- a/source/common/cudata.h	Tue Mar 10 15:46:36 2015 +0530
+++ b/source/common/cudata.h	Mon Mar 09 14:35:20 2015 +0530
@@ -64,7 +64,8 @@
     MD_ABOVE,       // MVP of above block
     MD_ABOVE_RIGHT, // MVP of above right block
     MD_BELOW_LEFT,  // MVP of below left block
-    MD_ABOVE_LEFT   // MVP of above left block
+    MD_ABOVE_LEFT,  // MVP of above left block
+    MD_COLLOCATED   // MVP of temporal neighbour
 };
 
 struct CUGeom
@@ -94,6 +95,26 @@
     int refIdx;
 };
 
+// Structure that keeps the neighbour's MV information.
+struct InterNeighbourMV
+{
+    // Neighbour MV. The index represents the list.
+    MV mv[2];
+
+    // Address of the CTU that holds the collocated (temporal) candidate.
+    uint32_t cuAddr[2];
+
+    // For spatial prediction, this field contains the reference index
+    // in each list (-1 if not available).
+    //
+    // For temporal prediction, the first value is used for the 
+    // prediction with list 0. The second value is used for the prediction 
+    // with list 1. For each value, the first four bits are the reference index 
+    // associated to the PMV, and the fifth bit is the list associated to the PMV.
+    // If both reference indices are -1, then unifiedRef is also -1.
+    union { int16_t refIdx[2]; int32_t unifiedRef; };
+};
+
 typedef void(*cucopy_t)(uint8_t* dst, uint8_t* src); // dst and src are aligned to MIN(size, 32)
 typedef void(*cubcast_t)(uint8_t* dst, uint8_t val); // dst is aligned to MIN(size, 32)
 
@@ -197,7 +218,8 @@
     int8_t   getRefQP(uint32_t currAbsIdxInCTU) const;
     uint32_t getInterMergeCandidates(uint32_t absPartIdx, uint32_t puIdx, MVField (*candMvField)[2], uint8_t* candDir) const;
     void     clipMv(MV& outMV) const;
-    int      fillMvpCand(uint32_t puIdx, uint32_t absPartIdx, int picList, int refIdx, MV* amvpCand, MV* mvc) const;
+    int      getPMV(InterNeighbourMV *neighbours, uint32_t reference_list, uint32_t refIdx, MV* amvpCand, MV* pmv) const;
+    void     getNeighbourMV(uint32_t puIdx, uint32_t absPartIdx, InterNeighbourMV* neighbours) const;
     void     getIntraTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const;
     void     getInterTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const;
 
@@ -244,12 +266,14 @@
     bool isDiffMER(int xN, int yN, int xP, int yP) const { return ((xN >> 2) != (xP >> 2)) || ((yN >> 2) != (yP >> 2)); }
 
     // add possible motion vector predictor candidates
-    bool addMVPCand(MV& mvp, int picList, int refIdx, uint32_t absPartIdx, MVP_DIR dir) const;
-    bool addMVPCandOrder(MV& mvp, int picList, int refIdx, uint32_t absPartIdx, MVP_DIR dir) const;
+    bool getDirectPMV(MV& pmv, InterNeighbourMV *neighbours, uint32_t picList, uint32_t refIdx) const;
+    bool getIndirectPMV(MV& outMV, InterNeighbourMV *neighbours, uint32_t reference_list, uint32_t refIdx) const;
+    void getInterNeighbourMV(InterNeighbourMV *neighbour, uint32_t partUnitIdx, MVP_DIR dir) const;
 
     bool getColMVP(MV& outMV, int& outRefIdx, int picList, int cuAddr, int absPartIdx) const;
+    bool getCollocatedMV(int cuAddr, int partUnitIdx, InterNeighbourMV *neighbour) const;
 
-    void scaleMvByPOCDist(MV& outMV, const MV& inMV, int curPOC, int curRefPOC, int colPOC, int colRefPOC) const;
+    MV scaleMvByPOCDist(const MV& inMV, int curPOC, int curRefPOC, int colPOC, int colRefPOC) const;
 
     void     deriveLeftRightTopIdx(uint32_t puIdx, uint32_t& partIdxLT, uint32_t& partIdxRT) const;
 
diff -r 8f148ac8dbe4 -r 9008bd375352 source/encoder/search.cpp
--- a/source/encoder/search.cpp	Tue Mar 10 15:46:36 2015 +0530
+++ b/source/encoder/search.cpp	Mon Mar 09 14:35:20 2015 +0530
@@ -1929,7 +1929,7 @@
     bits += getTUBits(ref, m_slice->m_numRefIdx[list]);
 
     MV mvc[(MD_ABOVE_LEFT + 1) * 2 + 1];
-    int numMvc = interMode.cu.fillMvpCand(part, pu.puAbsPartIdx, list, ref, interMode.amvpCand[list][ref], mvc);
+    int numMvc = interMode.cu.getPMV(interMode.interNeighbours, list, ref, interMode.amvpCand[list][ref], mvc);
 
     int mvpIdx = 0;
     int merange = m_param->searchRange;
@@ -2047,34 +2047,36 @@
         getBlkBits((PartSize)cu.m_partSize[0], slice->isInterP(), puIdx, lastMode, m_listSelBits);
         bool bDoUnidir = true;
 
+        cu.getNeighbourMV(puIdx, pu.puAbsPartIdx, interMode.interNeighbours);
+
         /* Uni-directional prediction */
         if (m_param->analysisMode == X265_ANALYSIS_LOAD && bestME[0].ref >= 0)
         {
-            for (int l = 0; l < numPredDir; l++)
+            for (int list = 0; list < numPredDir; list++)
             {
-                int ref = bestME[l].ref;
-                uint32_t bits = m_listSelBits[l] + MVP_IDX_BITS;
-                bits += getTUBits(ref, numRefIdx[l]);
-
-                int numMvc = cu.fillMvpCand(puIdx, pu.puAbsPartIdx, l, ref, interMode.amvpCand[l][ref], mvc);
+                int ref = bestME[list].ref;
+                uint32_t bits = m_listSelBits[list] + MVP_IDX_BITS;
+                bits += getTUBits(ref, numRefIdx[list]);
+
+                int numMvc = cu.getPMV(interMode.interNeighbours, list, ref, interMode.amvpCand[list][ref], mvc);
 
                 // Pick the best possible MVP from AMVP candidates based on least residual
                 int mvpIdx = 0;
                 int merange = m_param->searchRange;
 
-                if (interMode.amvpCand[l][ref][0] != interMode.amvpCand[l][ref][1])
+                if (interMode.amvpCand[list][ref][0] != interMode.amvpCand[list][ref][1])
                 {
                     uint32_t bestCost = MAX_INT;
                     for (int i = 0; i < AMVP_NUM_CANDS; i++)
                     {
-                        MV mvCand = interMode.amvpCand[l][ref][i];
+                        MV mvCand = interMode.amvpCand[list][ref][i];
 
                         // NOTE: skip mvCand if Y is > merange and -FN>1
                         if (m_bFrameParallel && (mvCand.y >= (merange + 1) * 4))
                             continue;
 
                         cu.clipMv(mvCand);
-                        predInterLumaPixel(pu, tmpPredYuv, *slice->m_refPicList[l][ref]->m_reconPic, mvCand);
+                        predInterLumaPixel(pu, tmpPredYuv, *slice->m_refPicList[list][ref]->m_reconPic, mvCand);
                         uint32_t cost = m_me.bufSAD(tmpPredYuv.getLumaAddr(pu.puAbsPartIdx), tmpPredYuv.m_size);
 
                         if (bestCost > cost)
@@ -2085,26 +2087,26 @@
                     }
                 }
 
-                MV mvmin, mvmax, outmv, mvp = interMode.amvpCand[l][ref][mvpIdx];
+                MV mvmin, mvmax, outmv, mvp = interMode.amvpCand[list][ref][mvpIdx];
 
                 int satdCost;
                 setSearchRange(cu, mvp, merange, mvmin, mvmax);
-                satdCost = m_me.motionEstimate(&slice->m_mref[l][ref], mvmin, mvmax, mvp, numMvc, mvc, merange, outmv);
+                satdCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvp, numMvc, mvc, merange, outmv);
 
                 /* Get total cost of partition, but only include MV bit cost once */
                 bits += m_me.bitcost(outmv);
                 uint32_t cost = (satdCost - m_me.mvcost(outmv)) + m_rdCost.getCost(bits);
 
                 /* Refine MVP selection, updates: mvp, mvpIdx, bits, cost */
-                checkBestMVP(interMode.amvpCand[l][ref], outmv, mvp, mvpIdx, bits, cost);
-
-                if (cost < bestME[l].cost)
+                checkBestMVP(interMode.amvpCand[list][ref], outmv, mvp, mvpIdx, bits, cost);
+
+                if (cost < bestME[list].cost)
                 {
-                    bestME[l].mv = outmv;
-                    bestME[l].mvp = mvp;
-                    bestME[l].mvpIdx = mvpIdx;
-                    bestME[l].cost = cost;
-                    bestME[l].bits = bits;
+                    bestME[list].mv = outmv;
+                    bestME[list].mvp = mvp;
+                    bestME[list].mvpIdx = mvpIdx;
+                    bestME[list].cost = cost;
+                    bestME[list].bits = bits;
                 }
             }
             bDoUnidir = false;
@@ -2132,32 +2134,32 @@
         }
         if (bDoUnidir)
         {
-            for (int l = 0; l < numPredDir; l++)
+            for (int list = 0; list < numPredDir; list++)
             {
-                for (int ref = 0; ref < numRefIdx[l]; ref++)
+                for (int ref = 0; ref < numRefIdx[list]; ref++)
                 {
-                    uint32_t bits = m_listSelBits[l] + MVP_IDX_BITS;
-                    bits += getTUBits(ref, numRefIdx[l]);
-
-                    int numMvc = cu.fillMvpCand(puIdx, pu.puAbsPartIdx, l, ref, interMode.amvpCand[l][ref], mvc);
+                    uint32_t bits = m_listSelBits[list] + MVP_IDX_BITS;
+                    bits += getTUBits(ref, numRefIdx[list]);
+
+                    int numMvc = cu.getPMV(interMode.interNeighbours, list, ref, interMode.amvpCand[list][ref], mvc);
 
                     // Pick the best possible MVP from AMVP candidates based on least residual
                     int mvpIdx = 0;
                     int merange = m_param->searchRange;
 
-                    if (interMode.amvpCand[l][ref][0] != interMode.amvpCand[l][ref][1])
+                    if (interMode.amvpCand[list][ref][0] != interMode.amvpCand[list][ref][1])
                     {
                         uint32_t bestCost = MAX_INT;
                         for (int i = 0; i < AMVP_NUM_CANDS; i++)
                         {
-                            MV mvCand = interMode.amvpCand[l][ref][i];
+                            MV mvCand = interMode.amvpCand[list][ref][i];
 
                             // NOTE: skip mvCand if Y is > merange and -FN>1
                             if (m_bFrameParallel && (mvCand.y >= (merange + 1) * 4))
                                 continue;
 
                             cu.clipMv(mvCand);
-                            predInterLumaPixel(pu, tmpPredYuv, *slice->m_refPicList[l][ref]->m_reconPic, mvCand);
+                            predInterLumaPixel(pu, tmpPredYuv, *slice->m_refPicList[list][ref]->m_reconPic, mvCand);
                             uint32_t cost = m_me.bufSAD(tmpPredYuv.getLumaAddr(pu.puAbsPartIdx), tmpPredYuv.m_size);
 
                             if (bestCost > cost)
@@ -2168,26 +2170,26 @@
                         }
                     }
 
-                    MV mvmin, mvmax, outmv, mvp = interMode.amvpCand[l][ref][mvpIdx];
+                    MV mvmin, mvmax, outmv, mvp = interMode.amvpCand[list][ref][mvpIdx];
 
                     setSearchRange(cu, mvp, merange, mvmin, mvmax);
-                    int satdCost = m_me.motionEstimate(&slice->m_mref[l][ref], mvmin, mvmax, mvp, numMvc, mvc, merange, outmv);
+                    int satdCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvp, numMvc, mvc, merange, outmv);
 
                     /* Get total cost of partition, but only include MV bit cost once */
                     bits += m_me.bitcost(outmv);
                     uint32_t cost = (satdCost - m_me.mvcost(outmv)) + m_rdCost.getCost(bits);
 
                     /* Refine MVP selection, updates: mvp, mvpIdx, bits, cost */
-                    checkBestMVP(interMode.amvpCand[l][ref], outmv, mvp, mvpIdx, bits, cost);
-
-                    if (cost < bestME[l].cost)
+                    checkBestMVP(interMode.amvpCand[list][ref], outmv, mvp, mvpIdx, bits, cost);
+
+                    if (cost < bestME[list].cost)
                     {
-                        bestME[l].mv = outmv;
-                        bestME[l].mvp = mvp;
-                        bestME[l].mvpIdx = mvpIdx;
-                        bestME[l].ref = ref;
-                        bestME[l].cost = cost;
-                        bestME[l].bits = bits;
+                        bestME[list].mv = outmv;
+                        bestME[list].mvp = mvp;
+                        bestME[list].mvpIdx = mvpIdx;
+                        bestME[list].ref = ref;
+                        bestME[list].cost = cost;
+                        bestME[list].bits = bits;
                     }
                 }
             }
diff -r 8f148ac8dbe4 -r 9008bd375352 source/encoder/search.h
--- a/source/encoder/search.h	Tue Mar 10 15:46:36 2015 +0530
+++ b/source/encoder/search.h	Mon Mar 09 14:35:20 2015 +0530
@@ -100,6 +100,11 @@
 
     MotionData bestME[MAX_INTER_PARTS][2];
     MV         amvpCand[2][MAX_NUM_REF][AMVP_NUM_CANDS];
+//    MV         _amvpCand[2][MAX_NUM_REF][AMVP_NUM_CANDS];
+
+    // Neighbour MVs of the current partition. 5 spatial candidates and the
+    // temporal candidate.
+    InterNeighbourMV interNeighbours[6];
 
     uint64_t   rdCost;     // sum of partition (psy) RD costs          (sse(fenc, recon) + lambda2 * bits)
     uint64_t   sa8dCost;   // sum of partition sa8d distortion costs   (sa8d(fenc, pred) + lambda * bits)


More information about the x265-devel mailing list