[x265] refine merge related

Satoshi Nakagawa nakagawa424 at oki.com
Sun May 18 08:07:03 CEST 2014


# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1400392947 -32400
#      Sun May 18 15:02:27 2014 +0900
# Node ID 01a07bfcd77943142610b8a56c967e1bbf5080d1
# Parent  7d11f60c5dba82f5804c8bef7610d65c9ac278b4
refine merge related

diff -r 7d11f60c5dba -r 01a07bfcd779 source/Lib/TLibCommon/TComDataCU.cpp
--- a/source/Lib/TLibCommon/TComDataCU.cpp	Fri May 16 14:31:01 2014 +0530
+++ b/source/Lib/TLibCommon/TComDataCU.cpp	Sun May 18 15:02:27 2014 +0900
@@ -1918,27 +1918,27 @@
  * \param depth
  * \param mvFieldNeighbours
  * \param interDirNeighbours
- * \param numValidMergeCand
+ * \param maxNumMergeCand
  */
-void TComDataCU::getInterMergeCandidates(uint32_t absPartIdx, uint32_t puIdx, TComMvField* mvFieldNeighbours, uint8_t* interDirNeighbours,
-                                         int& numValidMergeCand)
+void TComDataCU::getInterMergeCandidates(uint32_t absPartIdx, uint32_t puIdx, TComMvField (*mvFieldNeighbours)[2], uint8_t* interDirNeighbours,
+                                         uint32_t& maxNumMergeCand)
 {
     uint32_t absPartAddr = m_absIdxInLCU + absPartIdx;
-    const uint32_t maxNumMergeCand = getSlice()->getMaxNumMergeCand();
     const bool isInterB = getSlice()->isInterB();
 
+    maxNumMergeCand = getSlice()->getMaxNumMergeCand();
+
     for (uint32_t i = 0; i < maxNumMergeCand; ++i)
     {
-        mvFieldNeighbours[(i << 1)].refIdx = NOT_VALID;
-        mvFieldNeighbours[(i << 1) + 1].refIdx = NOT_VALID;
+        mvFieldNeighbours[i][0].refIdx = NOT_VALID;
+        mvFieldNeighbours[i][1].refIdx = NOT_VALID;
     }
 
-    numValidMergeCand = maxNumMergeCand;
     // compute the location of the current PU
     int xP, yP, nPSW, nPSH;
     this->getPartPosition(puIdx, xP, yP, nPSW, nPSH);
 
-    int count = 0;
+    uint32_t count = 0;
 
     uint32_t partIdxLT, partIdxRT, partIdxLB;
     PartSize curPS = getPartitionSize(absPartIdx);
@@ -1957,10 +1957,10 @@
         // get Inter Dir
         interDirNeighbours[count] = cuLeft->getInterDir(leftPartIdx);
         // get Mv from Left
-        cuLeft->getMvField(cuLeft, leftPartIdx, REF_PIC_LIST_0, mvFieldNeighbours[count << 1]);
+        cuLeft->getMvField(cuLeft, leftPartIdx, REF_PIC_LIST_0, mvFieldNeighbours[count][0]);
         if (isInterB)
         {
-            cuLeft->getMvField(cuLeft, leftPartIdx, REF_PIC_LIST_1, mvFieldNeighbours[(count << 1) + 1]);
+            cuLeft->getMvField(cuLeft, leftPartIdx, REF_PIC_LIST_1, mvFieldNeighbours[count][1]);
         }
         count++;
         // early termination
@@ -1985,10 +1985,10 @@
         // get Inter Dir
         interDirNeighbours[count] = cuAbove->getInterDir(abovePartIdx);
         // get Mv from Left
-        cuAbove->getMvField(cuAbove, abovePartIdx, REF_PIC_LIST_0, mvFieldNeighbours[count << 1]);
+        cuAbove->getMvField(cuAbove, abovePartIdx, REF_PIC_LIST_0, mvFieldNeighbours[count][0]);
         if (isInterB)
         {
-            cuAbove->getMvField(cuAbove, abovePartIdx, REF_PIC_LIST_1, mvFieldNeighbours[(count << 1) + 1]);
+            cuAbove->getMvField(cuAbove, abovePartIdx, REF_PIC_LIST_1, mvFieldNeighbours[count][1]);
         }
         count++;
         // early termination
@@ -2010,10 +2010,10 @@
         // get Inter Dir
         interDirNeighbours[count] = cuAboveRight->getInterDir(aboveRightPartIdx);
         // get Mv from Left
-        cuAboveRight->getMvField(cuAboveRight, aboveRightPartIdx, REF_PIC_LIST_0, mvFieldNeighbours[count << 1]);
+        cuAboveRight->getMvField(cuAboveRight, aboveRightPartIdx, REF_PIC_LIST_0, mvFieldNeighbours[count][0]);
         if (isInterB)
         {
-            cuAboveRight->getMvField(cuAboveRight, aboveRightPartIdx, REF_PIC_LIST_1, mvFieldNeighbours[(count << 1) + 1]);
+            cuAboveRight->getMvField(cuAboveRight, aboveRightPartIdx, REF_PIC_LIST_1, mvFieldNeighbours[count][1]);
         }
         count++;
         // early termination
@@ -2035,10 +2035,10 @@
         // get Inter Dir
         interDirNeighbours[count] = cuLeftBottom->getInterDir(leftBottomPartIdx);
         // get Mv from Left
-        cuLeftBottom->getMvField(cuLeftBottom, leftBottomPartIdx, REF_PIC_LIST_0, mvFieldNeighbours[count << 1]);
+        cuLeftBottom->getMvField(cuLeftBottom, leftBottomPartIdx, REF_PIC_LIST_0, mvFieldNeighbours[count][0]);
         if (isInterB)
         {
-            cuLeftBottom->getMvField(cuLeftBottom, leftBottomPartIdx, REF_PIC_LIST_1, mvFieldNeighbours[(count << 1) + 1]);
+            cuLeftBottom->getMvField(cuLeftBottom, leftBottomPartIdx, REF_PIC_LIST_1, mvFieldNeighbours[count][1]);
         }
         count++;
         // early termination
@@ -2063,10 +2063,10 @@
             // get Inter Dir
             interDirNeighbours[count] = cuAboveLeft->getInterDir(aboveLeftPartIdx);
             // get Mv from Left
-            cuAboveLeft->getMvField(cuAboveLeft, aboveLeftPartIdx, REF_PIC_LIST_0, mvFieldNeighbours[count << 1]);
+            cuAboveLeft->getMvField(cuAboveLeft, aboveLeftPartIdx, REF_PIC_LIST_0, mvFieldNeighbours[count][0]);
             if (isInterB)
             {
-                cuAboveLeft->getMvField(cuAboveLeft, aboveLeftPartIdx, REF_PIC_LIST_1, mvFieldNeighbours[(count << 1) + 1]);
+                cuAboveLeft->getMvField(cuAboveLeft, aboveLeftPartIdx, REF_PIC_LIST_1, mvFieldNeighbours[count][1]);
             }
             count++;
             // early termination
@@ -2134,7 +2134,7 @@
         if (bExistMV)
         {
             dir |= 1;
-            mvFieldNeighbours[2 * arrayAddr].setMvField(colmv, refIdx);
+            mvFieldNeighbours[arrayAddr][0].setMvField(colmv, refIdx);
         }
 
         if (isInterB)
@@ -2147,7 +2147,7 @@
             if (bExistMV)
             {
                 dir |= 2;
-                mvFieldNeighbours[2 * arrayAddr + 1].setMvField(colmv, refIdx);
+                mvFieldNeighbours[arrayAddr][1].setMvField(colmv, refIdx);
             }
         }
 
@@ -2168,11 +2168,11 @@
 
     if (isInterB)
     {
-        const int cutoff = count * (count - 1);
+        const uint32_t cutoff = count * (count - 1);
         uint32_t priorityList0 = 0xEDC984; // { 0, 1, 0, 2, 1, 2, 0, 3, 1, 3, 2, 3 }
         uint32_t priorityList1 = 0xB73621; // { 1, 0, 2, 0, 2, 1, 3, 0, 3, 1, 3, 2 }
 
-        for (int idx = 0; idx < cutoff; idx++)
+        for (uint32_t idx = 0; idx < cutoff; idx++)
         {
             int i = priorityList0 & 3;
             int j = priorityList1 & 3;
@@ -2182,14 +2182,14 @@
             if ((interDirNeighbours[i] & 0x1) && (interDirNeighbours[j] & 0x2))
             {
                 // get Mv from cand[i] and cand[j]
-                int refIdxL0 = mvFieldNeighbours[i << 1].refIdx;
-                int refIdxL1 = mvFieldNeighbours[(j << 1) + 1].refIdx;
+                int refIdxL0 = mvFieldNeighbours[i][0].refIdx;
+                int refIdxL1 = mvFieldNeighbours[j][1].refIdx;
                 int refPOCL0 = m_slice->getRefPOC(REF_PIC_LIST_0, refIdxL0);
                 int refPOCL1 = m_slice->getRefPOC(REF_PIC_LIST_1, refIdxL1);
-                if (!(refPOCL0 == refPOCL1 && mvFieldNeighbours[i << 1].mv == mvFieldNeighbours[(j << 1) + 1].mv))
+                if (!(refPOCL0 == refPOCL1 && mvFieldNeighbours[i][0].mv == mvFieldNeighbours[j][1].mv))
                 {
-                    mvFieldNeighbours[arrayAddr << 1].setMvField(mvFieldNeighbours[i << 1].mv, refIdxL0);
-                    mvFieldNeighbours[(arrayAddr << 1) + 1].setMvField(mvFieldNeighbours[(j << 1) + 1].mv, refIdxL1);
+                    mvFieldNeighbours[arrayAddr][0].setMvField(mvFieldNeighbours[i][0].mv, refIdxL0);
+                    mvFieldNeighbours[arrayAddr][1].setMvField(mvFieldNeighbours[j][1].mv, refIdxL1);
                     interDirNeighbours[arrayAddr] = 3;
 
                     arrayAddr++;
@@ -2208,12 +2208,12 @@
     while (arrayAddr < maxNumMergeCand)
     {
         interDirNeighbours[arrayAddr] = 1;
-        mvFieldNeighbours[arrayAddr << 1].setMvField(MV(0, 0), r);
+        mvFieldNeighbours[arrayAddr][0].setMvField(MV(0, 0), r);
 
         if (isInterB)
         {
             interDirNeighbours[arrayAddr] = 3;
-            mvFieldNeighbours[(arrayAddr << 1) + 1].setMvField(MV(0, 0), r);
+            mvFieldNeighbours[arrayAddr][1].setMvField(MV(0, 0), r);
         }
         arrayAddr++;
         if (refcnt == numRefIdx - 1)
diff -r 7d11f60c5dba -r 01a07bfcd779 source/Lib/TLibCommon/TComDataCU.h
--- a/source/Lib/TLibCommon/TComDataCU.h	Fri May 16 14:31:01 2014 +0530
+++ b/source/Lib/TLibCommon/TComDataCU.h	Sun May 18 15:02:27 2014 +0900
@@ -413,7 +413,7 @@
     void          deriveLeftRightTopIdxAdi(uint32_t& partIdxLT, uint32_t& partIdxRT, uint32_t partOffset, uint32_t partDepth);
 
     bool          hasEqualMotion(uint32_t absPartIdx, TComDataCU* candCU, uint32_t candAbsPartIdx);
-    void          getInterMergeCandidates(uint32_t absPartIdx, uint32_t puIdx, TComMvField* mFieldNeighbours, uint8_t* interDirNeighbours, int& numValidMergeCand);
+    void          getInterMergeCandidates(uint32_t absPartIdx, uint32_t puIdx, TComMvField (*mvFieldNeighbours)[2], uint8_t* interDirNeighbours, uint32_t& maxNumMergeCand);
     void          deriveLeftRightTopIdxGeneral(uint32_t absPartIdx, uint32_t partIdx, uint32_t& partIdxLT, uint32_t& partIdxRT);
     void          deriveLeftBottomIdxGeneral(uint32_t absPartIdx, uint32_t partIdx, uint32_t& partIdxLB);
 
diff -r 7d11f60c5dba -r 01a07bfcd779 source/Lib/TLibCommon/TComSlice.h
--- a/source/Lib/TLibCommon/TComSlice.h	Fri May 16 14:31:01 2014 +0530
+++ b/source/Lib/TLibCommon/TComSlice.h	Sun May 18 15:02:27 2014 +0900
@@ -1420,6 +1420,8 @@
 
     int       getNumRefIdx(int e)                 { return m_numRefIdx[e]; }
 
+    const int* getNumRefIdx() const               { return m_numRefIdx; }
+
     TComPic*  getPic()                            { return m_pic; }
 
     TComPic*  getRefPic(int e, int refIdx) { return m_refPicList[e][refIdx]; }
diff -r 7d11f60c5dba -r 01a07bfcd779 source/Lib/TLibEncoder/TEncCu.cpp
--- a/source/Lib/TLibEncoder/TEncCu.cpp	Fri May 16 14:31:01 2014 +0530
+++ b/source/Lib/TLibEncoder/TEncCu.cpp	Sun May 18 15:02:27 2014 +0900
@@ -1208,22 +1208,17 @@
 void TEncCu::xCheckRDCostMerge2Nx2N(TComDataCU*& outBestCU, TComDataCU*& outTempCU, bool *earlyDetectionSkipMode, TComYuv*& outBestPredYuv, TComYuv*& rpcYuvReconBest)
 {
     X265_CHECK(outTempCU->getSlice()->getSliceType() != I_SLICE, "I slice not expected\n");
-    TComMvField mvFieldNeighbours[MRG_MAX_NUM_CANDS << 1]; // double length for mv of both lists
+    TComMvField mvFieldNeighbours[MRG_MAX_NUM_CANDS][2]; // double length for mv of both lists
     uint8_t interDirNeighbours[MRG_MAX_NUM_CANDS];
-    int numValidMergeCand = 0;
-
-    for (uint32_t i = 0; i < outTempCU->getSlice()->getMaxNumMergeCand(); ++i)
-    {
-        interDirNeighbours[i] = 0;
-    }
+    uint32_t maxNumMergeCand = outTempCU->getSlice()->getMaxNumMergeCand();
 
     uint8_t depth = outTempCU->getDepth(0);
     outTempCU->setPartSizeSubParts(SIZE_2Nx2N, 0, depth); // interprets depth relative to LCU level
     outTempCU->setCUTransquantBypassSubParts(m_CUTransquantBypassFlagValue, 0, depth);
-    outTempCU->getInterMergeCandidates(0, 0, mvFieldNeighbours, interDirNeighbours, numValidMergeCand);
+    outTempCU->getInterMergeCandidates(0, 0, mvFieldNeighbours, interDirNeighbours, maxNumMergeCand);
 
     int mergeCandBuffer[MRG_MAX_NUM_CANDS];
-    for (uint32_t i = 0; i < numValidMergeCand; ++i)
+    for (uint32_t i = 0; i < maxNumMergeCand; ++i)
     {
         mergeCandBuffer[i] = 0;
     }
@@ -1242,11 +1237,11 @@
 
     for (uint32_t noResidual = 0; noResidual < iteration; ++noResidual)
     {
-        for (uint32_t mergeCand = 0; mergeCand < numValidMergeCand; ++mergeCand)
+        for (uint32_t mergeCand = 0; mergeCand < maxNumMergeCand; ++mergeCand)
         {
             if (m_param->frameNumThreads > 1 &&
-                (mvFieldNeighbours[0 + 2 * mergeCand].mv.y >= (m_param->searchRange + 1) * 4 ||
-                 mvFieldNeighbours[1 + 2 * mergeCand].mv.y >= (m_param->searchRange + 1) * 4))
+                (mvFieldNeighbours[mergeCand][0].mv.y >= (m_param->searchRange + 1) * 4 ||
+                 mvFieldNeighbours[mergeCand][1].mv.y >= (m_param->searchRange + 1) * 4))
             {
                 continue;
             }
@@ -1261,8 +1256,8 @@
                     outTempCU->setMergeFlag(0, true);
                     outTempCU->setMergeIndex(0, mergeCand);
                     outTempCU->setInterDirSubParts(interDirNeighbours[mergeCand], 0, 0, depth); // interprets depth relative to LCU level
-                    outTempCU->getCUMvField(REF_PIC_LIST_0)->setAllMvField(mvFieldNeighbours[0 + 2 * mergeCand], SIZE_2Nx2N, 0, 0); // interprets depth relative to outTempCU level
-                    outTempCU->getCUMvField(REF_PIC_LIST_1)->setAllMvField(mvFieldNeighbours[1 + 2 * mergeCand], SIZE_2Nx2N, 0, 0); // interprets depth relative to outTempCU level
+                    outTempCU->getCUMvField(REF_PIC_LIST_0)->setAllMvField(mvFieldNeighbours[mergeCand][0], SIZE_2Nx2N, 0, 0); // interprets depth relative to outTempCU level
+                    outTempCU->getCUMvField(REF_PIC_LIST_1)->setAllMvField(mvFieldNeighbours[mergeCand][1], SIZE_2Nx2N, 0, 0); // interprets depth relative to outTempCU level
 
                     // do MC
                     m_search->motionCompensation(outTempCU, m_tmpPredYuv[depth], REF_PIC_LIST_X, 0);
diff -r 7d11f60c5dba -r 01a07bfcd779 source/Lib/TLibEncoder/TEncSbac.cpp
--- a/source/Lib/TLibEncoder/TEncSbac.cpp	Fri May 16 14:31:01 2014 +0530
+++ b/source/Lib/TLibEncoder/TEncSbac.cpp	Sun May 18 15:02:27 2014 +0900
@@ -1210,7 +1210,7 @@
 
             if (slice->getSliceType() != I_SLICE &&
                 ((slice->getColFromL0Flag() == 1 && slice->getNumRefIdx(REF_PIC_LIST_0) > 1) ||
-                 (slice->getColFromL0Flag() == 0  && slice->getNumRefIdx(REF_PIC_LIST_1) > 1)))
+                 (slice->getColFromL0Flag() == 0 && slice->getNumRefIdx(REF_PIC_LIST_1) > 1)))
             {
                 WRITE_UVLC(slice->getColRefIdx(), "collocated_ref_idx");
             }
diff -r 7d11f60c5dba -r 01a07bfcd779 source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp	Fri May 16 14:31:01 2014 +0530
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp	Sun May 18 15:02:27 2014 +0900
@@ -2166,72 +2166,69 @@
  */
 uint32_t TEncSearch::xMergeEstimation(TComDataCU* cu, int puIdx, MergeData& m)
 {
-    uint32_t depth    = cu->getDepth(m.absPartIdx);
-    PartSize partSize = cu->getPartitionSize(0);
-
-    if (cu->getSlice()->getPPS()->getLog2ParallelMergeLevelMinus2() && partSize != SIZE_2Nx2N && cu->getCUSize(0) <= 8)
+    assert(cu->getPartitionSize(0) != SIZE_2Nx2N);
+
+    if (cu->getCUSize(0) <= 8 && cu->getSlice()->getPPS()->getLog2ParallelMergeLevelMinus2())
     {
-        cu->setPartSizeSubParts(SIZE_2Nx2N, 0, depth);
         if (puIdx == 0)
         {
-            cu->getInterMergeCandidates(0, 0, m.mvFieldNeighbours, m.interDirNeighbours, m.numValidMergeCand);
+            PartSize partSize = cu->getPartitionSize(0);
+            cu->getPartitionSize()[0] = SIZE_2Nx2N;
+            cu->getInterMergeCandidates(0, 0, m.mvFieldNeighbours, m.interDirNeighbours, m.maxNumMergeCand);
+            cu->getPartitionSize()[0] = partSize;
         }
-        cu->setPartSizeSubParts(partSize, 0, depth);
     }
     else
     {
-        cu->getInterMergeCandidates(m.absPartIdx, puIdx, m.mvFieldNeighbours, m.interDirNeighbours, m.numValidMergeCand);
+        cu->getInterMergeCandidates(m.absPartIdx, puIdx, m.mvFieldNeighbours, m.interDirNeighbours, m.maxNumMergeCand);
     }
 
     /* convert bidir merge candidates into unidir
      * TODO: why did the HM do this?, why use MV pairs below? */
     if (cu->isBipredRestriction())
     {
-        for (uint32_t mergeCand = 0; mergeCand < m.numValidMergeCand; ++mergeCand)
+        for (uint32_t mergeCand = 0; mergeCand < m.maxNumMergeCand; ++mergeCand)
         {
             if (m.interDirNeighbours[mergeCand] == 3)
             {
                 m.interDirNeighbours[mergeCand] = 1;
-                m.mvFieldNeighbours[(mergeCand << 1) + 1].setMvField(MV(0, 0), -1);
+                m.mvFieldNeighbours[mergeCand][1].refIdx = NOT_VALID;
             }
         }
     }
 
     uint32_t outCost = MAX_UINT;
-    for (uint32_t mergeCand = 0; mergeCand < m.numValidMergeCand; ++mergeCand)
+    for (uint32_t mergeCand = 0; mergeCand < m.maxNumMergeCand; ++mergeCand)
     {
         /* Prevent TMVP candidates from using unavailable reference pixels */
         if (m_cfg->param->frameNumThreads > 1 &&
-            (m.mvFieldNeighbours[0 + 2 * mergeCand].mv.y >= (m_cfg->param->searchRange + 1) * 4 ||
-             m.mvFieldNeighbours[1 + 2 * mergeCand].mv.y >= (m_cfg->param->searchRange + 1) * 4))
+            (m.mvFieldNeighbours[mergeCand][0].mv.y >= (m_cfg->param->searchRange + 1) * 4 ||
+             m.mvFieldNeighbours[mergeCand][1].mv.y >= (m_cfg->param->searchRange + 1) * 4))
         {
             continue;
         }
 
-        cu->getCUMvField(REF_PIC_LIST_0)->m_mv[m.absPartIdx] = m.mvFieldNeighbours[0 + 2 * mergeCand].mv;
-        cu->getCUMvField(REF_PIC_LIST_0)->m_refIdx[m.absPartIdx] = m.mvFieldNeighbours[0 + 2 * mergeCand].refIdx;
-        cu->getCUMvField(REF_PIC_LIST_1)->m_mv[m.absPartIdx] = m.mvFieldNeighbours[1 + 2 * mergeCand].mv;
-        cu->getCUMvField(REF_PIC_LIST_1)->m_refIdx[m.absPartIdx] = m.mvFieldNeighbours[1 + 2 * mergeCand].refIdx;
+        cu->getCUMvField(REF_PIC_LIST_0)->m_mv[m.absPartIdx] = m.mvFieldNeighbours[mergeCand][0].mv;
+        cu->getCUMvField(REF_PIC_LIST_0)->m_refIdx[m.absPartIdx] = m.mvFieldNeighbours[mergeCand][0].refIdx;
+        cu->getCUMvField(REF_PIC_LIST_1)->m_mv[m.absPartIdx] = m.mvFieldNeighbours[mergeCand][1].mv;
+        cu->getCUMvField(REF_PIC_LIST_1)->m_refIdx[m.absPartIdx] = m.mvFieldNeighbours[mergeCand][1].refIdx;
 
         motionCompensation(cu, &m_predTempYuv, REF_PIC_LIST_X, puIdx, true, false);
         uint32_t costCand = m_me.bufSATD(m_predTempYuv.getLumaAddr(m.absPartIdx), m_predTempYuv.getStride());
-        uint32_t bitsCand = mergeCand + 1;
-        if (mergeCand == m_cfg->param->maxNumMergeCand - 1)
-        {
-            bitsCand--;
-        }
+        uint32_t bitsCand = getTUBits(mergeCand, m.maxNumMergeCand);
         costCand = costCand + m_rdCost->getCost(bitsCand);
         if (costCand < outCost)
         {
             outCost = costCand;
             m.bits = bitsCand;
-            m.mvField[0] = m.mvFieldNeighbours[0 + 2 * mergeCand];
-            m.mvField[1] = m.mvFieldNeighbours[1 + 2 * mergeCand];
-            m.interDir = m.interDirNeighbours[mergeCand];
             m.index = mergeCand;
         }
     }
 
+    m.mvField[0] = m.mvFieldNeighbours[m.index][0];
+    m.mvField[1] = m.mvFieldNeighbours[m.index][1];
+    m.interDir = m.interDirNeighbours[m.index];
+
     return outCost;
 }
 
@@ -2253,6 +2250,8 @@
     uint32_t lastMode = 0;
     int      totalmebits = 0;
 
+    const int* numRefIdx = cu->getSlice()->getNumRefIdx();
+
     MergeData merge;
 
     memset(&merge, 0, sizeof(merge));
@@ -2311,15 +2310,10 @@
         // Uni-directional prediction
         for (int l = 0; l < numPredDir; l++)
         {
-            for (int ref = 0; ref < cu->getSlice()->getNumRefIdx(l); ref++)
+            for (int ref = 0; ref < numRefIdx[l]; ref++)
             {
                 uint32_t bits = listSelBits[l] + MVP_IDX_BITS;
-                if (cu->getSlice()->getNumRefIdx(l) > 1)
-                {
-                    bits += ref + 1;
-                    if (ref == cu->getSlice()->getNumRefIdx(l) - 1)
-                        bits--;
-                }
+                bits += getTUBits(ref, numRefIdx[l]);
 
                 MV mvc[(MD_ABOVE_LEFT + 1) * 2 + 1];
                 int numMvc = cu->fillMvpCand(partIdx, partAddr, l, ref, &amvpInfo[l][ref], mvc);
diff -r 7d11f60c5dba -r 01a07bfcd779 source/Lib/TLibEncoder/TEncSearch.h
--- a/source/Lib/TLibEncoder/TEncSearch.h	Fri May 16 14:31:01 2014 +0530
+++ b/source/Lib/TLibEncoder/TEncSearch.h	Sun May 18 15:02:27 2014 +0900
@@ -74,9 +74,9 @@
 struct MergeData
 {
     /* merge candidate data, cached between calls to xMergeEstimation */
-    TComMvField mvFieldNeighbours[MRG_MAX_NUM_CANDS << 1];
+    TComMvField mvFieldNeighbours[MRG_MAX_NUM_CANDS][2];
     uint8_t     interDirNeighbours[MRG_MAX_NUM_CANDS];
-    int         numValidMergeCand;
+    uint32_t    maxNumMergeCand;
 
     /* data updated for each partition */
     uint32_t    absPartIdx;
@@ -90,6 +90,11 @@
     uint32_t    bits;
 };
 
+inline int getTUBits(int idx, int numIdx)
+{
+    return idx + (idx < numIdx - 1);
+}
+
 // ====================================================================================================================
 // Class definition
 // ====================================================================================================================
diff -r 7d11f60c5dba -r 01a07bfcd779 source/encoder/compress.cpp
--- a/source/encoder/compress.cpp	Fri May 16 14:31:01 2014 +0530
+++ b/source/encoder/compress.cpp	Sun May 18 15:02:27 2014 +0900
@@ -225,19 +225,14 @@
 void TEncCu::xComputeCostMerge2Nx2N(TComDataCU*& outBestCU, TComDataCU*& outTempCU, TComYuv*& bestPredYuv, TComYuv*& yuvReconBest)
 {
     X265_CHECK(outTempCU->getSlice()->getSliceType() != I_SLICE, "Evaluating merge in I slice\n");
-    TComMvField mvFieldNeighbours[MRG_MAX_NUM_CANDS << 1]; // double length for mv of both lists
+    TComMvField mvFieldNeighbours[MRG_MAX_NUM_CANDS][2]; // double length for mv of both lists
     uint8_t interDirNeighbours[MRG_MAX_NUM_CANDS];
-    int numValidMergeCand = 0;
-
-    for (uint32_t i = 0; i < outTempCU->getSlice()->getMaxNumMergeCand(); ++i)
-    {
-        interDirNeighbours[i] = 0;
-    }
+    uint32_t maxNumMergeCand = outTempCU->getSlice()->getMaxNumMergeCand();
 
     uint8_t depth = outTempCU->getDepth(0);
     outTempCU->setPartSizeSubParts(SIZE_2Nx2N, 0, depth); // interprets depth relative to LCU level
     outTempCU->setCUTransquantBypassSubParts(m_CUTransquantBypassFlagValue, 0, depth);
-    outTempCU->getInterMergeCandidates(0, 0, mvFieldNeighbours, interDirNeighbours, numValidMergeCand);
+    outTempCU->getInterMergeCandidates(0, 0, mvFieldNeighbours, interDirNeighbours, maxNumMergeCand);
     outTempCU->setPredModeSubParts(MODE_INTER, 0, depth);
     outTempCU->setMergeFlag(0, true);
 
@@ -248,27 +243,22 @@
 
     int part = g_convertToBit[outTempCU->getCUSize(0)];
     int bestMergeCand = -1;
-    uint32_t bitsCand = 0;
 
-    for (int mergeCand = 0; mergeCand < numValidMergeCand; ++mergeCand)
+    for (uint32_t mergeCand = 0; mergeCand < maxNumMergeCand; ++mergeCand)
     {
         if (m_param->frameNumThreads <= 1 ||
-            (mvFieldNeighbours[0 + 2 * mergeCand].mv.y < (m_param->searchRange + 1) * 4 &&
-             mvFieldNeighbours[1 + 2 * mergeCand].mv.y < (m_param->searchRange + 1) * 4))
+            (mvFieldNeighbours[mergeCand][0].mv.y < (m_param->searchRange + 1) * 4 &&
+             mvFieldNeighbours[mergeCand][1].mv.y < (m_param->searchRange + 1) * 4))
         {
             // set MC parameters, interprets depth relative to LCU level
             outTempCU->setMergeIndex(0, mergeCand);
             outTempCU->setInterDirSubParts(interDirNeighbours[mergeCand], 0, 0, depth);
-            outTempCU->getCUMvField(REF_PIC_LIST_0)->setAllMvField(mvFieldNeighbours[0 + 2 * mergeCand], SIZE_2Nx2N, 0, 0); // interprets depth relative to rpcTempCU level
-            outTempCU->getCUMvField(REF_PIC_LIST_1)->setAllMvField(mvFieldNeighbours[1 + 2 * mergeCand], SIZE_2Nx2N, 0, 0); // interprets depth relative to rpcTempCU level
+            outTempCU->getCUMvField(REF_PIC_LIST_0)->setAllMvField(mvFieldNeighbours[mergeCand][0], SIZE_2Nx2N, 0, 0); // interprets depth relative to rpcTempCU level
+            outTempCU->getCUMvField(REF_PIC_LIST_1)->setAllMvField(mvFieldNeighbours[mergeCand][1], SIZE_2Nx2N, 0, 0); // interprets depth relative to rpcTempCU level
 
             // do MC only for Luma part
             m_search->motionCompensation(outTempCU, m_tmpPredYuv[depth], REF_PIC_LIST_X, 0, true, false);
-            bitsCand = mergeCand + 1;
-            if (mergeCand == (int)m_param->maxNumMergeCand - 1)
-            {
-                bitsCand--;
-            }
+            uint32_t bitsCand = getTUBits(mergeCand, maxNumMergeCand);
             outTempCU->m_totalBits = bitsCand;
             outTempCU->m_totalDistortion = primitives.sa8d[part](m_origYuv[depth]->getLumaAddr(), m_origYuv[depth]->getStride(),
                                                                  m_tmpPredYuv[depth]->getLumaAddr(), m_tmpPredYuv[depth]->getStride());
@@ -297,8 +287,8 @@
     {
         outTempCU->setMergeIndex(0, bestMergeCand);
         outTempCU->setInterDirSubParts(interDirNeighbours[bestMergeCand], 0, 0, depth);
-        outTempCU->getCUMvField(REF_PIC_LIST_0)->setAllMvField(mvFieldNeighbours[0 + 2 * bestMergeCand], SIZE_2Nx2N, 0, 0);
-        outTempCU->getCUMvField(REF_PIC_LIST_1)->setAllMvField(mvFieldNeighbours[1 + 2 * bestMergeCand], SIZE_2Nx2N, 0, 0);
+        outTempCU->getCUMvField(REF_PIC_LIST_0)->setAllMvField(mvFieldNeighbours[bestMergeCand][0], SIZE_2Nx2N, 0, 0);
+        outTempCU->getCUMvField(REF_PIC_LIST_1)->setAllMvField(mvFieldNeighbours[bestMergeCand][1], SIZE_2Nx2N, 0, 0);
         outTempCU->m_totalBits = outBestCU->m_totalBits;
         outTempCU->m_totalDistortion = outBestCU->m_totalDistortion;
         outTempCU->m_totalBits = 0;


More information about the x265-devel mailing list