[x265] [slices] fix multi-slices output non-determination bug

chen chenm003 at 163.com
Mon Oct 31 18:33:25 CET 2016


# HG changeset patch
# User Min Chen <min.chen at multicorewareinc.com>
# Date 1477935084 18000
# Node ID 9be03f08789954f772a50f26485a9c96ca745497
# Parent  b08109b3701e9b86010c5a5ed0ad7b3d6a051911
[slices] fix multi-slices output non-determination bug
---
 source/common/common.h          |    2 +-
 source/encoder/analysis.cpp     |    8 +-
 source/encoder/frameencoder.cpp |   15 ++---
 source/encoder/motion.cpp       |  116 +++++++++++++++++++++++++++-----------
 source/encoder/sao.cpp          |    7 ++
 source/encoder/search.cpp       |    3 +
 6 files changed, 104 insertions(+), 47 deletions(-)


diff -r b08109b3701e -r 9be03f087899 source/common/common.h
--- a/source/common/common.hFri Oct 28 10:28:15 2016 +0800
+++ b/source/common/common.hMon Oct 31 12:31:24 2016 -0500
@@ -176,7 +176,7 @@
 
 #define X265_MIN(a, b) ((a) < (b) ? (a) : (b))
 #define X265_MAX(a, b) ((a) > (b) ? (a) : (b))
-#define COPY1_IF_LT(x, y) if ((y) < (x)) (x) = (y);
+#define COPY1_IF_LT(x, y) {if ((y) < (x)) (x) = (y);}
 #define COPY2_IF_LT(x, y, a, b) \
     if ((y) < (x)) \
     { \
diff -r b08109b3701e -r 9be03f087899 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cppFri Oct 28 10:28:15 2016 +0800
+++ b/source/encoder/analysis.cppMon Oct 31 12:31:24 2016 -0500
@@ -1942,12 +1942,12 @@
             if (m_param->maxSlices > 1)
             {
                 // NOTE: First row in slice can't negative
-                if ((candMvField[i][0].mv.y < m_sliceMinY) | (candMvField[i][1].mv.y < m_sliceMinY))
+                if (X265_MIN(candMvField[i][0].mv.y, candMvField[i][1].mv.y) < m_sliceMinY)
                     continue;
 
                 // Last row in slice can't reference beyond bound since it is another slice area
                 // TODO: we may beyond bound in future since these area have a chance to finish because we use parallel slices. Necessary prepare research on load balance
-                if ((candMvField[i][0].mv.y > m_sliceMaxY) | (candMvField[i][1].mv.y > m_sliceMaxY))
+                if (X265_MAX(candMvField[i][0].mv.y, candMvField[i][1].mv.y) > m_sliceMaxY)
                     continue;
             }
 
@@ -2072,12 +2072,12 @@
             if (m_param->maxSlices > 1)
             {
                 // NOTE: First row in slice can't negative
-                if ((candMvField[i][0].mv.y < m_sliceMinY) | (candMvField[i][1].mv.y < m_sliceMinY))
+                if (X265_MIN(candMvField[i][0].mv.y, candMvField[i][1].mv.y) < m_sliceMinY)
                     continue;
 
                 // Last row in slice can't reference beyond bound since it is another slice area
                 // TODO: we may beyond bound in future since these area have a chance to finish because we use parallel slices. Necessary prepare research on load balance
-                if ((candMvField[i][0].mv.y > m_sliceMaxY) | (candMvField[i][1].mv.y > m_sliceMaxY))
+                if (X265_MAX(candMvField[i][0].mv.y, candMvField[i][1].mv.y) > m_sliceMaxY)
                     continue;
             }
 
diff -r b08109b3701e -r 9be03f087899 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cppFri Oct 28 10:28:15 2016 +0800
+++ b/source/encoder/frameencoder.cppMon Oct 31 12:31:24 2016 -0500
@@ -123,7 +123,7 @@
     int range  = m_param->searchRange;       /* fpel search */
     range += !!(m_param->searchMethod < 2);  /* diamond/hex range check lag */
     range += NTAPS_LUMA / 2;                 /* subpel filter half-length */
-    range += 2 + MotionEstimate::hpelIterationCount(m_param->subpelRefine) / 2; /* subpel refine steps */
+    range += 2 + (MotionEstimate::hpelIterationCount(m_param->subpelRefine) + 1) / 2; /* subpel refine steps */
     m_refLagRows = /*(m_param->maxSlices > 1 ? 1 : 0) +*/ 1 + ((range + g_maxCUSize - 1) / g_maxCUSize);
 
     // NOTE: 2 times of numRows because both Encoder and Filter in same queue
@@ -654,8 +654,7 @@
                 const uint32_t sliceEndRow = m_sliceBaseRow[sliceId + 1] - 1;
                 const uint32_t row = sliceStartRow + rowInSlice;
 
-                if (row >= m_numRows)
-                    break;
+                X265_CHECK(row < m_numRows, "slices row fault was detected");
 
                 if (row > sliceEndRow)
                     continue;
@@ -674,7 +673,7 @@
                             refpic->m_reconRowFlag[rowIdx].waitForChange(0);
 
                         if ((bUseWeightP || bUseWeightB) && m_mref[l][ref].isWeighted)
-                            m_mref[l][ref].applyWeight(row + m_refLagRows, m_numRows, sliceEndRow + 1, sliceId);
+                            m_mref[l][ref].applyWeight(rowIdx, m_numRows, sliceEndRow, sliceId);
                     }
                 }
 
@@ -714,7 +713,7 @@
                             refpic->m_reconRowFlag[rowIdx].waitForChange(0);
 
                         if ((bUseWeightP || bUseWeightB) && m_mref[l][ref].isWeighted)
-                            m_mref[list][ref].applyWeight(i + m_refLagRows, m_numRows, m_numRows, 0);
+                            m_mref[list][ref].applyWeight(rowIdx, m_numRows, m_numRows, 0);
                     }
                 }
 
@@ -1187,8 +1186,8 @@
     // TODO: specially case handle on first and last row
 
     // Initialize restrict on MV range in slices
-    tld.analysis.m_sliceMinY = -(int16_t)(rowInSlice * g_maxCUSize * 4) + 2 * 4;
-    tld.analysis.m_sliceMaxY = (int16_t)((endRowInSlicePlus1 - 1 - row) * (g_maxCUSize * 4) - 3 * 4);
+    tld.analysis.m_sliceMinY = -(int16_t)(rowInSlice * g_maxCUSize * 4) + 3 * 4;
+    tld.analysis.m_sliceMaxY = (int16_t)((endRowInSlicePlus1 - 1 - row) * (g_maxCUSize * 4) - 4 * 4);
 
     // Handle single row slice
     if (tld.analysis.m_sliceMaxY < tld.analysis.m_sliceMinY)
@@ -1502,7 +1501,7 @@
 
         ScopedLock self(curRow.lock);
         if ((m_bAllRowsStop && intRow > m_vbvResetTriggerRow) ||
-            (!bFirstRowInSlice && ((curRow.completed < numCols - 1) || (m_rows[row - 1].completed < numCols)) && m_rows[row - 1].completed < m_rows[row].completed + 2))
+            (!bFirstRowInSlice && ((curRow.completed < numCols - 1) || (m_rows[row - 1].completed < numCols)) && m_rows[row - 1].completed < curRow.completed + 2))
         {
             curRow.active = false;
             curRow.busy = false;
diff -r b08109b3701e -r 9be03f087899 source/encoder/motion.cpp
--- a/source/encoder/motion.cppFri Oct 28 10:28:15 2016 +0800
+++ b/source/encoder/motion.cppMon Oct 31 12:31:24 2016 -0500
@@ -278,10 +278,14 @@
         costs[1] += mvcost((omv + MV(m1x, m1y)) << 2); \
         costs[2] += mvcost((omv + MV(m2x, m2y)) << 2); \
         costs[3] += mvcost((omv + MV(m3x, m3y)) << 2); \
-        COPY2_IF_LT(bcost, costs[0], bmv, omv + MV(m0x, m0y)); \
-        COPY2_IF_LT(bcost, costs[1], bmv, omv + MV(m1x, m1y)); \
-        COPY2_IF_LT(bcost, costs[2], bmv, omv + MV(m2x, m2y)); \
-        COPY2_IF_LT(bcost, costs[3], bmv, omv + MV(m3x, m3y)); \
+        if ((g_maxSlices == 1) | ((omv.y + m0y >= mvmin.y) & (omv.y + m0y <= mvmax.y))) \
+            COPY2_IF_LT(bcost, costs[0], bmv, omv + MV(m0x, m0y)); \
+        if ((g_maxSlices == 1) | ((omv.y + m1y >= mvmin.y) & (omv.y + m1y <= mvmax.y))) \
+            COPY2_IF_LT(bcost, costs[1], bmv, omv + MV(m1x, m1y)); \
+        if ((g_maxSlices == 1) | ((omv.y + m2y >= mvmin.y) & (omv.y + m2y <= mvmax.y))) \
+            COPY2_IF_LT(bcost, costs[2], bmv, omv + MV(m2x, m2y)); \
+        if ((g_maxSlices == 1) | ((omv.y + m3y >= mvmin.y) & (omv.y + m3y <= mvmax.y))) \
+            COPY2_IF_LT(bcost, costs[3], bmv, omv + MV(m3x, m3y)); \
     }
 
 #define COST_MV_X4_DIR(m0x, m0y, m1x, m1y, m2x, m2y, m3x, m3y, costs) \
@@ -659,8 +663,10 @@
         do
         {
             COST_MV_X4_DIR(0, -1, 0, 1, -1, 0, 1, 0, costs);
-            COPY1_IF_LT(bcost, (costs[0] << 4) + 1);
-            COPY1_IF_LT(bcost, (costs[1] << 4) + 3);
+            if ((g_maxSlices == 1) | ((bmv.y - 1 >= mvmin.y) & (bmv.y - 1 <= mvmax.y)))
+                COPY1_IF_LT(bcost, (costs[0] << 4) + 1);
+            if ((g_maxSlices == 1) | ((bmv.y + 1 >= mvmin.y) & (bmv.y + 1 <= mvmax.y)))
+                COPY1_IF_LT(bcost, (costs[1] << 4) + 3);
             COPY1_IF_LT(bcost, (costs[2] << 4) + 4);
             COPY1_IF_LT(bcost, (costs[3] << 4) + 12);
             if (!(bcost & 15))
@@ -698,36 +704,57 @@
       /* equivalent to the above, but eliminates duplicate candidates */
         COST_MV_X3_DIR(-2, 0, -1, 2,  1, 2, costs);
         bcost <<= 3;
-        COPY1_IF_LT(bcost, (costs[0] << 3) + 2);
-        COPY1_IF_LT(bcost, (costs[1] << 3) + 3);
-        COPY1_IF_LT(bcost, (costs[2] << 3) + 4);
+        if ((g_maxSlices == 1) | ((bmv.y >= mvmin.y) & (bmv.y <= mvmax.y)))
+            COPY1_IF_LT(bcost, (costs[0] << 3) + 2);
+        if ((g_maxSlices == 1) | ((bmv.y + 2 >= mvmin.y) & (bmv.y + 2 <= mvmax.y)))
+        {
+            COPY1_IF_LT(bcost, (costs[1] << 3) + 3);
+            COPY1_IF_LT(bcost, (costs[2] << 3) + 4);
+        }
+
         COST_MV_X3_DIR(2, 0,  1, -2, -1, -2, costs);
-        COPY1_IF_LT(bcost, (costs[0] << 3) + 5);
-        COPY1_IF_LT(bcost, (costs[1] << 3) + 6);
-        COPY1_IF_LT(bcost, (costs[2] << 3) + 7);
+        if ((g_maxSlices == 1) | ((bmv.y >= mvmin.y) & (bmv.y <= mvmax.y)))
+            COPY1_IF_LT(bcost, (costs[0] << 3) + 5);
+        if ((g_maxSlices == 1) | ((bmv.y - 2 >= mvmin.y) & (bmv.y - 2 <= mvmax.y)))
+        {
+            COPY1_IF_LT(bcost, (costs[1] << 3) + 6);
+            COPY1_IF_LT(bcost, (costs[2] << 3) + 7);
+        }
 
         if (bcost & 7)
         {
             int dir = (bcost & 7) - 2;
-            bmv += hex2[dir + 1];
 
-            /* half hexagon, not overlapping the previous iteration */
-            for (int i = (merange >> 1) - 1; i > 0 && bmv.checkRange(mvmin, mvmax); i--)
+            if ((g_maxSlices == 1) | ((bmv.y + hex2[dir + 1].y >= mvmin.y) & (bmv.y + hex2[dir + 1].y <= mvmax.y)))
             {
-                COST_MV_X3_DIR(hex2[dir + 0].x, hex2[dir + 0].y,
-                               hex2[dir + 1].x, hex2[dir + 1].y,
-                               hex2[dir + 2].x, hex2[dir + 2].y,
-                               costs);
-                bcost &= ~7;
-                COPY1_IF_LT(bcost, (costs[0] << 3) + 1);
-                COPY1_IF_LT(bcost, (costs[1] << 3) + 2);
-                COPY1_IF_LT(bcost, (costs[2] << 3) + 3);
-                if (!(bcost & 7))
-                    break;
-                dir += (bcost & 7) - 2;
-                dir = mod6m1[dir + 1];
                 bmv += hex2[dir + 1];
-            }
+
+                /* half hexagon, not overlapping the previous iteration */
+                for (int i = (merange >> 1) - 1; i > 0 && bmv.checkRange(mvmin, mvmax); i--)
+                {
+                    COST_MV_X3_DIR(hex2[dir + 0].x, hex2[dir + 0].y,
+                        hex2[dir + 1].x, hex2[dir + 1].y,
+                        hex2[dir + 2].x, hex2[dir + 2].y,
+                        costs);
+                    bcost &= ~7;
+
+                    if ((g_maxSlices == 1) | ((bmv.y + hex2[dir + 0].y >= mvmin.y) & (bmv.y + hex2[dir + 0].y <= mvmax.y)))
+                        COPY1_IF_LT(bcost, (costs[0] << 3) + 1);
+
+                    if ((g_maxSlices == 1) | ((bmv.y + hex2[dir + 1].y >= mvmin.y) & (bmv.y + hex2[dir + 1].y <= mvmax.y)))
+                        COPY1_IF_LT(bcost, (costs[1] << 3) + 2);
+
+                    if ((g_maxSlices == 1) | ((bmv.y + hex2[dir + 2].y >= mvmin.y) & (bmv.y + hex2[dir + 2].y <= mvmax.y)))
+                        COPY1_IF_LT(bcost, (costs[2] << 3) + 3);
+
+                    if (!(bcost & 7))
+                        break;
+
+                    dir += (bcost & 7) - 2;
+                    dir = mod6m1[dir + 1];
+                    bmv += hex2[dir + 1];
+                }
+            } // if ((g_maxSlices == 1) | ((bmv.y + hex2[dir + 1].y >= mvmin.y) & (bmv.y + hex2[dir + 1].y <= mvmax.y)))
         }
         bcost >>= 3;
 #endif // if 0
@@ -735,15 +762,21 @@
         /* square refine */
         int dir = 0;
         COST_MV_X4_DIR(0, -1,  0, 1, -1, 0, 1, 0, costs);
-        COPY2_IF_LT(bcost, costs[0], dir, 1);
-        COPY2_IF_LT(bcost, costs[1], dir, 2);
+        if ((g_maxSlices == 1) | ((bmv.y - 1 >= mvmin.y) & (bmv.y - 1 <= mvmax.y)))
+            COPY2_IF_LT(bcost, costs[0], dir, 1);
+        if ((g_maxSlices == 1) | ((bmv.y + 1 >= mvmin.y) & (bmv.y + 1 <= mvmax.y)))
+            COPY2_IF_LT(bcost, costs[1], dir, 2);
         COPY2_IF_LT(bcost, costs[2], dir, 3);
         COPY2_IF_LT(bcost, costs[3], dir, 4);
         COST_MV_X4_DIR(-1, -1, -1, 1, 1, -1, 1, 1, costs);
-        COPY2_IF_LT(bcost, costs[0], dir, 5);
-        COPY2_IF_LT(bcost, costs[1], dir, 6);
-        COPY2_IF_LT(bcost, costs[2], dir, 7);
-        COPY2_IF_LT(bcost, costs[3], dir, 8);
+        if ((g_maxSlices == 1) | ((bmv.y - 1 >= mvmin.y) & (bmv.y - 1 <= mvmax.y)))
+            COPY2_IF_LT(bcost, costs[0], dir, 5);
+        if ((g_maxSlices == 1) | ((bmv.y + 1 >= mvmin.y) & (bmv.y + 1 <= mvmax.y)))
+            COPY2_IF_LT(bcost, costs[1], dir, 6);
+        if ((g_maxSlices == 1) | ((bmv.y - 1 >= mvmin.y) & (bmv.y - 1 <= mvmax.y)))
+            COPY2_IF_LT(bcost, costs[2], dir, 7);
+        if ((g_maxSlices == 1) | ((bmv.y + 1 >= mvmin.y) & (bmv.y + 1 <= mvmax.y)))
+            COPY2_IF_LT(bcost, costs[3], dir, 8);
         bmv += square1[dir];
         break;
     }
@@ -756,6 +789,7 @@
         /* refine predictors */
         omv = bmv;
         ucost1 = bcost;
+        X265_CHECK((g_maxSlices == 1) | ((pmv.y >= mvmin.y) & (pmv.y <= mvmax.y)), "pmv outside of search range!");
         DIA1_ITER(pmv.x, pmv.y);
         if (pmv.notZero())
             DIA1_ITER(0, 0);
@@ -1099,6 +1133,7 @@
     if ((g_maxSlices > 1) & ((bmv.y < qmvmin.y) | (bmv.y > qmvmax.y)))
     {
         bmv.y = x265_min(x265_max(bmv.y, qmvmin.y), qmvmax.y);
+        bcost = subpelCompare(ref, bmv, satd) + mvcost(bmv);
     }
 
     if (!bcost)
@@ -1113,6 +1148,11 @@
         for (int i = 1; i <= wl.hpel_dirs; i++)
         {
             MV qmv = bmv + square1[i] * 2;
+
+            /* skip invalid range */
+            if ((g_maxSlices > 1) & ((qmv.y < qmvmin.y) | (qmv.y > qmvmax.y)))
+                continue;
+
             int cost = ref->lowresQPelCost(fenc, blockOffset, qmv, sad) + mvcost(qmv);
             COPY2_IF_LT(bcost, cost, bdir, i);
         }
@@ -1124,6 +1164,11 @@
         for (int i = 1; i <= wl.qpel_dirs; i++)
         {
             MV qmv = bmv + square1[i];
+
+            /* skip invalid range */
+            if ((g_maxSlices > 1) & ((qmv.y < qmvmin.y) | (qmv.y > qmvmax.y)))
+                continue;
+
             int cost = ref->lowresQPelCost(fenc, blockOffset, qmv, satd) + mvcost(qmv);
             COPY2_IF_LT(bcost, cost, bdir, i);
         }
@@ -1189,6 +1234,9 @@
         }
     }
 
+    // check mv range for slice bound
+    X265_CHECK((g_maxSlices == 1) | ((bmv.y >= qmvmin.y) & (bmv.y <= qmvmax.y)), "mv beyond range!");
+
     x265_emms();
     outQMv = bmv;
     return bcost;
diff -r b08109b3701e -r 9be03f087899 source/encoder/sao.cpp
--- a/source/encoder/sao.cppFri Oct 28 10:28:15 2016 +0800
+++ b/source/encoder/sao.cppMon Oct 31 12:31:24 2016 -0500
@@ -1206,12 +1206,19 @@
 void SAO::rdoSaoUnitRowEnd(const SAOParam* saoParam, int numctus)
 {
     if (!saoParam->bSaoFlag[0])
+    {
         m_depthSaoRate[0 * SAO_DEPTHRATE_SIZE + m_refDepth] = 1.0;
+    }
     else
+    {
+        assert(m_numNoSao[0] <= numctus);
         m_depthSaoRate[0 * SAO_DEPTHRATE_SIZE + m_refDepth] = m_numNoSao[0] / ((double)numctus);
+    }
 
     if (!saoParam->bSaoFlag[1])
+    {
         m_depthSaoRate[1 * SAO_DEPTHRATE_SIZE + m_refDepth] = 1.0;
+    }
     else
         m_depthSaoRate[1 * SAO_DEPTHRATE_SIZE + m_refDepth] = m_numNoSao[1] / ((double)numctus);
 }
diff -r b08109b3701e -r 9be03f087899 source/encoder/search.cpp
--- a/source/encoder/search.cppFri Oct 28 10:28:15 2016 +0800
+++ b/source/encoder/search.cppMon Oct 31 12:31:24 2016 -0500
@@ -2545,6 +2545,9 @@
     /* conditional clipping for frame parallelism */
     mvmin.y = X265_MIN(mvmin.y, (int16_t)m_refLagPixels);
     mvmax.y = X265_MIN(mvmax.y, (int16_t)m_refLagPixels);
+
+    /* conditional clipping for negative mv range */
+    mvmax.y = X265_MAX(mvmax.y, mvmin.y);
 }
 
 /* Note: this function overwrites the RD cost variables of interMode, but leaves the sa8d cost unharmed */

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20161101/99982ec4/attachment-0001.html>


More information about the x265-devel mailing list