[x265] [slices] fix multi-slices output non-determination bug
chen
chenm003 at 163.com
Mon Oct 31 18:33:25 CET 2016
# HG changeset patch
# User Min Chen <min.chen at multicorewareinc.com>
# Date 1477935084 18000
# Node ID 9be03f08789954f772a50f26485a9c96ca745497
# Parent b08109b3701e9b86010c5a5ed0ad7b3d6a051911
[slices] fix multi-slices output non-determination bug
---
source/common/common.h | 2 +-
source/encoder/analysis.cpp | 8 +-
source/encoder/frameencoder.cpp | 15 ++---
source/encoder/motion.cpp | 116 +++++++++++++++++++++++++++-----------
source/encoder/sao.cpp | 7 ++
source/encoder/search.cpp | 3 +
6 files changed, 104 insertions(+), 47 deletions(-)
diff -r b08109b3701e -r 9be03f087899 source/common/common.h
--- a/source/common/common.h	Fri Oct 28 10:28:15 2016 +0800
+++ b/source/common/common.h	Mon Oct 31 12:31:24 2016 -0500
@@ -176,7 +176,7 @@
#define X265_MIN(a, b) ((a) < (b) ? (a) : (b))
#define X265_MAX(a, b) ((a) > (b) ? (a) : (b))
-#define COPY1_IF_LT(x, y) if ((y) < (x)) (x) = (y);
+#define COPY1_IF_LT(x, y) {if ((y) < (x)) (x) = (y);}
#define COPY2_IF_LT(x, y, a, b) \
if ((y) < (x)) \
{ \
diff -r b08109b3701e -r 9be03f087899 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Fri Oct 28 10:28:15 2016 +0800
+++ b/source/encoder/analysis.cpp	Mon Oct 31 12:31:24 2016 -0500
@@ -1942,12 +1942,12 @@
if (m_param->maxSlices > 1)
{
// NOTE: First row in slice can't negative
- if ((candMvField[i][0].mv.y < m_sliceMinY) | (candMvField[i][1].mv.y < m_sliceMinY))
+ if (X265_MIN(candMvField[i][0].mv.y, candMvField[i][1].mv.y) < m_sliceMinY)
continue;
// Last row in slice can't reference beyond bound since it is another slice area
// TODO: we may beyond bound in future since these area have a chance to finish because we use parallel slices. Necessary prepare research on load balance
- if ((candMvField[i][0].mv.y > m_sliceMaxY) | (candMvField[i][1].mv.y > m_sliceMaxY))
+ if (X265_MAX(candMvField[i][0].mv.y, candMvField[i][1].mv.y) > m_sliceMaxY)
continue;
}
@@ -2072,12 +2072,12 @@
if (m_param->maxSlices > 1)
{
// NOTE: First row in slice can't negative
- if ((candMvField[i][0].mv.y < m_sliceMinY) | (candMvField[i][1].mv.y < m_sliceMinY))
+ if (X265_MIN(candMvField[i][0].mv.y, candMvField[i][1].mv.y) < m_sliceMinY)
continue;
// Last row in slice can't reference beyond bound since it is another slice area
// TODO: we may beyond bound in future since these area have a chance to finish because we use parallel slices. Necessary prepare research on load balance
- if ((candMvField[i][0].mv.y > m_sliceMaxY) | (candMvField[i][1].mv.y > m_sliceMaxY))
+ if (X265_MAX(candMvField[i][0].mv.y, candMvField[i][1].mv.y) > m_sliceMaxY)
continue;
}
diff -r b08109b3701e -r 9be03f087899 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Fri Oct 28 10:28:15 2016 +0800
+++ b/source/encoder/frameencoder.cpp	Mon Oct 31 12:31:24 2016 -0500
@@ -123,7 +123,7 @@
int range = m_param->searchRange; /* fpel search */
range += !!(m_param->searchMethod < 2); /* diamond/hex range check lag */
range += NTAPS_LUMA / 2; /* subpel filter half-length */
- range += 2 + MotionEstimate::hpelIterationCount(m_param->subpelRefine) / 2; /* subpel refine steps */
+ range += 2 + (MotionEstimate::hpelIterationCount(m_param->subpelRefine) + 1) / 2; /* subpel refine steps */
m_refLagRows = /*(m_param->maxSlices > 1 ? 1 : 0) +*/ 1 + ((range + g_maxCUSize - 1) / g_maxCUSize);
// NOTE: 2 times of numRows because both Encoder and Filter in same queue
@@ -654,8 +654,7 @@
const uint32_t sliceEndRow = m_sliceBaseRow[sliceId + 1] - 1;
const uint32_t row = sliceStartRow + rowInSlice;
- if (row >= m_numRows)
- break;
+ X265_CHECK(row < m_numRows, "slices row fault was detected");
if (row > sliceEndRow)
continue;
@@ -674,7 +673,7 @@
refpic->m_reconRowFlag[rowIdx].waitForChange(0);
if ((bUseWeightP || bUseWeightB) && m_mref[l][ref].isWeighted)
- m_mref[l][ref].applyWeight(row + m_refLagRows, m_numRows, sliceEndRow + 1, sliceId);
+ m_mref[l][ref].applyWeight(rowIdx, m_numRows, sliceEndRow, sliceId);
}
}
@@ -714,7 +713,7 @@
refpic->m_reconRowFlag[rowIdx].waitForChange(0);
if ((bUseWeightP || bUseWeightB) && m_mref[l][ref].isWeighted)
- m_mref[list][ref].applyWeight(i + m_refLagRows, m_numRows, m_numRows, 0);
+ m_mref[list][ref].applyWeight(rowIdx, m_numRows, m_numRows, 0);
}
}
@@ -1187,8 +1186,8 @@
// TODO: specially case handle on first and last row
// Initialize restrict on MV range in slices
- tld.analysis.m_sliceMinY = -(int16_t)(rowInSlice * g_maxCUSize * 4) + 2 * 4;
- tld.analysis.m_sliceMaxY = (int16_t)((endRowInSlicePlus1 - 1 - row) * (g_maxCUSize * 4) - 3 * 4);
+ tld.analysis.m_sliceMinY = -(int16_t)(rowInSlice * g_maxCUSize * 4) + 3 * 4;
+ tld.analysis.m_sliceMaxY = (int16_t)((endRowInSlicePlus1 - 1 - row) * (g_maxCUSize * 4) - 4 * 4);
// Handle single row slice
if (tld.analysis.m_sliceMaxY < tld.analysis.m_sliceMinY)
@@ -1502,7 +1501,7 @@
ScopedLock self(curRow.lock);
if ((m_bAllRowsStop && intRow > m_vbvResetTriggerRow) ||
- (!bFirstRowInSlice && ((curRow.completed < numCols - 1) || (m_rows[row - 1].completed < numCols)) && m_rows[row - 1].completed < m_rows[row].completed + 2))
+ (!bFirstRowInSlice && ((curRow.completed < numCols - 1) || (m_rows[row - 1].completed < numCols)) && m_rows[row - 1].completed < curRow.completed + 2))
{
curRow.active = false;
curRow.busy = false;
diff -r b08109b3701e -r 9be03f087899 source/encoder/motion.cpp
--- a/source/encoder/motion.cpp	Fri Oct 28 10:28:15 2016 +0800
+++ b/source/encoder/motion.cpp	Mon Oct 31 12:31:24 2016 -0500
@@ -278,10 +278,14 @@
costs[1] += mvcost((omv + MV(m1x, m1y)) << 2); \
costs[2] += mvcost((omv + MV(m2x, m2y)) << 2); \
costs[3] += mvcost((omv + MV(m3x, m3y)) << 2); \
- COPY2_IF_LT(bcost, costs[0], bmv, omv + MV(m0x, m0y)); \
- COPY2_IF_LT(bcost, costs[1], bmv, omv + MV(m1x, m1y)); \
- COPY2_IF_LT(bcost, costs[2], bmv, omv + MV(m2x, m2y)); \
- COPY2_IF_LT(bcost, costs[3], bmv, omv + MV(m3x, m3y)); \
+ if ((g_maxSlices == 1) | ((omv.y + m0y >= mvmin.y) & (omv.y + m0y <= mvmax.y))) \
+ COPY2_IF_LT(bcost, costs[0], bmv, omv + MV(m0x, m0y)); \
+ if ((g_maxSlices == 1) | ((omv.y + m1y >= mvmin.y) & (omv.y + m1y <= mvmax.y))) \
+ COPY2_IF_LT(bcost, costs[1], bmv, omv + MV(m1x, m1y)); \
+ if ((g_maxSlices == 1) | ((omv.y + m2y >= mvmin.y) & (omv.y + m2y <= mvmax.y))) \
+ COPY2_IF_LT(bcost, costs[2], bmv, omv + MV(m2x, m2y)); \
+ if ((g_maxSlices == 1) | ((omv.y + m3y >= mvmin.y) & (omv.y + m3y <= mvmax.y))) \
+ COPY2_IF_LT(bcost, costs[3], bmv, omv + MV(m3x, m3y)); \
}
#define COST_MV_X4_DIR(m0x, m0y, m1x, m1y, m2x, m2y, m3x, m3y, costs) \
@@ -659,8 +663,10 @@
do
{
COST_MV_X4_DIR(0, -1, 0, 1, -1, 0, 1, 0, costs);
- COPY1_IF_LT(bcost, (costs[0] << 4) + 1);
- COPY1_IF_LT(bcost, (costs[1] << 4) + 3);
+ if ((g_maxSlices == 1) | ((bmv.y - 1 >= mvmin.y) & (bmv.y - 1 <= mvmax.y)))
+ COPY1_IF_LT(bcost, (costs[0] << 4) + 1);
+ if ((g_maxSlices == 1) | ((bmv.y + 1 >= mvmin.y) & (bmv.y + 1 <= mvmax.y)))
+ COPY1_IF_LT(bcost, (costs[1] << 4) + 3);
COPY1_IF_LT(bcost, (costs[2] << 4) + 4);
COPY1_IF_LT(bcost, (costs[3] << 4) + 12);
if (!(bcost & 15))
@@ -698,36 +704,57 @@
/* equivalent to the above, but eliminates duplicate candidates */
COST_MV_X3_DIR(-2, 0, -1, 2, 1, 2, costs);
bcost <<= 3;
- COPY1_IF_LT(bcost, (costs[0] << 3) + 2);
- COPY1_IF_LT(bcost, (costs[1] << 3) + 3);
- COPY1_IF_LT(bcost, (costs[2] << 3) + 4);
+ if ((g_maxSlices == 1) | ((bmv.y >= mvmin.y) & (bmv.y <= mvmax.y)))
+ COPY1_IF_LT(bcost, (costs[0] << 3) + 2);
+ if ((g_maxSlices == 1) | ((bmv.y + 2 >= mvmin.y) & (bmv.y + 2 <= mvmax.y)))
+ {
+ COPY1_IF_LT(bcost, (costs[1] << 3) + 3);
+ COPY1_IF_LT(bcost, (costs[2] << 3) + 4);
+ }
+
COST_MV_X3_DIR(2, 0, 1, -2, -1, -2, costs);
- COPY1_IF_LT(bcost, (costs[0] << 3) + 5);
- COPY1_IF_LT(bcost, (costs[1] << 3) + 6);
- COPY1_IF_LT(bcost, (costs[2] << 3) + 7);
+ if ((g_maxSlices == 1) | ((bmv.y >= mvmin.y) & (bmv.y <= mvmax.y)))
+ COPY1_IF_LT(bcost, (costs[0] << 3) + 5);
+ if ((g_maxSlices == 1) | ((bmv.y - 2 >= mvmin.y) & (bmv.y - 2 <= mvmax.y)))
+ {
+ COPY1_IF_LT(bcost, (costs[1] << 3) + 6);
+ COPY1_IF_LT(bcost, (costs[2] << 3) + 7);
+ }
if (bcost & 7)
{
int dir = (bcost & 7) - 2;
- bmv += hex2[dir + 1];
- /* half hexagon, not overlapping the previous iteration */
- for (int i = (merange >> 1) - 1; i > 0 && bmv.checkRange(mvmin, mvmax); i--)
+ if ((g_maxSlices == 1) | ((bmv.y + hex2[dir + 1].y >= mvmin.y) & (bmv.y + hex2[dir + 1].y <= mvmax.y)))
{
- COST_MV_X3_DIR(hex2[dir + 0].x, hex2[dir + 0].y,
- hex2[dir + 1].x, hex2[dir + 1].y,
- hex2[dir + 2].x, hex2[dir + 2].y,
- costs);
- bcost &= ~7;
- COPY1_IF_LT(bcost, (costs[0] << 3) + 1);
- COPY1_IF_LT(bcost, (costs[1] << 3) + 2);
- COPY1_IF_LT(bcost, (costs[2] << 3) + 3);
- if (!(bcost & 7))
- break;
- dir += (bcost & 7) - 2;
- dir = mod6m1[dir + 1];
bmv += hex2[dir + 1];
- }
+
+ /* half hexagon, not overlapping the previous iteration */
+ for (int i = (merange >> 1) - 1; i > 0 && bmv.checkRange(mvmin, mvmax); i--)
+ {
+ COST_MV_X3_DIR(hex2[dir + 0].x, hex2[dir + 0].y,
+ hex2[dir + 1].x, hex2[dir + 1].y,
+ hex2[dir + 2].x, hex2[dir + 2].y,
+ costs);
+ bcost &= ~7;
+
+ if ((g_maxSlices == 1) | ((bmv.y + hex2[dir + 0].y >= mvmin.y) & (bmv.y + hex2[dir + 0].y <= mvmax.y)))
+ COPY1_IF_LT(bcost, (costs[0] << 3) + 1);
+
+ if ((g_maxSlices == 1) | ((bmv.y + hex2[dir + 1].y >= mvmin.y) & (bmv.y + hex2[dir + 1].y <= mvmax.y)))
+ COPY1_IF_LT(bcost, (costs[1] << 3) + 2);
+
+ if ((g_maxSlices == 1) | ((bmv.y + hex2[dir + 2].y >= mvmin.y) & (bmv.y + hex2[dir + 2].y <= mvmax.y)))
+ COPY1_IF_LT(bcost, (costs[2] << 3) + 3);
+
+ if (!(bcost & 7))
+ break;
+
+ dir += (bcost & 7) - 2;
+ dir = mod6m1[dir + 1];
+ bmv += hex2[dir + 1];
+ }
+ } // if ((g_maxSlices == 1) | ((bmv.y + hex2[dir + 1].y >= mvmin.y) & (bmv.y + hex2[dir + 1].y <= mvmax.y)))
}
bcost >>= 3;
#endif // if 0
@@ -735,15 +762,21 @@
/* square refine */
int dir = 0;
COST_MV_X4_DIR(0, -1, 0, 1, -1, 0, 1, 0, costs);
- COPY2_IF_LT(bcost, costs[0], dir, 1);
- COPY2_IF_LT(bcost, costs[1], dir, 2);
+ if ((g_maxSlices == 1) | ((bmv.y - 1 >= mvmin.y) & (bmv.y - 1 <= mvmax.y)))
+ COPY2_IF_LT(bcost, costs[0], dir, 1);
+ if ((g_maxSlices == 1) | ((bmv.y + 1 >= mvmin.y) & (bmv.y + 1 <= mvmax.y)))
+ COPY2_IF_LT(bcost, costs[1], dir, 2);
COPY2_IF_LT(bcost, costs[2], dir, 3);
COPY2_IF_LT(bcost, costs[3], dir, 4);
COST_MV_X4_DIR(-1, -1, -1, 1, 1, -1, 1, 1, costs);
- COPY2_IF_LT(bcost, costs[0], dir, 5);
- COPY2_IF_LT(bcost, costs[1], dir, 6);
- COPY2_IF_LT(bcost, costs[2], dir, 7);
- COPY2_IF_LT(bcost, costs[3], dir, 8);
+ if ((g_maxSlices == 1) | ((bmv.y - 1 >= mvmin.y) & (bmv.y - 1 <= mvmax.y)))
+ COPY2_IF_LT(bcost, costs[0], dir, 5);
+ if ((g_maxSlices == 1) | ((bmv.y + 1 >= mvmin.y) & (bmv.y + 1 <= mvmax.y)))
+ COPY2_IF_LT(bcost, costs[1], dir, 6);
+ if ((g_maxSlices == 1) | ((bmv.y - 1 >= mvmin.y) & (bmv.y - 1 <= mvmax.y)))
+ COPY2_IF_LT(bcost, costs[2], dir, 7);
+ if ((g_maxSlices == 1) | ((bmv.y + 1 >= mvmin.y) & (bmv.y + 1 <= mvmax.y)))
+ COPY2_IF_LT(bcost, costs[3], dir, 8);
bmv += square1[dir];
break;
}
@@ -756,6 +789,7 @@
/* refine predictors */
omv = bmv;
ucost1 = bcost;
+ X265_CHECK((g_maxSlices == 1) | ((pmv.y >= mvmin.y) & (pmv.y <= mvmax.y)), "pmv outside of search range!");
DIA1_ITER(pmv.x, pmv.y);
if (pmv.notZero())
DIA1_ITER(0, 0);
@@ -1099,6 +1133,7 @@
if ((g_maxSlices > 1) & ((bmv.y < qmvmin.y) | (bmv.y > qmvmax.y)))
{
bmv.y = x265_min(x265_max(bmv.y, qmvmin.y), qmvmax.y);
+ bcost = subpelCompare(ref, bmv, satd) + mvcost(bmv);
}
if (!bcost)
@@ -1113,6 +1148,11 @@
for (int i = 1; i <= wl.hpel_dirs; i++)
{
MV qmv = bmv + square1[i] * 2;
+
+ /* skip invalid range */
+ if ((g_maxSlices > 1) & ((qmv.y < qmvmin.y) | (qmv.y > qmvmax.y)))
+ continue;
+
int cost = ref->lowresQPelCost(fenc, blockOffset, qmv, sad) + mvcost(qmv);
COPY2_IF_LT(bcost, cost, bdir, i);
}
@@ -1124,6 +1164,11 @@
for (int i = 1; i <= wl.qpel_dirs; i++)
{
MV qmv = bmv + square1[i];
+
+ /* skip invalid range */
+ if ((g_maxSlices > 1) & ((qmv.y < qmvmin.y) | (qmv.y > qmvmax.y)))
+ continue;
+
int cost = ref->lowresQPelCost(fenc, blockOffset, qmv, satd) + mvcost(qmv);
COPY2_IF_LT(bcost, cost, bdir, i);
}
@@ -1189,6 +1234,9 @@
}
}
+ // check mv range for slice bound
+ X265_CHECK((g_maxSlices == 1) | ((bmv.y >= qmvmin.y) & (bmv.y <= qmvmax.y)), "mv beyond range!");
+
x265_emms();
outQMv = bmv;
return bcost;
diff -r b08109b3701e -r 9be03f087899 source/encoder/sao.cpp
--- a/source/encoder/sao.cpp	Fri Oct 28 10:28:15 2016 +0800
+++ b/source/encoder/sao.cpp	Mon Oct 31 12:31:24 2016 -0500
@@ -1206,12 +1206,19 @@
void SAO::rdoSaoUnitRowEnd(const SAOParam* saoParam, int numctus)
{
if (!saoParam->bSaoFlag[0])
+ {
m_depthSaoRate[0 * SAO_DEPTHRATE_SIZE + m_refDepth] = 1.0;
+ }
else
+ {
+ assert(m_numNoSao[0] <= numctus);
m_depthSaoRate[0 * SAO_DEPTHRATE_SIZE + m_refDepth] = m_numNoSao[0] / ((double)numctus);
+ }
if (!saoParam->bSaoFlag[1])
+ {
m_depthSaoRate[1 * SAO_DEPTHRATE_SIZE + m_refDepth] = 1.0;
+ }
else
m_depthSaoRate[1 * SAO_DEPTHRATE_SIZE + m_refDepth] = m_numNoSao[1] / ((double)numctus);
}
diff -r b08109b3701e -r 9be03f087899 source/encoder/search.cpp
--- a/source/encoder/search.cpp	Fri Oct 28 10:28:15 2016 +0800
+++ b/source/encoder/search.cpp	Mon Oct 31 12:31:24 2016 -0500
@@ -2545,6 +2545,9 @@
/* conditional clipping for frame parallelism */
mvmin.y = X265_MIN(mvmin.y, (int16_t)m_refLagPixels);
mvmax.y = X265_MIN(mvmax.y, (int16_t)m_refLagPixels);
+
+ /* conditional clipping for negative mv range */
+ mvmax.y = X265_MAX(mvmax.y, mvmin.y);
}
/* Note: this function overwrites the RD cost variables of interMode, but leaves the sa8d cost unharmed */
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20161101/99982ec4/attachment-0001.html>
More information about the x265-devel mailing list