[x265] [slices] fix multi-slices output non-determination bug
Pradeep Ramachandran
pradeep at multicorewareinc.com
Tue Nov 1 04:40:45 CET 2016
On Mon, Oct 31, 2016 at 11:03 PM, chen <chenm003 at 163.com> wrote:
> # HG changeset patch
> # User Min Chen <min.chen at multicorewareinc.com>
> # Date 1477935084 18000
> # Node ID 9be03f08789954f772a50f26485a9c96ca745497
> # Parent b08109b3701e9b86010c5a5ed0ad7b3d6a051911
> [slices] fix multi-slices output non-determination bug
> ---
> source/common/common.h | 2 +-
> source/encoder/analysis.cpp | 8 +-
> source/encoder/frameencoder.cpp | 15 ++---
> source/encoder/motion.cpp | 116 +++++++++++++++++++++++++++---
> --------
> source/encoder/sao.cpp | 7 ++
> source/encoder/search.cpp | 3 +
> 6 files changed, 104 insertions(+), 47 deletions(-)
>
> diff -r b08109b3701e -r 9be03f087899 source/common/common.h
> --- a/source/common/common.h Fri Oct 28 10:28:15 2016 +0800
> +++ b/source/common/common.h Mon Oct 31 12:31:24 2016 -0500
> @@ -176,7 +176,7 @@
>
> #define X265_MIN(a, b) ((a) < (b) ? (a) : (b))
> #define X265_MAX(a, b) ((a) > (b) ? (a) : (b))
> -#define COPY1_IF_LT(x, y) if ((y) < (x)) (x) = (y);
> +#define COPY1_IF_LT(x, y) {if ((y) < (x)) (x) = (y);}
> #define COPY2_IF_LT(x, y, a, b) \
> if ((y) < (x)) \
> { \
> diff -r b08109b3701e -r 9be03f087899 source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp Fri Oct 28 10:28:15 2016 +0800
> +++ b/source/encoder/analysis.cpp Mon Oct 31 12:31:24 2016 -0500
> @@ -1942,12 +1942,12 @@
> if (m_param->maxSlices > 1)
> {
> // NOTE: First row in slice can't negative
> - if ((candMvField[i][0].mv.y < m_sliceMinY) |
> (candMvField[i][1].mv.y < m_sliceMinY))
> + if (X265_MIN(candMvField[i][0].mv.y,
> candMvField[i][1].mv.y) < m_sliceMinY)
> continue;
>
> // Last row in slice can't reference beyond bound since
> it is another slice area
> // TODO: we may beyond bound in future since these area
> have a chance to finish because we use parallel slices. Necessary prepare
> research on load balance
> - if ((candMvField[i][0].mv.y > m_sliceMaxY) |
> (candMvField[i][1].mv.y > m_sliceMaxY))
> + if (X265_MAX(candMvField[i][0].mv.y,
> candMvField[i][1].mv.y) > m_sliceMaxY)
> continue;
> }
>
> @@ -2072,12 +2072,12 @@
> if (m_param->maxSlices > 1)
> {
> // NOTE: First row in slice can't negative
> - if ((candMvField[i][0].mv.y < m_sliceMinY) |
> (candMvField[i][1].mv.y < m_sliceMinY))
> + if (X265_MIN(candMvField[i][0].mv.y,
> candMvField[i][1].mv.y) < m_sliceMinY)
> continue;
>
> // Last row in slice can't reference beyond bound since
> it is another slice area
> // TODO: we may beyond bound in future since these area
> have a chance to finish because we use parallel slices. Necessary prepare
> research on load balance
> - if ((candMvField[i][0].mv.y > m_sliceMaxY) |
> (candMvField[i][1].mv.y > m_sliceMaxY))
> + if (X265_MAX(candMvField[i][0].mv.y,
> candMvField[i][1].mv.y) > m_sliceMaxY)
> continue;
> }
>
> diff -r b08109b3701e -r 9be03f087899 source/encoder/frameencoder.cpp
> --- a/source/encoder/frameencoder.cpp Fri Oct 28 10:28:15 2016 +0800
> +++ b/source/encoder/frameencoder.cpp Mon Oct 31 12:31:24 2016 -0500
> @@ -123,7 +123,7 @@
> int range = m_param->searchRange; /* fpel search */
> range += !!(m_param->searchMethod < 2); /* diamond/hex range check
> lag */
> range += NTAPS_LUMA / 2; /* subpel filter half-length
> */
> - range += 2 + MotionEstimate::hpelIterationCount(m_param->subpelRefine)
> / 2; /* subpel refine steps */
> + range += 2 + (MotionEstimate::hpelIterationCount(m_param->subpelRefine)
> + 1) / 2; /* subpel refine steps */
> m_refLagRows = /*(m_param->maxSlices > 1 ? 1 : 0) +*/ 1 + ((range +
> g_maxCUSize - 1) / g_maxCUSize);
>
> // NOTE: 2 times of numRows because both Encoder and Filter in same
> queue
> @@ -654,8 +654,7 @@
> const uint32_t sliceEndRow = m_sliceBaseRow[sliceId + 1]
> - 1;
> const uint32_t row = sliceStartRow + rowInSlice;
>
> - if (row >= m_numRows)
> - break;
> + X265_CHECK(row < m_numRows, "slices row fault was
> detected");
>
> if (row > sliceEndRow)
> continue;
> @@ -674,7 +673,7 @@
> refpic->m_reconRowFlag[
> rowIdx].waitForChange(0);
>
> if ((bUseWeightP || bUseWeightB) &&
> m_mref[l][ref].isWeighted)
> - m_mref[l][ref].applyWeight(row +
> m_refLagRows, m_numRows, sliceEndRow + 1, sliceId);
> + m_mref[l][ref].applyWeight(rowIdx,
> m_numRows, sliceEndRow, sliceId);
> }
> }
>
> @@ -714,7 +713,7 @@
> refpic->m_reconRowFlag[
> rowIdx].waitForChange(0);
>
> if ((bUseWeightP || bUseWeightB) &&
> m_mref[l][ref].isWeighted)
> - m_mref[list][ref].applyWeight(i +
> m_refLagRows, m_numRows, m_numRows, 0);
> + m_mref[list][ref].applyWeight(rowIdx,
> m_numRows, m_numRows, 0);
> }
> }
>
> @@ -1187,8 +1186,8 @@
> // TODO: specially case handle on first and last row
>
> // Initialize restrict on MV range in slices
> - tld.analysis.m_sliceMinY = -(int16_t)(rowInSlice * g_maxCUSize * 4) +
> 2 * 4;
> - tld.analysis.m_sliceMaxY = (int16_t)((endRowInSlicePlus1 - 1 - row) *
> (g_maxCUSize * 4) - 3 * 4);
> + tld.analysis.m_sliceMinY = -(int16_t)(rowInSlice * g_maxCUSize * 4) +
> 3 * 4;
> + tld.analysis.m_sliceMaxY = (int16_t)((endRowInSlicePlus1 - 1 - row) *
> (g_maxCUSize * 4) - 4 * 4);
>
> // Handle single row slice
> if (tld.analysis.m_sliceMaxY < tld.analysis.m_sliceMinY)
> @@ -1502,7 +1501,7 @@
>
> ScopedLock self(curRow.lock);
> if ((m_bAllRowsStop && intRow > m_vbvResetTriggerRow) ||
> - (!bFirstRowInSlice && ((curRow.completed < numCols - 1) ||
> (m_rows[row - 1].completed < numCols)) && m_rows[row - 1].completed <
> m_rows[row].completed + 2))
> + (!bFirstRowInSlice && ((curRow.completed < numCols - 1) ||
> (m_rows[row - 1].completed < numCols)) && m_rows[row - 1].completed <
> curRow.completed + 2))
> {
> curRow.active = false;
> curRow.busy = false;
> diff -r b08109b3701e -r 9be03f087899 source/encoder/motion.cpp
> --- a/source/encoder/motion.cpp Fri Oct 28 10:28:15 2016 +0800
> +++ b/source/encoder/motion.cpp Mon Oct 31 12:31:24 2016 -0500
> @@ -278,10 +278,14 @@
> costs[1] += mvcost((omv + MV(m1x, m1y)) << 2); \
> costs[2] += mvcost((omv + MV(m2x, m2y)) << 2); \
> costs[3] += mvcost((omv + MV(m3x, m3y)) << 2); \
> - COPY2_IF_LT(bcost, costs[0], bmv, omv + MV(m0x, m0y)); \
> - COPY2_IF_LT(bcost, costs[1], bmv, omv + MV(m1x, m1y)); \
> - COPY2_IF_LT(bcost, costs[2], bmv, omv + MV(m2x, m2y)); \
> - COPY2_IF_LT(bcost, costs[3], bmv, omv + MV(m3x, m3y)); \
> + if ((g_maxSlices == 1) | ((omv.y + m0y >= mvmin.y) & (omv.y + m0y
> <= mvmax.y))) \
> + COPY2_IF_LT(bcost, costs[0], bmv, omv + MV(m0x, m0y)); \
> + if ((g_maxSlices == 1) | ((omv.y + m1y >= mvmin.y) & (omv.y + m1y
> <= mvmax.y))) \
> + COPY2_IF_LT(bcost, costs[1], bmv, omv + MV(m1x, m1y)); \
> + if ((g_maxSlices == 1) | ((omv.y + m2y >= mvmin.y) & (omv.y + m2y
> <= mvmax.y))) \
> + COPY2_IF_LT(bcost, costs[2], bmv, omv + MV(m2x, m2y)); \
> + if ((g_maxSlices == 1) | ((omv.y + m3y >= mvmin.y) & (omv.y + m3y
> <= mvmax.y))) \
> + COPY2_IF_LT(bcost, costs[3], bmv, omv + MV(m3x, m3y)); \
>
Why do you need the `(g_maxSlices == 1) || ` in all the if conditions
above? Isn't it safer to do this check for all cases, and not just for
multiple slices-per-frame?
}
>
> #define COST_MV_X4_DIR(m0x, m0y, m1x, m1y, m2x, m2y, m3x, m3y, costs) \
> @@ -659,8 +663,10 @@
> do
> {
> COST_MV_X4_DIR(0, -1, 0, 1, -1, 0, 1, 0, costs);
> - COPY1_IF_LT(bcost, (costs[0] << 4) + 1);
> - COPY1_IF_LT(bcost, (costs[1] << 4) + 3);
> + if ((g_maxSlices == 1) | ((bmv.y - 1 >= mvmin.y) & (bmv.y - 1
> <= mvmax.y)))
> + COPY1_IF_LT(bcost, (costs[0] << 4) + 1);
> + if ((g_maxSlices == 1) | ((bmv.y + 1 >= mvmin.y) & (bmv.y + 1
> <= mvmax.y)))
> + COPY1_IF_LT(bcost, (costs[1] << 4) + 3);
> COPY1_IF_LT(bcost, (costs[2] << 4) + 4);
> COPY1_IF_LT(bcost, (costs[3] << 4) + 12);
> if (!(bcost & 15))
> @@ -698,36 +704,57 @@
> /* equivalent to the above, but eliminates duplicate candidates */
> COST_MV_X3_DIR(-2, 0, -1, 2, 1, 2, costs);
> bcost <<= 3;
> - COPY1_IF_LT(bcost, (costs[0] << 3) + 2);
> - COPY1_IF_LT(bcost, (costs[1] << 3) + 3);
> - COPY1_IF_LT(bcost, (costs[2] << 3) + 4);
> + if ((g_maxSlices == 1) | ((bmv.y >= mvmin.y) & (bmv.y <=
> mvmax.y)))
> + COPY1_IF_LT(bcost, (costs[0] << 3) + 2);
> + if ((g_maxSlices == 1) | ((bmv.y + 2 >= mvmin.y) & (bmv.y + 2 <=
> mvmax.y)))
> + {
> + COPY1_IF_LT(bcost, (costs[1] << 3) + 3);
> + COPY1_IF_LT(bcost, (costs[2] << 3) + 4);
> + }
> +
> COST_MV_X3_DIR(2, 0, 1, -2, -1, -2, costs);
> - COPY1_IF_LT(bcost, (costs[0] << 3) + 5);
> - COPY1_IF_LT(bcost, (costs[1] << 3) + 6);
> - COPY1_IF_LT(bcost, (costs[2] << 3) + 7);
> + if ((g_maxSlices == 1) | ((bmv.y >= mvmin.y) & (bmv.y <=
> mvmax.y)))
> + COPY1_IF_LT(bcost, (costs[0] << 3) + 5);
> + if ((g_maxSlices == 1) | ((bmv.y - 2 >= mvmin.y) & (bmv.y - 2 <=
> mvmax.y)))
> + {
> + COPY1_IF_LT(bcost, (costs[1] << 3) + 6);
> + COPY1_IF_LT(bcost, (costs[2] << 3) + 7);
> + }
>
> if (bcost & 7)
> {
> int dir = (bcost & 7) - 2;
> - bmv += hex2[dir + 1];
>
> - /* half hexagon, not overlapping the previous iteration */
> - for (int i = (merange >> 1) - 1; i > 0 &&
> bmv.checkRange(mvmin, mvmax); i--)
> + if ((g_maxSlices == 1) | ((bmv.y + hex2[dir + 1].y >=
> mvmin.y) & (bmv.y + hex2[dir + 1].y <= mvmax.y)))
> {
> - COST_MV_X3_DIR(hex2[dir + 0].x, hex2[dir + 0].y,
> - hex2[dir + 1].x, hex2[dir + 1].y,
> - hex2[dir + 2].x, hex2[dir + 2].y,
> - costs);
> - bcost &= ~7;
> - COPY1_IF_LT(bcost, (costs[0] << 3) + 1);
> - COPY1_IF_LT(bcost, (costs[1] << 3) + 2);
> - COPY1_IF_LT(bcost, (costs[2] << 3) + 3);
> - if (!(bcost & 7))
> - break;
> - dir += (bcost & 7) - 2;
> - dir = mod6m1[dir + 1];
> bmv += hex2[dir + 1];
> - }
> +
> + /* half hexagon, not overlapping the previous iteration */
> + for (int i = (merange >> 1) - 1; i > 0 &&
> bmv.checkRange(mvmin, mvmax); i--)
> + {
> + COST_MV_X3_DIR(hex2[dir + 0].x, hex2[dir + 0].y,
> + hex2[dir + 1].x, hex2[dir + 1].y,
> + hex2[dir + 2].x, hex2[dir + 2].y,
> + costs);
> + bcost &= ~7;
> +
> + if ((g_maxSlices == 1) | ((bmv.y + hex2[dir + 0].y >=
> mvmin.y) & (bmv.y + hex2[dir + 0].y <= mvmax.y)))
> + COPY1_IF_LT(bcost, (costs[0] << 3) + 1);
> +
> + if ((g_maxSlices == 1) | ((bmv.y + hex2[dir + 1].y >=
> mvmin.y) & (bmv.y + hex2[dir + 1].y <= mvmax.y)))
> + COPY1_IF_LT(bcost, (costs[1] << 3) + 2);
> +
> + if ((g_maxSlices == 1) | ((bmv.y + hex2[dir + 2].y >=
> mvmin.y) & (bmv.y + hex2[dir + 2].y <= mvmax.y)))
> + COPY1_IF_LT(bcost, (costs[2] << 3) + 3);
> +
> + if (!(bcost & 7))
> + break;
> +
> + dir += (bcost & 7) - 2;
> + dir = mod6m1[dir + 1];
> + bmv += hex2[dir + 1];
> + }
> + } // if ((g_maxSlices == 1) | ((bmv.y + hex2[dir + 1].y >=
> mvmin.y) & (bmv.y + hex2[dir + 1].y <= mvmax.y)))
> }
> bcost >>= 3;
> #endif // if 0
> @@ -735,15 +762,21 @@
> /* square refine */
> int dir = 0;
> COST_MV_X4_DIR(0, -1, 0, 1, -1, 0, 1, 0, costs);
> - COPY2_IF_LT(bcost, costs[0], dir, 1);
> - COPY2_IF_LT(bcost, costs[1], dir, 2);
> + if ((g_maxSlices == 1) | ((bmv.y - 1 >= mvmin.y) & (bmv.y - 1 <=
> mvmax.y)))
> + COPY2_IF_LT(bcost, costs[0], dir, 1);
> + if ((g_maxSlices == 1) | ((bmv.y + 1 >= mvmin.y) & (bmv.y + 1 <=
> mvmax.y)))
> + COPY2_IF_LT(bcost, costs[1], dir, 2);
> COPY2_IF_LT(bcost, costs[2], dir, 3);
> COPY2_IF_LT(bcost, costs[3], dir, 4);
> COST_MV_X4_DIR(-1, -1, -1, 1, 1, -1, 1, 1, costs);
> - COPY2_IF_LT(bcost, costs[0], dir, 5);
> - COPY2_IF_LT(bcost, costs[1], dir, 6);
> - COPY2_IF_LT(bcost, costs[2], dir, 7);
> - COPY2_IF_LT(bcost, costs[3], dir, 8);
> + if ((g_maxSlices == 1) | ((bmv.y - 1 >= mvmin.y) & (bmv.y - 1 <=
> mvmax.y)))
> + COPY2_IF_LT(bcost, costs[0], dir, 5);
> + if ((g_maxSlices == 1) | ((bmv.y + 1 >= mvmin.y) & (bmv.y + 1 <=
> mvmax.y)))
> + COPY2_IF_LT(bcost, costs[1], dir, 6);
> + if ((g_maxSlices == 1) | ((bmv.y - 1 >= mvmin.y) & (bmv.y - 1 <=
> mvmax.y)))
> + COPY2_IF_LT(bcost, costs[2], dir, 7);
> + if ((g_maxSlices == 1) | ((bmv.y + 1 >= mvmin.y) & (bmv.y + 1 <=
> mvmax.y)))
> + COPY2_IF_LT(bcost, costs[3], dir, 8);
> bmv += square1[dir];
> break;
> }
> @@ -756,6 +789,7 @@
> /* refine predictors */
> omv = bmv;
> ucost1 = bcost;
> + X265_CHECK((g_maxSlices == 1) | ((pmv.y >= mvmin.y) & (pmv.y <=
> mvmax.y)), "pmv outside of search range!");
> DIA1_ITER(pmv.x, pmv.y);
> if (pmv.notZero())
> DIA1_ITER(0, 0);
> @@ -1099,6 +1133,7 @@
> if ((g_maxSlices > 1) & ((bmv.y < qmvmin.y) | (bmv.y > qmvmax.y)))
> {
> bmv.y = x265_min(x265_max(bmv.y, qmvmin.y), qmvmax.y);
> + bcost = subpelCompare(ref, bmv, satd) + mvcost(bmv);
> }
>
> if (!bcost)
> @@ -1113,6 +1148,11 @@
> for (int i = 1; i <= wl.hpel_dirs; i++)
> {
> MV qmv = bmv + square1[i] * 2;
> +
> + /* skip invalid range */
> + if ((g_maxSlices > 1) & ((qmv.y < qmvmin.y) | (qmv.y >
> qmvmax.y)))
> + continue;
> +
> int cost = ref->lowresQPelCost(fenc, blockOffset, qmv, sad) +
> mvcost(qmv);
> COPY2_IF_LT(bcost, cost, bdir, i);
> }
> @@ -1124,6 +1164,11 @@
> for (int i = 1; i <= wl.qpel_dirs; i++)
> {
> MV qmv = bmv + square1[i];
> +
> + /* skip invalid range */
> + if ((g_maxSlices > 1) & ((qmv.y < qmvmin.y) | (qmv.y >
> qmvmax.y)))
> + continue;
> +
> int cost = ref->lowresQPelCost(fenc, blockOffset, qmv, satd)
> + mvcost(qmv);
> COPY2_IF_LT(bcost, cost, bdir, i);
> }
> @@ -1189,6 +1234,9 @@
> }
> }
>
> + // check mv range for slice bound
> + X265_CHECK((g_maxSlices == 1) | ((bmv.y >= qmvmin.y) & (bmv.y <=
> qmvmax.y)), "mv beyond range!");
> +
> x265_emms();
> outQMv = bmv;
> return bcost;
> diff -r b08109b3701e -r 9be03f087899 source/encoder/sao.cpp
> --- a/source/encoder/sao.cpp Fri Oct 28 10:28:15 2016 +0800
> +++ b/source/encoder/sao.cpp Mon Oct 31 12:31:24 2016 -0500
> @@ -1206,12 +1206,19 @@
> void SAO::rdoSaoUnitRowEnd(const SAOParam* saoParam, int numctus)
> {
> if (!saoParam->bSaoFlag[0])
> + {
> m_depthSaoRate[0 * SAO_DEPTHRATE_SIZE + m_refDepth] = 1.0;
> + }
> else
> + {
> + assert(m_numNoSao[0] <= numctus);
> m_depthSaoRate[0 * SAO_DEPTHRATE_SIZE + m_refDepth] =
> m_numNoSao[0] / ((double)numctus);
> + }
>
> if (!saoParam->bSaoFlag[1])
> + {
> m_depthSaoRate[1 * SAO_DEPTHRATE_SIZE + m_refDepth] = 1.0;
> + }
> else
> m_depthSaoRate[1 * SAO_DEPTHRATE_SIZE + m_refDepth] =
> m_numNoSao[1] / ((double)numctus);
> }
> diff -r b08109b3701e -r 9be03f087899 source/encoder/search.cpp
> --- a/source/encoder/search.cpp Fri Oct 28 10:28:15 2016 +0800
> +++ b/source/encoder/search.cpp Mon Oct 31 12:31:24 2016 -0500
> @@ -2545,6 +2545,9 @@
> /* conditional clipping for frame parallelism */
> mvmin.y = X265_MIN(mvmin.y, (int16_t)m_refLagPixels);
> mvmax.y = X265_MIN(mvmax.y, (int16_t)m_refLagPixels);
> +
> + /* conditional clipping for negative mv range */
> + mvmax.y = X265_MAX(mvmax.y, mvmin.y);
> }
>
> /* Note: this function overwrites the RD cost variables of interMode, but
> leaves the sa8d cost unharmed */
>
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20161101/c9339bfc/attachment-0001.html>
More information about the x265-devel
mailing list