[x265] [slices] fix multi-slices output non-determination bug

Pradeep Ramachandran pradeep at multicorewareinc.com
Tue Nov 1 04:40:45 CET 2016


On Mon, Oct 31, 2016 at 11:03 PM, chen <chenm003 at 163.com> wrote:

> # HG changeset patch
> # User Min Chen <min.chen at multicorewareinc.com>
> # Date 1477935084 18000
> # Node ID 9be03f08789954f772a50f26485a9c96ca745497
> # Parent  b08109b3701e9b86010c5a5ed0ad7b3d6a051911
> [slices] fix multi-slices output non-determination bug
> ---
>  source/common/common.h          |    2 +-
>  source/encoder/analysis.cpp     |    8 +-
>  source/encoder/frameencoder.cpp |   15 ++---
>  source/encoder/motion.cpp       |  116 +++++++++++++++++++++++++++---
> --------
>  source/encoder/sao.cpp          |    7 ++
>  source/encoder/search.cpp       |    3 +
>  6 files changed, 104 insertions(+), 47 deletions(-)
>
> diff -r b08109b3701e -r 9be03f087899 source/common/common.h
> --- a/source/common/common.h Fri Oct 28 10:28:15 2016 +0800
> +++ b/source/common/common.h Mon Oct 31 12:31:24 2016 -0500
> @@ -176,7 +176,7 @@
>
>  #define X265_MIN(a, b) ((a) < (b) ? (a) : (b))
>  #define X265_MAX(a, b) ((a) > (b) ? (a) : (b))
> -#define COPY1_IF_LT(x, y) if ((y) < (x)) (x) = (y);
> +#define COPY1_IF_LT(x, y) {if ((y) < (x)) (x) = (y);}
>  #define COPY2_IF_LT(x, y, a, b) \
>      if ((y) < (x)) \
>      { \
> diff -r b08109b3701e -r 9be03f087899 source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp Fri Oct 28 10:28:15 2016 +0800
> +++ b/source/encoder/analysis.cpp Mon Oct 31 12:31:24 2016 -0500
> @@ -1942,12 +1942,12 @@
>              if (m_param->maxSlices > 1)
>              {
>                  // NOTE: First row in slice can't negative
> -                if ((candMvField[i][0].mv.y < m_sliceMinY) |
> (candMvField[i][1].mv.y < m_sliceMinY))
> +                if (X265_MIN(candMvField[i][0].mv.y,
> candMvField[i][1].mv.y) < m_sliceMinY)
>                      continue;
>
>                  // Last row in slice can't reference beyond bound since
> it is another slice area
>                  // TODO: we may beyond bound in future since these area
> have a chance to finish because we use parallel slices. Necessary prepare
> research on load balance
> -                if ((candMvField[i][0].mv.y > m_sliceMaxY) |
> (candMvField[i][1].mv.y > m_sliceMaxY))
> +                if (X265_MAX(candMvField[i][0].mv.y,
> candMvField[i][1].mv.y) > m_sliceMaxY)
>                      continue;
>              }
>
> @@ -2072,12 +2072,12 @@
>              if (m_param->maxSlices > 1)
>              {
>                  // NOTE: First row in slice can't negative
> -                if ((candMvField[i][0].mv.y < m_sliceMinY) |
> (candMvField[i][1].mv.y < m_sliceMinY))
> +                if (X265_MIN(candMvField[i][0].mv.y,
> candMvField[i][1].mv.y) < m_sliceMinY)
>                      continue;
>
>                  // Last row in slice can't reference beyond bound since
> it is another slice area
>                  // TODO: we may beyond bound in future since these area
> have a chance to finish because we use parallel slices. Necessary prepare
> research on load balance
> -                if ((candMvField[i][0].mv.y > m_sliceMaxY) |
> (candMvField[i][1].mv.y > m_sliceMaxY))
> +                if (X265_MAX(candMvField[i][0].mv.y,
> candMvField[i][1].mv.y) > m_sliceMaxY)
>                      continue;
>              }
>
> diff -r b08109b3701e -r 9be03f087899 source/encoder/frameencoder.cpp
> --- a/source/encoder/frameencoder.cpp Fri Oct 28 10:28:15 2016 +0800
> +++ b/source/encoder/frameencoder.cpp Mon Oct 31 12:31:24 2016 -0500
> @@ -123,7 +123,7 @@
>      int range  = m_param->searchRange;       /* fpel search */
>      range += !!(m_param->searchMethod < 2);  /* diamond/hex range check
> lag */
>      range += NTAPS_LUMA / 2;                 /* subpel filter half-length
> */
> -    range += 2 + MotionEstimate::hpelIterationCount(m_param->subpelRefine)
> / 2; /* subpel refine steps */
> +    range += 2 + (MotionEstimate::hpelIterationCount(m_param->subpelRefine)
> + 1) / 2; /* subpel refine steps */
>      m_refLagRows = /*(m_param->maxSlices > 1 ? 1 : 0) +*/ 1 + ((range +
> g_maxCUSize - 1) / g_maxCUSize);
>
>      // NOTE: 2 times of numRows because both Encoder and Filter in same
> queue
> @@ -654,8 +654,7 @@
>                  const uint32_t sliceEndRow = m_sliceBaseRow[sliceId + 1]
> - 1;
>                  const uint32_t row = sliceStartRow + rowInSlice;
>
> -                if (row >= m_numRows)
> -                    break;
> +                X265_CHECK(row < m_numRows, "slices row fault was
> detected");
>
>                  if (row > sliceEndRow)
>                      continue;
> @@ -674,7 +673,7 @@
>                              refpic->m_reconRowFlag[
> rowIdx].waitForChange(0);
>
>                          if ((bUseWeightP || bUseWeightB) &&
> m_mref[l][ref].isWeighted)
> -                            m_mref[l][ref].applyWeight(row +
> m_refLagRows, m_numRows, sliceEndRow + 1, sliceId);
> +                            m_mref[l][ref].applyWeight(rowIdx,
> m_numRows, sliceEndRow, sliceId);
>                      }
>                  }
>
> @@ -714,7 +713,7 @@
>                              refpic->m_reconRowFlag[
> rowIdx].waitForChange(0);
>
>                          if ((bUseWeightP || bUseWeightB) &&
> m_mref[l][ref].isWeighted)
> -                            m_mref[list][ref].applyWeight(i +
> m_refLagRows, m_numRows, m_numRows, 0);
> +                            m_mref[list][ref].applyWeight(rowIdx,
> m_numRows, m_numRows, 0);
>                      }
>                  }
>
> @@ -1187,8 +1186,8 @@
>      // TODO: specially case handle on first and last row
>
>      // Initialize restrict on MV range in slices
> -    tld.analysis.m_sliceMinY = -(int16_t)(rowInSlice * g_maxCUSize * 4) +
> 2 * 4;
> -    tld.analysis.m_sliceMaxY = (int16_t)((endRowInSlicePlus1 - 1 - row) *
> (g_maxCUSize * 4) - 3 * 4);
> +    tld.analysis.m_sliceMinY = -(int16_t)(rowInSlice * g_maxCUSize * 4) +
> 3 * 4;
> +    tld.analysis.m_sliceMaxY = (int16_t)((endRowInSlicePlus1 - 1 - row) *
> (g_maxCUSize * 4) - 4 * 4);
>
>      // Handle single row slice
>      if (tld.analysis.m_sliceMaxY < tld.analysis.m_sliceMinY)
> @@ -1502,7 +1501,7 @@
>
>          ScopedLock self(curRow.lock);
>          if ((m_bAllRowsStop && intRow > m_vbvResetTriggerRow) ||
> -            (!bFirstRowInSlice && ((curRow.completed < numCols - 1) ||
> (m_rows[row - 1].completed < numCols)) && m_rows[row - 1].completed <
> m_rows[row].completed + 2))
> +            (!bFirstRowInSlice && ((curRow.completed < numCols - 1) ||
> (m_rows[row - 1].completed < numCols)) && m_rows[row - 1].completed <
> curRow.completed + 2))
>          {
>              curRow.active = false;
>              curRow.busy = false;
> diff -r b08109b3701e -r 9be03f087899 source/encoder/motion.cpp
> --- a/source/encoder/motion.cpp Fri Oct 28 10:28:15 2016 +0800
> +++ b/source/encoder/motion.cpp Mon Oct 31 12:31:24 2016 -0500
> @@ -278,10 +278,14 @@
>          costs[1] += mvcost((omv + MV(m1x, m1y)) << 2); \
>          costs[2] += mvcost((omv + MV(m2x, m2y)) << 2); \
>          costs[3] += mvcost((omv + MV(m3x, m3y)) << 2); \
> -        COPY2_IF_LT(bcost, costs[0], bmv, omv + MV(m0x, m0y)); \
> -        COPY2_IF_LT(bcost, costs[1], bmv, omv + MV(m1x, m1y)); \
> -        COPY2_IF_LT(bcost, costs[2], bmv, omv + MV(m2x, m2y)); \
> -        COPY2_IF_LT(bcost, costs[3], bmv, omv + MV(m3x, m3y)); \
> +        if ((g_maxSlices == 1) | ((omv.y + m0y >= mvmin.y) & (omv.y + m0y
> <= mvmax.y))) \
> +            COPY2_IF_LT(bcost, costs[0], bmv, omv + MV(m0x, m0y)); \
> +        if ((g_maxSlices == 1) | ((omv.y + m1y >= mvmin.y) & (omv.y + m1y
> <= mvmax.y))) \
> +            COPY2_IF_LT(bcost, costs[1], bmv, omv + MV(m1x, m1y)); \
> +        if ((g_maxSlices == 1) | ((omv.y + m2y >= mvmin.y) & (omv.y + m2y
> <= mvmax.y))) \
> +            COPY2_IF_LT(bcost, costs[2], bmv, omv + MV(m2x, m2y)); \
> +        if ((g_maxSlices == 1) | ((omv.y + m3y >= mvmin.y) & (omv.y + m3y
> <= mvmax.y))) \
> +            COPY2_IF_LT(bcost, costs[3], bmv, omv + MV(m3x, m3y)); \
>

Why do you need the `(g_maxSlices == 1)  || ` in all the if conditions
above? Isn't it safer to do this check for all cases, and not just for
multiple slices-per-frame?

     }
>
>  #define COST_MV_X4_DIR(m0x, m0y, m1x, m1y, m2x, m2y, m3x, m3y, costs) \
> @@ -659,8 +663,10 @@
>          do
>          {
>              COST_MV_X4_DIR(0, -1, 0, 1, -1, 0, 1, 0, costs);
> -            COPY1_IF_LT(bcost, (costs[0] << 4) + 1);
> -            COPY1_IF_LT(bcost, (costs[1] << 4) + 3);
> +            if ((g_maxSlices == 1) | ((bmv.y - 1 >= mvmin.y) & (bmv.y - 1
> <= mvmax.y)))
> +                COPY1_IF_LT(bcost, (costs[0] << 4) + 1);
> +            if ((g_maxSlices == 1) | ((bmv.y + 1 >= mvmin.y) & (bmv.y + 1
> <= mvmax.y)))
> +                COPY1_IF_LT(bcost, (costs[1] << 4) + 3);
>              COPY1_IF_LT(bcost, (costs[2] << 4) + 4);
>              COPY1_IF_LT(bcost, (costs[3] << 4) + 12);
>              if (!(bcost & 15))
> @@ -698,36 +704,57 @@
>        /* equivalent to the above, but eliminates duplicate candidates */
>          COST_MV_X3_DIR(-2, 0, -1, 2,  1, 2, costs);
>          bcost <<= 3;
> -        COPY1_IF_LT(bcost, (costs[0] << 3) + 2);
> -        COPY1_IF_LT(bcost, (costs[1] << 3) + 3);
> -        COPY1_IF_LT(bcost, (costs[2] << 3) + 4);
> +        if ((g_maxSlices == 1) | ((bmv.y >= mvmin.y) & (bmv.y <=
> mvmax.y)))
> +            COPY1_IF_LT(bcost, (costs[0] << 3) + 2);
> +        if ((g_maxSlices == 1) | ((bmv.y + 2 >= mvmin.y) & (bmv.y + 2 <=
> mvmax.y)))
> +        {
> +            COPY1_IF_LT(bcost, (costs[1] << 3) + 3);
> +            COPY1_IF_LT(bcost, (costs[2] << 3) + 4);
> +        }
> +
>          COST_MV_X3_DIR(2, 0,  1, -2, -1, -2, costs);
> -        COPY1_IF_LT(bcost, (costs[0] << 3) + 5);
> -        COPY1_IF_LT(bcost, (costs[1] << 3) + 6);
> -        COPY1_IF_LT(bcost, (costs[2] << 3) + 7);
> +        if ((g_maxSlices == 1) | ((bmv.y >= mvmin.y) & (bmv.y <=
> mvmax.y)))
> +            COPY1_IF_LT(bcost, (costs[0] << 3) + 5);
> +        if ((g_maxSlices == 1) | ((bmv.y - 2 >= mvmin.y) & (bmv.y - 2 <=
> mvmax.y)))
> +        {
> +            COPY1_IF_LT(bcost, (costs[1] << 3) + 6);
> +            COPY1_IF_LT(bcost, (costs[2] << 3) + 7);
> +        }
>
>          if (bcost & 7)
>          {
>              int dir = (bcost & 7) - 2;
> -            bmv += hex2[dir + 1];
>
> -            /* half hexagon, not overlapping the previous iteration */
> -            for (int i = (merange >> 1) - 1; i > 0 &&
> bmv.checkRange(mvmin, mvmax); i--)
> +            if ((g_maxSlices == 1) | ((bmv.y + hex2[dir + 1].y >=
> mvmin.y) & (bmv.y + hex2[dir + 1].y <= mvmax.y)))
>              {
> -                COST_MV_X3_DIR(hex2[dir + 0].x, hex2[dir + 0].y,
> -                               hex2[dir + 1].x, hex2[dir + 1].y,
> -                               hex2[dir + 2].x, hex2[dir + 2].y,
> -                               costs);
> -                bcost &= ~7;
> -                COPY1_IF_LT(bcost, (costs[0] << 3) + 1);
> -                COPY1_IF_LT(bcost, (costs[1] << 3) + 2);
> -                COPY1_IF_LT(bcost, (costs[2] << 3) + 3);
> -                if (!(bcost & 7))
> -                    break;
> -                dir += (bcost & 7) - 2;
> -                dir = mod6m1[dir + 1];
>                  bmv += hex2[dir + 1];
> -            }
> +
> +                /* half hexagon, not overlapping the previous iteration */
> +                for (int i = (merange >> 1) - 1; i > 0 &&
> bmv.checkRange(mvmin, mvmax); i--)
> +                {
> +                    COST_MV_X3_DIR(hex2[dir + 0].x, hex2[dir + 0].y,
> +                        hex2[dir + 1].x, hex2[dir + 1].y,
> +                        hex2[dir + 2].x, hex2[dir + 2].y,
> +                        costs);
> +                    bcost &= ~7;
> +
> +                    if ((g_maxSlices == 1) | ((bmv.y + hex2[dir + 0].y >=
> mvmin.y) & (bmv.y + hex2[dir + 0].y <= mvmax.y)))
> +                        COPY1_IF_LT(bcost, (costs[0] << 3) + 1);
> +
> +                    if ((g_maxSlices == 1) | ((bmv.y + hex2[dir + 1].y >=
> mvmin.y) & (bmv.y + hex2[dir + 1].y <= mvmax.y)))
> +                        COPY1_IF_LT(bcost, (costs[1] << 3) + 2);
> +
> +                    if ((g_maxSlices == 1) | ((bmv.y + hex2[dir + 2].y >=
> mvmin.y) & (bmv.y + hex2[dir + 2].y <= mvmax.y)))
> +                        COPY1_IF_LT(bcost, (costs[2] << 3) + 3);
> +
> +                    if (!(bcost & 7))
> +                        break;
> +
> +                    dir += (bcost & 7) - 2;
> +                    dir = mod6m1[dir + 1];
> +                    bmv += hex2[dir + 1];
> +                }
> +            } // if ((g_maxSlices == 1) | ((bmv.y + hex2[dir + 1].y >=
> mvmin.y) & (bmv.y + hex2[dir + 1].y <= mvmax.y)))
>          }
>          bcost >>= 3;
>  #endif // if 0
> @@ -735,15 +762,21 @@
>          /* square refine */
>          int dir = 0;
>          COST_MV_X4_DIR(0, -1,  0, 1, -1, 0, 1, 0, costs);
> -        COPY2_IF_LT(bcost, costs[0], dir, 1);
> -        COPY2_IF_LT(bcost, costs[1], dir, 2);
> +        if ((g_maxSlices == 1) | ((bmv.y - 1 >= mvmin.y) & (bmv.y - 1 <=
> mvmax.y)))
> +            COPY2_IF_LT(bcost, costs[0], dir, 1);
> +        if ((g_maxSlices == 1) | ((bmv.y + 1 >= mvmin.y) & (bmv.y + 1 <=
> mvmax.y)))
> +            COPY2_IF_LT(bcost, costs[1], dir, 2);
>          COPY2_IF_LT(bcost, costs[2], dir, 3);
>          COPY2_IF_LT(bcost, costs[3], dir, 4);
>          COST_MV_X4_DIR(-1, -1, -1, 1, 1, -1, 1, 1, costs);
> -        COPY2_IF_LT(bcost, costs[0], dir, 5);
> -        COPY2_IF_LT(bcost, costs[1], dir, 6);
> -        COPY2_IF_LT(bcost, costs[2], dir, 7);
> -        COPY2_IF_LT(bcost, costs[3], dir, 8);
> +        if ((g_maxSlices == 1) | ((bmv.y - 1 >= mvmin.y) & (bmv.y - 1 <=
> mvmax.y)))
> +            COPY2_IF_LT(bcost, costs[0], dir, 5);
> +        if ((g_maxSlices == 1) | ((bmv.y + 1 >= mvmin.y) & (bmv.y + 1 <=
> mvmax.y)))
> +            COPY2_IF_LT(bcost, costs[1], dir, 6);
> +        if ((g_maxSlices == 1) | ((bmv.y - 1 >= mvmin.y) & (bmv.y - 1 <=
> mvmax.y)))
> +            COPY2_IF_LT(bcost, costs[2], dir, 7);
> +        if ((g_maxSlices == 1) | ((bmv.y + 1 >= mvmin.y) & (bmv.y + 1 <=
> mvmax.y)))
> +            COPY2_IF_LT(bcost, costs[3], dir, 8);
>          bmv += square1[dir];
>          break;
>      }
> @@ -756,6 +789,7 @@
>          /* refine predictors */
>          omv = bmv;
>          ucost1 = bcost;
> +        X265_CHECK((g_maxSlices == 1) | ((pmv.y >= mvmin.y) & (pmv.y <=
> mvmax.y)), "pmv outside of search range!");
>          DIA1_ITER(pmv.x, pmv.y);
>          if (pmv.notZero())
>              DIA1_ITER(0, 0);
> @@ -1099,6 +1133,7 @@
>      if ((g_maxSlices > 1) & ((bmv.y < qmvmin.y) | (bmv.y > qmvmax.y)))
>      {
>          bmv.y = x265_min(x265_max(bmv.y, qmvmin.y), qmvmax.y);
> +        bcost = subpelCompare(ref, bmv, satd) + mvcost(bmv);
>      }
>
>      if (!bcost)
> @@ -1113,6 +1148,11 @@
>          for (int i = 1; i <= wl.hpel_dirs; i++)
>          {
>              MV qmv = bmv + square1[i] * 2;
> +
> +            /* skip invalid range */
> +            if ((g_maxSlices > 1) & ((qmv.y < qmvmin.y) | (qmv.y >
> qmvmax.y)))
> +                continue;
> +
>              int cost = ref->lowresQPelCost(fenc, blockOffset, qmv, sad) +
> mvcost(qmv);
>              COPY2_IF_LT(bcost, cost, bdir, i);
>          }
> @@ -1124,6 +1164,11 @@
>          for (int i = 1; i <= wl.qpel_dirs; i++)
>          {
>              MV qmv = bmv + square1[i];
> +
> +            /* skip invalid range */
> +            if ((g_maxSlices > 1) & ((qmv.y < qmvmin.y) | (qmv.y >
> qmvmax.y)))
> +                continue;
> +
>              int cost = ref->lowresQPelCost(fenc, blockOffset, qmv, satd)
> + mvcost(qmv);
>              COPY2_IF_LT(bcost, cost, bdir, i);
>          }
> @@ -1189,6 +1234,9 @@
>          }
>      }
>
> +    // check mv range for slice bound
> +    X265_CHECK((g_maxSlices == 1) | ((bmv.y >= qmvmin.y) & (bmv.y <=
> qmvmax.y)), "mv beyond range!");
> +
>      x265_emms();
>      outQMv = bmv;
>      return bcost;
> diff -r b08109b3701e -r 9be03f087899 source/encoder/sao.cpp
> --- a/source/encoder/sao.cpp Fri Oct 28 10:28:15 2016 +0800
> +++ b/source/encoder/sao.cpp Mon Oct 31 12:31:24 2016 -0500
> @@ -1206,12 +1206,19 @@
>  void SAO::rdoSaoUnitRowEnd(const SAOParam* saoParam, int numctus)
>  {
>      if (!saoParam->bSaoFlag[0])
> +    {
>          m_depthSaoRate[0 * SAO_DEPTHRATE_SIZE + m_refDepth] = 1.0;
> +    }
>      else
> +    {
> +        assert(m_numNoSao[0] <= numctus);
>          m_depthSaoRate[0 * SAO_DEPTHRATE_SIZE + m_refDepth] =
> m_numNoSao[0] / ((double)numctus);
> +    }
>
>      if (!saoParam->bSaoFlag[1])
> +    {
>          m_depthSaoRate[1 * SAO_DEPTHRATE_SIZE + m_refDepth] = 1.0;
> +    }
>      else
>          m_depthSaoRate[1 * SAO_DEPTHRATE_SIZE + m_refDepth] =
> m_numNoSao[1] / ((double)numctus);
>  }
> diff -r b08109b3701e -r 9be03f087899 source/encoder/search.cpp
> --- a/source/encoder/search.cpp Fri Oct 28 10:28:15 2016 +0800
> +++ b/source/encoder/search.cpp Mon Oct 31 12:31:24 2016 -0500
> @@ -2545,6 +2545,9 @@
>      /* conditional clipping for frame parallelism */
>      mvmin.y = X265_MIN(mvmin.y, (int16_t)m_refLagPixels);
>      mvmax.y = X265_MIN(mvmax.y, (int16_t)m_refLagPixels);
> +
> +    /* conditional clipping for negative mv range */
> +    mvmax.y = X265_MAX(mvmax.y, mvmin.y);
>  }
>
>  /* Note: this function overwrites the RD cost variables of interMode, but
> leaves the sa8d cost unharmed */
>
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20161101/c9339bfc/attachment-0001.html>


More information about the x265-devel mailing list