[x265] [PATCH] Fix MV Wrap-around

Fri Mar 29 06:15:44 CET 2019

# HG changeset patch
# User Masaharu Tamura <tamura at pegasys-inc.com>
# Date 1553818841 -32400
#      Fri Mar 29 09:20:41 2019 +0900
# Node ID 0018ca1ca42f1c91422b9a78091967cc45bb944c
# Parent  3e5032228123c1898d408a30e45ac15eb687ffb6
Fix MV Wrap-around

Pushed to default branch of x265.

Thanks & Regards,
Dinesh

On Fri, Mar 29, 2019 at 5:53 AM <tamura at pegasys-inc.com> wrote:

> # HG changeset patch
> # User Masaharu Tamura <tamura at pegasys-inc.com>
> # Date 1553818841 -32400
> #      Fri Mar 29 09:20:41 2019 +0900
> # Node ID 0018ca1ca42f1c91422b9a78091967cc45bb944c
> # Parent  3e5032228123c1898d408a30e45ac15eb687ffb6
> Fix MV Wrap-around
>
> Fixed the wrap-around caused by MV structure overflow, which occurred at
> around 8K pixels or more.
>
> diff -r 3e5032228123 -r 0018ca1ca42f source/CMakeLists.txt
> --- a/source/CMakeLists.txt     Fri Mar 22 13:09:49 2019 +0530
> +++ b/source/CMakeLists.txt     Fri Mar 29 09:20:41 2019 +0900
> @@ -29,7 +29,7 @@
>  option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF)
>  mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
>  # X265_BUILD must be incremented each time the public API is changed
> -set(X265_BUILD 172)
> +set(X265_BUILD 173)
>  configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
>                 "${PROJECT_BINARY_DIR}/x265.def")
>  configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
> diff -r 3e5032228123 -r 0018ca1ca42f source/common/cudata.cpp
> --- a/source/common/cudata.cpp  Fri Mar 22 13:09:49 2019 +0530
> +++ b/source/common/cudata.cpp  Fri Mar 29 09:20:41 2019 +0900
> @@ -106,7 +106,7 @@
>      int mvx = x265_clip3(-32768, 32767, (scale * mv.x + 127 + (scale *
> mv.x < 0)) >> 8);
>      int mvy = x265_clip3(-32768, 32767, (scale * mv.y + 127 + (scale *
> mv.y < 0)) >> 8);
>
> -    return MV((int16_t)mvx, (int16_t)mvy);
> +    return MV((int32_t)mvx, (int32_t)mvy);
>  }
>
>  }
> @@ -1917,11 +1917,11 @@
>      const uint32_t mvshift = 2;
>      uint32_t offset = 8;
>
> -    int16_t xmax = (int16_t)((m_slice->m_sps->picWidthInLumaSamples +
> offset - m_cuPelX - 1) << mvshift);
> -    int16_t xmin = -(int16_t)((m_encData->m_param->maxCUSize + offset +
> m_cuPelX - 1) << mvshift);
> +    int32_t xmax = (int32_t)((m_slice->m_sps->picWidthInLumaSamples +
> offset - m_cuPelX - 1) << mvshift);
> +    int32_t xmin = -(int32_t)((m_encData->m_param->maxCUSize + offset +
> m_cuPelX - 1) << mvshift);
>
> -    int16_t ymax = (int16_t)((m_slice->m_sps->picHeightInLumaSamples +
> offset - m_cuPelY - 1) << mvshift);
> -    int16_t ymin = -(int16_t)((m_encData->m_param->maxCUSize + offset +
> m_cuPelY - 1) << mvshift);
> +    int32_t ymax = (int32_t)((m_slice->m_sps->picHeightInLumaSamples +
> offset - m_cuPelY - 1) << mvshift);
> +    int32_t ymin = -(int32_t)((m_encData->m_param->maxCUSize + offset +
> m_cuPelY - 1) << mvshift);
>
>      outMV.x = X265_MIN(xmax, X265_MAX(xmin, outMV.x));
>      outMV.y = X265_MIN(ymax, X265_MAX(ymin, outMV.y));
> diff -r 3e5032228123 -r 0018ca1ca42f source/common/mv.h
> --- a/source/common/mv.h        Fri Mar 22 13:09:49 2019 +0530
> +++ b/source/common/mv.h        Fri Mar 29 09:20:41 2019 +0900
> @@ -39,16 +39,16 @@
>  public:
>
>      union {
> -        struct { int16_t x, y; };
> +        struct { int32_t x, y; };
>
> -        int32_t word;
> +        int64_t word;
>      };
>
>      MV()                                       {}
> -    MV(int32_t w) : word(w)                    {}
> -    MV(int16_t _x, int16_t _y) : x(_x), y(_y)  {}
> +    MV(int64_t w) : word(w)                    {}
> +    MV(int32_t _x, int32_t _y) : x(_x), y(_y)  {}
>
> -    MV& operator =(uint32_t w)                 { word = w; return *this; }
> +    MV& operator =(uint64_t w)                 { word = w; return *this; }
>
>      MV& operator +=(const MV& other)           { x += other.x; y +=
> other.y; return *this; }
>
> @@ -67,7 +67,7 @@
>
>      MV operator >>(int i) const                { return MV(x >> i, y >>
> i); }
>
> -    MV operator *(int16_t i) const             { return MV(x * i, y * i);
> }
> +    MV operator *(int32_t i) const             { return MV(x * i, y * i);
> }
>
>      MV operator -(const MV& other) const       { return MV(x - other.x, y
> - other.y); }
>
> @@ -87,7 +87,7 @@
>
>      bool inline notZero() const                { return this->word != 0; }
>
> -    bool inline isSubpel() const               { return (this->word &
> 0x00030003) != 0; }
> +    bool inline isSubpel() const               { return (this->word &
> 0x0000000300000003ll) != 0; }
>
>      MV mvmin(const MV& m) const                { return MV(x > m.x ? m.x
> : x, y > m.y ? m.y : y); }
>
> diff -r 3e5032228123 -r 0018ca1ca42f source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp        Fri Mar 22 13:09:49 2019 +0530
> +++ b/source/encoder/encoder.cpp        Fri Mar 29 09:20:41 2019 +0900
> @@ -3631,8 +3631,8 @@
>                                  (analysis->interData)->refIdx[i][count +
> pu] = refIdx[i][d];
>                                  if (m_param->scaleFactor)
>                                  {
> -                                    mv[i][d].x *=
> (int16_t)m_param->scaleFactor;
> -                                    mv[i][d].y *=
> (int16_t)m_param->scaleFactor;
> +                                    mv[i][d].x *=
> (int32_t)m_param->scaleFactor;
> +                                    mv[i][d].y *=
> (int32_t)m_param->scaleFactor;
>                                  }
>
>  memcpy(&(analysis->interData)->mv[i][count + pu], &mv[i][d], sizeof(MV));
>                              }
> @@ -4014,8 +4014,8 @@
>                              {
>                                  (analysis->interData)->mvpIdx[i][count +
> pu] = mvpIdx[i][d];
>                                  (analysis->interData)->refIdx[i][count +
> pu] = refIdx[i][d];
> -                                mvCopy[i].x = mv[i][d].x *
> (int16_t)m_param->scaleFactor;
> -                                mvCopy[i].y = mv[i][d].y *
> (int16_t)m_param->scaleFactor;
> +                                mvCopy[i].x = mv[i][d].x *
> (int32_t)m_param->scaleFactor;
> +                                mvCopy[i].y = mv[i][d].y *
> (int32_t)m_param->scaleFactor;
>
>  memcpy(&(analysis->interData)->mv[i][count + pu], &mvCopy[i], sizeof(MV));
>                              }
>                          }
> diff -r 3e5032228123 -r 0018ca1ca42f source/encoder/frameencoder.cpp
> --- a/source/encoder/frameencoder.cpp   Fri Mar 22 13:09:49 2019 +0530
> +++ b/source/encoder/frameencoder.cpp   Fri Mar 29 09:20:41 2019 +0900
> @@ -1406,8 +1406,8 @@
>      }
>
>      // Initialize restrict on MV range in slices
> -    tld.analysis.m_sliceMinY = -(int16_t)(rowInSlice * m_param->maxCUSize
> * 4) + 3 * 4;
> -    tld.analysis.m_sliceMaxY = (int16_t)((endRowInSlicePlus1 - 1 - row) *
> (m_param->maxCUSize * 4) - 4 * 4);
> +    tld.analysis.m_sliceMinY = -(int32_t)(rowInSlice * m_param->maxCUSize
> * 4) + 3 * 4;
> +    tld.analysis.m_sliceMaxY = (int32_t)((endRowInSlicePlus1 - 1 - row) *
> (m_param->maxCUSize * 4) - 4 * 4);
>
>      // Handle single row slice
>      if (tld.analysis.m_sliceMaxY < tld.analysis.m_sliceMinY)
> diff -r 3e5032228123 -r 0018ca1ca42f source/encoder/motion.cpp
> --- a/source/encoder/motion.cpp Fri Mar 22 13:09:49 2019 +0530
> +++ b/source/encoder/motion.cpp Fri Mar 29 09:20:41 2019 +0900
> @@ -382,10 +382,10 @@
>              4 * 5
>                7
>           */
> -        const int16_t top    = omv.y - dist;
> -        const int16_t bottom = omv.y + dist;
> -        const int16_t left   = omv.x - dist;
> -        const int16_t right  = omv.x + dist;
> +        const int32_t top    = omv.y - dist;
> +        const int32_t bottom = omv.y + dist;
> +        const int32_t left   = omv.x - dist;
> +        const int32_t right  = omv.x + dist;
>
>          if (top >= mvmin.y && left >= mvmin.x && right <= mvmax.x &&
> bottom <= mvmax.y)
>          {
> @@ -430,14 +430,14 @@
>           Points 2, 4, 5, 7 are dist
>           Points 1, 3, 6, 8 are dist>>1
>           */
> -        const int16_t top     = omv.y - dist;
> -        const int16_t bottom  = omv.y + dist;
> -        const int16_t left    = omv.x - dist;
> -        const int16_t right   = omv.x + dist;
> -        const int16_t top2    = omv.y - (dist >> 1);
> -        const int16_t bottom2 = omv.y + (dist >> 1);
> -        const int16_t left2   = omv.x - (dist >> 1);
> -        const int16_t right2  = omv.x + (dist >> 1);
> +        const int32_t top     = omv.y - dist;
> +        const int32_t bottom  = omv.y + dist;
> +        const int32_t left    = omv.x - dist;
> +        const int32_t right   = omv.x + dist;
> +        const int32_t top2    = omv.y - (dist >> 1);
> +        const int32_t bottom2 = omv.y + (dist >> 1);
> +        const int32_t left2   = omv.x - (dist >> 1);
> +        const int32_t right2  = omv.x + (dist >> 1);
>          saved = bcost;
>
>          if (top >= mvmin.y && left >= mvmin.x &&
> @@ -502,10 +502,10 @@
>
>      for (int16_t dist = 16; dist <= (int16_t)merange; dist <<= 1)
>      {
> -        const int16_t top    = omv.y - dist;
> -        const int16_t bottom = omv.y + dist;
> -        const int16_t left   = omv.x - dist;
> -        const int16_t right  = omv.x + dist;
> +        const int32_t top    = omv.y - dist;
> +        const int32_t bottom = omv.y + dist;
> +        const int32_t left   = omv.x - dist;
> +        const int32_t right  = omv.x + dist;
>
>          saved = bcost;
>          if (top >= mvmin.y && left >= mvmin.x &&
> @@ -530,10 +530,10 @@
>
>              for (int16_t index = 1; index < 4; index++)
>              {
> -                int16_t posYT = top    + ((dist >> 2) * index);
> -                int16_t posYB = bottom - ((dist >> 2) * index);
> -                int16_t posXL = omv.x  - ((dist >> 2) * index);
> -                int16_t posXR = omv.x  + ((dist >> 2) * index);
> +                int32_t posYT = top    + ((dist >> 2) * index);
> +                int32_t posYB = bottom - ((dist >> 2) * index);
> +                int32_t posXL = omv.x  - ((dist >> 2) * index);
> +                int32_t posXR = omv.x  + ((dist >> 2) * index);
>
>                  COST_MV_PT_DIST_X4(posXL, posYT, 0, dist,
>                                     posXR, posYT, 0, dist,
> @@ -561,10 +561,10 @@
>              }
>              for (int16_t index = 1; index < 4; index++)
>              {
> -                int16_t posYT = top    + ((dist >> 2) * index);
> -                int16_t posYB = bottom - ((dist >> 2) * index);
> -                int16_t posXL = omv.x - ((dist >> 2) * index);
> -                int16_t posXR = omv.x + ((dist >> 2) * index);
> +                int32_t posYT = top    + ((dist >> 2) * index);
> +                int32_t posYB = bottom - ((dist >> 2) * index);
> +                int32_t posXL = omv.x - ((dist >> 2) * index);
> +                int32_t posXR = omv.x + ((dist >> 2) * index);
>
>                  if (posYT >= mvmin.y) // check top
>                  {
> @@ -1235,10 +1235,10 @@
>      case X265_SEA:
>      {
>          // Successive Elimination Algorithm
> -        const int16_t minX = X265_MAX(omv.x - (int16_t)merange, mvmin.x);
> -        const int16_t minY = X265_MAX(omv.y - (int16_t)merange, mvmin.y);
> -        const int16_t maxX = X265_MIN(omv.x + (int16_t)merange, mvmax.x);
> -        const int16_t maxY = X265_MIN(omv.y + (int16_t)merange, mvmax.y);
> +        const int32_t minX = X265_MAX(omv.x - (int32_t)merange, mvmin.x);
> +        const int32_t minY = X265_MAX(omv.y - (int32_t)merange, mvmin.y);
> +        const int32_t maxX = X265_MIN(omv.x + (int32_t)merange, mvmax.x);
> +        const int32_t maxY = X265_MIN(omv.y + (int32_t)merange, mvmax.y);
>          const uint16_t *p_cost_mvx = m_cost_mvx - qmvp.x;
>          const uint16_t *p_cost_mvy = m_cost_mvy - qmvp.y;
>          int16_t* meScratchBuffer = NULL;
> diff -r 3e5032228123 -r 0018ca1ca42f source/encoder/search.cpp
> --- a/source/encoder/search.cpp Fri Mar 22 13:09:49 2019 +0530
> +++ b/source/encoder/search.cpp Fri Mar 29 09:20:41 2019 +0900
> @@ -2633,7 +2633,7 @@
>
>  void Search::setSearchRange(const CUData& cu, const MV& mvp, int merange,
> MV& mvmin, MV& mvmax) const
>  {
> -    MV dist((int16_t)merange << 2, (int16_t)merange << 2);
> +    MV dist((int32_t)merange << 2, (int32_t)merange << 2);
>      mvmin = mvp - dist;
>      mvmax = mvp + dist;
>
> @@ -2670,8 +2670,8 @@
>      mvmax >>= 2;
>
>      /* conditional clipping for frame parallelism */
> -    mvmin.y = X265_MIN(mvmin.y, (int16_t)m_refLagPixels);
> -    mvmax.y = X265_MIN(mvmax.y, (int16_t)m_refLagPixels);
> +    mvmin.y = X265_MIN(mvmin.y, (int32_t)m_refLagPixels);
> +    mvmax.y = X265_MIN(mvmax.y, (int32_t)m_refLagPixels);
>
>      /* conditional clipping for negative mv range */
>      mvmax.y = X265_MAX(mvmax.y, mvmin.y);
> diff -r 3e5032228123 -r 0018ca1ca42f source/encoder/search.h
> --- a/source/encoder/search.h   Fri Mar 22 13:09:49 2019 +0530
> +++ b/source/encoder/search.h   Fri Mar 29 09:20:41 2019 +0900
> @@ -283,8 +283,8 @@
>      int32_t         m_maxTUDepth;
>      uint16_t        m_limitTU;
>
> -    int16_t         m_sliceMaxY;
> -    int16_t         m_sliceMinY;
> +    int32_t         m_sliceMaxY;
> +    int32_t         m_sliceMinY;
>
>  #if DETAILED_CU_STATS
>      /* Accumulate CU statistics separately for each frame encoder */
> diff -r 3e5032228123 -r 0018ca1ca42f source/encoder/slicetype.cpp
> --- a/source/encoder/slicetype.cpp      Fri Mar 22 13:09:49 2019 +0530
> +++ b/source/encoder/slicetype.cpp      Fri Mar 29 09:20:41 2019 +0900
> @@ -2860,10 +2860,10 @@
>
>      // TODO: restrict to slices boundaries
>      // establish search bounds that don't cross extended frame boundaries
> -    mvmin.x = (int16_t)(-cuX * cuSize - 8);
> -    mvmin.y = (int16_t)(-cuY * cuSize - 8);
> -    mvmax.x = (int16_t)((widthInCU - cuX - 1) * cuSize + 8);
> -    mvmax.y = (int16_t)((heightInCU - cuY - 1) * cuSize + 8);
> +    mvmin.x = (int32_t)(-cuX * cuSize - 8);
> +    mvmin.y = (int32_t)(-cuY * cuSize - 8);
> +    mvmax.x = (int32_t)((widthInCU - cuX - 1) * cuSize + 8);
> +    mvmax.y = (int32_t)((heightInCU - cuY - 1) * cuSize + 8);
>
>      for (int i = 0; i < 1 + bBidir; i++)
>      {
> diff -r 3e5032228123 -r 0018ca1ca42f source/encoder/weightPrediction.cpp
> --- a/source/encoder/weightPrediction.cpp       Fri Mar 22 13:09:49 2019 +0530
> +++ b/source/encoder/weightPrediction.cpp       Fri Mar 29 09:20:41 2019 +0900
> @@ -69,15 +69,15 @@
>      for (int y = 0; y < ref.lines; y += cuSize)
>      {
>          intptr_t pixoff = y * stride;
> -        mvmin.y = (int16_t)((-y - 8) * mvshift);
> -        mvmax.y = (int16_t)((ref.lines - y - 1 + 8) * mvshift);
> +        mvmin.y = (int32_t)((-y - 8) * mvshift);
> +        mvmax.y = (int32_t)((ref.lines - y - 1 + 8) * mvshift);
>
>          for (int x = 0; x < ref.width; x += cuSize, pixoff += cuSize,
> cu++)
>          {
>              ALIGN_VAR_16(pixel, buf8x8[8 * 8]);
>              intptr_t bstride = 8;
> -            mvmin.x = (int16_t)((-x - 8) * mvshift);
> -            mvmax.x = (int16_t)((ref.width - x - 1 + 8) * mvshift);
> +            mvmin.x = (int32_t)((-x - 8) * mvshift);
> +            mvmax.x = (int32_t)((ref.width - x - 1 + 8) * mvshift);
>
>              /* clip MV to available pixels */
>              MV mv = mvs[cu];
> @@ -113,8 +113,8 @@
>           * into the lowres structures */
>          int cu = y * cache.lowresWidthInCU;
>          intptr_t pixoff = y * stride;
> -        mvmin.y = (int16_t)((-y - 8) * mvshift);
> -        mvmax.y = (int16_t)((height - y - 1 + 8) * mvshift);
> +        mvmin.y = (int32_t)((-y - 8) * mvshift);
> +        mvmax.y = (int32_t)((height - y - 1 + 8) * mvshift);
>
>          for (int x = 0; x < width; x += bw, cu++, pixoff += bw)
>          {
> @@ -126,8 +126,8 @@
>                  mv.y >>= cache.vshift;
>
>                  /* clip MV to available pixels */
> -                mvmin.x = (int16_t)((-x - 8) * mvshift);
> -                mvmax.x = (int16_t)((width - x - 1 + 8) * mvshift);
> +                mvmin.x = (int32_t)((-x - 8) * mvshift);
> +                mvmax.x = (int32_t)((width - x - 1 + 8) * mvshift);
>                  mv = mv.clipped(mvmin, mvmax);
>
>                  intptr_t fpeloffset = (mv.y >> 2) * stride + (mv.x >> 2);
> diff -r 3e5032228123 -r 0018ca1ca42f source/x265.h
> --- a/source/x265.h     Fri Mar 22 13:09:49 2019 +0530
> +++ b/source/x265.h     Fri Mar 29 09:20:41 2019 +0900
> @@ -147,9 +147,9 @@
>  typedef struct x265_analysis_MV
>  {
>      union{
> -        struct { int16_t x, y; };
> +        struct { int32_t x, y; };
>
> -        int32_t word;
> +        int64_t word;
>      };
>  }x265_analysis_MV;
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20190329/8b819f29/attachment-0001.html>