[x265] [PATCH] Fix MV Wrap-around

Fri Feb 8 04:15:53 CET 2019

# HG changeset patch
# User Masaharu Tamura <tamura at pegasys-inc.com>
# Date 1549593085 -32400
#      Fri Feb 08 11:31:25 2019 +0900
# Node ID e242ae904637b27e8c45658f299da4603f8e981a
# Parent  5b90dc59b57a8ea8f4fb269400c2335be3bb73c1
Fix MV Wrap-around

Fix motion-vector wrap-around caused by overflow of the 16-bit MV structure fields, which occurred at picture dimensions of around 8K pixels or more.

diff -r 5b90dc59b57a -r e242ae904637 source/CMakeLists.txt

--- a/source/CMakeLists.txt	Thu Feb 07 16:29:36 2019 +0530
+++ b/source/CMakeLists.txt	Fri Feb 08 11:31:25 2019 +0900
@@ -29,7 +29,7 @@
 option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF)
 mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
 # X265_BUILD must be incremented each time the public API is changed
-set(X265_BUILD 169)
+set(X265_BUILD 170)
 configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
                "${PROJECT_BINARY_DIR}/x265.def")
 configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
diff -r 5b90dc59b57a -r e242ae904637 source/common/cudata.cpp
--- a/source/common/cudata.cpp	Thu Feb 07 16:29:36 2019 +0530
+++ b/source/common/cudata.cpp	Fri Feb 08 11:31:25 2019 +0900
@@ -106,7 +106,7 @@
     int mvx = x265_clip3(-32768, 32767, (scale * mv.x + 127 + (scale * mv.x < 0)) >> 8);
     int mvy = x265_clip3(-32768, 32767, (scale * mv.y + 127 + (scale * mv.y < 0)) >> 8);
 
-    return MV((int16_t)mvx, (int16_t)mvy);
+    return MV((int32_t)mvx, (int32_t)mvy);
 }
 
 }
@@ -1917,11 +1917,11 @@
     const uint32_t mvshift = 2;
     uint32_t offset = 8;
 
-    int16_t xmax = (int16_t)((m_slice->m_sps->picWidthInLumaSamples + offset - m_cuPelX - 1) << mvshift);
-    int16_t xmin = -(int16_t)((m_encData->m_param->maxCUSize + offset + m_cuPelX - 1) << mvshift);
+    int32_t xmax = (int32_t)((m_slice->m_sps->picWidthInLumaSamples + offset - m_cuPelX - 1) << mvshift);
+    int32_t xmin = -(int32_t)((m_encData->m_param->maxCUSize + offset + m_cuPelX - 1) << mvshift);
 
-    int16_t ymax = (int16_t)((m_slice->m_sps->picHeightInLumaSamples + offset - m_cuPelY - 1) << mvshift);
-    int16_t ymin = -(int16_t)((m_encData->m_param->maxCUSize + offset + m_cuPelY - 1) << mvshift);
+    int32_t ymax = (int32_t)((m_slice->m_sps->picHeightInLumaSamples + offset - m_cuPelY - 1) << mvshift);
+    int32_t ymin = -(int32_t)((m_encData->m_param->maxCUSize + offset + m_cuPelY - 1) << mvshift);
 
     outMV.x = X265_MIN(xmax, X265_MAX(xmin, outMV.x));
     outMV.y = X265_MIN(ymax, X265_MAX(ymin, outMV.y));
diff -r 5b90dc59b57a -r e242ae904637 source/common/mv.h
--- a/source/common/mv.h	Thu Feb 07 16:29:36 2019 +0530
+++ b/source/common/mv.h	Fri Feb 08 11:31:25 2019 +0900
@@ -39,16 +39,16 @@
 public:
 
     union {
-        struct { int16_t x, y; };
+        struct { int32_t x, y; };
 
-        int32_t word;
+        int64_t word;
     };
 
     MV()                                       {}
-    MV(int32_t w) : word(w)                    {}
-    MV(int16_t _x, int16_t _y) : x(_x), y(_y)  {}
+    MV(int64_t w) : word(w)                    {}
+    MV(int32_t _x, int32_t _y) : x(_x), y(_y)  {}
 
-    MV& operator =(uint32_t w)                 { word = w; return *this; }
+    MV& operator =(uint64_t w)                 { word = w; return *this; }
 
     MV& operator +=(const MV& other)           { x += other.x; y += other.y; return *this; }
 
@@ -67,7 +67,7 @@
 
     MV operator >>(int i) const                { return MV(x >> i, y >> i); }
 
-    MV operator *(int16_t i) const             { return MV(x * i, y * i); }
+    MV operator *(int32_t i) const             { return MV(x * i, y * i); }
 
     MV operator -(const MV& other) const       { return MV(x - other.x, y - other.y); }
 
@@ -87,7 +87,7 @@
 
     bool inline notZero() const                { return this->word != 0; }
 
-    bool inline isSubpel() const               { return (this->word & 0x00030003) != 0; }
+    bool inline isSubpel() const               { return (this->word & 0x0000000300000003LL) != 0; } // tests low 2 bits of x and y; LL is the portable suffix (I64 is MSVC-only)
 
     MV mvmin(const MV& m) const                { return MV(x > m.x ? m.x : x, y > m.y ? m.y : y); }
 
diff -r 5b90dc59b57a -r e242ae904637 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Thu Feb 07 16:29:36 2019 +0530
+++ b/source/encoder/encoder.cpp	Fri Feb 08 11:31:25 2019 +0900
@@ -3619,8 +3619,8 @@
                                 (analysis->interData)->refIdx[i][count + pu] = refIdx[i][d];
                                 if (m_param->scaleFactor)
                                 {
-                                    mv[i][d].x *= (int16_t)m_param->scaleFactor;
-                                    mv[i][d].y *= (int16_t)m_param->scaleFactor;
+                                    mv[i][d].x *= (int32_t)m_param->scaleFactor;
+                                    mv[i][d].y *= (int32_t)m_param->scaleFactor;
                                 }
                                 memcpy(&(analysis->interData)->mv[i][count + pu], &mv[i][d], sizeof(MV));
                             }
@@ -4002,8 +4002,8 @@
                             {
                                 (analysis->interData)->mvpIdx[i][count + pu] = mvpIdx[i][d];
                                 (analysis->interData)->refIdx[i][count + pu] = refIdx[i][d];
-                                mvCopy[i].x = mv[i][d].x * (int16_t)m_param->scaleFactor;
-                                mvCopy[i].y = mv[i][d].y * (int16_t)m_param->scaleFactor;
+                                mvCopy[i].x = mv[i][d].x * (int32_t)m_param->scaleFactor;
+                                mvCopy[i].y = mv[i][d].y * (int32_t)m_param->scaleFactor;
                                 memcpy(&(analysis->interData)->mv[i][count + pu], &mvCopy[i], sizeof(MV));
                             }
                         }
diff -r 5b90dc59b57a -r e242ae904637 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Thu Feb 07 16:29:36 2019 +0530
+++ b/source/encoder/frameencoder.cpp	Fri Feb 08 11:31:25 2019 +0900
@@ -1406,8 +1406,8 @@
     }
 
     // Initialize restrict on MV range in slices
-    tld.analysis.m_sliceMinY = -(int16_t)(rowInSlice * m_param->maxCUSize * 4) + 3 * 4;
-    tld.analysis.m_sliceMaxY = (int16_t)((endRowInSlicePlus1 - 1 - row) * (m_param->maxCUSize * 4) - 4 * 4);
+    tld.analysis.m_sliceMinY = -(int32_t)(rowInSlice * m_param->maxCUSize * 4) + 3 * 4;
+    tld.analysis.m_sliceMaxY = (int32_t)((endRowInSlicePlus1 - 1 - row) * (m_param->maxCUSize * 4) - 4 * 4);
 
     // Handle single row slice
     if (tld.analysis.m_sliceMaxY < tld.analysis.m_sliceMinY)
diff -r 5b90dc59b57a -r e242ae904637 source/encoder/motion.cpp
--- a/source/encoder/motion.cpp	Thu Feb 07 16:29:36 2019 +0530
+++ b/source/encoder/motion.cpp	Fri Feb 08 11:31:25 2019 +0900
@@ -382,10 +382,10 @@
             4 * 5
               7
          */
-        const int16_t top    = omv.y - dist;
-        const int16_t bottom = omv.y + dist;
-        const int16_t left   = omv.x - dist;
-        const int16_t right  = omv.x + dist;
+        const int32_t top    = omv.y - dist;
+        const int32_t bottom = omv.y + dist;
+        const int32_t left   = omv.x - dist;
+        const int32_t right  = omv.x + dist;
 
         if (top >= mvmin.y && left >= mvmin.x && right <= mvmax.x && bottom <= mvmax.y)
         {
@@ -430,14 +430,14 @@
          Points 2, 4, 5, 7 are dist
          Points 1, 3, 6, 8 are dist>>1
          */
-        const int16_t top     = omv.y - dist;
-        const int16_t bottom  = omv.y + dist;
-        const int16_t left    = omv.x - dist;
-        const int16_t right   = omv.x + dist;
-        const int16_t top2    = omv.y - (dist >> 1);
-        const int16_t bottom2 = omv.y + (dist >> 1);
-        const int16_t left2   = omv.x - (dist >> 1);
-        const int16_t right2  = omv.x + (dist >> 1);
+        const int32_t top     = omv.y - dist;
+        const int32_t bottom  = omv.y + dist;
+        const int32_t left    = omv.x - dist;
+        const int32_t right   = omv.x + dist;
+        const int32_t top2    = omv.y - (dist >> 1);
+        const int32_t bottom2 = omv.y + (dist >> 1);
+        const int32_t left2   = omv.x - (dist >> 1);
+        const int32_t right2  = omv.x + (dist >> 1);
         saved = bcost;
 
         if (top >= mvmin.y && left >= mvmin.x &&
@@ -502,10 +502,10 @@
 
     for (int16_t dist = 16; dist <= (int16_t)merange; dist <<= 1)
     {
-        const int16_t top    = omv.y - dist;
-        const int16_t bottom = omv.y + dist;
-        const int16_t left   = omv.x - dist;
-        const int16_t right  = omv.x + dist;
+        const int32_t top    = omv.y - dist;
+        const int32_t bottom = omv.y + dist;
+        const int32_t left   = omv.x - dist;
+        const int32_t right  = omv.x + dist;
 
         saved = bcost;
         if (top >= mvmin.y && left >= mvmin.x &&
@@ -530,10 +530,10 @@
 
             for (int16_t index = 1; index < 4; index++)
             {
-                int16_t posYT = top    + ((dist >> 2) * index);
-                int16_t posYB = bottom - ((dist >> 2) * index);
-                int16_t posXL = omv.x  - ((dist >> 2) * index);
-                int16_t posXR = omv.x  + ((dist >> 2) * index);
+                int32_t posYT = top    + ((dist >> 2) * index);
+                int32_t posYB = bottom - ((dist >> 2) * index);
+                int32_t posXL = omv.x  - ((dist >> 2) * index);
+                int32_t posXR = omv.x  + ((dist >> 2) * index);
 
                 COST_MV_PT_DIST_X4(posXL, posYT, 0, dist,
                                    posXR, posYT, 0, dist,
@@ -561,10 +561,10 @@
             }
             for (int16_t index = 1; index < 4; index++)
             {
-                int16_t posYT = top    + ((dist >> 2) * index);
-                int16_t posYB = bottom - ((dist >> 2) * index);
-                int16_t posXL = omv.x - ((dist >> 2) * index);
-                int16_t posXR = omv.x + ((dist >> 2) * index);
+                int32_t posYT = top    + ((dist >> 2) * index);
+                int32_t posYB = bottom - ((dist >> 2) * index);
+                int32_t posXL = omv.x - ((dist >> 2) * index);
+                int32_t posXR = omv.x + ((dist >> 2) * index);
 
                 if (posYT >= mvmin.y) // check top
                 {
@@ -1235,10 +1235,10 @@
     case X265_SEA:
     {
         // Successive Elimination Algorithm
-        const int16_t minX = X265_MAX(omv.x - (int16_t)merange, mvmin.x);
-        const int16_t minY = X265_MAX(omv.y - (int16_t)merange, mvmin.y);
-        const int16_t maxX = X265_MIN(omv.x + (int16_t)merange, mvmax.x);
-        const int16_t maxY = X265_MIN(omv.y + (int16_t)merange, mvmax.y);
+        const int32_t minX = X265_MAX(omv.x - (int32_t)merange, mvmin.x);
+        const int32_t minY = X265_MAX(omv.y - (int32_t)merange, mvmin.y);
+        const int32_t maxX = X265_MIN(omv.x + (int32_t)merange, mvmax.x);
+        const int32_t maxY = X265_MIN(omv.y + (int32_t)merange, mvmax.y);
         const uint16_t *p_cost_mvx = m_cost_mvx - qmvp.x;
         const uint16_t *p_cost_mvy = m_cost_mvy - qmvp.y;
         int16_t* meScratchBuffer = NULL;
diff -r 5b90dc59b57a -r e242ae904637 source/encoder/search.cpp
--- a/source/encoder/search.cpp	Thu Feb 07 16:29:36 2019 +0530
+++ b/source/encoder/search.cpp	Fri Feb 08 11:31:25 2019 +0900
@@ -2633,7 +2633,7 @@
 
 void Search::setSearchRange(const CUData& cu, const MV& mvp, int merange, MV& mvmin, MV& mvmax) const
 {
-    MV dist((int16_t)merange << 2, (int16_t)merange << 2);
+    MV dist((int32_t)merange << 2, (int32_t)merange << 2);
     mvmin = mvp - dist;
     mvmax = mvp + dist;
 
@@ -2670,8 +2670,8 @@
     mvmax >>= 2;
 
     /* conditional clipping for frame parallelism */
-    mvmin.y = X265_MIN(mvmin.y, (int16_t)m_refLagPixels);
-    mvmax.y = X265_MIN(mvmax.y, (int16_t)m_refLagPixels);
+    mvmin.y = X265_MIN(mvmin.y, (int32_t)m_refLagPixels);
+    mvmax.y = X265_MIN(mvmax.y, (int32_t)m_refLagPixels);
 
     /* conditional clipping for negative mv range */
     mvmax.y = X265_MAX(mvmax.y, mvmin.y);
diff -r 5b90dc59b57a -r e242ae904637 source/encoder/search.h
--- a/source/encoder/search.h	Thu Feb 07 16:29:36 2019 +0530
+++ b/source/encoder/search.h	Fri Feb 08 11:31:25 2019 +0900
@@ -283,8 +283,8 @@
     int32_t         m_maxTUDepth;
     uint16_t        m_limitTU;
 
-    int16_t         m_sliceMaxY;
-    int16_t         m_sliceMinY;
+    int32_t         m_sliceMaxY;
+    int32_t         m_sliceMinY;
 
 #if DETAILED_CU_STATS
     /* Accumulate CU statistics separately for each frame encoder */
diff -r 5b90dc59b57a -r e242ae904637 source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp	Thu Feb 07 16:29:36 2019 +0530
+++ b/source/encoder/slicetype.cpp	Fri Feb 08 11:31:25 2019 +0900
@@ -2860,10 +2860,10 @@
 
     // TODO: restrict to slices boundaries
     // establish search bounds that don't cross extended frame boundaries
-    mvmin.x = (int16_t)(-cuX * cuSize - 8);
-    mvmin.y = (int16_t)(-cuY * cuSize - 8);
-    mvmax.x = (int16_t)((widthInCU - cuX - 1) * cuSize + 8);
-    mvmax.y = (int16_t)((heightInCU - cuY - 1) * cuSize + 8);
+    mvmin.x = (int32_t)(-cuX * cuSize - 8);
+    mvmin.y = (int32_t)(-cuY * cuSize - 8);
+    mvmax.x = (int32_t)((widthInCU - cuX - 1) * cuSize + 8);
+    mvmax.y = (int32_t)((heightInCU - cuY - 1) * cuSize + 8);
 
     for (int i = 0; i < 1 + bBidir; i++)
     {
diff -r 5b90dc59b57a -r e242ae904637 source/encoder/weightPrediction.cpp
--- a/source/encoder/weightPrediction.cpp	Thu Feb 07 16:29:36 2019 +0530
+++ b/source/encoder/weightPrediction.cpp	Fri Feb 08 11:31:25 2019 +0900
@@ -69,15 +69,15 @@
     for (int y = 0; y < ref.lines; y += cuSize)
     {
         intptr_t pixoff = y * stride;
-        mvmin.y = (int16_t)((-y - 8) * mvshift);
-        mvmax.y = (int16_t)((ref.lines - y - 1 + 8) * mvshift);
+        mvmin.y = (int32_t)((-y - 8) * mvshift);
+        mvmax.y = (int32_t)((ref.lines - y - 1 + 8) * mvshift);
 
         for (int x = 0; x < ref.width; x += cuSize, pixoff += cuSize, cu++)
         {
             ALIGN_VAR_16(pixel, buf8x8[8 * 8]);
             intptr_t bstride = 8;
-            mvmin.x = (int16_t)((-x - 8) * mvshift);
-            mvmax.x = (int16_t)((ref.width - x - 1 + 8) * mvshift);
+            mvmin.x = (int32_t)((-x - 8) * mvshift);
+            mvmax.x = (int32_t)((ref.width - x - 1 + 8) * mvshift);
 
             /* clip MV to available pixels */
             MV mv = mvs[cu];
@@ -113,8 +113,8 @@
          * into the lowres structures */
         int cu = y * cache.lowresWidthInCU;
         intptr_t pixoff = y * stride;
-        mvmin.y = (int16_t)((-y - 8) * mvshift);
-        mvmax.y = (int16_t)((height - y - 1 + 8) * mvshift);
+        mvmin.y = (int32_t)((-y - 8) * mvshift);
+        mvmax.y = (int32_t)((height - y - 1 + 8) * mvshift);
 
         for (int x = 0; x < width; x += bw, cu++, pixoff += bw)
         {
@@ -126,8 +126,8 @@
                 mv.y >>= cache.vshift;
 
                 /* clip MV to available pixels */
-                mvmin.x = (int16_t)((-x - 8) * mvshift);
-                mvmax.x = (int16_t)((width - x - 1 + 8) * mvshift);
+                mvmin.x = (int32_t)((-x - 8) * mvshift);
+                mvmax.x = (int32_t)((width - x - 1 + 8) * mvshift);
                 mv = mv.clipped(mvmin, mvmax);
 
                 intptr_t fpeloffset = (mv.y >> 2) * stride + (mv.x >> 2);
diff -r 5b90dc59b57a -r e242ae904637 source/x265.h
--- a/source/x265.h	Thu Feb 07 16:29:36 2019 +0530
+++ b/source/x265.h	Fri Feb 08 11:31:25 2019 +0900
@@ -147,9 +147,9 @@
 typedef struct x265_analysis_MV
 {
     union{
-        struct { int16_t x, y; };
+        struct { int32_t x, y; };
 
-        int32_t word;
+        int64_t word;
     };
 }x265_analysis_MV;