[x265] modify MV default constructor to do nothing

Mon Nov 17 11:47:23 CET 2014

# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1416221075 -32400
#      Mon Nov 17 19:44:35 2014 +0900
# Node ID 90ec907326e25ae40b7dc38130cf81874d201ad2
# Parent  27d36c4b4a27d2872430c6a6fc538fbddcf791e6
modify MV default constructor to do nothing

diff -r 27d36c4b4a27 -r 90ec907326e2 source/common/cudata.cpp

--- a/source/common/cudata.cpp	Mon Nov 17 01:30:26 2014 +0530
+++ b/source/common/cudata.cpp	Mon Nov 17 19:44:35 2014 +0900
@@ -1237,7 +1237,7 @@
     else
     {
         // OUT OF BOUNDARY
-        outMvField.mv.word = 0;
+        outMvField.mv = 0;
         outMvField.refIdx = REF_NOT_VALID;
     }
 }
@@ -1399,6 +1399,8 @@
 
     for (uint32_t i = 0; i < maxNumMergeCand; ++i)
     {
+        mvFieldNeighbours[i][0].mv = 0;
+        mvFieldNeighbours[i][1].mv = 0;
         mvFieldNeighbours[i][0].refIdx = REF_NOT_VALID;
         mvFieldNeighbours[i][1].refIdx = REF_NOT_VALID;
     }
@@ -1646,7 +1648,7 @@
     while (count < maxNumMergeCand)
     {
         interDirNeighbours[count] = 1;
-        mvFieldNeighbours[count][0].mv.word = 0;
+        mvFieldNeighbours[count][0].mv = 0;
         mvFieldNeighbours[count][0].refIdx = r;
 
         if (isInterB)
diff -r 27d36c4b4a27 -r 90ec907326e2 source/common/lowres.h
--- a/source/common/lowres.h	Mon Nov 17 01:30:26 2014 +0530
+++ b/source/common/lowres.h	Mon Nov 17 19:44:35 2014 +0900
@@ -56,11 +56,10 @@
         {
             int hpelA = (qmv.y & 2) | ((qmv.x & 2) >> 1);
             pixel *frefA = lowresPlane[hpelA] + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * lumaStride;
-
-            MV qmvB = qmv + MV((qmv.x & 1) * 2, (qmv.y & 1) * 2);
-            int hpelB = (qmvB.y & 2) | ((qmvB.x & 2) >> 1);
-
-            pixel *frefB = lowresPlane[hpelB] + blockOffset + (qmvB.x >> 2) + (qmvB.y >> 2) * lumaStride;
+            int qmvx = qmv.x + (qmv.x & 1);
+            int qmvy = qmv.y + (qmv.y & 1);
+            int hpelB = (qmvy & 2) | ((qmvx & 2) >> 1);
+            pixel *frefB = lowresPlane[hpelB] + blockOffset + (qmvx >> 2) + (qmvy >> 2) * lumaStride;
             primitives.pixelavg_pp[LUMA_8x8](buf, outstride, frefA, lumaStride, frefB, lumaStride, 32);
             return buf;
         }
@@ -79,9 +78,10 @@
             ALIGN_VAR_16(pixel, subpelbuf[8 * 8]);
             int hpelA = (qmv.y & 2) | ((qmv.x & 2) >> 1);
             pixel *frefA = lowresPlane[hpelA] + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * lumaStride;
-            MV qmvB = qmv + MV((qmv.x & 1) * 2, (qmv.y & 1) * 2);
-            int hpelB = (qmvB.y & 2) | ((qmvB.x & 2) >> 1);
-            pixel *frefB = lowresPlane[hpelB] + blockOffset + (qmvB.x >> 2) + (qmvB.y >> 2) * lumaStride;
+            int qmvx = qmv.x + (qmv.x & 1);
+            int qmvy = qmv.y + (qmv.y & 1);
+            int hpelB = (qmvy & 2) | ((qmvx & 2) >> 1);
+            pixel *frefB = lowresPlane[hpelB] + blockOffset + (qmvx >> 2) + (qmvy >> 2) * lumaStride;
             primitives.pixelavg_pp[LUMA_8x8](subpelbuf, 8, frefA, lumaStride, frefB, lumaStride, 32);
             return comp(fenc, FENC_STRIDE, subpelbuf, 8);
         }
diff -r 27d36c4b4a27 -r 90ec907326e2 source/common/mv.h
--- a/source/common/mv.h	Mon Nov 17 01:30:26 2014 +0530
+++ b/source/common/mv.h	Mon Nov 17 19:44:35 2014 +0900
@@ -44,19 +44,19 @@
         int32_t word;
     };
 
-    MV() : word(0)                             {}
-
+    MV()                                       {}
+    MV(int32_t w) : word(w)                    {}
     MV(int16_t _x, int16_t _y) : x(_x), y(_y)  {}
 
-    const MV& operator =(uint32_t w)           { word = w; return *this; }
+    MV& operator =(uint32_t w)                 { word = w; return *this; }
 
-    const MV& operator +=(const MV& other)     { x += other.x; y += other.y; return *this; }
+    MV& operator +=(const MV& other)           { x += other.x; y += other.y; return *this; }
 
-    const MV& operator -=(const MV& other)     { x -= other.x; y -= other.y; return *this; }
+    MV& operator -=(const MV& other)           { x -= other.x; y -= other.y; return *this; }
 
-    const MV& operator >>=(int i)              { x >>= i; y >>= i; return *this; }
+    MV& operator >>=(int i)                    { x >>= i; y >>= i; return *this; }
 
-    const MV& operator <<=(int i)              { x <<= i; y <<= i; return *this; }
+    MV& operator <<=(int i)                    { x <<= i; y <<= i; return *this; }
 
     MV operator >>(int i) const                { return MV(x >> i, y >> i); }
 
@@ -64,16 +64,18 @@
 
     MV operator *(int16_t i) const             { return MV(x * i, y * i); }
 
-    const MV operator -(const MV& other) const { return MV(x - other.x, y - other.y); }
+    MV operator -(const MV& other) const       { return MV(x - other.x, y - other.y); }
 
-    const MV operator +(const MV& other) const { return MV(x + other.x, y + other.y); }
+    MV operator +(const MV& other) const       { return MV(x + other.x, y + other.y); }
 
     bool operator ==(const MV& other) const    { return word == other.word; }
 
     bool operator !=(const MV& other) const    { return word != other.word; }
 
+    bool operator !() const                    { return !word; }
+
     // Scale down a QPEL mv to FPEL mv, rounding up by one HPEL offset
-    MV roundToFPel() const                     { return MV(x + 2, y + 2) >> 2; }
+    MV roundToFPel() const                     { return MV((x + 2) >> 2, (y + 2) >> 2); }
 
     // Scale up an FPEL mv to QPEL by shifting up two bits
     MV toQPel() const                          { return *this << 2; }
diff -r 27d36c4b4a27 -r 90ec907326e2 source/encoder/bitcost.h
--- a/source/encoder/bitcost.h	Mon Nov 17 01:30:26 2014 +0530
+++ b/source/encoder/bitcost.h	Mon Nov 17 19:44:35 2014 +0900
@@ -35,7 +35,7 @@
 {
 public:
 
-    BitCost() : m_cost_mvx(0), m_cost_mvy(0), m_cost(0) {}
+    BitCost() : m_cost_mvx(0), m_cost_mvy(0), m_cost(0), m_mvp(0) {}
 
     void setQP(unsigned int qp);
 
diff -r 27d36c4b4a27 -r 90ec907326e2 source/encoder/motion.cpp
--- a/source/encoder/motion.cpp	Mon Nov 17 01:30:26 2014 +0530
+++ b/source/encoder/motion.cpp	Mon Nov 17 19:44:35 2014 +0900
@@ -43,7 +43,7 @@
     bool hpel_satd;
 };
 
-SubpelWorkload workload[X265_MAX_SUBPEL_LEVEL + 1] =
+static const SubpelWorkload workload[X265_MAX_SUBPEL_LEVEL + 1] =
 {
     { 1, 4, 0, 4, false }, // 4 SAD HPEL only
     { 1, 4, 1, 4, false }, // 4 SAD HPEL + 4 SATD QPEL
@@ -116,7 +116,6 @@
     sad_x4 = primitives.sad_x4[partEnum];
 
     blockwidth = width;
-    blockheight = height;
     blockOffset = offset;
 
     /* copy PU block into cache */
@@ -291,7 +290,7 @@
 {
     ALIGN_VAR_16(int, costs[16]);
     pixel *fref = ref->fpelPlane + blockOffset;
-    size_t stride = ref->lumaStride;
+    intptr_t stride = ref->lumaStride;
 
     MV omv = bmv;
     int saved = bcost;
@@ -531,8 +530,8 @@
                                    MV &             outQMv)
 {
     ALIGN_VAR_16(int, costs[16]);
-    size_t stride = ref->lumaStride;
     pixel *fref = ref->fpelPlane + blockOffset;
+    intptr_t stride = ref->lumaStride;
 
     setMVP(qmvp);
 
@@ -560,9 +559,7 @@
     MV bmv = pmv.roundToFPel();
     int bcost = bprecost;
     if (pmv.isSubpel())
-    {
         bcost = sad(fenc, FENC_STRIDE, fref + bmv.x + bmv.y * stride, stride) + mvcost(bmv << 2);
-    }
 
     // measure SAD cost at MV(0) if MVP is not zero
     if (pmv.notZero())
@@ -576,21 +573,35 @@
     }
 
     // measure SAD cost at each QPEL motion vector candidate
-    for (int i = 0; i < numCandidates; i++)
+    if (ref->isLowres)
     {
-        MV m = mvc[i].clipped(qmvmin, qmvmax);
-        if (m.notZero() && m != pmv && m != bestpre) // check already measured
+        for (int i = 0; i < numCandidates; i++)
         {
-            int cost;
-            if (ref->isLowres)
-                cost = ref->lowresQPelCost(fenc, blockOffset, m, sad) + mvcost(m);
-            else
-                cost = subpelCompare(ref, m, sad) + mvcost(m);
-
-            if (cost < bprecost)
+            MV m = mvc[i].clipped(qmvmin, qmvmax);
+            if (m.notZero() && m != pmv && m != bestpre) // check already measured
             {
-                bprecost = cost;
-                bestpre = m;
+                int cost = ref->lowresQPelCost(fenc, blockOffset, m, sad) + mvcost(m);
+                if (cost < bprecost)
+                {
+                    bprecost = cost;
+                    bestpre = m;
+                }
+            }
+        }
+    }
+    else
+    {
+        for (int i = 0; i < numCandidates; i++)
+        {
+            MV m = mvc[i].clipped(qmvmin, qmvmax);
+            if (m.notZero() && m != pmv && m != bestpre) // check already measured
+            {
+                int cost = subpelCompare(ref, m, sad) + mvcost(m);
+                if (cost < bprecost)
+                {
+                    bprecost = cost;
+                    bestpre = m;
+                }
             }
         }
     }
@@ -1042,7 +1053,7 @@
     else
         bmv = bmv.toQPel(); // promote search bmv to qpel
 
-    SubpelWorkload& wl = workload[this->subpelRefine];
+    const SubpelWorkload& wl = workload[this->subpelRefine];
 
     if (!bcost)
     {
@@ -1052,11 +1063,11 @@
     }
     else if (ref->isLowres)
     {
-        int bdir = 0, cost;
+        int bdir = 0;
         for (int i = 1; i <= wl.hpel_dirs; i++)
         {
             MV qmv = bmv + square1[i] * 2;
-            cost = ref->lowresQPelCost(fenc, blockOffset, qmv, sad) + mvcost(qmv);
+            int cost = ref->lowresQPelCost(fenc, blockOffset, qmv, sad) + mvcost(qmv);
             COPY2_IF_LT(bcost, cost, bdir, i);
         }
 
@@ -1067,7 +1078,7 @@
         for (int i = 1; i <= wl.qpel_dirs; i++)
         {
             MV qmv = bmv + square1[i];
-            cost = ref->lowresQPelCost(fenc, blockOffset, qmv, satd) + mvcost(qmv);
+            int cost = ref->lowresQPelCost(fenc, blockOffset, qmv, satd) + mvcost(qmv);
             COPY2_IF_LT(bcost, cost, bdir, i);
         }
 
@@ -1087,11 +1098,11 @@
 
         for (int iter = 0; iter < wl.hpel_iters; iter++)
         {
-            int bdir = 0, cost;
+            int bdir = 0;
             for (int i = 1; i <= wl.hpel_dirs; i++)
             {
                 MV qmv = bmv + square1[i] * 2;
-                cost = subpelCompare(ref, qmv, hpelcomp) + mvcost(qmv);
+                int cost = subpelCompare(ref, qmv, hpelcomp) + mvcost(qmv);
                 COPY2_IF_LT(bcost, cost, bdir, i);
             }
 
@@ -1107,11 +1118,11 @@
 
         for (int iter = 0; iter < wl.qpel_iters; iter++)
         {
-            int bdir = 0, cost;
+            int bdir = 0;
             for (int i = 1; i <= wl.qpel_dirs; i++)
             {
                 MV qmv = bmv + square1[i];
-                cost = subpelCompare(ref, qmv, satd) + mvcost(qmv);
+                int cost = subpelCompare(ref, qmv, satd) + mvcost(qmv);
                 COPY2_IF_LT(bcost, cost, bdir, i);
             }
 
@@ -1129,14 +1140,13 @@
 
 int MotionEstimate::subpelCompare(ReferencePlanes *ref, const MV& qmv, pixelcmp_t cmp)
 {
+    intptr_t stride = ref->lumaStride;
+    pixel *fref = ref->fpelPlane + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * stride;
     int xFrac = qmv.x & 0x3;
     int yFrac = qmv.y & 0x3;
 
     if ((yFrac | xFrac) == 0)
-    {
-        pixel *fref = ref->fpelPlane + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * ref->lumaStride;
-        return cmp(fenc, FENC_STRIDE, fref, ref->lumaStride);
-    }
+        return cmp(fenc, FENC_STRIDE, fref, stride);
     else
     {
         /* We are taking a short-cut here if the reference is weighted. To be
@@ -1145,22 +1155,17 @@
          * are simply interpolating the weighted full-pel pixels. Not 100%
          * accurate but good enough for fast qpel ME */
         ALIGN_VAR_32(pixel, subpelbuf[64 * 64]);
-        pixel *fref = ref->fpelPlane + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * ref->lumaStride;
         if (yFrac == 0)
-        {
-            primitives.luma_hpp[partEnum](fref, ref->lumaStride, subpelbuf, FENC_STRIDE, xFrac);
-        }
+            primitives.luma_hpp[partEnum](fref, stride, subpelbuf, FENC_STRIDE, xFrac);
         else if (xFrac == 0)
-        {
-            primitives.luma_vpp[partEnum](fref, ref->lumaStride, subpelbuf, FENC_STRIDE, yFrac);
-        }
+            primitives.luma_vpp[partEnum](fref, stride, subpelbuf, FENC_STRIDE, yFrac);
         else
         {
             ALIGN_VAR_32(int16_t, immed[64 * (64 + 8)]);
 
             int filterSize = NTAPS_LUMA;
             int halfFilterSize = filterSize >> 1;
-            primitives.luma_hps[partEnum](fref, ref->lumaStride, immed, blockwidth, xFrac, 1);
+            primitives.luma_hps[partEnum](fref, stride, immed, blockwidth, xFrac, 1);
             primitives.luma_vsp[partEnum](immed + (halfFilterSize - 1) * blockwidth, blockwidth, subpelbuf, FENC_STRIDE, yFrac);
         }
         return cmp(fenc, FENC_STRIDE, subpelbuf, FENC_STRIDE);
diff -r 27d36c4b4a27 -r 90ec907326e2 source/encoder/motion.h
--- a/source/encoder/motion.h	Mon Nov 17 01:30:26 2014 +0530
+++ b/source/encoder/motion.h	Mon Nov 17 19:44:35 2014 +0900
@@ -54,7 +54,6 @@
     int subpelRefine;
 
     int blockwidth;
-    int blockheight;
     int partEnum;
 
     static const int COST_MAX = 1 << 28;
diff -r 27d36c4b4a27 -r 90ec907326e2 source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp	Mon Nov 17 01:30:26 2014 +0530
+++ b/source/encoder/slicetype.cpp	Mon Nov 17 19:44:35 2014 +0900
@@ -1592,12 +1592,13 @@
         }
         if (bBidir)
         {
-            pixel subpelbuf0[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE], subpelbuf1[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE];
+            ALIGN_VAR_32(pixel, subpelbuf0[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);
+            ALIGN_VAR_32(pixel, subpelbuf1[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);
             intptr_t stride0 = X265_LOWRES_CU_SIZE, stride1 = X265_LOWRES_CU_SIZE;
             pixel *src0 = wfref0->lowresMC(pelOffset, *fenc_mvs[0], subpelbuf0, stride0);
             pixel *src1 = fref1->lowresMC(pelOffset, *fenc_mvs[1], subpelbuf1, stride1);
 
-            pixel ref[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE];
+            ALIGN_VAR_32(pixel, ref[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);
             primitives.pixelavg_pp[LUMA_8x8](ref, X265_LOWRES_CU_SIZE, src0, stride0, src1, stride1, 32);
             int bicost = primitives.satd[LUMA_8x8](fenc->lowresPlane[0] + pelOffset, fenc->lumaStride, ref, X265_LOWRES_CU_SIZE);
             COPY2_IF_LT(bcost, bicost, listused, 3);
@@ -1652,9 +1653,9 @@
 
         // generate 35 intra predictions into m_predictions
         pixelcmp_t satd = primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)];
-        int icost = m_me.COST_MAX, cost;
+        int icost = m_me.COST_MAX;
         primitives.intra_pred[DC_IDX][sizeIdx](m_predictions, cuSize, left0, above0, 0, (cuSize <= 16));
-        cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize);
+        int cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize);
         if (cost < icost)
             icost = cost;
         pixel *above = (cuSize >= 8) ? above1 : above0;