[x265] modify MV default constructor to do nothing

Mon Nov 17 21:38:56 CET 2014

On 11/17, Satoshi Nakagawa wrote:
> # HG changeset patch
> # User Satoshi Nakagawa <nakagawa424 at oki.com>
> # Date 1416221075 -32400
> #      Mon Nov 17 19:44:35 2014 +0900
> # Node ID 90ec907326e25ae40b7dc38130cf81874d201ad2
> # Parent  27d36c4b4a27d2872430c6a6fc538fbddcf791e6
> modify MV default constructor to do nothing

queued

> diff -r 27d36c4b4a27 -r 90ec907326e2 source/common/cudata.cpp
> --- a/source/common/cudata.cpp	Mon Nov 17 01:30:26 2014 +0530
> +++ b/source/common/cudata.cpp	Mon Nov 17 19:44:35 2014 +0900
> @@ -1237,7 +1237,7 @@
>      else
>      {
>          // OUT OF BOUNDARY
> -        outMvField.mv.word = 0;
> +        outMvField.mv = 0;
>          outMvField.refIdx = REF_NOT_VALID;
>      }
>  }
> @@ -1399,6 +1399,8 @@
>  
>      for (uint32_t i = 0; i < maxNumMergeCand; ++i)
>      {
> +        mvFieldNeighbours[i][0].mv = 0;
> +        mvFieldNeighbours[i][1].mv = 0;
>          mvFieldNeighbours[i][0].refIdx = REF_NOT_VALID;
>          mvFieldNeighbours[i][1].refIdx = REF_NOT_VALID;
>      }
> @@ -1646,7 +1648,7 @@
>      while (count < maxNumMergeCand)
>      {
>          interDirNeighbours[count] = 1;
> -        mvFieldNeighbours[count][0].mv.word = 0;
> +        mvFieldNeighbours[count][0].mv = 0;
>          mvFieldNeighbours[count][0].refIdx = r;
>  
>          if (isInterB)
> diff -r 27d36c4b4a27 -r 90ec907326e2 source/common/lowres.h
> --- a/source/common/lowres.h	Mon Nov 17 01:30:26 2014 +0530
> +++ b/source/common/lowres.h	Mon Nov 17 19:44:35 2014 +0900
> @@ -56,11 +56,10 @@
>          {
>              int hpelA = (qmv.y & 2) | ((qmv.x & 2) >> 1);
>              pixel *frefA = lowresPlane[hpelA] + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * lumaStride;
> -
> -            MV qmvB = qmv + MV((qmv.x & 1) * 2, (qmv.y & 1) * 2);
> -            int hpelB = (qmvB.y & 2) | ((qmvB.x & 2) >> 1);
> -
> -            pixel *frefB = lowresPlane[hpelB] + blockOffset + (qmvB.x >> 2) + (qmvB.y >> 2) * lumaStride;
> +            int qmvx = qmv.x + (qmv.x & 1);
> +            int qmvy = qmv.y + (qmv.y & 1);
> +            int hpelB = (qmvy & 2) | ((qmvx & 2) >> 1);
> +            pixel *frefB = lowresPlane[hpelB] + blockOffset + (qmvx >> 2) + (qmvy >> 2) * lumaStride;
>              primitives.pixelavg_pp[LUMA_8x8](buf, outstride, frefA, lumaStride, frefB, lumaStride, 32);
>              return buf;
>          }
> @@ -79,9 +78,10 @@
>              ALIGN_VAR_16(pixel, subpelbuf[8 * 8]);
>              int hpelA = (qmv.y & 2) | ((qmv.x & 2) >> 1);
>              pixel *frefA = lowresPlane[hpelA] + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * lumaStride;
> -            MV qmvB = qmv + MV((qmv.x & 1) * 2, (qmv.y & 1) * 2);
> -            int hpelB = (qmvB.y & 2) | ((qmvB.x & 2) >> 1);
> -            pixel *frefB = lowresPlane[hpelB] + blockOffset + (qmvB.x >> 2) + (qmvB.y >> 2) * lumaStride;
> +            int qmvx = qmv.x + (qmv.x & 1);
> +            int qmvy = qmv.y + (qmv.y & 1);
> +            int hpelB = (qmvy & 2) | ((qmvx & 2) >> 1);
> +            pixel *frefB = lowresPlane[hpelB] + blockOffset + (qmvx >> 2) + (qmvy >> 2) * lumaStride;
>              primitives.pixelavg_pp[LUMA_8x8](subpelbuf, 8, frefA, lumaStride, frefB, lumaStride, 32);
>              return comp(fenc, FENC_STRIDE, subpelbuf, 8);
>          }
> diff -r 27d36c4b4a27 -r 90ec907326e2 source/common/mv.h
> --- a/source/common/mv.h	Mon Nov 17 01:30:26 2014 +0530
> +++ b/source/common/mv.h	Mon Nov 17 19:44:35 2014 +0900
> @@ -44,19 +44,19 @@
>          int32_t word;
>      };
>  
> -    MV() : word(0)                             {}
> -
> +    MV()                                       {}
> +    MV(int32_t w) : word(w)                    {}
>      MV(int16_t _x, int16_t _y) : x(_x), y(_y)  {}
>  
> -    const MV& operator =(uint32_t w)           { word = w; return *this; }
> +    MV& operator =(uint32_t w)                 { word = w; return *this; }
>  
> -    const MV& operator +=(const MV& other)     { x += other.x; y += other.y; return *this; }
> +    MV& operator +=(const MV& other)           { x += other.x; y += other.y; return *this; }
>  
> -    const MV& operator -=(const MV& other)     { x -= other.x; y -= other.y; return *this; }
> +    MV& operator -=(const MV& other)           { x -= other.x; y -= other.y; return *this; }
>  
> -    const MV& operator >>=(int i)              { x >>= i; y >>= i; return *this; }
> +    MV& operator >>=(int i)                    { x >>= i; y >>= i; return *this; }
>  
> -    const MV& operator <<=(int i)              { x <<= i; y <<= i; return *this; }
> +    MV& operator <<=(int i)                    { x <<= i; y <<= i; return *this; }
>  
>      MV operator >>(int i) const                { return MV(x >> i, y >> i); }
>  
> @@ -64,16 +64,18 @@
>  
>      MV operator *(int16_t i) const             { return MV(x * i, y * i); }
>  
> -    const MV operator -(const MV& other) const { return MV(x - other.x, y - other.y); }
> +    MV operator -(const MV& other) const       { return MV(x - other.x, y - other.y); }
>  
> -    const MV operator +(const MV& other) const { return MV(x + other.x, y + other.y); }
> +    MV operator +(const MV& other) const       { return MV(x + other.x, y + other.y); }
>  
>      bool operator ==(const MV& other) const    { return word == other.word; }
>  
>      bool operator !=(const MV& other) const    { return word != other.word; }
>  
> +    bool operator !() const                    { return !word; }
> +
>      // Scale down a QPEL mv to FPEL mv, rounding up by one HPEL offset
> -    MV roundToFPel() const                     { return MV(x + 2, y + 2) >> 2; }
> +    MV roundToFPel() const                     { return MV((x + 2) >> 2, (y + 2) >> 2); }
>  
>      // Scale up an FPEL mv to QPEL by shifting up two bits
>      MV toQPel() const                          { return *this << 2; }
> diff -r 27d36c4b4a27 -r 90ec907326e2 source/encoder/bitcost.h
> --- a/source/encoder/bitcost.h	Mon Nov 17 01:30:26 2014 +0530
> +++ b/source/encoder/bitcost.h	Mon Nov 17 19:44:35 2014 +0900
> @@ -35,7 +35,7 @@
>  {
>  public:
>  
> -    BitCost() : m_cost_mvx(0), m_cost_mvy(0), m_cost(0) {}
> +    BitCost() : m_cost_mvx(0), m_cost_mvy(0), m_cost(0), m_mvp(0) {}
>  
>      void setQP(unsigned int qp);
>  
> diff -r 27d36c4b4a27 -r 90ec907326e2 source/encoder/motion.cpp
> --- a/source/encoder/motion.cpp	Mon Nov 17 01:30:26 2014 +0530
> +++ b/source/encoder/motion.cpp	Mon Nov 17 19:44:35 2014 +0900
> @@ -43,7 +43,7 @@
>      bool hpel_satd;
>  };
>  
> -SubpelWorkload workload[X265_MAX_SUBPEL_LEVEL + 1] =
> +static const SubpelWorkload workload[X265_MAX_SUBPEL_LEVEL + 1] =
>  {
>      { 1, 4, 0, 4, false }, // 4 SAD HPEL only
>      { 1, 4, 1, 4, false }, // 4 SAD HPEL + 4 SATD QPEL
> @@ -116,7 +116,6 @@
>      sad_x4 = primitives.sad_x4[partEnum];
>  
>      blockwidth = width;
> -    blockheight = height;
>      blockOffset = offset;
>  
>      /* copy PU block into cache */
> @@ -291,7 +290,7 @@
>  {
>      ALIGN_VAR_16(int, costs[16]);
>      pixel *fref = ref->fpelPlane + blockOffset;
> -    size_t stride = ref->lumaStride;
> +    intptr_t stride = ref->lumaStride;
>  
>      MV omv = bmv;
>      int saved = bcost;
> @@ -531,8 +530,8 @@
>                                     MV &             outQMv)
>  {
>      ALIGN_VAR_16(int, costs[16]);
> -    size_t stride = ref->lumaStride;
>      pixel *fref = ref->fpelPlane + blockOffset;
> +    intptr_t stride = ref->lumaStride;
>  
>      setMVP(qmvp);
>  
> @@ -560,9 +559,7 @@
>      MV bmv = pmv.roundToFPel();
>      int bcost = bprecost;
>      if (pmv.isSubpel())
> -    {
>          bcost = sad(fenc, FENC_STRIDE, fref + bmv.x + bmv.y * stride, stride) + mvcost(bmv << 2);
> -    }
>  
>      // measure SAD cost at MV(0) if MVP is not zero
>      if (pmv.notZero())
> @@ -576,21 +573,35 @@
>      }
>  
>      // measure SAD cost at each QPEL motion vector candidate
> -    for (int i = 0; i < numCandidates; i++)
> +    if (ref->isLowres)
>      {
> -        MV m = mvc[i].clipped(qmvmin, qmvmax);
> -        if (m.notZero() && m != pmv && m != bestpre) // check already measured
> +        for (int i = 0; i < numCandidates; i++)
>          {
> -            int cost;
> -            if (ref->isLowres)
> -                cost = ref->lowresQPelCost(fenc, blockOffset, m, sad) + mvcost(m);
> -            else
> -                cost = subpelCompare(ref, m, sad) + mvcost(m);
> -
> -            if (cost < bprecost)
> +            MV m = mvc[i].clipped(qmvmin, qmvmax);
> +            if (m.notZero() && m != pmv && m != bestpre) // check already measured
>              {
> -                bprecost = cost;
> -                bestpre = m;
> +                int cost = ref->lowresQPelCost(fenc, blockOffset, m, sad) + mvcost(m);
> +                if (cost < bprecost)
> +                {
> +                    bprecost = cost;
> +                    bestpre = m;
> +                }
> +            }
> +        }
> +    }
> +    else
> +    {
> +        for (int i = 0; i < numCandidates; i++)
> +        {
> +            MV m = mvc[i].clipped(qmvmin, qmvmax);
> +            if (m.notZero() && m != pmv && m != bestpre) // check already measured
> +            {
> +                int cost = subpelCompare(ref, m, sad) + mvcost(m);
> +                if (cost < bprecost)
> +                {
> +                    bprecost = cost;
> +                    bestpre = m;
> +                }
>              }
>          }
>      }
> @@ -1042,7 +1053,7 @@
>      else
>          bmv = bmv.toQPel(); // promote search bmv to qpel
>  
> -    SubpelWorkload& wl = workload[this->subpelRefine];
> +    const SubpelWorkload& wl = workload[this->subpelRefine];
>  
>      if (!bcost)
>      {
> @@ -1052,11 +1063,11 @@
>      }
>      else if (ref->isLowres)
>      {
> -        int bdir = 0, cost;
> +        int bdir = 0;
>          for (int i = 1; i <= wl.hpel_dirs; i++)
>          {
>              MV qmv = bmv + square1[i] * 2;
> -            cost = ref->lowresQPelCost(fenc, blockOffset, qmv, sad) + mvcost(qmv);
> +            int cost = ref->lowresQPelCost(fenc, blockOffset, qmv, sad) + mvcost(qmv);
>              COPY2_IF_LT(bcost, cost, bdir, i);
>          }
>  
> @@ -1067,7 +1078,7 @@
>          for (int i = 1; i <= wl.qpel_dirs; i++)
>          {
>              MV qmv = bmv + square1[i];
> -            cost = ref->lowresQPelCost(fenc, blockOffset, qmv, satd) + mvcost(qmv);
> +            int cost = ref->lowresQPelCost(fenc, blockOffset, qmv, satd) + mvcost(qmv);
>              COPY2_IF_LT(bcost, cost, bdir, i);
>          }
>  
> @@ -1087,11 +1098,11 @@
>  
>          for (int iter = 0; iter < wl.hpel_iters; iter++)
>          {
> -            int bdir = 0, cost;
> +            int bdir = 0;
>              for (int i = 1; i <= wl.hpel_dirs; i++)
>              {
>                  MV qmv = bmv + square1[i] * 2;
> -                cost = subpelCompare(ref, qmv, hpelcomp) + mvcost(qmv);
> +                int cost = subpelCompare(ref, qmv, hpelcomp) + mvcost(qmv);
>                  COPY2_IF_LT(bcost, cost, bdir, i);
>              }
>  
> @@ -1107,11 +1118,11 @@
>  
>          for (int iter = 0; iter < wl.qpel_iters; iter++)
>          {
> -            int bdir = 0, cost;
> +            int bdir = 0;
>              for (int i = 1; i <= wl.qpel_dirs; i++)
>              {
>                  MV qmv = bmv + square1[i];
> -                cost = subpelCompare(ref, qmv, satd) + mvcost(qmv);
> +                int cost = subpelCompare(ref, qmv, satd) + mvcost(qmv);
>                  COPY2_IF_LT(bcost, cost, bdir, i);
>              }
>  
> @@ -1129,14 +1140,13 @@
>  
>  int MotionEstimate::subpelCompare(ReferencePlanes *ref, const MV& qmv, pixelcmp_t cmp)
>  {
> +    intptr_t stride = ref->lumaStride;
> +    pixel *fref = ref->fpelPlane + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * stride;
>      int xFrac = qmv.x & 0x3;
>      int yFrac = qmv.y & 0x3;
>  
>      if ((yFrac | xFrac) == 0)
> -    {
> -        pixel *fref = ref->fpelPlane + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * ref->lumaStride;
> -        return cmp(fenc, FENC_STRIDE, fref, ref->lumaStride);
> -    }
> +        return cmp(fenc, FENC_STRIDE, fref, stride);
>      else
>      {
>          /* We are taking a short-cut here if the reference is weighted. To be
> @@ -1145,22 +1155,17 @@
>           * are simply interpolating the weighted full-pel pixels. Not 100%
>           * accurate but good enough for fast qpel ME */
>          ALIGN_VAR_32(pixel, subpelbuf[64 * 64]);
> -        pixel *fref = ref->fpelPlane + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * ref->lumaStride;
>          if (yFrac == 0)
> -        {
> -            primitives.luma_hpp[partEnum](fref, ref->lumaStride, subpelbuf, FENC_STRIDE, xFrac);
> -        }
> +            primitives.luma_hpp[partEnum](fref, stride, subpelbuf, FENC_STRIDE, xFrac);
>          else if (xFrac == 0)
> -        {
> -            primitives.luma_vpp[partEnum](fref, ref->lumaStride, subpelbuf, FENC_STRIDE, yFrac);
> -        }
> +            primitives.luma_vpp[partEnum](fref, stride, subpelbuf, FENC_STRIDE, yFrac);
>          else
>          {
>              ALIGN_VAR_32(int16_t, immed[64 * (64 + 8)]);
>  
>              int filterSize = NTAPS_LUMA;
>              int halfFilterSize = filterSize >> 1;
> -            primitives.luma_hps[partEnum](fref, ref->lumaStride, immed, blockwidth, xFrac, 1);
> +            primitives.luma_hps[partEnum](fref, stride, immed, blockwidth, xFrac, 1);
>              primitives.luma_vsp[partEnum](immed + (halfFilterSize - 1) * blockwidth, blockwidth, subpelbuf, FENC_STRIDE, yFrac);
>          }
>          return cmp(fenc, FENC_STRIDE, subpelbuf, FENC_STRIDE);
> diff -r 27d36c4b4a27 -r 90ec907326e2 source/encoder/motion.h
> --- a/source/encoder/motion.h	Mon Nov 17 01:30:26 2014 +0530
> +++ b/source/encoder/motion.h	Mon Nov 17 19:44:35 2014 +0900
> @@ -54,7 +54,6 @@
>      int subpelRefine;
>  
>      int blockwidth;
> -    int blockheight;
>      int partEnum;
>  
>      static const int COST_MAX = 1 << 28;
> diff -r 27d36c4b4a27 -r 90ec907326e2 source/encoder/slicetype.cpp
> --- a/source/encoder/slicetype.cpp	Mon Nov 17 01:30:26 2014 +0530
> +++ b/source/encoder/slicetype.cpp	Mon Nov 17 19:44:35 2014 +0900
> @@ -1592,12 +1592,13 @@
>          }
>          if (bBidir)
>          {
> -            pixel subpelbuf0[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE], subpelbuf1[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE];
> +            ALIGN_VAR_32(pixel, subpelbuf0[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);
> +            ALIGN_VAR_32(pixel, subpelbuf1[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);
>              intptr_t stride0 = X265_LOWRES_CU_SIZE, stride1 = X265_LOWRES_CU_SIZE;
>              pixel *src0 = wfref0->lowresMC(pelOffset, *fenc_mvs[0], subpelbuf0, stride0);
>              pixel *src1 = fref1->lowresMC(pelOffset, *fenc_mvs[1], subpelbuf1, stride1);
>  
> -            pixel ref[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE];
> +            ALIGN_VAR_32(pixel, ref[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);
>              primitives.pixelavg_pp[LUMA_8x8](ref, X265_LOWRES_CU_SIZE, src0, stride0, src1, stride1, 32);
>              int bicost = primitives.satd[LUMA_8x8](fenc->lowresPlane[0] + pelOffset, fenc->lumaStride, ref, X265_LOWRES_CU_SIZE);
>              COPY2_IF_LT(bcost, bicost, listused, 3);
> @@ -1652,9 +1653,9 @@
>  
>          // generate 35 intra predictions into m_predictions
>          pixelcmp_t satd = primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)];
> -        int icost = m_me.COST_MAX, cost;
> +        int icost = m_me.COST_MAX;
>          primitives.intra_pred[DC_IDX][sizeIdx](m_predictions, cuSize, left0, above0, 0, (cuSize <= 16));
> -        cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize);
> +        int cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize);
>          if (cost < icost)
>              icost = cost;
>          pixel *above = (cuSize >= 8) ? above1 : above0;
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel

-- 
Steve Borho