[x265] modify MV default constructor to do nothing
Steve Borho
steve at borho.org
Mon Nov 17 21:38:56 CET 2014
On 11/17, Satoshi Nakagawa wrote:
> # HG changeset patch
> # User Satoshi Nakagawa <nakagawa424 at oki.com>
> # Date 1416221075 -32400
> # Mon Nov 17 19:44:35 2014 +0900
> # Node ID 90ec907326e25ae40b7dc38130cf81874d201ad2
> # Parent 27d36c4b4a27d2872430c6a6fc538fbddcf791e6
> modify MV default constructor to do nothing
queued
> diff -r 27d36c4b4a27 -r 90ec907326e2 source/common/cudata.cpp
> --- a/source/common/cudata.cpp Mon Nov 17 01:30:26 2014 +0530
> +++ b/source/common/cudata.cpp Mon Nov 17 19:44:35 2014 +0900
> @@ -1237,7 +1237,7 @@
> else
> {
> // OUT OF BOUNDARY
> - outMvField.mv.word = 0;
> + outMvField.mv = 0;
> outMvField.refIdx = REF_NOT_VALID;
> }
> }
> @@ -1399,6 +1399,8 @@
>
> for (uint32_t i = 0; i < maxNumMergeCand; ++i)
> {
> + mvFieldNeighbours[i][0].mv = 0;
> + mvFieldNeighbours[i][1].mv = 0;
> mvFieldNeighbours[i][0].refIdx = REF_NOT_VALID;
> mvFieldNeighbours[i][1].refIdx = REF_NOT_VALID;
> }
> @@ -1646,7 +1648,7 @@
> while (count < maxNumMergeCand)
> {
> interDirNeighbours[count] = 1;
> - mvFieldNeighbours[count][0].mv.word = 0;
> + mvFieldNeighbours[count][0].mv = 0;
> mvFieldNeighbours[count][0].refIdx = r;
>
> if (isInterB)
> diff -r 27d36c4b4a27 -r 90ec907326e2 source/common/lowres.h
> --- a/source/common/lowres.h Mon Nov 17 01:30:26 2014 +0530
> +++ b/source/common/lowres.h Mon Nov 17 19:44:35 2014 +0900
> @@ -56,11 +56,10 @@
> {
> int hpelA = (qmv.y & 2) | ((qmv.x & 2) >> 1);
> pixel *frefA = lowresPlane[hpelA] + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * lumaStride;
> -
> - MV qmvB = qmv + MV((qmv.x & 1) * 2, (qmv.y & 1) * 2);
> - int hpelB = (qmvB.y & 2) | ((qmvB.x & 2) >> 1);
> -
> - pixel *frefB = lowresPlane[hpelB] + blockOffset + (qmvB.x >> 2) + (qmvB.y >> 2) * lumaStride;
> + int qmvx = qmv.x + (qmv.x & 1);
> + int qmvy = qmv.y + (qmv.y & 1);
> + int hpelB = (qmvy & 2) | ((qmvx & 2) >> 1);
> + pixel *frefB = lowresPlane[hpelB] + blockOffset + (qmvx >> 2) + (qmvy >> 2) * lumaStride;
> primitives.pixelavg_pp[LUMA_8x8](buf, outstride, frefA, lumaStride, frefB, lumaStride, 32);
> return buf;
> }
> @@ -79,9 +78,10 @@
> ALIGN_VAR_16(pixel, subpelbuf[8 * 8]);
> int hpelA = (qmv.y & 2) | ((qmv.x & 2) >> 1);
> pixel *frefA = lowresPlane[hpelA] + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * lumaStride;
> - MV qmvB = qmv + MV((qmv.x & 1) * 2, (qmv.y & 1) * 2);
> - int hpelB = (qmvB.y & 2) | ((qmvB.x & 2) >> 1);
> - pixel *frefB = lowresPlane[hpelB] + blockOffset + (qmvB.x >> 2) + (qmvB.y >> 2) * lumaStride;
> + int qmvx = qmv.x + (qmv.x & 1);
> + int qmvy = qmv.y + (qmv.y & 1);
> + int hpelB = (qmvy & 2) | ((qmvx & 2) >> 1);
> + pixel *frefB = lowresPlane[hpelB] + blockOffset + (qmvx >> 2) + (qmvy >> 2) * lumaStride;
> primitives.pixelavg_pp[LUMA_8x8](subpelbuf, 8, frefA, lumaStride, frefB, lumaStride, 32);
> return comp(fenc, FENC_STRIDE, subpelbuf, 8);
> }
> diff -r 27d36c4b4a27 -r 90ec907326e2 source/common/mv.h
> --- a/source/common/mv.h Mon Nov 17 01:30:26 2014 +0530
> +++ b/source/common/mv.h Mon Nov 17 19:44:35 2014 +0900
> @@ -44,19 +44,19 @@
> int32_t word;
> };
>
> - MV() : word(0) {}
> -
> + MV() {}
> + MV(int32_t w) : word(w) {}
> MV(int16_t _x, int16_t _y) : x(_x), y(_y) {}
>
> - const MV& operator =(uint32_t w) { word = w; return *this; }
> + MV& operator =(uint32_t w) { word = w; return *this; }
>
> - const MV& operator +=(const MV& other) { x += other.x; y += other.y; return *this; }
> + MV& operator +=(const MV& other) { x += other.x; y += other.y; return *this; }
>
> - const MV& operator -=(const MV& other) { x -= other.x; y -= other.y; return *this; }
> + MV& operator -=(const MV& other) { x -= other.x; y -= other.y; return *this; }
>
> - const MV& operator >>=(int i) { x >>= i; y >>= i; return *this; }
> + MV& operator >>=(int i) { x >>= i; y >>= i; return *this; }
>
> - const MV& operator <<=(int i) { x <<= i; y <<= i; return *this; }
> + MV& operator <<=(int i) { x <<= i; y <<= i; return *this; }
>
> MV operator >>(int i) const { return MV(x >> i, y >> i); }
>
> @@ -64,16 +64,18 @@
>
> MV operator *(int16_t i) const { return MV(x * i, y * i); }
>
> - const MV operator -(const MV& other) const { return MV(x - other.x, y - other.y); }
> + MV operator -(const MV& other) const { return MV(x - other.x, y - other.y); }
>
> - const MV operator +(const MV& other) const { return MV(x + other.x, y + other.y); }
> + MV operator +(const MV& other) const { return MV(x + other.x, y + other.y); }
>
> bool operator ==(const MV& other) const { return word == other.word; }
>
> bool operator !=(const MV& other) const { return word != other.word; }
>
> + bool operator !() const { return !word; }
> +
> // Scale down a QPEL mv to FPEL mv, rounding up by one HPEL offset
> - MV roundToFPel() const { return MV(x + 2, y + 2) >> 2; }
> + MV roundToFPel() const { return MV((x + 2) >> 2, (y + 2) >> 2); }
>
> // Scale up an FPEL mv to QPEL by shifting up two bits
> MV toQPel() const { return *this << 2; }
> diff -r 27d36c4b4a27 -r 90ec907326e2 source/encoder/bitcost.h
> --- a/source/encoder/bitcost.h Mon Nov 17 01:30:26 2014 +0530
> +++ b/source/encoder/bitcost.h Mon Nov 17 19:44:35 2014 +0900
> @@ -35,7 +35,7 @@
> {
> public:
>
> - BitCost() : m_cost_mvx(0), m_cost_mvy(0), m_cost(0) {}
> + BitCost() : m_cost_mvx(0), m_cost_mvy(0), m_cost(0), m_mvp(0) {}
>
> void setQP(unsigned int qp);
>
> diff -r 27d36c4b4a27 -r 90ec907326e2 source/encoder/motion.cpp
> --- a/source/encoder/motion.cpp Mon Nov 17 01:30:26 2014 +0530
> +++ b/source/encoder/motion.cpp Mon Nov 17 19:44:35 2014 +0900
> @@ -43,7 +43,7 @@
> bool hpel_satd;
> };
>
> -SubpelWorkload workload[X265_MAX_SUBPEL_LEVEL + 1] =
> +static const SubpelWorkload workload[X265_MAX_SUBPEL_LEVEL + 1] =
> {
> { 1, 4, 0, 4, false }, // 4 SAD HPEL only
> { 1, 4, 1, 4, false }, // 4 SAD HPEL + 4 SATD QPEL
> @@ -116,7 +116,6 @@
> sad_x4 = primitives.sad_x4[partEnum];
>
> blockwidth = width;
> - blockheight = height;
> blockOffset = offset;
>
> /* copy PU block into cache */
> @@ -291,7 +290,7 @@
> {
> ALIGN_VAR_16(int, costs[16]);
> pixel *fref = ref->fpelPlane + blockOffset;
> - size_t stride = ref->lumaStride;
> + intptr_t stride = ref->lumaStride;
>
> MV omv = bmv;
> int saved = bcost;
> @@ -531,8 +530,8 @@
> MV & outQMv)
> {
> ALIGN_VAR_16(int, costs[16]);
> - size_t stride = ref->lumaStride;
> pixel *fref = ref->fpelPlane + blockOffset;
> + intptr_t stride = ref->lumaStride;
>
> setMVP(qmvp);
>
> @@ -560,9 +559,7 @@
> MV bmv = pmv.roundToFPel();
> int bcost = bprecost;
> if (pmv.isSubpel())
> - {
> bcost = sad(fenc, FENC_STRIDE, fref + bmv.x + bmv.y * stride, stride) + mvcost(bmv << 2);
> - }
>
> // measure SAD cost at MV(0) if MVP is not zero
> if (pmv.notZero())
> @@ -576,21 +573,35 @@
> }
>
> // measure SAD cost at each QPEL motion vector candidate
> - for (int i = 0; i < numCandidates; i++)
> + if (ref->isLowres)
> {
> - MV m = mvc[i].clipped(qmvmin, qmvmax);
> - if (m.notZero() && m != pmv && m != bestpre) // check already measured
> + for (int i = 0; i < numCandidates; i++)
> {
> - int cost;
> - if (ref->isLowres)
> - cost = ref->lowresQPelCost(fenc, blockOffset, m, sad) + mvcost(m);
> - else
> - cost = subpelCompare(ref, m, sad) + mvcost(m);
> -
> - if (cost < bprecost)
> + MV m = mvc[i].clipped(qmvmin, qmvmax);
> + if (m.notZero() && m != pmv && m != bestpre) // check already measured
> {
> - bprecost = cost;
> - bestpre = m;
> + int cost = ref->lowresQPelCost(fenc, blockOffset, m, sad) + mvcost(m);
> + if (cost < bprecost)
> + {
> + bprecost = cost;
> + bestpre = m;
> + }
> + }
> + }
> + }
> + else
> + {
> + for (int i = 0; i < numCandidates; i++)
> + {
> + MV m = mvc[i].clipped(qmvmin, qmvmax);
> + if (m.notZero() && m != pmv && m != bestpre) // check already measured
> + {
> + int cost = subpelCompare(ref, m, sad) + mvcost(m);
> + if (cost < bprecost)
> + {
> + bprecost = cost;
> + bestpre = m;
> + }
> }
> }
> }
> @@ -1042,7 +1053,7 @@
> else
> bmv = bmv.toQPel(); // promote search bmv to qpel
>
> - SubpelWorkload& wl = workload[this->subpelRefine];
> + const SubpelWorkload& wl = workload[this->subpelRefine];
>
> if (!bcost)
> {
> @@ -1052,11 +1063,11 @@
> }
> else if (ref->isLowres)
> {
> - int bdir = 0, cost;
> + int bdir = 0;
> for (int i = 1; i <= wl.hpel_dirs; i++)
> {
> MV qmv = bmv + square1[i] * 2;
> - cost = ref->lowresQPelCost(fenc, blockOffset, qmv, sad) + mvcost(qmv);
> + int cost = ref->lowresQPelCost(fenc, blockOffset, qmv, sad) + mvcost(qmv);
> COPY2_IF_LT(bcost, cost, bdir, i);
> }
>
> @@ -1067,7 +1078,7 @@
> for (int i = 1; i <= wl.qpel_dirs; i++)
> {
> MV qmv = bmv + square1[i];
> - cost = ref->lowresQPelCost(fenc, blockOffset, qmv, satd) + mvcost(qmv);
> + int cost = ref->lowresQPelCost(fenc, blockOffset, qmv, satd) + mvcost(qmv);
> COPY2_IF_LT(bcost, cost, bdir, i);
> }
>
> @@ -1087,11 +1098,11 @@
>
> for (int iter = 0; iter < wl.hpel_iters; iter++)
> {
> - int bdir = 0, cost;
> + int bdir = 0;
> for (int i = 1; i <= wl.hpel_dirs; i++)
> {
> MV qmv = bmv + square1[i] * 2;
> - cost = subpelCompare(ref, qmv, hpelcomp) + mvcost(qmv);
> + int cost = subpelCompare(ref, qmv, hpelcomp) + mvcost(qmv);
> COPY2_IF_LT(bcost, cost, bdir, i);
> }
>
> @@ -1107,11 +1118,11 @@
>
> for (int iter = 0; iter < wl.qpel_iters; iter++)
> {
> - int bdir = 0, cost;
> + int bdir = 0;
> for (int i = 1; i <= wl.qpel_dirs; i++)
> {
> MV qmv = bmv + square1[i];
> - cost = subpelCompare(ref, qmv, satd) + mvcost(qmv);
> + int cost = subpelCompare(ref, qmv, satd) + mvcost(qmv);
> COPY2_IF_LT(bcost, cost, bdir, i);
> }
>
> @@ -1129,14 +1140,13 @@
>
> int MotionEstimate::subpelCompare(ReferencePlanes *ref, const MV& qmv, pixelcmp_t cmp)
> {
> + intptr_t stride = ref->lumaStride;
> + pixel *fref = ref->fpelPlane + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * stride;
> int xFrac = qmv.x & 0x3;
> int yFrac = qmv.y & 0x3;
>
> if ((yFrac | xFrac) == 0)
> - {
> - pixel *fref = ref->fpelPlane + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * ref->lumaStride;
> - return cmp(fenc, FENC_STRIDE, fref, ref->lumaStride);
> - }
> + return cmp(fenc, FENC_STRIDE, fref, stride);
> else
> {
> /* We are taking a short-cut here if the reference is weighted. To be
> @@ -1145,22 +1155,17 @@
> * are simply interpolating the weighted full-pel pixels. Not 100%
> * accurate but good enough for fast qpel ME */
> ALIGN_VAR_32(pixel, subpelbuf[64 * 64]);
> - pixel *fref = ref->fpelPlane + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * ref->lumaStride;
> if (yFrac == 0)
> - {
> - primitives.luma_hpp[partEnum](fref, ref->lumaStride, subpelbuf, FENC_STRIDE, xFrac);
> - }
> + primitives.luma_hpp[partEnum](fref, stride, subpelbuf, FENC_STRIDE, xFrac);
> else if (xFrac == 0)
> - {
> - primitives.luma_vpp[partEnum](fref, ref->lumaStride, subpelbuf, FENC_STRIDE, yFrac);
> - }
> + primitives.luma_vpp[partEnum](fref, stride, subpelbuf, FENC_STRIDE, yFrac);
> else
> {
> ALIGN_VAR_32(int16_t, immed[64 * (64 + 8)]);
>
> int filterSize = NTAPS_LUMA;
> int halfFilterSize = filterSize >> 1;
> - primitives.luma_hps[partEnum](fref, ref->lumaStride, immed, blockwidth, xFrac, 1);
> + primitives.luma_hps[partEnum](fref, stride, immed, blockwidth, xFrac, 1);
> primitives.luma_vsp[partEnum](immed + (halfFilterSize - 1) * blockwidth, blockwidth, subpelbuf, FENC_STRIDE, yFrac);
> }
> return cmp(fenc, FENC_STRIDE, subpelbuf, FENC_STRIDE);
> diff -r 27d36c4b4a27 -r 90ec907326e2 source/encoder/motion.h
> --- a/source/encoder/motion.h Mon Nov 17 01:30:26 2014 +0530
> +++ b/source/encoder/motion.h Mon Nov 17 19:44:35 2014 +0900
> @@ -54,7 +54,6 @@
> int subpelRefine;
>
> int blockwidth;
> - int blockheight;
> int partEnum;
>
> static const int COST_MAX = 1 << 28;
> diff -r 27d36c4b4a27 -r 90ec907326e2 source/encoder/slicetype.cpp
> --- a/source/encoder/slicetype.cpp Mon Nov 17 01:30:26 2014 +0530
> +++ b/source/encoder/slicetype.cpp Mon Nov 17 19:44:35 2014 +0900
> @@ -1592,12 +1592,13 @@
> }
> if (bBidir)
> {
> - pixel subpelbuf0[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE], subpelbuf1[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE];
> + ALIGN_VAR_32(pixel, subpelbuf0[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);
> + ALIGN_VAR_32(pixel, subpelbuf1[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);
> intptr_t stride0 = X265_LOWRES_CU_SIZE, stride1 = X265_LOWRES_CU_SIZE;
> pixel *src0 = wfref0->lowresMC(pelOffset, *fenc_mvs[0], subpelbuf0, stride0);
> pixel *src1 = fref1->lowresMC(pelOffset, *fenc_mvs[1], subpelbuf1, stride1);
>
> - pixel ref[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE];
> + ALIGN_VAR_32(pixel, ref[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);
> primitives.pixelavg_pp[LUMA_8x8](ref, X265_LOWRES_CU_SIZE, src0, stride0, src1, stride1, 32);
> int bicost = primitives.satd[LUMA_8x8](fenc->lowresPlane[0] + pelOffset, fenc->lumaStride, ref, X265_LOWRES_CU_SIZE);
> COPY2_IF_LT(bcost, bicost, listused, 3);
> @@ -1652,9 +1653,9 @@
>
> // generate 35 intra predictions into m_predictions
> pixelcmp_t satd = primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)];
> - int icost = m_me.COST_MAX, cost;
> + int icost = m_me.COST_MAX;
> primitives.intra_pred[DC_IDX][sizeIdx](m_predictions, cuSize, left0, above0, 0, (cuSize <= 16));
> - cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize);
> + int cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize);
> if (cost < icost)
> icost = cost;
> pixel *above = (cuSize >= 8) ? above1 : above0;
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
--
Steve Borho
More information about the x265-devel mailing list