[x265] modify MV default constructor to do nothing
Satoshi Nakagawa
nakagawa424 at oki.com
Mon Nov 17 11:47:23 CET 2014
# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1416221075 -32400
# Mon Nov 17 19:44:35 2014 +0900
# Node ID 90ec907326e25ae40b7dc38130cf81874d201ad2
# Parent 27d36c4b4a27d2872430c6a6fc538fbddcf791e6
modify MV default constructor to do nothing
diff -r 27d36c4b4a27 -r 90ec907326e2 source/common/cudata.cpp
--- a/source/common/cudata.cpp Mon Nov 17 01:30:26 2014 +0530
+++ b/source/common/cudata.cpp Mon Nov 17 19:44:35 2014 +0900
@@ -1237,7 +1237,7 @@
else
{
// OUT OF BOUNDARY
- outMvField.mv.word = 0;
+ outMvField.mv = 0;
outMvField.refIdx = REF_NOT_VALID;
}
}
@@ -1399,6 +1399,8 @@
for (uint32_t i = 0; i < maxNumMergeCand; ++i)
{
+ mvFieldNeighbours[i][0].mv = 0;
+ mvFieldNeighbours[i][1].mv = 0;
mvFieldNeighbours[i][0].refIdx = REF_NOT_VALID;
mvFieldNeighbours[i][1].refIdx = REF_NOT_VALID;
}
@@ -1646,7 +1648,7 @@
while (count < maxNumMergeCand)
{
interDirNeighbours[count] = 1;
- mvFieldNeighbours[count][0].mv.word = 0;
+ mvFieldNeighbours[count][0].mv = 0;
mvFieldNeighbours[count][0].refIdx = r;
if (isInterB)
diff -r 27d36c4b4a27 -r 90ec907326e2 source/common/lowres.h
--- a/source/common/lowres.h Mon Nov 17 01:30:26 2014 +0530
+++ b/source/common/lowres.h Mon Nov 17 19:44:35 2014 +0900
@@ -56,11 +56,10 @@
{
int hpelA = (qmv.y & 2) | ((qmv.x & 2) >> 1);
pixel *frefA = lowresPlane[hpelA] + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * lumaStride;
-
- MV qmvB = qmv + MV((qmv.x & 1) * 2, (qmv.y & 1) * 2);
- int hpelB = (qmvB.y & 2) | ((qmvB.x & 2) >> 1);
-
- pixel *frefB = lowresPlane[hpelB] + blockOffset + (qmvB.x >> 2) + (qmvB.y >> 2) * lumaStride;
+ int qmvx = qmv.x + (qmv.x & 1);
+ int qmvy = qmv.y + (qmv.y & 1);
+ int hpelB = (qmvy & 2) | ((qmvx & 2) >> 1);
+ pixel *frefB = lowresPlane[hpelB] + blockOffset + (qmvx >> 2) + (qmvy >> 2) * lumaStride;
primitives.pixelavg_pp[LUMA_8x8](buf, outstride, frefA, lumaStride, frefB, lumaStride, 32);
return buf;
}
@@ -79,9 +78,10 @@
ALIGN_VAR_16(pixel, subpelbuf[8 * 8]);
int hpelA = (qmv.y & 2) | ((qmv.x & 2) >> 1);
pixel *frefA = lowresPlane[hpelA] + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * lumaStride;
- MV qmvB = qmv + MV((qmv.x & 1) * 2, (qmv.y & 1) * 2);
- int hpelB = (qmvB.y & 2) | ((qmvB.x & 2) >> 1);
- pixel *frefB = lowresPlane[hpelB] + blockOffset + (qmvB.x >> 2) + (qmvB.y >> 2) * lumaStride;
+ int qmvx = qmv.x + (qmv.x & 1);
+ int qmvy = qmv.y + (qmv.y & 1);
+ int hpelB = (qmvy & 2) | ((qmvx & 2) >> 1);
+ pixel *frefB = lowresPlane[hpelB] + blockOffset + (qmvx >> 2) + (qmvy >> 2) * lumaStride;
primitives.pixelavg_pp[LUMA_8x8](subpelbuf, 8, frefA, lumaStride, frefB, lumaStride, 32);
return comp(fenc, FENC_STRIDE, subpelbuf, 8);
}
diff -r 27d36c4b4a27 -r 90ec907326e2 source/common/mv.h
--- a/source/common/mv.h Mon Nov 17 01:30:26 2014 +0530
+++ b/source/common/mv.h Mon Nov 17 19:44:35 2014 +0900
@@ -44,19 +44,19 @@
int32_t word;
};
- MV() : word(0) {}
-
+ MV() {}
+ MV(int32_t w) : word(w) {}
MV(int16_t _x, int16_t _y) : x(_x), y(_y) {}
- const MV& operator =(uint32_t w) { word = w; return *this; }
+ MV& operator =(uint32_t w) { word = w; return *this; }
- const MV& operator +=(const MV& other) { x += other.x; y += other.y; return *this; }
+ MV& operator +=(const MV& other) { x += other.x; y += other.y; return *this; }
- const MV& operator -=(const MV& other) { x -= other.x; y -= other.y; return *this; }
+ MV& operator -=(const MV& other) { x -= other.x; y -= other.y; return *this; }
- const MV& operator >>=(int i) { x >>= i; y >>= i; return *this; }
+ MV& operator >>=(int i) { x >>= i; y >>= i; return *this; }
- const MV& operator <<=(int i) { x <<= i; y <<= i; return *this; }
+ MV& operator <<=(int i) { x <<= i; y <<= i; return *this; }
MV operator >>(int i) const { return MV(x >> i, y >> i); }
@@ -64,16 +64,18 @@
MV operator *(int16_t i) const { return MV(x * i, y * i); }
- const MV operator -(const MV& other) const { return MV(x - other.x, y - other.y); }
+ MV operator -(const MV& other) const { return MV(x - other.x, y - other.y); }
- const MV operator +(const MV& other) const { return MV(x + other.x, y + other.y); }
+ MV operator +(const MV& other) const { return MV(x + other.x, y + other.y); }
bool operator ==(const MV& other) const { return word == other.word; }
bool operator !=(const MV& other) const { return word != other.word; }
+ bool operator !() const { return !word; }
+
// Scale down a QPEL mv to FPEL mv, rounding up by one HPEL offset
- MV roundToFPel() const { return MV(x + 2, y + 2) >> 2; }
+ MV roundToFPel() const { return MV((x + 2) >> 2, (y + 2) >> 2); }
// Scale up an FPEL mv to QPEL by shifting up two bits
MV toQPel() const { return *this << 2; }
diff -r 27d36c4b4a27 -r 90ec907326e2 source/encoder/bitcost.h
--- a/source/encoder/bitcost.h Mon Nov 17 01:30:26 2014 +0530
+++ b/source/encoder/bitcost.h Mon Nov 17 19:44:35 2014 +0900
@@ -35,7 +35,7 @@
{
public:
- BitCost() : m_cost_mvx(0), m_cost_mvy(0), m_cost(0) {}
+ BitCost() : m_cost_mvx(0), m_cost_mvy(0), m_cost(0), m_mvp(0) {}
void setQP(unsigned int qp);
diff -r 27d36c4b4a27 -r 90ec907326e2 source/encoder/motion.cpp
--- a/source/encoder/motion.cpp Mon Nov 17 01:30:26 2014 +0530
+++ b/source/encoder/motion.cpp Mon Nov 17 19:44:35 2014 +0900
@@ -43,7 +43,7 @@
bool hpel_satd;
};
-SubpelWorkload workload[X265_MAX_SUBPEL_LEVEL + 1] =
+static const SubpelWorkload workload[X265_MAX_SUBPEL_LEVEL + 1] =
{
{ 1, 4, 0, 4, false }, // 4 SAD HPEL only
{ 1, 4, 1, 4, false }, // 4 SAD HPEL + 4 SATD QPEL
@@ -116,7 +116,6 @@
sad_x4 = primitives.sad_x4[partEnum];
blockwidth = width;
- blockheight = height;
blockOffset = offset;
/* copy PU block into cache */
@@ -291,7 +290,7 @@
{
ALIGN_VAR_16(int, costs[16]);
pixel *fref = ref->fpelPlane + blockOffset;
- size_t stride = ref->lumaStride;
+ intptr_t stride = ref->lumaStride;
MV omv = bmv;
int saved = bcost;
@@ -531,8 +530,8 @@
MV & outQMv)
{
ALIGN_VAR_16(int, costs[16]);
- size_t stride = ref->lumaStride;
pixel *fref = ref->fpelPlane + blockOffset;
+ intptr_t stride = ref->lumaStride;
setMVP(qmvp);
@@ -560,9 +559,7 @@
MV bmv = pmv.roundToFPel();
int bcost = bprecost;
if (pmv.isSubpel())
- {
bcost = sad(fenc, FENC_STRIDE, fref + bmv.x + bmv.y * stride, stride) + mvcost(bmv << 2);
- }
// measure SAD cost at MV(0) if MVP is not zero
if (pmv.notZero())
@@ -576,21 +573,35 @@
}
// measure SAD cost at each QPEL motion vector candidate
- for (int i = 0; i < numCandidates; i++)
+ if (ref->isLowres)
{
- MV m = mvc[i].clipped(qmvmin, qmvmax);
- if (m.notZero() && m != pmv && m != bestpre) // check already measured
+ for (int i = 0; i < numCandidates; i++)
{
- int cost;
- if (ref->isLowres)
- cost = ref->lowresQPelCost(fenc, blockOffset, m, sad) + mvcost(m);
- else
- cost = subpelCompare(ref, m, sad) + mvcost(m);
-
- if (cost < bprecost)
+ MV m = mvc[i].clipped(qmvmin, qmvmax);
+ if (m.notZero() && m != pmv && m != bestpre) // check already measured
{
- bprecost = cost;
- bestpre = m;
+ int cost = ref->lowresQPelCost(fenc, blockOffset, m, sad) + mvcost(m);
+ if (cost < bprecost)
+ {
+ bprecost = cost;
+ bestpre = m;
+ }
+ }
+ }
+ }
+ else
+ {
+ for (int i = 0; i < numCandidates; i++)
+ {
+ MV m = mvc[i].clipped(qmvmin, qmvmax);
+ if (m.notZero() && m != pmv && m != bestpre) // check already measured
+ {
+ int cost = subpelCompare(ref, m, sad) + mvcost(m);
+ if (cost < bprecost)
+ {
+ bprecost = cost;
+ bestpre = m;
+ }
}
}
}
@@ -1042,7 +1053,7 @@
else
bmv = bmv.toQPel(); // promote search bmv to qpel
- SubpelWorkload& wl = workload[this->subpelRefine];
+ const SubpelWorkload& wl = workload[this->subpelRefine];
if (!bcost)
{
@@ -1052,11 +1063,11 @@
}
else if (ref->isLowres)
{
- int bdir = 0, cost;
+ int bdir = 0;
for (int i = 1; i <= wl.hpel_dirs; i++)
{
MV qmv = bmv + square1[i] * 2;
- cost = ref->lowresQPelCost(fenc, blockOffset, qmv, sad) + mvcost(qmv);
+ int cost = ref->lowresQPelCost(fenc, blockOffset, qmv, sad) + mvcost(qmv);
COPY2_IF_LT(bcost, cost, bdir, i);
}
@@ -1067,7 +1078,7 @@
for (int i = 1; i <= wl.qpel_dirs; i++)
{
MV qmv = bmv + square1[i];
- cost = ref->lowresQPelCost(fenc, blockOffset, qmv, satd) + mvcost(qmv);
+ int cost = ref->lowresQPelCost(fenc, blockOffset, qmv, satd) + mvcost(qmv);
COPY2_IF_LT(bcost, cost, bdir, i);
}
@@ -1087,11 +1098,11 @@
for (int iter = 0; iter < wl.hpel_iters; iter++)
{
- int bdir = 0, cost;
+ int bdir = 0;
for (int i = 1; i <= wl.hpel_dirs; i++)
{
MV qmv = bmv + square1[i] * 2;
- cost = subpelCompare(ref, qmv, hpelcomp) + mvcost(qmv);
+ int cost = subpelCompare(ref, qmv, hpelcomp) + mvcost(qmv);
COPY2_IF_LT(bcost, cost, bdir, i);
}
@@ -1107,11 +1118,11 @@
for (int iter = 0; iter < wl.qpel_iters; iter++)
{
- int bdir = 0, cost;
+ int bdir = 0;
for (int i = 1; i <= wl.qpel_dirs; i++)
{
MV qmv = bmv + square1[i];
- cost = subpelCompare(ref, qmv, satd) + mvcost(qmv);
+ int cost = subpelCompare(ref, qmv, satd) + mvcost(qmv);
COPY2_IF_LT(bcost, cost, bdir, i);
}
@@ -1129,14 +1140,13 @@
int MotionEstimate::subpelCompare(ReferencePlanes *ref, const MV& qmv, pixelcmp_t cmp)
{
+ intptr_t stride = ref->lumaStride;
+ pixel *fref = ref->fpelPlane + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * stride;
int xFrac = qmv.x & 0x3;
int yFrac = qmv.y & 0x3;
if ((yFrac | xFrac) == 0)
- {
- pixel *fref = ref->fpelPlane + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * ref->lumaStride;
- return cmp(fenc, FENC_STRIDE, fref, ref->lumaStride);
- }
+ return cmp(fenc, FENC_STRIDE, fref, stride);
else
{
/* We are taking a short-cut here if the reference is weighted. To be
@@ -1145,22 +1155,17 @@
* are simply interpolating the weighted full-pel pixels. Not 100%
* accurate but good enough for fast qpel ME */
ALIGN_VAR_32(pixel, subpelbuf[64 * 64]);
- pixel *fref = ref->fpelPlane + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * ref->lumaStride;
if (yFrac == 0)
- {
- primitives.luma_hpp[partEnum](fref, ref->lumaStride, subpelbuf, FENC_STRIDE, xFrac);
- }
+ primitives.luma_hpp[partEnum](fref, stride, subpelbuf, FENC_STRIDE, xFrac);
else if (xFrac == 0)
- {
- primitives.luma_vpp[partEnum](fref, ref->lumaStride, subpelbuf, FENC_STRIDE, yFrac);
- }
+ primitives.luma_vpp[partEnum](fref, stride, subpelbuf, FENC_STRIDE, yFrac);
else
{
ALIGN_VAR_32(int16_t, immed[64 * (64 + 8)]);
int filterSize = NTAPS_LUMA;
int halfFilterSize = filterSize >> 1;
- primitives.luma_hps[partEnum](fref, ref->lumaStride, immed, blockwidth, xFrac, 1);
+ primitives.luma_hps[partEnum](fref, stride, immed, blockwidth, xFrac, 1);
primitives.luma_vsp[partEnum](immed + (halfFilterSize - 1) * blockwidth, blockwidth, subpelbuf, FENC_STRIDE, yFrac);
}
return cmp(fenc, FENC_STRIDE, subpelbuf, FENC_STRIDE);
diff -r 27d36c4b4a27 -r 90ec907326e2 source/encoder/motion.h
--- a/source/encoder/motion.h Mon Nov 17 01:30:26 2014 +0530
+++ b/source/encoder/motion.h Mon Nov 17 19:44:35 2014 +0900
@@ -54,7 +54,6 @@
int subpelRefine;
int blockwidth;
- int blockheight;
int partEnum;
static const int COST_MAX = 1 << 28;
diff -r 27d36c4b4a27 -r 90ec907326e2 source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp Mon Nov 17 01:30:26 2014 +0530
+++ b/source/encoder/slicetype.cpp Mon Nov 17 19:44:35 2014 +0900
@@ -1592,12 +1592,13 @@
}
if (bBidir)
{
- pixel subpelbuf0[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE], subpelbuf1[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE];
+ ALIGN_VAR_32(pixel, subpelbuf0[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);
+ ALIGN_VAR_32(pixel, subpelbuf1[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);
intptr_t stride0 = X265_LOWRES_CU_SIZE, stride1 = X265_LOWRES_CU_SIZE;
pixel *src0 = wfref0->lowresMC(pelOffset, *fenc_mvs[0], subpelbuf0, stride0);
pixel *src1 = fref1->lowresMC(pelOffset, *fenc_mvs[1], subpelbuf1, stride1);
- pixel ref[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE];
+ ALIGN_VAR_32(pixel, ref[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);
primitives.pixelavg_pp[LUMA_8x8](ref, X265_LOWRES_CU_SIZE, src0, stride0, src1, stride1, 32);
int bicost = primitives.satd[LUMA_8x8](fenc->lowresPlane[0] + pelOffset, fenc->lumaStride, ref, X265_LOWRES_CU_SIZE);
COPY2_IF_LT(bcost, bicost, listused, 3);
@@ -1652,9 +1653,9 @@
// generate 35 intra predictions into m_predictions
pixelcmp_t satd = primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)];
- int icost = m_me.COST_MAX, cost;
+ int icost = m_me.COST_MAX;
primitives.intra_pred[DC_IDX][sizeIdx](m_predictions, cuSize, left0, above0, 0, (cuSize <= 16));
- cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize);
+ int cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize);
if (cost < icost)
icost = cost;
pixel *above = (cuSize >= 8) ? above1 : above0;
More information about the x265-devel mailing list