[x265] remove m_immedVals
Satoshi Nakagawa
nakagawa424 at oki.com
Thu May 12 13:37:05 CEST 2016
# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1463052561 -32400
# Thu May 12 20:29:21 2016 +0900
# Node ID 3d6c4c1fcb9923e8215aefae62bfeeb118e173c0
# Parent a5362b9533f6a5b77740b4e8f97dba2555b6f929
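remove m_immedVals

Replace the heap-allocated Predict::m_immedVals scratch buffer with aligned
stack-local "immed" arrays at each two-pass (horizontal then vertical)
interpolation site, and drop the allocation, free and failure path that
existed only for it.

Related cleanups in the same change:
- interp_hv_pp_c sizes its intermediate buffer from the template parameters
  (width * (height + N - 1)) and offsets by (N / 2 - 1) rows instead of a
  hard-coded 3.
- Intermediate buffers hold (height + NTAPS - 1) rows rather than
  (height + NTAPS).
- Chroma paths scale the motion vector once (mv << (1 - chromaShift)) and
  derive both the reference offset (>> 3) and the fraction (& 7) from it,
  instead of shifting the fraction at every filter call.
- subpelCompare uses a single subpelbuf with the block width as its stride
  instead of the fenc stride.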
diff -r a5362b9533f6 -r 3d6c4c1fcb99 source/common/ipfilter.cpp
--- a/source/common/ipfilter.cpp Wed May 04 21:08:09 2016 +0000
+++ b/source/common/ipfilter.cpp Thu May 12 20:29:21 2016 +0900
@@ -365,10 +365,10 @@
template<int N, int width, int height>
void interp_hv_pp_c(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int idxX, int idxY)
{
- short immedVals[(64 + 8) * (64 + 8)];
+ ALIGN_VAR_32(int16_t, immed[width * (height + N - 1)]);
- interp_horiz_ps_c<N, width, height>(src, srcStride, immedVals, width, idxX, 1);
- filterVertical_sp_c<N>(immedVals + 3 * width, width, dst, dstStride, width, height, idxY);
+ interp_horiz_ps_c<N, width, height>(src, srcStride, immed, width, idxX, 1);
+ filterVertical_sp_c<N>(immed + (N / 2 - 1) * width, width, dst, dstStride, width, height, idxY);
}
}
diff -r a5362b9533f6 -r 3d6c4c1fcb99 source/common/predict.cpp
--- a/source/common/predict.cpp Wed May 04 21:08:09 2016 +0000
+++ b/source/common/predict.cpp Thu May 12 20:29:21 2016 +0900
@@ -57,12 +57,10 @@
Predict::Predict()
{
- m_immedVals = NULL;
}
Predict::~Predict()
{
- X265_FREE(m_immedVals);
m_predShortYuv[0].destroy();
m_predShortYuv[1].destroy();
}
@@ -72,12 +70,8 @@
m_csp = csp;
m_hChromaShift = CHROMA_H_SHIFT(csp);
m_vChromaShift = CHROMA_V_SHIFT(csp);
- CHECKED_MALLOC(m_immedVals, int16_t, 64 * (64 + NTAPS_LUMA - 1));
return m_predShortYuv[0].create(MAX_CU_SIZE, csp) && m_predShortYuv[1].create(MAX_CU_SIZE, csp);
-
-fail:
- return false;
}
void Predict::motionCompensation(const CUData& cu, const PredictionUnit& pu, Yuv& predYuv, bool bLuma, bool bChroma)
@@ -258,8 +252,8 @@
int partEnum = partitionFromSizes(pu.width, pu.height);
const pixel* src = refPic.getLumaAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + srcOffset;
- int xFrac = mv.x & 0x3;
- int yFrac = mv.y & 0x3;
+ int xFrac = mv.x & 3;
+ int yFrac = mv.y & 3;
if (!(yFrac | xFrac))
primitives.pu[partEnum].copy_pp(dst, dstStride, src, srcStride);
@@ -280,14 +274,14 @@
intptr_t srcOffset = (mv.x >> 2) + (mv.y >> 2) * srcStride;
const pixel* src = refPic.getLumaAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + srcOffset;
- int xFrac = mv.x & 0x3;
- int yFrac = mv.y & 0x3;
-
int partEnum = partitionFromSizes(pu.width, pu.height);
X265_CHECK((pu.width % 4) + (pu.height % 4) == 0, "width or height not divisible by 4\n");
X265_CHECK(dstStride == MAX_CU_SIZE, "stride expected to be max cu size\n");
+ int xFrac = mv.x & 3;
+ int yFrac = mv.y & 3;
+
if (!(yFrac | xFrac))
primitives.pu[partEnum].convert_p2s(src, srcStride, dst, dstStride);
else if (!yFrac)
@@ -296,11 +290,12 @@
primitives.pu[partEnum].luma_vps(src, srcStride, dst, dstStride, yFrac);
else
{
- int tmpStride = pu.width;
- int filterSize = NTAPS_LUMA;
- int halfFilterSize = (filterSize >> 1);
- primitives.pu[partEnum].luma_hps(src, srcStride, m_immedVals, tmpStride, xFrac, 1);
- primitives.pu[partEnum].luma_vss(m_immedVals + (halfFilterSize - 1) * tmpStride, tmpStride, dst, dstStride, yFrac);
+ ALIGN_VAR_32(int16_t, immed[MAX_CU_SIZE * (MAX_CU_SIZE + NTAPS_LUMA - 1)]);
+ int immedStride = pu.width;
+ int halfFilterSize = NTAPS_LUMA >> 1;
+
+ primitives.pu[partEnum].luma_hps(src, srcStride, immed, immedStride, xFrac, 1);
+ primitives.pu[partEnum].luma_vss(immed + (halfFilterSize - 1) * immedStride, immedStride, dst, dstStride, yFrac);
}
}
@@ -309,10 +304,10 @@
intptr_t dstStride = dstYuv.m_csize;
intptr_t refStride = refPic.m_strideC;
- int shiftHor = (2 + m_hChromaShift);
- int shiftVer = (2 + m_vChromaShift);
+ int mvx = mv.x << (1 - m_hChromaShift);
+ int mvy = mv.y << (1 - m_vChromaShift);
- intptr_t refOffset = (mv.x >> shiftHor) + (mv.y >> shiftVer) * refStride;
+ intptr_t refOffset = (mvx >> 3) + (mvy >> 3) * refStride;
const pixel* refCb = refPic.getCbAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + refOffset;
const pixel* refCr = refPic.getCrAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + refOffset;
@@ -320,11 +315,11 @@
pixel* dstCb = dstYuv.getCbAddr(pu.puAbsPartIdx);
pixel* dstCr = dstYuv.getCrAddr(pu.puAbsPartIdx);
- int xFrac = mv.x & ((1 << shiftHor) - 1);
- int yFrac = mv.y & ((1 << shiftVer) - 1);
+ int partEnum = partitionFromSizes(pu.width, pu.height);
- int partEnum = partitionFromSizes(pu.width, pu.height);
-
+ int xFrac = mvx & 7;
+ int yFrac = mvy & 7;
+
if (!(yFrac | xFrac))
{
primitives.chroma[m_csp].pu[partEnum].copy_pp(dstCb, dstStride, refCb, refStride);
@@ -332,37 +327,36 @@
}
else if (!yFrac)
{
- primitives.chroma[m_csp].pu[partEnum].filter_hpp(refCb, refStride, dstCb, dstStride, xFrac << (1 - m_hChromaShift));
- primitives.chroma[m_csp].pu[partEnum].filter_hpp(refCr, refStride, dstCr, dstStride, xFrac << (1 - m_hChromaShift));
+ primitives.chroma[m_csp].pu[partEnum].filter_hpp(refCb, refStride, dstCb, dstStride, xFrac);
+ primitives.chroma[m_csp].pu[partEnum].filter_hpp(refCr, refStride, dstCr, dstStride, xFrac);
}
else if (!xFrac)
{
- primitives.chroma[m_csp].pu[partEnum].filter_vpp(refCb, refStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift));
- primitives.chroma[m_csp].pu[partEnum].filter_vpp(refCr, refStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift));
+ primitives.chroma[m_csp].pu[partEnum].filter_vpp(refCb, refStride, dstCb, dstStride, yFrac);
+ primitives.chroma[m_csp].pu[partEnum].filter_vpp(refCr, refStride, dstCr, dstStride, yFrac);
}
else
{
- int extStride = pu.width >> m_hChromaShift;
- int filterSize = NTAPS_CHROMA;
- int halfFilterSize = (filterSize >> 1);
+ ALIGN_VAR_32(int16_t, immed[MAX_CU_SIZE * (MAX_CU_SIZE + NTAPS_CHROMA - 1)]);
+ int immedStride = pu.width >> m_hChromaShift;
+ int halfFilterSize = NTAPS_CHROMA >> 1;
- primitives.chroma[m_csp].pu[partEnum].filter_hps(refCb, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
- primitives.chroma[m_csp].pu[partEnum].filter_vsp(m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift));
-
- primitives.chroma[m_csp].pu[partEnum].filter_hps(refCr, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
- primitives.chroma[m_csp].pu[partEnum].filter_vsp(m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift));
+ primitives.chroma[m_csp].pu[partEnum].filter_hps(refCb, refStride, immed, immedStride, xFrac, 1);
+ primitives.chroma[m_csp].pu[partEnum].filter_vsp(immed + (halfFilterSize - 1) * immedStride, immedStride, dstCb, dstStride, yFrac);
+ primitives.chroma[m_csp].pu[partEnum].filter_hps(refCr, refStride, immed, immedStride, xFrac, 1);
+ primitives.chroma[m_csp].pu[partEnum].filter_vsp(immed + (halfFilterSize - 1) * immedStride, immedStride, dstCr, dstStride, yFrac);
}
}
void Predict::predInterChromaShort(const PredictionUnit& pu, ShortYuv& dstSYuv, const PicYuv& refPic, const MV& mv) const
{
+ intptr_t dstStride = dstSYuv.m_csize;
intptr_t refStride = refPic.m_strideC;
- intptr_t dstStride = dstSYuv.m_csize;
- int shiftHor = (2 + m_hChromaShift);
- int shiftVer = (2 + m_vChromaShift);
+ int mvx = mv.x << (1 - m_hChromaShift);
+ int mvy = mv.y << (1 - m_vChromaShift);
- intptr_t refOffset = (mv.x >> shiftHor) + (mv.y >> shiftVer) * refStride;
+ intptr_t refOffset = (mvx >> 3) + (mvy >> 3) * refStride;
const pixel* refCb = refPic.getCbAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + refOffset;
const pixel* refCr = refPic.getCrAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + refOffset;
@@ -370,15 +364,15 @@
int16_t* dstCb = dstSYuv.getCbAddr(pu.puAbsPartIdx);
int16_t* dstCr = dstSYuv.getCrAddr(pu.puAbsPartIdx);
- int xFrac = mv.x & ((1 << shiftHor) - 1);
- int yFrac = mv.y & ((1 << shiftVer) - 1);
-
int partEnum = partitionFromSizes(pu.width, pu.height);
uint32_t cxWidth = pu.width >> m_hChromaShift;
X265_CHECK(((cxWidth | (pu.height >> m_vChromaShift)) % 2) == 0, "chroma block size expected to be multiple of 2\n");
+ int xFrac = mvx & 7;
+ int yFrac = mvy & 7;
+
if (!(yFrac | xFrac))
{
primitives.chroma[m_csp].pu[partEnum].p2s(refCb, refStride, dstCb, dstStride);
@@ -386,23 +380,24 @@
}
else if (!yFrac)
{
- primitives.chroma[m_csp].pu[partEnum].filter_hps(refCb, refStride, dstCb, dstStride, xFrac << (1 - m_hChromaShift), 0);
- primitives.chroma[m_csp].pu[partEnum].filter_hps(refCr, refStride, dstCr, dstStride, xFrac << (1 - m_hChromaShift), 0);
+ primitives.chroma[m_csp].pu[partEnum].filter_hps(refCb, refStride, dstCb, dstStride, xFrac, 0);
+ primitives.chroma[m_csp].pu[partEnum].filter_hps(refCr, refStride, dstCr, dstStride, xFrac, 0);
}
else if (!xFrac)
{
- primitives.chroma[m_csp].pu[partEnum].filter_vps(refCb, refStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift));
- primitives.chroma[m_csp].pu[partEnum].filter_vps(refCr, refStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift));
+ primitives.chroma[m_csp].pu[partEnum].filter_vps(refCb, refStride, dstCb, dstStride, yFrac);
+ primitives.chroma[m_csp].pu[partEnum].filter_vps(refCr, refStride, dstCr, dstStride, yFrac);
}
else
{
- int extStride = cxWidth;
- int filterSize = NTAPS_CHROMA;
- int halfFilterSize = (filterSize >> 1);
- primitives.chroma[m_csp].pu[partEnum].filter_hps(refCb, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
- primitives.chroma[m_csp].pu[partEnum].filter_vss(m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift));
- primitives.chroma[m_csp].pu[partEnum].filter_hps(refCr, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
- primitives.chroma[m_csp].pu[partEnum].filter_vss(m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift));
+ ALIGN_VAR_32(int16_t, immed[MAX_CU_SIZE * (MAX_CU_SIZE + NTAPS_CHROMA - 1)]);
+ int immedStride = cxWidth;
+ int halfFilterSize = NTAPS_CHROMA >> 1;
+
+ primitives.chroma[m_csp].pu[partEnum].filter_hps(refCb, refStride, immed, immedStride, xFrac, 1);
+ primitives.chroma[m_csp].pu[partEnum].filter_vss(immed + (halfFilterSize - 1) * immedStride, immedStride, dstCb, dstStride, yFrac);
+ primitives.chroma[m_csp].pu[partEnum].filter_hps(refCr, refStride, immed, immedStride, xFrac, 1);
+ primitives.chroma[m_csp].pu[partEnum].filter_vss(immed + (halfFilterSize - 1) * immedStride, immedStride, dstCr, dstStride, yFrac);
}
}
diff -r a5362b9533f6 -r 3d6c4c1fcb99 source/common/predict.h
--- a/source/common/predict.h Wed May 04 21:08:09 2016 +0000
+++ b/source/common/predict.h Thu May 12 20:29:21 2016 +0900
@@ -73,7 +73,6 @@
};
ShortYuv m_predShortYuv[2]; /* temporary storage for weighted prediction */
- int16_t* m_immedVals;
// Unfiltered/filtered neighbours of the current partition.
pixel intraNeighbourBuf[2][258];
diff -r a5362b9533f6 -r 3d6c4c1fcb99 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Wed May 04 21:08:09 2016 +0000
+++ b/source/common/x86/asm-primitives.cpp Thu May 12 20:29:21 2016 +0900
@@ -861,12 +861,12 @@
template<int size>
void interp_8tap_hv_pp_cpu(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int idxX, int idxY)
{
- ALIGN_VAR_32(int16_t, immed[MAX_CU_SIZE * (MAX_CU_SIZE + NTAPS_LUMA)]);
- const int filterSize = NTAPS_LUMA;
- const int halfFilterSize = filterSize >> 1;
-
- primitives.pu[size].luma_hps(src, srcStride, immed, MAX_CU_SIZE, idxX, 1);
- primitives.pu[size].luma_vsp(immed + (halfFilterSize - 1) * MAX_CU_SIZE, MAX_CU_SIZE, dst, dstStride, idxY);
+ ALIGN_VAR_32(int16_t, immed[MAX_CU_SIZE * (MAX_CU_SIZE + NTAPS_LUMA - 1)]);
+ const int halfFilterSize = NTAPS_LUMA >> 1;
+ const int immedStride = MAX_CU_SIZE;
+
+ primitives.pu[size].luma_hps(src, srcStride, immed, immedStride, idxX, 1);
+ primitives.pu[size].luma_vsp(immed + (halfFilterSize - 1) * immedStride, immedStride, dst, dstStride, idxY);
}
#if HIGH_BIT_DEPTH
diff -r a5362b9533f6 -r 3d6c4c1fcb99 source/encoder/motion.cpp
--- a/source/encoder/motion.cpp Wed May 04 21:08:09 2016 +0000
+++ b/source/encoder/motion.cpp Thu May 12 20:29:21 2016 +0900
@@ -1180,15 +1180,17 @@
int MotionEstimate::subpelCompare(ReferencePlanes *ref, const MV& qmv, pixelcmp_t cmp)
{
intptr_t refStride = ref->lumaStride;
- pixel *fref = ref->fpelPlane[0] + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * refStride;
+ const pixel* fref = ref->fpelPlane[0] + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * refStride;
int xFrac = qmv.x & 0x3;
int yFrac = qmv.y & 0x3;
int cost;
- intptr_t lclStride = fencPUYuv.m_size;
- X265_CHECK(lclStride == FENC_STRIDE, "fenc buffer is assumed to have FENC_STRIDE by sad_x3 and sad_x4\n");
+ const intptr_t fencStride = FENC_STRIDE;
+ X265_CHECK(fencPUYuv.m_size == FENC_STRIDE, "fenc buffer is assumed to have FENC_STRIDE by sad_x3 and sad_x4\n");
+ ALIGN_VAR_32(pixel, subpelbuf[MAX_CU_SIZE * MAX_CU_SIZE]);
+
if (!(yFrac | xFrac))
- cost = cmp(fencPUYuv.m_buf[0], lclStride, fref, refStride);
+ cost = cmp(fencPUYuv.m_buf[0], fencStride, fref, refStride);
else
{
/* we are taking a short-cut here if the reference is weighted. To be
@@ -1196,15 +1198,13 @@
* the final 16bit values prior to rounding and down shifting. Instead we
* are simply interpolating the weighted full-pel pixels. Not 100%
* accurate but good enough for fast qpel ME */
- ALIGN_VAR_32(pixel, subpelbuf[64 * 64]);
if (!yFrac)
- primitives.pu[partEnum].luma_hpp(fref, refStride, subpelbuf, lclStride, xFrac);
+ primitives.pu[partEnum].luma_hpp(fref, refStride, subpelbuf, blockwidth, xFrac);
else if (!xFrac)
- primitives.pu[partEnum].luma_vpp(fref, refStride, subpelbuf, lclStride, yFrac);
+ primitives.pu[partEnum].luma_vpp(fref, refStride, subpelbuf, blockwidth, yFrac);
else
- primitives.pu[partEnum].luma_hvpp(fref, refStride, subpelbuf, lclStride, xFrac, yFrac);
-
- cost = cmp(fencPUYuv.m_buf[0], lclStride, subpelbuf, lclStride);
+ primitives.pu[partEnum].luma_hvpp(fref, refStride, subpelbuf, blockwidth, xFrac, yFrac);
+ cost = cmp(fencPUYuv.m_buf[0], fencStride, subpelbuf, blockwidth);
}
if (bChromaSATD)
@@ -1212,12 +1212,12 @@
int csp = fencPUYuv.m_csp;
int hshift = fencPUYuv.m_hChromaShift;
int vshift = fencPUYuv.m_vChromaShift;
- int shiftHor = (2 + hshift);
- int shiftVer = (2 + vshift);
- lclStride = fencPUYuv.m_csize;
+ int mvx = qmv.x << (1 - hshift);
+ int mvy = qmv.y << (1 - vshift);
+ intptr_t fencStrideC = fencPUYuv.m_csize;
intptr_t refStrideC = ref->reconPic->m_strideC;
- intptr_t refOffset = (qmv.x >> shiftHor) + (qmv.y >> shiftVer) * refStrideC;
+ intptr_t refOffset = (mvx >> 3) + (mvy >> 3) * refStrideC;
const pixel* refCb = ref->getCbAddr(ctuAddr, absPartIdx) + refOffset;
const pixel* refCr = ref->getCrAddr(ctuAddr, absPartIdx) + refOffset;
@@ -1225,48 +1225,46 @@
X265_CHECK((hshift == 0) || (hshift == 1), "hshift must be 0 or 1\n");
X265_CHECK((vshift == 0) || (vshift == 1), "vshift must be 0 or 1\n");
- xFrac = qmv.x & (hshift ? 7 : 3);
- yFrac = qmv.y & (vshift ? 7 : 3);
+ xFrac = mvx & 7;
+ yFrac = mvy & 7;
if (!(yFrac | xFrac))
{
- cost += chromaSatd(fencPUYuv.m_buf[1], lclStride, refCb, refStrideC);
- cost += chromaSatd(fencPUYuv.m_buf[2], lclStride, refCr, refStrideC);
+ cost += chromaSatd(fencPUYuv.m_buf[1], fencStrideC, refCb, refStrideC);
+ cost += chromaSatd(fencPUYuv.m_buf[2], fencStrideC, refCr, refStrideC);
}
else
{
- ALIGN_VAR_32(pixel, subpelbuf[64 * 64]);
+ int blockwidthC = blockwidth >> hshift;
+
if (!yFrac)
{
- primitives.chroma[csp].pu[partEnum].filter_hpp(refCb, refStrideC, subpelbuf, lclStride, xFrac << (1 - hshift));
- cost += chromaSatd(fencPUYuv.m_buf[1], lclStride, subpelbuf, lclStride);
+ primitives.chroma[csp].pu[partEnum].filter_hpp(refCb, refStrideC, subpelbuf, blockwidthC, xFrac);
+ cost += chromaSatd(fencPUYuv.m_buf[1], fencStrideC, subpelbuf, blockwidthC);
- primitives.chroma[csp].pu[partEnum].filter_hpp(refCr, refStrideC, subpelbuf, lclStride, xFrac << (1 - hshift));
- cost += chromaSatd(fencPUYuv.m_buf[2], lclStride, subpelbuf, lclStride);
+ primitives.chroma[csp].pu[partEnum].filter_hpp(refCr, refStrideC, subpelbuf, blockwidthC, xFrac);
+ cost += chromaSatd(fencPUYuv.m_buf[2], fencStrideC, subpelbuf, blockwidthC);
}
else if (!xFrac)
{
- primitives.chroma[csp].pu[partEnum].filter_vpp(refCb, refStrideC, subpelbuf, lclStride, yFrac << (1 - vshift));
- cost += chromaSatd(fencPUYuv.m_buf[1], lclStride, subpelbuf, lclStride);
+ primitives.chroma[csp].pu[partEnum].filter_vpp(refCb, refStrideC, subpelbuf, blockwidthC, yFrac);
+ cost += chromaSatd(fencPUYuv.m_buf[1], fencStrideC, subpelbuf, blockwidthC);
- primitives.chroma[csp].pu[partEnum].filter_vpp(refCr, refStrideC, subpelbuf, lclStride, yFrac << (1 - vshift));
- cost += chromaSatd(fencPUYuv.m_buf[2], lclStride, subpelbuf, lclStride);
+ primitives.chroma[csp].pu[partEnum].filter_vpp(refCr, refStrideC, subpelbuf, blockwidthC, yFrac);
+ cost += chromaSatd(fencPUYuv.m_buf[2], fencStrideC, subpelbuf, blockwidthC);
}
else
{
- ALIGN_VAR_32(int16_t, immed[64 * (64 + NTAPS_CHROMA)]);
-
- int extStride = blockwidth >> hshift;
- int filterSize = NTAPS_CHROMA;
- int halfFilterSize = (filterSize >> 1);
+ ALIGN_VAR_32(int16_t, immed[MAX_CU_SIZE * (MAX_CU_SIZE + NTAPS_LUMA - 1)]);
+ const int halfFilterSize = (NTAPS_CHROMA >> 1);
- primitives.chroma[csp].pu[partEnum].filter_hps(refCb, refStrideC, immed, extStride, xFrac << (1 - hshift), 1);
- primitives.chroma[csp].pu[partEnum].filter_vsp(immed + (halfFilterSize - 1) * extStride, extStride, subpelbuf, lclStride, yFrac << (1 - vshift));
- cost += chromaSatd(fencPUYuv.m_buf[1], lclStride, subpelbuf, lclStride);
+ primitives.chroma[csp].pu[partEnum].filter_hps(refCb, refStrideC, immed, blockwidthC, xFrac, 1);
+ primitives.chroma[csp].pu[partEnum].filter_vsp(immed + (halfFilterSize - 1) * blockwidthC, blockwidthC, subpelbuf, blockwidthC, yFrac);
+ cost += chromaSatd(fencPUYuv.m_buf[1], fencStrideC, subpelbuf, blockwidthC);
- primitives.chroma[csp].pu[partEnum].filter_hps(refCr, refStrideC, immed, extStride, xFrac << (1 - hshift), 1);
- primitives.chroma[csp].pu[partEnum].filter_vsp(immed + (halfFilterSize - 1) * extStride, extStride, subpelbuf, lclStride, yFrac << (1 - vshift));
- cost += chromaSatd(fencPUYuv.m_buf[2], lclStride, subpelbuf, lclStride);
+ primitives.chroma[csp].pu[partEnum].filter_hps(refCr, refStrideC, immed, blockwidthC, xFrac, 1);
+ primitives.chroma[csp].pu[partEnum].filter_vsp(immed + (halfFilterSize - 1) * blockwidthC, blockwidthC, subpelbuf, blockwidthC, yFrac);
+ cost += chromaSatd(fencPUYuv.m_buf[2], fencStrideC, subpelbuf, blockwidthC);
}
}
}
diff -r a5362b9533f6 -r 3d6c4c1fcb99 source/encoder/weightPrediction.cpp
--- a/source/encoder/weightPrediction.cpp Wed May 04 21:08:09 2016 +0000
+++ b/source/encoder/weightPrediction.cpp Thu May 12 20:29:21 2016 +0900
@@ -132,25 +132,25 @@
intptr_t fpeloffset = (mv.y >> 2) * stride + (mv.x >> 2);
pixel *temp = src + pixoff + fpeloffset;
- int xFrac = mv.x & 0x7;
- int yFrac = mv.y & 0x7;
- if ((yFrac | xFrac) == 0)
+ int xFrac = mv.x & 7;
+ int yFrac = mv.y & 7;
+ if (!(yFrac | xFrac))
{
primitives.chroma[csp].pu[LUMA_16x16].copy_pp(mcout + pixoff, stride, temp, stride);
}
- else if (yFrac == 0)
+ else if (!yFrac)
{
primitives.chroma[csp].pu[LUMA_16x16].filter_hpp(temp, stride, mcout + pixoff, stride, xFrac);
}
- else if (xFrac == 0)
+ else if (!xFrac)
{
primitives.chroma[csp].pu[LUMA_16x16].filter_vpp(temp, stride, mcout + pixoff, stride, yFrac);
}
else
{
- ALIGN_VAR_16(int16_t, imm[16 * (16 + NTAPS_CHROMA)]);
- primitives.chroma[csp].pu[LUMA_16x16].filter_hps(temp, stride, imm, bw, xFrac, 1);
- primitives.chroma[csp].pu[LUMA_16x16].filter_vsp(imm + ((NTAPS_CHROMA >> 1) - 1) * bw, bw, mcout + pixoff, stride, yFrac);
+ ALIGN_VAR_16(int16_t, immed[16 * (16 + NTAPS_CHROMA - 1)]);
+ primitives.chroma[csp].pu[LUMA_16x16].filter_hps(temp, stride, immed, bw, xFrac, 1);
+ primitives.chroma[csp].pu[LUMA_16x16].filter_vsp(immed + ((NTAPS_CHROMA >> 1) - 1) * bw, bw, mcout + pixoff, stride, yFrac);
}
}
else
More information about the x265-devel mailing list.