[x265] [PATCH 2 of 4] disable interpolate horizontal merge
Min Chen
chenm003 at 163.com
Mon Oct 28 15:24:34 CET 2013
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1382970193 -28800
# Node ID 6bc4a571ed625cd23503810cfaa5256a3f32a20d
# Parent 4a14bd24b6572ed1ffcf9dcfa9f4c841adc62211
disable interpolate horizontal merge
The asm code requires the width to be a multiple of 4; the maskmovq instruction is very expensive.
diff -r 4a14bd24b657 -r 6bc4a571ed62 source/encoder/motion.cpp
--- a/source/encoder/motion.cpp Mon Oct 28 22:22:11 2013 +0800
+++ b/source/encoder/motion.cpp Mon Oct 28 22:23:13 2013 +0800
@@ -854,9 +854,19 @@
}
else
{
- subpelInterpolate(ref, qmv0, dir);
- cost0 = hpelcomp(fenc, FENC_STRIDE, subpelbuf, FENC_STRIDE + (dir == 2)) + mvcost0;
- cost1 = hpelcomp(fenc, FENC_STRIDE, subpelbuf + (dir == 2) + (dir == 1 ? FENC_STRIDE : 0), FENC_STRIDE + (dir == 2)) + mvcost1;
+ if (dir == 1)
+ {
+ subpelInterpolate(ref, qmv0, 1);
+ cost0 = hpelcomp(fenc, FENC_STRIDE, subpelbuf, FENC_STRIDE + (dir == 2)) + mvcost0;
+ cost1 = hpelcomp(fenc, FENC_STRIDE, subpelbuf + (dir == 2) + (dir == 1 ? FENC_STRIDE : 0), FENC_STRIDE + (dir == 2)) + mvcost1;
+ }
+ else
+ {
+ subpelInterpolate(ref, qmv0, 0);
+ cost0 = hpelcomp(fenc, FENC_STRIDE, subpelbuf, FENC_STRIDE) + mvcost0;
+ subpelInterpolate(ref, qmv1, 0);
+ cost1 = hpelcomp(fenc, FENC_STRIDE, subpelbuf, FENC_STRIDE) + mvcost1;
+ }
}
COPY2_IF_LT(bcost, cost0, bdir, i + 0);
COPY2_IF_LT(bcost, cost1, bdir, i + 1);
@@ -1179,9 +1189,9 @@
int yFrac = qmv.y & 0x3;
assert(yFrac | xFrac);
- int realWidth = blockwidth + (dir == 2);
+ assert(dir != 2);
+ assert((blockwidth % 4) == 0);
int realHeight = blockheight + (dir == 1);
- intptr_t realStride = FENC_STRIDE + (dir == 2);
pixel *fref = ref->unweightedFPelPlane + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * ref->lumaStride;
int shiftNum = IF_INTERNAL_PREC - X265_DEPTH;
int local_shift = ref->shift + shiftNum;
@@ -1190,39 +1200,39 @@
{
if (yFrac == 0)
{
- primitives.ipfilter_ps[FILTER_H_P_S_8](fref, ref->lumaStride, immedVal, realStride, realWidth, realHeight, g_lumaFilter[xFrac]);
- primitives.weightpUni(immedVal, subpelbuf, realStride, realStride, realWidth, realHeight, ref->weight, local_round, local_shift, ref->offset);
+ primitives.ipfilter_ps[FILTER_H_P_S_8](fref, ref->lumaStride, immedVal, FENC_STRIDE, blockwidth, realHeight, g_lumaFilter[xFrac]);
+ primitives.weightpUni(immedVal, subpelbuf, FENC_STRIDE, FENC_STRIDE, blockwidth, realHeight, ref->weight, local_round, local_shift, ref->offset);
}
else if (xFrac == 0)
{
- primitives.ipfilter_ps[FILTER_V_P_S_8](fref, ref->lumaStride, immedVal, realStride, realWidth, realHeight, g_lumaFilter[yFrac]);
- primitives.weightpUni(immedVal, subpelbuf, realStride, realStride, realWidth, realHeight, ref->weight, local_round, local_shift, ref->offset);
+ primitives.ipfilter_ps[FILTER_V_P_S_8](fref, ref->lumaStride, immedVal, FENC_STRIDE, blockwidth, realHeight, g_lumaFilter[yFrac]);
+ primitives.weightpUni(immedVal, subpelbuf, FENC_STRIDE, FENC_STRIDE, blockwidth, realHeight, ref->weight, local_round, local_shift, ref->offset);
}
else
{
int filterSize = NTAPS_LUMA;
int halfFilterSize = (filterSize >> 1);
- primitives.ipfilter_ps[FILTER_H_P_S_8](fref - (halfFilterSize - 1) * ref->lumaStride, ref->lumaStride, immedVal, realWidth, realWidth, realHeight + filterSize - 1, g_lumaFilter[xFrac]);
- primitives.ipfilter_ss[FILTER_V_S_S_8](immedVal + (halfFilterSize - 1) * realWidth, realWidth, immedVal2, realStride, realWidth, realHeight, g_lumaFilter[yFrac]);
- primitives.weightpUni(immedVal2, subpelbuf, realStride, realStride, realWidth, realHeight, ref->weight, local_round, local_shift, ref->offset);
+ primitives.ipfilter_ps[FILTER_H_P_S_8](fref - (halfFilterSize - 1) * ref->lumaStride, ref->lumaStride, immedVal, blockwidth, blockwidth, realHeight + filterSize - 1, g_lumaFilter[xFrac]);
+ primitives.ipfilter_ss[FILTER_V_S_S_8](immedVal + (halfFilterSize - 1) * blockwidth, blockwidth, immedVal2, FENC_STRIDE, blockwidth, realHeight, g_lumaFilter[yFrac]);
+ primitives.weightpUni(immedVal2, subpelbuf, FENC_STRIDE, FENC_STRIDE, blockwidth, realHeight, ref->weight, local_round, local_shift, ref->offset);
}
}
else
{
if (yFrac == 0)
{
- primitives.ipfilter_pp[FILTER_H_P_P_8](fref, ref->lumaStride, subpelbuf, realStride, realWidth, realHeight, g_lumaFilter[xFrac]);
+ primitives.ipfilter_pp[FILTER_H_P_P_8](fref, ref->lumaStride, subpelbuf, FENC_STRIDE, blockwidth, realHeight, g_lumaFilter[xFrac]);
}
else if (xFrac == 0)
{
- primitives.ipfilter_pp[FILTER_V_P_P_8](fref, ref->lumaStride, subpelbuf, realStride, realWidth, realHeight, g_lumaFilter[yFrac]);
+ primitives.ipfilter_pp[FILTER_V_P_P_8](fref, ref->lumaStride, subpelbuf, FENC_STRIDE, blockwidth, realHeight, g_lumaFilter[yFrac]);
}
else
{
int filterSize = NTAPS_LUMA;
int halfFilterSize = (filterSize >> 1);
- primitives.ipfilter_ps[FILTER_H_P_S_8](fref - (halfFilterSize - 1) * ref->lumaStride, ref->lumaStride, immedVal, realWidth, realWidth, realHeight + filterSize - 1, g_lumaFilter[xFrac]);
- primitives.ipfilter_sp[FILTER_V_S_P_8](immedVal + (halfFilterSize - 1) * realWidth, realWidth, subpelbuf, realStride, realWidth, realHeight, g_lumaFilter[yFrac]);
+ primitives.ipfilter_ps[FILTER_H_P_S_8](fref - (halfFilterSize - 1) * ref->lumaStride, ref->lumaStride, immedVal, blockwidth, blockwidth, realHeight + filterSize - 1, g_lumaFilter[xFrac]);
+ primitives.ipfilter_sp[FILTER_V_S_P_8](immedVal + (halfFilterSize - 1) * blockwidth, blockwidth, subpelbuf, FENC_STRIDE, blockwidth, realHeight, g_lumaFilter[yFrac]);
}
}
}
More information about the x265-devel mailing list