[x265] [PATCH] motion: use fast weighted subpel refine
Steve Borho
steve at borho.org
Fri Nov 22 00:30:09 CET 2013
# HG changeset patch
# User Steve Borho <steve at borho.org>
# Date 1385076566 21600
# Thu Nov 21 17:29:26 2013 -0600
# Node ID 8f156b97360be563a52743826bded075bd98b267
# Parent b172259c07f1b7a04deadfeb89469a700f644feb
motion: use fast weighted subpel refine
Don't do the full-blown weighted motion compensation for ME; just interpolate
the weighted full-pel pixels. The result is not 100% identical to the pixels
that will be used to encode the final prediction, but it is close enough for ME.
Testing with sintel_trailer_2k_720p24.y4m at the medium preset with all other settings at their defaults:
x265 [info]: 651 of 1124 (57.92%) P frames weighted
before: 1253 frames in 512.74s (2.44 fps), 223.51 kb/s, Global PSNR: 50.552
after: 1253 frames in 410.25s (3.05 fps), 223.59 kb/s, Global PSNR: 50.589
diff -r b172259c07f1 -r 8f156b97360b source/encoder/motion.cpp
--- a/source/encoder/motion.cpp Thu Nov 21 17:07:34 2013 -0600
+++ b/source/encoder/motion.cpp Thu Nov 21 17:29:26 2013 -0600
@@ -104,17 +104,11 @@
init_scales();
fenc = (pixel*)X265_MALLOC(pixel, MAX_CU_SIZE * MAX_CU_SIZE);
- subpelbuf = (pixel*)X265_MALLOC(pixel, (MAX_CU_SIZE + 1) * (MAX_CU_SIZE + 1));
- immedVal = (int16_t*)X265_MALLOC(int16_t, (MAX_CU_SIZE + 1) * (MAX_CU_SIZE + 1 + NTAPS_LUMA - 1));
- immedVal2 = (int16_t*)X265_MALLOC(int16_t, (MAX_CU_SIZE + 1) * (MAX_CU_SIZE + 1 + NTAPS_LUMA - 1));
}
MotionEstimate::~MotionEstimate()
{
X265_FREE(fenc);
- X265_FREE(subpelbuf);
- X265_FREE(immedVal);
- X265_FREE(immedVal2);
}
void MotionEstimate::setSourcePU(int offset, int width, int height)
@@ -1137,50 +1131,32 @@
}
else
{
- if (ref->isWeighted)
+ /* We are taking a short-cut here if the reference is weighted. To be
+ * accurate we should be interpolating unweighted pixels and weighting
+ * the final 16bit values prior to rounding and downshifting. Instead we
+ * are simply interpolating the weighted full-pel pixels. Not 100%
+ * accurate but good enough for fast qpel ME */
+ ALIGN_VAR_32(pixel, subpelbuf[64 * 64]);
+ pixel *fref = ref->fpelPlane + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * ref->lumaStride;
+ if (yFrac == 0)
{
- int shiftNum = IF_INTERNAL_PREC - X265_DEPTH;
- int shift = ref->shift + shiftNum;
- int round = shift ? (1 << (shift - 1)) : 0;
- pixel *fref = ref->unweightedFPelPlane + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * ref->lumaStride;
-
- if (yFrac == 0)
- {
- primitives.ipfilter_ps[FILTER_H_P_S_8](fref, ref->lumaStride, immedVal, FENC_STRIDE, blockwidth, blockheight, g_lumaFilter[xFrac]);
- primitives.weight_sp(immedVal, subpelbuf, FENC_STRIDE, FENC_STRIDE, blockwidth, blockheight, ref->weight, round, shift, ref->offset);
- }
- else if (xFrac == 0)
- {
- primitives.ipfilter_ps[FILTER_V_P_S_8](fref, ref->lumaStride, immedVal, FENC_STRIDE, blockwidth, blockheight, g_lumaFilter[yFrac]);
- primitives.weight_sp(immedVal, subpelbuf, FENC_STRIDE, FENC_STRIDE, blockwidth, blockheight, ref->weight, round, shift, ref->offset);
- }
- else
- {
- int filterSize = NTAPS_LUMA;
- int halfFilterSize = (filterSize >> 1);
- primitives.ipfilter_ps[FILTER_H_P_S_8](fref - (halfFilterSize - 1) * ref->lumaStride, ref->lumaStride, immedVal, blockwidth, blockwidth, blockheight + filterSize - 1, g_lumaFilter[xFrac]);
- primitives.ipfilter_ss[FILTER_V_S_S_8](immedVal + (halfFilterSize - 1) * blockwidth, blockwidth, immedVal2, FENC_STRIDE, blockwidth, blockheight, yFrac);
- primitives.weight_sp(immedVal2, subpelbuf, FENC_STRIDE, FENC_STRIDE, blockwidth, blockheight, ref->weight, round, shift, ref->offset);
- }
+ primitives.luma_hpp[partEnum](fref, ref->lumaStride, subpelbuf, FENC_STRIDE, xFrac);
+ }
+ else if (xFrac == 0)
+ {
+ primitives.luma_vpp[partEnum](fref, ref->lumaStride, subpelbuf, FENC_STRIDE, yFrac);
}
else
{
- pixel *fref = ref->fpelPlane + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * ref->lumaStride;
- if (yFrac == 0)
- {
- primitives.luma_hpp[partEnum](fref, ref->lumaStride, subpelbuf, FENC_STRIDE, xFrac);
- }
- else if (xFrac == 0)
- {
- primitives.luma_vpp[partEnum](fref, ref->lumaStride, subpelbuf, FENC_STRIDE, yFrac);
- }
- else
- {
- int filterSize = NTAPS_LUMA;
- int halfFilterSize = (filterSize >> 1);
- primitives.ipfilter_ps[FILTER_H_P_S_8](fref - (halfFilterSize - 1) * ref->lumaStride, ref->lumaStride, immedVal, blockwidth, blockwidth, blockheight + filterSize - 1, g_lumaFilter[xFrac]);
- primitives.luma_vsp[partEnum](immedVal + (halfFilterSize - 1) * blockwidth, blockwidth, subpelbuf, FENC_STRIDE, yFrac);
- }
+ ALIGN_VAR_32(int16_t, immed[64 * (64 + 8)]);
+
+ int filterSize = NTAPS_LUMA;
+ int halfFilterSize = filterSize >> 1;
+ primitives.ipfilter_ps[FILTER_H_P_S_8](fref - (halfFilterSize - 1) * ref->lumaStride, ref->lumaStride,
+ immed, blockwidth,
+ blockwidth, blockheight + filterSize - 1,
+ g_lumaFilter[xFrac]);
+ primitives.luma_vsp[partEnum](immed + (halfFilterSize - 1) * blockwidth, blockwidth, subpelbuf, FENC_STRIDE, yFrac);
}
return cmp(fenc, FENC_STRIDE, subpelbuf, FENC_STRIDE);
}
diff -r b172259c07f1 -r 8f156b97360b source/encoder/motion.h
--- a/source/encoder/motion.h Thu Nov 21 17:07:34 2013 -0600
+++ b/source/encoder/motion.h Thu Nov 21 17:29:26 2013 -0600
@@ -52,9 +52,6 @@
int subpelRefine;
/* subpel generation buffers */
- pixel *subpelbuf;
- int16_t *immedVal;
- int16_t *immedVal2;
int blockwidth;
int blockheight;
More information about the x265-devel mailing list