[x265] [PATCH 1 of 2] motion: Implement 3-level Hierarchical Motion Estimation
pooja at multicorewareinc.com
pooja at multicorewareinc.com
Wed Jul 10 06:17:02 CEST 2019
# HG changeset patch
# User Pooja Venkatesan <pooja at multicorewareinc.com>
# Date 1562305646 -19800
# Fri Jul 05 11:17:26 2019 +0530
# Node ID 14a235657a2011aa28d45544f33b7186c33b9218
# Parent 4f6dde51a5db4f9229bddb60db176f16ac98f505
motion: Implement 3-level Hierarchical Motion Estimation
This patch does the following:
1) Create HME-level 0 planes
2) Add options "--hme" and "--hme-search" to enable HME
and to select the search method for levels 0, 1 and 2
diff -r 4f6dde51a5db -r 14a235657a20 doc/reST/cli.rst
--- a/doc/reST/cli.rst Fri Jul 05 10:47:15 2019 +0530
+++ b/doc/reST/cli.rst Fri Jul 05 11:17:26 2019 +0530
@@ -1261,6 +1261,18 @@
Enable motion estimation with source frame pixels, in this mode,
motion estimation can be computed independently. Default disabled.
+.. option:: --hme, --no-hme
+
+ Enable 3-level Hierarchical motion estimation at One-Sixteenth,
+ Quarter and Full resolution. Default disabled.
+
+.. option:: --hme-search <integer|string>,<integer|string>,<integer|string>
+
+ Motion search method for HME Level 0, 1 and 2. Refer to :option:`--me` for values.
+ Specify search method for each level. Alternatively, specify a single value
+ which will apply to all levels. Default is hex,umh,umh for
+ levels 0,1,2 respectively.
+
Spatial/intra options
=====================
diff -r 4f6dde51a5db -r 14a235657a20 source/CMakeLists.txt
--- a/source/CMakeLists.txt Fri Jul 05 10:47:15 2019 +0530
+++ b/source/CMakeLists.txt Fri Jul 05 11:17:26 2019 +0530
@@ -29,7 +29,7 @@
option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF)
mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
# X265_BUILD must be incremented each time the public API is changed
-set(X265_BUILD 176)
+set(X265_BUILD 177)
configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
"${PROJECT_BINARY_DIR}/x265.def")
configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
diff -r 4f6dde51a5db -r 14a235657a20 source/common/lowres.cpp
--- a/source/common/lowres.cpp Fri Jul 05 10:47:15 2019 +0530
+++ b/source/common/lowres.cpp Fri Jul 05 11:17:26 2019 +0530
@@ -55,6 +55,7 @@
heightFullRes = origPic->m_picHeight;
width = origPic->m_picWidth / 2;
lines = origPic->m_picHeight / 2;
+ bEnableHME = param->bEnableHME ? 1 : 0;
lumaStride = width + 2 * origPic->m_lumaMarginX;
if (lumaStride & 31)
lumaStride += 32 - (lumaStride & 31);
@@ -137,6 +138,26 @@
lowresPlane[2] = buffer[2] + padoffset;
lowresPlane[3] = buffer[3] + padoffset;
+ if (bEnableHME)
+ {
+ intptr_t lumaStrideHalf = lumaStride / 2;
+ if (lumaStrideHalf & 31)
+ lumaStrideHalf += 32 - (lumaStrideHalf & 31);
+ size_t planesizeHalf = planesize / 2;
+ size_t padoffsetHalf = padoffset / 2;
+ /* allocate lower-res buffers */
+ CHECKED_MALLOC_ZERO(lowerResBuffer[0], pixel, 4 * planesizeHalf);
+
+ lowerResBuffer[1] = lowerResBuffer[0] + planesizeHalf;
+ lowerResBuffer[2] = lowerResBuffer[1] + planesizeHalf;
+ lowerResBuffer[3] = lowerResBuffer[2] + planesizeHalf;
+
+ lowerResPlane[0] = lowerResBuffer[0] + padoffsetHalf;
+ lowerResPlane[1] = lowerResBuffer[1] + padoffsetHalf;
+ lowerResPlane[2] = lowerResBuffer[2] + padoffsetHalf;
+ lowerResPlane[3] = lowerResBuffer[3] + padoffsetHalf;
+ }
+
CHECKED_MALLOC(intraCost, int32_t, cuCount);
CHECKED_MALLOC(intraMode, uint8_t, cuCount);
@@ -166,6 +187,8 @@
void Lowres::destroy()
{
X265_FREE(buffer[0]);
+ if(bEnableHME)
+ X265_FREE(lowerResBuffer[0]);
X265_FREE(intraCost);
X265_FREE(intraMode);
@@ -253,5 +276,18 @@
extendPicBorder(lowresPlane[1], lumaStride, width, lines, origPic->m_lumaMarginX, origPic->m_lumaMarginY);
extendPicBorder(lowresPlane[2], lumaStride, width, lines, origPic->m_lumaMarginX, origPic->m_lumaMarginY);
extendPicBorder(lowresPlane[3], lumaStride, width, lines, origPic->m_lumaMarginX, origPic->m_lumaMarginY);
+
+ if (origPic->m_param->bEnableHME)
+ {
+ primitives.frameInitLowerRes(lowresPlane[0],
+ lowerResPlane[0], lowerResPlane[1], lowerResPlane[2], lowerResPlane[3],
+ lumaStride, lumaStride/2, (width / 2), (lines / 2));
+ extendPicBorder(lowerResPlane[0], lumaStride/2, width/2, lines/2, origPic->m_lumaMarginX/2, origPic->m_lumaMarginY/2);
+ extendPicBorder(lowerResPlane[1], lumaStride/2, width/2, lines/2, origPic->m_lumaMarginX/2, origPic->m_lumaMarginY/2);
+ extendPicBorder(lowerResPlane[2], lumaStride/2, width/2, lines/2, origPic->m_lumaMarginX/2, origPic->m_lumaMarginY/2);
+ extendPicBorder(lowerResPlane[3], lumaStride/2, width/2, lines/2, origPic->m_lumaMarginX/2, origPic->m_lumaMarginY/2);
+ fpelLowerResPlane[0] = lowerResPlane[0];
+ }
+
fpelPlane[0] = lowresPlane[0];
}
diff -r 4f6dde51a5db -r 14a235657a20 source/common/lowres.h
--- a/source/common/lowres.h Fri Jul 05 10:47:15 2019 +0530
+++ b/source/common/lowres.h Fri Jul 05 11:17:26 2019 +0530
@@ -40,6 +40,10 @@
pixel* lowresPlane[4];
PicYuv* reconPic;
+ /* 1/16th resolution : Level-0 HME planes */
+ pixel* fpelLowerResPlane[3];
+ pixel* lowerResPlane[4];
+
bool isWeighted;
bool isLowres;
@@ -150,6 +154,7 @@
struct Lowres : public ReferencePlanes
{
pixel *buffer[4];
+ pixel *lowerResBuffer[4]; // Level-0 buffer
int frameNum; // Presentation frame number
int sliceType; // Slice type decided by lookahead
@@ -181,6 +186,9 @@
uint32_t maxBlocksInRowFullRes;
uint32_t maxBlocksInColFullRes;
+ /* Hierarchical Motion Estimation */
+ bool bEnableHME;
+
/* used for vbvLookahead */
int plannedType[X265_LOOKAHEAD_MAX + 1];
int64_t plannedSatd[X265_LOOKAHEAD_MAX + 1];
diff -r 4f6dde51a5db -r 14a235657a20 source/common/param.cpp
--- a/source/common/param.cpp Fri Jul 05 10:47:15 2019 +0530
+++ b/source/common/param.cpp Fri Jul 05 11:17:26 2019 +0530
@@ -201,6 +201,9 @@
param->bEnableTSkipFast = 0;
param->maxNumReferences = 3;
param->bEnableTemporalMvp = 1;
+ param->bEnableHME = 0;
+ param->hmeSearchMethod[0] = X265_HEX_SEARCH;
+ param->hmeSearchMethod[1] = param->hmeSearchMethod[2] = X265_UMH_SEARCH;
param->bSourceReferenceEstimation = 0;
param->limitTU = 0;
param->dynamicRd = 0;
@@ -1282,6 +1285,27 @@
OPT("fades") p->bEnableFades = atobool(value);
OPT("field") p->bField = atobool( value );
OPT("cll") p->bEmitCLL = atobool(value);
+ OPT("hme") p->bEnableHME = atobool(value);
+ OPT("hme-search")
+ {
+ char search[3][5];
+ memset(search, '\0', 15 * sizeof(char));
+ if(3 == sscanf(value, "%d,%d,%d", &p->hmeSearchMethod[0], &p->hmeSearchMethod[1], &p->hmeSearchMethod[2]) ||
+ 3 == sscanf(value, "%4[^,],%4[^,],%4[^,]", search[0], search[1], search[2]))
+ {
+ if(search[0][0])
+ for(int level = 0; level < 3; level++)
+ p->hmeSearchMethod[level] = parseName(search[level], x265_motion_est_names, bError);
+ }
+ else if (sscanf(value, "%d", &p->hmeSearchMethod[0]) || sscanf(value, "%s", search[0]))
+ {
+ if (search[0][0]) {
+ p->hmeSearchMethod[0] = parseName(search[0], x265_motion_est_names, bError);
+ p->hmeSearchMethod[1] = p->hmeSearchMethod[2] = p->hmeSearchMethod[0];
+ }
+ }
+ p->bEnableHME = true;
+ }
else
return X265_PARAM_BAD_NAME;
}
@@ -1732,8 +1756,13 @@
x265_log(param, X265_LOG_INFO, "Residual QT: max TU size, max depth : %d / %d inter / %d intra\n",
param->maxTUSize, param->tuQTMaxInterDepth, param->tuQTMaxIntraDepth);
- x265_log(param, X265_LOG_INFO, "ME / range / subpel / merge : %s / %d / %d / %d\n",
- x265_motion_est_names[param->searchMethod], param->searchRange, param->subpelRefine, param->maxNumMergeCand);
+ if (param->bEnableHME)
+ x265_log(param, X265_LOG_INFO, "HME L0,1,2 / range / subpel / merge : %s, %s, %s / %d / %d / %d\n",
+ x265_motion_est_names[param->hmeSearchMethod[0]], x265_motion_est_names[param->hmeSearchMethod[1]], x265_motion_est_names[param->hmeSearchMethod[2]], param->searchRange, param->subpelRefine, param->maxNumMergeCand);
+ else
+ x265_log(param, X265_LOG_INFO, "ME / range / subpel / merge : %s / %d / %d / %d\n",
+ x265_motion_est_names[param->searchMethod], param->searchRange, param->subpelRefine, param->maxNumMergeCand);
+
if (param->keyframeMax != INT_MAX || param->scenecutThreshold)
x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut / bias: %d / %d / %d / %.2lf\n", param->keyframeMin, param->keyframeMax, param->scenecutThreshold, param->scenecutBias * 100);
else
@@ -1928,6 +1957,9 @@
s += sprintf(s, " subme=%d", p->subpelRefine);
s += sprintf(s, " merange=%d", p->searchRange);
BOOL(p->bEnableTemporalMvp, "temporal-mvp");
+ BOOL(p->bEnableHME, "hme");
+ if (p->bEnableHME)
+ s += sprintf(s, " Level 0,1,2=%d,%d,%d", p->hmeSearchMethod[0], p->hmeSearchMethod[1], p->hmeSearchMethod[2]);
BOOL(p->bEnableWeightedPred, "weightp");
BOOL(p->bEnableWeightedBiPred, "weightb");
BOOL(p->bSourceReferenceEstimation, "analyze-src-pics");
@@ -2215,6 +2247,12 @@
dst->subpelRefine = src->subpelRefine;
dst->searchRange = src->searchRange;
dst->bEnableTemporalMvp = src->bEnableTemporalMvp;
+ dst->bEnableHME = src->bEnableHME;
+ if (src->bEnableHME)
+ {
+ for (int level = 0; level < 3; level++)
+ dst->hmeSearchMethod[level] = src->hmeSearchMethod[level];
+ }
dst->bEnableWeightedBiPred = src->bEnableWeightedBiPred;
dst->bEnableWeightedPred = src->bEnableWeightedPred;
dst->bSourceReferenceEstimation = src->bSourceReferenceEstimation;
diff -r 4f6dde51a5db -r 14a235657a20 source/common/pixel.cpp
--- a/source/common/pixel.cpp Fri Jul 05 10:47:15 2019 +0530
+++ b/source/common/pixel.cpp Fri Jul 05 11:17:26 2019 +0530
@@ -1309,6 +1309,7 @@
p.scale1D_128to64[NONALIGNED] = p.scale1D_128to64[ALIGNED] = scale1D_128to64;
p.scale2D_64to32 = scale2D_64to32;
p.frameInitLowres = frame_init_lowres_core;
+ p.frameInitLowerRes = frame_init_lowres_core;
p.ssim_4x4x2_core = ssim_4x4x2_core;
p.ssim_end_4 = ssim_end_4;
diff -r 4f6dde51a5db -r 14a235657a20 source/common/primitives.h
--- a/source/common/primitives.h Fri Jul 05 10:47:15 2019 +0530
+++ b/source/common/primitives.h Fri Jul 05 11:17:26 2019 +0530
@@ -349,6 +349,7 @@
saoCuStatsE3_t saoCuStatsE3;
downscale_t frameInitLowres;
+ downscale_t frameInitLowerRes;
cutree_propagate_cost propagateCost;
cutree_fix8_unpack fix8Unpack;
cutree_fix8_pack fix8Pack;
diff -r 4f6dde51a5db -r 14a235657a20 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Fri Jul 05 10:47:15 2019 +0530
+++ b/source/common/x86/asm-primitives.cpp Fri Jul 05 11:17:26 2019 +0530
@@ -1090,6 +1090,7 @@
LUMA_VSS_FILTERS(sse2);
p.frameInitLowres = PFX(frame_init_lowres_core_sse2);
+ p.frameInitLowerRes = PFX(frame_init_lowres_core_sse2);
// TODO: the planecopy_sp is really planecopy_SC now, must be fix it
//p.planecopy_sp = PFX(downShift_16_sse2);
p.planecopy_sp_shl = PFX(upShift_16_sse2);
@@ -1132,6 +1133,7 @@
p.cu[BLOCK_8x8].idct = PFX(idct8_ssse3);
p.frameInitLowres = PFX(frame_init_lowres_core_ssse3);
+ p.frameInitLowerRes = PFX(frame_init_lowres_core_ssse3);
ALL_LUMA_PU(convert_p2s[ALIGNED], filterPixelToShort, ssse3);
ALL_LUMA_PU(convert_p2s[NONALIGNED], filterPixelToShort, ssse3);
@@ -1453,6 +1455,7 @@
p.cu[BLOCK_64x64].copy_sp = (copy_sp_t)PFX(blockcopy_ss_64x64_avx);
p.frameInitLowres = PFX(frame_init_lowres_core_avx);
+ p.frameInitLowerRes = PFX(frame_init_lowres_core_avx);
p.pu[LUMA_64x16].copy_pp = (copy_pp_t)PFX(blockcopy_ss_64x16_avx);
p.pu[LUMA_64x32].copy_pp = (copy_pp_t)PFX(blockcopy_ss_64x32_avx);
@@ -1469,6 +1472,7 @@
#endif
LUMA_VAR(xop);
p.frameInitLowres = PFX(frame_init_lowres_core_xop);
+ p.frameInitLowerRes = PFX(frame_init_lowres_core_xop);
}
if (cpuMask & X265_CPU_AVX2)
{
@@ -2296,6 +2300,7 @@
p.chroma[X265_CSP_I444].pu[LUMA_64x64].filter_vsp = PFX(interp_4tap_vert_sp_64x64_avx2);
p.frameInitLowres = PFX(frame_init_lowres_core_avx2);
+ p.frameInitLowerRes = PFX(frame_init_lowres_core_avx2);
p.propagateCost = PFX(mbtree_propagate_cost_avx2);
p.fix8Unpack = PFX(cutree_fix8_unpack_avx2);
p.fix8Pack = PFX(cutree_fix8_pack_avx2);
@@ -3294,6 +3299,7 @@
//p.frameInitLowres = PFX(frame_init_lowres_core_mmx2);
p.frameInitLowres = PFX(frame_init_lowres_core_sse2);
+ p.frameInitLowerRes = PFX(frame_init_lowres_core_sse2);
ALL_LUMA_TU(blockfill_s[NONALIGNED], blockfill_s, sse2);
ALL_LUMA_TU(blockfill_s[ALIGNED], blockfill_s, sse2);
@@ -3414,6 +3420,7 @@
p.pu[LUMA_8x8].luma_hvpp = PFX(interp_8tap_hv_pp_8x8_ssse3);
p.frameInitLowres = PFX(frame_init_lowres_core_ssse3);
+ p.frameInitLowerRes = PFX(frame_init_lowres_core_ssse3);
ASSIGN2(p.scale1D_128to64, scale1D_128to64_ssse3);
p.scale2D_64to32 = PFX(scale2D_64to32_ssse3);
@@ -3682,6 +3689,7 @@
p.pu[LUMA_48x64].copy_pp = PFX(blockcopy_pp_48x64_avx);
p.frameInitLowres = PFX(frame_init_lowres_core_avx);
+ p.frameInitLowerRes = PFX(frame_init_lowres_core_avx);
p.propagateCost = PFX(mbtree_propagate_cost_avx);
}
if (cpuMask & X265_CPU_XOP)
@@ -3693,6 +3701,8 @@
p.cu[BLOCK_8x8].sse_pp = PFX(pixel_ssd_8x8_xop);
p.cu[BLOCK_16x16].sse_pp = PFX(pixel_ssd_16x16_xop);
p.frameInitLowres = PFX(frame_init_lowres_core_xop);
+ p.frameInitLowerRes = PFX(frame_init_lowres_core_xop);
+
}
#if X86_64
if (cpuMask & X265_CPU_AVX2)
@@ -4667,6 +4677,7 @@
p.chroma[X265_CSP_I444].pu[LUMA_64x16].filter_vpp = PFX(interp_4tap_vert_pp_64x16_avx2);
p.frameInitLowres = PFX(frame_init_lowres_core_avx2);
+ p.frameInitLowerRes = PFX(frame_init_lowres_core_avx2);
p.propagateCost = PFX(mbtree_propagate_cost_avx2);
p.saoCuStatsE0 = PFX(saoCuStatsE0_avx2);
p.saoCuStatsE1 = PFX(saoCuStatsE1_avx2);
diff -r 4f6dde51a5db -r 14a235657a20 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Fri Jul 05 10:47:15 2019 +0530
+++ b/source/encoder/encoder.cpp Fri Jul 05 11:17:26 2019 +0530
@@ -3379,6 +3379,15 @@
p->bRepeatHeaders = 1;
x265_log(p, X265_LOG_WARNING, "Turning on repeat - headers for zone encoding\n");
}
+
+ if (m_param->bEnableHME)
+ {
+ if (m_param->sourceHeight < 540)
+ {
+ x265_log(p, X265_LOG_WARNING, "Source height < 540p is too low for HME. Disabling HME.\n");
+ p->bEnableHME = 0;
+ }
+ }
}
void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x265_picture* picIn, int paramBytes)
diff -r 4f6dde51a5db -r 14a235657a20 source/test/regression-tests.txt
--- a/source/test/regression-tests.txt Fri Jul 05 10:47:15 2019 +0530
+++ b/source/test/regression-tests.txt Fri Jul 05 11:17:26 2019 +0530
@@ -153,6 +153,7 @@
big_buck_bunny_360p24.y4m, --keyint 60 --min-keyint 40 --gop-lookahead 14
BasketballDrive_1920x1080_50.y4m, --preset medium --no-open-gop --keyint 50 --min-keyint 50 --radl 2 --vbv-maxrate 5000 --vbv-bufsize 5000
big_buck_bunny_360p24.y4m, --bitrate 500 --fades
+720p50_parkrun_ter.y4m,--preset medium --bitrate 400 --hme
# Main12 intraCost overflow bug test
720p50_parkrun_ter.y4m,--preset medium
diff -r 4f6dde51a5db -r 14a235657a20 source/x265.h
--- a/source/x265.h Fri Jul 05 10:47:15 2019 +0530
+++ b/source/x265.h Fri Jul 05 11:17:26 2019 +0530
@@ -1172,6 +1172,14 @@
/* Enable availability of temporal motion vector for AMVP, default is enabled */
int bEnableTemporalMvp;
+ /* Enable 3-level Hierarchical motion estimation at One-Sixteenth, Quarter and Full resolution.
+ * Default is disabled */
+ int bEnableHME;
+
+ /* Enable HME search method (DIA, HEX, UMH, STAR, SEA, FULL) for level 0, 1 and 2.
+ * Default is hex, umh, umh for L0, L1 and L2 respectively. */
+ int hmeSearchMethod[3];
+
/* Enable weighted prediction in P slices. This enables weighting analysis
* in the lookahead, which influences slice decisions, and enables weighting
* analysis in the main encoder which allows P reference samples to have a
diff -r 4f6dde51a5db -r 14a235657a20 source/x265cli.h
--- a/source/x265cli.h Fri Jul 05 10:47:15 2019 +0530
+++ b/source/x265cli.h Fri Jul 05 11:17:26 2019 +0530
@@ -95,6 +95,9 @@
{ "max-merge", required_argument, NULL, 0 },
{ "no-temporal-mvp", no_argument, NULL, 0 },
{ "temporal-mvp", no_argument, NULL, 0 },
+ { "hme", no_argument, NULL, 0 },
+ { "no-hme", no_argument, NULL, 0 },
+ { "hme-search", required_argument, NULL, 0 },
{ "rdpenalty", required_argument, NULL, 0 },
{ "no-rect", no_argument, NULL, 0 },
{ "rect", no_argument, NULL, 0 },
@@ -464,6 +467,8 @@
H0(" --[no-]amp Enable asymmetric motion partitions, requires --rect. Default %s\n", OPT(param->bEnableAMP));
H0(" --[no-]limit-modes Limit rectangular and asymmetric motion predictions. Default %d\n", param->limitModes);
H1(" --[no-]temporal-mvp Enable temporal MV predictors. Default %s\n", OPT(param->bEnableTemporalMvp));
+ H1(" --[no-]hme Enable Hierarchical Motion Estimation. Default %s\n", OPT(param->bEnableHME));
+ H1(" --hme-search <string> Motion search-method for HME L0,L1 and L2. Default(L0,L1,L2) is %d,%d,%d\n", param->hmeSearchMethod[0], param->hmeSearchMethod[1], param->hmeSearchMethod[2]);
H0("\nSpatial / intra options:\n");
H0(" --[no-]strong-intra-smoothing Enable strong intra smoothing for 32x32 blocks. Default %s\n", OPT(param->bEnableStrongIntraSmoothing));
H0(" --[no-]constrained-intra Constrained intra prediction (use only intra coded reference pixels) Default %s\n", OPT(param->bEnableConstrainedIntra));
-------------- next part --------------
A non-text attachment was scrubbed...
Name: x265-1.patch
Type: text/x-patch
Size: 18780 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20190710/277f9132/attachment-0001.bin>
More information about the x265-devel
mailing list