[x265] [PATCH] Added --fast-intra option to use a faster search for the best angle intra mode
dtyx265 at gmail.com
dtyx265 at gmail.com
Sat Aug 2 03:45:01 CEST 2014
# HG changeset patch
# User David T Yuen <dtyx265 at gmail.com>
# Date 1406942556 25200
# Node ID 48156a5c85b2b4f94025e06d30e9239bcd6c25aa
# Parent 3db5fda6abf08721efe7dcaf0ca57ff2965284fb
Added --fast-intra option to use a faster search for the best angle intra mode
1. Starting from mode 4, check every 5th mode up to 34.
2. Check -2 of the best mode so far.
3. If the best mode from step 1 is not 34, also check +2 of the best mode.
4. Between -2 and +2, whichever is better, check -1 or +1 respectively.
There should only be 11 or 12 calls to generate predictions and costs.
diff -r 3db5fda6abf0 -r 48156a5c85b2 source/common/param.cpp
--- a/source/common/param.cpp Fri Aug 01 16:31:20 2014 +0530
+++ b/source/common/param.cpp Fri Aug 01 18:22:36 2014 -0700
@@ -132,6 +132,7 @@
/* Intra Coding Tools */
param->bEnableConstrainedIntra = 0;
param->bEnableStrongIntraSmoothing = 1;
+ param->bEnableFastIntra = 0;
/* Inter Coding tools */
param->searchMethod = X265_HEX_SEARCH;
@@ -558,6 +559,7 @@
OPT("lossless") p->bLossless = atobool(value);
OPT("cu-lossless") p->bCULossless = atobool(value);
OPT("constrained-intra") p->bEnableConstrainedIntra = atobool(value);
+ OPT("fast-intra") p->bEnableFastIntra = atobool(value);
OPT("open-gop") p->bOpenGOP = atobool(value);
OPT("scenecut")
{
@@ -1189,6 +1191,7 @@
BOOL(p->bLossless, "lossless");
BOOL(p->bCULossless, "cu-lossless");
BOOL(p->bEnableConstrainedIntra, "constrained-intra");
+ BOOL(p->bEnableFastIntra, "fast-intra");
BOOL(p->bOpenGOP, "open-gop");
s += sprintf(s, " interlace=%d", p->interlaceMode);
s += sprintf(s, " keyint=%d", p->keyframeMax);
diff -r 3db5fda6abf0 -r 48156a5c85b2 source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp Fri Aug 01 16:31:20 2014 +0530
+++ b/source/encoder/slicetype.cpp Fri Aug 01 18:22:36 2014 -0700
@@ -33,6 +33,7 @@
#include "slicetype.h"
#include "motion.h"
#include "ratecontrol.h"
+#include "predict.h"
#define NUM_CUS (m_widthInCU > 2 && m_heightInCU > 2 ? (m_widthInCU - 2) * (m_heightInCU - 2) : m_widthInCU * m_heightInCU)
@@ -1242,6 +1243,7 @@
{
m_rows[i].m_widthInCU = m_widthInCU;
m_rows[i].m_heightInCU = m_heightInCU;
+ m_rows[i].m_param = m_param;
}
if (!WaveFront::init(m_heightInCU))
@@ -1675,27 +1677,89 @@
}
int predsize = cuSize * cuSize;
+ int icost = m_me.COST_MAX, cost;
+ pixelcmp_t satd = primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)];
- // generate 35 intra predictions into tmp
+ // generate intra predictions into m_predictions
primitives.intra_pred[sizeIdx][DC_IDX](m_predictions, cuSize, left0, above0, 0, (cuSize <= 16));
+ cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize);
+ if (cost < icost)
+ icost = cost;
pixel *above = (cuSize >= 8) ? above1 : above0;
pixel *left = (cuSize >= 8) ? left1 : left0;
primitives.intra_pred[sizeIdx][PLANAR_IDX](m_predictions + predsize, cuSize, left, above, 0, 0);
- primitives.intra_pred_allangs[sizeIdx](m_predictions + 2 * predsize, above0, left0, above1, left1, (cuSize <= 16));
+ cost = satd(m_me.fenc, FENC_STRIDE, m_predictions + predsize, cuSize);
+ if (cost < icost)
+ icost = cost;
+ uint32_t lowmode, mode;
+ // fast intra prediction angle search
+ if (m_param->bEnableFastIntra)
+ {
+ int acost = m_me.COST_MAX;
+ for (mode = 4;mode < 35; mode += 5)
+ {
+ left = (Predict::filteringIntraReferenceSamples(mode, sizeIdx) ? left1 : left0);
+ above = (Predict::filteringIntraReferenceSamples(mode, sizeIdx) ? above1 : above0);
+ primitives.intra_pred[sizeIdx][mode](&m_predictions[mode * predsize], cuSize, left, above, mode, cuSize <= 16);
+ cost = satd(m_me.fenc, FENC_STRIDE, &m_predictions[mode * predsize], cuSize);
+ if (cost < acost)
+ {
+ lowmode = mode;
+ acost = cost;
+ }
+ }
+ mode = lowmode - 2;
+ left = (Predict::filteringIntraReferenceSamples(mode, sizeIdx) ? left1 : left0);
+ above = (Predict::filteringIntraReferenceSamples(mode, sizeIdx) ? above1 : above0);
+ primitives.intra_pred[sizeIdx][mode](&m_predictions[mode * predsize], cuSize, left, above, mode, cuSize <= 16);
+ int lowcost = satd(m_me.fenc, FENC_STRIDE, &m_predictions[(mode) * predsize], cuSize);
+ int highcost = m_me.COST_MAX;
+ if (lowmode < 34)
+ {
+ mode = lowmode + 2;
+ left = (Predict::filteringIntraReferenceSamples(mode, sizeIdx) ? left1 : left0);
+ above = (Predict::filteringIntraReferenceSamples(mode, sizeIdx) ? above1 : above0);
+ primitives.intra_pred[sizeIdx][mode](&m_predictions[mode * predsize], cuSize, left, above, mode, cuSize <= 16);
+ highcost = satd(m_me.fenc, FENC_STRIDE, &m_predictions[mode * predsize], cuSize);
+ }
+ if (lowcost <= highcost)
+ {
+ mode = lowmode - 1;
+ left = (Predict::filteringIntraReferenceSamples(mode, sizeIdx) ? left1 : left0);
+ above = (Predict::filteringIntraReferenceSamples(mode, sizeIdx) ? above1 : above0);
+ if (lowcost < acost)
+ acost = lowcost;
+ }
+ else
+ {
+ mode = lowmode + 1;
+ left = (Predict::filteringIntraReferenceSamples(mode, sizeIdx) ? left1 : left0);
+ above = (Predict::filteringIntraReferenceSamples(mode, sizeIdx) ? above1 : above0);
+ if (highcost < acost)
+ acost = highcost;
+ }
+ primitives.intra_pred[sizeIdx][mode](&m_predictions[mode * predsize], cuSize, left, above, mode, cuSize <= 16);
+ cost = satd(m_me.fenc, FENC_STRIDE, &m_predictions[mode * predsize], cuSize);
+ if (cost < acost)
+ acost = cost;
+ if (acost < icost)
+ icost = acost;
+ }
+ else // calculate and search all intra prediction angles for lowest cost
+ {
+ ALIGN_VAR_32(pixel, buf_trans[32 * 32]);
+ primitives.transpose[sizeIdx](buf_trans, m_me.fenc, FENC_STRIDE);
+ primitives.intra_pred_allangs[sizeIdx](m_predictions + 2 * predsize, above0, left0, above1, left1, (cuSize <= 16));
- // calculate 35 satd costs, keep least cost
- ALIGN_VAR_32(pixel, buf_trans[32 * 32]);
- primitives.transpose[sizeIdx](buf_trans, m_me.fenc, FENC_STRIDE);
- pixelcmp_t satd = primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)];
- int icost = m_me.COST_MAX, cost;
- for (uint32_t mode = 0; mode < 35; mode++)
- {
- if ((mode >= 2) && (mode < 18))
- cost = satd(buf_trans, cuSize, &m_predictions[mode * predsize], cuSize);
- else
- cost = satd(m_me.fenc, FENC_STRIDE, &m_predictions[mode * predsize], cuSize);
- if (cost < icost)
- icost = cost;
+ for (mode = 2; mode < 35; mode++)
+ {
+ if (mode < 18)
+ cost = satd(buf_trans, cuSize, &m_predictions[mode * predsize], cuSize);
+ else
+ cost = satd(m_me.fenc, FENC_STRIDE, &m_predictions[mode * predsize], cuSize);
+ if (cost < icost)
+ icost = cost;
+ }
}
const int intraPenalty = 5 * m_lookAheadLambda;
icost += intraPenalty + lowresPenalty;
diff -r 3db5fda6abf0 -r 48156a5c85b2 source/encoder/slicetype.h
--- a/source/encoder/slicetype.h Fri Aug 01 16:31:20 2014 +0530
+++ b/source/encoder/slicetype.h Fri Aug 01 18:22:36 2014 -0700
@@ -48,6 +48,7 @@
class EstimateRow
{
public:
+ x265_param* m_param;
MotionEstimate m_me;
Lock m_lock;
pixel* m_predictions; // buffer for 35 intra predictions
diff -r 3db5fda6abf0 -r 48156a5c85b2 source/x265.cpp
--- a/source/x265.cpp Fri Aug 01 16:31:20 2014 +0530
+++ b/source/x265.cpp Fri Aug 01 18:22:36 2014 -0700
@@ -117,6 +117,8 @@
{ "no-cu-lossless", no_argument, NULL, 0 },
{ "no-constrained-intra", no_argument, NULL, 0 },
{ "constrained-intra", no_argument, NULL, 0 },
+ { "fast-intra", no_argument, NULL, 0 },
+ { "no-fast-intra", no_argument, NULL, 0 },
{ "no-open-gop", no_argument, NULL, 0 },
{ "open-gop", no_argument, NULL, 0 },
{ "keyint", required_argument, NULL, 'I' },
@@ -385,6 +387,7 @@
H0(" --[no-]strong-intra-smoothing Enable strong intra smoothing for 32x32 blocks. Default %s\n", OPT(param->bEnableStrongIntraSmoothing));
H0(" --[no-]constrained-intra Constrained intra prediction (use only intra coded reference pixels) Default %s\n", OPT(param->bEnableConstrainedIntra));
H0(" --[no-]b-intra Enable intra in B frames in veryslow presets. Default %s\n", OPT(param->bIntraInBFrames));
+ H0(" --[no]-fast-intra Enable faster search method for intra mode. Default %s\n", OPT(param->bEnableFastIntra));
H0(" --rdpenalty <0..2> penalty for 32x32 intra TU in non-I slices. 0:disabled 1:RD-penalty 2:maximum. Default %d\n", param->rdPenalty);
H0("\nSlice decision options:\n");
H0(" --[no-]open-gop Enable open-GOP, allows I slices to be non-IDR. Default %s\n", OPT(param->bOpenGOP));
diff -r 3db5fda6abf0 -r 48156a5c85b2 source/x265.h
--- a/source/x265.h Fri Aug 01 16:31:20 2014 +0530
+++ b/source/x265.h Fri Aug 01 18:22:36 2014 -0700
@@ -533,6 +533,9 @@
* depending on your source material. Defaults to disabled */
int bEnableStrongIntraSmoothing;
+ /* Use a faster search method to find the best intra mode. Default is 0 */
+ int bEnableFastIntra;
+
/*== Inter Coding Tools ==*/
/* ME search method (DIA, HEX, UMH, STAR, FULL). The search patterns
More information about the x265-devel
mailing list