[x265-commits] [x265] Added fast intra search option to Analysis::checkIntraInInter_rd0_4
David T Yuen
dtyx265 at gmail.com
Fri Aug 15 03:37:07 CEST 2014
details: http://hg.videolan.org/x265/rev/213f17c1492c
branches:
changeset: 7794:213f17c1492c
user: David T Yuen <dtyx265 at gmail.com>
date: Thu Aug 14 09:43:39 2014 -0700
description:
Added fast intra search option to Analysis::checkIntraInInter_rd0_4
Subject: [x265] analysis: use macro and for-loop to simplify fast-intra
details: http://hg.videolan.org/x265/rev/07138e6ac952
branches:
changeset: 7795:07138e6ac952
user: Steve Borho <steve at borho.org>
date: Thu Aug 14 15:38:01 2014 -0500
description:
analysis: use macro and for-loop to simplify fast-intra
This changes behavior a bit: it now tries both +/-1 offsets instead of just
one, and it has to do one extra check at the end since mode 34 isn't reached
by the previous loops.
Subject: [x265] param: don't allow turbo mode to increase rd-level, improve docs
details: http://hg.videolan.org/x265/rev/c4f0bbad98cf
branches:
changeset: 7796:c4f0bbad98cf
user: Steve Borho <steve at borho.org>
date: Thu Aug 14 19:35:47 2014 -0500
description:
param: don't allow turbo mode to increase rd-level, improve docs
Subject: [x265] quant: use optimized primitive for transquant bypass in invTransformNxN
details: http://hg.videolan.org/x265/rev/ef25a0e7de3b
branches:
changeset: 7797:ef25a0e7de3b
user: Steve Borho <steve at borho.org>
date: Thu Aug 14 16:06:01 2014 -0500
description:
quant: use optimized primitive for transquant bypass in invTransformNxN
diffstat:
doc/reST/cli.rst | 21 +++++++++++--
source/common/param.cpp | 2 +-
source/common/quant.cpp | 5 +--
source/encoder/analysis.cpp | 64 ++++++++++++++++++++++++++++++++++++++------
4 files changed, 74 insertions(+), 18 deletions(-)
diffs (148 lines):
diff -r 08e9d25e2c72 -r ef25a0e7de3b doc/reST/cli.rst
--- a/doc/reST/cli.rst Thu Aug 14 15:33:51 2014 -0500
+++ b/doc/reST/cli.rst Thu Aug 14 16:06:01 2014 -0500
@@ -831,7 +831,7 @@ Quality, rate control and rate distortio
Enable multipass rate control mode. Input is encoded multiple times,
storing the encoded information of each pass in a stats file from which
the consecutive pass tunes the qp of each frame to improve the quality
- of the output.Default 0(disabled)
+ of the output. Default disabled
1. First pass, creates stats file
2. Last pass, does not overwrite stats file
@@ -841,9 +841,22 @@ Quality, rate control and rate distortio
.. option:: --slow-firstpass, --no-slow-firstpass
- Enable a slow and more detailed first pass encode in Multipass rate control mode.
- Speed of the first pass encode is slightly lesser and quality midly improved when
- compared to the default settings in a multipass encode. Default disabled
+ Enable a slow and more detailed first pass encode in Multipass rate
+ control mode. Speed of the first pass encode is slightly lesser and
+ quality midly improved when compared to the default settings in a
+ multipass encode. Default disabled (turbo mode enabled)
+
+ When **turbo** first pass is not disabled, these options are
+ set on the first pass to improve performance:
+
+ * :option:`--no-rect`
+ * :option:`--no-amp`
+ * :option:`--early-skip`
+ * :option:`--ref` = 1
+ * :option:`--max-merge` = 1
+ * :option:`--me` = DIA
+ * :option:`--subme` = MIN(2, :option:`--subme`)
+ * :option:`--rd` = MIN(2, :option:`--rd`)
Loop filters
============
diff -r 08e9d25e2c72 -r ef25a0e7de3b source/common/param.cpp
--- a/source/common/param.cpp Thu Aug 14 15:33:51 2014 -0500
+++ b/source/common/param.cpp Thu Aug 14 16:06:01 2014 -0500
@@ -1037,7 +1037,7 @@ void x265_param_apply_fastfirstpass(x265
param->searchMethod = X265_DIA_SEARCH;
param->subpelRefine = X265_MIN(2, param->subpelRefine);
param->bEnableEarlySkip = 1;
- param->rdLevel = 2;
+ param->rdLevel = X265_MIN(2, param->rdLevel);
}
}
diff -r 08e9d25e2c72 -r ef25a0e7de3b source/common/quant.cpp
--- a/source/common/quant.cpp Thu Aug 14 15:33:51 2014 -0500
+++ b/source/common/quant.cpp Thu Aug 14 16:06:01 2014 -0500
@@ -406,10 +406,7 @@ void Quant::invtransformNxN(bool transQu
{
if (transQuantBypass)
{
- int trSize = 1 << log2TrSize;
- for (int k = 0; k < trSize; k++)
- for (int j = 0; j < trSize; j++)
- residual[k * stride + j] = (int16_t)(coeff[k * trSize + j]);
+ primitives.cvt32to16_shr(residual, coeff, stride, 0, 1 << log2TrSize);
return;
}
diff -r 08e9d25e2c72 -r ef25a0e7de3b source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Thu Aug 14 15:33:51 2014 -0500
+++ b/source/encoder/analysis.cpp Thu Aug 14 16:06:01 2014 -0500
@@ -1653,6 +1653,7 @@ void Analysis::checkIntraInInter_rd0_4(T
}
pixelcmp_t sa8d = primitives.sa8d[sizeIdx];
+ int predsize = scaleTuSize * scaleTuSize;
uint32_t preds[3];
cu->getIntraDirLumaPredictor(partOffset, preds);
@@ -1689,17 +1690,62 @@ void Analysis::checkIntraInInter_rd0_4(T
primitives.intra_pred_allangs[sizeIdx](tmp, above, left, aboveFiltered, leftFiltered, (scaleTuSize <= 16));
- for (mode = 2; mode < 35; mode++)
+ bool modeHor;
+ pixel *cmp;
+ intptr_t srcStride;
+
+#define TRY_ANGLE(angle) \
+ modeHor = angle < 18; \
+ cmp = modeHor ? buf_trans : fenc; \
+ srcStride = modeHor ? scaleTuSize : scaleStride; \
+ sad = sa8d(cmp, srcStride, &tmp[(angle - 2) * predsize], scaleTuSize) << costShift; \
+ bits = (mpms & ((uint64_t)1 << angle)) ? xModeBitsIntra(cu, angle, partOffset, depth) : rbits; \
+ cost = m_rdCost.calcRdSADCost(sad, bits)
+
+ if (m_param->bEnableFastIntra)
{
- bool modeHor = (mode < 18);
- pixel *cmp = (modeHor ? buf_trans : fenc);
- intptr_t srcStride = (modeHor ? scaleTuSize : scaleStride);
- sad = sa8d(cmp, srcStride, &tmp[(mode - 2) * (scaleTuSize * scaleTuSize)], scaleTuSize) << costShift;
- bits = !(mpms & ((uint64_t)1 << mode)) ? rbits : xModeBitsIntra(cu, mode, partOffset, depth);
- cost = m_rdCost.calcRdSADCost(sad, bits);
- COPY4_IF_LT(bcost, cost, bmode, mode, bsad, sad, bbits, bits);
+ int asad = 0;
+ uint32_t lowmode, highmode, amode, abits = 0;
+ uint64_t acost = MAX_INT64;
+
+ /* pick the best angle, sampling at distance of 5 */
+ for (mode = 5; mode < 35; mode += 5)
+ {
+ TRY_ANGLE(mode);
+ COPY4_IF_LT(acost, cost, amode, mode, asad, sad, abits, bits);
+ }
+
+ /* refine best angle at distance 2, then distance 1 */
+ for (uint32_t dist = 2; dist >= 1; dist--)
+ {
+ lowmode = amode - dist;
+ highmode = amode + dist;
+
+ X265_CHECK(lowmode >= 2 && lowmode <= 34, "low intra mode out of range\n");
+ TRY_ANGLE(lowmode);
+ COPY4_IF_LT(acost, cost, amode, lowmode, asad, sad, abits, bits);
+
+ X265_CHECK(highmode >= 2 && highmode <= 34, "high intra mode out of range\n");
+ TRY_ANGLE(highmode);
+ COPY4_IF_LT(acost, cost, amode, highmode, asad, sad, abits, bits);
+ }
+
+ if (amode == 33)
+ {
+ TRY_ANGLE(34);
+ COPY4_IF_LT(acost, cost, amode, 34, asad, sad, abits, bits);
+ }
+
+ COPY4_IF_LT(bcost, acost, bmode, amode, bsad, asad, bbits, abits);
}
-
+ else // calculate and search all intra prediction angles for lowest cost
+ {
+ for (mode = 2; mode < 35; mode++)
+ {
+ TRY_ANGLE(mode);
+ COPY4_IF_LT(bcost, cost, bmode, mode, bsad, sad, bbits, bits);
+ }
+ }
cu->m_totalBits = bbits;
cu->m_totalDistortion = bsad;
cu->m_sa8dCost = bcost;
More information about the x265-commits mailing list