[x265-commits] [x265] Added fast intra search option to Analysis::checkIntraInInter_rd0_4

Fri Aug 15 03:37:07 CEST 2014

details:   http://hg.videolan.org/x265/rev/213f17c1492c
branches:  
changeset: 7794:213f17c1492c
user:      David T Yuen <dtyx265 at gmail.com>
date:      Thu Aug 14 09:43:39 2014 -0700
description:
Added fast intra search option to Analysis::checkIntraInInter_rd0_4
Subject: [x265] analysis: use macro and for-loop to simplify fast-intra

details:   http://hg.videolan.org/x265/rev/07138e6ac952
branches:  
changeset: 7795:07138e6ac952
user:      Steve Borho <steve at borho.org>
date:      Thu Aug 14 15:38:01 2014 -0500
description:
analysis: use macro and for-loop to simplify fast-intra

This changes behavior a bit: it now tries both +/-1 offsets instead of just
one, and it has to do one extra check at the end, since mode 34 isn't reached
by the preceding loops.
Subject: [x265] param: don't allow turbo mode to increase rd-level, improve docs

details:   http://hg.videolan.org/x265/rev/c4f0bbad98cf
branches:  
changeset: 7796:c4f0bbad98cf
user:      Steve Borho <steve at borho.org>
date:      Thu Aug 14 19:35:47 2014 -0500
description:
param: don't allow turbo mode to increase rd-level, improve docs
Subject: [x265] quant: use optimized primitive for transquant bypass in invTransformNxN

details:   http://hg.videolan.org/x265/rev/ef25a0e7de3b
branches:  
changeset: 7797:ef25a0e7de3b
user:      Steve Borho <steve at borho.org>
date:      Thu Aug 14 16:06:01 2014 -0500
description:
quant: use optimized primitive for transquant bypass in invTransformNxN

diffstat:

 doc/reST/cli.rst            |  21 +++++++++++--
 source/common/param.cpp     |   2 +-
 source/common/quant.cpp     |   5 +--
 source/encoder/analysis.cpp |  64 ++++++++++++++++++++++++++++++++++++++------
 4 files changed, 74 insertions(+), 18 deletions(-)

diffs (148 lines):

diff -r 08e9d25e2c72 -r ef25a0e7de3b doc/reST/cli.rst

--- a/doc/reST/cli.rst	Thu Aug 14 15:33:51 2014 -0500
+++ b/doc/reST/cli.rst	Thu Aug 14 16:06:01 2014 -0500
@@ -831,7 +831,7 @@ Quality, rate control and rate distortio
 	Enable multipass rate control mode. Input is encoded multiple times,
 	storing the encoded information of each pass in a stats file from which
 	the consecutive pass tunes the qp of each frame to improve the quality
-	of the output.Default 0(disabled)
+	of the output. Default disabled
 
 	1. First pass, creates stats file
 	2. Last pass, does not overwrite stats file
@@ -841,9 +841,22 @@ Quality, rate control and rate distortio
 
 .. option:: --slow-firstpass, --no-slow-firstpass
 
-	Enable a slow and more detailed first pass encode in Multipass rate control mode.
-	Speed of the first pass encode is slightly lesser and quality midly improved when
-	compared to the default settings in a multipass encode.  Default disabled
+	Enable a slow and more detailed first pass encode in Multipass rate
+	control mode.  Speed of the first pass encode is slightly lesser and
+	quality midly improved when compared to the default settings in a
+	multipass encode. Default disabled (turbo mode enabled)
+
+	When **turbo** first pass is not disabled, these options are
+	set on the first pass to improve performance:
+	
+	* :option:`--no-rect`
+	* :option:`--no-amp`
+	* :option:`--early-skip`
+	* :option:`--ref` = 1
+	* :option:`--max-merge` = 1
+	* :option:`--me` = DIA
+	* :option:`--subme` = MIN(2, :option:`--subme`)
+	* :option:`--rd` = MIN(2, :option:`--rd`)
 
 Loop filters
 ============
diff -r 08e9d25e2c72 -r ef25a0e7de3b source/common/param.cpp
--- a/source/common/param.cpp	Thu Aug 14 15:33:51 2014 -0500
+++ b/source/common/param.cpp	Thu Aug 14 16:06:01 2014 -0500
@@ -1037,7 +1037,7 @@ void x265_param_apply_fastfirstpass(x265
         param->searchMethod = X265_DIA_SEARCH;
         param->subpelRefine = X265_MIN(2, param->subpelRefine);
         param->bEnableEarlySkip = 1;
-        param->rdLevel = 2;
+        param->rdLevel = X265_MIN(2, param->rdLevel);
     }
 }
 
diff -r 08e9d25e2c72 -r ef25a0e7de3b source/common/quant.cpp
--- a/source/common/quant.cpp	Thu Aug 14 15:33:51 2014 -0500
+++ b/source/common/quant.cpp	Thu Aug 14 16:06:01 2014 -0500
@@ -406,10 +406,7 @@ void Quant::invtransformNxN(bool transQu
 {
     if (transQuantBypass)
     {
-        int trSize = 1 << log2TrSize;
-        for (int k = 0; k < trSize; k++)
-            for (int j = 0; j < trSize; j++)
-                residual[k * stride + j] = (int16_t)(coeff[k * trSize + j]);
+        primitives.cvt32to16_shr(residual, coeff, stride, 0, 1 << log2TrSize);
         return;
     }
 
diff -r 08e9d25e2c72 -r ef25a0e7de3b source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Thu Aug 14 15:33:51 2014 -0500
+++ b/source/encoder/analysis.cpp	Thu Aug 14 16:06:01 2014 -0500
@@ -1653,6 +1653,7 @@ void Analysis::checkIntraInInter_rd0_4(T
     }
 
     pixelcmp_t sa8d = primitives.sa8d[sizeIdx];
+    int predsize = scaleTuSize * scaleTuSize;
 
     uint32_t preds[3];
     cu->getIntraDirLumaPredictor(partOffset, preds);
@@ -1689,17 +1690,62 @@ void Analysis::checkIntraInInter_rd0_4(T
 
     primitives.intra_pred_allangs[sizeIdx](tmp, above, left, aboveFiltered, leftFiltered, (scaleTuSize <= 16));
 
-    for (mode = 2; mode < 35; mode++)
+    bool modeHor;
+    pixel *cmp;
+    intptr_t srcStride;
+
+#define TRY_ANGLE(angle) \
+    modeHor = angle < 18; \
+    cmp = modeHor ? buf_trans : fenc; \
+    srcStride = modeHor ? scaleTuSize : scaleStride; \
+    sad = sa8d(cmp, srcStride, &tmp[(angle - 2) * predsize], scaleTuSize) << costShift; \
+    bits = (mpms & ((uint64_t)1 << angle)) ? xModeBitsIntra(cu, angle, partOffset, depth) : rbits; \
+    cost = m_rdCost.calcRdSADCost(sad, bits)
+
+    if (m_param->bEnableFastIntra)
     {
-        bool modeHor = (mode < 18);
-        pixel *cmp = (modeHor ? buf_trans : fenc);
-        intptr_t srcStride = (modeHor ? scaleTuSize : scaleStride);
-        sad  = sa8d(cmp, srcStride, &tmp[(mode - 2) * (scaleTuSize * scaleTuSize)], scaleTuSize) << costShift;
-        bits = !(mpms & ((uint64_t)1 << mode)) ? rbits : xModeBitsIntra(cu, mode, partOffset, depth);
-        cost = m_rdCost.calcRdSADCost(sad, bits);
-        COPY4_IF_LT(bcost, cost, bmode, mode, bsad, sad, bbits, bits);
+        int asad = 0;
+        uint32_t lowmode, highmode, amode, abits = 0;
+        uint64_t acost = MAX_INT64;
+
+        /* pick the best angle, sampling at distance of 5 */
+        for (mode = 5; mode < 35; mode += 5)
+        {
+            TRY_ANGLE(mode);
+            COPY4_IF_LT(acost, cost, amode, mode, asad, sad, abits, bits);
+        }
+
+        /* refine best angle at distance 2, then distance 1 */
+        for (uint32_t dist = 2; dist >= 1; dist--)
+        {
+            lowmode = amode - dist;
+            highmode = amode + dist;
+
+            X265_CHECK(lowmode >= 2 && lowmode <= 34, "low intra mode out of range\n");
+            TRY_ANGLE(lowmode);
+            COPY4_IF_LT(acost, cost, amode, lowmode, asad, sad, abits, bits);
+
+            X265_CHECK(highmode >= 2 && highmode <= 34, "high intra mode out of range\n");
+            TRY_ANGLE(highmode);
+            COPY4_IF_LT(acost, cost, amode, highmode, asad, sad, abits, bits);
+        }
+
+        if (amode == 33)
+        {
+            TRY_ANGLE(34);
+            COPY4_IF_LT(acost, cost, amode, 34, asad, sad, abits, bits);
+        }
+
+        COPY4_IF_LT(bcost, acost, bmode, amode, bsad, asad, bbits, abits);
     }
-
+    else // calculate and search all intra prediction angles for lowest cost
+    {
+        for (mode = 2; mode < 35; mode++)
+        {
+            TRY_ANGLE(mode);
+            COPY4_IF_LT(bcost, cost, bmode, mode, bsad, sad, bbits, bits);
+        }
+    }
     cu->m_totalBits = bbits;
     cu->m_totalDistortion = bsad;
     cu->m_sa8dCost = bcost;