[x265] [PATCH RFC] analysis: use macro and for-loop to simplify fast-intra

Steve Borho steve at borho.org
Fri Aug 15 02:02:08 CEST 2014


On 08/14, dave wrote:
> On 08/14/2014 01:42 PM, Steve Borho wrote:
> ># HG changeset patch
> ># User Steve Borho <steve at borho.org>
> ># Date 1408048681 18000
> >#      Thu Aug 14 15:38:01 2014 -0500
> ># Node ID 07138e6ac952c96d1e31f5490c44f4cfaf6ac12a
> ># Parent  213f17c1492c5bf96c3f382e7beffe0c871a563c
> >analysis: use macro and for-loop to simplify fast-intra
> >
> >this changes behavior a bit; it's trying both +/-1 offsets instead of just
> >one. and it has to do one extra check at the end since mode 34 isn't reached
> >by the other previous loops
> >
> >diff -r 213f17c1492c -r 07138e6ac952 source/encoder/analysis.cpp
> >--- a/source/encoder/analysis.cpp	Thu Aug 14 09:43:39 2014 -0700
> >+++ b/source/encoder/analysis.cpp	Thu Aug 14 15:38:01 2014 -0500
> >@@ -1693,68 +1693,56 @@
> >      bool modeHor;
> >      pixel *cmp;
> >      intptr_t srcStride;
> >+
> >+#define TRY_ANGLE(angle) \
> >+    modeHor = angle < 18; \
> >+    cmp = modeHor ? buf_trans : fenc; \
> >+    srcStride = modeHor ? scaleTuSize : scaleStride; \
> >+    sad = sa8d(cmp, srcStride, &tmp[(angle - 2) * predsize], scaleTuSize) << costShift; \
> >+    bits = (mpms & ((uint64_t)1 << angle)) ? xModeBitsIntra(cu, angle, partOffset, depth) : rbits; \
> >+    cost = m_rdCost.calcRdSADCost(sad, bits)
> >+
> >      if (m_param->bEnableFastIntra)
> >      {
> >-        int lowsad, highsad, asad = 0;
> >-        uint32_t lowbits, highbits, amode, lowmode, highmode, abits = 0;
> >-        uint64_t lowcost, highcost = MAX_INT64, acost = MAX_INT64;
> >+        int asad = 0;
> >+        uint32_t lowmode, highmode, amode, abits = 0;
> >+        uint64_t acost = MAX_INT64;
> >-        for (mode = 4;mode < 35; mode += 5)
> >+        /* pick the best angle, sampling at distance of 5 */
> >+        for (mode = 5; mode < 35; mode += 5)

Thanks for reviewing

> By starting with mode = 5, won't this miss mode 2 since only +/-2 is
> checked?  By starting from 4 the loop should end at 34.

If 5 was the best angle of the initial sweep, we'll try +/-2 (3 and
7). If 3 is then the new best, we try +/-1, which would be 2 and 4.

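On the high end of the spectrum, if 30 was the best cost, it will try
28 and 32; if 32 is the new best, it then tries 31 and 33. Mode 34 is
only reachable through the extra check at the end (when 33 wins).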

Starting with 4 would remove the need for the extra check at the end,
but at the same time we would need to range-check the low/high modes as
well, since the refinement could reach mode 1 (DC, not an angular mode)
or modes above 34.

> >-            modeHor = (mode < 18);
> >-            cmp = (modeHor ? buf_trans : fenc);
> >-            srcStride = (modeHor ? scaleTuSize : scaleStride);
> >-            sad = sa8d(cmp, srcStride, &tmp[(mode - 2) * predsize], scaleTuSize) << costShift;
> >-            bits = !(mpms & ((uint64_t)1 << mode)) ? rbits : xModeBitsIntra(cu, mode, partOffset, depth);
> >-            cost = m_rdCost.calcRdSADCost(sad, bits);
> >+            TRY_ANGLE(mode);
> >              COPY4_IF_LT(acost, cost, amode, mode, asad, sad, abits, bits);
> >          }
> >-        lowmode = amode - 2;
> >-        modeHor = (lowmode < 18);
> >-        cmp = (modeHor ? buf_trans : fenc);
> >-        srcStride = (modeHor ? scaleTuSize : scaleStride);
> >-        lowsad = sa8d(cmp, srcStride, &tmp[(lowmode - 2) * predsize], scaleTuSize) << costShift;
> >-        lowbits = !(mpms & ((uint64_t)1 << lowmode)) ? rbits : xModeBitsIntra(cu, lowmode, partOffset, depth);
> >-        lowcost = m_rdCost.calcRdSADCost(lowsad, lowbits);
> >-        if (amode < 34)
> >+
> >+        /* refine best angle at distance 2, then distance 1 */
> >+        for (uint32_t dist = 2; dist >= 1; dist--)
> >          {
> >-            highmode = amode + 2;
> >-            modeHor = (highmode < 18);
> >-            cmp = (modeHor ? buf_trans : fenc);
> >-            srcStride = (modeHor ? scaleTuSize : scaleStride);
> >-            highsad = sa8d(cmp, srcStride, &tmp[(highmode - 2) * predsize], scaleTuSize) << costShift;
> >-            highbits = !(mpms & ((uint64_t)1 << highmode)) ? rbits : xModeBitsIntra(cu, highmode, partOffset, depth);
> >-            highcost = m_rdCost.calcRdSADCost(highsad, highbits);
> >+            lowmode = amode - dist;
> >+            highmode = amode + dist;
> >+
> >+            X265_CHECK(lowmode >= 2 && lowmode <= 34, "low intra mode out of range\n");
> >+            TRY_ANGLE(lowmode);
> >+            COPY4_IF_LT(acost, cost, amode, lowmode, asad, sad, abits, bits);
> >+
> >+            X265_CHECK(highmode >= 2 && highmode <= 34, "high intra mode out of range\n");
> >+            TRY_ANGLE(highmode);
> >+            COPY4_IF_LT(acost, cost, amode, highmode, asad, sad, abits, bits);
> >          }
> >-        if (lowcost <= highcost)
> >+
> >+        if (amode == 33)
> >          {
> >-            mode = amode - 1;
> >-            COPY4_IF_LT(acost, lowcost, amode, lowmode, asad, lowsad, abits, lowbits);
> >+            TRY_ANGLE(34);
> >+            COPY4_IF_LT(acost, cost, amode, 34, asad, sad, abits, bits);
> >          }
> >-        else
> >-        {
> >-            mode = amode + 1;
> >-            COPY4_IF_LT(acost, highcost, amode, highmode, asad, highsad, abits, highbits);
> >-        }
> >-        modeHor = (mode < 18);
> >-        cmp = (modeHor ? buf_trans : fenc);
> >-        srcStride = (modeHor ? scaleTuSize : scaleStride);
> >-        sad = sa8d(cmp, srcStride, &tmp[(mode - 2) * predsize], scaleTuSize) << costShift;
> >-        bits = !(mpms & ((uint64_t)1 << mode)) ? rbits : xModeBitsIntra(cu, mode, partOffset, depth);
> >-        cost = m_rdCost.calcRdSADCost(sad, bits);
> >-        COPY4_IF_LT(acost, cost, amode, mode, asad, sad, abits, bits);
> >+
> >          COPY4_IF_LT(bcost, acost, bmode, amode, bsad, asad, bbits, abits);
> >      }
> >      else // calculate and search all intra prediction angles for lowest cost
> >      {
> >          for (mode = 2; mode < 35; mode++)
> >          {
> >-            modeHor = (mode < 18);
> >-            cmp = (modeHor ? buf_trans : fenc);
> >-            srcStride = (modeHor ? scaleTuSize : scaleStride);
> >-            sad = sa8d(cmp, srcStride, &tmp[(mode - 2) * predsize], scaleTuSize) << costShift;
> >-            bits = !(mpms & ((uint64_t)1 << mode)) ? rbits : xModeBitsIntra(cu, mode, partOffset, depth);
> >-            cost = m_rdCost.calcRdSADCost(sad, bits);
> >+            TRY_ANGLE(mode);
> >              COPY4_IF_LT(bcost, cost, bmode, mode, bsad, sad, bbits, bits);
> >          }
> >      }
> >_______________________________________________
> >x265-devel mailing list
> >x265-devel at videolan.org
> >https://mailman.videolan.org/listinfo/x265-devel
> 
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel

-- 
Steve Borho


More information about the x265-devel mailing list