[x265] [PATCH RFC] analysis: use macro and for-loop to simplify fast-intra

dave dtyx265 at gmail.com
Fri Aug 15 03:53:50 CEST 2014


On 08/14/2014 05:02 PM, Steve Borho wrote:
> On 08/14, dave wrote:
>> On 08/14/2014 01:42 PM, Steve Borho wrote:
>>> # HG changeset patch
>>> # User Steve Borho <steve at borho.org>
>>> # Date 1408048681 18000
>>> #      Thu Aug 14 15:38:01 2014 -0500
>>> # Node ID 07138e6ac952c96d1e31f5490c44f4cfaf6ac12a
>>> # Parent  213f17c1492c5bf96c3f382e7beffe0c871a563c
>>> analysis: use macro and for-loop to simplify fast-intra
>>>
>>> This changes behavior a bit: it now tries both +/-1 offsets instead of just
>>> one, and it has to do one extra check at the end since mode 34 isn't reached
>>> by the previous loops.
>>>
>>> diff -r 213f17c1492c -r 07138e6ac952 source/encoder/analysis.cpp
>>> --- a/source/encoder/analysis.cpp	Thu Aug 14 09:43:39 2014 -0700
>>> +++ b/source/encoder/analysis.cpp	Thu Aug 14 15:38:01 2014 -0500
>>> @@ -1693,68 +1693,56 @@
>>>       bool modeHor;
>>>       pixel *cmp;
>>>       intptr_t srcStride;
>>> +
>>> +#define TRY_ANGLE(angle) \
>>> +    modeHor = angle < 18; \
>>> +    cmp = modeHor ? buf_trans : fenc; \
>>> +    srcStride = modeHor ? scaleTuSize : scaleStride; \
>>> +    sad = sa8d(cmp, srcStride, &tmp[(angle - 2) * predsize], scaleTuSize) << costShift; \
>>> +    bits = (mpms & ((uint64_t)1 << angle)) ? xModeBitsIntra(cu, angle, partOffset, depth) : rbits; \
>>> +    cost = m_rdCost.calcRdSADCost(sad, bits)
>>> +
>>>       if (m_param->bEnableFastIntra)
>>>       {
>>> -        int lowsad, highsad, asad = 0;
>>> -        uint32_t lowbits, highbits, amode, lowmode, highmode, abits = 0;
>>> -        uint64_t lowcost, highcost = MAX_INT64, acost = MAX_INT64;
>>> +        int asad = 0;
>>> +        uint32_t lowmode, highmode, amode, abits = 0;
>>> +        uint64_t acost = MAX_INT64;
>>> -        for (mode = 4;mode < 35; mode += 5)
>>> +        /* pick the best angle, sampling at distance of 5 */
>>> +        for (mode = 5; mode < 35; mode += 5)
> Thanks for reviewing
>
>> By starting with mode = 5, won't this miss mode 2 since only +/-2 is
>> checked?  By starting from 4 the loop should end at 34.
> If 5 was the best angle of the initial sweep, we'll try +/-2 (3 and
> 7). If 3 is the new best, we try +/-1, which would be 2 and 4.
>
> On the high end of the spectrum, if 30 had the best cost, it will try
> 28 and 32, and then (if 32 is the new best) 31 and 33.
>
> Starting with 4 would remove the need for the extra check at the end,
> but at the same time we would need to range-check the low/high modes as
> well, since it could reach mode 1 (DC) or modes above 34.
I understand now.  I am testing the original search method in 
TEncSearch::estIntraPredQT.  Would you prefer the new one there too?
>
>>> -            modeHor = (mode < 18);
>>> -            cmp = (modeHor ? buf_trans : fenc);
>>> -            srcStride = (modeHor ? scaleTuSize : scaleStride);
>>> -            sad = sa8d(cmp, srcStride, &tmp[(mode - 2) * predsize], scaleTuSize) << costShift;
>>> -            bits = !(mpms & ((uint64_t)1 << mode)) ? rbits : xModeBitsIntra(cu, mode, partOffset, depth);
>>> -            cost = m_rdCost.calcRdSADCost(sad, bits);
>>> +            TRY_ANGLE(mode);
>>>               COPY4_IF_LT(acost, cost, amode, mode, asad, sad, abits, bits);
>>>           }
>>> -        lowmode = amode - 2;
>>> -        modeHor = (lowmode < 18);
>>> -        cmp = (modeHor ? buf_trans : fenc);
>>> -        srcStride = (modeHor ? scaleTuSize : scaleStride);
>>> -        lowsad = sa8d(cmp, srcStride, &tmp[(lowmode - 2) * predsize], scaleTuSize) << costShift;
>>> -        lowbits = !(mpms & ((uint64_t)1 << lowmode)) ? rbits : xModeBitsIntra(cu, lowmode, partOffset, depth);
>>> -        lowcost = m_rdCost.calcRdSADCost(lowsad, lowbits);
>>> -        if (amode < 34)
>>> +
>>> +        /* refine best angle at distance 2, then distance 1 */
>>> +        for (uint32_t dist = 2; dist >= 1; dist--)
>>>           {
>>> -            highmode = amode + 2;
>>> -            modeHor = (highmode < 18);
>>> -            cmp = (modeHor ? buf_trans : fenc);
>>> -            srcStride = (modeHor ? scaleTuSize : scaleStride);
>>> -            highsad = sa8d(cmp, srcStride, &tmp[(highmode - 2) * predsize], scaleTuSize) << costShift;
>>> -            highbits = !(mpms & ((uint64_t)1 << highmode)) ? rbits : xModeBitsIntra(cu, highmode, partOffset, depth);
>>> -            highcost = m_rdCost.calcRdSADCost(highsad, highbits);
>>> +            lowmode = amode - dist;
>>> +            highmode = amode + dist;
>>> +
>>> +            X265_CHECK(lowmode >= 2 && lowmode <= 34, "low intra mode out of range\n");
>>> +            TRY_ANGLE(lowmode);
>>> +            COPY4_IF_LT(acost, cost, amode, lowmode, asad, sad, abits, bits);
>>> +
>>> +            X265_CHECK(highmode >= 2 && highmode <= 34, "high intra mode out of range\n");
>>> +            TRY_ANGLE(highmode);
>>> +            COPY4_IF_LT(acost, cost, amode, highmode, asad, sad, abits, bits);
>>>           }
>>> -        if (lowcost <= highcost)
>>> +
>>> +        if (amode == 33)
>>>           {
>>> -            mode = amode - 1;
>>> -            COPY4_IF_LT(acost, lowcost, amode, lowmode, asad, lowsad, abits, lowbits);
>>> +            TRY_ANGLE(34);
>>> +            COPY4_IF_LT(acost, cost, amode, 34, asad, sad, abits, bits);
>>>           }
>>> -        else
>>> -        {
>>> -            mode = amode + 1;
>>> -            COPY4_IF_LT(acost, highcost, amode, highmode, asad, highsad, abits, highbits);
>>> -        }
>>> -        modeHor = (mode < 18);
>>> -        cmp = (modeHor ? buf_trans : fenc);
>>> -        srcStride = (modeHor ? scaleTuSize : scaleStride);
>>> -        sad = sa8d(cmp, srcStride, &tmp[(mode - 2) * predsize], scaleTuSize) << costShift;
>>> -        bits = !(mpms & ((uint64_t)1 << mode)) ? rbits : xModeBitsIntra(cu, mode, partOffset, depth);
>>> -        cost = m_rdCost.calcRdSADCost(sad, bits);
>>> -        COPY4_IF_LT(acost, cost, amode, mode, asad, sad, abits, bits);
>>> +
>>>           COPY4_IF_LT(bcost, acost, bmode, amode, bsad, asad, bbits, abits);
>>>       }
>>>       else // calculate and search all intra prediction angles for lowest cost
>>>       {
>>>           for (mode = 2; mode < 35; mode++)
>>>           {
>>> -            modeHor = (mode < 18);
>>> -            cmp = (modeHor ? buf_trans : fenc);
>>> -            srcStride = (modeHor ? scaleTuSize : scaleStride);
>>> -            sad = sa8d(cmp, srcStride, &tmp[(mode - 2) * predsize], scaleTuSize) << costShift;
>>> -            bits = !(mpms & ((uint64_t)1 << mode)) ? rbits : xModeBitsIntra(cu, mode, partOffset, depth);
>>> -            cost = m_rdCost.calcRdSADCost(sad, bits);
>>> +            TRY_ANGLE(mode);
>>>               COPY4_IF_LT(bcost, cost, bmode, mode, bsad, sad, bbits, bits);
>>>           }
>>>       }
>>> _______________________________________________
>>> x265-devel mailing list
>>> x265-devel at videolan.org
>>> https://mailman.videolan.org/listinfo/x265-devel
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel



More information about the x265-devel mailing list