[x265] [PATCH] Added fast intra search option

dave dtyx265 at gmail.com
Wed Aug 13 17:29:00 CEST 2014


On 08/12/2014 10:22 PM, Steve Borho wrote:
> On 08/12, dtyx265 at gmail.com wrote:
>> # HG changeset patch
>> # User David T Yuen <dtyx265 at gmail.com>
>> # Date 1407882999 25200
>> # Node ID 75e4ad481b3668b1e420ede300287aa3ea3fb8d5
>> # Parent  8a7f4bb1d1be32fe668d410450c2e320ccae6098
>> Added fast intra search option
>>
>> This version calls intra_pred_allangs  to create the predictions then the faster search with satd
> on my newer CPUs, this version was unambiguously faster; so I've pushed
> this version, thanks.
How were you testing it?  I was encoding a 2 minute video with -I 1.

I also have a patch that changes m_predictions from an EstimateRow member 
pointer to enough dynamically allocated memory to hold all 35 
predictions into a local array in EstimateRow::estimateCUCost big enough 
to hold one prediction. That can't be used with allangs and so would 
only be useful with the other fast-intra version.  Again, it didn't seem 
to help much on my system, but if you would like to try it I'll submit a 
patch.
>
>> diff -r 8a7f4bb1d1be -r 75e4ad481b36 source/common/param.cpp
>> --- a/source/common/param.cpp	Tue Aug 12 01:11:39 2014 -0500
>> +++ b/source/common/param.cpp	Tue Aug 12 15:36:39 2014 -0700
>> @@ -132,6 +132,7 @@
>>       /* Intra Coding Tools */
>>       param->bEnableConstrainedIntra = 0;
>>       param->bEnableStrongIntraSmoothing = 1;
>> +    param->bEnableFastIntra = 0;
>>   
>>       /* Inter Coding tools */
>>       param->searchMethod = X265_HEX_SEARCH;
>> @@ -560,6 +561,7 @@
>>       OPT("lossless") p->bLossless = atobool(value);
>>       OPT("cu-lossless") p->bCULossless = atobool(value);
>>       OPT("constrained-intra") p->bEnableConstrainedIntra = atobool(value);
>> +    OPT("fast-intra") p->bEnableFastIntra = atobool(value);
>>       OPT("open-gop") p->bOpenGOP = atobool(value);
>>       OPT("scenecut")
>>       {
>> @@ -1211,6 +1213,7 @@
>>       BOOL(p->bLossless, "lossless");
>>       BOOL(p->bCULossless, "cu-lossless");
>>       BOOL(p->bEnableConstrainedIntra, "constrained-intra");
>> +    BOOL(p->bEnableFastIntra, "fast-intra");
>>       BOOL(p->bOpenGOP, "open-gop");
>>       s += sprintf(s, " interlace=%d", p->interlaceMode);
>>       s += sprintf(s, " keyint=%d", p->keyframeMax);
>> diff -r 8a7f4bb1d1be -r 75e4ad481b36 source/encoder/slicetype.cpp
>> --- a/source/encoder/slicetype.cpp	Tue Aug 12 01:11:39 2014 -0500
>> +++ b/source/encoder/slicetype.cpp	Tue Aug 12 15:36:39 2014 -0700
>> @@ -1242,6 +1242,7 @@
>>       {
>>           m_rows[i].m_widthInCU = m_widthInCU;
>>           m_rows[i].m_heightInCU = m_heightInCU;
>> +        m_rows[i].m_param = m_param;
>>       }
>>   
>>       if (!WaveFront::init(m_heightInCU))
>> @@ -1676,26 +1677,86 @@
>>   
>>           int predsize = cuSize * cuSize;
>>   
>> -        // generate 35 intra predictions into tmp
>> +        // generate 35 intra predictions into m_predictions
>> +        pixelcmp_t satd = primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)];
>> +        int icost = m_me.COST_MAX, cost, highcost, lowcost, acost = m_me.COST_MAX;
>> +        uint32_t  lowmode, mode;
>>           primitives.intra_pred[sizeIdx][DC_IDX](m_predictions, cuSize, left0, above0, 0, (cuSize <= 16));
>> +        cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize);
>> +        if (cost < icost)
>> +            icost = cost;
>>           pixel *above = (cuSize >= 8) ? above1 : above0;
>>           pixel *left  = (cuSize >= 8) ? left1 : left0;
>> -        primitives.intra_pred[sizeIdx][PLANAR_IDX](m_predictions + predsize, cuSize, left, above, 0, 0);
>> +        primitives.intra_pred[sizeIdx][PLANAR_IDX](m_predictions, cuSize, left, above, 0, 0);
>> +        cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize);
>> +        if (cost < icost)
>> +            icost = cost;
>>           primitives.intra_pred_allangs[sizeIdx](m_predictions + 2 * predsize, above0, left0, above1, left1, (cuSize <= 16));
>>   
>> -        // calculate 35 satd costs, keep least cost
>> +        // calculate satd costs, keep least cost
>>           ALIGN_VAR_32(pixel, buf_trans[32 * 32]);
>>           primitives.transpose[sizeIdx](buf_trans, m_me.fenc, FENC_STRIDE);
>> -        pixelcmp_t satd = primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)];
>> -        int icost = m_me.COST_MAX, cost;
>> -        for (uint32_t mode = 0; mode < 35; mode++)
>> +        // fast-intra angle search
>> +        if (m_param->bEnableFastIntra)
>>           {
>> -            if ((mode >= 2) && (mode < 18))
>> +            for (mode = 4;mode < 35; mode += 5)
>> +            {
>> +                if (mode < 18)
>> +                    cost = satd(buf_trans, cuSize, &m_predictions[mode * predsize], cuSize);
>> +                else
>> +                    cost = satd(m_me.fenc, FENC_STRIDE, &m_predictions[mode * predsize], cuSize);
>> +                if (cost < acost)
>> +                {
>> +                    lowmode = mode;
>> +                    acost = cost;
>> +                }
>> +            }
>> +            mode = lowmode - 2;
>> +            if (mode < 18)
>> +                lowcost = satd(buf_trans, cuSize, &m_predictions[mode * predsize], cuSize);
>> +            else
>> +                lowcost = satd(m_me.fenc, FENC_STRIDE, &m_predictions[mode * predsize], cuSize);
>> +            highcost = m_me.COST_MAX;
>> +            if (lowmode < 34)
>> +            {
>> +                mode = lowmode + 2;
>> +                if (mode < 18)
>> +                    highcost = satd(buf_trans, cuSize, &m_predictions[mode * predsize], cuSize);
>> +                else
>> +                    highcost = satd(m_me.fenc, FENC_STRIDE, &m_predictions[mode * predsize], cuSize);
>> +            }
>> +            if (lowcost <= highcost)
>> +            {
>> +                mode = lowmode - 1;
>> +                if (lowcost < acost)
>> +                    acost = lowcost;
>> +            }
>> +            else
>> +            {
>> +                mode = lowmode + 1;
>> +                if (highcost < acost)
>> +                    acost = highcost;
>> +            }
>> +            if (mode < 18)
>>                   cost = satd(buf_trans, cuSize, &m_predictions[mode * predsize], cuSize);
>>               else
>>                   cost = satd(m_me.fenc, FENC_STRIDE, &m_predictions[mode * predsize], cuSize);
>> -            if (cost < icost)
>> -                icost = cost;
>> +             if (cost < acost)
>> +                acost = cost;
>> +            if (acost < icost)
>> +                icost = acost;
>> +        }
>> +        else // calculate and search all intra prediction angles for lowest cost
>> +        {
>> +            for (mode = 2; mode < 35; mode++)
>> +            {
>> +                if (mode < 18)
>> +                    cost = satd(buf_trans, cuSize, &m_predictions[mode * predsize], cuSize);
>> +                else
>> +                    cost = satd(m_me.fenc, FENC_STRIDE, &m_predictions[mode * predsize], cuSize);
>> +                if (cost < icost)
>> +                    icost = cost;
>> +            }
>>           }
>>           const int intraPenalty = 5 * m_lookAheadLambda;
>>           icost += intraPenalty + lowresPenalty;
>> diff -r 8a7f4bb1d1be -r 75e4ad481b36 source/encoder/slicetype.h
>> --- a/source/encoder/slicetype.h	Tue Aug 12 01:11:39 2014 -0500
>> +++ b/source/encoder/slicetype.h	Tue Aug 12 15:36:39 2014 -0700
>> @@ -48,6 +48,7 @@
>>   class EstimateRow
>>   {
>>   public:
>> +    x265_param*         m_param;
>>       MotionEstimate      m_me;
>>       Lock                m_lock;
>>       pixel*              m_predictions;    // buffer for 35 intra predictions
>> diff -r 8a7f4bb1d1be -r 75e4ad481b36 source/x265.cpp
>> --- a/source/x265.cpp	Tue Aug 12 01:11:39 2014 -0500
>> +++ b/source/x265.cpp	Tue Aug 12 15:36:39 2014 -0700
>> @@ -117,6 +117,8 @@
>>       { "no-cu-lossless",       no_argument, NULL, 0 },
>>       { "no-constrained-intra", no_argument, NULL, 0 },
>>       { "constrained-intra",    no_argument, NULL, 0 },
>> +    { "fast-intra",           no_argument, NULL, 0 },
>> +    { "no-fast-intra",        no_argument, NULL, 0 },
>>       { "no-open-gop",          no_argument, NULL, 0 },
>>       { "open-gop",             no_argument, NULL, 0 },
>>       { "keyint",         required_argument, NULL, 'I' },
>> @@ -387,6 +389,7 @@
>>       H0("   --[no-]strong-intra-smoothing Enable strong intra smoothing for 32x32 blocks. Default %s\n", OPT(param->bEnableStrongIntraSmoothing));
>>       H0("   --[no-]constrained-intra      Constrained intra prediction (use only intra coded reference pixels) Default %s\n", OPT(param->bEnableConstrainedIntra));
>>       H0("   --[no-]b-intra                Enable intra in B frames in veryslow presets. Default %s\n", OPT(param->bIntraInBFrames));
>> +    H0("   --[no]-fast-intra             Enable faster search method for intra mode. Default %s\n", OPT(param->bEnableFastIntra));
>>       H0("   --rdpenalty <0..2>            penalty for 32x32 intra TU in non-I slices. 0:disabled 1:RD-penalty 2:maximum. Default %d\n", param->rdPenalty);
>>       H0("\nSlice decision options:\n");
>>       H0("   --[no-]open-gop               Enable open-GOP, allows I slices to be non-IDR. Default %s\n", OPT(param->bOpenGOP));
>> diff -r 8a7f4bb1d1be -r 75e4ad481b36 source/x265.h
>> --- a/source/x265.h	Tue Aug 12 01:11:39 2014 -0500
>> +++ b/source/x265.h	Tue Aug 12 15:36:39 2014 -0700
>> @@ -533,6 +533,9 @@
>>        * depending on your source material. Defaults to disabled */
>>       int       bEnableStrongIntraSmoothing;
>>   
>> +    /* Use a faster search method to find the best intra mode. Default is 0 */
>> +    int       bEnableFastIntra;
>> +
>>       /*== Inter Coding Tools ==*/
>>   
>>       /* ME search method (DIA, HEX, UMH, STAR, FULL). The search patterns
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel



More information about the x265-devel mailing list