[x264-devel] commit: Don't check i16x16 planar mode unless previous modes were useful ( Jason Garrett-Glaser )

Sat Jul 10 04:13:10 CEST 2010

On Wed, Jul 7, 2010 at 8:27 AM, Pascal Massimino
<pascal.massimino at gmail.com> wrote:
>
>
> On Sun, Jul 4, 2010 at 6:41 PM, <git at videolan.org> wrote:
>>
>> x264 | branch: master | Jason Garrett-Glaser <darkshikari at gmail.com> | Wed
>> Jun 30 13:06:22 2010 -0700| [f378994ab3c816aaab2b795143e31919fdee1f2d] |
>> committer: Jason Garrett-Glaser
>>
>> Don't check i16x16 planar mode unless previous modes were useful
>> Saves ~160 clocks per MB at subme=1, ~270 per MB at subme>1 (measured on
>> Core i7).
>> Negligible effect on compression.
>>
>> Also make a few more arrays static.
>>
>> >
>> > http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=f378994ab3c816aaab2b795143e31919fdee1f2d
>> ---
>>
>>  encoder/analyse.c |   29 +++++++++++++++++++----------
>>  encoder/set.c     |    3 ++-
>>  2 files changed, 21 insertions(+), 11 deletions(-)
>>
>> diff --git a/encoder/analyse.c b/encoder/analyse.c
>> index 696c78f..cdbdd1e 100644
>> --- a/encoder/analyse.c
>> +++ b/encoder/analyse.c
>> @@ -646,16 +646,27 @@ static void x264_mb_analyse_intra( x264_t *h,
>> x264_mb_analysis_t *a, int i_satd_
>>     /* 16x16 prediction selection */
>>     const int8_t *predict_mode = predict_16x16_mode_available(
>> h->mb.i_neighbour_intra );
>>
>> +    /* Not heavily tuned */
>> +    static const uint8_t i16x16_thresh_lut[11] = { 2, 2, 2, 3, 3, 4, 4,
>> 4, 4, 4, 4 };
>> +    int i16x16_thresh = a->b_fast_intra ?
>> (i16x16_thresh_lut[h->mb.i_subpel_refine]*i_satd_inter)>>1 : COST_MAX;
>> +
>>     if( !h->mb.b_lossless && predict_mode[3] >= 0 )
>>     {
>>         h->pixf.intra_mbcmp_x3_16x16( p_src, p_dst, a->i_satd_i16x16_dir
>> );
>> -        h->predict_16x16[I_PRED_16x16_P]( p_dst );
>> -        a->i_satd_i16x16_dir[I_PRED_16x16_P] =
>> -            h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src,
>> FENC_STRIDE );
>> -        for( int i = 0; i < 4; i++ )
>> +        a->i_satd_i16x16_dir[0] += lambda * bs_size_ue(0);
>> +        a->i_satd_i16x16_dir[1] += lambda * bs_size_ue(1);
>> +        a->i_satd_i16x16_dir[2] += lambda * bs_size_ue(2);
>> +        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[0],
>> a->i_predict16x16, 0 );
>> +        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[1],
>> a->i_predict16x16, 1 );
>> +        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[2],
>> a->i_predict16x16, 2 );
>> +
>> +        /* Plane is expensive, so don't check it unless one of the
>> previous modes was useful. */
>> +        if( a->i_satd_i16x16 <= i16x16_thresh )
>>         {
>> -            int cost = a->i_satd_i16x16_dir[i] += lambda * bs_size_ue(i);
>> -            COPY2_IF_LT( a->i_satd_i16x16, cost, a->i_predict16x16, i );
>> +            h->predict_16x16[I_PRED_16x16_P]( p_dst );
>> +            a->i_satd_i16x16_dir[I_PRED_16x16_P] =
>> h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE );
>> +            a->i_satd_i16x16_dir[I_PRED_16x16_P] += lambda *
>> bs_size_ue(3);
>> +            COPY2_IF_LT( a->i_satd_i16x16,
>> a->i_satd_i16x16_dir[I_PRED_16x16_P], a->i_predict16x16, 3 );
>>         }
>>     }
>>     else
>> @@ -681,9 +692,7 @@ static void x264_mb_analyse_intra( x264_t *h,
>> x264_mb_analysis_t *a, int i_satd_
>>         /* cavlc mb type prefix */
>>         a->i_satd_i16x16 += lambda * i_mb_b_cost_table[I_16x16];
>>
>> -    /* Not heavily tuned */
>> -    const uint8_t i16x16_thresh[11] = { 2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4
>> };
>> -    if( a->b_fast_intra && a->i_satd_i16x16 >
>> (i16x16_thresh[h->mb.i_subpel_refine]*i_satd_inter)>>1 )
>> +    if( a->i_satd_i16x16 > i16x16_thresh )
>
> what the...??

What the what?

Dark Shikari