[x264-devel] commit: Don't check i16x16 planar mode unless previous modes were useful ( Jason Garrett-Glaser )

Wed Jul 7 17:27:55 CEST 2010

On Sun, Jul 4, 2010 at 6:41 PM, <git at videolan.org> wrote:

> x264 | branch: master | Jason Garrett-Glaser <darkshikari at gmail.com> | Wed
> Jun 30 13:06:22 2010 -0700| [f378994ab3c816aaab2b795143e31919fdee1f2d] |
> committer: Jason Garrett-Glaser
>
> Don't check i16x16 planar mode unless previous modes were useful
> Saves ~160 clocks per MB at subme=1, ~270 per MB at subme>1 (measured on
> Core i7).
> Negligible effect on compression.
>
> Also make a few more arrays static.
>
> >
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=f378994ab3c816aaab2b795143e31919fdee1f2d
> ---
>
>  encoder/analyse.c |   29 +++++++++++++++++++----------
>  encoder/set.c     |    3 ++-
>  2 files changed, 21 insertions(+), 11 deletions(-)
>
> diff --git a/encoder/analyse.c b/encoder/analyse.c
> index 696c78f..cdbdd1e 100644
> --- a/encoder/analyse.c
> +++ b/encoder/analyse.c
> @@ -646,16 +646,27 @@ static void x264_mb_analyse_intra( x264_t *h,
> x264_mb_analysis_t *a, int i_satd_
>     /* 16x16 prediction selection */
>     const int8_t *predict_mode = predict_16x16_mode_available(
> h->mb.i_neighbour_intra );
>
> +    /* Not heavily tuned */
> +    static const uint8_t i16x16_thresh_lut[11] = { 2, 2, 2, 3, 3, 4, 4, 4,
> 4, 4, 4 };
> +    int i16x16_thresh = a->b_fast_intra ?
> (i16x16_thresh_lut[h->mb.i_subpel_refine]*i_satd_inter)>>1 : COST_MAX;
> +
>     if( !h->mb.b_lossless && predict_mode[3] >= 0 )
>     {
>         h->pixf.intra_mbcmp_x3_16x16( p_src, p_dst, a->i_satd_i16x16_dir );
> -        h->predict_16x16[I_PRED_16x16_P]( p_dst );
> -        a->i_satd_i16x16_dir[I_PRED_16x16_P] =
> -            h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src,
> FENC_STRIDE );
> -        for( int i = 0; i < 4; i++ )
> +        a->i_satd_i16x16_dir[0] += lambda * bs_size_ue(0);
> +        a->i_satd_i16x16_dir[1] += lambda * bs_size_ue(1);
> +        a->i_satd_i16x16_dir[2] += lambda * bs_size_ue(2);
> +        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[0],
> a->i_predict16x16, 0 );
> +        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[1],
> a->i_predict16x16, 1 );
> +        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[2],
> a->i_predict16x16, 2 );
> +
> +        /* Plane is expensive, so don't check it unless one of the
> previous modes was useful. */
> +        if( a->i_satd_i16x16 <= i16x16_thresh )
>         {
> -            int cost = a->i_satd_i16x16_dir[i] += lambda * bs_size_ue(i);
> -            COPY2_IF_LT( a->i_satd_i16x16, cost, a->i_predict16x16, i );
> +            h->predict_16x16[I_PRED_16x16_P]( p_dst );
> +            a->i_satd_i16x16_dir[I_PRED_16x16_P] =
> h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE );
> +            a->i_satd_i16x16_dir[I_PRED_16x16_P] += lambda *
> bs_size_ue(3);
> +            COPY2_IF_LT( a->i_satd_i16x16,
> a->i_satd_i16x16_dir[I_PRED_16x16_P], a->i_predict16x16, 3 );
>         }
>     }
>     else
> @@ -681,9 +692,7 @@ static void x264_mb_analyse_intra( x264_t *h,
> x264_mb_analysis_t *a, int i_satd_
>         /* cavlc mb type prefix */
>         a->i_satd_i16x16 += lambda * i_mb_b_cost_table[I_16x16];
>
> -    /* Not heavily tuned */
> -    const uint8_t i16x16_thresh[11] = { 2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4 };
> -    if( a->b_fast_intra && a->i_satd_i16x16 >
> (i16x16_thresh[h->mb.i_subpel_refine]*i_satd_inter)>>1 )
> +    if( a->i_satd_i16x16 > i16x16_thresh )
>

what the...??

>         return;
>
>     /* 8x8 prediction selection */
> @@ -784,7 +793,7 @@ static void x264_mb_analyse_intra( x264_t *h,
> x264_mb_analysis_t *a, int i_satd_
>             i_cost = (i_cost * cost_div_fix8[idx]) >> 8;
>         }
>         /* Not heavily tuned */
> -        const uint8_t i8x8_thresh[11] = { 4, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6
> };
> +        static const uint8_t i8x8_thresh[11] = { 4, 4, 4, 5, 5, 5, 6, 6,
> 6, 6, 6 };
>         if( X264_MIN(i_cost, a->i_satd_i16x16) >
> (i_satd_inter*i8x8_thresh[h->mb.i_subpel_refine])>>2 )
>             return;
>     }
> diff --git a/encoder/set.c b/encoder/set.c
> index 8d007aa..8ea6eac 100644
> --- a/encoder/set.c
> +++ b/encoder/set.c
> @@ -534,7 +534,8 @@ int x264_sei_version_write( x264_t *h, bs_t *s )
>  {
>     int i;
>     // random ID number generated according to ISO-11578
> -    const uint8_t uuid[16] = {
> +    static const uint8_t uuid[16] =
> +    {
>         0xdc, 0x45, 0xe9, 0xbd, 0xe6, 0xd9, 0x48, 0xb7,
>         0x96, 0x2c, 0xd8, 0x20, 0xd9, 0x23, 0xee, 0xef
>     };
>
> _______________________________________________
> x264-devel mailing list
> x264-devel at videolan.org
> http://mailman.videolan.org/listinfo/x264-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x264-devel/attachments/20100707/4676db20/attachment.htm>