[x264-devel] [PATCH 3/3] RFC: checkasm: Compare the combined sa8d_satd functions against the individual functions

Janne Grunau janne-x264 at jannau.net
Tue Aug 25 20:49:49 CEST 2015


On 2015-08-14 00:00:59 +0300, Martin Storsjö wrote:
> This shows the actual benefit of using the combined version, versus
> just calling the individual asm functions one at a time.
> ---
>  tools/checkasm.c |   17 +++++++++++++++++
>  1 file changed, 17 insertions(+)
> 
> diff --git a/tools/checkasm.c b/tools/checkasm.c
> index bc7f8ff..73e8392 100644
> --- a/tools/checkasm.c
> +++ b/tools/checkasm.c
> @@ -293,6 +293,14 @@ void x264_checkasm_stack_clobber( uint64_t clobber, ... );
>  #define call_c2(func,...) ({ call_bench(func,0,__VA_ARGS__); })
>  
>  
> +static uint64_t sa8d_satd_16x16_sep( x264_pixel_function_t* funcs,
> +                                     pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2 )
> +{
> +    uint32_t cost8 = funcs->sa8d[PIXEL_16x16]( pix1, stride1, pix2, stride2 );
> +    uint32_t cost4 = funcs->satd[PIXEL_16x16]( pix1, stride1, pix2, stride2 );
> +    return (uint64_t)cost4 << 32 | cost8;
> +}
> +
>  static int check_pixel( int cpu_ref, int cpu_new )
>  {
>      x264_pixel_function_t pixel_c;
> @@ -388,6 +396,15 @@ static int check_pixel( int cpu_ref, int cpu_new )
>                  break;
>              }
>          }
> +        set_func_name( "sa8d_satd_%s_separate", pixel_names[PIXEL_16x16] );
> +        for( int j = 0; j < 64; j++ )
> +        {
> +            call_a( sa8d_satd_16x16_sep, &pixel_asm, pbuf1, (intptr_t)16, pbuf2, (intptr_t)64 );
> +        }
> +        /* Try to set a unique pointer based on the sa8d/satd functions used.
> +         * By itself, the sa8d_satd_16x16_sep function pointer is the same for
> +         * all instruction sets, regardless of which functions are used. */
> +        get_bench( func_name, cpu_new )->pointer = (void*) ((intptr_t) pixel_asm.sa8d[PIXEL_16x16] + (intptr_t) pixel_asm.satd[PIXEL_16x16]);
>          for( int j = 0; j < 0x1000 && ok; j += 256 ) \
>          {
>              uint32_t cost8_c = pixel_c.sa8d[PIXEL_16x16]( pbuf3+j, 16, pbuf4+j, 16 );

Looks OK to me.

Janne


More information about the x264-devel mailing list