[x264-devel] [PATCH 3/3] RFC: checkasm: Compare the combined sa8d_satd functions against the individual functions
Janne Grunau
janne-x264 at jannau.net
Tue Aug 25 20:49:49 CEST 2015
On 2015-08-14 00:00:59 +0300, Martin Storsjö wrote:
> This shows the actual benefit of using the combined version, versus
> just calling the individual asm functions one at a time.
> ---
> tools/checkasm.c | 17 +++++++++++++++++
> 1 file changed, 17 insertions(+)
>
> diff --git a/tools/checkasm.c b/tools/checkasm.c
> index bc7f8ff..73e8392 100644
> --- a/tools/checkasm.c
> +++ b/tools/checkasm.c
> @@ -293,6 +293,14 @@ void x264_checkasm_stack_clobber( uint64_t clobber, ... );
> #define call_c2(func,...) ({ call_bench(func,0,__VA_ARGS__); })
>
>
> +static uint64_t sa8d_satd_16x16_sep( x264_pixel_function_t* funcs,
> + pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2 )
> +{
> + uint32_t cost8 = funcs->sa8d[PIXEL_16x16]( pix1, stride1, pix2, stride2 );
> + uint32_t cost4 = funcs->satd[PIXEL_16x16]( pix1, stride1, pix2, stride2 );
> + return (uint64_t)cost4 << 32 | cost8;
> +}
> +
> static int check_pixel( int cpu_ref, int cpu_new )
> {
> x264_pixel_function_t pixel_c;
> @@ -388,6 +396,15 @@ static int check_pixel( int cpu_ref, int cpu_new )
> break;
> }
> }
> + set_func_name( "sa8d_satd_%s_separate", pixel_names[PIXEL_16x16] );
> + for( int j = 0; j < 64; j++ )
> + {
> + call_a( sa8d_satd_16x16_sep, &pixel_asm, pbuf1, (intptr_t)16, pbuf2, (intptr_t)64 );
> + }
> + /* Try to set a unique pointer based on the sa8d/satd functions used.
> + * By itself, the sa8d_satd_16x16_sep function pointer is the same for
> + * all instruction sets, regardless of which functions are used. */
> + get_bench( func_name, cpu_new )->pointer = (void*) ((intptr_t) pixel_asm.sa8d[PIXEL_16x16] + (intptr_t) pixel_asm.satd[PIXEL_16x16]);
> for( int j = 0; j < 0x1000 && ok; j += 256 ) \
> {
> uint32_t cost8_c = pixel_c.sa8d[PIXEL_16x16]( pbuf3+j, 16, pbuf4+j, 16 );
looks ok to me
Janne
More information about the x264-devel
mailing list