[x264-devel] [PATCH 3/3] RFC: checkasm: Compare the combined sa8d_satd functions against the individual functions
Martin Storsjö
martin at martin.st
Thu Aug 13 23:00:59 CEST 2015
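Add a checkasm benchmark that calls the individual sa8d and satd asm
functions one after the other. Comparing it against the combined
sa8d_satd function shows the actual benefit of using the combined
version, versus just calling the individual asm functions one at a time.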
---
tools/checkasm.c | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/tools/checkasm.c b/tools/checkasm.c
index bc7f8ff..73e8392 100644
--- a/tools/checkasm.c
+++ b/tools/checkasm.c
@@ -293,6 +293,14 @@ void x264_checkasm_stack_clobber( uint64_t clobber, ... );
#define call_c2(func,...) ({ call_bench(func,0,__VA_ARGS__); })
+static uint64_t sa8d_satd_16x16_sep( x264_pixel_function_t* funcs,
+ pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2 )
+{
+ uint32_t cost8 = funcs->sa8d[PIXEL_16x16]( pix1, stride1, pix2, stride2 );
+ uint32_t cost4 = funcs->satd[PIXEL_16x16]( pix1, stride1, pix2, stride2 );
+ return (uint64_t)cost4 << 32 | cost8;
+}
+
static int check_pixel( int cpu_ref, int cpu_new )
{
x264_pixel_function_t pixel_c;
@@ -388,6 +396,15 @@ static int check_pixel( int cpu_ref, int cpu_new )
break;
}
}
+ set_func_name( "sa8d_satd_%s_separate", pixel_names[PIXEL_16x16] );
+ for( int j = 0; j < 64; j++ )
+ {
+ call_a( sa8d_satd_16x16_sep, &pixel_asm, pbuf1, (intptr_t)16, pbuf2, (intptr_t)64 );
+ }
+ /* Try to set a unique pointer based on the sa8d/satd functions used.
+ * By itself, the sa8d_satd_16x16_sep function pointer is the same for
+ * all instruction sets, regardless of which functions are used. */
+ get_bench( func_name, cpu_new )->pointer = (void*) ((intptr_t) pixel_asm.sa8d[PIXEL_16x16] + (intptr_t) pixel_asm.satd[PIXEL_16x16]);
for( int j = 0; j < 0x1000 && ok; j += 256 ) \
{
uint32_t cost8_c = pixel_c.sa8d[PIXEL_16x16]( pbuf3+j, 16, pbuf4+j, 16 );
--
1.7.10.4
More information about the x264-devel
mailing list