[x264-devel] Fix pixel_ssim_end4 asm function for x86_64 systems

Anton Mitrofanov git at videolan.org
Sun Jul 20 11:58:28 CEST 2014


x264 | branch: master | Anton Mitrofanov <BugMaster at narod.ru> | Thu May 22 13:27:00 2014 +0400| [13d6dfd83af98e472a9e9a8b6abf5c971707a893] | committer: Fiona Glaser

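Fix pixel_ssim_end4 asm function for x86_64 systems

The asm treated the 32-bit int `width` argument as if it were already
zero-extended to 64 bits in its register. That assumption is incorrect,
so the argument is now loaded with an explicit 32-bit mov, and checkasm
gains a call that checks for this kind of incorrect assumption.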

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=13d6dfd83af98e472a9e9a8b6abf5c971707a893
---

 common/x86/pixel-a.asm |    3 ++-
 tools/checkasm.c       |    7 +++++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/common/x86/pixel-a.asm b/common/x86/pixel-a.asm
index 85e7d5a..edadad3 100644
--- a/common/x86/pixel-a.asm
+++ b/common/x86/pixel-a.asm
@@ -4689,7 +4689,8 @@ cglobal pixel_ssim_4x4x2_core, 4,4,8
 ;-----------------------------------------------------------------------------
 ; float pixel_ssim_end( int sum0[5][4], int sum1[5][4], int width )
 ;-----------------------------------------------------------------------------
-cglobal pixel_ssim_end4, 3,3,7
+cglobal pixel_ssim_end4, 2,3,7
+    mov      r2d, r2m
     movdqa    m0, [r0+ 0]
     movdqa    m1, [r0+16]
     movdqa    m2, [r0+32]
diff --git a/tools/checkasm.c b/tools/checkasm.c
index f72b7a0..cb88966 100644
--- a/tools/checkasm.c
+++ b/tools/checkasm.c
@@ -728,11 +728,14 @@ static int check_pixel( int cpu_ref, int cpu_new )
             fprintf( stderr, "ssim: %.7f != %.7f [FAILED]\n", res_c, res_a );
         }
         set_func_name( "ssim_core" );
-        call_c2( pixel_c.ssim_4x4x2_core,   pbuf1+2, (intptr_t)32, pbuf2+2, (intptr_t)32, sums );
-        call_a2( pixel_asm.ssim_4x4x2_core, pbuf1+2, (intptr_t)32, pbuf2+2, (intptr_t)32, sums );
+        call_c( pixel_c.ssim_4x4x2_core,   pbuf1+2, (intptr_t)32, pbuf2+2, (intptr_t)32, sums );
+        call_a( pixel_asm.ssim_4x4x2_core, pbuf1+2, (intptr_t)32, pbuf2+2, (intptr_t)32, sums );
         set_func_name( "ssim_end" );
         call_c2( pixel_c.ssim_end4,   sums, sums, 4 );
         call_a2( pixel_asm.ssim_end4, sums, sums, 4 );
+        /* check incorrect assumptions that 32-bit ints are zero-extended to 64-bit */
+        call_c1( pixel_c.ssim_end4,   sums, sums, 3 );
+        call_a1( pixel_asm.ssim_end4, sums, sums, 3 );
         report( "ssim :" );
     }
 



More information about the x264-devel mailing list