[x265-commits] [x265] fix pixel_ssim_end4, the 3rd is dword

Min Chen chenm003 at 163.com
Fri Apr 25 16:57:11 CEST 2014


details:   http://hg.videolan.org/x265/rev/2900e858e30a
branches:  
changeset: 6778:2900e858e30a
user:      Min Chen <chenm003 at 163.com>
date:      Fri Apr 25 11:01:36 2014 +0800
description:
fix pixel_ssim_end4: the 3rd argument is a dword, so load it explicitly
Subject: [x265] correct register num in intrapred8.asm

details:   http://hg.videolan.org/x265/rev/52d812d0fc48
branches:  
changeset: 6779:52d812d0fc48
user:      Min Chen <chenm003 at 163.com>
date:      Fri Apr 25 11:01:56 2014 +0800
description:
correct the register count in intrapred8.asm
Subject: [x265] testbench: support float ret value

details:   http://hg.videolan.org/x265/rev/7baf8b8ecfdc
branches:  
changeset: 6780:7baf8b8ecfdc
user:      Min Chen <chenm003 at 163.com>
date:      Fri Apr 25 11:01:12 2014 +0800
description:
testbench: support float return value

diffstat:

 source/common/x86/intrapred8.asm  |  10 +++++-----
 source/common/x86/pixel-util8.asm |   3 ++-
 source/test/checkasm-a.asm        |   2 ++
 source/test/pixelharness.cpp      |   2 +-
 source/test/testharness.h         |  10 ++++++++++
 5 files changed, 20 insertions(+), 7 deletions(-)

diffs (125 lines):

diff -r c630b0b393ee -r 7baf8b8ecfdc source/common/x86/intrapred8.asm
--- a/source/common/x86/intrapred8.asm	Thu Apr 24 15:59:05 2014 -0500
+++ b/source/common/x86/intrapred8.asm	Fri Apr 25 11:01:12 2014 +0800
@@ -1156,7 +1156,7 @@ cglobal intra_pred_ang8_2, 3,5,2
     RET
 
 INIT_XMM sse4
-cglobal intra_pred_ang8_3, 3,5,7
+cglobal intra_pred_ang8_3, 3,5,8
     cmp         r4m,       byte 33
     cmove       r2,        r3mp
     lea         r3,        [ang_table + 14 * 16]
@@ -1240,7 +1240,7 @@ cglobal intra_pred_ang8_3, 3,5,7
 
     RET
 
-cglobal intra_pred_ang8_4, 3,5,7
+cglobal intra_pred_ang8_4, 3,5,8
     cmp         r4m,       byte 32
     cmove       r2,        r3mp
     lea         r3,        [ang_table + 19 * 16]
@@ -1724,7 +1724,7 @@ cglobal intra_pred_ang8_13, 4,5,8
     packuswb    m1,        m0
     jmp         mangle(private_prefix %+ _ %+ intra_pred_ang8_3 %+ SUFFIX %+ .transpose8x8)
 
-cglobal intra_pred_ang8_14, 4,5,7
+cglobal intra_pred_ang8_14, 4,5,8
     cmp         r4m,       byte 22
     jnz         .skip
     xchg        r2,        r3
@@ -1773,7 +1773,7 @@ cglobal intra_pred_ang8_14, 4,5,7
     packuswb    m1,        m0
     jmp         mangle(private_prefix %+ _ %+ intra_pred_ang8_3 %+ SUFFIX %+ .transpose8x8)
 
-cglobal intra_pred_ang8_15, 4,5,7
+cglobal intra_pred_ang8_15, 4,5,8
     cmp         r4m,       byte 21
     jnz         .skip
     xchg        r2,        r3
@@ -1877,7 +1877,7 @@ cglobal intra_pred_ang8_16, 4,5,8
     packuswb    m1,        m0
     jmp         mangle(private_prefix %+ _ %+ intra_pred_ang8_3 %+ SUFFIX %+ .transpose8x8)
 
-cglobal intra_pred_ang8_17, 4,5,7
+cglobal intra_pred_ang8_17, 4,5,8
     cmp         r4m,       byte 19
     jnz         .skip
     xchg        r2,        r3
diff -r c630b0b393ee -r 7baf8b8ecfdc source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm	Thu Apr 24 15:59:05 2014 -0500
+++ b/source/common/x86/pixel-util8.asm	Fri Apr 25 11:01:12 2014 +0800
@@ -1981,7 +1981,8 @@ cglobal pixel_ssim_4x4x2_core, 4,4,8
 ;-----------------------------------------------------------------------------
 ; float pixel_ssim_end( int sum0[5][4], int sum1[5][4], int width )
 ;-----------------------------------------------------------------------------
-cglobal pixel_ssim_end4, 3,3,7
+cglobal pixel_ssim_end4, 2,3,7
+    mov       r2d, r2m
     movdqa    m0, [r0+ 0]
     movdqa    m1, [r0+16]
     movdqa    m2, [r0+32]
diff -r c630b0b393ee -r 7baf8b8ecfdc source/test/checkasm-a.asm
--- a/source/test/checkasm-a.asm	Thu Apr 24 15:59:05 2014 -0500
+++ b/source/test/checkasm-a.asm	Fri Apr 25 11:01:12 2014 +0800
@@ -87,6 +87,7 @@ cglobal checkasm_stack_clobber, 1,2
 ;-----------------------------------------------------------------------------
 ; intptr_t x265_checkasm_call( intptr_t (*func)(), int *ok, ... )
 ;-----------------------------------------------------------------------------
+cglobal checkasm_call_float
 INIT_XMM
 cglobal checkasm_call, 2,15,16,max_args*8+8
     mov  r6, r0
@@ -170,6 +171,7 @@ cglobal checkasm_call, 2,15,16,max_args*
 ;-----------------------------------------------------------------------------
 ; intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... )
 ;-----------------------------------------------------------------------------
+cglobal checkasm_call_float
 cglobal checkasm_call, 1,7
     mov  r3, n3
     mov  r4, n4
diff -r c630b0b393ee -r 7baf8b8ecfdc source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp	Thu Apr 24 15:59:05 2014 -0500
+++ b/source/test/pixelharness.cpp	Fri Apr 25 11:01:12 2014 +0800
@@ -915,7 +915,7 @@ bool PixelHarness::check_ssim_end(ssim_e
 
         int width = (rand() % 4) + 1; // range[1-4]
         float cres = ref(sum0, sum1, width);
-        float vres = (float)checked(opt, sum0, sum1, width);
+        float vres = checked_float(opt, sum0, sum1, width);
         if (fabs(vres - cres) > 0.00001)
             return false;
 
diff -r c630b0b393ee -r 7baf8b8ecfdc source/test/testharness.h
--- a/source/test/testharness.h	Thu Apr 24 15:59:05 2014 -0500
+++ b/source/test/testharness.h	Fri Apr 25 11:01:12 2014 +0800
@@ -121,6 +121,7 @@ int x265_stack_pagealign(int (*func)(), 
 /* detect when callee-saved regs aren't saved
  * needs an explicit asm check because it only sometimes crashes in normal use. */
 intptr_t x265_checkasm_call(intptr_t (*func)(), int *ok, ...);
+float x265_checkasm_call_float(float (*func)(), int *ok, ...);
 #else
 #define x265_stack_pagealign( func, align ) func()
 #endif
@@ -143,12 +144,21 @@ void x265_checkasm_stack_clobber( uint64
                                 m_rand,m_rand,m_rand,m_rand,m_rand,m_rand,m_rand,m_rand,\
                                 m_rand,m_rand,m_rand,m_rand,m_rand), /* max_args+6 */ \
     x265_checkasm_call((intptr_t(*)())func, &m_ok, 0, 0, 0, 0, __VA_ARGS__))
+
+#define checked_float(func,...) ( \
+    m_ok = 1, m_rand = (rand() & 0xffff) * 0x0001000100010001ULL, \
+    x265_checkasm_stack_clobber(m_rand,m_rand,m_rand,m_rand,m_rand,m_rand,m_rand,m_rand,\
+                                m_rand,m_rand,m_rand,m_rand,m_rand,m_rand,m_rand,m_rand,\
+                                m_rand,m_rand,m_rand,m_rand,m_rand), /* max_args+6 */ \
+    x265_checkasm_call_float((float(*)())func, &m_ok, 0, 0, 0, 0, __VA_ARGS__))
 #define reportfail() if (!m_ok) { fprintf(stderr, "stack clobber check failed at %s:%d", __FILE__, __LINE__); abort(); }
 #elif ARCH_X86
 #define checked(func,...) x265_checkasm_call((intptr_t(*)())func, &m_ok, __VA_ARGS__);
+#define checked_float(func,...) x265_checkasm_call_float((float(*)())func, &m_ok, __VA_ARGS__);
 
 #else
 #define checked(func,...) func(__VA_ARGS__)
+#define checked_float(func,...) func(__VA_ARGS__)
 #define reportfail()
 #endif
 }


More information about the x265-commits mailing list