[x265-commits] [x265] fix pixel_ssim_end4, the 3rd is dword
Min Chen
chenm003 at 163.com
Fri Apr 25 16:57:11 CEST 2014
details: http://hg.videolan.org/x265/rev/2900e858e30a
branches:
changeset: 6778:2900e858e30a
user: Min Chen <chenm003 at 163.com>
date: Fri Apr 25 11:01:36 2014 +0800
description:
fix pixel_ssim_end4: the 3rd argument (width) is a dword, so load it explicitly as 32-bit
Subject: [x265] correct register num in intrapred8.asm
details: http://hg.videolan.org/x265/rev/52d812d0fc48
branches:
changeset: 6779:52d812d0fc48
user: Min Chen <chenm003 at 163.com>
date: Fri Apr 25 11:01:56 2014 +0800
description:
correct the register count in the cglobal declarations in intrapred8.asm
Subject: [x265] testbench: support float ret value
details: http://hg.videolan.org/x265/rev/7baf8b8ecfdc
branches:
changeset: 6780:7baf8b8ecfdc
user: Min Chen <chenm003 at 163.com>
date: Fri Apr 25 11:01:12 2014 +0800
description:
testbench: support float return values
diffstat:
source/common/x86/intrapred8.asm | 10 +++++-----
source/common/x86/pixel-util8.asm | 3 ++-
source/test/checkasm-a.asm | 2 ++
source/test/pixelharness.cpp | 2 +-
source/test/testharness.h | 10 ++++++++++
5 files changed, 20 insertions(+), 7 deletions(-)
diffs (125 lines):
diff -r c630b0b393ee -r 7baf8b8ecfdc source/common/x86/intrapred8.asm
--- a/source/common/x86/intrapred8.asm Thu Apr 24 15:59:05 2014 -0500
+++ b/source/common/x86/intrapred8.asm Fri Apr 25 11:01:12 2014 +0800
@@ -1156,7 +1156,7 @@ cglobal intra_pred_ang8_2, 3,5,2
RET
INIT_XMM sse4
-cglobal intra_pred_ang8_3, 3,5,7
+cglobal intra_pred_ang8_3, 3,5,8
cmp r4m, byte 33
cmove r2, r3mp
lea r3, [ang_table + 14 * 16]
@@ -1240,7 +1240,7 @@ cglobal intra_pred_ang8_3, 3,5,7
RET
-cglobal intra_pred_ang8_4, 3,5,7
+cglobal intra_pred_ang8_4, 3,5,8
cmp r4m, byte 32
cmove r2, r3mp
lea r3, [ang_table + 19 * 16]
@@ -1724,7 +1724,7 @@ cglobal intra_pred_ang8_13, 4,5,8
packuswb m1, m0
jmp mangle(private_prefix %+ _ %+ intra_pred_ang8_3 %+ SUFFIX %+ .transpose8x8)
-cglobal intra_pred_ang8_14, 4,5,7
+cglobal intra_pred_ang8_14, 4,5,8
cmp r4m, byte 22
jnz .skip
xchg r2, r3
@@ -1773,7 +1773,7 @@ cglobal intra_pred_ang8_14, 4,5,7
packuswb m1, m0
jmp mangle(private_prefix %+ _ %+ intra_pred_ang8_3 %+ SUFFIX %+ .transpose8x8)
-cglobal intra_pred_ang8_15, 4,5,7
+cglobal intra_pred_ang8_15, 4,5,8
cmp r4m, byte 21
jnz .skip
xchg r2, r3
@@ -1877,7 +1877,7 @@ cglobal intra_pred_ang8_16, 4,5,8
packuswb m1, m0
jmp mangle(private_prefix %+ _ %+ intra_pred_ang8_3 %+ SUFFIX %+ .transpose8x8)
-cglobal intra_pred_ang8_17, 4,5,7
+cglobal intra_pred_ang8_17, 4,5,8
cmp r4m, byte 19
jnz .skip
xchg r2, r3
diff -r c630b0b393ee -r 7baf8b8ecfdc source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm Thu Apr 24 15:59:05 2014 -0500
+++ b/source/common/x86/pixel-util8.asm Fri Apr 25 11:01:12 2014 +0800
@@ -1981,7 +1981,8 @@ cglobal pixel_ssim_4x4x2_core, 4,4,8
;-----------------------------------------------------------------------------
; float pixel_ssim_end( int sum0[5][4], int sum1[5][4], int width )
;-----------------------------------------------------------------------------
-cglobal pixel_ssim_end4, 3,3,7
+cglobal pixel_ssim_end4, 2,3,7
+ mov r2d, r2m
movdqa m0, [r0+ 0]
movdqa m1, [r0+16]
movdqa m2, [r0+32]
diff -r c630b0b393ee -r 7baf8b8ecfdc source/test/checkasm-a.asm
--- a/source/test/checkasm-a.asm Thu Apr 24 15:59:05 2014 -0500
+++ b/source/test/checkasm-a.asm Fri Apr 25 11:01:12 2014 +0800
@@ -87,6 +87,7 @@ cglobal checkasm_stack_clobber, 1,2
;-----------------------------------------------------------------------------
; intptr_t x265_checkasm_call( intptr_t (*func)(), int *ok, ... )
;-----------------------------------------------------------------------------
+cglobal checkasm_call_float
INIT_XMM
cglobal checkasm_call, 2,15,16,max_args*8+8
mov r6, r0
@@ -170,6 +171,7 @@ cglobal checkasm_call, 2,15,16,max_args*
;-----------------------------------------------------------------------------
; intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... )
;-----------------------------------------------------------------------------
+cglobal checkasm_call_float
cglobal checkasm_call, 1,7
mov r3, n3
mov r4, n4
diff -r c630b0b393ee -r 7baf8b8ecfdc source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp Thu Apr 24 15:59:05 2014 -0500
+++ b/source/test/pixelharness.cpp Fri Apr 25 11:01:12 2014 +0800
@@ -915,7 +915,7 @@ bool PixelHarness::check_ssim_end(ssim_e
int width = (rand() % 4) + 1; // range[1-4]
float cres = ref(sum0, sum1, width);
- float vres = (float)checked(opt, sum0, sum1, width);
+ float vres = checked_float(opt, sum0, sum1, width);
if (fabs(vres - cres) > 0.00001)
return false;
diff -r c630b0b393ee -r 7baf8b8ecfdc source/test/testharness.h
--- a/source/test/testharness.h Thu Apr 24 15:59:05 2014 -0500
+++ b/source/test/testharness.h Fri Apr 25 11:01:12 2014 +0800
@@ -121,6 +121,7 @@ int x265_stack_pagealign(int (*func)(),
/* detect when callee-saved regs aren't saved
* needs an explicit asm check because it only sometimes crashes in normal use. */
intptr_t x265_checkasm_call(intptr_t (*func)(), int *ok, ...);
+float x265_checkasm_call_float(float (*func)(), int *ok, ...);
#else
#define x265_stack_pagealign( func, align ) func()
#endif
@@ -143,12 +144,21 @@ void x265_checkasm_stack_clobber( uint64
m_rand,m_rand,m_rand,m_rand,m_rand,m_rand,m_rand,m_rand,\
m_rand,m_rand,m_rand,m_rand,m_rand), /* max_args+6 */ \
x265_checkasm_call((intptr_t(*)())func, &m_ok, 0, 0, 0, 0, __VA_ARGS__))
+
+#define checked_float(func,...) ( \
+ m_ok = 1, m_rand = (rand() & 0xffff) * 0x0001000100010001ULL, \
+ x265_checkasm_stack_clobber(m_rand,m_rand,m_rand,m_rand,m_rand,m_rand,m_rand,m_rand,\
+ m_rand,m_rand,m_rand,m_rand,m_rand,m_rand,m_rand,m_rand,\
+ m_rand,m_rand,m_rand,m_rand,m_rand), /* max_args+6 */ \
+ x265_checkasm_call_float((float(*)())func, &m_ok, 0, 0, 0, 0, __VA_ARGS__))
#define reportfail() if (!m_ok) { fprintf(stderr, "stack clobber check failed at %s:%d", __FILE__, __LINE__); abort(); }
#elif ARCH_X86
#define checked(func,...) x265_checkasm_call((intptr_t(*)())func, &m_ok, __VA_ARGS__);
+#define checked_float(func,...) x265_checkasm_call_float((float(*)())func, &m_ok, __VA_ARGS__);
#else
#define checked(func,...) func(__VA_ARGS__)
+#define checked_float(func,...) func(__VA_ARGS__)
#define reportfail()
#endif
}
More information about the x265-commits
mailing list