[x265] [PATCH] asm: fix the bug caused on 32-bit linux due to satd routines
yuvaraj at multicorewareinc.com
yuvaraj at multicorewareinc.com
Mon Nov 18 11:10:23 CET 2013
# HG changeset patch
# User Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
# Date 1384769347 -19800
# Mon Nov 18 15:39:07 2013 +0530
# Node ID f076c5ca413a905d6d4e8c1bbea2638992cb21d7
# Parent e2895ce7bbeb2c3d845fee2578758d0012fa2cb4
asm: fix the bug on 32-bit Linux in the satd routines by keeping copies of r0/r2 in r6 and a stack slot instead of reloading them through r0mp/r2mp.
diff -r e2895ce7bbeb -r f076c5ca413a source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm Sun Nov 17 11:24:13 2013 -0600
+++ b/source/common/x86/pixel-a.asm Mon Nov 18 15:39:07 2013 +0530
@@ -2239,27 +2239,42 @@
%else
-cglobal pixel_satd_32x8, 4,6,8 ;if !WIN64
+%if WIN64
+cglobal pixel_satd_32x8, 4,8,8 ;if WIN64 && cpuflag(avx)
SATD_START_SSE2 m6, m7
- BACKUP_POINTERS
- call pixel_satd_8x8_internal
- RESTORE_AND_INC_POINTERS
- BACKUP_POINTERS
- call pixel_satd_8x8_internal
- RESTORE_AND_INC_POINTERS
-%if WIN64 == 0
- add r0, 8*SIZEOF_PIXEL
- add r2, 8*SIZEOF_PIXEL
-%endif
- BACKUP_POINTERS
- call pixel_satd_8x8_internal
- RESTORE_AND_INC_POINTERS
-%if WIN64 == 0
- add r0, 16*SIZEOF_PIXEL
- add r2, 16*SIZEOF_PIXEL
-%endif
+ mov r6, r0 ; r6 = copy of the incoming r0, taken before the first internal call
+ mov r7, r2 ; r7 = copy of the incoming r2
+ call pixel_satd_8x8_internal
+ lea r0, [r6 + 8] ; r0/r2 = saved values + 8; NOTE(review): the replaced code stepped by 8*SIZEOF_PIXEL - confirm literal 8 is intended
+ lea r2, [r7 + 8]
+ call pixel_satd_8x8_internal
+ lea r0, [r6 + 16] ; r0/r2 = saved values + 16
+ lea r2, [r7 + 16]
+ call pixel_satd_8x8_internal
+ lea r0, [r6 + 24] ; r0/r2 = saved values + 24
+ lea r2, [r7 + 24]
call pixel_satd_8x8_internal
SATD_END_SSE2 m6
+%else
+cglobal pixel_satd_32x8, 4,7,8,0-gprsize ;if !WIN64 ; one native-width stack slot (4 bytes was too small for the r2 store on x86-64)
+ SATD_START_SSE2 m6, m7
+ mov r6, r0 ; r6 = copy of the incoming r0
+ mov [rsp], r2 ; incoming r2 is kept in the stack slot (only 7 GPRs requested)
+ call pixel_satd_8x8_internal
+ lea r0, [r6 + 8] ; r0 = saved r0 + 8
+ mov r2, [rsp] ; r2 = saved r2 ...
+ add r2, 8 ; ... + 8
+ call pixel_satd_8x8_internal
+ lea r0, [r6 + 16] ; r0 = saved r0 + 16
+ mov r2, [rsp]
+ add r2, 16 ; r2 = saved r2 + 16
+ call pixel_satd_8x8_internal
+ lea r0, [r6 + 24] ; r0 = saved r0 + 24
+ mov r2, [rsp]
+ add r2, 24 ; r2 = saved r2 + 24
+ call pixel_satd_8x8_internal
+ SATD_END_SSE2 m6
+%endif
%if WIN64
cglobal pixel_satd_32x16, 4,8,8 ;if WIN64 && cpuflag(avx)
@@ -2282,25 +2297,24 @@
call pixel_satd_8x8_internal
SATD_END_SSE2 m6
%else
-cglobal pixel_satd_32x16, 4,6,8 ;if !WIN64
+cglobal pixel_satd_32x16, 4,7,8,0-4 ;if !WIN64
SATD_START_SSE2 m6, m7
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- mov r0, r0mp
- mov r2, r2mp
- add r0, 8
+ mov r6, r0
+ mov [rsp], r2
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ lea r0, [r6 + 8]
+ mov r2, [rsp]
add r2, 8
call pixel_satd_8x8_internal
call pixel_satd_8x8_internal
- mov r0, r0mp
- mov r2, r2mp
- add r0, 16
+ lea r0, [r6 + 16]
+ mov r2, [rsp]
add r2, 16
call pixel_satd_8x8_internal
call pixel_satd_8x8_internal
- mov r0, r0mp
- mov r2, r2mp
- add r0, 24
+ lea r0, [r6 + 24]
+ mov r2, [rsp]
add r2, 24
call pixel_satd_8x8_internal
call pixel_satd_8x8_internal
@@ -2332,28 +2346,27 @@
call pixel_satd_8x8_internal
SATD_END_SSE2 m6
%else
-cglobal pixel_satd_32x24, 4,6,8 ;if !WIN64
+cglobal pixel_satd_32x24, 4,7,8,0-4 ;if !WIN64
SATD_START_SSE2 m6, m7
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- mov r0, r0mp
- mov r2, r2mp
- add r0, 8
+ mov r6, r0
+ mov [rsp], r2
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ lea r0, [r6 + 8]
+ mov r2, [rsp]
add r2, 8
call pixel_satd_8x8_internal
call pixel_satd_8x8_internal
call pixel_satd_8x8_internal
- mov r0, r0mp
- mov r2, r2mp
- add r0, 16
+ lea r0, [r6 + 16]
+ mov r2, [rsp]
add r2, 16
call pixel_satd_8x8_internal
call pixel_satd_8x8_internal
call pixel_satd_8x8_internal
- mov r0, r0mp
- mov r2, r2mp
- add r0, 24
+ lea r0, [r6 + 24]
+ mov r2, [rsp]
add r2, 24
call pixel_satd_8x8_internal
call pixel_satd_8x8_internal
@@ -2389,38 +2402,41 @@
call pixel_satd_8x8_internal
call pixel_satd_8x8_internal
SATD_END_SSE2 m6
-%else
-cglobal pixel_satd_32x32, 4,6,8 ;if !WIN64
+
+
+%else
+cglobal pixel_satd_32x32, 4,7,8,0-gprsize ;if !WIN64 ; one native-width stack slot (4 bytes was too small for the r2 store on x86-64)
+
SATD_START_SSE2 m6, m7
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- mov r0, r0mp
- mov r2, r2mp
- add r0, 8
+ mov r6, r0 ; r6 = copy of the incoming r0, taken before the first internal call
+ mov [rsp], r2 ; incoming r2 is kept in the stack slot
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ lea r0, [r6 + 8] ; r0 = saved r0 + 8 (replaces the r0mp reload + add)
+ mov r2, [rsp] ; r2 = saved r2; the following add applies the same offset
add r2, 8
call pixel_satd_8x8_internal
call pixel_satd_8x8_internal
call pixel_satd_8x8_internal
call pixel_satd_8x8_internal
- mov r0, r0mp
- mov r2, r2mp
- add r0, 16
+ lea r0, [r6 + 16] ; r0 = saved r0 + 16
+ mov r2, [rsp] ; r2 = saved r2
add r2, 16
call pixel_satd_8x8_internal
call pixel_satd_8x8_internal
call pixel_satd_8x8_internal
call pixel_satd_8x8_internal
- mov r0, r0mp
- mov r2, r2mp
- add r0, 24
+ lea r0, [r6 + 24] ; r0 = saved r0 + 24
+ mov r2, [rsp] ; r2 = saved r2
add r2, 24
call pixel_satd_8x8_internal
call pixel_satd_8x8_internal
call pixel_satd_8x8_internal
call pixel_satd_8x8_internal
SATD_END_SSE2 m6
+
%endif
%if WIN64
@@ -2474,19 +2490,20 @@
movd eax, m6
RET
%else
-cglobal pixel_satd_32x64, 4,6,8 ;if !WIN64
+cglobal pixel_satd_32x64, 4,7,8,0-4 ;if !WIN64
SATD_START_SSE2 m6, m7
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- mov r0, r0mp
- mov r2, r2mp
- add r0, 8
+ mov r6, r0
+ mov [rsp], r2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ lea r0, [r6 + 8]
+ mov r2, [rsp]
add r2, 8
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
@@ -2496,9 +2513,8 @@
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
- mov r0, r0mp
- mov r2, r2mp
- add r0, 16
+ lea r0, [r6 + 16]
+ mov r2, [rsp]
add r2, 16
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
@@ -2508,9 +2524,8 @@
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
- mov r0, r0mp
- mov r2, r2mp
- add r0, 24
+ lea r0, [r6 + 24]
+ mov r2, [rsp]
add r2, 24
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
@@ -2600,68 +2615,65 @@
movd eax, m6
RET
%else
-cglobal pixel_satd_48x64, 4,6,8 ;if !WIN64
+cglobal pixel_satd_48x64, 4,7,8,0-4 ;if !WIN64
SATD_START_SSE2 m6, m7
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- mov r0, r0mp
- mov r2, r2mp
- add r0, 8
- add r2, 8
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- mov r0, r0mp
- mov r2, r2mp
- add r0, 16
- add r2, 16
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- mov r0, r0mp
- mov r2, r2mp
- add r0, 24
- add r2, 24
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- mov r0, r0mp
- mov r2, r2mp
- add r0, 32
- add r2, 32
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- mov r0, r0mp
- mov r2, r2mp
- add r0, 40
- add r2, 40
+ mov r6, r0
+ mov [rsp], r2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ lea r0, [r6 + 8]
+ mov r2, [rsp]
+ add r2,8
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ lea r0, [r6 + 16]
+ mov r2, [rsp]
+ add r2,16
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ lea r0, [r6 + 24]
+ mov r2, [rsp]
+ add r2,24
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ lea r0, [r6 + 32]
+ mov r2, [rsp]
+ add r2,32
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ lea r0, [r6 + 40]
+ mov r2, [rsp]
+ add r2,40
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
@@ -2717,50 +2729,45 @@
call pixel_satd_8x8_internal
SATD_END_SSE2 m6
%else
-cglobal pixel_satd_64x16, 4,6,8 ;if !WIN64
+cglobal pixel_satd_64x16, 4,7,8,0-gprsize ;if !WIN64 ; one native-width stack slot (4 bytes was too small for the r2 store on x86-64)
SATD_START_SSE2 m6, m7
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- mov r0, r0mp
- mov r2, r2mp
- add r0, 8
- add r2, 8
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- mov r0, r0mp
- mov r2, r2mp
- add r0, 16
- add r2, 16
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- mov r0, r0mp
- mov r2, r2mp
- add r0, 24
- add r2, 24
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- mov r0, r0mp
- mov r2, r2mp
- add r0, 32
- add r2, 32
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- mov r0, r0mp
- mov r2, r2mp
- add r0, 40
- add r2, 40
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- mov r0, r0mp
- mov r2, r2mp
- add r0, 48
- add r2, 48
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- mov r0, r0mp
- mov r2, r2mp
- add r0, 56
- add r2, 56
+ mov r6, r0 ; r6 = copy of the incoming r0, taken before the first internal call
+ mov [rsp], r2 ; incoming r2 is kept in the stack slot
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ lea r0, [r6 + 8] ; r0 = saved r0 + 8 (replaces the r0mp reload + add)
+ mov r2, [rsp] ; r2 = saved r2 ...
+ add r2, 8 ; ... + 8
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ lea r0, [r6 + 16] ; r0 = saved r0 + 16
+ mov r2, [rsp]
+ add r2, 16 ; r2 = saved r2 + 16
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ lea r0, [r6 + 24] ; r0 = saved r0 + 24
+ mov r2, [rsp]
+ add r2, 24 ; r2 = saved r2 + 24
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ lea r0, [r6 + 32] ; r0 = saved r0 + 32
+ mov r2, [rsp]
+ add r2, 32 ; r2 = saved r2 + 32
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ lea r0, [r6 + 40] ; r0 = saved r0 + 40
+ mov r2, [rsp]
+ add r2, 40 ; r2 = saved r2 + 40
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ lea r0, [r6 + 48] ; r0 = saved r0 + 48
+ mov r2, [rsp]
+ add r2, 48 ; r2 = saved r2 + 48
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ lea r0, [r6 + 56] ; r0 = saved r0 + 56
+ mov r2, [rsp]
+ add r2, 56 ; r2 = saved r2 + 56
call pixel_satd_8x8_internal
call pixel_satd_8x8_internal
SATD_END_SSE2 m6
@@ -2825,63 +2832,58 @@
movd eax, m6
RET
%else
-cglobal pixel_satd_64x32, 4,6,8 ;if !WIN64
+cglobal pixel_satd_64x32, 4,7,8,0-4 ;if !WIN64
SATD_START_SSE2 m6, m7
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- mov r0, r0mp
- mov r2, r2mp
- add r0, 8
+ mov r6, r0
+ mov [rsp], r2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ lea r0, [r6 + 8]
+ mov r2, [rsp]
add r2, 8
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
- mov r0, r0mp
- mov r2, r2mp
- add r0, 16
+ lea r0, [r6 + 16]
+ mov r2, [rsp]
add r2, 16
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
- mov r0, r0mp
- mov r2, r2mp
- add r0, 24
+ lea r0, [r6 + 24]
+ mov r2, [rsp]
add r2, 24
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
- mov r0, r0mp
- mov r2, r2mp
- add r0, 32
+ lea r0, [r6 + 32]
+ mov r2, [rsp]
add r2, 32
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
- mov r0, r0mp
- mov r2, r2mp
- add r0, 40
+ lea r0, [r6 + 40]
+ mov r2, [rsp]
add r2, 40
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
- mov r0, r0mp
- mov r2, r2mp
- add r0, 48
+ lea r0, [r6 + 48]
+ mov r2, [rsp]
add r2, 48
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
- mov r0, r0mp
- mov r2, r2mp
- add r0, 56
+ lea r0, [r6 + 56]
+ mov r2, [rsp]
add r2, 56
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
@@ -2971,17 +2973,18 @@
movd eax, m6
RET
%else
-cglobal pixel_satd_64x48, 4,6,8 ;if !WIN64
+cglobal pixel_satd_64x48, 4,7,8,0-4 ;if !WIN64
SATD_START_SSE2 m6, m7
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- mov r0, r0mp
- mov r2, r2mp
- add r0, 8
+ mov r6, r0
+ mov [rsp], r2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ lea r0, [r6 + 8]
+ mov r2, [rsp]
add r2, 8
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
@@ -2989,9 +2992,8 @@
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
- mov r0, r0mp
- mov r2, r2mp
- add r0, 16
+ lea r0, [r6 + 16]
+ mov r2, [rsp]
add r2, 16
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
@@ -2999,9 +3001,8 @@
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
- mov r0, r0mp
- mov r2, r2mp
- add r0, 24
+ lea r0, [r6 + 24]
+ mov r2, [rsp]
add r2, 24
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
@@ -3009,9 +3010,8 @@
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
- mov r0, r0mp
- mov r2, r2mp
- add r0, 32
+ lea r0, [r6 + 32]
+ mov r2, [rsp]
add r2, 32
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
@@ -3019,9 +3019,8 @@
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
- mov r0, r0mp
- mov r2, r2mp
- add r0, 40
+ lea r0, [r6 + 40]
+ mov r2, [rsp]
add r2, 40
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
@@ -3029,9 +3028,8 @@
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
- mov r0, r0mp
- mov r2, r2mp
- add r0, 48
+ lea r0, [r6 + 48]
+ mov r2, [rsp]
add r2, 48
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
@@ -3039,9 +3037,8 @@
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
- mov r0, r0mp
- mov r2, r2mp
- add r0, 56
+ lea r0, [r6 + 56]
+ mov r2, [rsp]
add r2, 56
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
@@ -3149,19 +3146,20 @@
movd eax, m6
RET
%else
-cglobal pixel_satd_64x64, 4,6,8 ;if !WIN64
+cglobal pixel_satd_64x64, 4,7,8,0-4 ;if !WIN64
SATD_START_SSE2 m6, m7
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- call pixel_satd_8x8_internal2
- mov r0, r0mp
- mov r2, r2mp
- add r0, 8
+ mov r6, r0
+ mov [rsp], r2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ lea r0, [r6 + 8]
+ mov r2, [rsp]
add r2, 8
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
@@ -3171,9 +3169,8 @@
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
- mov r0, r0mp
- mov r2, r2mp
- add r0, 16
+ lea r0, [r6 + 16]
+ mov r2, [rsp]
add r2, 16
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
@@ -3183,9 +3180,8 @@
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
- mov r0, r0mp
- mov r2, r2mp
- add r0, 24
+ lea r0, [r6 + 24]
+ mov r2, [rsp]
add r2, 24
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
@@ -3195,9 +3191,8 @@
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
- mov r0, r0mp
- mov r2, r2mp
- add r0, 32
+ lea r0, [r6 + 32]
+ mov r2, [rsp]
add r2, 32
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
@@ -3207,9 +3202,8 @@
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
- mov r0, r0mp
- mov r2, r2mp
- add r0, 40
+ lea r0, [r6 + 40]
+ mov r2, [rsp]
add r2, 40
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
@@ -3219,9 +3213,8 @@
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
- mov r0, r0mp
- mov r2, r2mp
- add r0, 48
+ lea r0, [r6 + 48]
+ mov r2, [rsp]
add r2, 48
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
@@ -3231,9 +3224,8 @@
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
- mov r0, r0mp
- mov r2, r2mp
- add r0, 56
+ lea r0, [r6 + 56]
+ mov r2, [rsp]
add r2, 56
call pixel_satd_8x8_internal2
call pixel_satd_8x8_internal2
@@ -3357,8 +3349,10 @@
movd eax, m7
RET
%else
-cglobal pixel_satd_12x16, 4,6,8
+cglobal pixel_satd_12x16, 4,7,8,0-4
SATD_START_MMX
+ mov r6, r0
+ mov [rsp], r2
%if vertical==0
mova m7, [hmul_4p]
%endif
@@ -3366,17 +3360,15 @@
lea r0, [r0 + r1*2*SIZEOF_PIXEL]
lea r2, [r2 + r3*2*SIZEOF_PIXEL]
SATD_4x8_SSE vertical, 1, add
- mov r0, r0mp
- mov r2, r2mp
- add r0, 4
+ lea r0, [r6 + 4]
+ mov r2, [rsp]
add r2, 4
SATD_4x8_SSE vertical, 1, add
lea r0, [r0 + r1*2*SIZEOF_PIXEL]
lea r2, [r2 + r3*2*SIZEOF_PIXEL]
SATD_4x8_SSE vertical, 1, add
- mov r0, r0mp
- mov r2, r2mp
- add r0, 8
+ lea r0, [r6 + 8]
+ mov r2, [rsp]
add r2, 8
SATD_4x8_SSE vertical, 1, add
lea r0, [r0 + r1*2*SIZEOF_PIXEL]
@@ -3410,23 +3402,23 @@
call pixel_satd_8x8_internal
SATD_END_SSE2 m6
%else
-cglobal pixel_satd_24x32, 4,6,8
+cglobal pixel_satd_24x32, 4,7,8,0-4
SATD_START_SSE2 m6, m7
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- mov r0, r0mp
- mov r2, r2mp
- add r0, 8
+ mov r6, r0
+ mov [rsp], r2
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ call pixel_satd_8x8_internal
+ lea r0, [r6 + 8]
+ mov r2, [rsp]
add r2, 8
call pixel_satd_8x8_internal
call pixel_satd_8x8_internal
call pixel_satd_8x8_internal
call pixel_satd_8x8_internal
- mov r0, r0mp
- mov r2, r2mp
- add r0, 16
+ lea r0, [r6 + 16]
+ mov r2, [rsp]
add r2, 16
call pixel_satd_8x8_internal
call pixel_satd_8x8_internal
More information about the x265-devel
mailing list