[x265] [PATCH] asm: fix the bug on 32-bit Linux caused by the satd routines

yuvaraj at multicorewareinc.com yuvaraj at multicorewareinc.com
Mon Nov 18 11:10:23 CET 2013


# HG changeset patch
# User Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
# Date 1384769347 -19800
#      Mon Nov 18 15:39:07 2013 +0530
# Node ID f076c5ca413a905d6d4e8c1bbea2638992cb21d7
# Parent  e2895ce7bbeb2c3d845fee2578758d0012fa2cb4
asm: fix the bug on 32-bit Linux caused by the satd routines.

diff -r e2895ce7bbeb -r f076c5ca413a source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm	Sun Nov 17 11:24:13 2013 -0600
+++ b/source/common/x86/pixel-a.asm	Mon Nov 18 15:39:07 2013 +0530
@@ -2239,27 +2239,42 @@
 
 %else
 
-cglobal pixel_satd_32x8, 4,6,8    ;if !WIN64
+%if WIN64
+cglobal pixel_satd_32x8, 4,8,8    ;if WIN64 && cpuflag(avx); four 8x8 column blocks, bases kept in r6/r7
     SATD_START_SSE2 m6, m7
-    BACKUP_POINTERS
-    call pixel_satd_8x8_internal
-    RESTORE_AND_INC_POINTERS
-    BACKUP_POINTERS
-    call pixel_satd_8x8_internal
-    RESTORE_AND_INC_POINTERS
-%if WIN64 == 0
-    add     r0, 8*SIZEOF_PIXEL
-    add     r2, 8*SIZEOF_PIXEL
-%endif
-    BACKUP_POINTERS
-    call pixel_satd_8x8_internal
-    RESTORE_AND_INC_POINTERS
-%if WIN64 == 0
-    add     r0, 16*SIZEOF_PIXEL
-    add     r2, 16*SIZEOF_PIXEL
-%endif
+    mov r6, r0                          ; r6 = original r0 (first block base)
+    mov r7, r2                          ; r7 = original r2 (second block base)
+    call pixel_satd_8x8_internal        ; columns 0..7
+    lea r0, [r6 + 8*SIZEOF_PIXEL]       ; rebuild r0 from the saved base, 8 pixels in
+    lea r2, [r7 + 8*SIZEOF_PIXEL]       ; same offset for r2
+    call pixel_satd_8x8_internal        ; columns 8..15
+    lea r0, [r6 + 16*SIZEOF_PIXEL]
+    lea r2, [r7 + 16*SIZEOF_PIXEL]
+    call pixel_satd_8x8_internal        ; columns 16..23
+    lea r0, [r6 + 24*SIZEOF_PIXEL]
+    lea r2, [r7 + 24*SIZEOF_PIXEL]
     call pixel_satd_8x8_internal
     SATD_END_SSE2 m6
+%else
+cglobal pixel_satd_32x8, 4,7,8,0-gprsize    ;if !WIN64; one GPR-sized stack slot holds the original r2
+    SATD_START_SSE2 m6, m7
+    mov r6, r0                          ; r6 = original r0 (first block base)
+    mov [rsp], r2                       ; spill original r2; slot is gprsize bytes wide
+    call pixel_satd_8x8_internal        ; columns 0..7
+    lea r0, [r6 + 8*SIZEOF_PIXEL]       ; rebuild r0 from the saved base, 8 pixels in
+    mov r2, [rsp]                       ; reload original r2 ...
+    add r2, 8*SIZEOF_PIXEL              ; ... and apply the same column offset
+    call pixel_satd_8x8_internal        ; columns 8..15
+    lea r0, [r6 + 16*SIZEOF_PIXEL]
+    mov r2, [rsp]
+    add r2, 16*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal        ; columns 16..23
+    lea r0, [r6 + 24*SIZEOF_PIXEL]
+    mov r2, [rsp]
+    add r2, 24*SIZEOF_PIXEL
+    call pixel_satd_8x8_internal        ; columns 24..31
+    SATD_END_SSE2 m6
+%endif
 
 %if WIN64
 cglobal pixel_satd_32x16, 4,8,8    ;if WIN64 && cpuflag(avx)
@@ -2282,25 +2297,24 @@
     call pixel_satd_8x8_internal
     SATD_END_SSE2 m6
 %else
-cglobal pixel_satd_32x16, 4,6,8    ;if !WIN64
+cglobal pixel_satd_32x16, 4,7,8,0-4    ;if !WIN64
     SATD_START_SSE2 m6, m7
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 8
+    mov r6, r0
+    mov [rsp], r2
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    lea r0, [r6 + 8]
+    mov r2, [rsp]
     add r2, 8
     call pixel_satd_8x8_internal
     call pixel_satd_8x8_internal
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 16
+    lea r0, [r6 + 16]
+    mov r2, [rsp]
     add r2, 16
     call pixel_satd_8x8_internal
     call pixel_satd_8x8_internal
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 24
+    lea r0, [r6 + 24]
+    mov r2, [rsp]
     add r2, 24
     call pixel_satd_8x8_internal
     call pixel_satd_8x8_internal
@@ -2332,28 +2346,27 @@
     call pixel_satd_8x8_internal
     SATD_END_SSE2 m6
 %else
-cglobal pixel_satd_32x24, 4,6,8    ;if !WIN64
+cglobal pixel_satd_32x24, 4,7,8,0-4    ;if !WIN64
     SATD_START_SSE2 m6, m7
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 8
+    mov r6, r0
+    mov [rsp], r2
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    lea r0, [r6 + 8]
+    mov r2, [rsp]
     add r2, 8
     call pixel_satd_8x8_internal
     call pixel_satd_8x8_internal
     call pixel_satd_8x8_internal
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 16
+    lea r0, [r6 + 16]
+    mov r2, [rsp]
     add r2, 16
     call pixel_satd_8x8_internal
     call pixel_satd_8x8_internal
     call pixel_satd_8x8_internal
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 24
+    lea r0, [r6 + 24]
+    mov r2, [rsp]
     add r2, 24
     call pixel_satd_8x8_internal
     call pixel_satd_8x8_internal
@@ -2389,38 +2402,41 @@
     call pixel_satd_8x8_internal
     call pixel_satd_8x8_internal
     SATD_END_SSE2 m6
-%else
-cglobal pixel_satd_32x32, 4,6,8    ;if !WIN64
+
+
+%else   
+cglobal pixel_satd_32x32, 4,7,8,0-gprsize    ;if !WIN64; one GPR-sized stack slot holds the original r2
+
     SATD_START_SSE2 m6, m7
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 8
+    mov r6, r0                          ; r6 = original r0 (first block base)
+    mov [rsp], r2                       ; spill original r2; slot is gprsize bytes wide
+    call pixel_satd_8x8_internal        ; column 0: four consecutive 8x8 calls
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    lea r0, [r6 + 8]                    ; column 1: rebuild r0 from the saved base
+    mov r2, [rsp]                       ; reload original r2 before offsetting it
     add r2, 8
     call pixel_satd_8x8_internal
     call pixel_satd_8x8_internal
     call pixel_satd_8x8_internal
     call pixel_satd_8x8_internal
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 16
+    lea r0, [r6 + 16]                   ; column 2
+    mov r2, [rsp]
     add r2, 16
     call pixel_satd_8x8_internal
     call pixel_satd_8x8_internal
     call pixel_satd_8x8_internal
     call pixel_satd_8x8_internal
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 24
+    lea r0, [r6 + 24]                   ; column 3
+    mov r2, [rsp]
     add r2, 24
     call pixel_satd_8x8_internal
     call pixel_satd_8x8_internal
     call pixel_satd_8x8_internal
     call pixel_satd_8x8_internal
     SATD_END_SSE2 m6
+
 %endif
 
 %if WIN64
@@ -2474,19 +2490,20 @@
     movd   eax, m6
     RET
 %else
-cglobal pixel_satd_32x64, 4,6,8    ;if !WIN64
+cglobal pixel_satd_32x64, 4,7,8,0-4    ;if !WIN64
     SATD_START_SSE2 m6, m7
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 8
+    mov r6, r0
+    mov [rsp], r2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 8]
+    mov r2, [rsp]
     add r2, 8
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
@@ -2496,9 +2513,8 @@
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 16
+    lea r0, [r6 + 16]
+    mov r2, [rsp]
     add r2, 16
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
@@ -2508,9 +2524,8 @@
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 24
+    lea r0, [r6 + 24]
+    mov r2, [rsp]
     add r2, 24
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
@@ -2600,68 +2615,65 @@
     movd   eax, m6
     RET
 %else
-cglobal pixel_satd_48x64, 4,6,8    ;if !WIN64
+cglobal pixel_satd_48x64, 4,7,8,0-4    ;if !WIN64
     SATD_START_SSE2 m6, m7
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 8
-    add r2, 8
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 16
-    add r2, 16
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 24
-    add r2, 24
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 32
-    add r2, 32
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 40
-    add r2, 40
+    mov r6, r0
+    mov [rsp], r2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 8]
+    mov r2, [rsp]
+    add r2,8
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 16]
+    mov r2, [rsp]
+    add r2,16
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 24]
+    mov r2, [rsp]
+    add r2,24
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 32]
+    mov r2, [rsp]
+    add r2,32
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 40]
+    mov r2, [rsp]
+    add r2,40
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
@@ -2717,50 +2729,45 @@
     call pixel_satd_8x8_internal
     SATD_END_SSE2 m6
 %else
-cglobal pixel_satd_64x16, 4,6,8    ;if !WIN64
+cglobal pixel_satd_64x16, 4,7,8,0-gprsize    ;if !WIN64; one GPR-sized stack slot holds the original r2
     SATD_START_SSE2 m6, m7
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 8
-    add r2, 8
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 16
-    add r2, 16
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 24
-    add r2, 24
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 32
-    add r2, 32
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 40
-    add r2, 40
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 48
-    add r2, 48
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 56
-    add r2, 56
+    mov r6, r0                          ; r6 = original r0 (first block base)
+    mov [rsp], r2                       ; spill original r2; slot is gprsize bytes wide
+    call pixel_satd_8x8_internal        ; column 0: two consecutive 8x8 calls
+    call pixel_satd_8x8_internal
+    lea r0, [r6 + 8]                    ; column 1: rebuild r0 from the saved base
+    mov r2, [rsp]                       ; reload original r2 before offsetting it
+    add r2, 8
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    lea r0, [r6 + 16]                   ; column 2
+    mov r2, [rsp]
+    add r2, 16
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    lea r0, [r6 + 24]                   ; column 3
+    mov r2, [rsp]
+    add r2, 24
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    lea r0, [r6 + 32]                   ; column 4
+    mov r2, [rsp]
+    add r2, 32
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    lea r0, [r6 + 40]                   ; column 5
+    mov r2, [rsp]
+    add r2, 40
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    lea r0, [r6 + 48]                   ; column 6
+    mov r2, [rsp]
+    add r2, 48
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    lea r0, [r6 + 56]                   ; column 7
+    mov r2, [rsp]
+    add r2, 56
     call pixel_satd_8x8_internal
     call pixel_satd_8x8_internal
     SATD_END_SSE2 m6
@@ -2825,63 +2832,58 @@
     movd   eax, m6
     RET
 %else
-cglobal pixel_satd_64x32, 4,6,8    ;if !WIN64
+cglobal pixel_satd_64x32, 4,7,8,0-4    ;if !WIN64
     SATD_START_SSE2 m6, m7
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 8
+    mov r6, r0
+    mov [rsp], r2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 8]
+    mov r2, [rsp]
     add r2, 8
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 16
+    lea r0, [r6 + 16]
+    mov r2, [rsp]
     add r2, 16
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 24
+    lea r0, [r6 + 24]
+    mov r2, [rsp]
     add r2, 24
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 32
+    lea r0, [r6 + 32]
+    mov r2, [rsp]
     add r2, 32
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 40
+    lea r0, [r6 + 40]
+    mov r2, [rsp]
     add r2, 40
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 48
+    lea r0, [r6 + 48]
+    mov r2, [rsp]
     add r2, 48
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 56
+    lea r0, [r6 + 56]
+    mov r2, [rsp]
     add r2, 56
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
@@ -2971,17 +2973,18 @@
     movd   eax, m6
     RET
 %else
-cglobal pixel_satd_64x48, 4,6,8    ;if !WIN64
+cglobal pixel_satd_64x48, 4,7,8,0-4    ;if !WIN64
     SATD_START_SSE2 m6, m7
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 8
+    mov r6, r0
+    mov [rsp], r2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 8]
+    mov r2, [rsp]
     add r2, 8
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
@@ -2989,9 +2992,8 @@
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 16
+    lea r0, [r6 + 16]
+    mov r2, [rsp]
     add r2, 16
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
@@ -2999,9 +3001,8 @@
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 24
+    lea r0, [r6 + 24]
+    mov r2, [rsp]
     add r2, 24
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
@@ -3009,9 +3010,8 @@
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 32
+    lea r0, [r6 + 32]
+    mov r2, [rsp]
     add r2, 32
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
@@ -3019,9 +3019,8 @@
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 40
+    lea r0, [r6 + 40]
+    mov r2, [rsp]
     add r2, 40
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
@@ -3029,9 +3028,8 @@
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 48
+    lea r0, [r6 + 48]
+    mov r2, [rsp]
     add r2, 48
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
@@ -3039,9 +3037,8 @@
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 56
+    lea r0, [r6 + 56]
+    mov r2, [rsp]
     add r2, 56
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
@@ -3149,19 +3146,20 @@
     movd   eax, m6
     RET
 %else
-cglobal pixel_satd_64x64, 4,6,8    ;if !WIN64
+cglobal pixel_satd_64x64, 4,7,8,0-4    ;if !WIN64
     SATD_START_SSE2 m6, m7
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    call pixel_satd_8x8_internal2
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 8
+    mov r6, r0
+    mov [rsp], r2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    lea r0, [r6 + 8]
+    mov r2, [rsp]
     add r2, 8
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
@@ -3171,9 +3169,8 @@
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 16
+    lea r0, [r6 + 16]
+    mov r2, [rsp]
     add r2, 16
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
@@ -3183,9 +3180,8 @@
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 24
+    lea r0, [r6 + 24]
+    mov r2, [rsp]
     add r2, 24
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
@@ -3195,9 +3191,8 @@
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 32
+    lea r0, [r6 + 32]
+    mov r2, [rsp]
     add r2, 32
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
@@ -3207,9 +3202,8 @@
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 40
+    lea r0, [r6 + 40]
+    mov r2, [rsp]
     add r2, 40
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
@@ -3219,9 +3213,8 @@
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 48
+    lea r0, [r6 + 48]
+    mov r2, [rsp]
     add r2, 48
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
@@ -3231,9 +3224,8 @@
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 56
+    lea r0, [r6 + 56]
+    mov r2, [rsp]
     add r2, 56
     call pixel_satd_8x8_internal2
     call pixel_satd_8x8_internal2
@@ -3357,8 +3349,10 @@
     movd eax, m7
     RET
 %else
-cglobal pixel_satd_12x16, 4,6,8
+cglobal pixel_satd_12x16, 4,7,8,0-4
     SATD_START_MMX
+    mov r6, r0
+    mov [rsp], r2
 %if vertical==0
     mova m7, [hmul_4p]
 %endif
@@ -3366,17 +3360,15 @@
     lea r0, [r0 + r1*2*SIZEOF_PIXEL]
     lea r2, [r2 + r3*2*SIZEOF_PIXEL]
     SATD_4x8_SSE vertical, 1, add
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 4
+    lea r0, [r6 + 4]
+    mov r2, [rsp]
     add r2, 4
     SATD_4x8_SSE vertical, 1, add
     lea r0, [r0 + r1*2*SIZEOF_PIXEL]
     lea r2, [r2 + r3*2*SIZEOF_PIXEL]
     SATD_4x8_SSE vertical, 1, add
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 8
+    lea r0, [r6 + 8]
+    mov r2, [rsp]
     add r2, 8
     SATD_4x8_SSE vertical, 1, add
     lea r0, [r0 + r1*2*SIZEOF_PIXEL]
@@ -3410,23 +3402,23 @@
     call pixel_satd_8x8_internal
     SATD_END_SSE2 m6
 %else
-cglobal pixel_satd_24x32, 4,6,8
+cglobal pixel_satd_24x32, 4,7,8,0-4
     SATD_START_SSE2 m6, m7
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 8
+    mov r6, r0
+    mov [rsp], r2
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    lea r0, [r6 + 8]
+    mov r2, [rsp]
     add r2, 8
     call pixel_satd_8x8_internal
     call pixel_satd_8x8_internal
     call pixel_satd_8x8_internal
     call pixel_satd_8x8_internal
-    mov r0, r0mp
-    mov r2, r2mp
-    add r0, 16
+    lea r0, [r6 + 16]
+    mov r2, [rsp]
     add r2, 16
     call pixel_satd_8x8_internal
     call pixel_satd_8x8_internal


More information about the x265-devel mailing list