[x265] [PATCH] asm: fix the bug which occurred at win32 compile
yuvaraj at multicorewareinc.com
yuvaraj at multicorewareinc.com
Thu Oct 31 08:29:23 CET 2013
# HG changeset patch
# User Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
# Date 1383204505 -19800
# Thu Oct 31 12:58:25 2013 +0530
# Node ID f6e35bfe1fd67668cc3c18bc41260a3f1d71dffc
# Parent a406f7c1dd3bcc471c0885ad0720e60fa0007983
asm: fix the bug which occurred at win32 compile
diff -r a406f7c1dd3b -r f6e35bfe1fd6 source/common/x86/sad-a.asm
--- a/source/common/x86/sad-a.asm Wed Oct 30 20:20:08 2013 -0500
+++ b/source/common/x86/sad-a.asm Thu Oct 31 12:58:25 2013 +0530
@@ -926,7 +926,7 @@
;-----------------------------------------------------------------------------
; int pixel_sad_64x32( uint8_t *, intptr_t, uint8_t *, intptr_t )
;-----------------------------------------------------------------------------
-cglobal pixel_sad_64x32, 4,4,5
+cglobal pixel_sad_64x32, 4,5,5
pxor m0, m0
mov r4, 32
@@ -956,7 +956,7 @@
;-----------------------------------------------------------------------------
; int pixel_sad_64x48( uint8_t *, intptr_t, uint8_t *, intptr_t )
;-----------------------------------------------------------------------------
-cglobal pixel_sad_64x48, 4,4,5
+cglobal pixel_sad_64x48, 4,5,5
pxor m0, m0
mov r4, 48
@@ -986,7 +986,7 @@
;-----------------------------------------------------------------------------
; int pixel_sad_64x64( uint8_t *, intptr_t, uint8_t *, intptr_t )
;-----------------------------------------------------------------------------
-cglobal pixel_sad_64x64, 4,4,5
+cglobal pixel_sad_64x64, 4,5,5
pxor m0, m0
mov r4, 64
@@ -1016,7 +1016,7 @@
;-----------------------------------------------------------------------------
; int pixel_sad_48x64( uint8_t *, intptr_t, uint8_t *, intptr_t )
;-----------------------------------------------------------------------------
-cglobal pixel_sad_48x64, 4,4,5
+cglobal pixel_sad_48x64, 4,5,5
pxor m0, m0
mov r4, 64
@@ -1046,7 +1046,7 @@
;-----------------------------------------------------------------------------
; int pixel_sad_24x32( uint8_t *, intptr_t, uint8_t *, intptr_t )
;-----------------------------------------------------------------------------
-cglobal pixel_sad_24x32, 4,4,4
+cglobal pixel_sad_24x32, 4,5,4
pxor m0, m0
mov r4, 32
@@ -2866,7 +2866,7 @@
%endmacro
%macro SAD_X3_W24 0
-cglobal pixel_sad_x3_24x32, 5, 6, 8
+cglobal pixel_sad_x3_24x32, 5, 7, 8
pxor m0, m0
pxor m1, m1
pxor m2, m2
@@ -2884,25 +2884,31 @@
SAD_X3_END_SSE2 1
%endmacro
-%macro SAD_X4_W24 0
-cglobal pixel_sad_x4_24x32, 6, 8, 8
- pxor m0, m0
- pxor m1, m1
- pxor m2, m2
- pxor m3, m3
- mov r7, 32
-
-.loop
- SAD_X4_24x4
- SAD_X4_24x4
- SAD_X4_24x4
- SAD_X4_24x4
-
- sub r7, 16
- cmp r7, 0
-jnz .loop
- SAD_X4_END_SSE2 1
-%endmacro
+%macro SAD_X4_W24 0
+%if ARCH_X86_64 == 1
+cglobal pixel_sad_x4_24x32, 6, 8, 8
+%define count r7
+%else
+cglobal pixel_sad_x4_24x32, 6, 7, 8, 0-4
+%define count dword [rsp]
+%endif
+ pxor m0, m0
+ pxor m1, m1
+ pxor m2, m2
+ pxor m3, m3
+ mov count, 32
+
+.loop
+ SAD_X4_24x4
+ SAD_X4_24x4
+ SAD_X4_24x4
+ SAD_X4_24x4
+
+ sub count, 16
+ jnz .loop
+ SAD_X4_END_SSE2 1
+
+%endmacro
%macro SAD_X3_W32 0
cglobal pixel_sad_x3_32x8, 5, 6, 8
@@ -3010,41 +3016,52 @@
SAD_X4_32x4
SAD_X4_END_SSE2 1
-cglobal pixel_sad_x4_32x32, 6, 8, 8
- pxor m0, m0
- pxor m1, m1
- pxor m2, m2
- pxor m3, m3
- mov r7, 32
-
-.loop
- SAD_X4_32x4
- SAD_X4_32x4
- SAD_X4_32x4
- SAD_X4_32x4
-
- sub r7, 16
- cmp r7, 0
-jnz .loop
+%if ARCH_X86_64 == 1
+cglobal pixel_sad_x4_32x32, 6, 8, 8
+%define count r7
+%else
+cglobal pixel_sad_x4_32x32, 6, 7, 8, 0-4
+%define count dword [rsp]
+%endif
+ pxor m0, m0
+ pxor m1, m1
+ pxor m2, m2
+ pxor m3, m3
+ mov count, 32
+
+.loop
+ SAD_X4_32x4
+ SAD_X4_32x4
+ SAD_X4_32x4
+ SAD_X4_32x4
+
+ sub count, 16
+ jnz .loop
SAD_X4_END_SSE2 1
-cglobal pixel_sad_x4_32x64, 6, 8, 8
- pxor m0, m0
- pxor m1, m1
- pxor m2, m2
- pxor m3, m3
- mov r7, 64
+%if ARCH_X86_64 == 1
+cglobal pixel_sad_x4_32x64, 6, 8, 8
+%define count r7
+%else
+cglobal pixel_sad_x4_32x64, 6, 7, 8, 0-4
+%define count dword [rsp]
+%endif
+ pxor m0, m0
+ pxor m1, m1
+ pxor m2, m2
+ pxor m3, m3
+ mov count, 64
+
+.loop
+ SAD_X4_32x4
+ SAD_X4_32x4
+ SAD_X4_32x4
+ SAD_X4_32x4
+
+ sub count, 16
+ jnz .loop
+ SAD_X4_END_SSE2 1
-.loop1
- SAD_X4_32x4
- SAD_X4_32x4
- SAD_X4_32x4
- SAD_X4_32x4
-
- sub r7, 16
- cmp r7, 0
-jnz .loop1
- SAD_X4_END_SSE2 1
%endmacro
More information about the x265-devel mailing list