[x265] [PATCH] asm: pixel_sad_64xN reduce large code size
Dnyaneshwar G
dnyaneshwar at multicorewareinc.com
Tue Dec 3 08:56:16 CET 2013
# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1386056358 -19800
# Tue Dec 03 13:09:18 2013 +0530
# Node ID 4e00525b99e44f8054020e6562588c5509b6a784
# Parent ca7bd538e052d104b1b333691836db37739cfdf0
asm: pixel_sad_64xN reduce large code size
diff -r ca7bd538e052 -r 4e00525b99e4 source/common/x86/sad-a.asm
--- a/source/common/x86/sad-a.asm Mon Dec 02 20:26:19 2013 -0600
+++ b/source/common/x86/sad-a.asm Tue Dec 03 13:09:18 2013 +0530
@@ -374,7 +374,8 @@
paddd m3, m4
paddd m0, m1
paddd m0, m3
-
+ lea r2, [r2 + r3]
+ lea r0, [r0 + r1]
%endmacro
%macro SAD_W16 0
@@ -660,20 +661,8 @@
pxor m0, m0
PROCESS_SAD_64x4
-
- lea r2, [r2 + r3]
- lea r0, [r0 + r1]
-
PROCESS_SAD_64x4
-
- lea r2, [r2 + r3]
- lea r0, [r0 + r1]
-
PROCESS_SAD_64x4
-
- lea r2, [r2 + r3]
- lea r0, [r0 + r1]
-
PROCESS_SAD_64x4
movhlps m1, m0
@@ -686,25 +675,14 @@
;-----------------------------------------------------------------------------
cglobal pixel_sad_64x32, 4,5,5
pxor m0, m0
- mov r4, 32
+ mov r4, 4
.loop
PROCESS_SAD_64x4
- lea r2, [r2 + r3]
- lea r0, [r0 + r1]
+ PROCESS_SAD_64x4
- PROCESS_SAD_64x4
- lea r2, [r2 + r3]
- lea r0, [r0 + r1]
-
- sub r4, 8
- cmp r4, 8
-
-jnz .loop
- PROCESS_SAD_64x4
- lea r2, [r2 + r3]
- lea r0, [r0 + r1]
- PROCESS_SAD_64x4
+ dec r4
+ jnz .loop
movhlps m1, m0
paddd m0, m1
@@ -716,25 +694,13 @@
;-----------------------------------------------------------------------------
cglobal pixel_sad_64x48, 4,5,5
pxor m0, m0
- mov r4, 48
+ mov r4, 6
.loop
PROCESS_SAD_64x4
- lea r2, [r2 + r3]
- lea r0, [r0 + r1]
-
PROCESS_SAD_64x4
- lea r2, [r2 + r3]
- lea r0, [r0 + r1]
-
- sub r4, 8
- cmp r4, 8
-
-jnz .loop
- PROCESS_SAD_64x4
- lea r2, [r2 + r3]
- lea r0, [r0 + r1]
- PROCESS_SAD_64x4
+ dec r4d
+ jnz .loop
movhlps m1, m0
paddd m0, m1
@@ -746,25 +712,13 @@
;-----------------------------------------------------------------------------
cglobal pixel_sad_64x64, 4,5,5
pxor m0, m0
- mov r4, 64
+ mov r4, 8
.loop
PROCESS_SAD_64x4
- lea r2, [r2 + r3]
- lea r0, [r0 + r1]
-
PROCESS_SAD_64x4
- lea r2, [r2 + r3]
- lea r0, [r0 + r1]
-
- sub r4, 8
- cmp r4, 8
-
-jnz .loop
- PROCESS_SAD_64x4
- lea r2, [r2 + r3]
- lea r0, [r0 + r1]
- PROCESS_SAD_64x4
+ dec r4
+ jnz .loop
movhlps m1, m0
paddd m0, m1
More information about the x265-devel mailing list