[x264-devel] [patch] faster SAD_INC_2x16P ported to amd64
Josef Zlomek
josef.zlomek at xeris.cz
Fri Jul 8 10:16:59 CEST 2005
Hello,
The attached patch ports the recently committed faster SAD_INC_2x16P
change to the amd64 (x86_64) architecture.
Josef Zlomek
-------------- next part --------------
Index: common/amd64/pixel-a.asm
===================================================================
--- common/amd64/pixel-a.asm (revision 272)
+++ common/amd64/pixel-a.asm (working copy)
@@ -38,27 +38,19 @@
%macro SAD_INC_2x16P 0
movq mm1, [rax]
- movq mm2, [rcx]
- movq mm3, [rax+8]
- movq mm4, [rcx+8]
-
- psadbw mm1, mm2
- psadbw mm3, mm4
- paddw mm0, mm1
- paddw mm0, mm3
-
- movq mm1, [rax+rbx]
- movq mm2, [rcx+rdx]
- movq mm3, [rax+rbx+8]
- movq mm4, [rcx+rdx+8]
-
- psadbw mm1, mm2
- psadbw mm3, mm4
- paddw mm0, mm1
- paddw mm0, mm3
-
+ movq mm2, [rax+8]
+ movq mm3, [rax+rbx]
+ movq mm4, [rax+rbx+8]
+ psadbw mm1, [rcx]
+ psadbw mm2, [rcx+8]
+ psadbw mm3, [rcx+rdx]
+ psadbw mm4, [rcx+rdx+8]
lea rax, [rax+2*rbx]
+ paddw mm1, mm2
+ paddw mm3, mm4
lea rcx, [rcx+2*rdx]
+ paddw mm0, mm1
+ paddw mm0, mm3
%endmacro
%macro SAD_INC_2x8P 0
More information about the x264-devel
mailing list