[x264-devel] [patch] faster SAD_INC_2x16P ported to amd64

Josef Zlomek josef.zlomek at xeris.cz
Fri Jul 8 10:16:59 CEST 2005


Hello,

the attached patch ports the recently committed faster SAD_INC_2x16P
implementation to the amd64 (x86_64) architecture.

Josef Zlomek
-------------- next part --------------
Index: common/amd64/pixel-a.asm
===================================================================
--- common/amd64/pixel-a.asm	(revision 272)
+++ common/amd64/pixel-a.asm	(working copy)
@@ -38,27 +38,19 @@
 
 %macro SAD_INC_2x16P 0
     movq    mm1,    [rax]
-    movq    mm2,    [rcx]
-    movq    mm3,    [rax+8]
-    movq    mm4,    [rcx+8]
-
-    psadbw  mm1,    mm2
-    psadbw  mm3,    mm4
-    paddw   mm0,    mm1
-    paddw   mm0,    mm3
-
-    movq    mm1,    [rax+rbx]
-    movq    mm2,    [rcx+rdx]
-    movq    mm3,    [rax+rbx+8]
-    movq    mm4,    [rcx+rdx+8]
-
-    psadbw  mm1,    mm2
-    psadbw  mm3,    mm4
-    paddw   mm0,    mm1
-    paddw   mm0,    mm3
-
+    movq    mm2,    [rax+8]
+    movq    mm3,    [rax+rbx]
+    movq    mm4,    [rax+rbx+8]
+    psadbw  mm1,    [rcx]
+    psadbw  mm2,    [rcx+8]
+    psadbw  mm3,    [rcx+rdx]
+    psadbw  mm4,    [rcx+rdx+8]
     lea     rax,    [rax+2*rbx]
+    paddw   mm1,    mm2
+    paddw   mm3,    mm4
     lea     rcx,    [rcx+2*rdx]
+    paddw   mm0,    mm1
+    paddw   mm0,    mm3
 %endmacro
 
 %macro SAD_INC_2x8P 0


More information about the x264-devel mailing list