[x264-devel] commit: Faster integral_init (Jason Garrett-Glaser)

git version control git at videolan.org
Tue Mar 10 12:02:54 CET 2009


x264 | branch: master | Jason Garrett-Glaser <darkshikari at gmail.com> | Mon Mar  9 23:37:53 2009 -0700| [8d82fecc3377b3052279f038f2273ade3a5b65cc] | committer: Jason Garrett-Glaser 

Faster integral_init
Replacing the unaligned load with two aligned loads plus palignr is worth it in inith (integral_init4h/8h), but not in initv.

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=8d82fecc3377b3052279f038f2273ade3a5b65cc
---

 common/x86/mc-a2.asm |    6 ++++--
 1 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/common/x86/mc-a2.asm b/common/x86/mc-a2.asm
index 775a84e..1148910 100644
--- a/common/x86/mc-a2.asm
+++ b/common/x86/mc-a2.asm
@@ -759,7 +759,8 @@ cglobal x264_integral_init4h_sse4, 3,4
     pxor    m4, m4
 .loop:
     movdqa  m0, [r1+r2]
-    movdqu  m1, [r1+r2+8]
+    movdqa  m1, [r1+r2+16]
+    palignr m1, m0, 8
     mpsadbw m0, m4, 0
     mpsadbw m1, m4, 0
     paddw   m0, [r0+r2*2]
@@ -777,7 +778,8 @@ cglobal x264_integral_init8h_sse4, 3,4
     pxor    m4, m4
 .loop:
     movdqa  m0, [r1+r2]
-    movdqu  m1, [r1+r2+8]
+    movdqa  m1, [r1+r2+16]
+    palignr m1, m0, 8
     movdqa  m2, m0
     movdqa  m3, m1
     mpsadbw m0, m4, 0



More information about the x264-devel mailing list