[x264-devel] commit: Faster integral_init (Jason Garrett-Glaser)
git version control
git at videolan.org
Tue Mar 10 12:02:54 CET 2009
x264 | branch: master | Jason Garrett-Glaser <darkshikari at gmail.com> | Mon Mar 9 23:37:53 2009 -0700| [8d82fecc3377b3052279f038f2273ade3a5b65cc] | committer: Jason Garrett-Glaser
Faster integral_init
Using an aligned load plus palignr in place of an unaligned load (movdqu) is worth it in the inith functions (integral_init4h/8h), but not in the initv ones.
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=8d82fecc3377b3052279f038f2273ade3a5b65cc
---
common/x86/mc-a2.asm | 6 ++++--
1 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/common/x86/mc-a2.asm b/common/x86/mc-a2.asm
index 775a84e..1148910 100644
--- a/common/x86/mc-a2.asm
+++ b/common/x86/mc-a2.asm
@@ -759,7 +759,8 @@ cglobal x264_integral_init4h_sse4, 3,4
pxor m4, m4
.loop:
movdqa m0, [r1+r2]
- movdqu m1, [r1+r2+8]
+ movdqa m1, [r1+r2+16]
+ palignr m1, m0, 8
mpsadbw m0, m4, 0
mpsadbw m1, m4, 0
paddw m0, [r0+r2*2]
@@ -777,7 +778,8 @@ cglobal x264_integral_init8h_sse4, 3,4
pxor m4, m4
.loop:
movdqa m0, [r1+r2]
- movdqu m1, [r1+r2+8]
+ movdqa m1, [r1+r2+16]
+ palignr m1, m0, 8
movdqa m2, m0
movdqa m3, m1
mpsadbw m0, m4, 0
More information about the x264-devel mailing list