[x264-devel] commit: Slightly faster ssse3 width4 chroma MC (Jason Garrett-Glaser)
git version control
git at videolan.org
Fri Oct 30 03:13:34 CET 2009
x264 | branch: master | Jason Garrett-Glaser <darkshikari at gmail.com> | Tue Oct 27 15:08:37 2009 -0700| [d4b7db5c840661a5853369300e704b20c7c3ff53] | committer: Jason Garrett-Glaser
Slightly faster ssse3 width4 chroma MC
Cacheline-aware in the same fashion as the width8 path, but applied unconditionally.
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=d4b7db5c840661a5853369300e704b20c7c3ff53
---
common/x86/mc-a.asm | 26 +++++++++++++++++---------
1 files changed, 17 insertions(+), 9 deletions(-)
diff --git a/common/x86/mc-a.asm b/common/x86/mc-a.asm
index 3e9df66..5bd646a 100644
--- a/common/x86/mc-a.asm
+++ b/common/x86/mc-a.asm
@@ -954,14 +954,22 @@ cglobal x264_mc_chroma_ssse3%1, 0,6,%2
movifnidn r4d, r7m
SPLATW m6, m6
SPLATW m7, m7
- movh m0, [r2]
- punpcklbw m0, [r2+1]
- add r2, r3
+ mov r5, r2
+ and r2, ~3
+ and r5, 3
+%ifdef PIC
+ lea r11, [ch_shuffle GLOBAL]
+ movu m5, [r11 + r5*2]
+%else
+ movu m5, [ch_shuffle + r5*2 GLOBAL]
+%endif
+ movu m0, [r2]
+ pshufb m0, m5
.loop4:
- movh m1, [r2]
- movh m3, [r2+r3]
- punpcklbw m1, [r2+1]
- punpcklbw m3, [r2+r3+1]
+ movu m1, [r2+r3]
+ pshufb m1, m5
+ movu m3, [r2+2*r3]
+ pshufb m3, m5
lea r2, [r2+2*r3]
mova m2, m1
mova m4, m3
@@ -969,8 +977,8 @@ cglobal x264_mc_chroma_ssse3%1, 0,6,%2
pmaddubsw m1, m6
pmaddubsw m2, m7
pmaddubsw m3, m6
- paddw m0, m5
- paddw m2, m5
+ paddw m0, [pw_32 GLOBAL]
+ paddw m2, [pw_32 GLOBAL]
paddw m1, m0
paddw m3, m2
mova m0, m4
More information about the x264-devel mailing list