[x264-devel] commit: Slightly faster mbtree asm (Jason Garrett-Glaser)
git at videolan.org
git at videolan.org
Wed May 26 19:39:35 CEST 2010
x264 | branch: master | Jason Garrett-Glaser <darkshikari at gmail.com> | Mon May 24 11:13:22 2010 -0700| [7c2fd41075685b6f67471c71b323b08fc3c13764] | committer: Jason Garrett-Glaser
Slightly faster mbtree asm
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=7c2fd41075685b6f67471c71b323b08fc3c13764
---
common/x86/mc-a2.asm | 5 +++--
1 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/common/x86/mc-a2.asm b/common/x86/mc-a2.asm
index 8deb9e0..aee3f0a 100644
--- a/common/x86/mc-a2.asm
+++ b/common/x86/mc-a2.asm
@@ -1111,7 +1111,7 @@ FRAME_INIT_LOWRES ssse3, 12
; void mbtree_propagate_cost( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
; uint16_t *inter_costs, uint16_t *inv_qscales, int len )
;-----------------------------------------------------------------------------
-cglobal mbtree_propagate_cost_sse2, 6,6
+cglobal mbtree_propagate_cost_sse2, 6,6,7
shl r5d, 1
lea r0, [r0+r5*2]
add r1, r5
@@ -1121,6 +1121,7 @@ cglobal mbtree_propagate_cost_sse2, 6,6
neg r5
pxor xmm5, xmm5
movdqa xmm4, [pd_128]
+ movdqa xmm6, [pw_3fff]
.loop:
movq xmm2, [r2+r5] ; intra
movq xmm0, [r4+r5] ; invq
@@ -1131,7 +1132,7 @@ cglobal mbtree_propagate_cost_sse2, 6,6
psrld xmm0, 8 ; intra*invq>>8
movq xmm3, [r3+r5] ; inter
movq xmm1, [r1+r5] ; prop
- pand xmm3, [pw_3fff]
+ pand xmm3, xmm6
punpcklwd xmm1, xmm5
punpcklwd xmm3, xmm5
paddd xmm0, xmm1 ; prop + (intra*invq>>8)
More information about the x264-devel mailing list