[x264-devel] commit: Slightly faster mbtree asm (Jason Garrett-Glaser)

git at videolan.org git at videolan.org
Wed May 26 19:39:35 CEST 2010


x264 | branch: master | Jason Garrett-Glaser <darkshikari at gmail.com> | Mon May 24 11:13:22 2010 -0700| [7c2fd41075685b6f67471c71b323b08fc3c13764] | committer: Jason Garrett-Glaser 

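Slightly faster mbtree asm

In mbtree_propagate_cost_sse2, load the pw_3fff mask into xmm6 once before
the loop instead of using it as a memory operand of pand on every iteration;
the cglobal declaration gains a third argument of 7 to go with the new register.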

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=7c2fd41075685b6f67471c71b323b08fc3c13764
---

 common/x86/mc-a2.asm |    5 +++--
 1 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/common/x86/mc-a2.asm b/common/x86/mc-a2.asm
index 8deb9e0..aee3f0a 100644
--- a/common/x86/mc-a2.asm
+++ b/common/x86/mc-a2.asm
@@ -1111,7 +1111,7 @@ FRAME_INIT_LOWRES ssse3, 12
 ; void mbtree_propagate_cost( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
 ;                             uint16_t *inter_costs, uint16_t *inv_qscales, int len )
 ;-----------------------------------------------------------------------------
-cglobal mbtree_propagate_cost_sse2, 6,6
+cglobal mbtree_propagate_cost_sse2, 6,6,7
     shl r5d, 1
     lea r0, [r0+r5*2]
     add r1, r5
@@ -1121,6 +1121,7 @@ cglobal mbtree_propagate_cost_sse2, 6,6
     neg r5
     pxor      xmm5, xmm5
     movdqa    xmm4, [pd_128]
+    movdqa    xmm6, [pw_3fff]
 .loop:
     movq      xmm2, [r2+r5] ; intra
     movq      xmm0, [r4+r5] ; invq
@@ -1131,7 +1132,7 @@ cglobal mbtree_propagate_cost_sse2, 6,6
     psrld     xmm0, 8       ; intra*invq>>8
     movq      xmm3, [r3+r5] ; inter
     movq      xmm1, [r1+r5] ; prop
-    pand      xmm3, [pw_3fff]
+    pand      xmm3, xmm6
     punpcklwd xmm1, xmm5
     punpcklwd xmm3, xmm5
     paddd     xmm0, xmm1    ; prop + (intra*invq>>8)



More information about the x264-devel mailing list