[x264-devel] commit: Faster coeff_last64 on 32-bit (Jason Garrett-Glaser)

git version control git at videolan.org
Mon Feb 9 11:32:49 CET 2009


x264 | branch: master | Jason Garrett-Glaser <darkshikari at gmail.com> | Sat Feb  7 01:57:43 2009 -0800| [3927938121ef63d72d9fd429c25202ebd65dd208] | committer: Jason Garrett-Glaser 

Faster coeff_last64 on 32-bit

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=3927938121ef63d72d9fd429c25202ebd65dd208
---

 common/x86/quant-a.asm |   10 +++++-----
 1 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/common/x86/quant-a.asm b/common/x86/quant-a.asm
index b74dfa5..fbabd16 100644
--- a/common/x86/quant-a.asm
+++ b/common/x86/quant-a.asm
@@ -820,17 +820,17 @@ cglobal x264_coeff_last16_%1, 1,3
 %ifndef ARCH_X86_64
 cglobal x264_coeff_last64_%1, 1, 5-mmsize/16
     pxor m2, m2
-    LAST_MASK r1d, r0, r4d
-    LAST_MASK r2d, r0+32, r4d
-    shl r2d, 16
-    or  r1d, r2d
     LAST_MASK r2d, r0+64, r4d
     LAST_MASK r3d, r0+96, r4d
     shl r3d, 16
     or  r2d, r3d
-    not r1d
     xor r2d, -1
     jne .secondhalf
+    LAST_MASK r1d, r0, r4d
+    LAST_MASK r3d, r0+32, r4d
+    shl r3d, 16
+    or  r1d, r3d
+    not r1d
     LAST eax, r1d, 0x1f
     RET
 .secondhalf:



More information about the x264-devel mailing list