[x264-devel] commit: Faster deblock strength asm on conroe/penryn (Jason Garrett-Glaser)

git at videolan.org git at videolan.org
Wed May 26 19:39:35 CEST 2010


x264 | branch: master | Jason Garrett-Glaser <darkshikari at gmail.com> | Fri May 21 15:39:38 2010 -0700| [cca28cdeb64df106d9aa49eb75ae1c7b933d6c49] | committer: Jason Garrett-Glaser 

Faster deblock strength asm on conroe/penryn

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=cca28cdeb64df106d9aa49eb75ae1c7b933d6c49
---

 common/x86/deblock-a.asm |   24 +++++++++++++++++++++++-
 1 files changed, 23 insertions(+), 1 deletions(-)

diff --git a/common/x86/deblock-a.asm b/common/x86/deblock-a.asm
index 628ee5d..f2f3e58 100644
--- a/common/x86/deblock-a.asm
+++ b/common/x86/deblock-a.asm
@@ -1022,7 +1022,7 @@ cglobal deblock_strength_mmxext, 6,6
     RET
 
 %macro DEBLOCK_STRENGTH_XMM 1
-cglobal deblock_strength_%1, 6,6,7
+cglobal deblock_strength_%1, 6,6,8
     ; Prepare mv comparison register
     shl      r4d, 8
     add      r4d, 3 - (1<<8)
@@ -1040,6 +1040,27 @@ cglobal deblock_strength_%1, 6,6,7
     por       m5, m1
 
     ; Check mvs
+%ifidn %1, ssse3
+    mova      m3, [mv+4*8*0]
+    mova      m2, [mv+4*8*1]
+    mova      m0, m3
+    mova      m1, m2
+    palignr   m3, [mv+4*8*0-16], 12
+    palignr   m2, [mv+4*8*1-16], 12
+    psubw     m0, m3
+    psubw     m1, m2
+    packsswb  m0, m1
+
+    mova      m3, [mv+4*8*2]
+    mova      m7, [mv+4*8*3]
+    mova      m2, m3
+    mova      m1, m7
+    palignr   m3, [mv+4*8*2-16], 12
+    palignr   m7, [mv+4*8*3-16], 12
+    psubw     m2, m3
+    psubw     m1, m7
+    packsswb  m2, m1
+%else
     movu      m0, [mv-4+4*8*0]
     movu      m1, [mv-4+4*8*1]
     movu      m2, [mv-4+4*8*2]
@@ -1050,6 +1071,7 @@ cglobal deblock_strength_%1, 6,6,7
     psubw     m3, [mv+4*8*3]
     packsswb  m0, m1
     packsswb  m2, m3
+%endif
     ABSB2     m0, m2, m1, m3
     psubusb   m0, m6
     psubusb   m2, m6



More information about the x264-devel mailing list