[x265] [PATCH] asm: Replaced SSE4 instructions with SSE2 and general purpose instructions for chroma vsp filter functions

chen chenm003 at 163.com
Wed Nov 13 12:29:37 CET 2013


At 2013-11-13 14:53:27, nabajit at multicorewareinc.com wrote:
># HG changeset patch
># User Nabajit Deka
># Date 1384325281 -19800
>#      Wed Nov 13 12:18:01 2013 +0530
># Node ID 017763dc543d091170082eccf7b42a0c47c453ff
># Parent  c4ca80d19105ccf1ba2ec14dd65915f2820a660d
>asm: Replaced SSE4 instructions with SSE2 and general purpose instructions for chroma vsp filter functions.
>
>diff -r c4ca80d19105 -r 017763dc543d source/common/x86/ipfilter8.asm
>--- a/source/common/x86/ipfilter8.asm Tue Nov 12 19:10:23 2013 +0530
>+++ b/source/common/x86/ipfilter8.asm Wed Nov 13 12:18:01 2013 +0530
>@@ -3183,11 +3183,16 @@
>     packssdw  m0, m2
>     packuswb  m0, m0
> 
>-    pextrw    [r2], m0, 0
>-    pextrw    [r2 + r3], m0, 1
>-    pextrw    [r2 + 2 * r3], m0, 2
>+    movd      r5d, m0
>+    mov       [r2], r5w
>+    shr       r5d, 16
>+    mov       [r2 + r3], r5w
>+    pshufd    m0, m0, 1
>+    movd      r5d, m0
The pshufd + movd pair above is slower than pextrd.
 
>+    mov       [r2 + 2 * r3], r5w
>+    shr       r5d, 16
>     lea       r2, [r2 + 2 * r3]
>-    pextrw    [r2 + r3], m0, 3
>+    mov       [r2 + r3], r5w
> 
>     lea       r2, [r2 + 2 * r3]
> 
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131113/03f6b4eb/attachment-0001.html>


More information about the x265-devel mailing list