[x265] [PATCH] asm: Replaced SSE4 pextrw-to-memory stores with SSE2 (movd/pshufd) and general-purpose instructions in the chroma vsp filter functions

nabajit at multicorewareinc.com nabajit at multicorewareinc.com
Wed Nov 13 07:53:27 CET 2013


# HG changeset patch
# User Nabajit Deka
# Date 1384325281 -19800
#      Wed Nov 13 12:18:01 2013 +0530
# Node ID 017763dc543d091170082eccf7b42a0c47c453ff
# Parent  c4ca80d19105ccf1ba2ec14dd65915f2820a660d
asm: Replaced SSE4 pextrw-to-memory stores with SSE2 (movd/pshufd) and general-purpose instructions in the chroma vsp filter functions.

diff -r c4ca80d19105 -r 017763dc543d source/common/x86/ipfilter8.asm
--- a/source/common/x86/ipfilter8.asm	Tue Nov 12 19:10:23 2013 +0530
+++ b/source/common/x86/ipfilter8.asm	Wed Nov 13 12:18:01 2013 +0530
@@ -3183,11 +3183,16 @@
     packssdw  m0, m2
     packuswb  m0, m0
 
-    pextrw    [r2], m0, 0
-    pextrw    [r2 + r3], m0, 1
-    pextrw    [r2 + 2 * r3], m0, 2
+    movd      r5d, m0
+    mov       [r2], r5w
+    shr       r5d, 16
+    mov       [r2 + r3], r5w
+    pshufd    m0, m0, 1
+    movd      r5d, m0
+    mov       [r2 + 2 * r3], r5w
+    shr       r5d, 16
     lea       r2, [r2 + 2 * r3]
-    pextrw    [r2 + r3], m0, 3
+    mov       [r2 + r3], r5w
 
     lea       r2, [r2 + 2 * r3]
 
@@ -3315,11 +3320,16 @@
     packssdw  m0, m2
     packuswb  m0, m0
 
-    pextrw    [r2], m0, 0
-    pextrw    [r2 + r3], m0, 1
-    pextrw    [r2 + 2 * r3], m0, 2
+    movd      r5d, m0
+    mov       [r2], r5w
+    shr       r5d, 16
+    mov       [r2 + r3], r5w
+    pshufd    m0, m0, 1
+    movd      r5d, m0
+    mov       [r2 + 2 * r3], r5w
+    shr       r5d, 16
     lea       r2, [r2 + 2 * r3]
-    pextrw    [r2 + r3], m0, 3
+    mov       [r2 + r3], r5w
 
     sub       r0, 2 * 4
     lea       r2, [r2 + 2 * r3 - 4]


More information about the x265-devel mailing list