[x265] [PATCH] asm: improvement on chroma_hpp{2, 4} by reducing memory operations
Min Chen
chenm003 at 163.com
Tue Oct 29 14:01:40 CET 2013
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1383051361 -28800
# Node ID 7ee3e65d3d1b9f765741b519ea77111ce83a04e6
# Parent 7416f8a54b710f0e0eb482eb6d07109ba8574e91
asm: improvement on chroma_hpp{2,4} by reducing memory operations
diff -r 7416f8a54b71 -r 7ee3e65d3d1b source/common/x86/ipfilter8.asm
--- a/source/common/x86/ipfilter8.asm Tue Oct 29 16:35:24 2013 +0800
+++ b/source/common/x86/ipfilter8.asm Tue Oct 29 20:56:01 2013 +0800
@@ -76,17 +76,19 @@
SECTION .text
%macro FILTER_H4_w2_2 3
- movu %2, [srcq - 1]
+ movh %2, [srcq - 1]
pshufb %2, %2, Tm0
+ movh %1, [srcq + srcstrideq - 1]
+ pshufb %1, %1, Tm0
+ punpcklqdq %2, %1
pmaddubsw %2, coef2
- movu %1, [srcq + srcstrideq - 1]
- pshufb %1, %1, Tm0
- pmaddubsw %1, coef2
- phaddw %2, %1
+ phaddw %2, %2
pmulhrsw %2, %3
packuswb %2, %2
- pextrw [dstq], %2, 0
- pextrw [dstq + dststrideq], %2, 2
+ movd r4, %2
+ mov [dstq], r4w
+ shr r4, 16
+ mov [dstq + dststrideq], r4w
%endmacro
;-----------------------------------------------------------------------------
@@ -158,17 +160,18 @@
RET
%macro FILTER_H4_w4_2 3
- movu %2, [srcq - 1]
+ movh %2, [srcq - 1]
pshufb %2, %2, Tm0
pmaddubsw %2, coef2
- movu %1, [srcq + srcstrideq - 1]
+ movh %1, [srcq + srcstrideq - 1]
pshufb %1, %1, Tm0
pmaddubsw %1, coef2
phaddw %2, %1
pmulhrsw %2, %3
packuswb %2, %2
- movd [dstq], %2
- pextrd [dstq + dststrideq], %2, 1
+ movd [dstq], %2
+ palignr %2, %2, 4
+ movd [dstq + dststrideq], %2
%endmacro
;-----------------------------------------------------------------------------
More information about the x265-devel
mailing list