[x265] [PATCH Review Only] optimization with interp_4tap_vert_pp_12x16 asm code
praveen at multicorewareinc.com
praveen at multicorewareinc.com
Mon Oct 28 12:18:04 CET 2013
# HG changeset patch
# User Praveen Tiwari
# Date 1382959071 -19800
# Node ID 14ae0b70567b0d78738f2cb064b25c46e437d950
# Parent 1438266dbde617130d04ae13bd0d6e9c0993581c
optimization with interp_4tap_vert_pp_12x16 asm code
diff -r 1438266dbde6 -r 14ae0b70567b source/common/x86/ipfilter8.asm
--- a/source/common/x86/ipfilter8.asm Mon Oct 28 13:54:08 2013 +0530
+++ b/source/common/x86/ipfilter8.asm Mon Oct 28 16:47:51 2013 +0530
@@ -630,7 +630,7 @@
;-----------------------------------------------------------------------------
; void interp_4tap_vert_pp_12x16(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
;-----------------------------------------------------------------------------
-%macro FILTER_V8_W12_H2 2
+%macro FILTER_V4_W12_H2 2
INIT_XMM sse4
cglobal interp_4tap_vert_pp_%1x%2, 4, 6, 8
@@ -647,70 +647,69 @@
pshufb m1, m0, [tab_Vm]
pshufb m0, [tab_Vm + 16]
-xor r4, r4
-add r4d, %2
+mov r4d, %2
.loop
movu m2, [r0]
movu m3, [r0 + r1]
punpcklbw m4, m2, m3,
-punpckhbw m5, m2, m3,
+punpckhbw m2, m3,
pmaddubsw m4, m1
-pmaddubsw m5, m1
+pmaddubsw m2, m1
-movu m2, [r0 + 2 * r1]
+movu m5, [r0 + 2 * r1]
lea r5, [r0 + 2 * r1]
movu m3, [r5 + r1]
-punpcklbw m6, m2, m3,
-punpckhbw m7, m2, m3,
+punpcklbw m6, m5, m3,
+punpckhbw m5, m3,
pmaddubsw m6, m0
-pmaddubsw m7, m0
+pmaddubsw m5, m0
-paddw m4, m6;
-paddw m5, m7;
+paddw m4, m6
+paddw m2, m5
mova m6, [tab_c_512]
pmulhrsw m4, m6
-pmulhrsw m5, m6
+pmulhrsw m2, m6
-packuswb m4, m5
+packuswb m4, m2
-movh [r2], m4
-pextrd [r2 + 8], m4, 2
+movh [r2], m4
+pextrd [r2 + 8], m4, 2
movu m2, [r0 + r1]
movu m3, [r0 + 2 * r1]
punpcklbw m4, m2, m3,
-punpckhbw m5, m2, m3,
+punpckhbw m2, m3,
pmaddubsw m4, m1
-pmaddubsw m5, m1
+pmaddubsw m2, m1
lea r5, [r0 + 2 * r1]
-movu m2, [r5 + r1]
+movu m5, [r5 + r1]
movu m3, [r5 + 2 * r1]
-punpcklbw m6, m2, m3,
-punpckhbw m7, m2, m3,
+punpcklbw m6, m5, m3,
+punpckhbw m5, m3,
pmaddubsw m6, m0
-pmaddubsw m7, m0
+pmaddubsw m5, m0
paddw m4, m6
-paddw m5, m7
+paddw m2, m5
mova m6, [tab_c_512]
pmulhrsw m4, m6
-pmulhrsw m5, m6
+pmulhrsw m2, m6
-packuswb m4, m5
+packuswb m4, m2
movh [r2 + r3], m4
pextrd [r2 + r3 + 8], m4, 2
@@ -723,4 +722,4 @@
RET
%endmacro
-FILTER_V8_W12_H2 12, 16
+FILTER_V4_W12_H2 12, 16
More information about the x265-devel
mailing list