[x265] [PATCH 2 of 4] asm: reduce code size by reducing the constant offset
Min Chen
chenm003 at 163.com
Thu Oct 31 14:03:00 CET 2013
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1383224429 -28800
# Node ID a64e813de628856f6916bdafcc069f19d9d81082
# Parent 08bc7ccc8aad56095837e143896418cd39a7e930
asm: reduce code size by reducing the constant offset
diff -r 08bc7ccc8aad -r a64e813de628 source/common/x86/ipfilter8.asm
--- a/source/common/x86/ipfilter8.asm Thu Oct 31 20:59:27 2013 +0800
+++ b/source/common/x86/ipfilter8.asm Thu Oct 31 21:00:29 2013 +0800
@@ -2077,8 +2077,6 @@
mova m6, [tab_c_128]
mova m7, [tab_c_64_n64]
- ;shr r4d, 2
- lea r2, [r2 - 16]
.loopH:
xor r5d, r5d
@@ -2105,19 +2103,19 @@
add r5, 8
cmp r5, r3
jg .width4
- movu [r2 + r5 * 2 + FENC_STRIDE * 0], m0
- movu [r2 + r5 * 2 + FENC_STRIDE * 2], m1
- movu [r2 + r5 * 2 + FENC_STRIDE * 4], m2
- movu [r2 + r5 * 2 + FENC_STRIDE * 6], m3
+ movu [r2 + r5 * 2 + FENC_STRIDE * 0 - 16], m0
+ movu [r2 + r5 * 2 + FENC_STRIDE * 2 - 16], m1
+ movu [r2 + r5 * 2 + FENC_STRIDE * 4 - 16], m2
+ movu [r2 + r5 * 2 + FENC_STRIDE * 6 - 16], m3
lea r5, [r5 + 8]
je .nextH
jmp .loopW
.width4:
- movh [r2 + r5 * 2 + FENC_STRIDE * 0], m0
- movh [r2 + r5 * 2 + FENC_STRIDE * 2], m1
- movh [r2 + r5 * 2 + FENC_STRIDE * 4], m2
- movh [r2 + r5 * 2 + FENC_STRIDE * 6], m3
+ movh [r2 + r5 * 2 + FENC_STRIDE * 0 - 16], m0
+ movh [r2 + r5 * 2 + FENC_STRIDE * 2 - 16], m1
+ movh [r2 + r5 * 2 + FENC_STRIDE * 4 - 16], m2
+ movh [r2 + r5 * 2 + FENC_STRIDE * 6 - 16], m3
.nextH:
lea r0, [r0 + r1 * 4]
More information about the x265-devel
mailing list