[x265] [PATCH 2 of 4] asm: reduce code size by reducing constant offsets

Min Chen chenm003 at 163.com
Thu Oct 31 14:03:00 CET 2013


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1383224429 -28800
# Node ID a64e813de628856f6916bdafcc069f19d9d81082
# Parent  08bc7ccc8aad56095837e143896418cd39a7e930
asm: reduce code size by reducing constant offsets

diff -r 08bc7ccc8aad -r a64e813de628 source/common/x86/ipfilter8.asm
--- a/source/common/x86/ipfilter8.asm	Thu Oct 31 20:59:27 2013 +0800
+++ b/source/common/x86/ipfilter8.asm	Thu Oct 31 21:00:29 2013 +0800
@@ -2077,8 +2077,6 @@
     mova        m6, [tab_c_128]
     mova        m7, [tab_c_64_n64]
 
-    ;shr         r4d, 2
-    lea         r2, [r2 - 16]
 .loopH:
 
     xor         r5d, r5d
@@ -2105,19 +2103,19 @@
     add         r5, 8
     cmp         r5, r3
     jg          .width4
-    movu        [r2 + r5 * 2 + FENC_STRIDE * 0], m0
-    movu        [r2 + r5 * 2 + FENC_STRIDE * 2], m1
-    movu        [r2 + r5 * 2 + FENC_STRIDE * 4], m2
-    movu        [r2 + r5 * 2 + FENC_STRIDE * 6], m3
+    movu        [r2 + r5 * 2 + FENC_STRIDE * 0 - 16], m0
+    movu        [r2 + r5 * 2 + FENC_STRIDE * 2 - 16], m1
+    movu        [r2 + r5 * 2 + FENC_STRIDE * 4 - 16], m2
+    movu        [r2 + r5 * 2 + FENC_STRIDE * 6 - 16], m3
     lea         r5, [r5 + 8]
     je          .nextH
     jmp         .loopW
 
 .width4:
-    movh        [r2 + r5 * 2 + FENC_STRIDE * 0], m0
-    movh        [r2 + r5 * 2 + FENC_STRIDE * 2], m1
-    movh        [r2 + r5 * 2 + FENC_STRIDE * 4], m2
-    movh        [r2 + r5 * 2 + FENC_STRIDE * 6], m3
+    movh        [r2 + r5 * 2 + FENC_STRIDE * 0 - 16], m0
+    movh        [r2 + r5 * 2 + FENC_STRIDE * 2 - 16], m1
+    movh        [r2 + r5 * 2 + FENC_STRIDE * 4 - 16], m2
+    movh        [r2 + r5 * 2 + FENC_STRIDE * 6 - 16], m3
 
 .nextH:
     lea         r0, [r0 + r1 * 4]



More information about the x265-devel mailing list