[x265] [PATCH] blockcopy_pp: avx asm code indentation

Praveen Tiwari <praveen at multicorewareinc.com>
Wed Sep 24 15:29:43 CEST 2014


# HG changeset patch
# User Praveen Tiwari
# Date 1411565375 -19800
# Node ID f95f684622945299497a2055ac2f9d28434535d2
# Parent  de1435d8eb446e21bcf2725caa55de7c44f67085
blockcopy_pp: avx asm code indentation

diff -r de1435d8eb44 -r f95f68462294 source/common/x86/blockcopy8.asm
--- a/source/common/x86/blockcopy8.asm	Wed Sep 24 18:52:33 2014 +0530
+++ b/source/common/x86/blockcopy8.asm	Wed Sep 24 18:59:35 2014 +0530
@@ -607,90 +607,90 @@
 
 INIT_YMM avx
 cglobal blockcopy_pp_32x8, 4, 6, 6
-    lea    r4, [3 * r1]
-    lea    r5, [3 * r3]
-
-    movu    m0, [r2]
-    movu    m1, [r2 + r3]
-    movu    m2, [r2 + 2 * r3]
-    movu    m3, [r2 + r5]
-    lea     r2, [r2 + 4 * r3]
-    movu    m4, [r2]
-    movu    m5, [r2 + r3]
-
-    movu    [r0], m0
-    movu    [r0 + r1], m1
-    movu    [r0 + 2 * r1], m2
-    movu    [r0 + r4], m3
-    lea     r0, [r0 + 4 * r1]
-    movu    [r0], m4
-    movu    [r0 + r1], m5
-
-    movu    m0, [r2 + 2 * r3]
-    movu    m1, [r2 + r5]
-
-    movu    [r0 + 2 * r1], m0
-    movu    [r0 + r4], m1
-    RET
+lea    r4, [3 * r1]
+lea    r5, [3 * r3]
+
+movu    m0, [r2]
+movu    m1, [r2 + r3]
+movu    m2, [r2 + 2 * r3]
+movu    m3, [r2 + r5]
+lea     r2, [r2 + 4 * r3]
+movu    m4, [r2]
+movu    m5, [r2 + r3]
+
+movu    [r0], m0
+movu    [r0 + r1], m1
+movu    [r0 + 2 * r1], m2
+movu    [r0 + r4], m3
+lea     r0, [r0 + 4 * r1]
+movu    [r0], m4
+movu    [r0 + r1], m5
+
+movu    m0, [r2 + 2 * r3]
+movu    m1, [r2 + r5]
+
+movu    [r0 + 2 * r1], m0
+movu    [r0 + r4], m1
+RET
 
 INIT_YMM avx
 cglobal blockcopy_pp_32x16, 4, 6, 6
-    lea    r4,  [3 * r1]
-    lea    r5,  [3 * r3]
-
-    movu    m0, [r2]
-    movu    m1, [r2 + r3]
-    movu    m2, [r2 + 2 * r3]
-    movu    m3, [r2 + r5]
-    lea     r2, [r2 + 4 * r3]
-    movu    m4, [r2]
-    movu    m5, [r2 + r3]
-
-    movu    [r0], m0
-    movu    [r0 + r1], m1
-    movu    [r0 + 2 * r1], m2
-    movu    [r0 + r4], m3
-    lea     r0, [r0 + 4 * r1]
-    movu    [r0], m4
-    movu    [r0 + r1], m5
-
-    movu    m0, [r2 + 2 * r3]
-    movu    m1, [r2 + r5]
-    lea     r2, [r2 + 4 * r3]
-    movu    m2, [r2]
-    movu    m3, [r2 + r3]
-    movu    m4, [r2 + 2 * r3]
-    movu    m5, [r2 + r5]
-
-    movu    [r0 + 2 * r1], m0
-    movu    [r0 + r4], m1
-    lea     r0, [r0 + 4 * r1]
-    movu    [r0], m2
-    movu    [r0 + r1], m3
-    movu    [r0 + 2 * r1], m4
-    movu    [r0 + r4], m5
-
-    lea     r2, [r2 + 4 * r3]
-    movu    m0, [r2]
-    movu    m1, [r2 + r3]
-    movu    m2, [r2 + 2 * r3]
-    movu    m3, [r2 + r5]
-
-    lea     r0, [r0 + 4 * r1]
-    movu    [r0], m0
-    movu    [r0 + r1], m1
-    movu    [r0 + 2 * r1], m2
-    movu    [r0 + r4], m3
-    RET
+lea    r4,  [3 * r1]
+lea    r5,  [3 * r3]
+
+movu    m0, [r2]
+movu    m1, [r2 + r3]
+movu    m2, [r2 + 2 * r3]
+movu    m3, [r2 + r5]
+lea     r2, [r2 + 4 * r3]
+movu    m4, [r2]
+movu    m5, [r2 + r3]
+
+movu    [r0], m0
+movu    [r0 + r1], m1
+movu    [r0 + 2 * r1], m2
+movu    [r0 + r4], m3
+lea     r0, [r0 + 4 * r1]
+movu    [r0], m4
+movu    [r0 + r1], m5
+
+movu    m0, [r2 + 2 * r3]
+movu    m1, [r2 + r5]
+lea     r2, [r2 + 4 * r3]
+movu    m2, [r2]
+movu    m3, [r2 + r3]
+movu    m4, [r2 + 2 * r3]
+movu    m5, [r2 + r5]
+
+movu    [r0 + 2 * r1], m0
+movu    [r0 + r4], m1
+lea     r0, [r0 + 4 * r1]
+movu    [r0], m2
+movu    [r0 + r1], m3
+movu    [r0 + 2 * r1], m4
+movu    [r0 + r4], m5
+
+lea     r2, [r2 + 4 * r3]
+movu    m0, [r2]
+movu    m1, [r2 + r3]
+movu    m2, [r2 + 2 * r3]
+movu    m3, [r2 + r5]
+
+lea     r0, [r0 + 4 * r1]
+movu    [r0], m0
+movu    [r0 + r1], m1
+movu    [r0 + 2 * r1], m2
+movu    [r0 + r4], m3
+RET
 
 ;-----------------------------------------------------------------------------
 ; void blockcopy_pp_32x24(pixel *dest, intptr_t deststride, pixel *src, intptr_t srcstride)
 ;-----------------------------------------------------------------------------
 INIT_YMM avx
 cglobal blockcopy_pp_32x24, 4, 7, 6
-lea    r4,  [3 * r1]
-lea    r5,  [3 * r3]
-mov    r6d, 24/8
+    lea    r4,  [3 * r1]
+    lea    r5,  [3 * r3]
+    mov    r6d, 24/8
 
 .loop:
     movu    m0, [r2]


More information about the x265-devel mailing list