[x265] [PATCH] blockcopy_pp_8x12: sse2 asm code optimization

chen chenm003 at 163.com
Tue Feb 3 13:28:10 CET 2015


right
At 2015-02-03 20:25:48,praveen at multicorewareinc.com wrote:
># HG changeset patch
># User Praveen Tiwari
># Date 1422966338 -19800
># Node ID 3fc854e9e1b07e490c1422635dffea7b62e911c9
># Parent  bfc9a2d99e20568cb43d9fba0133735009793b00
>blockcopy_pp_8x12: sse2 asm code optimization
>
>improved, 235.05c -> 158.79c
>
>diff -r bfc9a2d99e20 -r 3fc854e9e1b0 source/common/x86/blockcopy8.asm
>--- a/source/common/x86/blockcopy8.asm Tue Feb 03 17:14:55 2015 +0530
>+++ b/source/common/x86/blockcopy8.asm Tue Feb 03 17:55:38 2015 +0530
>@@ -351,17 +351,34 @@
> ; void blockcopy_pp_8x12(pixel* dst, intptr_t dstStride, const pixel* src, intptr_t srcStride)
> ;-----------------------------------------------------------------------------
> INIT_XMM sse2
>-cglobal blockcopy_pp_8x12, 4, 5, 2
>-    mov      r4d,       12/2
>-.loop:
>-    movh     m0,        [r2]
>-    movh     m1,        [r2 + r3]
>-    movh     [r0],      m0
>-    movh     [r0 + r1], m1
>-    dec      r4d
>-    lea      r0,        [r0 + 2 * r1]
>-    lea      r2,        [r2 + 2 * r3]
>-    jnz      .loop
>+cglobal blockcopy_pp_8x12, 4, 6, 4     ; 4 args, 6 GPRs (r0-r5 are all used below), 4 XMM regs
>+    ; r0 = dst, r1 = dstStride, r2 = src, r3 = srcStride (argument order of the C prototype above)
>+    lea      r4, [3 * r3]               ; r4 = 3 * srcStride: offset of the 4th row in a group of 4
>+    lea      r5, [3 * r1]               ; r5 = 3 * dstStride: needs the 6th GPR declared in cglobal
>+    ; rows 0-3: load four rows first, then store them
>+    movh     m0, [r2]
>+    movh     m1, [r2 + r3]
>+    movh     m2, [r2 + 2 * r3]
>+    movh     m3, [r2 + r4]
>+
>+    movh     [r0],          m0
>+    movh     [r0 + r1],     m1
>+    movh     [r0 + 2 * r1], m2
>+    movh     [r0 + r5],     m3
>+    ; rows 4-7 and 8-11: same 4-row copy, emitted twice by %rep instead of looping
>+    %rep 2
>+    lea      r2, [r2 + 4 * r3]          ; src += 4 rows
>+    movh     m0, [r2]
>+    movh     m1, [r2 + r3]
>+    movh     m2, [r2 + 2 * r3]
>+    movh     m3, [r2 + r4]
>+
>+    lea      r0,            [r0 + 4 * r1] ; dst += 4 rows
>+    movh     [r0],          m0
>+    movh     [r0 + r1],     m1
>+    movh     [r0 + 2 * r1], m2
>+    movh     [r0 + r5],     m3
>+    %endrep
>     RET
> 
> ;-----------------------------------------------------------------------------
>_______________________________________________
>x265-devel mailing list
>x265-devel at videolan.org
>https://mailman.videolan.org/listinfo/x265-devel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150203/ea0c4129/attachment-0001.html>


More information about the x265-devel mailing list