[x265] [PATCH] align DCT8's stack to 64 bytes to avoid a crash and improve cache performance
Min Chen
chenm003 at 163.com
Wed Apr 16 04:49:51 CEST 2014
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1397616580 -28800
# Node ID 1a8b54ce0dfa8eba524c4cadc81939710054ae44
# Parent 1cf67a7b362d24d292d7cca574cbcfe88a8eb1cb
align DCT8's stack to 64 bytes to avoid a crash and improve cache performance
diff -r 1cf67a7b362d -r 1a8b54ce0dfa source/common/x86/dct8.asm
--- a/source/common/x86/dct8.asm Mon Apr 14 21:26:37 2014 -0500
+++ b/source/common/x86/dct8.asm Wed Apr 16 10:49:40 2014 +0800
@@ -834,8 +834,14 @@
ret
-cglobal idct8, 3,7,8,0-16*mmsize
+cglobal idct8, 3,7,8 ;,0-16*mmsize
+ ; align the stack to 64 bytes
mov r5, rsp
+ sub rsp, 16*mmsize + gprsize
+ and rsp, ~(64-1)
+ mov [rsp + 16*mmsize], r5
+ mov r5, rsp
+
lea r4, [tab_idct8_3]
lea r6, [tab_dct4]
@@ -866,4 +872,7 @@
call patial_butterfly_inverse_internal_pass2
+ ; restore the original stack pointer
+ mov rsp, [rsp + 16*mmsize]
+
RET
More information about the x265-devel
mailing list