[x265] [PATCH] align DCT8's stack to 64 bytes to avoid crashes and improve cache performance

Min Chen chenm003 at 163.com
Wed Apr 16 04:49:51 CEST 2014


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1397616580 -28800
# Node ID 1a8b54ce0dfa8eba524c4cadc81939710054ae44
# Parent  1cf67a7b362d24d292d7cca574cbcfe88a8eb1cb
align DCT8's stack to 64 bytes to avoid crashes and improve cache performance

diff -r 1cf67a7b362d -r 1a8b54ce0dfa source/common/x86/dct8.asm
--- a/source/common/x86/dct8.asm	Mon Apr 14 21:26:37 2014 -0500
+++ b/source/common/x86/dct8.asm	Wed Apr 16 10:49:40 2014 +0800
@@ -834,8 +834,14 @@
 
     ret
 
-cglobal idct8, 3,7,8,0-16*mmsize
+cglobal idct8, 3,7,8 ;,0-16*mmsize
+    ; align the stack to 64 bytes
     mov         r5, rsp
+    sub         rsp, 16*mmsize + gprsize
+    and         rsp, ~(64-1)
+    mov         [rsp + 16*mmsize], r5
+    mov         r5, rsp
+
     lea         r4, [tab_idct8_3]
     lea         r6, [tab_dct4]
 
@@ -866,4 +872,7 @@
 
     call        patial_butterfly_inverse_internal_pass2
 
+    ; restore the original stack pointer
+    mov         rsp, [rsp + 16*mmsize]
+
     RET



More information about the x265-devel mailing list