[x265] Add Support for IBT/SHSTK in ASM
Yaswanth Sastry
yaswanth.sastry at multicorewareinc.com
Wed Mar 5 04:43:26 UTC 2025
>From 4ebc38057fa79a74e0724b5d7d1ab4b4365d688a Mon Sep 17 00:00:00 2001
From: PavanTarun <pavan.tarun at multicorewareinc.com>
Date: Mon, 3 Mar 2025 16:37:37 +0530
Subject: [PATCH 1/1] Add Support for IBT/SHSTK in ASM
---
source/CMakeLists.txt | 13 ++++++++++
source/common/x86/blockcopy8.asm | 1 +
source/common/x86/const-a.asm | 1 +
source/common/x86/cpu-a.asm | 1 +
source/common/x86/dct8.asm | 1 +
source/common/x86/h-ipfilter16.asm | 7 +++---
source/common/x86/h-ipfilter8.asm | 1 +
source/common/x86/h4-ipfilter16.asm | 7 +++---
source/common/x86/intrapred16.asm | 1 +
source/common/x86/intrapred8.asm | 1 +
source/common/x86/intrapred8_allangs.asm | 1 +
source/common/x86/ipfilter16.asm | 1 +
source/common/x86/ipfilter8.asm | 1 +
source/common/x86/loopfilter.asm | 1 +
source/common/x86/mc-a.asm | 1 +
source/common/x86/mc-a2.asm | 1 +
source/common/x86/pixel-32.asm | 2 +-
source/common/x86/pixel-a.asm | 1 +
source/common/x86/pixel-util8.asm | 1 +
source/common/x86/pixeladd8.asm | 1 +
source/common/x86/sad-a.asm | 1 +
source/common/x86/sad16-a.asm | 1 +
source/common/x86/seaintegral.asm | 1 +
source/common/x86/ssd-a.asm | 1 +
source/common/x86/v4-ipfilter16.asm | 7 +++---
source/common/x86/v4-ipfilter8.asm | 1 +
source/common/x86/x86inc.asm | 32 +++++++++++++++++++++++-
27 files changed, 78 insertions(+), 11 deletions(-)
diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
index 37dbe1a87..dde360493 100755
--- a/source/CMakeLists.txt
+++ b/source/CMakeLists.txt
@@ -152,6 +152,13 @@ if(UNIX)
if(ENABLE_LIBVMAF)
add_definitions(-DENABLE_LIBVMAF)
endif()
+ if(X64) # the CET option is only offered for 64-bit x86 builds (this sits inside if(UNIX))
+ option(ENABLE_CET "Enable Control-flow Enforcement" OFF) # opt-in; disabled by default
+ if(ENABLE_CET)
+ add_definitions(-DENABLE_CET) # added as a compile definition for the C/C++ sources
+ list(APPEND ASM_FLAGS "-DENABLE_CET=1") # same switch for the assembler; x86inc.asm tests it with %if ENABLE_CET
+ endif()
+ endif(X64)
endif(UNIX)
if((X64 AND NOT WIN32) OR ARM64)
@@ -483,6 +490,16 @@ int main() { return 0; }")
# linked.
list(APPEND LINKER_OPTIONS "-fsanitize=${FSANITIZE}")
endif()
+    if(ENABLE_CET)
+        # Build C/C++ objects with CET instrumentation (IBT + shadow stack)
+        # when the compiler accepts the flag.
+        check_cxx_compiler_flag(-fcf-protection=full CC_HAS_CET_FLAG)
+        if(CC_HAS_CET_FLAG)
+            add_definitions(-fcf-protection=full)
+        else()
+            message(WARNING "ENABLE_CET is ON but the compiler rejects -fcf-protection=full; C/C++ sources will be built without CET instrumentation")
+        endif()
+    endif()
option(ENABLE_AGGRESSIVE_CHECKS "Enable stack protection and -ftrapv" OFF)
if(ENABLE_AGGRESSIVE_CHECKS)
# use with care, -ftrapv can cause testbench SIGILL exceptions
diff --git a/source/common/x86/blockcopy8.asm b/source/common/x86/blockcopy8.asm
index 1ea772182..f4eaa5788 100644
--- a/source/common/x86/blockcopy8.asm
+++ b/source/common/x86/blockcopy8.asm
@@ -6925,3 +6925,4 @@ cglobal cpy1Dto2D_shr_32, 3, 4, 6
dec r3d
jnz .loop
RET
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/const-a.asm b/source/common/x86/const-a.asm
index c8142bbb5..3dbef6169 100644
--- a/source/common/x86/const-a.asm
+++ b/source/common/x86/const-a.asm
@@ -157,3 +157,4 @@ const trans8_shuf, times 1 dd 0, 4, 1, 5, 2, 6, 3, 7
;; 64-bit constants
const pq_1, times 1 dq 1
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/cpu-a.asm b/source/common/x86/cpu-a.asm
index 747b921f7..519ae42a8 100644
--- a/source/common/x86/cpu-a.asm
+++ b/source/common/x86/cpu-a.asm
@@ -201,3 +201,4 @@ cglobal safe_intel_cpu_indicator_init
pop r1
pop r0
ret
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/dct8.asm b/source/common/x86/dct8.asm
index 618b1d41c..4604e031a 100644
--- a/source/common/x86/dct8.asm
+++ b/source/common/x86/dct8.asm
@@ -7639,3 +7639,4 @@ cglobal psyRdoQuant_1p32, 7, 9, 16
RET
%endif
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/h-ipfilter16.asm b/source/common/x86/h-ipfilter16.asm
index 688688fa2..2761c1632 100644
--- a/source/common/x86/h-ipfilter16.asm
+++ b/source/common/x86/h-ipfilter16.asm
@@ -2697,6 +2697,7 @@ cglobal interp_8tap_horiz_ps_12x16, 4, 6, 8
add r2, r3
add r0, r1
dec r4d
- jnz .loop0
- RET
-%endif
+ jnz .loop0
+ RET
+%endif
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/h-ipfilter8.asm b/source/common/x86/h-ipfilter8.asm
index 757efc509..41538f7a5 100644
--- a/source/common/x86/h-ipfilter8.asm
+++ b/source/common/x86/h-ipfilter8.asm
@@ -6735,3 +6735,4 @@ cglobal interp_4tap_horiz_pp_2x16, 4, 6, 6
pextrw [r2 + r3 * 2], xm1, 6
pextrw [r2 + r4], xm1, 7
RET
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/h4-ipfilter16.asm b/source/common/x86/h4-ipfilter16.asm
index 0d1d20e56..8db3481c0 100644
--- a/source/common/x86/h4-ipfilter16.asm
+++ b/source/common/x86/h4-ipfilter16.asm
@@ -2629,6 +2629,7 @@ cglobal interp_4tap_horiz_ps_6x%1, 4, 7, 6
RET
%endif
%endmacro
-
- IPFILTER_CHROMA_PS_6xN_AVX2 8
- IPFILTER_CHROMA_PS_6xN_AVX2 16
+
+ IPFILTER_CHROMA_PS_6xN_AVX2 8
+ IPFILTER_CHROMA_PS_6xN_AVX2 16
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/intrapred16.asm b/source/common/x86/intrapred16.asm
index e5e23d3ec..785326a5c 100644
--- a/source/common/x86/intrapred16.asm
+++ b/source/common/x86/intrapred16.asm
@@ -25115,3 +25115,4 @@ cglobal intra_filter_4x4, 2,4,4
mov [r1 + 16], r2w ; topLast
mov [r1 + 32], r3w ; LeftLast
RET
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/intrapred8.asm b/source/common/x86/intrapred8.asm
index a1e4dd905..f351c0a36 100644
--- a/source/common/x86/intrapred8.asm
+++ b/source/common/x86/intrapred8.asm
@@ -22546,3 +22546,4 @@ cglobal intra_filter_4x4, 2,4,4
mov [r1 + 8], r2b ; topLast
mov [r1 + 16], r3b ; LeftLast
RET
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/intrapred8_allangs.asm b/source/common/x86/intrapred8_allangs.asm
index f49bb21ae..87d73c5d0 100644
--- a/source/common/x86/intrapred8_allangs.asm
+++ b/source/common/x86/intrapred8_allangs.asm
@@ -24121,3 +24121,4 @@ cglobal all_angs_pred_4x4, 4, 4, 8
movd [r0 + 524], m7 ;byte[5, 6, 7, 8]
RET
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/ipfilter16.asm b/source/common/x86/ipfilter16.asm
index 8c6e31591..9c5e456f6 100644
--- a/source/common/x86/ipfilter16.asm
+++ b/source/common/x86/ipfilter16.asm
@@ -14071,3 +14071,4 @@ cglobal interp_8tap_vert_%1_64x%2, 5, 8, 22
;-------------------------------------------------------------------------------------------------------------
;ipfilter_luma_avx512 code end
;-------------------------------------------------------------------------------------------------------------
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/ipfilter8.asm b/source/common/x86/ipfilter8.asm
index fca182df4..6fad077e9 100644
--- a/source/common/x86/ipfilter8.asm
+++ b/source/common/x86/ipfilter8.asm
@@ -14947,3 +14947,4 @@ FILTER_VERT_LUMA_64xN_AVX512 ps, 64
;-------------------------------------------------------------------------------------------------------------
;ipfilter_luma_avx512 code end
;-------------------------------------------------------------------------------------------------------------
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/loopfilter.asm b/source/common/x86/loopfilter.asm
index dab6169d6..8082b02df 100644
--- a/source/common/x86/loopfilter.asm
+++ b/source/common/x86/loopfilter.asm
@@ -4213,3 +4213,4 @@ cglobal pelFilterChroma_V, 6,6,5
%endif
RET
%endif ; ARCH_X86_64
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/mc-a.asm b/source/common/x86/mc-a.asm
index 78af04b47..428088683 100644
--- a/source/common/x86/mc-a.asm
+++ b/source/common/x86/mc-a.asm
@@ -7410,3 +7410,4 @@ cglobal prefetch_ref, 3,3
prefetcht0 [r0+r1*2]
prefetcht0 [r0+r2]
RET
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/mc-a2.asm b/source/common/x86/mc-a2.asm
index 94a995ee0..96d2e3e2d 100644
--- a/source/common/x86/mc-a2.asm
+++ b/source/common/x86/mc-a2.asm
@@ -1591,3 +1591,4 @@ CUTREE_FIX8
INIT_YMM avx2
CUTREE_FIX8
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/pixel-32.asm b/source/common/x86/pixel-32.asm
index 9d1696930..2a40f0091 100644
--- a/source/common/x86/pixel-32.asm
+++ b/source/common/x86/pixel-32.asm
@@ -418,4 +418,4 @@ cglobal pixel_ssim_4x4x2_core, 0,5
jge .loop
emms
RET
-
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/pixel-a.asm b/source/common/x86/pixel-a.asm
index 4f5f276b5..0f787d184 100644
--- a/source/common/x86/pixel-a.asm
+++ b/source/common/x86/pixel-a.asm
@@ -16581,3 +16581,4 @@ cglobal normFact64, 4, 5, 6
%endif
movq [r3], xm3
RET
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/pixel-util8.asm b/source/common/x86/pixel-util8.asm
index 6ad2852d3..707251724 100644
--- a/source/common/x86/pixel-util8.asm
+++ b/source/common/x86/pixel-util8.asm
@@ -8991,3 +8991,4 @@ cglobal costC1C2Flag, 4,12,2
or eax, r4d
RET
%endif ; ARCH_X86_64
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/pixeladd8.asm b/source/common/x86/pixeladd8.asm
index 77f7a124a..e4e28a0b7 100644
--- a/source/common/x86/pixeladd8.asm
+++ b/source/common/x86/pixeladd8.asm
@@ -1646,3 +1646,4 @@ cglobal pixel_add_ps_aligned_32x64, 6, 9, 5
;-----------------------------------------------------------------------------
; pixel_add_ps avx512 code end
;-----------------------------------------------------------------------------
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/sad-a.asm b/source/common/x86/sad-a.asm
index 1ffff1b39..5e7bc7a03 100644
--- a/source/common/x86/sad-a.asm
+++ b/source/common/x86/sad-a.asm
@@ -6758,3 +6758,4 @@ SAD_MxN_AVX512 32, 24
SAD_MxN_AVX512 32, 32
SAD_MxN_AVX512 32, 64
%endif
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/sad16-a.asm b/source/common/x86/sad16-a.asm
index f98c80bbf..09d11ded5 100644
--- a/source/common/x86/sad16-a.asm
+++ b/source/common/x86/sad16-a.asm
@@ -4368,3 +4368,4 @@ cglobal pixel_sad_x4_64x64, 6,8,15
PROCESS_SAD_X4_END_AVX512
RET
%endif
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/seaintegral.asm b/source/common/x86/seaintegral.asm
index bd664883a..cdbaa3a83 100644
--- a/source/common/x86/seaintegral.asm
+++ b/source/common/x86/seaintegral.asm
@@ -1060,3 +1060,4 @@ cglobal integral32h, 3, 5, 3
.end:
RET
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/ssd-a.asm b/source/common/x86/ssd-a.asm
index 8a7ed11bb..1d55f7250 100644
--- a/source/common/x86/ssd-a.asm
+++ b/source/common/x86/ssd-a.asm
@@ -3703,3 +3703,4 @@ cglobal pixel_ssd_s_aligned_32, 2,4,5
%endif
RET
%endif
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/v4-ipfilter16.asm b/source/common/x86/v4-ipfilter16.asm
index 6981ffd33..aa3c49441 100644
--- a/source/common/x86/v4-ipfilter16.asm
+++ b/source/common/x86/v4-ipfilter16.asm
@@ -3525,7 +3525,8 @@ cglobal interp_4tap_vert_%1_8x12, 4, 7, 15
%endif
%endmacro
-FILTER_VER_CHROMA_AVX2_8x12 pp, 1, 6
-FILTER_VER_CHROMA_AVX2_8x12 ps, 0, INTERP_SHIFT_PS
-FILTER_VER_CHROMA_AVX2_8x12 sp, 1, INTERP_SHIFT_SP
+FILTER_VER_CHROMA_AVX2_8x12 pp, 1, 6
+FILTER_VER_CHROMA_AVX2_8x12 ps, 0, INTERP_SHIFT_PS
+FILTER_VER_CHROMA_AVX2_8x12 sp, 1, INTERP_SHIFT_SP
FILTER_VER_CHROMA_AVX2_8x12 ss, 0, 6
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/v4-ipfilter8.asm b/source/common/x86/v4-ipfilter8.asm
index 99ef68dd8..7aa62aad6 100644
--- a/source/common/x86/v4-ipfilter8.asm
+++ b/source/common/x86/v4-ipfilter8.asm
@@ -12797,3 +12797,4 @@ cglobal interp_4tap_vert_ss_%1x%2, 5, 6, 7
FILTER_VER_CHROMA_SS_W8_H2 8, 12
FILTER_VER_CHROMA_SS_W8_H2 8, 64
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/x86inc.asm b/source/common/x86/x86inc.asm
index 805aa1180..974980ccc 100644
--- a/source/common/x86/x86inc.asm
+++ b/source/common/x86/x86inc.asm
@@ -90,7 +90,43 @@
SECTION .rodata align=%1
%endif
%endmacro
-
+; ENABLE_CET is only passed on the assembler command line (-DENABLE_CET=1)
+; when Control-flow Enforcement is requested. Default it to 0 so that
+; `%if ENABLE_CET` is a valid constant expression in every other build.
+%ifndef ENABLE_CET
+    %define ENABLE_CET 0
+%endif
+;-----------------------------------------------------------------------------
+; SECTION_IBT_SHSTK
+; Emits a .note.gnu.property note declaring the object IBT- and
+; SHSTK-compatible. Expands to nothing unless ENABLE_CET is non-zero and the
+; output format is elf64 (the note layout below uses 8-byte alignment).
+; The macro switches the current section and does not switch back, so invoke
+; it as the last statement of a source file.
+;-----------------------------------------------------------------------------
+%macro SECTION_IBT_SHSTK 0
+    %if ENABLE_CET
+        %ifidn __OUTPUT_FORMAT__,elf64
+            SECTION .note.gnu.property note
+            align 8, db 0
+            dd %%name_end - %%name      ; n_namesz: size of "GNU\0"
+            dd %%desc_end - %%desc      ; n_descsz: size of the property array
+            dd 5                        ; n_type: NT_GNU_PROPERTY_TYPE_0
+        %%name:
+            db "GNU", 0
+        %%name_end:
+            align 8, db 0               ; pad with zeros, not the default NOP fill
+        %%desc:
+            dd 0xc0000002               ; pr_type: GNU_PROPERTY_X86_FEATURE_1_AND
+            dd %%data_end - %%data      ; pr_datasz
+        %%data:
+            dd 0x3                      ; GNU_PROPERTY_X86_FEATURE_1_IBT | _SHSTK
+        %%data_end:
+            align 8, db 0               ; zero-pad the property to 8 bytes
+        %%desc_end:
+        %endif
+    %endif
+%endmacro
%if WIN64
%define PIC
%elif ARCH_X86_64 == 0
@@ -737,6 +760,13 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
%assign stack_size 0 ; amount of stack space that can be freely used inside a function
%assign stack_size_padded 0 ; total amount of allocated stack space, including space for callee-saved xmm registers on WIN64 and alignment padding
%assign xmm_regs_used 0 ; number of XMM registers requested, used for dealing with callee-saved registers on WIN64 and vzeroupper
+ %if ENABLE_CET ; emit a landing pad only when the build passes -DENABLE_CET=1
+ %if ARCH_X86_64
+ endbr64 ; placed ahead of PROLOGUE; marks the entry as a valid indirect-branch target under IBT
+ %else
+ endbr32 ; 32-bit counterpart of the landing pad
+ %endif
+ %endif
%ifnidn %3, ""
PROLOGUE %3
%endif
--
2.43.0
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20250305/274bccc6/attachment-0001.htm>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0002-Add-Support-for-IBT-SHSTK-in-ASM.patch
Type: application/octet-stream
Size: 13630 bytes
Desc: 0002-Add-Support-for-IBT-SHSTK-in-ASM.patch
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20250305/274bccc6/attachment-0001.obj>
More information about the x265-devel
mailing list