[x265] Add Support for IBT/SHSTK in ASM

Yaswanth Sastry yaswanth.sastry at multicorewareinc.com
Wed Mar 5 04:43:26 UTC 2025


From 4ebc38057fa79a74e0724b5d7d1ab4b4365d688a Mon Sep 17 00:00:00 2001
From: PavanTarun <pavan.tarun at multicorewareinc.com>
Date: Mon, 3 Mar 2025 16:37:37 +0530
Subject: [PATCH 1/1] Add Support for IBT/SHSTK in ASM

---
 source/CMakeLists.txt                    | 13 ++++++++++
 source/common/x86/blockcopy8.asm         |  1 +
 source/common/x86/const-a.asm            |  1 +
 source/common/x86/cpu-a.asm              |  1 +
 source/common/x86/dct8.asm               |  1 +
 source/common/x86/h-ipfilter16.asm       |  7 +++---
 source/common/x86/h-ipfilter8.asm        |  1 +
 source/common/x86/h4-ipfilter16.asm      |  7 +++---
 source/common/x86/intrapred16.asm        |  1 +
 source/common/x86/intrapred8.asm         |  1 +
 source/common/x86/intrapred8_allangs.asm |  1 +
 source/common/x86/ipfilter16.asm         |  1 +
 source/common/x86/ipfilter8.asm          |  1 +
 source/common/x86/loopfilter.asm         |  1 +
 source/common/x86/mc-a.asm               |  1 +
 source/common/x86/mc-a2.asm              |  1 +
 source/common/x86/pixel-32.asm           |  2 +-
 source/common/x86/pixel-a.asm            |  1 +
 source/common/x86/pixel-util8.asm        |  1 +
 source/common/x86/pixeladd8.asm          |  1 +
 source/common/x86/sad-a.asm              |  1 +
 source/common/x86/sad16-a.asm            |  1 +
 source/common/x86/seaintegral.asm        |  1 +
 source/common/x86/ssd-a.asm              |  1 +
 source/common/x86/v4-ipfilter16.asm      |  7 +++---
 source/common/x86/v4-ipfilter8.asm       |  1 +
 source/common/x86/x86inc.asm             | 32 +++++++++++++++++++++++-
 27 files changed, 78 insertions(+), 11 deletions(-)

diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
index 37dbe1a87..dde360493 100755
--- a/source/CMakeLists.txt
+++ b/source/CMakeLists.txt
@@ -152,6 +152,13 @@ if(UNIX)
     if(ENABLE_LIBVMAF)
         add_definitions(-DENABLE_LIBVMAF)
     endif()
+    if(X64)  # the CET option is only offered when X64 is set
+        option(ENABLE_CET "Enable Control-flow Enforcement" OFF)
+        if(ENABLE_CET)
+            add_definitions(-DENABLE_CET)  # define for the compiled sources
+            list(APPEND ASM_FLAGS "-DENABLE_CET=1")  # define for the assembler; nothing is passed when the option is OFF
+        endif()
+    endif(X64)
 endif(UNIX)

 if((X64 AND NOT WIN32) OR ARM64)
@@ -483,6 +490,12 @@ int main() { return 0; }")
         # linked.
         list(APPEND LINKER_OPTIONS "-fsanitize=${FSANITIZE}")
     endif()
+    if(ENABLE_CET)
+        check_cxx_compiler_flag(-fcf-protection=full CC_HAS_CET_FLAG)  # probe the C++ compiler before using the flag
+        if(CC_HAS_CET_FLAG)
+            add_definitions(-fcf-protection=full)  # only added when the probe succeeded
+        endif()
+    endif()
     option(ENABLE_AGGRESSIVE_CHECKS "Enable stack protection and -ftrapv" OFF)
     if(ENABLE_AGGRESSIVE_CHECKS)
         # use with care, -ftrapv can cause testbench SIGILL exceptions
diff --git a/source/common/x86/blockcopy8.asm b/source/common/x86/blockcopy8.asm
index 1ea772182..f4eaa5788 100644
--- a/source/common/x86/blockcopy8.asm
+++ b/source/common/x86/blockcopy8.asm
@@ -6925,3 +6925,4 @@ cglobal cpy1Dto2D_shr_32, 3, 4, 6
     dec                 r3d
     jnz                 .loop
     RET
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/const-a.asm b/source/common/x86/const-a.asm
index c8142bbb5..3dbef6169 100644
--- a/source/common/x86/const-a.asm
+++ b/source/common/x86/const-a.asm
@@ -157,3 +157,4 @@ const trans8_shuf,          times  1 dd   0,   4,   1,   5,   2,   6,   3,   7
 ;; 64-bit constants

 const pq_1,                 times 1 dq 1
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/cpu-a.asm b/source/common/x86/cpu-a.asm
index 747b921f7..519ae42a8 100644
--- a/source/common/x86/cpu-a.asm
+++ b/source/common/x86/cpu-a.asm
@@ -201,3 +201,4 @@ cglobal safe_intel_cpu_indicator_init
     pop r1
     pop r0
     ret
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/dct8.asm b/source/common/x86/dct8.asm
index 618b1d41c..4604e031a 100644
--- a/source/common/x86/dct8.asm
+++ b/source/common/x86/dct8.asm
@@ -7639,3 +7639,4 @@ cglobal psyRdoQuant_1p32, 7, 9, 16
     RET

 %endif
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/h-ipfilter16.asm b/source/common/x86/h-ipfilter16.asm
index 688688fa2..2761c1632 100644
--- a/source/common/x86/h-ipfilter16.asm
+++ b/source/common/x86/h-ipfilter16.asm
@@ -2697,6 +2697,7 @@ cglobal interp_8tap_horiz_ps_12x16, 4, 6, 8
     add                 r2, r3
     add                 r0, r1
     dec                 r4d
-    jnz                 .loop0
-    RET
-%endif
+    jnz                 .loop0
+    RET
+%endif
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/h-ipfilter8.asm b/source/common/x86/h-ipfilter8.asm
index 757efc509..41538f7a5 100644
--- a/source/common/x86/h-ipfilter8.asm
+++ b/source/common/x86/h-ipfilter8.asm
@@ -6735,3 +6735,4 @@ cglobal interp_4tap_horiz_pp_2x16, 4, 6, 6
     pextrw            [r2 + r3 * 2], xm1,         6
     pextrw            [r2 + r4],     xm1,         7
     RET
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/h4-ipfilter16.asm b/source/common/x86/h4-ipfilter16.asm
index 0d1d20e56..8db3481c0 100644
--- a/source/common/x86/h4-ipfilter16.asm
+++ b/source/common/x86/h4-ipfilter16.asm
@@ -2629,6 +2629,7 @@ cglobal interp_4tap_horiz_ps_6x%1, 4, 7, 6
     RET
 %endif
 %endmacro
-
-    IPFILTER_CHROMA_PS_6xN_AVX2 8
-    IPFILTER_CHROMA_PS_6xN_AVX2 16
+
+    IPFILTER_CHROMA_PS_6xN_AVX2 8
+    IPFILTER_CHROMA_PS_6xN_AVX2 16
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/intrapred16.asm b/source/common/x86/intrapred16.asm
index e5e23d3ec..785326a5c 100644
--- a/source/common/x86/intrapred16.asm
+++ b/source/common/x86/intrapred16.asm
@@ -25115,3 +25115,4 @@ cglobal intra_filter_4x4, 2,4,4
     mov             [r1 + 16], r2w              ; topLast
     mov             [r1 + 32], r3w              ; LeftLast
     RET
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/intrapred8.asm b/source/common/x86/intrapred8.asm
index a1e4dd905..f351c0a36 100644
--- a/source/common/x86/intrapred8.asm
+++ b/source/common/x86/intrapred8.asm
@@ -22546,3 +22546,4 @@ cglobal intra_filter_4x4, 2,4,4
     mov             [r1 +  8], r2b              ; topLast
     mov             [r1 + 16], r3b              ; LeftLast
     RET
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/intrapred8_allangs.asm b/source/common/x86/intrapred8_allangs.asm
index f49bb21ae..87d73c5d0 100644
--- a/source/common/x86/intrapred8_allangs.asm
+++ b/source/common/x86/intrapred8_allangs.asm
@@ -24121,3 +24121,4 @@ cglobal all_angs_pred_4x4, 4, 4, 8
     movd        [r0 + 524],     m7              ;byte[5, 6, 7, 8]

 RET
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/ipfilter16.asm b/source/common/x86/ipfilter16.asm
index 8c6e31591..9c5e456f6 100644
--- a/source/common/x86/ipfilter16.asm
+++ b/source/common/x86/ipfilter16.asm
@@ -14071,3 +14071,4 @@ cglobal interp_8tap_vert_%1_64x%2, 5, 8, 22
 ;-------------------------------------------------------------------------------------------------------------
 ;ipfilter_luma_avx512 code end
 ;-------------------------------------------------------------------------------------------------------------
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/ipfilter8.asm b/source/common/x86/ipfilter8.asm
index fca182df4..6fad077e9 100644
--- a/source/common/x86/ipfilter8.asm
+++ b/source/common/x86/ipfilter8.asm
@@ -14947,3 +14947,4 @@ FILTER_VERT_LUMA_64xN_AVX512 ps, 64
 ;-------------------------------------------------------------------------------------------------------------
 ;ipfilter_luma_avx512 code end
 ;-------------------------------------------------------------------------------------------------------------
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/loopfilter.asm b/source/common/x86/loopfilter.asm
index dab6169d6..8082b02df 100644
--- a/source/common/x86/loopfilter.asm
+++ b/source/common/x86/loopfilter.asm
@@ -4213,3 +4213,4 @@ cglobal pelFilterChroma_V, 6,6,5
 %endif
     RET
 %endif ; ARCH_X86_64
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/mc-a.asm b/source/common/x86/mc-a.asm
index 78af04b47..428088683 100644
--- a/source/common/x86/mc-a.asm
+++ b/source/common/x86/mc-a.asm
@@ -7410,3 +7410,4 @@ cglobal prefetch_ref, 3,3
     prefetcht0  [r0+r1*2]
     prefetcht0  [r0+r2]
     RET
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/mc-a2.asm b/source/common/x86/mc-a2.asm
index 94a995ee0..96d2e3e2d 100644
--- a/source/common/x86/mc-a2.asm
+++ b/source/common/x86/mc-a2.asm
@@ -1591,3 +1591,4 @@ CUTREE_FIX8

 INIT_YMM avx2
 CUTREE_FIX8
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/pixel-32.asm b/source/common/x86/pixel-32.asm
index 9d1696930..2a40f0091 100644
--- a/source/common/x86/pixel-32.asm
+++ b/source/common/x86/pixel-32.asm
@@ -418,4 +418,4 @@ cglobal pixel_ssim_4x4x2_core, 0,5
     jge .loop
     emms
     RET
-
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/pixel-a.asm b/source/common/x86/pixel-a.asm
index 4f5f276b5..0f787d184 100644
--- a/source/common/x86/pixel-a.asm
+++ b/source/common/x86/pixel-a.asm
@@ -16581,3 +16581,4 @@ cglobal normFact64, 4, 5, 6
 %endif
     movq           [r3],        xm3
     RET
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/pixel-util8.asm b/source/common/x86/pixel-util8.asm
index 6ad2852d3..707251724 100644
--- a/source/common/x86/pixel-util8.asm
+++ b/source/common/x86/pixel-util8.asm
@@ -8991,3 +8991,4 @@ cglobal costC1C2Flag, 4,12,2
     or          eax, r4d
     RET
 %endif ; ARCH_X86_64
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/pixeladd8.asm b/source/common/x86/pixeladd8.asm
index 77f7a124a..e4e28a0b7 100644
--- a/source/common/x86/pixeladd8.asm
+++ b/source/common/x86/pixeladd8.asm
@@ -1646,3 +1646,4 @@ cglobal pixel_add_ps_aligned_32x64, 6, 9, 5
 ;-----------------------------------------------------------------------------
 ; pixel_add_ps avx512 code end
 ;-----------------------------------------------------------------------------
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/sad-a.asm b/source/common/x86/sad-a.asm
index 1ffff1b39..5e7bc7a03 100644
--- a/source/common/x86/sad-a.asm
+++ b/source/common/x86/sad-a.asm
@@ -6758,3 +6758,4 @@ SAD_MxN_AVX512 32, 24
 SAD_MxN_AVX512 32, 32
 SAD_MxN_AVX512 32, 64
 %endif
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/sad16-a.asm b/source/common/x86/sad16-a.asm
index f98c80bbf..09d11ded5 100644
--- a/source/common/x86/sad16-a.asm
+++ b/source/common/x86/sad16-a.asm
@@ -4368,3 +4368,4 @@ cglobal pixel_sad_x4_64x64, 6,8,15
     PROCESS_SAD_X4_END_AVX512
     RET
 %endif
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/seaintegral.asm b/source/common/x86/seaintegral.asm
index bd664883a..cdbaa3a83 100644
--- a/source/common/x86/seaintegral.asm
+++ b/source/common/x86/seaintegral.asm
@@ -1060,3 +1060,4 @@ cglobal integral32h, 3, 5, 3

 .end:
     RET
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/ssd-a.asm b/source/common/x86/ssd-a.asm
index 8a7ed11bb..1d55f7250 100644
--- a/source/common/x86/ssd-a.asm
+++ b/source/common/x86/ssd-a.asm
@@ -3703,3 +3703,4 @@ cglobal pixel_ssd_s_aligned_32, 2,4,5
 %endif
     RET
 %endif
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/v4-ipfilter16.asm b/source/common/x86/v4-ipfilter16.asm
index 6981ffd33..aa3c49441 100644
--- a/source/common/x86/v4-ipfilter16.asm
+++ b/source/common/x86/v4-ipfilter16.asm
@@ -3525,7 +3525,8 @@ cglobal interp_4tap_vert_%1_8x12, 4, 7, 15
 %endif
 %endmacro

-FILTER_VER_CHROMA_AVX2_8x12 pp, 1, 6
-FILTER_VER_CHROMA_AVX2_8x12 ps, 0, INTERP_SHIFT_PS
-FILTER_VER_CHROMA_AVX2_8x12 sp, 1, INTERP_SHIFT_SP
+FILTER_VER_CHROMA_AVX2_8x12 pp, 1, 6
+FILTER_VER_CHROMA_AVX2_8x12 ps, 0, INTERP_SHIFT_PS
+FILTER_VER_CHROMA_AVX2_8x12 sp, 1, INTERP_SHIFT_SP
 FILTER_VER_CHROMA_AVX2_8x12 ss, 0, 6
+SECTION_IBT_SHSTK
diff --git a/source/common/x86/v4-ipfilter8.asm b/source/common/x86/v4-ipfilter8.asm
index 99ef68dd8..7aa62aad6 100644
--- a/source/common/x86/v4-ipfilter8.asm
+++ b/source/common/x86/v4-ipfilter8.asm
@@ -12797,3 +12797,4 @@ cglobal interp_4tap_vert_ss_%1x%2, 5, 6, 7
     FILTER_VER_CHROMA_SS_W8_H2 8, 12
     FILTER_VER_CHROMA_SS_W8_H2 8, 64

+SECTION_IBT_SHSTK
diff --git a/source/common/x86/x86inc.asm b/source/common/x86/x86inc.asm
index 805aa1180..974980ccc 100644
--- a/source/common/x86/x86inc.asm
+++ b/source/common/x86/x86inc.asm
@@ -90,7 +90,69 @@
         SECTION .rodata align=%1
     %endif
 %endmacro

+; ENABLE_CET is normally supplied on the assembler command line. Default it to
+; 0 so that every test of ENABLE_CET in this file is a valid constant
+; expression even when the build system did not define it.
+%ifndef ENABLE_CET
+    %define ENABLE_CET 0
+%endif
+
+; Values used in the ELF .note.gnu.property section.
+%define NT_GNU_PROPERTY_TYPE_0           5
+%define GNU_PROPERTY_X86_FEATURE_1_AND   0xc0000002
+%define GNU_PROPERTY_X86_FEATURE_1_IBT   0x1
+%define GNU_PROPERTY_X86_FEATURE_1_SHSTK 0x2
+
+;-----------------------------------------------------------------------------
+; SECTION_IBT_SHSTK
+;
+; Emits a .note.gnu.property section declaring the object file compatible with
+; Indirect Branch Tracking and Shadow Stack. Emits nothing unless ENABLE_CET
+; is non-zero and the output format is ELF.
+;
+; The macro switches the current section and does not switch back, so invoke
+; it as the last statement of a source file.
+;
+; NOTE(review): the IBT bit promises that every indirect branch target in the
+; object starts with endbr32/endbr64. Confirm that no file using this macro
+; performs indirect jumps or calls to labels that lack one.
+;-----------------------------------------------------------------------------
+%macro SECTION_IBT_SHSTK 0
+    ; Note alignment: 8 for 64-bit ELF, 4 for 32-bit ELF classes, 0 = no note.
+    %assign %%note_align 0
+    %if ENABLE_CET
+        %ifidn __OUTPUT_FORMAT__,elf64
+            %assign %%note_align 8
+        %elifidn __OUTPUT_FORMAT__,elf32
+            %assign %%note_align 4
+        %elifidn __OUTPUT_FORMAT__,elfx32
+            %assign %%note_align 4
+        %elifidn __OUTPUT_FORMAT__,elf
+            %assign %%note_align 4
+        %endif
+    %endif
+    %if %%note_align
+        SECTION .note.gnu.property note
+        align %%note_align, db 0
+        dd    %%name_end - %%name               ; n_namesz
+        dd    %%desc_end - %%desc               ; n_descsz
+        dd    NT_GNU_PROPERTY_TYPE_0            ; n_type
+    %%name:
+        db    "GNU", 0
+    %%name_end:
+        align %%note_align, db 0
+    %%desc:
+        dd    GNU_PROPERTY_X86_FEATURE_1_AND    ; pr_type
+        dd    %%data_end - %%data               ; pr_datasz
+    %%data:
+        dd    GNU_PROPERTY_X86_FEATURE_1_IBT | GNU_PROPERTY_X86_FEATURE_1_SHSTK
+    %%data_end:
+        align %%note_align, db 0                ; pr_padding, counted in n_descsz
+    %%desc_end:
+    %endif
+%endmacro
+
 %if WIN64
     %define PIC
 %elif ARCH_X86_64 == 0
@@ -737,6 +760,18 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
     %assign stack_size 0        ; amount of stack space that can be freely used inside a function
     %assign stack_size_padded 0 ; total amount of allocated stack space, including space for callee-saved xmm registers on WIN64 and alignment padding
     %assign xmm_regs_used 0     ; number of XMM registers requested, used for dealing with callee-saved registers on WIN64 and vzeroupper
+    ; With CET enabled, emit the IBT landing-pad instruction that matches the
+    ; target bitness. The outer ifdef keeps the numeric test from evaluating
+    ; an undefined symbol when the build does not pass ENABLE_CET at all.
+    %ifdef ENABLE_CET
+      %if ENABLE_CET
+        %if ARCH_X86_64
+            endbr64
+        %else
+            endbr32
+        %endif
+      %endif
+    %endif
     %ifnidn %3, ""
         PROLOGUE %3
     %endif
--
2.43.0



-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20250305/274bccc6/attachment-0001.htm>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0002-Add-Support-for-IBT-SHSTK-in-ASM.patch
Type: application/octet-stream
Size: 13630 bytes
Desc: 0002-Add-Support-for-IBT-SHSTK-in-ASM.patch
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20250305/274bccc6/attachment-0001.obj>


More information about the x265-devel mailing list