[x264-devel] [PATCH] aarch64: fix wrong argument register used for mb_x in prefetch_fenc_\sub\()_aarch64
zhengzhi Duan
royzzduan at foxmail.com
Mon Sep 23 06:30:32 UTC 2024
From: Zhengzhi Duan <zhengzhi.duan at shopee.com>
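The function previously read its last argument (mb_x) from w5; this patch reads it from w4 and shifts the scratch registers down accordingly (x6/x7 become x5/x6).
This fix yields a speedup of about 1.2 fps for the veryfast preset at 720p, where encoding averages about 50 fps.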
---
common/aarch64/mc-a.S | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/common/aarch64/mc-a.S b/common/aarch64/mc-a.S
index 9c52d045..25baf0b4 100644
--- a/common/aarch64/mc-a.S
+++ b/common/aarch64/mc-a.S
@@ -57,21 +57,21 @@ endfunc
// uint8_t *pix_uv, intptr_t stride_uv, int mb_x )
.macro prefetch_fenc sub
function prefetch_fenc_\sub\()_aarch64, export=1
- and w6, w5, #3
- and w7, w5, #3
- mul x6, x6, x1
- mul x7, x7, x3
+ and w5, w4, #3
+ and w6, w4, #3
+ mul x5, x5, x1
+ mul x6, x6, x3
add x0, x0, #64
add x2, x2, #64
- add x0, x0, x6, lsl #2
- add x6, x0, x1, lsl #1
+ add x0, x0, x5, lsl #2
+ add x5, x0, x1, lsl #1
prfm pldl1strm, [x0]
prfm pldl1strm, [x0, x1]
- prfm pldl1strm, [x6]
- prfm pldl1strm, [x6, x1]
+ prfm pldl1strm, [x5]
+ prfm pldl1strm, [x5, x1]
- add x2, x2, x7, lsl #1
+ add x2, x2, x6, lsl #1
prfm pldl1strm, [x2]
prfm pldl1strm, [x2, x3]
.ifc \sub, 422
--
2.39.3 (Apple Git-145)
More information about the x264-devel
mailing list