[x264-devel] [PATCH] arm: Load mb_y properly in x264_mbtree_propagate_list_internal_neon
Martin Storsjö
martin at martin.st
Mon Dec 26 23:22:48 CET 2016
The previous version, which attempted to load two stack parameters at once,
would only have worked if they were interpreted and loaded as 32 bit
elements, not when loading them as 16 bit elements: a 16 bit vld2 reads
two consecutive halfwords ([sp] and [sp, #2]), while mb_y is at [sp, #4].
---
common/arm/mc-a.S | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/common/arm/mc-a.S b/common/arm/mc-a.S
index 165c1fa..8c15191 100644
--- a/common/arm/mc-a.S
+++ b/common/arm/mc-a.S
@@ -1818,13 +1818,14 @@ function x264_mbtree_propagate_cost_neon
endfunc
function x264_mbtree_propagate_list_internal_neon
- vld2.16 {d4[], d5[]}, [sp] @ bipred_weight, mb_y
+ vld1.16 {d4[]}, [sp] @ bipred_weight
movrel r12, pw_0to15
vmov.u16 q10, #0xc000
vld1.16 {q0}, [r12, :128] @h->mb.i_mb_x,h->mb.i_mb_y
+ ldrh r12, [sp, #4]
vmov.u32 q11, #4
vmov.u8 q3, #32
- vdup.u16 q8, d5[0] @ mb_y
+ vdup.u16 q8, r12 @ mb_y
vzip.u16 q0, q8
ldr r12, [sp, #8]
8:
--
2.7.4
More information about the x264-devel mailing list