[vlc-commits] deinterlace: handle multiple of 16 and 32 widths correctly in arm64 NEON merge asm
Janne Grunau
git at videolan.org
Thu Oct 13 11:07:22 CEST 2016
vlc | branch: master | Janne Grunau <janne-vlc at jannau.net> | Thu Oct 13 01:03:19 2016 +0200| [e6241c404f0e72c4954d769361e069172a440111] | committer: Jean-Baptiste Kempf
deinterlace: handle multiple of 16 and 32 widths correctly in arm64 NEON merge asm
The tests for the 32- and 16-pixel remainders were inverted, resulting in
uninitialized data (YUV green) of 16, 32 or 48 pixels at the right
picture edge, or equally large overreads/overwrites in the last line.
Closes #17486
Signed-off-by: Jean-Baptiste Kempf <jb at videolan.org>
> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=e6241c404f0e72c4954d769361e069172a440111
---
modules/video_filter/deinterlace/merge_arm64.S | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/modules/video_filter/deinterlace/merge_arm64.S b/modules/video_filter/deinterlace/merge_arm64.S
index 827d6f5..a893cc2 100644
--- a/modules/video_filter/deinterlace/merge_arm64.S
+++ b/modules/video_filter/deinterlace/merge_arm64.S
@@ -50,14 +50,14 @@ merge8_arm64_neon:
st1 {v2.16b,v3.16b}, [DEST], #32
b.gt 1b
2:
- tbz SIZE, #32, 3f
+ tbnz SIZE, #32, 3f
ld1 {v0.16b,v1.16b}, [SRC1], #32
ld1 {v4.16b,v5.16b}, [SRC2], #32
uhadd v0.16b, v0.16b, v4.16b
uhadd v1.16b, v1.16b, v5.16b
st1 {v0.16b,v1.16b}, [DEST], #32
3:
- tbz SIZE, #16, 4f
+ tbnz SIZE, #16, 4f
ld1 {v0.16b}, [SRC1]
ld1 {v4.16b}, [SRC2]
uhadd v0.16b, v0.16b, v4.16b
@@ -85,14 +85,14 @@ merge16_arm64_neon:
subs x5, x5, #64
b.gt 1b
2:
- tbz SIZE, #32, 3f
+ tbnz SIZE, #32, 3f
ld1 {v0.8h,v1.8h}, [SRC1], #32
ld1 {v4.8h,v5.8h}, [SRC2], #32
uhadd v0.8h, v0.8h, v4.8h
uhadd v1.8h, v1.8h, v5.8h
st1 {v0.8h,v1.8h}, [DEST], #32
3:
- tbz SIZE, #16, 4f
+ tbnz SIZE, #16, 4f
ld1 {v0.8h}, [SRC1]
ld1 {v4.8h}, [SRC2]
uhadd v0.8h, v0.8h, v4.8h
More information about the vlc-commits mailing list