[vlc-commits] deinterlace: ARM optimizations for 16-bits merge
Rémi Denis-Courmont
git at videolan.org
Sat Aug 4 18:09:57 CEST 2012
vlc | branch: master | Rémi Denis-Courmont <remi at remlab.net> | Sat Aug 4 19:09:16 2012 +0300| [3d64b908e727a51f0e9bd29c4fbf5b24928a00f0] | committer: Rémi Denis-Courmont
deinterlace: ARM optimizations for 16-bits merge
> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=3d64b908e727a51f0e9bd29c4fbf5b24928a00f0
---
modules/video_filter/deinterlace/deinterlace.c | 10 ++--
modules/video_filter/deinterlace/merge.h | 2 +
modules/video_filter/deinterlace/merge_arm.S | 63 ++++++++++++++++++++++++
3 files changed, 71 insertions(+), 4 deletions(-)
diff --git a/modules/video_filter/deinterlace/deinterlace.c b/modules/video_filter/deinterlace/deinterlace.c
index 9d52f99..4cfb86a 100644
--- a/modules/video_filter/deinterlace/deinterlace.c
+++ b/modules/video_filter/deinterlace/deinterlace.c
@@ -657,11 +657,13 @@ int Open( vlc_object_t *p_this )
else
#endif
#if defined(__arm__)
- if( chroma->pixel_size == 1 && vlc_CPU_ARM_NEON() )
- p_sys->pf_merge = merge8_arm_neon;
+ if( vlc_CPU_ARM_NEON() )
+ p_sys->pf_merge =
+ (chroma->pixel_size == 1) ? merge8_arm_neon : merge16_arm_neon;
else
- if( chroma->pixel_size == 1 && vlc_CPU_ARMv6() )
- p_sys->pf_merge = merge8_armv6;
+ if( vlc_CPU_ARMv6() )
+ p_sys->pf_merge =
+ (chroma->pixel_size == 1) ? merge8_armv6 : merge16_armv6;
else
#endif
{
diff --git a/modules/video_filter/deinterlace/merge.h b/modules/video_filter/deinterlace/merge.h
index 04634db..ca0c61c 100644
--- a/modules/video_filter/deinterlace/merge.h
+++ b/modules/video_filter/deinterlace/merge.h
@@ -163,11 +163,13 @@ void Merge16BitSSE2( void *, const void *, const void *, size_t );
* ARM NEON routine to blend pixels from two picture lines.
*/
void merge8_arm_neon (void *, const void *, const void *, size_t);
+void merge16_arm_neon (void *, const void *, const void *, size_t); /* Same signature as merge8_arm_neon; the implementation averages unsigned 16-bit lanes. */
/**
* ARMv6 SIMD routine to blend pixels from two picture lines.
*/
void merge8_armv6 (void *, const void *, const void *, size_t);
+void merge16_armv6 (void *, const void *, const void *, size_t); /* Same signature as merge8_armv6; the implementation averages unsigned 16-bit lanes. */
#endif
/*****************************************************************************
diff --git a/modules/video_filter/deinterlace/merge_arm.S b/modules/video_filter/deinterlace/merge_arm.S
index 80c652b..04dc068 100644
--- a/modules/video_filter/deinterlace/merge_arm.S
+++ b/modules/video_filter/deinterlace/merge_arm.S
@@ -72,6 +72,47 @@ merge8_arm_neon:
bx lr
.align 2
+ .global merge16_arm_neon /* Exported NEON routine: DEST = truncating average of the unsigned 16-bit lanes of SRC1 and SRC2. */
+ .type merge16_arm_neon, %function
+merge16_arm_neon: /* DEST, SRC1, SRC2 and SIZE are names defined outside this hunk; presumably the four arguments in order (TODO confirm). SIZE is consumed in bytes. */
+ cmp SIZE, #64
+ blo 2f /* Fewer than 64 bytes: skip the main loop. */
+1: /* Main loop: 64 bytes (four q registers) from each source per pass. */
+ pld [SRC1, #64] /* Prefetch hint 64 bytes ahead of the SRC1 read pointer. */
+ vld1.u16 {q0-q1}, [SRC1,:128]! /* Load 32 bytes; :128 requires a 16-byte aligned address, ! post-increments the pointer. */
+ pld [SRC2, #64]
+ vld1.u16 {q8-q9}, [SRC2,:128]!
+ vhadd.u16 q0, q0, q8 /* Per-lane unsigned halving add: (a + b) >> 1, truncating. */
+ sub SIZE, SIZE, #64 /* Account for the 64 bytes handled by this pass. */
+ vld1.u16 {q2-q3}, [SRC1,:128]!
+ vhadd.u16 q1, q1, q9
+ vld1.u16 {q10-q11}, [SRC2,:128]!
+ vhadd.u16 q2, q2, q10
+ cmp SIZE, #64 /* Sets the flags consumed by the bhs at the end of the pass. */
+ vhadd.u16 q3, q3, q11
+ vst1.u16 {q0-q1}, [DEST,:128]! /* Store 32 result bytes; DEST must also be 16-byte aligned. */
+ vst1.u16 {q2-q3}, [DEST,:128]!
+ bhs 1b /* Repeat while at least 64 bytes remain. */
+2: /* Optional single 32-byte step. */
+ cmp SIZE, #32
+ blo 3f
+ vld1.u16 {q0-q1}, [SRC1,:128]!
+ sub SIZE, SIZE, #32
+ vld1.u16 {q8-q9}, [SRC2,:128]!
+ vhadd.u16 q0, q0, q8
+ vhadd.u16 q1, q1, q9
+ vst1.u16 {q0-q1}, [DEST,:128]!
+3: /* Optional single 16-byte step. */
+ cmp SIZE, #16
+ bxlo lr /* Return when fewer than 16 bytes remain; such a tail is not processed. */
+ vld1.u16 {q0}, [SRC1,:128]!
+ sub SIZE, SIZE, #16
+ vld1.u16 {q8}, [SRC2,:128]!
+ vhadd.u16 q0, q0, q8
+ vst1.u16 {q0}, [DEST,:128]!
+ bx lr /* Any remainder (fewer than 16 bytes) is left untouched in DEST. */
+
+ .align 2
.global merge8_armv6
.type merge8_armv6, %function
merge8_armv6:
@@ -92,3 +133,25 @@ merge8_armv6:
stm DEST!, {r6-r7}
popeq {r4-r9,pc}
b 1b
+
+ .align 2
+ .global merge16_armv6 /* Exported ARMv6 SIMD routine: DEST = truncating average of the unsigned 16-bit lanes of SRC1 and SRC2, 16 bytes per pass. */
+ .type merge16_armv6, %function
+merge16_armv6: /* DEST, SRC1, SRC2 and SIZE are names defined outside this hunk; presumably the four arguments in order (TODO confirm). */
+ push {r4-r9,lr} /* Save r4-r9 and lr; all of them are overwritten below (lr is used as a data register). */
+1: /* One pass: four 32-bit words (eight 16-bit lanes) from each source. */
+ pld [SRC1, #64] /* Prefetch hint 64 bytes ahead of the SRC1 read pointer. */
+ ldm SRC1!, {r4-r5} /* Load 8 bytes from SRC1 and advance the pointer. */
+ pld [SRC2, #64]
+ ldm SRC2!, {r8-r9}
+ subs SIZE, SIZE, #16 /* Count down 16 bytes; the resulting Z flag is tested by popeq at the end of the pass. */
+ uhadd16 r4, r4, r8 /* Two unsigned 16-bit halving adds per register: (a + b) >> 1, truncating. */
+ ldm SRC1!, {r6-r7}
+ uhadd16 r5, r5, r9
+ ldm SRC2!, {ip,lr}
+ uhadd16 r6, r6, ip
+ stm DEST!, {r4-r5} /* Store the first 8 result bytes and advance DEST. */
+ uhadd16 r7, r7, lr
+ stm DEST!, {r6-r7}
+ popeq {r4-r9,pc} /* SIZE reached exactly zero: restore registers and return by popping the saved lr into pc. */
+ b 1b /* NOTE(review): the only exit is SIZE hitting exactly 0, so this assumes SIZE is a nonzero multiple of 16 -- confirm with callers. */
More information about the vlc-commits
mailing list