[vlc-commits] deinterlace: ARM optimizations for 16-bits merge

Rémi Denis-Courmont git at videolan.org
Sat Aug 4 18:09:57 CEST 2012


vlc | branch: master | Rémi Denis-Courmont <remi at remlab.net> | Sat Aug  4 19:09:16 2012 +0300| [3d64b908e727a51f0e9bd29c4fbf5b24928a00f0] | committer: Rémi Denis-Courmont

deinterlace: ARM optimizations for 16-bits merge

> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=3d64b908e727a51f0e9bd29c4fbf5b24928a00f0
---

 modules/video_filter/deinterlace/deinterlace.c |   10 ++--
 modules/video_filter/deinterlace/merge.h       |    2 +
 modules/video_filter/deinterlace/merge_arm.S   |   63 ++++++++++++++++++++++++
 3 files changed, 71 insertions(+), 4 deletions(-)

diff --git a/modules/video_filter/deinterlace/deinterlace.c b/modules/video_filter/deinterlace/deinterlace.c
index 9d52f99..4cfb86a 100644
--- a/modules/video_filter/deinterlace/deinterlace.c
+++ b/modules/video_filter/deinterlace/deinterlace.c
@@ -657,11 +657,13 @@ int Open( vlc_object_t *p_this )
     else
 #endif
 #if defined(__arm__)
-    if( chroma->pixel_size == 1 && vlc_CPU_ARM_NEON() )
-        p_sys->pf_merge = merge8_arm_neon;
+    if( vlc_CPU_ARM_NEON() )
+        p_sys->pf_merge =
+            (chroma->pixel_size == 1) ? merge8_arm_neon : merge16_arm_neon;
     else
-    if( chroma->pixel_size == 1 && vlc_CPU_ARMv6() )
-        p_sys->pf_merge = merge8_armv6;
+    if( vlc_CPU_ARMv6() )
+        p_sys->pf_merge =
+            (chroma->pixel_size == 1) ? merge8_armv6 : merge16_armv6;
     else
 #endif
     {
diff --git a/modules/video_filter/deinterlace/merge.h b/modules/video_filter/deinterlace/merge.h
index 04634db..ca0c61c 100644
--- a/modules/video_filter/deinterlace/merge.h
+++ b/modules/video_filter/deinterlace/merge.h
@@ -163,11 +163,13 @@ void Merge16BitSSE2( void *, const void *, const void *, size_t );
  * ARM NEON routine to blend pixels from two picture lines.
  */
 void merge8_arm_neon (void *, const void *, const void *, size_t);
+void merge16_arm_neon (void *, const void *, const void *, size_t);
 
 /**
  * ARMv6 SIMD routine to blend pixels from two picture lines.
  */
 void merge8_armv6 (void *, const void *, const void *, size_t);
+void merge16_armv6 (void *, const void *, const void *, size_t);
 #endif
 
 /*****************************************************************************
diff --git a/modules/video_filter/deinterlace/merge_arm.S b/modules/video_filter/deinterlace/merge_arm.S
index 80c652b..04dc068 100644
--- a/modules/video_filter/deinterlace/merge_arm.S
+++ b/modules/video_filter/deinterlace/merge_arm.S
@@ -72,6 +72,47 @@ merge8_arm_neon:
 	bx		lr
 
 	.align 2
+	.global merge16_arm_neon	@ exported symbol; its C prototype is declared in merge.h
+	.type	merge16_arm_neon, %function	@ DEST, SRC1, SRC2, SIZE are defined outside this hunk; presumably the four arguments in prototype order - TODO confirm
+merge16_arm_neon:	@ writes (a + b) >> 1 for each unsigned 16-bit lane of the two sources; SIZE is counted in bytes
+	cmp		SIZE,	#64	@ fewer than 64 bytes: skip the unrolled loop
+	blo		2f
+1:	@ main loop, 64 bytes per pass
+	pld		[SRC1, #64]	@ preload hint, 64 bytes ahead of the current read position
+	vld1.u16	{q0-q1},	[SRC1,:128]!	@ 32 bytes in, pointer post-incremented; :128 asserts a 16-byte aligned address
+	pld		[SRC2, #64]
+	vld1.u16	{q8-q9},	[SRC2,:128]!
+	vhadd.u16	q0,	q0,	q8	@ halving add: truncating (a + b) >> 1 per lane, no overflow
+	sub		SIZE,	SIZE,	#64	@ plain sub, flags untouched
+	vld1.u16	{q2-q3},	[SRC1,:128]!	@ second 32 bytes of this pass
+	vhadd.u16	q1,	q1,	q9
+	vld1.u16	{q10-q11},	[SRC2,:128]!
+	vhadd.u16	q2,	q2,	q10
+	cmp		SIZE,	#64	@ sets the flags for the bhs below; the NEON instructions in between leave them alone
+	vhadd.u16	q3,	q3,	q11
+	vst1.u16	{q0-q1},	[DEST,:128]!	@ 32 bytes out, pointer post-incremented
+	vst1.u16	{q2-q3},	[DEST,:128]!
+	bhs		1b	@ unsigned >=: at least 64 bytes left, run another pass
+2:	@ at most one 32-byte step
+	cmp		SIZE,	#32
+	blo		3f
+	vld1.u16	{q0-q1},	[SRC1,:128]!
+	sub		SIZE,	SIZE,	#32
+	vld1.u16	{q8-q9},	[SRC2,:128]!
+	vhadd.u16	q0,	q0,	q8
+	vhadd.u16	q1,	q1,	q9
+	vst1.u16	{q0-q1},	[DEST,:128]!
+3:	@ at most one 16-byte step
+	cmp		SIZE,	#16
+	bxlo		lr	@ fewer than 16 bytes left: return, the leftover bytes are not processed
+	vld1.u16	{q0},		[SRC1,:128]!
+	sub		SIZE,	SIZE,	#16	@ SIZE is not read again before the return
+	vld1.u16	{q8},		[SRC2,:128]!
+	vhadd.u16	q0,	q0,	q8
+	vst1.u16	{q0},		[DEST,:128]!
+	bx		lr
+
+	.align 2
 	.global merge8_armv6
 	.type	merge8_armv6, %function
 merge8_armv6:
@@ -92,3 +133,25 @@ merge8_armv6:
 	stm		DEST!,	{r6-r7}
 	popeq		{r4-r9,pc}
 	b		1b
+
+	.align 2
+	.global merge16_armv6	@ exported symbol; its C prototype is declared in merge.h
+	.type	merge16_armv6, %function	@ DEST, SRC1, SRC2, SIZE are defined outside this hunk; presumably the four arguments in prototype order - TODO confirm
+merge16_armv6:	@ writes (a + b) >> 1 for each unsigned 16-bit lane of the two sources, using core registers only
+	push		{r4-r9,lr}	@ r4-r9 and lr are all overwritten below (lr serves as a scratch register)
+1:	@ loop, 16 bytes per pass
+	pld		[SRC1, #64]	@ preload hint, 64 bytes ahead of the current read position
+	ldm		SRC1!,	{r4-r5}	@ 8 bytes from the first source, pointer written back
+	pld		[SRC2, #64]
+	ldm		SRC2!,	{r8-r9}	@ matching 8 bytes from the second source
+	subs		SIZE,	SIZE,	#16	@ sets Z for the popeq at the end of the pass; nothing in between writes the flags
+	uhadd16		r4,	r4,	r8	@ two 16-bit lanes per register: truncating (a + b) >> 1 each
+	ldm		SRC1!,	{r6-r7}	@ second 8 bytes of this pass
+	uhadd16		r5,	r5,	r9
+	ldm		SRC2!,	{ip,lr}
+	uhadd16		r6,	r6,	ip
+	stm		DEST!,	{r4-r5}	@ first 8 result bytes out, pointer written back
+	uhadd16		r7,	r7,	lr
+	stm		DEST!,	{r6-r7}
+	popeq		{r4-r9,pc}	@ SIZE reached exactly zero: restore registers and return by popping the saved lr into pc
+	b		1b	@ NOTE(review): the only exit is on exact zero, so SIZE presumably must be a nonzero multiple of 16 - confirm with callers



More information about the vlc-commits mailing list