[vlc-commits] Added support for SSE2 to 16 bit merge (deinterlace).
Laurent Aimar
git at videolan.org
Fri May 25 21:20:31 CEST 2012
vlc | branch: master | Laurent Aimar <fenrir at videolan.org> | Fri May 25 21:13:00 2012 +0200| [83f2312b574f5cbe289ec63867e584f05c52fff6] | committer: Laurent Aimar
Added support for SSE2 to 16 bit merge (deinterlace).
> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=83f2312b574f5cbe289ec63867e584f05c52fff6
---
modules/video_filter/deinterlace/deinterlace.c | 4 +--
modules/video_filter/deinterlace/merge.c | 32 ++++++++++++++++++++++--
modules/video_filter/deinterlace/merge.h | 11 +++++++-
3 files changed, 42 insertions(+), 5 deletions(-)
diff --git a/modules/video_filter/deinterlace/deinterlace.c b/modules/video_filter/deinterlace/deinterlace.c
index 414f87b..2762f04 100644
--- a/modules/video_filter/deinterlace/deinterlace.c
+++ b/modules/video_filter/deinterlace/deinterlace.c
@@ -636,9 +636,9 @@ int Open( vlc_object_t *p_this )
else
#endif
#if defined(CAN_COMPILE_SSE)
- if( chroma->pixel_size == 1 && (vlc_CPU() & CPU_CAPABILITY_SSE2) )
+ if( (vlc_CPU() & CPU_CAPABILITY_SSE2) )
{
- p_sys->pf_merge = MergeSSE2;
+ p_sys->pf_merge = chroma->pixel_size == 1 ? Merge8BitSSE2 : Merge16BitSSE2;
p_sys->pf_end_merge = EndMMX;
}
else
diff --git a/modules/video_filter/deinterlace/merge.c b/modules/video_filter/deinterlace/merge.c
index b6fb619..b462b21 100644
--- a/modules/video_filter/deinterlace/merge.c
+++ b/modules/video_filter/deinterlace/merge.c
@@ -118,8 +118,8 @@ void Merge3DNow( void *_p_dest, const void *_p_s1, const void *_p_s2,
#endif
#if defined(CAN_COMPILE_SSE)
-void MergeSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
- size_t i_bytes )
+void Merge8BitSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
+ size_t i_bytes )
{
uint8_t *p_dest = _p_dest;
const uint8_t *p_s1 = _p_s1;
@@ -143,6 +143,34 @@ void MergeSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
for( ; i_bytes > 0; i_bytes-- )
*p_dest++ = ( *p_s1++ + *p_s2++ ) >> 1;
}
+
+void Merge16BitSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2, /* Average two lines of uint16_t samples into _p_dest. */
+ size_t i_bytes ) /* i_bytes is a length in bytes, not in samples. */
+{
+ uint16_t *p_dest = _p_dest;
+ const uint16_t *p_s1 = _p_s1;
+ const uint16_t *p_s2 = _p_s2;
+
+ size_t i_words = i_bytes / 2; /* sample count; an odd trailing byte is never processed */
+ for( ; i_words > 0 && ((uintptr_t)p_s1 & 15); i_words-- ) /* scalar head: run until p_s1 is 16-byte aligned or samples run out */
+ *p_dest++ = ( *p_s1++ + *p_s2++ ) >> 1; /* truncating average */
+
+ for( ; i_words >= 8; i_words -= 8 ) /* 8 samples (16 bytes) per pass */
+ {
+ __asm__ __volatile__( "movdqu %2,%%xmm1;" /* unaligned 16-byte load from p_s2 */
+ "pavgw %1, %%xmm1;" /* p_s1 used directly as memory operand, presumably why only p_s1 is aligned above. NOTE(review): per the Intel SDM, PAVGW rounds ((a+b+1)>>1) while the scalar loops truncate - confirm the mismatch is acceptable */
+ "movdqu %%xmm1, %0" :"=m" (*p_dest): /* unaligned 16-byte store; NOTE(review): each memory operand names a single uint16_t although 16 bytes are accessed */
+ "m" (*p_s1),
+ "m" (*p_s2) ); /* NOTE(review): no clobber list, so the compiler is not told that xmm1 is overwritten */
+ p_dest += 8;
+ p_s1 += 8;
+ p_s2 += 8;
+ }
+
+ for( ; i_words > 0; i_words-- ) /* scalar tail: fewer than 8 samples remain */
+ *p_dest++ = ( *p_s1++ + *p_s2++ ) >> 1;
+}
+
#endif
#ifdef CAN_COMPILE_C_ALTIVEC
diff --git a/modules/video_filter/deinterlace/merge.h b/modules/video_filter/deinterlace/merge.h
index 7f4af07..1117b37 100644
--- a/modules/video_filter/deinterlace/merge.h
+++ b/modules/video_filter/deinterlace/merge.h
@@ -141,7 +141,16 @@ void Merge3DNow ( void *, const void *, const void *, size_t );
* @param _p_s2 Source line B
* @param i_bytes Number of bytes to merge
*/
-void MergeSSE2 ( void *, const void *, const void *, size_t );
+void Merge8BitSSE2( void *, const void *, const void *, size_t );
+/**
+ * SSE2 routine to blend 16-bit pixels from two picture lines.
+ *
+ * @param _p_dest Target
+ * @param _p_s1 Source line A
+ * @param _p_s2 Source line B
+ * @param i_bytes Number of bytes (not 16-bit samples) to merge
+ */
+void Merge16BitSSE2( void *, const void *, const void *, size_t );
#endif
#if defined __ARM_NEON__
More information about the vlc-commits mailing list