[vlc-commits] Added support for SSE2 to 16 bit merge (deinterlace).

Laurent Aimar git at videolan.org
Fri May 25 21:20:31 CEST 2012


vlc | branch: master | Laurent Aimar <fenrir at videolan.org> | Fri May 25 21:13:00 2012 +0200| [83f2312b574f5cbe289ec63867e584f05c52fff6] | committer: Laurent Aimar

Added support for SSE2 to 16 bit merge (deinterlace).

> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=83f2312b574f5cbe289ec63867e584f05c52fff6
---

 modules/video_filter/deinterlace/deinterlace.c |    4 +--
 modules/video_filter/deinterlace/merge.c       |   32 ++++++++++++++++++++++--
 modules/video_filter/deinterlace/merge.h       |   11 +++++++-
 3 files changed, 42 insertions(+), 5 deletions(-)

diff --git a/modules/video_filter/deinterlace/deinterlace.c b/modules/video_filter/deinterlace/deinterlace.c
index 414f87b..2762f04 100644
--- a/modules/video_filter/deinterlace/deinterlace.c
+++ b/modules/video_filter/deinterlace/deinterlace.c
@@ -636,9 +636,9 @@ int Open( vlc_object_t *p_this )
     else
 #endif
 #if defined(CAN_COMPILE_SSE)
-    if( chroma->pixel_size == 1 && (vlc_CPU() & CPU_CAPABILITY_SSE2) )
+    if( (vlc_CPU() & CPU_CAPABILITY_SSE2) )
     {
-        p_sys->pf_merge = MergeSSE2;
+        p_sys->pf_merge = chroma->pixel_size == 1 ? Merge8BitSSE2 : Merge16BitSSE2;
         p_sys->pf_end_merge = EndMMX;
     }
     else
diff --git a/modules/video_filter/deinterlace/merge.c b/modules/video_filter/deinterlace/merge.c
index b6fb619..b462b21 100644
--- a/modules/video_filter/deinterlace/merge.c
+++ b/modules/video_filter/deinterlace/merge.c
@@ -118,8 +118,8 @@ void Merge3DNow( void *_p_dest, const void *_p_s1, const void *_p_s2,
 #endif
 
 #if defined(CAN_COMPILE_SSE)
-void MergeSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
-                size_t i_bytes )
+void Merge8BitSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
+                    size_t i_bytes )
 {
     uint8_t *p_dest = _p_dest;
     const uint8_t *p_s1 = _p_s1;
@@ -143,6 +143,34 @@ void MergeSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
     for( ; i_bytes > 0; i_bytes-- )
         *p_dest++ = ( *p_s1++ + *p_s2++ ) >> 1;
 }
+
+void Merge16BitSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
+                     size_t i_bytes ) /* writes the average of two lines of 16-bit samples to _p_dest */
+{
+    uint16_t *p_dest = _p_dest;
+    const uint16_t *p_s1 = _p_s1;
+    const uint16_t *p_s2 = _p_s2;
+
+    size_t i_words = i_bytes / 2; /* count of 16-bit samples; an odd trailing byte is not processed */
+    for( ; i_words > 0 && ((uintptr_t)p_s1 & 15); i_words-- ) /* scalar head: runs until p_s1 is 16-byte aligned */
+        *p_dest++ = ( *p_s1++ + *p_s2++ ) >> 1; /* truncating average */
+
+    for( ; i_words >= 8; i_words -= 8 ) /* 8 samples (16 bytes) per iteration */
+    {
+        __asm__  __volatile__( "movdqu %2,%%xmm1;" /* unaligned load of 16 bytes from p_s2 */
+                               "pavgw %1, %%xmm1;" /* word average with p_s1 read straight from memory; presumably why p_s1 is aligned above */
+                               "movdqu %%xmm1, %0" :"=m" (*p_dest): /* unaligned store of 16 bytes to p_dest */
+                                                 "m" (*p_s1), /* NOTE(review): each "m" operand names one uint16_t, but 16 bytes are accessed */
+                                                 "m" (*p_s2) ); /* NOTE(review): no clobber list, so xmm1 is not declared as modified */
+        p_dest += 8;
+        p_s1 += 8;
+        p_s2 += 8;
+    }
+
+    for( ; i_words > 0; i_words-- ) /* scalar tail: fewer than 8 samples remain */
+        *p_dest++ = ( *p_s1++ + *p_s2++ ) >> 1; /* NOTE(review): truncates, whereas pavgw is documented to round up; confirm the mismatch is acceptable */
+}
+
 #endif
 
 #ifdef CAN_COMPILE_C_ALTIVEC
diff --git a/modules/video_filter/deinterlace/merge.h b/modules/video_filter/deinterlace/merge.h
index 7f4af07..1117b37 100644
--- a/modules/video_filter/deinterlace/merge.h
+++ b/modules/video_filter/deinterlace/merge.h
@@ -141,7 +141,16 @@ void Merge3DNow   ( void *, const void *, const void *, size_t );
  * @param _p_s2 Source line B
  * @param i_bytes Number of bytes to merge
  */
-void MergeSSE2    ( void *, const void *, const void *, size_t );
+void Merge8BitSSE2( void *, const void *, const void *, size_t );
+/**
+ * SSE2 routine to blend 16-bit pixels from two picture lines.
+ *
+ * @param _p_dest Target
+ * @param _p_s1 Source line A
+ * @param _p_s2 Source line B
+ * @param i_bytes Number of bytes to merge (an odd trailing byte is not processed)
+ */
+void Merge16BitSSE2( void *, const void *, const void *, size_t );
 #endif
 
 #if defined __ARM_NEON__



More information about the vlc-commits mailing list