[vlc-commits] deinterlace: clobber MM and XMM registers correctly

Rémi Denis-Courmont git at videolan.org
Mon Dec 17 20:17:00 CET 2012


vlc | branch: master | Rémi Denis-Courmont <remi at remlab.net> | Fri Dec 14 20:23:04 2012 +0200| [8fbb7daaf0bf1d20b8ea8ad2437a1221256b3939] | committer: Rémi Denis-Courmont

deinterlace: clobber MM and XMM registers correctly

> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=8fbb7daaf0bf1d20b8ea8ad2437a1221256b3939
---

 modules/video_filter/deinterlace/algo_x.c |    5 +++++
 modules/video_filter/deinterlace/merge.c  |   16 ++++++++++------
 modules/video_filter/deinterlace/mmx.h    |   19 ++++++++++++-------
 3 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/modules/video_filter/deinterlace/algo_x.c b/modules/video_filter/deinterlace/algo_x.c
index 8a72fd6..a428610 100644
--- a/modules/video_filter/deinterlace/algo_x.c
+++ b/modules/video_filter/deinterlace/algo_x.c
@@ -78,6 +78,7 @@ static inline int XDeint8x8DetectC( uint8_t *src, int i_src )
     return fc < 1 ? false : true;
 }
 #ifdef CAN_COMPILE_MMXEXT
+VLC_MMX
 static inline int XDeint8x8DetectMMXEXT( uint8_t *src, int i_src )
 {
 
@@ -164,6 +165,7 @@ static inline void XDeint8x8MergeC( uint8_t *dst,  int i_dst,
 }
 
 #ifdef CAN_COMPILE_MMXEXT
+VLC_MMX
 static inline void XDeint8x8MergeMMXEXT( uint8_t *dst,  int i_dst,
                                          uint8_t *src1, int i_src1,
                                          uint8_t *src2, int i_src2 )
@@ -237,6 +239,7 @@ static inline void XDeint8x8FieldEC( uint8_t *dst, int i_dst,
 }
 
 #ifdef CAN_COMPILE_MMXEXT
+VLC_MMX
 static inline void XDeint8x8FieldEMMXEXT( uint8_t *dst, int i_dst,
                                           uint8_t *src, int i_src )
 {
@@ -308,6 +311,7 @@ static inline void XDeint8x8FieldC( uint8_t *dst, int i_dst,
 }
 
 #ifdef CAN_COMPILE_MMXEXT
+VLC_MMX
 static inline void XDeint8x8FieldMMXEXT( uint8_t *dst, int i_dst,
                                          uint8_t *src, int i_src )
 {
@@ -495,6 +499,7 @@ static inline void XDeintBand8x8C( uint8_t *dst, int i_dst,
 }
 
 #ifdef CAN_COMPILE_MMXEXT
+VLC_MMX
 static inline void XDeintBand8x8MMXEXT( uint8_t *dst, int i_dst,
                                         uint8_t *src, int i_src,
                                         const int i_mbx, int i_modx )
diff --git a/modules/video_filter/deinterlace/merge.c b/modules/video_filter/deinterlace/merge.c
index 109d1a2..94cdd77 100644
--- a/modules/video_filter/deinterlace/merge.c
+++ b/modules/video_filter/deinterlace/merge.c
@@ -27,11 +27,11 @@
 #   include "config.h"
 #endif
 
-#include <vlc_common.h>
-
 #include <stdlib.h>
 #include <stdint.h>
 
+#include <vlc_common.h>
+#include <vlc_cpu.h>
 #include "merge.h"
 
 #ifdef CAN_COMPILE_MMXEXT
@@ -69,6 +69,7 @@ void Merge16BitGeneric( void *_p_dest, const void *_p_s1,
 }
 
 #if defined(CAN_COMPILE_MMXEXT)
+VLC_MMX
 void MergeMMXEXT( void *_p_dest, const void *_p_s1, const void *_p_s2,
                   size_t i_bytes )
 {
@@ -82,7 +83,7 @@ void MergeMMXEXT( void *_p_dest, const void *_p_s1, const void *_p_s2,
                                "pavgb %1, %%mm1;"
                                "movq %%mm1, %0" :"=m" (*p_dest):
                                                  "m" (*p_s1),
-                                                 "m" (*p_s2) );
+                                                 "m" (*p_s2) : "mm1" );
         p_dest += 8;
         p_s1 += 8;
         p_s2 += 8;
@@ -94,6 +95,7 @@ void MergeMMXEXT( void *_p_dest, const void *_p_s1, const void *_p_s2,
 #endif
 
 #if defined(CAN_COMPILE_3DNOW)
+VLC_MMX
 void Merge3DNow( void *_p_dest, const void *_p_s1, const void *_p_s2,
                  size_t i_bytes )
 {
@@ -107,7 +109,7 @@ void Merge3DNow( void *_p_dest, const void *_p_s1, const void *_p_s2,
                                "pavgusb %1, %%mm1;"
                                "movq %%mm1, %0" :"=m" (*p_dest):
                                                  "m" (*p_s1),
-                                                 "m" (*p_s2) );
+                                                 "m" (*p_s2) : "mm1" );
         p_dest += 8;
         p_s1 += 8;
         p_s2 += 8;
@@ -119,6 +121,7 @@ void Merge3DNow( void *_p_dest, const void *_p_s1, const void *_p_s2,
 #endif
 
 #if defined(CAN_COMPILE_SSE)
+VLC_SSE
 void Merge8BitSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
                     size_t i_bytes )
 {
@@ -135,7 +138,7 @@ void Merge8BitSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
                                "pavgb %1, %%xmm1;"
                                "movdqu %%xmm1, %0" :"=m" (*p_dest):
                                                  "m" (*p_s1),
-                                                 "m" (*p_s2) );
+                                                 "m" (*p_s2) : "xmm1" );
         p_dest += 16;
         p_s1 += 16;
         p_s2 += 16;
@@ -145,6 +148,7 @@ void Merge8BitSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
         *p_dest++ = ( *p_s1++ + *p_s2++ ) >> 1;
 }
 
+VLC_SSE
 void Merge16BitSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
                      size_t i_bytes )
 {
@@ -162,7 +166,7 @@ void Merge16BitSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
                                "pavgw %1, %%xmm1;"
                                "movdqu %%xmm1, %0" :"=m" (*p_dest):
                                                  "m" (*p_s1),
-                                                 "m" (*p_s2) );
+                                                 "m" (*p_s2) : "xmm1" );
         p_dest += 8;
         p_s1 += 8;
         p_s2 += 8;
diff --git a/modules/video_filter/deinterlace/mmx.h b/modules/video_filter/deinterlace/mmx.h
index d2755f0..6f64a75 100644
--- a/modules/video_filter/deinterlace/mmx.h
+++ b/modules/video_filter/deinterlace/mmx.h
@@ -43,20 +43,23 @@ typedef    union {
 #define    mmx_i2r(op,imm,reg) \
     __asm__ __volatile__ (#op " %0, %%" #reg \
                   : /* nothing */ \
-                  : "i" (imm) )
+                  : "i" (imm) \
+                  : #reg)
 
 #define    mmx_m2r(op,mem,reg) \
     __asm__ __volatile__ (#op " %0, %%" #reg \
                   : /* nothing */ \
-                  : "m" (mem))
+                  : "m" (mem) \
+                  : #reg)
 
 #define    mmx_r2m(op,reg,mem) \
     __asm__ __volatile__ (#op " %%" #reg ", %0" \
                   : "=m" (mem) \
-                  : /* nothing */ )
+                  : /* nothing */ \
+                  : "memory")
 
 #define    mmx_r2r(op,regs,regd) \
-    __asm__ __volatile__ (#op " %" #regs ", %" #regd)
+    __asm__ __volatile__ (#op " %%" #regs ", %%" #regd ::: #regd)
 
 
 #define    emms() __asm__ __volatile__ ("emms")
@@ -200,11 +203,13 @@ typedef    union {
 #define mmx_m2ri(op,mem,reg,imm) \
         __asm__ __volatile__ (#op " %1, %0, %%" #reg \
                               : /* nothing */ \
-                              : "X" (mem), "X" (imm))
+                              : "X" (mem), "X" (imm) \
+                              : #reg)
 #define mmx_r2ri(op,regs,regd,imm) \
         __asm__ __volatile__ (#op " %0, %%" #regs ", %%" #regd \
                               : /* nothing */ \
-                              : "X" (imm) )
+                              : "X" (imm) \
+                              : #regd)
 
 #define    mmx_fetch(mem,hint) \
     __asm__ __volatile__ ("prefetch" #hint " %0" \
@@ -238,7 +243,7 @@ typedef    union {
 #define    pminub_r2r(regs,regd)        mmx_r2r (pminub, regs, regd)
 
 #define    pmovmskb(mmreg,reg) \
-    __asm__ __volatile__ ("movmskps %" #mmreg ", %" #reg)
+    __asm__ __volatile__ ("movmskps %" #mmreg ", %" #reg : : : #reg)
 
 #define    pmulhuw_m2r(var,reg)        mmx_m2r (pmulhuw, var, reg)
 #define    pmulhuw_r2r(regs,regd)        mmx_r2r (pmulhuw, regs, regd)



More information about the vlc-commits mailing list