[vlc-commits] deinterlace: clobber MM and XMM registers correctly
Rémi Denis-Courmont
git at videolan.org
Mon Dec 17 20:17:00 CET 2012
vlc | branch: master | Rémi Denis-Courmont <remi at remlab.net> | Fri Dec 14 20:23:04 2012 +0200| [8fbb7daaf0bf1d20b8ea8ad2437a1221256b3939] | committer: Rémi Denis-Courmont
deinterlace: clobber MM and XMM registers correctly
> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=8fbb7daaf0bf1d20b8ea8ad2437a1221256b3939
---
modules/video_filter/deinterlace/algo_x.c | 5 +++++
modules/video_filter/deinterlace/merge.c | 16 ++++++++++------
modules/video_filter/deinterlace/mmx.h | 19 ++++++++++++-------
3 files changed, 27 insertions(+), 13 deletions(-)
diff --git a/modules/video_filter/deinterlace/algo_x.c b/modules/video_filter/deinterlace/algo_x.c
index 8a72fd6..a428610 100644
--- a/modules/video_filter/deinterlace/algo_x.c
+++ b/modules/video_filter/deinterlace/algo_x.c
@@ -78,6 +78,7 @@ static inline int XDeint8x8DetectC( uint8_t *src, int i_src )
return fc < 1 ? false : true;
}
#ifdef CAN_COMPILE_MMXEXT
+VLC_MMX
static inline int XDeint8x8DetectMMXEXT( uint8_t *src, int i_src )
{
@@ -164,6 +165,7 @@ static inline void XDeint8x8MergeC( uint8_t *dst, int i_dst,
}
#ifdef CAN_COMPILE_MMXEXT
+VLC_MMX
static inline void XDeint8x8MergeMMXEXT( uint8_t *dst, int i_dst,
uint8_t *src1, int i_src1,
uint8_t *src2, int i_src2 )
@@ -237,6 +239,7 @@ static inline void XDeint8x8FieldEC( uint8_t *dst, int i_dst,
}
#ifdef CAN_COMPILE_MMXEXT
+VLC_MMX
static inline void XDeint8x8FieldEMMXEXT( uint8_t *dst, int i_dst,
uint8_t *src, int i_src )
{
@@ -308,6 +311,7 @@ static inline void XDeint8x8FieldC( uint8_t *dst, int i_dst,
}
#ifdef CAN_COMPILE_MMXEXT
+VLC_MMX
static inline void XDeint8x8FieldMMXEXT( uint8_t *dst, int i_dst,
uint8_t *src, int i_src )
{
@@ -495,6 +499,7 @@ static inline void XDeintBand8x8C( uint8_t *dst, int i_dst,
}
#ifdef CAN_COMPILE_MMXEXT
+VLC_MMX
static inline void XDeintBand8x8MMXEXT( uint8_t *dst, int i_dst,
uint8_t *src, int i_src,
const int i_mbx, int i_modx )
diff --git a/modules/video_filter/deinterlace/merge.c b/modules/video_filter/deinterlace/merge.c
index 109d1a2..94cdd77 100644
--- a/modules/video_filter/deinterlace/merge.c
+++ b/modules/video_filter/deinterlace/merge.c
@@ -27,11 +27,11 @@
# include "config.h"
#endif
-#include <vlc_common.h>
-
#include <stdlib.h>
#include <stdint.h>
+#include <vlc_common.h>
+#include <vlc_cpu.h>
#include "merge.h"
#ifdef CAN_COMPILE_MMXEXT
@@ -69,6 +69,7 @@ void Merge16BitGeneric( void *_p_dest, const void *_p_s1,
}
#if defined(CAN_COMPILE_MMXEXT)
+VLC_MMX
void MergeMMXEXT( void *_p_dest, const void *_p_s1, const void *_p_s2,
size_t i_bytes )
{
@@ -82,7 +83,7 @@ void MergeMMXEXT( void *_p_dest, const void *_p_s1, const void *_p_s2,
"pavgb %1, %%mm1;"
"movq %%mm1, %0" :"=m" (*p_dest):
"m" (*p_s1),
- "m" (*p_s2) );
+ "m" (*p_s2) : "mm1" );
p_dest += 8;
p_s1 += 8;
p_s2 += 8;
@@ -94,6 +95,7 @@ void MergeMMXEXT( void *_p_dest, const void *_p_s1, const void *_p_s2,
#endif
#if defined(CAN_COMPILE_3DNOW)
+VLC_MMX
void Merge3DNow( void *_p_dest, const void *_p_s1, const void *_p_s2,
size_t i_bytes )
{
@@ -107,7 +109,7 @@ void Merge3DNow( void *_p_dest, const void *_p_s1, const void *_p_s2,
"pavgusb %1, %%mm1;"
"movq %%mm1, %0" :"=m" (*p_dest):
"m" (*p_s1),
- "m" (*p_s2) );
+ "m" (*p_s2) : "mm1" );
p_dest += 8;
p_s1 += 8;
p_s2 += 8;
@@ -119,6 +121,7 @@ void Merge3DNow( void *_p_dest, const void *_p_s1, const void *_p_s2,
#endif
#if defined(CAN_COMPILE_SSE)
+VLC_SSE
void Merge8BitSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
size_t i_bytes )
{
@@ -135,7 +138,7 @@ void Merge8BitSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
"pavgb %1, %%xmm1;"
"movdqu %%xmm1, %0" :"=m" (*p_dest):
"m" (*p_s1),
- "m" (*p_s2) );
+ "m" (*p_s2) : "xmm1" );
p_dest += 16;
p_s1 += 16;
p_s2 += 16;
@@ -145,6 +148,7 @@ void Merge8BitSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
*p_dest++ = ( *p_s1++ + *p_s2++ ) >> 1;
}
+VLC_SSE
void Merge16BitSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
size_t i_bytes )
{
@@ -162,7 +166,7 @@ void Merge16BitSSE2( void *_p_dest, const void *_p_s1, const void *_p_s2,
"pavgw %1, %%xmm1;"
"movdqu %%xmm1, %0" :"=m" (*p_dest):
"m" (*p_s1),
- "m" (*p_s2) );
+ "m" (*p_s2) : "xmm1" );
p_dest += 8;
p_s1 += 8;
p_s2 += 8;
diff --git a/modules/video_filter/deinterlace/mmx.h b/modules/video_filter/deinterlace/mmx.h
index d2755f0..6f64a75 100644
--- a/modules/video_filter/deinterlace/mmx.h
+++ b/modules/video_filter/deinterlace/mmx.h
@@ -43,20 +43,23 @@ typedef union {
#define mmx_i2r(op,imm,reg) \
__asm__ __volatile__ (#op " %0, %%" #reg \
: /* nothing */ \
- : "i" (imm) )
+ : "i" (imm) \
+ : #reg)
#define mmx_m2r(op,mem,reg) \
__asm__ __volatile__ (#op " %0, %%" #reg \
: /* nothing */ \
- : "m" (mem))
+ : "m" (mem) \
+ : #reg)
#define mmx_r2m(op,reg,mem) \
__asm__ __volatile__ (#op " %%" #reg ", %0" \
: "=m" (mem) \
- : /* nothing */ )
+ : /* nothing */ \
+ : "memory")
#define mmx_r2r(op,regs,regd) \
- __asm__ __volatile__ (#op " %" #regs ", %" #regd)
+ __asm__ __volatile__ (#op " %%" #regs ", %%" #regd ::: #regd)
#define emms() __asm__ __volatile__ ("emms")
@@ -200,11 +203,13 @@ typedef union {
#define mmx_m2ri(op,mem,reg,imm) \
__asm__ __volatile__ (#op " %1, %0, %%" #reg \
: /* nothing */ \
- : "X" (mem), "X" (imm))
+ : "X" (mem), "X" (imm) \
+ : #reg)
#define mmx_r2ri(op,regs,regd,imm) \
__asm__ __volatile__ (#op " %0, %%" #regs ", %%" #regd \
: /* nothing */ \
- : "X" (imm) )
+ : "X" (imm) \
+ : #regd)
#define mmx_fetch(mem,hint) \
__asm__ __volatile__ ("prefetch" #hint " %0" \
@@ -238,7 +243,7 @@ typedef union {
#define pminub_r2r(regs,regd) mmx_r2r (pminub, regs, regd)
#define pmovmskb(mmreg,reg) \
- __asm__ __volatile__ ("movmskps %" #mmreg ", %" #reg)
+ __asm__ __volatile__ ("movmskps %" #mmreg ", %" #reg : : : #reg)
#define pmulhuw_m2r(var,reg) mmx_m2r (pmulhuw, var, reg)
#define pmulhuw_r2r(regs,regd) mmx_r2r (pmulhuw, regs, regd)
More information about the vlc-commits mailing list