[x264-devel] Add SSE support to rectangle.h for 16-byte stores
Jason Garrett-Glaser
git at videolan.org
Wed Apr 13 04:04:29 CEST 2011
x264 | branch: master | Jason Garrett-Glaser <jason at x264.com> | Tue Mar 29 05:33:44 2011 -0700| [f422ec93254ed3f9883acac0bb3f67e3b4ea960c] | committer: Jason Garrett-Glaser
Add SSE support to rectangle.h for 16-byte stores
Uses GCC vector intrinsics; may be suboptimal on particularly old GCC versions.
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=f422ec93254ed3f9883acac0bb3f67e3b4ea960c
---
common/common.h | 3 ++-
common/rectangle.h | 10 ++++++++++
common/x86/util.h | 3 +++
configure | 4 +++-
4 files changed, 18 insertions(+), 2 deletions(-)
diff --git a/common/common.h b/common/common.h
index fcf0250..496542e 100644
--- a/common/common.h
+++ b/common/common.h
@@ -851,11 +851,12 @@ struct x264_t
// included at the end because it needs x264_t
#include "macroblock.h"
-#include "rectangle.h"
#if HAVE_MMX
#include "x86/util.h"
#endif
+#include "rectangle.h"
+
#endif
diff --git a/common/rectangle.h b/common/rectangle.h
index aeaa2b9..770de2c 100644
--- a/common/rectangle.h
+++ b/common/rectangle.h
@@ -80,6 +80,15 @@ static ALWAYS_INLINE void x264_macroblock_cache_rect( void *dst, int w, int h, i
{
/* height 1, width 16 doesn't occur */
assert( h != 1 );
+#if HAVE_VECTOREXT && defined(__SSE__)
+ v4si v16 = {v,v,v,v};
+
+ M128( d+s*0+0 ) = (__m128)v16;
+ M128( d+s*1+0 ) = (__m128)v16;
+ if( h == 2 ) return;
+ M128( d+s*2+0 ) = (__m128)v16;
+ M128( d+s*3+0 ) = (__m128)v16;
+#else
if( WORD_SIZE == 8 )
{
do
@@ -103,6 +112,7 @@ static ALWAYS_INLINE void x264_macroblock_cache_rect( void *dst, int w, int h, i
d += s;
} while( --h );
}
+#endif
}
else
assert(0);
diff --git a/common/x86/util.h b/common/x86/util.h
index 0b786cf..1e91c3b 100644
--- a/common/x86/util.h
+++ b/common/x86/util.h
@@ -154,6 +154,9 @@ static void ALWAYS_INLINE x264_predictor_roundclip_mmxext( int16_t (*dst)[2], in
#define M128_ZERO ((__m128){0,0,0,0})
#define x264_union128_t x264_union128_sse_t
typedef union { __m128 i; uint64_t a[2]; uint32_t b[4]; uint16_t c[8]; uint8_t d[16]; } MAY_ALIAS x264_union128_sse_t;
+#if HAVE_VECTOREXT
+typedef uint32_t v4si __attribute__((vector_size (16)));
+#endif
#endif
#endif
diff --git a/configure b/configure
index 29977a2..1a7cb33 100755
--- a/configure
+++ b/configure
@@ -223,7 +223,7 @@ cross_prefix=""
EXE=""
# list of all preprocessor HAVE values we can define
-CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F VISUALIZE SWSCALE LAVF FFMS GPAC GF_MALLOC AVS GPL"
+CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F VISUALIZE SWSCALE LAVF FFMS GPAC GF_MALLOC AVS GPL VECTOREXT"
# parse options
@@ -812,6 +812,8 @@ if [ "$avs" = "auto" ] ; then
fi
fi
+cc_check "stdint.h" "" "uint32_t test_vec __attribute__ ((vector_size (16))) = {0,1,2,3};" && define HAVE_VECTOREXT
+
if [ "$pic" = "yes" ] ; then
CFLAGS="$CFLAGS -fPIC"
ASFLAGS="$ASFLAGS -DPIC"
More information about the x264-devel mailing list