[x264-devel] Faster pixel_memset
Jason Garrett-Glaser
git at videolan.org
Thu May 12 08:39:12 CEST 2011
x264 | branch: master | Jason Garrett-Glaser <jason at x264.com> | Wed May 4 23:26:19 2011 -0700| [160557abe60d0671fbee282b51bd38fe764917c3] | committer: Jason Garrett-Glaser
Faster pixel_memset
~4x faster.
Also inline plane_expand_border for improved constant propagation.
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=160557abe60d0671fbee282b51bd38fe764917c3
---
common/frame.c | 55 ++++++++++++++++++++++++++++++++++++++++++++-----------
1 files changed, 44 insertions(+), 11 deletions(-)
diff --git a/common/frame.c b/common/frame.c
index bca4f1f..2d56921 100644
--- a/common/frame.c
+++ b/common/frame.c
@@ -331,23 +331,56 @@ int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
static void ALWAYS_INLINE pixel_memset( pixel *dst, pixel *src, int len, int size )
{
uint8_t *dstp = (uint8_t*)dst;
- if( size == 1 )
- memset(dst, *src, len);
- else if( size == 2 )
+ uint8_t v1 = *src;
+ uint16_t v2 = size == 1 ? v1 + (v1 << 8) : M16( src );
+ uint32_t v4 = size <= 2 ? v2 + (v2 << 16) : M32( src );
+ int i = 0;
+ len *= size;
+
+ /* Align the input pointer if it isn't already */
+ if( (intptr_t)dstp & (WORD_SIZE - 1) )
+ {
+ if( size <= 2 && ((intptr_t)dstp & 3) )
+ {
+ if( size == 1 && ((intptr_t)dstp & 1) )
+ dstp[i++] = v1;
+ if( (intptr_t)dstp & 2 )
+ {
+ M16( dstp+i ) = v2;
+ i += 2;
+ }
+ }
+ if( WORD_SIZE == 8 && (intptr_t)dstp & 4 )
+ {
+ M32( dstp+i ) = v4;
+ i += 4;
+ }
+ }
+
+ /* Main copy loop */
+ if( WORD_SIZE == 8 )
{
- int v = M16( src );
- for( int i = 0; i < len; i++ )
- M16( dstp+i*2 ) = v;
+ uint64_t v8 = v4 + ((uint64_t)v4<<32);
+ for( ; i < len - 7; i+=8 )
+ M64( dstp+i ) = v8;
}
- else if( size == 4 )
+ for( ; i < len - 3; i+=4 )
+ M32( dstp+i ) = v4;
+
+ /* Finish up the last few bytes */
+ if( size <= 2 )
{
- int v = M32( src );
- for( int i = 0; i < len; i++ )
- M32( dstp+i*4 ) = v;
+ if( i < len - 1 )
+ {
+ M16( dstp+i ) = v2;
+ i += 2;
+ }
+ if( size == 1 && i != len )
+ dstp[i] = v1;
}
}
-static void plane_expand_border( pixel *pix, int i_stride, int i_width, int i_height, int i_padh, int i_padv, int b_pad_top, int b_pad_bottom, int b_chroma )
+static void ALWAYS_INLINE plane_expand_border( pixel *pix, int i_stride, int i_width, int i_height, int i_padh, int i_padv, int b_pad_top, int b_pad_bottom, int b_chroma )
{
#define PPIXEL(x, y) ( pix + (x) + (y)*i_stride )
for( int y = 0; y < i_height; y++ )
More information about the x264-devel mailing list