[x264-devel] commit: Faster macroblock_cache_rect (Jason Garrett-Glaser)
git version control
git at videolan.org
Mon Dec 15 22:16:40 CET 2008
x264 | branch: master | Jason Garrett-Glaser <darkshikari at gmail.com> | Mon Dec 15 13:15:29 2008 -0800| [e59ee249829049de338bebc3a2a00f9e471b40f3] | committer: Jason Garrett-Glaser
Faster macroblock_cache_rect
Explicit loop unrolling
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=e59ee249829049de338bebc3a2a00f9e471b40f3
---
common/macroblock.h | 29 +++++++++++++++++++----------
1 files changed, 19 insertions(+), 10 deletions(-)
diff --git a/common/macroblock.h b/common/macroblock.h
index 2884349..17fd7b2 100644
--- a/common/macroblock.h
+++ b/common/macroblock.h
@@ -351,35 +351,44 @@ static ALWAYS_INLINE uint32_t pack16to32_mask( int a, int b )
}
static ALWAYS_INLINE void x264_macroblock_cache_rect1( void *dst, int width, int height, uint8_t val )
{
- int dy;
if( width == 4 )
{
uint32_t val2 = val * 0x01010101;
- for( dy = 0; dy < height; dy++ )
- ((uint32_t*)dst)[2*dy] = val2;
+ ((uint32_t*)dst)[0] = val2;
+ if( height >= 2 ) ((uint32_t*)dst)[2] = val2;
+ if( height == 4 ) ((uint32_t*)dst)[4] = val2;
+ if( height == 4 ) ((uint32_t*)dst)[6] = val2;
}
else // 2
{
uint32_t val2 = val * 0x0101;
- for( dy = 0; dy < height; dy++ )
- ((uint16_t*)dst)[4*dy] = val2;
+ ((uint16_t*)dst)[ 0] = val2;
+ if( height >= 2 ) ((uint16_t*)dst)[ 4] = val2;
+ if( height == 4 ) ((uint16_t*)dst)[ 8] = val2;
+ if( height == 4 ) ((uint16_t*)dst)[12] = val2;
}
}
static ALWAYS_INLINE void x264_macroblock_cache_rect4( void *dst, int width, int height, uint32_t val )
{
- int dy, dx;
+ int dy;
if( width == 1 || WORD_SIZE < 8 )
{
for( dy = 0; dy < height; dy++ )
- for( dx = 0; dx < width; dx++ )
- ((uint32_t*)dst)[dx+8*dy] = val;
+ {
+ ((uint32_t*)dst)[8*dy+0] = val;
+ if( width >= 2 ) ((uint32_t*)dst)[8*dy+1] = val;
+ if( width == 4 ) ((uint32_t*)dst)[8*dy+2] = val;
+ if( width == 4 ) ((uint32_t*)dst)[8*dy+3] = val;
+ }
}
else
{
uint64_t val64 = val + ((uint64_t)val<<32);
for( dy = 0; dy < height; dy++ )
- for( dx = 0; dx < width/2; dx++ )
- ((uint64_t*)dst)[dx+4*dy] = val64;
+ {
+ ((uint64_t*)dst)[4*dy+0] = val64;
+ if( width == 4 ) ((uint64_t*)dst)[4*dy+1] = val64;
+ }
}
}
#define x264_macroblock_cache_mv_ptr(a,x,y,w,h,l,mv) x264_macroblock_cache_mv(a,x,y,w,h,l,*(uint32_t*)mv)
More information about the x264-devel
mailing list