[x264-devel] commit: Faster macroblock_cache_rect (Jason Garrett-Glaser)

git version control git at videolan.org
Mon Dec 15 22:16:40 CET 2008


x264 | branch: master | Jason Garrett-Glaser <darkshikari at gmail.com> | Mon Dec 15 13:15:29 2008 -0800| [e59ee249829049de338bebc3a2a00f9e471b40f3] | committer: Jason Garrett-Glaser 

Faster macroblock_cache_rect
Explicit loop unrolling

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=e59ee249829049de338bebc3a2a00f9e471b40f3
---

 common/macroblock.h |   29 +++++++++++++++++++----------
 1 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/common/macroblock.h b/common/macroblock.h
index 2884349..17fd7b2 100644
--- a/common/macroblock.h
+++ b/common/macroblock.h
@@ -351,35 +351,44 @@ static ALWAYS_INLINE uint32_t pack16to32_mask( int a, int b )
 }
 static ALWAYS_INLINE void x264_macroblock_cache_rect1( void *dst, int width, int height, uint8_t val )
 {
-    int dy;
     if( width == 4 )
     {
         uint32_t val2 = val * 0x01010101;
-        for( dy = 0; dy < height; dy++ )
-            ((uint32_t*)dst)[2*dy] = val2;
+                          ((uint32_t*)dst)[0] = val2;
+        if( height >= 2 ) ((uint32_t*)dst)[2] = val2;
+        if( height == 4 ) ((uint32_t*)dst)[4] = val2;
+        if( height == 4 ) ((uint32_t*)dst)[6] = val2;
     }
     else // 2
     {
         uint32_t val2 = val * 0x0101;
-        for( dy = 0; dy < height; dy++ )
-            ((uint16_t*)dst)[4*dy] = val2;
+                          ((uint16_t*)dst)[ 0] = val2;
+        if( height >= 2 ) ((uint16_t*)dst)[ 4] = val2;
+        if( height == 4 ) ((uint16_t*)dst)[ 8] = val2;
+        if( height == 4 ) ((uint16_t*)dst)[12] = val2;
     }
 }
 static ALWAYS_INLINE void x264_macroblock_cache_rect4( void *dst, int width, int height, uint32_t val )
 {
-    int dy, dx;
+    int dy;
     if( width == 1 || WORD_SIZE < 8 )
     {
         for( dy = 0; dy < height; dy++ )
-            for( dx = 0; dx < width; dx++ )
-                ((uint32_t*)dst)[dx+8*dy] = val;
+        {
+                             ((uint32_t*)dst)[8*dy+0] = val;
+            if( width >= 2 ) ((uint32_t*)dst)[8*dy+1] = val;
+            if( width == 4 ) ((uint32_t*)dst)[8*dy+2] = val;
+            if( width == 4 ) ((uint32_t*)dst)[8*dy+3] = val;
+        }
     }
     else
     {
         uint64_t val64 = val + ((uint64_t)val<<32);
         for( dy = 0; dy < height; dy++ )
-            for( dx = 0; dx < width/2; dx++ )
-                ((uint64_t*)dst)[dx+4*dy] = val64;
+        {
+                             ((uint64_t*)dst)[4*dy+0] = val64;
+            if( width == 4 ) ((uint64_t*)dst)[4*dy+1] = val64;
+        }
     }
 }
 #define x264_macroblock_cache_mv_ptr(a,x,y,w,h,l,mv) x264_macroblock_cache_mv(a,x,y,w,h,l,*(uint32_t*)mv)



More information about the x264-devel mailing list