[x264-devel] commit: 4% faster deblock: special-case macroblock edges ( Jason Garrett-Glaser )

git version control git at videolan.org
Tue Aug 19 07:44:04 CEST 2008


x264 | branch: master | Jason Garrett-Glaser <darkshikari at gmail.com> | Mon Aug 18 23:03:37 2008 -0600| [9881ffdf482017798ab0067f4f894ab53c18e626] | committer: Jason Garrett-Glaser 

4% faster deblock: special-case macroblock edges
Along with a bit of related code reorganization and macroification

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=9881ffdf482017798ab0067f4f894ab53c18e626
---

 common/frame.c |  104 ++++++++++++++++++++++++++++---------------------------
 1 files changed, 53 insertions(+), 51 deletions(-)

diff --git a/common/frame.c b/common/frame.c
index f6e29eb..77c5854 100644
--- a/common/frame.c
+++ b/common/frame.c
@@ -667,20 +667,55 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
             }\
         }
 
+        #define DEBLOCK_STRENGTH(i_dir)\
+        {\
+            /* *** Get bS for each 4px for the current edge *** */\
+            if( IS_INTRA( h->mb.type[mb_xy] ) || IS_INTRA( h->mb.type[mbn_xy]) )\
+                *(uint32_t*)bS = 0x03030303;\
+            else\
+            {\
+                *(uint32_t*)bS = 0x00000000;\
+                for( i = 0; i < 4; i++ )\
+                {\
+                    int x  = i_dir == 0 ? i_edge : i;\
+                    int y  = i_dir == 0 ? i      : i_edge;\
+                    int xn = i_dir == 0 ? (x - 1)&0x03 : x;\
+                    int yn = i_dir == 0 ? y : (y - 1)&0x03;\
+                    if( h->mb.non_zero_count[mb_xy][x+y*4] != 0 ||\
+                        h->mb.non_zero_count[mbn_xy][xn+yn*4] != 0 )\
+                        bS[i] = 2;\
+                    else\
+                    {\
+                        /* FIXME: A given frame may occupy more than one position in\
+                         * the reference list. So we should compare the frame numbers,\
+                         * not the indices in the ref list.\
+                         * No harm yet, as we don't generate that case.*/\
+                        int i8p= mb_8x8+(x>>1)+(y>>1)*s8x8;\
+                        int i8q= mbn_8x8+(xn>>1)+(yn>>1)*s8x8;\
+                        int i4p= mb_4x4+x+y*s4x4;\
+                        int i4q= mbn_4x4+xn+yn*s4x4;\
+                        for( l = 0; l < 1 + (h->sh.i_type == SLICE_TYPE_B); l++ )\
+                            if( h->mb.ref[l][i8p] != h->mb.ref[l][i8q] ||\
+                                abs( h->mb.mv[l][i4p][0] - h->mb.mv[l][i4q][0] ) >= 4 ||\
+                                abs( h->mb.mv[l][i4p][1] - h->mb.mv[l][i4q][1] ) >= mvy_limit )\
+                            {\
+                                bS[i] = 1;\
+                                break;\
+                            }\
+                    }\
+                }\
+            }\
+        }
+
         /* i_dir == 0 -> vertical edge
          * i_dir == 1 -> horizontal edge */
-        #define deblock_dir(i_dir)\
+        #define DEBLOCK_DIR(i_dir)\
         {\
             int i_edge = (i_dir ? (mb_y <= b_interlaced) : (mb_x == 0));\
             int i_qpn, i, l, mbn_xy, mbn_8x8, mbn_4x4;\
             DECLARE_ALIGNED_4( uint8_t bS[4] );  /* filtering strength */\
             if( i_edge )\
-            {\
                 i_edge+= b_8x8_transform;\
-                mbn_xy  = mb_xy;\
-                mbn_8x8 = mb_8x8;\
-                mbn_4x4 = mb_4x4;\
-            }\
             else\
             {\
                 mbn_xy  = i_dir == 0 ? mb_xy  - 1 : mb_xy - h->mb.i_mb_stride;\
@@ -695,60 +730,27 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
                 else if( IS_INTRA( h->mb.type[mb_xy] ) || IS_INTRA( h->mb.type[mbn_xy]) )\
                 {\
                     FILTER_DIR( _intra, i_dir );\
-                    i_edge += b_8x8_transform+1;\
-                    mbn_xy  = mb_xy;\
-                    mbn_8x8 = mb_8x8;\
-                    mbn_4x4 = mb_4x4;\
+                    goto end##i_dir;\
                 }\
+                DEBLOCK_STRENGTH(i_dir);\
+                if( *(uint32_t*)bS )\
+                    FILTER_DIR( , i_dir);\
+                end##i_dir:\
+                i_edge += b_8x8_transform+1;\
             }\
+            mbn_xy  = mb_xy;\
+            mbn_8x8 = mb_8x8;\
+            mbn_4x4 = mb_4x4;\
             for( ; i_edge < i_edge_end; i_edge+=b_8x8_transform+1 )\
             {\
-                /* *** Get bS for each 4px for the current edge *** */\
-                if( IS_INTRA( h->mb.type[mb_xy] ) || IS_INTRA( h->mb.type[mbn_xy] ) )\
-                    *(uint32_t*)bS = 0x03030303;\
-                else\
-                {\
-                    *(uint32_t*)bS = 0x00000000;\
-                    for( i = 0; i < 4; i++ )\
-                    {\
-                        int x  = i_dir == 0 ? i_edge : i;\
-                        int y  = i_dir == 0 ? i      : i_edge;\
-                        int xn = (x - (i_dir == 0 ? 1 : 0 ))&0x03;\
-                        int yn = (y - (i_dir == 0 ? 0 : 1 ))&0x03;\
-                        if( h->mb.non_zero_count[mb_xy][x+y*4] != 0 ||\
-                            h->mb.non_zero_count[mbn_xy][xn+yn*4] != 0 )\
-                            bS[i] = 2;\
-                        else\
-                        {\
-                            /* FIXME: A given frame may occupy more than one position in\
-                             * the reference list. So we should compare the frame numbers,\
-                             * not the indices in the ref list.\
-                             * No harm yet, as we don't generate that case.*/\
-                            int i8p= mb_8x8+(x>>1)+(y>>1)*s8x8;\
-                            int i8q= mbn_8x8+(xn>>1)+(yn>>1)*s8x8;\
-                            int i4p= mb_4x4+x+y*s4x4;\
-                            int i4q= mbn_4x4+xn+yn*s4x4;\
-                            for( l = 0; l < 1 + (h->sh.i_type == SLICE_TYPE_B); l++ )\
-                                if( h->mb.ref[l][i8p] != h->mb.ref[l][i8q] ||\
-                                    abs( h->mb.mv[l][i4p][0] - h->mb.mv[l][i4q][0] ) >= 4 ||\
-                                    abs( h->mb.mv[l][i4p][1] - h->mb.mv[l][i4q][1] ) >= mvy_limit )\
-                                {\
-                                    bS[i] = 1;\
-                                    break;\
-                                }\
-                        }\
-                    }\
-                }\
+                DEBLOCK_STRENGTH(i_dir);\
                 if( *(uint32_t*)bS )\
                     FILTER_DIR( , i_dir);\
-                mbn_xy  = mb_xy;\
-                mbn_8x8 = mb_8x8;\
-                mbn_4x4 = mb_4x4;\
             }\
         }
 
-        deblock_dir(0);
-        deblock_dir(1);
+        DEBLOCK_DIR(0);
+        DEBLOCK_DIR(1);
     }
 
     if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )



More information about the x264-devel mailing list