[x264-devel] commit: Faster psy-trellis init (Jason Garrett-Glaser )

Thu Jan 14 05:51:07 CET 2010

x264 | branch: master | Jason Garrett-Glaser <darkshikari at gmail.com> | Tue Jan 12 20:14:35 2010 -0500| [ecca2f572b584f8f0e006a0f82048e30ada75b9c] | committer: Jason Garrett-Glaser 

Faster psy-trellis init
Remove some unnecessary zigzags.

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=ecca2f572b584f8f0e006a0f82048e30ada75b9c
---

 encoder/analyse.c |   15 ++-------------
 encoder/rdo.c     |    2 +-
 2 files changed, 3 insertions(+), 14 deletions(-)

diff --git a/encoder/analyse.c b/encoder/analyse.c
index a5aca3c..2874910 100644
--- a/encoder/analyse.c
+++ b/encoder/analyse.c
@@ -566,23 +566,12 @@ static inline const int8_t *predict_4x4_mode_available( int i_neighbour )
 /* For trellis=2, we need to do this for both sizes of DCT, for trellis=1 we only need to use it on the chosen mode. */
 static void inline x264_psy_trellis_init( x264_t *h, int do_both_dct )
 {
-    ALIGNED_ARRAY_16( int16_t, dct8x8,[4],[64] );
-    ALIGNED_ARRAY_16( int16_t, dct4x4,[16],[16] );
     ALIGNED_16( static uint8_t zero[16*FDEC_STRIDE] ) = {0};
-    int i;
 
     if( do_both_dct || h->mb.b_transform_8x8 )
-    {
-        h->dctf.sub16x16_dct8( dct8x8, h->mb.pic.p_fenc[0], zero );
-        for( i = 0; i < 4; i++ )
-            h->zigzagf.scan_8x8( h->mb.pic.fenc_dct8[i], dct8x8[i] );
-    }
+        h->dctf.sub16x16_dct8( h->mb.pic.fenc_dct8, h->mb.pic.p_fenc[0], zero );
     if( do_both_dct || !h->mb.b_transform_8x8 )
-    {
-        h->dctf.sub16x16_dct( dct4x4, h->mb.pic.p_fenc[0], zero );
-        for( i = 0; i < 16; i++ )
-            h->zigzagf.scan_4x4( h->mb.pic.fenc_dct4[i], dct4x4[i] );
-    }
+        h->dctf.sub16x16_dct( h->mb.pic.fenc_dct4, h->mb.pic.p_fenc[0], zero );
 }
 
 /* Pre-calculate fenc satd scores for psy RD, minus DC coefficients */
diff --git a/encoder/rdo.c b/encoder/rdo.c
index 1403a3d..9dee56d 100644
--- a/encoder/rdo.c
+++ b/encoder/rdo.c
@@ -516,7 +516,7 @@ static ALWAYS_INLINE int quant_trellis_cabac( x264_t *h, int16_t *dct,
             /* Psy trellis: bias in favor of higher AC coefficients in the reconstructed frame. */
             if( h->mb.i_psy_trellis && i && !dc && i_ctxBlockCat != DCT_CHROMA_AC )
             {
-                int orig_coef = (i_coefs == 64) ? h->mb.pic.fenc_dct8[idx][i] : h->mb.pic.fenc_dct4[idx][i];
+                int orig_coef = (i_coefs == 64) ? h->mb.pic.fenc_dct8[idx][zigzag[i]] : h->mb.pic.fenc_dct4[idx][zigzag[i]];
                 int predicted_coef = orig_coef - i_coef * signs[i];
                 int psy_value = h->mb.i_psy_trellis * abs(predicted_coef + unquant_abs_level * signs[i]);
                 int psy_weight = (i_coefs == 64) ? x264_dct8_weight_tab[zigzag[i]] : x264_dct4_weight_tab[zigzag[i]];