[x264-devel] commit: Extend trellis to support luma/chroma DC and chroma AC ( Jason Garrett-Glaser )
git version control
git at videolan.org
Fri Oct 31 16:57:50 CET 2008
x264 | branch: master | Jason Garrett-Glaser <darkshikari at gmail.com> | Thu Oct 16 03:17:53 2008 -0700| [79194caffdc216e338674d88e50adca2f4ea8fa2] | committer: Jason Garrett-Glaser
Extend trellis to support luma/chroma DC and chroma AC
Small speed loss in trellis 1, slightly larger in trellis 2, but significant quality improvement.
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=79194caffdc216e338674d88e50adca2f4ea8fa2
---
encoder/macroblock.c | 25 +++++++++++++++++++------
encoder/macroblock.h | 2 ++
encoder/rdo.c | 40 +++++++++++++++++++++++++++-------------
3 files changed, 48 insertions(+), 19 deletions(-)
diff --git a/encoder/macroblock.c b/encoder/macroblock.c
index e1e215c..f51d5ff 100644
--- a/encoder/macroblock.c
+++ b/encoder/macroblock.c
@@ -187,7 +187,10 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qp )
}
h->dctf.dct4x4dc( dct_dc4x4 );
- h->quantf.quant_4x4_dc( dct_dc4x4, h->quant4_mf[CQM_4IY][i_qp][0]>>1, h->quant4_bias[CQM_4IY][i_qp][0]<<1 );
+ if( h->mb.b_trellis )
+ x264_quant_dc_trellis( h, (int16_t*)dct_dc4x4, CQM_4IY, i_qp, DCT_LUMA_DC, 1);
+ else
+ h->quantf.quant_4x4_dc( dct_dc4x4, h->quant4_mf[CQM_4IY][i_qp][0]>>1, h->quant4_bias[CQM_4IY][i_qp][0]<<1 );
h->zigzagf.scan_4x4( h->dct.luma16x16_dc, dct_dc4x4 );
/* output samples to fdec */
@@ -239,8 +242,10 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
dct2x2[i>>1][i&1] = dct4x4[i][0][0];
dct4x4[i][0][0] = 0;
- /* no trellis; it doesn't seem to help chroma noticeably */
- h->quantf.quant_4x4( dct4x4[i], h->quant4_mf[CQM_4IC+b_inter][i_qp], h->quant4_bias[CQM_4IC+b_inter][i_qp] );
+ if( h->mb.b_trellis )
+ x264_quant_4x4_trellis( h, dct4x4[i], CQM_4IC+b_inter, i_qp, DCT_CHROMA_AC, !b_inter, 0 );
+ else
+ h->quantf.quant_4x4( dct4x4[i], h->quant4_mf[CQM_4IC+b_inter][i_qp], h->quant4_bias[CQM_4IC+b_inter][i_qp] );
h->zigzagf.scan_4x4( h->dct.luma4x4[16+i+ch*4], dct4x4[i] );
if( b_decimate )
@@ -248,7 +253,10 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
}
h->dctf.dct2x2dc( dct2x2 );
- h->quantf.quant_2x2_dc( dct2x2, h->quant4_mf[CQM_4IC+b_inter][i_qp][0]>>1, h->quant4_bias[CQM_4IC+b_inter][i_qp][0]<<1 );
+ if( h->mb.b_trellis )
+ x264_quant_dc_trellis( h, (int16_t*)dct2x2, CQM_4IC+b_inter, i_qp, DCT_CHROMA_DC, !b_inter );
+ else
+ h->quantf.quant_2x2_dc( dct2x2, h->quant4_mf[CQM_4IC+b_inter][i_qp][0]>>1, h->quant4_bias[CQM_4IC+b_inter][i_qp][0]<<1 );
zigzag_scan_2x2_dc( h->dct.chroma_dc[ch], dct2x2 );
/* output samples to fdec */
@@ -937,9 +945,14 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
p_fdec = h->mb.pic.p_fdec[1+ch] + (i8&1)*4 + (i8>>1)*4*FDEC_STRIDE;
h->dctf.sub4x4_dct( dct4x4, p_fenc, p_fdec );
- h->quantf.quant_4x4( dct4x4, h->quant4_mf[CQM_4PC][i_qp], h->quant4_bias[CQM_4PC][i_qp] );
+ dct4x4[0][0] = 0;
+
+ if( h->mb.b_trellis )
+ x264_quant_4x4_trellis( h, dct4x4, CQM_4PC, i_qp, DCT_CHROMA_AC, 0, 0 );
+ else
+ h->quantf.quant_4x4( dct4x4, h->quant4_mf[CQM_4PC][i_qp], h->quant4_bias[CQM_4PC][i_qp] );
+
h->zigzagf.scan_4x4( h->dct.luma4x4[16+i8+ch*4], dct4x4 );
- h->dct.luma4x4[16+i8+ch*4][0] = 0;
if( array_non_zero( dct4x4 ) )
{
h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[CQM_4PC], i_qp );
diff --git a/encoder/macroblock.h b/encoder/macroblock.h
index 9fda183..b25509a 100644
--- a/encoder/macroblock.h
+++ b/encoder/macroblock.h
@@ -54,6 +54,8 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp );
void x264_cabac_mb_skip( x264_t *h, int b_skip );
+void x264_quant_dc_trellis( x264_t *h, int16_t *dct, int i_quant_cat,
+ int i_qp, int i_ctxBlockCat, int b_intra );
void x264_quant_4x4_trellis( x264_t *h, int16_t dct[4][4], int i_quant_cat,
int i_qp, int i_ctxBlockCat, int b_intra, int idx );
void x264_quant_8x8_trellis( x264_t *h, int16_t dct[8][8], int i_quant_cat,
diff --git a/encoder/rdo.c b/encoder/rdo.c
index c56b6e0..dbdcbe8 100644
--- a/encoder/rdo.c
+++ b/encoder/rdo.c
@@ -333,7 +333,6 @@ typedef struct {
} trellis_node_t;
// TODO:
-// support chroma and i16x16 DC
// save cabac state between blocks?
// use trellis' RD score instead of x264_mb_decimate_score?
// code 8x8 sig/last flags forwards with deadzone and save the contexts at
@@ -353,10 +352,10 @@ typedef struct {
// comparable to the input. so unquant is the direct inverse of quant,
// and uses the dct scaling factors, not the idct ones.
-static inline void quant_trellis_cabac( x264_t *h, int16_t *dct,
+static ALWAYS_INLINE void quant_trellis_cabac( x264_t *h, int16_t *dct,
const uint16_t *quant_mf, const int *unquant_mf,
const int *coef_weight, const uint8_t *zigzag,
- int i_ctxBlockCat, int i_lambda2, int b_ac, int i_coefs, int idx )
+ int i_ctxBlockCat, int i_lambda2, int b_ac, int dc, int i_coefs, int idx )
{
int abs_coefs[64], signs[64];
trellis_node_t nodes[2][8];
@@ -381,7 +380,7 @@ static inline void quant_trellis_cabac( x264_t *h, int16_t *dct,
/* init coefs */
for( i = i_coefs-1; i >= b_ac; i-- )
- if( (unsigned)(dct[zigzag[i]] * quant_mf[zigzag[i]] + f-1) >= 2*f )
+ if( (unsigned)(dct[zigzag[i]] * (dc?quant_mf[0]>>1:quant_mf[zigzag[i]]) + f-1) >= 2*f )
break;
if( i < b_ac )
@@ -425,17 +424,22 @@ static inline void quant_trellis_cabac( x264_t *h, int16_t *dct,
cabac_state_last[i] = ctx_last[ last_coeff_flag_offset_8x8[i] ];
}
}
- else
+ else if( !dc || i_ctxBlockCat != DCT_CHROMA_DC )
{
memcpy( cabac_state_sig, &h->cabac.state[ significant_coeff_flag_offset[b_interlaced][i_ctxBlockCat] ], 15 );
memcpy( cabac_state_last, &h->cabac.state[ last_coeff_flag_offset[b_interlaced][i_ctxBlockCat] ], 15 );
}
+ else
+ {
+ memcpy( cabac_state_sig, &h->cabac.state[ significant_coeff_flag_offset[b_interlaced][i_ctxBlockCat] ], 3 );
+ memcpy( cabac_state_last, &h->cabac.state[ last_coeff_flag_offset[b_interlaced][i_ctxBlockCat] ], 3 );
+ }
memcpy( nodes_cur[0].cabac_state, &h->cabac.state[ coeff_abs_level_m1_offset[i_ctxBlockCat] ], 10 );
for( i = i_last_nnz; i >= b_ac; i-- )
{
int i_coef = abs_coefs[i];
- int q = ( f + i_coef * quant_mf[zigzag[i]] ) >> 16;
+ int q = ( f + i_coef * (dc?quant_mf[0]>>1:quant_mf[zigzag[i]]) ) >> 16;
int abs_level;
int cost_sig[2], cost_last[2];
trellis_node_t n;
@@ -488,11 +492,11 @@ static inline void quant_trellis_cabac( x264_t *h, int16_t *dct,
// that are better left coded, especially at QP > 40.
for( abs_level = q; abs_level >= q-1; abs_level-- )
{
- int unquant_abs_level = ((unquant_mf[zigzag[i]] * abs_level + 128) >> 8);
+ int unquant_abs_level = (((dc?unquant_mf[0]<<1:unquant_mf[zigzag[i]]) * abs_level + 128) >> 8);
int d = i_coef - unquant_abs_level;
int64_t ssd;
/* Psy trellis: bias in favor of higher AC coefficients in the reconstructed frame. */
- if( h->mb.i_psy_trellis && i )
+ if( h->mb.i_psy_trellis && i && !dc && i_ctxBlockCat != DCT_CHROMA_AC )
{
int orig_coef = (i_coefs == 64) ? h->mb.pic.fenc_dct8[idx][i] : h->mb.pic.fenc_dct4[idx][i];
int predicted_coef = orig_coef - i_coef * signs[i];
@@ -501,7 +505,8 @@ static inline void quant_trellis_cabac( x264_t *h, int16_t *dct,
ssd = (int64_t)d*d * coef_weight[i] - psy_weight * psy_value;
}
else
- ssd = (int64_t)d*d * coef_weight[i];
+ /* FIXME: for i16x16 dc is this weight optimal? */
+ ssd = (int64_t)d*d * (dc?256:coef_weight[i]);
for( j = 0; j < 8; j++ )
{
@@ -563,19 +568,28 @@ static inline void quant_trellis_cabac( x264_t *h, int16_t *dct,
}
}
+const static uint8_t x264_zigzag_scan2[4] = {0,1,2,3};
+
+void x264_quant_dc_trellis( x264_t *h, int16_t *dct, int i_quant_cat,
+ int i_qp, int i_ctxBlockCat, int b_intra )
+{
+ quant_trellis_cabac( h, (int16_t*)dct,
+ h->quant4_mf[i_quant_cat][i_qp], h->unquant4_mf[i_quant_cat][i_qp],
+ NULL, i_ctxBlockCat==DCT_CHROMA_DC ? x264_zigzag_scan2 : x264_zigzag_scan4[h->mb.b_interlaced],
+ i_ctxBlockCat, lambda2_tab[b_intra][i_qp], 0, 1, i_ctxBlockCat==DCT_CHROMA_DC ? 4 : 16, 0 );
+}
void x264_quant_4x4_trellis( x264_t *h, int16_t dct[4][4], int i_quant_cat,
int i_qp, int i_ctxBlockCat, int b_intra, int idx )
{
- int b_ac = (i_ctxBlockCat == DCT_LUMA_AC);
+ int b_ac = (i_ctxBlockCat == DCT_LUMA_AC || i_ctxBlockCat == DCT_CHROMA_AC);
quant_trellis_cabac( h, (int16_t*)dct,
h->quant4_mf[i_quant_cat][i_qp], h->unquant4_mf[i_quant_cat][i_qp],
x264_dct4_weight2_zigzag[h->mb.b_interlaced],
x264_zigzag_scan4[h->mb.b_interlaced],
- i_ctxBlockCat, lambda2_tab[b_intra][i_qp], b_ac, 16, idx );
+ i_ctxBlockCat, lambda2_tab[b_intra][i_qp], b_ac, 0, 16, idx );
}
-
void x264_quant_8x8_trellis( x264_t *h, int16_t dct[8][8], int i_quant_cat,
int i_qp, int b_intra, int idx )
{
@@ -583,6 +597,6 @@ void x264_quant_8x8_trellis( x264_t *h, int16_t dct[8][8], int i_quant_cat,
h->quant8_mf[i_quant_cat][i_qp], h->unquant8_mf[i_quant_cat][i_qp],
x264_dct8_weight2_zigzag[h->mb.b_interlaced],
x264_zigzag_scan8[h->mb.b_interlaced],
- DCT_LUMA_8x8, lambda2_tab[b_intra][i_qp], 0, 64, idx );
+ DCT_LUMA_8x8, lambda2_tab[b_intra][i_qp], 0, 0, 64, idx );
}
More information about the x264-devel mailing list