Add an optional QP-adaptive DCT lowpass filter to x264 (option "lowpass").

When the flag is set, the AC coefficients of each luma block are scaled by a
per-subband weight between the forward DCT and quantization.  The weight is
looked up from the macroblock QP (QP - 6 for I_16x16 / I_8x8 / I_4x4
macroblocks).  The per-macroblock flag is cleared at the start of analysis and
restored from the parameter afterwards, next to the noise-reduction flag.

Review fix in encoder/macroblock.c: x264_lowpass_dct() indexes the subband
tables with the same raster index it uses for dct[], instead of indexing a
zigzag-scanned copy of the table.

NOTE(review): whitespace runs in this patch were collapsed before it reached
review.  Indentation and blank context lines are reconstructed from the hunk
line counts; spacing inside string literals is kept as received.  Apply with
"patch -l" and check the context against the tree.

Index: common/common.c
===================================================================
--- common/common.c	(revision 661)
+++ common/common.c	(working copy)
@@ -463,6 +463,8 @@
         p->analyse.i_luma_deadzone[1] = atoi(value);
     OPT("nr")
         p->analyse.i_noise_reduction = atoi(value);
+    OPT("lowpass")
+        p->analyse.b_dct_lowpass = atobool(value);
     OPT("bitrate")
     {
         p->rc.i_bitrate = atoi(value);
@@ -889,6 +891,7 @@
     s += sprintf( s, " chroma_qp_offset=%d", p->analyse.i_chroma_qp_offset );
     s += sprintf( s, " threads=%d", p->i_threads );
     s += sprintf( s, " nr=%d", p->analyse.i_noise_reduction );
+    s += sprintf( s, " lowpass=%d", p->analyse.b_dct_lowpass );
     s += sprintf( s, " decimate=%d", p->analyse.b_dct_decimate );
     s += sprintf( s, " mbaff=%d", p->b_interlaced );
 
Index: common/common.h
===================================================================
--- common/common.h	(revision 661)
+++ common/common.h	(working copy)
@@ -428,6 +428,7 @@
         int b_chroma_me;
         int b_trellis;
         int b_noise_reduction;
+        int b_dct_lowpass;
 
         int b_interlaced;
 
Index: encoder/analyse.c
===================================================================
--- encoder/analyse.c	(revision 661)
+++ encoder/analyse.c	(working copy)
@@ -210,6 +210,7 @@
     h->mb.b_trellis = h->param.analyse.i_trellis > 1 && a->b_mbrd;
     h->mb.b_transform_8x8 = 0;
     h->mb.b_noise_reduction = 0;
+    h->mb.b_dct_lowpass = 0;
 
     /* I: Intra part */
     a->i_satd_i16x16 =
@@ -2562,6 +2563,7 @@
 
     h->mb.b_trellis = h->param.analyse.i_trellis;
     h->mb.b_noise_reduction = h->param.analyse.i_noise_reduction;
+    h->mb.b_dct_lowpass = h->param.analyse.b_dct_lowpass;
 }
 
 /*-------------------- Update MB from the analysis ----------------------*/
Index: encoder/encoder.c
===================================================================
--- encoder/encoder.c	(revision 661)
+++ encoder/encoder.c	(working copy)
@@ -407,6 +407,7 @@
         h->param.analyse.i_trellis = 0;
         h->param.analyse.b_fast_pskip = 0;
         h->param.analyse.i_noise_reduction = 0;
+        h->param.analyse.b_dct_lowpass = 0;
         h->param.analyse.i_subpel_refine = x264_clip3( h->param.analyse.i_subpel_refine, 1, 6 );
     }
     if( h->param.rc.i_rc_method == X264_RC_CQP )
@@ -749,6 +750,7 @@
     COPY( analyse.i_me_method );
     COPY( analyse.i_me_range );
     COPY( analyse.i_noise_reduction );
+    COPY( analyse.b_dct_lowpass );
     COPY( analyse.i_subpel_refine );
     COPY( analyse.i_trellis );
     COPY( analyse.b_bidir_me );
Index: encoder/macroblock.c
===================================================================
--- encoder/macroblock.c	(revision 661)
+++ encoder/macroblock.c	(working copy)
@@ -98,6 +98,9 @@
 
     h->dctf.sub4x4_dct( dct4x4, p_src, p_dst );
 
+    if( h->mb.b_dct_lowpass )
+        x264_lowpass_dct( h, (int16_t*)dct4x4 );
+
     if( h->mb.b_trellis )
         x264_quant_4x4_trellis( h, dct4x4, CQM_4IY, i_qscale, DCT_LUMA_4x4, 1 );
     else
@@ -120,6 +123,8 @@
 
     h->dctf.sub8x8_dct8( dct8x8, p_src, p_dst );
 
+    if( h->mb.b_dct_lowpass )
+        x264_lowpass_dct( h, (int16_t*)dct8x8 );
     if( h->mb.b_trellis )
         x264_quant_8x8_trellis( h, dct8x8, CQM_8IY, i_qscale, 1 );
     else
@@ -159,6 +164,9 @@
         /* copy dc coeff */
         dct4x4[0][block_idx_y[i]][block_idx_x[i]] = dct4x4[1+i][0][0];
 
+        if( h->mb.b_dct_lowpass )
+            x264_lowpass_dct( h, (int16_t*)dct4x4[1+i] );
+
         /* quant/scan/dequant */
         if( h->mb.b_trellis )
             x264_quant_4x4_trellis( h, dct4x4[1+i], CQM_4IY, i_qscale, DCT_LUMA_AC, 1 );
@@ -427,6 +435,8 @@
                 {
                     if( h->mb.b_noise_reduction )
                         x264_denoise_dct( h, (int16_t*)dct8x8[idx] );
+                    if( h->mb.b_dct_lowpass )
+                        x264_lowpass_dct( h, (int16_t*)dct8x8[idx] );
                     if( h->mb.b_trellis )
                         x264_quant_8x8_trellis( h, dct8x8[idx], CQM_8PY, i_qp, 0 );
                     else
@@ -479,6 +489,8 @@
 
                     if( h->mb.b_noise_reduction )
                         x264_denoise_dct( h, (int16_t*)dct4x4[idx] );
+                    if( h->mb.b_dct_lowpass )
+                        x264_lowpass_dct( h, (int16_t*)dct4x4[idx] );
                     if( h->mb.b_trellis )
                         x264_quant_4x4_trellis( h, dct4x4[idx], CQM_4PY, i_qp, DCT_LUMA_4x4, 0 );
                     else
@@ -725,6 +737,144 @@
     }
 }
 
+#define DCT_SUBBANDS 4
+#define DCT_LOWPASS_QP_MAX 51
+
+/* Subband of every coefficient position, in the raster order in which
+ * x264_lowpass_dct() indexes its coefficient array.  Both tables are
+ * symmetric, so a transposed coefficient layout yields the same lookup. */
+static const DECLARE_ALIGNED( int16_t, subbands_4x4[4][4], 16 ) =
+{
+    {0, 1, 2, 3},
+    {1, 1, 2, 3},
+    {2, 2, 2, 3},
+    {3, 3, 3, 3}
+};
+
+static const DECLARE_ALIGNED( int16_t, subbands_8x8[8][8], 16 ) =
+{
+    {0, 0, 1, 1, 2, 2, 3, 3},
+    {0, 0, 1, 1, 2, 2, 3, 3},
+    {1, 1, 1, 1, 2, 2, 3, 3},
+    {1, 1, 1, 1, 2, 2, 3, 3},
+    {2, 2, 2, 2, 2, 2, 3, 3},
+    {2, 2, 2, 2, 2, 2, 3, 3},
+    {3, 3, 3, 3, 3, 3, 3, 3},
+    {3, 3, 3, 3, 3, 3, 3, 3},
+};
+
+/* weight in [0,1] -> 7-bit fixed point (1.0 == 128) */
+#define FLT_COEF(a) ((int)((a)*128.))
+
+/* Per-QP weight of each subband: row = QP, column = subband. */
+static const int dct_lowpass_coeffs[DCT_LOWPASS_QP_MAX+1][DCT_SUBBANDS] =
+{
+    //0
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), },
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), },
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), },
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), },
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), },
+    //5
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), },
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), },
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), },
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), },
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), },
+    //10
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), },
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), },
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), },
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), },
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), },
+    //15
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), },
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), },
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), },
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), },
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), },
+    //20
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), },
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), },
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), },
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), },
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), },
+    //25
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), },
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), },
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), },
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), },
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), },
+    //30
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), FLT_COEF(.9), },
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), FLT_COEF(.8), },
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), FLT_COEF(.7), },
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), FLT_COEF(.6), },
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(1), FLT_COEF(.5), },
+    //35
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(.9), FLT_COEF(.4), },
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(.9), FLT_COEF(.3), },
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(.9), FLT_COEF(.2), },
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(.8), FLT_COEF(.1), },
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(.8), FLT_COEF(0), },
+    //40
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(.7), FLT_COEF(0), },
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(.6), FLT_COEF(0), },
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(.5), FLT_COEF(0), },
+    {FLT_COEF(1), FLT_COEF(1), FLT_COEF(.4), FLT_COEF(0), },
+    {FLT_COEF(1), FLT_COEF(.9), FLT_COEF(.2), FLT_COEF(0), },
+    //45
+    {FLT_COEF(1), FLT_COEF(.9), FLT_COEF(.1), FLT_COEF(0), },
+    {FLT_COEF(1), FLT_COEF(.8), FLT_COEF(0), FLT_COEF(0), },
+    {FLT_COEF(1), FLT_COEF(.6), FLT_COEF(0), FLT_COEF(0), },
+    {FLT_COEF(1), FLT_COEF(.4), FLT_COEF(0), FLT_COEF(0), },
+    {FLT_COEF(1), FLT_COEF(.3), FLT_COEF(0), FLT_COEF(0), },
+    //50
+    {FLT_COEF(1), FLT_COEF(.2), FLT_COEF(0), FLT_COEF(0), },
+    {FLT_COEF(1), FLT_COEF(.2), FLT_COEF(0), FLT_COEF(0), }
+};
+
+#undef FLT_COEF
+
+/* Scale the AC coefficients of one luma block, in place, by the weight of
+ * their subband at the macroblock's QP.  Weights are 7-bit fixed point; the
+ * product is rounded to nearest, symmetrically around zero.  dct[0] is left
+ * untouched.
+ *   h:   reads mb.b_transform_8x8 (64 coefficients if set, otherwise 16),
+ *        mb.i_qp and mb.i_type
+ *   dct: coefficients as passed by the callers' (int16_t*) block casts */
+void x264_lowpass_dct( x264_t *h, int16_t *dct )
+{
+    const int cat = h->mb.b_transform_8x8;
+    const int *weights;
+    int i_qp = h->mb.i_qp;
+    int i;
+
+    /* intra macroblocks take the weights of a QP 6 lower */
+    if( h->mb.i_type == I_16x16 || h->mb.i_type == I_8x8 || h->mb.i_type == I_4x4 )
+        i_qp -= 6;
+    /* keep the row index inside dct_lowpass_coeffs[] */
+    if( i_qp < 0 )
+        i_qp = 0;
+    else if( i_qp > DCT_LOWPASS_QP_MAX )
+        i_qp = DCT_LOWPASS_QP_MAX;
+    weights = dct_lowpass_coeffs[i_qp];
+
+    for( i = cat ? 63 : 15; i >= 1; i-- )
+    {
+        /* Callers hand in a 2-D block cast to int16_t*, so i is a raster
+         * index and the raster-order tables are read directly.  A zigzag-
+         * scanned table would pair dct[i] with another coefficient's band. */
+        const int band = cat ? subbands_8x8[i>>3][i&7] : subbands_4x4[i>>2][i&3];
+        const int weight = weights[band];
+        const int level = dct[i];
+        if( level > 0 )
+            dct[i] = ( level * weight + 64 ) / 128;
+        else
+            dct[i] = -( ( -level * weight + 64 ) / 128 );
+    }
+}
+
 void x264_denoise_dct( x264_t *h, int16_t *dct )
 {
     const int cat = h->mb.b_transform_8x8;
Index: encoder/macroblock.h
===================================================================
--- encoder/macroblock.h	(revision 661)
+++ encoder/macroblock.h	(working copy)
@@ -53,6 +53,7 @@
 
 void x264_noise_reduction_update( x264_t *h );
 void x264_denoise_dct( x264_t *h, int16_t *dct );
+void x264_lowpass_dct( x264_t *h, int16_t *dct );
 
 #define array_non_zero(a) array_non_zero_int(a, sizeof(a))
 static inline int array_non_zero_int( void *v, int i_count )
Index: x264.c
===================================================================
--- x264.c	(revision 661)
+++ x264.c	(working copy)
@@ -253,6 +253,7 @@
         " - 2: enabled on all mode decisions\n", defaults->analyse.i_trellis );
     H0( " --no-fast-pskip Disables early SKIP detection on P-frames\n" );
     H0( " --no-dct-decimate Disables coefficient thresholding on P-frames\n" );
+    H0( " --lowpass Enables adaptive lowpass filter\n" );
     H0( " --nr Noise reduction [%d]\n", defaults->analyse.i_noise_reduction );
     H1( "\n" );
     H1( " --deadzone-inter Set the size of the inter luma quantization deadzone [%d]\n", defaults->analyse.i_luma_deadzone[0] );
@@ -414,6 +415,7 @@
         { "bime", no_argument, NULL, 0 },
         { "8x8dct", no_argument, NULL, '8' },
         { "trellis", required_argument, NULL, 't' },
+        { "lowpass", no_argument, NULL, 0 },
         { "no-fast-pskip", no_argument, NULL, 0 },
         { "no-dct-decimate", no_argument, NULL, 0 },
         { "deadzone-inter", required_argument, NULL, '0' },
Index: x264.h
===================================================================
--- x264.h	(revision 661)
+++ x264.h	(working copy)
@@ -224,6 +224,7 @@
         int b_fast_pskip; /* early SKIP detection on P-frames */
         int b_dct_decimate; /* transform coefficient thresholding on P-frames */
         int i_noise_reduction; /* adaptive pseudo-deadzone */
+        int b_dct_lowpass; /* adaptive dct lowpass filter */
 
         /* the deadzone size that will be used in luma quantization */
         int i_luma_deadzone[2]; /* {inter, intra} */