[x264-devel] commit: Faster x264_exp2fix8 (Jason Garrett-Glaser)
git version control
git at videolan.org
Thu Aug 27 08:32:55 CEST 2009
x264 | branch: master | Jason Garrett-Glaser <darkshikari at gmail.com> | Wed Aug 26 20:16:10 2009 -0700| [e765dcfc71c6f70060e488dc60d9b58e8b8191c1] | committer: Jason Garrett-Glaser
Faster x264_exp2fix8
22->13 cycles on Core 2 with mfpmath=sse
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=e765dcfc71c6f70060e488dc60d9b58e8b8191c1
---
common/common.h | 11 ++++-------
encoder/ratecontrol.c | 2 +-
encoder/slicetype.c | 9 +++++----
3 files changed, 10 insertions(+), 12 deletions(-)
diff --git a/common/common.h b/common/common.h
index f78a35c..f395d03 100644
--- a/common/common.h
+++ b/common/common.h
@@ -163,13 +163,10 @@ static const uint8_t exp2_lut[64] = {
static ALWAYS_INLINE int x264_exp2fix8( float x )
{
- int i, f;
- x += 8;
- if( x <= 0 ) return 0;
- if( x >= 16 ) return 0xffff;
- i = x;
- f = (x-i)*64;
- return (exp2_lut[f]+256) << i >> 8;
+ if( x <= -512.f/6.f ) return 0;
+ if( x >= 512.f/6.f ) return 0xffff;
+ int i = x*(-64.f/6.f) + 512;
+ return (exp2_lut[i&63]+256) << (i>>6) >> 8;
}
static const float log2_lut[128] = {
diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
index cf3dc9a..fd9aa94 100644
--- a/encoder/ratecontrol.c
+++ b/encoder/ratecontrol.c
@@ -251,7 +251,7 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame )
frame->f_qp_offset[mb_x + mb_y*h->mb.i_mb_stride] =
frame->f_qp_offset_aq[mb_x + mb_y*h->mb.i_mb_stride] = qp_adj;
if( h->frames.b_have_lowres )
- frame->i_inv_qscale_factor[mb_x + mb_y*h->mb.i_mb_stride] = x264_exp2fix8(qp_adj*(-1.f/6.f));
+ frame->i_inv_qscale_factor[mb_x + mb_y*h->mb.i_mb_stride] = x264_exp2fix8(qp_adj);
}
}
diff --git a/encoder/slicetype.c b/encoder/slicetype.c
index b9a7bb0..14ab0b8 100644
--- a/encoder/slicetype.c
+++ b/encoder/slicetype.c
@@ -389,7 +389,7 @@ static int x264_slicetype_frame_cost_recalculate( x264_t *h, x264_frame_t **fram
int i_mb_xy = h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride;
int i_mb_cost = frames[b]->lowres_costs[b-p0][p1-b][i_mb_xy];
float qp_adj = frames[b]->f_qp_offset[i_mb_xy];
- i_mb_cost = (i_mb_cost * x264_exp2fix8(qp_adj*(-1.f/6.f)) + 128) >> 8;
+ i_mb_cost = (i_mb_cost * x264_exp2fix8(qp_adj) + 128) >> 8;
row_satd[ h->mb.i_mb_y ] += i_mb_cost;
if( (h->mb.i_mb_y > 0 && h->mb.i_mb_y < h->sps->i_mb_height - 1 &&
h->mb.i_mb_x > 0 && h->mb.i_mb_x < h->sps->i_mb_width - 1) ||
@@ -410,6 +410,9 @@ static void x264_macroblock_tree_finish( x264_t *h, x264_frame_t *frame, int b_b
memcpy( frame->f_qp_offset, frame->f_qp_offset_aq, sizeof( frame->f_qp_offset ) );
else
{
+ /* Allow the strength to be adjusted via qcompress, since the two
+ * concepts are very similar. */
+ float strength = 5.0f * (1.0f - h->param.rc.f_qcompress);
for( mb_index = 0; mb_index < h->mb.i_mb_count; mb_index++ )
{
int intra_cost = (frame->i_intra_cost[mb_index] * frame->i_inv_qscale_factor[mb_index]+128)>>8;
@@ -417,9 +420,7 @@ static void x264_macroblock_tree_finish( x264_t *h, x264_frame_t *frame, int b_b
{
int propagate_cost = frame->i_propagate_cost[mb_index];
float log2_ratio = x264_log2(intra_cost + propagate_cost) - x264_log2(intra_cost);
- /* Allow the constant to be adjusted via qcompress, since the two
- * concepts are very similar. */
- frame->f_qp_offset[mb_index] = frame->f_qp_offset_aq[mb_index] - 5.0 * (1.0 - h->param.rc.f_qcompress) * log2_ratio;
+ frame->f_qp_offset[mb_index] = frame->f_qp_offset_aq[mb_index] - strength * log2_ratio;
}
}
}
More information about the x264-devel mailing list