[x264-devel] commit: Replace some divisions with shifts (Henrik Gramner)
git at videolan.org
git at videolan.org
Wed Jun 9 18:39:09 CEST 2010
x264 | branch: master | Henrik Gramner <hengar-6 at student.ltu.se> | Tue Jun 8 16:29:16 2010 +0200| [59a9e0337cf5f75046b7031c61fdd2d88fc9021c] | committer: Jason Garrett-Glaser
Replace some divisions with shifts
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=59a9e0337cf5f75046b7031c61fdd2d88fc9021c
---
common/bitstream.h | 2 +-
common/pixel.c | 2 +-
common/ppc/deblock.c | 2 +-
common/ppc/mc.c | 2 +-
common/ppc/pixel.c | 14 +++++++-------
encoder/analyse.c | 28 ++++++++++++++--------------
encoder/cabac.c | 2 +-
encoder/me.c | 6 +++---
8 files changed, 29 insertions(+), 29 deletions(-)
diff --git a/common/bitstream.h b/common/bitstream.h
index 84dcd50..9ce5bd7 100644
--- a/common/bitstream.h
+++ b/common/bitstream.h
@@ -97,7 +97,7 @@ static inline int bs_pos( bs_t *s )
static inline void bs_flush( bs_t *s )
{
M32( s->p ) = endian_fix32( s->cur_bits << (s->i_left&31) );
- s->p += WORD_SIZE - s->i_left / 8;
+ s->p += WORD_SIZE - (s->i_left >> 3);
s->i_left = WORD_SIZE*8;
}
/* The inverse of bs_flush: prepare the bitstream to be written to again. */
diff --git a/common/pixel.c b/common/pixel.c
index 580686e..a8cb1df 100644
--- a/common/pixel.c
+++ b/common/pixel.c
@@ -547,7 +547,7 @@ float x264_pixel_ssim_wxh( x264_pixel_function_t *pf,
int z = 0;
float ssim = 0.0;
int (*sum0)[4] = buf;
- int (*sum1)[4] = sum0 + width/4+3;
+ int (*sum1)[4] = sum0 + (width >> 2) + 3;
width >>= 2;
height >>= 2;
for( int y = 1; y < height; y++ )
diff --git a/common/ppc/deblock.c b/common/ppc/deblock.c
index 0b6c738..0c8d2d4 100644
--- a/common/ppc/deblock.c
+++ b/common/ppc/deblock.c
@@ -45,7 +45,7 @@ static inline void write16x4( uint8_t *dst, int dst_stride,
{
ALIGNED_16(unsigned char result[64]);
uint32_t *src_int = (uint32_t *)result, *dst_int = (uint32_t *)dst;
- int int_dst_stride = dst_stride/4;
+ int int_dst_stride = dst_stride >> 2;
vec_st(r0, 0, result);
vec_st(r1, 16, result);
diff --git a/common/ppc/mc.c b/common/ppc/mc.c
index 83c60b1..7ad8050 100644
--- a/common/ppc/mc.c
+++ b/common/ppc/mc.c
@@ -703,7 +703,7 @@ void x264_hpel_filter_altivec( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint
static void frame_init_lowres_core_altivec( uint8_t *src0, uint8_t *dst0, uint8_t *dsth, uint8_t *dstv, uint8_t *dstc,
int src_stride, int dst_stride, int width, int height )
{
- int w = width/16;
+ int w = width >> 4;
int end = (width & 15);
vec_u8_t src0v, src1v, src2v;
vec_u8_t lv, hv, src1p1v;
diff --git a/common/ppc/pixel.c b/common/ppc/pixel.c
index 832ec50..3f99606 100644
--- a/common/ppc/pixel.c
+++ b/common/ppc/pixel.c
@@ -153,7 +153,7 @@ static int pixel_satd_4x4_altivec( uint8_t *pix1, int i_pix1,
satdv = vec_splat( satdv, 1 );
vec_ste( satdv, 0, &i_satd );
- return i_satd / 2;
+ return i_satd >> 1;
}
/***********************************************************************
@@ -207,7 +207,7 @@ static int pixel_satd_4x8_altivec( uint8_t *pix1, int i_pix1,
satdv = vec_splat( satdv, 1 );
vec_ste( satdv, 0, &i_satd );
- return i_satd / 2;
+ return i_satd >> 1;
}
/***********************************************************************
@@ -261,7 +261,7 @@ static int pixel_satd_8x4_altivec( uint8_t *pix1, int i_pix1,
satdv = vec_splat( satdv, 1 );
vec_ste( satdv, 0, &i_satd );
- return i_satd / 2;
+ return i_satd >> 1;
}
/***********************************************************************
@@ -321,7 +321,7 @@ static int pixel_satd_8x8_altivec( uint8_t *pix1, int i_pix1,
satdv = vec_splat( satdv, 3 );
vec_ste( satdv, 0, &i_satd );
- return i_satd / 2;
+ return i_satd >> 1;
}
/***********************************************************************
@@ -405,7 +405,7 @@ static int pixel_satd_8x16_altivec( uint8_t *pix1, int i_pix1,
satdv = vec_splat( satdv, 3 );
vec_ste( satdv, 0, &i_satd );
- return i_satd / 2;
+ return i_satd >> 1;
}
/***********************************************************************
@@ -489,7 +489,7 @@ static int pixel_satd_16x8_altivec( uint8_t *pix1, int i_pix1,
satdv = vec_splat( satdv, 3 );
vec_ste( satdv, 0, &i_satd );
- return i_satd / 2;
+ return i_satd >> 1;
}
/***********************************************************************
@@ -615,7 +615,7 @@ static int pixel_satd_16x16_altivec( uint8_t *pix1, int i_pix1,
satdv = vec_splat( satdv, 3 );
vec_ste( satdv, 0, &i_satd );
- return i_satd / 2;
+ return i_satd >> 1;
}
diff --git a/encoder/analyse.c b/encoder/analyse.c
index 677ca48..84a553d 100644
--- a/encoder/analyse.c
+++ b/encoder/analyse.c
@@ -1236,8 +1236,8 @@ static void x264_mb_analyse_inter_p8x8_mixed_ref( x264_t *h, x264_mb_analysis_t
for( int i = 0; i < 4; i++ )
{
x264_me_t *l0m = &a->l0.me8x8[i];
- const int x8 = i%2;
- const int y8 = i/2;
+ int x8 = i&1;
+ int y8 = i>>1;
m.i_pixel = PIXEL_8x8;
@@ -1312,8 +1312,8 @@ static void x264_mb_analyse_inter_p8x8( x264_t *h, x264_mb_analysis_t *a )
for( int i = 0; i < 4; i++ )
{
x264_me_t *m = &a->l0.me8x8[i];
- const int x8 = i%2;
- const int y8 = i/2;
+ int x8 = i&1;
+ int y8 = i>>1;
m->i_pixel = PIXEL_8x8;
m->i_ref_cost = i_ref_cost;
@@ -1793,8 +1793,8 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
static inline void x264_mb_cache_mv_p8x8( x264_t *h, x264_mb_analysis_t *a, int i )
{
- const int x = 2*(i%2);
- const int y = 2*(i/2);
+ int x = 2*(i&1);
+ int y = i&2;
switch( h->mb.i_sub_partition[i] )
{
@@ -1823,8 +1823,8 @@ static inline void x264_mb_cache_mv_p8x8( x264_t *h, x264_mb_analysis_t *a, int
static void x264_mb_load_mv_direct8x8( x264_t *h, int idx )
{
- const int x = 2*(idx&1);
- const int y = 2*(idx>>1);
+ int x = 2*(idx&1);
+ int y = idx&2;
x264_macroblock_cache_ref( h, x, y, 2, 2, 0, h->mb.cache.direct_ref[0][idx] );
x264_macroblock_cache_ref( h, x, y, 2, 2, 1, h->mb.cache.direct_ref[1][idx] );
x264_macroblock_cache_mv_ptr( h, x, y, 2, 2, 0, h->mb.cache.direct_mv[0][idx] );
@@ -1859,8 +1859,8 @@ static void x264_mb_load_mv_direct8x8( x264_t *h, int idx )
static inline void x264_mb_cache_mv_b8x8( x264_t *h, x264_mb_analysis_t *a, int i, int b_mvd )
{
- int x = (i%2)*2;
- int y = (i/2)*2;
+ int x = 2*(i&1);
+ int y = i&2;
if( h->mb.i_sub_partition[i] == D_DIRECT_8x8 )
{
x264_mb_load_mv_direct8x8( h, i );
@@ -1923,8 +1923,8 @@ static void x264_mb_analyse_inter_b8x8_mixed_ref( x264_t *h, x264_mb_analysis_t
for( int i = 0; i < 4; i++ )
{
- int x8 = i%2;
- int y8 = i/2;
+ int x8 = i&1;
+ int y8 = i>>1;
int i_part_cost;
int i_part_cost_bi;
int stride[2] = {8,8};
@@ -2005,8 +2005,8 @@ static void x264_mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a )
for( int i = 0; i < 4; i++ )
{
- const int x8 = i%2;
- const int y8 = i/2;
+ int x8 = i&1;
+ int y8 = i>>1;
int i_part_cost;
int i_part_cost_bi = 0;
int stride[2] = {8,8};
diff --git a/encoder/cabac.c b/encoder/cabac.c
index 2452165..8bd40f1 100644
--- a/encoder/cabac.c
+++ b/encoder/cabac.c
@@ -953,7 +953,7 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
else
{
for( int i = 0; i < 16; i++ )
- if( h->mb.i_cbp_luma & ( 1 << ( i / 4 ) ) )
+ if( h->mb.i_cbp_luma & ( 1 << ( i >> 2 ) ) )
block_residual_write_cabac_cbf( h, cb, DCT_LUMA_4x4, i, h->dct.luma4x4[i], b_intra );
}
diff --git a/encoder/me.c b/encoder/me.c
index 9e0148e..dc0af99 100644
--- a/encoder/me.c
+++ b/encoder/me.c
@@ -622,7 +622,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
continue;
bsad -= ycost;
xn = h->pixf.ads[i_pixel]( enc_dc, sums_base + min_x + my * stride, delta,
- cost_fpel_mvx+min_x, xs, width, bsad*17/16 );
+ cost_fpel_mvx+min_x, xs, width, bsad * 17 >> 4 );
for( i = 0; i < xn-2; i += 3 )
{
pixel *ref = p_fref_w+min_x+my*stride;
@@ -789,14 +789,14 @@ if( b_refine_qpel || (dir^1) != odir ) \
+ p_cost_mvx[ mx ] + p_cost_mvy[ my ]; \
if( b_chroma_me && cost < bcost ) \
{ \
- h->mc.mc_chroma( pix, 8, m->p_fref[4], m->i_stride[1], mx, my + mvy_offset, bw/2, bh/2 ); \
+ h->mc.mc_chroma( pix, 8, m->p_fref[4], m->i_stride[1], mx, my + mvy_offset, bw>>1, bh>>1 ); \
if( m->weight[1].weightfn ) \
m->weight[1].weightfn[x264_pixel_size[i_pixel].w>>3]( pix, 8, pix, 8, \
&m->weight[1], x264_pixel_size[i_pixel].h>>1 ); \
cost += h->pixf.mbcmp[i_pixel+3]( m->p_fenc[1], FENC_STRIDE, pix, 8 ); \
if( cost < bcost ) \
{ \
- h->mc.mc_chroma( pix, 8, m->p_fref[5], m->i_stride[1], mx, my + mvy_offset, bw/2, bh/2 ); \
+ h->mc.mc_chroma( pix, 8, m->p_fref[5], m->i_stride[1], mx, my + mvy_offset, bw>>1, bh>>1 ); \
if( m->weight[2].weightfn ) \
m->weight[2].weightfn[x264_pixel_size[i_pixel].w>>3]( pix, 8, pix, 8, \
&m->weight[2], x264_pixel_size[i_pixel].h>>1 ); \
More information about the x264-devel mailing list