[x264-devel] commit: faster residual (Loren Merritt)
git version control
git at videolan.org
Tue Mar 25 09:01:57 CET 2008
x264 | branch: master | Loren Merritt <pengvado at akuvian.org> | Tue Mar 25 00:59:50 2008 -0600| [5e0e058c72e6dcf0f432157b48c0b07566535fe6]
faster residual
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=5e0e058c72e6dcf0f432157b48c0b07566535fe6
---
encoder/cabac.c | 14 +++++-----
encoder/cavlc.c | 75 +++++++++++-------------------------------------------
2 files changed, 23 insertions(+), 66 deletions(-)
diff --git a/encoder/cabac.c b/encoder/cabac.c
index b47b4bb..705ae15 100644
--- a/encoder/cabac.c
+++ b/encoder/cabac.c
@@ -717,7 +717,7 @@ static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBl
int i_last = 0;
int i_sigmap_size;
int node_ctx = 0;
- int i;
+ int i, j;
const int *significant_coeff_flag_offset;
const int *last_coeff_flag_offset;
@@ -730,17 +730,17 @@ static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBl
* 5-> Luma8x8 i_idx = luma8x8idx
*/
- for( i = 0; i < i_count; i++ )
- {
+ for( j = i_count - 4; j >= 0; j -= 4 )
+ if( *(uint64_t*)(l+j) )
+ break;
+ for( i = 0; i < j+4; i++ )
if( l[i] != 0 )
{
- i_coeff_abs_m1[i_coeff] = abs( l[i] ) - 1;
- i_coeff_sign[i_coeff] = ( l[i] < 0 );
+ i_coeff_abs_m1[i_coeff] = abs(l[i]) - 1;
+ i_coeff_sign[i_coeff] = l[i] < 0;
i_coeff++;
-
i_last = i;
}
- }
if( i_count != 64 )
{
diff --git a/encoder/cavlc.c b/encoder/cavlc.c
index e04ba5b..e4e84cc 100644
--- a/encoder/cavlc.c
+++ b/encoder/cavlc.c
@@ -70,17 +70,15 @@ static void block_residual_write_cavlc( x264_t *h, bs_t *s, int i_idx, int16_t *
int i_total_zero;
int i_last;
unsigned int i_sign;
-
int i;
- int i_zero_left;
int i_suffix_length;
/* first find i_last */
- i_last = i_count - 1;
+ for( i_last = i_count-1; i_last >= 3; i_last -= 4 )
+ if( *(uint64_t*)(l+i_last-3) )
+ break;
while( i_last >= 0 && l[i_last] == 0 )
- {
i_last--;
- }
i_sign = 0;
i_total = 0;
@@ -94,16 +92,11 @@ static void block_residual_write_cavlc( x264_t *h, bs_t *s, int i_idx, int16_t *
/* level and run and total */
while( i_last >= 0 )
{
- level[idx] = l[i_last--];
-
- run[idx] = 0;
- while( i_last >= 0 && l[i_last] == 0 )
- {
- run[idx]++;
- i_last--;
- }
-
- idx++;
+ int r = 0;
+ level[idx] = l[i_last];
+ while( --i_last >= 0 && l[i_last] == 0 )
+ r++;
+ run[idx++] = r;
}
i_total = idx;
@@ -112,7 +105,7 @@ static void block_residual_write_cavlc( x264_t *h, bs_t *s, int i_idx, int16_t *
i_trailing = X264_MIN(3, idx);
for( idx = 0; idx < i_trailing; idx++ )
{
- if( abs(level[idx]) > 1 )
+ if( (unsigned)(level[idx]+1) > 2 )
{
i_trailing = idx;
break;
@@ -136,9 +129,7 @@ static void block_residual_write_cavlc( x264_t *h, bs_t *s, int i_idx, int16_t *
}
if( i_total <= 0 )
- {
return;
- }
i_suffix_length = i_total > 10 && i_trailing < 3 ? 1 : 0;
if( i_trailing > 0 )
@@ -147,29 +138,18 @@ static void block_residual_write_cavlc( x264_t *h, bs_t *s, int i_idx, int16_t *
}
for( i = i_trailing; i < i_total; i++ )
{
- unsigned int i_level_code;
+ int mask = level[i] >> 15;
+ int abs_level = (level[i]^mask)-mask;
+ int i_level_code = abs_level*2-mask-2;
- /* calculate level code */
- if( level[i] < 0 )
- {
- i_level_code = -2*level[i] - 1;
- }
- else /* if( level[i] > 0 ) */
- {
- i_level_code = 2 * level[i] - 2;
- }
if( i == i_trailing && i_trailing < 3 )
- {
i_level_code -= 2; /* as level[i] can't be 1 for the first one if i_trailing < 3 */
- }
if( ( i_level_code >> i_suffix_length ) < 14 )
{
bs_write_vlc( s, x264_level_prefix[i_level_code >> i_suffix_length] );
if( i_suffix_length > 0 )
- {
bs_write( s, i_suffix_length, i_level_code );
- }
}
else if( i_suffix_length == 0 && i_level_code < 30 )
{
@@ -186,54 +166,31 @@ static void block_residual_write_cavlc( x264_t *h, bs_t *s, int i_idx, int16_t *
bs_write_vlc( s, x264_level_prefix[15] );
i_level_code -= 15 << i_suffix_length;
if( i_suffix_length == 0 )
- {
i_level_code -= 15;
- }
-
if( i_level_code >= 1<<12 )
- {
x264_log(h, X264_LOG_WARNING, "OVERFLOW levelcode=%d\n", i_level_code );
- }
-
bs_write( s, 12, i_level_code );
}
if( i_suffix_length == 0 )
- {
i_suffix_length++;
- }
- if( abs( level[i] ) > ( 3 << ( i_suffix_length - 1 ) ) && i_suffix_length < 6 )
- {
+ if( abs_level > (3 << (i_suffix_length-1)) && i_suffix_length < 6 )
i_suffix_length++;
- }
}
if( i_total < i_count )
{
if( i_idx == BLOCK_INDEX_CHROMA_DC )
- {
bs_write_vlc( s, x264_total_zeros_dc[i_total-1][i_total_zero] );
- }
else
- {
bs_write_vlc( s, x264_total_zeros[i_total-1][i_total_zero] );
- }
}
- for( i = 0, i_zero_left = i_total_zero; i < i_total - 1; i++ )
+ for( i = 0; i < i_total-1 && i_total_zero > 0; i++ )
{
- int i_zl;
-
- if( i_zero_left <= 0 )
- {
- break;
- }
-
- i_zl = X264_MIN( i_zero_left - 1, 6 );
-
+ int i_zl = X264_MIN( i_total_zero - 1, 6 );
bs_write_vlc( s, x264_run_before[i_zl][run[i]] );
-
- i_zero_left -= run[i];
+ i_total_zero -= run[i];
}
}
More information about the x264-devel mailing list