[x264-devel] Remove explicit run calculation from coeff_level_run

Wed Mar 7 03:20:14 CET 2012

x264 | branch: master | Jason Garrett-Glaser <jason at x264.com> | Mon Feb 13 18:31:51 2012 -0800| [de5a0adca1a7d08b1233b317ec092dbf19263d2f] | committer: Jason Garrett-Glaser

Remove explicit run calculation from coeff_level_run
Storing explicit run values is unnecessary because CAVLC reads its zero-run codes from a lookup table.

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=de5a0adca1a7d08b1233b317ec092dbf19263d2f
---

 common/bitstream.h     |    1 -
 common/quant.c         |    7 ++-----
 common/vlc.c           |    4 +++-
 common/x86/quant-a.asm |    9 +++++----
 tools/checkasm.c       |    3 +--
 5 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/common/bitstream.h b/common/bitstream.h
index d728961..3beb1ea 100644
--- a/common/bitstream.h
+++ b/common/bitstream.h
@@ -58,7 +58,6 @@ typedef struct
     int     last;
     int     mask;
     dctcoef level[16];
-    uint8_t run[16];
 } x264_run_level_t;
 
 extern const vlc_t x264_coeff0_token[6];
diff --git a/common/quant.c b/common/quant.c
index cc08526..17a6e1c 100644
--- a/common/quant.c
+++ b/common/quant.c
@@ -376,12 +376,9 @@ static int x264_coeff_level_run##num( dctcoef *dct, x264_run_level_t *runlevel )
     int mask = 0;\
     do\
     {\
-        int r = 0;\
-        runlevel->level[i_total] = dct[i_last];\
+        runlevel->level[i_total++] = dct[i_last];\
         mask |= 1 << (i_last);\
-        while( --i_last >= 0 && dct[i_last] == 0 )\
-            r++;\
-        runlevel->run[i_total++] = r;\
+        while( --i_last >= 0 && dct[i_last] == 0 );\
     } while( i_last >= 0 );\
     runlevel->mask = mask;\
     return i_total;\
diff --git a/common/vlc.c b/common/vlc.c
index 12bdad0..8af36de 100644
--- a/common/vlc.c
+++ b/common/vlc.c
@@ -852,15 +852,17 @@ void x264_cavlc_init( x264_t *h )
             dct[j] = i&(1<<j);
         int total = h->quantf.coeff_level_run[DCT_LUMA_4x4]( dct, &runlevel );
         int zeros = runlevel.last + 1 - total;
+        uint32_t mask = i << (x264_clz( i ) + 1);
         for( int j = 0; j < total-1 && zeros > 0; j++ )
         {
             int idx = X264_MIN(zeros, 7) - 1;
-            int run = runlevel.run[j];
+            int run = x264_clz( mask );
             int len = run_before[idx][run].i_size;
             size += len;
             bits <<= len;
             bits |= run_before[idx][run].i_bits;
             zeros -= run;
+            mask <<= run + 1;
         }
         x264_run_before[i] = (bits << 5) + size;
     }
diff --git a/common/x86/quant-a.asm b/common/x86/quant-a.asm
index 456cce6..970811f 100644
--- a/common/x86/quant-a.asm
+++ b/common/x86/quant-a.asm
@@ -1368,15 +1368,16 @@ cglobal coeff_level_run%1,0,7
     LZCOUNT t3d, t5d, 0x1f
 %if HIGH_BIT_DEPTH
     mov    t2d, [t0+t4*4]
-    mov   [t1+t6+8+16*4], t3b
-    mov   [t1+t6*4+ 8], t2d
 %else
     mov    t2w, [t0+t4*2]
-    mov   [t1+t6+8+16*2], t3b
-    mov   [t1+t6*2+ 8], t2w
 %endif
     inc    t3d
     shl    t5d, t3b
+%if HIGH_BIT_DEPTH
+    mov   [t1+t6*4+ 8], t2d
+%else
+    mov   [t1+t6*2+ 8], t2w
+%endif
     inc    t6d
     sub    t4d, t3d
     jge .loop
diff --git a/tools/checkasm.c b/tools/checkasm.c
index a340fff..630a01d 100644
--- a/tools/checkasm.c
+++ b/tools/checkasm.c
@@ -2052,8 +2052,7 @@ static int check_quant( int cpu_ref, int cpu_new )
             int result_a = call_a( qf_a.lastname, dct1+ac, &runlevel_a ); \
             if( result_c != result_a || runlevel_c.last != runlevel_a.last || \
                 runlevel_c.mask != runlevel_a.mask || \
-                memcmp(runlevel_c.level, runlevel_a.level, sizeof(dctcoef)*result_c) || \
-                memcmp(runlevel_c.run, runlevel_a.run, sizeof(uint8_t)*(result_c-1)) ) \
+                memcmp(runlevel_c.level, runlevel_a.level, sizeof(dctcoef)*result_c)) \
             { \
                 ok = 0; \
                 fprintf( stderr, #name ": [FAILED]\n" ); \