[x264-devel] commit: New AQ algorithm option (Jason Garrett-Glaser)

Mon Jul 20 09:53:00 CEST 2009

x264 | branch: master | Jason Garrett-Glaser <darkshikari at gmail.com> | Sat Jul 18 16:30:18 2009 -0700| [5698e501483ff01e6e821fd0b8541baf597c39e9] | committer: Jason Garrett-Glaser 

New AQ algorithm option
"Auto-variance" uses (log(var))^2, i.e. the square of the log, instead of log(var), and attempts to adapt strength per-frame.
Generates significantly better SSIM; on by default with --tune ssim.
Whether it generates visually better quality is still up for debate.
Available as --aq-mode 2.

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=5698e501483ff01e6e821fd0b8541baf597c39e9
---

 encoder/encoder.c     |    2 +-
 encoder/me.c          |    3 +--
 encoder/ratecontrol.c |   46 ++++++++++++++++++++++++++++++++++++++--------
 x264.c                |    6 ++++--
 x264.h                |    1 +
 5 files changed, 45 insertions(+), 13 deletions(-)

diff --git a/encoder/encoder.c b/encoder/encoder.c
index fdcc957..22e8ce4 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -533,7 +533,7 @@ static int x264_validate_parameters( x264_t *h )
     else
         h->mb.i_psy_trellis = 0;
     h->param.analyse.i_chroma_qp_offset = x264_clip3(h->param.analyse.i_chroma_qp_offset, -12, 12);
-    h->param.rc.i_aq_mode = x264_clip3( h->param.rc.i_aq_mode, 0, 1 );
+    h->param.rc.i_aq_mode = x264_clip3( h->param.rc.i_aq_mode, 0, 2 );
     h->param.rc.f_aq_strength = x264_clip3f( h->param.rc.f_aq_strength, 0, 3 );
     if( h->param.rc.f_aq_strength == 0 )
         h->param.rc.i_aq_mode = 0;
diff --git a/encoder/me.c b/encoder/me.c
index f13e84b..fa14dab 100644
--- a/encoder/me.c
+++ b/encoder/me.c
@@ -561,8 +561,7 @@ me_hex2:
              * because sum(abs(diff)) >= abs(diff(sum)). */
             uint16_t *sums_base = m->integral;
             /* due to a GCC bug on some platforms (win32?), zero[] may not actually be aligned.
-             * unlike the similar case in ratecontrol.c, this is not a problem because it is not used for any
-             * SSE instructions and the only loss is a tiny bit of performance. */
+             * this is not a problem because it is not used for any SSE instructions. */
             DECLARE_ALIGNED_16( static uint8_t zero[8*FENC_STRIDE] );
             DECLARE_ALIGNED_16( int enc_dc[4] );
             int sad_size = i_pixel <= PIXEL_8x8 ? PIXEL_8x8 : PIXEL_4x4;
diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
index 2dd34d0..df2bbff 100644
--- a/encoder/ratecontrol.c
+++ b/encoder/ratecontrol.c
@@ -168,13 +168,13 @@ static inline double qscale2bits(ratecontrol_entry_t *rce, double qscale)
 }
 
 // Find the total AC energy of the block in all planes.
-static NOINLINE int ac_energy_mb( x264_t *h, int mb_x, int mb_y, x264_frame_t *frame )
+static NOINLINE uint32_t ac_energy_mb( x264_t *h, int mb_x, int mb_y, x264_frame_t *frame )
 {
     /* This function contains annoying hacks because GCC has a habit of reordering emms
      * and putting it after floating point ops.  As a result, we put the emms at the end of the
      * function and make sure that its always called before the float math.  Noinline makes
      * sure no reordering goes on. */
-    unsigned int var = 0, i;
+    uint32_t var = 0, i;
     for( i = 0; i < 3; i++ )
     {
         int w = i ? 8 : 16;
@@ -186,7 +186,6 @@ static NOINLINE int ac_energy_mb( x264_t *h, int mb_x, int mb_y, x264_frame_t *f
         stride <<= h->mb.b_interlaced;
         var += h->pixf.var[pix]( frame->plane[i]+offset, stride );
     }
-    var = X264_MAX(var,1);
     x264_emms();
     return var;
 }
@@ -217,7 +216,13 @@ static const uint8_t exp2_lut[64] = {
     177, 182, 186, 191, 196, 201, 206, 211, 216, 221, 226, 232, 237, 242, 248, 253,
 };
 
-static int x264_exp2fix8( float x )
+static ALWAYS_INLINE float x264_log2( uint32_t x )
+{
+    int lz = x264_clz( x );
+    return log2_lut[(x<<lz>>24)&0x7f] + (31 - lz);
+}
+
+static ALWAYS_INLINE int x264_exp2fix8( float x )
 {
     int i, f;
     x += 8;
@@ -232,14 +237,39 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame )
 {
     /* constants chosen to result in approximately the same overall bitrate as without AQ.
      * FIXME: while they're written in 5 significant digits, they're only tuned to 2. */
-    float strength = h->param.rc.f_aq_strength * 1.0397;
     int mb_x, mb_y;
+    float strength;
+    float avg_adj = 0.f;
+    if( h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE )
+    {
+        for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y++ )
+            for( mb_x = 0; mb_x < h->sps->i_mb_width; mb_x++ )
+            {
+                uint32_t energy = ac_energy_mb( h, mb_x, mb_y, frame );
+                float qp_adj = x264_log2( energy + 2 );
+                qp_adj *= qp_adj;
+                frame->f_qp_offset[mb_x + mb_y*h->mb.i_mb_stride] = qp_adj;
+                avg_adj += qp_adj;
+            }
+        avg_adj /= h->mb.i_mb_count;
+        strength = h->param.rc.f_aq_strength * avg_adj * (1.f / 6000.f);
+    }
+    else
+        strength = h->param.rc.f_aq_strength * 1.0397f;
     for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y++ )
         for( mb_x = 0; mb_x < h->sps->i_mb_width; mb_x++ )
         {
-            uint32_t energy = ac_energy_mb( h, mb_x, mb_y, frame );
-            int lz = x264_clz( energy );
-            float qp_adj = strength * (log2_lut[(energy<<lz>>24)&0x7f] - lz + 16.573f);
+            float qp_adj;
+            if( h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE )
+            {
+                qp_adj = frame->f_qp_offset[mb_x + mb_y*h->mb.i_mb_stride];
+                qp_adj = strength * (qp_adj - avg_adj);
+            }
+            else
+            {
+                uint32_t energy = ac_energy_mb( h, mb_x, mb_y, frame );
+                qp_adj = strength * (x264_log2( X264_MAX(energy, 1) ) - 14.427f);
+            }
             frame->f_qp_offset[mb_x + mb_y*h->mb.i_mb_stride] = qp_adj;
             if( h->frames.b_have_lowres )
                 frame->i_inv_qscale_factor[mb_x + mb_y*h->mb.i_mb_stride] = x264_exp2fix8(qp_adj*(-1.f/6.f));
diff --git a/x264.c b/x264.c
index 6e04edd..e9eb91d 100644
--- a/x264.c
+++ b/x264.c
@@ -215,7 +215,8 @@ static void Help( x264_param_t *defaults, int b_longhelp )
     H1( "      --chroma-qp-offset <integer>  QP difference between chroma and luma [%d]\n", defaults->analyse.i_chroma_qp_offset );
     H1( "      --aq-mode <integer>     AQ method [%d]\n"
         "                                  - 0: Disabled\n"
-        "                                  - 1: Variance AQ (complexity mask)\n", defaults->rc.i_aq_mode );
+        "                                  - 1: Variance AQ (complexity mask)\n"
+        "                                  - 2: Auto-variance AQ (experimental)\n", defaults->rc.i_aq_mode );
     H0( "      --aq-strength <float>   Reduces blocking and blurring in flat and\n"
         "                              textured areas. [%.1f]\n"
         "                                  - 0.5: weak AQ\n"
@@ -640,11 +641,12 @@ static int  Parse( int argc, char **argv,
             else if( !strcasecmp( optarg, "psnr" ) )
             {
                 param->analyse.f_psy_rd = 0;
-                param->rc.i_aq_mode = 0;
+                param->rc.i_aq_mode = X264_AQ_NONE;
             }
             else if( !strcasecmp( optarg, "ssim" ) )
             {
                 param->analyse.f_psy_rd = 0;
+                param->rc.i_aq_mode = X264_AQ_AUTOVARIANCE;
             }
             else if( !strcasecmp( optarg, "touhou" ) )
             {
diff --git a/x264.h b/x264.h
index e61040e..2dfcc8d 100644
--- a/x264.h
+++ b/x264.h
@@ -88,6 +88,7 @@ typedef struct x264_t x264_t;
 #define X264_RC_ABR                  2
 #define X264_AQ_NONE                 0
 #define X264_AQ_VARIANCE             1
+#define X264_AQ_AUTOVARIANCE         2
 #define X264_B_ADAPT_NONE            0
 #define X264_B_ADAPT_FAST            1
 #define X264_B_ADAPT_TRELLIS         2