[x264-devel] Re: [PATCH] 2-pass ratecontrol

Mon Aug 23 05:11:09 CEST 2004

On Sun, 22 Aug 2004, Loren Merritt wrote:

> Mostly borrowed from libavcodec.
>
> There is not much theoretical basis behind my choice of defaults for
> rc_eq, qcompress, qblur, and ip_factor.

Forgot to svn add eval.c

--Loren Merritt
-------------- next part --------------
Index: encoder/encoder.c
===================================================================

--- encoder/encoder.c	(revision 38)
+++ encoder/encoder.c	(working copy)
@@ -440,7 +440,8 @@
     x264_csp_init( h->param.cpu, h->param.i_csp, &h->csp );
 
     /* rate control */
-    x264_ratecontrol_new( h );
+    if( x264_ratecontrol_new( h ) < 0 )
+        return NULL;
 
     h->i_last_intra_size = 0;
     h->i_last_inter_size = 0;
@@ -709,14 +710,13 @@
     }
 }
 
-static inline void x264_slice_write( x264_t *h, int i_nal_type, int i_nal_ref_idc, int i_mb_count[18] )
+static inline void x264_slice_write( x264_t *h, int i_nal_type, int i_nal_ref_idc )
 {
     int i_skip;
     int mb_xy;
-    int i;
 
     /* Init stats */
-    for( i = 0; i < 17; i++ ) i_mb_count[i] = 0;
+    memset(&h->frame_stat, 0, sizeof(h->frame_stat));
 
     /* Slice */
     x264_nal_start( h, i_nal_type, i_nal_ref_idc );
@@ -725,7 +725,7 @@
     x264_slice_header_write( &h->out.bs, &h->sh, i_nal_ref_idc );
     if( h->param.b_cabac )
     {
-        /* alignement needed */
+        /* alignment needed */
         bs_align_1( &h->out.bs );
 
         /* init cabac */
@@ -806,7 +806,7 @@
         /* save cache */
         x264_macroblock_cache_save( h );
 
-        i_mb_count[h->mb.i_type]++;
+        h->frame_stat.i_mb_count[h->mb.i_type]++;
 
         x264_ratecontrol_mb(h, bs_pos(&h->out.bs) - mb_spos);
     }
@@ -841,6 +841,11 @@
     }
 
     x264_nal_end( h );
+
+    h->frame_stat.i_misc_bits = bs_pos(&h->out.bs)
+                              - h->frame_stat.i_itex_bits
+                              - h->frame_stat.i_ptex_bits
+                              - h->frame_stat.i_mv_bits;
 }
 
 /****************************************************************************
@@ -869,7 +874,7 @@
 
     int   i_global_qp;
 
-    int i_mb_count[18];
+    int *i_mb_count = h->frame_stat.i_mb_count;
 
     /* no data out */
     *pi_nal = 0;
@@ -886,8 +891,23 @@
 
         x264_frame_copy_picture( h, fenc, pic );
 
+        fenc->i_presentation_num = h->i_input_frame;
+        h->i_input_frame ++;
+
         /* 2: get its type */
-        if( ( h->frames.i_last_i + 1 >= h->param.i_iframe && h->frames.i_last_idr + 1 >= h->param.i_idrframe ) ||
+        if( h->param.i_pass & 2 )
+        {
+            /* XXX: trusts that the first pass used compatible B and IDR frequencies */
+            fenc->i_type = x264_ratecontrol_slice_type( h, fenc->i_presentation_num );
+            if( fenc->i_type == X264_TYPE_I && h->frames.next[0] == NULL
+                && h->frames.i_last_idr + 1 >= h->param.i_idrframe )
+            {
+                fenc->i_type = X264_TYPE_IDR;
+                h->i_poc       = 0;
+                h->i_frame_num = 0;
+            }
+        }
+        else if( ( h->frames.i_last_i + 1 >= h->param.i_iframe && h->frames.i_last_idr + 1 >= h->param.i_idrframe ) ||
             pic->i_type == X264_TYPE_IDR )
         {
             /* IDR */
@@ -1088,7 +1108,7 @@
     }
 
     /* Write the slice */
-    x264_slice_write( h, i_nal_type, i_nal_ref_idc, i_mb_count );
+    x264_slice_write( h, i_nal_type, i_nal_ref_idc );
 
     /* XXX: this scene cut won't work with B frame (it may never create IDR -> bad) */
     if( i_slice_type != SLICE_TYPE_I)
@@ -1190,6 +1210,8 @@
 
     TIMER_STOP( i_mtime_encode_frame );
 
+    x264_ratecontrol_write_stats( h, i_slice_type, i_global_qp );
+
     /* ---------------------- Compute/Print statistics --------------------- */
     /* Slice stat */
     h->stat.i_slice_count[i_slice_type]++;
Index: encoder/ratecontrol.h
===================================================================
--- encoder/ratecontrol.h	(revision 38)
+++ encoder/ratecontrol.h	(working copy)
@@ -27,10 +27,17 @@
 int  x264_ratecontrol_new   ( x264_t * );
 void x264_ratecontrol_delete( x264_t * );
 
+int  x264_ratecontrol_slice_type( x264_t *, int frame_num );
 void x264_ratecontrol_start( x264_t *, int i_slice_type );
 void x264_ratecontrol_mb( x264_t *, int bits );
 int  x264_ratecontrol_qp( x264_t * );
 void x264_ratecontrol_end( x264_t *, int bits );
+void x264_ratecontrol_write_stats( x264_t *, int slice_type, double qp );
 
+double x264_eval(char *s, double *const_value, const char **const_name,
+                 double (**func1)(void *, double), const char **func1_name,
+                 double (**func2)(void *, double, double), char **func2_name,
+                 void *opaque);
+
 #endif
 
Index: encoder/eval.c
===================================================================
--- encoder/eval.c	(revision 0)
+++ encoder/eval.c	(revision 0)
@@ -0,0 +1,254 @@
+/*
+ * simple arithmetic expression evaluator
+ *
+ * Copyright (c) 2002 Michael Niedermayer <michaelni at gmx.at>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/**
+ * @file eval.c
+ * simple arithmetic expression evaluator.
+ *
+ * see http://joe.hotchkiss.com/programming/eval/eval.html
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#ifndef NAN
+  #define NAN 0
+#endif
+
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+
+#define STACK_SIZE 100
+
+typedef struct Parser{
+    double stack[STACK_SIZE];         // operand stack used by push()/pop()
+    int stack_index;                  // number of values currently on the stack
+    char *s;                          // current read position in the expression text
+    double *const_value;              // const_value[i] is the value of const_name[i]
+    const char **const_name;          // NULL terminated
+    double (**func1)(void *, double a); // NULL terminated; func1[i] implements func1_name[i]
+    const char **func1_name;          // NULL terminated; the pointer itself may be NULL
+    double (**func2)(void *, double a, double b); // NULL terminated; func2[i] implements func2_name[i]
+    char **func2_name;          // NULL terminated; the pointer itself may be NULL
+    void *opaque;                     // handed as first argument to every func1/func2 call
+} Parser;
+
+static void evalExpression(Parser *p);
+
+/* Push d on the operand stack; once all STACK_SIZE slots are in use, report it and drop d. */
+static void push(Parser *p, double d){
+    if(p->stack_index >= STACK_SIZE){
+        fprintf(stderr, "stack overflow in the parser\n");
+        return;
+    }
+    p->stack[ p->stack_index++ ]= d;
+}
+
+/* Remove and return the top of the operand stack; on an empty stack report it and return NAN. */
+static double pop(Parser *p){
+    if(p->stack_index<=0){
+        fprintf(stderr, "stack underflow in the parser\n");
+        return NAN;
+    }
+    return p->stack[ --p->stack_index ];
+}
+
+static int strmatch(const char *s, const char *prefix){
+    /* 1 when s starts with prefix (an empty prefix matches anything), else 0 */
+    for(; *prefix; s++, prefix++){
+        if(*s != *prefix) return 0;
+    }
+    return 1;
+}
+
+/* Parse one primary at p->s: a number, a named constant, or a call
+ * name(expr[,expr]). Pushes its value; on a syntax error pushes nothing. */
+static void evalPrimary(Parser *p){
+    double d, d2=NAN;
+    char *next= p->s;
+    int i;
+
+    /* number */
+    d= strtod(p->s, &next);
+    if(next != p->s){
+        push(p, d);
+        p->s= next;
+        return;
+    }
+
+    /* named constants */
+    for(i=0; p->const_name[i]; i++){
+        if(strmatch(p->s, p->const_name[i])){
+            push(p, p->const_value[i]);
+            p->s+= strlen(p->const_name[i]);
+            return;
+        }
+    }
+
+    p->s= strchr(p->s, '(');
+    if(p->s==NULL){
+        fprintf(stderr, "Parser: missing ( in \"%s\"\n", next);
+        p->s= next + strlen(next); // callers read p->s[0]: park on the NUL, never on NULL
+        return;
+    }
+    p->s++; // "("
+    evalExpression(p);
+    d= pop(p);
+    if(p->s[0]== ','){
+        p->s++; // ","
+        evalExpression(p);
+        d2= pop(p);
+    }
+    if(p->s[0] != ')'){
+        fprintf(stderr, "Parser: missing ) in \"%s\"\n", next);
+        return;
+    }
+    p->s++; // ")"
+
+    /* strmatch() only tests a prefix, so longer names go before their prefixes */
+         if( strmatch(next, "sinh"  ) ) d= sinh(d);
+    else if( strmatch(next, "cosh"  ) ) d= cosh(d);
+    else if( strmatch(next, "tanh"  ) ) d= tanh(d);
+    else if( strmatch(next, "sin"   ) ) d= sin(d);
+    else if( strmatch(next, "cos"   ) ) d= cos(d);
+    else if( strmatch(next, "tan"   ) ) d= tan(d);
+    else if( strmatch(next, "exp"   ) ) d= exp(d);
+    else if( strmatch(next, "log"   ) ) d= log(d);
+    else if( strmatch(next, "squish") ) d= 1/(1+exp(4*d));
+    else if( strmatch(next, "gauss" ) ) d= exp(-d*d/2)/sqrt(2*M_PI);
+    else if( strmatch(next, "abs"   ) ) d= fabs(d);
+    else if( strmatch(next, "max"   ) ) d= d > d2 ? d : d2;
+    else if( strmatch(next, "min"   ) ) d= d < d2 ? d : d2;
+    else if( strmatch(next, "gte"   ) ) d= d >= d2 ? 1.0 : 0.0;
+    else if( strmatch(next, "gt"    ) ) d= d >  d2 ? 1.0 : 0.0;
+    else if( strmatch(next, "lte"   ) ) d= d <= d2 ? 1.0 : 0.0;
+    else if( strmatch(next, "lt"    ) ) d= d <  d2 ? 1.0 : 0.0;
+    else if( strmatch(next, "eq"    ) ) d= d == d2 ? 1.0 : 0.0;
+    else{
+        int error=1;
+        for(i=0; p->func1_name && p->func1_name[i]; i++){
+            if(strmatch(next, p->func1_name[i])){
+                d= p->func1[i](p->opaque, d);
+                error=0;
+                break;
+            }
+        }
+        for(i=0; p->func2_name && p->func2_name[i]; i++){
+            if(strmatch(next, p->func2_name[i])){
+                d= p->func2[i](p->opaque, d, d2);
+                error=0;
+                break;
+            }
+        }
+        if(error){
+            fprintf(stderr, "Parser: unknown function in \"%s\"\n", next);
+            return;
+        }
+    }
+
+    push(p, d);
+}
+
+/* Parse an optionally signed operand: a parenthesised expression or a primary. */
+static void evalPow(Parser *p){
+    int neg= 0;
+    if(p->s[0]=='+') p->s++;
+
+    if(p->s[0]=='-'){
+        neg= 1;
+        p->s++;
+    }
+
+    if(p->s[0]=='('){
+        p->s++;
+        evalExpression(p);
+        /* only step over a ')' that is really there, never past the terminating NUL */
+        if(p->s[0]==')') p->s++;
+        else fprintf(stderr, "Parser: missing )\n");
+    }else{
+        evalPrimary(p);
+    }
+
+    if(neg) push(p, -pop(p));
+}
+
+/* factor := pow ('^' pow)* ; a chain of '^' is applied left to right */
+static void evalFactor(Parser *p){
+    evalPow(p);
+    while(*p->s == '^'){
+        double exponent;
+        p->s++;
+        evalPow(p);
+        exponent= pop(p);
+        push(p, pow(pop(p), exponent));
+    }
+}
+
+/* term := factor (('*'|'/') factor)* ; a '/' multiplies by the reciprocal */
+static void evalTerm(Parser *p){
+    evalFactor(p);
+    while(*p->s == '*' || *p->s == '/'){
+        const int divide= (*p->s++ == '/');
+        double rhs;
+
+        evalFactor(p);
+        rhs= pop(p);
+        if(divide) rhs= 1.0/rhs;
+        push(p, rhs * pop(p));
+    }
+}
+
+/* expression := term (('+'|'-') term)* ; a '-' adds the negated right operand */
+static void evalExpression(Parser *p){
+    evalTerm(p);
+    while(*p->s == '+' || *p->s == '-'){
+        const int subtract= (*p->s++ == '-');
+        double rhs;
+
+        evalTerm(p);
+        rhs= pop(p);
+        if(subtract) rhs= -rhs;
+        push(p, rhs + pop(p));
+    }
+}
+
+double x264_eval(char *s, double *const_value, const char **const_name,
+                 double (**func1)(void *, double), const char **func1_name,
+                 double (**func2)(void *, double, double), char **func2_name,
+                 void *opaque){
+    /* Evaluate the expression text s and return its value (NAN when nothing
+     * is left on the stack); the tables and opaque go to the parser as is. */
+    Parser ctx;
+    ctx.opaque     = opaque;
+    ctx.func2_name = func2_name;
+    ctx.func2      = func2;
+    ctx.func1_name = func1_name;
+    ctx.func1      = func1;
+    ctx.const_name = const_name;
+    ctx.const_value= const_value;
+    ctx.s          = s;
+    ctx.stack_index= 0;
+    evalExpression(&ctx);
+    return pop(&ctx);
+}
Index: encoder/cavlc.c
===================================================================
--- encoder/cavlc.c	(revision 38)
+++ encoder/cavlc.c	(working copy)
@@ -265,6 +265,8 @@
     const int i_mb_type = h->mb.i_type;
     int i_mb_i_offset;
     int i;
+    int mb_pos = bs_pos(s);
+    int mb_pos2;
 
     switch( h->sh.i_type )
     {
@@ -632,6 +634,10 @@
         return;
     }
 
+    mb_pos2 = bs_pos(s);
+    h->frame_stat.i_mv_bits += mb_pos2 - mb_pos;
+    mb_pos = mb_pos2;
+
     /* Coded block patern */
     if( i_mb_type == I_4x4 )
     {
@@ -684,4 +690,13 @@
             }
         }
     }
+
+    if(IS_INTRA(i_mb_type))
+    {
+        h->frame_stat.i_itex_bits += bs_pos(s) - mb_pos;
+    }
+    else
+    {
+        h->frame_stat.i_ptex_bits += bs_pos(s) - mb_pos;
+    }
 }
Index: encoder/cabac.c
===================================================================
--- encoder/cabac.c	(revision 38)
+++ encoder/cabac.c	(working copy)
@@ -962,6 +962,8 @@
 {
     const int i_mb_type = h->mb.i_type;
     int i;
+    int mb_pos = bs_pos(s);
+    int mb_pos2;
 
     /* Write the MB type */
     x264_cabac_mb_type( h );
@@ -1141,6 +1143,10 @@
         }
     }
 
+    mb_pos2 = bs_pos(s);
+    h->frame_stat.i_mv_bits += mb_pos2 - mb_pos;
+    mb_pos = mb_pos2;
+
     if( i_mb_type != I_16x16 )
     {
         x264_cabac_mb_cbp_luma( h );
@@ -1190,5 +1196,14 @@
             }
         }
     }
+
+    if(IS_INTRA(i_mb_type))
+    {
+        h->frame_stat.i_itex_bits += bs_pos(s) - mb_pos;
+    }
+    else
+    {
+        h->frame_stat.i_ptex_bits += bs_pos(s) - mb_pos;
+    }
 }
 
Index: encoder/ratecontrol.c
===================================================================
--- encoder/ratecontrol.c	(revision 38)
+++ encoder/ratecontrol.c	(working copy)
@@ -5,6 +5,8 @@
  * $Id: ratecontrol.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
  *
  * Authors: Måns Rullgård <mru at mru.ath.cx>
+ * 2 pass code: Michael Niedermayer <michaelni at gmx.at>
+ *              Loren Merritt
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -23,31 +25,53 @@
 
 #define _ISOC99_SOURCE
 
+#undef NDEBUG // always check asserts, the speed effect is far too small to disable them
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
 #include <math.h>
 #include <limits.h>
+#include <assert.h>
 
 #include "../core/common.h"
 #include "../core/cpu.h"
+#include "../core/macroblock.h"
 #include "ratecontrol.h"
 
 #ifdef SYS_MACOSX
 #define exp2f(x) ( (float) exp2( (x) ) )
 #endif
 
+typedef struct ratecontrol_entry_t {
+    int pict_type;          /* slice type read from the 1st pass stats ("type:") */
+    float qscale;           /* qscale of the 1st pass, converted from its logged qp ("q:") */
+    int mv_bits;            /* "mv:" field of the stats line */
+    int i_tex_bits;         /* "itex:" field of the stats line */
+    int p_tex_bits;         /* "ptex:" field of the stats line */
+    int misc_bits;          /* "misc:" field of the stats line */
+    uint64_t expected_bits; /* running total init_pass2 expects for all earlier entries */
+    int new_pict_type;      /* slice type handed out by x264_ratecontrol_slice_type */
+    float new_qscale;       /* qscale chosen for this pass */
+    int new_qp;             /* new_qscale rounded to a qp in x264_ratecontrol_start */
+    int i_count;            /* "imb:" field of the stats line */
+    int p_count;            /* "pmb:" field of the stats line */
+    int s_count;            /* "smb:" field of the stats line */
+    int f_code;             /* NOTE(review): not referenced in the visible code */
+    int b_code;             /* NOTE(review): not referenced in the visible code */
+} ratecontrol_entry_t;
+
 struct x264_ratecontrol_t
 {
     /* constants */
-    float fps;
+    double fps;
     int gop_size;
     int bitrate;
-    int nmb;                    /* number of MBs */
+    int nmb;                    /* number of macroblocks in a frame */
     int buffer_size;
     int rcbufrate;
     int init_qp;
 
+    /* 1 pass stuff */
     int gop_qp;
     int buffer_fullness;
     int frames;                 /* frames in current gop */
@@ -67,13 +91,65 @@
     int nzcoeffs;               /* # of 0-quantized coefficients */
     int ncoeffs;                /* total # of coefficients */
     int overhead;
+
+    /* 2 pass stuff */
+    FILE *stats_file;
+    int num_entries;            /* number of ratecontrol_entry_ts */
+    ratecontrol_entry_t *entry;
+    double last_qscale;
+    double last_qscale_for[5];  /* last qscale for a specific pict type, used for max_diff & ipb factor stuff  */
+    int last_non_b_pict_type;
+    double lmin[5];                /* min qscale by frame type */
+    double lmax[5];
+    double i_cplx_sum[5];       /* estimated total texture bits in intra MBs at qscale=1 */
+    double p_cplx_sum[5];
+    double mv_bits_sum[5];
+    int frame_count[5];         /* number of frames of each type */
 };
 
+
+static int init_pass2(x264_t *);
+static float rate_estimate_qscale(x264_t *h, int pict_type);
+
+/* Terminology:
+ * qp = h.264's quantizer
+ * qscale = an arbitrary linear scale, mappable to qp
+ */
+
+static inline double qp2qscale(double qp){
+    return pow(2.0, qp / 6.0); /* 2^(qp/6): the qscale doubles every 6 qp */
+}
+
+static inline double qscale2qp(double qscale){
+    return 6.0 * log(qscale) / log(2.0); /* 6*log2(qscale), the inverse of qp2qscale */
+}
+
+static inline double qscale2bits(ratecontrol_entry_t *rce, double qscale){
+    if(qscale<=0.0){ /* keep the divisor below positive */
+        fprintf(stderr, "qscale<=0.0\n");
+        qscale = 0.1;
+    }
+    return (double)(rce->i_tex_bits + rce->p_tex_bits + 1) * rce->qscale / qscale; /* (tex bits + 1), rescaled from rce->qscale to qscale */
+ }
+
+static inline double bits2qscale(ratecontrol_entry_t *rce, double bits){
+    if(bits<0.9){ /* keep the divisor below at 1 or more */
+        fprintf(stderr, "bits<0.9\n");
+        bits = 1.0;
+    }
+    return rce->qscale * (double)(rce->i_tex_bits + rce->p_tex_bits + 1) / bits; /* the qscale for which qscale2bits() gives bits */
+}
+
+
 int x264_ratecontrol_new( x264_t *h )
 {
     x264_ratecontrol_t *rc = x264_malloc( sizeof( x264_ratecontrol_t ) );
     float bpp;
+    int i;
 
+    x264_cpu_restore( h->param.cpu );
+
+    h->rc = rc;
     memset(rc, 0, sizeof(*rc));
 
     /* FIXME: use integers */
@@ -125,14 +201,112 @@
     x264_log(h, X264_LOG_DEBUG, "%f fps, %i bps, bufsize %i\n",
              rc->fps, rc->bitrate, rc->buffer_size);
 
-    h->rc = rc;
 
+    for(i=0; i<5; i++){
+        rc->last_qscale_for[i] = qp2qscale(26);
+        rc->lmin[i] = qp2qscale(h->param.i_qp_min);
+        rc->lmax[i] = qp2qscale(h->param.i_qp_max);
+    }
+#if 0 // FIXME: do we want to assign lmin/lmax based on ip_factor, or leave them all the same?
+    rc->lmin[SLICE_TYPE_I] /= fabs(h->param.f_ip_factor);
+    rc->lmax[SLICE_TYPE_I] /= fabs(h->param.f_ip_factor);
+    rc->lmin[SLICE_TYPE_B] *= fabs(h->param.f_pb_factor);
+    rc->lmax[SLICE_TYPE_B] *= fabs(h->param.f_pb_factor);
+#endif
+
+    if(h->param.i_pass & 2){
+        int stats_size;
+        char *p, *stats_in;
+        FILE *stats_file;
+
+        /* read 1st pass stats */
+        assert(h->param.s_2pass_file_in);
+        stats_file = fopen(h->param.s_2pass_file_in, "r");
+        if(!stats_file){
+            x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n");
+            return -1;
+        }
+        // FIXME: error checking
+        fseek(stats_file, 0, SEEK_END);
+        stats_size = ftell(stats_file);
+        fseek(stats_file, 0, SEEK_SET);
+        stats_in = x264_malloc(stats_size+10);
+        fread(stats_in, 1, stats_size, stats_file);
+        fclose(stats_file);
+
+        /* find number of pics */
+        p = stats_in;
+        for(i=-1; p; i++){
+            p = strchr(p+1, ';');
+        }
+        i += h->param.i_bframe;
+        rc->entry = (ratecontrol_entry_t*) x264_malloc(i*sizeof(ratecontrol_entry_t));
+        memset(rc->entry, 0, i*sizeof(ratecontrol_entry_t));
+        rc->num_entries= i;
+
+        /* init all to skipped p frames */
+        for(i=0; i<rc->num_entries; i++){
+            ratecontrol_entry_t *rce = &rc->entry[i];
+            rce->pict_type = rce->new_pict_type = SLICE_TYPE_P;
+            rce->qscale = rce->new_qscale = qp2qscale(20);
+            rce->misc_bits = rc->nmb + 10;
+            rce->new_qp = 0;
+        }
+
+        /* read stats */
+        p = stats_in;
+        for(i=0; i < rc->num_entries - h->param.i_bframe; i++){
+            ratecontrol_entry_t *rce;
+            int picture_number;
+            int e;
+            char *next;
+            float qp;
+
+            next= strchr(p, ';');
+            if(next){
+                (*next)=0; //sscanf is unbelievably slow on looong strings
+                next++;
+            }
+            e = sscanf(p, " in:%d ", &picture_number);
+
+            assert(picture_number >= 0);
+            assert(picture_number < rc->num_entries);
+            rce = &rc->entry[picture_number];
+
+            e += sscanf(p, " in:%*d out:%*d type:%d q:%f itex:%d ptex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d",
+                   &rce->pict_type, &qp, &rce->i_tex_bits, &rce->p_tex_bits,
+                   &rce->mv_bits, &rce->misc_bits, &rce->i_count, &rce->p_count, &rce->s_count);
+            if(e != 10){
+                x264_log(h, X264_LOG_ERROR, "statistics are damaged at line %d, parser out=%d\n", i, e);
+                return -1;
+            }
+            rce->qscale = qp2qscale(qp);
+            p = next;
+        }
+
+        x264_free(stats_in);
+
+        if(init_pass2(h) < 0) return -1;
+    }
+
+    if(h->param.i_pass & 1){
+        assert(h->param.s_2pass_file_out);
+        h->rc->stats_file = fopen(h->param.s_2pass_file_out, "w");
+        if(!h->rc->stats_file){
+            x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n");
+            return -1;
+        }
+    }
+
     return 0;
+    return 0;
 }
 
 void x264_ratecontrol_delete( x264_t *h )
 {
     x264_ratecontrol_t *rc = h->rc;
+    if(rc->stats_file) fclose(rc->stats_file);
+    if(rc->entry) x264_free(rc->entry);
     x264_free( rc );
 }
 
@@ -151,6 +325,15 @@
 
     x264_cpu_restore( h->param.cpu );
 
+    if(h->param.i_pass & 2){
+        int frame = h->fenc->i_presentation_num;
+        assert(frame >= 0 && frame < rc->num_entries);
+        ratecontrol_entry_t *rce = &h->rc->entry[frame];
+        rce->new_qscale = rate_estimate_qscale(h, i_slice_type);
+        rc->qpm = rc->qp = rce->new_qp = (int)(qscale2qp(rce->new_qscale) + 0.5);
+        return;
+    }
+
     rc->slice_type = i_slice_type;
 
     switch(i_slice_type){
@@ -269,7 +452,7 @@
     int dqp;
     int i;
 
-    if( !h->param.b_cbr )
+    if(!h->param.b_cbr || (h->param.i_pass & 2))
         return;
 
     x264_cpu_restore( h->param.cpu );
@@ -319,7 +502,7 @@
 {
     x264_ratecontrol_t *rc = h->rc;
 
-    if(!h->param.b_cbr)
+    if(!h->param.b_cbr || (h->param.i_pass & 2))
         return;
 
     rc->buffer_fullness += rc->rcbufrate - bits;
@@ -352,3 +535,313 @@
     rc->frames++;
     rc->mb = 0;
 }
+
+
+/*****************************************************
+ * 2 pass functions
+ ****************************************************/
+
+void x264_ratecontrol_write_stats(x264_t *h, int slice_type, double qp){
+    /* 1st pass only: append this frame's statistics as one ';'-terminated line */
+    if((h->param.i_pass & 1) == 0) return;
+    assert(h->rc->stats_file);
+    fprintf(h->rc->stats_file,
+            "in:%d out:%d type:%d q:%.3f itex:%d ptex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d;\n",
+            h->fenc->i_presentation_num, h->i_frame-1,
+            slice_type, qp,
+            h->frame_stat.i_itex_bits, h->frame_stat.i_ptex_bits,
+            h->frame_stat.i_mv_bits, h->frame_stat.i_misc_bits,
+            h->frame_stat.i_mb_count[I_4x4] + h->frame_stat.i_mb_count[I_16x16],
+            h->frame_stat.i_mb_count[P_L0]  + h->frame_stat.i_mb_count[P_8x8],
+            h->frame_stat.i_mb_count[P_SKIP]);
+}
+
+/**
+ * modifies the bitrate curve from pass1 for one frame:
+ * evaluates h->param.s_rc_eq with this frame's pass-1 statistics, scales the
+ * result by rate_factor and the I/B factors, and returns the qscale that
+ * bits2qscale() maps that bit count to
+ */
+static double get_qscale(x264_t *h, ratecontrol_entry_t *rce, double rate_factor){
+    x264_ratecontrol_t *rcc= h->rc;
+    double bits;
+    const int pict_type = rce->new_pict_type;
+
+    x264_cpu_restore( h->param.cpu );
+
+    double const_values[]={ // must stay in the same order as const_names
+        rce->i_tex_bits * rce->qscale,
+        rce->p_tex_bits * rce->qscale,
+        (rce->i_tex_bits + rce->p_tex_bits) * rce->qscale,
+        (double)rce->mv_bits / rcc->nmb, // cast: int/int would truncate the per-MB average
+        (double)rce->i_count / rcc->nmb,
+        (double)rce->p_count / rcc->nmb,
+        (double)rce->s_count / rcc->nmb,
+        rce->pict_type == SLICE_TYPE_I,
+        rce->pict_type == SLICE_TYPE_P,
+        rce->pict_type == SLICE_TYPE_B,
+        h->param.f_qcompress,
+        rcc->i_cplx_sum[SLICE_TYPE_I] / rcc->frame_count[SLICE_TYPE_I],
+        rcc->i_cplx_sum[SLICE_TYPE_P] / rcc->frame_count[SLICE_TYPE_P],
+        rcc->p_cplx_sum[SLICE_TYPE_P] / rcc->frame_count[SLICE_TYPE_P],
+        rcc->p_cplx_sum[SLICE_TYPE_B] / rcc->frame_count[SLICE_TYPE_B],
+        (rcc->i_cplx_sum[pict_type] + rcc->p_cplx_sum[pict_type]) / rcc->frame_count[pict_type],
+        0
+    };
+    static const char *const_names[]={
+        "iTex",
+        "pTex",
+        "tex",
+        "mv",
+        "iCount",
+        "pCount",
+        "sCount",
+        "isI",
+        "isP",
+        "isB",
+        "qComp",
+        "avgIITex",
+        "avgPITex",
+        "avgPPTex",
+        "avgBPTex",
+        "avgTex",
+        NULL
+    };
+    static double (*func1[])(void *, double)={
+        (void *)bits2qscale,
+        (void *)qscale2bits,
+        NULL
+    };
+    static const char *func1_names[]={
+        "bits2qp",
+        "qp2bits",
+        NULL
+    };
+
+    bits = x264_eval((char*)h->param.s_rc_eq, const_values, const_names, func1, func1_names, NULL, NULL, rce);
+
+    bits *= rate_factor;
+    if(bits<0.0) bits=0.0;
+    bits += 1.0; //avoid 1/0 issues
+
+    /* I/B difference */
+    if     (pict_type==SLICE_TYPE_I && h->param.f_ip_factor > 0)
+        bits *= h->param.f_ip_factor;
+    else if(pict_type==SLICE_TYPE_B && h->param.f_pb_factor > 0)
+        bits /= h->param.f_pb_factor;
+
+    return bits2qscale(rce, bits);
+}
+
+/* Limit how far q may move from the last qscale recorded for this picture
+ * type (normally at most i_qp_step qp either way) and record q as that type's last. */
+static double get_diff_limited_q(x264_t *h, ratecontrol_entry_t *rce, double q){
+    x264_ratecontrol_t *rcc = h->rc;
+    const int pict_type = rce->new_pict_type;
+
+    // force I/B quants as a function of P quants (only when the factor is negative)
+    const double last_p_q    = rcc->last_qscale_for[SLICE_TYPE_P];
+    const double last_non_b_q= rcc->last_qscale_for[rcc->last_non_b_pict_type];
+    if     (pict_type==SLICE_TYPE_I && h->param.f_ip_factor < 0)
+        q = last_p_q     / -h->param.f_ip_factor;
+    else if(pict_type==SLICE_TYPE_B && h->param.f_pb_factor < 0)
+        q = last_non_b_q * -h->param.f_pb_factor;
+    /* last qscale / qdiff stuff (skipped for an I frame when the last processed non-B type was not I) */
+    if(rcc->last_non_b_pict_type==pict_type || pict_type!=SLICE_TYPE_I)
+    {
+        double last_q = rcc->last_qscale_for[pict_type];
+        const double max_qscale = qp2qscale(qscale2qp(last_q) + h->param.i_qp_step);
+        const double min_qscale = qp2qscale(qscale2qp(last_q) - h->param.i_qp_step);
+        if     (q > max_qscale) q = max_qscale;
+        else if(q < min_qscale) q = min_qscale;
+    }
+
+    rcc->last_qscale_for[pict_type] = q; //Note we can't do that after blurring
+    if(pict_type!=SLICE_TYPE_B)
+        rcc->last_non_b_pict_type = pict_type;
+    return q;
+}
+
+/* Squash q into the qscale range of its picture type: a hard clamp when
+ * lmin==lmax, otherwise a logistic curve applied in the log domain. */
+static double modify_qscale(x264_t *h, ratecontrol_entry_t *rce, double q){
+    x264_ratecontrol_t *rcc = h->rc;
+    const int pict_type = rce->new_pict_type;
+    double lmin = rcc->lmin[pict_type];
+    double lmax = rcc->lmax[pict_type];
+
+    if(lmin==lmax /* || !h->param.b_qsquish */){
+        if     (q<lmin) q = lmin;
+        else if(q>lmax) q = lmax;
+    }else{
+        double min2 = log(lmin);
+        double max2 = log(lmax);
+        q = log(q);
+        q = (q - min2)/(max2-min2) - 0.5; // position inside the log range, centred on 0
+        q *= -4.0;
+        q = 1.0/(1.0 + exp(q));           // logistic curve: the result lies between 0 and 1
+        q = q*(max2-min2) + min2;         // back to a log-qscale between min2 and max2
+        q = exp(q);
+    }
+    return q;
+}
+
+// update qscale for 1 frame based on actual bits used so far:
+// the planned new_qscale is divided by a factor that falls below 1 once
+// total_bits exceeds the entry's expected_bits, then clamped to [lmin, lmax]
+static float rate_estimate_qscale(x264_t *h, int pict_type)
+{
+    float q;
+    float br_compensation;
+    double diff;
+    int picture_number = h->fenc->i_presentation_num;
+    x264_ratecontrol_t *rcc = h->rc;
+    ratecontrol_entry_t *rce;
+    double lmin = rcc->lmin[pict_type];
+    double lmax = rcc->lmax[pict_type];
+    int64_t wanted_bits;
+    int64_t total_bits = 8*(h->stat.i_slice_size[SLICE_TYPE_I]
+                          + h->stat.i_slice_size[SLICE_TYPE_P]
+                          + h->stat.i_slice_size[SLICE_TYPE_B]);
+
+    rce = &rcc->entry[picture_number];
+
+    if(pict_type!=SLICE_TYPE_I)
+        assert(pict_type == rce->new_pict_type);
+
+    wanted_bits = rce->expected_bits;
+
+    diff = total_bits - wanted_bits;
+    br_compensation = (rcc->buffer_size - diff) / rcc->buffer_size; // i.e. 1 - diff/buffer_size
+    if(br_compensation<=0.0) br_compensation=0.001; // keep the divisor below positive
+
+    q = rce->new_qscale / br_compensation;
+
+    if     (q<lmin) q=lmin;
+    else if(q>lmax) q=lmax;
+
+    rcc->last_qscale = q;
+    return q;
+}
+
+/* Plan the second pass: search for the rate_factor whose per-frame qscales
+ * bring the expected total size to the requested bitrate, leaving new_qscale
+ * and expected_bits in every entry. Returns 0 on success, -1 on failure. */
+static int init_pass2(x264_t *h)
+{
+    x264_ratecontrol_t *rcc = h->rc;
+    uint64_t all_const_bits = 0;
+    uint64_t all_available_bits = (uint64_t)(h->param.i_bitrate * 1000 * (double)rcc->num_entries / rcc->fps);
+    double rate_factor, step, step_mult;
+    double qblur = h->param.f_qblur;
+    const int filter_size = (int)(qblur*4) | 1; /* "| 1" keeps the blur window odd */
+    double expected_bits;
+    double *qscale, *blurred_qscale;
+    int i;
+    /* find total/average complexity & const_bits */
+    for(i=0; i<rcc->num_entries; i++){
+        ratecontrol_entry_t *rce = &rcc->entry[i];
+        rce->new_pict_type = rce->pict_type;
+        all_const_bits += rce->mv_bits + rce->misc_bits;
+        rcc->i_cplx_sum[rce->new_pict_type] += rce->i_tex_bits * rce->qscale;
+        rcc->p_cplx_sum[rce->new_pict_type] += rce->p_tex_bits * rce->qscale;
+        rcc->mv_bits_sum[rce->new_pict_type] += rce->mv_bits;
+        rcc->frame_count[rce->new_pict_type] ++;
+    }
+
+    if(all_available_bits < all_const_bits){
+        x264_log(h, X264_LOG_ERROR, "requested bitrate is too low. estimated minimum is %d kbps\n",
+                 (int)(all_const_bits * rcc->fps / (rcc->num_entries * 1000)));
+        return -1;
+    }
+
+    qscale = x264_malloc(sizeof(double)*rcc->num_entries);
+    if(filter_size > 1)
+        blurred_qscale = x264_malloc(sizeof(double)*rcc->num_entries);
+    else
+        blurred_qscale = qscale; /* no blur: both names refer to the same array */
+
+    expected_bits = 0;
+    for(i=0; i<rcc->num_entries; i++)
+        expected_bits += qscale2bits(&rcc->entry[i], get_qscale(h, &rcc->entry[i], 1.0));
+    step_mult = all_available_bits / expected_bits;
+
+    rate_factor = 0;
+    for(step = 1E4 * step_mult; step > 1E-7 * step_mult; step*=0.5){ /* the step halves on every round */
+        expected_bits = 0;
+        rate_factor += step;
+
+        /* find qscale */
+        for(i=0; i<rcc->num_entries; i++){
+            qscale[i] = get_qscale(h, &rcc->entry[i], rate_factor);
+        }
+
+        /* fixed I/B QP relative to P mode */
+        for(i=rcc->num_entries-1; i>=0; i--){
+            qscale[i] = get_diff_limited_q(h, &rcc->entry[i], qscale[i]);
+            assert(qscale[i] >= 0);
+        }
+
+        /* smooth curve */
+        if(filter_size > 1){
+            assert(filter_size%2==1);
+            for(i=0; i<rcc->num_entries; i++){
+                ratecontrol_entry_t *rce = &rcc->entry[i];
+                const int pict_type = rce->new_pict_type;
+                int j;
+                double q=0.0, sum=0.0;
+
+                for(j=0; j<filter_size; j++){
+                    int index = i+j-filter_size/2;
+                    double d = index-i;
+                    double coeff = qblur==0 ? 1.0 : exp(-d*d/(qblur*qblur));
+                    if(index < 0 || index >= rcc->num_entries) continue;
+                    if(pict_type != rcc->entry[index].new_pict_type) continue; /* only frames of the same type are averaged */
+                    q += qscale[index] * coeff;
+                    sum += coeff;
+                }
+                blurred_qscale[i] = q/sum;
+            }
+        }
+
+        /* find expected bits */
+        for(i=0; i<rcc->num_entries; i++){
+            ratecontrol_entry_t *rce = &rcc->entry[i];
+            double bits;
+            rce->new_qscale = modify_qscale(h, rce, blurred_qscale[i]);
+            assert(rce->new_qscale >= 0);
+            bits = qscale2bits(rce, rce->new_qscale) + rce->mv_bits + rce->misc_bits;
+
+            rce->expected_bits = expected_bits; /* total of the entries before this one */
+            expected_bits += bits;
+        }
+        if(expected_bits > all_available_bits) rate_factor -= step; /* over budget: take this step back */
+    }
+
+    x264_free(qscale);
+    if(filter_size > 1)
+        x264_free(blurred_qscale);
+
+    if(fabs(expected_bits/all_available_bits - 1.0) > 0.01 ){
+        x264_log(h, X264_LOG_ERROR, "Error: 2pass curve failed to converge\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+int x264_ratecontrol_slice_type(x264_t *h, int frame_num)
+{
+    /* 2nd pass: hand back the type planned for this frame; otherwise X264_TYPE_AUTO */
+    int planned_type;
+
+    if(!(h->param.i_pass & 2))
+        return X264_TYPE_AUTO;
+
+    assert(frame_num < h->rc->num_entries);
+    planned_type = h->rc->entry[frame_num].new_pict_type;
+    if(planned_type == SLICE_TYPE_I) return X264_TYPE_I;
+    if(planned_type == SLICE_TYPE_B) return X264_TYPE_B;
+    return X264_TYPE_P; /* P, and any other value */
+}
+
Index: x264.c
===================================================================
--- x264.c	(revision 38)
+++ x264.c	(working copy)
@@ -120,6 +120,11 @@
              "      --rcbuf <integer>       Size of VBV buffer\n"
              "      --rcinitbuf <integer>   Initial VBV buffer occupancy\n"
              "\n"
+             "  -p, --pass <1|2>            Enable 2 pass ratecontrol\n"
+             "      --stats <string>        Filename for 2 pass stats\n"
+             "      --rceq <string>         Ratecontrol equation\n"
+             "      --qcomp <float>         0.0 => CBR, 1.0 => CQP, 0.6 => default\n"
+             "\n"
              "  -A, --analyse <string>      Analyse options:\n"
              "                                  - i4x4\n"
              "                                  - psub16x16,psub8x8\n"
@@ -161,6 +166,9 @@
 #define OPT_PBRATIO 261
 #define OPT_RCBUF 262
 #define OPT_RCIBUF 263
+#define OPT_RCSTATS 264
+#define OPT_RCEQ 265
+#define OPT_QCOMP 266
         static struct option long_options[] =
         {
             { "help",    no_argument,       NULL, 'h' },
@@ -185,12 +193,16 @@
             { "rcinitbuf",required_argument, NULL, OPT_RCIBUF },
             { "ipratio", required_argument, NULL, OPT_IPRATIO },
             { "pbratio", required_argument, NULL, OPT_PBRATIO },
+            { "pass",    required_argument, NULL, 'p' },
+            { "stats",   required_argument, NULL, OPT_RCSTATS },
+            { "rceq",    required_argument, NULL, OPT_RCEQ },
+            { "qcomp",   required_argument, NULL, OPT_QCOMP },
             {0, 0, 0, 0}
         };
 
         int c;
 
-        c = getopt_long( argc, argv, "hi:I:b:r:cxB:q:no:s:A:",
+        c = getopt_long( argc, argv, "hi:I:b:r:cxB:q:no:s:A:p:",
                          long_options, &long_options_index);
 
         if( c == -1 )
@@ -297,6 +309,19 @@
             case OPT_PBRATIO:
                 param->f_pb_factor = atoi(optarg);
                 break;
+            case 'p':
+                param->i_pass = atoi(optarg);
+                break;
+            case OPT_RCSTATS:
+                param->s_2pass_file_in = optarg;
+                param->s_2pass_file_out = optarg;
+                break;
+            case OPT_RCEQ:
+                param->s_rc_eq = optarg;
+                break;
+            case OPT_QCOMP:
+                param->f_qcompress = atof(optarg);
+                break;
             default:
                 fprintf( stderr, "unknown option (%c)\n", optopt );
                 return -1;
Index: core/frame.h
===================================================================
--- core/frame.h	(revision 38)
+++ core/frame.h	(working copy)
@@ -31,6 +31,7 @@
     int     i_type;
     int     i_qpplus1;
     int64_t i_pts;
+    int     i_presentation_num;
 
     /* YUV buffer */
     int     i_plane;
Index: core/common.c
===================================================================
--- core/common.c	(revision 38)
+++ core/common.c	(working copy)
@@ -80,6 +80,13 @@
     param->f_ip_factor = 2.0;
     param->f_pb_factor = 2.0;
 
+    param->i_pass = 0;
+    param->s_2pass_file_out = "x264_2pass.log";
+    param->s_2pass_file_in = "x264_2pass.log";
+    param->s_rc_eq = "(tex^qComp)*(avgTex^(1-qComp))";
+    param->f_qcompress = 0.6;
+    param->f_qblur = 0.5;
+
     /* Log */
     param->pf_log = x264_log_default;
     param->p_log_private = NULL;
Index: core/common.h
===================================================================
--- core/common.h	(revision 38)
+++ core/common.h	(working copy)
@@ -191,6 +191,7 @@
     /* frame number/poc */
     int             i_frame;
     int             i_poc;
+    int             i_input_frame;
 
     int             i_frame_offset; /* decoding only */
     int             i_frame_num;    /* decoding only */
@@ -348,6 +349,15 @@
         int   i_mb_count[5][18];
     } stat;
 
+    struct /* statistics that x264_slice_write() zeroes with memset before coding a slice */
+    {
+        int i_itex_bits;    /* presumably intra texture bits -- TODO confirm where it is filled */
+        int i_ptex_bits;    /* presumably inter (P) texture bits -- TODO confirm */
+        int i_mv_bits;      /* presumably motion vector bits -- TODO confirm */
+        int i_misc_bits;    /* bitstream position minus the itex, ptex and mv counts above */
+        int i_mb_count[18]; /* macroblocks written, indexed by h->mb.i_type */
+    } frame_stat;
+
     /* CPU functions dependants */
     x264_predict_t      predict_16x16[4+3];
     x264_predict_t      predict_8x8[4+3];
Index: x264.h
===================================================================
--- x264.h	(revision 38)
+++ x264.h	(working copy)
@@ -125,6 +125,13 @@
     float       f_ip_factor;
     float       f_pb_factor;
 
+    int         i_pass;         /* 1 => write stats, 2 => use those stats, 3 => both */
+    const char* s_2pass_file_out;
+    const char* s_2pass_file_in;
+    const char* s_rc_eq;        /* rate control equation */
+    float       f_qcompress;    /* 0.0 => cbr, 1.0 => constant qp */
+    float       f_qblur;        /* temporally blur quants */
+
     /* Log */
     void        (*pf_log)( void *, int i_level, const char *psz, va_list );
     void        *p_log_private;
Index: Jamfile
===================================================================
--- Jamfile	(revision 38)
+++ Jamfile	(working copy)
@@ -24,7 +24,7 @@
             core/common.c core/mdate.c core/csp.c
             encoder/analyse.c encoder/me.c encoder/ratecontrol.c
             encoder/set.c encoder/macroblock.c encoder/cabac.c encoder/cavlc.c
-            encoder/encoder.c ;
+            encoder/encoder.c encoder/eval.c ;
 
 SOURCES_X86 = core/i386/cpu.asm ;
 SOURCES_MMX = core/i386/mc-c.c core/i386/dct-c.c core/i386/predict.c core/i386/dct.asm core/i386/pixel.asm core/i386/mc.asm ;