[vlc-devel] [PATCH] Phosphor deinterlacer, reworked++

Mon Feb 28 22:10:19 CET 2011

Hi,

On Thu, Feb 24, 2011 at 01:34:03AM +0200, Juha Jeronen wrote:
> +#define PHOSPHOR_CHROMA_TEXT N_("Phosphor chroma mode for 4:2:0 input")
> +#define PHOSPHOR_CHROMA_LONGTEXT N_("Chroma output mode for the Phosphor framerate doubler, when input is 4:2:0. "\
> +                                    "This affects the color representation of those (and only those) output "\
> +                                    "frames that fall across input frame boundaries. Possible modes: "\
> +                                    ""\
> +                                    "Average input field chroma: default, good for most material. "\
> +                                    ""\
> +                                    "Use chroma only from latest field: the output frame takes its chroma "\
> +                                    "from the latest arrived field. The chroma of the older field is ignored. "\
> +                                    ""\
> +                                    "Alternate line copy (like IVTC): take chroma line 1 from top field, "\
> +                                    "chroma line 2 from bottom field, chroma line 3 from top field, etc. "\
> +                                    "IVTC filters often do this, so this choice is good for NTSC "\
> +                                    "telecined input that has 4:2:0 chroma (common on NTSC anime DVDs). "\
> +                                    ""\
> +                                    "Upconvert output to 4:2:2: the output frame will have independent chroma "\
> +                                    "for each field. In this mode, all input chroma data is used. Upconvert is "\
> +                                    "an alternative for Average, but requires more CPU and memory bandwidth. "\
> +                                    "Note that for telecined input, Alternate line copy is best.")
> +
> +#define PHOSPHOR_DIMMER_TEXT N_("Phosphor old field dimmer strength")
> +#define PHOSPHOR_DIMMER_LONGTEXT N_("This controls the strength of the darkening filter that "\
> +                                    "simulates CRT TV phosphor light decay for the old field "\
> +                                    "in the Phosphor framerate doubler. "\
> +                                    "Default: low.")
> +
> +static const char *const phosphor_chroma_list[] = { "merge", "newonly", "altline", "upconvert" };
> +static const char *const phosphor_chroma_list_text[] = { N_("Average input field chroma"),
> +                                                         N_("Use chroma only from latest field"),
> +                                                         N_("Alternate line copy (like IVTC)"),
> +                                                         N_("Upconvert output to 4:2:2") };
> +
> +static const char *const phosphor_dimmer_list[] = { "off", "low", "medium", "high" };
> +static const char *const phosphor_dimmer_list_text[] = { N_("Off (no light decay)"),
> +                                                         N_("Low"),
> +                                                         N_("Medium"),
> +                                                         N_("High") };
 Using an integer type for dimmer would simplify the code. You can still have
a name associated to each numerical value.
 I think you could also use a integer type for chroma (dunno if it
simplify in this case).

> +#define PHOSPHOR_WORKING_AREA_SIZE (1)
 Do you need to be able to change it? Or is 1 the only sensible value?

> @@ -154,6 +208,11 @@ struct filter_sys_t
>  
>      /* Input frame history buffer for algorithms that perform temporal filtering. */
>      picture_t *pp_history[HISTORY_SIZE];
> +
> +    /* Phosphor */
> +    phosphor_chroma_t i_phosphor_chroma_for_420;
> +    int i_phosphor_dimmer_strength;
> +    picture_t *pp_phosphor_working_area[PHOSPHOR_WORKING_AREA_SIZE];
 struct {
 	phosphor_chroma_t chroma_....
 } phospor;
 could be used.

> @@ -271,6 +330,13 @@ static void SetFilterMethod( filter_t *p_filter, const char *psz_method, vlc_fou
> @@ -330,6 +397,12 @@ static void GetOutputFormat( filter_t *p_filter,
>              break;
>          }
>      }
> +    else if( p_sys->i_phosphor_chroma_for_420 == PC_UPCONVERT )
> +    {
> +        if( p_sys->i_mode == DEINTERLACE_PHOSPHOR )
 The test order does not seem right (even if it works).
> +            p_dst->i_chroma = p_src->i_chroma == VLC_CODEC_J420 ? VLC_CODEC_J422 :
> +                                                                  VLC_CODEC_I422;
> +    }
>  }
>  

> +static void ComposeFrame( filter_t *p_filter, picture_t *p_outpic,
> +                          picture_t *p_inpic_top, picture_t *p_inpic_bottom, compose_chroma_t i_output_chroma )
> +{
> +    assert( p_filter != NULL );
> +    assert( p_outpic != NULL );
> +    assert( p_inpic_top != NULL );
> +    assert( p_inpic_bottom != NULL );
> +
> +    /* Valid 4:2:0 chroma handling modes. */
> +    assert( i_output_chroma == CC_ALTLINE       ||
> +            i_output_chroma == CC_UPCONVERT     ||
> +            i_output_chroma == CC_SOURCE_TOP    ||
> +            i_output_chroma == CC_SOURCE_BOTTOM ||
> +            i_output_chroma == CC_MERGE );
> +
> +    const int i_chroma = p_filter->fmt_in.video.i_chroma;
> +    const bool b_i422 = i_chroma == VLC_CODEC_I422 ||
> +                        i_chroma == VLC_CODEC_J422;
> +    const bool b_upconvert_chroma = ( !b_i422  &&  i_output_chroma == CC_UPCONVERT );
> +
> +    for( int i_plane = 0 ; i_plane < p_inpic_top->i_planes ; i_plane++ )
> +    {
> +        uint8_t *p_in_top, *p_in_bottom, *p_out_end, *p_out;
> +
> +        p_in_top    = p_inpic_top->p[i_plane].p_pixels;
> +        p_in_bottom = p_inpic_bottom->p[i_plane].p_pixels;
> +
> +        bool b_is_chroma_plane = ( i_plane == U_PLANE || i_plane == V_PLANE );
> +
> +        /* YV12 is YVU - swap chroma planes in output when converting to 4:2:2 to get YUV. */
> +        /* FIXME: do we need to do this, or is it already done elsewhere? */
> +        int i_out_plane;
> +        if( b_is_chroma_plane  &&  b_upconvert_chroma  &&  i_chroma == VLC_CODEC_YV12 )
> +        {
> +            if( i_plane == U_PLANE )
> +                i_out_plane = V_PLANE;
> +            else /* V_PLANE */
> +                i_out_plane = U_PLANE;
> +        }
> +        else
> +        {
> +            i_out_plane = i_plane; /* Y or A plane, or the output chroma format matches input. */
> +        }
> +
> +        p_out = p_outpic->p[i_out_plane].p_pixels;
> +        p_out_end = p_out + p_outpic->p[i_out_plane].i_pitch
> +                          * p_outpic->p[i_out_plane].i_visible_lines;
> +
> +        assert( p_inpic_top->p[i_plane].i_pitch == p_inpic_bottom->p[i_plane].i_pitch );
> +        assert( p_outpic->p[i_out_plane].i_visible_lines % 2 == 0 );
> +
> +        /* Copy luma or chroma, alternating between input fields. */
> +        if( !b_is_chroma_plane  ||  b_i422  ||  i_output_chroma == CC_ALTLINE )
> +        {
> +            /* Do an alternating line copy. This is always done for luma, and for 4:2:2 chroma.
> +               It can be requested for 4:2:0 chroma using CC_ALTLINE. (This is useful for IVTC.) */
> +
> +            /* In the frame for the bottom field, skip the first line, which belongs to the top field. */
> +            p_in_bottom += p_inpic_bottom->p[i_plane].i_pitch;
> +
> +            for( ; p_out < p_out_end ; )
> +            {
> +                vlc_memcpy( p_out, p_in_top, p_inpic_top->p[i_plane].i_pitch );
> +                p_out += p_outpic->p[i_out_plane].i_pitch;
> +                vlc_memcpy( p_out, p_in_bottom, p_inpic_bottom->p[i_plane].i_pitch );
> +                p_out += p_outpic->p[i_out_plane].i_pitch;
> +
> +                p_in_top    += 2 * p_inpic_top->p[i_plane].i_pitch;
> +                p_in_bottom += 2 * p_inpic_bottom->p[i_plane].i_pitch;
> +            }
> +        }
> +        else /* Input is 4:2:0, we are on a chroma plane, and not in altline mode. */
> +        {
> +            if( i_output_chroma == CC_UPCONVERT )
> +            {
> +                /* Upconverting copy - use all data from both input fields.
> +
> +                   This produces an output picture with independent chroma for each field.
> +                   It can be used for general input when the two input frames are different.
> +                */
> +                for( ; p_out < p_out_end ; )
> +                {
> +                    vlc_memcpy( p_out, p_in_top, p_inpic_top->p[i_plane].i_pitch );
> +                    p_out       += p_outpic->p[i_out_plane].i_pitch;
> +                    vlc_memcpy( p_out, p_in_bottom, p_inpic_bottom->p[i_plane].i_pitch );
> +                    p_out       += p_outpic->p[i_out_plane].i_pitch;
> +
> +                    /* Because input is 4:2:0, this steps 2 lines w.r.t output. */
> +                    p_in_top    += p_inpic_top->p[i_plane].i_pitch;
> +                    p_in_bottom += p_inpic_bottom->p[i_plane].i_pitch;
> +                }
> +            }
> +            else if( i_output_chroma == CC_SOURCE_TOP )
> +            {
> +                /* Copy chroma of input top field. Ignore chroma of input bottom field. */
> +                for( ; p_out < p_out_end ; )
> +                {
> +                    vlc_memcpy( p_out, p_in_top, p_inpic_top->p[i_plane].i_pitch );
> +                    p_out       += p_outpic->p[i_out_plane].i_pitch;
> +
> +                    /* Input and output are both 4:2:0. This steps one chroma line. */
> +                    p_in_top    += p_inpic_top->p[i_plane].i_pitch;
> +                }
> +            }
> +            else if( i_output_chroma == CC_SOURCE_BOTTOM )
> +            {
> +                /* Copy chroma of input bottom field. Ignore chroma of input top field. */
> +                for( ; p_out < p_out_end ; )
> +                {
> +                    vlc_memcpy( p_out, p_in_bottom, p_inpic_bottom->p[i_plane].i_pitch );
> +                    p_out       += p_outpic->p[i_out_plane].i_pitch;
> +
> +                    /* Input and output are both 4:2:0. This steps one chroma line. */
> +                    p_in_bottom += p_inpic_bottom->p[i_plane].i_pitch;
> +                }
> +            }
> +            else /* i_output_chroma == CC_MERGE */
> +            {
> +                /* Average the chroma of the input fields. */
> +#define Merge p_filter->p_sys->pf_merge
> +                for( ; p_out < p_out_end ; )
> +                {
> +                    Merge( p_out, p_in_top, p_in_bottom, p_inpic_top->p[i_plane].i_pitch );
> +
> +                    p_out       += p_outpic->p[i_out_plane].i_pitch;
> +                    p_in_top    += p_inpic_top->p[i_plane].i_pitch;
> +                    p_in_bottom += p_inpic_bottom->p[i_plane].i_pitch;
> +                }
> +#undef Merge
> +                EndMerge();
> +            }
> +        }
> +    }
> +}
 I think that this function can simplified by using plane_Copy() and maybe a
helper that returns a plane_t for a field.
 Dunno how it is called but becareful that output pitch can be lower than
the input one (plane_Copy() take care of that).

> +static void DecayLuma( filter_t *p_filter, picture_t *p_dst, picture_t *p_src, int i_field, int i_strength )
> +{
> +    assert( p_filter != NULL );
> +    assert( p_dst != NULL );
> +    assert( p_src != NULL );
> +    assert( i_field == 0 || i_field == 1 );
> +    assert( i_strength >= 1 && i_strength <= 3 );
> +
> +    int i_plane = Y_PLANE;
> +
> +    uint8_t *p_in, *p_out, *p_out_end;
> +    p_in  = p_src->p[i_plane].p_pixels;
> +    p_out = p_dst->p[i_plane].p_pixels;
> +    p_out_end = p_out + p_dst->p[i_plane].i_pitch
> +                      * p_dst->p[i_plane].i_visible_lines;
 I am pretty sure that iterating over y would make the code easier to read (it
applies for the whole file, and not only for this occurence).

> +    /* FIXME? (What to do more sensibly in case of input and output pitch mismatch?) */
> +    int w = p_src->p[i_plane].i_pitch;
> +    if( p_dst->p[i_plane].i_pitch < w )
> +        w = p_dst->p[i_plane].i_pitch;
 The min is the right choice, using visible_pitch would probably be better.

> +    /* skip first line for bottom field */
> +    if( i_field == 1 )
> +    {
> +        /* copy line if not in-place render */
> +        if( p_dst != p_src )
> +            vlc_memcpy( p_out, p_in, w );
> +
> +        p_in  += p_src->p[i_plane].i_pitch;
> +        p_out += p_dst->p[i_plane].i_pitch;
> +    }
> +
> +    /* This operation is so simple it can be vectorized even without MMX. */
> +
> +    /* Bitwise ANDing with this clears the i_strength highest bits of each byte */
> +    uint64_t remove_high;
> +    if( i_strength == 1 )
> +        remove_high = INT64_C(0x7F7F7F7F7F7F7F7F);
> +    else if( i_strength == 2 )
> +        remove_high = INT64_C(0x3F3F3F3F3F3F3F3F);
> +    else /* i_strength == 3 */
> +        remove_high = INT64_C(0x1F1F1F1F1F1F1F1F);
> +
> +    int wm8 = w % 8;   /* remainder */ 
> +    int w8  = w - wm8; /* part that is divisible by 8 */
> +    int parity = 1;
> +    for( ; p_out < p_out_end ; p_out += p_dst->p[i_plane].i_pitch, p_in += p_src->p[i_plane].i_pitch )
> +    {
> +        if( parity )
> +        {
> +            /* process this line */
> +            uint64_t *pi = (uint64_t *)p_in; /* actually, 8-bit pixel values - we just vectorize */
> +            uint64_t *po = (uint64_t *)p_out;
> +            for( int x = 0 ; x < w8; x += 8 )
> +            {
> +                (*po) = ( ((*pi) >> i_strength) & remove_high );
> +                ++po; /* note: steps forward by 8 bytes because of the type */
> +                ++pi;
 *po++ = (*pi++ >> i_strength) && remove_high
is simpler. The comment is not usefull IMHO.
> +            }
> +            /* handle the remainder from division (usually none; video widths tend to be divisible by 8) */
> +            if( wm8 )
> +            {
> +                uint8_t *pi_temp = (uint8_t *)pi;
> +                uint8_t *po_temp = (uint8_t *)po;
 Useless casts.
> +                for( int x = 0 ; x < wm8; ++x )
> +                {
> +                    (*po_temp) = ( ((*pi_temp) >> i_strength) & remove_high );
> +                    ++pi_temp;
> +                    ++po_temp;
 *po_temp++ = ((*pi_temp++) >> i_strength) & remove_high;

> +                }
> +            }
> +        }
> +        else
> +        {
> +            /* copy line if not in-place render */
> +            if( p_dst != p_src )
> +                vlc_memcpy( p_out, p_in, w );
> +        }
> +        parity = !parity;
> +    }
> +
> +    /* Copy other planes if not performing an in-place render */
> +    if( p_dst != p_src )
> +    {
> +        for( int i_plane = 0 ; i_plane < p_src->i_planes ; i_plane++ )
> +        {
> +            if( i_plane == Y_PLANE )
> +                continue; /* luma already handled */
> +
> +            uint8_t *p_in, *p_out, *p_out_end;
> +            p_in  = p_src->p[i_plane].p_pixels;
> +            p_out = p_dst->p[i_plane].p_pixels;
> +            p_out_end = p_out + p_dst->p[i_plane].i_pitch
> +                              * p_dst->p[i_plane].i_visible_lines;
> +
> +            /* FIXME? (What to do more sensibly in case of input and output pitch mismatch?) */
> +            int w = p_src->p[i_plane].i_pitch;
> +            if( p_dst->p[i_plane].i_pitch < w )
> +                w = p_dst->p[i_plane].i_pitch;
> +
> +            for( ; p_out < p_out_end ; )
> +            {
> +                vlc_memcpy( p_out, p_in, w );
> +                p_out += p_dst->p[i_plane].i_pitch;
> +                p_in  += p_src->p[i_plane].i_pitch;
> +            }
> +        }
 If it is a simple plane copy, then plane_Copy().
 By the way, why not doing the same processing for the luma and chroma?

> +/**
> + * Deinterlace filter. Simulates an interlaced CRT TV (to some extent).
> + *
> + * The main use case for this filter is anime for which IVTC is not applicable. This is the case,
> + * if 24fps telecined material has been mixed with 60fps interlaced effects, such as in Sol Bianca
> + * or Silent Mobius.
> + *
> + * Technically speaking, this is an interlaced field renderer targeted for progressive displays.
> + * It works by framerate doubling, and simulating one step of light output decay of the "old"
> + * field during the "new" field, until the next new field comes in to replace the "old" one.
> + * Unlike the usual Bob algorithm, this filter keeps track of where the "old" field came from,
> + * as if the video was actually being rendered on a CRT.
> + *
> + * While playback is running, the simulated light decay gives the picture an appearance of visible
> + * "scanlines", much like on a real TV. Only when the video is paused, it is clearly visible that
> + * one of the fields is actually brighter than the other. The filter is thus functionally rather
> + * similar to "Bob" with scanlines, but the light decay has two effects: horizontal lines don't
> + * seem to flicker as much, and the image is 38% darker on average (this is unavoidable).
> + *
> + * The output vertical resolution should be large enough for the scaling not to have a too adverse effect
> + * on the regular scanline pattern. In practice, NTSC video can be acceptably rendered already at 1024x600
> + * if fullscreen even on an LCD. PAL video requires more.
> + *
> + * Due to the field rendering principle, this filter works properly only if the input framerate
> + * is stable. Otherwise the picture will flicker.
> + *
> + * Soft field repeat (repeat_pict) is supported. Note that the generated "repeated" output picture
> + * is unique because of the simulated light decay (its "old" field comes from the same input frame
> + * as the "new" one, unlike the first output picture of the same frame).
> + *
> + * As many output frames should be requested for each input frame as is indicated by p_src->i_nb_fields.
> + * This is done by calling this function several times, first with i_order = 0, and then with all other
> + * parameters the same, but a new p_dst and increasing i_order (first 1 for second field, and then
> + * if i_nb_fields = 3, also i_order = 2 to get the repeated first field).
> + *
> + * @param p_filter The filter instance this operation belongs to.
> + * @param p_dst Output frame. Must be allocated by caller.
> + * @param p_src Input frame. Must exist.
> + * @param i_order Which field (on time axis) for this output frame? 0 if first, 1 if second, 2 if repeated first.
> + * @param i_field Render which field? 0 = top field, 1 = bottom field.
> + * @return VLC error code (int).
> + * @retval VLC_SUCCESS The requested field was rendered into p_dst.
> + * @retval VLC_ENOMEM Internal picture allocation failed, probably out of memory.
> + * @retval VLC_EGENERIC No pictures in history buffer, cannot render.
> + * @see RenderBob()
> + * @see RenderLinear()
> + * @see Deinterlace()
> + */
> +static int RenderPhosphor( filter_t *p_filter, picture_t *p_dst, picture_t *p_src, int i_order, int i_field )
> +{
> +    assert( p_filter != NULL );
> +    assert( p_dst != NULL );
> +    assert( p_src != NULL );
> +    assert( i_order >= 0 && i_order <= 2 ); /* 2 = soft field repeat */
> +    assert( i_field == 0 || i_field == 1 );
> +
> +    filter_sys_t *p_sys = p_filter->p_sys;
> +
> +    /* Allocate working area at first call after filter Open() */
> +    if( !p_sys->pp_phosphor_working_area[0] )
 Why not doing it in the Open() call ?

> +    {
> +        video_format_t temp_format = p_src->format;
> +        if( p_sys->i_phosphor_chroma_for_420 == PC_UPCONVERT )
> +        {
> +            const int i_chroma = p_filter->fmt_in.video.i_chroma;
> +            if( i_chroma == VLC_CODEC_I420 || i_chroma == VLC_CODEC_YV12 )
> +                temp_format.i_chroma = VLC_CODEC_I422;
> +            else if( i_chroma == VLC_CODEC_J420 )
> +                temp_format.i_chroma = VLC_CODEC_J422;
> +        } /* no "else"; otherwise we simply match input format. */
> +
> +        bool b_success = true;
> +        for( int i = 0; i < PHOSPHOR_WORKING_AREA_SIZE; i++ )
> +        {
> +            picture_t *p_new = picture_NewFromFormat( &temp_format );
> +            if( !p_new )
> +            {
> +                b_success = false;
> +                break;
> +            }
> +            picture_CopyProperties( p_new, p_src );
 I don't think you need it.
> +            p_sys->pp_phosphor_working_area[i] = p_new;
> +        }
> +        if( !b_success )
> +        {
> +            for( int i = 0; i < PHOSPHOR_WORKING_AREA_SIZE; i++ )
> +            {
> +                if( p_sys->pp_phosphor_working_area[i] )
> +                    picture_Release( p_sys->pp_phosphor_working_area[i] );
> +                p_sys->pp_phosphor_working_area[i] = NULL;
> +            }
> +
> +            return VLC_ENOMEM;
 If you can't move the code into Open(), then here you have a memleak.
> +        }
> +    }
> +    /* Pitches must match in ComposeFrame(), so we only use pictures from picture_NewFromFormat(). */
> +    picture_t *p_in  = p_sys->pp_history[HISTORY_SIZE-1];
> +    picture_t *p_old = p_sys->pp_history[HISTORY_SIZE-2];
> +    picture_t *p_out = p_sys->pp_phosphor_working_area[0]; /* render buffer with matching pitch */
> +
> +    /* Use the same input picture as "old" at the first frame after startup */
> +    if( !p_old )
> +        p_old = p_in;
> +
> +    /* If the history mechanism has failed, we can't do anything. */
> +    if( !p_in )
> +        return VLC_EGENERIC;
> +
> +    assert( p_old != NULL );
> +    assert( p_in != NULL );
> +    assert( p_out != NULL );
> +
> +    /* Decide sources for top & bottom fields of output. */
> +    picture_t *p_in_top    = p_in;
> +    picture_t *p_in_bottom = p_in;
> +    /* For the first output field this frame, grab "old" field from previous frame. */
> +    if( i_order == 0 )
> +    {
> +        if( i_field == 0 ) /* rendering top field */
> +            p_in_bottom = p_old;
> +        else /* i_field == 1, rendering bottom field */
> +            p_in_top = p_old;
> +    }
> +
> +    compose_chroma_t cc;
> +    switch( p_sys->i_phosphor_chroma_for_420 )
> +    {
> +        case PC_MERGE:
> +            /* In practice chroma averaging for 4:2:0 seems to give a better-looking result
> +               than using top or bottom field as chroma source based on i_field.
> +               Thus, this is the default mode in the settings. */
> +            cc = CC_MERGE;
> +            break;
> +        case PC_LATEST:
> +            if( i_field == 0 )
> +                cc = CC_SOURCE_TOP;
> +            else /* i_field == 1 */
> +                cc = CC_SOURCE_BOTTOM;
> +            break;
> +        case PC_ALTLINE:
> +            cc = CC_ALTLINE;
> +            break;
> +        case PC_UPCONVERT:
> +            cc = CC_UPCONVERT;
> +            break;
> +        default:
> +            /* The above are the only possibilities, if there are no bugs.
> +               Trigger an assert failure. */
> +            assert( p_sys->i_phosphor_chroma_for_420 == PC_MERGE   ||
> +                    p_sys->i_phosphor_chroma_for_420 == PC_LATEST  ||
> +                    p_sys->i_phosphor_chroma_for_420 == PC_ALTLINE ||
> +                    p_sys->i_phosphor_chroma_for_420 == PC_UPCONVERT );
 The assert cannot happen here. if the default case cannot happen, then assert(0); break; is what you need.
> +    }
> +
> +    ComposeFrame( p_filter, p_out, p_in_top, p_in_bottom, cc );
> +
> +    /* Simulate phosphor light output decay for the field that is not the current one.
> +
> +       We eliminate one copy operation by writing the luma decay result picture to p_dst directly.
> +       Note that usually the pitch of p_dst is slightly different from that of p_out.
> +    */
> +    if( p_sys->i_phosphor_dimmer_strength > 0 )
> +        DecayLuma( p_filter, p_dst, p_out, !i_field, p_sys->i_phosphor_dimmer_strength );
> +    else /* dimmer off */
> +        picture_Copy( p_dst, p_out );
 What does i_phosphor_dimmer_strength == 0 do?

Regards,

-- 
fenrir