[vlc-devel] [PATCH] Phosphor deinterlacer, reworked++
Laurent Aimar
fenrir at elivagar.org
Mon Feb 28 22:10:19 CET 2011
Hi,
On Thu, Feb 24, 2011 at 01:34:03AM +0200, Juha Jeronen wrote:
> +#define PHOSPHOR_CHROMA_TEXT N_("Phosphor chroma mode for 4:2:0 input")
> +#define PHOSPHOR_CHROMA_LONGTEXT N_("Chroma output mode for the Phosphor framerate doubler, when input is 4:2:0. "\
> + "This affects the color representation of those (and only those) output "\
> + "frames that fall across input frame boundaries. Possible modes: "\
> + ""\
> + "Average input field chroma: default, good for most material. "\
> + ""\
> + "Use chroma only from latest field: the output frame takes its chroma "\
> + "from the latest arrived field. The chroma of the older field is ignored. "\
> + ""\
> + "Alternate line copy (like IVTC): take chroma line 1 from top field, "\
> + "chroma line 2 from bottom field, chroma line 3 from top field, etc. "\
> + "IVTC filters often do this, so this choice is good for NTSC "\
> + "telecined input that has 4:2:0 chroma (common on NTSC anime DVDs). "\
> + ""\
> + "Upconvert output to 4:2:2: the output frame will have independent chroma "\
> + "for each field. In this mode, all input chroma data is used. Upconvert is "\
> + "an alternative for Average, but requires more CPU and memory bandwidth. "\
> + "Note that for telecined input, Alternate line copy is best.")
> +
> +#define PHOSPHOR_DIMMER_TEXT N_("Phosphor old field dimmer strength")
> +#define PHOSPHOR_DIMMER_LONGTEXT N_("This controls the strength of the darkening filter that "\
> + "simulates CRT TV phosphor light decay for the old field "\
> + "in the Phosphor framerate doubler. "\
> + "Default: low.")
> +
> +static const char *const phosphor_chroma_list[] = { "merge", "newonly", "altline", "upconvert" };
> +static const char *const phosphor_chroma_list_text[] = { N_("Average input field chroma"),
> + N_("Use chroma only from latest field"),
> + N_("Alternate line copy (like IVTC)"),
> + N_("Upconvert output to 4:2:2") };
> +
> +static const char *const phosphor_dimmer_list[] = { "off", "low", "medium", "high" };
> +static const char *const phosphor_dimmer_list_text[] = { N_("Off (no light decay)"),
> + N_("Low"),
> + N_("Medium"),
> + N_("High") };
Using an integer type for dimmer would simplify the code. You can still have
a name associated to each numerical value.
I think you could also use a integer type for chroma (dunno if it
simplify in this case).
> +#define PHOSPHOR_WORKING_AREA_SIZE (1)
Do you need to be able to change it? Or is 1 the only sensible value?
> @@ -154,6 +208,11 @@ struct filter_sys_t
>
> /* Input frame history buffer for algorithms that perform temporal filtering. */
> picture_t *pp_history[HISTORY_SIZE];
> +
> + /* Phosphor */
> + phosphor_chroma_t i_phosphor_chroma_for_420;
> + int i_phosphor_dimmer_strength;
> + picture_t *pp_phosphor_working_area[PHOSPHOR_WORKING_AREA_SIZE];
struct {
phosphor_chroma_t chroma_....
} phospor;
could be used.
> @@ -271,6 +330,13 @@ static void SetFilterMethod( filter_t *p_filter, const char *psz_method, vlc_fou
> @@ -330,6 +397,12 @@ static void GetOutputFormat( filter_t *p_filter,
> break;
> }
> }
> + else if( p_sys->i_phosphor_chroma_for_420 == PC_UPCONVERT )
> + {
> + if( p_sys->i_mode == DEINTERLACE_PHOSPHOR )
The test order does not seem right (even if it works).
> + p_dst->i_chroma = p_src->i_chroma == VLC_CODEC_J420 ? VLC_CODEC_J422 :
> + VLC_CODEC_I422;
> + }
> }
>
> +static void ComposeFrame( filter_t *p_filter, picture_t *p_outpic,
> + picture_t *p_inpic_top, picture_t *p_inpic_bottom, compose_chroma_t i_output_chroma )
> +{
> + assert( p_filter != NULL );
> + assert( p_outpic != NULL );
> + assert( p_inpic_top != NULL );
> + assert( p_inpic_bottom != NULL );
> +
> + /* Valid 4:2:0 chroma handling modes. */
> + assert( i_output_chroma == CC_ALTLINE ||
> + i_output_chroma == CC_UPCONVERT ||
> + i_output_chroma == CC_SOURCE_TOP ||
> + i_output_chroma == CC_SOURCE_BOTTOM ||
> + i_output_chroma == CC_MERGE );
> +
> + const int i_chroma = p_filter->fmt_in.video.i_chroma;
> + const bool b_i422 = i_chroma == VLC_CODEC_I422 ||
> + i_chroma == VLC_CODEC_J422;
> + const bool b_upconvert_chroma = ( !b_i422 && i_output_chroma == CC_UPCONVERT );
> +
> + for( int i_plane = 0 ; i_plane < p_inpic_top->i_planes ; i_plane++ )
> + {
> + uint8_t *p_in_top, *p_in_bottom, *p_out_end, *p_out;
> +
> + p_in_top = p_inpic_top->p[i_plane].p_pixels;
> + p_in_bottom = p_inpic_bottom->p[i_plane].p_pixels;
> +
> + bool b_is_chroma_plane = ( i_plane == U_PLANE || i_plane == V_PLANE );
> +
> + /* YV12 is YVU - swap chroma planes in output when converting to 4:2:2 to get YUV. */
> + /* FIXME: do we need to do this, or is it already done elsewhere? */
> + int i_out_plane;
> + if( b_is_chroma_plane && b_upconvert_chroma && i_chroma == VLC_CODEC_YV12 )
> + {
> + if( i_plane == U_PLANE )
> + i_out_plane = V_PLANE;
> + else /* V_PLANE */
> + i_out_plane = U_PLANE;
> + }
> + else
> + {
> + i_out_plane = i_plane; /* Y or A plane, or the output chroma format matches input. */
> + }
> +
> + p_out = p_outpic->p[i_out_plane].p_pixels;
> + p_out_end = p_out + p_outpic->p[i_out_plane].i_pitch
> + * p_outpic->p[i_out_plane].i_visible_lines;
> +
> + assert( p_inpic_top->p[i_plane].i_pitch == p_inpic_bottom->p[i_plane].i_pitch );
> + assert( p_outpic->p[i_out_plane].i_visible_lines % 2 == 0 );
> +
> + /* Copy luma or chroma, alternating between input fields. */
> + if( !b_is_chroma_plane || b_i422 || i_output_chroma == CC_ALTLINE )
> + {
> + /* Do an alternating line copy. This is always done for luma, and for 4:2:2 chroma.
> + It can be requested for 4:2:0 chroma using CC_ALTLINE. (This is useful for IVTC.) */
> +
> + /* In the frame for the bottom field, skip the first line, which belongs to the top field. */
> + p_in_bottom += p_inpic_bottom->p[i_plane].i_pitch;
> +
> + for( ; p_out < p_out_end ; )
> + {
> + vlc_memcpy( p_out, p_in_top, p_inpic_top->p[i_plane].i_pitch );
> + p_out += p_outpic->p[i_out_plane].i_pitch;
> + vlc_memcpy( p_out, p_in_bottom, p_inpic_bottom->p[i_plane].i_pitch );
> + p_out += p_outpic->p[i_out_plane].i_pitch;
> +
> + p_in_top += 2 * p_inpic_top->p[i_plane].i_pitch;
> + p_in_bottom += 2 * p_inpic_bottom->p[i_plane].i_pitch;
> + }
> + }
> + else /* Input is 4:2:0, we are on a chroma plane, and not in altline mode. */
> + {
> + if( i_output_chroma == CC_UPCONVERT )
> + {
> + /* Upconverting copy - use all data from both input fields.
> +
> + This produces an output picture with independent chroma for each field.
> + It can be used for general input when the two input frames are different.
> + */
> + for( ; p_out < p_out_end ; )
> + {
> + vlc_memcpy( p_out, p_in_top, p_inpic_top->p[i_plane].i_pitch );
> + p_out += p_outpic->p[i_out_plane].i_pitch;
> + vlc_memcpy( p_out, p_in_bottom, p_inpic_bottom->p[i_plane].i_pitch );
> + p_out += p_outpic->p[i_out_plane].i_pitch;
> +
> + /* Because input is 4:2:0, this steps 2 lines w.r.t output. */
> + p_in_top += p_inpic_top->p[i_plane].i_pitch;
> + p_in_bottom += p_inpic_bottom->p[i_plane].i_pitch;
> + }
> + }
> + else if( i_output_chroma == CC_SOURCE_TOP )
> + {
> + /* Copy chroma of input top field. Ignore chroma of input bottom field. */
> + for( ; p_out < p_out_end ; )
> + {
> + vlc_memcpy( p_out, p_in_top, p_inpic_top->p[i_plane].i_pitch );
> + p_out += p_outpic->p[i_out_plane].i_pitch;
> +
> + /* Input and output are both 4:2:0. This steps one chroma line. */
> + p_in_top += p_inpic_top->p[i_plane].i_pitch;
> + }
> + }
> + else if( i_output_chroma == CC_SOURCE_BOTTOM )
> + {
> + /* Copy chroma of input bottom field. Ignore chroma of input top field. */
> + for( ; p_out < p_out_end ; )
> + {
> + vlc_memcpy( p_out, p_in_bottom, p_inpic_bottom->p[i_plane].i_pitch );
> + p_out += p_outpic->p[i_out_plane].i_pitch;
> +
> + /* Input and output are both 4:2:0. This steps one chroma line. */
> + p_in_bottom += p_inpic_bottom->p[i_plane].i_pitch;
> + }
> + }
> + else /* i_output_chroma == CC_MERGE */
> + {
> + /* Average the chroma of the input fields. */
> +#define Merge p_filter->p_sys->pf_merge
> + for( ; p_out < p_out_end ; )
> + {
> + Merge( p_out, p_in_top, p_in_bottom, p_inpic_top->p[i_plane].i_pitch );
> +
> + p_out += p_outpic->p[i_out_plane].i_pitch;
> + p_in_top += p_inpic_top->p[i_plane].i_pitch;
> + p_in_bottom += p_inpic_bottom->p[i_plane].i_pitch;
> + }
> +#undef Merge
> + EndMerge();
> + }
> + }
> + }
> +}
I think that this function can simplified by using plane_Copy() and maybe a
helper that returns a plane_t for a field.
Dunno how it is called but becareful that output pitch can be lower than
the input one (plane_Copy() take care of that).
> +static void DecayLuma( filter_t *p_filter, picture_t *p_dst, picture_t *p_src, int i_field, int i_strength )
> +{
> + assert( p_filter != NULL );
> + assert( p_dst != NULL );
> + assert( p_src != NULL );
> + assert( i_field == 0 || i_field == 1 );
> + assert( i_strength >= 1 && i_strength <= 3 );
> +
> + int i_plane = Y_PLANE;
> +
> + uint8_t *p_in, *p_out, *p_out_end;
> + p_in = p_src->p[i_plane].p_pixels;
> + p_out = p_dst->p[i_plane].p_pixels;
> + p_out_end = p_out + p_dst->p[i_plane].i_pitch
> + * p_dst->p[i_plane].i_visible_lines;
I am pretty sure that iterating over y would make the code easier to read (it
applies for the whole file, and not only for this occurence).
> + /* FIXME? (What to do more sensibly in case of input and output pitch mismatch?) */
> + int w = p_src->p[i_plane].i_pitch;
> + if( p_dst->p[i_plane].i_pitch < w )
> + w = p_dst->p[i_plane].i_pitch;
The min is the right choice, using visible_pitch would probably be better.
> + /* skip first line for bottom field */
> + if( i_field == 1 )
> + {
> + /* copy line if not in-place render */
> + if( p_dst != p_src )
> + vlc_memcpy( p_out, p_in, w );
> +
> + p_in += p_src->p[i_plane].i_pitch;
> + p_out += p_dst->p[i_plane].i_pitch;
> + }
> +
> + /* This operation is so simple it can be vectorized even without MMX. */
> +
> + /* Bitwise ANDing with this clears the i_strength highest bits of each byte */
> + uint64_t remove_high;
> + if( i_strength == 1 )
> + remove_high = INT64_C(0x7F7F7F7F7F7F7F7F);
> + else if( i_strength == 2 )
> + remove_high = INT64_C(0x3F3F3F3F3F3F3F3F);
> + else /* i_strength == 3 */
> + remove_high = INT64_C(0x1F1F1F1F1F1F1F1F);
> +
> + int wm8 = w % 8; /* remainder */
> + int w8 = w - wm8; /* part that is divisible by 8 */
> + int parity = 1;
> + for( ; p_out < p_out_end ; p_out += p_dst->p[i_plane].i_pitch, p_in += p_src->p[i_plane].i_pitch )
> + {
> + if( parity )
> + {
> + /* process this line */
> + uint64_t *pi = (uint64_t *)p_in; /* actually, 8-bit pixel values - we just vectorize */
> + uint64_t *po = (uint64_t *)p_out;
> + for( int x = 0 ; x < w8; x += 8 )
> + {
> + (*po) = ( ((*pi) >> i_strength) & remove_high );
> + ++po; /* note: steps forward by 8 bytes because of the type */
> + ++pi;
*po++ = (*pi++ >> i_strength) && remove_high
is simpler. The comment is not usefull IMHO.
> + }
> + /* handle the remainder from division (usually none; video widths tend to be divisible by 8) */
> + if( wm8 )
> + {
> + uint8_t *pi_temp = (uint8_t *)pi;
> + uint8_t *po_temp = (uint8_t *)po;
Useless casts.
> + for( int x = 0 ; x < wm8; ++x )
> + {
> + (*po_temp) = ( ((*pi_temp) >> i_strength) & remove_high );
> + ++pi_temp;
> + ++po_temp;
*po_temp++ = ((*pi_temp++) >> i_strength) & remove_high;
> + }
> + }
> + }
> + else
> + {
> + /* copy line if not in-place render */
> + if( p_dst != p_src )
> + vlc_memcpy( p_out, p_in, w );
> + }
> + parity = !parity;
> + }
> +
> + /* Copy other planes if not performing an in-place render */
> + if( p_dst != p_src )
> + {
> + for( int i_plane = 0 ; i_plane < p_src->i_planes ; i_plane++ )
> + {
> + if( i_plane == Y_PLANE )
> + continue; /* luma already handled */
> +
> + uint8_t *p_in, *p_out, *p_out_end;
> + p_in = p_src->p[i_plane].p_pixels;
> + p_out = p_dst->p[i_plane].p_pixels;
> + p_out_end = p_out + p_dst->p[i_plane].i_pitch
> + * p_dst->p[i_plane].i_visible_lines;
> +
> + /* FIXME? (What to do more sensibly in case of input and output pitch mismatch?) */
> + int w = p_src->p[i_plane].i_pitch;
> + if( p_dst->p[i_plane].i_pitch < w )
> + w = p_dst->p[i_plane].i_pitch;
> +
> + for( ; p_out < p_out_end ; )
> + {
> + vlc_memcpy( p_out, p_in, w );
> + p_out += p_dst->p[i_plane].i_pitch;
> + p_in += p_src->p[i_plane].i_pitch;
> + }
> + }
If it is a simple plane copy, then plane_Copy().
By the way, why not doing the same processing for the luma and chroma?
> +/**
> + * Deinterlace filter. Simulates an interlaced CRT TV (to some extent).
> + *
> + * The main use case for this filter is anime for which IVTC is not applicable. This is the case,
> + * if 24fps telecined material has been mixed with 60fps interlaced effects, such as in Sol Bianca
> + * or Silent Mobius.
> + *
> + * Technically speaking, this is an interlaced field renderer targeted for progressive displays.
> + * It works by framerate doubling, and simulating one step of light output decay of the "old"
> + * field during the "new" field, until the next new field comes in to replace the "old" one.
> + * Unlike the usual Bob algorithm, this filter keeps track of where the "old" field came from,
> + * as if the video was actually being rendered on a CRT.
> + *
> + * While playback is running, the simulated light decay gives the picture an appearance of visible
> + * "scanlines", much like on a real TV. Only when the video is paused, it is clearly visible that
> + * one of the fields is actually brighter than the other. The filter is thus functionally rather
> + * similar to "Bob" with scanlines, but the light decay has two effects: horizontal lines don't
> + * seem to flicker as much, and the image is 38% darker on average (this is unavoidable).
> + *
> + * The output vertical resolution should be large enough for the scaling not to have a too adverse effect
> + * on the regular scanline pattern. In practice, NTSC video can be acceptably rendered already at 1024x600
> + * if fullscreen even on an LCD. PAL video requires more.
> + *
> + * Due to the field rendering principle, this filter works properly only if the input framerate
> + * is stable. Otherwise the picture will flicker.
> + *
> + * Soft field repeat (repeat_pict) is supported. Note that the generated "repeated" output picture
> + * is unique because of the simulated light decay (its "old" field comes from the same input frame
> + * as the "new" one, unlike the first output picture of the same frame).
> + *
> + * As many output frames should be requested for each input frame as is indicated by p_src->i_nb_fields.
> + * This is done by calling this function several times, first with i_order = 0, and then with all other
> + * parameters the same, but a new p_dst and increasing i_order (first 1 for second field, and then
> + * if i_nb_fields = 3, also i_order = 2 to get the repeated first field).
> + *
> + * @param p_filter The filter instance this operation belongs to.
> + * @param p_dst Output frame. Must be allocated by caller.
> + * @param p_src Input frame. Must exist.
> + * @param i_order Which field (on time axis) for this output frame? 0 if first, 1 if second, 2 if repeated first.
> + * @param i_field Render which field? 0 = top field, 1 = bottom field.
> + * @return VLC error code (int).
> + * @retval VLC_SUCCESS The requested field was rendered into p_dst.
> + * @retval VLC_ENOMEM Internal picture allocation failed, probably out of memory.
> + * @retval VLC_EGENERIC No pictures in history buffer, cannot render.
> + * @see RenderBob()
> + * @see RenderLinear()
> + * @see Deinterlace()
> + */
> +static int RenderPhosphor( filter_t *p_filter, picture_t *p_dst, picture_t *p_src, int i_order, int i_field )
> +{
> + assert( p_filter != NULL );
> + assert( p_dst != NULL );
> + assert( p_src != NULL );
> + assert( i_order >= 0 && i_order <= 2 ); /* 2 = soft field repeat */
> + assert( i_field == 0 || i_field == 1 );
> +
> + filter_sys_t *p_sys = p_filter->p_sys;
> +
> + /* Allocate working area at first call after filter Open() */
> + if( !p_sys->pp_phosphor_working_area[0] )
Why not doing it in the Open() call ?
> + {
> + video_format_t temp_format = p_src->format;
> + if( p_sys->i_phosphor_chroma_for_420 == PC_UPCONVERT )
> + {
> + const int i_chroma = p_filter->fmt_in.video.i_chroma;
> + if( i_chroma == VLC_CODEC_I420 || i_chroma == VLC_CODEC_YV12 )
> + temp_format.i_chroma = VLC_CODEC_I422;
> + else if( i_chroma == VLC_CODEC_J420 )
> + temp_format.i_chroma = VLC_CODEC_J422;
> + } /* no "else"; otherwise we simply match input format. */
> +
> + bool b_success = true;
> + for( int i = 0; i < PHOSPHOR_WORKING_AREA_SIZE; i++ )
> + {
> + picture_t *p_new = picture_NewFromFormat( &temp_format );
> + if( !p_new )
> + {
> + b_success = false;
> + break;
> + }
> + picture_CopyProperties( p_new, p_src );
I don't think you need it.
> + p_sys->pp_phosphor_working_area[i] = p_new;
> + }
> + if( !b_success )
> + {
> + for( int i = 0; i < PHOSPHOR_WORKING_AREA_SIZE; i++ )
> + {
> + if( p_sys->pp_phosphor_working_area[i] )
> + picture_Release( p_sys->pp_phosphor_working_area[i] );
> + p_sys->pp_phosphor_working_area[i] = NULL;
> + }
> +
> + return VLC_ENOMEM;
If you can't move the code into Open(), then here you have a memleak.
> + }
> + }
> + /* Pitches must match in ComposeFrame(), so we only use pictures from picture_NewFromFormat(). */
> + picture_t *p_in = p_sys->pp_history[HISTORY_SIZE-1];
> + picture_t *p_old = p_sys->pp_history[HISTORY_SIZE-2];
> + picture_t *p_out = p_sys->pp_phosphor_working_area[0]; /* render buffer with matching pitch */
> +
> + /* Use the same input picture as "old" at the first frame after startup */
> + if( !p_old )
> + p_old = p_in;
> +
> + /* If the history mechanism has failed, we can't do anything. */
> + if( !p_in )
> + return VLC_EGENERIC;
> +
> + assert( p_old != NULL );
> + assert( p_in != NULL );
> + assert( p_out != NULL );
> +
> + /* Decide sources for top & bottom fields of output. */
> + picture_t *p_in_top = p_in;
> + picture_t *p_in_bottom = p_in;
> + /* For the first output field this frame, grab "old" field from previous frame. */
> + if( i_order == 0 )
> + {
> + if( i_field == 0 ) /* rendering top field */
> + p_in_bottom = p_old;
> + else /* i_field == 1, rendering bottom field */
> + p_in_top = p_old;
> + }
> +
> + compose_chroma_t cc;
> + switch( p_sys->i_phosphor_chroma_for_420 )
> + {
> + case PC_MERGE:
> + /* In practice chroma averaging for 4:2:0 seems to give a better-looking result
> + than using top or bottom field as chroma source based on i_field.
> + Thus, this is the default mode in the settings. */
> + cc = CC_MERGE;
> + break;
> + case PC_LATEST:
> + if( i_field == 0 )
> + cc = CC_SOURCE_TOP;
> + else /* i_field == 1 */
> + cc = CC_SOURCE_BOTTOM;
> + break;
> + case PC_ALTLINE:
> + cc = CC_ALTLINE;
> + break;
> + case PC_UPCONVERT:
> + cc = CC_UPCONVERT;
> + break;
> + default:
> + /* The above are the only possibilities, if there are no bugs.
> + Trigger an assert failure. */
> + assert( p_sys->i_phosphor_chroma_for_420 == PC_MERGE ||
> + p_sys->i_phosphor_chroma_for_420 == PC_LATEST ||
> + p_sys->i_phosphor_chroma_for_420 == PC_ALTLINE ||
> + p_sys->i_phosphor_chroma_for_420 == PC_UPCONVERT );
The assert cannot happen here. if the default case cannot happen, then assert(0); break; is what you need.
> + }
> +
> + ComposeFrame( p_filter, p_out, p_in_top, p_in_bottom, cc );
> +
> + /* Simulate phosphor light output decay for the field that is not the current one.
> +
> + We eliminate one copy operation by writing the luma decay result picture to p_dst directly.
> + Note that usually the pitch of p_dst is slightly different from that of p_out.
> + */
> + if( p_sys->i_phosphor_dimmer_strength > 0 )
> + DecayLuma( p_filter, p_dst, p_out, !i_field, p_sys->i_phosphor_dimmer_strength );
> + else /* dimmer off */
> + picture_Copy( p_dst, p_out );
What does i_phosphor_dimmer_strength == 0 do?
Regards,
--
fenrir
More information about the vlc-devel
mailing list