[vlc-devel] [PATCH] Added IVTC deinterlacer (NTSC film mode)
Juha Jeronen
juha.jeronen at jyu.fi
Mon Jan 3 01:33:52 CET 2011
Hi all,
...or, what I just sent would have been a double post, if the internet
hadn't eaten the first one. Is there an attachment size limit on this list?
Anyway, here is the updated IVTC patch (in the message body this time).
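If you want to give it a quick spin through LibVLC once the patch is applied,
something like this should select the new mode (untested sketch; the file name
is just a placeholder):

    #include <vlc/vlc.h>

    int main( void )
    {
        libvlc_instance_t *p_vlc = libvlc_new( 0, NULL );
        libvlc_media_t *p_media = libvlc_media_new_path( p_vlc, "telecined_movie.mpg" );
        libvlc_media_player_t *p_mp = libvlc_media_player_new_from_media( p_media );
        libvlc_media_release( p_media );

        /* Select the new IVTC deinterlace mode added by this patch. */
        libvlc_video_set_deinterlace( p_mp, "ivtc" );

        libvlc_media_player_play( p_mp );
        /* ...wait / run your event loop here... */

        libvlc_media_player_release( p_mp );
        libvlc_release( p_vlc );
        return 0;
    }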
Changelog:
- Code now refers to "IVTC", not "Film". User label is "Film NTSC (IVTC)".
- Wrapped user label in N_() for localization.
- IVTC state refactored into ivtc_sys_t, which is now a substructure of
filter_sys_t.
- Removed the changes that shouldn't have been in the patch.
- Fixed the types in CalculateInterlaceScore() (both C and MMX).
- #undef T right after CalculateInterlaceScore()
- enums added for ivtc_field_pair, ivtc_cadence_pos, ivtc_op
- added filter restart when input video size or chroma changes
I left the loop counters as-is, since that's consistent with the other
deinterlacers. Also, I haven't fixed the allocation assert yet, because it's
an open question how to quit gracefully. For now, I've marked it with a FIXME.
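One possible direction (just a sketch, not in this patch; the flag name
b_ivtc_alloc_failed is made up): remember the failure in ivtc_sys_t and have
RenderIVTC() fall back to passing frames through, instead of asserting:

    /* in the (re)allocation loop: */
    p_ivtc->pp_ivtc_working_area[i] = picture_NewFromFormat( &p_next->format );
    if( !p_ivtc->pp_ivtc_working_area[i] )
    {
        msg_Err( p_filter, "IVTC: picture allocation failed, disabling IVTC" );
        p_ivtc->b_ivtc_alloc_failed = true; /* hypothetical new field in ivtc_sys_t */
        break;
    }

    /* and near the top of RenderIVTC(): */
    if( p_ivtc->b_ivtc_alloc_failed )
    {
        picture_Copy( p_dst, p_src ); /* bypass: output the input frame unchanged */
        return VLC_SUCCESS;
    }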
Any comments appreciated :)
-J
---
From d8dd93f230515ca43d71d6a49bb9c6b9651059f1 Mon Sep 17 00:00:00 2001
From: Juha Jeronen <juha.jeronen at jyu.fi>
Date: Mon, 3 Jan 2011 01:44:39 +0200
Subject: [PATCH] Added IVTC deinterlacer
---
 modules/video_filter/deinterlace.c | 1247 +++++++++++++++++++++++++++++++++++-
src/control/video.c | 3 +-
src/libvlc-module.c | 4 +-
src/video_output/interlacing.c | 1 +
4 files changed, 1249 insertions(+), 6 deletions(-)
diff --git a/modules/video_filter/deinterlace.c b/modules/video_filter/deinterlace.c
index fd05bd6..14ae025 100644
--- a/modules/video_filter/deinterlace.c
+++ b/modules/video_filter/deinterlace.c
@@ -5,6 +5,7 @@
* $Id$
*
* Author: Sam Hocevar <sam at zoy.org>
+ * Juha Jeronen <juha.jeronen at jyu.fi> (inverse telecine)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -30,6 +31,7 @@
#endif
#include <assert.h>
+#include <stdint.h> /* int_fast32_t */
#ifdef HAVE_ALTIVEC_H
# include <altivec.h>
@@ -52,6 +54,7 @@
#define DEINTERLACE_X 6
#define DEINTERLACE_YADIF 7
#define DEINTERLACE_YADIF2X 8
+#define DEINTERLACE_IVTC 9
/*****************************************************************************
* Module descriptor
@@ -68,9 +71,9 @@ static void Close( vlc_object_t * );
#define FILTER_CFG_PREFIX "sout-deinterlace-"
static const char *const mode_list[] = {
- "discard", "blend", "mean", "bob", "linear", "x", "yadif", "yadif2x" };
+ "discard", "blend", "mean", "bob", "linear", "x", "yadif",
"yadif2x", "ivtc" };
static const char *const mode_list_text[] = {
- N_("Discard"), N_("Blend"), N_("Mean"), N_("Bob"), N_("Linear"),
"X", "Yadif", "Yadif (2x)" };
+ N_("Discard"), N_("Blend"), N_("Mean"), N_("Bob"), N_("Linear"),
"X", "Yadif", "Yadif (2x)", N_("Film NTSC (IVTC)") };
vlc_module_begin ()
set_description( N_("Deinterlacing video filter") )
@@ -98,6 +101,7 @@ static void RenderBlend ( filter_t *, picture_t *, picture_t * );
static void RenderLinear ( filter_t *, picture_t *, picture_t *, int );
static void RenderX ( picture_t *, picture_t * );
 static int RenderYadif ( filter_t *, picture_t *, picture_t *, int, int );
+static int RenderIVTC ( filter_t *, picture_t *, picture_t * );
static void MergeGeneric ( void *, const void *, const void *, size_t );
#if defined(CAN_COMPILE_C_ALTIVEC)
@@ -126,6 +130,27 @@ static const char *const ppsz_filter_options[] = {
"mode", NULL
};
+/* IVTC */
+#define IVTC_NUM_FIELD_PAIRS 7
+#define IVTC_DETECTION_HISTORY_SIZE 3
+#define IVTC_WORKING_AREA_SIZE 2
+/* inverse telecine subsystem state */
+struct ivtc_sys_t
+{
+    bool b_possible_cadence_break_detected;
+    int i_telecine_field_dominance; /* Whether TFF or BFF telecine; detected from the video */
+    int i_cadence_pos; /* Cadence counter (starts once the filter locks on). */
+    int i_ivtc_filter_mode; /* Telecined or not. See RenderIVTC. */
+    int pi_interlace_scores[IVTC_NUM_FIELD_PAIRS];
+    int pi_cadence_pos_history[IVTC_DETECTION_HISTORY_SIZE]; /* Detected cadence data (raw) */
+    picture_t *pp_ivtc_working_area[IVTC_WORKING_AREA_SIZE]; /* Temp frames; stored here to avoid reallocation */
+    /* These are used for detecting need for reallocation of the working area. */
+    int i_old_chroma;
+    int i_old_size_x;
+    int i_old_size_y;
+};
+typedef struct ivtc_sys_t ivtc_sys_t;
+
#define HISTORY_SIZE (3)
struct filter_sys_t
{
@@ -138,8 +163,11 @@ struct filter_sys_t
mtime_t i_last_date;
- /* Yadif */
+ /* Yadif, IVTC */
picture_t *pp_history[HISTORY_SIZE];
+
+ /* IVTC */
+ ivtc_sys_t *p_ivtc;
};
/*****************************************************************************
@@ -189,6 +217,12 @@ static void SetFilterMethod( filter_t *p_filter, const char *psz_method, vlc_fou
p_sys->b_double_rate = true;
p_sys->b_half_height = false;
}
+ else if( !strcmp( psz_method, "ivtc" ) )
+ {
+ p_sys->i_mode = DEINTERLACE_IVTC;
+ p_sys->b_double_rate = false;
+ p_sys->b_half_height = false;
+ }
else if( !strcmp( psz_method, "discard" ) )
{
const bool b_i422 = i_chroma == VLC_CODEC_I422 ||
@@ -236,6 +270,7 @@ static void GetOutputFormat( filter_t *p_filter,
case DEINTERLACE_X:
case DEINTERLACE_YADIF:
case DEINTERLACE_YADIF2X:
+ case DEINTERLACE_IVTC:
p_dst->i_chroma = p_src->i_chroma;
break;
default:
@@ -1533,6 +1568,1161 @@ static int RenderYadif( filter_t *p_filter, picture_t *p_dst, picture_t *p_src,
}
/*****************************************************************************
+ * Inverse telecine (IVTC) for NTSC telecined progressive film material.
+ *****************************************************************************/
+
+/* Overall explanation:
+
+ This filter attempts to do in realtime what Transcode's
ivtc->decimate->32detect chain does offline.
+ It is an original design, based on some ideas from Transcode and
some of my own (JJ).
+
+ If the input material is pure NTSC telecined film, inverse telecine
(also known as "film mode")
+ will (ideally) exactly recover the original progressive film
frames. The output will run at
+ 4/5 of the original framerate with no loss of information.
Interlacing artifacts are removed,
+ and motion becomes as smooth as it was on the original film.
+
+ This filter, in addition to IVTC, includes a residual interlace
removal mechanism like
+ Transcode's 32detect, in order to catch possible incorrectly IVTC'd
frames and any true
+ interlaced material mixed in with the film. The resulting
"progressive" frames are checked
+ for interlacing, and if any is still present, then handed over to
an area-based deinterlacer
+ (RenderX()) to interpolate the missing data in the interlaced parts
(and, preferably,
+ not to damage the progressive parts of the frame too much).
+
+ Put in simple terms, this filter is targeted for NTSC movies and
especially anime.
+ Virtually all 1990s and early 2000s anime is hard-telecined.
Because the source material
+ is like that, the issue affects also virtually all official R1 (US)
anime DVDs.
+
+   Note that much post-2000 anime in particular is a hybrid of telecined film and true interlaced
+ computer-generated effects. In this case, applying IVTC will
effectively attempt to reconstruct
+ the frames based on the film component, but even if this is
successful, the framerate reduction
+ will cause the computer-generated effects to stutter. This is
mathematically unavoidable.
+ Working around this problem would require a much more advanced
filter incorporating some kind of
+ blind source separation and motion tracking techniques. This filter
does not do anything quite
+ so complicated.
+
+ Finally, note also that IVTC is the only correct way to deinterlace
NTSC telecined material.
+ Simply applying an interpolating deinterlacing filter is harmful
for two reasons.
+ First, even if the filter does not damage already progressive
frames, it will lose
+ half of the available vertical resolution of those frames that are
judged interlaced.
+ Some algorithms combining data from multiple frames may be able to
counter this
+ to an extent (effectively performing something akin to the frame
reconstruction part of IVTC).
+ A more serious problem is that any motion will stutter, because
(even in the ideal case)
+ one out of every four film frames will be shown twice, while the
other three will be shown only once.
+ Duplicate removal and framerate reduction - which are part of IVTC
- are also needed
+ to properly play back telecined material on progressive displays.
+
+ So, try this filter on your NTSC anime DVDs. It just might help.
+
+
+ Technical details:
+
+
+ First, NTSC telecine in a nutshell:
+
+   Film is commonly captured at 24 fps. The framerate must be raised from
+   24 fps to 59.94 fields per second. Actually, this starts by pretending
+   that the original framerate is 23.976 fps (when authoring, the audio can
+   be slowed down by 0.1% to match). Now 59.94 = 5/4 * (2*23.976), which
+   gives a nice ratio made out of small integers.
+
+ Thus, each group of four film frames must become five frames in the
NTSC video stream.
+ One cannot simply repeat one frame of every four, because this
would result in jerky motion.
+ To slightly soften the jerkiness, the extra frame is split into two
extra fields, inserted
+ at different times. The content of the extra fields is (in
classical telecine) duplicated
+ as-is from existing fields.
+
+ The field duplication technique is called "3:2 pulldown". The
pattern is called the cadence.
+ The output from 3:2 pulldown looks like this (if the telecine is
TFF, top field first):
+
+   a     b     c     d     e      Telecined frame index (these are the actual frames stored on the DVD)
+   T1    T1    T2    T3    T4     *T*op field content
+   B1    B2    B3    B3    B4     *B*ottom field content
+
+ Numbers 1-4 denote the original film frames. E.g. T1 = top field of
original film frame 1.
+ The field Tb, and one of either Bc or Bd, are the extra fields
inserted in the telecine.
+ With exact duplication, it of course doesn't matter whether Bc or
Bd is the extra field,
+ but with "full field blended" material (see below) this will affect
how to correctly
+ extract film frame 3.
+
+ See the following web pages for illustrations and discussion:
+ http://neuron2.net/LVG/telecining1.html
+ http://arbor.ee.ntu.edu.tw/~jackeikuo/dvd2avi/ivtc/
+
+ Note that film frame 2 has been stored "half and half" into two
telecined frames (b and c).
+ Note also that telecine produces a sequence of 3 progressive frames
(d, e and a) followed
+ by 2 interlaced frames (b and c).
+
+ Alternatively, the output may look like this (BFF telecine, bottom
field first):
+
+ a' b' c' d' e'
+ T1 T2 T3 T3 T4
+ B1 B1 B2 B3 B4
+
+ Now field Bb', and one of either Tc' or Td', are the extra fields.
Again, film frame 2
+ is stored "half and half" (into b' and c').
+
+ Whether the pattern is like abcde or a'b'c'd'e', depends on the
telecine field dominance
+ (TFF or BFF). The telecine field dominance has nothing to do with
the usual concept of
+ video field dominance. The usual FD is not needed for performing
inverse telecine.
+ See e.g.
http://www.cambridgeimaging.co.uk/downloads/Telecine%20field%20dominance.pdf
+ (The document discusses mostly PAL, but includes some notes on
NTSC, too.)
+
+ The reason for the words "classical telecine" above, when field
duplication was first
+ mentioned, is that there exists a "full field blended" version,
where the added fields
+ are not exact duplicates, but are blends of the original film
frames. This is rare
+ in NTSC, but some material like this reportedly exists. See
+ http://www.animemusicvideos.org/guides/avtech/videogetb2a.html
+ In these cases, the additional fields are a (probably 50%) blend of
the frames
+   between which they have been inserted. Which one of the two possibilities is the
+   extra field then becomes important. (This filter does not support "full field blended"
+   material.)
+
+ To summarize, the 3:2 pulldown sequence produces a group of ten
fields out of every
+ four film frames. Only eight of these fields are unique. To remove
the telecine,
+ the duplicate fields must be removed, and the original progressive
frames restored.
+ Additionally, the presentation timestamps must be adjusted, and one
frame out of five
+ (containing no new information) dropped. The duration of each frame
in the output becomes
+ 5/4 of that in the input, i.e. 25% longer.
+
+ Theoretically, this whole mess could be avoided, if the original
material is pure 24fps
+ progressive. There are flags for 3:2 pulldown that allow storing
the original progressive
+ frames on the DVD. When set, the DVD player will apply "soft"
pulldown, if the output is set
+ to 60 fields per second interlaced. However, in practice any
material with its origins
+ in Asia (including virtually all official US (R1) anime DVDs) is
always hard-telecined.
+ There are cadence position flags for hard-telecined material, too,
but in practice
+ these are never set correctly. Combined with rendering true
interlaced effects on top
+ of the hard-telecined stream, we have what can only be described as
a monstrosity.
+
+ Telecined video is often also edited directly in interlaced form,
disregarding safe cut positions
+ as pertains to the telecine sequence (there are only two: between
"d" and "e", or between "e"
+ and the next "a"). Thus, the telecine sequence will in practice
jump erratically at cuts [**].
+ An aggressive detection strategy plus a backup deinterlacer are
needed to cope with this.
+
+ [**] http://users.softlab.ece.ntua.gr/~ttsiod/ivtc.html
+
+
+ Cadence detection:
+
+ Consider viewing the TFF and BFF telecine sequences through a
three-frame stencil.
+ Let P = previous, C = current, N = next. A brief analysis leads to
the following tables.
+
+   PCN                 = three-frame stencil position (Previous Current Next),
+   Dups.               = duplicate fields,
+   Best field pairs... = combinations of fields which correctly reproduce
+                         the original progressive frames,
+   *                   = see timestamp considerations below for why this
+                         particular arrangement.
+
+ For TFF:
+
+   PCN   Dups.    Best field pairs for progressive (correct, theoretical)   Progressive output*
+   abc   TP = TC  TPBP = frame 1, TCBP = frame 1, TNBC = frame 2            frame 2 = TNBC (compose TN+BC)
+   bcd   BC = BN  TCBP = frame 2, TNBC = frame 3, TNBN = frame 3            frame 3 = TNBN (copy N)
+   cde   BP = BC  TCBP = frame 3, TCBC = frame 3, TNBN = frame 4            frame 4 = TNBN (copy N)
+   dea   none     TPBP = frame 3, TCBC = frame 4, TNBN = frame 1            (drop)
+   eab   TC = TN  TPBP = frame 4, TCBC = frame 1, TNBC = frame 1            frame 1 = TCBC (copy C)
+
+ where on the last two lines, frame 1 refers to a frame from the
next group of 4.
+ "Compose TN+BC" means to construct a frame using the top field of
N, and the bottom field of C.
+
+ For BFF, swap all B and T, and rearrange the symbol pairs to again
read "TxBx". We have:
+
+   PCN   Dups.    Best field pairs for progressive (correct, theoretical)   Progressive output*
+   abc   BP = BC  TPBP = frame 1, TPBC = frame 1, TCBN = frame 2            frame 2 = TCBN (compose TC+BN)
+   bcd   TC = TN  TPBC = frame 2, TCBN = frame 3, TNBN = frame 3            frame 3 = TNBN (copy N)
+   cde   TP = TC  TPBC = frame 3, TCBC = frame 3, TNBN = frame 4            frame 4 = TNBN (copy N)
+   dea   none     TPBP = frame 3, TCBC = frame 4, TNBN = frame 1            (drop)
+   eab   BC = BN  TPBP = frame 4, TCBC = frame 1, TCBN = frame 1            frame 1 = TCBC (copy C)
+
+ Consider all possible field pairs from two successive frames: TCBC,
TCBN, TNBC, TNBN. After one frame,
+ these become TPBP, TPBC, TCBP, TCBC. These eight pairs (seven
unique, disregarding the duplicate TCBC)
+ are the exhaustive list of possible field pairs from two successive
frames in the three-frame PCN stencil.
+
+ The field pairs can be used for cadence position detection. The
above tables list triplets of field pair
+ combinations for each cadence position, which should produce
progressive frames. All the given triplets
+ are unique in each table alone, although the one at "dea" is
indistinguishable from the case of pure
+ progressive material. It is also the only one which is not unique
across both tables.
+
+ Thus, all sequences of two neighboring triplets are unique across
both tables. (For "neighboring",
+ each table is considered to wrap around from "eab" back to "abc",
i.e. from the last row back to
+ the first row.) Furthermore, each sequence of three neighboring
triplets is redundantly unique
+ (i.e. is unique, and reduces the chance of false positives).
+
+ The important idea is: *all other* field pair combinations should
produce frames that look interlaced.
+ This includes those combinations present in the "wrong" (i.e. not
current position) rows of the table
+ (insofar as those combinations are not also present in the
"correct" row; by the uniqueness property,
+ *every* "wrong" row will always contain at least one combination
that differs from those in the
+ "correct" row).
+
+ As for how we use these observations, we generate the artificial
frames TCBC, TCBN, TNBC and TNBN.
+ Two of these are just the frames C and N, which already exist; the
two others must be generated by composing
+ the given field pairs. We then compute the interlace score for each
of these frames. The interlace scores
+ of what are now TPBP, TPBC and TCBP, also needed, were computed by
this same mechanism during the previous
+   input frame. These can be slid in history and reused.
+
+ We then check, using the computed interlace scores, which field
combination triplet given in the
+ tables produces the smallest sum of interlace scores. Unless we are
at PCN = "dea" (which could
+ also be pure progressive!), this immediately gives us the most
likely current cadence position,
+ along with the telecine TFF/BFF information. Combined with a
two-step history, the sequence of
+ three most likely positions found this way always allows us to make
a reliable detection
+ (when a reliable detection is possible; note that if the video has
no motion at all,
+ every detection will report the position "dea". In anime, still
shots are common).
+
+ The detection seems to need four full-frame interlace analyses per
frame. Actually, three are enough,
+ because the previous N is the new C, so we can slide the already
computed result. Also during initialization,
+ we only need to compute TNBN on the first frame; this has become
TPBP when the third frame is reached.
+ Similarly, we compute TNBN, TNBC and TCBN during the second frame
(just before the filter starts),
+   and these get slid into TCBC, TCBP and TPBC when the third frame
is reached. At that point,
+ initialization is complete.
+
+ Because we only compare interlace scores against each other, no
threshold is needed in the cadence detector.
+ Thus it, trivially, adapts to the material automatically.
+
+
+ Frame reconstruction:
+
+ Adapting ideas from Transcode's IVTC, we simply output the least
interlaced frame out of the combinations
+ TNBN, TNBC and TCBN. Additionally, a cadence-based frame dropping
mechanism attempts to take care that
+ we only output unique frames and drop the duplicates (one out of
five is a duplicate).
+
+ More reliable duplicate dropping could be achieved with a
five-frame future buffer and full-frame comparisons
+ between successive output frames, but we make do with the
cadence-based one, because it does not require more
+ than one future frame (for the cadence detector).
+
+
+ Timestamp mangling:
+
+ To make five into four we need to extend frame durations by 25%.
Consider the following diagram
+ (times given in 90kHz ticks, rounded to integers; this is just for
illustration):
+
+   NTSC input (29.97 fps)
+   a      b      c      d      e      a (from next group) ...
+   0      3003   6006   9009   12012  15015
+   0         3754      7508      11261     15015
+   1         2         3         4         1 (from next group) ...
+   Film output (23.976 fps)
+
+ Three of the film frames have length 3754, and one has 3753 (it is
1/90000 sec shorter).
+ This rounding was chosen so that the lengths of the group of four
sum to the original 15015.
+
+   From the diagram we get these deltas for presentation timestamp adjustment
+   (in 90 kHz ticks):
+   (1-a)  (2-b)   (3-c)   (4-d)   (skip)  (1-a) ...
+   0      +751    +1502   +2252   (skip)  0     ...
+
+   In fractions of (p_next->date - p_cur->date), regardless of actual time
+   unit, the deltas are:
+   (1-a)  (2-b)   (3-c)   (4-d)   (skip)  (1-a) ...
+   0      +0.25   +0.50   +0.75   (skip)  0     ...
+
+ This is what we actually use. (In our implementation, the values
are stored multiplied by 4, as integers.)
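+   Concretely, with i_timestamp_delta holding the stored value (0..3), the
+   adjusted PTS is computed in RenderIVTC() below as
+
+       t_final = p_curr->date + (p_next->date - p_curr->date)*i_timestamp_delta/4;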
+
+ The "current" frame should be displayed at [original time + delta].
E.g., when "current" = b
+ (i.e. PCN = abc), start displaying film frame 2 at time [original
time of b + 751 ticks].
+ So, when we catch the cadence, we will start mangling the
timestamps according to the
+ cadence position of the "current" frame, using the deltas given
above. This will cause
+ a one-time jerk, most noticeable if the cadence happens to catch at
position "d".
+ (Alternatively, upon lock-on, we could wait until we are at "a"
before switching on IVTC,
+ but this makes the maximal delay [max. detection + max. wait] = 3 +
4 = 7 input frames,
+ which comes to 7/30 ~ 0.23 seconds instead of the 3/30 = 0.10
seconds from purely
+ the detection. I prefer the one-time jerk, which also happens to be
simpler to implement.)
+
+ When the filter falls out of film mode, the timestamps of the
incoming frames are left untouched.
+ Thus, the output from this filter has a variable framerate: 4/5 of
the input framerate when IVTC
+ is active, and the same framerate as input when it is not.
+
+
+ For other open-source IVTC codes (which may be a useful source for
ideas), see the following:
+
+ The classic filter by Vektor (written in 2001-2003 for TVTime and
adapted into Xine):
+ In xine-lib 1.1.19, it is at src/post/deinterlace/pulldown.*. Also
needed are tvtime.*, and speedy.*.
+
+ Transcode's ivtc->decimate->32detect chain by T. Tsiodras (2002,
added in Transcode 0.6.12):
+ In Transcode 1.1.5 (
http://developer.berlios.de/project/showfiles.php?group_id=10094 ),
+ the IVTC part is at filter/filter_ivtc.c.
+
+
+ And now, it's finally time to let the code speak for itself.
+
+ -JJ 2010-12-31
+*/
+
+
+/* Helper function: compose frame from given field pair.
+
+ The inputs are full pictures (frames); only one field will be used
from each.
+ Caller must manage allocation/deallocation of p_outpic.
+ Pitches of the inputs must match!
+*/
+static void ComposeFrame( filter_t *p_filter, picture_t *p_outpic,
+                          picture_t *p_inpic_top, picture_t *p_inpic_bottom )
+{
+ assert( p_filter != NULL );
+ assert( p_outpic != NULL );
+ assert( p_inpic_top != NULL );
+ assert( p_inpic_bottom != NULL );
+
+ int i_plane;
+ for( i_plane = 0 ; i_plane < p_inpic_top->i_planes ; i_plane++ )
+ {
+ uint8_t *p_in_top, *p_in_bottom, *p_out_end, *p_out;
+
+ p_in_top = p_inpic_top->p[i_plane].p_pixels;
+ p_in_bottom = p_inpic_bottom->p[i_plane].p_pixels;
+
+ p_out = p_outpic->p[i_plane].p_pixels;
+ p_out_end = p_out + p_outpic->p[i_plane].i_pitch
+ * p_outpic->p[i_plane].i_visible_lines;
+
+        assert( p_inpic_top->p[i_plane].i_pitch ==
+                p_inpic_bottom->p[i_plane].i_pitch );
+ switch( p_filter->fmt_in.video.i_chroma )
+ {
+ case VLC_CODEC_I422:
+ case VLC_CODEC_J422:
+ assert( p_outpic->p[i_plane].i_visible_lines % 2 == 0 );
+
+            /* in the frame for bottom field, skip first line,
+               which belongs to the top field */
+            p_in_bottom += p_inpic_bottom->p[i_plane].i_pitch;
+
+            /* copy luma or chroma, alternating between input fields */
+            for( ; p_out < p_out_end ; )
+            {
+                vlc_memcpy( p_out, p_in_top, p_inpic_top->p[i_plane].i_pitch );
+                p_out += p_outpic->p[i_plane].i_pitch;
+                vlc_memcpy( p_out, p_in_bottom, p_inpic_bottom->p[i_plane].i_pitch );
+                p_out += p_outpic->p[i_plane].i_pitch;
+
+                p_in_top += 2 * p_inpic_top->p[i_plane].i_pitch;
+                p_in_bottom += 2 * p_inpic_bottom->p[i_plane].i_pitch;
+            }
+            break;
+
+ case VLC_CODEC_I420:
+ case VLC_CODEC_J420:
+ case VLC_CODEC_YV12:
+ assert( p_outpic->p[i_plane].i_visible_lines % 2 == 0 );
+
+            /* in the frame for bottom field, skip first line,
+               which belongs to the top field */
+            p_in_bottom += p_inpic_bottom->p[i_plane].i_pitch;
+
+            /* copy luma or chroma, alternating between input fields */
+            /* FIXME: why does this strategy work the best for 420 chroma, too?
+               I would have thought we'd need to average the subsampled
+               chroma... -JJ */
+            for( ; p_out < p_out_end ; )
+            {
+                vlc_memcpy( p_out, p_in_top, p_inpic_top->p[i_plane].i_pitch );
+                p_out += p_outpic->p[i_plane].i_pitch;
+                vlc_memcpy( p_out, p_in_bottom, p_inpic_bottom->p[i_plane].i_pitch );
+                p_out += p_outpic->p[i_plane].i_pitch;
+
+                p_in_top += 2 * p_inpic_top->p[i_plane].i_pitch;
+                p_in_bottom += 2 * p_inpic_bottom->p[i_plane].i_pitch;
+            }
+            break;
+
+ /* Original development version for these cases -
causes chroma stripes */
+ if( 0 )
+ {
+ if( i_plane == Y_PLANE )
+ {
+ /* in the frame for bottom field, skip first
line, which belongs to the top field */
+ p_in_bottom += p_inpic_bottom->p[i_plane].i_pitch;
+
+ /* copy luma, alternating between input fields */
+ for( ; p_out < p_out_end ; )
+ {
+ vlc_memcpy( p_out, p_in_top,
p_inpic_top->p[i_plane].i_pitch );
+ p_out += p_outpic->p[i_plane].i_pitch;
+ vlc_memcpy( p_out, p_in_bottom,
p_inpic_bottom->p[i_plane].i_pitch );
+ p_out += p_outpic->p[i_plane].i_pitch;
+
+ p_in_top += 2 *
p_inpic_top->p[i_plane].i_pitch;
+ p_in_bottom += 2 *
p_inpic_bottom->p[i_plane].i_pitch;
+ }
+ }
+ else
+ {
+ /* now we don't skip anything - we need full
chroma from both frames for averaging */
+
+ /* average the chroma */
+ for( ; p_out < p_out_end ; )
+ {
+ p_filter->p_sys->pf_merge( p_out, p_in_top,
p_in_bottom,
+
p_inpic_top->p[i_plane].i_pitch );
+
+ p_out += p_outpic->p[i_plane].i_pitch;
+ p_in_top += p_inpic_top->p[i_plane].i_pitch;
+ p_in_bottom +=
p_inpic_bottom->p[i_plane].i_pitch;
+ }
+ }
+ break;
+ } /* end of disabled development version */
+ }
+ }
+}
+
+/* Helper function: estimates "how much interlaced" the given picture is.
+
+ We use the comb metric from Transcode. RenderX()'s comb metric was
also tested
+ during development, and for this particular purpose, Transcode's
metric won.
+
+ Note that we *must not* subsample at all in order to catch
interlacing in telecined frames
+ with localized motion (e.g. anime with characters talking, where
only mouths move
+ and everything else stays still. These can be just a few pixels
wide, and located
+ anywhere in the frame!)
+*/
+static int CalculateInterlaceScore(filter_t *p_filter, picture_t* p_pic)
+{
+ assert( p_filter != NULL );
+ assert( p_pic != NULL );
+
+ /* This is based on the comb detector used in the IVTC filter of
Transcode 1.1.5. */
+ int i_plane;
+ int i_score = 0;
+ int y, x;
+ for( i_plane = 0 ; i_plane < p_pic->i_planes ; i_plane++ )
+ {
+ const int i_lasty = p_pic->p[i_plane].i_visible_lines-1;
+ const int w = p_pic->p[i_plane].i_pitch;
+ for( y = 1; y < i_lasty; y+=2 )
+ {
+            uint8_t *src_c = &p_pic->p[i_plane].p_pixels[y*w];     /* current line  */
+            uint8_t *src_p = &p_pic->p[i_plane].p_pixels[(y-1)*w]; /* previous line */
+            uint8_t *src_n = &p_pic->p[i_plane].p_pixels[(y+1)*w]; /* next line     */
+
+#define T 100
+#ifdef CAN_COMPILE_MMXEXT
+            /* FIXME: this MMX version probably requires some tuning.
+               My MMX-fu is somewhat lacking :) See below for the C-only version
+               to see more quickly what this does. It's very simple. -JJ */
+ if( vlc_CPU() & CPU_CAPABILITY_MMXEXT )
+ {
+ assert( w % 4 == 0 );
+
+                pxor_r2r( mm7, mm7 ); /* we need one register full of zeros */
+ for( x = 0; x < w; x+=4 )
+ {
+ /* clear working registers */
+ pxor_r2r( mm0, mm0 );
+ pxor_r2r( mm1, mm1 );
+ pxor_r2r( mm2, mm2 );
+
+ /* move in four bytes from each row */
+ movd_m2r( *((int32_t*)src_c), mm0 );
+ movd_m2r( *((int32_t*)src_p), mm1 );
+ movd_m2r( *((int32_t*)src_n), mm2 );
+
+ /* pad with zeros to make room for computation */
+ punpcklbw_r2r( mm7, mm0 );
+ punpcklbw_r2r( mm7, mm1 );
+ punpcklbw_r2r( mm7, mm2 );
+
+                    /* Let's follow here what happens to each component word: */
+                    movq_r2r( mm1, mm3 ); /* P */
+                    psubsw_r2r( mm0, mm3 ); /* mm3 = P - C (signed) */
+                    movq_r2r( mm2, mm4 ); /* N */
+                    psubsw_r2r( mm0, mm4 ); /* mm4 = N - C (signed) */
+
+                    /* (P - C) * (N - C) */
+                    movq_r2r( mm4, mm0 );
+                    pmullw_r2r( mm3, mm4 ); /* mm4 = lw( mm3 * mm4 ) (this is now lw of "comb") */
+                    pmulhw_r2r( mm3, mm0 ); /* mm0 = hw( mm3 * mm0 ) (this is now hw of "comb") */
+
+                    /* unpack the two low-order 32-bit products into mm1
+                       (its old value is no longer needed);
+                       interleave words, not bytes */
+                    movq_r2r( mm4, mm1 );
+                    punpcklwd_r2r( mm0, mm1 );
+
+                    /* unpack the two high-order 32-bit products into mm2
+                       (its old value is no longer needed) */
+                    movq_r2r( mm4, mm2 );
+                    punpckhwd_r2r( mm0, mm2 );
+
+                    /* FIXME: we need a signed comparison here. Get rid of the
+                       negative values first? But how, since there is no paddsd
+                       instruction?
+
+                       Let's stop with the MMX here, transfer the results to
+                       memory and do the rest the old-fashioned way. */
+
+                    /* Let's "evacuate" mm1 first. */
+                    /* Low-order part: */
+                    movq_r2r( mm1, mm0 );
+                    punpckldq_r2r( mm7, mm0 );
+                    int32_t result1;
+                    movd_r2m( mm0, result1 ); /* movd: only 32 bits fit in the int32_t */
+                    /* High-order part: */
+                    movq_r2r( mm1, mm0 );
+                    punpckhdq_r2r( mm7, mm0 );
+                    int32_t result2;
+                    movd_r2m( mm0, result2 );
+                    /* Then same for mm2. */
+                    /* Low-order part: */
+                    movq_r2r( mm2, mm0 );
+                    punpckldq_r2r( mm7, mm0 );
+                    int32_t result3;
+                    movd_r2m( mm0, result3 );
+                    /* High-order part: */
+                    movq_r2r( mm2, mm0 );
+                    punpckhdq_r2r( mm7, mm0 );
+                    int32_t result4;
+                    movd_r2m( mm0, result4 );
+
+ if( result1 > T )
+ ++i_score;
+ if( result2 > T )
+ ++i_score;
+ if( result3 > T )
+ ++i_score;
+ if( result4 > T )
+ ++i_score;
+
+ src_c += 4;
+ src_p += 4;
+ src_n += 4;
+ }
+ }
+ else
+ {
+#endif
+ for( x = 0; x < w; ++x )
+ {
+ /* worst case: need 17 bits for "comb" */
+ int_fast32_t C = *src_c;
+ int_fast32_t P = *src_p;
+ int_fast32_t N = *src_n;
+                /* Comments in Transcode's filter_ivtc.c attribute this combing
+                   metric to Gunnar Thalin.
+
+                   It seems the idea is that if the picture is interlaced, both
+                   expressions will have the same signs, and this comes up
+                   positive. The original author has chosen the "T = 100"
+                   carefully... -JJ
+                */
+ int_fast32_t comb = (P - C) * (N - C);
+ if( comb > T )
+ ++i_score;
+
+ ++src_c;
+ ++src_p;
+ ++src_n;
+ }
+#ifdef CAN_COMPILE_MMXEXT
+ }
+#endif
+ }
+ }
+
+#ifdef CAN_COMPILE_MMXEXT
+ if( vlc_CPU() & CPU_CAPABILITY_MMXEXT )
+ emms();
+#endif
+
+ return i_score;
+}
+#undef T
+
+
+/* IVTC filter modes */
+#define IVTC_MODE_DETECTING 0
+#define IVTC_MODE_TELECINED_NTSC 1
+
+/* Field pair combinations from successive frames in the PCN stencil
+ (T = top, B = bottom, P = previous, C = current, N = next).
+ We will use these as array indices; hence the explicit numbering. */
+enum ivtc_field_pair { FIELD_PAIR_TPBP = 0, FIELD_PAIR_TPBC = 1,
+ FIELD_PAIR_TCBP = 2, FIELD_PAIR_TCBC = 3,
+ FIELD_PAIR_TCBN = 4, FIELD_PAIR_TNBC = 5,
+ FIELD_PAIR_TNBN = 6 };
+typedef enum ivtc_field_pair ivtc_field_pair;
+
+/* Cadence positions (PCN, Previous Current Next).
+ Note: only valid ones count for NUM.
+
+ Note also that "dea" in both cadence tables and a pure progressive
signal are indistinguishable.
+
+ Again, used as array indices except the -1.
+*/
+#define NUM_CADENCE_POS 9
+enum ivtc_cadence_pos { CADENCE_POS_INVALID = -1,
+ CADENCE_POS_PROGRESSIVE = 0,
+ CADENCE_POS_TFF_ABC = 1,
+ CADENCE_POS_TFF_BCD = 2,
+ CADENCE_POS_TFF_CDE = 3,
+ CADENCE_POS_TFF_EAB = 4,
+ CADENCE_POS_BFF_ABC = 5,
+ CADENCE_POS_BFF_BCD = 6,
+ CADENCE_POS_BFF_CDE = 7,
+ CADENCE_POS_BFF_EAB = 8 };
+typedef enum ivtc_cadence_pos ivtc_cadence_pos;
+
+/* Telecine field dominance */
+#define TFD_INVALID -1
+#define TFD_TFF 0
+#define TFD_BFF 1
+
+/* Position detection table. These are the (only) field pair combinations
+   that should give progressive frames. */
+static const ivtc_field_pair pi_best_field_pairs[9][3] = {
+    {FIELD_PAIR_TPBP, FIELD_PAIR_TCBC, FIELD_PAIR_TNBN}, /* prog. */
+
+    {FIELD_PAIR_TPBP, FIELD_PAIR_TCBP, FIELD_PAIR_TNBC}, /* TFF ABC */
+    {FIELD_PAIR_TCBP, FIELD_PAIR_TNBC, FIELD_PAIR_TNBN}, /* TFF BCD */
+    {FIELD_PAIR_TCBP, FIELD_PAIR_TCBC, FIELD_PAIR_TNBN}, /* TFF CDE */
+    {FIELD_PAIR_TPBP, FIELD_PAIR_TCBC, FIELD_PAIR_TNBC}, /* TFF EAB */
+
+    {FIELD_PAIR_TPBP, FIELD_PAIR_TPBC, FIELD_PAIR_TCBN}, /* BFF ABC */
+    {FIELD_PAIR_TPBC, FIELD_PAIR_TCBN, FIELD_PAIR_TNBN}, /* BFF BCD */
+    {FIELD_PAIR_TPBC, FIELD_PAIR_TCBC, FIELD_PAIR_TNBN}, /* BFF CDE */
+    {FIELD_PAIR_TPBP, FIELD_PAIR_TCBC, FIELD_PAIR_TCBN}, /* BFF EAB */
+    };
+/* These, correspondingly, should give only interlaced frames. Currently
+   unused. During development it was tested that whether we detect best or
+   worst, the resulting detected cadence positions are identical (neither
+   strategy performs any different from the other). */
+static const ivtc_field_pair pi_worst_field_pairs[9][4] = {
+    {FIELD_PAIR_TPBC, FIELD_PAIR_TCBP, FIELD_PAIR_TCBN, FIELD_PAIR_TNBC}, /* prog. */
+
+    {FIELD_PAIR_TPBC, FIELD_PAIR_TCBC, FIELD_PAIR_TCBN, FIELD_PAIR_TNBN}, /* TFF ABC */
+    {FIELD_PAIR_TPBP, FIELD_PAIR_TPBC, FIELD_PAIR_TCBC, FIELD_PAIR_TCBN}, /* TFF BCD */
+    {FIELD_PAIR_TPBP, FIELD_PAIR_TPBC, FIELD_PAIR_TCBN, FIELD_PAIR_TNBC}, /* TFF CDE */
+    {FIELD_PAIR_TPBC, FIELD_PAIR_TCBP, FIELD_PAIR_TCBN, FIELD_PAIR_TNBN}, /* TFF EAB */
+
+    {FIELD_PAIR_TCBP, FIELD_PAIR_TCBC, FIELD_PAIR_TNBC, FIELD_PAIR_TNBN}, /* BFF ABC */
+    {FIELD_PAIR_TPBP, FIELD_PAIR_TCBP, FIELD_PAIR_TCBC, FIELD_PAIR_TNBC}, /* BFF BCD */
+    {FIELD_PAIR_TPBP, FIELD_PAIR_TCBP, FIELD_PAIR_TNBC, FIELD_PAIR_TCBN}, /* BFF CDE */
+    {FIELD_PAIR_TCBP, FIELD_PAIR_TPBC, FIELD_PAIR_TNBC, FIELD_PAIR_TNBN}, /* BFF EAB */
+    };
+
+/* These tables are used for separating the detected cadence position
+   (ivtc_cadence_pos; first index to pi_best_field_pairs) into TFF/BFF
+   information and the cadence counter going from 0 to 4,
+   where "abc" = 0, "bcd" = 1, ...
+*/
+static const int pi_detected_pos_to_cadence_pos[NUM_CADENCE_POS] = {
+    3, /* prog. */
+    0, /* TFF ABC */
+    1, /* TFF BCD */
+    2, /* TFF CDE */
+    4, /* TFF EAB */
+    0, /* BFF ABC */
+    1, /* BFF BCD */
+    2, /* BFF CDE */
+    4, /* BFF EAB */
+    };
+static const int pi_detected_pos_to_tfd[NUM_CADENCE_POS] = {
+    TFD_INVALID, /* prog. */
+    TFD_TFF, /* TFF ABC */
+    TFD_TFF, /* TFF BCD */
+    TFD_TFF, /* TFF CDE */
+    TFD_TFF, /* TFF EAB */
+    TFD_BFF, /* BFF ABC */
+    TFD_BFF, /* BFF BCD */
+    TFD_BFF, /* BFF CDE */
+    TFD_BFF, /* BFF EAB */
+    };
+
+/* Valid telecine sequences (TFF and BFF). Indices: [TFD][i_cadence_pos] */
+/* Currently unused and left here for documentation only.
+ There is an easier way - just decode the i_cadence_pos part of the
+ detected position using the pi_detected_pos_to_cadence_pos table. */
+/*static const int pi_valid_cadences[2][5] = { {CADENCE_POS_TFF_ABC,
+ CADENCE_POS_TFF_BCD,
+ CADENCE_POS_TFF_CDE,
+ CADENCE_POS_PROGRESSIVE,
+ CADENCE_POS_TFF_EAB},
+
+ {CADENCE_POS_BFF_ABC,
+ CADENCE_POS_BFF_BCD,
+ CADENCE_POS_BFF_CDE,
+ CADENCE_POS_PROGRESSIVE,
+ CADENCE_POS_BFF_EAB},
+ };
+*/
+
+/* Operations for film frame reconstruction.
+   Indices: [TFD][i_cadence_pos] */
+enum ivtc_op { IVTC_OP_DROP_FRAME, IVTC_OP_COPY_N, IVTC_OP_COPY_C,
+               IVTC_OP_COMPOSE_TNBC, IVTC_OP_COMPOSE_TCBN };
+typedef enum ivtc_op ivtc_op;
+static const ivtc_op pi_reconstruction_ops[2][5] = {
+    /* TFF */
+    {IVTC_OP_COMPOSE_TNBC,
+     IVTC_OP_COPY_N,
+     IVTC_OP_COPY_N,
+     IVTC_OP_DROP_FRAME,
+     IVTC_OP_COPY_C},
+
+    /* BFF */
+    {IVTC_OP_COMPOSE_TCBN,
+     IVTC_OP_COPY_N,
+     IVTC_OP_COPY_N,
+     IVTC_OP_DROP_FRAME,
+     IVTC_OP_COPY_C},
+    };
+
+/* For timestamp mangling. Index: i_cadence_pos.
+ Valid values are nonnegative. The -1 corresponds to the dropped frame
+ and is never used, except for a debug assert. */
+static const int pi_timestamp_deltas[5] = { 1, 2, 3, -1, 0 };
+
+/* How much interlacing must be detected before we judge that the frame has
+   residual interlacing artifacts. This is a raw value from
+   CalculateInterlaceScore(). The value 3000 was chosen after watching
+   several different anime :)
+*/
+#define RENDERIVTC_INTERLACE_THRESHOLD 3000
+
+/* The inverse teleciner itself.
+
+   Return values: VLC_SUCCESS  = a film frame was reconstructed to p_dst.
+                  VLC_EGENERIC = this frame was dropped as part of normal
+                                 IVTC operation.
+*/
+static int RenderIVTC( filter_t *p_filter, picture_t *p_dst, picture_t *p_src )
+{
+ filter_sys_t *p_sys = p_filter->p_sys;
+ ivtc_sys_t *p_ivtc = p_sys->p_ivtc;
+ mtime_t t_final = VLC_TS_INVALID; /* for timestamp mangling */
+
+ /* We keep a three-frame cache like Yadif does.
+ We use this for telecine sequence detection and film frame
reconstruction. */
+
+ picture_t *p_dup = picture_NewFromFormat( &p_src->format );
+ if( p_dup )
+ picture_Copy( p_dup, p_src );
+
+ /* Slide the history */
+ if( p_sys->pp_history[0] )
+ picture_Release( p_sys->pp_history[0] );
+ for( int i = 1; i < HISTORY_SIZE; i++ )
+ p_sys->pp_history[i-1] = p_sys->pp_history[i];
+ p_sys->pp_history[HISTORY_SIZE-1] = p_dup;
+
+ picture_t *p_prev = p_sys->pp_history[0];
+ picture_t *p_curr = p_sys->pp_history[1];
+ picture_t *p_next = p_sys->pp_history[2];
+
+    /* Slide history of detected cadence positions */
+    for( int i = 1; i < IVTC_DETECTION_HISTORY_SIZE; i++ )
+        p_ivtc->pi_cadence_pos_history[i-1] = p_ivtc->pi_cadence_pos_history[i];
+    /* The latest position has not been detected yet. */
+    p_ivtc->pi_cadence_pos_history[IVTC_DETECTION_HISTORY_SIZE-1] = CADENCE_POS_INVALID;
+
+    /* Slide history of field pair interlace scores */
+    p_ivtc->pi_interlace_scores[FIELD_PAIR_TPBP] = p_ivtc->pi_interlace_scores[FIELD_PAIR_TCBC];
+    p_ivtc->pi_interlace_scores[FIELD_PAIR_TPBC] = p_ivtc->pi_interlace_scores[FIELD_PAIR_TCBN];
+    p_ivtc->pi_interlace_scores[FIELD_PAIR_TCBP] = p_ivtc->pi_interlace_scores[FIELD_PAIR_TNBC];
+    p_ivtc->pi_interlace_scores[FIELD_PAIR_TCBC] = p_ivtc->pi_interlace_scores[FIELD_PAIR_TNBN];
+
+    /* Allocate/reallocate working area used for composition of artificial
+       frames. We avoid reallocation if possible, and only do that if the
+       video size or chroma type changes. Size is detected from the luma
+       plane. The final deallocation is eventually done by Flush().
+    */
+    assert( p_next != NULL );
+    int i_chroma = p_filter->fmt_in.video.i_chroma;
+    int i_size_x = p_next->p[Y_PLANE].i_pitch;
+    int i_size_y = p_next->p[Y_PLANE].i_visible_lines;
+    /* Note that both frames in the working area are always allocated at the
+       same time, so it is enough to check [0]. */
+    if( !p_ivtc->pp_ivtc_working_area[0] || i_chroma != p_ivtc->i_old_chroma ||
+        i_size_x != p_ivtc->i_old_size_x || i_size_y != p_ivtc->i_old_size_y )
+ {
+        /* If we were already allocated, restart the filter, flushing the
+           frame cache. This is done so that our assumption of matching
+           picture sizes (in the helper functions) will hold.
+        */
+        if( p_ivtc->pp_ivtc_working_area[0] )
+        {
+            msg_Dbg( p_filter, "IVTC: Input video size or chroma changed. Restarting filter." );
+ if( p_sys->pp_history[0] )
+ picture_Release( p_sys->pp_history[0] );
+ if( p_sys->pp_history[1] )
+ picture_Release( p_sys->pp_history[1] );
+ p_sys->pp_history[0] = NULL;
+ p_sys->pp_history[1] = NULL;
+ p_prev = NULL;
+ p_curr = NULL;
+ }
+
+ for( int i = 0; i < IVTC_WORKING_AREA_SIZE; i++ )
+ {
+            /* Deallocate old if any */
+            if( p_ivtc->pp_ivtc_working_area[i] )
+            {
+                msg_Dbg( p_filter, "IVTC: Reallocating temporary picture." );
+                picture_Release( p_ivtc->pp_ivtc_working_area[i] );
+                p_ivtc->pp_ivtc_working_area[i] = NULL;
+            }
+
+            /* Allocate new */
+            p_ivtc->pp_ivtc_working_area[i] = picture_NewFromFormat( &p_next->format );
+
+            /* FIXME! Instead of asserting, we should shut down the filter
+               nicely if allocation fails. However, in the current API it does
+               not seem to be possible for a filter to quit itself.
+               This should be fixed as soon as the API allows it.
+            */
+            assert( p_ivtc->pp_ivtc_working_area[i] != NULL );
+
+            /* Take properties from the new frame. */
+            picture_CopyProperties( p_ivtc->pp_ivtc_working_area[i], p_next );
+ }
+
+ /* Remember new chroma type and video size. */
+ p_ivtc->i_old_chroma = i_chroma;
+ p_ivtc->i_old_size_x = i_size_x;
+ p_ivtc->i_old_size_y = i_size_y;
+ }
+
+    /* The artificial frames TNBC and TCBN will actually be generated a few
+       lines further down; we just give the pointers more descriptive names now. */
+ picture_t *p_tnbc = p_ivtc->pp_ivtc_working_area[0];
+ picture_t *p_tcbn = p_ivtc->pp_ivtc_working_area[1];
+
+ /* Filter if we have all the pictures we need */
+ if( p_prev && p_curr && p_next )
+ {
+ assert( p_tnbc != NULL );
+ assert( p_tcbn != NULL );
+
+ /* Start of cadence detection. */
+
+        /* Generate artificial frames TNBC and TCBN into the temporary working area. */
+        ComposeFrame( p_filter, p_tnbc, p_next, p_curr );
+        ComposeFrame( p_filter, p_tcbn, p_curr, p_next );
+
+        /* Compute interlace scores for TNBN, TNBC and TCBN.
+           Note that p_next contains TNBN. */
+        p_ivtc->pi_interlace_scores[FIELD_PAIR_TNBN] = CalculateInterlaceScore( p_filter, p_next );
+        p_ivtc->pi_interlace_scores[FIELD_PAIR_TNBC] = CalculateInterlaceScore( p_filter, p_tnbc );
+        p_ivtc->pi_interlace_scores[FIELD_PAIR_TCBN] = CalculateInterlaceScore( p_filter, p_tcbn );
+
+        /* Detect likely cadence position according to the tables, using the
+           tabulated combinations of all 7 available interlace scores. */
+        int pi_ivtc_scores[NUM_CADENCE_POS];
+        for( int i = 0; i < NUM_CADENCE_POS; i++ )
+            pi_ivtc_scores[i] = p_ivtc->pi_interlace_scores[ pi_best_field_pairs[i][0] ]
+                              + p_ivtc->pi_interlace_scores[ pi_best_field_pairs[i][1] ]
+                              + p_ivtc->pi_interlace_scores[ pi_best_field_pairs[i][2] ];
+ /* Find minimum */
+ int j = 0;
+ int minscore = pi_ivtc_scores[j];
+ for( int i = 1; i < NUM_CADENCE_POS; i++ )
+ {
+ if( pi_ivtc_scores[i] < minscore )
+ {
+ minscore = pi_ivtc_scores[i];
+ j = i;
+ }
+ }
+ /* The current raw detected cadence position is: */
+ p_ivtc->pi_cadence_pos_history[IVTC_DETECTION_HISTORY_SIZE-1] = j;
+
+ /* End of cadence detection. */
+ /* Start of cadence analysis. */
+
+        /* Remember the old state of "potential cadence break", so that we can
+           detect two breaks in a row after the cadence analysis is done.
+           We allow for one error (which could be a fluke), and only exit film
+           mode if two breaks are detected in a row.
+        */
+        const bool b_old_possible_cadence_break_detected = p_ivtc->b_possible_cadence_break_detected;
+
+        bool b_film_mode = p_ivtc->i_ivtc_filter_mode == IVTC_MODE_TELECINED_NTSC;
+
+        /* If the detection history has been completely filled (three positions
+           of the stencil), start analysis. See if the picked-up sequence is a
+           valid NTSC telecine. The history is complete if its farthest past
+           element has been filled.
+        */
+ if( p_ivtc->pi_cadence_pos_history[0] != CADENCE_POS_INVALID )
+ {
+            /* Convert the history elements to cadence position and TFD. */
+            int pi_tfd[IVTC_DETECTION_HISTORY_SIZE];
+            int pi_pos[IVTC_DETECTION_HISTORY_SIZE];
+            for( int i = 0; i < IVTC_DETECTION_HISTORY_SIZE; i++)
+            {
+                const int i_detected_pos = p_ivtc->pi_cadence_pos_history[i];
+                pi_pos[i] = pi_detected_pos_to_cadence_pos[i_detected_pos];
+                pi_tfd[i] = pi_detected_pos_to_tfd[i_detected_pos];
+            }
+
+            /* See if the sequence is valid. The cadence positions must be
+               successive mod 5. We can't say anything about TFF/BFF yet,
+               because the progressive-looking position "dea" may be there.
+               If the sequence otherwise looks valid, we handle that last
+               by voting. */
+
+ bool b_sequence_valid = true;
+ int j = pi_pos[0];
+ for( int i = 1; i < IVTC_DETECTION_HISTORY_SIZE; i++ )
+ {
+ if( pi_pos[i] != (++j % 5) )
+ {
+ b_sequence_valid = false;
+ break;
+ }
+ }
+
+ if( b_sequence_valid )
+ {
+ /* Determine TFF/BFF. */
+ int i_vote_invalid = 0;
+ int i_vote_tff = 0;
+ int i_vote_bff = 0;
+ for( int i = 0; i < IVTC_DETECTION_HISTORY_SIZE; i++ )
+ {
+ if( pi_tfd[i] == TFD_INVALID )
+ i_vote_invalid++;
+ else if( pi_tfd[i] == TFD_TFF )
+ i_vote_tff++;
+ else if( pi_tfd[i] == TFD_BFF )
+ i_vote_bff++;
+ }
+ int i_telecine_field_dominance = TFD_INVALID;
+
+ /* With three entries, two votes for any one item are
enough to decide this conclusively. */
+ if( i_vote_tff >= 2)
+ i_telecine_field_dominance = TFD_TFF;
+ else if( i_vote_bff >= 2)
+ i_telecine_field_dominance = TFD_BFF;
+ /* in all other cases, "invalid" won or no winner - no
NTSC telecine detected. */
+
+ /* Reset the cadence break flag if this round came up
ok. */
+ if( i_telecine_field_dominance != TFD_INVALID )
+ p_ivtc->b_possible_cadence_break_detected = false;
+
+ /* Update the cadence counter from detected data
whenever we can.
+ Upon testing several strategies, this was found to
be the most reliable one.
+ */
+                if( i_telecine_field_dominance == TFD_TFF )
+                {
+                    if( p_ivtc->i_ivtc_filter_mode != IVTC_MODE_TELECINED_NTSC )
+                        msg_Dbg( p_filter, "IVTC: 3:2 pulldown: NTSC TFF telecine detected. Film mode on." );
+                    p_ivtc->i_ivtc_filter_mode = IVTC_MODE_TELECINED_NTSC;
+                    p_ivtc->i_cadence_pos = pi_pos[IVTC_DETECTION_HISTORY_SIZE-1];
+                    p_ivtc->i_telecine_field_dominance = TFD_TFF;
+                    p_ivtc->b_possible_cadence_break_detected = false;
+                }
+                else if( i_telecine_field_dominance == TFD_BFF )
+                {
+                    if( p_ivtc->i_ivtc_filter_mode != IVTC_MODE_TELECINED_NTSC )
+                        msg_Dbg( p_filter, "IVTC: 3:2 pulldown: NTSC BFF telecine detected. Film mode on." );
+                    p_ivtc->i_ivtc_filter_mode = IVTC_MODE_TELECINED_NTSC;
+                    p_ivtc->i_cadence_pos = pi_pos[IVTC_DETECTION_HISTORY_SIZE-1];
+                    p_ivtc->i_telecine_field_dominance = TFD_BFF;
+                    p_ivtc->b_possible_cadence_break_detected = false;
+                }
+                else if( b_film_mode && i_telecine_field_dominance == TFD_INVALID )
+                {
+                    msg_Dbg( p_filter, "IVTC: 3:2 pulldown: telecine field dominance not found. Possible cadence break detected." );
+                    p_ivtc->b_possible_cadence_break_detected = true;
+                }
+
+                /* In case we are in NTSC film mode, but did not detect a
+                   possible cadence break, make one final sanity check.
+                   Detect a case where the sequence is still valid, but the
+                   predicted (counted) position does not match the latest
+                   detected one.
+
+                   During development it was found that this check is very
+                   important. If this is left out, weaving might continue on
+                   a broken cadence and the detector won't notice anything
+                   wrong.
+                */
+                if( p_ivtc->i_ivtc_filter_mode == IVTC_MODE_TELECINED_NTSC &&
+                    !p_ivtc->b_possible_cadence_break_detected &&
+                    p_ivtc->i_cadence_pos != pi_pos[IVTC_DETECTION_HISTORY_SIZE-1] )
+                {
+                    msg_Dbg( p_filter, "IVTC: 3:2 pulldown: predicted and detected position do not match. Possible cadence break detected." );
+                    p_ivtc->b_possible_cadence_break_detected = true;
+                }
+ }
+ else /* No valid NTSC telecine sequence detected.
+ Either there is no NTSC telecine, or there is no
motion at all. */
+ {
+ ; /* Currently, do nothing. During development,
strategies for detecting
+ true progressive, true interlaced and PAL
telecined were tested,
+ but in practice these caused more harm than good.
Their main effect
+ was that they confused the NTSC telecine detector.
+ */
+ }
+ }
+
+        /* Detect cadence breaks. If we see two possible breaks in a row,
+           we consider the cadence broken. Note that this is the only reason
+           to exit film mode.
+        */
+        if( p_ivtc->i_ivtc_filter_mode == IVTC_MODE_TELECINED_NTSC &&
+            p_ivtc->b_possible_cadence_break_detected &&
+            b_old_possible_cadence_break_detected )
+        {
+            msg_Dbg( p_filter, "IVTC: 3:2 pulldown: cadence break detected. Film mode off." );
+            p_ivtc->i_ivtc_filter_mode = IVTC_MODE_DETECTING;
+            p_ivtc->i_cadence_pos = CADENCE_POS_INVALID;
+            p_ivtc->i_telecine_field_dominance = TFD_INVALID;
+            p_ivtc->b_possible_cadence_break_detected = false;
+        }
+
+ /* End of cadence analysis. */
+
+        /* Perform IVTC if we're (still) in film mode. This means that the
+           cadence is locked on. */
+        picture_t *p_ivtc_result = NULL; /* this will become either TCBC, TNBC, TCBN or TNBN */
+        int i_result_interlace_score = -1;
+        if( p_ivtc->i_ivtc_filter_mode == IVTC_MODE_TELECINED_NTSC )
+        {
+            assert( p_ivtc->i_telecine_field_dominance != TFD_INVALID );
+            assert( p_ivtc->i_cadence_pos != CADENCE_POS_INVALID );
+
+            /* Apply film frame reconstruction. */
+
+            /* Decide what to do. Always use the cadence counter to decide
+               whether to drop this frame. */
+            int op = pi_reconstruction_ops[p_ivtc->i_telecine_field_dominance][p_ivtc->i_cadence_pos];
+            if( op == IVTC_OP_DROP_FRAME )
+            {
+                /* Bump cadence counter into the next expected position */
+                p_ivtc->i_cadence_pos = (p_ivtc->i_cadence_pos + 1) % 5;
+
+                /* Drop frame. We're done. */
+                return VLC_EGENERIC; /* Not an error. Tell the caller not to
+                                        expect an output frame. */
+            }
+ /* During development, a strategy trusting the cadence
counter for frame reconstruction,
+ in cases when no break has been detected was tried, but
in practice always using the
+ Transcode strategy works better on average, if we also
use the Transcode comb metric.
+
+ The Transcode strategy catches localized motion better,
while the cadence counter
+ is better at following slow vertical camera pans (which
won't show combing,
+ but will look just slightly messed up). In my opinion
(JJ), it's more important
+ to catch localized motion reliably.
+
+ The old code has been left here so it can be enabled
later, if desired.
+ */
+ else if( 0 && !p_ivtc->b_possible_cadence_break_detected )
+ {
+ if( op == IVTC_OP_COPY_N )
+ {
+ p_ivtc_result = p_next;
+ i_result_interlace_score =
p_ivtc->pi_interlace_scores[FIELD_PAIR_TNBN];
+ }
+ else if( op == IVTC_OP_COPY_C )
+ {
+ p_ivtc_result = p_curr;
+ i_result_interlace_score =
p_ivtc->pi_interlace_scores[FIELD_PAIR_TCBC];
+ }
+ else if( op == IVTC_OP_COMPOSE_TNBC )
+ {
+ p_ivtc_result = p_tnbc;
+ i_result_interlace_score =
p_ivtc->pi_interlace_scores[FIELD_PAIR_TNBC];
+ }
+ else if( op == IVTC_OP_COMPOSE_TCBN )
+ {
+ p_ivtc_result = p_tcbn;
+ i_result_interlace_score =
p_ivtc->pi_interlace_scores[FIELD_PAIR_TCBN];
+ }
+ }
+ else /* Transcode strategy for producing film frames */
+ {
+ /* We check which field paired with TN or BN gives the
smallest interlace score,
+ and declare that combination the resulting
progressive frame.
+
+ The difference to Transcode is that we still use our
cadence counter to decide
+ the fate of the timestamps, and which frames get
dropped. We can't be sure
+ that our frame drops hit the duplicates instead of
any useful frames, but
+ that's the best (I think) that can be done with this
realtime strategy.
+
+ (Another approach would be to use a five-frame
future buffer, compare all
+ consecutive frames, and drop the one that looks the
most like a duplicate
+ (e.g. smallest absolute value of signed
pixel-by-pixel difference summed over the picture)
+ Transcode's "decimate" filter works this way.)
+ */
+                int tnbn = p_ivtc->pi_interlace_scores[FIELD_PAIR_TNBN]; /* may match on both TFF and BFF */
+                int tnbc = p_ivtc->pi_interlace_scores[FIELD_PAIR_TNBC]; /* may match on TFF material */
+                int tcbn = p_ivtc->pi_interlace_scores[FIELD_PAIR_TCBN]; /* may match on BFF material */
+
+ int i_chosen = -1;
+ /* This is the right choice 50% of the time (see the
rightmost entries in the "best field pairs"
+ column of the cadence tables). */
+ if( tnbn <= tnbc && tnbn <= tcbn )
+ {
+ p_ivtc_result = p_next;
+ i_result_interlace_score = tnbn;
+ i_chosen = FIELD_PAIR_TNBN;
+ }
+ else if( tnbc <= tnbn && tnbc <= tcbn )
+ {
+ p_ivtc_result = p_tnbc;
+ i_result_interlace_score = tnbc;
+ i_chosen = FIELD_PAIR_TNBC;
+ }
+ else if( tcbn <= tnbn && tcbn <= tnbc )
+ {
+ p_ivtc_result = p_tcbn;
+ i_result_interlace_score = tcbn;
+ i_chosen = FIELD_PAIR_TCBN;
+ }
+ /* The above was an exhaustive list of possibilities.
No "else" is needed. */
+ }
+
+            /* Note that we get to this point only if we didn't drop the frame.
+               Mangle the presentation timestamp to convert 30 -> 24 fps.
+            */
+            int i_timestamp_delta = pi_timestamp_deltas[p_ivtc->i_cadence_pos];
+            assert( i_timestamp_delta >= 0 ); /* When we get here, we must always have a valid adjustment. */
+
+            /* "Current" is the frame that is being extracted now.
+               Use its original timestamp as the base. */
+            t_final = p_curr->date + (p_next->date - p_curr->date)*i_timestamp_delta/4;
+
+            /* Bump cadence counter into the next expected position. */
+            p_ivtc->i_cadence_pos = (p_ivtc->i_cadence_pos + 1) % 5;
+ }
+        else /* Not film mode, IVTC bypassed. Just use the latest frame as the "IVTC result". */
+        {
+            p_ivtc_result = p_next;
+            i_result_interlace_score = p_ivtc->pi_interlace_scores[FIELD_PAIR_TNBN];
+        }
+ assert( p_ivtc_result != NULL );
+
+ /* We're almost done. Estimate whether the resulting frame is
interlaced or not.
+ Deinterlace if it is, and return that to caller. Otherwise
just pass the frame through to caller.
+
+ Note that we shouldn't run all frames through RenderX() just
in case, because it tends to
+ mangle (especially Japanese) text in some opening credits.
For an example, play the opening
+ sequence from the anime Utakata using RenderX() to see this
effect.
+ */
+        if( i_result_interlace_score > RENDERIVTC_INTERLACE_THRESHOLD )
+        {
+            msg_Dbg( p_filter, "IVTC: removing residual interlacing (score %d > %d)",
+                     i_result_interlace_score, RENDERIVTC_INTERLACE_THRESHOLD );
+
+            RenderX( p_dst, p_ivtc_result );
+        }
+        else /* Not interlaced, just copy the frame. */
+        {
+            picture_Copy( p_dst, p_ivtc_result );
+        }
+
+ /* Note that picture_Copy() copies the presentation timestamp, too.
+ Apply timestamp mangling now, if any was needed.
+ */
+ if( t_final > VLC_TS_INVALID )
+ p_dst->date = t_final;
+
+ return VLC_SUCCESS;
+ }
+    else if( !p_prev && !p_curr && p_next ) /* first frame */
+    {
+        /* Render the first frame using any means necessary, so that a picture
+           appears immediately. */
+
+        /* Let's do some init for the filter. This score will become TPBP by
+           the time the actual filter starts. */
+        p_ivtc->pi_interlace_scores[FIELD_PAIR_TNBN] = CalculateInterlaceScore( p_filter, p_next );
+
+        /* Do the usual conditional area-based deinterlacing (see above). */
+        if( p_ivtc->pi_interlace_scores[FIELD_PAIR_TNBN] > RENDERIVTC_INTERLACE_THRESHOLD )
+            RenderX( p_dst, p_next );
+        else
+            picture_Copy( p_dst, p_next );
+
+        return VLC_SUCCESS;
+    }
+    else /* now the only possibility is (!p_prev && p_curr && p_next) */
+    {
+        /* This is the second frame. We need three for the detector to work,
+           so we drop this one. We will only do some initialization for the
+           detector here. */
+
+        /* Generate artificial frames TNBC and TCBN into the temporary working area */
+        ComposeFrame( p_filter, p_tnbc, p_next, p_curr );
+        ComposeFrame( p_filter, p_tcbn, p_curr, p_next );
+
+        /* These scores will become TCBC, TCBP and TPBC when the filter starts.
+           The score for the current TCBC has already been computed at the
+           first frame, and slid into place at the start of this frame.
+        */
+        p_ivtc->pi_interlace_scores[FIELD_PAIR_TNBN] = CalculateInterlaceScore( p_filter, p_next );
+        p_ivtc->pi_interlace_scores[FIELD_PAIR_TNBC] = CalculateInterlaceScore( p_filter, p_tnbc );
+        p_ivtc->pi_interlace_scores[FIELD_PAIR_TCBN] = CalculateInterlaceScore( p_filter, p_tcbn );
+
+        return VLC_EGENERIC; /* Not really an error. This is expected, but we
+                                must signal the caller not to expect an output
+                                frame. */
+    }
+}
+
+/*****************************************************************************
* video filter2 functions
*****************************************************************************/
static picture_t *Deinterlace( filter_t *p_filter, picture_t *p_pic )
@@ -1609,6 +2799,13 @@ static picture_t *Deinterlace( filter_t
*p_filter, picture_t *p_pic )
if( p_dst[1] )
RenderYadif( p_filter, p_dst[1], p_pic, 1,
p_pic->b_top_field_first );
break;
+
+        case DEINTERLACE_IVTC:
+            /* Note: RenderIVTC will automatically drop the duplicate frames
+               produced by IVTC. This is part of normal IVTC operation. */
+            if( RenderIVTC( p_filter, p_dst[0], p_pic ) )
+                goto drop;
+            break;
}
p_dst[0]->b_progressive = true;
@@ -1629,6 +2826,7 @@ drop:
static void Flush( filter_t *p_filter )
{
filter_sys_t *p_sys = p_filter->p_sys;
+ ivtc_sys_t *p_ivtc = p_sys->p_ivtc;
p_sys->i_last_date = VLC_TS_INVALID;
for( int i = 0; i < HISTORY_SIZE; i++ )
@@ -1637,6 +2835,25 @@ static void Flush( filter_t *p_filter )
picture_Release( p_sys->pp_history[i] );
p_sys->pp_history[i] = NULL;
}
+
+ /* IVTC */
+ for( int i = 0; i < IVTC_WORKING_AREA_SIZE; i++ )
+ {
+ if( p_ivtc->pp_ivtc_working_area[i] )
+ picture_Release( p_ivtc->pp_ivtc_working_area[i] );
+ p_ivtc->pp_ivtc_working_area[i] = NULL;
+ }
+ p_ivtc->b_possible_cadence_break_detected = false;
+ p_ivtc->i_cadence_pos = CADENCE_POS_INVALID;
+ p_ivtc->i_telecine_field_dominance = TFD_INVALID;
+ p_ivtc->i_ivtc_filter_mode = IVTC_MODE_DETECTING;
+ for( int i = 0; i < IVTC_NUM_FIELD_PAIRS; i++ )
+ p_ivtc->pi_interlace_scores[i] = 0;
+    for( int i = 0; i < IVTC_DETECTION_HISTORY_SIZE; i++ )
+        p_ivtc->pi_cadence_pos_history[i] = CADENCE_POS_INVALID; /* detected positions */
+ p_ivtc->i_old_chroma = -1;
+ p_ivtc->i_old_size_x = -1;
+ p_ivtc->i_old_size_y = -1;
}
static int Mouse( filter_t *p_filter,
@@ -1657,6 +2874,7 @@ static int Open( vlc_object_t *p_this )
{
filter_t *p_filter = (filter_t*)p_this;
filter_sys_t *p_sys;
+ ivtc_sys_t *p_ivtc;
if( !IsChromaSupported( p_filter->fmt_in.video.i_chroma ) )
return VLC_EGENERIC;
@@ -1665,6 +2883,13 @@ static int Open( vlc_object_t *p_this )
p_sys = p_filter->p_sys = malloc( sizeof( *p_sys ) );
if( !p_sys )
return VLC_ENOMEM;
+ p_ivtc = p_filter->p_sys->p_ivtc = malloc( sizeof( *p_ivtc ) );
+ if( !p_ivtc )
+ {
+ free( p_filter->p_sys );
+ p_filter->p_sys = NULL;
+ return VLC_ENOMEM;
+ }
p_sys->i_mode = DEINTERLACE_BLEND;
p_sys->b_double_rate = false;
@@ -1673,6 +2898,21 @@ static int Open( vlc_object_t *p_this )
for( int i = 0; i < HISTORY_SIZE; i++ )
p_sys->pp_history[i] = NULL;
+ /* IVTC */
+ for( int i = 0; i < IVTC_WORKING_AREA_SIZE; i++ )
+ p_ivtc->pp_ivtc_working_area[i] = NULL;
+ p_ivtc->b_possible_cadence_break_detected = false;
+ p_ivtc->i_cadence_pos = CADENCE_POS_INVALID;
+ p_ivtc->i_telecine_field_dominance = TFD_INVALID;
+ p_ivtc->i_ivtc_filter_mode = IVTC_MODE_DETECTING;
+ for( int i = 0; i < IVTC_NUM_FIELD_PAIRS; i++ )
+ p_ivtc->pi_interlace_scores[i] = 0;
+ for( int i = 0; i < IVTC_DETECTION_HISTORY_SIZE; i++ )
+ p_ivtc->pi_cadence_pos_history[i] = CADENCE_POS_INVALID;
+ p_ivtc->i_old_chroma = -1;
+ p_ivtc->i_old_size_x = -1;
+ p_ivtc->i_old_size_y = -1;
+
#if defined(CAN_COMPILE_C_ALTIVEC)
if( vlc_CPU() & CPU_CAPABILITY_ALTIVEC )
{
@@ -1755,6 +2995,7 @@ static void Close( vlc_object_t *p_this )
filter_t *p_filter = (filter_t*)p_this;
Flush( p_filter );
+ free( p_filter->p_sys->p_ivtc );
free( p_filter->p_sys );
}
diff --git a/src/control/video.c b/src/control/video.c
index 54742db..4193f2a 100644
--- a/src/control/video.c
+++ b/src/control/video.c
@@ -566,7 +566,8 @@ void libvlc_video_set_deinterlace(
libvlc_media_player_t *p_mi,
&& strcmp (psz_mode, "blend") && strcmp (psz_mode, "bob")
&& strcmp (psz_mode, "discard") && strcmp (psz_mode, "linear")
&& strcmp (psz_mode, "mean") && strcmp (psz_mode, "x")
- && strcmp (psz_mode, "yadif") && strcmp (psz_mode, "yadif2x"))
+ && strcmp (psz_mode, "yadif") && strcmp (psz_mode, "yadif2x")
+ && strcmp (psz_mode, "ivtc"))
return;
if (*psz_mode)
diff --git a/src/libvlc-module.c b/src/libvlc-module.c
index 6701889..7c9bcb5 100644
--- a/src/libvlc-module.c
+++ b/src/libvlc-module.c
@@ -487,11 +487,11 @@ static const char * const ppsz_deinterlace_text[] = {
"Deinterlace method to use for video processing.")
static const char * const ppsz_deinterlace_mode[] = {
"discard", "blend", "mean", "bob",
- "linear", "x", "yadif", "yadif2x"
+ "linear", "x", "yadif", "yadif2x", "ivtc"
};
static const char * const ppsz_deinterlace_mode_text[] = {
N_("Discard"), N_("Blend"), N_("Mean"), N_("Bob"),
- N_("Linear"), "X", "Yadif", "Yadif (2x)"
+ N_("Linear"), "X", "Yadif", "Yadif (2x)", N_("Film NTSC (IVTC)")
};
static const int pi_pos_values[] = { 0, 1, 2, 4, 8, 5, 6, 9, 10 };
diff --git a/src/video_output/interlacing.c b/src/video_output/interlacing.c
index c168ef8..185c34f 100644
--- a/src/video_output/interlacing.c
+++ b/src/video_output/interlacing.c
@@ -48,6 +48,7 @@ static const char *deinterlace_modes[] = {
"x",
"yadif",
"yadif2x",
+ "ivtc",
NULL
};
static bool DeinterlaceIsModeValid(const char *mode)
--
1.7.2.3