[vlc-devel] [PATCH] filters: deinterlace: add slice threading (yadif2x)

Francois Cartegnie fcvlcdev at free.fr
Tue Mar 31 15:59:30 CEST 2020


Not sure whether this breaks the d3d11/dxva2 deinterlacers.

---
 modules/hw/d3d11/d3d11_deinterlace.c          | 12 ++-
 modules/hw/d3d9/dxva2_deinterlace.c           | 12 ++-
 modules/video_filter/deinterlace/algo_basic.c | 21 ++--
 modules/video_filter/deinterlace/algo_basic.h | 21 ++--
 modules/video_filter/deinterlace/algo_ivtc.c  | 11 ++-
 modules/video_filter/deinterlace/algo_ivtc.h  | 11 ++-
 .../video_filter/deinterlace/algo_phosphor.c  |  5 +-
 .../video_filter/deinterlace/algo_phosphor.h  |  6 +-
 modules/video_filter/deinterlace/algo_x.c     |  5 +-
 modules/video_filter/deinterlace/algo_x.h     |  3 +-
 modules/video_filter/deinterlace/algo_yadif.c | 63 ++++++++++--
 modules/video_filter/deinterlace/algo_yadif.h |  8 +-
 modules/video_filter/deinterlace/common.c     | 55 +++++++++--
 modules/video_filter/deinterlace/common.h     | 47 +++++++--
 .../video_filter/deinterlace/deinterlace.c    | 96 +++++++++++++++----
 .../video_filter/deinterlace/deinterlace.h    |  2 +
 16 files changed, 291 insertions(+), 87 deletions(-)

diff --git a/modules/hw/d3d11/d3d11_deinterlace.c b/modules/hw/d3d11/d3d11_deinterlace.c
index 3ad3224daa..fa320cb052 100644
--- a/modules/hw/d3d11/d3d11_deinterlace.c
+++ b/modules/hw/d3d11/d3d11_deinterlace.c
@@ -78,11 +78,13 @@ static void Flush(filter_t *filter)
     FlushDeinterlacing(&p_sys->context);
 }
 
-static int RenderPic( filter_t *p_filter, picture_t *p_outpic, picture_t *p_pic,
+static int RenderPic( struct deinterlace_thread_ctx *p_ctx,
+                      picture_t *p_outpic, picture_t *p_pic,
                       int order, int i_field )
 {
     VLC_UNUSED(order);
     HRESULT hr;
+    filter_t *p_filter = p_ctx->ctx->priv;
     filter_sys_t *p_sys = p_filter->p_sys;
     picture_sys_d3d11_t *p_out_sys = ActiveD3D11PictureSys(p_outpic);
 
@@ -176,9 +178,10 @@ static int RenderPic( filter_t *p_filter, picture_t *p_outpic, picture_t *p_pic,
     return VLC_SUCCESS;
 }
 
-static int RenderSinglePic( filter_t *p_filter, picture_t *p_outpic, picture_t *p_pic )
+static int RenderSinglePic( struct deinterlace_thread_ctx *p_ctx,
+                            picture_t *p_outpic, picture_t *p_pic )
 {
-    return RenderPic( p_filter, p_outpic, p_pic, 0, 0 );
+    return RenderPic( p_ctx, p_outpic, p_pic, 0, 0 );
 }
 
 static picture_t *Deinterlace(filter_t *p_filter, picture_t *p_pic)
@@ -363,8 +366,9 @@ int D3D11OpenDeinterlace(vlc_object_t *obj)
         goto error;
     }
 
-    InitDeinterlacingContext( &sys->context );
+    InitDeinterlacingContext( &sys->context, filter );
 
+    sys->context.i_thread_count = 1;
     sys->context.settings = p_mode->settings;
     sys->context.settings.b_use_frame_history = rateCaps.PastFrames != 0 ||
         rateCaps.FutureFrames != 0;
diff --git a/modules/hw/d3d9/dxva2_deinterlace.c b/modules/hw/d3d9/dxva2_deinterlace.c
index 7c0c4ec692..277f71b1bd 100644
--- a/modules/hw/d3d9/dxva2_deinterlace.c
+++ b/modules/hw/d3d9/dxva2_deinterlace.c
@@ -179,9 +179,11 @@ static void FillBlitParams( filter_sys_t *sys,
     params->ProcAmpValues.Saturation.Value = sys->Saturation;
 }
 
-static int RenderPic( filter_t *filter, picture_t *p_outpic, picture_t *src,
+static int RenderPic( struct deinterlace_thread_ctx *p_ctx,
+                      picture_t *p_outpic, picture_t *src,
                       int order, int i_field )
 {
+    filter_t *filter = p_ctx->ctx->priv;
     filter_sys_t *sys = filter->p_sys;
     picture_sys_d3d9_t *p_out_sys = ActiveD3D9PictureSys(p_outpic);
     const int i_samples = sys->decoder_caps.NumBackwardRefSamples + 1 +
@@ -257,9 +259,10 @@ static int RenderPic( filter_t *filter, picture_t *p_outpic, picture_t *src,
     return VLC_SUCCESS;
 }
 
-static int RenderSinglePic( filter_t *p_filter, picture_t *p_outpic, picture_t *p_pic )
+static int RenderSinglePic( struct deinterlace_thread_ctx *p_ctx,
+                            picture_t *p_outpic, picture_t *p_pic )
 {
-    return RenderPic( p_filter, p_outpic, p_pic, 0, 0 );
+    return RenderPic( p_ctx, p_outpic, p_pic, 0, 0 );
 }
 
 static picture_t *Deinterlace(filter_t *p_filter, picture_t *p_pic)
@@ -476,13 +479,14 @@ int D3D9OpenDeinterlace(vlc_object_t *obj)
     sys->hdecoder_dll = hdecoder_dll;
     sys->decoder_caps = best_caps;
 
-    InitDeinterlacingContext( &sys->context );
+    InitDeinterlacingContext( &sys->context, filter );
 
     sys->context.settings = p_mode->settings;
     sys->context.settings.b_use_frame_history = best_caps.NumBackwardRefSamples != 0 ||
                                        best_caps.NumForwardRefSamples  != 0;
     if (sys->context.settings.b_use_frame_history != p_mode->settings.b_use_frame_history)
         msg_Dbg( filter, "deinterlacing not using frame history as requested");
+    sys->context.i_thread_count = 1;
     if (sys->context.settings.b_double_rate)
         sys->context.pf_render_ordered = RenderPic;
     else
diff --git a/modules/video_filter/deinterlace/algo_basic.c b/modules/video_filter/deinterlace/algo_basic.c
index 8d675e01c6..856ad15445 100644
--- a/modules/video_filter/deinterlace/algo_basic.c
+++ b/modules/video_filter/deinterlace/algo_basic.c
@@ -42,9 +42,10 @@
  * RenderDiscard: only keep TOP or BOTTOM field, discard the other.
  *****************************************************************************/
 
-int RenderDiscard( filter_t *p_filter, picture_t *p_outpic, picture_t *p_pic )
+int RenderDiscard( struct deinterlace_thread_ctx *p_ctx,
+                   picture_t *p_outpic, picture_t *p_pic )
 {
-    VLC_UNUSED(p_filter);
+    VLC_UNUSED(p_ctx);
     int i_plane;
 
     /* Copy image and skip lines */
@@ -73,10 +74,11 @@ int RenderDiscard( filter_t *p_filter, picture_t *p_outpic, picture_t *p_pic )
  * RenderBob: renders a BOB picture - simple copy
  *****************************************************************************/
 
-int RenderBob( filter_t *p_filter, picture_t *p_outpic, picture_t *p_pic,
+int RenderBob( struct deinterlace_thread_ctx *p_ctx,
+               picture_t *p_outpic, picture_t *p_pic,
                int order, int i_field )
 {
-    VLC_UNUSED(p_filter);
+    VLC_UNUSED(p_ctx);
     VLC_UNUSED(order);
     int i_plane;
 
@@ -129,12 +131,13 @@ int RenderBob( filter_t *p_filter, picture_t *p_outpic, picture_t *p_pic,
  * RenderLinear: BOB with linear interpolation
  *****************************************************************************/
 
-int RenderLinear( filter_t *p_filter,
+int RenderLinear( struct deinterlace_thread_ctx *p_ctx,
                   picture_t *p_outpic, picture_t *p_pic, int order, int i_field )
 {
     VLC_UNUSED(order);
     int i_plane;
 
+    filter_t *p_filter = p_ctx->ctx->priv;
     filter_sys_t *p_sys = p_filter->p_sys;
 
     /* Copy image and skip lines */
@@ -188,10 +191,12 @@ int RenderLinear( filter_t *p_filter,
  * RenderMean: Half-resolution blender
  *****************************************************************************/
 
-int RenderMean( filter_t *p_filter, picture_t *p_outpic, picture_t *p_pic )
+int RenderMean( struct deinterlace_thread_ctx *p_ctx,
+                picture_t *p_outpic, picture_t *p_pic )
 {
     int i_plane;
 
+    filter_t *p_filter = p_ctx->ctx->priv;
     filter_sys_t *p_sys = p_filter->p_sys;
 
     /* Copy image and skip lines */
@@ -223,10 +228,12 @@ int RenderMean( filter_t *p_filter, picture_t *p_outpic, picture_t *p_pic )
  * RenderBlend: Full-resolution blender
  *****************************************************************************/
 
-int RenderBlend( filter_t *p_filter, picture_t *p_outpic, picture_t *p_pic )
+int RenderBlend( struct deinterlace_thread_ctx *p_ctx,
+                 picture_t *p_outpic, picture_t *p_pic )
 {
     int i_plane;
 
+    filter_t *p_filter = p_ctx->ctx->priv;
     filter_sys_t *p_sys = p_filter->p_sys;
 
     /* Copy image and skip lines */
diff --git a/modules/video_filter/deinterlace/algo_basic.h b/modules/video_filter/deinterlace/algo_basic.h
index 1f25563ab8..962784551d 100644
--- a/modules/video_filter/deinterlace/algo_basic.h
+++ b/modules/video_filter/deinterlace/algo_basic.h
@@ -31,8 +31,8 @@
  */
 
 /* Forward declarations */
-struct filter_t;
 struct picture_t;
+struct deinterlace_thread_ctx;
 
 /*****************************************************************************
  * Functions
@@ -48,7 +48,8 @@ struct picture_t;
  * @see RenderBob()
  * @see Deinterlace()
  */
-int RenderDiscard( filter_t *, picture_t *p_outpic, picture_t *p_pic );
+int RenderDiscard( struct deinterlace_thread_ctx *p_ctx,
+                   picture_t *p_outpic, picture_t *p_pic );
 
 /**
  * RenderBob: basic framerate doubler.
@@ -63,7 +64,7 @@ int RenderDiscard( filter_t *, picture_t *p_outpic, picture_t *p_pic );
  * @see RenderLinear()
  * @see Deinterlace()
  */
-int RenderBob( filter_t *,
+int RenderBob( struct deinterlace_thread_ctx *p_ctx,
                picture_t *p_outpic, picture_t *p_pic, int order, int i_field );
 
 /**
@@ -71,14 +72,14 @@ int RenderBob( filter_t *,
  *
  * There is no 1x (non-doubling) equivalent for this filter.
  *
- * @param p_filter The filter instance. Must be non-NULL.
+ * @param p_ctx Deinterlace thread context. Must be non-NULL.
  * @param p_outpic Output frame. Must be allocated by caller.
  * @param p_pic Input frame. Must exist.
  * @param i_field Render which field? 0 = top field, 1 = bottom field.
  * @see RenderBob()
  * @see Deinterlace()
  */
-int RenderLinear( filter_t *p_filter,
+int RenderLinear( struct deinterlace_thread_ctx *p_ctx,
                   picture_t *p_outpic, picture_t *p_pic, int order, int i_field );
 
 /**
@@ -88,12 +89,13 @@ int RenderLinear( filter_t *p_filter,
  *
  * Obviously, there is no 2x equivalent for this filter.
  *
- * @param p_filter The filter instance. Must be non-NULL.
+ * @param p_ctx Deinterlace thread context. Must be non-NULL.
  * @param p_outpic Output frame. Must be allocated by caller.
  * @param p_pic Input frame. Must exist.
  * @see Deinterlace()
  */
-int RenderMean( filter_t *p_filter, picture_t *p_outpic, picture_t *p_pic );
+int RenderMean( struct deinterlace_thread_ctx *p_ctx,
+                picture_t *p_outpic, picture_t *p_pic );
 
 /**
  * RenderBlend: full-resolution blender.
@@ -103,11 +105,12 @@ int RenderMean( filter_t *p_filter, picture_t *p_outpic, picture_t *p_pic );
  *
  * Obviously, there is no 2x equivalent for this filter.
  *
- * @param p_filter The filter instance. Must be non-NULL.
+ * @param p_ctx Deinterlace thread context. Must be non-NULL.
  * @param p_outpic Output frame. Must be allocated by caller.
  * @param p_pic Input frame. Must exist.
  * @see Deinterlace()
  */
-int RenderBlend( filter_t *p_filter, picture_t *p_outpic, picture_t *p_pic );
+int RenderBlend( struct deinterlace_thread_ctx *p_ctx,
+                 picture_t *p_outpic, picture_t *p_pic );
 
 #endif
diff --git a/modules/video_filter/deinterlace/algo_ivtc.c b/modules/video_filter/deinterlace/algo_ivtc.c
index 2e6d09b69d..305509dfd2 100644
--- a/modules/video_filter/deinterlace/algo_ivtc.c
+++ b/modules/video_filter/deinterlace/algo_ivtc.c
@@ -1468,12 +1468,14 @@ static bool IVTCOutputOrDropFrame( filter_t *p_filter, picture_t *p_dst )
  *****************************************************************************/
 
 /* See function doc in header. */
-int RenderIVTC( filter_t *p_filter, picture_t *p_dst, picture_t *p_pic )
+int RenderIVTC( struct deinterlace_thread_ctx *p_ctx,
+                picture_t *p_dst, picture_t *p_pic )
 {
     VLC_UNUSED(p_pic);
-    assert( p_filter != NULL );
+    assert( p_ctx != NULL );
     assert( p_dst != NULL );
 
+    filter_t *p_filter = p_ctx->ctx->priv;
     filter_sys_t *p_sys = p_filter->p_sys;
     ivtc_sys_t *p_ivtc  = &p_sys->ivtc;
 
@@ -1571,10 +1573,11 @@ int RenderIVTC( filter_t *p_filter, picture_t *p_dst, picture_t *p_pic )
 }
 
 /* See function doc in header. */
-void IVTCClearState( filter_t *p_filter )
+void IVTCClearState( struct deinterlace_thread_ctx *p_ctx )
 {
-    assert( p_filter != NULL );
+    assert( p_ctx != NULL );
 
+    filter_t *p_filter = p_ctx->ctx->priv;
     filter_sys_t *p_sys = p_filter->p_sys;
     ivtc_sys_t *p_ivtc = &p_sys->ivtc;
 
diff --git a/modules/video_filter/deinterlace/algo_ivtc.h b/modules/video_filter/deinterlace/algo_ivtc.h
index f032d7e21b..55a35685a6 100644
--- a/modules/video_filter/deinterlace/algo_ivtc.h
+++ b/modules/video_filter/deinterlace/algo_ivtc.h
@@ -24,7 +24,7 @@
 #define VLC_DEINTERLACE_ALGO_IVTC_H 1
 
 /* Forward declarations */
-struct filter_t;
+struct deinterlace_thread_ctx;
 struct picture_t;
 
 /*****************************************************************************
@@ -128,7 +128,7 @@ typedef struct
  *
  * See the file comment for a detailed description of the algorithm.
  *
- * @param p_filter The filter instance. Must be non-NULL.
+ * @param p_ctx Deinterlace thread context. Must be non-NULL.
  * @param[out] p_dst Output frame. Must be allocated by caller.
  * @return VLC error code (int).
  * @retval VLC_SUCCESS A film frame was reconstructed to p_dst.
@@ -138,7 +138,8 @@ typedef struct
  * @see CalculateInterlaceScore()
  * @see EstimateNumBlocksWithMotion()
  */
-int RenderIVTC( filter_t *p_filter, picture_t *p_dst, picture_t *p_pic );
+int RenderIVTC( struct deinterlace_thread_ctx *p_ctx,
+                picture_t *p_dst, picture_t *p_pic );
 
 /**
  * Clears the inverse telecine subsystem state.
@@ -146,12 +147,12 @@ int RenderIVTC( filter_t *p_filter, picture_t *p_dst, picture_t *p_pic );
  * Used during initialization and uninitialization
  * (called from Open() and Flush()).
  *
- * @param p_filter The filter instance.
+ * @param p_ctx Deinterlace thread context. Must be non-NULL.
  * @see RenderIVTC()
  * @see Open()
  * @see Flush()
  */
-void IVTCClearState( filter_t *p_filter );
+void IVTCClearState( struct deinterlace_thread_ctx *p_ctx );
 
 /*****************************************************************************
  * Extra documentation
diff --git a/modules/video_filter/deinterlace/algo_phosphor.c b/modules/video_filter/deinterlace/algo_phosphor.c
index 289eed783b..b30d980500 100644
--- a/modules/video_filter/deinterlace/algo_phosphor.c
+++ b/modules/video_filter/deinterlace/algo_phosphor.c
@@ -276,16 +276,17 @@ static void DarkenFieldMMX( picture_t *p_dst,
  *****************************************************************************/
 
 /* See header for function doc. */
-int RenderPhosphor( filter_t *p_filter,
+int RenderPhosphor( struct deinterlace_thread_ctx *p_ctx,
                     picture_t *p_dst, picture_t *p_pic,
                     int i_order, int i_field )
 {
     VLC_UNUSED(p_pic);
-    assert( p_filter != NULL );
+    assert( p_ctx != NULL );
     assert( p_dst != NULL );
     assert( i_order >= 0 && i_order <= 2 ); /* 2 = soft field repeat */
     assert( i_field == 0 || i_field == 1 );
 
+    filter_t *p_filter = p_ctx->ctx->priv;
     filter_sys_t *p_sys = p_filter->p_sys;
 
     /* Last two input frames */
diff --git a/modules/video_filter/deinterlace/algo_phosphor.h b/modules/video_filter/deinterlace/algo_phosphor.h
index 3cc5aaca46..f395bc28ee 100644
--- a/modules/video_filter/deinterlace/algo_phosphor.h
+++ b/modules/video_filter/deinterlace/algo_phosphor.h
@@ -24,8 +24,8 @@
 #define VLC_DEINTERLACE_ALGO_PHOSPHOR_H 1
 
 /* Forward declarations */
-struct filter_t;
 struct picture_t;
+struct deinterlace_thread_ctx;
 
 /*****************************************************************************
  * Data structures etc.
@@ -88,7 +88,7 @@ typedef struct
  * field), and alternating i_field (starting, at i_order = 0, with the field
  * according to p_src->b_top_field_first). See Deinterlace() for an example.
  *
- * @param p_filter The filter instance. Must be non-NULL.
+ * @param p_ctx Deinterlace thread context. Must be non-NULL.
  * @param p_dst Output frame. Must be allocated by caller.
  * @param i_order Temporal field number: 0 = first, 1 = second, 2 = rep. first.
  * @param i_field Render which field? 0 = top field, 1 = bottom field.
@@ -99,7 +99,7 @@ typedef struct
  * @see RenderLinear()
  * @see Deinterlace()
  */
-int RenderPhosphor( filter_t *p_filter,
+int RenderPhosphor( struct deinterlace_thread_ctx *p_ctx,
                     picture_t *p_dst, picture_t *p_pic,
                     int i_order, int i_field );
 
diff --git a/modules/video_filter/deinterlace/algo_x.c b/modules/video_filter/deinterlace/algo_x.c
index 411351d1cd..c6be9ae348 100644
--- a/modules/video_filter/deinterlace/algo_x.c
+++ b/modules/video_filter/deinterlace/algo_x.c
@@ -511,9 +511,10 @@ static inline void XDeintBand8x8MMXEXT( uint8_t *dst, int i_dst,
  * Public functions
  *****************************************************************************/
 
-int RenderX( filter_t *p_filter, picture_t *p_outpic, picture_t *p_pic )
+int RenderX( struct deinterlace_thread_ctx *p_ctx,
+             picture_t *p_outpic, picture_t *p_pic )
 {
-    VLC_UNUSED(p_filter);
+    VLC_UNUSED(p_ctx);
     int i_plane;
 #if defined (CAN_COMPILE_MMXEXT)
     const bool mmxext = vlc_CPU_MMXEXT();
diff --git a/modules/video_filter/deinterlace/algo_x.h b/modules/video_filter/deinterlace/algo_x.h
index dd70cbd732..2ad5fb4a56 100644
--- a/modules/video_filter/deinterlace/algo_x.h
+++ b/modules/video_filter/deinterlace/algo_x.h
@@ -49,6 +49,7 @@ struct picture_t;
  * @param[out] p_outpic Output frame. Must be allocated by caller.
  * @see Deinterlace()
  */
-int RenderX( filter_t *, picture_t *p_outpic, picture_t *p_pic );
+int RenderX( struct deinterlace_thread_ctx *p_ctx,
+             picture_t *p_outpic, picture_t *p_pic );
 
 #endif
diff --git a/modules/video_filter/deinterlace/algo_yadif.c b/modules/video_filter/deinterlace/algo_yadif.c
index 6fdecf53a5..cef6836421 100644
--- a/modules/video_filter/deinterlace/algo_yadif.c
+++ b/modules/video_filter/deinterlace/algo_yadif.c
@@ -44,18 +44,18 @@
 
 /* yadif.h comes from yadif.c of FFmpeg project.
    Necessary preprocessor macros are defined in common.h. */
+
 #include "yadif.h"
 
-int RenderYadifSingle( filter_t *p_filter, picture_t *p_dst, picture_t *p_src )
-{
-    return RenderYadif( p_filter, p_dst, p_src, 0, 0 );
-}
+#define MIN_SLICE_HEIGHT 64
 
-int RenderYadif( filter_t *p_filter, picture_t *p_dst, picture_t *p_src,
-                 int i_order, int i_field )
+static int RenderYadifSliced( struct deinterlace_thread_ctx *threadctx,
+                              picture_t *p_dst, picture_t *p_src,
+                              int i_order, int i_field )
 {
     VLC_UNUSED(p_src);
 
+    filter_t *p_filter = threadctx->ctx->priv;
     filter_sys_t *p_sys = p_filter->p_sys;
 
     /* */
@@ -130,6 +130,8 @@ int RenderYadif( filter_t *p_filter, picture_t *p_dst, picture_t *p_src,
         if( p_sys->chroma->pixel_size == 2 )
             filter = yadif_filter_line_c_16bit;
 
+        bool b_worker = (threadctx->i_thread_number > 0);
+
         for( int n = 0; n < p_dst->i_planes; n++ )
         {
             const plane_t *prevp = &p_prev->p[n];
@@ -137,7 +139,39 @@ int RenderYadif( filter_t *p_filter, picture_t *p_dst, picture_t *p_src,
             const plane_t *nextp = &p_next->p[n];
             plane_t *dstp        = &p_dst->p[n];
 
-            for( int y = 1; y < dstp->i_visible_lines - 1; y++ )
+            int start, end;
+
+            unsigned i_thread_count = threadctx->ctx->i_thread_count;
+            int slice_height = dstp->i_visible_lines / i_thread_count;
+            if( i_thread_count > 1 && slice_height < MIN_SLICE_HEIGHT )
+            {   /* slices too thin for this plane: use fewer, MIN_SLICE_HEIGHT-tall slices */
+                slice_height = MIN_SLICE_HEIGHT;
+                i_thread_count = __MAX(1, dstp->i_visible_lines / MIN_SLICE_HEIGHT); /* never 0: thread 0 must still render */
+            }
+
+            if( threadctx->i_thread_number >= i_thread_count )
+                continue;
+
+            if( i_thread_count > 1 )
+            {
+                if( b_worker )
+                {
+                    start = slice_height * (threadctx->i_thread_number - 1);
+                    end = start + slice_height + 2; /* we always need to copy 2 more lines */
+                }
+                else /* parent thread */
+                {
+                    start = slice_height * (i_thread_count - 1);
+                    end = dstp->i_visible_lines;
+                }
+            }
+            else
+            {
+                start = 0;
+                end = dstp->i_visible_lines;
+            }
+
+            for( int y = start + 1; y < end - 1; y++ )
             {
                 if( (y % 2) == i_field  ||  yadif_parity == 2 )
                 {
@@ -184,7 +218,7 @@ int RenderYadif( filter_t *p_filter, picture_t *p_dst, picture_t *p_src,
                  as set by Open() or SetFilterMethod(). It is always 0. */
 
         /* FIXME not good as it does not use i_order/i_field */
-        RenderX( p_filter, p_dst, p_next );
+        RenderX( threadctx, p_dst, p_next );
         return VLC_SUCCESS;
     }
     else
@@ -194,3 +228,16 @@ int RenderYadif( filter_t *p_filter, picture_t *p_dst, picture_t *p_src,
         return VLC_EGENERIC;
     }
 }
+
+int RenderYadifSingle( struct deinterlace_thread_ctx *ctx,
+                       picture_t *p_dst, picture_t *p_src )
+{ /* Single-picture entry point: forwards to RenderYadifSliced() with i_order = 0 and i_field = 0. */
+    return RenderYadifSliced( ctx, p_dst, p_src, 0, 0 );
+}
+
+int RenderYadif( struct deinterlace_thread_ctx *ctx,
+                 picture_t *p_dst, picture_t *p_src,
+                 int i_order, int i_field )
+{ /* Ordered entry point: passes i_order and i_field through to RenderYadifSliced() unchanged. */
+    return RenderYadifSliced( ctx, p_dst, p_src, i_order, i_field );
+}
diff --git a/modules/video_filter/deinterlace/algo_yadif.h b/modules/video_filter/deinterlace/algo_yadif.h
index 483ca48f67..e2b644d33e 100644
--- a/modules/video_filter/deinterlace/algo_yadif.h
+++ b/modules/video_filter/deinterlace/algo_yadif.h
@@ -31,8 +31,8 @@
  */
 
 /* Forward declarations */
-struct filter_t;
 struct picture_t;
+struct deinterlace_thread_ctx;
 
 /*****************************************************************************
  * Functions
@@ -82,12 +82,14 @@ struct picture_t;
  * @retval VLC_EGENERIC Frame dropped; only occurs at the second frame after start.
  * @see Deinterlace()
  */
-int RenderYadif( filter_t *p_filter, picture_t *p_dst, picture_t *p_src,
+int RenderYadif( struct deinterlace_thread_ctx *,
+                 picture_t *p_dst, picture_t *p_src,
                  int i_order, int i_field );
 
 /**
  * Same as RenderYadif() but with no temporal references
  */
-int RenderYadifSingle( filter_t *p_filter, picture_t *p_dst, picture_t *p_src );
+int RenderYadifSingle( struct deinterlace_thread_ctx *,
+                       picture_t *p_dst, picture_t *p_src );
 
 #endif
diff --git a/modules/video_filter/deinterlace/common.c b/modules/video_filter/deinterlace/common.c
index f0bc3e830e..136f9881c6 100644
--- a/modules/video_filter/deinterlace/common.c
+++ b/modules/video_filter/deinterlace/common.c
@@ -32,8 +32,10 @@
 
 #include "common.h"
 
-void InitDeinterlacingContext( struct deinterlace_ctx *p_context )
+void InitDeinterlacingContext( struct deinterlace_ctx *p_context, void *priv )
 {
+    p_context->priv = priv;
+
     p_context->settings.b_double_rate = false;
     p_context->settings.b_half_height = false;
     p_context->settings.b_use_frame_history = false;
@@ -49,6 +51,16 @@ void InitDeinterlacingContext( struct deinterlace_ctx *p_context )
                                   cannot have offset) */
     for( int i = 0; i < HISTORY_SIZE; i++ )
         p_context->pp_history[i] = NULL;
+
+    for( unsigned i = 0; i < DEINTERLACE_MAX_THREADS; i++ )
+    {
+        p_context->threadctxes[i].ctx = p_context;
+        p_context->threadctxes[i].i_thread_number = i;
+        p_context->threadctxes[i].b_terminate = false;
+        vlc_sem_init(&p_context->threadctxes[i].wait_sem, 0);
+        vlc_sem_init(&p_context->threadctxes[i].done_sem, 0);
+    }
+    p_context->i_thread_count = 1;
 }
 
 void FlushDeinterlacing(struct deinterlace_ctx *p_context)
@@ -124,6 +136,33 @@ void GetDeinterlacingOutput( const struct deinterlace_ctx *p_context,
 
 #define CUSTOM_PTS -1
 
+static int FanoutRendering( struct deinterlace_ctx *p_context,
+                            picture_t *p_dst, picture_t *p_pic,
+                            bool b_ordered, int order, int i_field )
+{ /* Render one output picture through all i_thread_count thread contexts. */
+    for( unsigned i=1; i<p_context->i_thread_count; i++ ) /* contexts 1..n-1: publish the job, then signal */
+    {
+        p_context->threadctxes[i].params.p_dst = p_dst;
+        p_context->threadctxes[i].params.p_src = p_pic;
+        p_context->threadctxes[i].params.i_order = order;
+        p_context->threadctxes[i].params.i_field = i_field;
+        p_context->threadctxes[i].params.b_ordered = b_ordered;
+        vlc_sem_post(&p_context->threadctxes[i].wait_sem); /* SliceThread() blocks on this semaphore */
+    }
+
+    int ret; /* result of the context-0 render only; worker return values are not collected */
+    if( b_ordered ) /* context 0 is rendered directly on the calling thread */
+        ret = p_context->func.pf_render_ordered( &p_context->threadctxes[0],
+                p_dst, p_pic, order, i_field );
+    else
+        ret = p_context->func.pf_render_single_pic( &p_context->threadctxes[0],
+                p_dst, p_pic );
+    for( unsigned i=1; i<p_context->i_thread_count; i++ ) /* one done_sem wait per job posted above */
+        vlc_sem_wait(&p_context->threadctxes[i].done_sem);
+
+    return ret;
+}
+
 picture_t *DoDeinterlacing( filter_t *p_filter,
                             struct deinterlace_ctx *p_context, picture_t *p_pic )
 {
@@ -264,22 +303,22 @@ picture_t *DoDeinterlacing( filter_t *p_filter,
     /* Render */
     if ( !p_context->settings.b_double_rate )
     {
-        if ( p_context->pf_render_single_pic( p_filter, p_dst[0], p_pic ) )
+        if( FanoutRendering( p_context, p_dst[0], p_pic, false, 0, 0 ) )
             goto drop;
     }
     else
     {
         /* Note: RenderIVTC will automatically drop the duplicate frames
                  produced by IVTC. This is part of normal operation. */
-        if ( p_context->pf_render_ordered( p_filter, p_dst[0], p_pic,
-                                           0, !b_top_field_first ) )
+        if( FanoutRendering( p_context, p_dst[0], p_pic, true,
+                             0, !b_top_field_first ) )
             goto drop;
         if ( p_dst[1] )
-            p_context->pf_render_ordered( p_filter, p_dst[1], p_pic,
-                                          1, b_top_field_first );
+            FanoutRendering( p_context, p_dst[1], p_pic, true,
+                             1, b_top_field_first );
         if ( p_dst[2] )
-            p_context->pf_render_ordered( p_filter, p_dst[2], p_pic,
-                                          2, !b_top_field_first );
+            FanoutRendering( p_context, p_dst[2], p_pic, true,
+                             2, !b_top_field_first );
     }
 
     if ( p_context->settings.b_custom_pts )
diff --git a/modules/video_filter/deinterlace/common.h b/modules/video_filter/deinterlace/common.h
index 144523647c..2477bfbe5d 100644
--- a/modules/video_filter/deinterlace/common.h
+++ b/modules/video_filter/deinterlace/common.h
@@ -29,6 +29,24 @@
 
 #include <assert.h>
 
+struct deinterlace_thread_ctx /* per-thread state for sliced rendering */
+{
+    struct deinterlace_ctx *ctx; /* owning context, set by InitDeinterlacingContext() */
+    vlc_thread_t id; /* thread handle; presumably set where the worker is created (not shown here) */
+    vlc_sem_t wait_sem; /* posted by FanoutRendering() when params holds a new job */
+    vlc_sem_t done_sem; /* posted by SliceThread() after the render call returns */
+    unsigned i_thread_number; /* index in deinterlace_ctx.threadctxes[]; 0 is used by the calling thread */
+    bool b_terminate; /* tested by SliceThread() after each wake-up; true makes it exit */
+    struct /* job description, written by FanoutRendering() before posting wait_sem */
+    {
+        picture_t *p_dst;
+        picture_t *p_src;
+        int i_order; /* passed on only when b_ordered is true */
+        int i_field; /* passed on only when b_ordered is true */
+        bool b_ordered; /* true: call pf_render_ordered, false: pf_render_single_pic */
+    } params;
+} ;
+
 /**
  * \file
  * Common macros for the VLC deinterlacer.
@@ -53,6 +71,15 @@ typedef struct {
 
 #define METADATA_SIZE (3)
 #define HISTORY_SIZE (3)
+union render_functions { /* render callback; DoDeinterlacing() uses pf_render_ordered when settings.b_double_rate is set, pf_render_single_pic otherwise */
+    /**
+     * @param i_order Temporal field number: 0 = first, 1 = second, 2 = repeat first.
+     * @param i_field Keep which field? 0 = top field, 1 = bottom field.
+     */
+    int (*pf_render_ordered)(struct deinterlace_thread_ctx *, picture_t *p_dst, picture_t *p_pic,
+                             int order, int i_field);
+    int (*pf_render_single_pic)(struct deinterlace_thread_ctx *, picture_t *p_dst, picture_t *p_pic);
+};
 
 typedef struct  {
     bool b_double_rate;       /**< Shall we double the framerate? */
@@ -61,8 +88,12 @@ typedef struct  {
     bool b_half_height;       /**< Shall be divide the height by 2 */
 } deinterlace_algo;
 
+#define DEINTERLACE_MAX_THREADS 8
 struct deinterlace_ctx
 {
+    /* Caller level context */
+    void *priv;
+
     /* Algorithm behaviour flags */
     deinterlace_algo   settings;
 
@@ -79,20 +110,16 @@ struct deinterlace_ctx
     /** Input frame history buffer for algorithms with temporal filtering. */
     picture_t *pp_history[HISTORY_SIZE];
 
-    union {
-        /**
-         * @param i_order Temporal field number: 0 = first, 1 = second, 2 = repeat first.
-         * @param i_field Keep which field? 0 = top field, 1 = bottom field.
-         */
-        int (*pf_render_ordered)(filter_t *, picture_t *p_dst, picture_t *p_pic,
-                                 int order, int i_field);
-        int (*pf_render_single_pic)(filter_t *, picture_t *p_dst, picture_t *p_pic);
-    };
+    union render_functions func;
+
+    /** For sliced rendering */
+    unsigned i_thread_count;
+    struct deinterlace_thread_ctx threadctxes[DEINTERLACE_MAX_THREADS];
 };
 
 #define DEINTERLACE_DST_SIZE 3
 
-void InitDeinterlacingContext( struct deinterlace_ctx * );
+void InitDeinterlacingContext( struct deinterlace_ctx *, void * );
 
 /**
  * @brief Get the field duration based on the previous fields or the frame rate
diff --git a/modules/video_filter/deinterlace/deinterlace.c b/modules/video_filter/deinterlace/deinterlace.c
index 01d7ad5e61..bc432cb44c 100644
--- a/modules/video_filter/deinterlace/deinterlace.c
+++ b/modules/video_filter/deinterlace/deinterlace.c
@@ -313,6 +313,8 @@ vlc_module_begin ()
                 PHOSPHOR_DIMMER_LONGTEXT, true )
         change_integer_list( phosphor_dimmer_list, phosphor_dimmer_list_text )
         change_safe ()
+    add_integer(FILTER_CFG_PREFIX "threads", 1, "sliced filtering", NULL, true )
+        change_integer_range( 1, DEINTERLACE_MAX_THREADS )
     add_shortcut( "deinterlace" )
     set_callbacks( Open, Close )
 vlc_module_end ()
@@ -340,37 +342,40 @@ struct filter_mode_t
 {
     const char           *psz_mode;
     union {
-    int (*pf_render_ordered)(filter_t *, picture_t *p_dst, picture_t *p_pic,
+    int (*pf_render_ordered)(struct deinterlace_thread_ctx *,
+                             picture_t *p_dst, picture_t *p_pic,
                              int order, int i_field);
-    int (*pf_render_single_pic)(filter_t *, picture_t *p_dst, picture_t *p_pic);
+    int (*pf_render_single_pic)(struct deinterlace_thread_ctx *,
+                                picture_t *p_dst, picture_t *p_pic);
     };
     deinterlace_algo     settings;
     bool                 can_pack;         /**< can handle packed pixel */
     bool                 b_high_bit_depth; /**< can handle high bit depth */
+    unsigned             i_max_threads;
 };
 static struct filter_mode_t filter_mode [] = {
     { "discard", .pf_render_single_pic = RenderDiscard,
-                 { false, false, false, true }, true, true },
+                 { false, false, false, true }, true, true, 1 },
     { "bob", .pf_render_ordered = RenderBob,
-                 { true, false, false, false }, true, true },
+                 { true, false, false, false }, true, true, 1 },
     { "progressive-scan", .pf_render_ordered = RenderBob,
-                 { true, false, false, false }, true, true },
+                 { true, false, false, false }, true, true, 1 },
     { "linear", .pf_render_ordered = RenderLinear,
-                 { true, false, false, false }, true, true },
+                 { true, false, false, false }, true, true, 1 },
     { "mean", .pf_render_single_pic = RenderMean,
-                 { false, false, false, true }, true, true },
+                 { false, false, false, true }, true, true, 1 },
     { "blend", .pf_render_single_pic = RenderBlend,
-                 { false, false, false, false }, true, true },
+                 { false, false, false, false }, true, true, 1 },
     { "yadif", .pf_render_single_pic = RenderYadifSingle,
-                 { false, true, false, false }, false, true },
+                 { false, true, false, false }, false, true, DEINTERLACE_MAX_THREADS },
     { "yadif2x", .pf_render_ordered = RenderYadif,
-                 { true, true, false, false }, false, true },
+                 { true, true, false, false }, false, true, DEINTERLACE_MAX_THREADS },
     { "x", .pf_render_single_pic = RenderX,
-                 { false, false, false, false }, false, false },
+                 { false, false, false, false }, false, false, 1 },
     { "phosphor", .pf_render_ordered = RenderPhosphor,
-                 { true, true, false, false }, false, false },
+                 { true, true, false, false }, false, false, 1 },
     { "ivtc", .pf_render_single_pic = RenderIVTC,
-                 { false, true, true, false }, false, false },
+                 { false, true, true, false }, false, false, 1 },
 };
 
 /**
@@ -410,7 +415,8 @@ static void SetFilterMethod( filter_t *p_filter, const char *mode, bool pack )
 
             msg_Dbg( p_filter, "using %s deinterlace method", mode );
             p_sys->context.settings = filter_mode[i].settings;
-            p_sys->context.pf_render_ordered = filter_mode[i].pf_render_ordered;
+            p_sys->context.func.pf_render_ordered = filter_mode[i].pf_render_ordered;
+            p_sys->context.i_thread_count = filter_mode[i].i_max_threads;
             return;
         }
     }
@@ -462,7 +468,8 @@ void Flush( filter_t *p_filter )
     filter_sys_t *p_sys = p_filter->p_sys;
     FlushDeinterlacing(&p_sys->context);
 
-    IVTCClearState( p_filter );
+    for( unsigned i=0; i<p_sys->context.i_thread_count; i++ )
+        IVTCClearState( &p_sys->context.threadctxes[i] );
 }
 
 /*****************************************************************************
@@ -482,6 +489,31 @@ int Mouse( filter_t *p_filter,
 }
 
 
+static void * SliceThread( void *p_this ) /* worker thread entry point; p_this is a struct deinterlace_thread_ctx */
+{
+    struct deinterlace_thread_ctx *ctx = p_this;
+    /* Serve one render request per wake-up until b_terminate is observed. */
+    for( ;; )
+    {
+        vlc_sem_wait( &ctx->wait_sem ); /* block until wait_sem is posted */
+        if( ctx->b_terminate ) /* only tested after a wake-up; Close() sets it before posting */
+            break;
+        if( ctx->params.b_ordered ) /* choose which render callback of the shared context to call */
+            ctx->ctx->func.pf_render_ordered( ctx,
+                                              ctx->params.p_dst,
+                                              ctx->params.p_src,
+                                              ctx->params.i_order,
+                                              ctx->params.i_field );
+        else
+            ctx->ctx->func.pf_render_single_pic( ctx,
+                                                 ctx->params.p_dst,
+                                                 ctx->params.p_src );
+        vlc_sem_post( &ctx->done_sem ); /* report completion; the callback's return value is not propagated */
+    }
+
+    return NULL; /* no result is returned to the joiner */
+}
+
 /*****************************************************************************
  * Open
  *****************************************************************************/
@@ -528,14 +560,35 @@ notsupp:
 
     p_sys->chroma = chroma;
 
-    InitDeinterlacingContext( &p_sys->context );
+    InitDeinterlacingContext( &p_sys->context, p_filter );
 
     config_ChainParse( p_filter, FILTER_CFG_PREFIX, ppsz_filter_options,
                        p_filter->p_cfg );
     char *psz_mode = var_InheritString( p_filter, FILTER_CFG_PREFIX "mode" );
     SetFilterMethod( p_filter, psz_mode, packed );
 
-    IVTCClearState( p_filter );
+    unsigned i_threads = var_InheritInteger( p_filter, FILTER_CFG_PREFIX "threads" );
+    if( i_threads > 1 )
+    {
+        assert(DEINTERLACE_MAX_THREADS >= p_sys->context.i_thread_count);
+        i_threads = __MIN(i_threads, p_sys->context.i_thread_count);
+        p_sys->context.i_thread_count = i_threads;
+        for( unsigned i=1; i<p_sys->context.i_thread_count; i++ )
+        {
+            if( vlc_clone( &p_sys->context.threadctxes[i].id,
+                           SliceThread,
+                           &p_sys->context.threadctxes[i],
+                           VLC_THREAD_PRIORITY_VIDEO ) )
+            {
+                p_sys->context.i_thread_count = i;
+                break;
+            }
+        }
+    }
+    else p_sys->context.i_thread_count = 1;
+
+    for( unsigned i=0; i<p_sys->context.i_thread_count; i++ )
+        IVTCClearState( &p_sys->context.threadctxes[i] );
 
 #if defined(CAN_COMPILE_C_ALTIVEC)
     if( pixel_size == 1 && vlc_CPU_ALTIVEC() )
@@ -661,6 +714,15 @@ notsupp:
 void Close( vlc_object_t *p_this )
 {
     filter_t *p_filter = (filter_t*)p_this;
+    filter_sys_t *p_sys = p_filter->p_sys;
+
+    for( unsigned i=1; i<p_sys->context.i_thread_count; i++ )
+    {
+        p_sys->context.threadctxes[i].b_terminate = true;
+        vlc_sem_post(&p_sys->context.threadctxes[i].wait_sem);
+    }
+    for( unsigned i=1; i<p_sys->context.i_thread_count; i++ )
+        vlc_join( p_sys->context.threadctxes[i].id, NULL );
 
     Flush( p_filter );
     free( p_filter->p_sys );
diff --git a/modules/video_filter/deinterlace/deinterlace.h b/modules/video_filter/deinterlace/deinterlace.h
index 959cd6ae96..8269f14d9e 100644
--- a/modules/video_filter/deinterlace/deinterlace.h
+++ b/modules/video_filter/deinterlace/deinterlace.h
@@ -34,6 +34,7 @@ struct vlc_object_t;
 
 #include <vlc_common.h>
 #include <vlc_mouse.h>
+#include <vlc_threads.h>
 
 /* Local algorithm headers */
 #include "algo_basic.h"
@@ -82,6 +83,7 @@ typedef struct
         phosphor_sys_t phosphor; /**< Phosphor algorithm state. */
         ivtc_sys_t ivtc;         /**< IVTC algorithm state. */
     };
+
 } filter_sys_t;
 
 #endif
-- 
2.25.1



More information about the vlc-devel mailing list