[vlc-devel] [PATCH] vout: opengl: add NV12/NV21 support

Thomas Guillem thomas at gllm.fr
Sun Dec 4 11:13:43 CET 2016



On Sat, Dec 3, 2016, at 13:10, Rémi Denis-Courmont wrote:
> On December 1, 2016 8:15:52 AM EST, Thomas Guillem <thomas at gllm.fr>
> wrote:
> >---
> >modules/video_output/opengl.c | 152
> >+++++++++++++++++++++++++++++++++---------
> > 1 file changed, 121 insertions(+), 31 deletions(-)
> >
> >diff --git a/modules/video_output/opengl.c
> >b/modules/video_output/opengl.c
> >index 2b994bd..7ae7a92 100644
> >--- a/modules/video_output/opengl.c
> >+++ b/modules/video_output/opengl.c
> >@@ -104,6 +104,9 @@
> > #ifndef GL_R16
> > #define GL_R16 0
> > #endif
> >+#ifndef GL_RG
> >+#define GL_RG 0
> >+#endif
> > 
> > #define SPHERE_RADIUS 1.f
> > 
> >@@ -131,10 +134,11 @@ struct vout_display_opengl_t {
> > 
> >     video_format_t fmt;
> >     const vlc_chroma_description_t *chroma;
> >+    unsigned samples_per_pixel[PICTURE_PLANE_MAX];
> > 
> >     int        tex_target;
> >-    int        tex_format;
> >-    int        tex_internal;
> >+    int        tex_format[PICTURE_PLANE_MAX];
> >+    int        tex_internal[PICTURE_PLANE_MAX];
> >     int        tex_type;
> > 
> >     int        tex_width[PICTURE_PLANE_MAX];
> >@@ -322,7 +326,7 @@ static void
> >BuildYUVFragmentShader(vout_display_opengl_t *vgl,
> >     };
> > 
> >  /* Basic linear YUV -> RGB conversion using bilinear interpolation */
> >-    const char *template_glsl_yuv =
> >+    static const char *template_glsl_yuv3 =
> >         "#version " GLSL_VERSION "\n"
> >         PRECISION
> >         "uniform sampler2D Texture0;"
> >@@ -350,14 +354,54 @@ static void
> >BuildYUVFragmentShader(vout_display_opengl_t *vgl,
> >         " result = (z * Coefficient[2]) + result;"
> >         " gl_FragColor = result;"
> >         "}";
> >-    bool swap_uv = fmt->i_chroma == VLC_CODEC_YV12 ||
> >-                   fmt->i_chroma == VLC_CODEC_YV9;
> >+    static const char *template_glsl_yuv2 =
> >+        "#version " GLSL_VERSION "\n"
> >+        PRECISION
> >+        "uniform sampler2D Texture0;"
> >+        "uniform sampler2D Texture1;"
> >+        "uniform vec4      Coefficient[4];"
> >+        "varying vec4      TexCoord0,TexCoord1;"
> >+
> >+        "void main(void) {"
> >+        " vec4 x,y,z,result;"
> >+
> >+        " float val0 = texture2D(Texture0, TexCoord0.st).x;"
> >+        " float val1 = texture2D(Texture1, TexCoord1.st).x;"
> >+        /* The second sample on the second texture is either on 'g'
> >(for GL_RG)
> >+         * or on 'a' (for GL_LUMINANCE_ALPHA) */
> >+        " float val2 = texture2D(Texture1, TexCoord1.st).%c;"
> >+        " x  = vec4(val0, val0, val0, 1);"
> >+        " %c = vec4(val1, val1, val1, 1);"
> >+        " %c = vec4(val2, val2, val2, 1l);"
> >+
> >+        " result = x * Coefficient[0] + Coefficient[3];"
> >+        " result = (y * Coefficient[1]) + result;"
> >+        " result = (z * Coefficient[2]) + result;"
> >+        " gl_FragColor = result;"
> >+        "}";
> > 
> >     char *code;
> >-    if (asprintf(&code, template_glsl_yuv,
> >-                 swap_uv ? 'z' : 'y',
> >-                 swap_uv ? 'y' : 'z') < 0)
> >-        return;
> >+    if (vgl->chroma->plane_count == 3)
> >+    {
> >+        bool swap_uv = fmt->i_chroma == VLC_CODEC_YV12 ||
> >+                       fmt->i_chroma == VLC_CODEC_YV9;
> >+        if (asprintf(&code, template_glsl_yuv3,
> >+                     swap_uv ? 'z' : 'y',
> >+                     swap_uv ? 'y' : 'z') < 0)
> >+            return;
> >+    }
> >+    else
> >+    {
> >+        assert(vgl->tex_format[0] == GL_RED || vgl->tex_format[0] ==
> >GL_LUMINANCE);
> >+        assert(vgl->tex_format[1] == GL_RG || vgl->tex_format[1] ==
> >GL_LUMINANCE_ALPHA);
> >+
> >+        bool swap_uv = fmt->i_chroma == VLC_CODEC_NV21;
> >+        if (asprintf(&code, template_glsl_yuv2,
> >+                     vgl->tex_format[1] == GL_RG ? 'g' : 'a',
> >+                     swap_uv ? 'z' : 'y',
> >+                     swap_uv ? 'y' : 'z') < 0)
> >+            return;
> >+    }
> > 
> >     for (int i = 0; i < 4; i++) {
> >         float correction = i < 3 ? yuv_range_correction : 1.f;
> >@@ -485,9 +529,11 @@ vout_display_opengl_t
> >*vout_display_opengl_New(video_format_t *fmt,
> >  const bool oglv3 = strverscmp((const char *)ogl_version, "3.0") >= 0;
> >     const int yuv_plane_texformat = oglv3 ? GL_RED : GL_LUMINANCE;
> >    const int yuv_plane_texformat_16 = oglv3 ? GL_R16 : GL_LUMINANCE16;
> >+    const int yuv_plane_texformat_2samples = oglv3 ? GL_RG :
> >GL_LUMINANCE_ALPHA;
> > #else
> >     bool supports_shaders = false;
> >     const int yuv_plane_texformat = GL_LUMINANCE;
> >+    const int yuv_plane_texformat_2samples = GL_LUMINANCE_ALPHA;
> > #endif
> > 
> > #if USE_OPENGL_ES == 2
> >@@ -596,10 +642,11 @@ vout_display_opengl_t
> >*vout_display_opengl_New(video_format_t *fmt,
> >     vgl->fmt.i_gmask  = 0x0000ff00;
> >     vgl->fmt.i_bmask  = 0x00ff0000;
> > #   endif
> >-    vgl->tex_target   = GL_TEXTURE_2D;
> >-    vgl->tex_format   = GL_RGBA;
> >-    vgl->tex_internal = GL_RGBA;
> >-    vgl->tex_type     = GL_UNSIGNED_BYTE;
> >+    vgl->tex_target      = GL_TEXTURE_2D;
> >+    vgl->tex_format[0]   = GL_RGBA;
> >+    vgl->tex_internal[0] = GL_RGBA;
> >+    vgl->samples_per_pixel[0] = 1;
> >+    vgl->tex_type        = GL_UNSIGNED_BYTE;
> >     /* Use YUV if possible and needed */
> >     bool need_fs_yuv = false;
> >     bool need_fs_xyz = false;
> >@@ -607,16 +654,41 @@ vout_display_opengl_t
> >*vout_display_opengl_New(video_format_t *fmt,
> >    bool need_vs = fmt->projection_mode != PROJECTION_MODE_RECTANGULAR;
> >     float yuv_range_correction = 1.0;
> > 
> >-    if (max_texture_units >= 3 && supports_shaders &&
> >vlc_fourcc_IsYUV(fmt->i_chroma)) {
> >+    if (max_texture_units >= 2 && supports_shaders &&
> >vlc_fourcc_IsYUV(fmt->i_chroma)) {
> >   const vlc_fourcc_t *list = vlc_fourcc_GetYUVFallback(fmt->i_chroma);
> >         while (*list) {
> >const vlc_chroma_description_t *dsc =
> >vlc_fourcc_GetChromaDescription(*list);
> >-            if (dsc && dsc->plane_count == 3 && dsc->pixel_size == 1)
> >{
> >+            if (!dsc)
> >+                continue;
> >+            if ((unsigned) max_texture_units >= dsc->plane_count
> >+                && dsc->plane_count <= 3 && dsc->pixel_size == 1) {
> >+                /* Accept yuv format with 2 or 3 planes if there are
> >enough
> >+                 * textures */
> >+
> >+                if (dsc->plane_count == 2)
> >+                {
> >+                    if (*list != VLC_CODEC_NV12 && *list !=
> >VLC_CODEC_NV21)
> >+                        continue;
> >+                    vgl->samples_per_pixel[0] = 1;
> >+                    vgl->samples_per_pixel[1] = 2;
> >+                    vgl->tex_format[0]   = yuv_plane_texformat;
> >+                    vgl->tex_internal[0] = yuv_plane_texformat;
> >+                    vgl->tex_format[1]   =
> >yuv_plane_texformat_2samples;
> >+                    vgl->tex_internal[1] =
> >yuv_plane_texformat_2samples;
> >+                }
> >+                else
> >+                {
> >+                    for (unsigned i = 0; i < dsc->plane_count; ++i)
> >+                    {
> >+                        vgl->tex_format[i]   = yuv_plane_texformat;
> >+                        vgl->tex_internal[i] = yuv_plane_texformat;
> >+                        vgl->samples_per_pixel[i] = 1;
> >+                    }
> >+                }
> >+
> >                 need_fs_yuv       = true;
> >                 vgl->fmt          = *fmt;
> >                 vgl->fmt.i_chroma = *list;
> >-                vgl->tex_format   = yuv_plane_texformat;
> >-                vgl->tex_internal = yuv_plane_texformat;
> >                 vgl->tex_type     = GL_UNSIGNED_BYTE;
> >                 yuv_range_correction = 1.0;
> >                 break;
> >@@ -629,9 +701,13 @@ vout_display_opengl_t
> >*vout_display_opengl_New(video_format_t *fmt,
> >                 need_fs_yuv       = true;
> >                 vgl->fmt          = *fmt;
> >                 vgl->fmt.i_chroma = *list;
> >-                vgl->tex_format   = yuv_plane_texformat;
> >-                vgl->tex_internal = yuv_plane_texformat_16;
> >                 vgl->tex_type     = GL_UNSIGNED_SHORT;
> >+                for (unsigned i = 0; i < dsc->plane_count; ++i)
> >+                {
> >+                    vgl->tex_format[i]   = yuv_plane_texformat;
> >+                    vgl->tex_internal[i] = yuv_plane_texformat_16;
> >+                    vgl->samples_per_pixel[i] = 1;
> >+                }
> >yuv_range_correction = (float)((1 << 16) - 1) / ((1 << dsc->pixel_bits)
> >- 1);
> >                 break;
> > #endif
> >@@ -639,23 +715,26 @@ vout_display_opengl_t
> >*vout_display_opengl_New(video_format_t *fmt,
> >             list++;
> >         }
> >     }
> >-
> >     if (fmt->i_chroma == VLC_CODEC_XYZ12) {
> >         need_fs_xyz       = true;
> >         vgl->fmt          = *fmt;
> >         vgl->fmt.i_chroma = VLC_CODEC_XYZ12;
> >-        vgl->tex_format   = GL_RGB;
> >-        vgl->tex_internal = GL_RGB;
> >         vgl->tex_type     = GL_UNSIGNED_SHORT;
> >+        vgl->tex_format[0]           = GL_RGB;
> >+        vgl->tex_internal[0]         = GL_RGB;
> >+        vgl->samples_per_pixel[0]    = 1;
> >     }
> >+
> >     vgl->chroma = vlc_fourcc_GetChromaDescription(vgl->fmt.i_chroma);
> >     assert(vgl->chroma != NULL);
> >     vgl->use_multitexture = vgl->chroma->plane_count > 1;
> > 
> >     /* Texture size */
> >     for (unsigned j = 0; j < vgl->chroma->plane_count; j++) {
> >-        int w = vgl->fmt.i_visible_width  * vgl->chroma->p[j].w.num /
> >vgl->chroma->p[j].w.den;
> >-        int h = vgl->fmt.i_visible_height * vgl->chroma->p[j].h.num /
> >vgl->chroma->p[j].h.den;
> >+        int w = vgl->fmt.i_visible_width * vgl->chroma->p[j].w.num
> >+              / vgl->chroma->p[j].w.den / vgl->samples_per_pixel[j];
> >+        int h = vgl->fmt.i_visible_height * vgl->chroma->p[j].h.num
> >+              / vgl->chroma->p[j].h.den;
> >         if (vgl->supports_npot) {
> >             vgl->tex_width[j]  = w;
> >             vgl->tex_height[j] = h;
> >@@ -937,9 +1016,9 @@ picture_pool_t
> >*vout_display_opengl_GetPool(vout_display_opengl_t *vgl, unsigned
> > glTexParameteri(vgl->tex_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
> > 
> >       /* Call glTexImage2D only once, and use glTexSubImage2D later */
> >-            glTexImage2D(vgl->tex_target, 0,
> >-                         vgl->tex_internal, vgl->tex_width[j],
> >vgl->tex_height[j],
> >-                         0, vgl->tex_format, vgl->tex_type, NULL);
> >+            glTexImage2D(vgl->tex_target, 0, vgl->tex_internal[j],
> >+                         vgl->tex_width[j], vgl->tex_height[j], 0,
> >+                         vgl->tex_format[j], vgl->tex_type, NULL);
> >         }
> >     }
> > 
> >@@ -1034,8 +1113,13 @@ int
> >vout_display_opengl_Prepare(vout_display_opengl_t *vgl,
> > 
> >Upload(vgl, picture->format.i_visible_width, vgl->fmt.i_visible_height,
> >                vgl->fmt.i_width, vgl->fmt.i_height,
> >-               vgl->chroma->p[j].w.num, vgl->chroma->p[j].w.den,
> >vgl->chroma->p[j].h.num, vgl->chroma->p[j].h.den,
> >-               picture->p[j].i_pitch, picture->p[j].i_pixel_pitch, 0,
> >picture->p[j].p_pixels, vgl->tex_target, vgl->tex_format,
> >vgl->tex_type);
> >+               vgl->chroma->p[j].w.num,
> >+               vgl->chroma->p[j].w.den * vgl->samples_per_pixel[j],
> >+               vgl->chroma->p[j].h.num, vgl->chroma->p[j].h.den,
> >+               picture->p[j].i_pitch,
> >+               picture->p[j].i_pixel_pitch *
> >vgl->samples_per_pixel[j], 0,
> >+               picture->p[j].p_pixels, vgl->tex_target,
> >vgl->tex_format[j],
> >+               vgl->tex_type);
> >     }
> > 
> >     int         last_count = vgl->region_count;
> >@@ -1617,6 +1701,10 @@ static void
> >DrawWithShaders(vout_display_opengl_t *vgl,
> >vgl->Uniform1i(vgl->GetUniformLocation(vgl->program[0], "Texture0"),
> >0);
> >vgl->Uniform1i(vgl->GetUniformLocation(vgl->program[0], "Texture1"),
> >1);
> >vgl->Uniform1i(vgl->GetUniformLocation(vgl->program[0], "Texture2"),
> >2);
> >+        } else if (vgl->chroma->plane_count == 2) {
> >+            vgl->Uniform4fv(vgl->GetUniformLocation(vgl->program[0],
> >"Coefficient"), 4, vgl->local_value);
> >+            vgl->Uniform1i(vgl->GetUniformLocation(vgl->program[0],
> >"Texture0"), 0);
> >+            vgl->Uniform1i(vgl->GetUniformLocation(vgl->program[0],
> >"Texture1"), 1);
> >         }
> >         else if (vgl->chroma->plane_count == 1) {
> >vgl->Uniform1i(vgl->GetUniformLocation(vgl->program[0], "Texture0"),
> >0);
> >@@ -1747,8 +1835,10 @@ int
> >vout_display_opengl_Display(vout_display_opengl_t *vgl,
> >         float scale_w, scale_h;
> > 
> >         if (vgl->tex_target == GL_TEXTURE_2D) {
> >-            scale_w = (float)vgl->chroma->p[j].w.num /
> >vgl->chroma->p[j].w.den / vgl->tex_width[j];
> >-            scale_h = (float)vgl->chroma->p[j].h.num /
> >vgl->chroma->p[j].h.den / vgl->tex_height[j];
> >+            scale_w = (float)vgl->chroma->p[j].w.num /
> >vgl->chroma->p[j].w.den
> >+                    / vgl->samples_per_pixel[j] / vgl->tex_width[j];
> >+            scale_h = (float)vgl->chroma->p[j].h.num /
> >vgl->chroma->p[j].h.den
> >+                    / vgl->tex_height[j];
> > 
> >         } else {
> >             scale_w = 1.0;
> >-- 
> >2.10.2
> >
> >_______________________________________________
> >vlc-devel mailing list
> >To unsubscribe or modify your subscription options:
> >https://mailman.videolan.org/listinfo/vlc-devel
> 
> Hi,
> 
> I have not had time to read my OpenGL paper book, so I can't comment on
> the OpenGL aspects. I do think that the file is getting much too big,
> but I don't like meaningless splits with tight dependencies between files
> either :/
> 
> I would think, though, that chroma conversion belongs in a separate
> conversion plugin. It might not be feasible without an opaque OpenGL
> texture chroma type, however.
> 
> Also, NV12 specifically is a hardware decoder output format. For sure, it
> needs to be supported eventually... But does it really make sense at this
> point, given that the OpenGL output does not support pass-through and the
> NV12 will be copied anyway? If copied, then conversion to I420 can be done
> on the fly for free in SIMD. IIRC, Laurent even implemented that for x86
> already.

Not really free: there is an extra memcpy when doing NV12 -> I420.
I did some really quick benchmarks, and it seems that NV12 -SIMD-> I420
-shader-> RGB is faster than NV12 -shader-> RGB. I don't really
understand why yet.

> -- 
> Rémi Denis-Courmont
> _______________________________________________
> vlc-devel mailing list
> To unsubscribe or modify your subscription options:
> https://mailman.videolan.org/listinfo/vlc-devel


More information about the vlc-devel mailing list