[vlc-devel] [PATCH 5/7] opengl: avoid intermediate transpose step

Wed Jan 22 13:33:47 CET 2020

Initialize the conversion matrix directly in column-major order, writing
it straight into tc->yuv_coefficients, so that the separate transpose
loop is no longer needed.

Note: we cannot simply pass GL_TRUE as the transpose parameter of
glUniformMatrix4fv, because OpenGL ES 2 does not support it:

> GL_INVALID_VALUE is generated if transpose is not GL_FALSE.

<https://www.khronos.org/registry/OpenGL-Refpages/es2.0/xhtml/glUniform.xml>
---
 .../video_output/opengl/fragment_shaders.c    | 23 +++++++++----------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/modules/video_output/opengl/fragment_shaders.c b/modules/video_output/opengl/fragment_shaders.c
index fa8c238d86..efc543e8e1 100644
--- a/modules/video_output/opengl/fragment_shaders.c
+++ b/modules/video_output/opengl/fragment_shaders.c
@@ -96,7 +96,9 @@ init_conv_matrix(float conv_matrix_out[],
             space_matrix = MATRIX_BT709;
     }
 
-    /* Init the conversion matrix in row-major order. */
+    /* Init the conversion matrix in column-major order (OpenGL expects
+     * column-major order by default, and OpenGL ES does not support row-major
+     * order at all). */
 
     const float *range_matrix = color_range == COLOR_RANGE_FULL
                               ? MATRIX_COLOR_RANGE_FULL
@@ -109,15 +111,17 @@ init_conv_matrix(float conv_matrix_out[],
             float sum = 0;
             for (int k = 0; k < 3; ++k)
                 sum += space_matrix[y * 3 + k] * range_matrix[k * 4 + x];
-            conv_matrix_out[y * 4 + x] = sum;
+            /* Notice the swapped indices: in column-major storage, the
+             * element at row y, column x lives at index x * 4 + y. */
+            conv_matrix_out[x * 4 + y] = sum;
         }
     }
 
-    /* Add a row to fill a 4x4 matrix.
+    /* Add a row to fill a 4x4 matrix (remember it's in column-major order).
      * (non-square matrices are not supported on old OpenGL ES versions) */
-    conv_matrix_out[12] = 0;
-    conv_matrix_out[13] = 0;
-    conv_matrix_out[14] = 0;
+    conv_matrix_out[3] = 0;
+    conv_matrix_out[7] = 0;
+    conv_matrix_out[11] = 0;
     conv_matrix_out[15] = 1;
 }
 
@@ -129,7 +133,7 @@ tc_yuv_base_init(opengl_tex_converter_t *tc, vlc_fourcc_t chroma,
 {
     /* The current implementation always converts from limited to full range. */
     const video_color_range_t range = COLOR_RANGE_LIMITED;
-    float matrix[4*4];
+    float *matrix = tc->yuv_coefficients;
     init_conv_matrix(matrix, yuv_space, range);
 
     if (desc->pixel_size == 2)
@@ -162,11 +166,6 @@ tc_yuv_base_init(opengl_tex_converter_t *tc, vlc_fourcc_t chroma,
         }
     }
 
-    for (int i = 0; i < 4; i++) {
-        for (int j = 0; j < 4; j++)
-            tc->yuv_coefficients[i*4+j] = matrix[j*4+i];
-    }
-
     tc->yuv_color = true;
 
     *swap_uv = chroma == VLC_CODEC_YV12 || chroma == VLC_CODEC_YV9 ||
-- 
2.25.0