[vlc-commits] d3d_dynamic_shader: premultiply the Colorspace and Whitepoint matrices

Thu Feb 18 06:46:13 UTC 2021

vlc | branch: master | Steve Lhomme <robux4 at ycbcr.xyz> | Wed Feb 17 14:25:00 2021 +0100| [aabb0485c5019fd6b805f4a5c5681e453ba15247] | committer: Steve Lhomme

d3d_dynamic_shader: premultiply the Colorspace and Whitepoint matrices

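No need to multiply the Colorspace and WhitePoint matrices for each pixel each time: the product is now computed once in D3D_SetupQuad() and the shader applies a single matrix.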

> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=aabb0485c5019fd6b805f4a5c5681e453ba15247
---

 modules/video_output/win32/d3d_dynamic_shader.c |  3 +-
 modules/video_output/win32/d3d_shaders.c        | 57 +++++++++++++++++++++----
 modules/video_output/win32/d3d_shaders.h        |  3 +-
 3 files changed, 51 insertions(+), 12 deletions(-)

diff --git a/modules/video_output/win32/d3d_dynamic_shader.c b/modules/video_output/win32/d3d_dynamic_shader.c
index 132160e8be..e41a818063 100644
--- a/modules/video_output/win32/d3d_dynamic_shader.c
+++ b/modules/video_output/win32/d3d_dynamic_shader.c
@@ -38,7 +38,6 @@
 static const char globPixelShaderDefault[] = "\
 cbuffer PS_CONSTANT_BUFFER : register(b0)\n\
 {\n\
-    float4x4 WhitePoint;\n\
     float4x4 Colorspace;\n\
     float4x4 Primaries;\n\
     float Opacity;\n\
@@ -267,7 +266,7 @@ float4 main( PS_INPUT In ) : SV_TARGET\n\
         sample = sampleTexture( borderSampler, In.uv );\n\
     else\n\
         sample = sampleTexture( normalSampler, In.uv );\n\
-    float4 rgba = max(mul(mul(sample, WhitePoint), Colorspace),0);\n\
+    float4 rgba = max(mul(sample, Colorspace),0);\n\
     float opacity = rgba.a * Opacity;\n\
     float4 rgb = rgba; rgb.a = 0;\n\
     rgb = sourceToLinear(rgb);\n\
diff --git a/modules/video_output/win32/d3d_shaders.c b/modules/video_output/win32/d3d_shaders.c
index b1dc10435f..527724632e 100644
--- a/modules/video_output/win32/d3d_shaders.c
+++ b/modules/video_output/win32/d3d_shaders.c
@@ -283,6 +283,45 @@ bool D3D_UpdateQuadLuminanceScale(d3d_quad_t *quad, float luminanceScale)
     return true;
 }
 
+static void MultMat4(FLOAT dst[4*4], const FLOAT left[4*4], const FLOAT right[4*4])
+{
+    // dst = left * right, 4x4 with element (r,c) at [r*4 + c]; right is re-read after dst rows are written, so dst must not overlap right. Keep the current row of left in locals.
+    FLOAT x = left[0*4 + 0];
+    FLOAT y = left[0*4 + 1];
+    FLOAT z = left[0*4 + 2];
+    FLOAT w = left[0*4 + 3];
+    // Row 0 of dst: row 0 of left combined with each column of right
+    dst[0*4 + 0] = (right[0*4 + 0] * x) + (right[1*4 + 0] * y) + (right[2*4 + 0] * z) + (right[3*4 + 0] * w);
+    dst[0*4 + 1] = (right[0*4 + 1] * x) + (right[1*4 + 1] * y) + (right[2*4 + 1] * z) + (right[3*4 + 1] * w);
+    dst[0*4 + 2] = (right[0*4 + 2] * x) + (right[1*4 + 2] * y) + (right[2*4 + 2] * z) + (right[3*4 + 2] * w);
+    dst[0*4 + 3] = (right[0*4 + 3] * x) + (right[1*4 + 3] * y) + (right[2*4 + 3] * z) + (right[3*4 + 3] * w);
+    // Same computation for rows 1, 2 and 3: reload x/y/z/w from the next row of left, then fill that row of dst
+    x = left[1*4 + 0];
+    y = left[1*4 + 1];
+    z = left[1*4 + 2];
+    w = left[1*4 + 3];
+    dst[1*4 + 0] = (right[0*4 + 0] * x) + (right[1*4 + 0] * y) + (right[2*4 + 0] * z) + (right[3*4 + 0] * w);
+    dst[1*4 + 1] = (right[0*4 + 1] * x) + (right[1*4 + 1] * y) + (right[2*4 + 1] * z) + (right[3*4 + 1] * w);
+    dst[1*4 + 2] = (right[0*4 + 2] * x) + (right[1*4 + 2] * y) + (right[2*4 + 2] * z) + (right[3*4 + 2] * w);
+    dst[1*4 + 3] = (right[0*4 + 3] * x) + (right[1*4 + 3] * y) + (right[2*4 + 3] * z) + (right[3*4 + 3] * w);
+    x = left[2*4 + 0];
+    y = left[2*4 + 1];
+    z = left[2*4 + 2];
+    w = left[2*4 + 3];
+    dst[2*4 + 0] = (right[0*4 + 0] * x) + (right[1*4 + 0] * y) + (right[2*4 + 0] * z) + (right[3*4 + 0] * w);
+    dst[2*4 + 1] = (right[0*4 + 1] * x) + (right[1*4 + 1] * y) + (right[2*4 + 1] * z) + (right[3*4 + 1] * w);
+    dst[2*4 + 2] = (right[0*4 + 2] * x) + (right[1*4 + 2] * y) + (right[2*4 + 2] * z) + (right[3*4 + 2] * w);
+    dst[2*4 + 3] = (right[0*4 + 3] * x) + (right[1*4 + 3] * y) + (right[2*4 + 3] * z) + (right[3*4 + 3] * w);
+    x = left[3*4 + 0];
+    y = left[3*4 + 1];
+    z = left[3*4 + 2];
+    w = left[3*4 + 3];
+    dst[3*4 + 0] = (right[0*4 + 0] * x) + (right[1*4 + 0] * y) + (right[2*4 + 0] * z) + (right[3*4 + 0] * w);
+    dst[3*4 + 1] = (right[0*4 + 1] * x) + (right[1*4 + 1] * y) + (right[2*4 + 1] * z) + (right[3*4 + 1] * w);
+    dst[3*4 + 2] = (right[0*4 + 2] * x) + (right[1*4 + 2] * y) + (right[2*4 + 2] * z) + (right[3*4 + 2] * w);
+    dst[3*4 + 3] = (right[0*4 + 3] * x) + (right[1*4 + 3] * y) + (right[2*4 + 3] * z) + (right[3*4 + 3] * w);
+}
+
 void D3D_SetupQuad(vlc_object_t *o, const video_format_t *fmt, d3d_quad_t *quad,
                    const display_info_t *displayFormat)
 {
@@ -368,7 +407,8 @@ void D3D_SetupQuad(vlc_object_t *o, const video_format_t *fmt, d3d_quad_t *quad,
                        0.f,              0.f,              0.f, 1.f,
     };
 
-    memcpy(quad->shaderConstants->WhitePoint, IDENTITY_4X4, sizeof(quad->shaderConstants->WhitePoint));
+    FLOAT WhitePoint[4*4];
+    memcpy(WhitePoint, IDENTITY_4X4, sizeof(WhitePoint));
 
     const FLOAT *ppColorspace;
     if (RGB_src_shader == DxgiIsRGBFormat(displayFormat->pixelFormat))
@@ -378,9 +418,9 @@ void D3D_SetupQuad(vlc_object_t *o, const video_format_t *fmt, d3d_quad_t *quad,
     else if (RGB_src_shader)
     {
         ppColorspace = COLORSPACE_FULL_RGBA_TO_BT601_YUV;
-        quad->shaderConstants->WhitePoint[0*4 + 3] = -itu_black_level;
-        quad->shaderConstants->WhitePoint[1*4 + 3] = itu_achromacy;
-        quad->shaderConstants->WhitePoint[2*4 + 3] = itu_achromacy;
+        WhitePoint[0*4 + 3] = -itu_black_level;
+        WhitePoint[1*4 + 3] = itu_achromacy;
+        WhitePoint[2*4 + 3] = itu_achromacy;
     }
     else
     {
@@ -406,13 +446,14 @@ void D3D_SetupQuad(vlc_object_t *o, const video_format_t *fmt, d3d_quad_t *quad,
                 }
                 break;
         }
+
         /* all matrices work in studio range and output in full range */
-        quad->shaderConstants->WhitePoint[0*4 + 3] = -itu_black_level;
-        quad->shaderConstants->WhitePoint[1*4 + 3] = -itu_achromacy;
-        quad->shaderConstants->WhitePoint[2*4 + 3] = -itu_achromacy;
+        WhitePoint[0*4 + 3] = -itu_black_level;
+        WhitePoint[1*4 + 3] = -itu_achromacy;
+        WhitePoint[2*4 + 3] = -itu_achromacy;
     }
 
-    memcpy(quad->shaderConstants->Colorspace, ppColorspace, sizeof(quad->shaderConstants->Colorspace));
+    MultMat4(quad->shaderConstants->Colorspace, ppColorspace, WhitePoint);
 
     if (fmt->primaries != displayFormat->primaries)
     {
diff --git a/modules/video_output/win32/d3d_shaders.h b/modules/video_output/win32/d3d_shaders.h
index 2c4c5cd5f1..70c90f631a 100644
--- a/modules/video_output/win32/d3d_shaders.h
+++ b/modules/video_output/win32/d3d_shaders.h
@@ -45,14 +45,13 @@ typedef struct {
 
 /* structures passed to the pixel shader */
 typedef struct {
-    FLOAT WhitePoint[4*4];
     FLOAT Colorspace[4*4];
     FLOAT Primaries[4*4];
     FLOAT Opacity;
     FLOAT LuminanceScale;
     FLOAT BoundaryX;
     FLOAT BoundaryY;
-    FLOAT padding[12]; // 256 bytes alignment
+    FLOAT padding[28]; // 256 bytes alignment
 } PS_CONSTANT_BUFFER;
 
 typedef struct {