[vlc-commits] d3d_dynamic_shader: do more operations on float3

Steve Lhomme git at videolan.org
Thu Feb 18 06:46:15 UTC 2021


vlc | branch: master | Steve Lhomme <robux4 at ycbcr.xyz> | Wed Feb 17 15:41:04 2021 +0100| [6ab9638604f3b2d36a6b2f928d3d1ee86e53c092] | committer: Steve Lhomme

d3d_dynamic_shader: do more operations on float3

No need to multiply the alpha value; we're not using the result.

We can do the primaries transformation before the linear conversion; it's
just translating from one coordinate system to another.

> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=6ab9638604f3b2d36a6b2f928d3d1ee86e53c092
---

 modules/video_output/win32/d3d_dynamic_shader.c | 34 +++++++++----------
 modules/video_output/win32/d3d_shaders.c        | 45 +++++++++++--------------
 modules/video_output/win32/d3d_shaders.h        |  6 ++--
 3 files changed, 39 insertions(+), 46 deletions(-)

diff --git a/modules/video_output/win32/d3d_dynamic_shader.c b/modules/video_output/win32/d3d_dynamic_shader.c
index 4bd71dbd1c..5cde7130c1 100644
--- a/modules/video_output/win32/d3d_dynamic_shader.c
+++ b/modules/video_output/win32/d3d_dynamic_shader.c
@@ -38,8 +38,8 @@
 static const char globPixelShaderDefault[] = "\
 cbuffer PS_CONSTANT_BUFFER : register(b0)\n\
 {\n\
-    float4x4 Colorspace;\n\
-    float4x4 Primaries;\n\
+    float4x3 Colorspace;\n\
+    float4x3 Primaries;\n\
     float Opacity;\n\
     float LuminanceScale;\n\
     float2 Boundary;\n\
@@ -89,7 +89,7 @@ struct PS_INPUT\n\
 \n\
 #if (TONE_MAPPING==TONE_MAP_HABLE)\n\
 /* see http://filmicworlds.com/blog/filmic-tonemapping-operators/ */\n\
-inline float4 hable(float4 x) {\n\
+inline float3 hable(float3 x) {\n\
     const float A = 0.15, B = 0.50, C = 0.10, D = 0.20, E = 0.02, F = 0.30;\n\
     return ((x * (A*x + (C*B))+(D*E))/(x * (A*x + B) + (D*F))) - E/F;\n\
 }\n\
@@ -110,7 +110,7 @@ inline float inverse_HLG(float x){\n\
 }\n\
 #endif\n\
 \n\
-inline float4 sourceToLinear(float4 rgb) {\n\
+inline float3 sourceToLinear(float3 rgb) {\n\
 #if (SRC_TO_LINEAR==SRC_TRANSFER_PQ)\n\
     const float ST2084_m1 = 2610.0 / (4096.0 * 4);\n\
     const float ST2084_m2 = (2523.0 / 4096.0) * 128.0;\n\
@@ -140,7 +140,7 @@ inline float4 sourceToLinear(float4 rgb) {\n\
 #endif\n\
 }\n\
 \n\
-inline float4 linearToDisplay(float4 rgb) {\n\
+inline float3 linearToDisplay(float3 rgb) {\n\
 #if (LINEAR_TO_DST==DST_TRANSFER_SRGB)\n\
     return pow(rgb, 1.0 / 2.2);\n\
 #elif (LINEAR_TO_DST==DST_TRANSFER_PQ)\n\
@@ -157,24 +157,24 @@ inline float4 linearToDisplay(float4 rgb) {\n\
 #endif\n\
 }\n\
 \n\
-inline float4 transformPrimaries(float4 rgb) {\n\
+inline float3 transformPrimaries(float4 rgb) {\n\
 #if (PRIMARIES_MODE==TRANSFORM_PRIMARIES)\n\
     return max(mul(rgb, Primaries), 0);\n\
 #else\n\
-    return rgb;\n\
+    return rgb.rgb;\n\
 #endif\n\
 }\n\
 \n\
-inline float4 toneMapping(float4 rgb) {\n\
+inline float3 toneMapping(float3 rgb) {\n\
     rgb = rgb * LuminanceScale;\n\
 #if (TONE_MAPPING==TONE_MAP_HABLE)\n\
-    const float4 HABLE_DIV = hable(11.2);\n\
+    const float3 HABLE_DIV = hable(11.2);\n\
     rgb = hable(rgb) / HABLE_DIV;\n\
 #endif\n\
     return rgb;\n\
 }\n\
 \n\
-inline float4 adjustRange(float4 rgb) {\n\
+inline float3 adjustRange(float3 rgb) {\n\
 #if (SRC_RANGE!=DST_RANGE)\n\
     return clamp((rgb + BLACK_LEVEL_SHIFT) * RANGE_FACTOR, MIN_BLACK_VALUE, MAX_BLACK_VALUE);\n\
 #else\n\
@@ -266,13 +266,13 @@ float4 main( PS_INPUT In ) : SV_TARGET\n\
         sample = sampleTexture( borderSampler, In.uv );\n\
     else\n\
         sample = sampleTexture( normalSampler, In.uv );\n\
-    float4 rgba = max(mul(sample, Colorspace),0);\n\
-    rgba = sourceToLinear(rgba);\n\
-    rgba = transformPrimaries(rgba);\n\
-    rgba = toneMapping(rgba);\n\
-    rgba = linearToDisplay(rgba);\n\
-    rgba = adjustRange(rgba);\n\
-    return float4(rgba.rgb, saturate(sample.a * Opacity));\n\
+    float3 rgb1 = max(mul(sample, Colorspace),0);\n\
+    float3 rgb = transformPrimaries(float4(rgb1, 0));\n\
+    rgb = sourceToLinear(rgb);\n\
+    rgb = toneMapping(rgb);\n\
+    rgb = linearToDisplay(rgb);\n\
+    rgb = adjustRange(rgb);\n\
+    return float4(rgb, saturate(sample.a * Opacity));\n\
 }\n\
 ";
 
diff --git a/modules/video_output/win32/d3d_shaders.c b/modules/video_output/win32/d3d_shaders.c
index 527724632e..cb64d3305e 100644
--- a/modules/video_output/win32/d3d_shaders.c
+++ b/modules/video_output/win32/d3d_shaders.c
@@ -236,7 +236,7 @@ static void GetXYZ2RGBMatrix(const struct cie1931_primaries *primaries,
     Float3x3Inverse(out);
 }
 
-static void GetPrimariesTransform(FLOAT Primaries[4*4], video_color_primaries_t src,
+static void GetPrimariesTransform(FLOAT Primaries[4*3], video_color_primaries_t src,
                                   video_color_primaries_t dst)
 {
     const struct cie1931_primaries *p_src = &STANDARD_PRIMARIES[src];
@@ -261,8 +261,6 @@ static void GetPrimariesTransform(FLOAT Primaries[4*4], video_color_primaries_t
             Primaries[j + i*4] = xyz2rgb[j + i*3];
         Primaries[3 + i*4] = 0;
     }
-    for (size_t j=0;j<4; ++j)
-        Primaries[j + 3*4] = j == 3;
 }
 
 bool D3D_UpdateQuadOpacity(d3d_quad_t *quad, float opacity)
@@ -283,7 +281,7 @@ bool D3D_UpdateQuadLuminanceScale(d3d_quad_t *quad, float luminanceScale)
     return true;
 }
 
-static void MultMat4(FLOAT dst[4*4], const FLOAT left[4*4], const FLOAT right[4*4])
+static void MultMat43(FLOAT dst[4*3], const FLOAT left[4*3], const FLOAT right[4*3])
 {
     // Cache the invariants in registers
     FLOAT x = left[0*4 + 0];
@@ -312,14 +310,14 @@ static void MultMat4(FLOAT dst[4*4], const FLOAT left[4*4], const FLOAT right[4*
     dst[2*4 + 1] = (right[0*4 + 1] * x) + (right[1*4 + 1] * y) + (right[2*4 + 1] * z) + (right[3*4 + 1] * w);
     dst[2*4 + 2] = (right[0*4 + 2] * x) + (right[1*4 + 2] * y) + (right[2*4 + 2] * z) + (right[3*4 + 2] * w);
     dst[2*4 + 3] = (right[0*4 + 3] * x) + (right[1*4 + 3] * y) + (right[2*4 + 3] * z) + (right[3*4 + 3] * w);
-    x = left[3*4 + 0];
-    y = left[3*4 + 1];
-    z = left[3*4 + 2];
-    w = left[3*4 + 3];
-    dst[3*4 + 0] = (right[0*4 + 0] * x) + (right[1*4 + 0] * y) + (right[2*4 + 0] * z) + (right[3*4 + 0] * w);
-    dst[3*4 + 1] = (right[0*4 + 1] * x) + (right[1*4 + 1] * y) + (right[2*4 + 1] * z) + (right[3*4 + 1] * w);
-    dst[3*4 + 2] = (right[0*4 + 2] * x) + (right[1*4 + 2] * y) + (right[2*4 + 2] * z) + (right[3*4 + 2] * w);
-    dst[3*4 + 3] = (right[0*4 + 3] * x) + (right[1*4 + 3] * y) + (right[2*4 + 3] * z) + (right[3*4 + 3] * w);
+    // x = left[3*4 + 0];
+    // y = left[3*4 + 1];
+    // z = left[3*4 + 2];
+    // w = left[3*4 + 3];
+    // dst[3*4 + 0] = (right[0*4 + 0] * x) + (right[1*4 + 0] * y) + (right[2*4 + 0] * z) + (right[3*4 + 0] * w);
+    // dst[3*4 + 1] = (right[0*4 + 1] * x) + (right[1*4 + 1] * y) + (right[2*4 + 1] * z) + (right[3*4 + 1] * w);
+    // dst[3*4 + 2] = (right[0*4 + 2] * x) + (right[1*4 + 2] * y) + (right[2*4 + 2] * z) + (right[3*4 + 2] * w);
+    // dst[3*4 + 3] = (right[0*4 + 3] * x) + (right[1*4 + 3] * y) + (right[2*4 + 3] * z) + (right[3*4 + 3] * w);
 }
 
 void D3D_SetupQuad(vlc_object_t *o, const video_format_t *fmt, d3d_quad_t *quad,
@@ -369,51 +367,46 @@ void D3D_SetupQuad(vlc_object_t *o, const video_format_t *fmt, d3d_quad_t *quad,
         }
     }
 
-    static const FLOAT IDENTITY_4X4[4 * 4] = {
+    static const FLOAT IDENTITY_4X3[4 * 3] = {
         1.f, 0.f, 0.f, 0.f,
         0.f, 1.f, 0.f, 0.f,
         0.f, 0.f, 1.f, 0.f,
-        0.f, 0.f, 0.f, 1.f,
     };
 
     /* matrices for studio range */
     /* see https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion, in studio range */
-    static const FLOAT COLORSPACE_BT601_YUV_TO_FULL_RGBA[4*4] = {
+    static const FLOAT COLORSPACE_BT601_YUV_TO_FULL_RGBA[4*3] = {
         1.164383561643836f,                 0.f,  1.596026785714286f, 0.f,
         1.164383561643836f, -0.391762290094914f, -0.812967647237771f, 0.f,
         1.164383561643836f,  2.017232142857142f,                 0.f, 0.f,
-                       0.f,                 0.f,                 0.f, 1.f,
     };
 
-    static const FLOAT COLORSPACE_FULL_RGBA_TO_BT601_YUV[4*4] = {
+    static const FLOAT COLORSPACE_FULL_RGBA_TO_BT601_YUV[4*3] = {
         0.299000f,  0.587000f,  0.114000f, 0.f,
        -0.168736f, -0.331264f,  0.500000f, 0.f,
         0.500000f, -0.418688f, -0.081312f, 0.f,
-              0.f,        0.f,        0.f, 1.f,
     };
 
     /* see https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.709_conversion, in studio range */
-    static const FLOAT COLORSPACE_BT709_YUV_TO_FULL_RGBA[4*4] = {
+    static const FLOAT COLORSPACE_BT709_YUV_TO_FULL_RGBA[4*3] = {
         1.164383561643836f,                 0.f,  1.792741071428571f, 0.f,
         1.164383561643836f, -0.213248614273730f, -0.532909328559444f, 0.f,
         1.164383561643836f,  2.112401785714286f,                 0.f, 0.f,
-                       0.f,                 0.f,                 0.f, 1.f,
     };
     /* see https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.2020_conversion, in studio range */
-    static const FLOAT COLORSPACE_BT2020_YUV_TO_FULL_RGBA[4*4] = {
+    static const FLOAT COLORSPACE_BT2020_YUV_TO_FULL_RGBA[4*3] = {
         1.164383561643836f,  0.000000000000f,  1.678674107143f, 0.f,
         1.164383561643836f, -0.127007098661f, -0.440987687946f, 0.f,
         1.164383561643836f,  2.141772321429f,  0.000000000000f, 0.f,
-                       0.f,              0.f,              0.f, 1.f,
     };
 
-    FLOAT WhitePoint[4*4];
-    memcpy(WhitePoint, IDENTITY_4X4, sizeof(WhitePoint));
+    FLOAT WhitePoint[4*3];
+    memcpy(WhitePoint, IDENTITY_4X3, sizeof(WhitePoint));
 
     const FLOAT *ppColorspace;
     if (RGB_src_shader == DxgiIsRGBFormat(displayFormat->pixelFormat))
     {
-        ppColorspace = IDENTITY_4X4;
+        ppColorspace = IDENTITY_4X3;
     }
     else if (RGB_src_shader)
     {
@@ -453,7 +446,7 @@ void D3D_SetupQuad(vlc_object_t *o, const video_format_t *fmt, d3d_quad_t *quad,
         WhitePoint[2*4 + 3] = -itu_achromacy;
     }
 
-    MultMat4(quad->shaderConstants->Colorspace, ppColorspace, WhitePoint);
+    MultMat43(quad->shaderConstants->Colorspace, ppColorspace, WhitePoint);
 
     if (fmt->primaries != displayFormat->primaries)
     {
diff --git a/modules/video_output/win32/d3d_shaders.h b/modules/video_output/win32/d3d_shaders.h
index 70c90f631a..2077642742 100644
--- a/modules/video_output/win32/d3d_shaders.h
+++ b/modules/video_output/win32/d3d_shaders.h
@@ -45,13 +45,13 @@ typedef struct {
 
 /* structures passed to the pixel shader */
 typedef struct {
-    FLOAT Colorspace[4*4];
-    FLOAT Primaries[4*4];
+    FLOAT Colorspace[4*3];
+    FLOAT Primaries[4*3];
     FLOAT Opacity;
     FLOAT LuminanceScale;
     FLOAT BoundaryX;
     FLOAT BoundaryY;
-    FLOAT padding[28]; // 256 bytes alignment
+    FLOAT padding[36]; // 256 bytes alignment
 } PS_CONSTANT_BUFFER;
 
 typedef struct {



More information about the vlc-commits mailing list