duckstation

duckstation, archived at the revision just before upstream changed it to a proprietary software project; this version is the libre one.
git clone https://git.neptards.moe/u3shit/duckstation.git
Log | Files | Refs | README | LICENSE

shadergen.cpp (29815B)


      1 // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
      2 // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
      3 
      4 #include "shadergen.h"
      5 
      6 #include "common/assert.h"
      7 #include "common/bitutils.h"
      8 #include "common/log.h"
      9 
     10 #include <cstdio>
     11 #include <cstring>
     12 #include <iomanip>
     13 
     14 #ifdef ENABLE_OPENGL
     15 #include "opengl_loader.h"
     16 #endif
     17 
// Selects the "ShaderGen" log channel for this translation unit.
// NOTE(review): presumably this is what tags the ERROR_LOG output below - confirm against common/log.h.
Log_SetChannel(ShaderGen);
     19 
     20 ShaderGen::ShaderGen(RenderAPI render_api, GPUShaderLanguage shader_language, bool supports_dual_source_blend,
     21                      bool supports_framebuffer_fetch)
     22   : m_render_api(render_api), m_shader_language(shader_language),
     23     m_glsl(shader_language == GPUShaderLanguage::GLSL || shader_language == GPUShaderLanguage::GLSLES ||
     24            shader_language == GPUShaderLanguage::GLSLVK),
     25     m_spirv(shader_language == GPUShaderLanguage::GLSLVK), m_supports_dual_source_blend(supports_dual_source_blend),
     26     m_supports_framebuffer_fetch(supports_framebuffer_fetch)
     27 {
     28   if (m_glsl)
     29   {
     30 #ifdef ENABLE_OPENGL
     31     if (m_render_api == RenderAPI::OpenGL || m_render_api == RenderAPI::OpenGLES)
     32       m_glsl_version_string = GetGLSLVersionString(m_render_api, GetGLSLVersion(render_api));
     33 
     34     m_use_glsl_interface_blocks =
     35       (shader_language == GPUShaderLanguage::GLSLVK || GLAD_GL_ES_VERSION_3_2 || GLAD_GL_VERSION_3_2);
     36     m_use_glsl_binding_layout = (shader_language == GPUShaderLanguage::GLSLVK || UseGLSLBindingLayout());
     37 
     38 #ifdef _WIN32
     39     if (m_shader_language == GPUShaderLanguage::GLSL)
     40     {
     41       // SSAA with interface blocks is broken on AMD's OpenGL driver.
     42       const char* gl_vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
     43       if (std::strcmp(gl_vendor, "ATI Technologies Inc.") == 0)
     44         m_use_glsl_interface_blocks = false;
     45     }
     46 #endif
     47 #else
     48     m_use_glsl_interface_blocks = true;
     49     m_use_glsl_binding_layout = true;
     50 #endif
     51   }
     52 }
     53 
// Compiler-generated destructor, defined out of line in this translation unit.
ShaderGen::~ShaderGen() = default;
     55 
     56 GPUShaderLanguage ShaderGen::GetShaderLanguageForAPI(RenderAPI api)
     57 {
     58   switch (api)
     59   {
     60     case RenderAPI::D3D11:
     61     case RenderAPI::D3D12:
     62       return GPUShaderLanguage::HLSL;
     63 
     64     case RenderAPI::Vulkan:
     65     case RenderAPI::Metal:
     66       return GPUShaderLanguage::GLSLVK;
     67 
     68     case RenderAPI::OpenGL:
     69       return GPUShaderLanguage::GLSL;
     70 
     71     case RenderAPI::OpenGLES:
     72       return GPUShaderLanguage::GLSLES;
     73 
     74     case RenderAPI::None:
     75     default:
     76       return GPUShaderLanguage::None;
     77   }
     78 }
     79 
     80 bool ShaderGen::UseGLSLBindingLayout()
     81 {
     82 #ifdef ENABLE_OPENGL
     83   return (GLAD_GL_ES_VERSION_3_1 || GLAD_GL_VERSION_4_3 ||
     84           (GLAD_GL_ARB_explicit_attrib_location && GLAD_GL_ARB_explicit_uniform_location &&
     85            GLAD_GL_ARB_shading_language_420pack));
     86 #else
     87   return true;
     88 #endif
     89 }
     90 
     91 void ShaderGen::DefineMacro(std::stringstream& ss, const char* name, bool enabled)
     92 {
     93   ss << "#define " << name << " " << BoolToUInt32(enabled) << "\n";
     94 }
     95 
     96 void ShaderGen::DefineMacro(std::stringstream& ss, const char* name, s32 value)
     97 {
     98   ss << "#define " << name << " " << value << "\n";
     99 }
    100 
    101 #ifdef ENABLE_OPENGL
    102 u32 ShaderGen::GetGLSLVersion(RenderAPI render_api)
    103 {
    104   const char* glsl_version = reinterpret_cast<const char*>(glGetString(GL_SHADING_LANGUAGE_VERSION));
    105   const bool glsl_es = (render_api == RenderAPI::OpenGLES);
    106   Assert(glsl_version != nullptr);
    107 
    108   // Skip any strings in front of the version code.
    109   const char* glsl_version_start = glsl_version;
    110   while (*glsl_version_start != '\0' && (*glsl_version_start < '0' || *glsl_version_start > '9'))
    111     glsl_version_start++;
    112 
    113   int major_version = 0, minor_version = 0;
    114   if (std::sscanf(glsl_version_start, "%d.%d", &major_version, &minor_version) == 2)
    115   {
    116     // Cap at GLSL 4.3, we're not using anything newer for now.
    117     if (!glsl_es && (major_version > 4 || (major_version == 4 && minor_version > 30)))
    118     {
    119       major_version = 4;
    120       minor_version = 30;
    121     }
    122     else if (glsl_es && (major_version > 3 || (major_version == 3 && minor_version > 20)))
    123     {
    124       major_version = 3;
    125       minor_version = 20;
    126     }
    127   }
    128   else
    129   {
    130     ERROR_LOG("Invalid GLSL version string: '{}' ('{}')", glsl_version, glsl_version_start);
    131     if (glsl_es)
    132     {
    133       major_version = 3;
    134       minor_version = 0;
    135     }
    136   }
    137 
    138   return (static_cast<u32>(major_version) * 100) + static_cast<u32>(minor_version);
    139 }
    140 
    141 TinyString ShaderGen::GetGLSLVersionString(RenderAPI render_api, u32 version)
    142 {
    143   const bool glsl_es = (render_api == RenderAPI::OpenGLES);
    144   const u32 major_version = (version / 100);
    145   const u32 minor_version = (version % 100);
    146 
    147   return TinyString::from_format("#version {}{:02d}{}", major_version, minor_version,
    148                                  (glsl_es && major_version >= 3) ? " es" : "");
    149 }
    150 #endif
    151 
    152 void ShaderGen::WriteHeader(std::stringstream& ss, bool enable_rov /* = false */)
    153 {
    154   if (m_shader_language == GPUShaderLanguage::GLSL || m_shader_language == GPUShaderLanguage::GLSLES)
    155     ss << m_glsl_version_string << "\n\n";
    156   else if (m_spirv)
    157     ss << "#version 450 core\n\n";
    158 
    159 #ifdef __APPLE__
    160   // TODO: Do this for Vulkan as well.
    161   if (m_render_api == RenderAPI::Metal)
    162   {
    163     if (!m_supports_framebuffer_fetch)
    164       ss << "#extension GL_EXT_samplerless_texture_functions : require\n";
    165   }
    166 #endif
    167 
    168 #ifdef ENABLE_OPENGL
    169   // Extension enabling for OpenGL.
    170   if (m_shader_language == GPUShaderLanguage::GLSL || m_shader_language == GPUShaderLanguage::GLSLES)
    171   {
    172     if (GLAD_GL_EXT_shader_framebuffer_fetch)
    173       ss << "#extension GL_EXT_shader_framebuffer_fetch : require\n";
    174     else if (GLAD_GL_ARM_shader_framebuffer_fetch)
    175       ss << "#extension GL_ARM_shader_framebuffer_fetch : require\n";
    176   }
    177 
    178   if (m_shader_language == GPUShaderLanguage::GLSLES)
    179   {
    180     // Enable EXT_blend_func_extended for dual-source blend on OpenGL ES.
    181     if (GLAD_GL_EXT_blend_func_extended)
    182       ss << "#extension GL_EXT_blend_func_extended : require\n";
    183     if (GLAD_GL_ARB_blend_func_extended)
    184       ss << "#extension GL_ARB_blend_func_extended : require\n";
    185 
    186     // Test for V3D driver - we have to fudge coordinates slightly.
    187     if (std::strstr(reinterpret_cast<const char*>(glGetString(GL_VENDOR)), "Broadcom") &&
    188         std::strstr(reinterpret_cast<const char*>(glGetString(GL_RENDERER)), "V3D"))
    189     {
    190       ss << "#define DRIVER_V3D 1\n";
    191     }
    192     else if (std::strstr(reinterpret_cast<const char*>(glGetString(GL_RENDERER)), "PowerVR"))
    193     {
    194       ss << "#define DRIVER_POWERVR 1\n";
    195     }
    196   }
    197   else if (m_shader_language == GPUShaderLanguage::GLSL)
    198   {
    199     // Need extensions for binding layout if GL<4.3.
    200     if (m_use_glsl_binding_layout && !GLAD_GL_VERSION_4_3)
    201     {
    202       ss << "#extension GL_ARB_explicit_attrib_location : require\n";
    203       ss << "#extension GL_ARB_explicit_uniform_location : require\n";
    204       ss << "#extension GL_ARB_shading_language_420pack : require\n";
    205     }
    206 
    207     if (!GLAD_GL_VERSION_3_2)
    208       ss << "#extension GL_ARB_uniform_buffer_object : require\n";
    209 
    210     // Enable SSBOs if it's not required by the version.
    211     if (!GLAD_GL_VERSION_4_3 && !GLAD_GL_ES_VERSION_3_1 && GLAD_GL_ARB_shader_storage_buffer_object)
    212       ss << "#extension GL_ARB_shader_storage_buffer_object : require\n";
    213   }
    214   else if (m_shader_language == GPUShaderLanguage::GLSLVK)
    215   {
    216     if (enable_rov)
    217       ss << "#extension GL_ARB_fragment_shader_interlock : require\n";
    218   }
    219 #endif
    220 
    221   DefineMacro(ss, "API_OPENGL", m_render_api == RenderAPI::OpenGL);
    222   DefineMacro(ss, "API_OPENGL_ES", m_render_api == RenderAPI::OpenGLES);
    223   DefineMacro(ss, "API_D3D11", m_render_api == RenderAPI::D3D11);
    224   DefineMacro(ss, "API_D3D12", m_render_api == RenderAPI::D3D12);
    225   DefineMacro(ss, "API_VULKAN", m_render_api == RenderAPI::Vulkan);
    226   DefineMacro(ss, "API_METAL", m_render_api == RenderAPI::Metal);
    227 
    228 #ifdef ENABLE_OPENGL
    229   if (m_shader_language == GPUShaderLanguage::GLSLES)
    230   {
    231     ss << "precision highp float;\n";
    232     ss << "precision highp int;\n";
    233     ss << "precision highp sampler2D;\n";
    234     ss << "precision highp isampler2D;\n";
    235     ss << "precision highp usampler2D;\n";
    236 
    237     if (GLAD_GL_ES_VERSION_3_1)
    238       ss << "precision highp sampler2DMS;\n";
    239 
    240     if (GLAD_GL_ES_VERSION_3_2)
    241       ss << "precision highp usamplerBuffer;\n";
    242 
    243     ss << "\n";
    244   }
    245 #endif
    246 
    247   if (m_glsl)
    248   {
    249     ss << "#define GLSL 1\n";
    250     ss << "#define float2 vec2\n";
    251     ss << "#define float3 vec3\n";
    252     ss << "#define float4 vec4\n";
    253     ss << "#define int2 ivec2\n";
    254     ss << "#define int3 ivec3\n";
    255     ss << "#define int4 ivec4\n";
    256     ss << "#define uint2 uvec2\n";
    257     ss << "#define uint3 uvec3\n";
    258     ss << "#define uint4 uvec4\n";
    259     ss << "#define float2x2 mat2\n";
    260     ss << "#define float3x3 mat3\n";
    261     ss << "#define float4x4 mat4\n";
    262     ss << "#define mul(x, y) ((x) * (y))\n";
    263     ss << "#define nointerpolation flat\n";
    264     ss << "#define frac fract\n";
    265     ss << "#define lerp mix\n";
    266 
    267     ss << "#define CONSTANT const\n";
    268     ss << "#define GLOBAL\n";
    269     ss << "#define FOR_UNROLL for\n";
    270     ss << "#define FOR_LOOP for\n";
    271     ss << "#define IF_BRANCH if\n";
    272     ss << "#define IF_FLATTEN if\n";
    273     ss << "#define VECTOR_EQ(a, b) ((a) == (b))\n";
    274     ss << "#define VECTOR_NEQ(a, b) ((a) != (b))\n";
    275     ss << "#define VECTOR_COMP_EQ(a, b) equal((a), (b))\n";
    276     ss << "#define VECTOR_COMP_NEQ(a, b) notEqual((a), (b))\n";
    277     ss << "#define SAMPLE_TEXTURE(name, coords) texture(name, coords)\n";
    278     ss << "#define SAMPLE_TEXTURE_OFFSET(name, coords, offset) textureOffset(name, coords, offset)\n";
    279     ss << "#define SAMPLE_TEXTURE_LEVEL(name, coords, level) textureLod(name, coords, level)\n";
    280     ss << "#define SAMPLE_TEXTURE_LEVEL_OFFSET(name, coords, level, offset) textureLodOffset(name, coords, level, "
    281           "offset)\n";
    282     ss << "#define LOAD_TEXTURE(name, coords, mip) texelFetch(name, coords, mip)\n";
    283     ss << "#define LOAD_TEXTURE_MS(name, coords, sample) texelFetch(name, coords, int(sample))\n";
    284     ss << "#define LOAD_TEXTURE_OFFSET(name, coords, mip, offset) texelFetchOffset(name, coords, mip, offset)\n";
    285     ss << "#define LOAD_TEXTURE_BUFFER(name, index) texelFetch(name, index)\n";
    286     ss << "#define BEGIN_ARRAY(type, size) type[size](\n";
    287     ss << "#define END_ARRAY )\n";
    288 
    289     ss << "float saturate(float value) { return clamp(value, 0.0, 1.0); }\n";
    290     ss << "float2 saturate(float2 value) { return clamp(value, float2(0.0, 0.0), float2(1.0, 1.0)); }\n";
    291     ss << "float3 saturate(float3 value) { return clamp(value, float3(0.0, 0.0, 0.0), float3(1.0, 1.0, 1.0)); }\n";
    292     ss << "float4 saturate(float4 value) { return clamp(value, float4(0.0, 0.0, 0.0, 0.0), float4(1.0, 1.0, 1.0, "
    293           "1.0)); }\n";
    294   }
    295   else
    296   {
    297     ss << "#define HLSL 1\n";
    298     ss << "#define roundEven round\n";
    299     ss << "#define mix lerp\n";
    300     ss << "#define fract frac\n";
    301     ss << "#define vec2 float2\n";
    302     ss << "#define vec3 float3\n";
    303     ss << "#define vec4 float4\n";
    304     ss << "#define ivec2 int2\n";
    305     ss << "#define ivec3 int3\n";
    306     ss << "#define ivec4 int4\n";
    307     ss << "#define uivec2 uint2\n";
    308     ss << "#define uivec3 uint3\n";
    309     ss << "#define uivec4 uint4\n";
    310     ss << "#define mat2 float2x2\n";
    311     ss << "#define mat3 float3x3\n";
    312     ss << "#define mat4 float4x4\n";
    313     ss << "#define CONSTANT static const\n";
    314     ss << "#define GLOBAL static\n";
    315     ss << "#define FOR_UNROLL [unroll] for\n";
    316     ss << "#define FOR_LOOP [loop] for\n";
    317     ss << "#define IF_BRANCH [branch] if\n";
    318     ss << "#define IF_FLATTEN [flatten] if\n";
    319     ss << "#define VECTOR_EQ(a, b) (all((a) == (b)))\n";
    320     ss << "#define VECTOR_NEQ(a, b) (any((a) != (b)))\n";
    321     ss << "#define VECTOR_COMP_EQ(a, b) ((a) == (b))\n";
    322     ss << "#define VECTOR_COMP_NEQ(a, b) ((a) != (b))\n";
    323     ss << "#define SAMPLE_TEXTURE(name, coords) name.Sample(name##_ss, coords)\n";
    324     ss << "#define SAMPLE_TEXTURE_OFFSET(name, coords, offset) name.Sample(name##_ss, coords, offset)\n";
    325     ss << "#define SAMPLE_TEXTURE_LEVEL(name, coords, level) name.SampleLevel(name##_ss, coords, level)\n";
    326     ss << "#define SAMPLE_TEXTURE_LEVEL_OFFSET(name, coords, level, offset) name.SampleLevel(name##_ss, coords, level, "
    327           "offset)\n";
    328     ss << "#define LOAD_TEXTURE(name, coords, mip) name.Load(int3(coords, mip))\n";
    329     ss << "#define LOAD_TEXTURE_MS(name, coords, sample) name.Load(coords, sample)\n";
    330     ss << "#define LOAD_TEXTURE_OFFSET(name, coords, mip, offset) name.Load(int3(coords, mip), offset)\n";
    331     ss << "#define LOAD_TEXTURE_BUFFER(name, index) name.Load(index)\n";
    332     ss << "#define BEGIN_ARRAY(type, size) {\n";
    333     ss << "#define END_ARRAY }\n";
    334   }
    335 
    336   ss << "\n";
    337 
    338   m_has_uniform_buffer = false;
    339 }
    340 
    341 void ShaderGen::WriteUniformBufferDeclaration(std::stringstream& ss, bool push_constant_on_vulkan)
    342 {
    343   if (m_shader_language == GPUShaderLanguage::GLSLVK)
    344   {
    345     if (m_render_api == RenderAPI::Vulkan && push_constant_on_vulkan)
    346     {
    347       ss << "layout(push_constant) uniform PushConstants\n";
    348     }
    349     else
    350     {
    351       ss << "layout(std140, set = 0, binding = 0) uniform UBOBlock\n";
    352       m_has_uniform_buffer = true;
    353     }
    354   }
    355   else if (m_glsl)
    356   {
    357     if (m_use_glsl_binding_layout)
    358       ss << "layout(std140, binding = 0) uniform UBOBlock\n";
    359     else
    360       ss << "layout(std140) uniform UBOBlock\n";
    361 
    362     m_has_uniform_buffer = true;
    363   }
    364   else
    365   {
    366     ss << "cbuffer UBOBlock : register(b0)\n";
    367     m_has_uniform_buffer = true;
    368   }
    369 }
    370 
    371 void ShaderGen::DeclareUniformBuffer(std::stringstream& ss, const std::initializer_list<const char*>& members,
    372                                      bool push_constant_on_vulkan)
    373 {
    374   WriteUniformBufferDeclaration(ss, push_constant_on_vulkan);
    375 
    376   ss << "{\n";
    377   for (const char* member : members)
    378     ss << member << ";\n";
    379   ss << "};\n\n";
    380 }
    381 
    382 void ShaderGen::DeclareTexture(std::stringstream& ss, const char* name, u32 index, bool multisampled /* = false */,
    383                                bool is_int /* = false */, bool is_unsigned /* = false */)
    384 {
    385   if (m_glsl)
    386   {
    387     if (m_spirv)
    388       ss << "layout(set = " << ((m_has_uniform_buffer || IsMetal()) ? 1 : 0) << ", binding = " << index << ") ";
    389     else if (m_use_glsl_binding_layout)
    390       ss << "layout(binding = " << index << ") ";
    391 
    392     ss << "uniform " << (is_int ? (is_unsigned ? "u" : "i") : "") << (multisampled ? "sampler2DMS " : "sampler2D ")
    393        << name << ";\n";
    394   }
    395   else
    396   {
    397     ss << (multisampled ? "Texture2DMS<" : "Texture2D<") << (is_int ? (is_unsigned ? "uint4" : "int4") : "float4")
    398        << "> " << name << " : register(t" << index << ");\n";
    399     ss << "SamplerState " << name << "_ss : register(s" << index << ");\n";
    400   }
    401 }
    402 
    403 void ShaderGen::DeclareTextureBuffer(std::stringstream& ss, const char* name, u32 index, bool is_int, bool is_unsigned)
    404 {
    405   if (m_glsl)
    406   {
    407     if (m_spirv)
    408       ss << "layout(set = " << ((m_has_uniform_buffer || IsMetal()) ? 1 : 0) << ", binding = " << index << ") ";
    409     else if (m_use_glsl_binding_layout)
    410       ss << "layout(binding = " << index << ") ";
    411 
    412     ss << "uniform " << (is_int ? (is_unsigned ? "u" : "i") : "") << "samplerBuffer " << name << ";\n";
    413   }
    414   else
    415   {
    416     ss << "Buffer<" << (is_int ? (is_unsigned ? "uint4" : "int4") : "float4") << "> " << name << " : register(t"
    417        << index << ");\n";
    418   }
    419 }
    420 
    421 void ShaderGen::DeclareImage(std::stringstream& ss, const char* name, u32 index, bool is_float /* = false */,
    422                              bool is_int /* = false */, bool is_unsigned /* = false */)
    423 {
    424   if (m_glsl)
    425   {
    426     if (m_spirv)
    427       ss << "layout(set = " << (m_has_uniform_buffer ? 2 : 1) << ", binding = " << index;
    428     else
    429       ss << "layout(binding = " << index;
    430 
    431     ss << ", " << (is_int ? (is_unsigned ? "rgba8ui" : "rgba8i") : "rgba8") << ") "
    432        << "uniform restrict coherent image2D " << name << ";\n";
    433   }
    434   else
    435   {
    436     ss << "RasterizerOrderedTexture2D<"
    437        << (is_int ? (is_unsigned ? "uint4" : "int4") : (is_float ? "float4" : "unorm float4")) << "> " << name
    438        << " : register(u" << index << ");\n";
    439   }
    440 }
    441 
    442 const char* ShaderGen::GetInterpolationQualifier(bool interface_block, bool centroid_interpolation,
    443                                                  bool sample_interpolation, bool is_out) const
    444 {
    445 #ifdef ENABLE_OPENGL
    446   const bool shading_language_420pack = GLAD_GL_ARB_shading_language_420pack;
    447 #else
    448   const bool shading_language_420pack = false;
    449 #endif
    450   if (m_glsl && interface_block && (!m_spirv && !shading_language_420pack))
    451   {
    452     return (sample_interpolation ? (is_out ? "sample out " : "sample in ") :
    453                                    (centroid_interpolation ? (is_out ? "centroid out " : "centroid in ") : ""));
    454   }
    455   else
    456   {
    457     return (sample_interpolation ? "sample " : (centroid_interpolation ? "centroid " : ""));
    458   }
    459 }
    460 
    461 void ShaderGen::DeclareVertexEntryPoint(
    462   std::stringstream& ss, const std::initializer_list<const char*>& attributes, u32 num_color_outputs,
    463   u32 num_texcoord_outputs, const std::initializer_list<std::pair<const char*, const char*>>& additional_outputs,
    464   bool declare_vertex_id /* = false */, const char* output_block_suffix /* = "" */, bool msaa /* = false */,
    465   bool ssaa /* = false */, bool noperspective_color /* = false */)
    466 {
    467   if (m_glsl)
    468   {
    469     if (m_use_glsl_binding_layout)
    470     {
    471       u32 attribute_counter = 0;
    472       for (const char* attribute : attributes)
    473       {
    474         ss << "layout(location = " << attribute_counter << ") in " << attribute << ";\n";
    475         attribute_counter++;
    476       }
    477     }
    478     else
    479     {
    480       for (const char* attribute : attributes)
    481         ss << "in " << attribute << ";\n";
    482     }
    483 
    484     if (m_use_glsl_interface_blocks)
    485     {
    486       const char* qualifier = GetInterpolationQualifier(true, msaa, ssaa, true);
    487 
    488       if (m_spirv)
    489         ss << "layout(location = 0) ";
    490 
    491       ss << "out VertexData" << output_block_suffix << " {\n";
    492       for (u32 i = 0; i < num_color_outputs; i++)
    493         ss << "  " << (noperspective_color ? "noperspective " : "") << qualifier << "float4 v_col" << i << ";\n";
    494 
    495       for (u32 i = 0; i < num_texcoord_outputs; i++)
    496         ss << "  " << qualifier << "float2 v_tex" << i << ";\n";
    497 
    498       for (const auto& [qualifiers, name] : additional_outputs)
    499       {
    500         const char* qualifier_to_use = (std::strlen(qualifiers) > 0) ? qualifiers : qualifier;
    501         ss << "  " << qualifier_to_use << " " << name << ";\n";
    502       }
    503       ss << "};\n";
    504     }
    505     else
    506     {
    507       const char* qualifier = GetInterpolationQualifier(false, msaa, ssaa, true);
    508 
    509       for (u32 i = 0; i < num_color_outputs; i++)
    510         ss << qualifier << (noperspective_color ? "noperspective " : "") << "out float4 v_col" << i << ";\n";
    511 
    512       for (u32 i = 0; i < num_texcoord_outputs; i++)
    513         ss << qualifier << "out float2 v_tex" << i << ";\n";
    514 
    515       for (const auto& [qualifiers, name] : additional_outputs)
    516       {
    517         const char* qualifier_to_use = (std::strlen(qualifiers) > 0) ? qualifiers : qualifier;
    518         ss << qualifier_to_use << " out " << name << ";\n";
    519       }
    520     }
    521 
    522     ss << "#define v_pos gl_Position\n\n";
    523     if (declare_vertex_id)
    524     {
    525       if (m_spirv)
    526         ss << "#define v_id uint(gl_VertexIndex)\n";
    527       else
    528         ss << "#define v_id uint(gl_VertexID)\n";
    529     }
    530 
    531     ss << "\n";
    532     ss << "void main()\n";
    533   }
    534   else
    535   {
    536     const char* qualifier = GetInterpolationQualifier(false, msaa, ssaa, true);
    537 
    538     ss << "void main(\n";
    539 
    540     if (declare_vertex_id)
    541       ss << "  in uint v_id : SV_VertexID,\n";
    542 
    543     u32 attribute_counter = 0;
    544     for (const char* attribute : attributes)
    545     {
    546       ss << "  in " << attribute << " : ATTR" << attribute_counter << ",\n";
    547       attribute_counter++;
    548     }
    549 
    550     for (u32 i = 0; i < num_color_outputs; i++)
    551       ss << "  " << qualifier << (noperspective_color ? "noperspective " : "") << "out float4 v_col" << i << " : COLOR"
    552          << i << ",\n";
    553 
    554     for (u32 i = 0; i < num_texcoord_outputs; i++)
    555       ss << "  " << qualifier << "out float2 v_tex" << i << " : TEXCOORD" << i << ",\n";
    556 
    557     u32 additional_counter = num_texcoord_outputs;
    558     for (const auto& [qualifiers, name] : additional_outputs)
    559     {
    560       const char* qualifier_to_use = (std::strlen(qualifiers) > 0) ? qualifiers : qualifier;
    561       ss << "  " << qualifier_to_use << " out " << name << " : TEXCOORD" << additional_counter << ",\n";
    562       additional_counter++;
    563     }
    564 
    565     ss << "  out float4 v_pos : SV_Position)\n";
    566   }
    567 }
    568 
    569 void ShaderGen::DeclareFragmentEntryPoint(
    570   std::stringstream& ss, u32 num_color_inputs, u32 num_texcoord_inputs,
    571   const std::initializer_list<std::pair<const char*, const char*>>& additional_inputs /* =  */,
    572   bool declare_fragcoord /* = false */, u32 num_color_outputs /* = 1 */, bool dual_source_output /* = false */,
    573   bool depth_output /* = false */, bool msaa /* = false */, bool ssaa /* = false */,
    574   bool declare_sample_id /* = false */, bool noperspective_color /* = false */, bool feedback_loop /* = false */,
    575   bool rov /* = false */)
    576 {
    577   if (m_glsl)
    578   {
    579     if (num_color_inputs > 0 || num_texcoord_inputs > 0 || additional_inputs.size() > 0)
    580     {
    581       if (m_use_glsl_interface_blocks)
    582       {
    583         const char* qualifier = GetInterpolationQualifier(true, msaa, ssaa, false);
    584 
    585         if (m_spirv)
    586           ss << "layout(location = 0) ";
    587 
    588         ss << "in VertexData {\n";
    589         for (u32 i = 0; i < num_color_inputs; i++)
    590           ss << "  " << qualifier << (noperspective_color ? "noperspective " : "") << "float4 v_col" << i << ";\n";
    591 
    592         for (u32 i = 0; i < num_texcoord_inputs; i++)
    593           ss << "  " << qualifier << "float2 v_tex" << i << ";\n";
    594 
    595         for (const auto& [qualifiers, name] : additional_inputs)
    596         {
    597           const char* qualifier_to_use = (std::strlen(qualifiers) > 0) ? qualifiers : qualifier;
    598           ss << "  " << qualifier_to_use << " " << name << ";\n";
    599         }
    600         ss << "};\n";
    601       }
    602       else
    603       {
    604         const char* qualifier = GetInterpolationQualifier(false, msaa, ssaa, false);
    605 
    606         for (u32 i = 0; i < num_color_inputs; i++)
    607           ss << qualifier << (noperspective_color ? "noperspective " : "") << "in float4 v_col" << i << ";\n";
    608 
    609         for (u32 i = 0; i < num_texcoord_inputs; i++)
    610           ss << qualifier << "in float2 v_tex" << i << ";\n";
    611 
    612         for (const auto& [qualifiers, name] : additional_inputs)
    613         {
    614           const char* qualifier_to_use = (std::strlen(qualifiers) > 0) ? qualifiers : qualifier;
    615           ss << qualifier_to_use << " in " << name << ";\n";
    616         }
    617       }
    618     }
    619 
    620     if (declare_fragcoord)
    621       ss << "#define v_pos gl_FragCoord\n";
    622 
    623     if (declare_sample_id)
    624       ss << "#define f_sample_index uint(gl_SampleID)\n";
    625 
    626     if (depth_output)
    627       ss << "#define o_depth gl_FragDepth\n";
    628 
    629     const char* target_0_qualifier = "out";
    630 
    631     if (feedback_loop)
    632     {
    633       Assert(!rov);
    634 
    635 #ifdef ENABLE_OPENGL
    636       if (m_render_api == RenderAPI::OpenGL || m_render_api == RenderAPI::OpenGLES)
    637       {
    638         Assert(m_supports_framebuffer_fetch);
    639         if (GLAD_GL_EXT_shader_framebuffer_fetch)
    640         {
    641           target_0_qualifier = "inout";
    642           ss << "#define LAST_FRAG_COLOR o_col0\n";
    643         }
    644         else if (GLAD_GL_ARM_shader_framebuffer_fetch)
    645         {
    646           ss << "#define LAST_FRAG_COLOR gl_LastFragColorARM\n";
    647         }
    648       }
    649 #endif
    650 #ifdef ENABLE_VULKAN
    651       if (m_render_api == RenderAPI::Vulkan)
    652       {
    653         ss << "layout(input_attachment_index = 0, set = 2, binding = 0) uniform "
    654            << (msaa ? "subpassInputMS" : "subpassInput") << " u_input_rt; \n";
    655         ss << "#define LAST_FRAG_COLOR " << (msaa ? "subpassLoad(u_input_rt, gl_SampleID)" : "subpassLoad(u_input_rt)")
    656            << "\n";
    657       }
    658 #endif
    659 #ifdef __APPLE__
    660       if (m_render_api == RenderAPI::Metal)
    661       {
    662         if (m_supports_framebuffer_fetch)
    663         {
    664           // Set doesn't matter, because it's transformed to color0.
    665           ss << "layout(input_attachment_index = 0, set = 2, binding = 0) uniform "
    666              << (msaa ? "subpassInputMS" : "subpassInput") << " u_input_rt; \n";
    667           ss << "#define LAST_FRAG_COLOR "
    668              << (msaa ? "subpassLoad(u_input_rt, gl_SampleID)" : "subpassLoad(u_input_rt)") << "\n";
    669         }
    670         else
    671         {
    672           ss << "layout(set = 2, binding = 0) uniform " << (msaa ? "texture2DMS" : "texture2D") << " u_input_rt;\n";
    673           ss << "#define LAST_FRAG_COLOR texelFetch(u_input_rt, int2(gl_FragCoord.xy), " << (msaa ? "gl_SampleID" : "0")
    674              << ")\n";
    675         }
    676       }
    677 #endif
    678     }
    679     else if (rov)
    680     {
    681       ss << "layout(pixel_interlock_ordered) in;\n";
    682       ss << "#define ROV_LOAD(name, coords) imageLoad(name, ivec2(coords))\n";
    683       ss << "#define ROV_STORE(name, coords, value) imageStore(name, ivec2(coords), value)\n";
    684       ss << "#define BEGIN_ROV_REGION beginInvocationInterlockARB()\n";
    685       ss << "#define END_ROV_REGION endInvocationInterlockARB()\n";
    686     }
    687 
    688     if (m_use_glsl_binding_layout)
    689     {
    690       if (dual_source_output && m_supports_dual_source_blend && num_color_outputs > 1)
    691       {
    692         for (u32 i = 0; i < num_color_outputs; i++)
    693         {
    694           ss << "layout(location = 0, index = " << i << ") " << ((i == 0) ? target_0_qualifier : "out")
    695              << " float4 o_col" << i << ";\n";
    696         }
    697       }
    698       else
    699       {
    700         for (u32 i = 0; i < num_color_outputs; i++)
    701         {
    702           ss << "layout(location = " << i << ") " << ((i == 0) ? target_0_qualifier : "out") << " float4 o_col" << i
    703              << ";\n";
    704         }
    705       }
    706     }
    707     else
    708     {
    709       for (u32 i = 0; i < num_color_outputs; i++)
    710         ss << ((i == 0) ? target_0_qualifier : "out") << " float4 o_col" << i << ";\n";
    711     }
    712 
    713     ss << "\n";
    714 
    715     ss << "void main()\n";
    716   }
    717   else
    718   {
    719     if (rov)
    720     {
    721       ss << "#define ROV_LOAD(name, coords) name[uint2(coords)]\n";
    722       ss << "#define ROV_STORE(name, coords, value) name[uint2(coords)] = value\n";
    723       ss << "#define BEGIN_ROV_REGION\n";
    724       ss << "#define END_ROV_REGION\n";
    725     }
    726 
    727     const char* qualifier = GetInterpolationQualifier(false, msaa, ssaa, false);
    728 
    729     ss << "void main(\n";
    730 
    731     bool first = true;
    732     for (u32 i = 0; i < num_color_inputs; i++)
    733     {
    734       ss << (first ? "" : ",\n") << "  " << qualifier << (noperspective_color ? "noperspective " : "")
    735          << "in float4 v_col" << i << " : COLOR" << i;
    736       first = false;
    737     }
    738 
    739     for (u32 i = 0; i < num_texcoord_inputs; i++)
    740     {
    741       ss << (first ? "" : ",\n") << "  " << qualifier << "in float2 v_tex" << i << " : TEXCOORD" << i;
    742       first = false;
    743     }
    744 
    745     u32 additional_counter = num_texcoord_inputs;
    746     for (const auto& [qualifiers, name] : additional_inputs)
    747     {
    748       const char* qualifier_to_use = (std::strlen(qualifiers) > 0) ? qualifiers : qualifier;
    749       ss << (first ? "" : ",\n") << "  " << qualifier_to_use << " in " << name << " : TEXCOORD" << additional_counter;
    750       additional_counter++;
    751       first = false;
    752     }
    753 
    754     if (declare_fragcoord)
    755     {
    756       ss << (first ? "" : ",\n") << "  in float4 v_pos : SV_Position";
    757       first = false;
    758     }
    759     if (declare_sample_id)
    760     {
    761       ss << (first ? "" : ",\n") << "  in uint f_sample_index : SV_SampleIndex";
    762       first = false;
    763     }
    764 
    765     if (depth_output)
    766     {
    767       ss << (first ? "" : ",\n") << "  out float o_depth : SV_Depth";
    768       first = false;
    769     }
    770     for (u32 i = 0; i < num_color_outputs; i++)
    771     {
    772       ss << (first ? "" : ",\n") << "  out float4 o_col" << i << " : SV_Target" << i;
    773       first = false;
    774     }
    775 
    776     ss << ")";
    777   }
    778 }
    779 
    780 std::string ShaderGen::GenerateScreenQuadVertexShader(float z /* = 0.0f */)
    781 {
    782   std::stringstream ss;
    783   WriteHeader(ss);
    784   DeclareVertexEntryPoint(ss, {}, 0, 1, {}, true);
    785   ss << "{\n";
    786   ss << "  v_tex0 = float2(float((v_id << 1) & 2u), float(v_id & 2u));\n";
    787   ss << "  v_pos = float4(v_tex0 * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), " << std::fixed << z << "f, 1.0f);\n";
    788   ss << "  #if API_OPENGL || API_OPENGL_ES || API_VULKAN\n";
    789   ss << "    v_pos.y = -v_pos.y;\n";
    790   ss << "  #endif\n";
    791   ss << "}\n";
    792 
    793   return ss.str();
    794 }
    795 
    796 std::string ShaderGen::GenerateUVQuadVertexShader()
    797 {
    798   std::stringstream ss;
    799   WriteHeader(ss);
    800   DeclareUniformBuffer(ss, {"float2 u_uv_min", "float2 u_uv_max"}, true);
    801   DeclareVertexEntryPoint(ss, {}, 0, 1, {}, true);
    802   ss << R"(
    803 {
    804   v_tex0 = float2(float((v_id << 1) & 2u), float(v_id & 2u));
    805   v_pos = float4(v_tex0 * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);
    806   v_tex0 = u_uv_min + (u_uv_max - u_uv_min) * v_tex0;
    807   #if API_OPENGL || API_OPENGL_ES || API_VULKAN
    808     v_pos.y = -v_pos.y;
    809   #endif
    810 }
    811 )";
    812 
    813   return ss.str();
    814 }
    815 
    816 std::string ShaderGen::GenerateFillFragmentShader()
    817 {
    818   std::stringstream ss;
    819   WriteHeader(ss);
    820   DeclareUniformBuffer(ss, {"float4 u_fill_color"}, true);
    821   DeclareFragmentEntryPoint(ss, 0, 1);
    822 
    823   ss << R"(
    824 {
    825   o_col0 = u_fill_color;
    826 }
    827 )";
    828 
    829   return ss.str();
    830 }
    831 
    832 std::string ShaderGen::GenerateCopyFragmentShader()
    833 {
    834   std::stringstream ss;
    835   WriteHeader(ss);
    836   DeclareUniformBuffer(ss, {"float4 u_src_rect"}, true);
    837   DeclareTexture(ss, "samp0", 0);
    838   DeclareFragmentEntryPoint(ss, 0, 1);
    839 
    840   ss << R"(
    841 {
    842   float2 coords = u_src_rect.xy + v_tex0 * u_src_rect.zw;
    843   o_col0 = SAMPLE_TEXTURE(samp0, coords);
    844 }
    845 )";
    846 
    847   return ss.str();
    848 }
    849 
    850 std::string ShaderGen::GenerateImGuiVertexShader()
    851 {
    852   std::stringstream ss;
    853   WriteHeader(ss);
    854   DeclareUniformBuffer(ss, {"float4x4 ProjectionMatrix"}, true);
    855   DeclareVertexEntryPoint(ss, {"float2 a_pos", "float2 a_tex0", "float4 a_col0"}, 1, 1, {}, false);
    856   ss << R"(
    857 {
    858   v_pos = mul(ProjectionMatrix, float4(a_pos, 0.f, 1.f));
    859   v_col0 = a_col0;
    860   v_tex0 = a_tex0;
    861   #if API_VULKAN
    862     v_pos.y = -v_pos.y;
    863   #endif
    864 }
    865 )";
    866 
    867   return ss.str();
    868 }
    869 
    870 std::string ShaderGen::GenerateImGuiFragmentShader()
    871 {
    872   std::stringstream ss;
    873   WriteHeader(ss);
    874   DeclareTexture(ss, "samp0", 0);
    875   DeclareFragmentEntryPoint(ss, 1, 1);
    876 
    877   ss << R"(
    878 {
    879   o_col0 = v_col0 * SAMPLE_TEXTURE(samp0, v_tex0);
    880 }
    881 )";
    882 
    883   return ss.str();
    884 }