duckstation

duckstation, archived at the revision just before upstream changed it to a proprietary software project; this version is the libre one.
git clone https://git.neptards.moe/u3shit/duckstation.git
Log | Files | Refs | README | LICENSE

gpu_shadergen.cpp (7896B)


      1 // SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
      2 // SPDX-License-Identifier: GPL-3.0
      3 
      4 #include "gpu_shadergen.h"
      5 
      6 GPUShaderGen::GPUShaderGen(RenderAPI render_api, bool supports_dual_source_blend, bool supports_framebuffer_fetch)
      7   : ShaderGen(render_api, GetShaderLanguageForAPI(render_api), supports_dual_source_blend, supports_framebuffer_fetch)
      8 {
      9 }
     10 
     11 GPUShaderGen::~GPUShaderGen() = default;
     12 
     13 void GPUShaderGen::WriteDisplayUniformBuffer(std::stringstream& ss)
     14 {
     15   // Rotation matrix split into rows to avoid padding in HLSL.
     16   DeclareUniformBuffer(ss,
     17                        {"float4 u_src_rect", "float4 u_src_size", "float4 u_clamp_rect", "float4 u_params",
     18                         "float2 u_rotation_matrix0", "float2 u_rotation_matrix1"},
     19                        true);
     20 
     21   ss << R"(
     22 float2 ClampUV(float2 uv) {
     23   return clamp(uv, u_clamp_rect.xy, u_clamp_rect.zw);
     24 })";
     25 }
     26 
     27 std::string GPUShaderGen::GenerateDisplayVertexShader()
     28 {
     29   std::stringstream ss;
     30   WriteHeader(ss);
     31   WriteDisplayUniformBuffer(ss);
     32   DeclareVertexEntryPoint(ss, {}, 0, 1, {}, true);
     33   ss << R"(
     34 {
     35   float2 pos = float2(float((v_id << 1) & 2u), float(v_id & 2u));
     36   v_tex0 = u_src_rect.xy + pos * u_src_rect.zw;
     37   v_pos = float4(pos * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);
     38 
     39   // Avoid HLSL/GLSL constructor differences by explicitly multiplying the matrix.
     40   v_pos.xy = float2(dot(u_rotation_matrix0, v_pos.xy), dot(u_rotation_matrix1, v_pos.xy));
     41 
     42   #if API_VULKAN
     43     v_pos.y = -v_pos.y;
     44   #endif
     45 }
     46 )";
     47 
     48   return ss.str();
     49 }
     50 
     51 std::string GPUShaderGen::GenerateDisplayFragmentShader(bool clamp_uv)
     52 {
     53   std::stringstream ss;
     54   WriteHeader(ss);
     55   WriteDisplayUniformBuffer(ss);
     56   DeclareTexture(ss, "samp0", 0);
     57   DeclareFragmentEntryPoint(ss, 0, 1);
     58   if (clamp_uv)
     59     ss << "{\n  o_col0 = float4(SAMPLE_TEXTURE(samp0, ClampUV(v_tex0)).rgb, 1.0f);\n }";
     60   else
     61     ss << "{\n  o_col0 = float4(SAMPLE_TEXTURE(samp0, v_tex0).rgb, 1.0f);\n }";
     62 
     63   return ss.str();
     64 }
     65 
     66 std::string GPUShaderGen::GenerateDisplaySharpBilinearFragmentShader()
     67 {
     68   std::stringstream ss;
     69   WriteHeader(ss);
     70   WriteDisplayUniformBuffer(ss);
     71   DeclareTexture(ss, "samp0", 0, false);
     72 
     73   // Based on
     74   // https://github.com/rsn8887/Sharp-Bilinear-Shaders/blob/master/Copy_To_RetroPie/shaders/sharp-bilinear-simple.glsl
     75   DeclareFragmentEntryPoint(ss, 0, 1);
     76   ss << R"(
     77 {
     78   float2 scale = u_params.xy;
     79   float2 region_range = u_params.zw;
     80 
     81   float2 texel = v_tex0 * u_src_size.xy;
     82   float2 texel_floored = floor(texel);
     83   float2 s = frac(texel);
     84 
     85   float2 center_dist = s - 0.5;
     86   float2 f = (center_dist - clamp(center_dist, -region_range, region_range)) * scale + 0.5;
     87   float2 mod_texel = texel_floored + f;
     88 
     89   o_col0 = float4(SAMPLE_TEXTURE(samp0, ClampUV(mod_texel * u_src_size.zw)).rgb, 1.0f);
     90 })";
     91 
     92   return ss.str();
     93 }
     94 
     95 std::string GPUShaderGen::GenerateInterleavedFieldExtractFragmentShader()
     96 {
     97   std::stringstream ss;
     98   WriteHeader(ss);
     99   DeclareUniformBuffer(ss, {"uint2 u_src_offset", "uint u_line_skip"}, true);
    100   DeclareTexture(ss, "samp0", 0, false);
    101 
    102   DeclareFragmentEntryPoint(ss, 0, 1, {}, true);
    103   ss << R"(
    104 {
    105   uint2 tcoord = u_src_offset + uint2(uint(v_pos.x), uint(v_pos.y) << u_line_skip);
    106   o_col0 = LOAD_TEXTURE(samp0, int2(tcoord), 0);
    107 }
    108 )";
    109 
    110   return ss.str();
    111 }
    112 
    113 std::string GPUShaderGen::GenerateDeinterlaceWeaveFragmentShader()
    114 {
    115   std::stringstream ss;
    116   WriteHeader(ss);
    117   DeclareUniformBuffer(ss, {"uint2 u_src_offset", "uint u_render_field", "uint u_line_skip"}, true);
    118   DeclareTexture(ss, "samp0", 0, false);
    119 
    120   DeclareFragmentEntryPoint(ss, 0, 1, {}, true);
    121   ss << R"(
    122 {
    123   uint2 fcoord = uint2(v_pos.xy);
    124   if ((fcoord.y & 1) != u_render_field)
    125     discard;
    126 
    127   uint2 tcoord = u_src_offset + uint2(fcoord.x, (fcoord.y / 2u) << u_line_skip);
    128   o_col0 = LOAD_TEXTURE(samp0, int2(tcoord), 0);
    129 })";
    130 
    131   return ss.str();
    132 }
    133 
    134 std::string GPUShaderGen::GenerateDeinterlaceBlendFragmentShader()
    135 {
    136   std::stringstream ss;
    137   WriteHeader(ss);
    138   DeclareTexture(ss, "samp0", 0, false);
    139   DeclareTexture(ss, "samp1", 1, false);
    140 
    141   DeclareFragmentEntryPoint(ss, 0, 1, {}, true);
    142   ss << R"(
    143 {
    144   uint2 uv = uint2(v_pos.xy);
    145   float4 c0 = LOAD_TEXTURE(samp0, int2(uv), 0);
    146   float4 c1 = LOAD_TEXTURE(samp1, int2(uv), 0);
    147   o_col0 = (c0 + c1) * 0.5f;
    148 }
    149 )";
    150 
    151   return ss.str();
    152 }
    153 
    154 std::string GPUShaderGen::GenerateFastMADReconstructFragmentShader()
    155 {
    156   std::stringstream ss;
    157   WriteHeader(ss);
    158   DeclareUniformBuffer(ss, {"uint u_current_field", "uint u_height"}, true);
    159   DeclareTexture(ss, "samp0", 0, false);
    160   DeclareTexture(ss, "samp1", 1, false);
    161   DeclareTexture(ss, "samp2", 2, false);
    162   DeclareTexture(ss, "samp3", 3, false);
    163 
    164   ss << R"(
    165 CONSTANT float3 SENSITIVITY = float3(0.08f, 0.08f, 0.08f);
    166 )";
    167 
    168   DeclareFragmentEntryPoint(ss, 0, 1, {}, true);
    169   ss << R"(
    170 {
    171   int2 uv = int2(int(v_pos.x), int(v_pos.y) >> 1);
    172   float3 cur = LOAD_TEXTURE(samp0, uv, 0).rgb;
    173 
    174   float3 hn = LOAD_TEXTURE(samp0, uv + int2(0, -1), 0).rgb;
    175   float3 cn = LOAD_TEXTURE(samp1, uv, 0).rgb;
    176   float3 ln = LOAD_TEXTURE(samp0, uv + int2(0, 1), 0).rgb;
    177 
    178   float3 ho = LOAD_TEXTURE(samp2, uv + int2(0, -1), 0).rgb;
    179   float3 co = LOAD_TEXTURE(samp3, uv, 0).rgb;
    180   float3 lo = LOAD_TEXTURE(samp2, uv + int2(0, 1), 0).rgb;
    181 
    182   float3 mh = abs(hn.rgb - ho.rgb) - SENSITIVITY;
    183   float3 mc = abs(cn.rgb - co.rgb) - SENSITIVITY;
    184   float3 ml = abs(ln.rgb - lo.rgb) - SENSITIVITY;
    185   float3 mmaxv = max(mh, max(mc, ml));
    186   float mmax = max(mmaxv.r, max(mmaxv.g, mmaxv.b));
    187 
    188   // Is pixel F [n][ x , y ] present in the Current Field f [n] ?
    189   uint row = uint(v_pos.y);
    190   if ((row & 1u) == u_current_field)
    191   {
    192     // Directly uses the pixel from the Current Field
    193     o_col0.rgb = cur;
    194   }
    195   else if (row > 0u && row < u_height && mmax > 0.0f)
    196   {
    197     // Reconstructs the missing pixel as the average of the same pixel from the line above and the
    198     // line below it in the Current Field.
    199     o_col0.rgb = (hn + ln) / 2.0;
    200   }
    201   else
    202   {
    203     // Reconstructs the missing pixel as the same pixel from the Previous Field.
    204     o_col0.rgb = cn;
    205   }
    206   o_col0.a = 1.0f;
    207 }
    208 )";
    209 
    210   return ss.str();
    211 }
    212 
    213 std::string GPUShaderGen::GenerateChromaSmoothingFragmentShader()
    214 {
    215   std::stringstream ss;
    216   WriteHeader(ss);
    217   DeclareUniformBuffer(ss, {"uint2 u_sample_offset", "uint2 u_clamp_size"}, true);
    218   DeclareTexture(ss, "samp0", 0);
    219 
    220   ss << R"(
    221 float3 RGBToYUV(float3 rgb)
    222 {
    223   return float3(dot(rgb.rgb, float3(0.299f, 0.587f, 0.114f)),
    224                 dot(rgb.rgb, float3(-0.14713f, -0.28886f, 0.436f)),
    225                 dot(rgb.rgb, float3(0.615f, -0.51499f, -0.10001f)));
    226 }
    227 
    228 float3 YUVToRGB(float3 yuv)
    229 {
    230   return float3(dot(yuv, float3(1.0f, 0.0f, 1.13983f)),
    231                 dot(yuv, float3(1.0f, -0.39465f, -0.58060f)),
    232                 dot(yuv, float3(1.0f, 2.03211f, 0.0f)));
    233 }
    234 
    235 float3 SampleVRAMAverage2x2(uint2 icoords)
    236 {
    237   float3 value = LOAD_TEXTURE(samp0, int2(icoords), 0).rgb;
    238   value += LOAD_TEXTURE(samp0, int2(icoords + uint2(0, 1)), 0).rgb;
    239   value += LOAD_TEXTURE(samp0, int2(icoords + uint2(1, 0)), 0).rgb;
    240   value += LOAD_TEXTURE(samp0, int2(icoords + uint2(1, 1)), 0).rgb;
    241   return value * 0.25;
    242 }
    243 )";
    244 
    245   DeclareFragmentEntryPoint(ss, 0, 1, {}, true);
    246   ss << R"(
    247 {
    248   uint2 icoords = uint2(v_pos.xy) + u_sample_offset;
    249   int2 base = int2(icoords) - 1;
    250   uint2 low = uint2(max(base & ~1, int2(0, 0)));
    251   uint2 high = min(low + 2u, u_clamp_size);
    252   float2 coeff = vec2(base & 1) * 0.5 + 0.25;
    253 
    254   float3 p = LOAD_TEXTURE(samp0, int2(icoords), 0).rgb;
    255   float3 p00 = SampleVRAMAverage2x2(low);
    256   float3 p01 = SampleVRAMAverage2x2(uint2(low.x, high.y));
    257   float3 p10 = SampleVRAMAverage2x2(uint2(high.x, low.y));
    258   float3 p11 = SampleVRAMAverage2x2(high);
    259 
    260   float3 s = lerp(lerp(p00, p10, coeff.x),
    261                   lerp(p01, p11, coeff.x),
    262                   coeff.y);
    263 
    264   float y = RGBToYUV(p).x;
    265   float2 uv = RGBToYUV(s).yz;
    266   o_col0 = float4(YUVToRGB(float3(y, uv)), 1.0);
    267 }
    268 )";
    269 
    270   return ss.str();
    271 }