gpu_shadergen.cpp (7896B)
1 // SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com> 2 // SPDX-License-Identifier: GPL-3.0 3 4 #include "gpu_shadergen.h" 5 6 GPUShaderGen::GPUShaderGen(RenderAPI render_api, bool supports_dual_source_blend, bool supports_framebuffer_fetch) 7 : ShaderGen(render_api, GetShaderLanguageForAPI(render_api), supports_dual_source_blend, supports_framebuffer_fetch) 8 { 9 } 10 11 GPUShaderGen::~GPUShaderGen() = default; 12 13 void GPUShaderGen::WriteDisplayUniformBuffer(std::stringstream& ss) 14 { 15 // Rotation matrix split into rows to avoid padding in HLSL. 16 DeclareUniformBuffer(ss, 17 {"float4 u_src_rect", "float4 u_src_size", "float4 u_clamp_rect", "float4 u_params", 18 "float2 u_rotation_matrix0", "float2 u_rotation_matrix1"}, 19 true); 20 21 ss << R"( 22 float2 ClampUV(float2 uv) { 23 return clamp(uv, u_clamp_rect.xy, u_clamp_rect.zw); 24 })"; 25 } 26 27 std::string GPUShaderGen::GenerateDisplayVertexShader() 28 { 29 std::stringstream ss; 30 WriteHeader(ss); 31 WriteDisplayUniformBuffer(ss); 32 DeclareVertexEntryPoint(ss, {}, 0, 1, {}, true); 33 ss << R"( 34 { 35 float2 pos = float2(float((v_id << 1) & 2u), float(v_id & 2u)); 36 v_tex0 = u_src_rect.xy + pos * u_src_rect.zw; 37 v_pos = float4(pos * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f); 38 39 // Avoid HLSL/GLSL constructor differences by explicitly multiplying the matrix. 40 v_pos.xy = float2(dot(u_rotation_matrix0, v_pos.xy), dot(u_rotation_matrix1, v_pos.xy)); 41 42 #if API_VULKAN 43 v_pos.y = -v_pos.y; 44 #endif 45 } 46 )"; 47 48 return ss.str(); 49 } 50 51 std::string GPUShaderGen::GenerateDisplayFragmentShader(bool clamp_uv) 52 { 53 std::stringstream ss; 54 WriteHeader(ss); 55 WriteDisplayUniformBuffer(ss); 56 DeclareTexture(ss, "samp0", 0); 57 DeclareFragmentEntryPoint(ss, 0, 1); 58 if (clamp_uv) 59 ss << "{\n o_col0 = float4(SAMPLE_TEXTURE(samp0, ClampUV(v_tex0)).rgb, 1.0f);\n }"; 60 else 61 ss << "{\n o_col0 = float4(SAMPLE_TEXTURE(samp0, v_tex0).rgb, 1.0f);\n }"; 62 63 return ss.str(); 64 } 65 66 std::string GPUShaderGen::GenerateDisplaySharpBilinearFragmentShader() 67 { 68 std::stringstream ss; 69 WriteHeader(ss); 70 WriteDisplayUniformBuffer(ss); 71 DeclareTexture(ss, "samp0", 0, false); 72 73 // Based on 74 // https://github.com/rsn8887/Sharp-Bilinear-Shaders/blob/master/Copy_To_RetroPie/shaders/sharp-bilinear-simple.glsl 75 DeclareFragmentEntryPoint(ss, 0, 1); 76 ss << R"( 77 { 78 float2 scale = u_params.xy; 79 float2 region_range = u_params.zw; 80 81 float2 texel = v_tex0 * u_src_size.xy; 82 float2 texel_floored = floor(texel); 83 float2 s = frac(texel); 84 85 float2 center_dist = s - 0.5; 86 float2 f = (center_dist - clamp(center_dist, -region_range, region_range)) * scale + 0.5; 87 float2 mod_texel = texel_floored + f; 88 89 o_col0 = float4(SAMPLE_TEXTURE(samp0, ClampUV(mod_texel * u_src_size.zw)).rgb, 1.0f); 90 })"; 91 92 return ss.str(); 93 } 94 95 std::string GPUShaderGen::GenerateInterleavedFieldExtractFragmentShader() 96 { 97 std::stringstream ss; 98 WriteHeader(ss); 99 DeclareUniformBuffer(ss, {"uint2 u_src_offset", "uint u_line_skip"}, true); 100 DeclareTexture(ss, "samp0", 0, false); 101 102 DeclareFragmentEntryPoint(ss, 0, 1, {}, true); 103 ss << R"( 104 { 105 uint2 tcoord = u_src_offset + uint2(uint(v_pos.x), uint(v_pos.y) << u_line_skip); 106 o_col0 = LOAD_TEXTURE(samp0, int2(tcoord), 0); 107 } 108 )"; 109 110 return ss.str(); 111 } 112 113 std::string GPUShaderGen::GenerateDeinterlaceWeaveFragmentShader() 114 { 115 std::stringstream ss; 116 WriteHeader(ss); 117 DeclareUniformBuffer(ss, {"uint2 u_src_offset", "uint u_render_field", "uint u_line_skip"}, true); 118 DeclareTexture(ss, "samp0", 0, false); 119 120 DeclareFragmentEntryPoint(ss, 0, 1, {}, true); 121 ss << R"( 122 { 123 uint2 fcoord = uint2(v_pos.xy); 124 if ((fcoord.y & 1) != u_render_field) 125 discard; 126 127 uint2 tcoord = u_src_offset + uint2(fcoord.x, (fcoord.y / 2u) << u_line_skip); 128 o_col0 = LOAD_TEXTURE(samp0, int2(tcoord), 0); 129 })"; 130 131 return ss.str(); 132 } 133 134 std::string GPUShaderGen::GenerateDeinterlaceBlendFragmentShader() 135 { 136 std::stringstream ss; 137 WriteHeader(ss); 138 DeclareTexture(ss, "samp0", 0, false); 139 DeclareTexture(ss, "samp1", 1, false); 140 141 DeclareFragmentEntryPoint(ss, 0, 1, {}, true); 142 ss << R"( 143 { 144 uint2 uv = uint2(v_pos.xy); 145 float4 c0 = LOAD_TEXTURE(samp0, int2(uv), 0); 146 float4 c1 = LOAD_TEXTURE(samp1, int2(uv), 0); 147 o_col0 = (c0 + c1) * 0.5f; 148 } 149 )"; 150 151 return ss.str(); 152 } 153 154 std::string GPUShaderGen::GenerateFastMADReconstructFragmentShader() 155 { 156 std::stringstream ss; 157 WriteHeader(ss); 158 DeclareUniformBuffer(ss, {"uint u_current_field", "uint u_height"}, true); 159 DeclareTexture(ss, "samp0", 0, false); 160 DeclareTexture(ss, "samp1", 1, false); 161 DeclareTexture(ss, "samp2", 2, false); 162 DeclareTexture(ss, "samp3", 3, false); 163 164 ss << R"( 165 CONSTANT float3 SENSITIVITY = float3(0.08f, 0.08f, 0.08f); 166 )"; 167 168 DeclareFragmentEntryPoint(ss, 0, 1, {}, true); 169 ss << R"( 170 { 171 int2 uv = int2(int(v_pos.x), int(v_pos.y) >> 1); 172 float3 cur = LOAD_TEXTURE(samp0, uv, 0).rgb; 173 174 float3 hn = LOAD_TEXTURE(samp0, uv + int2(0, -1), 0).rgb; 175 float3 cn = LOAD_TEXTURE(samp1, uv, 0).rgb; 176 float3 ln = LOAD_TEXTURE(samp0, uv + int2(0, 1), 0).rgb; 177 178 float3 ho = LOAD_TEXTURE(samp2, uv + int2(0, -1), 0).rgb; 179 float3 co = LOAD_TEXTURE(samp3, uv, 0).rgb; 180 float3 lo = LOAD_TEXTURE(samp2, uv + int2(0, 1), 0).rgb; 181 182 float3 mh = abs(hn.rgb - ho.rgb) - SENSITIVITY; 183 float3 mc = abs(cn.rgb - co.rgb) - SENSITIVITY; 184 float3 ml = abs(ln.rgb - lo.rgb) - SENSITIVITY; 185 float3 mmaxv = max(mh, max(mc, ml)); 186 float mmax = max(mmaxv.r, max(mmaxv.g, mmaxv.b)); 187 188 // Is pixel F [n][ x , y ] present in the Current Field f [n] ? 189 uint row = uint(v_pos.y); 190 if ((row & 1u) == u_current_field) 191 { 192 // Directly uses the pixel from the Current Field 193 o_col0.rgb = cur; 194 } 195 else if (row > 0u && row < u_height && mmax > 0.0f) 196 { 197 // Reconstructs the missing pixel as the average of the same pixel from the line above and the 198 // line below it in the Current Field. 199 o_col0.rgb = (hn + ln) / 2.0; 200 } 201 else 202 { 203 // Reconstructs the missing pixel as the same pixel from the Previous Field. 204 o_col0.rgb = cn; 205 } 206 o_col0.a = 1.0f; 207 } 208 )"; 209 210 return ss.str(); 211 } 212 213 std::string GPUShaderGen::GenerateChromaSmoothingFragmentShader() 214 { 215 std::stringstream ss; 216 WriteHeader(ss); 217 DeclareUniformBuffer(ss, {"uint2 u_sample_offset", "uint2 u_clamp_size"}, true); 218 DeclareTexture(ss, "samp0", 0); 219 220 ss << R"( 221 float3 RGBToYUV(float3 rgb) 222 { 223 return float3(dot(rgb.rgb, float3(0.299f, 0.587f, 0.114f)), 224 dot(rgb.rgb, float3(-0.14713f, -0.28886f, 0.436f)), 225 dot(rgb.rgb, float3(0.615f, -0.51499f, -0.10001f))); 226 } 227 228 float3 YUVToRGB(float3 yuv) 229 { 230 return float3(dot(yuv, float3(1.0f, 0.0f, 1.13983f)), 231 dot(yuv, float3(1.0f, -0.39465f, -0.58060f)), 232 dot(yuv, float3(1.0f, 2.03211f, 0.0f))); 233 } 234 235 float3 SampleVRAMAverage2x2(uint2 icoords) 236 { 237 float3 value = LOAD_TEXTURE(samp0, int2(icoords), 0).rgb; 238 value += LOAD_TEXTURE(samp0, int2(icoords + uint2(0, 1)), 0).rgb; 239 value += LOAD_TEXTURE(samp0, int2(icoords + uint2(1, 0)), 0).rgb; 240 value += LOAD_TEXTURE(samp0, int2(icoords + uint2(1, 1)), 0).rgb; 241 return value * 0.25; 242 } 243 )"; 244 245 DeclareFragmentEntryPoint(ss, 0, 1, {}, true); 246 ss << R"( 247 { 248 uint2 icoords = uint2(v_pos.xy) + u_sample_offset; 249 int2 base = int2(icoords) - 1; 250 uint2 low = uint2(max(base & ~1, int2(0, 0))); 251 uint2 high = min(low + 2u, u_clamp_size); 252 float2 coeff = vec2(base & 1) * 0.5 + 0.25; 253 254 float3 p = LOAD_TEXTURE(samp0, int2(icoords), 0).rgb; 255 float3 p00 = SampleVRAMAverage2x2(low); 256 float3 p01 = SampleVRAMAverage2x2(uint2(low.x, high.y)); 257 float3 p10 = SampleVRAMAverage2x2(uint2(high.x, low.y)); 258 float3 p11 = SampleVRAMAverage2x2(high); 259 260 float3 s = lerp(lerp(p00, p10, coeff.x), 261 lerp(p01, p11, coeff.x), 262 coeff.y); 263 264 float y = RGBToYUV(p).x; 265 float2 uv = RGBToYUV(s).yz; 266 o_col0 = float4(YUVToRGB(float3(y, uv)), 1.0); 267 } 268 )"; 269 270 return ss.str(); 271 }