effect_codegen_hlsl.cpp (56872B)
1 /* 2 * Copyright (C) 2014 Patrick Mours 3 * SPDX-License-Identifier: BSD-3-Clause 4 */ 5 6 #include "effect_parser.hpp" 7 #include "effect_codegen.hpp" 8 #include <cmath> // std::signbit, std::isinf, std::isnan 9 #include <cctype> // std::tolower 10 #include <cstdio> // std::snprintf 11 #include <cassert> 12 #include <cstring> // stricmp 13 #include <algorithm> // std::find_if, std::max 14 #include <locale> 15 #include <sstream> 16 17 using namespace reshadefx; 18 19 class codegen_hlsl final : public codegen 20 { 21 public: 22 codegen_hlsl(unsigned int shader_model, bool debug_info, bool uniforms_to_spec_constants) 23 : _shader_model(shader_model), _debug_info(debug_info), _uniforms_to_spec_constants(uniforms_to_spec_constants) 24 { 25 // Create default block and reserve a memory block to avoid frequent reallocations 26 std::string &block = _blocks.emplace(0, std::string()).first->second; 27 block.reserve(8192); 28 } 29 30 private: 31 enum class naming 32 { 33 // Name should already be unique, so no additional steps are taken 34 unique, 35 // Will be numbered when clashing with another name 36 general, 37 // Replace name with a code snippet 38 expression, 39 }; 40 41 std::string _cbuffer_block; 42 std::string _current_location; 43 std::unordered_map<id, std::string> _names; 44 std::unordered_map<id, std::string> _blocks; 45 unsigned int _shader_model = 0; 46 bool _debug_info = false; 47 bool _uniforms_to_spec_constants = false; 48 std::unordered_map<std::string, std::string> _remapped_semantics; 49 50 // Only write compatibility intrinsics to result if they are actually in use 51 bool _uses_bitwise_cast = false; 52 53 void write_result(module &module) override 54 { 55 module = std::move(_module); 56 57 std::string preamble; 58 59 if (_shader_model >= 40) 60 { 61 preamble += 62 "struct __sampler1D_int { Texture1D<int> t; SamplerState s; };\n" 63 "struct __sampler2D_int { Texture2D<int> t; SamplerState s; };\n" 64 "struct __sampler3D_int { Texture3D<int> t; SamplerState s; };\n" 65 "struct __sampler1D_uint { Texture1D<uint> t; SamplerState s; };\n" 66 "struct __sampler2D_uint { Texture2D<uint> t; SamplerState s; };\n" 67 "struct __sampler3D_uint { Texture3D<uint> t; SamplerState s; };\n" 68 "struct __sampler1D_float { Texture1D<float> t; SamplerState s; };\n" 69 "struct __sampler2D_float { Texture2D<float> t; SamplerState s; };\n" 70 "struct __sampler3D_float { Texture3D<float> t; SamplerState s; };\n" 71 "struct __sampler1D_float4 { Texture1D<float4> t; SamplerState s; };\n" 72 "struct __sampler2D_float4 { Texture2D<float4> t; SamplerState s; };\n" 73 "struct __sampler3D_float4 { Texture3D<float4> t; SamplerState s; };\n"; 74 75 if (!_cbuffer_block.empty()) 76 { 77 if (_shader_model >= 60) 78 preamble += "[[vk::binding(0, 0)]] "; // Descriptor set 0 79 80 preamble += "cbuffer _Globals {\n" + _cbuffer_block + "};\n"; 81 } 82 } 83 else 84 { 85 preamble += 86 "struct __sampler1D { sampler1D s; float1 pixelsize; };\n" 87 "struct __sampler2D { sampler2D s; float2 pixelsize; };\n" 88 "struct __sampler3D { sampler3D s; float3 pixelsize; };\n" 89 "uniform float2 __TEXEL_SIZE__ : register(c255);\n"; 90 91 if (_uses_bitwise_cast) 92 preamble += 93 "int __asint(float v) {" 94 " if (v == 0) return 0;" // Zero (does not handle negative zero) 95 // if (isinf(v)) return v < 0 ? 4286578688 : 2139095040; // Infinity 96 // if (isnan(v)) return 2147483647; // NaN (does not handle negative NaN) 97 " float e = 0;" 98 " float f = frexp(v, e) * 2 - 1;" // frexp does not include sign bit in HLSL, so can use as is 99 " float m = ldexp(f, 23);" 100 " return (v < 0 ? 2147483648 : 0) + ldexp(e + 126, 23) + m;" 101 "}\n" 102 "int2 __asint(float2 v) { return int2(__asint(v.x), __asint(v.y)); }\n" 103 "int3 __asint(float3 v) { return int3(__asint(v.x), __asint(v.y), __asint(v.z)); }\n" 104 "int4 __asint(float4 v) { return int4(__asint(v.x), __asint(v.y), __asint(v.z), __asint(v.w)); }\n" 105 106 "int __asuint(float v) { return __asint(v); }\n" 107 "int2 __asuint(float2 v) { return int2(__asint(v.x), __asint(v.y)); }\n" 108 "int3 __asuint(float3 v) { return int3(__asint(v.x), __asint(v.y), __asint(v.z)); }\n" 109 "int4 __asuint(float4 v) { return int4(__asint(v.x), __asint(v.y), __asint(v.z), __asint(v.w)); }\n" 110 111 "float __asfloat(int v) {" 112 " float m = v % exp2(23);" 113 " float f = ldexp(m, -23);" 114 " float e = floor(ldexp(v, -23) % 256);" 115 " return (v > 2147483647 ? -1 : 1) * (" 116 // e == 0 ? ldexp(f, -126) : // Denormalized 117 // e == 255 ? (m == 0 ? 1.#INF : -1.#IND) : // Infinity and NaN 118 " ldexp(1 + f, e - 127));" 119 "}\n" 120 "float2 __asfloat(int2 v) { return float2(__asfloat(v.x), __asfloat(v.y)); }\n" 121 "float3 __asfloat(int3 v) { return float3(__asfloat(v.x), __asfloat(v.y), __asfloat(v.z)); }\n" 122 "float4 __asfloat(int4 v) { return float4(__asfloat(v.x), __asfloat(v.y), __asfloat(v.z), __asfloat(v.w)); }\n"; 123 124 if (!_cbuffer_block.empty()) 125 preamble += _cbuffer_block; 126 127 // Offsets were multiplied in 'define_uniform', so adjust total size here accordingly 128 module.total_uniform_size *= 4; 129 } 130 131 module.code.assign(preamble.begin(), preamble.end()); 132 133 const std::string &main_block = _blocks.at(0); 134 module.code.insert(module.code.end(), main_block.begin(), main_block.end()); 135 } 136 137 template <bool is_param = false, bool is_decl = true> 138 void write_type(std::string &s, const type &type) const 139 { 140 if constexpr (is_decl) 141 { 142 if (type.has(type::q_static)) 143 s += "static "; 144 if (type.has(type::q_precise)) 145 s += "precise "; 146 if (type.has(type::q_groupshared)) 147 s += "groupshared "; 148 } 149 150 if constexpr (is_param) 151 { 152 if (type.has(type::q_linear)) 153 s += "linear "; 154 if (type.has(type::q_noperspective)) 155 s += "noperspective "; 156 if (type.has(type::q_centroid)) 157 s += "centroid "; 158 if (type.has(type::q_nointerpolation)) 159 s += "nointerpolation "; 160 161 if (type.has(type::q_inout)) 162 s += "inout "; 163 else if (type.has(type::q_in)) 164 s += "in "; 165 else if (type.has(type::q_out)) 166 s += "out "; 167 } 168 169 switch (type.base) 170 { 171 case type::t_void: 172 s += "void"; 173 return; 174 case type::t_bool: 175 s += "bool"; 176 break; 177 case type::t_min16int: 178 // Minimum precision types are only supported in shader model 4 and up 179 // Real 16-bit types were added in shader model 6.2 180 s += _shader_model >= 62 ? "int16_t" : _shader_model >= 40 ? "min16int" : "int"; 181 break; 182 case type::t_int: 183 s += "int"; 184 break; 185 case type::t_min16uint: 186 s += _shader_model >= 62 ? "uint16_t" : _shader_model >= 40 ? "min16uint" : "int"; 187 break; 188 case type::t_uint: 189 // In shader model 3, uints can only be used with known-positive values, so use ints instead 190 s += _shader_model >= 40 ? "uint" : "int"; 191 break; 192 case type::t_min16float: 193 s += _shader_model >= 62 ? "float16_t" : _shader_model >= 40 ? "min16float" : "float"; 194 break; 195 case type::t_float: 196 s += "float"; 197 break; 198 case type::t_struct: 199 s += id_to_name(type.definition); 200 return; 201 case type::t_sampler1d_int: 202 s += "__sampler1D"; 203 if (_shader_model >= 40) 204 s += "_int" + (type.rows > 1 ? std::to_string(type.rows) : std::string()); 205 return; 206 case type::t_sampler2d_int: 207 s += "__sampler2D"; 208 if (_shader_model >= 40) 209 s += "_int" + (type.rows > 1 ? std::to_string(type.rows) : std::string()); 210 return; 211 case type::t_sampler3d_int: 212 s += "__sampler3D"; 213 if (_shader_model >= 40) 214 s += "_int" + (type.rows > 1 ? std::to_string(type.rows) : std::string()); 215 return; 216 case type::t_sampler1d_uint: 217 s += "__sampler1D"; 218 if (_shader_model >= 40) 219 s += "_uint" + (type.rows > 1 ? std::to_string(type.rows) : std::string()); 220 return; 221 case type::t_sampler2d_uint: 222 s += "__sampler2D"; 223 if (_shader_model >= 40) 224 s += "_uint" + (type.rows > 1 ? std::to_string(type.rows) : std::string()); 225 return; 226 case type::t_sampler3d_uint: 227 s += "__sampler3D"; 228 if (_shader_model >= 40) 229 s += "_uint" + (type.rows > 1 ? std::to_string(type.rows) : std::string()); 230 return; 231 case type::t_sampler1d_float: 232 s += "__sampler1D"; 233 if (_shader_model >= 40) 234 s += "_float" + (type.rows > 1 ? std::to_string(type.rows) : std::string()); 235 return; 236 case type::t_sampler2d_float: 237 s += "__sampler2D"; 238 if (_shader_model >= 40) 239 s += "_float" + (type.rows > 1 ? std::to_string(type.rows) : std::string()); 240 return; 241 case type::t_sampler3d_float: 242 s += "__sampler3D"; 243 if (_shader_model >= 40) 244 s += "_float" + (type.rows > 1 ? std::to_string(type.rows) : std::string()); 245 return; 246 case type::t_storage1d_int: 247 s += "RWTexture1D<int" + (type.rows > 1 ? std::to_string(type.rows) : std::string()) + '>'; 248 return; 249 case type::t_storage2d_int: 250 s += "RWTexture2D<int" + (type.rows > 1 ? std::to_string(type.rows) : std::string()) + '>'; 251 return; 252 case type::t_storage3d_int: 253 s += "RWTexture3D<int" + (type.rows > 1 ? std::to_string(type.rows) : std::string()) + '>'; 254 return; 255 case type::t_storage1d_uint: 256 s += "RWTexture1D<uint" + (type.rows > 1 ? std::to_string(type.rows) : std::string()) + '>'; 257 return; 258 case type::t_storage2d_uint: 259 s += "RWTexture2D<uint" + (type.rows > 1 ? std::to_string(type.rows) : std::string()) + '>'; 260 return; 261 case type::t_storage3d_uint: 262 s += "RWTexture3D<uint" + (type.rows > 1 ? std::to_string(type.rows) : std::string()) + '>'; 263 return; 264 case type::t_storage1d_float: 265 s += "RWTexture1D<float" + (type.rows > 1 ? std::to_string(type.rows) : std::string()) + '>'; 266 return; 267 case type::t_storage2d_float: 268 s += "RWTexture2D<float" + (type.rows > 1 ? std::to_string(type.rows) : std::string()) + '>'; 269 return; 270 case type::t_storage3d_float: 271 s += "RWTexture3D<float" + (type.rows > 1 ? std::to_string(type.rows) : std::string()) + '>'; 272 return; 273 default: 274 assert(false); 275 return; 276 } 277 278 if (type.rows > 1) 279 s += std::to_string(type.rows); 280 if (type.cols > 1) 281 s += 'x' + std::to_string(type.cols); 282 } 283 void write_constant(std::string &s, const type &type, const constant &data) const 284 { 285 if (type.is_array()) 286 { 287 auto elem_type = type; 288 elem_type.array_length = 0; 289 290 s += "{ "; 291 292 for (int i = 0; i < type.array_length; ++i) 293 { 294 write_constant(s, elem_type, i < static_cast<int>(data.array_data.size()) ? data.array_data[i] : constant()); 295 296 if (i < type.array_length - 1) 297 s += ", "; 298 } 299 300 s += " }"; 301 return; 302 } 303 304 if (type.is_struct()) 305 { 306 // The can only be zero initializer struct constants 307 assert(data.as_uint[0] == 0); 308 309 s += '(' + id_to_name(type.definition) + ")0"; 310 return; 311 } 312 313 // There can only be numeric constants 314 assert(type.is_numeric()); 315 316 if (!type.is_scalar()) 317 write_type<false, false>(s, type), s += '('; 318 319 for (unsigned int i = 0, components = type.components(); i < components; ++i) 320 { 321 switch (type.base) 322 { 323 case type::t_bool: 324 s += data.as_uint[i] ? "true" : "false"; 325 break; 326 case type::t_min16int: 327 case type::t_int: 328 s += std::to_string(data.as_int[i]); 329 break; 330 case type::t_min16uint: 331 case type::t_uint: 332 s += std::to_string(data.as_uint[i]); 333 break; 334 case type::t_min16float: 335 case type::t_float: 336 if (std::isnan(data.as_float[i])) { 337 s += "-1.#IND"; 338 break; 339 } 340 if (std::isinf(data.as_float[i])) { 341 s += std::signbit(data.as_float[i]) ? "1.#INF" : "-1.#INF"; 342 break; 343 } 344 { 345 std::ostringstream ss; 346 ss.imbue(std::locale::classic()); 347 ss << data.as_float[i]; 348 s += ss.str(); 349 } 350 break; 351 default: 352 assert(false); 353 } 354 355 if (i < components - 1) 356 s += ", "; 357 } 358 359 if (!type.is_scalar()) 360 s += ')'; 361 } 362 template <bool force_source = false> 363 void write_location(std::string &s, const location &loc) 364 { 365 if (loc.source.empty() || !_debug_info) 366 return; 367 368 s += "#line " + std::to_string(loc.line); 369 370 size_t offset = s.size(); 371 372 // Avoid writing the file name every time to reduce output text size 373 if constexpr (force_source) 374 { 375 s += " \"" + loc.source + '\"'; 376 } 377 else if (loc.source != _current_location) 378 { 379 s += " \"" + loc.source + '\"'; 380 381 _current_location = loc.source; 382 } 383 384 // Need to escape string for new DirectX Shader Compiler (dxc) 385 if (_shader_model >= 60) 386 { 387 for (; (offset = s.find('\\', offset)) != std::string::npos; offset += 2) 388 s.insert(offset, "\\", 1); 389 } 390 391 s += '\n'; 392 } 393 void write_texture_format(std::string &s, texture_format format) 394 { 395 switch (format) 396 { 397 case texture_format::r32i: 398 s += "int"; 399 break; 400 case texture_format::r32u: 401 s += "uint"; 402 break; 403 case texture_format::r8: 404 case texture_format::r16: 405 case texture_format::r16f: 406 case texture_format::r32f: 407 s += "float"; 408 break; 409 default: 410 assert(false); 411 [[fallthrough]]; 412 case texture_format::unknown: 413 case texture_format::rg8: 414 case texture_format::rg16: 415 case texture_format::rg16f: 416 case texture_format::rg32f: 417 case texture_format::rgba8: 418 case texture_format::rgba16: 419 case texture_format::rgba16f: 420 case texture_format::rgba32f: 421 case texture_format::rgb10a2: 422 s += "float4"; 423 break; 424 } 425 } 426 427 std::string id_to_name(id id) const 428 { 429 assert(id != 0); 430 if (const auto names_it = _names.find(id); 431 names_it != _names.end()) 432 return names_it->second; 433 return '_' + std::to_string(id); 434 } 435 436 template <naming naming_type = naming::general> 437 void define_name(const id id, std::string name) 438 { 439 assert(!name.empty()); 440 if constexpr (naming_type != naming::expression) 441 if (name[0] == '_') 442 return; // Filter out names that may clash with automatic ones 443 name = escape_name(std::move(name)); 444 if constexpr (naming_type == naming::general) 445 if (std::find_if(_names.begin(), _names.end(), [&name](const auto &it) { return it.second == name; }) != _names.end()) 446 name += '_' + std::to_string(id); // Append a numbered suffix if the name already exists 447 _names[id] = std::move(name); 448 } 449 450 std::string convert_semantic(const std::string &semantic) 451 { 452 if (_shader_model < 40) 453 { 454 if (semantic == "SV_POSITION") 455 return "POSITION"; // For pixel shaders this has to be "VPOS", so need to redefine that in post 456 if (semantic == "SV_POINTSIZE") 457 return "PSIZE"; 458 if (semantic.compare(0, 9, "SV_TARGET") == 0) 459 return "COLOR" + semantic.substr(9); 460 if (semantic == "SV_DEPTH") 461 return "DEPTH"; 462 if (semantic == "SV_VERTEXID") 463 return "TEXCOORD0 /* VERTEXID */"; 464 if (semantic == "SV_ISFRONTFACE") 465 return "VFACE"; 466 467 if (semantic != "VPOS" && 468 semantic.compare(0, 5, "COLOR") != 0 && 469 semantic.compare(0, 6, "NORMAL") != 0 && 470 semantic.compare(0, 7, "TANGENT") != 0) 471 { 472 // Shader model 3 only supports a selected list of semantic names, so need to remap custom ones to that 473 if (const auto it = _remapped_semantics.find(semantic); 474 it != _remapped_semantics.end()) 475 return it->second; 476 477 // Legal semantic indices are between 0 and 15 478 if (_remapped_semantics.size() < 15) 479 { 480 const std::string remapped_semantic = "TEXCOORD" + std::to_string(_remapped_semantics.size()) + " /* " + semantic + " */"; 481 _remapped_semantics.emplace(semantic, remapped_semantic); 482 return remapped_semantic; 483 } 484 } 485 } 486 else 487 { 488 if (semantic.compare(0, 5, "COLOR") == 0) 489 return "SV_TARGET" + semantic.substr(5); 490 } 491 492 return semantic; 493 } 494 495 static std::string escape_name(std::string name) 496 { 497 static const auto stringicmp = [](const std::string &a, const std::string &b) { 498 #ifdef _WIN32 499 return _stricmp(a.c_str(), b.c_str()) == 0; 500 #else 501 return std::equal(a.begin(), a.end(), b.begin(), b.end(), [](std::string::value_type a, std::string::value_type b) { return std::tolower(a) == std::tolower(b); }); 502 #endif 503 }; 504 505 // HLSL compiler complains about "technique" and "pass" names in strict mode (no matter the casing) 506 if (stringicmp(name, "line") || 507 stringicmp(name, "pass") || 508 stringicmp(name, "technique") || 509 stringicmp(name, "point") || 510 stringicmp(name, "export") || 511 stringicmp(name, "extern") || 512 stringicmp(name, "compile") || 513 stringicmp(name, "discard") || 514 stringicmp(name, "half") || 515 stringicmp(name, "in") || 516 stringicmp(name, "lineadj") || 517 stringicmp(name, "matrix") || 518 stringicmp(name, "sample") || 519 stringicmp(name, "sampler") || 520 stringicmp(name, "shared") || 521 stringicmp(name, "precise") || 522 stringicmp(name, "register") || 523 stringicmp(name, "texture") || 524 stringicmp(name, "unorm") || 525 stringicmp(name, "triangle") || 526 stringicmp(name, "triangleadj") || 527 stringicmp(name, "out") || 528 stringicmp(name, "vector")) 529 // This is guaranteed to not clash with user defined names, since those starting with an underscore are filtered out in 'define_name' 530 name = '_' + name; 531 532 return name; 533 } 534 535 static void increase_indentation_level(std::string &block) 536 { 537 if (block.empty()) 538 return; 539 540 for (size_t pos = 0; (pos = block.find("\n\t", pos)) != std::string::npos; pos += 3) 541 block.replace(pos, 2, "\n\t\t"); 542 543 block.insert(block.begin(), '\t'); 544 } 545 546 id define_struct(const location &loc, struct_info &info) override 547 { 548 info.definition = make_id(); 549 define_name<naming::unique>(info.definition, info.unique_name); 550 551 _structs.push_back(info); 552 553 std::string &code = _blocks.at(_current_block); 554 555 write_location(code, loc); 556 557 code += "struct " + id_to_name(info.definition) + "\n{\n"; 558 559 for (const struct_member_info &member : info.member_list) 560 { 561 code += '\t'; 562 write_type<true>(code, member.type); // HLSL allows interpolation attributes on struct members, so handle this like a parameter 563 code += ' ' + member.name; 564 if (member.type.is_array()) 565 code += '[' + std::to_string(member.type.array_length) + ']'; 566 if (!member.semantic.empty()) 567 code += " : " + convert_semantic(member.semantic); 568 code += ";\n"; 569 } 570 571 code += "};\n"; 572 573 return info.definition; 574 } 575 id define_texture(const location &loc, texture_info &info) override 576 { 577 info.id = make_id(); 578 info.binding = ~0u; 579 580 define_name<naming::unique>(info.id, info.unique_name); 581 582 #if 0 583 if (_shader_model >= 40) 584 { 585 info.binding = _module.num_texture_bindings; 586 _module.num_texture_bindings += 2; 587 588 std::string &code = _blocks.at(_current_block); 589 590 write_location(code, loc); 591 592 if (_shader_model >= 60) 593 code += "[[vk::binding(" + std::to_string(info.binding + 0) + ", 2)]] "; // Descriptor set 2 594 595 code += "Texture" + std::to_string(static_cast<unsigned int>(info.type)) + "D<"; 596 write_texture_format(code, info.format); 597 code += "> __" + info.unique_name + " : register(t" + std::to_string(info.binding + 0) + "); \n"; 598 599 if (_shader_model >= 60) 600 code += "[[vk::binding(" + std::to_string(info.binding + 1) + ", 2)]] "; // Descriptor set 2 601 602 code += "Texture" + std::to_string(static_cast<unsigned int>(info.type)) + "D<"; 603 write_texture_format(code, info.format); 604 code += "> __srgb" + info.unique_name + " : register(t" + std::to_string(info.binding + 1) + "); \n"; 605 } 606 #endif 607 608 _module.textures.push_back(info); 609 610 return info.id; 611 } 612 id define_sampler(const location &loc, const texture_info &tex_info, sampler_info &info) override 613 { 614 info.id = make_id(); 615 616 define_name<naming::unique>(info.id, info.unique_name); 617 618 std::string &code = _blocks.at(_current_block); 619 620 if (_shader_model >= 40) 621 { 622 #if 0 623 // Try and reuse a sampler binding with the same sampler description 624 const auto existing_sampler = std::find_if(_module.samplers.begin(), _module.samplers.end(), 625 [&info](const auto &it) { 626 return it.filter == info.filter && it.address_u == info.address_u && it.address_v == info.address_v && it.address_w == info.address_w && it.min_lod == info.min_lod && it.max_lod == info.max_lod && it.lod_bias == info.lod_bias; 627 }); 628 629 if (existing_sampler != _module.samplers.end()) 630 { 631 info.binding = existing_sampler->binding; 632 } 633 else 634 { 635 info.binding = _module.num_sampler_bindings++; 636 637 if (_shader_model >= 60) 638 code += "[[vk::binding(" + std::to_string(info.binding) + ", 1)]] "; // Descriptor set 1 639 640 code += "SamplerState __s" + std::to_string(info.binding) + " : register(s" + std::to_string(info.binding) + ");\n"; 641 } 642 643 assert(info.srgb == 0 || info.srgb == 1); 644 info.texture_binding = tex_info.binding + info.srgb; // Offset binding by one to choose the SRGB variant 645 646 write_location(code, loc); 647 648 code += "static const "; 649 write_type(code, info.type); 650 code += ' ' + id_to_name(info.id) + " = { " + (info.srgb ? "__srgb" : "__") + info.texture_name + ", __s" + std::to_string(info.binding) + " };\n"; 651 #else 652 info.binding = _module.num_sampler_bindings++; 653 info.texture_binding = ~0u; // Unset texture binding 654 655 write_location(code, loc); 656 657 const unsigned int texture_dimension = info.type.texture_dimension(); 658 code += "Texture" + std::to_string(texture_dimension) + "D<"; 659 write_texture_format(code, tex_info.format); 660 code += "> __" + info.unique_name + "_t : register( t0); \n"; 661 662 code += "SamplerState __" + info.unique_name + "_s : register( s0);\n"; 663 664 code += "static const "; 665 write_type(code, info.type); 666 code += ' ' + id_to_name(info.id) + " = { __" + info.unique_name + "_t, __" + info.unique_name + "_s };\n"; 667 #endif 668 } 669 else 670 { 671 info.binding = _module.num_sampler_bindings++; 672 info.texture_binding = ~0u; // Unset texture binding 673 674 const unsigned int texture_dimension = info.type.texture_dimension(); 675 676 code += "sampler" + std::to_string(texture_dimension) + "D __" + info.unique_name + "_s : register(s" + std::to_string(info.binding) + ");\n"; 677 678 write_location(code, loc); 679 680 code += "static const "; 681 write_type(code, info.type); 682 code += ' ' + id_to_name(info.id) + " = { __" + info.unique_name + "_s, float" + std::to_string(texture_dimension) + '('; 683 684 if (tex_info.semantic.empty()) 685 { 686 code += "1.0 / " + std::to_string(tex_info.width); 687 if (texture_dimension >= 2) 688 code += ", 1.0 / " + std::to_string(tex_info.height); 689 if (texture_dimension >= 3) 690 code += ", 1.0 / " + std::to_string(tex_info.depth); 691 } 692 else 693 { 694 // Expect application to set inverse texture size via a define if it is not known here 695 code += tex_info.semantic + "_PIXEL_SIZE"; 696 } 697 698 code += ") }; \n"; 699 } 700 701 _module.samplers.push_back(info); 702 703 return info.id; 704 } 705 id define_storage(const location &loc, const texture_info &, storage_info &info) override 706 { 707 info.id = make_id(); 708 info.binding = ~0u; 709 710 define_name<naming::unique>(info.id, info.unique_name); 711 712 if (_shader_model >= 50) 713 { 714 info.binding = _module.num_storage_bindings++; 715 716 std::string &code = _blocks.at(_current_block); 717 718 write_location(code, loc); 719 720 if (_shader_model >= 60) 721 code += "[[vk::binding(" + std::to_string(info.binding) + ", 3)]] "; // Descriptor set 3 722 723 write_type(code, info.type); 724 code += ' ' + info.unique_name + " : register(u" + std::to_string(info.binding) + ");\n"; 725 } 726 727 _module.storages.push_back(info); 728 729 return info.id; 730 } 731 id define_uniform(const location &loc, uniform_info &info) override 732 { 733 const id res = make_id(); 734 735 define_name<naming::unique>(res, info.name); 736 737 if (_uniforms_to_spec_constants && info.has_initializer_value) 738 { 739 info.size = info.type.components() * 4; 740 if (info.type.is_array()) 741 info.size *= info.type.array_length; 742 743 std::string &code = _blocks.at(_current_block); 744 745 write_location(code, loc); 746 747 assert(!info.type.has(type::q_static) && !info.type.has(type::q_const)); 748 749 code += "static const "; 750 write_type(code, info.type); 751 code += ' ' + id_to_name(res) + " = "; 752 if (!info.type.is_scalar()) 753 write_type<false, false>(code, info.type); 754 code += "(SPEC_CONSTANT_" + info.name + ");\n"; 755 756 _module.spec_constants.push_back(info); 757 } 758 else 759 { 760 if (info.type.is_matrix()) 761 info.size = align_up(info.type.cols * 4, 16, info.type.rows); 762 else // Vectors are column major (1xN), matrices are row major (NxM) 763 info.size = info.type.rows * 4; 764 // Arrays are not packed in HLSL by default, each element is stored in a four-component vector (16 bytes) 765 if (info.type.is_array()) 766 info.size = align_up(info.size, 16, info.type.array_length); 767 768 // Data is packed into 4-byte boundaries (see https://docs.microsoft.com/windows/win32/direct3dhlsl/dx-graphics-hlsl-packing-rules) 769 // This is already guaranteed, since all types are at least 4-byte in size 770 info.offset = _module.total_uniform_size; 771 // Additionally, HLSL packs data so that it does not cross a 16-byte boundary 772 const uint32_t remaining = 16 - (info.offset & 15); 773 if (remaining != 16 && info.size > remaining) 774 info.offset += remaining; 775 _module.total_uniform_size = info.offset + info.size; 776 777 write_location<true>(_cbuffer_block, loc); 778 779 if (_shader_model >= 40) 780 _cbuffer_block += '\t'; 781 if (info.type.is_matrix()) // Force row major matrices 782 _cbuffer_block += "row_major "; 783 784 type type = info.type; 785 if (_shader_model < 40) 786 { 787 // The HLSL compiler tries to evaluate boolean values with temporary registers, which breaks branches, so force it to use constant float registers 788 if (type.is_boolean()) 789 type.base = type::t_float; 790 791 // Simply put each uniform into a separate constant register in shader model 3 for now 792 info.offset *= 4; 793 } 794 795 write_type(_cbuffer_block, type); 796 _cbuffer_block += ' ' + id_to_name(res); 797 798 if (info.type.is_array()) 799 _cbuffer_block += '[' + std::to_string(info.type.array_length) + ']'; 800 801 if (_shader_model < 40) 802 { 803 // Every constant register is 16 bytes wide, so divide memory offset by 16 to get the constant register index 804 // Note: All uniforms are floating-point in shader model 3, even if the uniform type says different!! 805 _cbuffer_block += " : register(c" + std::to_string(info.offset / 16) + ')'; 806 } 807 808 _cbuffer_block += ";\n"; 809 810 _module.uniforms.push_back(info); 811 } 812 813 return res; 814 } 815 id define_variable(const location &loc, const type &type, std::string name, bool global, id initializer_value) override 816 { 817 const id res = make_id(); 818 819 if (!name.empty()) 820 define_name<naming::general>(res, name); 821 822 std::string &code = _blocks.at(_current_block); 823 824 write_location(code, loc); 825 826 if (!global) 827 code += '\t'; 828 829 if (initializer_value != 0 && type.has(type::q_const)) 830 code += "const "; 831 832 write_type(code, type); 833 code += ' ' + id_to_name(res); 834 835 if (type.is_array()) 836 code += '[' + std::to_string(type.array_length) + ']'; 837 838 if (initializer_value != 0) 839 code += " = " + id_to_name(initializer_value); 840 841 code += ";\n"; 842 843 return res; 844 } 845 id define_function(const location &loc, function_info &info) override 846 { 847 info.definition = make_id(); 848 849 define_name<naming::unique>(info.definition, info.unique_name); 850 851 std::string &code = _blocks.at(_current_block); 852 853 write_location(code, loc); 854 855 write_type(code, info.return_type); 856 code += ' ' + id_to_name(info.definition) + '('; 857 858 for (size_t i = 0, num_params = info.parameter_list.size(); i < num_params; ++i) 859 { 860 auto ¶m = info.parameter_list[i]; 861 862 param.definition = make_id(); 863 define_name<naming::unique>(param.definition, param.name); 864 865 code += '\n'; 866 write_location(code, param.location); 867 code += '\t'; 868 write_type<true>(code, param.type); 869 code += ' ' + id_to_name(param.definition); 870 871 if (param.type.is_array()) 872 code += '[' + std::to_string(param.type.array_length) + ']'; 873 874 if (!param.semantic.empty()) 875 code += " : " + convert_semantic(param.semantic); 876 877 if (i < num_params - 1) 878 code += ','; 879 } 880 881 code += ')'; 882 883 if (!info.return_semantic.empty()) 884 code += " : " + convert_semantic(info.return_semantic); 885 886 code += '\n'; 887 888 _functions.push_back(std::make_unique<function_info>(info)); 889 890 return info.definition; 891 } 892 893 void define_entry_point(function_info &func, shader_type stype, int num_threads[3]) override 894 { 895 // Modify entry point name since a new function is created for it below 896 if (stype == shader_type::cs) 897 func.unique_name = 'E' + func.unique_name + 898 '_' + std::to_string(num_threads[0]) + 899 '_' + std::to_string(num_threads[1]) + 900 '_' + std::to_string(num_threads[2]); 901 else if (_shader_model < 40) 902 func.unique_name = 'E' + func.unique_name; 903 904 if (const auto it = std::find_if(_module.entry_points.begin(), _module.entry_points.end(), 905 [&func](const auto &ep) { return ep.name == func.unique_name; }); 906 it != _module.entry_points.end()) 907 return; 908 909 _module.entry_points.push_back({ func.unique_name, stype }); 910 911 // Only have to rewrite the entry point function signature in shader model 3 and for compute (to write "numthreads" attribute) 912 if (_shader_model >= 40 && stype != shader_type::cs) 913 return; 914 915 auto entry_point = func; 916 917 const auto is_color_semantic = [](const std::string &semantic) { 918 return semantic.compare(0, 9, "SV_TARGET") == 0 || semantic.compare(0, 5, "COLOR") == 0; }; 919 const auto is_position_semantic = [](const std::string &semantic) { 920 return semantic == "SV_POSITION" || semantic == "POSITION"; }; 921 922 const auto ret = make_id(); 923 define_name<naming::general>(ret, "ret"); 924 925 std::string position_variable_name; 926 { 927 if (func.return_type.is_struct() && stype == shader_type::vs) 928 { 929 // If this function returns a struct which contains a position output, keep track of its member name 930 for (const struct_member_info &member : get_struct(func.return_type.definition).member_list) 931 if (is_position_semantic(member.semantic)) 932 position_variable_name = id_to_name(ret) + '.' + member.name; 933 } 934 935 if (is_color_semantic(func.return_semantic)) 936 { 937 // The COLOR output semantic has to be a four-component vector in shader model 3, so enforce that 938 entry_point.return_type.rows = 4; 939 } 940 if (is_position_semantic(func.return_semantic)) 941 { 942 if (stype == shader_type::vs) 943 // Keep track of the position output variable 944 position_variable_name = id_to_name(ret); 945 } 946 } 947 for (struct_member_info ¶m : entry_point.parameter_list) 948 { 949 if (param.type.is_struct() && stype == shader_type::vs) 950 { 951 for (const struct_member_info &member : get_struct(param.type.definition).member_list) 952 if (is_position_semantic(member.semantic)) 953 position_variable_name = param.name + '.' + member.name; 954 } 955 956 if (is_color_semantic(param.semantic)) 957 { 958 param.type.rows = 4; 959 } 960 if (is_position_semantic(param.semantic)) 961 { 962 if (stype == shader_type::vs) 963 // Keep track of the position output variable 964 position_variable_name = param.name; 965 else if (stype == shader_type::ps) 966 // Change the position input semantic in pixel shaders 967 param.semantic = "VPOS"; 968 } 969 } 970 971 if (stype == shader_type::cs) 972 _blocks.at(_current_block) += "[numthreads(" + 973 std::to_string(num_threads[0]) + ", " + 974 std::to_string(num_threads[1]) + ", " + 975 std::to_string(num_threads[2]) + ")]\n"; 976 977 define_function({}, entry_point); 978 enter_block(create_block()); 979 980 std::string &code = _blocks.at(_current_block); 981 982 // Clear all color output parameters so no component is left uninitialized 983 for (struct_member_info ¶m : entry_point.parameter_list) 984 { 985 if (is_color_semantic(param.semantic)) 986 code += '\t' + param.name + " = float4(0.0, 0.0, 0.0, 0.0);\n"; 987 } 988 989 code += '\t'; 990 if (is_color_semantic(func.return_semantic)) 991 { 992 code += "const float4 " + id_to_name(ret) + " = float4("; 993 } 994 else if (!func.return_type.is_void()) 995 { 996 write_type(code, func.return_type); 997 code += ' ' + id_to_name(ret) + " = "; 998 } 999 1000 // Call the function this entry point refers to 1001 code += id_to_name(func.definition) + '('; 1002 1003 for (size_t i = 0, num_params = func.parameter_list.size(); i < num_params; ++i) 1004 { 1005 code += func.parameter_list[i].name; 1006 1007 if (is_color_semantic(func.parameter_list[i].semantic)) 1008 { 1009 code += '.'; 1010 for (unsigned int k = 0; k < func.parameter_list[i].type.rows; k++) 1011 code += "xyzw"[k]; 1012 } 1013 1014 if (i < num_params - 1) 1015 code += ", "; 1016 } 1017 1018 code += ')'; 1019 1020 // Cast the output value to a four-component vector 1021 if (is_color_semantic(func.return_semantic)) 1022 { 1023 for (unsigned int i = 0; i < 4 - func.return_type.rows; i++) 1024 code += ", 0.0"; 1025 code += ')'; 1026 } 1027 1028 code += ";\n"; 1029 1030 // Shift everything by half a viewport pixel to workaround the different half-pixel offset in D3D9 (https://aras-p.info/blog/2016/04/08/solving-dx9-half-pixel-offset/) 1031 if (!position_variable_name.empty() && stype == shader_type::vs) // Check if we are in a vertex shader definition 1032 code += '\t' + position_variable_name + ".xy += __TEXEL_SIZE__ * " + position_variable_name + ".ww;\n"; 1033 1034 leave_block_and_return(func.return_type.is_void() ? 0 : ret); 1035 leave_function(); 1036 } 1037 1038 id emit_load(const expression &exp, bool force_new_id) override 1039 { 1040 if (exp.is_constant) 1041 return emit_constant(exp.type, exp.constant); 1042 else if (exp.chain.empty() && !force_new_id) // Can refer to values without access chain directly 1043 return exp.base; 1044 1045 const id res = make_id(); 1046 1047 static const char s_matrix_swizzles[16][5] = { 1048 "_m00", "_m01", "_m02", "_m03", 1049 "_m10", "_m11", "_m12", "_m13", 1050 "_m20", "_m21", "_m22", "_m23", 1051 "_m30", "_m31", "_m32", "_m33" 1052 }; 1053 1054 std::string type, expr_code = id_to_name(exp.base); 1055 1056 for (const auto &op : exp.chain) 1057 { 1058 switch (op.op) 1059 { 1060 case expression::operation::op_cast: 1061 type.clear(); 1062 write_type<false, false>(type, op.to); 1063 // Cast is in parentheses so that a subsequent operation operates on the casted value 1064 expr_code = "((" + type + ')' + expr_code + ')'; 1065 break; 1066 case expression::operation::op_member: 1067 expr_code += '.'; 1068 expr_code += get_struct(op.from.definition).member_list[op.index].name; 1069 break; 1070 case expression::operation::op_dynamic_index: 1071 expr_code += '[' + id_to_name(op.index) + ']'; 1072 break; 1073 case expression::operation::op_constant_index: 1074 if (op.from.is_vector() && !op.from.is_array()) 1075 expr_code += '.', 1076 expr_code += "xyzw"[op.index]; 1077 else 1078 expr_code += '[' + std::to_string(op.index) + ']'; 1079 break; 1080 case expression::operation::op_swizzle: 1081 expr_code += '.'; 1082 for (unsigned int i = 0; i < 4 && op.swizzle[i] >= 0; ++i) 1083 if (op.from.is_matrix()) 1084 expr_code += s_matrix_swizzles[op.swizzle[i]]; 1085 else 1086 expr_code += "xyzw"[op.swizzle[i]]; 1087 break; 1088 } 1089 } 1090 1091 if (force_new_id) 1092 { 1093 // Need to store value in a new variable to comply with request for a new ID 1094 std::string &code = _blocks.at(_current_block); 1095 1096 code += '\t'; 1097 write_type(code, exp.type); 1098 code += ' ' + id_to_name(res) + " = " + expr_code + ";\n"; 1099 } 1100 else 1101 { 1102 // Avoid excessive variable definitions by instancing simple load operations in code every time 1103 define_name<naming::expression>(res, std::move(expr_code)); 1104 } 1105 1106 return res; 1107 } 1108 void emit_store(const expression &exp, id value) override 1109 { 1110 std::string &code = _blocks.at(_current_block); 1111 1112 write_location(code, exp.location); 1113 1114 code += '\t' + id_to_name(exp.base); 1115 1116 static const char s_matrix_swizzles[16][5] = { 1117 "_m00", "_m01", "_m02", "_m03", 1118 "_m10", "_m11", "_m12", "_m13", 1119 "_m20", "_m21", "_m22", "_m23", 1120 "_m30", "_m31", "_m32", "_m33" 1121 }; 1122 1123 for (const auto &op : exp.chain) 1124 { 1125 switch (op.op) 1126 { 1127 case expression::operation::op_member: 1128 code += '.'; 1129 code += get_struct(op.from.definition).member_list[op.index].name; 1130 break; 1131 case expression::operation::op_dynamic_index: 1132 code += '[' + id_to_name(op.index) + ']'; 1133 break; 1134 case expression::operation::op_constant_index: 1135 code += '[' + std::to_string(op.index) + ']'; 1136 break; 1137 case expression::operation::op_swizzle: 1138 code += '.'; 1139 for (unsigned int i = 0; i < 4 && op.swizzle[i] >= 0; ++i) 1140 if (op.from.is_matrix()) 1141 code += s_matrix_swizzles[op.swizzle[i]]; 1142 else 1143 code += "xyzw"[op.swizzle[i]]; 1144 break; 1145 } 1146 } 1147 1148 code += " = " + id_to_name(value) + ";\n"; 1149 } 1150 1151 id emit_constant(const type &type, const constant &data) override 1152 { 1153 const id res = make_id(); 1154 1155 if (type.is_array()) 1156 { 1157 assert(type.has(type::q_const)); 1158 1159 std::string &code = _blocks.at(_current_block); 1160 1161 // Array constants need to be stored in a constant variable as they cannot be used in-place 1162 code += '\t'; 1163 code += "const "; 1164 write_type(code, type); 1165 code += ' ' + id_to_name(res); 1166 code += '[' + std::to_string(type.array_length) + ']'; 1167 code += " = "; 1168 write_constant(code, type, data); 1169 code += ";\n"; 1170 return res; 1171 } 1172 1173 std::string code; 1174 write_constant(code, type, data); 1175 define_name<naming::expression>(res, std::move(code)); 1176 1177 return res; 1178 } 1179 1180 id emit_unary_op(const location &loc, tokenid op, const type &res_type, id val) override 1181 { 1182 const id res = make_id(); 1183 1184 std::string &code = _blocks.at(_current_block); 1185 1186 write_location(code, loc); 1187 1188 code += '\t'; 1189 write_type(code, res_type); 1190 code += ' ' + id_to_name(res) + " = "; 1191 1192 if (_shader_model < 40 && op == tokenid::tilde) 1193 code += "0xFFFFFFFF - "; // Emulate bitwise not operator on shader model 3 1194 else 1195 code += char(op); 1196 1197 code += id_to_name(val) + ";\n"; 1198 1199 return res; 1200 } 1201 id emit_binary_op(const location &loc, tokenid op, const type &res_type, const type &, id lhs, id rhs) override 1202 { 1203 const id res = make_id(); 1204 1205 std::string &code = _blocks.at(_current_block); 1206 1207 write_location(code, loc); 1208 1209 code += '\t'; 1210 write_type(code, res_type); 1211 code += ' ' + id_to_name(res) + " = "; 1212 1213 if (_shader_model < 40) 1214 { 1215 // See bitwise shift operator emulation below 1216 if (op == tokenid::less_less || op == tokenid::less_less_equal) 1217 code += '('; 1218 else if (op == tokenid::greater_greater || op == tokenid::greater_greater_equal) 1219 code += "floor("; 1220 } 1221 1222 code += id_to_name(lhs) + ' '; 1223 1224 switch (op) 1225 { 1226 case tokenid::plus: 1227 case tokenid::plus_plus: 1228 case tokenid::plus_equal: 1229 code += '+'; 1230 break; 1231 case tokenid::minus: 1232 case tokenid::minus_minus: 1233 case tokenid::minus_equal: 1234 code += '-'; 1235 break; 1236 case tokenid::star: 1237 case tokenid::star_equal: 1238 code += '*'; 1239 break; 1240 case tokenid::slash: 1241 case tokenid::slash_equal: 1242 code += '/'; 1243 break; 1244 case tokenid::percent: 1245 case tokenid::percent_equal: 1246 code += '%'; 1247 break; 1248 case tokenid::caret: 1249 case tokenid::caret_equal: 1250 code += '^'; 1251 break; 1252 case tokenid::pipe: 1253 case tokenid::pipe_equal: 1254 code += '|'; 1255 break; 1256 case tokenid::ampersand: 1257 case tokenid::ampersand_equal: 1258 code += '&'; 1259 break; 1260 case tokenid::less_less: 1261 case tokenid::less_less_equal: 1262 code += _shader_model >= 40 ? "<<" : ") * exp2("; // Emulate bitwise shift operators on shader model 3 1263 break; 1264 case tokenid::greater_greater: 1265 case tokenid::greater_greater_equal: 1266 code += _shader_model >= 40 ? ">>" : ") / exp2("; 1267 break; 1268 case tokenid::pipe_pipe: 1269 code += "||"; 1270 break; 1271 case tokenid::ampersand_ampersand: 1272 code += "&&"; 1273 break; 1274 case tokenid::less: 1275 code += '<'; 1276 break; 1277 case tokenid::less_equal: 1278 code += "<="; 1279 break; 1280 case tokenid::greater: 1281 code += '>'; 1282 break; 1283 case tokenid::greater_equal: 1284 code += ">="; 1285 break; 1286 case tokenid::equal_equal: 1287 code += "=="; 1288 break; 1289 case tokenid::exclaim_equal: 1290 code += "!="; 1291 break; 1292 default: 1293 assert(false); 1294 } 1295 1296 code += ' ' + id_to_name(rhs); 1297 1298 if (_shader_model < 40) 1299 { 1300 // See bitwise shift operator emulation above 1301 if (op == tokenid::less_less || op == tokenid::less_less_equal || 1302 op == tokenid::greater_greater || op == tokenid::greater_greater_equal) 1303 code += ')'; 1304 } 1305 1306 code += ";\n"; 1307 1308 return res; 1309 } 1310 id emit_ternary_op(const location &loc, tokenid op, const type &res_type, id condition, id true_value, id false_value) override 1311 { 1312 if (op != tokenid::question) 1313 return assert(false), 0; // Should never happen, since this is the only ternary operator currently supported 1314 1315 const id res = make_id(); 1316 1317 std::string &code = _blocks.at(_current_block); 1318 1319 write_location(code, loc); 1320 1321 code += '\t'; 1322 write_type(code, res_type); 1323 code += ' ' + id_to_name(res); 1324 1325 if (res_type.is_array()) 1326 code += '[' + std::to_string(res_type.array_length) + ']'; 1327 1328 code += " = " + id_to_name(condition) + " ? " + id_to_name(true_value) + " : " + id_to_name(false_value) + ";\n"; 1329 1330 return res; 1331 } 1332 id emit_call(const location &loc, id function, const type &res_type, const std::vector<expression> &args) override 1333 { 1334 #ifndef NDEBUG 1335 for (const expression &arg : args) 1336 assert(arg.chain.empty() && arg.base != 0); 1337 #endif 1338 1339 const id res = make_id(); 1340 1341 std::string &code = _blocks.at(_current_block); 1342 1343 write_location(code, loc); 1344 1345 code += '\t'; 1346 1347 if (!res_type.is_void()) 1348 { 1349 write_type(code, res_type); 1350 code += ' ' + id_to_name(res); 1351 1352 if (res_type.is_array()) 1353 code += '[' + std::to_string(res_type.array_length) + ']'; 1354 1355 code += " = "; 1356 } 1357 1358 code += id_to_name(function) + '('; 1359 1360 for (size_t i = 0, num_args = args.size(); i < num_args; ++i) 1361 { 1362 code += id_to_name(args[i].base); 1363 1364 if (i < num_args - 1) 1365 code += ", "; 1366 } 1367 1368 code += ");\n"; 1369 1370 return res; 1371 } 1372 id emit_call_intrinsic(const location &loc, id intrinsic, const type &res_type, const std::vector<expression> &args) override 1373 { 1374 #ifndef NDEBUG 1375 for (const expression &arg : args) 1376 assert(arg.chain.empty() && arg.base != 0); 1377 #endif 1378 1379 const id res = make_id(); 1380 1381 std::string &code = _blocks.at(_current_block); 1382 1383 enum 1384 { 1385 #define IMPLEMENT_INTRINSIC_HLSL(name, i, code) name##i, 1386 #include "effect_symbol_table_intrinsics.inl" 1387 }; 1388 1389 write_location(code, loc); 1390 1391 code += '\t'; 1392 1393 if (_shader_model >= 40 && ( 1394 (intrinsic >= tex1Dsize0 && intrinsic <= tex3Dsize2) || 1395 (intrinsic >= atomicAdd0 && intrinsic <= atomicCompareExchange1) || 1396 (!(res_type.is_floating_point() || _shader_model >= 67) && (intrinsic >= tex1D0 && intrinsic <= tex3Dlod1)))) 1397 { 1398 // Implementation of the 'tex2Dsize' intrinsic passes the result variable into 'GetDimensions' as output argument 1399 // Same with the atomic intrinsics, which use the last parameter to return the previous value of the target 1400 write_type(code, res_type); 1401 code += ' ' + id_to_name(res) + "; "; 1402 } 1403 else if (!res_type.is_void()) 1404 { 1405 write_type(code, res_type); 1406 code += ' ' + id_to_name(res) + " = "; 1407 } 1408 1409 switch (intrinsic) 1410 { 1411 #define IMPLEMENT_INTRINSIC_HLSL(name, i, code) case name##i: code break; 1412 #include "effect_symbol_table_intrinsics.inl" 1413 default: 1414 assert(false); 1415 } 1416 1417 code += ";\n"; 1418 1419 return res; 1420 } 1421 id emit_construct(const location &loc, const type &type, const std::vector<expression> &args) override 1422 { 1423 #ifndef NDEBUG 1424 for (const auto &arg : args) 1425 assert((arg.type.is_scalar() || type.is_array()) && arg.chain.empty() && arg.base != 0); 1426 #endif 1427 1428 const id res = make_id(); 1429 1430 std::string &code = _blocks.at(_current_block); 1431 1432 write_location(code, loc); 1433 1434 code += '\t'; 1435 write_type(code, type); 1436 code += ' ' + id_to_name(res); 1437 1438 if (type.is_array()) 1439 code += '[' + std::to_string(type.array_length) + ']'; 1440 1441 code += " = "; 1442 1443 if (type.is_array()) 1444 code += "{ "; 1445 else 1446 write_type<false, false>(code, type), code += '('; 1447 1448 for (size_t i = 0, num_args = args.size(); i < num_args; ++i) 1449 { 1450 code += id_to_name(args[i].base); 1451 1452 if (i < num_args - 1) 1453 code += ", "; 1454 } 1455 1456 if (type.is_array()) 1457 code += " }"; 1458 else 1459 code += ')'; 1460 1461 code += ";\n"; 1462 1463 return res; 1464 } 1465 1466 void emit_if(const location &loc, id condition_value, id condition_block, id true_statement_block, id false_statement_block, unsigned int flags) override 1467 { 1468 assert(condition_value != 0 && condition_block != 0 && true_statement_block != 0 && false_statement_block != 0); 1469 1470 std::string &code = _blocks.at(_current_block); 1471 1472 std::string &true_statement_data = _blocks.at(true_statement_block); 1473 std::string &false_statement_data = _blocks.at(false_statement_block); 1474 1475 increase_indentation_level(true_statement_data); 1476 increase_indentation_level(false_statement_data); 1477 1478 code += _blocks.at(condition_block); 1479 1480 write_location(code, loc); 1481 1482 code += '\t'; 1483 1484 if (flags & 0x1) code += "[flatten] "; 1485 if (flags & 0x2) code += "[branch] "; 1486 1487 code += "if (" + id_to_name(condition_value) + ")\n\t{\n"; 1488 code += true_statement_data; 1489 code += "\t}\n"; 1490 1491 if (!false_statement_data.empty()) 1492 { 1493 code += "\telse\n\t{\n"; 1494 code += false_statement_data; 1495 code += "\t}\n"; 1496 } 1497 1498 // Remove consumed blocks to save memory 1499 _blocks.erase(condition_block); 1500 _blocks.erase(true_statement_block); 1501 _blocks.erase(false_statement_block); 1502 } 1503 id emit_phi(const location &loc, id condition_value, id condition_block, id true_value, id true_statement_block, id false_value, id false_statement_block, const type &type) override 1504 { 1505 assert(condition_value != 0 && condition_block != 0 && true_value != 0 && true_statement_block != 0 && false_value != 0 && false_statement_block != 0); 1506 1507 std::string &code = _blocks.at(_current_block); 1508 1509 std::string &true_statement_data = _blocks.at(true_statement_block); 1510 std::string &false_statement_data = _blocks.at(false_statement_block); 1511 1512 increase_indentation_level(true_statement_data); 1513 increase_indentation_level(false_statement_data); 1514 1515 const id res = make_id(); 1516 1517 code += _blocks.at(condition_block); 1518 1519 code += '\t'; 1520 write_type(code, type); 1521 code += ' ' + id_to_name(res) + ";\n"; 1522 1523 write_location(code, loc); 1524 1525 code += "\tif (" + id_to_name(condition_value) + ")\n\t{\n"; 1526 code += (true_statement_block != condition_block ? true_statement_data : std::string()); 1527 code += "\t\t" + id_to_name(res) + " = " + id_to_name(true_value) + ";\n"; 1528 code += "\t}\n\telse\n\t{\n"; 1529 code += (false_statement_block != condition_block ? false_statement_data : std::string()); 1530 code += "\t\t" + id_to_name(res) + " = " + id_to_name(false_value) + ";\n"; 1531 code += "\t}\n"; 1532 1533 // Remove consumed blocks to save memory 1534 _blocks.erase(condition_block); 1535 _blocks.erase(true_statement_block); 1536 _blocks.erase(false_statement_block); 1537 1538 return res; 1539 } 1540 void emit_loop(const location &loc, id condition_value, id prev_block, id header_block, id condition_block, id loop_block, id continue_block, unsigned int flags) override 1541 { 1542 assert(prev_block != 0 && header_block != 0 && loop_block != 0 && continue_block != 0); 1543 1544 std::string &code = _blocks.at(_current_block); 1545 1546 std::string &loop_data = _blocks.at(loop_block); 1547 std::string &continue_data = _blocks.at(continue_block); 1548 1549 increase_indentation_level(loop_data); 1550 increase_indentation_level(loop_data); 1551 increase_indentation_level(continue_data); 1552 1553 code += _blocks.at(prev_block); 1554 1555 std::string attributes; 1556 if (flags & 0x1) 1557 attributes += "[unroll] "; 1558 if (flags & 0x2) 1559 attributes += _shader_model >= 40 ? "[fastopt] " : "[loop] "; 1560 1561 // Condition value can be missing in infinite loop constructs like "for (;;)" 1562 std::string condition_name = condition_value != 0 ? id_to_name(condition_value) : "true"; 1563 1564 if (condition_block == 0) 1565 { 1566 // Convert the last SSA variable initializer to an assignment statement 1567 auto pos_assign = continue_data.rfind(condition_name); 1568 auto pos_prev_assign = continue_data.rfind('\t', pos_assign); 1569 continue_data.erase(pos_prev_assign + 1, pos_assign - pos_prev_assign - 1); 1570 1571 // We need to add the continue block to all "continue" statements as well 1572 const std::string continue_id = "__CONTINUE__" + std::to_string(continue_block); 1573 for (size_t offset = 0; (offset = loop_data.find(continue_id, offset)) != std::string::npos; offset += continue_data.size()) 1574 loop_data.replace(offset, continue_id.size(), continue_data); 1575 1576 code += "\tbool " + condition_name + ";\n"; 1577 1578 write_location(code, loc); 1579 1580 code += '\t' + attributes; 1581 code += "do\n\t{\n\t\t{\n"; 1582 code += loop_data; // Encapsulate loop body into another scope, so not to confuse any local variables with the current iteration variable accessed in the continue block below 1583 code += "\t\t}\n"; 1584 code += continue_data; 1585 code += "\t}\n\twhile (" + condition_name + ");\n"; 1586 } 1587 else 1588 { 1589 std::string &condition_data = _blocks.at(condition_block); 1590 1591 // Work around D3DCompiler putting uniform variables that are used as the loop count register into integer registers (only in SM3) 1592 // Only applies to dynamic loops with uniform variables in the condition, where it generates a loop instruction like "rep i0", but then expects the "i0" register to be set externally 1593 // Moving the loop condition into the loop body forces it to move the uniform variable into a constant register instead and geneates a fixed number of loop iterations with "defi i0, 255, ..." 1594 // Check 'condition_name' instead of 'condition_value' here to also catch cases where a constant boolean expression was passed in as loop condition 1595 bool use_break_statement_for_condition = (_shader_model < 40 && condition_name != "true") && 1596 std::find_if(_module.uniforms.begin(), _module.uniforms.end(), 1597 [&](const uniform_info &info) { 1598 return condition_data.find(info.name) != std::string::npos || condition_name.find(info.name) != std::string::npos; 1599 }) != _module.uniforms.end(); 1600 1601 // If the condition data is just a single line, then it is a simple expression, which we can just put into the loop condition as-is 1602 if (!use_break_statement_for_condition && std::count(condition_data.begin(), condition_data.end(), '\n') == 1) 1603 { 1604 // Convert SSA variable initializer back to a condition expression 1605 auto pos_assign = condition_data.find('='); 1606 condition_data.erase(0, pos_assign + 2); 1607 auto pos_semicolon = condition_data.rfind(';'); 1608 condition_data.erase(pos_semicolon); 1609 1610 condition_name = std::move(condition_data); 1611 assert(condition_data.empty()); 1612 } 1613 else 1614 { 1615 code += condition_data; 1616 1617 increase_indentation_level(condition_data); 1618 1619 // Convert the last SSA variable initializer to an assignment statement 1620 auto pos_assign = condition_data.rfind(condition_name); 1621 auto pos_prev_assign = condition_data.rfind('\t', pos_assign); 1622 condition_data.erase(pos_prev_assign + 1, pos_assign - pos_prev_assign - 1); 1623 } 1624 1625 const std::string continue_id = "__CONTINUE__" + std::to_string(continue_block); 1626 for (size_t offset = 0; (offset = loop_data.find(continue_id, offset)) != std::string::npos; offset += continue_data.size()) 1627 loop_data.replace(offset, continue_id.size(), continue_data + condition_data); 1628 1629 write_location(code, loc); 1630 1631 code += '\t' + attributes; 1632 if (use_break_statement_for_condition) 1633 code += "while (true)\n\t{\n\t\tif (" + condition_name + ")\n\t\t{\n"; 1634 else 1635 code += "while (" + condition_name + ")\n\t{\n\t\t{\n"; 1636 code += loop_data; 1637 code += "\t\t}\n"; 1638 if (use_break_statement_for_condition) 1639 code += "\t\telse break;\n"; 1640 code += continue_data; 1641 code += condition_data; 1642 code += "\t}\n"; 1643 1644 _blocks.erase(condition_block); 1645 } 1646 1647 // Remove consumed blocks to save memory 1648 _blocks.erase(prev_block); 1649 _blocks.erase(header_block); 1650 _blocks.erase(loop_block); 1651 _blocks.erase(continue_block); 1652 } 1653 void emit_switch(const location &loc, id selector_value, id selector_block, id default_label, id default_block, const std::vector<id> &case_literal_and_labels, const std::vector<id> &case_blocks, unsigned int flags) override 1654 { 1655 assert(selector_value != 0 && selector_block != 0 && default_label != 0 && default_block != 0); 1656 assert(case_blocks.size() == case_literal_and_labels.size() / 2); 1657 1658 std::string &code = _blocks.at(_current_block); 1659 1660 code += _blocks.at(selector_block); 1661 1662 if (_shader_model >= 40) 1663 { 1664 write_location(code, loc); 1665 1666 code += '\t'; 1667 1668 if (flags & 0x1) code += "[flatten] "; 1669 if (flags & 0x2) code += "[branch] "; 1670 if (flags & 0x4) code += "[forcecase] "; 1671 if (flags & 0x8) code += "[call] "; 1672 1673 code += "switch (" + id_to_name(selector_value) + ")\n\t{\n"; 1674 1675 std::vector<id> labels = case_literal_and_labels; 1676 for (size_t i = 0; i < labels.size(); i += 2) 1677 { 1678 if (labels[i + 1] == 0) 1679 continue; // Happens if a case was already handled, see below 1680 1681 code += "\tcase " + std::to_string(labels[i]) + ": "; 1682 1683 if (labels[i + 1] == default_label) 1684 { 1685 code += "default: "; 1686 default_label = 0; 1687 } 1688 else 1689 { 1690 for (size_t k = i + 2; k < labels.size(); k += 2) 1691 { 1692 if (labels[k + 1] == 0 || labels[k + 1] != labels[i + 1]) 1693 continue; 1694 1695 code += "case " + std::to_string(labels[k]) + ": "; 1696 labels[k + 1] = 0; 1697 } 1698 } 1699 1700 assert(case_blocks[i / 2] != 0); 1701 std::string &case_data = _blocks.at(case_blocks[i / 2]); 1702 1703 increase_indentation_level(case_data); 1704 1705 code += "{\n"; 1706 code += case_data; 1707 code += "\t}\n"; 1708 } 1709 1710 if (default_label != 0 && default_block != _current_block) 1711 { 1712 std::string &default_data = _blocks.at(default_block); 1713 1714 increase_indentation_level(default_data); 1715 1716 code += "\tdefault: {\n"; 1717 code += default_data; 1718 code += "\t}\n"; 1719 1720 _blocks.erase(default_block); 1721 } 1722 1723 code += "\t}\n"; 1724 } 1725 else // Switch statements do not work correctly in SM3 if a constant is used as selector value (this is a D3DCompiler bug), so replace them with if statements 1726 { 1727 write_location(code, loc); 1728 1729 code += "\t[unroll] do { "; // This dummy loop makes "break" statements work 1730 1731 if (flags & 0x1) code += "[flatten] "; 1732 if (flags & 0x2) code += "[branch] "; 1733 1734 std::vector<id> labels = case_literal_and_labels; 1735 for (size_t i = 0; i < labels.size(); i += 2) 1736 { 1737 if (labels[i + 1] == 0) 1738 continue; // Happens if a case was already handled, see below 1739 1740 code += "if (" + id_to_name(selector_value) + " == " + std::to_string(labels[i]); 1741 1742 for (size_t k = i + 2; k < labels.size(); k += 2) 1743 { 1744 if (labels[k + 1] == 0 || labels[k + 1] != labels[i + 1]) 1745 continue; 1746 1747 code += " || " + id_to_name(selector_value) + " == " + std::to_string(labels[k]); 1748 labels[k + 1] = 0; 1749 } 1750 1751 assert(case_blocks[i / 2] != 0); 1752 std::string &case_data = _blocks.at(case_blocks[i / 2]); 1753 1754 increase_indentation_level(case_data); 1755 1756 code += ")\n\t{\n"; 1757 code += case_data; 1758 code += "\t}\n\telse\n\t"; 1759 } 1760 1761 code += "{\n"; 1762 1763 if (default_block != _current_block) 1764 { 1765 std::string &default_data = _blocks.at(default_block); 1766 1767 increase_indentation_level(default_data); 1768 1769 code += default_data; 1770 1771 _blocks.erase(default_block); 1772 } 1773 1774 code += "\t} } while (false);\n"; 1775 } 1776 1777 // Remove consumed blocks to save memory 1778 _blocks.erase(selector_block); 1779 for (const id case_block : case_blocks) 1780 _blocks.erase(case_block); 1781 } 1782 1783 id create_block() override 1784 { 1785 const id res = make_id(); 1786 1787 std::string &block = _blocks.emplace(res, std::string()).first->second; 1788 // Reserve a decently big enough memory block to avoid frequent reallocations 1789 block.reserve(4096); 1790 1791 return res; 1792 } 1793 id set_block(id id) override 1794 { 1795 _last_block = _current_block; 1796 _current_block = id; 1797 1798 return _last_block; 1799 } 1800 void enter_block(id id) override 1801 { 1802 _current_block = id; 1803 } 1804 id leave_block_and_kill() override 1805 { 1806 if (!is_in_block()) 1807 return 0; 1808 1809 std::string &code = _blocks.at(_current_block); 1810 1811 code += "\tdiscard;\n"; 1812 1813 const auto &return_type = _functions.back()->return_type; 1814 if (!return_type.is_void()) 1815 { 1816 // HLSL compiler doesn't handle discard like a shader kill 1817 // Add a return statement to exit functions in case discard is the last control flow statement 1818 // See https://docs.microsoft.com/windows/win32/direct3dhlsl/discard--sm4---asm- 1819 code += "\treturn "; 1820 write_constant(code, return_type, constant()); 1821 code += ";\n"; 1822 } 1823 1824 return set_block(0); 1825 } 1826 id leave_block_and_return(id value) override 1827 { 1828 if (!is_in_block()) 1829 return 0; 1830 1831 // Skip implicit return statement 1832 if (!_functions.back()->return_type.is_void() && value == 0) 1833 return set_block(0); 1834 1835 std::string &code = _blocks.at(_current_block); 1836 1837 code += "\treturn"; 1838 1839 if (value != 0) 1840 code += ' ' + id_to_name(value); 1841 1842 code += ";\n"; 1843 1844 return set_block(0); 1845 } 1846 id leave_block_and_switch(id, id) override 1847 { 1848 if (!is_in_block()) 1849 return _last_block; 1850 1851 return set_block(0); 1852 } 1853 id leave_block_and_branch(id target, unsigned int loop_flow) override 1854 { 1855 if (!is_in_block()) 1856 return _last_block; 1857 1858 std::string &code = _blocks.at(_current_block); 1859 1860 switch (loop_flow) 1861 { 1862 case 1: 1863 code += "\tbreak;\n"; 1864 break; 1865 case 2: // Keep track of continue target block, so we can insert its code here later 1866 code += "__CONTINUE__" + std::to_string(target) + "\tcontinue;\n"; 1867 break; 1868 } 1869 1870 return set_block(0); 1871 } 1872 id leave_block_and_branch_conditional(id, id, id) override 1873 { 1874 if (!is_in_block()) 1875 return _last_block; 1876 1877 return set_block(0); 1878 } 1879 void leave_function() override 1880 { 1881 assert(_last_block != 0); 1882 1883 _blocks.at(0) += "{\n" + _blocks.at(_last_block) + "}\n"; 1884 } 1885 }; 1886 1887 codegen *reshadefx::create_codegen_hlsl(unsigned int shader_model, bool debug_info, bool uniforms_to_spec_constants) 1888 { 1889 return new codegen_hlsl(shader_model, debug_info, uniforms_to_spec_constants); 1890 }