effect_preprocessor.cpp (37296B)
1 /* 2 * Copyright (C) 2014 Patrick Mours 3 * SPDX-License-Identifier: BSD-3-Clause 4 */ 5 6 #include "effect_lexer.hpp" 7 #include "effect_preprocessor.hpp" 8 #include <cassert> 9 #include <fstream> 10 #include <algorithm> // std::find_if 11 12 #ifndef _WIN32 13 // On Linux systems the native path encoding is UTF-8 already, so no conversion necessary 14 #define u8path(p) path(p) 15 #define u8string() string() 16 #endif 17 18 enum op_type 19 { 20 op_none = -1, 21 22 op_or, 23 op_and, 24 op_bitor, 25 op_bitxor, 26 op_bitand, 27 op_not_equal, 28 op_equal, 29 op_less, 30 op_greater, 31 op_less_equal, 32 op_greater_equal, 33 op_leftshift, 34 op_rightshift, 35 op_add, 36 op_subtract, 37 op_modulo, 38 op_divide, 39 op_multiply, 40 op_plus, 41 op_negate, 42 op_not, 43 op_bitnot, 44 op_parentheses 45 }; 46 47 enum macro_replacement 48 { 49 macro_replacement_start = '\x00', 50 macro_replacement_argument = '\xFD', 51 macro_replacement_concat = '\xFF', 52 macro_replacement_stringize = '\xFE', 53 }; 54 55 static const int precedence_lookup[] = { 56 0, 1, 2, 3, 4, // bitwise operators 57 5, 6, 7, 7, 7, 7, // logical operators 58 8, 8, // left shift, right shift 59 9, 9, // add, subtract 60 10, 10, 10, // modulo, divide, multiply 61 11, 11, 11, 11 // unary operators 62 }; 63 64 static bool read_file(const std::string &path, std::string &data, reshadefx::preprocessor::include_read_file_callback &cb) 65 { 66 std::string file_data; 67 if (!cb(path, file_data)) 68 return false; 69 70 // Append a new line feed to the end of the input string to avoid issues with parsing 71 file_data.push_back('\n'); 72 73 // Remove BOM (0xefbbbf means 0xfeff) 74 if (file_data.size() >= 3 && 75 static_cast<unsigned char>(file_data[0]) == 0xef && 76 static_cast<unsigned char>(file_data[1]) == 0xbb && 77 static_cast<unsigned char>(file_data[2]) == 0xbf) 78 file_data.erase(0, 3); 79 80 data = std::move(file_data); 81 return true; 82 } 83 84 bool reshadefx::preprocessor::stdfs_read_file_callback(const std::string &path, std::string &data) 85 { 86 std::ifstream file(std::filesystem::path(path), std::ios::binary); 87 if (!file) 88 return false; 89 90 // Read file contents into memory 91 std::error_code ec; 92 const uintmax_t file_size = std::filesystem::file_size(path, ec); 93 if (ec) 94 return false; 95 96 data.reserve(file_size + 1); 97 data.resize(static_cast<size_t>(file_size), '\0'); 98 if (!file.read(data.data(), file_size)) 99 return false; 100 101 // No longer need to have a handle open to the file, since all data was read, so can safely close it 102 file.close(); 103 return true; 104 } 105 106 bool reshadefx::preprocessor::stdfs_file_exists_callback(const std::string &path) 107 { 108 return std::filesystem::exists(std::filesystem::path(path)); 109 } 110 111 template <char ESCAPE_CHAR = '\\'> 112 static std::string escape_string(std::string s) 113 { 114 for (size_t offset = 0; (offset = s.find(ESCAPE_CHAR, offset)) != std::string::npos; offset += 2) 115 s.insert(offset, "\\", 1); 116 return '\"' + s + '\"'; 117 } 118 119 reshadefx::preprocessor::preprocessor() 120 : _file_exists_cb(stdfs_file_exists_callback) 121 , _read_file_cb(stdfs_read_file_callback) 122 { 123 } 124 reshadefx::preprocessor::~preprocessor() 125 { 126 } 127 128 void reshadefx::preprocessor::set_include_callbacks(include_file_exists_callback file_exists, 129 include_read_file_callback read_file) 130 { 131 _file_exists_cb = file_exists; 132 _read_file_cb = read_file; 133 } 134 135 void reshadefx::preprocessor::add_include_path(const std::string &path) 136 { 137 assert(!path.empty()); 138 _include_paths.push_back(std::filesystem::path(path)); 139 } 140 bool reshadefx::preprocessor::add_macro_definition(const std::string &name, const macro ¯o) 141 { 142 assert(!name.empty()); 143 return _macros.emplace(name, macro).second; 144 } 145 146 bool reshadefx::preprocessor::append_file(const std::string &path) 147 { 148 std::string source_code; 149 if (!read_file(path, source_code, _read_file_cb)) 150 return false; 151 152 return append_string(std::move(source_code), path); 153 } 154 bool reshadefx::preprocessor::append_string(std::string source_code, const std::string &path /* = std::string() */) 155 { 156 // Enforce all input strings to end with a line feed 157 assert(!source_code.empty() && source_code.back() == '\n'); 158 159 _success = true; // Clear success flag before parsing a new string 160 161 // Give this push a name, so that lexer location starts at a new line 162 // This is necessary in case this string starts with a preprocessor directive, since the lexer only reports those as such if they appear at the beginning of a new line 163 // But without a name, the lexer location is set to the last token location, which most likely will not be at the start of the line 164 push(std::move(source_code), path.empty() ? "unknown" : path); 165 parse(); 166 167 return _success; 168 } 169 170 std::vector<std::filesystem::path> reshadefx::preprocessor::included_files() const 171 { 172 std::vector<std::filesystem::path> files; 173 files.reserve(_file_cache.size()); 174 for (const auto &it : _file_cache) 175 files.push_back(std::filesystem::u8path(it.first)); 176 return files; 177 } 178 std::vector<std::pair<std::string, std::string>> reshadefx::preprocessor::used_macro_definitions() const 179 { 180 std::vector<std::pair<std::string, std::string>> defines; 181 defines.reserve(_used_macros.size()); 182 for (const std::string &name : _used_macros) 183 if (const auto it = _macros.find(name); 184 // Do not include function-like macros, since they are more likely to contain a complex replacement list 185 it != _macros.end() && !it->second.is_function_like) 186 defines.emplace_back(name, it->second.replacement_list); 187 return defines; 188 } 189 190 void reshadefx::preprocessor::error(const location &location, const std::string &message) 191 { 192 _errors += location.source + '(' + std::to_string(location.line) + ", " + std::to_string(location.column) + ')' + ": preprocessor error: " + message + '\n'; 193 _success = false; // Unset success flag 194 } 195 void reshadefx::preprocessor::warning(const location &location, const std::string &message) 196 { 197 _errors += location.source + '(' + std::to_string(location.line) + ", " + std::to_string(location.column) + ')' + ": preprocessor warning: " + message + '\n'; 198 } 199 200 void reshadefx::preprocessor::push(std::string input, const std::string &name) 201 { 202 location start_location = !name.empty() ? 203 // Start at the beginning of the file when pushing a new file 204 location(name, 1) : 205 // Start with last known token location when pushing an unnamed string 206 _token.location; 207 208 input_level level = { name }; 209 level.lexer.reset(new lexer( 210 std::move(input), 211 true /* ignore_comments */, 212 false /* ignore_whitespace */, 213 false /* ignore_pp_directives */, 214 false /* ignore_line_directives */, 215 true /* ignore_keywords */, 216 false /* escape_string_literals */, 217 start_location)); 218 level.next_token.id = tokenid::unknown; 219 level.next_token.location = start_location; // This is used in 'consume' to initialize the output location 220 221 // Inherit hidden macros from parent 222 if (!_input_stack.empty()) 223 level.hidden_macros = _input_stack.back().hidden_macros; 224 225 _input_stack.push_back(std::move(level)); 226 _next_input_index = _input_stack.size() - 1; 227 228 // Advance into the input stack to update next token 229 consume(); 230 } 231 232 bool reshadefx::preprocessor::peek(tokenid tokid) const 233 { 234 if (_input_stack.empty()) 235 return tokid == tokenid::end_of_file; 236 237 return _input_stack[_next_input_index].next_token == tokid; 238 } 239 void reshadefx::preprocessor::consume() 240 { 241 _current_input_index = _next_input_index; 242 243 if (_input_stack.empty()) 244 { 245 // End of input has been reached already (this can happen when the input text is not terminated with a new line) 246 assert(_current_input_index == 0); 247 return; 248 } 249 250 // Clear out input stack, now that the current token is overwritten 251 while (_input_stack.size() > (_current_input_index + 1)) 252 _input_stack.pop_back(); 253 254 // Update location information after switching input levels 255 input_level &input = _input_stack[_current_input_index]; 256 if (!input.name.empty() && input.name != _output_location.source) 257 { 258 _output += "#line " + std::to_string(input.next_token.location.line) + " \"" + input.name + "\"\n"; 259 // Line number is increased before checking against next token in 'tokenid::end_of_line' handling in 'parse' function below, so compensate for that here 260 _output_location.line = input.next_token.location.line - 1; 261 _output_location.source = input.name; 262 } 263 264 // Set current token 265 _token = std::move(input.next_token); 266 _current_token_raw_data = input.lexer->input_string().substr(_token.offset, _token.length); 267 268 // Get the next token 269 input.next_token = input.lexer->lex(); 270 271 // Verify string literals (since the lexer cannot throw errors itself) 272 if (_token == tokenid::string_literal && _current_token_raw_data.back() != '\"') 273 error(_token.location, "unterminated string literal"); 274 275 // Pop input level if lexical analysis has reached the end of it 276 // This ensures the EOF token is not consumed until the very last file 277 while (peek(tokenid::end_of_file)) 278 { 279 // Remove any unterminated blocks from the stack 280 for (; !_if_stack.empty() && _if_stack.back().input_index >= _next_input_index; _if_stack.pop_back()) 281 error(_if_stack.back().pp_token.location, "unterminated #if"); 282 283 if (_next_input_index == 0) 284 { 285 // End of input has been reached, so cannot pop further and this is the last token 286 _input_stack.pop_back(); 287 return; 288 } 289 else 290 { 291 _next_input_index -= 1; 292 } 293 } 294 } 295 void reshadefx::preprocessor::consume_until(tokenid tokid) 296 { 297 while (!accept(tokid) && !peek(tokenid::end_of_file)) 298 { 299 consume(); 300 } 301 } 302 303 bool reshadefx::preprocessor::accept(tokenid tokid, bool ignore_whitespace) 304 { 305 if (ignore_whitespace) 306 { 307 while (peek(tokenid::space)) 308 { 309 consume(); 310 } 311 } 312 313 if (peek(tokid)) 314 { 315 consume(); 316 return true; 317 } 318 319 return false; 320 } 321 bool reshadefx::preprocessor::expect(tokenid tokid) 322 { 323 if (!accept(tokid)) 324 { 325 if (_input_stack.empty()) 326 return tokid == tokenid::end_of_line || tokid == tokenid::end_of_file; 327 328 token actual_token = _input_stack[_next_input_index].next_token; 329 actual_token.location.source = _output_location.source; 330 331 if (actual_token == tokenid::end_of_line) 332 error(actual_token.location, "syntax error: unexpected new line"); 333 else 334 error(actual_token.location, "syntax error: unexpected token '" + 335 _input_stack[_next_input_index].lexer->input_string().substr(actual_token.offset, actual_token.length) + '\''); 336 337 return false; 338 } 339 340 return true; 341 } 342 343 void reshadefx::preprocessor::parse() 344 { 345 std::string line; 346 347 // Consume all tokens in the input 348 while (!peek(tokenid::end_of_file)) 349 { 350 consume(); 351 352 _recursion_count = 0; 353 354 const bool skip = !_if_stack.empty() && _if_stack.back().skipping; 355 356 switch (_token) 357 { 358 case tokenid::hash_if: 359 parse_if(); 360 if (!expect(tokenid::end_of_line)) 361 consume_until(tokenid::end_of_line); 362 continue; 363 case tokenid::hash_ifdef: 364 parse_ifdef(); 365 if (!expect(tokenid::end_of_line)) 366 consume_until(tokenid::end_of_line); 367 continue; 368 case tokenid::hash_ifndef: 369 parse_ifndef(); 370 if (!expect(tokenid::end_of_line)) 371 consume_until(tokenid::end_of_line); 372 continue; 373 case tokenid::hash_else: 374 parse_else(); 375 if (!expect(tokenid::end_of_line)) 376 consume_until(tokenid::end_of_line); 377 continue; 378 case tokenid::hash_elif: 379 parse_elif(); 380 if (!expect(tokenid::end_of_line)) 381 consume_until(tokenid::end_of_line); 382 continue; 383 case tokenid::hash_endif: 384 parse_endif(); 385 if (!expect(tokenid::end_of_line)) 386 consume_until(tokenid::end_of_line); 387 continue; 388 default: 389 // All other tokens are handled below 390 break; 391 } 392 393 if (skip) 394 // Ignore token since the current section is disabled 395 continue; 396 397 switch (_token) 398 { 399 case tokenid::hash_def: 400 parse_def(); 401 if (!expect(tokenid::end_of_line)) 402 consume_until(tokenid::end_of_line); 403 continue; 404 case tokenid::hash_undef: 405 parse_undef(); 406 if (!expect(tokenid::end_of_line)) 407 consume_until(tokenid::end_of_line); 408 continue; 409 case tokenid::hash_error: 410 parse_error(); 411 if (!expect(tokenid::end_of_line)) 412 consume_until(tokenid::end_of_line); 413 continue; 414 case tokenid::hash_warning: 415 parse_warning(); 416 if (!expect(tokenid::end_of_line)) 417 consume_until(tokenid::end_of_line); 418 continue; 419 case tokenid::hash_pragma: 420 parse_pragma(); 421 if (!expect(tokenid::end_of_line)) 422 consume_until(tokenid::end_of_line); 423 continue; 424 case tokenid::hash_include: 425 parse_include(); 426 continue; 427 case tokenid::hash_unknown: 428 // Standalone "#" is valid and should be ignored 429 if (_token.length != 0) 430 error(_token.location, "unrecognized preprocessing directive '" + _token.literal_as_string + '\''); 431 if (!expect(tokenid::end_of_line)) 432 consume_until(tokenid::end_of_line); 433 continue; 434 case tokenid::end_of_line: 435 if (line.empty()) 436 continue; // Do not append empty lines to output, instead emit "#line" statements 437 _output_location.line++; 438 if (_token.location.line != _output_location.line) 439 { 440 _output += "#line " + std::to_string(_token.location.line) + '\n'; 441 _output_location.line = _token.location.line; 442 } 443 _output += line; 444 _output += '\n'; 445 line.clear(); 446 continue; 447 case tokenid::identifier: 448 if (evaluate_identifier_as_macro()) 449 continue; 450 [[fallthrough]]; 451 default: 452 line += _current_token_raw_data; 453 break; 454 } 455 } 456 457 // Append the last line after the EOF token was reached to the output 458 _output += line; 459 _output += '\n'; 460 } 461 462 void reshadefx::preprocessor::parse_def() 463 { 464 if (!expect(tokenid::identifier)) 465 return; 466 if (_token.literal_as_string == "defined") 467 return warning(_token.location, "macro name 'defined' is reserved"); 468 469 macro m; 470 const location location = std::move(_token.location); 471 const std::string macro_name = std::move(_token.literal_as_string); 472 473 // Only create function-like macro if the parenthesis follows the macro name without any whitespace between 474 if (accept(tokenid::parenthesis_open, false)) 475 { 476 m.is_function_like = true; 477 478 while (accept(tokenid::identifier)) 479 { 480 m.parameters.push_back(_token.literal_as_string); 481 482 if (!accept(tokenid::comma)) 483 break; 484 } 485 486 if (accept(tokenid::ellipsis)) 487 m.is_variadic = true; 488 489 if (!expect(tokenid::parenthesis_close)) 490 return; 491 } 492 493 create_macro_replacement_list(m); 494 495 if (!add_macro_definition(macro_name, m)) 496 return error(location, "redefinition of '" + macro_name + "'"); 497 } 498 void reshadefx::preprocessor::parse_undef() 499 { 500 if (!expect(tokenid::identifier)) 501 return; 502 if (_token.literal_as_string == "defined") 503 return warning(_token.location, "macro name 'defined' is reserved"); 504 505 _macros.erase(_token.literal_as_string); 506 } 507 508 void reshadefx::preprocessor::parse_if() 509 { 510 if_level level; 511 level.pp_token = _token; 512 level.input_index = _current_input_index; 513 514 // Evaluate expression after updating 'pp_token', so that it points at the beginning # token 515 level.value = evaluate_expression(); 516 517 const bool parent_skipping = !_if_stack.empty() && _if_stack.back().skipping; 518 level.skipping = parent_skipping || !level.value; 519 520 _if_stack.push_back(std::move(level)); 521 } 522 void reshadefx::preprocessor::parse_ifdef() 523 { 524 if_level level; 525 level.pp_token = _token; 526 level.input_index = _current_input_index; 527 528 if (!expect(tokenid::identifier)) 529 return; 530 531 level.value = is_defined(_token.literal_as_string); 532 533 const bool parent_skipping = !_if_stack.empty() && _if_stack.back().skipping; 534 level.skipping = parent_skipping || !level.value; 535 536 _if_stack.push_back(std::move(level)); 537 // Only add to used macro list if this #ifdef is active and the macro was not defined before 538 if (!parent_skipping) 539 if (const auto it = _macros.find(_token.literal_as_string); it == _macros.end() || it->second.is_predefined) 540 _used_macros.emplace(_token.literal_as_string); 541 } 542 void reshadefx::preprocessor::parse_ifndef() 543 { 544 if_level level; 545 level.pp_token = _token; 546 level.input_index = _current_input_index; 547 548 if (!expect(tokenid::identifier)) 549 return; 550 551 level.value = !is_defined(_token.literal_as_string); 552 553 const bool parent_skipping = !_if_stack.empty() && _if_stack.back().skipping; 554 level.skipping = parent_skipping || !level.value; 555 556 _if_stack.push_back(std::move(level)); 557 // Only add to used macro list if this #ifndef is active and the macro was not defined before 558 if (!parent_skipping) 559 if (const auto it = _macros.find(_token.literal_as_string); it == _macros.end() || it->second.is_predefined) 560 _used_macros.emplace(_token.literal_as_string); 561 } 562 void reshadefx::preprocessor::parse_elif() 563 { 564 if (_if_stack.empty()) 565 return error(_token.location, "missing #if for #elif"); 566 567 if_level &level = _if_stack.back(); 568 if (level.pp_token == tokenid::hash_else) 569 return error(_token.location, "#elif is not allowed after #else"); 570 571 // Update 'pp_token' before evaluating expression, so that it points at the beginning # token 572 level.pp_token = _token; 573 level.input_index = _current_input_index; 574 575 const bool parent_skipping = _if_stack.size() > 1 && _if_stack[_if_stack.size() - 2].skipping; 576 const bool condition_result = evaluate_expression(); 577 level.skipping = parent_skipping || level.value || !condition_result; 578 579 if (!level.value) level.value = condition_result; 580 } 581 void reshadefx::preprocessor::parse_else() 582 { 583 if (_if_stack.empty()) 584 return error(_token.location, "missing #if for #else"); 585 586 if_level &level = _if_stack.back(); 587 if (level.pp_token == tokenid::hash_else) 588 return error(_token.location, "#else is not allowed after #else"); 589 590 level.pp_token = _token; 591 level.input_index = _current_input_index; 592 593 const bool parent_skipping = _if_stack.size() > 1 && _if_stack[_if_stack.size() - 2].skipping; 594 level.skipping = parent_skipping || level.value; 595 596 if (!level.value) level.value = true; 597 } 598 void reshadefx::preprocessor::parse_endif() 599 { 600 if (_if_stack.empty()) 601 error(_token.location, "missing #if for #endif"); 602 else 603 _if_stack.pop_back(); 604 } 605 606 void reshadefx::preprocessor::parse_error() 607 { 608 const location keyword_location = std::move(_token.location); 609 610 if (!expect(tokenid::string_literal)) 611 return; 612 613 error(keyword_location, _token.literal_as_string); 614 } 615 void reshadefx::preprocessor::parse_warning() 616 { 617 const location keyword_location = std::move(_token.location); 618 619 if (!expect(tokenid::string_literal)) 620 return; 621 622 warning(keyword_location, _token.literal_as_string); 623 } 624 625 void reshadefx::preprocessor::parse_pragma() 626 { 627 const location keyword_location = std::move(_token.location); 628 629 if (!expect(tokenid::identifier)) 630 return; 631 632 std::string pragma = std::move(_token.literal_as_string); 633 std::string pragma_args; 634 635 // Ignore whitespace preceding the argument list 636 accept(tokenid::space); 637 638 while (!peek(tokenid::end_of_line) && !peek(tokenid::end_of_file)) 639 { 640 consume(); 641 642 if (_token == tokenid::identifier && evaluate_identifier_as_macro()) 643 continue; 644 645 // Collapse all whitespace down to a single space 646 if (_token == tokenid::space) 647 pragma_args += ' '; 648 else 649 pragma_args += _current_token_raw_data; 650 } 651 652 if (pragma == "once") 653 { 654 // Clear file contents, so that future include statements simply push an empty string instead of these file contents again 655 if (const auto it = _file_cache.find(_output_location.source); it != _file_cache.end()) 656 it->second.clear(); 657 return; 658 } 659 660 if (pragma == "warning" || pragma == "reshade") 661 { 662 _used_pragmas.emplace_back(std::move(pragma), std::move(pragma_args)); 663 return; 664 } 665 666 warning(keyword_location, "unknown pragma ignored"); 667 } 668 669 void reshadefx::preprocessor::parse_include() 670 { 671 const location keyword_location = std::move(_token.location); 672 673 while (accept(tokenid::identifier)) 674 { 675 if (!evaluate_identifier_as_macro()) 676 { 677 error(_token.location, "syntax error: unexpected identifier in #include"); 678 consume_until(tokenid::end_of_line); 679 return; 680 } 681 } 682 683 if (!expect(tokenid::string_literal)) 684 { 685 consume_until(tokenid::end_of_line); 686 return; 687 } 688 689 std::filesystem::path file_name = std::filesystem::u8path(_token.literal_as_string); 690 std::filesystem::path file_path = std::filesystem::u8path(_output_location.source); 691 file_path.replace_filename(file_name); 692 693 if (!_file_exists_cb(file_path.u8string())) 694 for (const std::filesystem::path &include_path : _include_paths) 695 if (_file_exists_cb((file_path = include_path / file_name).u8string())) 696 break; 697 698 const std::string file_path_string = file_path.u8string(); 699 700 // Detect recursive include and abort to avoid infinite loop 701 if (std::find_if(_input_stack.begin(), _input_stack.end(), 702 [&file_path_string](const input_level &level) { return level.name == file_path_string; }) != _input_stack.end()) 703 return error(_token.location, "recursive #include"); 704 705 std::string input; 706 if (const auto it = _file_cache.find(file_path_string); it != _file_cache.end()) 707 { 708 input = it->second; 709 } 710 else 711 { 712 if (!read_file(file_path_string, input, _read_file_cb)) 713 return error(keyword_location, "could not open included file '" + file_name.u8string() + '\''); 714 715 _file_cache.emplace(file_path_string, input); 716 } 717 718 // Skip end of line character following the include statement before pushing, so that the line number is already pointing to the next line when popping out of it again 719 if (!expect(tokenid::end_of_line)) 720 consume_until(tokenid::end_of_line); 721 722 // Clear out input stack before pushing include, so that hidden macros do not bleed into the include 723 while (_input_stack.size() > (_next_input_index + 1)) 724 _input_stack.pop_back(); 725 726 push(std::move(input), file_path_string); 727 } 728 729 bool reshadefx::preprocessor::evaluate_expression() 730 { 731 struct rpn_token 732 { 733 int value; 734 bool is_op; 735 }; 736 737 size_t rpn_index = 0; 738 size_t stack_index = 0; 739 const size_t STACK_SIZE = 128; 740 rpn_token rpn[STACK_SIZE]; 741 int stack[STACK_SIZE]; 742 743 // Keep track of previous token to figure out data type of expression 744 tokenid previous_token = _token; 745 746 // Run shunting-yard algorithm 747 while (!peek(tokenid::end_of_line) && !peek(tokenid::end_of_file)) 748 { 749 if (stack_index >= STACK_SIZE || rpn_index >= STACK_SIZE) 750 return error(_token.location, "expression evaluator ran out of stack space"), false; 751 752 consume(); 753 754 auto op = op_none; 755 bool left_associative = true; 756 bool parenthesis_matched = false; 757 758 switch (_token) 759 { 760 case tokenid::space: 761 continue; 762 case tokenid::exclaim: 763 op = op_not; 764 left_associative = false; 765 break; 766 case tokenid::percent: 767 op = op_modulo; 768 break; 769 case tokenid::ampersand: 770 op = op_bitand; 771 break; 772 case tokenid::star: 773 op = op_multiply; 774 break; 775 case tokenid::plus: 776 left_associative = 777 previous_token == tokenid::int_literal || 778 previous_token == tokenid::uint_literal || 779 previous_token == tokenid::identifier || 780 previous_token == tokenid::parenthesis_close; 781 op = left_associative ? op_add : op_plus; 782 break; 783 case tokenid::minus: 784 left_associative = 785 previous_token == tokenid::int_literal || 786 previous_token == tokenid::uint_literal || 787 previous_token == tokenid::identifier || 788 previous_token == tokenid::parenthesis_close; 789 op = left_associative ? op_subtract : op_negate; 790 break; 791 case tokenid::slash: 792 op = op_divide; 793 break; 794 case tokenid::less: 795 op = op_less; 796 break; 797 case tokenid::greater: 798 op = op_greater; 799 break; 800 case tokenid::caret: 801 op = op_bitxor; 802 break; 803 case tokenid::pipe: 804 op = op_bitor; 805 break; 806 case tokenid::tilde: 807 op = op_bitnot; 808 left_associative = false; 809 break; 810 case tokenid::exclaim_equal: 811 op = op_not_equal; 812 break; 813 case tokenid::ampersand_ampersand: 814 op = op_and; 815 break; 816 case tokenid::less_less: 817 op = op_leftshift; 818 break; 819 case tokenid::less_equal: 820 op = op_less_equal; 821 break; 822 case tokenid::equal_equal: 823 op = op_equal; 824 break; 825 case tokenid::greater_greater: 826 op = op_rightshift; 827 break; 828 case tokenid::greater_equal: 829 op = op_greater_equal; 830 break; 831 case tokenid::pipe_pipe: 832 op = op_or; 833 break; 834 default: 835 // This is not an operator token 836 break; 837 } 838 839 switch (_token) 840 { 841 case tokenid::parenthesis_open: 842 stack[stack_index++] = op_parentheses; 843 break; 844 case tokenid::parenthesis_close: 845 parenthesis_matched = false; 846 while (stack_index > 0) 847 { 848 const int op2 = stack[--stack_index]; 849 if (op2 == op_parentheses) 850 { 851 parenthesis_matched = true; 852 break; 853 } 854 855 rpn[rpn_index++] = { op2, true }; 856 } 857 858 if (!parenthesis_matched) 859 return error(_token.location, "unmatched ')'"), false; 860 break; 861 case tokenid::identifier: 862 if (evaluate_identifier_as_macro()) 863 continue; 864 865 if (_token.literal_as_string == "exists") 866 { 867 const bool has_parentheses = accept(tokenid::parenthesis_open); 868 869 while (accept(tokenid::identifier)) 870 { 871 if (!evaluate_identifier_as_macro()) 872 { 873 error(_token.location, "syntax error: unexpected identifier after 'exists'"); 874 return false; 875 } 876 } 877 878 if (!expect(tokenid::string_literal)) 879 return false; 880 881 std::filesystem::path file_name = std::filesystem::u8path(_token.literal_as_string); 882 std::filesystem::path file_path = std::filesystem::u8path(_output_location.source); 883 file_path.replace_filename(file_name); 884 885 if (has_parentheses && !expect(tokenid::parenthesis_close)) 886 return false; 887 888 if (!_file_exists_cb(file_path.u8string())) 889 for (const std::filesystem::path &include_path : _include_paths) 890 if (_file_exists_cb((file_path = include_path / file_name).u8string())) 891 break; 892 893 rpn[rpn_index++] = { _file_exists_cb(file_path.u8string()) ? 1 : 0, false }; 894 continue; 895 } 896 if (_token.literal_as_string == "defined") 897 { 898 const bool has_parentheses = accept(tokenid::parenthesis_open); 899 900 if (!expect(tokenid::identifier)) 901 return false; 902 903 const std::string macro_name = std::move(_token.literal_as_string); 904 905 if (has_parentheses && !expect(tokenid::parenthesis_close)) 906 return false; 907 908 rpn[rpn_index++] = { is_defined(macro_name) ? 1 : 0, false }; 909 continue; 910 } 911 912 // An identifier that cannot be replaced with a number becomes zero 913 rpn[rpn_index++] = { 0, false }; 914 break; 915 case tokenid::int_literal: 916 case tokenid::uint_literal: 917 rpn[rpn_index++] = { _token.literal_as_int, false }; 918 break; 919 default: 920 if (op == op_none) 921 return error(_token.location, "invalid expression"), false; 922 923 while (stack_index > 0) 924 { 925 const int prev_op = stack[stack_index - 1]; 926 if (prev_op == op_parentheses) 927 break; 928 929 if (left_associative ? 930 (precedence_lookup[op] > precedence_lookup[prev_op]) : 931 (precedence_lookup[op] >= precedence_lookup[prev_op])) 932 break; 933 934 stack_index--; 935 rpn[rpn_index++] = { prev_op, true }; 936 } 937 938 stack[stack_index++] = op; 939 break; 940 } 941 942 previous_token = _token; 943 } 944 945 while (stack_index > 0) 946 { 947 const int op = stack[--stack_index]; 948 if (op == op_parentheses) 949 return error(_token.location, "unmatched ')'"), false; 950 951 rpn[rpn_index++] = { op, true }; 952 } 953 954 #define UNARY_OPERATION(op) { \ 955 if (stack_index < 1) \ 956 return error(_token.location, "invalid expression"), 0; \ 957 stack[stack_index - 1] = op stack[stack_index - 1]; \ 958 } 959 #define BINARY_OPERATION(op) { \ 960 if (stack_index < 2) \ 961 return error(_token.location, "invalid expression"), 0; \ 962 stack[stack_index - 2] = stack[stack_index - 2] op stack[stack_index - 1]; \ 963 stack_index--; \ 964 } 965 966 // Evaluate reverse polish notation output 967 for (rpn_token *token = rpn; rpn_index--; token++) 968 { 969 if (token->is_op) 970 { 971 switch (token->value) 972 { 973 case op_or: 974 BINARY_OPERATION(||); 975 break; 976 case op_and: 977 BINARY_OPERATION(&&); 978 break; 979 case op_bitor: 980 BINARY_OPERATION(|); 981 break; 982 case op_bitxor: 983 BINARY_OPERATION(^); 984 break; 985 case op_bitand: 986 BINARY_OPERATION(&); 987 break; 988 case op_not_equal: 989 BINARY_OPERATION(!=); 990 break; 991 case op_equal: 992 BINARY_OPERATION(==); 993 break; 994 case op_less: 995 BINARY_OPERATION(<); 996 break; 997 case op_greater: 998 BINARY_OPERATION(>); 999 break; 1000 case op_less_equal: 1001 BINARY_OPERATION(<=); 1002 break; 1003 case op_greater_equal: 1004 BINARY_OPERATION(>=); 1005 break; 1006 case op_leftshift: 1007 BINARY_OPERATION(<<); 1008 break; 1009 case op_rightshift: 1010 BINARY_OPERATION(>>); 1011 break; 1012 case op_add: 1013 BINARY_OPERATION(+); 1014 break; 1015 case op_subtract: 1016 BINARY_OPERATION(-); 1017 break; 1018 case op_modulo: 1019 BINARY_OPERATION(%); 1020 break; 1021 case op_divide: 1022 BINARY_OPERATION(/); 1023 break; 1024 case op_multiply: 1025 BINARY_OPERATION(*); 1026 break; 1027 case op_plus: 1028 UNARY_OPERATION(+); 1029 break; 1030 case op_negate: 1031 UNARY_OPERATION(-); 1032 break; 1033 case op_not: 1034 UNARY_OPERATION(!); 1035 break; 1036 case op_bitnot: 1037 UNARY_OPERATION(~); 1038 break; 1039 } 1040 } 1041 else 1042 { 1043 stack[stack_index++] = token->value; 1044 } 1045 } 1046 1047 if (stack_index != 1) 1048 return error(_token.location, "invalid expression"), false; 1049 1050 return stack[0] != 0; 1051 } 1052 1053 bool reshadefx::preprocessor::evaluate_identifier_as_macro() 1054 { 1055 if (_token.literal_as_string == "__LINE__") 1056 { 1057 push(std::to_string(_token.location.line)); 1058 return true; 1059 } 1060 if (_token.literal_as_string == "__FILE__") 1061 { 1062 push(escape_string(_token.location.source)); 1063 return true; 1064 } 1065 if (_token.literal_as_string == "__FILE_STEM__") 1066 { 1067 const std::filesystem::path file_stem = std::filesystem::u8path(_token.location.source).stem(); 1068 push(escape_string(file_stem.u8string())); 1069 return true; 1070 } 1071 if (_token.literal_as_string == "__FILE_NAME__") 1072 { 1073 const std::filesystem::path file_name = std::filesystem::u8path(_token.location.source).filename(); 1074 push(escape_string(file_name.u8string())); 1075 return true; 1076 } 1077 1078 const auto it = _macros.find(_token.literal_as_string); 1079 if (it == _macros.end()) 1080 return false; 1081 1082 if (!_input_stack.empty()) 1083 { 1084 const std::unordered_set<std::string> &hidden_macros = _input_stack[_current_input_index].hidden_macros; 1085 if (hidden_macros.find(_token.literal_as_string) != hidden_macros.end()) 1086 return false; 1087 } 1088 1089 const location macro_location = _token.location; 1090 if (_recursion_count++ >= 256) 1091 return error(macro_location, "macro recursion too high"), false; 1092 1093 std::vector<std::string> arguments; 1094 if (it->second.is_function_like) 1095 { 1096 if (!accept(tokenid::parenthesis_open)) 1097 return false; // Function like macro used without arguments, handle that like a normal identifier instead 1098 1099 while (true) 1100 { 1101 int parentheses_level = 0; 1102 std::string argument; 1103 1104 // Ignore whitespace preceding the argument 1105 accept(tokenid::space); 1106 1107 if (accept(tokenid::parenthesis_close)) 1108 break; // Special case for when there are no arguments 1109 1110 while (true) 1111 { 1112 if (peek(tokenid::end_of_file)) 1113 return error(macro_location, "unexpected end of file in macro expansion"), false; 1114 1115 // Consume all tokens of the argument 1116 consume(); 1117 1118 if (_token == tokenid::comma && parentheses_level == 0 && !(it->second.is_variadic && arguments.size() == it->second.parameters.size())) 1119 break; // Comma marks end of an argument (unless this is the last argument in a variadic macro invocation) 1120 if (_token == tokenid::parenthesis_open) 1121 parentheses_level++; 1122 if (_token == tokenid::parenthesis_close && --parentheses_level < 0) 1123 break; 1124 1125 // Collapse all whitespace down to a single space 1126 if (_token == tokenid::space) 1127 argument += ' '; 1128 else 1129 argument += _current_token_raw_data; 1130 } 1131 1132 // Trim whitespace following the argument 1133 if (argument.size() && argument.back() == ' ') 1134 argument.pop_back(); 1135 1136 arguments.push_back(std::move(argument)); 1137 1138 if (parentheses_level < 0) 1139 break; 1140 } 1141 } 1142 1143 expand_macro(it->first, it->second, arguments); 1144 1145 return true; 1146 } 1147 1148 bool reshadefx::preprocessor::is_defined(const std::string &name) const 1149 { 1150 return _macros.find(name) != _macros.end() || 1151 // Check built-in macros as well 1152 name == "__LINE__" || 1153 name == "__FILE__" || 1154 name == "__FILE_NAME__" || 1155 name == "__FILE_STEM__"; 1156 } 1157 1158 void reshadefx::preprocessor::expand_macro(const std::string &name, const macro ¯o, const std::vector<std::string> &arguments) 1159 { 1160 if (macro.replacement_list.empty()) 1161 return; 1162 1163 // Verify argument count for function-like macros 1164 if (arguments.size() < macro.parameters.size()) 1165 return warning(_token.location, "not enough arguments for function-like macro invocation '" + name + "'"); 1166 if (arguments.size() > macro.parameters.size() && !macro.is_variadic) 1167 return warning(_token.location, "too many arguments for function-like macro invocation '" + name + "'"); 1168 1169 std::string input; 1170 input.reserve(macro.replacement_list.size()); 1171 1172 for (size_t offset = 0; offset < macro.replacement_list.size(); ++offset) 1173 { 1174 if (macro.replacement_list[offset] != macro_replacement_start) 1175 { 1176 input += macro.replacement_list[offset]; 1177 continue; 1178 } 1179 1180 // This is a special replacement sequence 1181 const char type = macro.replacement_list[++offset]; 1182 const char index = macro.replacement_list[++offset]; 1183 if (static_cast<size_t>(index) >= arguments.size()) 1184 { 1185 if (macro.is_variadic) 1186 { 1187 // The concatenation operator has a special meaning when placed between a comma and a variable argument, deleting the preceding comma 1188 if (type == macro_replacement_concat && input.back() == ',') 1189 input.pop_back(); 1190 if (type == macro_replacement_stringize) 1191 input += "\"\""; 1192 } 1193 continue; 1194 } 1195 1196 switch (type) 1197 { 1198 case macro_replacement_argument: 1199 // Argument prescan 1200 push(arguments[index] + static_cast<char>(macro_replacement_argument)); 1201 while (true) 1202 { 1203 // Consume all tokens of the argument (until the end marker is reached) 1204 consume(); 1205 1206 if (_token == tokenid::unknown) // 'macro_replacement_argument' is 'tokenid::unknown' 1207 break; 1208 if (_token == tokenid::identifier && evaluate_identifier_as_macro()) 1209 continue; 1210 1211 input += _current_token_raw_data; 1212 } 1213 assert(_current_token_raw_data[0] == macro_replacement_argument); 1214 break; 1215 case macro_replacement_concat: 1216 input += arguments[index]; 1217 break; 1218 case macro_replacement_stringize: 1219 // Adds backslashes to escape quotes 1220 input += escape_string<'\"'>(arguments[index]); 1221 break; 1222 } 1223 } 1224 1225 push(std::move(input)); 1226 1227 // Avoid expanding macros again that are referencing themselves 1228 _input_stack[_current_input_index].hidden_macros.insert(name); 1229 } 1230 1231 void reshadefx::preprocessor::create_macro_replacement_list(macro ¯o) 1232 { 1233 // Since the number of parameters is encoded in the string, it may not exceed the available size of a char 1234 if (macro.parameters.size() >= std::numeric_limits<unsigned char>::max()) 1235 return error(_token.location, "too many macro parameters"); 1236 1237 // Ignore whitespace preceding the replacement list 1238 accept(tokenid::space); 1239 1240 bool next_concat = false; 1241 1242 while (!peek(tokenid::end_of_line) && !peek(tokenid::end_of_file)) 1243 { 1244 consume(); 1245 1246 switch (_token) 1247 { 1248 case tokenid::hash: 1249 if (accept(tokenid::hash, false)) 1250 { 1251 if (macro.replacement_list.empty()) 1252 return error(_token.location, "## cannot appear at start of macro expansion"); 1253 if (peek(tokenid::end_of_line)) 1254 return error(_token.location, "## cannot appear at end of macro expansion"); 1255 1256 // Remove any whitespace preceding or following the concatenation operator (so "a ## b" becomes "ab") 1257 if (macro.replacement_list.back() == ' ') 1258 macro.replacement_list.pop_back(); 1259 accept(tokenid::space); 1260 1261 // Disable macro expansion for any argument preceding or following the ## token concatenation operator 1262 if (macro.replacement_list.size() > 2 && macro.replacement_list[macro.replacement_list.size() - 2] == macro_replacement_argument) 1263 macro.replacement_list[macro.replacement_list.size() - 2] = macro_replacement_concat; 1264 next_concat = true; 1265 continue; 1266 } 1267 if (macro.is_function_like) 1268 { 1269 if (!expect(tokenid::identifier)) 1270 return; 1271 1272 const auto it = std::find(macro.parameters.begin(), macro.parameters.end(), _token.literal_as_string); 1273 if (it == macro.parameters.end() && !(macro.is_variadic && _token.literal_as_string == "__VA_ARGS__")) 1274 return error(_token.location, "# must be followed by parameter name"); 1275 1276 // Start a # stringize operator 1277 macro.replacement_list += macro_replacement_start; 1278 macro.replacement_list += macro_replacement_stringize; 1279 macro.replacement_list += static_cast<char>(std::distance(macro.parameters.begin(), it)); 1280 next_concat = false; 1281 continue; 1282 } 1283 break; 1284 case tokenid::space: 1285 // Collapse all whitespace down to a single space 1286 macro.replacement_list += ' '; 1287 continue; 1288 case tokenid::minus: 1289 // Special case to handle things like "#define NUM -1\n -NUM", which would otherwise result in "--1", making parsing fail 1290 if (macro.replacement_list.empty()) 1291 macro.replacement_list += ' '; 1292 break; 1293 case tokenid::identifier: 1294 if (const auto it = std::find(macro.parameters.begin(), macro.parameters.end(), _token.literal_as_string); 1295 it != macro.parameters.end() || (macro.is_variadic && _token.literal_as_string == "__VA_ARGS__")) 1296 { 1297 macro.replacement_list += macro_replacement_start; 1298 macro.replacement_list += static_cast<char>(next_concat ? macro_replacement_concat : macro_replacement_argument); 1299 macro.replacement_list += static_cast<char>(std::distance(macro.parameters.begin(), it)); 1300 next_concat = false; 1301 continue; 1302 } 1303 break; 1304 default: 1305 // Token needs no special handling, raw data is added to macro below 1306 break; 1307 } 1308 1309 macro.replacement_list += _current_token_raw_data; 1310 next_concat = false; 1311 } 1312 1313 // Trim whitespace following the replacement list 1314 if (macro.replacement_list.size() && macro.replacement_list.back() == ' ') 1315 macro.replacement_list.pop_back(); 1316 }