effect_preprocessor.cpp - duckstation - duckstation, but archived from the revision just before upstream changed it to a proprietary software project, this version is the libre one

effect_preprocessor.cpp (37296B)
      1 /*
      2  * Copyright (C) 2014 Patrick Mours
      3  * SPDX-License-Identifier: BSD-3-Clause
      4  */
      5 
      6 #include "effect_lexer.hpp"
      7 #include "effect_preprocessor.hpp"
      8 #include <cassert>
      9 #include <fstream>
     10 #include <algorithm> // std::find_if
     11 
#ifndef _WIN32
	// Applies to every target that does not define _WIN32 (not only Linux): the native path encoding is assumed to be UTF-8 already, so no conversion is necessary
	// These function-like macros rewrite each 'u8path(x)' call in this file into 'path(x)' and each 'u8string()' call into 'string()'
	#define u8path(p) path(p)
	#define u8string() string()
#endif
     17 
     18 enum op_type
     19 {
     20 	op_none = -1,
     21 
     22 	op_or,
     23 	op_and,
     24 	op_bitor,
     25 	op_bitxor,
     26 	op_bitand,
     27 	op_not_equal,
     28 	op_equal,
     29 	op_less,
     30 	op_greater,
     31 	op_less_equal,
     32 	op_greater_equal,
     33 	op_leftshift,
     34 	op_rightshift,
     35 	op_add,
     36 	op_subtract,
     37 	op_modulo,
     38 	op_divide,
     39 	op_multiply,
     40 	op_plus,
     41 	op_negate,
     42 	op_not,
     43 	op_bitnot,
     44 	op_parentheses
     45 };
     46 
     47 enum macro_replacement
     48 {
     49 	macro_replacement_start = '\x00',
     50 	macro_replacement_argument = '\xFD',
     51 	macro_replacement_concat = '\xFF',
     52 	macro_replacement_stringize = '\xFE',
     53 };
     54 
// Operator precedence table indexed by 'op_type' (the shunting-yard loop in 'evaluate_expression' keeps an operator on the stack while the incoming operator has a higher value)
// There is one entry for each enumerator from 'op_or' through 'op_bitnot'; 'op_none' and 'op_parentheses' have no entry and must not be used as an index
static const int precedence_lookup[] = {
	0, 1, 2, 3, 4, // logical or, logical and, bitwise or, bitwise xor, bitwise and
	5, 6, 7, 7, 7, 7, // not equal, equal, less, greater, less equal, greater equal
	8, 8, // left shift, right shift
	9, 9, // add, subtract
	10, 10, 10, // modulo, divide, multiply
	11, 11, 11, 11 // unary operators (plus, negate, logical not, bitwise not)
};
     63 
     64 static bool read_file(const std::string &path, std::string &data, reshadefx::preprocessor::include_read_file_callback &cb)
     65 {
     66 	std::string file_data;
     67 	if (!cb(path, file_data))
     68 		return false;
     69 
     70 	// Append a new line feed to the end of the input string to avoid issues with parsing
     71 	file_data.push_back('\n');
     72 
     73 	// Remove BOM (0xefbbbf means 0xfeff)
     74 	if (file_data.size() >= 3 &&
     75 		static_cast<unsigned char>(file_data[0]) == 0xef &&
     76 		static_cast<unsigned char>(file_data[1]) == 0xbb &&
     77 		static_cast<unsigned char>(file_data[2]) == 0xbf)
     78 		file_data.erase(0, 3);
     79 
     80 	data = std::move(file_data);
     81 	return true;
     82 }
     83 
     84 bool reshadefx::preprocessor::stdfs_read_file_callback(const std::string &path, std::string &data)
     85 {
     86   std::ifstream file(std::filesystem::path(path), std::ios::binary);
     87   if (!file)
     88     return false;
     89 
     90   // Read file contents into memory
     91   std::error_code ec;
     92   const uintmax_t file_size = std::filesystem::file_size(path, ec);
     93   if (ec)
     94     return false;
     95 
     96 	data.reserve(file_size + 1);
     97 	data.resize(static_cast<size_t>(file_size), '\0');
     98   if (!file.read(data.data(), file_size))
     99     return false;
    100 
    101   // No longer need to have a handle open to the file, since all data was read, so can safely close it
    102   file.close();
    103 	return true;
    104 }
    105 
    106 bool reshadefx::preprocessor::stdfs_file_exists_callback(const std::string &path)
    107 {
    108 	return std::filesystem::exists(std::filesystem::path(path));
    109 }
    110 
    111 template <char ESCAPE_CHAR = '\\'>
    112 static std::string escape_string(std::string s)
    113 {
    114 	for (size_t offset = 0; (offset = s.find(ESCAPE_CHAR, offset)) != std::string::npos; offset += 2)
    115 		s.insert(offset, "\\", 1);
    116 	return '\"' + s + '\"';
    117 }
    118 
    119 reshadefx::preprocessor::preprocessor()
    120 	: _file_exists_cb(stdfs_file_exists_callback)
    121 	, _read_file_cb(stdfs_read_file_callback)
    122 {
    123 }
    124 reshadefx::preprocessor::~preprocessor()
    125 {
    126 }
    127 
    128 void reshadefx::preprocessor::set_include_callbacks(include_file_exists_callback file_exists,
    129                                                     include_read_file_callback read_file)
    130 {
    131 	_file_exists_cb = file_exists;
    132 	_read_file_cb = read_file;
    133 }
    134 
    135 void reshadefx::preprocessor::add_include_path(const std::string &path)
    136 {
    137 	assert(!path.empty());
    138 	_include_paths.push_back(std::filesystem::path(path));
    139 }
    140 bool reshadefx::preprocessor::add_macro_definition(const std::string &name, const macro &macro)
    141 {
    142 	assert(!name.empty());
    143 	return _macros.emplace(name, macro).second;
    144 }
    145 
    146 bool reshadefx::preprocessor::append_file(const std::string &path)
    147 {
    148 	std::string source_code;
    149 	if (!read_file(path, source_code, _read_file_cb))
    150 		return false;
    151 
    152 	return append_string(std::move(source_code), path);
    153 }
    154 bool reshadefx::preprocessor::append_string(std::string source_code, const std::string &path /* = std::string() */)
    155 {
    156 	// Enforce all input strings to end with a line feed
    157 	assert(!source_code.empty() && source_code.back() == '\n');
    158 
    159 	_success = true; // Clear success flag before parsing a new string
    160 
    161 	// Give this push a name, so that lexer location starts at a new line
    162 	// This is necessary in case this string starts with a preprocessor directive, since the lexer only reports those as such if they appear at the beginning of a new line
    163 	// But without a name, the lexer location is set to the last token location, which most likely will not be at the start of the line
    164 	push(std::move(source_code), path.empty() ? "unknown" : path);
    165 	parse();
    166 
    167 	return _success;
    168 }
    169 
    170 std::vector<std::filesystem::path> reshadefx::preprocessor::included_files() const
    171 {
    172 	std::vector<std::filesystem::path> files;
    173 	files.reserve(_file_cache.size());
    174 	for (const auto &it : _file_cache)
    175 		files.push_back(std::filesystem::u8path(it.first));
    176 	return files;
    177 }
    178 std::vector<std::pair<std::string, std::string>> reshadefx::preprocessor::used_macro_definitions() const
    179 {
    180 	std::vector<std::pair<std::string, std::string>> defines;
    181 	defines.reserve(_used_macros.size());
    182 	for (const std::string &name : _used_macros)
    183 		if (const auto it = _macros.find(name);
    184 			// Do not include function-like macros, since they are more likely to contain a complex replacement list
    185 			it != _macros.end() && !it->second.is_function_like)
    186 			defines.emplace_back(name, it->second.replacement_list);
    187 	return defines;
    188 }
    189 
    190 void reshadefx::preprocessor::error(const location &location, const std::string &message)
    191 {
    192 	_errors += location.source + '(' + std::to_string(location.line) + ", " + std::to_string(location.column) + ')' + ": preprocessor error: " + message + '\n';
    193 	_success = false; // Unset success flag
    194 }
    195 void reshadefx::preprocessor::warning(const location &location, const std::string &message)
    196 {
    197 	_errors += location.source + '(' + std::to_string(location.line) + ", " + std::to_string(location.column) + ')' + ": preprocessor warning: " + message + '\n';
    198 }
    199 
    200 void reshadefx::preprocessor::push(std::string input, const std::string &name)
    201 {
    202 	location start_location = !name.empty() ?
    203 		// Start at the beginning of the file when pushing a new file
    204 		location(name, 1) :
    205 		// Start with last known token location when pushing an unnamed string
    206 		_token.location;
    207 
    208 	input_level level = { name };
    209 	level.lexer.reset(new lexer(
    210 		std::move(input),
    211 		true  /* ignore_comments */,
    212 		false /* ignore_whitespace */,
    213 		false /* ignore_pp_directives */,
    214 		false /* ignore_line_directives */,
    215 		true  /* ignore_keywords */,
    216 		false /* escape_string_literals */,
    217 		start_location));
    218 	level.next_token.id = tokenid::unknown;
    219 	level.next_token.location = start_location; // This is used in 'consume' to initialize the output location
    220 
    221 	// Inherit hidden macros from parent
    222 	if (!_input_stack.empty())
    223 		level.hidden_macros = _input_stack.back().hidden_macros;
    224 
    225 	_input_stack.push_back(std::move(level));
    226 	_next_input_index = _input_stack.size() - 1;
    227 
    228 	// Advance into the input stack to update next token
    229 	consume();
    230 }
    231 
    232 bool reshadefx::preprocessor::peek(tokenid tokid) const
    233 {
    234 	if (_input_stack.empty())
    235 		return tokid == tokenid::end_of_file;
    236 
    237 	return _input_stack[_next_input_index].next_token == tokid;
    238 }
    239 void reshadefx::preprocessor::consume()
    240 {
    241 	_current_input_index = _next_input_index;
    242 
    243 	if (_input_stack.empty())
    244 	{
    245 		// End of input has been reached already (this can happen when the input text is not terminated with a new line)
    246 		assert(_current_input_index == 0);
    247 		return;
    248 	}
    249 
    250 	// Clear out input stack, now that the current token is overwritten
    251 	while (_input_stack.size() > (_current_input_index + 1))
    252 		_input_stack.pop_back();
    253 
    254 	// Update location information after switching input levels
    255 	input_level &input = _input_stack[_current_input_index];
    256 	if (!input.name.empty() && input.name != _output_location.source)
    257 	{
    258 		_output += "#line " + std::to_string(input.next_token.location.line) + " \"" + input.name + "\"\n";
    259 		// Line number is increased before checking against next token in 'tokenid::end_of_line' handling in 'parse' function below, so compensate for that here
    260 		_output_location.line = input.next_token.location.line - 1;
    261 		_output_location.source = input.name;
    262 	}
    263 
    264 	// Set current token
    265 	_token = std::move(input.next_token);
    266 	_current_token_raw_data = input.lexer->input_string().substr(_token.offset, _token.length);
    267 
    268 	// Get the next token
    269 	input.next_token = input.lexer->lex();
    270 
    271 	// Verify string literals (since the lexer cannot throw errors itself)
    272 	if (_token == tokenid::string_literal && _current_token_raw_data.back() != '\"')
    273 		error(_token.location, "unterminated string literal");
    274 
    275 	// Pop input level if lexical analysis has reached the end of it
    276 	// This ensures the EOF token is not consumed until the very last file
    277 	while (peek(tokenid::end_of_file))
    278 	{
    279 		// Remove any unterminated blocks from the stack
    280 		for (; !_if_stack.empty() && _if_stack.back().input_index >= _next_input_index; _if_stack.pop_back())
    281 			error(_if_stack.back().pp_token.location, "unterminated #if");
    282 
    283 		if (_next_input_index == 0)
    284 		{
    285 			// End of input has been reached, so cannot pop further and this is the last token
    286 			_input_stack.pop_back();
    287 			return;
    288 		}
    289 		else
    290 		{
    291 			_next_input_index -= 1;
    292 		}
    293 	}
    294 }
    295 void reshadefx::preprocessor::consume_until(tokenid tokid)
    296 {
    297 	while (!accept(tokid) && !peek(tokenid::end_of_file))
    298 	{
    299 		consume();
    300 	}
    301 }
    302 
    303 bool reshadefx::preprocessor::accept(tokenid tokid, bool ignore_whitespace)
    304 {
    305 	if (ignore_whitespace)
    306 	{
    307 		while (peek(tokenid::space))
    308 		{
    309 			consume();
    310 		}
    311 	}
    312 
    313 	if (peek(tokid))
    314 	{
    315 		consume();
    316 		return true;
    317 	}
    318 
    319 	return false;
    320 }
    321 bool reshadefx::preprocessor::expect(tokenid tokid)
    322 {
    323 	if (!accept(tokid))
    324 	{
    325 		if (_input_stack.empty())
    326 			return tokid == tokenid::end_of_line || tokid == tokenid::end_of_file;
    327 
    328 		token actual_token = _input_stack[_next_input_index].next_token;
    329 		actual_token.location.source = _output_location.source;
    330 
    331 		if (actual_token == tokenid::end_of_line)
    332 			error(actual_token.location, "syntax error: unexpected new line");
    333 		else
    334 			error(actual_token.location, "syntax error: unexpected token '" +
    335 				_input_stack[_next_input_index].lexer->input_string().substr(actual_token.offset, actual_token.length) + '\'');
    336 
    337 		return false;
    338 	}
    339 
    340 	return true;
    341 }
    342 
    343 void reshadefx::preprocessor::parse()
    344 {
    345 	std::string line;
    346 
    347 	// Consume all tokens in the input
    348 	while (!peek(tokenid::end_of_file))
    349 	{
    350 		consume();
    351 
    352 		_recursion_count = 0;
    353 
    354 		const bool skip = !_if_stack.empty() && _if_stack.back().skipping;
    355 
    356 		switch (_token)
    357 		{
    358 		case tokenid::hash_if:
    359 			parse_if();
    360 			if (!expect(tokenid::end_of_line))
    361 				consume_until(tokenid::end_of_line);
    362 			continue;
    363 		case tokenid::hash_ifdef:
    364 			parse_ifdef();
    365 			if (!expect(tokenid::end_of_line))
    366 				consume_until(tokenid::end_of_line);
    367 			continue;
    368 		case tokenid::hash_ifndef:
    369 			parse_ifndef();
    370 			if (!expect(tokenid::end_of_line))
    371 				consume_until(tokenid::end_of_line);
    372 			continue;
    373 		case tokenid::hash_else:
    374 			parse_else();
    375 			if (!expect(tokenid::end_of_line))
    376 				consume_until(tokenid::end_of_line);
    377 			continue;
    378 		case tokenid::hash_elif:
    379 			parse_elif();
    380 			if (!expect(tokenid::end_of_line))
    381 				consume_until(tokenid::end_of_line);
    382 			continue;
    383 		case tokenid::hash_endif:
    384 			parse_endif();
    385 			if (!expect(tokenid::end_of_line))
    386 				consume_until(tokenid::end_of_line);
    387 			continue;
    388 		default:
    389 			// All other tokens are handled below
    390 			break;
    391 		}
    392 
    393 		if (skip)
    394 			// Ignore token since the current section is disabled
    395 			continue;
    396 
    397 		switch (_token)
    398 		{
    399 		case tokenid::hash_def:
    400 			parse_def();
    401 			if (!expect(tokenid::end_of_line))
    402 				consume_until(tokenid::end_of_line);
    403 			continue;
    404 		case tokenid::hash_undef:
    405 			parse_undef();
    406 			if (!expect(tokenid::end_of_line))
    407 				consume_until(tokenid::end_of_line);
    408 			continue;
    409 		case tokenid::hash_error:
    410 			parse_error();
    411 			if (!expect(tokenid::end_of_line))
    412 				consume_until(tokenid::end_of_line);
    413 			continue;
    414 		case tokenid::hash_warning:
    415 			parse_warning();
    416 			if (!expect(tokenid::end_of_line))
    417 				consume_until(tokenid::end_of_line);
    418 			continue;
    419 		case tokenid::hash_pragma:
    420 			parse_pragma();
    421 			if (!expect(tokenid::end_of_line))
    422 				consume_until(tokenid::end_of_line);
    423 			continue;
    424 		case tokenid::hash_include:
    425 			parse_include();
    426 			continue;
    427 		case tokenid::hash_unknown:
    428 			// Standalone "#" is valid and should be ignored
    429 			if (_token.length != 0)
    430 				error(_token.location, "unrecognized preprocessing directive '" + _token.literal_as_string + '\'');
    431 			if (!expect(tokenid::end_of_line))
    432 				consume_until(tokenid::end_of_line);
    433 			continue;
    434 		case tokenid::end_of_line:
    435 			if (line.empty())
    436 				continue; // Do not append empty lines to output, instead emit "#line" statements
    437 			_output_location.line++;
    438 			if (_token.location.line != _output_location.line)
    439 			{
    440 				_output += "#line " + std::to_string(_token.location.line) + '\n';
    441 				_output_location.line = _token.location.line;
    442 			}
    443 			_output += line;
    444 			_output += '\n';
    445 			line.clear();
    446 			continue;
    447 		case tokenid::identifier:
    448 			if (evaluate_identifier_as_macro())
    449 				continue;
    450 			[[fallthrough]];
    451 		default:
    452 			line += _current_token_raw_data;
    453 			break;
    454 		}
    455 	}
    456 
    457 	// Append the last line after the EOF token was reached to the output
    458 	_output += line;
    459 	_output += '\n';
    460 }
    461 
    462 void reshadefx::preprocessor::parse_def()
    463 {
    464 	if (!expect(tokenid::identifier))
    465 		return;
    466 	if (_token.literal_as_string == "defined")
    467 		return warning(_token.location, "macro name 'defined' is reserved");
    468 
    469 	macro m;
    470 	const location location = std::move(_token.location);
    471 	const std::string macro_name = std::move(_token.literal_as_string);
    472 
    473 	// Only create function-like macro if the parenthesis follows the macro name without any whitespace between
    474 	if (accept(tokenid::parenthesis_open, false))
    475 	{
    476 		m.is_function_like = true;
    477 
    478 		while (accept(tokenid::identifier))
    479 		{
    480 			m.parameters.push_back(_token.literal_as_string);
    481 
    482 			if (!accept(tokenid::comma))
    483 				break;
    484 		}
    485 
    486 		if (accept(tokenid::ellipsis))
    487 			m.is_variadic = true;
    488 
    489 		if (!expect(tokenid::parenthesis_close))
    490 			return;
    491 	}
    492 
    493 	create_macro_replacement_list(m);
    494 
    495 	if (!add_macro_definition(macro_name, m))
    496 		return error(location, "redefinition of '" + macro_name + "'");
    497 }
    498 void reshadefx::preprocessor::parse_undef()
    499 {
    500 	if (!expect(tokenid::identifier))
    501 		return;
    502 	if (_token.literal_as_string == "defined")
    503 		return warning(_token.location, "macro name 'defined' is reserved");
    504 
    505 	_macros.erase(_token.literal_as_string);
    506 }
    507 
    508 void reshadefx::preprocessor::parse_if()
    509 {
    510 	if_level level;
    511 	level.pp_token = _token;
    512 	level.input_index = _current_input_index;
    513 
    514 	// Evaluate expression after updating 'pp_token', so that it points at the beginning # token
    515 	level.value = evaluate_expression();
    516 
    517 	const bool parent_skipping = !_if_stack.empty() && _if_stack.back().skipping;
    518 	level.skipping = parent_skipping || !level.value;
    519 
    520 	_if_stack.push_back(std::move(level));
    521 }
    522 void reshadefx::preprocessor::parse_ifdef()
    523 {
    524 	if_level level;
    525 	level.pp_token = _token;
    526 	level.input_index = _current_input_index;
    527 
    528 	if (!expect(tokenid::identifier))
    529 		return;
    530 
    531 	level.value = is_defined(_token.literal_as_string);
    532 
    533 	const bool parent_skipping = !_if_stack.empty() && _if_stack.back().skipping;
    534 	level.skipping = parent_skipping || !level.value;
    535 
    536 	_if_stack.push_back(std::move(level));
    537 	// Only add to used macro list if this #ifdef is active and the macro was not defined before
    538 	if (!parent_skipping)
    539 		if (const auto it = _macros.find(_token.literal_as_string); it == _macros.end() || it->second.is_predefined)
    540 			_used_macros.emplace(_token.literal_as_string);
    541 }
    542 void reshadefx::preprocessor::parse_ifndef()
    543 {
    544 	if_level level;
    545 	level.pp_token = _token;
    546 	level.input_index = _current_input_index;
    547 
    548 	if (!expect(tokenid::identifier))
    549 		return;
    550 
    551 	level.value = !is_defined(_token.literal_as_string);
    552 
    553 	const bool parent_skipping = !_if_stack.empty() && _if_stack.back().skipping;
    554 	level.skipping = parent_skipping || !level.value;
    555 
    556 	_if_stack.push_back(std::move(level));
    557 	// Only add to used macro list if this #ifndef is active and the macro was not defined before
    558 	if (!parent_skipping)
    559 		if (const auto it = _macros.find(_token.literal_as_string); it == _macros.end() || it->second.is_predefined)
    560 			_used_macros.emplace(_token.literal_as_string);
    561 }
    562 void reshadefx::preprocessor::parse_elif()
    563 {
    564 	if (_if_stack.empty())
    565 		return error(_token.location, "missing #if for #elif");
    566 
    567 	if_level &level = _if_stack.back();
    568 	if (level.pp_token == tokenid::hash_else)
    569 		return error(_token.location, "#elif is not allowed after #else");
    570 
    571 	// Update 'pp_token' before evaluating expression, so that it points at the beginning # token
    572 	level.pp_token = _token;
    573 	level.input_index = _current_input_index;
    574 
    575 	const bool parent_skipping = _if_stack.size() > 1 && _if_stack[_if_stack.size() - 2].skipping;
    576 	const bool condition_result = evaluate_expression();
    577 	level.skipping = parent_skipping || level.value || !condition_result;
    578 
    579 	if (!level.value) level.value = condition_result;
    580 }
    581 void reshadefx::preprocessor::parse_else()
    582 {
    583 	if (_if_stack.empty())
    584 		return error(_token.location, "missing #if for #else");
    585 
    586 	if_level &level = _if_stack.back();
    587 	if (level.pp_token == tokenid::hash_else)
    588 		return error(_token.location, "#else is not allowed after #else");
    589 
    590 	level.pp_token = _token;
    591 	level.input_index = _current_input_index;
    592 
    593 	const bool parent_skipping = _if_stack.size() > 1 && _if_stack[_if_stack.size() - 2].skipping;
    594 	level.skipping = parent_skipping || level.value;
    595 
    596 	if (!level.value) level.value = true;
    597 }
    598 void reshadefx::preprocessor::parse_endif()
    599 {
    600 	if (_if_stack.empty())
    601 		error(_token.location, "missing #if for #endif");
    602 	else
    603 		_if_stack.pop_back();
    604 }
    605 
    606 void reshadefx::preprocessor::parse_error()
    607 {
    608 	const location keyword_location = std::move(_token.location);
    609 
    610 	if (!expect(tokenid::string_literal))
    611 		return;
    612 
    613 	error(keyword_location, _token.literal_as_string);
    614 }
    615 void reshadefx::preprocessor::parse_warning()
    616 {
    617 	const location keyword_location = std::move(_token.location);
    618 
    619 	if (!expect(tokenid::string_literal))
    620 		return;
    621 
    622 	warning(keyword_location, _token.literal_as_string);
    623 }
    624 
    625 void reshadefx::preprocessor::parse_pragma()
    626 {
    627 	const location keyword_location = std::move(_token.location);
    628 
    629 	if (!expect(tokenid::identifier))
    630 		return;
    631 
    632 	std::string pragma = std::move(_token.literal_as_string);
    633 	std::string pragma_args;
    634 
    635 	// Ignore whitespace preceding the argument list
    636 	accept(tokenid::space);
    637 
    638 	while (!peek(tokenid::end_of_line) && !peek(tokenid::end_of_file))
    639 	{
    640 		consume();
    641 
    642 		if (_token == tokenid::identifier && evaluate_identifier_as_macro())
    643 			continue;
    644 
    645 		// Collapse all whitespace down to a single space
    646 		if (_token == tokenid::space)
    647 			pragma_args += ' ';
    648 		else
    649 			pragma_args += _current_token_raw_data;
    650 	}
    651 
    652 	if (pragma == "once")
    653 	{
    654 		// Clear file contents, so that future include statements simply push an empty string instead of these file contents again
    655 		if (const auto it = _file_cache.find(_output_location.source); it != _file_cache.end())
    656 			it->second.clear();
    657 		return;
    658 	}
    659 
    660 	if (pragma == "warning" || pragma == "reshade")
    661 	{
    662 		_used_pragmas.emplace_back(std::move(pragma), std::move(pragma_args));
    663 		return;
    664 	}
    665 
    666 	warning(keyword_location, "unknown pragma ignored");
    667 }
    668 
    669 void reshadefx::preprocessor::parse_include()
    670 {
    671 	const location keyword_location = std::move(_token.location);
    672 
    673 	while (accept(tokenid::identifier))
    674 	{
    675 		if (!evaluate_identifier_as_macro())
    676 		{
    677 			error(_token.location, "syntax error: unexpected identifier in #include");
    678 			consume_until(tokenid::end_of_line);
    679 			return;
    680 		}
    681 	}
    682 
    683 	if (!expect(tokenid::string_literal))
    684 	{
    685 		consume_until(tokenid::end_of_line);
    686 		return;
    687 	}
    688 
    689 	std::filesystem::path file_name = std::filesystem::u8path(_token.literal_as_string);
    690 	std::filesystem::path file_path = std::filesystem::u8path(_output_location.source);
    691 	file_path.replace_filename(file_name);
    692 
    693 	if (!_file_exists_cb(file_path.u8string()))
    694 		for (const std::filesystem::path &include_path : _include_paths)
    695 			if (_file_exists_cb((file_path = include_path / file_name).u8string()))
    696 				break;
    697 
    698 	const std::string file_path_string = file_path.u8string();
    699 
    700 	// Detect recursive include and abort to avoid infinite loop
    701 	if (std::find_if(_input_stack.begin(), _input_stack.end(),
    702 			[&file_path_string](const input_level &level) { return level.name == file_path_string; }) != _input_stack.end())
    703 		return error(_token.location, "recursive #include");
    704 
    705 	std::string input;
    706 	if (const auto it = _file_cache.find(file_path_string); it != _file_cache.end())
    707 	{
    708 		input = it->second;
    709 	}
    710 	else
    711 	{
    712 		if (!read_file(file_path_string, input, _read_file_cb))
    713 			return error(keyword_location, "could not open included file '" + file_name.u8string() + '\'');
    714 
    715 		_file_cache.emplace(file_path_string, input);
    716 	}
    717 
    718 	// Skip end of line character following the include statement before pushing, so that the line number is already pointing to the next line when popping out of it again
    719 	if (!expect(tokenid::end_of_line))
    720 		consume_until(tokenid::end_of_line);
    721 
    722 	// Clear out input stack before pushing include, so that hidden macros do not bleed into the include
    723 	while (_input_stack.size() > (_next_input_index + 1))
    724 		_input_stack.pop_back();
    725 
    726 	push(std::move(input), file_path_string);
    727 }
    728 
    729 bool reshadefx::preprocessor::evaluate_expression()
    730 {
    731 	struct rpn_token
    732 	{
    733 		int value;
    734 		bool is_op;
    735 	};
    736 
    737 	size_t rpn_index = 0;
    738 	size_t stack_index = 0;
    739 	const size_t STACK_SIZE = 128;
    740 	rpn_token rpn[STACK_SIZE];
    741 	int stack[STACK_SIZE];
    742 
    743 	// Keep track of previous token to figure out data type of expression
    744 	tokenid previous_token = _token;
    745 
    746 	// Run shunting-yard algorithm
    747 	while (!peek(tokenid::end_of_line) && !peek(tokenid::end_of_file))
    748 	{
    749 		if (stack_index >= STACK_SIZE || rpn_index >= STACK_SIZE)
    750 			return error(_token.location, "expression evaluator ran out of stack space"), false;
    751 
    752 		consume();
    753 
    754 		auto op = op_none;
    755 		bool left_associative = true;
    756 		bool parenthesis_matched = false;
    757 
    758 		switch (_token)
    759 		{
    760 		case tokenid::space:
    761 			continue;
    762 		case tokenid::exclaim:
    763 			op = op_not;
    764 			left_associative = false;
    765 			break;
    766 		case tokenid::percent:
    767 			op = op_modulo;
    768 			break;
    769 		case tokenid::ampersand:
    770 			op = op_bitand;
    771 			break;
    772 		case tokenid::star:
    773 			op = op_multiply;
    774 			break;
    775 		case tokenid::plus:
    776 			left_associative =
    777 				previous_token == tokenid::int_literal ||
    778 				previous_token == tokenid::uint_literal ||
    779 				previous_token == tokenid::identifier ||
    780 				previous_token == tokenid::parenthesis_close;
    781 			op = left_associative ? op_add : op_plus;
    782 			break;
    783 		case tokenid::minus:
    784 			left_associative =
    785 				previous_token == tokenid::int_literal ||
    786 				previous_token == tokenid::uint_literal ||
    787 				previous_token == tokenid::identifier ||
    788 				previous_token == tokenid::parenthesis_close;
    789 			op = left_associative ? op_subtract : op_negate;
    790 			break;
    791 		case tokenid::slash:
    792 			op = op_divide;
    793 			break;
    794 		case tokenid::less:
    795 			op = op_less;
    796 			break;
    797 		case tokenid::greater:
    798 			op = op_greater;
    799 			break;
    800 		case tokenid::caret:
    801 			op = op_bitxor;
    802 			break;
    803 		case tokenid::pipe:
    804 			op = op_bitor;
    805 			break;
    806 		case tokenid::tilde:
    807 			op = op_bitnot;
    808 			left_associative = false;
    809 			break;
    810 		case tokenid::exclaim_equal:
    811 			op = op_not_equal;
    812 			break;
    813 		case tokenid::ampersand_ampersand:
    814 			op = op_and;
    815 			break;
    816 		case tokenid::less_less:
    817 			op = op_leftshift;
    818 			break;
    819 		case tokenid::less_equal:
    820 			op = op_less_equal;
    821 			break;
    822 		case tokenid::equal_equal:
    823 			op = op_equal;
    824 			break;
    825 		case tokenid::greater_greater:
    826 			op = op_rightshift;
    827 			break;
    828 		case tokenid::greater_equal:
    829 			op = op_greater_equal;
    830 			break;
    831 		case tokenid::pipe_pipe:
    832 			op = op_or;
    833 			break;
    834 		default:
    835 			// This is not an operator token
    836 			break;
    837 		}
    838 
    839 		switch (_token)
    840 		{
    841 		case tokenid::parenthesis_open:
    842 			stack[stack_index++] = op_parentheses;
    843 			break;
    844 		case tokenid::parenthesis_close:
    845 			parenthesis_matched = false;
    846 			while (stack_index > 0)
    847 			{
    848 				const int op2 = stack[--stack_index];
    849 				if (op2 == op_parentheses)
    850 				{
    851 					parenthesis_matched = true;
    852 					break;
    853 				}
    854 
    855 				rpn[rpn_index++] = { op2, true };
    856 			}
    857 
    858 			if (!parenthesis_matched)
    859 				return error(_token.location, "unmatched ')'"), false;
    860 			break;
    861 		case tokenid::identifier:
    862 			if (evaluate_identifier_as_macro())
    863 				continue;
    864 
    865 			if (_token.literal_as_string == "exists")
    866 			{
    867 				const bool has_parentheses = accept(tokenid::parenthesis_open);
    868 
    869 				while (accept(tokenid::identifier))
    870 				{
    871 					if (!evaluate_identifier_as_macro())
    872 					{
    873 						error(_token.location, "syntax error: unexpected identifier after 'exists'");
    874 						return false;
    875 					}
    876 				}
    877 
    878 				if (!expect(tokenid::string_literal))
    879 					return false;
    880 
    881 				std::filesystem::path file_name = std::filesystem::u8path(_token.literal_as_string);
    882 				std::filesystem::path file_path = std::filesystem::u8path(_output_location.source);
    883 				file_path.replace_filename(file_name);
    884 
    885 				if (has_parentheses && !expect(tokenid::parenthesis_close))
    886 					return false;
    887 
    888 				if (!_file_exists_cb(file_path.u8string()))
    889 					for (const std::filesystem::path &include_path : _include_paths)
    890 						if (_file_exists_cb((file_path = include_path / file_name).u8string()))
    891 							break;
    892 
    893 				rpn[rpn_index++] = { _file_exists_cb(file_path.u8string()) ? 1 : 0, false };
    894 				continue;
    895 			}
    896 			if (_token.literal_as_string == "defined")
    897 			{
    898 				const bool has_parentheses = accept(tokenid::parenthesis_open);
    899 
    900 				if (!expect(tokenid::identifier))
    901 					return false;
    902 
    903 				const std::string macro_name = std::move(_token.literal_as_string);
    904 
    905 				if (has_parentheses && !expect(tokenid::parenthesis_close))
    906 					return false;
    907 
    908 				rpn[rpn_index++] = { is_defined(macro_name) ? 1 : 0, false };
    909 				continue;
    910 			}
    911 
    912 			// An identifier that cannot be replaced with a number becomes zero
    913 			rpn[rpn_index++] = { 0, false };
    914 			break;
    915 		case tokenid::int_literal:
    916 		case tokenid::uint_literal:
    917 			rpn[rpn_index++] = { _token.literal_as_int, false };
    918 			break;
    919 		default:
    920 			if (op == op_none)
    921 				return error(_token.location, "invalid expression"), false;
    922 
    923 			while (stack_index > 0)
    924 			{
    925 				const int prev_op = stack[stack_index - 1];
    926 				if (prev_op == op_parentheses)
    927 					break;
    928 
    929 				if (left_associative ?
    930 					(precedence_lookup[op] > precedence_lookup[prev_op]) :
    931 					(precedence_lookup[op] >= precedence_lookup[prev_op]))
    932 					break;
    933 
    934 				stack_index--;
    935 				rpn[rpn_index++] = { prev_op, true };
    936 			}
    937 
    938 			stack[stack_index++] = op;
    939 			break;
    940 		}
    941 
    942 		previous_token = _token;
    943 	}
    944 
    945 	while (stack_index > 0)
    946 	{
    947 		const int op = stack[--stack_index];
    948 		if (op == op_parentheses)
    949 			return error(_token.location, "unmatched ')'"), false;
    950 
    951 		rpn[rpn_index++] = { op, true };
    952 	}
    953 
    954 #define UNARY_OPERATION(op) { \
    955 	if (stack_index < 1) \
    956 		return error(_token.location, "invalid expression"), 0; \
    957 	stack[stack_index - 1] = op stack[stack_index - 1]; \
    958 	}
    959 #define BINARY_OPERATION(op) { \
    960 	if (stack_index < 2) \
    961 		return error(_token.location, "invalid expression"), 0; \
    962 	stack[stack_index - 2] = stack[stack_index - 2] op stack[stack_index - 1]; \
    963 	stack_index--; \
    964 	}
    965 
    966 	// Evaluate reverse polish notation output
    967 	for (rpn_token *token = rpn; rpn_index--; token++)
    968 	{
    969 		if (token->is_op)
    970 		{
    971 			switch (token->value)
    972 			{
    973 			case op_or:
    974 				BINARY_OPERATION(||);
    975 				break;
    976 			case op_and:
    977 				BINARY_OPERATION(&&);
    978 				break;
    979 			case op_bitor:
    980 				BINARY_OPERATION(|);
    981 				break;
    982 			case op_bitxor:
    983 				BINARY_OPERATION(^);
    984 				break;
    985 			case op_bitand:
    986 				BINARY_OPERATION(&);
    987 				break;
    988 			case op_not_equal:
    989 				BINARY_OPERATION(!=);
    990 				break;
    991 			case op_equal:
    992 				BINARY_OPERATION(==);
    993 				break;
    994 			case op_less:
    995 				BINARY_OPERATION(<);
    996 				break;
    997 			case op_greater:
    998 				BINARY_OPERATION(>);
    999 				break;
   1000 			case op_less_equal:
   1001 				BINARY_OPERATION(<=);
   1002 				break;
   1003 			case op_greater_equal:
   1004 				BINARY_OPERATION(>=);
   1005 				break;
   1006 			case op_leftshift:
   1007 				BINARY_OPERATION(<<);
   1008 				break;
   1009 			case op_rightshift:
   1010 				BINARY_OPERATION(>>);
   1011 				break;
   1012 			case op_add:
   1013 				BINARY_OPERATION(+);
   1014 				break;
   1015 			case op_subtract:
   1016 				BINARY_OPERATION(-);
   1017 				break;
   1018 			case op_modulo:
   1019 				BINARY_OPERATION(%);
   1020 				break;
   1021 			case op_divide:
   1022 				BINARY_OPERATION(/);
   1023 				break;
   1024 			case op_multiply:
   1025 				BINARY_OPERATION(*);
   1026 				break;
   1027 			case op_plus:
   1028 				UNARY_OPERATION(+);
   1029 				break;
   1030 			case op_negate:
   1031 				UNARY_OPERATION(-);
   1032 				break;
   1033 			case op_not:
   1034 				UNARY_OPERATION(!);
   1035 				break;
   1036 			case op_bitnot:
   1037 				UNARY_OPERATION(~);
   1038 				break;
   1039 			}
   1040 		}
   1041 		else
   1042 		{
   1043 			stack[stack_index++] = token->value;
   1044 		}
   1045 	}
   1046 
   1047 	if (stack_index != 1)
   1048 		return error(_token.location, "invalid expression"), false;
   1049 
   1050 	return stack[0] != 0;
   1051 }
   1052 
   1053 bool reshadefx::preprocessor::evaluate_identifier_as_macro()
   1054 {
   1055 	if (_token.literal_as_string == "__LINE__")
   1056 	{
   1057 		push(std::to_string(_token.location.line));
   1058 		return true;
   1059 	}
   1060 	if (_token.literal_as_string == "__FILE__")
   1061 	{
   1062 		push(escape_string(_token.location.source));
   1063 		return true;
   1064 	}
   1065 	if (_token.literal_as_string == "__FILE_STEM__")
   1066 	{
   1067 		const std::filesystem::path file_stem = std::filesystem::u8path(_token.location.source).stem();
   1068 		push(escape_string(file_stem.u8string()));
   1069 		return true;
   1070 	}
   1071 	if (_token.literal_as_string == "__FILE_NAME__")
   1072 	{
   1073 		const std::filesystem::path file_name = std::filesystem::u8path(_token.location.source).filename();
   1074 		push(escape_string(file_name.u8string()));
   1075 		return true;
   1076 	}
   1077 
   1078 	const auto it = _macros.find(_token.literal_as_string);
   1079 	if (it == _macros.end())
   1080 		return false;
   1081 
   1082 	if (!_input_stack.empty())
   1083 	{
   1084 		const std::unordered_set<std::string> &hidden_macros = _input_stack[_current_input_index].hidden_macros;
   1085 		if (hidden_macros.find(_token.literal_as_string) != hidden_macros.end())
   1086 			return false;
   1087 	}
   1088 
   1089 	const location macro_location = _token.location;
   1090 	if (_recursion_count++ >= 256)
   1091 		return error(macro_location, "macro recursion too high"), false;
   1092 
   1093 	std::vector<std::string> arguments;
   1094 	if (it->second.is_function_like)
   1095 	{
   1096 		if (!accept(tokenid::parenthesis_open))
   1097 			return false; // Function like macro used without arguments, handle that like a normal identifier instead
   1098 
   1099 		while (true)
   1100 		{
   1101 			int parentheses_level = 0;
   1102 			std::string argument;
   1103 
   1104 			// Ignore whitespace preceding the argument
   1105 			accept(tokenid::space);
   1106 
   1107 			if (accept(tokenid::parenthesis_close))
   1108 				break; // Special case for when there are no arguments
   1109 
   1110 			while (true)
   1111 			{
   1112 				if (peek(tokenid::end_of_file))
   1113 					return error(macro_location, "unexpected end of file in macro expansion"), false;
   1114 
   1115 				// Consume all tokens of the argument
   1116 				consume();
   1117 
   1118 				if (_token == tokenid::comma && parentheses_level == 0 && !(it->second.is_variadic && arguments.size() == it->second.parameters.size()))
   1119 					break; // Comma marks end of an argument (unless this is the last argument in a variadic macro invocation)
   1120 				if (_token == tokenid::parenthesis_open)
   1121 					parentheses_level++;
   1122 				if (_token == tokenid::parenthesis_close && --parentheses_level < 0)
   1123 					break;
   1124 
   1125 				// Collapse all whitespace down to a single space
   1126 				if (_token == tokenid::space)
   1127 					argument += ' ';
   1128 				else
   1129 					argument += _current_token_raw_data;
   1130 			}
   1131 
   1132 			// Trim whitespace following the argument
   1133 			if (argument.size() && argument.back() == ' ')
   1134 				argument.pop_back();
   1135 
   1136 			arguments.push_back(std::move(argument));
   1137 
   1138 			if (parentheses_level < 0)
   1139 				break;
   1140 		}
   1141 	}
   1142 
   1143 	expand_macro(it->first, it->second, arguments);
   1144 
   1145 	return true;
   1146 }
   1147 
   1148 bool reshadefx::preprocessor::is_defined(const std::string &name) const
   1149 {
   1150 	return _macros.find(name) != _macros.end() ||
   1151 		// Check built-in macros as well
   1152 		name == "__LINE__" ||
   1153 		name == "__FILE__" ||
   1154 		name == "__FILE_NAME__" ||
   1155 		name == "__FILE_STEM__";
   1156 }
   1157 
   1158 void reshadefx::preprocessor::expand_macro(const std::string &name, const macro &macro, const std::vector<std::string> &arguments)
   1159 {
   1160 	if (macro.replacement_list.empty())
   1161 		return;
   1162 
   1163 	// Verify argument count for function-like macros
   1164 	if (arguments.size() < macro.parameters.size())
   1165 		return warning(_token.location, "not enough arguments for function-like macro invocation '" + name + "'");
   1166 	if (arguments.size() > macro.parameters.size() && !macro.is_variadic)
   1167 		return warning(_token.location, "too many arguments for function-like macro invocation '" + name + "'");
   1168 
   1169 	std::string input;
   1170 	input.reserve(macro.replacement_list.size());
   1171 
   1172 	for (size_t offset = 0; offset < macro.replacement_list.size(); ++offset)
   1173 	{
   1174 		if (macro.replacement_list[offset] != macro_replacement_start)
   1175 		{
   1176 			input += macro.replacement_list[offset];
   1177 			continue;
   1178 		}
   1179 
   1180 		// This is a special replacement sequence
   1181 		const char type = macro.replacement_list[++offset];
   1182 		const char index = macro.replacement_list[++offset];
   1183 		if (static_cast<size_t>(index) >= arguments.size())
   1184 		{
   1185 			if (macro.is_variadic)
   1186 			{
   1187 				// The concatenation operator has a special meaning when placed between a comma and a variable argument, deleting the preceding comma
   1188 				if (type == macro_replacement_concat && input.back() == ',')
   1189 					input.pop_back();
   1190 				if (type == macro_replacement_stringize)
   1191 					input += "\"\"";
   1192 			}
   1193 			continue;
   1194 		}
   1195 
   1196 		switch (type)
   1197 		{
   1198 		case macro_replacement_argument:
   1199 			// Argument prescan
   1200 			push(arguments[index] + static_cast<char>(macro_replacement_argument));
   1201 			while (true)
   1202 			{
   1203 				// Consume all tokens of the argument (until the end marker is reached)
   1204 				consume();
   1205 
   1206 				if (_token == tokenid::unknown) // 'macro_replacement_argument' is 'tokenid::unknown'
   1207 					break;
   1208 				if (_token == tokenid::identifier && evaluate_identifier_as_macro())
   1209 					continue;
   1210 
   1211 				input += _current_token_raw_data;
   1212 			}
   1213 			assert(_current_token_raw_data[0] == macro_replacement_argument);
   1214 			break;
   1215 		case macro_replacement_concat:
   1216 			input += arguments[index];
   1217 			break;
   1218 		case macro_replacement_stringize:
   1219 			// Adds backslashes to escape quotes
   1220 			input += escape_string<'\"'>(arguments[index]);
   1221 			break;
   1222 		}
   1223 	}
   1224 
   1225 	push(std::move(input));
   1226 
   1227 	// Avoid expanding macros again that are referencing themselves
   1228 	_input_stack[_current_input_index].hidden_macros.insert(name);
   1229 }
   1230 
   1231 void reshadefx::preprocessor::create_macro_replacement_list(macro &macro)
   1232 {
   1233 	// Since the number of parameters is encoded in the string, it may not exceed the available size of a char
   1234 	if (macro.parameters.size() >= std::numeric_limits<unsigned char>::max())
   1235 		return error(_token.location, "too many macro parameters");
   1236 
   1237 	// Ignore whitespace preceding the replacement list
   1238 	accept(tokenid::space);
   1239 
   1240 	bool next_concat = false;
   1241 
   1242 	while (!peek(tokenid::end_of_line) && !peek(tokenid::end_of_file))
   1243 	{
   1244 		consume();
   1245 
   1246 		switch (_token)
   1247 		{
   1248 		case tokenid::hash:
   1249 			if (accept(tokenid::hash, false))
   1250 			{
   1251 				if (macro.replacement_list.empty())
   1252 					return error(_token.location, "## cannot appear at start of macro expansion");
   1253 				if (peek(tokenid::end_of_line))
   1254 					return error(_token.location, "## cannot appear at end of macro expansion");
   1255 
   1256 				// Remove any whitespace preceding or following the concatenation operator (so "a ## b" becomes "ab")
   1257 				if (macro.replacement_list.back() == ' ')
   1258 					macro.replacement_list.pop_back();
   1259 				accept(tokenid::space);
   1260 
   1261 				// Disable macro expansion for any argument preceding or following the ## token concatenation operator
   1262 				if (macro.replacement_list.size() > 2 && macro.replacement_list[macro.replacement_list.size() - 2] == macro_replacement_argument)
   1263 					macro.replacement_list[macro.replacement_list.size() - 2] = macro_replacement_concat;
   1264 				next_concat = true;
   1265 				continue;
   1266 			}
   1267 			if (macro.is_function_like)
   1268 			{
   1269 				if (!expect(tokenid::identifier))
   1270 					return;
   1271 
   1272 				const auto it = std::find(macro.parameters.begin(), macro.parameters.end(), _token.literal_as_string);
   1273 				if (it == macro.parameters.end() && !(macro.is_variadic && _token.literal_as_string == "__VA_ARGS__"))
   1274 					return error(_token.location, "# must be followed by parameter name");
   1275 
   1276 				// Start a # stringize operator
   1277 				macro.replacement_list += macro_replacement_start;
   1278 				macro.replacement_list += macro_replacement_stringize;
   1279 				macro.replacement_list += static_cast<char>(std::distance(macro.parameters.begin(), it));
   1280 				next_concat = false;
   1281 				continue;
   1282 			}
   1283 			break;
   1284 		case tokenid::space:
   1285 			// Collapse all whitespace down to a single space
   1286 			macro.replacement_list += ' ';
   1287 			continue;
   1288 		case tokenid::minus:
   1289 			// Special case to handle things like "#define NUM -1\n -NUM", which would otherwise result in "--1", making parsing fail
   1290 			if (macro.replacement_list.empty())
   1291 				macro.replacement_list += ' ';
   1292 			break;
   1293 		case tokenid::identifier:
   1294 			if (const auto it = std::find(macro.parameters.begin(), macro.parameters.end(), _token.literal_as_string);
   1295 				it != macro.parameters.end() || (macro.is_variadic && _token.literal_as_string == "__VA_ARGS__"))
   1296 			{
   1297 				macro.replacement_list += macro_replacement_start;
   1298 				macro.replacement_list += static_cast<char>(next_concat ? macro_replacement_concat : macro_replacement_argument);
   1299 				macro.replacement_list += static_cast<char>(std::distance(macro.parameters.begin(), it));
   1300 				next_concat = false;
   1301 				continue;
   1302 			}
   1303 			break;
   1304 		default:
   1305 			// Token needs no special handling, raw data is added to macro below
   1306 			break;
   1307 		}
   1308 
   1309 		macro.replacement_list += _current_token_raw_data;
   1310 		next_concat = false;
   1311 	}
   1312 
   1313 	// Trim whitespace following the replacement list
   1314 	if (macro.replacement_list.size() && macro.replacement_list.back() == ' ')
   1315 		macro.replacement_list.pop_back();
   1316 }
	duckstation duckstation, but archived from the revision just before upstream changed it to a proprietary software project, this version is the libre one
	git clone https://git.neptards.moe/u3shit/duckstation.git
	Log | Files | Refs | README | LICENSE