effect_lexer.cpp (30852B)
1 /* 2 * Copyright (C) 2014 Patrick Mours 3 * SPDX-License-Identifier: BSD-3-Clause 4 */ 5 6 #include "effect_lexer.hpp" 7 #include <cassert> 8 #include <string_view> 9 #include <unordered_map> // Used for static lookup tables 10 11 using namespace reshadefx; 12 13 enum token_type 14 { 15 DIGIT = '0', 16 IDENT = 'A', 17 SPACE = ' ', 18 }; 19 20 // Lookup table which translates a given char to a token type 21 static const unsigned type_lookup[256] = { 22 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, SPACE, 23 '\n', SPACE, SPACE, SPACE, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 24 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 25 0x00, 0x00, SPACE, '!', '"', '#', '$', '%', '&', '\'', 26 '(', ')', '*', '+', ',', '-', '.', '/', DIGIT, DIGIT, 27 DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, ':', ';', 28 '<', '=', '>', '?', '@', IDENT, IDENT, IDENT, IDENT, IDENT, 29 IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, 30 IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, 31 IDENT, '[', '\\', ']', '^', IDENT, 0x00, IDENT, IDENT, IDENT, 32 IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, 33 IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, 34 IDENT, IDENT, IDENT, '{', '|', '}', '~', 0x00, 0x00, 0x00, 35 }; 36 37 // Lookup tables which translate a given string literal to a token and backwards 38 static const std::unordered_map<tokenid, std::string_view> token_lookup = { 39 { tokenid::end_of_file, "end of file" }, 40 { tokenid::exclaim, "!" }, 41 { tokenid::hash, "#" }, 42 { tokenid::dollar, "$" }, 43 { tokenid::percent, "%" }, 44 { tokenid::ampersand, "&" }, 45 { tokenid::parenthesis_open, "(" }, 46 { tokenid::parenthesis_close, ")" }, 47 { tokenid::star, "*" }, 48 { tokenid::plus, "+" }, 49 { tokenid::comma, "," }, 50 { tokenid::minus, "-" }, 51 { tokenid::dot, "." }, 52 { tokenid::slash, "/" }, 53 { tokenid::colon, ":" }, 54 { tokenid::semicolon, ";" }, 55 { tokenid::less, "<" }, 56 { tokenid::equal, "=" }, 57 { tokenid::greater, ">" }, 58 { tokenid::question, "?" }, 59 { tokenid::at, "@" }, 60 { tokenid::bracket_open, "[" }, 61 { tokenid::backslash, "\\" }, 62 { tokenid::bracket_close, "]" }, 63 { tokenid::caret, "^" }, 64 { tokenid::brace_open, "{" }, 65 { tokenid::pipe, "|" }, 66 { tokenid::brace_close, "}" }, 67 { tokenid::tilde, "~" }, 68 { tokenid::exclaim_equal, "!=" }, 69 { tokenid::percent_equal, "%=" }, 70 { tokenid::ampersand_ampersand, "&&" }, 71 { tokenid::ampersand_equal, "&=" }, 72 { tokenid::star_equal, "*=" }, 73 { tokenid::plus_plus, "++" }, 74 { tokenid::plus_equal, "+=" }, 75 { tokenid::minus_minus, "--" }, 76 { tokenid::minus_equal, "-=" }, 77 { tokenid::arrow, "->" }, 78 { tokenid::ellipsis, "..." }, 79 { tokenid::slash_equal, "|=" }, 80 { tokenid::colon_colon, "::" }, 81 { tokenid::less_less_equal, "<<=" }, 82 { tokenid::less_less, "<<" }, 83 { tokenid::less_equal, "<=" }, 84 { tokenid::equal_equal, "==" }, 85 { tokenid::greater_greater_equal, ">>=" }, 86 { tokenid::greater_greater, ">>" }, 87 { tokenid::greater_equal, ">=" }, 88 { tokenid::caret_equal, "^=" }, 89 { tokenid::pipe_equal, "|=" }, 90 { tokenid::pipe_pipe, "||" }, 91 { tokenid::identifier, "identifier" }, 92 { tokenid::reserved, "reserved word" }, 93 { tokenid::true_literal, "true" }, 94 { tokenid::false_literal, "false" }, 95 { tokenid::int_literal, "integral literal" }, 96 { tokenid::uint_literal, "integral literal" }, 97 { tokenid::float_literal, "floating point literal" }, 98 { tokenid::double_literal, "floating point literal" }, 99 { tokenid::string_literal, "string literal" }, 100 { tokenid::namespace_, "namespace" }, 101 { tokenid::struct_, "struct" }, 102 { tokenid::technique, "technique" }, 103 { tokenid::pass, "pass" }, 104 { tokenid::for_, "for" }, 105 { tokenid::while_, "while" }, 106 { tokenid::do_, "do" }, 107 { tokenid::if_, "if" }, 108 { tokenid::else_, "else" }, 109 { tokenid::switch_, "switch" }, 110 { tokenid::case_, "case" }, 111 { tokenid::default_, "default" }, 112 { tokenid::break_, "break" }, 113 { tokenid::continue_, "continue" }, 114 { tokenid::return_, "return" }, 115 { tokenid::discard_, "discard" }, 116 { tokenid::extern_, "extern" }, 117 { tokenid::static_, "static" }, 118 { tokenid::uniform_, "uniform" }, 119 { tokenid::volatile_, "volatile" }, 120 { tokenid::precise, "precise" }, 121 { tokenid::groupshared, "groupshared" }, 122 { tokenid::in, "in" }, 123 { tokenid::out, "out" }, 124 { tokenid::inout, "inout" }, 125 { tokenid::const_, "const" }, 126 { tokenid::linear, "linear" }, 127 { tokenid::noperspective, "noperspective" }, 128 { tokenid::centroid, "centroid" }, 129 { tokenid::nointerpolation, "nointerpolation" }, 130 { tokenid::void_, "void" }, 131 { tokenid::bool_, "bool" }, 132 { tokenid::bool2, "bool2" }, 133 { tokenid::bool3, "bool3" }, 134 { tokenid::bool4, "bool4" }, 135 { tokenid::bool2x2, "bool2x2" }, 136 { tokenid::bool2x3, "bool2x3" }, 137 { tokenid::bool2x4, "bool2x4" }, 138 { tokenid::bool3x2, "bool3x2" }, 139 { tokenid::bool3x3, "bool3x3" }, 140 { tokenid::bool3x4, "bool3x4" }, 141 { tokenid::bool4x2, "bool4x2" }, 142 { tokenid::bool4x3, "bool4x3" }, 143 { tokenid::bool4x4, "bool4x4" }, 144 { tokenid::int_, "int" }, 145 { tokenid::int2, "int2" }, 146 { tokenid::int3, "int3" }, 147 { tokenid::int4, "int4" }, 148 { tokenid::int2x2, "int2x2" }, 149 { tokenid::int2x3, "int2x3" }, 150 { tokenid::int2x4, "int2x4" }, 151 { tokenid::int3x2, "int3x2" }, 152 { tokenid::int3x3, "int3x3" }, 153 { tokenid::int3x4, "int3x4" }, 154 { tokenid::int4x2, "int4x2" }, 155 { tokenid::int4x3, "int4x3" }, 156 { tokenid::int4x4, "int4x4" }, 157 { tokenid::min16int, "min16int" }, 158 { tokenid::min16int2, "min16int2" }, 159 { tokenid::min16int3, "min16int3" }, 160 { tokenid::min16int4, "min16int4" }, 161 { tokenid::uint_, "uint" }, 162 { tokenid::uint2, "uint2" }, 163 { tokenid::uint3, "uint3" }, 164 { tokenid::uint4, "uint4" }, 165 { tokenid::uint2x2, "uint2x2" }, 166 { tokenid::uint2x3, "uint2x3" }, 167 { tokenid::uint2x4, "uint2x4" }, 168 { tokenid::uint3x2, "uint3x2" }, 169 { tokenid::uint3x3, "uint3x3" }, 170 { tokenid::uint3x4, "uint3x4" }, 171 { tokenid::uint4x2, "uint4x2" }, 172 { tokenid::uint4x3, "uint4x3" }, 173 { tokenid::uint4x4, "uint4x4" }, 174 { tokenid::min16uint, "min16uint" }, 175 { tokenid::min16uint2, "min16uint2" }, 176 { tokenid::min16uint3, "min16uint3" }, 177 { tokenid::min16uint4, "min16uint4" }, 178 { tokenid::float_, "float" }, 179 { tokenid::float2, "float2" }, 180 { tokenid::float3, "float3" }, 181 { tokenid::float4, "float4" }, 182 { tokenid::float2x2, "float2x2" }, 183 { tokenid::float2x3, "float2x3" }, 184 { tokenid::float2x4, "float2x4" }, 185 { tokenid::float3x2, "float3x2" }, 186 { tokenid::float3x3, "float3x3" }, 187 { tokenid::float3x4, "float3x4" }, 188 { tokenid::float4x2, "float4x2" }, 189 { tokenid::float4x3, "float4x3" }, 190 { tokenid::float4x4, "float4x4" }, 191 { tokenid::min16float, "min16float" }, 192 { tokenid::min16float2, "min16float2" }, 193 { tokenid::min16float3, "min16float3" }, 194 { tokenid::min16float4, "min16float4" }, 195 { tokenid::vector, "vector" }, 196 { tokenid::matrix, "matrix" }, 197 { tokenid::string_, "string" }, 198 { tokenid::texture1d, "texture1D" }, 199 { tokenid::texture2d, "texture2D" }, 200 { tokenid::texture3d, "texture3D" }, 201 { tokenid::sampler1d, "sampler1D" }, 202 { tokenid::sampler2d, "sampler2D" }, 203 { tokenid::sampler3d, "sampler3D" }, 204 { tokenid::storage1d, "storage1D" }, 205 { tokenid::storage2d, "storage2D" }, 206 { tokenid::storage3d, "storage3D" }, 207 }; 208 static const std::unordered_map<std::string_view, tokenid> keyword_lookup = { 209 { "asm", tokenid::reserved }, 210 { "asm_fragment", tokenid::reserved }, 211 { "auto", tokenid::reserved }, 212 { "bool", tokenid::bool_ }, 213 { "bool2", tokenid::bool2 }, 214 { "bool2x1", tokenid::bool2 }, 215 { "bool2x2", tokenid::bool2x2 }, 216 { "bool2x3", tokenid::bool2x3 }, 217 { "bool2x4", tokenid::bool2x4 }, 218 { "bool3", tokenid::bool3 }, 219 { "bool3x1", tokenid::bool3 }, 220 { "bool3x2", tokenid::bool3x2 }, 221 { "bool3x3", tokenid::bool3x3 }, 222 { "bool3x4", tokenid::bool3x4 }, 223 { "bool4", tokenid::bool4 }, 224 { "bool4x1", tokenid::bool4 }, 225 { "bool4x2", tokenid::bool4x2 }, 226 { "bool4x3", tokenid::bool4x3 }, 227 { "bool4x4", tokenid::bool4x4 }, 228 { "break", tokenid::break_ }, 229 { "case", tokenid::case_ }, 230 { "cast", tokenid::reserved }, 231 { "catch", tokenid::reserved }, 232 { "centroid", tokenid::reserved }, 233 { "char", tokenid::reserved }, 234 { "class", tokenid::reserved }, 235 { "column_major", tokenid::reserved }, 236 { "compile", tokenid::reserved }, 237 { "const", tokenid::const_ }, 238 { "const_cast", tokenid::reserved }, 239 { "continue", tokenid::continue_ }, 240 { "default", tokenid::default_ }, 241 { "delete", tokenid::reserved }, 242 { "discard", tokenid::discard_ }, 243 { "do", tokenid::do_ }, 244 { "double", tokenid::reserved }, 245 { "dword", tokenid::uint_ }, 246 { "dword2", tokenid::uint2 }, 247 { "dword2x1", tokenid::uint2 }, 248 { "dword2x2", tokenid::uint2x2 }, 249 { "dword2x3", tokenid::uint2x3 }, 250 { "dword2x4", tokenid::uint2x4 }, 251 { "dword3", tokenid::uint3, }, 252 { "dword3x1", tokenid::uint3 }, 253 { "dword3x2", tokenid::uint3x2 }, 254 { "dword3x3", tokenid::uint3x3 }, 255 { "dword3x4", tokenid::uint3x4 }, 256 { "dword4", tokenid::uint4 }, 257 { "dword4x1", tokenid::uint4 }, 258 { "dword4x2", tokenid::uint4x2 }, 259 { "dword4x3", tokenid::uint4x3 }, 260 { "dword4x4", tokenid::uint4x4 }, 261 { "dynamic_cast", tokenid::reserved }, 262 { "else", tokenid::else_ }, 263 { "enum", tokenid::reserved }, 264 { "explicit", tokenid::reserved }, 265 { "extern", tokenid::extern_ }, 266 { "external", tokenid::reserved }, 267 { "false", tokenid::false_literal }, 268 { "FALSE", tokenid::false_literal }, 269 { "float", tokenid::float_ }, 270 { "float2", tokenid::float2 }, 271 { "float2x1", tokenid::float2 }, 272 { "float2x2", tokenid::float2x2 }, 273 { "float2x3", tokenid::float2x3 }, 274 { "float2x4", tokenid::float2x4 }, 275 { "float3", tokenid::float3 }, 276 { "float3x1", tokenid::float3 }, 277 { "float3x2", tokenid::float3x2 }, 278 { "float3x3", tokenid::float3x3 }, 279 { "float3x4", tokenid::float3x4 }, 280 { "float4", tokenid::float4 }, 281 { "float4x1", tokenid::float4 }, 282 { "float4x2", tokenid::float4x2 }, 283 { "float4x3", tokenid::float4x3 }, 284 { "float4x4", tokenid::float4x4 }, 285 { "for", tokenid::for_ }, 286 { "foreach", tokenid::reserved }, 287 { "friend", tokenid::reserved }, 288 { "globallycoherent", tokenid::reserved }, 289 { "goto", tokenid::reserved }, 290 { "groupshared", tokenid::groupshared }, 291 { "half", tokenid::reserved }, 292 { "half2", tokenid::reserved }, 293 { "half2x1", tokenid::reserved }, 294 { "half2x2", tokenid::reserved }, 295 { "half2x3", tokenid::reserved }, 296 { "half2x4", tokenid::reserved }, 297 { "half3", tokenid::reserved }, 298 { "half3x1", tokenid::reserved }, 299 { "half3x2", tokenid::reserved }, 300 { "half3x3", tokenid::reserved }, 301 { "half3x4", tokenid::reserved }, 302 { "half4", tokenid::reserved }, 303 { "half4x1", tokenid::reserved }, 304 { "half4x2", tokenid::reserved }, 305 { "half4x3", tokenid::reserved }, 306 { "half4x4", tokenid::reserved }, 307 { "if", tokenid::if_ }, 308 { "in", tokenid::in }, 309 { "inline", tokenid::reserved }, 310 { "inout", tokenid::inout }, 311 { "int", tokenid::int_ }, 312 { "int2", tokenid::int2 }, 313 { "int2x1", tokenid::int2 }, 314 { "int2x2", tokenid::int2x2 }, 315 { "int2x3", tokenid::int2x3 }, 316 { "int2x4", tokenid::int2x4 }, 317 { "int3", tokenid::int3 }, 318 { "int3x1", tokenid::int3 }, 319 { "int3x2", tokenid::int3x2 }, 320 { "int3x3", tokenid::int3x3 }, 321 { "int3x4", tokenid::int3x4 }, 322 { "int4", tokenid::int4 }, 323 { "int4x1", tokenid::int4 }, 324 { "int4x2", tokenid::int4x2 }, 325 { "int4x3", tokenid::int4x3 }, 326 { "int4x4", tokenid::int4x4 }, 327 { "interface", tokenid::reserved }, 328 { "linear", tokenid::linear }, 329 { "long", tokenid::reserved }, 330 { "matrix", tokenid::matrix }, 331 { "min16float", tokenid::min16float }, 332 { "min16float2", tokenid::min16float2 }, 333 { "min16float3", tokenid::min16float3 }, 334 { "min16float4", tokenid::min16float4 }, 335 { "min16int", tokenid::min16int }, 336 { "min16int2", tokenid::min16int2 }, 337 { "min16int3", tokenid::min16int3 }, 338 { "min16int4", tokenid::min16int4 }, 339 { "min16uint", tokenid::min16uint }, 340 { "min16uint2", tokenid::min16uint2 }, 341 { "min16uint3", tokenid::min16uint3 }, 342 { "min16uint4", tokenid::min16uint4 }, 343 { "mutable", tokenid::reserved }, 344 { "namespace", tokenid::namespace_ }, 345 { "new", tokenid::reserved }, 346 { "noinline", tokenid::reserved }, 347 { "nointerpolation", tokenid::nointerpolation }, 348 { "noperspective", tokenid::noperspective }, 349 { "operator", tokenid::reserved }, 350 { "out", tokenid::out }, 351 { "packed", tokenid::reserved }, 352 { "packoffset", tokenid::reserved }, 353 { "pass", tokenid::pass }, 354 { "precise", tokenid::precise }, 355 { "private", tokenid::reserved }, 356 { "protected", tokenid::reserved }, 357 { "public", tokenid::reserved }, 358 { "register", tokenid::reserved }, 359 { "reinterpret_cast", tokenid::reserved }, 360 { "restrict", tokenid::reserved }, 361 { "return", tokenid::return_ }, 362 { "row_major", tokenid::reserved }, 363 { "sample", tokenid::reserved }, 364 { "sampler", tokenid::sampler2d }, 365 { "sampler1D", tokenid::sampler1d }, 366 { "sampler1DArray", tokenid::reserved }, 367 { "sampler2D", tokenid::sampler2d }, 368 { "sampler2DArray", tokenid::reserved }, 369 { "sampler2DMS", tokenid::reserved }, 370 { "sampler2DMSArray", tokenid::reserved }, 371 { "sampler3D", tokenid::sampler3d }, 372 { "sampler_state", tokenid::reserved }, 373 { "samplerCube", tokenid::reserved }, 374 { "samplerCubeArray", tokenid::reserved }, 375 { "samplerCUBE", tokenid::reserved }, 376 { "samplerRect", tokenid::reserved }, 377 { "samplerRECT", tokenid::reserved }, 378 { "SamplerState", tokenid::reserved }, 379 { "storage", tokenid::storage2d }, 380 { "storage1D", tokenid::storage1d }, 381 { "storage2D", tokenid::storage2d }, 382 { "storage3D", tokenid::storage3d }, 383 { "shared", tokenid::reserved }, 384 { "short", tokenid::reserved }, 385 { "signed", tokenid::reserved }, 386 { "sizeof", tokenid::reserved }, 387 { "snorm", tokenid::reserved }, 388 { "static", tokenid::static_ }, 389 { "static_cast", tokenid::reserved }, 390 { "string", tokenid::string_ }, 391 { "struct", tokenid::struct_ }, 392 { "switch", tokenid::switch_ }, 393 { "technique", tokenid::technique }, 394 { "template", tokenid::reserved }, 395 { "texture", tokenid::texture2d }, 396 { "Texture1D", tokenid::reserved }, 397 { "texture1D", tokenid::texture1d }, 398 { "Texture1DArray", tokenid::reserved }, 399 { "Texture2D", tokenid::reserved }, 400 { "texture2D", tokenid::texture2d }, 401 { "Texture2DArray", tokenid::reserved }, 402 { "Texture2DMS", tokenid::reserved }, 403 { "Texture2DMSArray", tokenid::reserved }, 404 { "Texture3D", tokenid::reserved }, 405 { "texture3D", tokenid::texture3d }, 406 { "textureCUBE", tokenid::reserved }, 407 { "TextureCube", tokenid::reserved }, 408 { "TextureCubeArray", tokenid::reserved }, 409 { "textureRECT", tokenid::reserved }, 410 { "this", tokenid::reserved }, 411 { "true", tokenid::true_literal }, 412 { "TRUE", tokenid::true_literal }, 413 { "try", tokenid::reserved }, 414 { "typedef", tokenid::reserved }, 415 { "uint", tokenid::uint_ }, 416 { "uint2", tokenid::uint2 }, 417 { "uint2x1", tokenid::uint2 }, 418 { "uint2x2", tokenid::uint2x2 }, 419 { "uint2x3", tokenid::uint2x3 }, 420 { "uint2x4", tokenid::uint2x4 }, 421 { "uint3", tokenid::uint3 }, 422 { "uint3x1", tokenid::uint3 }, 423 { "uint3x2", tokenid::uint3x2 }, 424 { "uint3x3", tokenid::uint3x3 }, 425 { "uint3x4", tokenid::uint3x4 }, 426 { "uint4", tokenid::uint4 }, 427 { "uint4x1", tokenid::uint4 }, 428 { "uint4x2", tokenid::uint4x2 }, 429 { "uint4x3", tokenid::uint4x3 }, 430 { "uint4x4", tokenid::uint4x4 }, 431 { "uniform", tokenid::uniform_ }, 432 { "union", tokenid::reserved }, 433 { "unorm", tokenid::reserved }, 434 { "unsigned", tokenid::reserved }, 435 { "using", tokenid::reserved }, 436 { "vector", tokenid::vector }, 437 { "virtual", tokenid::reserved }, 438 { "void", tokenid::void_ }, 439 { "volatile", tokenid::volatile_ }, 440 { "while", tokenid::while_ } 441 }; 442 static const std::unordered_map<std::string_view, tokenid> pp_directive_lookup = { 443 { "define", tokenid::hash_def }, 444 { "undef", tokenid::hash_undef }, 445 { "if", tokenid::hash_if }, 446 { "ifdef", tokenid::hash_ifdef }, 447 { "ifndef", tokenid::hash_ifndef }, 448 { "else", tokenid::hash_else }, 449 { "elif", tokenid::hash_elif }, 450 { "endif", tokenid::hash_endif }, 451 { "error", tokenid::hash_error }, 452 { "warning", tokenid::hash_warning }, 453 { "pragma", tokenid::hash_pragma }, 454 { "include", tokenid::hash_include }, 455 }; 456 457 static inline bool is_octal_digit(char c) 458 { 459 return static_cast<unsigned>(c - '0') < 8; 460 } 461 static inline bool is_decimal_digit(char c) 462 { 463 return static_cast<unsigned>(c - '0') < 10; 464 } 465 static inline bool is_hexadecimal_digit(char c) 466 { 467 return is_decimal_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); 468 } 469 470 static bool is_digit(char c, int radix) 471 { 472 switch (radix) 473 { 474 case 8: 475 return is_octal_digit(c); 476 case 10: 477 return is_decimal_digit(c); 478 case 16: 479 return is_hexadecimal_digit(c); 480 } 481 482 return false; 483 } 484 static long long octal_to_decimal(long long n) 485 { 486 long long m = 0; 487 488 while (n != 0) 489 { 490 m *= 8; 491 m += n & 7; 492 n >>= 3; 493 } 494 495 while (m != 0) 496 { 497 n *= 10; 498 n += m & 7; 499 m >>= 3; 500 } 501 502 return n; 503 } 504 505 std::string reshadefx::token::id_to_name(tokenid id) 506 { 507 const auto it = token_lookup.find(id); 508 if (it != token_lookup.end()) 509 return std::string(it->second); 510 return "unknown"; 511 } 512 513 reshadefx::token reshadefx::lexer::lex() 514 { 515 bool is_at_line_begin = _cur_location.column <= 1; 516 517 token tok; 518 next_token: 519 // Reset token data 520 tok.location = _cur_location; 521 tok.offset = input_offset(); 522 tok.length = 1; 523 tok.literal_as_double = 0; 524 tok.literal_as_string.clear(); 525 526 assert(_cur <= _end); 527 528 // Do a character type lookup for the current character 529 switch (type_lookup[uint8_t(*_cur)]) 530 { 531 case 0xFF: // EOF 532 tok.id = tokenid::end_of_file; 533 return tok; 534 case SPACE: 535 skip_space(); 536 if (_ignore_whitespace || is_at_line_begin || *_cur == '\n') 537 goto next_token; 538 tok.id = tokenid::space; 539 tok.length = input_offset() - tok.offset; 540 return tok; 541 case '\n': 542 _cur++; 543 _cur_location.line++; 544 _cur_location.column = 1; 545 is_at_line_begin = true; 546 if (_ignore_whitespace) 547 goto next_token; 548 tok.id = tokenid::end_of_line; 549 return tok; 550 case DIGIT: 551 parse_numeric_literal(tok); 552 break; 553 case IDENT: 554 parse_identifier(tok); 555 break; 556 case '!': 557 if (_cur[1] == '=') 558 tok.id = tokenid::exclaim_equal, 559 tok.length = 2; 560 else 561 tok.id = tokenid::exclaim; 562 break; 563 case '"': 564 parse_string_literal(tok, _escape_string_literals); 565 break; 566 case '#': 567 if (is_at_line_begin) 568 { 569 if (!parse_pp_directive(tok) || _ignore_pp_directives) 570 { 571 skip_to_next_line(); 572 goto next_token; 573 } 574 } // These braces are important so the 'else' is matched to the right 'if' statement 575 else 576 tok.id = tokenid::hash; 577 break; 578 case '$': 579 tok.id = tokenid::dollar; 580 break; 581 case '%': 582 if (_cur[1] == '=') 583 tok.id = tokenid::percent_equal, 584 tok.length = 2; 585 else 586 tok.id = tokenid::percent; 587 break; 588 case '&': 589 if (_cur[1] == '&') 590 tok.id = tokenid::ampersand_ampersand, 591 tok.length = 2; 592 else if (_cur[1] == '=') 593 tok.id = tokenid::ampersand_equal, 594 tok.length = 2; 595 else 596 tok.id = tokenid::ampersand; 597 break; 598 case '(': 599 tok.id = tokenid::parenthesis_open; 600 break; 601 case ')': 602 tok.id = tokenid::parenthesis_close; 603 break; 604 case '*': 605 if (_cur[1] == '=') 606 tok.id = tokenid::star_equal, 607 tok.length = 2; 608 else 609 tok.id = tokenid::star; 610 break; 611 case '+': 612 if (_cur[1] == '+') 613 tok.id = tokenid::plus_plus, 614 tok.length = 2; 615 else if (_cur[1] == '=') 616 tok.id = tokenid::plus_equal, 617 tok.length = 2; 618 else 619 tok.id = tokenid::plus; 620 break; 621 case ',': 622 tok.id = tokenid::comma; 623 break; 624 case '-': 625 if (_cur[1] == '-') 626 tok.id = tokenid::minus_minus, 627 tok.length = 2; 628 else if (_cur[1] == '=') 629 tok.id = tokenid::minus_equal, 630 tok.length = 2; 631 else if (_cur[1] == '>') 632 tok.id = tokenid::arrow, 633 tok.length = 2; 634 else 635 tok.id = tokenid::minus; 636 break; 637 case '.': 638 if (type_lookup[uint8_t(_cur[1])] == DIGIT) 639 parse_numeric_literal(tok); 640 else if (_cur[1] == '.' && _cur[2] == '.') 641 tok.id = tokenid::ellipsis, 642 tok.length = 3; 643 else 644 tok.id = tokenid::dot; 645 break; 646 case '/': 647 if (_cur[1] == '/') 648 { 649 skip_to_next_line(); 650 if (_ignore_comments) 651 goto next_token; 652 tok.id = tokenid::single_line_comment; 653 tok.length = input_offset() - tok.offset; 654 return tok; 655 } 656 else if (_cur[1] == '*') 657 { 658 while (_cur < _end) 659 { 660 if (*_cur == '\n') 661 { 662 _cur_location.line++; 663 _cur_location.column = 1; 664 } 665 else if (_cur[0] == '*' && _cur[1] == '/') 666 { 667 skip(2); 668 break; 669 } 670 skip(1); 671 } 672 if (_ignore_comments) 673 goto next_token; 674 tok.id = tokenid::multi_line_comment; 675 tok.length = input_offset() - tok.offset; 676 return tok; 677 } 678 else if (_cur[1] == '=') 679 tok.id = tokenid::slash_equal, 680 tok.length = 2; 681 else 682 tok.id = tokenid::slash; 683 break; 684 case ':': 685 if (_cur[1] == ':') 686 tok.id = tokenid::colon_colon, 687 tok.length = 2; 688 else 689 tok.id = tokenid::colon; 690 break; 691 case ';': 692 tok.id = tokenid::semicolon; 693 break; 694 case '<': 695 if (_cur[1] == '<') 696 if (_cur[2] == '=') 697 tok.id = tokenid::less_less_equal, 698 tok.length = 3; 699 else 700 tok.id = tokenid::less_less, 701 tok.length = 2; 702 else if (_cur[1] == '=') 703 tok.id = tokenid::less_equal, 704 tok.length = 2; 705 else 706 tok.id = tokenid::less; 707 break; 708 case '=': 709 if (_cur[1] == '=') 710 tok.id = tokenid::equal_equal, 711 tok.length = 2; 712 else 713 tok.id = tokenid::equal; 714 break; 715 case '>': 716 if (_cur[1] == '>') 717 if (_cur[2] == '=') 718 tok.id = tokenid::greater_greater_equal, 719 tok.length = 3; 720 else 721 tok.id = tokenid::greater_greater, 722 tok.length = 2; 723 else if (_cur[1] == '=') 724 tok.id = tokenid::greater_equal, 725 tok.length = 2; 726 else 727 tok.id = tokenid::greater; 728 break; 729 case '?': 730 tok.id = tokenid::question; 731 break; 732 case '@': 733 tok.id = tokenid::at; 734 break; 735 case '[': 736 tok.id = tokenid::bracket_open; 737 break; 738 case '\\': 739 if (_cur[1] == '\n' || (_cur[1] == '\r' && _cur[2] == '\n')) 740 { 741 // Skip to next line if current line ends with a backslash 742 skip_space(); 743 if (_ignore_whitespace) 744 goto next_token; 745 tok.id = tokenid::space; 746 tok.length = input_offset() - tok.offset; 747 return tok; 748 } 749 tok.id = tokenid::backslash; 750 break; 751 case ']': 752 tok.id = tokenid::bracket_close; 753 break; 754 case '^': 755 if (_cur[1] == '=') 756 tok.id = tokenid::caret_equal, 757 tok.length = 2; 758 else 759 tok.id = tokenid::caret; 760 break; 761 case '{': 762 tok.id = tokenid::brace_open; 763 break; 764 case '|': 765 if (_cur[1] == '=') 766 tok.id = tokenid::pipe_equal, 767 tok.length = 2; 768 else if (_cur[1] == '|') 769 tok.id = tokenid::pipe_pipe, 770 tok.length = 2; 771 else 772 tok.id = tokenid::pipe; 773 break; 774 case '}': 775 tok.id = tokenid::brace_close; 776 break; 777 case '~': 778 tok.id = tokenid::tilde; 779 break; 780 default: 781 tok.id = tokenid::unknown; 782 break; 783 } 784 785 skip(tok.length); 786 787 return tok; 788 } 789 790 void reshadefx::lexer::skip(size_t length) 791 { 792 _cur += length; 793 _cur_location.column += static_cast<unsigned int>(length); 794 } 795 void reshadefx::lexer::skip_space() 796 { 797 // Skip each character until a space is found 798 while (_cur < _end) 799 { 800 if (_cur[0] == '\\' && (_cur[1] == '\n' || (_cur[1] == '\r' && _cur[2] == '\n'))) 801 { 802 skip(_cur[1] == '\r' ? 3 : 2); 803 _cur_location.line++; 804 _cur_location.column = 1; 805 continue; 806 } 807 808 if (type_lookup[uint8_t(*_cur)] == SPACE) 809 skip(1); 810 else 811 break; 812 } 813 } 814 void reshadefx::lexer::skip_to_next_line() 815 { 816 // Skip each character until a new line feed is found 817 while (*_cur != '\n' && _cur < _end) 818 { 819 #if 0 820 if (_cur[0] == '\\' && (_cur[1] == '\n' || (_cur[1] == '\r' && _cur[2] == '\n'))) 821 { 822 skip(_cur[1] == '\r' ? 3 : 2); 823 _cur_location.line++; 824 _cur_location.column = 1; 825 continue; 826 } 827 #endif 828 829 skip(1); 830 } 831 } 832 833 void reshadefx::lexer::reset_to_offset(size_t offset) 834 { 835 assert(offset < _input.size()); 836 _cur = _input.data() + offset; 837 } 838 839 void reshadefx::lexer::parse_identifier(token &tok) const 840 { 841 auto *const begin = _cur, *end = begin; 842 843 // Skip to the end of the identifier sequence 844 while (type_lookup[uint8_t(*end)] == IDENT || type_lookup[uint8_t(*end)] == DIGIT) 845 end++; 846 847 tok.id = tokenid::identifier; 848 tok.offset = input_offset(); 849 tok.length = end - begin; 850 tok.literal_as_string.assign(begin, end); 851 852 if (_ignore_keywords) 853 return; 854 855 if (const auto it = keyword_lookup.find(tok.literal_as_string); 856 it != keyword_lookup.end()) 857 tok.id = it->second; 858 } 859 bool reshadefx::lexer::parse_pp_directive(token &tok) 860 { 861 skip(1); // Skip the '#' 862 skip_space(); // Skip any space between the '#' and directive 863 parse_identifier(tok); 864 865 if (const auto it = pp_directive_lookup.find(tok.literal_as_string); 866 it != pp_directive_lookup.end()) 867 { 868 tok.id = it->second; 869 return true; 870 } 871 else if (!_ignore_line_directives && tok.literal_as_string == "line") // The #line directive needs special handling 872 { 873 skip(tok.length); // The 'parse_identifier' does not update the pointer to the current character, so do that now 874 skip_space(); 875 parse_numeric_literal(tok); 876 skip(tok.length); 877 878 _cur_location.line = tok.literal_as_int; 879 880 // Need to subtract one since the line containing #line does not count into the statistics 881 if (_cur_location.line != 0) 882 _cur_location.line--; 883 884 skip_space(); 885 886 // Check if this #line directive has an file name attached to it 887 if (_cur[0] == '"') 888 { 889 token temptok; 890 parse_string_literal(temptok, false); 891 892 _cur_location.source = std::move(temptok.literal_as_string); 893 } 894 895 // Do not return the #line directive as token to the caller 896 return false; 897 } 898 899 tok.id = tokenid::hash_unknown; 900 901 return true; 902 } 903 void reshadefx::lexer::parse_string_literal(token &tok, bool escape) 904 { 905 auto *const begin = _cur, *end = begin + 1; // Skip first quote character right away 906 907 for (auto c = *end; c != '"'; c = *++end) 908 { 909 if (c == '\n' || end >= _end) 910 { 911 // Line feed reached, the string literal is done (technically this should be an error, but the lexer does not report errors, so ignore it) 912 end--; 913 if (end[0] == '\r') end--; 914 break; 915 } 916 917 if (c == '\r') 918 { 919 // Silently ignore carriage return characters 920 continue; 921 } 922 923 if (unsigned int n = (end[1] == '\r' && end + 2 < _end) ? 2 : 1; 924 c == '\\' && end[n] == '\n') 925 { 926 // Escape character found at end of line, the string literal continues on to the next line 927 end += n; 928 _cur_location.line++; 929 continue; 930 } 931 932 // Handle escape sequences 933 if (c == '\\' && escape) 934 { 935 unsigned int n = 0; 936 937 // Any character following the '\' is not parsed as usual, so increment pointer here (this makes sure '\"' does not abort the outer loop as well) 938 switch (c = *++end) 939 { 940 case '0': 941 case '1': 942 case '2': 943 case '3': 944 case '4': 945 case '5': 946 case '6': 947 case '7': 948 for (unsigned int i = 0; i < 3 && is_octal_digit(*end) && end < _end; i++) 949 { 950 c = *end++; 951 n = (n << 3) | (c - '0'); 952 } 953 // For simplicity the number is limited to what fits in a single character 954 c = n & 0xFF; 955 // The octal parsing loop above incremented one pass the escape sequence, so step back 956 end--; 957 break; 958 case 'a': 959 c = '\a'; 960 break; 961 case 'b': 962 c = '\b'; 963 break; 964 case 'f': 965 c = '\f'; 966 break; 967 case 'n': 968 c = '\n'; 969 break; 970 case 'r': 971 c = '\r'; 972 break; 973 case 't': 974 c = '\t'; 975 break; 976 case 'v': 977 c = '\v'; 978 break; 979 case 'x': 980 if (is_hexadecimal_digit(*++end)) 981 { 982 while (is_hexadecimal_digit(*end) && end < _end) 983 { 984 c = *end++; 985 n = (n << 4) | (is_decimal_digit(c) ? (c - '0') : (c - 55 - 32 * (c & 0x20))); 986 } 987 988 // For simplicity the number is limited to what fits in a single character 989 c = n & 0xFF; 990 } 991 // The hexadecimal parsing loop and check above incremented one pass the escape sequence, so step back 992 end--; 993 break; 994 } 995 } 996 997 tok.literal_as_string += c; 998 } 999 1000 tok.id = tokenid::string_literal; 1001 tok.length = end - begin + 1; 1002 } 1003 void reshadefx::lexer::parse_numeric_literal(token &tok) const 1004 { 1005 // This routine handles both integer and floating point numbers 1006 auto *const begin = _cur, *end = _cur; 1007 int mantissa_size = 0, decimal_location = -1, radix = 10; 1008 long long fraction = 0, exponent = 0; 1009 1010 // If a literal starts with '0' it is either an octal or hexadecimal ('0x') value 1011 if (begin[0] == '0') 1012 { 1013 if (begin[1] == 'x' || begin[1] == 'X') 1014 { 1015 end = begin + 2; 1016 radix = 16; 1017 } 1018 else 1019 { 1020 radix = 8; 1021 } 1022 } 1023 1024 for (; mantissa_size <= 18; mantissa_size++, end++) 1025 { 1026 auto c = *end; 1027 1028 if (is_decimal_digit(c)) 1029 { 1030 c -= '0'; 1031 1032 if (c >= radix) 1033 break; 1034 } 1035 else if (radix == 16) 1036 { 1037 // Hexadecimal values can contain the letters A to F 1038 if (c >= 'A' && c <= 'F') 1039 c -= 'A' - 10; 1040 else if (c >= 'a' && c <= 'f') 1041 c -= 'a' - 10; 1042 else 1043 break; 1044 } 1045 else 1046 { 1047 if (c != '.' || decimal_location >= 0) 1048 break; 1049 1050 // Found a decimal character, as such convert current values 1051 if (radix == 8) 1052 { 1053 radix = 10; 1054 fraction = octal_to_decimal(fraction); 1055 } 1056 1057 decimal_location = mantissa_size; 1058 continue; 1059 } 1060 1061 fraction *= radix; 1062 fraction += c; 1063 } 1064 1065 // Ignore additional digits that cannot affect the value 1066 while (is_digit(*end, radix)) 1067 end++; 1068 1069 // If a decimal character was found, this is a floating point value, otherwise an integer one 1070 if (decimal_location < 0) 1071 { 1072 tok.id = tokenid::int_literal; 1073 decimal_location = mantissa_size; 1074 } 1075 else 1076 { 1077 tok.id = tokenid::float_literal; 1078 mantissa_size -= 1; 1079 } 1080 1081 // Literals can be followed by an exponent 1082 if (*end == 'E' || *end == 'e') 1083 { 1084 auto tmp = end + 1; 1085 const bool negative = *tmp == '-'; 1086 1087 if (negative || *tmp == '+') 1088 tmp++; 1089 1090 if (is_decimal_digit(*tmp)) 1091 { 1092 end = tmp; 1093 1094 tok.id = tokenid::float_literal; 1095 1096 do { 1097 exponent *= 10; 1098 exponent += (*end++) - '0'; 1099 } while (is_decimal_digit(*end)); 1100 1101 if (negative) 1102 exponent = -exponent; 1103 } 1104 } 1105 1106 // Various suffixes force specific literal types 1107 if (*end == 'F' || *end == 'f') 1108 { 1109 end++; // Consume the suffix 1110 tok.id = tokenid::float_literal; 1111 } 1112 else if (*end == 'L' || *end == 'l') 1113 { 1114 end++; // Consume the suffix 1115 tok.id = tokenid::double_literal; 1116 } 1117 else if (tok.id == tokenid::int_literal && (*end == 'U' || *end == 'u')) // The 'u' suffix is only valid on integers and needs to be ignored otherwise 1118 { 1119 end++; // Consume the suffix 1120 tok.id = tokenid::uint_literal; 1121 } 1122 1123 if (tok.id == tokenid::float_literal || tok.id == tokenid::double_literal) 1124 { 1125 exponent += decimal_location - mantissa_size; 1126 1127 const bool exponent_negative = exponent < 0; 1128 1129 if (exponent_negative) 1130 exponent = -exponent; 1131 1132 // Limit exponent 1133 if (exponent > 511) 1134 exponent = 511; 1135 1136 // Quick exponent calculation 1137 double e = 1.0; 1138 const double powers_of_10[] = { 1139 10., 1140 100., 1141 1.0e4, 1142 1.0e8, 1143 1.0e16, 1144 1.0e32, 1145 1.0e64, 1146 1.0e128, 1147 1.0e256 1148 }; 1149 1150 for (auto d = powers_of_10; exponent != 0; exponent >>= 1, d++) 1151 if (exponent & 1) 1152 e *= *d; 1153 1154 if (tok.id == tokenid::float_literal) 1155 tok.literal_as_float = exponent_negative ? fraction / static_cast<float>(e) : fraction * static_cast<float>(e); 1156 else 1157 tok.literal_as_double = exponent_negative ? fraction / e : fraction * e; 1158 } 1159 else 1160 { 1161 // Limit the maximum value to what fits into our token structure 1162 tok.literal_as_uint = static_cast<unsigned int>(fraction & 0xFFFFFFFF); 1163 } 1164 1165 tok.length = end - begin; 1166 }