effect_lexer.cpp - duckstation - duckstation, but archived from the revision just before upstream changed it to a proprietary software project, this version is the libre one

effect_lexer.cpp (30852B)
      1 /*
      2  * Copyright (C) 2014 Patrick Mours
      3  * SPDX-License-Identifier: BSD-3-Clause
      4  */
      5 
      6 #include "effect_lexer.hpp"
      7 #include <cassert>
      8 #include <string_view>
      9 #include <unordered_map> // Used for static lookup tables
     10 
     11 using namespace reshadefx;
     12 
     13 enum token_type
     14 {
     15 	DIGIT = '0',
     16 	IDENT = 'A',
     17 	SPACE = ' ',
     18 };
     19 
     20 // Lookup table which translates a given char to a token type
     21 static const unsigned type_lookup[256] = {
     22 	 0xFF,  0x00,  0x00,  0x00,  0x00,  0x00,  0x00,  0x00,  0x00, SPACE,
     23 	 '\n', SPACE, SPACE, SPACE,  0x00,  0x00,  0x00,  0x00,  0x00,  0x00,
     24 	 0x00,  0x00,  0x00,  0x00,  0x00,  0x00,  0x00,  0x00,  0x00,  0x00,
     25 	 0x00,  0x00, SPACE,   '!',   '"',   '#',   '$',   '%',   '&',  '\'',
     26 	  '(',   ')',   '*',   '+',   ',',   '-',   '.',   '/', DIGIT, DIGIT,
     27 	DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT,   ':',   ';',
     28 	  '<',   '=',   '>',   '?',   '@', IDENT, IDENT, IDENT, IDENT, IDENT,
     29 	IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT,
     30 	IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT,
     31 	IDENT,   '[',  '\\',   ']',   '^', IDENT,  0x00, IDENT, IDENT, IDENT,
     32 	IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT,
     33 	IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT, IDENT,
     34 	IDENT, IDENT, IDENT,   '{',   '|',   '}',   '~',  0x00,  0x00,  0x00,
     35 };
     36 
     37 // Lookup tables which translate a given string literal to a token and backwards
     38 static const std::unordered_map<tokenid, std::string_view> token_lookup = {
     39 	{ tokenid::end_of_file, "end of file" },
     40 	{ tokenid::exclaim, "!" },
     41 	{ tokenid::hash, "#" },
     42 	{ tokenid::dollar, "$" },
     43 	{ tokenid::percent, "%" },
     44 	{ tokenid::ampersand, "&" },
     45 	{ tokenid::parenthesis_open, "(" },
     46 	{ tokenid::parenthesis_close, ")" },
     47 	{ tokenid::star, "*" },
     48 	{ tokenid::plus, "+" },
     49 	{ tokenid::comma, "," },
     50 	{ tokenid::minus, "-" },
     51 	{ tokenid::dot, "." },
     52 	{ tokenid::slash, "/" },
     53 	{ tokenid::colon, ":" },
     54 	{ tokenid::semicolon, ";" },
     55 	{ tokenid::less, "<" },
     56 	{ tokenid::equal, "=" },
     57 	{ tokenid::greater, ">" },
     58 	{ tokenid::question, "?" },
     59 	{ tokenid::at, "@" },
     60 	{ tokenid::bracket_open, "[" },
     61 	{ tokenid::backslash, "\\" },
     62 	{ tokenid::bracket_close, "]" },
     63 	{ tokenid::caret, "^" },
     64 	{ tokenid::brace_open, "{" },
     65 	{ tokenid::pipe, "|" },
     66 	{ tokenid::brace_close, "}" },
     67 	{ tokenid::tilde, "~" },
     68 	{ tokenid::exclaim_equal, "!=" },
     69 	{ tokenid::percent_equal, "%=" },
     70 	{ tokenid::ampersand_ampersand, "&&" },
     71 	{ tokenid::ampersand_equal, "&=" },
     72 	{ tokenid::star_equal, "*=" },
     73 	{ tokenid::plus_plus, "++" },
     74 	{ tokenid::plus_equal, "+=" },
     75 	{ tokenid::minus_minus, "--" },
     76 	{ tokenid::minus_equal, "-=" },
     77 	{ tokenid::arrow, "->" },
     78 	{ tokenid::ellipsis, "..." },
     79 	{ tokenid::slash_equal, "|=" },
     80 	{ tokenid::colon_colon, "::" },
     81 	{ tokenid::less_less_equal, "<<=" },
     82 	{ tokenid::less_less, "<<" },
     83 	{ tokenid::less_equal, "<=" },
     84 	{ tokenid::equal_equal, "==" },
     85 	{ tokenid::greater_greater_equal, ">>=" },
     86 	{ tokenid::greater_greater, ">>" },
     87 	{ tokenid::greater_equal, ">=" },
     88 	{ tokenid::caret_equal, "^=" },
     89 	{ tokenid::pipe_equal, "|=" },
     90 	{ tokenid::pipe_pipe, "||" },
     91 	{ tokenid::identifier, "identifier" },
     92 	{ tokenid::reserved, "reserved word" },
     93 	{ tokenid::true_literal, "true" },
     94 	{ tokenid::false_literal, "false" },
     95 	{ tokenid::int_literal, "integral literal" },
     96 	{ tokenid::uint_literal, "integral literal" },
     97 	{ tokenid::float_literal, "floating point literal" },
     98 	{ tokenid::double_literal, "floating point literal" },
     99 	{ tokenid::string_literal, "string literal" },
    100 	{ tokenid::namespace_, "namespace" },
    101 	{ tokenid::struct_, "struct" },
    102 	{ tokenid::technique, "technique" },
    103 	{ tokenid::pass, "pass" },
    104 	{ tokenid::for_, "for" },
    105 	{ tokenid::while_, "while" },
    106 	{ tokenid::do_, "do" },
    107 	{ tokenid::if_, "if" },
    108 	{ tokenid::else_, "else" },
    109 	{ tokenid::switch_, "switch" },
    110 	{ tokenid::case_, "case" },
    111 	{ tokenid::default_, "default" },
    112 	{ tokenid::break_, "break" },
    113 	{ tokenid::continue_, "continue" },
    114 	{ tokenid::return_, "return" },
    115 	{ tokenid::discard_, "discard" },
    116 	{ tokenid::extern_, "extern" },
    117 	{ tokenid::static_, "static" },
    118 	{ tokenid::uniform_, "uniform" },
    119 	{ tokenid::volatile_, "volatile" },
    120 	{ tokenid::precise, "precise" },
    121 	{ tokenid::groupshared, "groupshared" },
    122 	{ tokenid::in, "in" },
    123 	{ tokenid::out, "out" },
    124 	{ tokenid::inout, "inout" },
    125 	{ tokenid::const_, "const" },
    126 	{ tokenid::linear, "linear" },
    127 	{ tokenid::noperspective, "noperspective" },
    128 	{ tokenid::centroid, "centroid" },
    129 	{ tokenid::nointerpolation, "nointerpolation" },
    130 	{ tokenid::void_, "void" },
    131 	{ tokenid::bool_, "bool" },
    132 	{ tokenid::bool2, "bool2" },
    133 	{ tokenid::bool3, "bool3" },
    134 	{ tokenid::bool4, "bool4" },
    135 	{ tokenid::bool2x2, "bool2x2" },
    136 	{ tokenid::bool2x3, "bool2x3" },
    137 	{ tokenid::bool2x4, "bool2x4" },
    138 	{ tokenid::bool3x2, "bool3x2" },
    139 	{ tokenid::bool3x3, "bool3x3" },
    140 	{ tokenid::bool3x4, "bool3x4" },
    141 	{ tokenid::bool4x2, "bool4x2" },
    142 	{ tokenid::bool4x3, "bool4x3" },
    143 	{ tokenid::bool4x4, "bool4x4" },
    144 	{ tokenid::int_, "int" },
    145 	{ tokenid::int2, "int2" },
    146 	{ tokenid::int3, "int3" },
    147 	{ tokenid::int4, "int4" },
    148 	{ tokenid::int2x2, "int2x2" },
    149 	{ tokenid::int2x3, "int2x3" },
    150 	{ tokenid::int2x4, "int2x4" },
    151 	{ tokenid::int3x2, "int3x2" },
    152 	{ tokenid::int3x3, "int3x3" },
    153 	{ tokenid::int3x4, "int3x4" },
    154 	{ tokenid::int4x2, "int4x2" },
    155 	{ tokenid::int4x3, "int4x3" },
    156 	{ tokenid::int4x4, "int4x4" },
    157 	{ tokenid::min16int, "min16int" },
    158 	{ tokenid::min16int2, "min16int2" },
    159 	{ tokenid::min16int3, "min16int3" },
    160 	{ tokenid::min16int4, "min16int4" },
    161 	{ tokenid::uint_, "uint" },
    162 	{ tokenid::uint2, "uint2" },
    163 	{ tokenid::uint3, "uint3" },
    164 	{ tokenid::uint4, "uint4" },
    165 	{ tokenid::uint2x2, "uint2x2" },
    166 	{ tokenid::uint2x3, "uint2x3" },
    167 	{ tokenid::uint2x4, "uint2x4" },
    168 	{ tokenid::uint3x2, "uint3x2" },
    169 	{ tokenid::uint3x3, "uint3x3" },
    170 	{ tokenid::uint3x4, "uint3x4" },
    171 	{ tokenid::uint4x2, "uint4x2" },
    172 	{ tokenid::uint4x3, "uint4x3" },
    173 	{ tokenid::uint4x4, "uint4x4" },
    174 	{ tokenid::min16uint, "min16uint" },
    175 	{ tokenid::min16uint2, "min16uint2" },
    176 	{ tokenid::min16uint3, "min16uint3" },
    177 	{ tokenid::min16uint4, "min16uint4" },
    178 	{ tokenid::float_, "float" },
    179 	{ tokenid::float2, "float2" },
    180 	{ tokenid::float3, "float3" },
    181 	{ tokenid::float4, "float4" },
    182 	{ tokenid::float2x2, "float2x2" },
    183 	{ tokenid::float2x3, "float2x3" },
    184 	{ tokenid::float2x4, "float2x4" },
    185 	{ tokenid::float3x2, "float3x2" },
    186 	{ tokenid::float3x3, "float3x3" },
    187 	{ tokenid::float3x4, "float3x4" },
    188 	{ tokenid::float4x2, "float4x2" },
    189 	{ tokenid::float4x3, "float4x3" },
    190 	{ tokenid::float4x4, "float4x4" },
    191 	{ tokenid::min16float, "min16float" },
    192 	{ tokenid::min16float2, "min16float2" },
    193 	{ tokenid::min16float3, "min16float3" },
    194 	{ tokenid::min16float4, "min16float4" },
    195 	{ tokenid::vector, "vector" },
    196 	{ tokenid::matrix, "matrix" },
    197 	{ tokenid::string_, "string" },
    198 	{ tokenid::texture1d, "texture1D" },
    199 	{ tokenid::texture2d, "texture2D" },
    200 	{ tokenid::texture3d, "texture3D" },
    201 	{ tokenid::sampler1d, "sampler1D" },
    202 	{ tokenid::sampler2d, "sampler2D" },
    203 	{ tokenid::sampler3d, "sampler3D" },
    204 	{ tokenid::storage1d, "storage1D" },
    205 	{ tokenid::storage2d, "storage2D" },
    206 	{ tokenid::storage3d, "storage3D" },
    207 };
    208 static const std::unordered_map<std::string_view, tokenid> keyword_lookup = {
    209 	{ "asm", tokenid::reserved },
    210 	{ "asm_fragment", tokenid::reserved },
    211 	{ "auto", tokenid::reserved },
    212 	{ "bool", tokenid::bool_ },
    213 	{ "bool2", tokenid::bool2 },
    214 	{ "bool2x1", tokenid::bool2 },
    215 	{ "bool2x2", tokenid::bool2x2 },
    216 	{ "bool2x3", tokenid::bool2x3 },
    217 	{ "bool2x4", tokenid::bool2x4 },
    218 	{ "bool3", tokenid::bool3 },
    219 	{ "bool3x1", tokenid::bool3 },
    220 	{ "bool3x2", tokenid::bool3x2 },
    221 	{ "bool3x3", tokenid::bool3x3 },
    222 	{ "bool3x4", tokenid::bool3x4 },
    223 	{ "bool4", tokenid::bool4 },
    224 	{ "bool4x1", tokenid::bool4 },
    225 	{ "bool4x2", tokenid::bool4x2 },
    226 	{ "bool4x3", tokenid::bool4x3 },
    227 	{ "bool4x4", tokenid::bool4x4 },
    228 	{ "break", tokenid::break_ },
    229 	{ "case", tokenid::case_ },
    230 	{ "cast", tokenid::reserved },
    231 	{ "catch", tokenid::reserved },
    232 	{ "centroid", tokenid::reserved },
    233 	{ "char", tokenid::reserved },
    234 	{ "class", tokenid::reserved },
    235 	{ "column_major", tokenid::reserved },
    236 	{ "compile", tokenid::reserved },
    237 	{ "const", tokenid::const_ },
    238 	{ "const_cast", tokenid::reserved },
    239 	{ "continue", tokenid::continue_ },
    240 	{ "default", tokenid::default_ },
    241 	{ "delete", tokenid::reserved },
    242 	{ "discard", tokenid::discard_ },
    243 	{ "do", tokenid::do_ },
    244 	{ "double", tokenid::reserved },
    245 	{ "dword", tokenid::uint_ },
    246 	{ "dword2", tokenid::uint2 },
    247 	{ "dword2x1", tokenid::uint2 },
    248 	{ "dword2x2", tokenid::uint2x2 },
    249 	{ "dword2x3", tokenid::uint2x3 },
    250 	{ "dword2x4", tokenid::uint2x4 },
    251 	{ "dword3", tokenid::uint3, },
    252 	{ "dword3x1", tokenid::uint3 },
    253 	{ "dword3x2", tokenid::uint3x2 },
    254 	{ "dword3x3", tokenid::uint3x3 },
    255 	{ "dword3x4", tokenid::uint3x4 },
    256 	{ "dword4", tokenid::uint4 },
    257 	{ "dword4x1", tokenid::uint4 },
    258 	{ "dword4x2", tokenid::uint4x2 },
    259 	{ "dword4x3", tokenid::uint4x3 },
    260 	{ "dword4x4", tokenid::uint4x4 },
    261 	{ "dynamic_cast", tokenid::reserved },
    262 	{ "else", tokenid::else_ },
    263 	{ "enum", tokenid::reserved },
    264 	{ "explicit", tokenid::reserved },
    265 	{ "extern", tokenid::extern_ },
    266 	{ "external", tokenid::reserved },
    267 	{ "false", tokenid::false_literal },
    268 	{ "FALSE", tokenid::false_literal },
    269 	{ "float", tokenid::float_ },
    270 	{ "float2", tokenid::float2 },
    271 	{ "float2x1", tokenid::float2 },
    272 	{ "float2x2", tokenid::float2x2 },
    273 	{ "float2x3", tokenid::float2x3 },
    274 	{ "float2x4", tokenid::float2x4 },
    275 	{ "float3", tokenid::float3 },
    276 	{ "float3x1", tokenid::float3 },
    277 	{ "float3x2", tokenid::float3x2 },
    278 	{ "float3x3", tokenid::float3x3 },
    279 	{ "float3x4", tokenid::float3x4 },
    280 	{ "float4", tokenid::float4 },
    281 	{ "float4x1", tokenid::float4 },
    282 	{ "float4x2", tokenid::float4x2 },
    283 	{ "float4x3", tokenid::float4x3 },
    284 	{ "float4x4", tokenid::float4x4 },
    285 	{ "for", tokenid::for_ },
    286 	{ "foreach", tokenid::reserved },
    287 	{ "friend", tokenid::reserved },
    288 	{ "globallycoherent", tokenid::reserved },
    289 	{ "goto", tokenid::reserved },
    290 	{ "groupshared", tokenid::groupshared },
    291 	{ "half", tokenid::reserved },
    292 	{ "half2", tokenid::reserved },
    293 	{ "half2x1", tokenid::reserved },
    294 	{ "half2x2", tokenid::reserved },
    295 	{ "half2x3", tokenid::reserved },
    296 	{ "half2x4", tokenid::reserved },
    297 	{ "half3", tokenid::reserved },
    298 	{ "half3x1", tokenid::reserved },
    299 	{ "half3x2", tokenid::reserved },
    300 	{ "half3x3", tokenid::reserved },
    301 	{ "half3x4", tokenid::reserved },
    302 	{ "half4", tokenid::reserved },
    303 	{ "half4x1", tokenid::reserved },
    304 	{ "half4x2", tokenid::reserved },
    305 	{ "half4x3", tokenid::reserved },
    306 	{ "half4x4", tokenid::reserved },
    307 	{ "if", tokenid::if_ },
    308 	{ "in", tokenid::in },
    309 	{ "inline", tokenid::reserved },
    310 	{ "inout", tokenid::inout },
    311 	{ "int", tokenid::int_ },
    312 	{ "int2", tokenid::int2 },
    313 	{ "int2x1", tokenid::int2 },
    314 	{ "int2x2", tokenid::int2x2 },
    315 	{ "int2x3", tokenid::int2x3 },
    316 	{ "int2x4", tokenid::int2x4 },
    317 	{ "int3", tokenid::int3 },
    318 	{ "int3x1", tokenid::int3 },
    319 	{ "int3x2", tokenid::int3x2 },
    320 	{ "int3x3", tokenid::int3x3 },
    321 	{ "int3x4", tokenid::int3x4 },
    322 	{ "int4", tokenid::int4 },
    323 	{ "int4x1", tokenid::int4 },
    324 	{ "int4x2", tokenid::int4x2 },
    325 	{ "int4x3", tokenid::int4x3 },
    326 	{ "int4x4", tokenid::int4x4 },
    327 	{ "interface", tokenid::reserved },
    328 	{ "linear", tokenid::linear },
    329 	{ "long", tokenid::reserved },
    330 	{ "matrix", tokenid::matrix },
    331 	{ "min16float", tokenid::min16float },
    332 	{ "min16float2", tokenid::min16float2 },
    333 	{ "min16float3", tokenid::min16float3 },
    334 	{ "min16float4", tokenid::min16float4 },
    335 	{ "min16int", tokenid::min16int },
    336 	{ "min16int2", tokenid::min16int2 },
    337 	{ "min16int3", tokenid::min16int3 },
    338 	{ "min16int4", tokenid::min16int4 },
    339 	{ "min16uint", tokenid::min16uint },
    340 	{ "min16uint2", tokenid::min16uint2 },
    341 	{ "min16uint3", tokenid::min16uint3 },
    342 	{ "min16uint4", tokenid::min16uint4 },
    343 	{ "mutable", tokenid::reserved },
    344 	{ "namespace", tokenid::namespace_ },
    345 	{ "new", tokenid::reserved },
    346 	{ "noinline", tokenid::reserved },
    347 	{ "nointerpolation", tokenid::nointerpolation },
    348 	{ "noperspective", tokenid::noperspective },
    349 	{ "operator", tokenid::reserved },
    350 	{ "out", tokenid::out },
    351 	{ "packed", tokenid::reserved },
    352 	{ "packoffset", tokenid::reserved },
    353 	{ "pass", tokenid::pass },
    354 	{ "precise", tokenid::precise },
    355 	{ "private", tokenid::reserved },
    356 	{ "protected", tokenid::reserved },
    357 	{ "public", tokenid::reserved },
    358 	{ "register", tokenid::reserved },
    359 	{ "reinterpret_cast", tokenid::reserved },
    360 	{ "restrict", tokenid::reserved },
    361 	{ "return", tokenid::return_ },
    362 	{ "row_major", tokenid::reserved },
    363 	{ "sample", tokenid::reserved },
    364 	{ "sampler", tokenid::sampler2d },
    365 	{ "sampler1D", tokenid::sampler1d },
    366 	{ "sampler1DArray", tokenid::reserved },
    367 	{ "sampler2D", tokenid::sampler2d },
    368 	{ "sampler2DArray", tokenid::reserved },
    369 	{ "sampler2DMS", tokenid::reserved },
    370 	{ "sampler2DMSArray", tokenid::reserved },
    371 	{ "sampler3D", tokenid::sampler3d },
    372 	{ "sampler_state", tokenid::reserved },
    373 	{ "samplerCube", tokenid::reserved },
    374 	{ "samplerCubeArray", tokenid::reserved },
    375 	{ "samplerCUBE", tokenid::reserved },
    376 	{ "samplerRect", tokenid::reserved },
    377 	{ "samplerRECT", tokenid::reserved },
    378 	{ "SamplerState", tokenid::reserved },
    379 	{ "storage", tokenid::storage2d },
    380 	{ "storage1D", tokenid::storage1d },
    381 	{ "storage2D", tokenid::storage2d },
    382 	{ "storage3D", tokenid::storage3d },
    383 	{ "shared", tokenid::reserved },
    384 	{ "short", tokenid::reserved },
    385 	{ "signed", tokenid::reserved },
    386 	{ "sizeof", tokenid::reserved },
    387 	{ "snorm", tokenid::reserved },
    388 	{ "static", tokenid::static_ },
    389 	{ "static_cast", tokenid::reserved },
    390 	{ "string", tokenid::string_ },
    391 	{ "struct", tokenid::struct_ },
    392 	{ "switch", tokenid::switch_ },
    393 	{ "technique", tokenid::technique },
    394 	{ "template", tokenid::reserved },
    395 	{ "texture", tokenid::texture2d },
    396 	{ "Texture1D", tokenid::reserved },
    397 	{ "texture1D", tokenid::texture1d },
    398 	{ "Texture1DArray", tokenid::reserved },
    399 	{ "Texture2D", tokenid::reserved },
    400 	{ "texture2D", tokenid::texture2d },
    401 	{ "Texture2DArray", tokenid::reserved },
    402 	{ "Texture2DMS", tokenid::reserved },
    403 	{ "Texture2DMSArray", tokenid::reserved },
    404 	{ "Texture3D", tokenid::reserved },
    405 	{ "texture3D", tokenid::texture3d },
    406 	{ "textureCUBE", tokenid::reserved },
    407 	{ "TextureCube", tokenid::reserved },
    408 	{ "TextureCubeArray", tokenid::reserved },
    409 	{ "textureRECT", tokenid::reserved },
    410 	{ "this", tokenid::reserved },
    411 	{ "true", tokenid::true_literal },
    412 	{ "TRUE", tokenid::true_literal },
    413 	{ "try", tokenid::reserved },
    414 	{ "typedef", tokenid::reserved },
    415 	{ "uint", tokenid::uint_ },
    416 	{ "uint2", tokenid::uint2 },
    417 	{ "uint2x1", tokenid::uint2 },
    418 	{ "uint2x2", tokenid::uint2x2 },
    419 	{ "uint2x3", tokenid::uint2x3 },
    420 	{ "uint2x4", tokenid::uint2x4 },
    421 	{ "uint3", tokenid::uint3 },
    422 	{ "uint3x1", tokenid::uint3 },
    423 	{ "uint3x2", tokenid::uint3x2 },
    424 	{ "uint3x3", tokenid::uint3x3 },
    425 	{ "uint3x4", tokenid::uint3x4 },
    426 	{ "uint4", tokenid::uint4 },
    427 	{ "uint4x1", tokenid::uint4 },
    428 	{ "uint4x2", tokenid::uint4x2 },
    429 	{ "uint4x3", tokenid::uint4x3 },
    430 	{ "uint4x4", tokenid::uint4x4 },
    431 	{ "uniform", tokenid::uniform_ },
    432 	{ "union", tokenid::reserved },
    433 	{ "unorm", tokenid::reserved },
    434 	{ "unsigned", tokenid::reserved },
    435 	{ "using", tokenid::reserved },
    436 	{ "vector", tokenid::vector },
    437 	{ "virtual", tokenid::reserved },
    438 	{ "void", tokenid::void_ },
    439 	{ "volatile", tokenid::volatile_ },
    440 	{ "while", tokenid::while_ }
    441 };
    442 static const std::unordered_map<std::string_view, tokenid> pp_directive_lookup = {
    443 	{ "define", tokenid::hash_def },
    444 	{ "undef", tokenid::hash_undef },
    445 	{ "if", tokenid::hash_if },
    446 	{ "ifdef", tokenid::hash_ifdef },
    447 	{ "ifndef", tokenid::hash_ifndef },
    448 	{ "else", tokenid::hash_else },
    449 	{ "elif", tokenid::hash_elif },
    450 	{ "endif", tokenid::hash_endif },
    451 	{ "error", tokenid::hash_error },
    452 	{ "warning", tokenid::hash_warning },
    453 	{ "pragma", tokenid::hash_pragma },
    454 	{ "include", tokenid::hash_include },
    455 };
    456 
    457 static inline bool is_octal_digit(char c)
    458 {
    459 	return static_cast<unsigned>(c - '0') < 8;
    460 }
    461 static inline bool is_decimal_digit(char c)
    462 {
    463 	return static_cast<unsigned>(c - '0') < 10;
    464 }
    465 static inline bool is_hexadecimal_digit(char c)
    466 {
    467 	return is_decimal_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
    468 }
    469 
    470 static bool is_digit(char c, int radix)
    471 {
    472 	switch (radix)
    473 	{
    474 	case 8:
    475 		return is_octal_digit(c);
    476 	case 10:
    477 		return is_decimal_digit(c);
    478 	case 16:
    479 		return is_hexadecimal_digit(c);
    480 	}
    481 
    482 	return false;
    483 }
    484 static long long octal_to_decimal(long long n)
    485 {
    486 	long long m = 0;
    487 
    488 	while (n != 0)
    489 	{
    490 		m *= 8;
    491 		m += n & 7;
    492 		n >>= 3;
    493 	}
    494 
    495 	while (m != 0)
    496 	{
    497 		n *= 10;
    498 		n += m & 7;
    499 		m >>= 3;
    500 	}
    501 
    502 	return n;
    503 }
    504 
    505 std::string reshadefx::token::id_to_name(tokenid id)
    506 {
    507 	const auto it = token_lookup.find(id);
    508 	if (it != token_lookup.end())
    509 		return std::string(it->second);
    510 	return "unknown";
    511 }
    512 
    513 reshadefx::token reshadefx::lexer::lex()
    514 {
    515 	bool is_at_line_begin = _cur_location.column <= 1;
    516 
    517 	token tok;
    518 next_token:
    519 	// Reset token data
    520 	tok.location = _cur_location;
    521 	tok.offset = input_offset();
    522 	tok.length = 1;
    523 	tok.literal_as_double = 0;
    524 	tok.literal_as_string.clear();
    525 
    526 	assert(_cur <= _end);
    527 
    528 	// Do a character type lookup for the current character
    529 	switch (type_lookup[uint8_t(*_cur)])
    530 	{
    531 	case 0xFF: // EOF
    532 		tok.id = tokenid::end_of_file;
    533 		return tok;
    534 	case SPACE:
    535 		skip_space();
    536 		if (_ignore_whitespace || is_at_line_begin || *_cur == '\n')
    537 			goto next_token;
    538 		tok.id = tokenid::space;
    539 		tok.length = input_offset() - tok.offset;
    540 		return tok;
    541 	case '\n':
    542 		_cur++;
    543 		_cur_location.line++;
    544 		_cur_location.column = 1;
    545 		is_at_line_begin = true;
    546 		if (_ignore_whitespace)
    547 			goto next_token;
    548 		tok.id = tokenid::end_of_line;
    549 		return tok;
    550 	case DIGIT:
    551 		parse_numeric_literal(tok);
    552 		break;
    553 	case IDENT:
    554 		parse_identifier(tok);
    555 		break;
    556 	case '!':
    557 		if (_cur[1] == '=')
    558 			tok.id = tokenid::exclaim_equal,
    559 			tok.length = 2;
    560 		else
    561 			tok.id = tokenid::exclaim;
    562 		break;
    563 	case '"':
    564 		parse_string_literal(tok, _escape_string_literals);
    565 		break;
    566 	case '#':
    567 		if (is_at_line_begin)
    568 		{
    569 			if (!parse_pp_directive(tok) || _ignore_pp_directives)
    570 			{
    571 				skip_to_next_line();
    572 				goto next_token;
    573 			}
    574 		} // These braces are important so the 'else' is matched to the right 'if' statement
    575 		else
    576 		tok.id = tokenid::hash;
    577 		break;
    578 	case '$':
    579 		tok.id = tokenid::dollar;
    580 		break;
    581 	case '%':
    582 		if (_cur[1] == '=')
    583 			tok.id = tokenid::percent_equal,
    584 			tok.length = 2;
    585 		else
    586 			tok.id = tokenid::percent;
    587 		break;
    588 	case '&':
    589 		if (_cur[1] == '&')
    590 			tok.id = tokenid::ampersand_ampersand,
    591 			tok.length = 2;
    592 		else if (_cur[1] == '=')
    593 			tok.id = tokenid::ampersand_equal,
    594 			tok.length = 2;
    595 		else
    596 			tok.id = tokenid::ampersand;
    597 		break;
    598 	case '(':
    599 		tok.id = tokenid::parenthesis_open;
    600 		break;
    601 	case ')':
    602 		tok.id = tokenid::parenthesis_close;
    603 		break;
    604 	case '*':
    605 		if (_cur[1] == '=')
    606 			tok.id = tokenid::star_equal,
    607 			tok.length = 2;
    608 		else
    609 			tok.id = tokenid::star;
    610 		break;
    611 	case '+':
    612 		if (_cur[1] == '+')
    613 			tok.id = tokenid::plus_plus,
    614 			tok.length = 2;
    615 		else if (_cur[1] == '=')
    616 			tok.id = tokenid::plus_equal,
    617 			tok.length = 2;
    618 		else
    619 			tok.id = tokenid::plus;
    620 		break;
    621 	case ',':
    622 		tok.id = tokenid::comma;
    623 		break;
    624 	case '-':
    625 		if (_cur[1] == '-')
    626 			tok.id = tokenid::minus_minus,
    627 			tok.length = 2;
    628 		else if (_cur[1] == '=')
    629 			tok.id = tokenid::minus_equal,
    630 			tok.length = 2;
    631 		else if (_cur[1] == '>')
    632 			tok.id = tokenid::arrow,
    633 			tok.length = 2;
    634 		else
    635 			tok.id = tokenid::minus;
    636 		break;
    637 	case '.':
    638 		if (type_lookup[uint8_t(_cur[1])] == DIGIT)
    639 			parse_numeric_literal(tok);
    640 		else if (_cur[1] == '.' && _cur[2] == '.')
    641 			tok.id = tokenid::ellipsis,
    642 			tok.length = 3;
    643 		else
    644 			tok.id = tokenid::dot;
    645 		break;
    646 	case '/':
    647 		if (_cur[1] == '/')
    648 		{
    649 			skip_to_next_line();
    650 			if (_ignore_comments)
    651 				goto next_token;
    652 			tok.id = tokenid::single_line_comment;
    653 			tok.length = input_offset() - tok.offset;
    654 			return tok;
    655 		}
    656 		else if (_cur[1] == '*')
    657 		{
    658 			while (_cur < _end)
    659 			{
    660 				if (*_cur == '\n')
    661 				{
    662 					_cur_location.line++;
    663 					_cur_location.column = 1;
    664 				}
    665 				else if (_cur[0] == '*' && _cur[1] == '/')
    666 				{
    667 					skip(2);
    668 					break;
    669 				}
    670 				skip(1);
    671 			}
    672 			if (_ignore_comments)
    673 				goto next_token;
    674 			tok.id = tokenid::multi_line_comment;
    675 			tok.length = input_offset() - tok.offset;
    676 			return tok;
    677 		}
    678 		else if (_cur[1] == '=')
    679 			tok.id = tokenid::slash_equal,
    680 			tok.length = 2;
    681 		else
    682 			tok.id = tokenid::slash;
    683 		break;
    684 	case ':':
    685 		if (_cur[1] == ':')
    686 			tok.id = tokenid::colon_colon,
    687 			tok.length = 2;
    688 		else
    689 			tok.id = tokenid::colon;
    690 		break;
    691 	case ';':
    692 		tok.id = tokenid::semicolon;
    693 		break;
    694 	case '<':
    695 		if (_cur[1] == '<')
    696 			if (_cur[2] == '=')
    697 				tok.id = tokenid::less_less_equal,
    698 				tok.length = 3;
    699 			else
    700 				tok.id = tokenid::less_less,
    701 				tok.length = 2;
    702 		else if (_cur[1] == '=')
    703 			tok.id = tokenid::less_equal,
    704 			tok.length = 2;
    705 		else
    706 			tok.id = tokenid::less;
    707 		break;
    708 	case '=':
    709 		if (_cur[1] == '=')
    710 			tok.id = tokenid::equal_equal,
    711 			tok.length = 2;
    712 		else
    713 			tok.id = tokenid::equal;
    714 		break;
    715 	case '>':
    716 		if (_cur[1] == '>')
    717 			if (_cur[2] == '=')
    718 				tok.id = tokenid::greater_greater_equal,
    719 				tok.length = 3;
    720 			else
    721 				tok.id = tokenid::greater_greater,
    722 				tok.length = 2;
    723 		else if (_cur[1] == '=')
    724 			tok.id = tokenid::greater_equal,
    725 			tok.length = 2;
    726 		else
    727 			tok.id = tokenid::greater;
    728 		break;
    729 	case '?':
    730 		tok.id = tokenid::question;
    731 		break;
    732 	case '@':
    733 		tok.id = tokenid::at;
    734 		break;
    735 	case '[':
    736 		tok.id = tokenid::bracket_open;
    737 		break;
    738 	case '\\':
    739 		if (_cur[1] == '\n' || (_cur[1] == '\r' && _cur[2] == '\n'))
    740 		{
    741 			// Skip to next line if current line ends with a backslash
    742 			skip_space();
    743 			if (_ignore_whitespace)
    744 				goto next_token;
    745 			tok.id = tokenid::space;
    746 			tok.length = input_offset() - tok.offset;
    747 			return tok;
    748 		}
    749 		tok.id = tokenid::backslash;
    750 		break;
    751 	case ']':
    752 		tok.id = tokenid::bracket_close;
    753 		break;
    754 	case '^':
    755 		if (_cur[1] == '=')
    756 			tok.id = tokenid::caret_equal,
    757 			tok.length = 2;
    758 		else
    759 			tok.id = tokenid::caret;
    760 		break;
    761 	case '{':
    762 		tok.id = tokenid::brace_open;
    763 		break;
    764 	case '|':
    765 		if (_cur[1] == '=')
    766 			tok.id = tokenid::pipe_equal,
    767 			tok.length = 2;
    768 		else if (_cur[1] == '|')
    769 			tok.id = tokenid::pipe_pipe,
    770 			tok.length = 2;
    771 		else
    772 			tok.id = tokenid::pipe;
    773 		break;
    774 	case '}':
    775 		tok.id = tokenid::brace_close;
    776 		break;
    777 	case '~':
    778 		tok.id = tokenid::tilde;
    779 		break;
    780 	default:
    781 		tok.id = tokenid::unknown;
    782 		break;
    783 	}
    784 
    785 	skip(tok.length);
    786 
    787 	return tok;
    788 }
    789 
    790 void reshadefx::lexer::skip(size_t length)
    791 {
    792 	_cur += length;
    793 	_cur_location.column += static_cast<unsigned int>(length);
    794 }
    795 void reshadefx::lexer::skip_space()
    796 {
    797 	// Skip each character until a space is found
    798 	while (_cur < _end)
    799 	{
    800 		if (_cur[0] == '\\' && (_cur[1] == '\n' || (_cur[1] == '\r' && _cur[2] == '\n')))
    801 		{
    802 			skip(_cur[1] == '\r' ? 3 : 2);
    803 			_cur_location.line++;
    804 			_cur_location.column = 1;
    805 			continue;
    806 		}
    807 
    808 		if (type_lookup[uint8_t(*_cur)] == SPACE)
    809 			skip(1);
    810 		else
    811 			break;
    812 	}
    813 }
    814 void reshadefx::lexer::skip_to_next_line()
    815 {
    816 	// Skip each character until a new line feed is found
    817 	while (*_cur != '\n' && _cur < _end)
    818 	{
    819 #if 0
    820 		if (_cur[0] == '\\' && (_cur[1] == '\n' || (_cur[1] == '\r' && _cur[2] == '\n')))
    821 		{
    822 			skip(_cur[1] == '\r' ? 3 : 2);
    823 			_cur_location.line++;
    824 			_cur_location.column = 1;
    825 			continue;
    826 		}
    827 #endif
    828 
    829 		skip(1);
    830 	}
    831 }
    832 
    833 void reshadefx::lexer::reset_to_offset(size_t offset)
    834 {
    835 	assert(offset < _input.size());
    836 	_cur = _input.data() + offset;
    837 }
    838 
    839 void reshadefx::lexer::parse_identifier(token &tok) const
    840 {
    841 	auto *const begin = _cur, *end = begin;
    842 
    843 	// Skip to the end of the identifier sequence
    844 	while (type_lookup[uint8_t(*end)] == IDENT || type_lookup[uint8_t(*end)] == DIGIT)
    845 		end++;
    846 
    847 	tok.id = tokenid::identifier;
    848 	tok.offset = input_offset();
    849 	tok.length = end - begin;
    850 	tok.literal_as_string.assign(begin, end);
    851 
    852 	if (_ignore_keywords)
    853 		return;
    854 
    855 	if (const auto it = keyword_lookup.find(tok.literal_as_string);
    856 		it != keyword_lookup.end())
    857 		tok.id = it->second;
    858 }
    859 bool reshadefx::lexer::parse_pp_directive(token &tok)
    860 {
    861 	skip(1); // Skip the '#'
    862 	skip_space(); // Skip any space between the '#' and directive
    863 	parse_identifier(tok);
    864 
    865 	if (const auto it = pp_directive_lookup.find(tok.literal_as_string);
    866 		it != pp_directive_lookup.end())
    867 	{
    868 		tok.id = it->second;
    869 		return true;
    870 	}
    871 	else if (!_ignore_line_directives && tok.literal_as_string == "line") // The #line directive needs special handling
    872 	{
    873 		skip(tok.length); // The 'parse_identifier' does not update the pointer to the current character, so do that now
    874 		skip_space();
    875 		parse_numeric_literal(tok);
    876 		skip(tok.length);
    877 
    878 		_cur_location.line = tok.literal_as_int;
    879 
    880 		// Need to subtract one since the line containing #line does not count into the statistics
    881 		if (_cur_location.line != 0)
    882 			_cur_location.line--;
    883 
    884 		skip_space();
    885 
    886 		// Check if this #line directive has an file name attached to it
    887 		if (_cur[0] == '"')
    888 		{
    889 			token temptok;
    890 			parse_string_literal(temptok, false);
    891 
    892 			_cur_location.source = std::move(temptok.literal_as_string);
    893 		}
    894 
    895 		// Do not return the #line directive as token to the caller
    896 		return false;
    897 	}
    898 
    899 	tok.id = tokenid::hash_unknown;
    900 
    901 	return true;
    902 }
    903 void reshadefx::lexer::parse_string_literal(token &tok, bool escape)
    904 {
    905 	auto *const begin = _cur, *end = begin + 1; // Skip first quote character right away
    906 
    907 	for (auto c = *end; c != '"'; c = *++end)
    908 	{
    909 		if (c == '\n' || end >= _end)
    910 		{
    911 			// Line feed reached, the string literal is done (technically this should be an error, but the lexer does not report errors, so ignore it)
    912 			end--;
    913 			if (end[0] == '\r') end--;
    914 			break;
    915 		}
    916 
    917 		if (c == '\r')
    918 		{
    919 			// Silently ignore carriage return characters
    920 			continue;
    921 		}
    922 
    923 		if (unsigned int n = (end[1] == '\r' && end + 2 < _end) ? 2 : 1;
    924 			c == '\\' && end[n] == '\n')
    925 		{
    926 			// Escape character found at end of line, the string literal continues on to the next line
    927 			end += n;
    928 			_cur_location.line++;
    929 			continue;
    930 		}
    931 
    932 		// Handle escape sequences
    933 		if (c == '\\' && escape)
    934 		{
    935 			unsigned int n = 0;
    936 
    937 			// Any character following the '\' is not parsed as usual, so increment pointer here (this makes sure '\"' does not abort the outer loop as well)
    938 			switch (c = *++end)
    939 			{
    940 			case '0':
    941 			case '1':
    942 			case '2':
    943 			case '3':
    944 			case '4':
    945 			case '5':
    946 			case '6':
    947 			case '7':
    948 				for (unsigned int i = 0; i < 3 && is_octal_digit(*end) && end < _end; i++)
    949 				{
    950 					c = *end++;
    951 					n = (n << 3) | (c - '0');
    952 				}
    953 				// For simplicity the number is limited to what fits in a single character
    954 				c = n & 0xFF;
    955 				// The octal parsing loop above incremented one pass the escape sequence, so step back
    956 				end--;
    957 				break;
    958 			case 'a':
    959 				c = '\a';
    960 				break;
    961 			case 'b':
    962 				c = '\b';
    963 				break;
    964 			case 'f':
    965 				c = '\f';
    966 				break;
    967 			case 'n':
    968 				c = '\n';
    969 				break;
    970 			case 'r':
    971 				c = '\r';
    972 				break;
    973 			case 't':
    974 				c = '\t';
    975 				break;
    976 			case 'v':
    977 				c = '\v';
    978 				break;
    979 			case 'x':
    980 				if (is_hexadecimal_digit(*++end))
    981 				{
    982 					while (is_hexadecimal_digit(*end) && end < _end)
    983 					{
    984 						c = *end++;
    985 						n = (n << 4) | (is_decimal_digit(c) ? (c - '0') : (c - 55 - 32 * (c & 0x20)));
    986 					}
    987 
    988 					// For simplicity the number is limited to what fits in a single character
    989 					c = n & 0xFF;
    990 				}
    991 				// The hexadecimal parsing loop and check above incremented one pass the escape sequence, so step back
    992 				end--;
    993 				break;
    994 			}
    995 		}
    996 
    997 		tok.literal_as_string += c;
    998 	}
    999 
   1000 	tok.id = tokenid::string_literal;
   1001 	tok.length = end - begin + 1;
   1002 }
   1003 void reshadefx::lexer::parse_numeric_literal(token &tok) const
   1004 {
   1005 	// This routine handles both integer and floating point numbers
   1006 	auto *const begin = _cur, *end = _cur;
   1007 	int mantissa_size = 0, decimal_location = -1, radix = 10;
   1008 	long long fraction = 0, exponent = 0;
   1009 
   1010 	// If a literal starts with '0' it is either an octal or hexadecimal ('0x') value
   1011 	if (begin[0] == '0')
   1012 	{
   1013 		if (begin[1] == 'x' || begin[1] == 'X')
   1014 		{
   1015 			end = begin + 2;
   1016 			radix = 16;
   1017 		}
   1018 		else
   1019 		{
   1020 			radix = 8;
   1021 		}
   1022 	}
   1023 
   1024 	for (; mantissa_size <= 18; mantissa_size++, end++)
   1025 	{
   1026 		auto c = *end;
   1027 
   1028 		if (is_decimal_digit(c))
   1029 		{
   1030 			c -= '0';
   1031 
   1032 			if (c >= radix)
   1033 				break;
   1034 		}
   1035 		else if (radix == 16)
   1036 		{
   1037 			// Hexadecimal values can contain the letters A to F
   1038 			if (c >= 'A' && c <= 'F')
   1039 				c -= 'A' - 10;
   1040 			else if (c >= 'a' && c <= 'f')
   1041 				c -= 'a' - 10;
   1042 			else
   1043 				break;
   1044 		}
   1045 		else
   1046 		{
   1047 			if (c != '.' || decimal_location >= 0)
   1048 				break;
   1049 
   1050 			// Found a decimal character, as such convert current values
   1051 			if (radix == 8)
   1052 			{
   1053 				radix = 10;
   1054 				fraction = octal_to_decimal(fraction);
   1055 			}
   1056 
   1057 			decimal_location = mantissa_size;
   1058 			continue;
   1059 		}
   1060 
   1061 		fraction *= radix;
   1062 		fraction += c;
   1063 	}
   1064 
   1065 	// Ignore additional digits that cannot affect the value
   1066 	while (is_digit(*end, radix))
   1067 		end++;
   1068 
   1069 	// If a decimal character was found, this is a floating point value, otherwise an integer one
   1070 	if (decimal_location < 0)
   1071 	{
   1072 		tok.id = tokenid::int_literal;
   1073 		decimal_location = mantissa_size;
   1074 	}
   1075 	else
   1076 	{
   1077 		tok.id = tokenid::float_literal;
   1078 		mantissa_size -= 1;
   1079 	}
   1080 
   1081 	// Literals can be followed by an exponent
   1082 	if (*end == 'E' || *end == 'e')
   1083 	{
   1084 		auto tmp = end + 1;
   1085 		const bool negative = *tmp == '-';
   1086 
   1087 		if (negative || *tmp == '+')
   1088 			tmp++;
   1089 
   1090 		if (is_decimal_digit(*tmp))
   1091 		{
   1092 			end = tmp;
   1093 
   1094 			tok.id = tokenid::float_literal;
   1095 
   1096 			do {
   1097 				exponent *= 10;
   1098 				exponent += (*end++) - '0';
   1099 			} while (is_decimal_digit(*end));
   1100 
   1101 			if (negative)
   1102 				exponent = -exponent;
   1103 		}
   1104 	}
   1105 
   1106 	// Various suffixes force specific literal types
   1107 	if (*end == 'F' || *end == 'f')
   1108 	{
   1109 		end++; // Consume the suffix
   1110 		tok.id = tokenid::float_literal;
   1111 	}
   1112 	else if (*end == 'L' || *end == 'l')
   1113 	{
   1114 		end++; // Consume the suffix
   1115 		tok.id = tokenid::double_literal;
   1116 	}
   1117 	else if (tok.id == tokenid::int_literal && (*end == 'U' || *end == 'u')) // The 'u' suffix is only valid on integers and needs to be ignored otherwise
   1118 	{
   1119 		end++; // Consume the suffix
   1120 		tok.id = tokenid::uint_literal;
   1121 	}
   1122 
   1123 	if (tok.id == tokenid::float_literal || tok.id == tokenid::double_literal)
   1124 	{
   1125 		exponent += decimal_location - mantissa_size;
   1126 
   1127 		const bool exponent_negative = exponent < 0;
   1128 
   1129 		if (exponent_negative)
   1130 			exponent = -exponent;
   1131 
   1132 		// Limit exponent
   1133 		if (exponent > 511)
   1134 			exponent = 511;
   1135 
   1136 		// Quick exponent calculation
   1137 		double e = 1.0;
   1138 		const double powers_of_10[] = {
   1139 			10.,
   1140 			100.,
   1141 			1.0e4,
   1142 			1.0e8,
   1143 			1.0e16,
   1144 			1.0e32,
   1145 			1.0e64,
   1146 			1.0e128,
   1147 			1.0e256
   1148 		};
   1149 
   1150 		for (auto d = powers_of_10; exponent != 0; exponent >>= 1, d++)
   1151 			if (exponent & 1)
   1152 				e *= *d;
   1153 
   1154 		if (tok.id == tokenid::float_literal)
   1155 			tok.literal_as_float = exponent_negative ? fraction / static_cast<float>(e) : fraction * static_cast<float>(e);
   1156 		else
   1157 			tok.literal_as_double = exponent_negative ? fraction / e : fraction * e;
   1158 	}
   1159 	else
   1160 	{
   1161 		// Limit the maximum value to what fits into our token structure
   1162 		tok.literal_as_uint = static_cast<unsigned int>(fraction & 0xFFFFFFFF);
   1163 	}
   1164 
   1165 	tok.length = end - begin;
   1166 }
	duckstation duckstation, but archived from the revision just before upstream changed it to a proprietary software project, this version is the libre one
	git clone https://git.neptards.moe/u3shit/duckstation.git
	Log \| Files \| Refs \| README \| LICENSE