libshit

Just some random shit
git clone https://git.neptards.moe/neptards/libshit.git
Log | Files | Refs | Submodules | README | LICENSE

llex.c (16163B)


      1 /*
      2 ** $Id: llex.c,v 2.96.1.1 2017/04/19 17:20:42 roberto Exp $
      3 ** Lexical Analyzer
      4 ** See Copyright Notice in lua.h
      5 */
      6 
      7 #define llex_c
      8 #define LUA_CORE
      9 
     10 #include "lprefix.h"
     11 
     12 
     13 #include <locale.h>
     14 #include <string.h>
     15 
     16 #include "lua.h"
     17 
     18 #include "lctype.h"
     19 #include "ldebug.h"
     20 #include "ldo.h"
     21 #include "lgc.h"
     22 #include "llex.h"
     23 #include "lobject.h"
     24 #include "lparser.h"
     25 #include "lstate.h"
     26 #include "lstring.h"
     27 #include "ltable.h"
     28 #include "lzio.h"
     29 
     30 
     31 
     /*
     ** Advance the scanner by one character: store the result of 'zgetc'
     ** on the input stream 'ls->z' into 'ls->current'. The macro yields the
     ** new current character. Note that 'ls' is evaluated twice.
     */
     #define next(ls)	((ls)->current = zgetc((ls)->z))
     33 
     34 
     35 
     /*
     ** True when the current character is '\n' or '\r'. The argument is
     ** parenthesized so that any pointer expression (e.g. '&state') can be
     ** passed; note that it is evaluated twice.
     */
     #define currIsNewline(ls) \
     	((ls)->current == '\n' || (ls)->current == '\r')
     37 
     38 
     /*
     ** Printable text of every multi-character token, indexed by
     ** (token - FIRST_RESERVED); see 'luaX_token2str'. 'luaX_init' registers
     ** the first NUM_RESERVED entries as reserved words.
     ** ORDER RESERVED
     */
     static const char *const luaX_tokens [] = {
       /* words */
       "and", "break", "do", "else", "elseif", "end", "false", "for",
       "function", "goto", "if", "in", "local", "nil", "not", "or",
       "repeat", "return", "then", "true", "until", "while",
       /* multi-character operators */
       "//", "..", "...",
       "==", ">=", "<=", "~=",
       "<<", ">>", "::",
       /* end of stream */
       "<eof>",
       /* generic names for tokens that carry a value */
       "<number>", "<integer>", "<name>", "<string>"
     };
     49 
     50 
     /*
     ** Append the current character to the token buffer (via 'save') and
     ** then advance to the next one (via 'next'). The argument is
     ** parenthesized where it is dereferenced; it is evaluated more than once.
     */
     #define save_and_next(ls) (save(ls, (ls)->current), next(ls))
     52 
     53 
     54 static l_noret lexerror (LexState *ls, const char *msg, int token);
     55 
     56 
     57 static void save (LexState *ls, int c) {
     58   Mbuffer *b = ls->buff;
     59   if (luaZ_bufflen(b) + 1 > luaZ_sizebuffer(b)) {
     60     size_t newsize;
     61     if (luaZ_sizebuffer(b) >= MAX_SIZE/2)
     62       lexerror(ls, "lexical element too long", 0);
     63     newsize = luaZ_sizebuffer(b) * 2;
     64     luaZ_resizebuffer(ls->L, b, newsize);
     65   }
     66   b->buffer[luaZ_bufflen(b)++] = cast(char, c);
     67 }
     68 
     69 
     70 void luaX_init (lua_State *L) {
     71   int i;
     72   TString *e = luaS_newliteral(L, LUA_ENV);  /* create env name */
     73   luaC_fix(L, obj2gco(e));  /* never collect this name */
     74   for (i=0; i<NUM_RESERVED; i++) {
     75     TString *ts = luaS_new(L, luaX_tokens[i]);
     76     luaC_fix(L, obj2gco(ts));  /* reserved words are never collected */
     77     ts->extra = cast_byte(i+1);  /* reserved word */
     78   }
     79 }
     80 
     81 
     82 const char *luaX_token2str (LexState *ls, int token) {
     83   if (token < FIRST_RESERVED) {  /* single-byte symbols? */
     84     lua_assert(token == cast_uchar(token));
     85     return luaO_pushfstring(ls->L, "'%c'", token);
     86   }
     87   else {
     88     const char *s = luaX_tokens[token - FIRST_RESERVED];
     89     if (token < TK_EOS)  /* fixed format (symbols and reserved words)? */
     90       return luaO_pushfstring(ls->L, "'%s'", s);
     91     else  /* names, strings, and numerals */
     92       return s;
     93   }
     94 }
     95 
     96 
     97 static const char *txtToken (LexState *ls, int token) {
     98   switch (token) {
     99     case TK_NAME: case TK_STRING:
    100     case TK_FLT: case TK_INT:
    101       save(ls, '\0');
    102       return luaO_pushfstring(ls->L, "'%s'", luaZ_buffer(ls->buff));
    103     default:
    104       return luaX_token2str(ls, token);
    105   }
    106 }
    107 
    108 
    109 static l_noret lexerror (LexState *ls, const char *msg, int token) {
    110   msg = luaG_addinfo(ls->L, msg, ls->source, ls->linenumber);
    111   if (token)
    112     luaO_pushfstring(ls->L, "%s near %s", msg, txtToken(ls, token));
    113   luaD_throw(ls->L, LUA_ERRSYNTAX);
    114 }
    115 
    116 
    117 l_noret luaX_syntaxerror (LexState *ls, const char *msg) {
    118   lexerror(ls, msg, ls->t.token);
    119 }
    120 
    121 
    122 /*
    123 ** creates a new string and anchors it in scanner's table so that
    124 ** it will not be collected until the end of the compilation
    125 ** (by that time it should be anchored somewhere)
    126 */
    127 TString *luaX_newstring (LexState *ls, const char *str, size_t l) {
    128   lua_State *L = ls->L;
    129   TValue *o;  /* entry for 'str' */
    130   TString *ts = luaS_newlstr(L, str, l);  /* create new string */
    131   setsvalue2s(L, L->top++, ts);  /* temporarily anchor it in stack */
    132   o = luaH_set(L, ls->h, L->top - 1);
    133   if (ttisnil(o)) {  /* not in use yet? */
    134     /* boolean value does not need GC barrier;
    135        table has no metatable, so it does not need to invalidate cache */
    136     setbvalue(o, 1);  /* t[string] = true */
    137     luaC_checkGC(L);
    138   }
    139   else {  /* string already present */
    140     ts = tsvalue(keyfromval(o));  /* re-use value previously stored */
    141   }
    142   L->top--;  /* remove string from stack */
    143   return ts;
    144 }
    145 
    146 
    147 /*
    148 ** increment line number and skips newline sequence (any of
    149 ** \n, \r, \n\r, or \r\n)
    150 */
    151 static void inclinenumber (LexState *ls) {
    152   int old = ls->current;
    153   lua_assert(currIsNewline(ls));
    154   next(ls);  /* skip '\n' or '\r' */
    155   if (currIsNewline(ls) && ls->current != old)
    156     next(ls);  /* skip '\n\r' or '\r\n' */
    157   if (++ls->linenumber >= MAX_INT)
    158     lexerror(ls, "chunk has too many lines", 0);
    159 }
    160 
    161 
    162 void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source,
    163                     int firstchar) {
    164   ls->t.token = 0;
    165   ls->L = L;
    166   ls->current = firstchar;
    167   ls->lookahead.token = TK_EOS;  /* no look-ahead token */
    168   ls->z = z;
    169   ls->fs = NULL;
    170   ls->linenumber = 1;
    171   ls->lastline = 1;
    172   ls->source = source;
    173   ls->envn = luaS_newliteral(L, LUA_ENV);  /* get env name */
    174   luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER);  /* initialize buffer */
    175 }
    176 
    177 
    178 
    179 /*
    180 ** =======================================================
    181 ** LEXICAL ANALYZER
    182 ** =======================================================
    183 */
    184 
    185 
    186 static int check_next1 (LexState *ls, int c) {
    187   if (ls->current == c) {
    188     next(ls);
    189     return 1;
    190   }
    191   else return 0;
    192 }
    193 
    194 
    195 /*
    196 ** Check whether current char is in set 'set' (with two chars) and
    197 ** saves it
    198 */
    199 static int check_next2 (LexState *ls, const char *set) {
    200   lua_assert(set[2] == '\0');
    201   if (ls->current == set[0] || ls->current == set[1]) {
    202     save_and_next(ls);
    203     return 1;
    204   }
    205   else return 0;
    206 }
    207 
    208 
    209 /* LUA_NUMBER */
    210 /*
    211 ** this function is quite liberal in what it accepts, as 'luaO_str2num'
    212 ** will reject ill-formed numerals.
    213 */
    214 static int read_numeral (LexState *ls, SemInfo *seminfo) {
    215   TValue obj;
    216   const char *expo = "Ee";
    217   int first = ls->current;
    218   lua_assert(lisdigit(ls->current));
    219   save_and_next(ls);
    220   if (first == '0' && check_next2(ls, "xX"))  /* hexadecimal? */
    221     expo = "Pp";
    222   for (;;) {
    223     if (check_next2(ls, expo))  /* exponent part? */
    224       check_next2(ls, "-+");  /* optional exponent sign */
    225     if (lisxdigit(ls->current))
    226       save_and_next(ls);
    227     else if (ls->current == '.')
    228       save_and_next(ls);
    229     else break;
    230   }
    231   save(ls, '\0');
    232   if (luaO_str2num(luaZ_buffer(ls->buff), &obj) == 0)  /* format error? */
    233     lexerror(ls, "malformed number", TK_FLT);
    234   if (ttisinteger(&obj)) {
    235     seminfo->i = ivalue(&obj);
    236     return TK_INT;
    237   }
    238   else {
    239     lua_assert(ttisfloat(&obj));
    240     seminfo->r = fltvalue(&obj);
    241     return TK_FLT;
    242   }
    243 }
    244 
    245 
    246 /*
    247 ** reads a sequence '[=*[' or ']=*]', leaving the last bracket.
    248 ** If sequence is well formed, return its number of '='s + 2; otherwise,
    249 ** return 1 if there is no '='s or 0 otherwise (an unfinished '[==...').
    250 */
    251 static size_t skip_sep (LexState *ls) {
    252   size_t count = 0;
    253   int s = ls->current;
    254   lua_assert(s == '[' || s == ']');
    255   save_and_next(ls);
    256   while (ls->current == '=') {
    257     save_and_next(ls);
    258     count++;
    259   }
    260   return (ls->current == s) ? count + 2
    261          : (count == 0) ? 1
    262          : 0;
    263 
    264 }
    265 
    266 
    267 static void read_long_string (LexState *ls, SemInfo *seminfo, size_t sep) {
    268   int line = ls->linenumber;  /* initial line (for error message) */
    269   save_and_next(ls);  /* skip 2nd '[' */
    270   if (currIsNewline(ls))  /* string starts with a newline? */
    271     inclinenumber(ls);  /* skip it */
    272   for (;;) {
    273     switch (ls->current) {
    274       case EOZ: {  /* error */
    275         const char *what = (seminfo ? "string" : "comment");
    276         const char *msg = luaO_pushfstring(ls->L,
    277                      "unfinished long %s (starting at line %d)", what, line);
    278         lexerror(ls, msg, TK_EOS);
    279         break;  /* to avoid warnings */
    280       }
    281       case ']': {
    282         if (skip_sep(ls) == sep) {
    283           save_and_next(ls);  /* skip 2nd ']' */
    284           goto endloop;
    285         }
    286         break;
    287       }
    288       case '\n': case '\r': {
    289         save(ls, '\n');
    290         inclinenumber(ls);
    291         if (!seminfo) luaZ_resetbuffer(ls->buff);  /* avoid wasting space */
    292         break;
    293       }
    294       default: {
    295         if (seminfo) save_and_next(ls);
    296         else next(ls);
    297       }
    298     }
    299   } endloop:
    300   if (seminfo)
    301     seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + sep,
    302                                      luaZ_bufflen(ls->buff) - 2 * sep);
    303 }
    304 
    305 
    306 static void esccheck (LexState *ls, int c, const char *msg) {
    307   if (!c) {
    308     if (ls->current != EOZ)
    309       save_and_next(ls);  /* add current to buffer for error message */
    310     lexerror(ls, msg, TK_STRING);
    311   }
    312 }
    313 
    314 
    315 static int gethexa (LexState *ls) {
    316   save_and_next(ls);
    317   esccheck (ls, lisxdigit(ls->current), "hexadecimal digit expected");
    318   return luaO_hexavalue(ls->current);
    319 }
    320 
    321 
    322 static int readhexaesc (LexState *ls) {
    323   int r = gethexa(ls);
    324   r = (r << 4) + gethexa(ls);
    325   luaZ_buffremove(ls->buff, 2);  /* remove saved chars from buffer */
    326   return r;
    327 }
    328 
    329 
    330 static unsigned long readutf8esc (LexState *ls) {
    331   unsigned long r;
    332   int i = 4;  /* chars to be removed: '\', 'u', '{', and first digit */
    333   save_and_next(ls);  /* skip 'u' */
    334   esccheck(ls, ls->current == '{', "missing '{'");
    335   r = gethexa(ls);  /* must have at least one digit */
    336   while ((save_and_next(ls), lisxdigit(ls->current))) {
    337     i++;
    338     r = (r << 4) + luaO_hexavalue(ls->current);
    339     esccheck(ls, r <= 0x10FFFF, "UTF-8 value too large");
    340   }
    341   esccheck(ls, ls->current == '}', "missing '}'");
    342   next(ls);  /* skip '}' */
    343   luaZ_buffremove(ls->buff, i);  /* remove saved chars from buffer */
    344   return r;
    345 }
    346 
    347 
    348 static void utf8esc (LexState *ls) {
    349   char buff[UTF8BUFFSZ];
    350   int n = luaO_utf8esc(buff, readutf8esc(ls));
    351   for (; n > 0; n--)  /* add 'buff' to string */
    352     save(ls, buff[UTF8BUFFSZ - n]);
    353 }
    354 
    355 
    356 static int readdecesc (LexState *ls) {
    357   int i;
    358   int r = 0;  /* result accumulator */
    359   for (i = 0; i < 3 && lisdigit(ls->current); i++) {  /* read up to 3 digits */
    360     r = 10*r + ls->current - '0';
    361     save_and_next(ls);
    362   }
    363   esccheck(ls, r <= UCHAR_MAX, "decimal escape too large");
    364   luaZ_buffremove(ls->buff, i);  /* remove read digits from buffer */
    365   return r;
    366 }
    367 
    368 
    369 static void read_string (LexState *ls, int del, SemInfo *seminfo) {
    370   save_and_next(ls);  /* keep delimiter (for error messages) */
    371   while (ls->current != del) {
    372     switch (ls->current) {
    373       case EOZ:
    374         lexerror(ls, "unfinished string", TK_EOS);
    375         break;  /* to avoid warnings */
    376       case '\n':
    377       case '\r':
    378         lexerror(ls, "unfinished string", TK_STRING);
    379         break;  /* to avoid warnings */
    380       case '\\': {  /* escape sequences */
    381         int c;  /* final character to be saved */
    382         save_and_next(ls);  /* keep '\\' for error messages */
    383         switch (ls->current) {
    384           case 'a': c = '\a'; goto read_save;
    385           case 'b': c = '\b'; goto read_save;
    386           case 'f': c = '\f'; goto read_save;
    387           case 'n': c = '\n'; goto read_save;
    388           case 'r': c = '\r'; goto read_save;
    389           case 't': c = '\t'; goto read_save;
    390           case 'v': c = '\v'; goto read_save;
    391           case 'x': c = readhexaesc(ls); goto read_save;
    392           case 'u': utf8esc(ls);  goto no_save;
    393           case '\n': case '\r':
    394             inclinenumber(ls); c = '\n'; goto only_save;
    395           case '\\': case '\"': case '\'':
    396             c = ls->current; goto read_save;
    397           case EOZ: goto no_save;  /* will raise an error next loop */
    398           case 'z': {  /* zap following span of spaces */
    399             luaZ_buffremove(ls->buff, 1);  /* remove '\\' */
    400             next(ls);  /* skip the 'z' */
    401             while (lisspace(ls->current)) {
    402               if (currIsNewline(ls)) inclinenumber(ls);
    403               else next(ls);
    404             }
    405             goto no_save;
    406           }
    407           default: {
    408             esccheck(ls, lisdigit(ls->current), "invalid escape sequence");
    409             c = readdecesc(ls);  /* digital escape '\ddd' */
    410             goto only_save;
    411           }
    412         }
    413        read_save:
    414          next(ls);
    415          /* go through */
    416        only_save:
    417          luaZ_buffremove(ls->buff, 1);  /* remove '\\' */
    418          save(ls, c);
    419          /* go through */
    420        no_save: break;
    421       }
    422       default:
    423         save_and_next(ls);
    424     }
    425   }
    426   save_and_next(ls);  /* skip delimiter */
    427   seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,
    428                                    luaZ_bufflen(ls->buff) - 2);
    429 }
    430 
    431 
    432 static int llex (LexState *ls, SemInfo *seminfo) {
    433   luaZ_resetbuffer(ls->buff);
    434   for (;;) {
    435     switch (ls->current) {
    436       case '\n': case '\r': {  /* line breaks */
    437         inclinenumber(ls);
    438         break;
    439       }
    440       case ' ': case '\f': case '\t': case '\v': {  /* spaces */
    441         next(ls);
    442         break;
    443       }
    444       case '-': {  /* '-' or '--' (comment) */
    445         next(ls);
    446         if (ls->current != '-') return '-';
    447         /* else is a comment */
    448         next(ls);
    449         if (ls->current == '[') {  /* long comment? */
    450           size_t sep = skip_sep(ls);
    451           luaZ_resetbuffer(ls->buff);  /* 'skip_sep' may dirty the buffer */
    452           if (sep >= 2) {
    453             read_long_string(ls, NULL, sep);  /* skip long comment */
    454             luaZ_resetbuffer(ls->buff);  /* previous call may dirty the buff. */
    455             break;
    456           }
    457         }
    458         /* else short comment */
    459         while (!currIsNewline(ls) && ls->current != EOZ)
    460           next(ls);  /* skip until end of line (or end of file) */
    461         break;
    462       }
    463       case '[': {  /* long string or simply '[' */
    464         size_t sep = skip_sep(ls);
    465         if (sep >= 2) {
    466           read_long_string(ls, seminfo, sep);
    467           return TK_STRING;
    468         }
    469         else if (sep == 0)  /* '[=...' missing second bracket */
    470           lexerror(ls, "invalid long string delimiter", TK_STRING);
    471         return '[';
    472       }
    473       case '=': {
    474         next(ls);
    475         if (check_next1(ls, '=')) return TK_EQ;
    476         else return '=';
    477       }
    478       case '<': {
    479         next(ls);
    480         if (check_next1(ls, '=')) return TK_LE;
    481         else if (check_next1(ls, '<')) return TK_SHL;
    482         else return '<';
    483       }
    484       case '>': {
    485         next(ls);
    486         if (check_next1(ls, '=')) return TK_GE;
    487         else if (check_next1(ls, '>')) return TK_SHR;
    488         else return '>';
    489       }
    490       case '/': {
    491         next(ls);
    492         if (check_next1(ls, '/')) return TK_IDIV;
    493         else return '/';
    494       }
    495       case '~': {
    496         next(ls);
    497         if (check_next1(ls, '=')) return TK_NE;
    498         else return '~';
    499       }
    500       case ':': {
    501         next(ls);
    502         if (check_next1(ls, ':')) return TK_DBCOLON;
    503         else return ':';
    504       }
    505       case '"': case '\'': {  /* short literal strings */
    506         read_string(ls, ls->current, seminfo);
    507         return TK_STRING;
    508       }
    509       case '.': {  /* '.', '..', '...', or number */
    510         save_and_next(ls);
    511         if (check_next1(ls, '.')) {
    512           if (check_next1(ls, '.'))
    513             return TK_DOTS;   /* '...' */
    514           else return TK_CONCAT;   /* '..' */
    515         }
    516         else if (!lisdigit(ls->current)) return '.';
    517         else return read_numeral(ls, seminfo);
    518       }
    519       case '0': case '1': case '2': case '3': case '4':
    520       case '5': case '6': case '7': case '8': case '9': {
    521         return read_numeral(ls, seminfo);
    522       }
    523       case EOZ: {
    524         return TK_EOS;
    525       }
    526       default: {
    527         if (lislalpha(ls->current)) {  /* identifier or reserved word? */
    528           TString *ts;
    529           do {
    530             save_and_next(ls);
    531           } while (lislalnum(ls->current));
    532           ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
    533                                   luaZ_bufflen(ls->buff));
    534           seminfo->ts = ts;
    535           if (isreserved(ts))  /* reserved word? */
    536             return ts->extra - 1 + FIRST_RESERVED;
    537           else {
    538             return TK_NAME;
    539           }
    540         }
    541         else {  /* single-char tokens (+ - / ...) */
    542           int c = ls->current;
    543           next(ls);
    544           return c;
    545         }
    546       }
    547     }
    548   }
    549 }
    550 
    551 
    552 void luaX_next (LexState *ls) {
    553   ls->lastline = ls->linenumber;
    554   if (ls->lookahead.token != TK_EOS) {  /* is there a look-ahead token? */
    555     ls->t = ls->lookahead;  /* use this one */
    556     ls->lookahead.token = TK_EOS;  /* and discharge it */
    557   }
    558   else
    559     ls->t.token = llex(ls, &ls->t.seminfo);  /* read next token */
    560 }
    561 
    562 
    563 int luaX_lookahead (LexState *ls) {
    564   lua_assert(ls->lookahead.token == TK_EOS);
    565   ls->lookahead.token = llex(ls, &ls->lookahead.seminfo);
    566   return ls->lookahead.token;
    567 }
    568