ljx

FORK: LuaJIT with native 5.2 and 5.3 support
git clone https://git.neptards.moe/neptards/ljx.git
Log | Files | Refs | README

lj_strfmt.c (14415B)


      1 /*
      2 ** String formatting.
      3 ** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
      4 */
      5 
      6 #include <stdio.h>
      7 
      8 #define lj_strfmt_c
      9 #define LUA_CORE
     10 
     11 #include "lj_obj.h"
     12 #include "lj_buf.h"
     13 #include "lj_str.h"
     14 #include "lj_state.h"
     15 #include "lj_char.h"
     16 #include "lj_strfmt.h"
     17 
     18 /* -- Format parser ------------------------------------------------------- */
     19 
/*
** Conversion lookup table, indexed by (conversion character - 'A').
** It spans 'A'..'x'. A zero entry means the character is not accepted as a
** conversion by lj_strfmt_parse().
*/
static const uint8_t strfmt_map[('x'-'A')+1] = {
  /* 'A'..'M' */
  STRFMT_A,0,0,0,STRFMT_E,STRFMT_F,STRFMT_G,0,0,0,0,0,0,
  /* 'N'..'Z' */
  0,0,0,0,0,0,0,STRFMT_UTF8,0,0,STRFMT_X,0,0,
  /* The six characters between 'Z' and 'a'. */
  0,0,0,0,0,0,
  /* 'a'..'m' */
  STRFMT_A,0,STRFMT_C,STRFMT_D,STRFMT_E,STRFMT_F,STRFMT_G,0,STRFMT_I,0,0,0,0,
  /* 'n'..'x' */
  0,STRFMT_O,STRFMT_P,STRFMT_Q,0,STRFMT_S,0,STRFMT_U,0,0,STRFMT_X
};
     27 
/*
** Parse the next element of the format string described by fs.
**
** Scans from fs->p towards fs->e and returns one of:
** - STRFMT_LIT: a run of literal text; fs->str/fs->len describe it.
** - STRFMT_EOF: nothing left to parse (zero-length literal).
** - STRFMT_ERR: invalid conversion; fs->str/fs->len cover the offending
**   spec and fs->p is set to fs->e.
** - Otherwise: the conversion type from strfmt_map combined with the
**   parsed flags, width and precision.
** fs->p is advanced past whatever was consumed.
*/
SFormat LJ_FASTCALL lj_strfmt_parse(FormatState *fs)
{
  const uint8_t *p = fs->p, *e = fs->e;
  fs->str = (const char *)p;  /* Start of the element being parsed. */
  for (; p < e; p++) {
    if (*p == '%') {  /* Escape char? */
      /*
      ** NOTE(review): p[1] and the chars after it are read without checking
      ** against e; presumably the format is NUL-terminated -- confirm.
      */
      if (p[1] == '%') {  /* '%%'? */
	/* The literal includes the first '%'; resume after the second one. */
	fs->p = ++p+1;
	goto retlit;
      } else {
	SFormat sf = 0;
	uint32_t c;
	/* Literal text precedes this '%': return that literal first. */
	if (p != (const uint8_t *)fs->str)
	  break;
	/* Only characters in the range ' '..'0' can be flags. */
	for (p++; (uint32_t)*p - ' ' <= (uint32_t)('0' - ' '); p++) {
	  /* Parse flags. */
	  if (*p == '-') sf |= STRFMT_F_LEFT;
	  else if (*p == '+') sf |= STRFMT_F_PLUS;
	  else if (*p == '0') sf |= STRFMT_F_ZERO;
	  else if (*p == ' ') sf |= STRFMT_F_SPACE;
	  else if (*p == '#') sf |= STRFMT_F_ALT;
	  else break;
	}
	/* At most two digits of width are consumed. */
	if ((uint32_t)*p - '0' < 10) {  /* Parse width. */
	  uint32_t width = (uint32_t)*p++ - '0';
	  if ((uint32_t)*p - '0' < 10)
	    width = (uint32_t)*p++ - '0' + width*10;
	  sf |= (width << STRFMT_SH_WIDTH);
	}
	/* At most two digits of precision are consumed. */
	if (*p == '.') {  /* Parse precision. */
	  uint32_t prec = 0;
	  p++;
	  if ((uint32_t)*p - '0' < 10) {
	    prec = (uint32_t)*p++ - '0';
	    if ((uint32_t)*p - '0' < 10)
	      prec = (uint32_t)*p++ - '0' + prec*10;
	  }
	  /* Stored biased by one: the field is only zero if no '.' was seen. */
	  sf |= ((prec+1) << STRFMT_SH_PREC);
	}
	/* Parse conversion. */
	c = (uint32_t)*p - 'A';
	if (LJ_LIKELY(c <= (uint32_t)('x' - 'A'))) {
	  uint32_t sx = strfmt_map[c];
	  if (sx) {
	    fs->p = p+1;
	    /* Bit 0x20 of c is clear for 'A'..'Z' and set for 'a'..'x'. */
	    return (sf | sx | ((c & 0x20) ? 0 : STRFMT_F_UPPER));
	  }
	}
	/* Return error location. */
	if (*p >= 32) p++;  /* Include the offending char unless it is < 32. */
	fs->len = (MSize)(p - (const uint8_t *)fs->str);
	fs->p = fs->e;  /* Stop any further parsing. */
	return STRFMT_ERR;
      }
    }
  }
  fs->p = p;
retlit:
  fs->len = (MSize)(p - (const uint8_t *)fs->str);
  return fs->len ? STRFMT_LIT : STRFMT_EOF;
}
     89 
     90 /* -- Raw conversions ----------------------------------------------------- */
     91 
     92 #define WINT_R(x, sh, sc) \
     93   { uint32_t d = (x*(((1<<sh)+sc-1)/sc))>>sh; x -= d*sc; *p++ = (char)('0'+d); }
     94 
     95 /* Write integer to buffer. */
     96 char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k)
     97 {
     98   uint32_t u = (uint32_t)k;
     99   if (k < 0) { u = (uint32_t)-k; *p++ = '-'; }
    100   if (u < 10000) {
    101     if (u < 10) goto dig1;
    102     if (u < 100) goto dig2;
    103     if (u < 1000) goto dig3;
    104   } else {
    105     uint32_t v = u / 10000; u -= v * 10000;
    106     if (v < 10000) {
    107       if (v < 10) goto dig5;
    108       if (v < 100) goto dig6;
    109       if (v < 1000) goto dig7;
    110     } else {
    111       uint32_t w = v / 10000; v -= w * 10000;
    112       if (w >= 10) WINT_R(w, 10, 10)
    113       *p++ = (char)('0'+w);
    114     }
    115     WINT_R(v, 23, 1000)
    116     dig7: WINT_R(v, 12, 100)
    117     dig6: WINT_R(v, 10, 10)
    118     dig5: *p++ = (char)('0'+v);
    119   }
    120   WINT_R(u, 23, 1000)
    121   dig3: WINT_R(u, 12, 100)
    122   dig2: WINT_R(u, 10, 10)
    123   dig1: *p++ = (char)('0'+u);
    124   return p;
    125 }
    126 #undef WINT_R
    127 
    128 /* Write pointer to buffer. */
    129 char * LJ_FASTCALL lj_strfmt_wptr(char *p, const void *v)
    130 {
    131   ptrdiff_t x = (ptrdiff_t)v;
    132   MSize i, n = STRFMT_MAXBUF_PTR;
    133   if (x == 0) {
    134     *p++ = 'N'; *p++ = 'U'; *p++ = 'L'; *p++ = 'L';
    135     return p;
    136   }
    137 #if LJ_64
    138   /* Shorten output for 64 bit pointers. */
    139   n = 2+2*4+((x >> 32) ? 2+2*(lj_fls((uint32_t)(x >> 32))>>3) : 0);
    140 #endif
    141   p[0] = '0';
    142   p[1] = 'x';
    143   for (i = n-1; i >= 2; i--, x >>= 4)
    144     p[i] = "0123456789abcdef"[(x & 15)];
    145   return p+n;
    146 }
    147 
    148 /* Write ULEB128 to buffer. */
    149 char * LJ_FASTCALL lj_strfmt_wuleb128(char *p, uint32_t v)
    150 {
    151   for (; v >= 0x80; v >>= 7)
    152     *p++ = (char)((v & 0x7f) | 0x80);
    153   *p++ = (char)v;
    154   return p;
    155 }
    156 
    157 /* Return string or write number to tmp buffer and return pointer to start. */
    158 const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp)
    159 {
    160   SBuf *sb;
    161   if (tvisstr(o)) {
    162     *lenp = strV(o)->len;
    163     return strVdata(o);
    164   } else if (tvisint(o)) {
    165     sb = lj_strfmt_putint(lj_buf_tmp_(L), intV(o));
    166   } else if (tvisnum(o)) {
    167     sb = lj_strfmt_putfnum(lj_buf_tmp_(L), STRFMT_G14, o->n);
    168   } else {
    169     return NULL;
    170   }
    171   *lenp = sbuflen(sb);
    172   return sbufB(sb);
    173 }
    174 
    175 #if LJ_53
    176 /* Format utf8 code into buff. Note that `buff` goes backwards. */
    177 MSize LJ_FASTCALL lj_strfmt_utf8(char *buff, unsigned long x)
    178 {
    179   int n = 1;  /* number of bytes put in buffer (backwards) */
    180   lua_assert(x <= 0x10FFFF);
    181   if (x < 0x80)  /* ascii? */
    182     buff[STRFMT_MAXBUF_UTF8 - 1] = (char)x;
    183   else {  /* need continuation bytes */
    184     unsigned int mfb = 0x3f;  /* maximum that fits in first byte */
    185     do {  /* add continuation bytes */
    186       buff[STRFMT_MAXBUF_UTF8 - (n++)] = (char)(0x80 | (x & 0x3f));
    187       x >>= 6;  /* remove added bits */
    188       mfb >>= 1;  /* now there is one less bit available in first byte */
    189     } while (x > mfb);  /* still needs continuation byte? */
    190     buff[STRFMT_MAXBUF_UTF8 - n] = (char)((~mfb << 1) | x);  /* add first byte */
    191   }
    192   return n;
    193 }
    194 #endif
    195 
    196 
    197 /* -- Unformatted conversions to buffer ----------------------------------- */
    198 /* Add integer to buffer. */
    199 SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k)
    200 {
    201   setsbufP(sb, lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT), k));
    202   return sb;
    203 }
    204 
    205 #if LJ_HASJIT
    206 /* Add number to buffer. */
    207 SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o)
    208 {
    209   return lj_strfmt_putfnum(sb, STRFMT_G14, o->n);
    210 }
    211 #endif
    212 
    213 SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v)
    214 {
    215   setsbufP(sb, lj_strfmt_wptr(lj_buf_more(sb, STRFMT_MAXBUF_PTR), v));
    216   return sb;
    217 }
    218 
/*
** Add quoted string to buffer.
**
** The string is wrapped in double quotes. Double quote, backslash and
** newline are emitted with a preceding backslash. Control characters are
** emitted as a backslash followed by their decimal code. All other bytes
** are copied unchanged. Returns sb.
*/
SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str)
{
  const char *s = strdata(str);
  MSize len = str->len;
  lj_buf_putb(sb, '"');
  while (len--) {
    uint32_t c = (uint32_t)(uint8_t)*s++;
    /* Worst case per input byte: backslash plus three decimal digits. */
    char *p = lj_buf_more(sb, 4);
    if (c == '"' || c == '\\' || c == '\n') {
      *p++ = '\\';
    } else if (lj_char_iscntrl(c)) {  /* This can only be 0-31 or 127. */
      uint32_t d;
      *p++ = '\\';
      /*
      ** Emit all three digits if the code needs them, or if the next input
      ** char is a digit that would otherwise be read as part of the escape.
      ** NOTE(review): *s is read even after the last char; presumably the
      ** string data is NUL-terminated -- confirm.
      */
      if (c >= 100 || lj_char_isdigit((uint8_t)*s)) {
	*p++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100;
	goto tens;
      } else if (c >= 10) {
      tens:
	/* d = c / 10 via multiply and shift; c becomes the last digit. */
	d = (c * 205) >> 11; c -= d * 10; *p++ = (char)('0'+d);
      }
      c += '0';  /* Convert the remaining ones digit to a character. */
    }
    *p++ = (char)c;
    setsbufP(sb, p);
  }
  lj_buf_putb(sb, '"');
  return sb;
}
    248 
    249 /* -- Formatted conversions to buffer ------------------------------------- */
    250 
    251 /* Add formatted char to buffer. */
    252 SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat sf, int32_t c)
    253 {
    254   MSize width = STRFMT_WIDTH(sf);
    255   char *p = lj_buf_more(sb, width > 1 ? width : 1);
    256   if ((sf & STRFMT_F_LEFT)) *p++ = (char)c;
    257   while (width-- > 1) *p++ = ' ';
    258   if (!(sf & STRFMT_F_LEFT)) *p++ = (char)c;
    259   setsbufP(sb, p);
    260   return sb;
    261 }
    262 
    263 /* Add formatted string to buffer. */
    264 SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat sf, GCstr *str)
    265 {
    266   MSize len = str->len <= STRFMT_PREC(sf) ? str->len : STRFMT_PREC(sf);
    267   MSize width = STRFMT_WIDTH(sf);
    268   char *p = lj_buf_more(sb, width > len ? width : len);
    269   if ((sf & STRFMT_F_LEFT)) p = lj_buf_wmem(p, strdata(str), len);
    270   while (width-- > len) *p++ = ' ';
    271   if (!(sf & STRFMT_F_LEFT)) p = lj_buf_wmem(p, strdata(str), len);
    272   setsbufP(sb, p);
    273   return sb;
    274 }
    275 
    276 /* Add formatted signed/unsigned integer to buffer. */
    277 SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k)
    278 {
    279   char buf[STRFMT_MAXBUF_XINT], *q = buf + sizeof(buf), *p;
    280 #ifdef LUA_USE_ASSERT
    281   char *ps;
    282 #endif
    283   MSize prefix = 0, len, prec, pprec, width, need;
    284 
    285   /* Figure out signed prefixes. */
    286   if (STRFMT_TYPE(sf) == STRFMT_INT) {
    287     if ((int64_t)k < 0) {
    288       k = (uint64_t)-(int64_t)k;
    289       prefix = 256 + '-';
    290     } else if ((sf & STRFMT_F_PLUS)) {
    291       prefix = 256 + '+';
    292     } else if ((sf & STRFMT_F_SPACE)) {
    293       prefix = 256 + ' ';
    294     }
    295   }
    296 
    297   /* Convert number and store to fixed-size buffer in reverse order. */
    298   prec = STRFMT_PREC(sf);
    299   if ((int32_t)prec >= 0) sf &= ~STRFMT_F_ZERO;
    300   if (k == 0) {  /* Special-case zero argument. */
    301     if (prec != 0 ||
    302 	(sf & (STRFMT_T_OCT|STRFMT_F_ALT)) == (STRFMT_T_OCT|STRFMT_F_ALT))
    303       *--q = '0';
    304   } else if (!(sf & (STRFMT_T_HEX|STRFMT_T_OCT))) {  /* Decimal. */
    305     uint32_t k2;
    306     while ((k >> 32)) { *--q = (char)('0' + k % 10); k /= 10; }
    307     k2 = (uint32_t)k;
    308     do { *--q = (char)('0' + k2 % 10); k2 /= 10; } while (k2);
    309   } else if ((sf & STRFMT_T_HEX)) {  /* Hex. */
    310     const char *hexdig = (sf & STRFMT_F_UPPER) ? "0123456789ABCDEF" :
    311 						 "0123456789abcdef";
    312     do { *--q = hexdig[(k & 15)]; k >>= 4; } while (k);
    313     if ((sf & STRFMT_F_ALT)) prefix = 512 + ((sf & STRFMT_F_UPPER) ? 'X' : 'x');
    314   } else {  /* Octal. */
    315     do { *--q = (char)('0' + (uint32_t)(k & 7)); k >>= 3; } while (k);
    316     if ((sf & STRFMT_F_ALT)) *--q = '0';
    317   }
    318 
    319   /* Calculate sizes. */
    320   len = (MSize)(buf + sizeof(buf) - q);
    321   if ((int32_t)len >= (int32_t)prec) prec = len;
    322   width = STRFMT_WIDTH(sf);
    323   pprec = prec + (prefix >> 8);
    324   need = width > pprec ? width : pprec;
    325   p = lj_buf_more(sb, need);
    326 #ifdef LUA_USE_ASSERT
    327   ps = p;
    328 #endif
    329 
    330   /* Format number with leading/trailing whitespace and zeros. */
    331   if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == 0)
    332     while (width-- > pprec) *p++ = ' ';
    333   if (prefix) {
    334     if ((char)prefix >= 'X') *p++ = '0';
    335     *p++ = (char)prefix;
    336   }
    337   if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == STRFMT_F_ZERO)
    338     while (width-- > pprec) *p++ = '0';
    339   while (prec-- > len) *p++ = '0';
    340   while (q < buf + sizeof(buf)) *p++ = *q++;  /* Add number itself. */
    341   if ((sf & STRFMT_F_LEFT))
    342     while (width-- > pprec) *p++ = ' ';
    343 
    344   lua_assert(need == (MSize)(p - ps));
    345   setsbufP(sb, p);
    346   return sb;
    347 }
    348 
    349 /* Add number formatted as signed integer to buffer. */
    350 SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n)
    351 {
    352   int64_t k = (int64_t)n;
    353   if (checki32(k) && sf == STRFMT_INT)
    354     return lj_strfmt_putint(sb, (int32_t)k);  /* Shortcut for plain %d. */
    355   else
    356     return lj_strfmt_putfxint(sb, sf, (uint64_t)k);
    357 }
    358 
    359 /* Add number formatted as unsigned integer to buffer. */
    360 SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n)
    361 {
    362   int64_t k;
    363   if (n >= 9223372036854775808.0)
    364     k = (int64_t)(n - 18446744073709551616.0);
    365   else
    366     k = (int64_t)n;
    367   return lj_strfmt_putfxint(sb, sf, (uint64_t)k);
    368 }
    369 
    370 /* -- Conversions to strings ---------------------------------------------- */
    371 
    372 /* Convert integer to string. */
    373 GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k)
    374 {
    375   char buf[STRFMT_MAXBUF_INT];
    376   MSize len = (MSize)(lj_strfmt_wint(buf, k) - buf);
    377   return lj_str_new(L, buf, len);
    378 }
    379 
    380 /* Convert integer or number to string. */
    381 GCstr * LJ_FASTCALL lj_strfmt_number(lua_State *L, cTValue *o)
    382 {
    383   return tvisint(o) ? lj_strfmt_int(L, intV(o)) : lj_strfmt_num(L, o);
    384 }
    385 
    386 #if LJ_HASJIT
    387 /* Convert char value to string. */
    388 GCstr * LJ_FASTCALL lj_strfmt_char(lua_State *L, int c)
    389 {
    390   char buf[1];
    391   buf[0] = c;
    392   return lj_str_new(L, buf, 1);
    393 }
    394 #endif
    395 
    396 /* Raw conversion of object to string. */
    397 GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o)
    398 {
    399   if (tvisstr(o)) {
    400     return strV(o);
    401   } else if (tvisnumber(o)) {
    402     return lj_strfmt_number(L, o);
    403   } else if (tvisnil(o)) {
    404     return lj_str_newlit(L, "nil");
    405   } else if (tvisfalse(o)) {
    406     return lj_str_newlit(L, "false");
    407   } else if (tvistrue(o)) {
    408     return lj_str_newlit(L, "true");
    409   } else {
    410     char buf[8+2+2+16], *p = buf;
    411     p = lj_buf_wmem(p, lj_typename(o), (MSize)strlen(lj_typename(o)));
    412     *p++ = ':'; *p++ = ' ';
    413     if (tvisfunc(o) && isffunc(funcV(o))) {
    414       p = lj_buf_wmem(p, "builtin#", 8);
    415       p = lj_strfmt_wint(p, funcV(o)->c.ffid);
    416     } else {
    417       p = lj_strfmt_wptr(p, lj_obj_ptr(o));
    418     }
    419     return lj_str_new(L, buf, (size_t)(p - buf));
    420   }
    421 }
    422 
    423 #if LJ_53
    424 SBuf * LJ_FASTCALL lj_strfmt_pututf8(SBuf *sb, long n)
    425 {
    426   char buff[STRFMT_MAXBUF_UTF8];
    427   MSize l = lj_strfmt_utf8(buff, n);
    428   lj_buf_putmem(sb, buff + STRFMT_MAXBUF_UTF8 - l, l);
    429   return sb;
    430 }
    431 #endif
    432 
    433 /* -- Internal string formatting ------------------------------------------ */
    434 
    435 /*
    436 ** These functions are only used for lua_pushfstring(), lua_pushvfstring()
    437 ** and for internal string formatting (e.g. error messages). Caveat: unlike
    438 ** string.format(), only a limited subset of formats and flags are supported!
    439 **
    440 ** LuaJIT has support for a couple more formats than Lua 5.1/5.2:
    441 ** - %d %u %o %x with full formatting, 32 bit integers only.
    442 ** - %f and other FP formats are really %.14g.
    443 ** - %s %c %p without formatting.
    444 ** - %U - utf8
    445 */
    446 
    447 /* Push formatted message as a string object to Lua stack. va_list variant. */
    448 const char *lj_strfmt_pushvf(lua_State *L, const char *fmt, va_list argp)
    449 {
    450   SBuf *sb = lj_buf_tmp_(L);
    451   FormatState fs;
    452   SFormat sf;
    453   GCstr *str;
    454   lj_strfmt_init(&fs, fmt, (MSize)strlen(fmt));
    455   while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {
    456     switch (STRFMT_TYPE(sf)) {
    457     case STRFMT_LIT:
    458       lj_buf_putmem(sb, fs.str, fs.len);
    459       break;
    460     case STRFMT_INT:
    461       lj_strfmt_putfxint(sb, sf, va_arg(argp, int32_t));
    462       break;
    463     case STRFMT_UINT:
    464       lj_strfmt_putfxint(sb, sf, va_arg(argp, uint32_t));
    465       break;
    466     case STRFMT_NUM:
    467       lj_strfmt_putfnum(sb, STRFMT_G14, va_arg(argp, lua_Number));
    468       break;
    469     case STRFMT_STR: {
    470       const char *s = va_arg(argp, char *);
    471       if (s == NULL) s = "(null)";
    472       lj_buf_putmem(sb, s, (MSize)strlen(s));
    473       break;
    474       }
    475     case STRFMT_CHAR:
    476       lj_buf_putb(sb, va_arg(argp, int));
    477       break;
    478     case STRFMT_PTR:
    479       lj_strfmt_putptr(sb, va_arg(argp, void *));
    480       break;
    481 #if LJ_53
    482     case STRFMT_UTF8: {
    483       lj_strfmt_pututf8(sb, (long)va_arg(argp, long));
    484       break;
    485     }
    486 #endif
    487     case STRFMT_ERR:
    488     default:
    489       lj_buf_putb(sb, '?');
    490       lua_assert(0);
    491       break;
    492     }
    493   }
    494   str = lj_buf_str(L, sb);
    495   setstrV(L, L->top, str);
    496   incr_top(L);
    497   return strdata(str);
    498 }
    499 
    500 /* Push formatted message as a string object to Lua stack. Vararg variant. */
    501 const char *lj_strfmt_pushf(lua_State *L, const char *fmt, ...)
    502 {
    503   const char *msg;
    504   va_list argp;
    505   va_start(argp, fmt);
    506   msg = lj_strfmt_pushvf(L, fmt, argp);
    507   va_end(argp);
    508   return msg;
    509 }
    510