lj_strfmt.c (14415B)
1 /* 2 ** String formatting. 3 ** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h 4 */ 5 6 #include <stdio.h> 7 8 #define lj_strfmt_c 9 #define LUA_CORE 10 11 #include "lj_obj.h" 12 #include "lj_buf.h" 13 #include "lj_str.h" 14 #include "lj_state.h" 15 #include "lj_char.h" 16 #include "lj_strfmt.h" 17 18 /* -- Format parser ------------------------------------------------------- */ 19 20 static const uint8_t strfmt_map[('x'-'A')+1] = { 21 STRFMT_A,0,0,0,STRFMT_E,STRFMT_F,STRFMT_G,0,0,0,0,0,0, 22 0,0,0,0,0,0,0,STRFMT_UTF8,0,0,STRFMT_X,0,0, 23 0,0,0,0,0,0, 24 STRFMT_A,0,STRFMT_C,STRFMT_D,STRFMT_E,STRFMT_F,STRFMT_G,0,STRFMT_I,0,0,0,0, 25 0,STRFMT_O,STRFMT_P,STRFMT_Q,0,STRFMT_S,0,STRFMT_U,0,0,STRFMT_X 26 }; 27 28 SFormat LJ_FASTCALL lj_strfmt_parse(FormatState *fs) 29 { 30 const uint8_t *p = fs->p, *e = fs->e; 31 fs->str = (const char *)p; 32 for (; p < e; p++) { 33 if (*p == '%') { /* Escape char? */ 34 if (p[1] == '%') { /* '%%'? */ 35 fs->p = ++p+1; 36 goto retlit; 37 } else { 38 SFormat sf = 0; 39 uint32_t c; 40 if (p != (const uint8_t *)fs->str) 41 break; 42 for (p++; (uint32_t)*p - ' ' <= (uint32_t)('0' - ' '); p++) { 43 /* Parse flags. */ 44 if (*p == '-') sf |= STRFMT_F_LEFT; 45 else if (*p == '+') sf |= STRFMT_F_PLUS; 46 else if (*p == '0') sf |= STRFMT_F_ZERO; 47 else if (*p == ' ') sf |= STRFMT_F_SPACE; 48 else if (*p == '#') sf |= STRFMT_F_ALT; 49 else break; 50 } 51 if ((uint32_t)*p - '0' < 10) { /* Parse width. */ 52 uint32_t width = (uint32_t)*p++ - '0'; 53 if ((uint32_t)*p - '0' < 10) 54 width = (uint32_t)*p++ - '0' + width*10; 55 sf |= (width << STRFMT_SH_WIDTH); 56 } 57 if (*p == '.') { /* Parse precision. */ 58 uint32_t prec = 0; 59 p++; 60 if ((uint32_t)*p - '0' < 10) { 61 prec = (uint32_t)*p++ - '0'; 62 if ((uint32_t)*p - '0' < 10) 63 prec = (uint32_t)*p++ - '0' + prec*10; 64 } 65 sf |= ((prec+1) << STRFMT_SH_PREC); 66 } 67 /* Parse conversion. */ 68 c = (uint32_t)*p - 'A'; 69 if (LJ_LIKELY(c <= (uint32_t)('x' - 'A'))) { 70 uint32_t sx = strfmt_map[c]; 71 if (sx) { 72 fs->p = p+1; 73 return (sf | sx | ((c & 0x20) ? 0 : STRFMT_F_UPPER)); 74 } 75 } 76 /* Return error location. */ 77 if (*p >= 32) p++; 78 fs->len = (MSize)(p - (const uint8_t *)fs->str); 79 fs->p = fs->e; 80 return STRFMT_ERR; 81 } 82 } 83 } 84 fs->p = p; 85 retlit: 86 fs->len = (MSize)(p - (const uint8_t *)fs->str); 87 return fs->len ? STRFMT_LIT : STRFMT_EOF; 88 } 89 90 /* -- Raw conversions ----------------------------------------------------- */ 91 92 #define WINT_R(x, sh, sc) \ 93 { uint32_t d = (x*(((1<<sh)+sc-1)/sc))>>sh; x -= d*sc; *p++ = (char)('0'+d); } 94 95 /* Write integer to buffer. */ 96 char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k) 97 { 98 uint32_t u = (uint32_t)k; 99 if (k < 0) { u = (uint32_t)-k; *p++ = '-'; } 100 if (u < 10000) { 101 if (u < 10) goto dig1; 102 if (u < 100) goto dig2; 103 if (u < 1000) goto dig3; 104 } else { 105 uint32_t v = u / 10000; u -= v * 10000; 106 if (v < 10000) { 107 if (v < 10) goto dig5; 108 if (v < 100) goto dig6; 109 if (v < 1000) goto dig7; 110 } else { 111 uint32_t w = v / 10000; v -= w * 10000; 112 if (w >= 10) WINT_R(w, 10, 10) 113 *p++ = (char)('0'+w); 114 } 115 WINT_R(v, 23, 1000) 116 dig7: WINT_R(v, 12, 100) 117 dig6: WINT_R(v, 10, 10) 118 dig5: *p++ = (char)('0'+v); 119 } 120 WINT_R(u, 23, 1000) 121 dig3: WINT_R(u, 12, 100) 122 dig2: WINT_R(u, 10, 10) 123 dig1: *p++ = (char)('0'+u); 124 return p; 125 } 126 #undef WINT_R 127 128 /* Write pointer to buffer. */ 129 char * LJ_FASTCALL lj_strfmt_wptr(char *p, const void *v) 130 { 131 ptrdiff_t x = (ptrdiff_t)v; 132 MSize i, n = STRFMT_MAXBUF_PTR; 133 if (x == 0) { 134 *p++ = 'N'; *p++ = 'U'; *p++ = 'L'; *p++ = 'L'; 135 return p; 136 } 137 #if LJ_64 138 /* Shorten output for 64 bit pointers. */ 139 n = 2+2*4+((x >> 32) ? 2+2*(lj_fls((uint32_t)(x >> 32))>>3) : 0); 140 #endif 141 p[0] = '0'; 142 p[1] = 'x'; 143 for (i = n-1; i >= 2; i--, x >>= 4) 144 p[i] = "0123456789abcdef"[(x & 15)]; 145 return p+n; 146 } 147 148 /* Write ULEB128 to buffer. */ 149 char * LJ_FASTCALL lj_strfmt_wuleb128(char *p, uint32_t v) 150 { 151 for (; v >= 0x80; v >>= 7) 152 *p++ = (char)((v & 0x7f) | 0x80); 153 *p++ = (char)v; 154 return p; 155 } 156 157 /* Return string or write number to tmp buffer and return pointer to start. */ 158 const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp) 159 { 160 SBuf *sb; 161 if (tvisstr(o)) { 162 *lenp = strV(o)->len; 163 return strVdata(o); 164 } else if (tvisint(o)) { 165 sb = lj_strfmt_putint(lj_buf_tmp_(L), intV(o)); 166 } else if (tvisnum(o)) { 167 sb = lj_strfmt_putfnum(lj_buf_tmp_(L), STRFMT_G14, o->n); 168 } else { 169 return NULL; 170 } 171 *lenp = sbuflen(sb); 172 return sbufB(sb); 173 } 174 175 #if LJ_53 176 /* Format utf8 code into buff. Note that `buff` goes backwards. */ 177 MSize LJ_FASTCALL lj_strfmt_utf8(char *buff, unsigned long x) 178 { 179 int n = 1; /* number of bytes put in buffer (backwards) */ 180 lua_assert(x <= 0x10FFFF); 181 if (x < 0x80) /* ascii? */ 182 buff[STRFMT_MAXBUF_UTF8 - 1] = (char)x; 183 else { /* need continuation bytes */ 184 unsigned int mfb = 0x3f; /* maximum that fits in first byte */ 185 do { /* add continuation bytes */ 186 buff[STRFMT_MAXBUF_UTF8 - (n++)] = (char)(0x80 | (x & 0x3f)); 187 x >>= 6; /* remove added bits */ 188 mfb >>= 1; /* now there is one less bit available in first byte */ 189 } while (x > mfb); /* still needs continuation byte? */ 190 buff[STRFMT_MAXBUF_UTF8 - n] = (char)((~mfb << 1) | x); /* add first byte */ 191 } 192 return n; 193 } 194 #endif 195 196 197 /* -- Unformatted conversions to buffer ----------------------------------- */ 198 /* Add integer to buffer. */ 199 SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k) 200 { 201 setsbufP(sb, lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT), k)); 202 return sb; 203 } 204 205 #if LJ_HASJIT 206 /* Add number to buffer. */ 207 SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o) 208 { 209 return lj_strfmt_putfnum(sb, STRFMT_G14, o->n); 210 } 211 #endif 212 213 SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v) 214 { 215 setsbufP(sb, lj_strfmt_wptr(lj_buf_more(sb, STRFMT_MAXBUF_PTR), v)); 216 return sb; 217 } 218 219 /* Add quoted string to buffer. */ 220 SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str) 221 { 222 const char *s = strdata(str); 223 MSize len = str->len; 224 lj_buf_putb(sb, '"'); 225 while (len--) { 226 uint32_t c = (uint32_t)(uint8_t)*s++; 227 char *p = lj_buf_more(sb, 4); 228 if (c == '"' || c == '\\' || c == '\n') { 229 *p++ = '\\'; 230 } else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */ 231 uint32_t d; 232 *p++ = '\\'; 233 if (c >= 100 || lj_char_isdigit((uint8_t)*s)) { 234 *p++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100; 235 goto tens; 236 } else if (c >= 10) { 237 tens: 238 d = (c * 205) >> 11; c -= d * 10; *p++ = (char)('0'+d); 239 } 240 c += '0'; 241 } 242 *p++ = (char)c; 243 setsbufP(sb, p); 244 } 245 lj_buf_putb(sb, '"'); 246 return sb; 247 } 248 249 /* -- Formatted conversions to buffer ------------------------------------- */ 250 251 /* Add formatted char to buffer. */ 252 SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat sf, int32_t c) 253 { 254 MSize width = STRFMT_WIDTH(sf); 255 char *p = lj_buf_more(sb, width > 1 ? width : 1); 256 if ((sf & STRFMT_F_LEFT)) *p++ = (char)c; 257 while (width-- > 1) *p++ = ' '; 258 if (!(sf & STRFMT_F_LEFT)) *p++ = (char)c; 259 setsbufP(sb, p); 260 return sb; 261 } 262 263 /* Add formatted string to buffer. */ 264 SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat sf, GCstr *str) 265 { 266 MSize len = str->len <= STRFMT_PREC(sf) ? str->len : STRFMT_PREC(sf); 267 MSize width = STRFMT_WIDTH(sf); 268 char *p = lj_buf_more(sb, width > len ? width : len); 269 if ((sf & STRFMT_F_LEFT)) p = lj_buf_wmem(p, strdata(str), len); 270 while (width-- > len) *p++ = ' '; 271 if (!(sf & STRFMT_F_LEFT)) p = lj_buf_wmem(p, strdata(str), len); 272 setsbufP(sb, p); 273 return sb; 274 } 275 276 /* Add formatted signed/unsigned integer to buffer. */ 277 SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k) 278 { 279 char buf[STRFMT_MAXBUF_XINT], *q = buf + sizeof(buf), *p; 280 #ifdef LUA_USE_ASSERT 281 char *ps; 282 #endif 283 MSize prefix = 0, len, prec, pprec, width, need; 284 285 /* Figure out signed prefixes. */ 286 if (STRFMT_TYPE(sf) == STRFMT_INT) { 287 if ((int64_t)k < 0) { 288 k = (uint64_t)-(int64_t)k; 289 prefix = 256 + '-'; 290 } else if ((sf & STRFMT_F_PLUS)) { 291 prefix = 256 + '+'; 292 } else if ((sf & STRFMT_F_SPACE)) { 293 prefix = 256 + ' '; 294 } 295 } 296 297 /* Convert number and store to fixed-size buffer in reverse order. */ 298 prec = STRFMT_PREC(sf); 299 if ((int32_t)prec >= 0) sf &= ~STRFMT_F_ZERO; 300 if (k == 0) { /* Special-case zero argument. */ 301 if (prec != 0 || 302 (sf & (STRFMT_T_OCT|STRFMT_F_ALT)) == (STRFMT_T_OCT|STRFMT_F_ALT)) 303 *--q = '0'; 304 } else if (!(sf & (STRFMT_T_HEX|STRFMT_T_OCT))) { /* Decimal. */ 305 uint32_t k2; 306 while ((k >> 32)) { *--q = (char)('0' + k % 10); k /= 10; } 307 k2 = (uint32_t)k; 308 do { *--q = (char)('0' + k2 % 10); k2 /= 10; } while (k2); 309 } else if ((sf & STRFMT_T_HEX)) { /* Hex. */ 310 const char *hexdig = (sf & STRFMT_F_UPPER) ? "0123456789ABCDEF" : 311 "0123456789abcdef"; 312 do { *--q = hexdig[(k & 15)]; k >>= 4; } while (k); 313 if ((sf & STRFMT_F_ALT)) prefix = 512 + ((sf & STRFMT_F_UPPER) ? 'X' : 'x'); 314 } else { /* Octal. */ 315 do { *--q = (char)('0' + (uint32_t)(k & 7)); k >>= 3; } while (k); 316 if ((sf & STRFMT_F_ALT)) *--q = '0'; 317 } 318 319 /* Calculate sizes. */ 320 len = (MSize)(buf + sizeof(buf) - q); 321 if ((int32_t)len >= (int32_t)prec) prec = len; 322 width = STRFMT_WIDTH(sf); 323 pprec = prec + (prefix >> 8); 324 need = width > pprec ? width : pprec; 325 p = lj_buf_more(sb, need); 326 #ifdef LUA_USE_ASSERT 327 ps = p; 328 #endif 329 330 /* Format number with leading/trailing whitespace and zeros. */ 331 if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == 0) 332 while (width-- > pprec) *p++ = ' '; 333 if (prefix) { 334 if ((char)prefix >= 'X') *p++ = '0'; 335 *p++ = (char)prefix; 336 } 337 if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == STRFMT_F_ZERO) 338 while (width-- > pprec) *p++ = '0'; 339 while (prec-- > len) *p++ = '0'; 340 while (q < buf + sizeof(buf)) *p++ = *q++; /* Add number itself. */ 341 if ((sf & STRFMT_F_LEFT)) 342 while (width-- > pprec) *p++ = ' '; 343 344 lua_assert(need == (MSize)(p - ps)); 345 setsbufP(sb, p); 346 return sb; 347 } 348 349 /* Add number formatted as signed integer to buffer. */ 350 SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n) 351 { 352 int64_t k = (int64_t)n; 353 if (checki32(k) && sf == STRFMT_INT) 354 return lj_strfmt_putint(sb, (int32_t)k); /* Shortcut for plain %d. */ 355 else 356 return lj_strfmt_putfxint(sb, sf, (uint64_t)k); 357 } 358 359 /* Add number formatted as unsigned integer to buffer. */ 360 SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n) 361 { 362 int64_t k; 363 if (n >= 9223372036854775808.0) 364 k = (int64_t)(n - 18446744073709551616.0); 365 else 366 k = (int64_t)n; 367 return lj_strfmt_putfxint(sb, sf, (uint64_t)k); 368 } 369 370 /* -- Conversions to strings ---------------------------------------------- */ 371 372 /* Convert integer to string. */ 373 GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k) 374 { 375 char buf[STRFMT_MAXBUF_INT]; 376 MSize len = (MSize)(lj_strfmt_wint(buf, k) - buf); 377 return lj_str_new(L, buf, len); 378 } 379 380 /* Convert integer or number to string. */ 381 GCstr * LJ_FASTCALL lj_strfmt_number(lua_State *L, cTValue *o) 382 { 383 return tvisint(o) ? lj_strfmt_int(L, intV(o)) : lj_strfmt_num(L, o); 384 } 385 386 #if LJ_HASJIT 387 /* Convert char value to string. */ 388 GCstr * LJ_FASTCALL lj_strfmt_char(lua_State *L, int c) 389 { 390 char buf[1]; 391 buf[0] = c; 392 return lj_str_new(L, buf, 1); 393 } 394 #endif 395 396 /* Raw conversion of object to string. */ 397 GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o) 398 { 399 if (tvisstr(o)) { 400 return strV(o); 401 } else if (tvisnumber(o)) { 402 return lj_strfmt_number(L, o); 403 } else if (tvisnil(o)) { 404 return lj_str_newlit(L, "nil"); 405 } else if (tvisfalse(o)) { 406 return lj_str_newlit(L, "false"); 407 } else if (tvistrue(o)) { 408 return lj_str_newlit(L, "true"); 409 } else { 410 char buf[8+2+2+16], *p = buf; 411 p = lj_buf_wmem(p, lj_typename(o), (MSize)strlen(lj_typename(o))); 412 *p++ = ':'; *p++ = ' '; 413 if (tvisfunc(o) && isffunc(funcV(o))) { 414 p = lj_buf_wmem(p, "builtin#", 8); 415 p = lj_strfmt_wint(p, funcV(o)->c.ffid); 416 } else { 417 p = lj_strfmt_wptr(p, lj_obj_ptr(o)); 418 } 419 return lj_str_new(L, buf, (size_t)(p - buf)); 420 } 421 } 422 423 #if LJ_53 424 SBuf * LJ_FASTCALL lj_strfmt_pututf8(SBuf *sb, long n) 425 { 426 char buff[STRFMT_MAXBUF_UTF8]; 427 MSize l = lj_strfmt_utf8(buff, n); 428 lj_buf_putmem(sb, buff + STRFMT_MAXBUF_UTF8 - l, l); 429 return sb; 430 } 431 #endif 432 433 /* -- Internal string formatting ------------------------------------------ */ 434 435 /* 436 ** These functions are only used for lua_pushfstring(), lua_pushvfstring() 437 ** and for internal string formatting (e.g. error messages). Caveat: unlike 438 ** string.format(), only a limited subset of formats and flags are supported! 439 ** 440 ** LuaJIT has support for a couple more formats than Lua 5.1/5.2: 441 ** - %d %u %o %x with full formatting, 32 bit integers only. 442 ** - %f and other FP formats are really %.14g. 443 ** - %s %c %p without formatting. 444 ** - %U - utf8 445 */ 446 447 /* Push formatted message as a string object to Lua stack. va_list variant. */ 448 const char *lj_strfmt_pushvf(lua_State *L, const char *fmt, va_list argp) 449 { 450 SBuf *sb = lj_buf_tmp_(L); 451 FormatState fs; 452 SFormat sf; 453 GCstr *str; 454 lj_strfmt_init(&fs, fmt, (MSize)strlen(fmt)); 455 while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) { 456 switch (STRFMT_TYPE(sf)) { 457 case STRFMT_LIT: 458 lj_buf_putmem(sb, fs.str, fs.len); 459 break; 460 case STRFMT_INT: 461 lj_strfmt_putfxint(sb, sf, va_arg(argp, int32_t)); 462 break; 463 case STRFMT_UINT: 464 lj_strfmt_putfxint(sb, sf, va_arg(argp, uint32_t)); 465 break; 466 case STRFMT_NUM: 467 lj_strfmt_putfnum(sb, STRFMT_G14, va_arg(argp, lua_Number)); 468 break; 469 case STRFMT_STR: { 470 const char *s = va_arg(argp, char *); 471 if (s == NULL) s = "(null)"; 472 lj_buf_putmem(sb, s, (MSize)strlen(s)); 473 break; 474 } 475 case STRFMT_CHAR: 476 lj_buf_putb(sb, va_arg(argp, int)); 477 break; 478 case STRFMT_PTR: 479 lj_strfmt_putptr(sb, va_arg(argp, void *)); 480 break; 481 #if LJ_53 482 case STRFMT_UTF8: { 483 lj_strfmt_pututf8(sb, (long)va_arg(argp, long)); 484 break; 485 } 486 #endif 487 case STRFMT_ERR: 488 default: 489 lj_buf_putb(sb, '?'); 490 lua_assert(0); 491 break; 492 } 493 } 494 str = lj_buf_str(L, sb); 495 setstrV(L, L->top, str); 496 incr_top(L); 497 return strdata(str); 498 } 499 500 /* Push formatted message as a string object to Lua stack. Vararg variant. */ 501 const char *lj_strfmt_pushf(lua_State *L, const char *fmt, ...) 502 { 503 const char *msg; 504 va_list argp; 505 va_start(argp, fmt); 506 msg = lj_strfmt_pushvf(L, fmt, argp); 507 va_end(argp); 508 return msg; 509 } 510