lib_string.c (35869B)
/*
** String library.
** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
**
** Lua 5.2 semantics and string.match() compiling
** Copyright (C) 2014 Karel Tuma. See Copyright Notice in luajit.h
**
** Major portions taken verbatim or adapted from the Lua interpreter.
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
*/

#define lib_string_c
#define LUA_LIB

#include "lua.h"
#include "lauxlib.h"
#include "lualib.h"

#include "lj_obj.h"
#include "lj_gc.h"
#include "lj_err.h"
#include "lj_buf.h"
#include "lj_str.h"
#include "lj_tab.h"
#include "lj_meta.h"
#include "lj_state.h"
#include "lj_ff.h"
#include "lj_bcdump.h"
#include "lj_char.h"
#include "lj_strfmt.h"
#include "lj_lib.h"

/* ------------------------------------------------------------------------ */

#define LJLIB_MODULE_string

/*
** NOTE(review): the commented-out Lua text below is presumably consumed by
** the build tooling as the body of string.len -- confirm before touching it.
*/
LJLIB_LUA(string_len) /*
  function(s)
    CHECK_str(s)
    return #s
  end
*/

/*
** Byte values of a string range.
** Argument 1 must be a string; arguments 2 (start) and 3 (stop) are optional
** integers defaulting to 1 and to start, respectively. Negative positions
** are offset by len+1, then the range is clamped to [1, len].
** An empty range yields no results. A range longer than LUAI_MAXCSTACK
** raises LJ_ERR_STRSLC. Otherwise the n byte values are stored as integers
** starting at slot L->base-1-LJ_FR2 and FFH_RES(n) is returned.
*/
LJLIB_ASM(string_byte)          LJLIB_REC(string_range 0)
{
  GCstr *s = lj_lib_checkstr(L, 1);
  int32_t len = (int32_t)s->len;
  int32_t start = lj_lib_optint(L, 2, 1);
  int32_t stop = lj_lib_optint(L, 3, start);
  int32_t n, i;
  const unsigned char *p;
  if (stop < 0) stop += len+1;
  if (start < 0) start += len+1;
  if (start <= 0) start = 1;
  if (stop > len) stop = len;
  if (start > stop) return FFH_RES(0);  /* Empty interval: return no results. */
  start--;  /* Convert to a 0-based offset. */
  n = stop - start;
  if ((uint32_t)n > LUAI_MAXCSTACK)
    lj_err_caller(L, LJ_ERR_STRSLC);
  lj_state_checkstack(L, (MSize)n);
  p = (const unsigned char *)strdata(s) + start;
  for (i = 0; i < n; i++)
    setintV(L->base + i-1-LJ_FR2, p[i]);
  return FFH_RES(n);
}

LJLIB_ASM(string_char)          LJLIB_REC(.)
{
  /* One output byte per argument; the temp buffer is sized to the arg count. */
  int i, nargs = (int)(L->top - L->base);
  char *buf = lj_buf_tmp(L, (MSize)nargs);
  for (i = 1; i <= nargs; i++) {
    int32_t k = lj_lib_checkint(L, i);
    if (!checku8(k))  /* Argument fails the checku8() range test. */
      lj_err_arg(L, i, LJ_ERR_BADVAL);
    buf[i-1] = (char)k;
  }
  setstrV(L, L->base-1-LJ_FR2, lj_str_new(L, buf, (size_t)nargs));
  return FFH_RES(1);
}

/*
** Argument validation for the substring function: argument 1 must be a
** string, argument 2 an integer, and argument 3 is replaced in place by its
** integer value (default -1). Always returns FFH_RETRY.
*/
LJLIB_ASM(string_sub)           LJLIB_REC(string_range 1)
{
  lj_lib_checkstr(L, 1);
  lj_lib_checkint(L, 2);
  setintV(L->base+2, lj_lib_optint(L, 3, -1));
  return FFH_RETRY;
}

/*
** Repeat string s (arg 1) rep times (arg 2) with an optional separator
** (arg 3). With a separator and rep > 1, s is emitted once and then the
** concatenation sep..s is emitted rep-1 times. The result replaces the top
** stack slot; returns 1.
*/
LJLIB_CF(string_rep)            LJLIB_REC(.)
{
  GCstr *s = lj_lib_checkstr(L, 1);
  int32_t rep = lj_lib_checkint(L, 2);
  GCstr *sep = lj_lib_optstr(L, 3);
  SBuf *sb = lj_buf_tmp_(L);
  if (sep && rep > 1) {
    GCstr *s2 = lj_buf_cat2str(L, sep, s);
    lj_buf_reset(sb);  /* The temp buffer is reset after building s2. */
    lj_buf_putstr(sb, s);
    s = s2;
    rep--;
  }
  sb = lj_buf_putstr_rep(sb, s, rep);
  setstrV(L, L->top-1, lj_buf_str(L, sb));
  lj_gc_check(L);
  return 1;
}

/*
** Argument validation shared by reverse/lower/upper: argument 1 must be a
** string; always returns FFH_RETRY.
*/
LJLIB_ASM(string_reverse)  LJLIB_REC(string_op IRCALL_lj_buf_putstr_reverse)
{
  lj_lib_checkstr(L, 1);
  return FFH_RETRY;
}
LJLIB_ASM_(string_lower)  LJLIB_REC(string_op IRCALL_lj_buf_putstr_lower)
LJLIB_ASM_(string_upper)  LJLIB_REC(string_op IRCALL_lj_buf_putstr_upper)

/* ------------------------------------------------------------------------ */

/* Writer callback for lj_bcwrite(): appends each chunk to the SBuf in 'sb'. */
static int writer_buf(lua_State *L, const void *p, size_t size, void *sb)
{
  lj_buf_putmem((SBuf *)sb, p, (MSize)size);
  UNUSED(L);
  return 0;
}

/*
** Dump the bytecode of a Lua function (arg 1) to a string. A true second
** argument requests stripping. Raises LJ_ERR_STRDUMP if the function is not
** a Lua function or if lj_bcwrite() returns nonzero.
*/
LJLIB_CF(string_dump)
{
  GCfunc *fn = lj_lib_checkfunc(L, 1);
  int strip = L->base+1 < L->top && tvistruecond(L->base+1);
  SBuf *sb = lj_buf_tmp_(L);  /* Assumes lj_bcwrite() doesn't use tmpbuf.
*/
  L->top = L->base+1;
  if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, sb, strip))
    lj_err_caller(L, LJ_ERR_STRDUMP);
  setstrV(L, L->top-1, lj_buf_str(L, sb));
  lj_gc_check(L);
  return 1;
}

/* ------------------------------------------------------------------------ */

/* macro to `unsign' a character */
#define uchar(c)        ((unsigned char)(c))

/* Escape character used in patterns. */
#define L_ESC           '%'

/*
** Convert the capture digit character l ('1'..) to a capture index and
** validate it: raises LJ_ERR_STRCAPI if the index is out of range or the
** capture is still unfinished.
*/
static int check_capture(MatchState *ms, int l)
{
  l -= '1';
  if (l < 0 || l >= ms->level || ms->capture[l].len == CAP_UNFINISHED)
    lj_err_caller(ms->L, LJ_ERR_STRCAPI);
  return l;
}

/*
** Return the index of the innermost capture that is still unfinished.
** Raises LJ_ERR_STRPATC if there is none.
*/
static int capture_to_close(MatchState *ms)
{
  int level = ms->level;
  for (level--; level>=0; level--)
    if (ms->capture[level].len == CAP_UNFINISHED) return level;
  lj_err_caller(ms->L, LJ_ERR_STRPATC);
  return 0;  /* unreachable */
}

/*
** Return a pointer just past the single pattern item starting at p:
** an escaped character ("%x"), a bracket class ("[...]"), or one plain
** character. Raises LJ_ERR_STRPATE for a pattern ending in the escape
** character and LJ_ERR_STRPATM for an unterminated bracket class.
*/
static const char *classend(MatchState *ms, const char *p)
{
  switch (*p++) {
  case L_ESC:
    if (p == ms->p_end)
      lj_err_caller(ms->L, LJ_ERR_STRPATE);
    return p+1;
  case '[':
    if (*p == '^') p++;
    do {  /* look for a `]' */
      if (p == ms->p_end)
        lj_err_caller(ms->L, LJ_ERR_STRPATM);
      if (*(p++) == L_ESC && p < ms->p_end)
        p++;  /* skip escapes (e.g. `%]') */
    } while (*p != ']');
    return p+1;
  default:
    return p;
  }
}

/*
** Character-class table indexed by the low 5 bits of the class letter
** (see match_class): e.g. index 1 ('a'/'A') maps to LJ_CHAR_ALPHA.
** A zero entry means the letter is not a table-driven class.
*/
static const unsigned char match_class_map[32] = {
  0,LJ_CHAR_ALPHA,0,LJ_CHAR_CNTRL,LJ_CHAR_DIGIT,0,0,LJ_CHAR_GRAPH,0,0,0,0,
  LJ_CHAR_LOWER,0,0,0,LJ_CHAR_PUNCT,0,0,LJ_CHAR_SPACE,0,
  LJ_CHAR_UPPER,0,LJ_CHAR_ALNUM,LJ_CHAR_XDIGIT,0,0,0,0,0,0,0
};

/*
** Test character c against the single class character cl (the character
** following the escape). For table-driven classes bit 0x20 of cl selects
** the plain (set) or negated (clear) sense. 'z'/'Z' test for (non-)zero.
** Any other cl is compared literally against c.
*/
static int match_class(int c, int cl)
{
  if ((cl & 0xc0) == 0x40) {  /* cl is in the range 0x40..0x7f. */
    int t = match_class_map[(cl&0x1f)];
    if (t) {
      t = lj_char_isa(c, t);
      return (cl & 0x20) ?
t : !t; 197 } 198 if (cl == 'z') return c == 0; 199 if (cl == 'Z') return c != 0; 200 } 201 return (cl == c); 202 } 203 204 static int matchbracketclass(int c, const char *p, const char *ec) 205 { 206 int sig = 1; 207 if (*(p+1) == '^') { 208 sig = 0; 209 p++; /* skip the `^' */ 210 } 211 while (++p < ec) { 212 if (*p == L_ESC) { 213 p++; 214 if (match_class(c, uchar(*p))) 215 return sig; 216 } 217 else if ((*(p+1) == '-') && (p+2 < ec)) { 218 p+=2; 219 if (uchar(*(p-2)) <= c && c <= uchar(*p)) 220 return sig; 221 } 222 else if (uchar(*p) == c) return sig; 223 } 224 return !sig; 225 } 226 227 static int singlematch(MatchState *ms, const char *s, const char *p, const char *ep) 228 { 229 int c = uchar(*s); 230 if (s >= ms->src_end) 231 return 0; 232 switch (*p) { 233 case '.': return 1; /* matches any char */ 234 case L_ESC: return match_class(c, uchar(*(p+1))); 235 case '[': return matchbracketclass(c, p, ep-1); 236 default: return (uchar(*p) == c); 237 } 238 } 239 240 static const char *match(MatchState *ms, const char *s, const char *p); 241 242 static const char *matchbalance(MatchState *ms, const char *s, const char *p) 243 { 244 if (p >= ms->p_end - 1) 245 lj_err_callerv(ms->L, LJ_ERR_STRPATPB); 246 if (*s != *p) { 247 return NULL; 248 } else { 249 int b = *p; 250 int e = *(p+1); 251 int cont = 1; 252 while (++s < ms->src_end) { 253 if (*s == e) { 254 if (--cont == 0) return s+1; 255 } else if (*s == b) { 256 cont++; 257 } 258 } 259 } 260 return NULL; /* string ends out of balance */ 261 } 262 263 static const char *max_expand(MatchState *ms, const char *s, 264 const char *p, const char *ep) 265 { 266 ptrdiff_t i = 0; /* counts maximum expand for item */ 267 while (singlematch(ms, s+i, p, ep)) 268 i++; 269 /* keeps trying to match with the maximum repetitions */ 270 while (i>=0) { 271 const char *res = match(ms, (s+i), ep+1); 272 if (res) return res; 273 i--; /* else didn't match; reduce 1 repetition to try again */ 274 } 275 return NULL; 276 } 277 278 static 
const char *min_expand(MatchState *ms, const char *s,
                              const char *p, const char *ep)
{
  /*
  ** Lazy repetition: try the rest of the pattern (ep+1) first and only
  ** consume one more match of the item [p, ep) when that fails.
  */
  for (;;) {
    const char *res = match(ms, s, ep+1);
    if (res != NULL)
      return res;
    else if (singlematch(ms, s, p, ep))
      s++;  /* try with one more repetition */
    else
      return NULL;
  }
}

/*
** Open a new capture at s with initial length marker 'what'
** (CAP_UNFINISHED or CAP_POSITION) and continue matching at p.
** The capture is dropped again if the rest of the match fails.
** Raises LJ_ERR_STRCAPN when LUA_MAXCAPTURES captures are already open.
*/
static const char *start_capture(MatchState *ms, const char *s,
                                 const char *p, int what)
{
  const char *res;
  int level = ms->level;
  if (level >= LUA_MAXCAPTURES) lj_err_caller(ms->L, LJ_ERR_STRCAPN);
  setmref(ms->capture[level].init, s);
  ms->capture[level].len = what;
  ms->level = level+1;
  if ((res=match(ms, s, p)) == NULL)  /* match failed? */ {
    lua_assert(ms->level);
    ms->level--;  /* undo capture */
  }
  return res;
}

/*
** Close the innermost unfinished capture at s and continue matching at p.
** The capture is re-marked as unfinished if the rest of the match fails.
*/
static const char *end_capture(MatchState *ms, const char *s,
                               const char *p)
{
  int l = capture_to_close(ms);
  const char *res;
  ms->capture[l].len = s - mref(ms->capture[l].init, char);  /* close capture */
  if ((res = match(ms, s, p)) == NULL)  /* match failed? */
    ms->capture[l].len = CAP_UNFINISHED;  /* undo capture */
  return res;
}

/*
** Back-reference: match the text of the capture named by digit character l
** at s. Returns the end of the matched text or NULL.
*/
static const char *match_capture(MatchState *ms, const char *s, int l)
{
  size_t len;
  l = check_capture(ms, l);
  len = (size_t)ms->capture[l].len;
  if ((size_t)(ms->src_end-s) >= len &&
      memcmp(mref(ms->capture[l].init, char), s, len) == 0)
    return s+len;
  else
    return NULL;
}

/*
** Core backtracking matcher: match pattern p against subject s.
** Returns the end of the match or NULL. Every call bumps ms->depth and
** ms->backtracks; exceeding LJ_MAX_XLEVEL or LJ_MAX_MSBT raises
** LJ_ERR_STRPATX. ms->depth is decremented again on return.
*/
static const char *match(MatchState *ms, const char *s, const char *p)
{
  if (++ms->depth > LJ_MAX_XLEVEL || ++ms->backtracks > LJ_MAX_MSBT)
    lj_err_caller(ms->L, LJ_ERR_STRPATX);
init: /* using goto's to optimize tail recursion */
  if (p != ms->p_end) switch (*p) {
  case '(':  /* start capture */
    if (*(p+1) == ')') {  /* position capture?
*/
      s = start_capture(ms, s, p+2, CAP_POSITION);
    } else
      s = start_capture(ms, s, p+1, CAP_UNFINISHED);
    break;
  case ')':  /* end capture */
    s = end_capture(ms, s, p+1);
    break;
  case L_ESC:
    switch (*(p+1)) {
    case 'b':  /* balanced string? */
      s = matchbalance(ms, s, p+2);
      if (s == NULL) break;
      p+=4;  /* Skip the escape, 'b' and both delimiter characters. */
      goto init;  /* else s = match(ms, s, p+4); */
    case 'f': {  /* frontier? */
      const char *ep; char previous;
      p += 2;
      if (*p != '[')
        lj_err_caller(ms->L, LJ_ERR_STRPATB);
      ep = classend(ms, p);  /* points to what is next */
      /* The character before the subject start is treated as '\0'. */
      previous = (s == ms->src_init) ? '\0' : *(s-1);
      /* Fail unless previous is outside the class and *s is inside it. */
      if (matchbracketclass(uchar(previous), p, ep-1) ||
         !matchbracketclass(uchar(*s), p, ep-1)) { s = NULL; break; }
      p=ep;
      goto init;  /* else s = match(ms, s, ep); */
      }
    default:
      if (lj_char_isdigit(uchar(*(p+1)))) {  /* capture results (%0-%9)? */
        s = match_capture(ms, s, uchar(*(p+1)));
        if (s == NULL) break;
        p+=2;
        goto init;  /* else s = match(ms, s, p+2) */
      }
      goto dflt;  /* case default */
    }
    break;
  case '$':
    /* is the `$' the last char in pattern? */
    if ((p + 1) != ms->p_end)
      goto dflt;
    if (s != ms->src_end) s = NULL;  /* check end of string */
    break;
  default: dflt: {  /* it is a pattern item */
    const char *ep = classend(ms, p);  /* points to what is next */
    int m = singlematch(ms, s, p, ep);
    switch (*ep) {  /* Dispatch on the repetition suffix, if any. */
    case '?': {  /* optional */
      const char *res;
      if (m && ((res=match(ms, s+1, ep+1)) != NULL)) {
        s = res;
        break;
      }
      p=ep+1;
      goto init;  /* else s = match(ms, s, ep+1); */
      }
    case '*':  /* 0 or more repetitions */
      s = max_expand(ms, s, p, ep);
      break;
    case '+':  /* 1 or more repetitions */
      s = (m ?
max_expand(ms, s+1, p, ep) : NULL);
      break;
    case '-':  /* 0 or more repetitions (minimum) */
      s = min_expand(ms, s, p, ep);
      break;
    default:
      if (m) { s++; p=ep; goto init; }  /* else s = match(ms, s+1, ep); */
      s = NULL;
      break;
    }
    break;
    }
  }
  ms->depth--;
  return s;
}

/*
** Push capture i onto the Lua stack. If i is not a recorded capture, only
** i == 0 is accepted and the whole match [s, e) is pushed; any other index
** raises LJ_ERR_STRCAPI. A position capture pushes its 1-based offset as an
** integer; an unfinished capture raises LJ_ERR_STRCAPU.
*/
static void push_onecapture(MatchState *ms, int i, const char *s, const char *e)
{
  if (i >= ms->level) {
    lua_assert(ms->level>=0);
    if (i == 0)  /* ms->level == 0, too */
      lua_pushlstring(ms->L, s, (size_t)(e - s));  /* add whole match */
    else
      lj_err_caller(ms->L, LJ_ERR_STRCAPI);
  } else {
    ptrdiff_t l = ms->capture[i].len;
    if (l == CAP_UNFINISHED) lj_err_caller(ms->L, LJ_ERR_STRCAPU);
    if (l == CAP_POSITION)
      lua_pushinteger(ms->L, mref(ms->capture[i].init, char) - ms->src_init + 1);
    else
      lua_pushlstring(ms->L, mref(ms->capture[i].init, char), (size_t)l);
  }
}

/*
** Push all captures of the current match. With no captures and a non-NULL
** s, the whole match [s, e) is pushed instead. Returns the number of values
** pushed.
*/
static int push_captures(MatchState *ms, const char *s, const char *e)
{
  int i;
  int nlevels = (ms->level == 0 && s) ? 1 : ms->level;
  lua_assert(nlevels >= 0);
  luaL_checkstack(ms->L, nlevels, "too many captures");
  for (i = 0; i < nlevels; i++)
    push_onecapture(ms, i, s, e);
  return nlevels;  /* number of strings pushed */
}

/*
** Match pattern p (plen bytes) against subject s (slen bytes), starting at
** the 1-based position 'start' (negative values count from the end).
** Uses the MatchState stored in G(L)->ms. On success the 1-based bounds of
** the match are stored in findret1/findret2, capture 0 is synthesized as the
** whole match when the pattern recorded no captures, and the state is
** returned. Returns NULL if there is no match or the start position lies
** past the end of the subject.
*/
MatchState * ljx_str_match(lua_State *L, const char *s, const char *p,
    MSize slen, MSize plen, int32_t start)
{
  MatchState *ms = &G(L)->ms;
  int anchor = 0;
  MSize st;
  const char *sstr;
  if (start < 0) start += (int32_t)slen; else start--;
  if (start < 0) start = 0;
  st = start;
  if (st > slen)
    return NULL;
  sstr = s + start;
  if (*p == '^') { p++; anchor = 1; }
  ms->L = L;
  ms->src_init = s;
  ms->src_end = s + slen;
  ms->p_end = p + plen - anchor;  /* p was already advanced past the '^'. */
  do {  /* Loop through string and try to match the pattern.
*/
    const char *q;
    ms->level = ms->depth = ms->backtracks = 0;
    q = match(ms, sstr, p);
    if (q) {
      /* No capture - simulate one return capture. */
      lua_assert(sstr>=s);
      lua_assert(q>=s);
      ms->findret1 = (int32_t)(sstr-s+1);
      ms->findret2 = (int32_t)(q-s);
      if (!ms->level) {
        setmref(ms->capture[0].init, sstr);
        ms->capture[0].len = q - sstr;
        ms->level = 1;
      }
      return ms;
    }
  } while (!anchor && (sstr++ < ms->src_end));
  return NULL;
}

/*
** Shared implementation of the find and match library functions.
** Arguments: subject (1), pattern (2), optional start position (3) and, for
** find, an optional "plain" flag (4). With find != 0 and either a true plain
** flag or a pattern without special characters, a fixed-string search is
** done. On success find pushes the 1-based start/end positions followed by
** any captures, while match pushes the captures (or the whole match).
** Without a match a single nil is returned.
*/
static int str_find_aux(lua_State *L, int find)
{
  GCstr *s = lj_lib_checkstr(L, 1);
  GCstr *p = lj_lib_checkstr(L, 2);
  int32_t start = lj_lib_optint(L, 3, 1);
  MSize st;
  MatchState ms;
  const char *pstr;
  const char *sstr;
  int anchor;

  if (find && ((L->base+3 < L->top && tvistruecond(L->base+3)) ||
               !lj_str_haspattern(p))) {  /* Search for fixed string. */
    int n = lj_str_find(strdata(s), strdata(p), s->len, p->len, start);
    if (n) {
      setintV(L->top-2, n);
      setintV(L->top-1, n+p->len-1);
      return 2;
    }
  } else {  /* Search for pattern. */
    if (start < 0) start += (int32_t)s->len; else start--;
    if (start < 0) start = 0;
    st = (MSize)start;
    if (st > s->len) {  /* Start position past the end: no match. */
      setnilV(L->top-1);
      return 1;
    }
    pstr = strdata(p);
    sstr = strdata(s) + st;
    anchor = 0;
    if (*pstr == '^') { pstr++; anchor = 1; }
    ms.L = L;
    ms.src_init = strdata(s);
    ms.src_end = strdata(s) + s->len;
    ms.p_end = pstr + p->len - anchor;
    do {  /* Loop through string and try to match the pattern.
*/
      const char *q;
      ms.level = ms.depth = ms.backtracks = 0;
      q = match(&ms, sstr, pstr);
      if (q) {
        if (find) {
          /* 1-based start and end positions of the match. */
          setintV(L->top++, (int32_t)(sstr-(strdata(s)-1)));
          setintV(L->top++, (int32_t)(q-strdata(s)));
          return push_captures(&ms, NULL, NULL) + 2;
        } else {
          return push_captures(&ms, sstr, q);
        }
      }
    } while (sstr++ < ms.src_end && !anchor);
  }
  setnilV(L->top-1);  /* Not found. */
  return 1;
}

/* Library entry point: str_find_aux() in find mode. */
LJLIB_CF(string_find)           LJLIB_REC(string_findmatch 1)
{
  return str_find_aux(L, 1);
}

/* Library entry point: str_find_aux() in match mode. */
LJLIB_CF(string_match)          LJLIB_REC(string_findmatch 0)
{
  return str_find_aux(L, 0);
}

/*
** Iterator function behind gmatch. Upvalue 1 is the subject, upvalue 2 the
** pattern and upvalue 3 holds the current scan offset in its u32.lo field.
** Returns the captures of the next match and stores the resume offset, or
** returns no values when the subject is exhausted.
*/
LJLIB_NOREG LJLIB_CF(string_gmatch_aux)
{
  GCstr *pstr = strV(lj_lib_upvalue(L, 2));
  GCstr *str = strV(lj_lib_upvalue(L, 1));
  const char *s = strdata(str);
  const char *p = strdata(pstr);
  TValue *tvpos = lj_lib_upvalue(L, 3);
  const char *src = s + tvpos->u32.lo;
  MatchState ms;
  ms.L = L;
  ms.src_init = s;
  ms.src_end = s + str->len;
  ms.p_end = p + pstr->len;
  for (; src <= ms.src_end; src++) {
    const char *e;
    ms.level = ms.depth = ms.backtracks = 0;
    if ((e = match(&ms, src, p)) != NULL) {
      int32_t pos = (int32_t)(e - s);
      if (e == src) pos++;  /* Ensure progress for empty match.
*/
      tvpos->u32.lo = (uint32_t)pos;
      return push_captures(&ms, src, e);
    }
  }
  return 0;  /* not found */
}

/*
** Create a gmatch iterator: checks that arguments 1 and 2 are strings,
** zero-initializes a third slot (the scan offset) and returns a C closure
** over these three values.
*/
LJLIB_CF(string_gmatch)
{
  lj_lib_checkstr(L, 1);
  lj_lib_checkstr(L, 2);
  L->top = L->base+3;
  (L->top-1)->u64 = 0;
  lj_lib_pushcc(L, lj_cf_string_gmatch_aux, FF_string_gmatch_aux, 3);
  return 1;
}

/*
** Append the replacement string (stack argument 3) to buffer b, expanding
** escapes: the escape followed by '0' inserts the whole match [s, e), the
** escape followed by '1'..'9' inserts that capture, and the escape followed
** by any other character inserts that character literally.
*/
static void add_s(MatchState *ms, luaL_Buffer *b, const char *s, const char *e)
{
  size_t l, i;
  const char *news = lua_tolstring(ms->L, 3, &l);
  for (i = 0; i < l; i++) {
    if (news[i] != L_ESC) {
      luaL_addchar(b, news[i]);
    } else {
      i++;  /* skip ESC */
      if (!lj_char_isdigit(uchar(news[i]))) {
        luaL_addchar(b, news[i]);
      } else if (news[i] == '0') {
        luaL_addlstring(b, s, (size_t)(e - s));
      } else {
        push_onecapture(ms, news[i] - '1', s, e);
        luaL_addvalue(b);  /* add capture to accumulated result */
      }
    }
  }
}

/*
** Append the replacement for the match [s, e) to buffer b, depending on the
** type of stack argument 3: a string/number is expanded via add_s(), a
** function is called with the captures, a table is indexed with the first
** capture. A false or nil result keeps the original text; any other
** non-string result raises LJ_ERR_STRGSRV.
*/
static void add_value(MatchState *ms, luaL_Buffer *b,
                      const char *s, const char *e)
{
  lua_State *L = ms->L;
  switch (lua_type(L, 3)) {
    case LUA_TNUMBER:
    case LUA_TSTRING: {
      add_s(ms, b, s, e);
      return;
    }
    case LUA_TFUNCTION: {
      int n;
      lua_pushvalue(L, 3);
      n = push_captures(ms, s, e);
      lua_call(L, n, 1);
      break;
    }
    case LUA_TTABLE: {
      push_onecapture(ms, 0, s, e);
      lua_gettable(L, 3);
      break;
    }
  }
  if (!lua_toboolean(L, -1)) {  /* nil or false?
*/
    lua_pop(L, 1);
    lua_pushlstring(L, s, (size_t)(e - s));  /* keep original text */
  } else if (!lua_isstring(L, -1)) {
    lj_err_callerv(L, LJ_ERR_STRGSRV, luaL_typename(L, -1));
  }
  luaL_addvalue(b);  /* add result to accumulator */
}

/*
** Global substitution. Arguments: subject (1), pattern (2), replacement (3:
** string, number, function or table) and an optional maximum number of
** substitutions (4, default: subject length + 1). Returns the resulting
** string and the number of substitutions made. Raises LJ_ERR_NOSFT for any
** other replacement type.
*/
LJLIB_CF(string_gsub)
{
  size_t srcl, pl;
  const char *src = luaL_checklstring(L, 1, &srcl);
  const char *p = luaL_checklstring(L, 2, &pl);
  int  tr = lua_type(L, 3);
  int max_s = luaL_optint(L, 4, (int)(srcl+1));
  int anchor = (*p == '^') ? (p++, 1) : 0;
  int n = 0;
  MatchState ms;
  luaL_Buffer b;
  if (!(tr == LUA_TNUMBER || tr == LUA_TSTRING ||
        tr == LUA_TFUNCTION || tr == LUA_TTABLE))
    lj_err_arg(L, 3, LJ_ERR_NOSFT);
  luaL_buffinit(L, &b);
  ms.L = L;
  ms.src_init = src;
  ms.src_end = src+srcl;
  ms.p_end = p+pl-anchor;  /* p was already advanced past the '^'. */
  while (n < max_s) {
    const char *e;
    ms.level = ms.depth = ms.backtracks = 0;
    e = match(&ms, src, p);
    if (e) {
      n++;
      add_value(&ms, &b, src, e);
    }
    if (e && e>src) /* non empty match? */
      src = e;  /* skip it */
    else if (src < ms.src_end)
      luaL_addchar(&b, *src++);  /* Copy one unmatched character. */
    else
      break;
    if (anchor)
      break;
  }
  luaL_addlstring(&b, src, (size_t)(ms.src_end-src));  /* Copy the remainder. */
  luaL_pushresult(&b);
  lua_pushinteger(L, n);  /* number of substitutions */
  return 2;
}

/* ------------------------------------------------------------------------ */

/*
** Emulate tostring() inline.
** Returns the string for argument 'arg'. If the argument is not a string
** and has a __tostring metamethod (and retry != 2), the metamethod is
** called, its result replaces the argument slot and NULL is returned so
** that the caller restarts formatting.
*/
static GCstr *string_fmt_tostring(lua_State *L, int arg, int retry)
{
  TValue *o = L->base+arg-1;
  cTValue *mo;
  lua_assert(o < L->top);  /* Caller already checks for existence. */
  if (LJ_LIKELY(tvisstr(o)))
    return strV(o);
  if (retry != 2 && !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
    copyTV(L, L->top++, mo);
    copyTV(L, L->top++, o);
    lua_call(L, 1, 1);
    copyTV(L, L->base+arg-1, --L->top);
    return NULL;  /* Buffer may be overwritten, retry.
*/
  }
  return lj_strfmt_obj(L, o);
}

/*
** Formatted output: argument 1 is the format string, the remaining
** arguments are consumed one per conversion. If string_fmt_tostring() had
** to call a metamethod during the first pass (retry becomes 1), the whole
** format is redone once from the start with retry == 2, which skips the
** metamethod lookup.
*/
LJLIB_CF(string_format)         LJLIB_REC(.)
{
  int arg, top = (int)(L->top - L->base);
  GCstr *fmt;
  SBuf *sb;
  FormatState fs;
  SFormat sf;
  int retry = 0;
again:
  arg = 1;
  sb = lj_buf_tmp_(L);
  fmt = lj_lib_checkstr(L, arg);
  lj_strfmt_init(&fs, strdata(fmt), fmt->len);
  while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {
    if (sf == STRFMT_LIT) {
      lj_buf_putmem(sb, fs.str, fs.len);
    } else if (sf == STRFMT_ERR) {
      lj_err_callerv(L, LJ_ERR_STRFMT, strdata(lj_str_new(L, fs.str, fs.len)));
    } else {
      if (++arg > top)  /* Conversion without a matching argument. */
        luaL_argerror(L, arg, lj_obj_typename[0]);
      switch (STRFMT_TYPE(sf)) {
      case STRFMT_INT:
        if (tvisint(L->base+arg-1)) {
          int32_t k = intV(L->base+arg-1);
          if (sf == STRFMT_INT)
            lj_strfmt_putint(sb, k);  /* Shortcut for plain %d. */
          else
            lj_strfmt_putfxint(sb, sf, k);
        } else {
          lj_strfmt_putfnum_int(sb, sf, lj_lib_checknum(L, arg));
        }
        break;
      case STRFMT_UINT:
        if (tvisint(L->base+arg-1))
          lj_strfmt_putfxint(sb, sf, intV(L->base+arg-1));
        else
          lj_strfmt_putfnum_uint(sb, sf, lj_lib_checknum(L, arg));
        break;
      case STRFMT_NUM:
        lj_strfmt_putfnum(sb, sf, lj_lib_checknum(L, arg));
        break;
      case STRFMT_STR: {
        GCstr *str = string_fmt_tostring(L, arg, retry);
        if (str == NULL)
          retry = 1;
        else if ((sf & STRFMT_T_QUOTED))
          lj_strfmt_putquoted(sb, str);  /* No formatting. */
        else
          lj_strfmt_putfstr(sb, sf, str);
        break;
        }
      case STRFMT_CHAR:
        lj_strfmt_putfchar(sb, sf, lj_lib_checkint(L, arg));
        break;
      case STRFMT_PTR:  /* No formatting.
*/
        lj_strfmt_putptr(sb, lj_obj_ptr(L->base+arg-1));
        break;
#if LJ_53
      case STRFMT_UTF8:
        lj_strfmt_pututf8(sb, lj_lib_checkint(L, arg));
        break;
#endif
      default:
        lua_assert(0);
        break;
      }
    }
  }
  if (retry++ == 1) goto again;  /* Redo once after a metamethod call. */
  setstrV(L, L->top-1, lj_buf_str(L, sb));
  lj_gc_check(L);
  return 1;
}

#if LJ_53


/* macro to 'unsign' a character */
#define uchar(c)        ((unsigned char)(c))


/*
** Some sizes are better limited to fit in 'int', but must also fit in
** 'size_t'. (We assume that 'lua_Integer' cannot be smaller than 'int'.)
*/
#define MAX_SIZET       ((size_t)(~(size_t)0))

#define MAXSIZE  \
        (sizeof(size_t) < sizeof(int) ? MAX_SIZET : (size_t)(INT_MAX))





/*
** translate a relative string position: negative means back from end.
** Non-negative positions are returned unchanged; a negative position whose
** magnitude exceeds 'len' yields 0.
*/
static lua_Integer posrelat (lua_Integer pos, size_t len) {
  if (pos >= 0) return pos;
  else if (0u - (size_t)pos > len) return 0;
  else return (lua_Integer)len + pos + 1;
}



/* value used for padding */
#if !defined(LUA_PACKPADBYTE)
#define LUA_PACKPADBYTE         0x00
#endif

/* maximum size for the binary representation of an integer */
#define MAXINTSIZE      16

/* number of bits in a character */
#define NB      CHAR_BIT

/* mask for one character (NB 1's) */
#define MC      ((1 << NB) - 1)

/* size of a lua_Integer */
#define SZINT   ((int)sizeof(lua_Integer))


/* dummy union to get native endianness */
static const union {
  int dummy;
  char little;  /* true iff machine is little endian */
} nativeendian = {1};


/* dummy structure to get native alignment requirements */
struct cD {
  char c;
  union { double d; void *p; lua_Integer i; lua_Number n; } u;
};

/* Offset of the union member, i.e. the alignment the compiler gave it. */
#define MAXALIGN        (offsetof(struct cD, u))


/*
** Union for serializing floats
*/
typedef union Ftypes {
float f;
  double d;
  lua_Number n;
  char buff[5 * sizeof(lua_Number)];  /* enough for any float type */
} Ftypes;


/*
** information to pack/unpack stuff
*/
typedef struct Header {
  lua_State *L;
  int islittle;  /* nonzero: use little-endian byte order */
  int maxalign;  /* upper bound applied to option alignment */
} Header;


/*
** options for pack/unpack
*/
typedef enum KOption {
  Kint,         /* signed integers */
  Kuint,        /* unsigned integers */
  Kfloat,       /* floating-point numbers */
  Kchar,        /* fixed-length strings */
  Kstring,      /* strings with prefixed length */
  Kzstr,        /* zero-terminated strings */
  Kpadding,     /* padding */
  Kpaddalign,   /* padding for alignment */
  Knop          /* no-op (configuration or spaces) */
} KOption;


/*
** Read an integer numeral from string 'fmt' or return 'df' if
** there is no numeral. '*fmt' is advanced past the digits consumed;
** reading stops early once another digit could exceed MAXSIZE.
*/
static int digit (int c) { return '0' <= c && c <= '9'; }

static int getnum (const char **fmt, int df) {
  if (!digit(**fmt))  /* no number? */
    return df;  /* return default value */
  else {
    int a = 0;
    do {
      a = a*10 + (*((*fmt)++) - '0');
    } while (digit(**fmt) && a <= ((int)MAXSIZE - 9)/10);
    return a;
  }
}


/*
** Read an integer numeral and raises an error if it is larger
** than the maximum size for integers (or not positive).
*/
static int getnumlimit (Header *h, const char **fmt, int df) {
  int sz = getnum(fmt, df);
  if (sz > MAXINTSIZE || sz <= 0)
    luaL_error(h->L, "integral size (%d) out of limits [1,%d]",
                     sz, MAXINTSIZE);
  return sz;
}


/*
** Initialize Header: native endianness, maximum alignment 1.
*/
static void initheader (lua_State *L, Header *h) {
  h->L = L;
  h->islittle = nativeendian.little;
  h->maxalign = 1;
}


/*
** Read and classify next option. 'size' is filled with option's size.
** Configuration options ('<', '>', '=', '!') update the header and, like
** a space, are reported as Knop. An unknown option raises an error.
*/
static KOption getoption (Header *h, const char **fmt, int *size) {
  int opt = *((*fmt)++);
  *size = 0;  /* default */
  switch (opt) {
    case 'b': *size = sizeof(char); return Kint;
    case 'B': *size = sizeof(char); return Kuint;
    case 'h': *size = sizeof(short); return Kint;
    case 'H': *size = sizeof(short); return Kuint;
    case 'l': *size = sizeof(long); return Kint;
    case 'L': *size = sizeof(long); return Kuint;
    case 'j': *size = sizeof(lua_Integer); return Kint;
    case 'J': *size = sizeof(lua_Integer); return Kuint;
    case 'T': *size = sizeof(size_t); return Kuint;
    case 'f': *size = sizeof(float); return Kfloat;
    case 'd': *size = sizeof(double); return Kfloat;
    case 'n': *size = sizeof(lua_Number); return Kfloat;
    case 'i': *size = getnumlimit(h, fmt, sizeof(int)); return Kint;
    case 'I': *size = getnumlimit(h, fmt, sizeof(int)); return Kuint;
    case 's': *size = getnumlimit(h, fmt, sizeof(size_t)); return Kstring;
    case 'c':
      *size = getnum(fmt, -1);
      if (*size == -1)
        luaL_error(h->L, "missing size for format option 'c'");
      return Kchar;
    case 'z': return Kzstr;
    case 'x': *size = 1; return Kpadding;
    case 'X': return Kpaddalign;
    case ' ': break;
    case '<': h->islittle = 1; break;
    case '>': h->islittle = 0; break;
    case '=': h->islittle = nativeendian.little; break;
    case '!': h->maxalign = getnumlimit(h, fmt, MAXALIGN); break;
    default: luaL_error(h->L, "invalid format option '%c'", opt);
  }
  return Knop;
}


/*
** Read, classify, and fill other details about the next option.
** 'psize' is filled with option's size, 'ntoalign' with the number of
** padding bytes needed to align it at offset 'totalsize'.
** Local variable 'align' gets the size to be aligned. (The Kpaddalign
** option takes its alignment from the option that follows it; all
** alignments are limited by the maximum alignment ('maxalign').) The Kchar
** option needs no alignment despite its size.
*/
static KOption getdetails (Header *h, size_t totalsize,
                           const char **fmt, int *psize, int *ntoalign) {
  KOption opt = getoption(h, fmt, psize);
  int align = *psize;  /* usually, alignment follows size */
  if (opt == Kpaddalign) {  /* 'X' gets alignment from following option */
    if (**fmt == '\0' || getoption(h, fmt, &align) == Kchar || align == 0)
      luaL_argerror(h->L, 1, "invalid next option for option 'X'");
  }
  if (align <= 1 || opt == Kchar)  /* need no alignment? */
    *ntoalign = 0;
  else {
    if (align > h->maxalign)  /* enforce maximum alignment */
      align = h->maxalign;
    if ((align & (align - 1)) != 0)  /* is 'align' not a power of 2? */
      luaL_argerror(h->L, 1, "format asks for alignment not power of 2");
    /* Bytes needed to round 'totalsize' up to a multiple of 'align'. */
    *ntoalign = (align - (int)(totalsize & (align - 1))) & (align - 1);
  }
  return opt;
}


/*
** Pack integer 'n' with 'size' bytes and 'islittle' endianness.
** The final 'if' handles the case when 'size' is larger than
** the size of a Lua integer, correcting the extra sign-extension
** bytes if necessary (by default they would be zeros).
*/
static void packint (luaL_Buffer *b, lua_Unsigned n,
                     int islittle, int size, int neg) {
  char *buff = luaL_prepbuffsize(b, size);
  int i;
  buff[islittle ? 0 : size - 1] = (char)(n & MC);  /* first byte */
  for (i = 1; i < size; i++) {
    n >>= NB;
    buff[islittle ? i : size - 1 - i] = (char)(n & MC);
  }
  if (neg && size > SZINT) {  /* negative number need sign extension? */
    for (i = SZINT; i < size; i++)  /* correct extra bytes */
      buff[islittle ? i : size - 1 - i] = (char)MC;
  }
  luaL_addsize(b, size);  /* add result to buffer */
}


/*
** Copy 'size' bytes from 'src' to 'dest', correcting endianness if
** given 'islittle' is different from native endianness.
*/
static void copywithendian (volatile char *dest, volatile const char *src,
                            int size, int islittle) {
  if (islittle == nativeendian.little) {  /* Same byte order: plain copy. */
    while (size-- != 0)
      *(dest++) = *(src++);
  }
  else {  /* Different byte order: fill 'dest' from its last byte backwards. */
    dest += size - 1;
    while (size-- != 0)
      *(dest--) = *(src++);
  }
}



/*
** Pack the arguments following the format string (argument 1) into a binary
** string, one argument per value-carrying option. Padding and configuration
** options consume no argument. Returns the packed string.
*/
LJLIB_CF(string_pack)
{
  luaL_Buffer b;
  Header h;
  const char *fmt = luaL_checkstring(L, 1);  /* format string */
  int arg = 1;  /* current argument to pack */
  size_t totalsize = 0;  /* accumulate total size of result */
  initheader(L, &h);
  lua_pushnil(L);  /* mark to separate arguments from string buffer */
  luaL_buffinit(L, &b);
  while (*fmt != '\0') {
    int size, ntoalign;
    KOption opt = getdetails(&h, totalsize, &fmt, &size, &ntoalign);
    totalsize += ntoalign + size;
    while (ntoalign-- > 0)
     luaL_addchar(&b, LUA_PACKPADBYTE);  /* fill alignment */
    arg++;
    switch (opt) {
      case Kint: {  /* signed integers */
        lua_Integer n = luaL_checkinteger(L, arg);
        if (size < SZINT) {  /* need overflow check? */
          lua_Integer lim = (lua_Integer)1 << ((size * NB) - 1);
          luaL_argcheck(L, -lim <= n && n < lim, arg, "integer overflow");
        }
        packint(&b, (lua_Unsigned)n, h.islittle, size, (n < 0));
        break;
      }
      case Kuint: {  /* unsigned integers */
        lua_Integer n = luaL_checkinteger(L, arg);
        if (size < SZINT)  /* need overflow check?
*/
          luaL_argcheck(L, (lua_Unsigned)n < ((lua_Unsigned)1 << (size * NB)),
                           arg, "unsigned overflow");
        packint(&b, (lua_Unsigned)n, h.islittle, size, 0);
        break;
      }
      case Kfloat: {  /* floating-point options */
        volatile Ftypes u;
        char *buff = luaL_prepbuffsize(&b, size);
        lua_Number n = luaL_checknumber(L, arg);  /* get argument */
        if (size == sizeof(u.f)) u.f = (float)n;  /* copy it into 'u' */
        else if (size == sizeof(u.d)) u.d = (double)n;
        else u.n = n;
        /* move 'u' to final result, correcting endianness if needed */
        copywithendian(buff, u.buff, size, h.islittle);
        luaL_addsize(&b, size);
        break;
      }
      case Kchar: {  /* fixed-size string */
        size_t len;
        const char *s = luaL_checklstring(L, arg, &len);
        if ((size_t)size <= len)  /* string larger than (or equal to) needed? */
          luaL_addlstring(&b, s, size);  /* truncate string to asked size */
        else {  /* string smaller than needed */
          luaL_addlstring(&b, s, len);  /* add it all */
          while (len++ < (size_t)size)  /* pad extra space */
            luaL_addchar(&b, LUA_PACKPADBYTE);
        }
        break;
      }
      case Kstring: {  /* strings with length count */
        size_t len;
        const char *s = luaL_checklstring(L, arg, &len);
        /* The length must be representable in 'size' bytes. */
        luaL_argcheck(L, size >= (int)sizeof(size_t) ||
                         len < ((size_t)1 << (size * NB)),
                         arg, "string length does not fit in given size");
        packint(&b, (lua_Unsigned)len, h.islittle, size, 0);  /* pack length */
        luaL_addlstring(&b, s, len);
        totalsize += len;
        break;
      }
      case Kzstr: {  /* zero-terminated string */
        size_t len;
        const char *s = luaL_checklstring(L, arg, &len);
        luaL_argcheck(L, strlen(s) == len, arg, "string contains zeros");
        luaL_addlstring(&b, s, len);
        luaL_addchar(&b, '\0');  /* add zero at the end */
        totalsize += len + 1;
        break;
      }
      case Kpadding: luaL_addchar(&b, LUA_PACKPADBYTE);  /* FALLTHROUGH */
      case
Kpaddalign: case Knop: 1109 arg--; /* undo increment */ 1110 break; 1111 } 1112 } 1113 luaL_pushresult(&b); 1114 return 1; 1115 } 1116 1117 1118 LJLIB_CF(string_packsize) 1119 { 1120 Header h; 1121 const char *fmt = luaL_checkstring(L, 1); /* format string */ 1122 size_t totalsize = 0; /* accumulate total size of result */ 1123 initheader(L, &h); 1124 while (*fmt != '\0') { 1125 int size, ntoalign; 1126 KOption opt = getdetails(&h, totalsize, &fmt, &size, &ntoalign); 1127 size += ntoalign; /* total space used by option */ 1128 luaL_argcheck(L, totalsize <= MAXSIZE - size, 1, 1129 "format result too large"); 1130 totalsize += size; 1131 switch (opt) { 1132 case Kstring: /* strings with length count */ 1133 case Kzstr: /* zero-terminated string */ 1134 luaL_argerror(L, 1, "variable-length format"); 1135 /* call never return, but to avoid warnings: *//* FALLTHROUGH */ 1136 default: break; 1137 } 1138 } 1139 lua_pushinteger(L, (lua_Integer)totalsize); 1140 return 1; 1141 } 1142 1143 1144 /* 1145 ** Unpack an integer with 'size' bytes and 'islittle' endianness. 1146 ** If size is smaller than the size of a Lua integer and integer 1147 ** is signed, must do sign extension (propagating the sign to the 1148 ** higher bits); if size is larger than the size of a Lua integer, 1149 ** it must check the unread bytes to see whether they do not cause an 1150 ** overflow. 1151 */ 1152 static lua_Integer unpackint (lua_State *L, const char *str, 1153 int islittle, int size, int issigned) { 1154 lua_Unsigned res = 0; 1155 int i; 1156 int limit = (size <= SZINT) ? size : SZINT; 1157 for (i = limit - 1; i >= 0; i--) { 1158 res <<= NB; 1159 res |= (lua_Unsigned)(unsigned char)str[islittle ? i : size - 1 - i]; 1160 } 1161 if (size < SZINT) { /* real size smaller than lua_Integer? */ 1162 if (issigned) { /* needs sign extension? 
*/ 1163 lua_Unsigned mask = (lua_Unsigned)1 << (size*NB - 1); 1164 res = ((res ^ mask) - mask); /* do sign extension */ 1165 } 1166 } 1167 else if (size > SZINT) { /* must check unread bytes */ 1168 int mask = (!issigned || (lua_Integer)res >= 0) ? 0 : MC; 1169 for (i = limit; i < size; i++) { 1170 if ((unsigned char)str[islittle ? i : size - 1 - i] != mask) 1171 luaL_error(L, "%d-byte integer does not fit into Lua Integer", size); 1172 } 1173 } 1174 return (lua_Integer)res; 1175 } 1176 1177 1178 LJLIB_CF(string_unpack) 1179 { 1180 Header h; 1181 const char *fmt = luaL_checkstring(L, 1); 1182 size_t ld; 1183 const char *data = luaL_checklstring(L, 2, &ld); 1184 size_t pos = (size_t)posrelat(luaL_optinteger(L, 3, 1), ld) - 1; 1185 int n = 0; /* number of results */ 1186 luaL_argcheck(L, pos <= ld, 3, "initial position out of string"); 1187 initheader(L, &h); 1188 while (*fmt != '\0') { 1189 int size, ntoalign; 1190 KOption opt = getdetails(&h, pos, &fmt, &size, &ntoalign); 1191 if ((size_t)ntoalign + size > ~pos || pos + ntoalign + size > ld) 1192 luaL_argerror(L, 2, "data string too short"); 1193 pos += ntoalign; /* skip alignment */ 1194 /* stack space for item + next position */ 1195 luaL_checkstack(L, 2, "too many results"); 1196 n++; 1197 switch (opt) { 1198 case Kint: 1199 case Kuint: { 1200 lua_Integer res = unpackint(L, data + pos, h.islittle, size, 1201 (opt == Kint)); 1202 lua_pushinteger(L, res); 1203 break; 1204 } 1205 case Kfloat: { 1206 volatile Ftypes u; 1207 lua_Number num; 1208 copywithendian(u.buff, data + pos, size, h.islittle); 1209 if (size == sizeof(u.f)) num = (lua_Number)u.f; 1210 else if (size == sizeof(u.d)) num = (lua_Number)u.d; 1211 else num = u.n; 1212 lua_pushnumber(L, num); 1213 break; 1214 } 1215 case Kchar: { 1216 lua_pushlstring(L, data + pos, size); 1217 break; 1218 } 1219 case Kstring: { 1220 size_t len = (size_t)unpackint(L, data + pos, h.islittle, size, 0); 1221 luaL_argcheck(L, pos + len + size <= ld, 2, "data string too 
short"); 1222 lua_pushlstring(L, data + pos + size, len); 1223 pos += len; /* skip string */ 1224 break; 1225 } 1226 case Kzstr: { 1227 size_t len = (int)strlen(data + pos); 1228 lua_pushlstring(L, data + pos, len); 1229 pos += len + 1; /* skip string plus final '\0' */ 1230 break; 1231 } 1232 case Kpaddalign: case Kpadding: case Knop: 1233 n--; /* undo increment */ 1234 break; 1235 } 1236 pos += size; 1237 } 1238 lua_pushinteger(L, pos + 1); /* next position */ 1239 return n + 1; 1240 } 1241 #endif 1242 1243 /* ------------------------------------------------------------------------ */ 1244 1245 #include "lj_libdef.h" 1246 1247 LUALIB_API int luaopen_string(lua_State *L) 1248 { 1249 GCtab *mt; 1250 global_State *g; 1251 LJ_LIB_REG(L, LUA_STRLIBNAME, string); 1252 #if defined(LUA_COMPAT_GFIND) 1253 lua_getfield(L, -1, "gmatch"); 1254 lua_setfield(L, -2, "gfind"); 1255 #endif 1256 mt = lj_tab_new(L, 0, 1); 1257 /* NOBARRIER: basemt is a GC root. */ 1258 g = G(L); 1259 setgcref(basemt_it(g, LJ_TSTR), obj2gco(mt)); 1260 settabV(L, lj_tab_setstr(L, mt, mmname_str(g, MM_index)), tabV(L->top-1)); 1261 mt->nomm = (uint8_t)(~(1u<<MM_index)); 1262 return 1; 1263 } 1264