ljx

FORK: LuaJIT with native 5.2 and 5.3 support
git clone https://git.neptards.moe/neptards/ljx.git
Log | Files | Refs | README

lj_str.c (6086B)


      1 /*
      2 ** String handling.
      3 ** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
      4 */
      5 
      6 #define lj_str_c
      7 #define LUA_CORE
      8 
      9 #include "lj_obj.h"
     10 #include "lj_gc.h"
     11 #include "lj_err.h"
     12 #include "lj_str.h"
     13 #include "lj_char.h"
     14 
     15 /* -- String helpers ------------------------------------------------------ */
     16 
     17 /* Ordered compare of strings. Assumes string data is 4-byte aligned. */
     18 int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
     19 {
     20   MSize i, n = a->len > b->len ? b->len : a->len;
     21   for (i = 0; i < n; i += 4) {
     22     /* Note: innocuous access up to end of string + 3. */
     23     uint32_t va = *(const uint32_t *)(strdata(a)+i);
     24     uint32_t vb = *(const uint32_t *)(strdata(b)+i);
     25     if (va != vb) {
     26 #if LJ_LE
     27       va = lj_bswap(va); vb = lj_bswap(vb);
     28 #endif
     29       i -= n;
     30       if ((int32_t)i >= -3) {
     31 	va >>= 32+(i<<3); vb >>= 32+(i<<3);
     32 	if (va == vb) break;
     33       }
     34       return va < vb ? -1 : 1;
     35     }
     36   }
     37   return (int32_t)(a->len - b->len);
     38 }
     39 
     40 /* Fast string data comparison. Caveat: unaligned access to 1st string! */
     41 static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len)
     42 {
     43   MSize i = 0;
     44   lua_assert(len > 0);
     45   lua_assert((((uintptr_t)a+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4);
     46   do {  /* Note: innocuous access up to end of string + 3. */
     47     uint32_t v = lj_getu32(a+i) ^ *(const uint32_t *)(b+i);
     48     if (v) {
     49       i -= len;
     50 #if LJ_LE
     51       return (int32_t)i >= -3 ? (v << (32+(i<<3))) : 1;
     52 #else
     53       return (int32_t)i >= -3 ? (v >> (32+(i<<3))) : 1;
     54 #endif
     55     }
     56     i += 4;
     57   } while (i < len);
     58   return 0;
     59 }
     60 
     61 /* Find fixed string p inside string s with offset start. Returns offset adjusted index. */
     62 uint32_t lj_str_find(const char *s, const char *p, MSize slen, MSize plen, int32_t start)
     63 {
     64   const char *os = s;
     65   if (start < 0) start += (int32_t)slen; else start--;
     66   if (start < 0) start = 0;
     67   if (start > slen)
     68     return 0;
     69 
     70   s += start;
     71   slen -= start;
     72 
     73   if (plen <= slen) {
     74     if (plen == 0) {
     75       return start+1;
     76     } else {
     77       int c = *(const uint8_t *)p++;
     78       plen--; slen -= plen;
     79       while (slen) {
     80 	const char *q = (const char *)memchr(s, c, slen);
     81 	if (!q) break;
     82 	if (memcmp(q+1, p, plen) == 0) return (q-os+1);
     83 	q++; slen -= (MSize)(q-s); s = q;
     84       }
     85     }
     86   }
     87   return 0;
     88 }
     89 
     90 /* Check whether a string has a pattern matching character. */
     91 int lj_str_haspattern(GCstr *s)
     92 {
     93   const char *p = strdata(s), *q = p + s->len;
     94   while (p < q) {
     95     int c = *(const uint8_t *)p++;
     96     if (lj_char_ispunct(c) && strchr("^$*+?.([%-", c))
     97       return 1;  /* Found a pattern matching char. */
     98   }
     99   return 0;  /* No pattern matching chars found. */
    100 }
    101 
    102 /* -- String interning ---------------------------------------------------- */
    103 
    104 /* Resize the string hash table (grow and shrink). */
    105 void lj_str_resize(lua_State *L, MSize newmask)
    106 {
    107   global_State *g = G(L);
    108   GCRef *newhash;
    109   MSize i;
    110   if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1)
    111     return;  /* No resizing during GC traversal or if already too big. */
    112   newhash = lj_mem_newvec(L, newmask+1, GCRef);
    113   memset(newhash, 0, (newmask+1)*sizeof(GCRef));
    114   for (i = g->strmask; i != ~(MSize)0; i--) {  /* Rehash old table. */
    115     GCobj *p = gcref(g->strhash[i]);
    116     while (p) {  /* Follow each hash chain and reinsert all strings. */
    117       MSize h = gco2str(p)->hash & newmask;
    118       GCobj *next = gcnext(p);
    119       /* NOBARRIER: The string table is a GC root. */
    120       setgcrefr(p->gch.nextgc, newhash[h]);
    121       setgcref(newhash[h], p);
    122       p = next;
    123     }
    124   }
    125   lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef);
    126   g->strmask = newmask;
    127   g->strhash = newhash;
    128 }
    129 
    130 /* Intern a string and return string object. */
    131 GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
    132 {
    133   global_State *g;
    134   GCstr *s;
    135   GCobj *o;
    136   MSize len = (MSize)lenx;
    137   MSize a, b, h = len;
    138   if (lenx >= LJ_MAX_STR)
    139     lj_err_msg(L, LJ_ERR_STROV);
    140   g = G(L);
    141   /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */
    142   if (len >= 4) {  /* Caveat: unaligned access! */
    143     a = lj_getu32(str);
    144     h ^= lj_getu32(str+len-4);
    145     b = lj_getu32(str+(len>>1)-2);
    146     h ^= b; h -= lj_rol(b, 14);
    147     b += lj_getu32(str+(len>>2)-1);
    148   } else if (len > 0) {
    149     a = *(const uint8_t *)str;
    150     h ^= *(const uint8_t *)(str+len-1);
    151     b = *(const uint8_t *)(str+(len>>1));
    152     h ^= b; h -= lj_rol(b, 14);
    153   } else {
    154     return &g->strempty;
    155   }
    156   a ^= h; a -= lj_rol(h, 11);
    157   b ^= a; b -= lj_rol(a, 25);
    158   h ^= b; h -= lj_rol(b, 16);
    159   /* Check if the string has already been interned. */
    160   o = gcref(g->strhash[h & g->strmask]);
    161   if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) {
    162     while (o != NULL) {
    163       GCstr *sx = gco2str(o);
    164       if (sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) {
    165 	/* Resurrect if dead. Can only happen with fixstring() (keywords). */
    166 	if (isdead(g, o)) flipwhite(o);
    167 	return sx;  /* Return existing string. */
    168       }
    169       o = gcnext(o);
    170     }
    171   } else {  /* Slow path: end of string is too close to a page boundary. */
    172     while (o != NULL) {
    173       GCstr *sx = gco2str(o);
    174       if (sx->len == len && memcmp(str, strdata(sx), len) == 0) {
    175 	/* Resurrect if dead. Can only happen with fixstring() (keywords). */
    176 	if (isdead(g, o)) flipwhite(o);
    177 	return sx;  /* Return existing string. */
    178       }
    179       o = gcnext(o);
    180     }
    181   }
    182   /* Nope, create a new string. */
    183   s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr);
    184   newwhite(g, s);
    185   s->gct = ~LJ_TSTR;
    186   s->len = len;
    187   s->hash = h;
    188   s->reserved = 0;
    189   memcpy(strdatawr(s), str, len);
    190   strdatawr(s)[len] = '\0';  /* Zero-terminate string. */
    191   /* Add it to string hash table. */
    192   h &= g->strmask;
    193   s->nextgc = g->strhash[h];
    194   /* NOBARRIER: The string table is a GC root. */
    195   setgcref(g->strhash[h], obj2gco(s));
    196   if (g->strnum++ > g->strmask)  /* Allow a 100% load factor. */
    197     lj_str_resize(L, (g->strmask<<1)+1);  /* Grow string table. */
    198   return s;  /* Return newly interned string. */
    199 }
    200 
    201 void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
    202 {
    203   g->strnum--;
    204   lj_mem_free(g, s, sizestring(s));
    205 }
    206