lj_str.c (6086B)
1 /* 2 ** String handling. 3 ** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h 4 */ 5 6 #define lj_str_c 7 #define LUA_CORE 8 9 #include "lj_obj.h" 10 #include "lj_gc.h" 11 #include "lj_err.h" 12 #include "lj_str.h" 13 #include "lj_char.h" 14 15 /* -- String helpers ------------------------------------------------------ */ 16 17 /* Ordered compare of strings. Assumes string data is 4-byte aligned. */ 18 int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) 19 { 20 MSize i, n = a->len > b->len ? b->len : a->len; 21 for (i = 0; i < n; i += 4) { 22 /* Note: innocuous access up to end of string + 3. */ 23 uint32_t va = *(const uint32_t *)(strdata(a)+i); 24 uint32_t vb = *(const uint32_t *)(strdata(b)+i); 25 if (va != vb) { 26 #if LJ_LE 27 va = lj_bswap(va); vb = lj_bswap(vb); 28 #endif 29 i -= n; 30 if ((int32_t)i >= -3) { 31 va >>= 32+(i<<3); vb >>= 32+(i<<3); 32 if (va == vb) break; 33 } 34 return va < vb ? -1 : 1; 35 } 36 } 37 return (int32_t)(a->len - b->len); 38 } 39 40 /* Fast string data comparison. Caveat: unaligned access to 1st string! */ 41 static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len) 42 { 43 MSize i = 0; 44 lua_assert(len > 0); 45 lua_assert((((uintptr_t)a+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4); 46 do { /* Note: innocuous access up to end of string + 3. */ 47 uint32_t v = lj_getu32(a+i) ^ *(const uint32_t *)(b+i); 48 if (v) { 49 i -= len; 50 #if LJ_LE 51 return (int32_t)i >= -3 ? (v << (32+(i<<3))) : 1; 52 #else 53 return (int32_t)i >= -3 ? (v >> (32+(i<<3))) : 1; 54 #endif 55 } 56 i += 4; 57 } while (i < len); 58 return 0; 59 } 60 61 /* Find fixed string p inside string s with offset start. Returns offset adjusted index. */ 62 uint32_t lj_str_find(const char *s, const char *p, MSize slen, MSize plen, int32_t start) 63 { 64 const char *os = s; 65 if (start < 0) start += (int32_t)slen; else start--; 66 if (start < 0) start = 0; 67 if (start > slen) 68 return 0; 69 70 s += start; 71 slen -= start; 72 73 if (plen <= slen) { 74 if (plen == 0) { 75 return start+1; 76 } else { 77 int c = *(const uint8_t *)p++; 78 plen--; slen -= plen; 79 while (slen) { 80 const char *q = (const char *)memchr(s, c, slen); 81 if (!q) break; 82 if (memcmp(q+1, p, plen) == 0) return (q-os+1); 83 q++; slen -= (MSize)(q-s); s = q; 84 } 85 } 86 } 87 return 0; 88 } 89 90 /* Check whether a string has a pattern matching character. */ 91 int lj_str_haspattern(GCstr *s) 92 { 93 const char *p = strdata(s), *q = p + s->len; 94 while (p < q) { 95 int c = *(const uint8_t *)p++; 96 if (lj_char_ispunct(c) && strchr("^$*+?.([%-", c)) 97 return 1; /* Found a pattern matching char. */ 98 } 99 return 0; /* No pattern matching chars found. */ 100 } 101 102 /* -- String interning ---------------------------------------------------- */ 103 104 /* Resize the string hash table (grow and shrink). */ 105 void lj_str_resize(lua_State *L, MSize newmask) 106 { 107 global_State *g = G(L); 108 GCRef *newhash; 109 MSize i; 110 if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1) 111 return; /* No resizing during GC traversal or if already too big. */ 112 newhash = lj_mem_newvec(L, newmask+1, GCRef); 113 memset(newhash, 0, (newmask+1)*sizeof(GCRef)); 114 for (i = g->strmask; i != ~(MSize)0; i--) { /* Rehash old table. */ 115 GCobj *p = gcref(g->strhash[i]); 116 while (p) { /* Follow each hash chain and reinsert all strings. */ 117 MSize h = gco2str(p)->hash & newmask; 118 GCobj *next = gcnext(p); 119 /* NOBARRIER: The string table is a GC root. */ 120 setgcrefr(p->gch.nextgc, newhash[h]); 121 setgcref(newhash[h], p); 122 p = next; 123 } 124 } 125 lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef); 126 g->strmask = newmask; 127 g->strhash = newhash; 128 } 129 130 /* Intern a string and return string object. */ 131 GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) 132 { 133 global_State *g; 134 GCstr *s; 135 GCobj *o; 136 MSize len = (MSize)lenx; 137 MSize a, b, h = len; 138 if (lenx >= LJ_MAX_STR) 139 lj_err_msg(L, LJ_ERR_STROV); 140 g = G(L); 141 /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */ 142 if (len >= 4) { /* Caveat: unaligned access! */ 143 a = lj_getu32(str); 144 h ^= lj_getu32(str+len-4); 145 b = lj_getu32(str+(len>>1)-2); 146 h ^= b; h -= lj_rol(b, 14); 147 b += lj_getu32(str+(len>>2)-1); 148 } else if (len > 0) { 149 a = *(const uint8_t *)str; 150 h ^= *(const uint8_t *)(str+len-1); 151 b = *(const uint8_t *)(str+(len>>1)); 152 h ^= b; h -= lj_rol(b, 14); 153 } else { 154 return &g->strempty; 155 } 156 a ^= h; a -= lj_rol(h, 11); 157 b ^= a; b -= lj_rol(a, 25); 158 h ^= b; h -= lj_rol(b, 16); 159 /* Check if the string has already been interned. */ 160 o = gcref(g->strhash[h & g->strmask]); 161 if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) { 162 while (o != NULL) { 163 GCstr *sx = gco2str(o); 164 if (sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) { 165 /* Resurrect if dead. Can only happen with fixstring() (keywords). */ 166 if (isdead(g, o)) flipwhite(o); 167 return sx; /* Return existing string. */ 168 } 169 o = gcnext(o); 170 } 171 } else { /* Slow path: end of string is too close to a page boundary. */ 172 while (o != NULL) { 173 GCstr *sx = gco2str(o); 174 if (sx->len == len && memcmp(str, strdata(sx), len) == 0) { 175 /* Resurrect if dead. Can only happen with fixstring() (keywords). */ 176 if (isdead(g, o)) flipwhite(o); 177 return sx; /* Return existing string. */ 178 } 179 o = gcnext(o); 180 } 181 } 182 /* Nope, create a new string. */ 183 s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr); 184 newwhite(g, s); 185 s->gct = ~LJ_TSTR; 186 s->len = len; 187 s->hash = h; 188 s->reserved = 0; 189 memcpy(strdatawr(s), str, len); 190 strdatawr(s)[len] = '\0'; /* Zero-terminate string. */ 191 /* Add it to string hash table. */ 192 h &= g->strmask; 193 s->nextgc = g->strhash[h]; 194 /* NOBARRIER: The string table is a GC root. */ 195 setgcref(g->strhash[h], obj2gco(s)); 196 if (g->strnum++ > g->strmask) /* Allow a 100% load factor. */ 197 lj_str_resize(L, (g->strmask<<1)+1); /* Grow string table. */ 198 return s; /* Return newly interned string. */ 199 } 200 201 void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s) 202 { 203 g->strnum--; 204 lj_mem_free(g, s, sizestring(s)); 205 } 206