ljx

FORK: LuaJIT with native 5.2 and 5.3 support
git clone https://git.neptards.moe/neptards/ljx.git

lj_alloc.c (41455B)


      1 /*
      2 ** Bundled memory allocator.
      3 **
      4 ** Beware: this is a HEAVILY CUSTOMIZED version of dlmalloc.
      5 ** The original bears the following remark:
      6 **
      7 **   This is a version (aka dlmalloc) of malloc/free/realloc written by
      8 **   Doug Lea and released to the public domain, as explained at
      9 **   http://creativecommons.org/licenses/publicdomain.
     10 **
     11 **   * Version pre-2.8.4 Wed Mar 29 19:46:29 2006    (dl at gee)
     12 **
     13 ** No additional copyright is claimed over the customizations.
     14 ** Please do NOT bother the original author about this version here!
     15 **
     16 ** If you want to use dlmalloc in another project, you should get
     17 ** the original from: ftp://gee.cs.oswego.edu/pub/misc/
     18 ** For thread-safe derivatives, take a look at:
     19 ** - ptmalloc: http://www.malloc.de/
     20 ** - nedmalloc: http://www.nedprod.com/programs/portable/nedmalloc/
     21 */
     22 
     23 #define lj_alloc_c
     24 #define LUA_CORE
     25 
     26 /* To get the mremap prototype. Must be defined before any system includes. */
     27 #if defined(__linux__) && !defined(_GNU_SOURCE)
     28 #define _GNU_SOURCE
     29 #endif
     30 
     31 #include "lj_def.h"
     32 #include "lj_arch.h"
     33 #include "lj_alloc.h"
     34 
     35 #ifndef LUAJIT_USE_SYSMALLOC
     36 
     37 #define MAX_SIZE_T		(~(size_t)0)
     38 #define MALLOC_ALIGNMENT	((size_t)8U)
     39 
     40 #if LJ_4GB
     41 #define MEM_SCALE 16
     42 #else
     43 #define MEM_SCALE 1
     44 #endif
     45 #define DEFAULT_GRANULARITY	((size_t)128U * MEM_SCALE * (size_t)1024U)
     46 #define DEFAULT_TRIM_THRESHOLD	((size_t)2U * MEM_SCALE * (size_t)1024U * (size_t)1024U)
     47 #define DEFAULT_MMAP_THRESHOLD	((size_t)128U * MEM_SCALE * (size_t)1024U)
     48 #define MAX_RELEASE_CHECK_RATE	255
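
/* Hedged worked values (not in the original source): with MEM_SCALE == 1
** these give a 128KB mmap granularity, a 2MB trim threshold and a 128KB
** direct-mmap threshold; an LJ_4GB build scales them to 2MB/32MB/2MB.
*/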
     49 
     50 /* ------------------- size_t and alignment properties -------------------- */
     51 
     52 /* The byte and bit size of a size_t */
     53 #define SIZE_T_SIZE		(sizeof(size_t))
     54 #define SIZE_T_BITSIZE		(sizeof(size_t) << 3)
     55 
     56 /* Some constants coerced to size_t */
     57 /* Annoying but necessary to avoid errors on some platforms */
     58 #define SIZE_T_ZERO		((size_t)0)
     59 #define SIZE_T_ONE		((size_t)1)
     60 #define SIZE_T_TWO		((size_t)2)
     61 #define TWO_SIZE_T_SIZES	(SIZE_T_SIZE<<1)
     62 #define FOUR_SIZE_T_SIZES	(SIZE_T_SIZE<<2)
     63 #define SIX_SIZE_T_SIZES	(FOUR_SIZE_T_SIZES+TWO_SIZE_T_SIZES)
     64 
     65 /* The bit mask value corresponding to MALLOC_ALIGNMENT */
     66 #define CHUNK_ALIGN_MASK	(MALLOC_ALIGNMENT - SIZE_T_ONE)
     67 
     68 /* the number of bytes to offset an address to align it */
     69 #define align_offset(A)\
     70  ((((size_t)(A) & CHUNK_ALIGN_MASK) == 0)? 0 :\
     71   ((MALLOC_ALIGNMENT - ((size_t)(A) & CHUNK_ALIGN_MASK)) & CHUNK_ALIGN_MASK))
     72 
     73 #define any_align(S,n) \
     74  (((S) + ((n) - SIZE_T_ONE)) & ~((n) - SIZE_T_ONE))
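
/* A hedged demo of the two helpers above (assumes the MALLOC_ALIGNMENT of 8
** defined earlier; the function name is illustrative only and compiled out).
*/
#if 0
static void align_demo(void)
{
  size_t off = align_offset((void *)0x1003);      /* == 5: 0x1008 is aligned. */
  size_t up = any_align((size_t)100, (size_t)8);  /* == 104; n must be 2^k. */
  (void)off; (void)up;
}
#endif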
     75 
     76 /* -------------------------- MMAP support ------------------------------- */
     77 
     78 #define MFAIL			((void *)(MAX_SIZE_T))
     79 #define CMFAIL			((char *)(MFAIL)) /* defined for convenience */
     80 
     81 #define IS_DIRECT_BIT		(SIZE_T_ONE)
     82 
     83 #if LJ_TARGET_WINDOWS || __MSYS__
     84 
     85 #define WIN32_LEAN_AND_MEAN
     86 #include <windows.h>
     87 
     88 #if LJ_64 && !LJ_GC64
     89 
     90 /* Undocumented, but hey, that's what we all love so much about Windows. */
     91 typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits,
     92 		       size_t *size, ULONG alloctype, ULONG prot);
     93 static PNTAVM ntavm;
     94 
     95 /* Number of top bits of the lower 32 bits of an address that must be zero.
     96 ** Apparently 0 gives us full 64 bit addresses and 1 gives us the lower 2GB.
     97 */
     98 #define NTAVM_ZEROBITS		1
     99 
    100 static void INIT_MMAP(void)
    101 {
    102   ntavm = (PNTAVM)GetProcAddress(GetModuleHandleA("ntdll.dll"),
    103 				 "NtAllocateVirtualMemory");
    104 }
    105 
    106 /* Win64 32 bit MMAP via NtAllocateVirtualMemory. */
    107 static LJ_AINLINE void *CALL_MMAP(size_t size)
    108 {
    109   DWORD olderr = GetLastError();
    110   void *ptr = NULL;
    111   long st = ntavm(INVALID_HANDLE_VALUE, &ptr, NTAVM_ZEROBITS, &size,
    112 		  MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
    113   SetLastError(olderr);
    114   return st == 0 ? ptr : MFAIL;
    115 }
    116 
    117 /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
    118 static LJ_AINLINE void *DIRECT_MMAP(size_t size)
    119 {
    120   DWORD olderr = GetLastError();
    121   void *ptr = NULL;
    122   long st = ntavm(INVALID_HANDLE_VALUE, &ptr, NTAVM_ZEROBITS, &size,
    123 		  MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, PAGE_READWRITE);
    124   SetLastError(olderr);
    125   return st == 0 ? ptr : MFAIL;
    126 }
    127 
    128 #else
    129 
    130 #define INIT_MMAP()		((void)0)
    131 
    132 /* Win32 MMAP via VirtualAlloc */
    133 static LJ_AINLINE void *CALL_MMAP(size_t size)
    134 {
    135   DWORD olderr = GetLastError();
    136   void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
    137   SetLastError(olderr);
    138   return ptr ? ptr : MFAIL;
    139 }
    140 
    141 /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
    142 static LJ_AINLINE void *DIRECT_MMAP(size_t size)
    143 {
    144   DWORD olderr = GetLastError();
    145   void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
    146 			   PAGE_READWRITE);
    147   SetLastError(olderr);
    148   return ptr ? ptr : MFAIL;
    149 }
    150 
    151 #endif
    152 
    153 /* This function supports releasing coalesced segments */
    154 static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
    155 {
    156   DWORD olderr = GetLastError();
    157   MEMORY_BASIC_INFORMATION minfo;
    158   char *cptr = (char *)ptr;
    159   while (size) {
    160     if (VirtualQuery(cptr, &minfo, sizeof(minfo)) == 0)
    161       return -1;
    162     if (minfo.BaseAddress != cptr || minfo.AllocationBase != cptr ||
    163 	minfo.State != MEM_COMMIT || minfo.RegionSize > size)
    164       return -1;
    165     if (VirtualFree(cptr, 0, MEM_RELEASE) == 0)
    166       return -1;
    167     cptr += minfo.RegionSize;
    168     size -= minfo.RegionSize;
    169   }
    170   SetLastError(olderr);
    171   return 0;
    172 }
    173 
    174 /*
    175 ** If the libc supports it, try to convince it to always use brk()
    176 ** even for large allocations.
    177 */
    178 #elif LJ_4GB
    179 #define INIT_MMAP() { \
    180   mallopt(M_MMAP_THRESHOLD, LJ_MAX_MEM); \
    181   mallopt(M_MMAP_MAX, 0); \
    182 }
    183 #define CALL_MMAP(n) aligned_alloc(LJ_PAGESIZE, n)
    184 #define CALL_MUNMAP_FORCE(n,s) ((void)s,free(n),0)
    185 #define CALL_MUNMAP(n,s) ((void)(s),(void)(n),-1)
    186 #define CALL_MREMAP(addr, osz, nsz, mv) ((void)osz,realloc((addr), (nsz)))
    187 #define DIRECT_MMAP CALL_MMAP
    188 #undef MFAIL
    189 #define MFAIL NULL
    190 
    191 #else
    192 
    193 #include <errno.h>
    194 #include <sys/mman.h>
    195 
    196 #define MMAP_PROT		(PROT_READ|PROT_WRITE)
    197 #if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
    198 #define MAP_ANONYMOUS		MAP_ANON
    199 #endif
    200 #define MMAP_FLAGS		(MAP_PRIVATE|MAP_ANONYMOUS)
    201 
    202 #if LJ_64 && !LJ_GC64
    203 /* 64 bit mode with 32 bit pointers needs special support for allocating
    204 ** memory in the lower 2GB.
    205 */
    206 
    207 #if defined(MAP_32BIT)
    208 
    209 #if defined(__sun__)
    210 #define MMAP_REGION_START	((uintptr_t)0x1000)
    211 #else
    212 /* Actually this only gives us max. 1GB in current Linux kernels. */
    213 #define MMAP_REGION_START	((uintptr_t)0)
    214 #endif
    215 
    216 static LJ_AINLINE void *CALL_MMAP(size_t size)
    217 {
    218   int olderr = errno;
    219   void *ptr = mmap((void *)MMAP_REGION_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0);
    220   errno = olderr;
    221   return ptr;
    222 }
    223 
    224 #elif LJ_TARGET_OSX || LJ_TARGET_PS4 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__sun__) || defined(__CYGWIN__)
    225 
    226 /* OSX and FreeBSD mmap() use a naive first-fit linear search.
    227 ** That's perfect for us. Except that -pagezero_size must be set for OSX,
    228 ** otherwise the lower 4GB are blocked. And the 32GB RLIMIT_DATA needs
    229 ** to be reduced to 250MB on FreeBSD.
    230 */
    231 #if LJ_TARGET_OSX || defined(__DragonFly__)
    232 #define MMAP_REGION_START	((uintptr_t)0x10000)
    233 #elif LJ_TARGET_PS4
    234 #define MMAP_REGION_START	((uintptr_t)0x4000)
    235 #else
    236 #define MMAP_REGION_START	((uintptr_t)0x10000000)
    237 #endif
    238 #define MMAP_REGION_END		((uintptr_t)0x80000000)
    239 
    240 #if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
    241 #include <sys/resource.h>
    242 #endif
    243 
    244 static LJ_AINLINE void *CALL_MMAP(size_t size)
    245 {
    246   int olderr = errno;
    247   /* Hint for next allocation. Doesn't need to be thread-safe. */
    248   static uintptr_t alloc_hint = MMAP_REGION_START;
    249   int retry = 0;
    250 #if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
    251   static int rlimit_modified = 0;
    252   if (LJ_UNLIKELY(rlimit_modified == 0)) {
    253     struct rlimit rlim;
    254     rlim.rlim_cur = rlim.rlim_max = MMAP_REGION_START;
    255     setrlimit(RLIMIT_DATA, &rlim);  /* Ignore result. May fail below. */
    256     rlimit_modified = 1;
    257   }
    258 #endif
    259   for (;;) {
    260     void *p = mmap((void *)alloc_hint, size, MMAP_PROT, MMAP_FLAGS, -1, 0);
    261     if ((uintptr_t)p >= MMAP_REGION_START &&
    262 	(uintptr_t)p + size < MMAP_REGION_END) {
    263       alloc_hint = (uintptr_t)p + size;
    264       errno = olderr;
    265       return p;
    266     }
    267     if (p != CMFAIL) munmap(p, size);
    268 #if defined(__sun__) || defined(__DragonFly__)
    269     alloc_hint += 0x1000000;  /* Need near-exhaustive linear scan. */
    270     if (alloc_hint + size < MMAP_REGION_END) continue;
    271 #endif
    272     if (retry) break;
    273     retry = 1;
    274     alloc_hint = MMAP_REGION_START;
    275   }
    276   errno = olderr;
    277   return CMFAIL;
    278 }
    279 
    280 #else
    281 
    282 #error "NYI: need an equivalent of MAP_32BIT for this 64 bit OS"
    283 
    284 #endif
    285 
    286 #else
    287 
    288 /* 32 bit mode and GC64 mode are easy. */
    289 static LJ_AINLINE void *CALL_MMAP(size_t size)
    290 {
    291   int olderr = errno;
    292   void *ptr = mmap(NULL, size, MMAP_PROT, MMAP_FLAGS, -1, 0);
    293   errno = olderr;
    294   return ptr;
    295 }
    296 
    297 #endif
    298 
    299 #ifndef INIT_MMAP
    300 #define INIT_MMAP()		((void)0)
    301 #endif
    302 #define DIRECT_MMAP(s)		CALL_MMAP(s)
    303 
    304 static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
    305 {
    306   int olderr = errno;
    307   int ret = munmap(ptr, size);
    308   errno = olderr;
    309   return ret;
    310 }
    311 
    312 #if LJ_TARGET_LINUX
    313 /* Need to define _GNU_SOURCE to get the mremap prototype. */
    314 static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz,
    315 				     int flags)
    316 {
    317   int olderr = errno;
    318   ptr = mremap(ptr, osz, nsz, flags);
    319   errno = olderr;
    320   return ptr;
    321 }
    322 
    323 #define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv))
    324 #define CALL_MREMAP_NOMOVE	0
    325 #define CALL_MREMAP_MAYMOVE	1
    326 #if LJ_64 && !LJ_GC64
    327 #define CALL_MREMAP_MV		CALL_MREMAP_NOMOVE
    328 #else
    329 #define CALL_MREMAP_MV		CALL_MREMAP_MAYMOVE
    330 #endif
    331 #endif
    332 
    333 #endif
    334 
    335 #ifndef CALL_MREMAP
    336 #define CALL_MREMAP(addr, osz, nsz, mv) ((void)osz, MFAIL)
    337 #endif
    338 
    339 #ifndef CALL_MUNMAP_FORCE
    340 #define CALL_MUNMAP_FORCE CALL_MUNMAP
    341 #endif
    342 
    343 /* -----------------------  Chunk representations ------------------------ */
    344 
    345 struct malloc_chunk {
    346   size_t               prev_foot;  /* Size of previous chunk (if free).  */
    347   size_t               head;       /* Size and inuse bits. */
    348   struct malloc_chunk *fd;         /* double links -- used only if free. */
    349   struct malloc_chunk *bk;
    350 };
    351 
    352 typedef struct malloc_chunk  mchunk;
    353 typedef struct malloc_chunk *mchunkptr;
    354 typedef struct malloc_chunk *sbinptr;  /* The type of bins of chunks */
    355 typedef size_t bindex_t;               /* Type of bin indices. */
    356 typedef unsigned int binmap_t;         /* Bitmap of non-empty bins. */
    357 typedef unsigned int flag_t;           /* The type of various bit flag sets */
    358 
    359 /* ------------------- Chunks sizes and alignments ----------------------- */
    360 
    361 #define MCHUNK_SIZE		(sizeof(mchunk))
    362 
    363 #define CHUNK_OVERHEAD		(SIZE_T_SIZE)
    364 
    365 /* Direct chunks need a second word of overhead ... */
    366 #define DIRECT_CHUNK_OVERHEAD	(TWO_SIZE_T_SIZES)
    367 /* ... and additional padding for fake next-chunk at foot */
    368 #define DIRECT_FOOT_PAD		(FOUR_SIZE_T_SIZES)
    369 
    370 /* The smallest size we can malloc is an aligned minimal chunk */
    371 #define MIN_CHUNK_SIZE\
    372   ((MCHUNK_SIZE + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)
    373 
    374 /* conversion from malloc headers to user pointers, and back */
    375 #define chunk2mem(p)		((void *)((char *)(p) + TWO_SIZE_T_SIZES))
    376 #define mem2chunk(mem)		((mchunkptr)((char *)(mem) - TWO_SIZE_T_SIZES))
    377 /* chunk associated with aligned address A */
    378 #define align_as_chunk(A)	(mchunkptr)((A) + align_offset(chunk2mem(A)))
    379 
    380 /* Bounds on request (not chunk) sizes. */
    381 #define MAX_REQUEST		((~MIN_CHUNK_SIZE+1) << 2)
    382 #define MIN_REQUEST		(MIN_CHUNK_SIZE - CHUNK_OVERHEAD - SIZE_T_ONE)
    383 
    384 /* pad request bytes into a usable size */
    385 #define pad_request(req) \
    386    (((req) + CHUNK_OVERHEAD + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)
    387 
    388 /* pad request, checking for minimum (but not maximum) */
    389 #define request2size(req) \
    390   (((req) < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(req))
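
/* Hedged LP64 example: with CHUNK_OVERHEAD == 8 and 8-byte alignment,
** pad_request(100) == (100+8+7) & ~7 == 112, while request2size(1) is
** below MIN_REQUEST and therefore bumped up to MIN_CHUNK_SIZE == 32.
*/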
    391 
    392 /* ------------------ Operations on head and foot fields ----------------- */
    393 
    394 #define PINUSE_BIT		(SIZE_T_ONE)
    395 #define CINUSE_BIT		(SIZE_T_TWO)
    396 #define INUSE_BITS		(PINUSE_BIT|CINUSE_BIT)
    397 
    398 /* Head value for fenceposts */
    399 #define FENCEPOST_HEAD		(INUSE_BITS|SIZE_T_SIZE)
    400 
    401 /* extraction of fields from head words */
    402 #define cinuse(p)		((p)->head & CINUSE_BIT)
    403 #define pinuse(p)		((p)->head & PINUSE_BIT)
    404 #define chunksize(p)		((p)->head & ~(INUSE_BITS))
    405 
    406 #define clear_pinuse(p)		((p)->head &= ~PINUSE_BIT)
    407 #define clear_cinuse(p)		((p)->head &= ~CINUSE_BIT)
    408 
    409 /* Treat space at ptr +/- offset as a chunk */
    410 #define chunk_plus_offset(p, s)		((mchunkptr)(((char *)(p)) + (s)))
    411 #define chunk_minus_offset(p, s)	((mchunkptr)(((char *)(p)) - (s)))
    412 
    413 /* Ptr to next or previous physical malloc_chunk. */
    414 #define next_chunk(p)	((mchunkptr)(((char *)(p)) + ((p)->head & ~INUSE_BITS)))
    415 #define prev_chunk(p)	((mchunkptr)(((char *)(p)) - ((p)->prev_foot) ))
    416 
    417 /* extract next chunk's pinuse bit */
    418 #define next_pinuse(p)	((next_chunk(p)->head) & PINUSE_BIT)
    419 
    420 /* Get/set size at footer */
    421 #define get_foot(p, s)	(((mchunkptr)((char *)(p) + (s)))->prev_foot)
    422 #define set_foot(p, s)	(((mchunkptr)((char *)(p) + (s)))->prev_foot = (s))
    423 
    424 /* Set size, pinuse bit, and foot */
    425 #define set_size_and_pinuse_of_free_chunk(p, s)\
    426   ((p)->head = (s|PINUSE_BIT), set_foot(p, s))
    427 
    428 /* Set size, pinuse bit, foot, and clear next pinuse */
    429 #define set_free_with_pinuse(p, s, n)\
    430   (clear_pinuse(n), set_size_and_pinuse_of_free_chunk(p, s))
    431 
    432 #define is_direct(p)\
    433   (!((p)->head & PINUSE_BIT) && ((p)->prev_foot & IS_DIRECT_BIT))
    434 
    435 /* Get the internal overhead associated with chunk p */
    436 #define overhead_for(p)\
    437  (is_direct(p)? DIRECT_CHUNK_OVERHEAD : CHUNK_OVERHEAD)
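
/* Hedged example of decoding a head word: head == 0x68|CINUSE_BIT|PINUSE_BIT
** describes an in-use 0x68-byte chunk whose predecessor is also in use;
** chunksize() masks off the two low bits to recover 0x68.
*/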
    438 
    439 /* ---------------------- Overlaid data structures ----------------------- */
    440 
    441 struct malloc_tree_chunk {
    442   /* The first four fields must be compatible with malloc_chunk */
    443   size_t                    prev_foot;
    444   size_t                    head;
    445   struct malloc_tree_chunk *fd;
    446   struct malloc_tree_chunk *bk;
    447 
    448   struct malloc_tree_chunk *child[2];
    449   struct malloc_tree_chunk *parent;
    450   bindex_t                  index;
    451 };
    452 
    453 typedef struct malloc_tree_chunk  tchunk;
    454 typedef struct malloc_tree_chunk *tchunkptr;
    455 typedef struct malloc_tree_chunk *tbinptr; /* The type of bins of trees */
    456 
    457 /* A little helper macro for trees */
    458 #define leftmost_child(t) ((t)->child[0] != 0? (t)->child[0] : (t)->child[1])
    459 
    460 /* ----------------------------- Segments -------------------------------- */
    461 
    462 struct malloc_segment {
    463   char        *base;             /* base address */
    464   size_t       size;             /* allocated size */
    465   struct malloc_segment *next;   /* ptr to next segment */
    466 };
    467 
    468 typedef struct malloc_segment  msegment;
    469 typedef struct malloc_segment *msegmentptr;
    470 
    471 /* ---------------------------- malloc_state ----------------------------- */
    472 
    473 /* Bin types, widths and sizes */
    474 #define NSMALLBINS		(32U)
    475 #define NTREEBINS		(32U)
    476 #define SMALLBIN_SHIFT		(3U)
    477 #define SMALLBIN_WIDTH		(SIZE_T_ONE << SMALLBIN_SHIFT)
    478 #define TREEBIN_SHIFT		(8U)
    479 #define MIN_LARGE_SIZE		(SIZE_T_ONE << TREEBIN_SHIFT)
    480 #define MAX_SMALL_SIZE		(MIN_LARGE_SIZE - SIZE_T_ONE)
    481 #define MAX_SMALL_REQUEST  (MAX_SMALL_SIZE - CHUNK_ALIGN_MASK - CHUNK_OVERHEAD)
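
/* Hedged LP64 arithmetic: MAX_SMALL_SIZE == 255, so MAX_SMALL_REQUEST ==
** 255 - 7 - 8 == 240, i.e. requests up to 240 bytes are served from the
** smallbins and anything larger is routed to the treebins or direct mmap.
*/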
    482 
    483 struct malloc_state {
    484   binmap_t   smallmap;
    485   binmap_t   treemap;
    486   size_t     dvsize;
    487   size_t     topsize;
    488   mchunkptr  dv;
    489   mchunkptr  top;
    490   size_t     trim_check;
    491   size_t     release_checks;
    492   mchunkptr  smallbins[(NSMALLBINS+1)*2];
    493   tbinptr    treebins[NTREEBINS];
    494   msegment   seg;
    495 };
    496 
    497 typedef struct malloc_state *mstate;
    498 
    499 #define is_initialized(M)	((M)->top != 0)
    500 
    501 /* -------------------------- system alloc setup ------------------------- */
    502 
    503 /* page-align a size */
    504 #define page_align(S) any_align(S,LJ_PAGESIZE)
    505 
    506 /* granularity-align a size */
    507 #define granularity_align(S) any_align(S,DEFAULT_GRANULARITY)
    508 
    509 #ifndef mmap_align
    510 #if LJ_TARGET_WINDOWS
    511 #define mmap_align(S)	granularity_align(S)
    512 #else
    513 #define mmap_align(S)	page_align(S)
    514 #endif
    515 #endif
    516 
    517 /*  True if segment S holds address A */
    518 #define segment_holds(S, A)\
    519   ((char *)(A) >= S->base && (char *)(A) < S->base + S->size)
    520 
    521 /* Return segment holding given address */
    522 static msegmentptr segment_holding(mstate m, char *addr)
    523 {
    524   msegmentptr sp = &m->seg;
    525   for (;;) {
    526     if (addr >= sp->base && addr < sp->base + sp->size)
    527       return sp;
    528     if ((sp = sp->next) == 0)
    529       return 0;
    530   }
    531 }
    532 
    533 /* Return true if segment contains a segment link */
    534 static int has_segment_link(mstate m, msegmentptr ss)
    535 {
    536   msegmentptr sp = &m->seg;
    537   for (;;) {
    538     if ((char *)sp >= ss->base && (char *)sp < ss->base + ss->size)
    539       return 1;
    540     if ((sp = sp->next) == 0)
    541       return 0;
    542   }
    543 }
    544 
    545 /*
    546   TOP_FOOT_SIZE is padding at the end of a segment, including space
    547   that may be needed to place segment records and fenceposts when new
    548   noncontiguous segments are added.
    549 */
    550 #define TOP_FOOT_SIZE\
    551   (align_offset(chunk2mem(0))+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE)
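
/* Hedged LP64 value: align_offset(chunk2mem(0)) == 0, pad_request(24) == 32
** and MIN_CHUNK_SIZE == 32, so TOP_FOOT_SIZE reserves 64 bytes at the top
** of every segment.
*/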
    552 
    553 /* ---------------------------- Indexing Bins ---------------------------- */
    554 
    555 #define is_small(s)		(((s) >> SMALLBIN_SHIFT) < NSMALLBINS)
    556 #define small_index(s)		((s)  >> SMALLBIN_SHIFT)
    557 #define small_index2size(i)	((i)  << SMALLBIN_SHIFT)
    558 #define MIN_SMALL_INDEX		(small_index(MIN_CHUNK_SIZE))
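
/* Hedged example: small_index(64) == 8 and small_index2size(8) == 64; on
** LP64, MIN_SMALL_INDEX == 4 and the largest smallbin holds 248-byte
** chunks, since is_small(248) holds but is_small(256) does not.
*/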
    559 
    560 /* addressing by index. Each smallbin overlays two fd/bk slots, hence (i)<<1 */
    561 #define smallbin_at(M, i)	((sbinptr)((char *)&((M)->smallbins[(i)<<1])))
    562 #define treebin_at(M,i)		(&((M)->treebins[i]))
    563 
    564 /* assign tree index for size S to variable I */
    565 #define compute_tree_index(S, I)\
    566 {\
    567   unsigned int X = (unsigned int)(S >> TREEBIN_SHIFT);\
    568   if (X == 0) {\
    569     I = 0;\
    570   } else if (X > 0xFFFF) {\
    571     I = NTREEBINS-1;\
    572   } else {\
    573     unsigned int K = lj_fls(X);\
    574     I =  (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\
    575   }\
    576 }
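
/* Hedged example: for S == 768, X == 3 and K == lj_fls(3) == 1, so I ==
** (1<<1) + ((768 >> 8) & 1) == 3; consistently, minsize_for_tree_index(3)
** below evaluates to 512|256 == 768.
*/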
    577 
    578 /* Bit representing maximum resolved size in a treebin at i */
    579 #define bit_for_tree_index(i) \
    580    (i == NTREEBINS-1)? (SIZE_T_BITSIZE-1) : (((i) >> 1) + TREEBIN_SHIFT - 2)
    581 
    582 /* Shift placing maximum resolved bit in a treebin at i as sign bit */
    583 #define leftshift_for_tree_index(i) \
    584    ((i == NTREEBINS-1)? 0 : \
    585     ((SIZE_T_BITSIZE-SIZE_T_ONE) - (((i) >> 1) + TREEBIN_SHIFT - 2)))
    586 
    587 /* The size of the smallest chunk held in bin with index i */
    588 #define minsize_for_tree_index(i) \
    589    ((SIZE_T_ONE << (((i) >> 1) + TREEBIN_SHIFT)) |  \
    590    (((size_t)((i) & SIZE_T_ONE)) << (((i) >> 1) + TREEBIN_SHIFT - 1)))
    591 
    592 /* ------------------------ Operations on bin maps ----------------------- */
    593 
    594 /* bit corresponding to given index */
    595 #define idx2bit(i)		((binmap_t)(1) << (i))
    596 
    597 /* Mark/Clear bits with given index */
    598 #define mark_smallmap(M,i)	((M)->smallmap |=  idx2bit(i))
    599 #define clear_smallmap(M,i)	((M)->smallmap &= ~idx2bit(i))
    600 #define smallmap_is_marked(M,i)	((M)->smallmap &   idx2bit(i))
    601 
    602 #define mark_treemap(M,i)	((M)->treemap  |=  idx2bit(i))
    603 #define clear_treemap(M,i)	((M)->treemap  &= ~idx2bit(i))
    604 #define treemap_is_marked(M,i)	((M)->treemap  &   idx2bit(i))
    605 
    606 /* mask with all bits to left of least bit of x on */
    607 #define left_bits(x)		((x<<1) | (~(x<<1)+1))
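
/* Hedged example: for a 32 bit binmap_t, left_bits(0x10) == 0xFFFFFFE0,
** i.e. every bit strictly above the least set bit of x is turned on.
*/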
    608 
    609 /* Set cinuse bit and pinuse bit of next chunk */
    610 #define set_inuse(M,p,s)\
    611   ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\
    612   ((mchunkptr)(((char *)(p)) + (s)))->head |= PINUSE_BIT)
    613 
    614 /* Set cinuse and pinuse of this chunk and pinuse of next chunk */
    615 #define set_inuse_and_pinuse(M,p,s)\
    616   ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\
    617   ((mchunkptr)(((char *)(p)) + (s)))->head |= PINUSE_BIT)
    618 
    619 /* Set size, cinuse and pinuse bit of this chunk */
    620 #define set_size_and_pinuse_of_inuse_chunk(M, p, s)\
    621   ((p)->head = (s|PINUSE_BIT|CINUSE_BIT))
    622 
    623 /* ----------------------- Operations on smallbins ----------------------- */
    624 
    625 /* Link a free chunk into a smallbin  */
    626 #define insert_small_chunk(M, P, S) {\
    627   bindex_t I = small_index(S);\
    628   mchunkptr B = smallbin_at(M, I);\
    629   mchunkptr F = B;\
    630   if (!smallmap_is_marked(M, I))\
    631     mark_smallmap(M, I);\
    632   else\
    633     F = B->fd;\
    634   B->fd = P;\
    635   F->bk = P;\
    636   P->fd = F;\
    637   P->bk = B;\
    638 }
    639 
    640 /* Unlink a chunk from a smallbin  */
    641 #define unlink_small_chunk(M, P, S) {\
    642   mchunkptr F = P->fd;\
    643   mchunkptr B = P->bk;\
    644   bindex_t I = small_index(S);\
    645   if (F == B) {\
    646     clear_smallmap(M, I);\
    647   } else {\
    648     F->bk = B;\
    649     B->fd = F;\
    650   }\
    651 }
    652 
    653 /* Unlink the first chunk from a smallbin */
    654 #define unlink_first_small_chunk(M, B, P, I) {\
    655   mchunkptr F = P->fd;\
    656   if (B == F) {\
    657     clear_smallmap(M, I);\
    658   } else {\
    659     B->fd = F;\
    660     F->bk = B;\
    661   }\
    662 }
    663 
    664 /* Replace dv node, binning the old one */
    665 /* Used only when dvsize known to be small */
    666 #define replace_dv(M, P, S) {\
    667   size_t DVS = M->dvsize;\
    668   if (DVS != 0) {\
    669     mchunkptr DV = M->dv;\
    670     insert_small_chunk(M, DV, DVS);\
    671   }\
    672   M->dvsize = S;\
    673   M->dv = P;\
    674 }
    675 
    676 /* ------------------------- Operations on trees ------------------------- */
    677 
    678 /* Insert chunk into tree */
    679 #define insert_large_chunk(M, X, S) {\
    680   tbinptr *H;\
    681   bindex_t I;\
    682   compute_tree_index(S, I);\
    683   H = treebin_at(M, I);\
    684   X->index = I;\
    685   X->child[0] = X->child[1] = 0;\
    686   if (!treemap_is_marked(M, I)) {\
    687     mark_treemap(M, I);\
    688     *H = X;\
    689     X->parent = (tchunkptr)H;\
    690     X->fd = X->bk = X;\
    691   } else {\
    692     tchunkptr T = *H;\
    693     size_t K = S << leftshift_for_tree_index(I);\
    694     for (;;) {\
    695       if (chunksize(T) != S) {\
    696 	tchunkptr *C = &(T->child[(K >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]);\
    697 	K <<= 1;\
    698 	if (*C != 0) {\
    699 	  T = *C;\
    700 	} else {\
    701 	  *C = X;\
    702 	  X->parent = T;\
    703 	  X->fd = X->bk = X;\
    704 	  break;\
    705 	}\
    706       } else {\
    707 	tchunkptr F = T->fd;\
    708 	T->fd = F->bk = X;\
    709 	X->fd = F;\
    710 	X->bk = T;\
    711 	X->parent = 0;\
    712 	break;\
    713       }\
    714     }\
    715   }\
    716 }
    717 
    718 #define unlink_large_chunk(M, X) {\
    719   tchunkptr XP = X->parent;\
    720   tchunkptr R;\
    721   if (X->bk != X) {\
    722     tchunkptr F = X->fd;\
    723     R = X->bk;\
    724     F->bk = R;\
    725     R->fd = F;\
    726   } else {\
    727     tchunkptr *RP;\
    728     if (((R = *(RP = &(X->child[1]))) != 0) ||\
    729 	((R = *(RP = &(X->child[0]))) != 0)) {\
    730       tchunkptr *CP;\
    731       while ((*(CP = &(R->child[1])) != 0) ||\
    732 	     (*(CP = &(R->child[0])) != 0)) {\
    733 	R = *(RP = CP);\
    734       }\
    735       *RP = 0;\
    736     }\
    737   }\
    738   if (XP != 0) {\
    739     tbinptr *H = treebin_at(M, X->index);\
    740     if (X == *H) {\
    741       if ((*H = R) == 0) \
    742 	clear_treemap(M, X->index);\
    743     } else {\
    744       if (XP->child[0] == X) \
    745 	XP->child[0] = R;\
    746       else \
    747 	XP->child[1] = R;\
    748     }\
    749     if (R != 0) {\
    750       tchunkptr C0, C1;\
    751       R->parent = XP;\
    752       if ((C0 = X->child[0]) != 0) {\
    753 	R->child[0] = C0;\
    754 	C0->parent = R;\
    755       }\
    756       if ((C1 = X->child[1]) != 0) {\
    757 	R->child[1] = C1;\
    758 	C1->parent = R;\
    759       }\
    760     }\
    761   }\
    762 }
    763 
    764 /* Relays to large vs small bin operations */
    765 
    766 #define insert_chunk(M, P, S)\
    767   if (is_small(S)) { insert_small_chunk(M, P, S)\
    768   } else { tchunkptr TP = (tchunkptr)(P); insert_large_chunk(M, TP, S); }
    769 
    770 #define unlink_chunk(M, P, S)\
    771   if (is_small(S)) { unlink_small_chunk(M, P, S)\
    772   } else { tchunkptr TP = (tchunkptr)(P); unlink_large_chunk(M, TP); }
    773 
    774 /* -----------------------  Direct-mmapping chunks ----------------------- */
    775 
    776 static void *direct_alloc(size_t nb)
    777 {
    778   size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
    779   if (LJ_LIKELY(mmsize > nb)) {     /* Check for wrap around 0 */
    780     char *mm = (char *)(DIRECT_MMAP(mmsize));
    781     if (mm != CMFAIL) {
    782       size_t offset = align_offset(chunk2mem(mm));
    783       size_t psize = mmsize - offset - DIRECT_FOOT_PAD;
    784       mchunkptr p = (mchunkptr)(mm + offset);
    785       p->prev_foot = offset | IS_DIRECT_BIT;
    786       p->head = psize|CINUSE_BIT;
    787       chunk_plus_offset(p, psize)->head = FENCEPOST_HEAD;
    788       chunk_plus_offset(p, psize+SIZE_T_SIZE)->head = 0;
    789       return chunk2mem(p);
    790     }
    791   }
    792   return NULL;
    793 }
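
/* Hedged layout sketch of a direct chunk (not from the original source):
**
**   mm .. offset .. p: [head = psize|CINUSE_BIT] .. user mem ..
**   p+psize: [FENCEPOST_HEAD] [0]   <- inside DIRECT_FOOT_PAD
**
** prev_foot stores offset|IS_DIRECT_BIT, so lj_alloc_free() can recover
** the original mapping base mm and unmap the whole region at once.
*/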
    794 
    795 static mchunkptr direct_resize(mchunkptr oldp, size_t nb)
    796 {
    797   size_t oldsize = chunksize(oldp);
    798   if (is_small(nb)) /* Can't shrink direct regions below small size */
    799     return NULL;
    800   /* Keep old chunk if big enough but not too big */
    801   if (oldsize >= nb + SIZE_T_SIZE &&
    802       (oldsize - nb) <= (DEFAULT_GRANULARITY >> 1)) {
    803     return oldp;
    804   } else {
    805     size_t offset = oldp->prev_foot & ~IS_DIRECT_BIT;
    806     size_t oldmmsize = oldsize + offset + DIRECT_FOOT_PAD;
    807     size_t newmmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
    808     char *cp = (char *)CALL_MREMAP((char *)oldp - offset,
    809 				   oldmmsize, newmmsize, CALL_MREMAP_MV);
    810     if (cp != CMFAIL) {
    811       mchunkptr newp = (mchunkptr)(cp + offset);
    812       size_t psize = newmmsize - offset - DIRECT_FOOT_PAD;
    813       newp->head = psize|CINUSE_BIT;
    814       chunk_plus_offset(newp, psize)->head = FENCEPOST_HEAD;
    815       chunk_plus_offset(newp, psize+SIZE_T_SIZE)->head = 0;
    816       return newp;
    817     }
    818   }
    819   return NULL;
    820 }
    821 
    822 /* -------------------------- mspace management -------------------------- */
    823 
    824 /* Initialize top chunk and its size */
    825 static void init_top(mstate m, mchunkptr p, size_t psize)
    826 {
    827   /* Ensure alignment */
    828   size_t offset = align_offset(chunk2mem(p));
    829   p = (mchunkptr)((char *)p + offset);
    830   psize -= offset;
    831 
    832   m->top = p;
    833   m->topsize = psize;
    834   p->head = psize | PINUSE_BIT;
    835   /* set size of fake trailing chunk holding overhead space only once */
    836   chunk_plus_offset(p, psize)->head = TOP_FOOT_SIZE;
    837   m->trim_check = DEFAULT_TRIM_THRESHOLD; /* reset on each update */
    838 }
    839 
    840 /* Initialize bins for a new mstate that is otherwise zeroed out */
    841 static void init_bins(mstate m)
    842 {
    843   /* Establish circular links for smallbins */
    844   bindex_t i;
    845   for (i = 0; i < NSMALLBINS; i++) {
    846     sbinptr bin = smallbin_at(m,i);
    847     bin->fd = bin->bk = bin;
    848   }
    849 }
    850 
    851 /* Allocate chunk and prepend remainder with chunk in successor base. */
    852 static void *prepend_alloc(mstate m, char *newbase, char *oldbase, size_t nb)
    853 {
    854   mchunkptr p = align_as_chunk(newbase);
    855   mchunkptr oldfirst = align_as_chunk(oldbase);
    856   size_t psize = (size_t)((char *)oldfirst - (char *)p);
    857   mchunkptr q = chunk_plus_offset(p, nb);
    858   size_t qsize = psize - nb;
    859   set_size_and_pinuse_of_inuse_chunk(m, p, nb);
    860 
    861   /* consolidate remainder with first chunk of old base */
    862   if (oldfirst == m->top) {
    863     size_t tsize = m->topsize += qsize;
    864     m->top = q;
    865     q->head = tsize | PINUSE_BIT;
    866   } else if (oldfirst == m->dv) {
    867     size_t dsize = m->dvsize += qsize;
    868     m->dv = q;
    869     set_size_and_pinuse_of_free_chunk(q, dsize);
    870   } else {
    871     if (!cinuse(oldfirst)) {
    872       size_t nsize = chunksize(oldfirst);
    873       unlink_chunk(m, oldfirst, nsize);
    874       oldfirst = chunk_plus_offset(oldfirst, nsize);
    875       qsize += nsize;
    876     }
    877     set_free_with_pinuse(q, qsize, oldfirst);
    878     insert_chunk(m, q, qsize);
    879   }
    880 
    881   return chunk2mem(p);
    882 }
    883 
    884 /* Add a segment to hold a new noncontiguous region */
    885 static void add_segment(mstate m, char *tbase, size_t tsize)
    886 {
    887   /* Determine locations and sizes of segment, fenceposts, old top */
    888   char *old_top = (char *)m->top;
    889   msegmentptr oldsp = segment_holding(m, old_top);
    890   char *old_end = oldsp->base + oldsp->size;
    891   size_t ssize = pad_request(sizeof(struct malloc_segment));
    892   char *rawsp = old_end - (ssize + FOUR_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
    893   size_t offset = align_offset(chunk2mem(rawsp));
    894   char *asp = rawsp + offset;
    895   char *csp = (asp < (old_top + MIN_CHUNK_SIZE))? old_top : asp;
    896   mchunkptr sp = (mchunkptr)csp;
    897   msegmentptr ss = (msegmentptr)(chunk2mem(sp));
    898   mchunkptr tnext = chunk_plus_offset(sp, ssize);
    899   mchunkptr p = tnext;
    900 
    901   /* reset top to new space */
    902   init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE);
    903 
    904   /* Set up segment record */
    905   set_size_and_pinuse_of_inuse_chunk(m, sp, ssize);
    906   *ss = m->seg; /* Push current record */
    907   m->seg.base = tbase;
    908   m->seg.size = tsize;
    909   m->seg.next = ss;
    910 
    911   /* Insert trailing fenceposts */
    912   for (;;) {
    913     mchunkptr nextp = chunk_plus_offset(p, SIZE_T_SIZE);
    914     p->head = FENCEPOST_HEAD;
    915     if ((char *)(&(nextp->head)) < old_end)
    916       p = nextp;
    917     else
    918       break;
    919   }
    920 
    921   /* Insert the rest of old top into a bin as an ordinary free chunk */
    922   if (csp != old_top) {
    923     mchunkptr q = (mchunkptr)old_top;
    924     size_t psize = (size_t)(csp - old_top);
    925     mchunkptr tn = chunk_plus_offset(q, psize);
    926     set_free_with_pinuse(q, psize, tn);
    927     insert_chunk(m, q, psize);
    928   }
    929 }
    930 
    931 /* -------------------------- System allocation -------------------------- */
    932 
    933 static void *alloc_sys(mstate m, size_t nb)
    934 {
    935   char *tbase = CMFAIL;
    936   size_t tsize = 0;
    937 
    938   /* Directly map large chunks */
    939   if (LJ_UNLIKELY(nb >= DEFAULT_MMAP_THRESHOLD)) {
    940     void *mem = direct_alloc(nb);
    941     if (mem != 0)
    942       return mem;
    943   }
    944 
    945   {
    946     size_t req = nb + TOP_FOOT_SIZE + SIZE_T_ONE;
    947     size_t rsize = granularity_align(req);
    948     if (LJ_LIKELY(rsize > nb)) { /* Fail if wraps around zero */
    949       char *mp = (char *)(CALL_MMAP(rsize));
    950       if (mp != CMFAIL) {
    951 	tbase = mp;
    952 	tsize = rsize;
    953       }
    954     }
    955   }
    956 
    957   if (tbase != CMFAIL) {
    958     msegmentptr sp = &m->seg;
    959     /* Try to merge with an existing segment */
    960     while (sp != 0 && tbase != sp->base + sp->size)
    961       sp = sp->next;
    962     if (sp != 0 && segment_holds(sp, m->top)) { /* append */
    963       sp->size += tsize;
    964       init_top(m, m->top, m->topsize + tsize);
    965     } else {
    966       sp = &m->seg;
    967       while (sp != 0 && sp->base != tbase + tsize)
    968 	sp = sp->next;
    969       if (sp != 0) {
    970 	char *oldbase = sp->base;
    971 	sp->base = tbase;
    972 	sp->size += tsize;
    973 	return prepend_alloc(m, tbase, oldbase, nb);
    974       } else {
    975 	add_segment(m, tbase, tsize);
    976       }
    977     }
    978 
    979     if (nb < m->topsize) { /* Allocate from new or extended top space */
    980       size_t rsize = m->topsize -= nb;
    981       mchunkptr p = m->top;
    982       mchunkptr r = m->top = chunk_plus_offset(p, nb);
    983       r->head = rsize | PINUSE_BIT;
    984       set_size_and_pinuse_of_inuse_chunk(m, p, nb);
    985       return chunk2mem(p);
    986     }
    987   }
    988 
    989   return NULL;
    990 }
    991 
    992 /* -----------------------  system deallocation -------------------------- */
    993 
    994 /* Unmap and unlink any mmapped segments that don't contain used chunks */
    995 static size_t release_unused_segments(mstate m)
    996 {
    997   size_t released = 0;
    998   size_t nsegs = 0;
    999   msegmentptr pred = &m->seg;
   1000   msegmentptr sp = pred->next;
   1001   while (sp != 0) {
   1002     char *base = sp->base;
   1003     size_t size = sp->size;
   1004     msegmentptr next = sp->next;
   1005     nsegs++;
   1006     {
   1007       mchunkptr p = align_as_chunk(base);
   1008       size_t psize = chunksize(p);
   1009       /* Can unmap if first chunk holds entire segment and not pinned */
   1010       if (!cinuse(p) && (char *)p + psize >= base + size - TOP_FOOT_SIZE) {
   1011 	tchunkptr tp = (tchunkptr)p;
   1012 	if (p == m->dv) {
   1013 	  m->dv = 0;
   1014 	  m->dvsize = 0;
   1015 	} else {
   1016 	  unlink_large_chunk(m, tp);
   1017 	}
   1018 	if (CALL_MUNMAP(base, size) == 0) {
   1019 	  released += size;
   1020 	  /* unlink obsoleted record */
   1021 	  sp = pred;
   1022 	  sp->next = next;
   1023 	} else { /* back out if cannot unmap */
   1024 	  insert_large_chunk(m, tp, psize);
   1025 	}
   1026       }
   1027     }
   1028     pred = sp;
   1029     sp = next;
   1030   }
   1031   /* Reset check counter */
   1032   m->release_checks = nsegs > MAX_RELEASE_CHECK_RATE ?
   1033 		      nsegs : MAX_RELEASE_CHECK_RATE;
   1034   return released;
   1035 }
   1036 
   1037 static int alloc_trim(mstate m, size_t pad)
   1038 {
   1039   size_t released = 0;
   1040   if (pad < MAX_REQUEST && is_initialized(m)) {
   1041     pad += TOP_FOOT_SIZE; /* ensure enough room for segment overhead */
   1042 
   1043     if (m->topsize > pad) {
   1044       /* Shrink top space in granularity-size units, keeping at least one */
   1045       size_t unit = DEFAULT_GRANULARITY;
   1046       size_t extra = ((m->topsize - pad + (unit - SIZE_T_ONE)) / unit -
   1047 		      SIZE_T_ONE) * unit;
   1048       msegmentptr sp = segment_holding(m, (char *)m->top);
   1049 
   1050       if (sp->size >= extra &&
   1051 	  !has_segment_link(m, sp)) { /* can't shrink if pinned */
   1052 	size_t newsize = sp->size - extra;
   1053 	/* Prefer mremap, fall back to munmap */
   1054 	if ((CALL_MREMAP(sp->base, sp->size, newsize, CALL_MREMAP_NOMOVE) != MFAIL) ||
   1055 	    (CALL_MUNMAP(sp->base + newsize, extra) == 0)) {
   1056 	  released = extra;
   1057 	}
   1058       }
   1059 
   1060       if (released != 0) {
   1061 	sp->size -= released;
   1062 	init_top(m, m->top, m->topsize - released);
   1063       }
   1064     }
   1065 
   1066     /* Unmap any unused mmapped segments */
   1067     released += release_unused_segments(m);
   1068 
   1069     /* On failure, disable autotrim to avoid repeated failed future calls */
   1070     if (released == 0 && m->topsize > m->trim_check)
   1071       m->trim_check = MAX_SIZE_T;
   1072   }
   1073 
   1074   return (released != 0)? 1 : 0;
   1075 }
   1076 
   1077 /* ---------------------------- malloc support --------------------------- */
   1078 
   1079 /* allocate a large request from the best fitting chunk in a treebin */
   1080 static void *tmalloc_large(mstate m, size_t nb)
   1081 {
   1082   tchunkptr v = 0;
   1083   size_t rsize = ~nb+1; /* Unsigned negation */
   1084   tchunkptr t;
   1085   bindex_t idx;
   1086   compute_tree_index(nb, idx);
   1087 
   1088   if ((t = *treebin_at(m, idx)) != 0) {
   1089     /* Traverse tree for this bin looking for node with size == nb */
   1090     size_t sizebits = nb << leftshift_for_tree_index(idx);
   1091     tchunkptr rst = 0;  /* The deepest untaken right subtree */
   1092     for (;;) {
   1093       tchunkptr rt;
   1094       size_t trem = chunksize(t) - nb;
   1095       if (trem < rsize) {
   1096 	v = t;
   1097 	if ((rsize = trem) == 0)
   1098 	  break;
   1099       }
   1100       rt = t->child[1];
   1101       t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1];
   1102       if (rt != 0 && rt != t)
   1103 	rst = rt;
   1104       if (t == 0) {
   1105 	t = rst; /* set t to least subtree holding sizes > nb */
   1106 	break;
   1107       }
   1108       sizebits <<= 1;
   1109     }
   1110   }
   1111 
   1112   if (t == 0 && v == 0) { /* set t to root of next non-empty treebin */
   1113     binmap_t leftbits = left_bits(idx2bit(idx)) & m->treemap;
   1114     if (leftbits != 0)
   1115       t = *treebin_at(m, lj_ffs(leftbits));
   1116   }
   1117 
   1118   while (t != 0) { /* find smallest of tree or subtree */
   1119     size_t trem = chunksize(t) - nb;
   1120     if (trem < rsize) {
   1121       rsize = trem;
   1122       v = t;
   1123     }
   1124     t = leftmost_child(t);
   1125   }
   1126 
   1127   /*  If dv is a better fit, return NULL so malloc will use it */
   1128   if (v != 0 && rsize < (size_t)(m->dvsize - nb)) {
   1129     mchunkptr r = chunk_plus_offset(v, nb);
   1130     unlink_large_chunk(m, v);
   1131     if (rsize < MIN_CHUNK_SIZE) {
   1132       set_inuse_and_pinuse(m, v, (rsize + nb));
   1133     } else {
   1134       set_size_and_pinuse_of_inuse_chunk(m, v, nb);
   1135       set_size_and_pinuse_of_free_chunk(r, rsize);
   1136       insert_chunk(m, r, rsize);
   1137     }
   1138     return chunk2mem(v);
   1139   }
   1140   return NULL;
   1141 }
   1142 
   1143 /* allocate a small request from the best fitting chunk in a treebin */
   1144 static void *tmalloc_small(mstate m, size_t nb)
   1145 {
   1146   tchunkptr t, v;
   1147   mchunkptr r;
   1148   size_t rsize;
   1149   bindex_t i = lj_ffs(m->treemap);
   1150 
   1151   v = t = *treebin_at(m, i);
   1152   rsize = chunksize(t) - nb;
   1153 
   1154   while ((t = leftmost_child(t)) != 0) {
   1155     size_t trem = chunksize(t) - nb;
   1156     if (trem < rsize) {
   1157       rsize = trem;
   1158       v = t;
   1159     }
   1160   }
   1161 
   1162   r = chunk_plus_offset(v, nb);
   1163   unlink_large_chunk(m, v);
   1164   if (rsize < MIN_CHUNK_SIZE) {
   1165     set_inuse_and_pinuse(m, v, (rsize + nb));
   1166   } else {
   1167     set_size_and_pinuse_of_inuse_chunk(m, v, nb);
   1168     set_size_and_pinuse_of_free_chunk(r, rsize);
   1169     replace_dv(m, r, rsize);
   1170   }
   1171   return chunk2mem(v);
   1172 }
   1173 
   1174 /* ----------------------------------------------------------------------- */
   1175 
   1176 void *lj_alloc_create(void)
   1177 {
   1178   size_t tsize = DEFAULT_GRANULARITY;
   1179   char *tbase;
   1180   INIT_MMAP();
   1181   tbase = (char *)(CALL_MMAP(tsize));
   1182   if (tbase != CMFAIL) {
   1183     size_t msize = pad_request(sizeof(struct malloc_state));
   1184     mchunkptr mn;
   1185     mchunkptr msp = align_as_chunk(tbase);
   1186     mstate m = (mstate)(chunk2mem(msp));
   1187     memset(m, 0, msize);
   1188     msp->head = (msize|PINUSE_BIT|CINUSE_BIT);
   1189     m->seg.base = tbase;
   1190     m->seg.size = tsize;
   1191     m->release_checks = MAX_RELEASE_CHECK_RATE;
   1192     init_bins(m);
   1193     mn = next_chunk(mem2chunk(m));
   1194     init_top(m, mn, (size_t)((tbase + tsize) - (char *)mn) - TOP_FOOT_SIZE);
   1195     return m;
   1196   }
   1197   return NULL;
   1198 }
   1199 
   1200 void lj_alloc_destroy(void *msp)
   1201 {
   1202   mstate ms = (mstate)msp;
   1203   msegmentptr sp = &ms->seg;
   1204   while (sp != 0) {
   1205     char *base = sp->base;
   1206     size_t size = sp->size;
   1207     sp = sp->next;
   1208     CALL_MUNMAP_FORCE(base, size);
   1209   }
   1210 }
   1211 
   1212 static LJ_NOINLINE void *lj_alloc_malloc(void *msp, size_t nsize)
   1213 {
   1214   mstate ms = (mstate)msp;
   1215   void *mem;
   1216   size_t nb;
   1217   if (nsize <= MAX_SMALL_REQUEST) {
   1218     bindex_t idx;
   1219     binmap_t smallbits;
   1220     nb = (nsize < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(nsize);
   1221     idx = small_index(nb);
   1222     smallbits = ms->smallmap >> idx;
   1223 
   1224     if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. */
   1225       mchunkptr b, p;
   1226       idx += ~smallbits & 1;       /* Uses next bin if idx empty */
   1227       b = smallbin_at(ms, idx);
   1228       p = b->fd;
   1229       unlink_first_small_chunk(ms, b, p, idx);
   1230       set_inuse_and_pinuse(ms, p, small_index2size(idx));
   1231       mem = chunk2mem(p);
   1232       return mem;
   1233     } else if (nb > ms->dvsize) {
   1234       if (smallbits != 0) { /* Use chunk in next nonempty smallbin */
   1235 	mchunkptr b, p, r;
   1236 	size_t rsize;
   1237 	binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx));
   1238 	bindex_t i = lj_ffs(leftbits);
   1239 	b = smallbin_at(ms, i);
   1240 	p = b->fd;
   1241 	unlink_first_small_chunk(ms, b, p, i);
   1242 	rsize = small_index2size(i) - nb;
   1243 	/* Fit here cannot be remainderless with 4-byte sizes */
   1244 	if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE) {
   1245 	  set_inuse_and_pinuse(ms, p, small_index2size(i));
   1246 	} else {
   1247 	  set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
   1248 	  r = chunk_plus_offset(p, nb);
   1249 	  set_size_and_pinuse_of_free_chunk(r, rsize);
   1250 	  replace_dv(ms, r, rsize);
   1251 	}
   1252 	mem = chunk2mem(p);
   1253 	return mem;
   1254       } else if (ms->treemap != 0 && (mem = tmalloc_small(ms, nb)) != 0) {
   1255 	return mem;
   1256       }
   1257     }
   1258   } else if (nsize >= MAX_REQUEST) {
   1259     nb = MAX_SIZE_T; /* Too big to allocate. Force failure (in sys alloc) */
   1260   } else {
   1261     nb = pad_request(nsize);
   1262     if (ms->treemap != 0 && (mem = tmalloc_large(ms, nb)) != 0) {
   1263       return mem;
   1264     }
   1265   }
   1266 
   1267   if (nb <= ms->dvsize) {
   1268     size_t rsize = ms->dvsize - nb;
   1269     mchunkptr p = ms->dv;
   1270     if (rsize >= MIN_CHUNK_SIZE) { /* split dv */
   1271       mchunkptr r = ms->dv = chunk_plus_offset(p, nb);
   1272       ms->dvsize = rsize;
   1273       set_size_and_pinuse_of_free_chunk(r, rsize);
   1274       set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
   1275     } else { /* exhaust dv */
   1276       size_t dvs = ms->dvsize;
   1277       ms->dvsize = 0;
   1278       ms->dv = 0;
   1279       set_inuse_and_pinuse(ms, p, dvs);
   1280     }
   1281     mem = chunk2mem(p);
   1282     return mem;
   1283   } else if (nb < ms->topsize) { /* Split top */
   1284     size_t rsize = ms->topsize -= nb;
   1285     mchunkptr p = ms->top;
   1286     mchunkptr r = ms->top = chunk_plus_offset(p, nb);
   1287     r->head = rsize | PINUSE_BIT;
   1288     set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
   1289     mem = chunk2mem(p);
   1290     return mem;
   1291   }
   1292   return alloc_sys(ms, nb);
   1293 }
   1294 
   1295 static LJ_NOINLINE void *lj_alloc_free(void *msp, void *ptr)
   1296 {
   1297   if (ptr != 0) {
   1298     mchunkptr p = mem2chunk(ptr);
   1299     mstate fm = (mstate)msp;
   1300     size_t psize = chunksize(p);
   1301     mchunkptr next = chunk_plus_offset(p, psize);
   1302     if (!pinuse(p)) {
   1303       size_t prevsize = p->prev_foot;
   1304       if ((prevsize & IS_DIRECT_BIT) != 0) {
   1305 	prevsize &= ~IS_DIRECT_BIT;
   1306 	psize += prevsize + DIRECT_FOOT_PAD;
   1307 	CALL_MUNMAP_FORCE((char *)p - prevsize, psize);
   1308 	return NULL;
   1309       } else {
   1310 	mchunkptr prev = chunk_minus_offset(p, prevsize);
   1311 	psize += prevsize;
   1312 	p = prev;
   1313 	/* consolidate backward */
   1314 	if (p != fm->dv) {
   1315 	  unlink_chunk(fm, p, prevsize);
   1316 	} else if ((next->head & INUSE_BITS) == INUSE_BITS) {
   1317 	  fm->dvsize = psize;
   1318 	  set_free_with_pinuse(p, psize, next);
   1319 	  return NULL;
   1320 	}
   1321       }
   1322     }
   1323     if (!cinuse(next)) {  /* consolidate forward */
   1324       if (next == fm->top) {
   1325 	size_t tsize = fm->topsize += psize;
   1326 	fm->top = p;
   1327 	p->head = tsize | PINUSE_BIT;
   1328 	if (p == fm->dv) {
   1329 	  fm->dv = 0;
   1330 	  fm->dvsize = 0;
   1331 	}
   1332 	if (tsize > fm->trim_check)
   1333 	  alloc_trim(fm, 0);
   1334 	return NULL;
   1335       } else if (next == fm->dv) {
   1336 	size_t dsize = fm->dvsize += psize;
   1337 	fm->dv = p;
   1338 	set_size_and_pinuse_of_free_chunk(p, dsize);
   1339 	return NULL;
   1340       } else {
   1341 	size_t nsize = chunksize(next);
   1342 	psize += nsize;
   1343 	unlink_chunk(fm, next, nsize);
   1344 	set_size_and_pinuse_of_free_chunk(p, psize);
   1345 	if (p == fm->dv) {
   1346 	  fm->dvsize = psize;
   1347 	  return NULL;
   1348 	}
   1349       }
   1350     } else {
   1351       set_free_with_pinuse(p, psize, next);
   1352     }
   1353 
   1354     if (is_small(psize)) {
   1355       insert_small_chunk(fm, p, psize);
   1356     } else {
   1357       tchunkptr tp = (tchunkptr)p;
   1358       insert_large_chunk(fm, tp, psize);
   1359       if (--fm->release_checks == 0)
   1360 	release_unused_segments(fm);
   1361     }
   1362   }
   1363   return NULL;
   1364 }
   1365 
   1366 static LJ_NOINLINE void *lj_alloc_realloc(void *msp, void *ptr, size_t nsize)
   1367 {
   1368   if (nsize >= MAX_REQUEST) {
   1369     return NULL;
   1370   } else {
   1371     mstate m = (mstate)msp;
   1372     mchunkptr oldp = mem2chunk(ptr);
   1373     size_t oldsize = chunksize(oldp);
   1374     mchunkptr next = chunk_plus_offset(oldp, oldsize);
   1375     mchunkptr newp = 0;
   1376     size_t nb = request2size(nsize);
   1377 
   1378     /* Try to either shrink or extend into top. Else malloc-copy-free */
   1379     if (is_direct(oldp)) {
   1380       newp = direct_resize(oldp, nb);  /* this may return NULL. */
   1381     } else if (oldsize >= nb) { /* already big enough */
   1382       size_t rsize = oldsize - nb;
   1383       newp = oldp;
   1384       if (rsize >= MIN_CHUNK_SIZE) {
   1385 	mchunkptr rem = chunk_plus_offset(newp, nb);
   1386 	set_inuse(m, newp, nb);
   1387 	set_inuse(m, rem, rsize);
   1388 	lj_alloc_free(m, chunk2mem(rem));
   1389       }
   1390     } else if (next == m->top && oldsize + m->topsize > nb) {
   1391       /* Expand into top */
   1392       size_t newsize = oldsize + m->topsize;
   1393       size_t newtopsize = newsize - nb;
   1394       mchunkptr newtop = chunk_plus_offset(oldp, nb);
   1395       set_inuse(m, oldp, nb);
   1396       newtop->head = newtopsize |PINUSE_BIT;
   1397       m->top = newtop;
   1398       m->topsize = newtopsize;
   1399       newp = oldp;
   1400     }
   1401 
   1402     if (newp != 0) {
   1403       return chunk2mem(newp);
   1404     } else {
   1405       void *newmem = lj_alloc_malloc(m, nsize);
   1406       if (newmem != 0) {
   1407 	size_t oc = oldsize - overhead_for(oldp);
   1408 	memcpy(newmem, ptr, oc < nsize ? oc : nsize);
   1409 	lj_alloc_free(m, ptr);
   1410       }
   1411       return newmem;
   1412     }
   1413   }
   1414 }
   1415 
   1416 void *lj_alloc_f(void *msp, void *ptr, size_t osize, size_t nsize)
   1417 {
   1418   (void)osize;
   1419   if (nsize == 0) {
   1420     return lj_alloc_free(msp, ptr);
   1421   } else if (ptr == NULL) {
   1422     return lj_alloc_malloc(msp, nsize);
   1423   } else {
   1424     return lj_alloc_realloc(msp, ptr, nsize);
   1425   }
   1426 }
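
/* Hedged usage sketch (not part of this file): lj_alloc_f matches the
** lua_Alloc signature, so a state can be wired up roughly the way
** luaL_newstate() does it internally on supported targets.
*/
#if 0
#include "lua.h"

static void alloc_demo(void)
{
  void *msp = lj_alloc_create();
  lua_State *L = msp ? lua_newstate(lj_alloc_f, msp) : NULL;
  if (L) lua_close(L);             /* Free all Lua objects first. */
  if (msp) lj_alloc_destroy(msp);  /* Then unmap the arena itself. */
}
#endif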
   1427 
   1428 #endif