buildvm.c (13034B)
1 /* 2 ** LuaJIT VM builder. 3 ** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h 4 ** 5 ** This is a tool to build the hand-tuned assembler code required for 6 ** LuaJIT's bytecode interpreter. It supports a variety of output formats 7 ** to feed different toolchains (see usage() below). 8 ** 9 ** This tool is not particularly optimized because it's only used while 10 ** _building_ LuaJIT. There's no point in distributing or installing it. 11 ** Only the object code generated by this tool is linked into LuaJIT. 12 ** 13 ** Caveat: some memory is not free'd, error handling is lazy. 14 ** It's a one-shot tool -- any effort fixing this would be wasted. 15 */ 16 17 #include "buildvm.h" 18 #include "lj_obj.h" 19 #include "lj_gc.h" 20 #include "lj_bc.h" 21 #include "lj_ir.h" 22 #include "lj_ircall.h" 23 #include "lj_frame.h" 24 #include "lj_dispatch.h" 25 #if LJ_HASFFI 26 #include "lj_ctype.h" 27 #include "lj_ccall.h" 28 #endif 29 #include "luajit.h" 30 31 #if defined(_WIN32) 32 #include <fcntl.h> 33 #include <io.h> 34 #endif 35 36 /* ------------------------------------------------------------------------ */ 37 38 /* DynASM glue definitions. */ 39 #define Dst ctx 40 #define Dst_DECL BuildCtx *ctx 41 #define Dst_REF (ctx->D) 42 #define DASM_CHECKS 1 43 44 #include "../dynasm/dasm_proto.h" 45 46 /* Glue macros for DynASM. */ 47 static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type); 48 49 #define DASM_EXTERN(ctx, addr, idx, type) \ 50 collect_reloc(ctx, addr, idx, type) 51 52 /* ------------------------------------------------------------------------ */ 53 54 /* Avoid trouble if cross-compiling for an x86 target. Speed doesn't matter. */ 55 #define DASM_ALIGNED_WRITES 1 56 57 /* Embed architecture-specific DynASM encoder. */ 58 #if LJ_TARGET_X86ORX64 59 #include "../dynasm/dasm_x86.h" 60 #elif LJ_TARGET_ARM 61 #include "../dynasm/dasm_arm.h" 62 #elif LJ_TARGET_ARM64 63 #include "../dynasm/dasm_arm64.h" 64 #elif LJ_TARGET_PPC 65 #include "../dynasm/dasm_ppc.h" 66 #elif LJ_TARGET_MIPS 67 #include "../dynasm/dasm_mips.h" 68 #else 69 #error "No support for this architecture (yet)" 70 #endif 71 72 /* Embed generated architecture-specific backend. */ 73 #include "buildvm_arch.h" 74 75 /* ------------------------------------------------------------------------ */ 76 77 void owrite(BuildCtx *ctx, const void *ptr, size_t sz) 78 { 79 if (fwrite(ptr, 1, sz, ctx->fp) != sz) { 80 fprintf(stderr, "Error: cannot write to output file: %s\n", 81 strerror(errno)); 82 exit(1); 83 } 84 } 85 86 /* ------------------------------------------------------------------------ */ 87 88 /* Emit code as raw bytes. Only used for DynASM debugging. */ 89 static void emit_raw(BuildCtx *ctx) 90 { 91 owrite(ctx, ctx->code, ctx->codesz); 92 } 93 94 /* -- Build machine code -------------------------------------------------- */ 95 96 static const char *sym_decorate(BuildCtx *ctx, 97 const char *prefix, const char *suffix) 98 { 99 char name[256]; 100 char *p; 101 #if LJ_64 102 const char *symprefix = ctx->mode == BUILD_machasm ? "_" : ""; 103 #elif LJ_TARGET_XBOX360 104 const char *symprefix = ""; 105 #else 106 const char *symprefix = ctx->mode != BUILD_elfasm ? "_" : ""; 107 #endif 108 sprintf(name, "%s%s%s", symprefix, prefix, suffix); 109 p = strchr(name, '@'); 110 if (p) { 111 #if LJ_TARGET_X86ORX64 112 if (!LJ_64 && (ctx->mode == BUILD_coffasm || ctx->mode == BUILD_peobj)) 113 name[0] = name[1] == 'R' ? '_' : '@'; /* Just for _RtlUnwind@16. */ 114 else 115 *p = '\0'; 116 #elif LJ_TARGET_PPC && !LJ_TARGET_CONSOLE 117 /* Keep @plt etc. */ 118 #else 119 *p = '\0'; 120 #endif 121 } 122 p = (char *)malloc(strlen(name)+1); /* MSVC doesn't like strdup. */ 123 strcpy(p, name); 124 return p; 125 } 126 127 #define NRELOCSYM (sizeof(extnames)/sizeof(extnames[0])-1) 128 129 static int relocmap[NRELOCSYM]; 130 131 /* Collect external relocations. */ 132 static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type) 133 { 134 if (ctx->nreloc >= BUILD_MAX_RELOC) { 135 fprintf(stderr, "Error: too many relocations, increase BUILD_MAX_RELOC.\n"); 136 exit(1); 137 } 138 if (relocmap[idx] < 0) { 139 relocmap[idx] = ctx->nrelocsym; 140 ctx->relocsym[ctx->nrelocsym] = sym_decorate(ctx, "", extnames[idx]); 141 ctx->nrelocsym++; 142 } 143 ctx->reloc[ctx->nreloc].ofs = (int32_t)(addr - ctx->code); 144 ctx->reloc[ctx->nreloc].sym = relocmap[idx]; 145 ctx->reloc[ctx->nreloc].type = type; 146 ctx->nreloc++; 147 #if LJ_TARGET_XBOX360 148 return (int)(ctx->code - addr) + 4; /* Encode symbol offset of .text. */ 149 #else 150 return 0; /* Encode symbol offset of 0. */ 151 #endif 152 } 153 154 /* Naive insertion sort. Performance doesn't matter here. */ 155 static void sym_insert(BuildCtx *ctx, int32_t ofs, 156 const char *prefix, const char *suffix) 157 { 158 ptrdiff_t i = ctx->nsym++; 159 while (i > 0) { 160 if (ctx->sym[i-1].ofs <= ofs) 161 break; 162 ctx->sym[i] = ctx->sym[i-1]; 163 i--; 164 } 165 ctx->sym[i].ofs = ofs; 166 ctx->sym[i].name = sym_decorate(ctx, prefix, suffix); 167 } 168 169 /* Build the machine code. */ 170 static int build_code(BuildCtx *ctx) 171 { 172 int status; 173 int i; 174 175 /* Initialize DynASM structures. */ 176 ctx->nglob = GLOB__MAX; 177 ctx->glob = (void **)malloc(ctx->nglob*sizeof(void *)); 178 memset(ctx->glob, 0, ctx->nglob*sizeof(void *)); 179 ctx->nreloc = 0; 180 181 ctx->globnames = globnames; 182 ctx->extnames = extnames; 183 ctx->relocsym = (const char **)malloc(NRELOCSYM*sizeof(const char *)); 184 ctx->nrelocsym = 0; 185 for (i = 0; i < (int)NRELOCSYM; i++) relocmap[i] = -1; 186 187 ctx->dasm_ident = DASM_IDENT; 188 ctx->dasm_arch = DASM_ARCH; 189 190 dasm_init(Dst, DASM_MAXSECTION); 191 dasm_setupglobal(Dst, ctx->glob, ctx->nglob); 192 dasm_setup(Dst, build_actionlist); 193 194 /* Call arch-specific backend to emit the code. */ 195 ctx->npc = build_backend(ctx); 196 197 /* Finalize the code. */ 198 (void)dasm_checkstep(Dst, -1); 199 if ((status = dasm_link(Dst, &ctx->codesz))) return status; 200 ctx->code = (uint8_t *)malloc(ctx->codesz); 201 if ((status = dasm_encode(Dst, (void *)ctx->code))) return status; 202 203 /* Allocate symbol table and bytecode offsets. */ 204 ctx->beginsym = sym_decorate(ctx, "", LABEL_PREFIX "vm_asm_begin"); 205 ctx->sym = (BuildSym *)malloc((ctx->npc+ctx->nglob+1)*sizeof(BuildSym)); 206 ctx->nsym = 0; 207 ctx->bc_ofs = (int32_t *)malloc(ctx->npc*sizeof(int32_t)); 208 209 /* Collect the opcodes (PC labels). */ 210 for (i = 0; i < ctx->npc; i++) { 211 int32_t ofs = dasm_getpclabel(Dst, i); 212 if (ofs < 0) return 0x22000000|i; 213 ctx->bc_ofs[i] = ofs; 214 if ((LJ_HASJIT || 215 !(i == BC_JFORI || i == BC_JFORL || i == BC_JITERL || i == BC_JLOOP || 216 i == BC_IFORL || i == BC_IITERL || i == BC_ILOOP)) && 217 (LJ_HASFFI || i != BC_KCDATA)) 218 sym_insert(ctx, ofs, LABEL_PREFIX_BC, bc_names[i]); 219 } 220 221 /* Collect the globals (named labels). */ 222 for (i = 0; i < ctx->nglob; i++) { 223 const char *gl = globnames[i]; 224 int len = (int)strlen(gl); 225 if (!ctx->glob[i]) { 226 fprintf(stderr, "Error: undefined global %s\n", gl); 227 exit(2); 228 } 229 /* Skip the _Z symbols. */ 230 if (!(len >= 2 && gl[len-2] == '_' && gl[len-1] == 'Z')) 231 sym_insert(ctx, (int32_t)((uint8_t *)(ctx->glob[i]) - ctx->code), 232 LABEL_PREFIX, globnames[i]); 233 } 234 235 /* Close the address range. */ 236 sym_insert(ctx, (int32_t)ctx->codesz, "", ""); 237 ctx->nsym--; 238 239 dasm_free(Dst); 240 241 return 0; 242 } 243 244 /* -- Generate VM enums --------------------------------------------------- */ 245 246 const char *const bc_names[] = { 247 #define BCNAME(name, ma, mb, mc, mt) #name, 248 BCDEF(BCNAME) 249 #undef BCNAME 250 NULL 251 }; 252 253 const char *const ir_names[] = { 254 #define IRNAME(name, m, m1, m2) #name, 255 IRDEF(IRNAME) 256 #undef IRNAME 257 NULL 258 }; 259 260 const char *const irt_names[] = { 261 #define IRTNAME(name, size) #name, 262 IRTDEF(IRTNAME) 263 #undef IRTNAME 264 NULL 265 }; 266 267 const char *const irfpm_names[] = { 268 #define FPMNAME(name) #name, 269 IRFPMDEF(FPMNAME) 270 #undef FPMNAME 271 NULL 272 }; 273 274 const char *const irfield_names[] = { 275 #define FLNAME(name, ofs) #name, 276 IRFLDEF(FLNAME) 277 #undef FLNAME 278 NULL 279 }; 280 281 const char *const ircall_names[] = { 282 #define IRCALLNAME(cond, name, nargs, kind, type, flags) #name, 283 IRCALLDEF(IRCALLNAME) 284 #undef IRCALLNAME 285 NULL 286 }; 287 288 static const char *const trace_errors[] = { 289 #define TREDEF(name, msg) msg, 290 #include "lj_traceerr.h" 291 NULL 292 }; 293 294 static const char *lower(char *buf, const char *s) 295 { 296 char *p = buf; 297 while (*s) { 298 *p++ = (*s >= 'A' && *s <= 'Z') ? *s+0x20 : *s; 299 s++; 300 } 301 *p = '\0'; 302 return buf; 303 } 304 305 /* Emit C source code for bytecode-related definitions. */ 306 static void emit_bcdef(BuildCtx *ctx) 307 { 308 int i; 309 fprintf(ctx->fp, "/* This is a generated file. DO NOT EDIT! */\n\n"); 310 fprintf(ctx->fp, "LJ_DATADEF const uint16_t lj_bc_ofs[] = {\n"); 311 for (i = 0; i < ctx->npc; i++) { 312 if (i != 0) 313 fprintf(ctx->fp, ",\n"); 314 fprintf(ctx->fp, "%d", ctx->bc_ofs[i]); 315 } 316 } 317 318 /* Emit VM definitions as Lua code for debug modules. */ 319 static void emit_vmdef(BuildCtx *ctx) 320 { 321 char buf[80]; 322 int i; 323 fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n"); 324 fprintf(ctx->fp, "return {\n\n"); 325 326 fprintf(ctx->fp, "bcnames = \""); 327 for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s", bc_names[i]); 328 fprintf(ctx->fp, "\",\n\n"); 329 330 fprintf(ctx->fp, "irnames = \""); 331 for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s", ir_names[i]); 332 fprintf(ctx->fp, "\",\n\n"); 333 334 fprintf(ctx->fp, "irfpm = { [0]="); 335 for (i = 0; irfpm_names[i]; i++) 336 fprintf(ctx->fp, "\"%s\", ", lower(buf, irfpm_names[i])); 337 fprintf(ctx->fp, "},\n\n"); 338 339 fprintf(ctx->fp, "irfield = { [0]="); 340 for (i = 0; irfield_names[i]; i++) { 341 char *p; 342 lower(buf, irfield_names[i]); 343 p = strchr(buf, '_'); 344 if (p) *p = '.'; 345 fprintf(ctx->fp, "\"%s\", ", buf); 346 } 347 fprintf(ctx->fp, "},\n\n"); 348 349 fprintf(ctx->fp, "ircall = {\n[0]="); 350 for (i = 0; ircall_names[i]; i++) 351 fprintf(ctx->fp, "\"%s\",\n", ircall_names[i]); 352 fprintf(ctx->fp, "},\n\n"); 353 354 fprintf(ctx->fp, "traceerr = {\n[0]="); 355 for (i = 0; trace_errors[i]; i++) 356 fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]); 357 fprintf(ctx->fp, "},\n\n"); 358 } 359 360 /* -- Argument parsing ---------------------------------------------------- */ 361 362 /* Build mode names. */ 363 static const char *const modenames[] = { 364 #define BUILDNAME(name) #name, 365 BUILDDEF(BUILDNAME) 366 #undef BUILDNAME 367 NULL 368 }; 369 370 /* Print usage information and exit. */ 371 static void usage(void) 372 { 373 int i; 374 fprintf(stderr, LUAJIT_VERSION " VM builder.\n"); 375 fprintf(stderr, LUAJIT_COPYRIGHT ", " LUAJIT_URL "\n"); 376 fprintf(stderr, "Target architecture: " LJ_ARCH_NAME "\n\n"); 377 fprintf(stderr, "Usage: buildvm -m mode [-o outfile] [infiles...]\n\n"); 378 fprintf(stderr, "Available modes:\n"); 379 for (i = 0; i < BUILD__MAX; i++) 380 fprintf(stderr, " %s\n", modenames[i]); 381 exit(1); 382 } 383 384 /* Parse the output mode name. */ 385 static BuildMode parsemode(const char *mode) 386 { 387 int i; 388 for (i = 0; modenames[i]; i++) 389 if (!strcmp(mode, modenames[i])) 390 return (BuildMode)i; 391 usage(); 392 return (BuildMode)-1; 393 } 394 395 /* Parse arguments. */ 396 static void parseargs(BuildCtx *ctx, char **argv) 397 { 398 const char *a; 399 int i; 400 ctx->mode = (BuildMode)-1; 401 ctx->outname = "-"; 402 for (i = 1; (a = argv[i]) != NULL; i++) { 403 if (a[0] != '-') 404 break; 405 switch (a[1]) { 406 case '-': 407 if (a[2]) goto err; 408 i++; 409 goto ok; 410 case '\0': 411 goto ok; 412 case 'm': 413 i++; 414 if (a[2] || argv[i] == NULL) goto err; 415 ctx->mode = parsemode(argv[i]); 416 break; 417 case 'o': 418 i++; 419 if (a[2] || argv[i] == NULL) goto err; 420 ctx->outname = argv[i]; 421 break; 422 default: err: 423 usage(); 424 break; 425 } 426 } 427 ok: 428 ctx->args = argv+i; 429 if (ctx->mode == (BuildMode)-1) goto err; 430 } 431 432 int main(int argc, char **argv) 433 { 434 BuildCtx ctx_; 435 BuildCtx *ctx = &ctx_; 436 int status, binmode; 437 438 if (sizeof(void *) != 4*LJ_32+8*LJ_64) { 439 fprintf(stderr,"Error: pointer size mismatch in cross-build.\n"); 440 fprintf(stderr,"Try: make HOST_CC=\"gcc -m32\" CROSS=...\n\n"); 441 return 1; 442 } 443 444 UNUSED(argc); 445 parseargs(ctx, argv); 446 447 if ((status = build_code(ctx))) { 448 fprintf(stderr,"Error: DASM error %08x\n", status); 449 return 1; 450 } 451 452 switch (ctx->mode) { 453 case BUILD_peobj: 454 case BUILD_raw: 455 binmode = 1; 456 break; 457 default: 458 binmode = 0; 459 break; 460 } 461 462 if (ctx->outname[0] == '-' && ctx->outname[1] == '\0') { 463 ctx->fp = stdout; 464 #if defined(_WIN32) 465 if (binmode) 466 _setmode(_fileno(stdout), _O_BINARY); /* Yuck. */ 467 #endif 468 } else if (!(ctx->fp = fopen(ctx->outname, binmode ? "wb" : "w"))) { 469 fprintf(stderr, "Error: cannot open output file '%s': %s\n", 470 ctx->outname, strerror(errno)); 471 exit(1); 472 } 473 474 switch (ctx->mode) { 475 case BUILD_elfasm: 476 case BUILD_coffasm: 477 case BUILD_machasm: 478 emit_asm(ctx); 479 emit_asm_debug(ctx); 480 break; 481 case BUILD_peobj: 482 emit_peobj(ctx); 483 break; 484 case BUILD_raw: 485 emit_raw(ctx); 486 break; 487 case BUILD_bcdef: 488 emit_bcdef(ctx); 489 emit_lib(ctx); 490 break; 491 case BUILD_vmdef: 492 emit_vmdef(ctx); 493 emit_lib(ctx); 494 fprintf(ctx->fp, "}\n\n"); 495 break; 496 case BUILD_ffdef: 497 case BUILD_libdef: 498 case BUILD_recdef: 499 emit_lib(ctx); 500 break; 501 case BUILD_folddef: 502 emit_fold(ctx); 503 break; 504 default: 505 break; 506 } 507 508 fflush(ctx->fp); 509 if (ferror(ctx->fp)) { 510 fprintf(stderr, "Error: cannot write to output file: %s\n", 511 strerror(errno)); 512 exit(1); 513 } 514 fclose(ctx->fp); 515 516 return 0; 517 } 518