build_stats.py (14502B)
#!/usr/bin/env python3
# Copyright (c) the JPEG XL Project Authors. All rights reserved.
#
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.


"""build_stats.py: Gather statistics about sizes of dependencies.

This tools computes a realistic estimate of the size contribution to a binary
from a statically linked library. Statically linked libraries compiled with
-ffunction-sections and linked -gc-sections mean that we could drop part of the
library at the final binary linking time. This tool takes that into account the
symbols that end up in the final binary and not just all the symbols of the
components.
"""

import argparse
import collections
import itertools
import json
import os
import platform
import re
import struct
import subprocess
import sys
import tempfile

# Ignore functions with stack size smaller than this value.
MIN_STACK_SIZE = 32

IS_OSX = (platform.system() == 'Darwin')

Symbol = collections.namedtuple('Symbol', ['address', 'size', 'typ', 'name'])

# Represents the stack size information of a function (defined by its address).
SymbolStack = collections.namedtuple('SymbolStack',
                                     ['address', 'stack_size'])

ObjectStats = collections.namedtuple('ObjectStats',
                                     ['name', 'in_partition', 'size_map'])

# An object target file in the build system.
Target = collections.namedtuple('Target',
                                ['name', 'deps', 'filename'])

# Sections that end up in the binary file.
# t - text (code), d - global non-const data, n/r - read-only data,
# w - weak symbols (likely inline code not inlined),
# v - weak symbols (vtable / typeinfo)
# u - unique symbols
BIN_SIZE = 'tdnrwvu'

# Sections that end up in static RAM.
RAM_SIZE = 'dbs'

# u - symbols imported from some other library
# a - absolute address symbols
# c - common symbol
# i - indirect symbol
# - - debugger symbol table entries
IGNORE_SYMBOLS = 'uaci-'

SIMD_NAMESPACES = [
    'N_SCALAR', 'N_WASM', 'N_NEON', 'N_PPC8', 'N_SSE4', 'N_AVX2', 'N_AVX3']


def ParseNmOutput(nm_text, estimate_sizes=False):
  """Parses the text printed by `nm --format=posix` into Symbol tuples.

  Args:
    nm_text: decoded output of nm, one symbol per line in the form
      "name type [address [size]]" with address and size in hexadecimal.
    estimate_sizes: when True, the size of every symbol that has an address
      is replaced by the distance to the next symbol address (used on OSX).

  Returns:
    A list of Symbol. Missing address/size columns are stored as None. When
    estimate_sizes is True the symbols with an address come first, sorted by
    address, followed by the symbols without an address.

  Raises:
    ValueError: if a line doesn't have between 2 and 4 columns.
  """
  ret = []
  for line in nm_text.splitlines():
    line = line.rstrip()
    if not line:
      # OSX nm produces extra crlf at the end
      continue
    if line.endswith(':'):
      # Ignore object names.
      continue
    line = re.sub(' +', ' ', line)
    # symbol_name, symbol_type, (optional) address, (optional) size
    symlist = line.rstrip().split(' ')
    col_count = len(symlist)
    if not 2 <= col_count <= 4:
      raise ValueError('Unexpected nm output line: %r' % line)
    ret.append(Symbol(
        int(symlist[2], 16) if col_count > 2 else None,
        int(symlist[3], 16) if col_count > 3 else None,
        symlist[1],
        symlist[0]))

  if estimate_sizes:
    # Symbols without an address can't be ordered nor measured; keep them
    # aside instead of failing when comparing None with an int.
    addressed = sorted((sym for sym in ret if sym.address is not None),
                       key=lambda sym: sym.address)
    unaddressed = [sym for sym in ret if sym.address is None]
    for i in range(len(addressed) - 1):
      size = addressed[i + 1].address - addressed[i].address
      if size > (1 << 30):
        # Implausibly large gap, keep the size reported by nm.
        continue
      addressed[i] = addressed[i]._replace(size=size)
    ret = addressed + unaddressed
  return ret


def LoadSymbols(filename):
  """Returns the list of Symbol found by running nm on filename.

  On OSX the sizes are estimated from the distance between consecutive symbol
  addresses.
  """
  nmout = subprocess.check_output(['nm', '--format=posix', filename])
  return ParseNmOutput(nmout.decode('utf-8'), estimate_sizes=IS_OSX)


def LoadTargetCommand(target, build_dir):
  """Returns the last command ninja lists for building target, as a str."""
  stdout = subprocess.check_output(
      ['ninja', '-C', build_dir, '-t', 'commands', target])
  # The last command is always the command to build (link) the requested
  # target.
  command = stdout.splitlines()[-1]
  return command.decode('utf-8')


def LoadTarget(target, build_dir):
  """Loads a build system target and its dependencies into a Target object"""
  if target.endswith('.o'):
    # Speed up this case.
    return Target(target, [], target)

  link_params = LoadTargetCommand(target, build_dir).split()
  if 'cmake_symlink_library' in link_params:
    # The target is a library symlinked, use the target of the symlink
    # instead.
    target = link_params[link_params.index('cmake_symlink_library') + 1]
    link_params = LoadTargetCommand(target, build_dir).split()

  # The target name is not always the same as the filename of the output, for
  # example, "djxl" target generates "tools/djxl" file.
  if '-o' in link_params:
    target_filename = link_params[link_params.index('-o') + 1]
  elif target.endswith('.a'):
    # Command is '/path/to/ar', 'qc', 'target.a', ...
    target_filename = link_params[link_params.index('qc') + 1]
  else:
    raise Exception('Unknown "%s" output filename in command: %r' %
                    (target, link_params))

  # Every existing .o / .a file mentioned in the command, other than the
  # output itself, is a dependency. The order of first appearance is kept.
  tgt_libs = []
  for entry in link_params:
    if not entry or not entry.endswith(('.o', '.a')):
      continue
    if entry == target_filename:
      continue
    fn = os.path.join(build_dir, entry)
    if not os.path.exists(fn):
      continue
    if entry in tgt_libs:
      continue
    tgt_libs.append(entry)

  return Target(target, tgt_libs, target_filename)


def TargetTransitiveDeps(all_tgts, target):
  """Returns the list of all transitive dependencies of target

  The direct dependencies come first, followed by the indirect ones in
  breadth-first order. Each dependency is listed once and the Target objects
  in all_tgts are not modified.
  """
  # Work on a copy: the deps list is owned by the cached Target object.
  ret = list(all_tgts[target].deps)
  seen = set(ret)
  # There can't be loop dependencies in the targets.
  i = 0
  while i < len(ret):
    for dep in all_tgts[ret[i]].deps:
      if dep not in seen:
        seen.add(dep)
        ret.append(dep)
    i += 1
  return ret


def ParseStackSizes(stack_sizes, pointer_fmt):
  """Parses the raw contents of a .stack_sizes section.

  Args:
    stack_sizes: bytes with an array of pairs of function address (pointer
      size) and stack size (unsigned LEB128).
    pointer_fmt: struct format of a pointer, for example '<Q' or '<I'.

  Returns:
    A list of SymbolStack for the entries with a stack size of at least
    MIN_STACK_SIZE, in the order they appear in the section.

  Raises:
    ValueError: if the data ends in the middle of an entry.
  """
  pointer_size = struct.calcsize(pointer_fmt)
  ret = []
  i = 0
  while i < len(stack_sizes):
    if len(stack_sizes) < i + pointer_size:
      raise ValueError('Truncated address in .stack_sizes at offset %d' % i)
    addr, = struct.unpack_from(pointer_fmt, stack_sizes, i)
    i += pointer_size
    # Parse LEB128: 7 bits per byte, least significant group first, the high
    # bit of each byte signals that more bytes follow.
    size = 0
    for j in range(10):
      if i >= len(stack_sizes):
        raise ValueError('Truncated size in .stack_sizes at offset %d' % i)
      b = stack_sizes[i]
      i += 1
      size += (b & 0x7f) << (7 * j)
      if (b & 0x80) == 0:
        break
    if size >= MIN_STACK_SIZE:
      ret.append(SymbolStack(addr, size))
  return ret


def LoadStackSizes(filename, binutils=''):
  """Loads the stack size used by functions from the ELF.

  This function loads the stack size the compiler stored in the .stack_sizes
  section, which can be done by compiling with -fstack-size-section in clang.

  Args:
    filename: path to the object or binary file to inspect.
    binutils: prefix prepended to the objcopy / objdump tool names.

  Returns:
    A list of SymbolStack, see ParseStackSizes().
  """
  with tempfile.NamedTemporaryFile() as stack_sizes_sec:
    objcopy = ['objcopy', 'gobjcopy'][IS_OSX]
    subprocess.check_call(
        [binutils + objcopy, '-O', 'binary', '--only-section=.stack_sizes',
         '--set-section-flags', '.stack_sizes=alloc', filename,
         stack_sizes_sec.name])
    stack_sizes = stack_sizes_sec.read()
  # From the documentation:
  #  The section will contain an array of pairs of function symbol values
  #  (pointer size) and stack sizes (unsigned LEB128). The stack size values
  #  only include the space allocated in the function prologue. Functions with
  #  dynamic stack allocations are not included.

  # Get the pointer format based on the ELF file.
  objdump = ['objdump', 'gobjdump'][IS_OSX]
  output = subprocess.check_output(
      [binutils + objdump, '-a', filename]).decode('utf-8')
  match = re.search('file format (.*)$', output, re.MULTILINE)
  if match is None:
    raise Exception('Unknown file format of %s' % filename)
  elf_format = match.group(1)
  if (elf_format.startswith('elf64-little') or
      elf_format.endswith(('-x86-64', '-arm64'))):
    pointer_fmt = '<Q'
  elif elf_format.startswith('elf32-little') or elf_format == 'elf32-i386':
    pointer_fmt = '<I'
  else:
    raise Exception('Unknown ELF format: %s' % elf_format)

  return ParseStackSizes(stack_sizes, pointer_fmt)


def TargetSize(symbols, symbol_filter=None):
  """Adds up the size of the symbols grouped by lowercase symbol type.

  Args:
    symbols: iterable of Symbol. Symbols without size (None or 0) are skipped.
    symbol_filter: optional set of symbol names; when given only the symbols
      named in it are counted. The set is modified in place: the name of a
      counted local (lowercase type) or weak ('w' / 'v') symbol is removed.

  Returns:
    A dict from lowercase symbol type to the total size of that type.
  """
  ret = {}
  for sym in symbols:
    if not sym.size or (symbol_filter is not None and
                        sym.name not in symbol_filter):
      continue
    t = sym.typ.lower()
    # We can remove symbols if they appear in multiple objects since they will
    # be merged by the linker.
    if symbol_filter is not None and (t == sym.typ or t in 'wv'):
      symbol_filter.remove(sym.name)
    ret.setdefault(t, 0)
    ret[t] += sym.size
  return ret


def PrintStats(stats):
  """Print a table with the size stats for a target

  Args:
    stats: list of ObjectStats. The percentages are relative to the largest
      binary / RAM size found in the list, and the sizes of the entries with
      in_partition set are subtracted from it to compute the "-- unknown --"
      row.

  Raises:
    Exception: if a size_map has a type not listed in BIN_SIZE or RAM_SIZE.
  """
  table = []
  sum_bin_size = 0
  sum_ram_size = 0

  for objstat in stats:
    bin_size = 0
    ram_size = 0
    for typ, size in objstat.size_map.items():
      # A type can be counted in both columns ('d' is in both lists).
      if typ in BIN_SIZE:
        bin_size += size
      if typ in RAM_SIZE:
        ram_size += size
      if typ not in BIN_SIZE + RAM_SIZE:
        raise Exception('Unknown type "%s"' % typ)
    if objstat.in_partition:
      sum_bin_size += bin_size
      sum_ram_size += ram_size

    table.append((objstat.name, bin_size, ram_size))
  # default=0 keeps an empty stats list from raising in max().
  mx_bin_size = max((row[1] for row in table), default=0)
  mx_ram_size = max((row[2] for row in table), default=0)

  table.append(('-- unknown --', mx_bin_size - sum_bin_size,
                mx_ram_size - sum_ram_size))

  # Print the table
  print('%-32s %17s %17s' % ('Object name', 'Binary size', 'Static RAM size'))
  for name, bin_size, ram_size in table:
    print('%-32s %8d (%5.1f%%) %8d (%5.1f%%)' % (
        name, bin_size, (100. * bin_size / mx_bin_size) if mx_bin_size else 0,
        ram_size, (100. * ram_size / mx_ram_size) if mx_ram_size else 0))
  print()


def PrintStackStats(tgt_stack_sizes, top_entries=20):
  """Prints the first top_entries entries of a name -> stack size mapping.

  Nothing is printed for an empty mapping. A top_entries value of 0 or less
  prints all the entries.
  """
  if not tgt_stack_sizes:
    return
  print(' Stack Symbol name')
  for i, (name, size) in enumerate(tgt_stack_sizes.items()):
    if top_entries > 0 and i >= top_entries:
      break
    print('%8d %s' % (size, name))
  print()


def PrintTopSymbols(tgt_top_symbols):
  """Prints a table from a list of (size, type, name) symbol tuples.

  Nothing is printed for an empty list.
  """
  if not tgt_top_symbols:
    return
  print(' Size T Symbol name')
  for size, typ, name in tgt_top_symbols:
    print('%9d %s %s' % (size, typ, name))
  print()


def CheckStackLimits(stats, max_stack):
  """Reports the functions of every target exceeding the stack limit.

  Args:
    stats: dict from target name to its stats dict, whose 'stack' entry maps
      a function name to its stack size.
    max_stack: maximum allowed stack size; None or 0 disables the check.

  Returns:
    1 if any function of any target exceeds max_stack, 0 otherwise. An error
    line is printed to stderr for each offending function.
  """
  exit_code = 0
  if not max_stack:
    return exit_code
  for target_stats in stats.values():
    for name, size in target_stats['stack'].items():
      if size > max_stack:
        print('Error: %s exceeds stack limit: %d vs %d' % (
                  name, size, max_stack),
              file=sys.stderr)
        exit_code = 1
  return exit_code


def SizeStats(args):
  """Main entry point of the program after parsing parameters.

  Computes the size statistics of the given targets and their components.

  Args:
    args: parsed command line; reads target, build_dir, recursive,
      top_symbols, binutils, save and max_stack.

  Returns:
    The process exit code: 1 if a function of any of the targets exceeds
    args.max_stack, 0 otherwise.
  """
  # The dictionary with the stats that we store on disk as a json. This includes
  # one entry per passed args.target.
  stats = {}

  # Cache of Target object of a target.
  tgts = {}

  # Load all the targets.
  pending = set(args.target)
  while pending:
    target = pending.pop()
    tgt = LoadTarget(target, args.build_dir)
    tgts[target] = tgt
    if args.recursive:
      for dep in tgt.deps:
        if dep not in tgts:
          pending.add(dep)

  # Cache of symbols of a target.
  syms = {}
  # Load the symbols from the all targets and its deps.
  all_deps = set(tgts.keys()).union(*[set(tgt.deps) for tgt in tgts.values()])
  for entry in all_deps:
    fn = os.path.join(args.build_dir,
                      tgts[entry].filename if entry in tgts else entry)
    syms[entry] = LoadSymbols(fn)

  for target in args.target:
    tgt_stats = []
    tgt = tgts[target]

    # Names of the symbols that ended up in the target.
    tgt_syms = syms[target]
    used_syms = set()
    for sym in tgt_syms:
      if sym.typ.lower() in BIN_SIZE + RAM_SIZE:
        used_syms.add(sym.name)
      elif sym.typ.lower() in IGNORE_SYMBOLS:
        continue
      else:
        print('Unknown: %s %s' % (sym.typ, sym.name))

    # Stack sizes are not loaded for static libraries (.a).
    target_path = os.path.join(args.build_dir, tgt.filename)
    sym_stacks = []
    if not target_path.endswith('.a'):
      sym_stacks = LoadStackSizes(target_path, args.binutils)
    symbols_by_addr = {sym.address: sym for sym in tgt_syms
                       if sym.typ.lower() in 'tw'}
    # Ordered from the largest to the smallest stack size.
    tgt_stack_sizes = collections.OrderedDict()
    for sym_stack in sorted(sym_stacks, key=lambda s: -s.stack_size):
      tgt_stack_sizes[
          symbols_by_addr[sym_stack.address].name] = sym_stack.stack_size

    tgt_top_symbols = []
    if args.top_symbols:
      tgt_top_symbols = [(sym.size, sym.typ, sym.name) for sym in tgt_syms
                         if sym.name in used_syms and sym.size]
      # Largest first, ties broken by name.
      tgt_top_symbols.sort(key=lambda t: (-t[0], t[2]))
      tgt_top_symbols = tgt_top_symbols[:args.top_symbols]

    tgt_size = TargetSize(tgt_syms)
    tgt_stats.append(ObjectStats(target, False, tgt_size))

    # Split out by SIMD.
    for namespace in SIMD_NAMESPACES:
      # Namespace as it appears in a mangled name: length followed by name.
      mangled = str(len(namespace)) + namespace
      if not any(mangled in sym.name for sym in tgt_syms):
        continue
      ret = {}
      for sym in tgt_syms:
        if not sym.size or mangled not in sym.name:
          continue
        t = sym.typ.lower()
        ret.setdefault(t, 0)
        ret[t] += sym.size
      # SIMD namespaces are not part of the partition, they are already included
      # in the jpegxl-static normally.
      if not ret:
        continue
      tgt_stats.append(ObjectStats('\\--> ' + namespace, False, ret))

    for obj in tgt.deps:
      # Snapshot taken before TargetSize() removes the symbols of obj, so the
      # deps of obj listed below are measured against the same set.
      dep_used_syms = used_syms.copy()
      obj_size = TargetSize(syms[obj], used_syms)
      if not obj_size:
        continue
      tgt_stats.append(ObjectStats(os.path.basename(obj), True, obj_size))
      if args.recursive:
        # Not really recursive, but it shows all the remaining deps at a second
        # level.
        for obj_dep in sorted(TargetTransitiveDeps(tgts, obj),
                              key=os.path.basename):
          obj_dep_size = TargetSize(syms[obj_dep], dep_used_syms)
          if not obj_dep_size:
            continue
          tgt_stats.append(ObjectStats(
              ' '+ os.path.basename(obj_dep), False, obj_dep_size))

    PrintStats(tgt_stats)
    PrintStackStats(tgt_stack_sizes)
    PrintTopSymbols(tgt_top_symbols)
    stats[target] = {
        'build': tgt_stats,
        'stack': tgt_stack_sizes,
        'top': tgt_top_symbols,
    }

  if args.save:
    with open(args.save, 'w') as f:
      json.dump(stats, f)

  # Check the maximum stack size of all the targets, not only the last one.
  return CheckStackLimits(stats, args.max_stack)


def main():
  """Parses the command line and exits with the code from SizeStats()."""
  parser = argparse.ArgumentParser(description=__doc__)
  parser.add_argument('target', type=str, nargs='+',
                      help='target(s) to analyze')
  parser.add_argument('--build-dir', default='build',
                      help='path to the build directory')
  parser.add_argument('--save', default=None,
                      help='path to save the stats as JSON file')
  parser.add_argument('-r', '--recursive', default=False, action='store_true',
                      help='Print recursive entries.')
  parser.add_argument('--top-symbols', default=0, type=int,
                      help='Number of largest symbols to print')
  parser.add_argument('--binutils', default='',
                      help='prefix path to binutils tools, such as '
                           'aarch64-linux-gnu-')
  parser.add_argument('--max-stack', default=None, type=int,
                      help=('Maximum static stack size of a function. If a '
                            'static stack is larger it will exit with an error '
                            'code.'))
  args = parser.parse_args()
  sys.exit(SizeStats(args))


if __name__ == '__main__':
  main()