libjxl

FORK: libjxl patches used on blog
git clone https://git.neptards.moe/blog/libjxl.git
Log | Files | Refs | Submodules | README | LICENSE

build_stats.py (14502B)


      1 #!/usr/bin/env python3
      2 # Copyright (c) the JPEG XL Project Authors. All rights reserved.
      3 #
      4 # Use of this source code is governed by a BSD-style
      5 # license that can be found in the LICENSE file.
      6 
      7 
      8 """build_stats.py: Gather statistics about sizes of dependencies.
      9 
     10 This tool computes a realistic estimate of the size contribution to a binary
     11 from a statically linked library. Statically linked libraries compiled with
     12 -ffunction-sections and linked with -gc-sections mean that we could drop part
     13 of the library at the final binary linking time. This tool takes into account
     14 the symbols that end up in the final binary and not just all the symbols of
     15 the components.
     16 """
     17 
     18 import argparse
     19 import collections
     20 import itertools
     21 import json
     22 import os
     23 import platform
     24 import re
     25 import struct
     26 import subprocess
     27 import sys
     28 import tempfile
     29 
     30 # Ignore functions with stack size smaller than this value.
     31 MIN_STACK_SIZE = 32
     32 
     33 IS_OSX = (platform.system() == 'Darwin')
     34 
     35 Symbol = collections.namedtuple('Symbol', ['address', 'size', 'typ', 'name'])
     36 
     37 # Represents the stack size information of a function (defined by its address).
     38 SymbolStack = collections.namedtuple('SymbolStack',
     39                                      ['address', 'stack_size'])
     40 
     41 ObjectStats = collections.namedtuple('ObjectStats',
     42                                      ['name', 'in_partition', 'size_map'])
     43 
     44 # An object target file in the build system.
     45 Target = collections.namedtuple('Target',
     46                                 ['name', 'deps', 'filename'])
     47 
     48 # Sections that end up in the binary file.
     49 # t - text (code), d - global non-const data, n/r - read-only data,
     50 # w - weak symbols (likely inline code not inlined),
     51 # v - weak symbols (vtable / typeinfo)
     52 # u - unique symbols
     53 BIN_SIZE = 'tdnrwvu'
     54 
     55 # Sections that end up in static RAM.
     56 RAM_SIZE = 'dbs'
     57 
     58 # u - symbols imported from some other library
     59 # a - absolute address symbols
     60 # c - common symbol
     61 # i - indirect symbol
     62 # - - debugger symbol table entries
     63 IGNORE_SYMBOLS = 'uaci-'
     64 
     65 SIMD_NAMESPACES = [
     66     'N_SCALAR', 'N_WASM', 'N_NEON', 'N_PPC8', 'N_SSE4', 'N_AVX2', 'N_AVX3']
     67 
     68 
     69 def LoadSymbols(filename):
     70   ret = []
     71   nmout = subprocess.check_output(['nm', '--format=posix', filename])
     72   for line in nmout.decode('utf-8').splitlines():
     73     line = line.rstrip()
     74     if len(line) == 0:
     75       # OSX nm produces extra crlf at the end
     76       continue
     77     if line.endswith(':'):
     78       # Ignore object names.
     79       continue
     80     line = re.sub(' +', ' ', line)
     81     # symbol_name, symbol_type, (optional) address, (optional) size
     82     symlist = line.rstrip().split(' ')
     83     col_count = len(symlist)
     84     assert 2 <= col_count <= 4
     85     ret.append(Symbol(
     86         int(symlist[2], 16) if col_count > 2 else None,
     87         int(symlist[3], 16) if col_count > 3 else None,
     88         symlist[1],
     89         symlist[0]))
     90   if IS_OSX:
     91     ret = sorted(ret, key=lambda sym: sym.address)
     92     for i in range(len(ret) - 1):
     93       size = ret[i + 1].address - ret[i].address
     94       if size > (1 << 30):
     95         continue
     96       ret[i] = ret[i]._replace(size=size)
     97   return ret
     98 
     99 def LoadTargetCommand(target, build_dir):
    100   stdout = subprocess.check_output(
    101       ['ninja', '-C', build_dir, '-t', 'commands', target])
    102   # The last command is always the command to build (link) the requested
    103   # target.
    104   command = stdout.splitlines()[-1]
    105   return command.decode('utf-8')
    106 
    107 
    108 def LoadTarget(target, build_dir):
    109   """Loads a build system target and its dependencies into a Target object"""
    110   if target.endswith('.o'):
    111     # Speed up this case.
    112     return Target(target, [], target)
    113 
    114   link_params = LoadTargetCommand(target, build_dir).split()
    115   if 'cmake_symlink_library' in link_params:
    116     # The target is a library symlinked, use the target of the symlink
    117     # instead.
    118     target = link_params[link_params.index('cmake_symlink_library') + 1]
    119     link_params = LoadTargetCommand(target, build_dir).split()
    120 
    121   # The target name is not always the same as the filename of the output, for
    122   # example, "djxl" target generates "tools/djxl" file.
    123   if '-o' in link_params:
    124     target_filename = link_params[link_params.index('-o') + 1]
    125   elif target.endswith('.a'):
    126     # Command is '/path/to/ar', 'qc', 'target.a', ...
    127     target_filename = link_params[link_params.index('qc') + 1]
    128   else:
    129     raise Exception('Unknown "%s" output filename in command: %r' %
    130                     (target, link_params))
    131 
    132   tgt_libs = []
    133   for entry in link_params:
    134     if not entry or not (entry.endswith('.o') or entry.endswith('.a')):
    135       continue
    136     if entry == target_filename:
    137       continue
    138     fn = os.path.join(build_dir, entry)
    139     if not os.path.exists(fn):
    140       continue
    141     if entry in tgt_libs:
    142       continue
    143     tgt_libs.append(entry)
    144 
    145   return Target(target, tgt_libs, target_filename)
    146 
    147 
    148 def TargetTransitiveDeps(all_tgts, target):
    149   """Returns the list of all transitive dependencies of target"""
    150   ret = all_tgts[target].deps
    151   # There can't be loop dependencies in the targets.
    152   i = 0
    153   while i < len(ret):
    154     ret.extend(all_tgts[ret[i]].deps)
    155     i += 1
    156   return ret
    157 
    158 
    159 def LoadStackSizes(filename, binutils=''):
    160   """Loads the stack size used by functions from the ELF.
    161 
    162   This function loads the stack size the compiler stored in the .stack_sizes
    163   section, which can be done by compiling with -fstack-size-section in clang.
    164   """
    165   with tempfile.NamedTemporaryFile() as stack_sizes_sec:
    166     objcopy = ['objcopy', 'gobjcopy'][IS_OSX]
    167     subprocess.check_call(
    168         [binutils + objcopy, '-O', 'binary', '--only-section=.stack_sizes',
    169          '--set-section-flags', '.stack_sizes=alloc', filename,
    170          stack_sizes_sec.name])
    171     stack_sizes = stack_sizes_sec.read()
    172   # From the documentation:
    173   #  The section will contain an array of pairs of function symbol values
    174   #  (pointer size) and stack sizes (unsigned LEB128). The stack size values
    175   #  only include the space allocated in the function prologue. Functions with
    176   #  dynamic stack allocations are not included.
    177 
    178   # Get the pointer format based on the ELF file.
    179   objdump = ['objdump', 'gobjdump'][IS_OSX]
    180   output = subprocess.check_output(
    181       [binutils + objdump, '-a', filename]).decode('utf-8')
    182   elf_format = re.search('file format (.*)$', output, re.MULTILINE).group(1)
    183   if elf_format.startswith('elf64-little') or elf_format.endswith('-x86-64') or elf_format.endswith('-arm64'):
    184     pointer_fmt = '<Q'
    185   elif elf_format.startswith('elf32-little') or elf_format == 'elf32-i386':
    186     pointer_fmt = '<I'
    187   else:
    188     raise Exception('Unknown ELF format: %s' % elf_format)
    189   pointer_size = struct.calcsize(pointer_fmt)
    190 
    191   ret = []
    192   i = 0
    193   while i < len(stack_sizes):
    194     assert len(stack_sizes) >= i + pointer_size
    195     addr, = struct.unpack_from(pointer_fmt, stack_sizes, i)
    196     i += pointer_size
    197     # Parse LEB128
    198     size = 0
    199     for j in range(10):
    200       b = stack_sizes[i]
    201       i += 1
    202       size += (b & 0x7f) << (7 * j)
    203       if (b & 0x80) == 0:
    204         break
    205     if size >= MIN_STACK_SIZE:
    206       ret.append(SymbolStack(addr, size))
    207   return ret
    208 
    209 
    210 def TargetSize(symbols, symbol_filter=None):
    211   ret = {}
    212   for sym in symbols:
    213     if not sym.size or (symbol_filter is not None and
    214                         sym.name not in symbol_filter):
    215       continue
    216     t = sym.typ.lower()
    217     # We can remove symbols if they appear in multiple objects since they will
    218     # be merged by the linker.
    219     if symbol_filter is not None and (t == sym.typ or t in 'wv'):
    220       symbol_filter.remove(sym.name)
    221     ret.setdefault(t, 0)
    222     ret[t] += sym.size
    223   return ret
    224 
    225 
    226 def PrintStats(stats):
    227   """Print a table with the size stats for a target"""
    228   table = []
    229   sum_bin_size = 0
    230   sum_ram_size = 0
    231 
    232   for objstat in stats:
    233     bin_size = 0
    234     ram_size = 0
    235     for typ, size in objstat.size_map.items():
    236       if typ in BIN_SIZE:
    237         bin_size += size
    238       if typ in RAM_SIZE:
    239         ram_size += size
    240       if typ not in BIN_SIZE + RAM_SIZE:
    241         raise Exception('Unknown type "%s"' % typ)
    242     if objstat.in_partition:
    243       sum_bin_size += bin_size
    244       sum_ram_size += ram_size
    245 
    246     table.append((objstat.name, bin_size, ram_size))
    247   mx_bin_size = max(row[1] for row in table)
    248   mx_ram_size = max(row[2] for row in table)
    249 
    250   table.append(('-- unknown --', mx_bin_size - sum_bin_size,
    251                 mx_ram_size - sum_ram_size))
    252 
    253   # Print the table
    254   print('%-32s %17s %17s' % ('Object name', 'Binary size', 'Static RAM size'))
    255   for name, bin_size, ram_size in table:
    256     print('%-32s %8d (%5.1f%%) %8d (%5.1f%%)' % (
    257         name, bin_size, (100. * bin_size / mx_bin_size) if mx_bin_size else 0,
    258         ram_size, (100. * ram_size / mx_ram_size) if mx_ram_size else 0))
    259   print()
    260 
    261 
    262 def PrintStackStats(tgt_stack_sizes, top_entries=20):
    263   if not tgt_stack_sizes:
    264     return
    265   print(' Stack   Symbol name')
    266   for i, (name, size) in zip(itertools.count(), tgt_stack_sizes.items()):
    267     if top_entries > 0 and i >= top_entries:
    268       break
    269     print('%8d %s' % (size, name))
    270   print()
    271 
    272 
    273 def PrintTopSymbols(tgt_top_symbols):
    274   if not tgt_top_symbols:
    275     return
    276   print(' Size     T Symbol name')
    277   for size, typ, name in tgt_top_symbols:
    278     print('%9d %s %s' % (size, typ, name))
    279   print()
    280 
    281 
    282 def SizeStats(args):
    283   """Main entry point of the program after parsing parameters.
    284 
    285   Computes the size statistics of the given targets and their components."""
    286   # The dictionary with the stats that we store on disk as a json. This includes
    287   # one entry per passed args.target.
    288   stats = {}
    289 
    290   # Cache of Target object of a target.
    291   tgts = {}
    292 
    293   # Load all the targets.
    294   pending = set(args.target)
    295   while pending:
    296     target = pending.pop()
    297     tgt = LoadTarget(target, args.build_dir)
    298     tgts[target] = tgt
    299     if args.recursive:
    300       for dep in tgt.deps:
    301         if dep not in tgts:
    302           pending.add(dep)
    303 
    304   # Cache of symbols of a target.
    305   syms = {}
    306   # Load the symbols from the all targets and its deps.
    307   all_deps = set(tgts.keys()).union(*[set(tgt.deps) for tgt in tgts.values()])
    308   for entry in all_deps:
    309     fn = os.path.join(args.build_dir,
    310                       tgts[entry].filename if entry in tgts else entry)
    311     syms[entry] = LoadSymbols(fn)
    312 
    313   for target in args.target:
    314     tgt_stats = []
    315     tgt = tgts[target]
    316 
    317     tgt_syms = syms[target]
    318     used_syms = set()
    319     for sym in tgt_syms:
    320       if sym.typ.lower() in BIN_SIZE + RAM_SIZE:
    321         used_syms.add(sym.name)
    322       elif sym.typ.lower() in IGNORE_SYMBOLS:
    323         continue
    324       else:
    325         print('Unknown: %s %s' % (sym.typ, sym.name))
    326 
    327     target_path = os.path.join(args.build_dir, tgt.filename)
    328     sym_stacks = []
    329     if not target_path.endswith('.a'):
    330       sym_stacks = LoadStackSizes(target_path, args.binutils)
    331     symbols_by_addr = {sym.address: sym for sym in tgt_syms
    332                           if sym.typ.lower() in 'tw'}
    333     tgt_stack_sizes = collections.OrderedDict()
    334     for sym_stack in sorted(sym_stacks, key=lambda s: -s.stack_size):
    335       tgt_stack_sizes[
    336           symbols_by_addr[sym_stack.address].name] = sym_stack.stack_size
    337 
    338     tgt_top_symbols = []
    339     if args.top_symbols:
    340       tgt_top_symbols = [(sym.size, sym.typ, sym.name) for sym in tgt_syms
    341                          if sym.name in used_syms and sym.size]
    342       tgt_top_symbols.sort(key=lambda t: (-t[0], t[2]))
    343       tgt_top_symbols = tgt_top_symbols[:args.top_symbols]
    344 
    345     tgt_size = TargetSize(tgt_syms)
    346     tgt_stats.append(ObjectStats(target, False, tgt_size))
    347 
    348     # Split out by SIMD.
    349     for namespace in SIMD_NAMESPACES:
    350       mangled = str(len(namespace)) + namespace
    351       if not any(mangled in sym.name for sym in tgt_syms):
    352         continue
    353       ret = {}
    354       for sym in tgt_syms:
    355         if not sym.size or mangled not in sym.name:
    356           continue
    357         t = sym.typ.lower()
    358         ret.setdefault(t, 0)
    359         ret[t] += sym.size
    360       # SIMD namespaces are not part of the partition, they are already included
    361       # in the jpegxl-static normally.
    362       if not ret:
    363         continue
    364       tgt_stats.append(ObjectStats('\\--> ' + namespace, False, ret))
    365 
    366     for obj in tgt.deps:
    367       dep_used_syms = used_syms.copy()
    368       obj_size = TargetSize(syms[obj], used_syms)
    369       if not obj_size:
    370         continue
    371       tgt_stats.append(ObjectStats(os.path.basename(obj), True, obj_size))
    372       if args.recursive:
    373         # Not really recursive, but it shows all the remaining deps at a second
    374         # level.
    375         for obj_dep in sorted(TargetTransitiveDeps(tgts, obj),
    376                               key=os.path.basename):
    377           obj_dep_size = TargetSize(syms[obj_dep], dep_used_syms)
    378           if not obj_dep_size:
    379             continue
    380           tgt_stats.append(ObjectStats(
    381               '   '+ os.path.basename(obj_dep), False, obj_dep_size))
    382 
    383     PrintStats(tgt_stats)
    384     PrintStackStats(tgt_stack_sizes)
    385     PrintTopSymbols(tgt_top_symbols)
    386     stats[target] = {
    387         'build': tgt_stats,
    388         'stack': tgt_stack_sizes,
    389         'top': tgt_top_symbols,
    390     }
    391 
    392   if args.save:
    393     with open(args.save, 'w') as f:
    394       json.dump(stats, f)
    395 
    396   # Check the maximum stack size.
    397   exit_code = 0
    398   if args.max_stack:
    399     for name, size in tgt_stack_sizes.items():
    400       if size > args.max_stack:
    401         print('Error: %s exceeds stack limit: %d vs %d' % (
    402                   name, size, args.max_stack),
    403               file=sys.stderr)
    404         exit_code = 1
    405 
    406   return exit_code
    407 
    408 def main():
    409   parser = argparse.ArgumentParser(description=__doc__)
    410   parser.add_argument('target', type=str, nargs='+',
    411                       help='target(s) to analyze')
    412   parser.add_argument('--build-dir', default='build',
    413                       help='path to the build directory')
    414   parser.add_argument('--save', default=None,
    415                       help='path to save the stats as JSON file')
    416   parser.add_argument('-r', '--recursive', default=False, action='store_true',
    417                       help='Print recursive entries.')
    418   parser.add_argument('--top-symbols', default=0, type=int,
    419                       help='Number of largest symbols to print')
    420   parser.add_argument('--binutils', default='',
    421                       help='prefix path to binutils tools, such as '
    422                            'aarch64-linux-gnu-')
    423   parser.add_argument('--max-stack', default=None, type=int,
    424                       help=('Maximum static stack size of a function. If a '
    425                             'static stack is larger it will exit with an error '
    426                             'code.'))
    427   args = parser.parse_args()
    428   sys.exit(SizeStats(args))
    429 
    430 
    431 if __name__ == '__main__':
    432   main()