qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git
Log | Files | Refs | Submodules | LICENSE

decodetree.py (41623B)


      1 #!/usr/bin/env python3
      2 # Copyright (c) 2018 Linaro Limited
      3 #
      4 # This library is free software; you can redistribute it and/or
      5 # modify it under the terms of the GNU Lesser General Public
      6 # License as published by the Free Software Foundation; either
      7 # version 2.1 of the License, or (at your option) any later version.
      8 #
      9 # This library is distributed in the hope that it will be useful,
     10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
     11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     12 # Lesser General Public License for more details.
     13 #
     14 # You should have received a copy of the GNU Lesser General Public
     15 # License along with this library; if not, see <http://www.gnu.org/licenses/>.
     16 #
     17 
     18 #
     19 # Generate a decoding tree from a specification file.
     20 # See the syntax and semantics in docs/devel/decodetree.rst.
     21 #
     22 
     23 import io
     24 import os
     25 import re
     26 import sys
     27 import getopt
     28 
     29 insnwidth = 32
     30 bitop_width = 32
     31 insnmask = 0xffffffff
     32 variablewidth = False
     33 fields = {}
     34 arguments = {}
     35 formats = {}
     36 allpatterns = []
     37 anyextern = False
     38 
     39 translate_prefix = 'trans'
     40 translate_scope = 'static '
     41 input_file = ''
     42 output_file = None
     43 output_fd = None
     44 insntype = 'uint32_t'
     45 decode_function = 'decode'
     46 
     47 # An identifier for C.
     48 re_C_ident = '[a-zA-Z][a-zA-Z0-9_]*'
     49 
     50 # Identifiers for Arguments, Fields, Formats and Patterns.
     51 re_arg_ident = '&[a-zA-Z0-9_]*'
     52 re_fld_ident = '%[a-zA-Z0-9_]*'
     53 re_fmt_ident = '@[a-zA-Z0-9_]*'
     54 re_pat_ident = '[a-zA-Z0-9_]*'
     55 
     56 def error_with_file(file, lineno, *args):
     57     """Print an error message from file:line and args and exit."""
     58     global output_file
     59     global output_fd
     60 
     61     prefix = ''
     62     if file:
     63         prefix += f'{file}:'
     64     if lineno:
     65         prefix += f'{lineno}:'
     66     if prefix:
     67         prefix += ' '
     68     print(prefix, end='error: ', file=sys.stderr)
     69     print(*args, file=sys.stderr)
     70 
     71     if output_file and output_fd:
     72         output_fd.close()
     73         os.remove(output_file)
     74     exit(1)
     75 # end error_with_file
     76 
     77 
     78 def error(lineno, *args):
     79     error_with_file(input_file, lineno, *args)
     80 # end error
     81 
     82 
     83 def output(*args):
     84     global output_fd
     85     for a in args:
     86         output_fd.write(a)
     87 
     88 
     89 def output_autogen():
     90     output('/* This file is autogenerated by scripts/decodetree.py.  */\n\n')
     91 
     92 
     93 def str_indent(c):
     94     """Return a string with C spaces"""
     95     return ' ' * c
     96 
     97 
     98 def str_fields(fields):
     99     """Return a string uniquely identifying FIELDS"""
    100     r = ''
    101     for n in sorted(fields.keys()):
    102         r += '_' + n
    103     return r[1:]
    104 
    105 
    106 def whex(val):
    107     """Return a hex string for val padded for insnwidth"""
    108     global insnwidth
    109     return f'0x{val:0{insnwidth // 4}x}'
    110 
    111 
    112 def whexC(val):
    113     """Return a hex string for val padded for insnwidth,
    114        and with the proper suffix for a C constant."""
    115     suffix = ''
    116     if val >= 0x100000000:
    117         suffix = 'ull'
    118     elif val >= 0x80000000:
    119         suffix = 'u'
    120     return whex(val) + suffix
    121 
    122 
    123 def str_match_bits(bits, mask):
    124     """Return a string pretty-printing BITS/MASK"""
    125     global insnwidth
    126 
    127     i = 1 << (insnwidth - 1)
    128     space = 0x01010100
    129     r = ''
    130     while i != 0:
    131         if i & mask:
    132             if i & bits:
    133                 r += '1'
    134             else:
    135                 r += '0'
    136         else:
    137             r += '.'
    138         if i & space:
    139             r += ' '
    140         i >>= 1
    141     return r
    142 
    143 
    144 def is_pow2(x):
    145     """Return true iff X is equal to a power of 2."""
    146     return (x & (x - 1)) == 0
    147 
    148 
    149 def ctz(x):
    150     """Return the number of times 2 factors into X."""
    151     assert x != 0
    152     r = 0
    153     while ((x >> r) & 1) == 0:
    154         r += 1
    155     return r
    156 
    157 
    158 def is_contiguous(bits):
    159     if bits == 0:
    160         return -1
    161     shift = ctz(bits)
    162     if is_pow2((bits >> shift) + 1):
    163         return shift
    164     else:
    165         return -1
    166 
    167 
    168 def eq_fields_for_args(flds_a, arg):
    169     if len(flds_a) != len(arg.fields):
    170         return False
    171     # Only allow inference on default types
    172     for t in arg.types:
    173         if t != 'int':
    174             return False
    175     for k, a in flds_a.items():
    176         if k not in arg.fields:
    177             return False
    178     return True
    179 
    180 
    181 def eq_fields_for_fmts(flds_a, flds_b):
    182     if len(flds_a) != len(flds_b):
    183         return False
    184     for k, a in flds_a.items():
    185         if k not in flds_b:
    186             return False
    187         b = flds_b[k]
    188         if a.__class__ != b.__class__ or a != b:
    189             return False
    190     return True
    191 
    192 
    193 class Field:
    194     """Class representing a simple instruction field"""
    195     def __init__(self, sign, pos, len):
    196         self.sign = sign
    197         self.pos = pos
    198         self.len = len
    199         self.mask = ((1 << len) - 1) << pos
    200 
    201     def __str__(self):
    202         if self.sign:
    203             s = 's'
    204         else:
    205             s = ''
    206         return str(self.pos) + ':' + s + str(self.len)
    207 
    208     def str_extract(self):
    209         global bitop_width
    210         s = 's' if self.sign else ''
    211         return f'{s}extract{bitop_width}(insn, {self.pos}, {self.len})'
    212 
    213     def __eq__(self, other):
    214         return self.sign == other.sign and self.mask == other.mask
    215 
    216     def __ne__(self, other):
    217         return not self.__eq__(other)
    218 # end Field
    219 
    220 
    221 class MultiField:
    222     """Class representing a compound instruction field"""
    223     def __init__(self, subs, mask):
    224         self.subs = subs
    225         self.sign = subs[0].sign
    226         self.mask = mask
    227 
    228     def __str__(self):
    229         return str(self.subs)
    230 
    231     def str_extract(self):
    232         global bitop_width
    233         ret = '0'
    234         pos = 0
    235         for f in reversed(self.subs):
    236             ext = f.str_extract()
    237             if pos == 0:
    238                 ret = ext
    239             else:
    240                 ret = f'deposit{bitop_width}({ret}, {pos}, {bitop_width - pos}, {ext})'
    241             pos += f.len
    242         return ret
    243 
    244     def __ne__(self, other):
    245         if len(self.subs) != len(other.subs):
    246             return True
    247         for a, b in zip(self.subs, other.subs):
    248             if a.__class__ != b.__class__ or a != b:
    249                 return True
    250         return False
    251 
    252     def __eq__(self, other):
    253         return not self.__ne__(other)
    254 # end MultiField
    255 
    256 
    257 class ConstField:
    258     """Class representing an argument field with constant value"""
    259     def __init__(self, value):
    260         self.value = value
    261         self.mask = 0
    262         self.sign = value < 0
    263 
    264     def __str__(self):
    265         return str(self.value)
    266 
    267     def str_extract(self):
    268         return str(self.value)
    269 
    270     def __cmp__(self, other):
    271         return self.value - other.value
    272 # end ConstField
    273 
    274 
    275 class FunctionField:
    276     """Class representing a field passed through a function"""
    277     def __init__(self, func, base):
    278         self.mask = base.mask
    279         self.sign = base.sign
    280         self.base = base
    281         self.func = func
    282 
    283     def __str__(self):
    284         return self.func + '(' + str(self.base) + ')'
    285 
    286     def str_extract(self):
    287         return self.func + '(ctx, ' + self.base.str_extract() + ')'
    288 
    289     def __eq__(self, other):
    290         return self.func == other.func and self.base == other.base
    291 
    292     def __ne__(self, other):
    293         return not self.__eq__(other)
    294 # end FunctionField
    295 
    296 
    297 class ParameterField:
    298     """Class representing a pseudo-field read from a function"""
    299     def __init__(self, func):
    300         self.mask = 0
    301         self.sign = 0
    302         self.func = func
    303 
    304     def __str__(self):
    305         return self.func
    306 
    307     def str_extract(self):
    308         return self.func + '(ctx)'
    309 
    310     def __eq__(self, other):
    311         return self.func == other.func
    312 
    313     def __ne__(self, other):
    314         return not self.__eq__(other)
    315 # end ParameterField
    316 
    317 
    318 class Arguments:
    319     """Class representing the extracted fields of a format"""
    320     def __init__(self, nm, flds, types, extern):
    321         self.name = nm
    322         self.extern = extern
    323         self.fields = flds
    324         self.types = types
    325 
    326     def __str__(self):
    327         return self.name + ' ' + str(self.fields)
    328 
    329     def struct_name(self):
    330         return 'arg_' + self.name
    331 
    332     def output_def(self):
    333         if not self.extern:
    334             output('typedef struct {\n')
    335             for (n, t) in zip(self.fields, self.types):
    336                 output(f'    {t} {n};\n')
    337             output('} ', self.struct_name(), ';\n\n')
    338 # end Arguments
    339 
    340 
    341 class General:
    342     """Common code between instruction formats and instruction patterns"""
    343     def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w):
    344         self.name = name
    345         self.file = input_file
    346         self.lineno = lineno
    347         self.base = base
    348         self.fixedbits = fixb
    349         self.fixedmask = fixm
    350         self.undefmask = udfm
    351         self.fieldmask = fldm
    352         self.fields = flds
    353         self.width = w
    354 
    355     def __str__(self):
    356         return self.name + ' ' + str_match_bits(self.fixedbits, self.fixedmask)
    357 
    358     def str1(self, i):
    359         return str_indent(i) + self.__str__()
    360 # end General
    361 
    362 
    363 class Format(General):
    364     """Class representing an instruction format"""
    365 
    366     def extract_name(self):
    367         global decode_function
    368         return decode_function + '_extract_' + self.name
    369 
    370     def output_extract(self):
    371         output('static void ', self.extract_name(), '(DisasContext *ctx, ',
    372                self.base.struct_name(), ' *a, ', insntype, ' insn)\n{\n')
    373         for n, f in self.fields.items():
    374             output('    a->', n, ' = ', f.str_extract(), ';\n')
    375         output('}\n\n')
    376 # end Format
    377 
    378 
    379 class Pattern(General):
    380     """Class representing an instruction pattern"""
    381 
    382     def output_decl(self):
    383         global translate_scope
    384         global translate_prefix
    385         output('typedef ', self.base.base.struct_name(),
    386                ' arg_', self.name, ';\n')
    387         output(translate_scope, 'bool ', translate_prefix, '_', self.name,
    388                '(DisasContext *ctx, arg_', self.name, ' *a);\n')
    389 
    390     def output_code(self, i, extracted, outerbits, outermask):
    391         global translate_prefix
    392         ind = str_indent(i)
    393         arg = self.base.base.name
    394         output(ind, '/* ', self.file, ':', str(self.lineno), ' */\n')
    395         if not extracted:
    396             output(ind, self.base.extract_name(),
    397                    '(ctx, &u.f_', arg, ', insn);\n')
    398         for n, f in self.fields.items():
    399             output(ind, 'u.f_', arg, '.', n, ' = ', f.str_extract(), ';\n')
    400         output(ind, 'if (', translate_prefix, '_', self.name,
    401                '(ctx, &u.f_', arg, ')) return true;\n')
    402 
    403     # Normal patterns do not have children.
    404     def build_tree(self):
    405         return
    406     def prop_masks(self):
    407         return
    408     def prop_format(self):
    409         return
    410     def prop_width(self):
    411         return
    412 
    413 # end Pattern
    414 
    415 
    416 class MultiPattern(General):
    417     """Class representing a set of instruction patterns"""
    418 
    419     def __init__(self, lineno):
    420         self.file = input_file
    421         self.lineno = lineno
    422         self.pats = []
    423         self.base = None
    424         self.fixedbits = 0
    425         self.fixedmask = 0
    426         self.undefmask = 0
    427         self.width = None
    428 
    429     def __str__(self):
    430         r = 'group'
    431         if self.fixedbits is not None:
    432             r += ' ' + str_match_bits(self.fixedbits, self.fixedmask)
    433         return r
    434 
    435     def output_decl(self):
    436         for p in self.pats:
    437             p.output_decl()
    438 
    439     def prop_masks(self):
    440         global insnmask
    441 
    442         fixedmask = insnmask
    443         undefmask = insnmask
    444 
    445         # Collect fixedmask/undefmask for all of the children.
    446         for p in self.pats:
    447             p.prop_masks()
    448             fixedmask &= p.fixedmask
    449             undefmask &= p.undefmask
    450 
    451         # Widen fixedmask until all fixedbits match
    452         repeat = True
    453         fixedbits = 0
    454         while repeat and fixedmask != 0:
    455             fixedbits = None
    456             for p in self.pats:
    457                 thisbits = p.fixedbits & fixedmask
    458                 if fixedbits is None:
    459                     fixedbits = thisbits
    460                 elif fixedbits != thisbits:
    461                     fixedmask &= ~(fixedbits ^ thisbits)
    462                     break
    463             else:
    464                 repeat = False
    465 
    466         self.fixedbits = fixedbits
    467         self.fixedmask = fixedmask
    468         self.undefmask = undefmask
    469 
    470     def build_tree(self):
    471         for p in self.pats:
    472             p.build_tree()
    473 
    474     def prop_format(self):
    475         for p in self.pats:
    476             p.build_tree()
    477 
    478     def prop_width(self):
    479         width = None
    480         for p in self.pats:
    481             p.prop_width()
    482             if width is None:
    483                 width = p.width
    484             elif width != p.width:
    485                 error_with_file(self.file, self.lineno,
    486                                 'width mismatch in patterns within braces')
    487         self.width = width
    488 
    489 # end MultiPattern
    490 
    491 
    492 class IncMultiPattern(MultiPattern):
    493     """Class representing an overlapping set of instruction patterns"""
    494 
    495     def output_code(self, i, extracted, outerbits, outermask):
    496         global translate_prefix
    497         ind = str_indent(i)
    498         for p in self.pats:
    499             if outermask != p.fixedmask:
    500                 innermask = p.fixedmask & ~outermask
    501                 innerbits = p.fixedbits & ~outermask
    502                 output(ind, f'if ((insn & {whexC(innermask)}) == {whexC(innerbits)}) {{\n')
    503                 output(ind, f'    /* {str_match_bits(p.fixedbits, p.fixedmask)} */\n')
    504                 p.output_code(i + 4, extracted, p.fixedbits, p.fixedmask)
    505                 output(ind, '}\n')
    506             else:
    507                 p.output_code(i, extracted, p.fixedbits, p.fixedmask)
    508 #end IncMultiPattern
    509 
    510 
    511 class Tree:
    512     """Class representing a node in a decode tree"""
    513 
    514     def __init__(self, fm, tm):
    515         self.fixedmask = fm
    516         self.thismask = tm
    517         self.subs = []
    518         self.base = None
    519 
    520     def str1(self, i):
    521         ind = str_indent(i)
    522         r = ind + whex(self.fixedmask)
    523         if self.format:
    524             r += ' ' + self.format.name
    525         r += ' [\n'
    526         for (b, s) in self.subs:
    527             r += ind + f'  {whex(b)}:\n'
    528             r += s.str1(i + 4) + '\n'
    529         r += ind + ']'
    530         return r
    531 
    532     def __str__(self):
    533         return self.str1(0)
    534 
    535     def output_code(self, i, extracted, outerbits, outermask):
    536         ind = str_indent(i)
    537 
    538         # If we identified all nodes below have the same format,
    539         # extract the fields now.
    540         if not extracted and self.base:
    541             output(ind, self.base.extract_name(),
    542                    '(ctx, &u.f_', self.base.base.name, ', insn);\n')
    543             extracted = True
    544 
    545         # Attempt to aid the compiler in producing compact switch statements.
    546         # If the bits in the mask are contiguous, extract them.
    547         sh = is_contiguous(self.thismask)
    548         if sh > 0:
    549             # Propagate SH down into the local functions.
    550             def str_switch(b, sh=sh):
    551                 return f'(insn >> {sh}) & {b >> sh:#x}'
    552 
    553             def str_case(b, sh=sh):
    554                 return hex(b >> sh)
    555         else:
    556             def str_switch(b):
    557                 return f'insn & {whexC(b)}'
    558 
    559             def str_case(b):
    560                 return whexC(b)
    561 
    562         output(ind, 'switch (', str_switch(self.thismask), ') {\n')
    563         for b, s in sorted(self.subs):
    564             assert (self.thismask & ~s.fixedmask) == 0
    565             innermask = outermask | self.thismask
    566             innerbits = outerbits | b
    567             output(ind, 'case ', str_case(b), ':\n')
    568             output(ind, '    /* ',
    569                    str_match_bits(innerbits, innermask), ' */\n')
    570             s.output_code(i + 4, extracted, innerbits, innermask)
    571             output(ind, '    break;\n')
    572         output(ind, '}\n')
    573 # end Tree
    574 
    575 
    576 class ExcMultiPattern(MultiPattern):
    577     """Class representing a non-overlapping set of instruction patterns"""
    578 
    579     def output_code(self, i, extracted, outerbits, outermask):
    580         # Defer everything to our decomposed Tree node
    581         self.tree.output_code(i, extracted, outerbits, outermask)
    582 
    583     @staticmethod
    584     def __build_tree(pats, outerbits, outermask):
    585         # Find the intersection of all remaining fixedmask.
    586         innermask = ~outermask & insnmask
    587         for i in pats:
    588             innermask &= i.fixedmask
    589 
    590         if innermask == 0:
    591             # Edge condition: One pattern covers the entire insnmask
    592             if len(pats) == 1:
    593                 t = Tree(outermask, innermask)
    594                 t.subs.append((0, pats[0]))
    595                 return t
    596 
    597             text = 'overlapping patterns:'
    598             for p in pats:
    599                 text += '\n' + p.file + ':' + str(p.lineno) + ': ' + str(p)
    600             error_with_file(pats[0].file, pats[0].lineno, text)
    601 
    602         fullmask = outermask | innermask
    603 
    604         # Sort each element of pats into the bin selected by the mask.
    605         bins = {}
    606         for i in pats:
    607             fb = i.fixedbits & innermask
    608             if fb in bins:
    609                 bins[fb].append(i)
    610             else:
    611                 bins[fb] = [i]
    612 
    613         # We must recurse if any bin has more than one element or if
    614         # the single element in the bin has not been fully matched.
    615         t = Tree(fullmask, innermask)
    616 
    617         for b, l in bins.items():
    618             s = l[0]
    619             if len(l) > 1 or s.fixedmask & ~fullmask != 0:
    620                 s = ExcMultiPattern.__build_tree(l, b | outerbits, fullmask)
    621             t.subs.append((b, s))
    622 
    623         return t
    624 
    625     def build_tree(self):
    626         super().prop_format()
    627         self.tree = self.__build_tree(self.pats, self.fixedbits,
    628                                       self.fixedmask)
    629 
    630     @staticmethod
    631     def __prop_format(tree):
    632         """Propagate Format objects into the decode tree"""
    633 
    634         # Depth first search.
    635         for (b, s) in tree.subs:
    636             if isinstance(s, Tree):
    637                 ExcMultiPattern.__prop_format(s)
    638 
    639         # If all entries in SUBS have the same format, then
    640         # propagate that into the tree.
    641         f = None
    642         for (b, s) in tree.subs:
    643             if f is None:
    644                 f = s.base
    645                 if f is None:
    646                     return
    647             if f is not s.base:
    648                 return
    649         tree.base = f
    650 
    651     def prop_format(self):
    652         super().prop_format()
    653         self.__prop_format(self.tree)
    654 
    655 # end ExcMultiPattern
    656 
    657 
    658 def parse_field(lineno, name, toks):
    659     """Parse one instruction field from TOKS at LINENO"""
    660     global fields
    661     global insnwidth
    662 
    663     # A "simple" field will have only one entry;
    664     # a "multifield" will have several.
    665     subs = []
    666     width = 0
    667     func = None
    668     for t in toks:
    669         if re.match('^!function=', t):
    670             if func:
    671                 error(lineno, 'duplicate function')
    672             func = t.split('=')
    673             func = func[1]
    674             continue
    675 
    676         if re.fullmatch('[0-9]+:s[0-9]+', t):
    677             # Signed field extract
    678             subtoks = t.split(':s')
    679             sign = True
    680         elif re.fullmatch('[0-9]+:[0-9]+', t):
    681             # Unsigned field extract
    682             subtoks = t.split(':')
    683             sign = False
    684         else:
    685             error(lineno, f'invalid field token "{t}"')
    686         po = int(subtoks[0])
    687         le = int(subtoks[1])
    688         if po + le > insnwidth:
    689             error(lineno, f'field {t} too large')
    690         f = Field(sign, po, le)
    691         subs.append(f)
    692         width += le
    693 
    694     if width > insnwidth:
    695         error(lineno, 'field too large')
    696     if len(subs) == 0:
    697         if func:
    698             f = ParameterField(func)
    699         else:
    700             error(lineno, 'field with no value')
    701     else:
    702         if len(subs) == 1:
    703             f = subs[0]
    704         else:
    705             mask = 0
    706             for s in subs:
    707                 if mask & s.mask:
    708                     error(lineno, 'field components overlap')
    709                 mask |= s.mask
    710             f = MultiField(subs, mask)
    711         if func:
    712             f = FunctionField(func, f)
    713 
    714     if name in fields:
    715         error(lineno, 'duplicate field', name)
    716     fields[name] = f
    717 # end parse_field
    718 
    719 
    720 def parse_arguments(lineno, name, toks):
    721     """Parse one argument set from TOKS at LINENO"""
    722     global arguments
    723     global re_C_ident
    724     global anyextern
    725 
    726     flds = []
    727     types = []
    728     extern = False
    729     for n in toks:
    730         if re.fullmatch('!extern', n):
    731             extern = True
    732             anyextern = True
    733             continue
    734         if re.fullmatch(re_C_ident + ':' + re_C_ident, n):
    735             (n, t) = n.split(':')
    736         elif re.fullmatch(re_C_ident, n):
    737             t = 'int'
    738         else:
    739             error(lineno, f'invalid argument set token "{n}"')
    740         if n in flds:
    741             error(lineno, f'duplicate argument "{n}"')
    742         flds.append(n)
    743         types.append(t)
    744 
    745     if name in arguments:
    746         error(lineno, 'duplicate argument set', name)
    747     arguments[name] = Arguments(name, flds, types, extern)
    748 # end parse_arguments
    749 
    750 
    751 def lookup_field(lineno, name):
    752     global fields
    753     if name in fields:
    754         return fields[name]
    755     error(lineno, 'undefined field', name)
    756 
    757 
    758 def add_field(lineno, flds, new_name, f):
    759     if new_name in flds:
    760         error(lineno, 'duplicate field', new_name)
    761     flds[new_name] = f
    762     return flds
    763 
    764 
    765 def add_field_byname(lineno, flds, new_name, old_name):
    766     return add_field(lineno, flds, new_name, lookup_field(lineno, old_name))
    767 
    768 
    769 def infer_argument_set(flds):
    770     global arguments
    771     global decode_function
    772 
    773     for arg in arguments.values():
    774         if eq_fields_for_args(flds, arg):
    775             return arg
    776 
    777     name = decode_function + str(len(arguments))
    778     arg = Arguments(name, flds.keys(), ['int'] * len(flds), False)
    779     arguments[name] = arg
    780     return arg
    781 
    782 
    783 def infer_format(arg, fieldmask, flds, width):
    784     global arguments
    785     global formats
    786     global decode_function
    787 
    788     const_flds = {}
    789     var_flds = {}
    790     for n, c in flds.items():
    791         if c is ConstField:
    792             const_flds[n] = c
    793         else:
    794             var_flds[n] = c
    795 
    796     # Look for an existing format with the same argument set and fields
    797     for fmt in formats.values():
    798         if arg and fmt.base != arg:
    799             continue
    800         if fieldmask != fmt.fieldmask:
    801             continue
    802         if width != fmt.width:
    803             continue
    804         if not eq_fields_for_fmts(flds, fmt.fields):
    805             continue
    806         return (fmt, const_flds)
    807 
    808     name = decode_function + '_Fmt_' + str(len(formats))
    809     if not arg:
    810         arg = infer_argument_set(flds)
    811 
    812     fmt = Format(name, 0, arg, 0, 0, 0, fieldmask, var_flds, width)
    813     formats[name] = fmt
    814 
    815     return (fmt, const_flds)
    816 # end infer_format
    817 
    818 
    819 def parse_generic(lineno, parent_pat, name, toks):
    820     """Parse one instruction format from TOKS at LINENO"""
    821     global fields
    822     global arguments
    823     global formats
    824     global allpatterns
    825     global re_arg_ident
    826     global re_fld_ident
    827     global re_fmt_ident
    828     global re_C_ident
    829     global insnwidth
    830     global insnmask
    831     global variablewidth
    832 
    833     is_format = parent_pat is None
    834 
    835     fixedmask = 0
    836     fixedbits = 0
    837     undefmask = 0
    838     width = 0
    839     flds = {}
    840     arg = None
    841     fmt = None
    842     for t in toks:
    843         # '&Foo' gives a format an explicit argument set.
    844         if re.fullmatch(re_arg_ident, t):
    845             tt = t[1:]
    846             if arg:
    847                 error(lineno, 'multiple argument sets')
    848             if tt in arguments:
    849                 arg = arguments[tt]
    850             else:
    851                 error(lineno, 'undefined argument set', t)
    852             continue
    853 
    854         # '@Foo' gives a pattern an explicit format.
    855         if re.fullmatch(re_fmt_ident, t):
    856             tt = t[1:]
    857             if fmt:
    858                 error(lineno, 'multiple formats')
    859             if tt in formats:
    860                 fmt = formats[tt]
    861             else:
    862                 error(lineno, 'undefined format', t)
    863             continue
    864 
    865         # '%Foo' imports a field.
    866         if re.fullmatch(re_fld_ident, t):
    867             tt = t[1:]
    868             flds = add_field_byname(lineno, flds, tt, tt)
    869             continue
    870 
    871         # 'Foo=%Bar' imports a field with a different name.
    872         if re.fullmatch(re_C_ident + '=' + re_fld_ident, t):
    873             (fname, iname) = t.split('=%')
    874             flds = add_field_byname(lineno, flds, fname, iname)
    875             continue
    876 
    877         # 'Foo=number' sets an argument field to a constant value
    878         if re.fullmatch(re_C_ident + '=[+-]?[0-9]+', t):
    879             (fname, value) = t.split('=')
    880             value = int(value)
    881             flds = add_field(lineno, flds, fname, ConstField(value))
    882             continue
    883 
    884         # Pattern of 0s, 1s, dots and dashes indicate required zeros,
    885         # required ones, or dont-cares.
    886         if re.fullmatch('[01.-]+', t):
    887             shift = len(t)
    888             fms = t.replace('0', '1')
    889             fms = fms.replace('.', '0')
    890             fms = fms.replace('-', '0')
    891             fbs = t.replace('.', '0')
    892             fbs = fbs.replace('-', '0')
    893             ubm = t.replace('1', '0')
    894             ubm = ubm.replace('.', '0')
    895             ubm = ubm.replace('-', '1')
    896             fms = int(fms, 2)
    897             fbs = int(fbs, 2)
    898             ubm = int(ubm, 2)
    899             fixedbits = (fixedbits << shift) | fbs
    900             fixedmask = (fixedmask << shift) | fms
    901             undefmask = (undefmask << shift) | ubm
    902         # Otherwise, fieldname:fieldwidth
    903         elif re.fullmatch(re_C_ident + ':s?[0-9]+', t):
    904             (fname, flen) = t.split(':')
    905             sign = False
    906             if flen[0] == 's':
    907                 sign = True
    908                 flen = flen[1:]
    909             shift = int(flen, 10)
    910             if shift + width > insnwidth:
    911                 error(lineno, f'field {fname} exceeds insnwidth')
    912             f = Field(sign, insnwidth - width - shift, shift)
    913             flds = add_field(lineno, flds, fname, f)
    914             fixedbits <<= shift
    915             fixedmask <<= shift
    916             undefmask <<= shift
    917         else:
    918             error(lineno, f'invalid token "{t}"')
    919         width += shift
    920 
    921     if variablewidth and width < insnwidth and width % 8 == 0:
    922         shift = insnwidth - width
    923         fixedbits <<= shift
    924         fixedmask <<= shift
    925         undefmask <<= shift
    926         undefmask |= (1 << shift) - 1
    927 
    928     # We should have filled in all of the bits of the instruction.
    929     elif not (is_format and width == 0) and width != insnwidth:
    930         error(lineno, f'definition has {width} bits')
    931 
    932     # Do not check for fields overlapping fields; one valid usage
    933     # is to be able to duplicate fields via import.
    934     fieldmask = 0
    935     for f in flds.values():
    936         fieldmask |= f.mask
    937 
    938     # Fix up what we've parsed to match either a format or a pattern.
    939     if is_format:
    940         # Formats cannot reference formats.
    941         if fmt:
    942             error(lineno, 'format referencing format')
    943         # If an argument set is given, then there should be no fields
    944         # without a place to store it.
    945         if arg:
    946             for f in flds.keys():
    947                 if f not in arg.fields:
    948                     error(lineno, f'field {f} not in argument set {arg.name}')
    949         else:
    950             arg = infer_argument_set(flds)
    951         if name in formats:
    952             error(lineno, 'duplicate format name', name)
    953         fmt = Format(name, lineno, arg, fixedbits, fixedmask,
    954                      undefmask, fieldmask, flds, width)
    955         formats[name] = fmt
    956     else:
    957         # Patterns can reference a format ...
    958         if fmt:
    959             # ... but not an argument simultaneously
    960             if arg:
    961                 error(lineno, 'pattern specifies both format and argument set')
    962             if fixedmask & fmt.fixedmask:
    963                 error(lineno, 'pattern fixed bits overlap format fixed bits')
    964             if width != fmt.width:
    965                 error(lineno, 'pattern uses format of different width')
    966             fieldmask |= fmt.fieldmask
    967             fixedbits |= fmt.fixedbits
    968             fixedmask |= fmt.fixedmask
    969             undefmask |= fmt.undefmask
    970         else:
    971             (fmt, flds) = infer_format(arg, fieldmask, flds, width)
    972         arg = fmt.base
    973         for f in flds.keys():
    974             if f not in arg.fields:
    975                 error(lineno, f'field {f} not in argument set {arg.name}')
    976             if f in fmt.fields.keys():
    977                 error(lineno, f'field {f} set by format and pattern')
    978         for f in arg.fields:
    979             if f not in flds.keys() and f not in fmt.fields.keys():
    980                 error(lineno, f'field {f} not initialized')
    981         pat = Pattern(name, lineno, fmt, fixedbits, fixedmask,
    982                       undefmask, fieldmask, flds, width)
    983         parent_pat.pats.append(pat)
    984         allpatterns.append(pat)
    985 
    986     # Validate the masks that we have assembled.
    987     if fieldmask & fixedmask:
    988         error(lineno, 'fieldmask overlaps fixedmask ',
    989               f'({whex(fieldmask)} & {whex(fixedmask)})')
    990     if fieldmask & undefmask:
    991         error(lineno, 'fieldmask overlaps undefmask ',
    992               f'({whex(fieldmask)} & {whex(undefmask)})')
    993     if fixedmask & undefmask:
    994         error(lineno, 'fixedmask overlaps undefmask ',
    995               f'({whex(fixedmask)} & {whex(undefmask)})')
    996     if not is_format:
    997         allbits = fieldmask | fixedmask | undefmask
    998         if allbits != insnmask:
    999             error(lineno, 'bits left unspecified ',
   1000                   f'({whex(allbits ^ insnmask)})')
   1001 # end parse_general
   1002 
   1003 
   1004 def parse_file(f, parent_pat):
   1005     """Parse all of the patterns within a file"""
   1006     global re_arg_ident
   1007     global re_fld_ident
   1008     global re_fmt_ident
   1009     global re_pat_ident
   1010 
   1011     # Read all of the lines of the file.  Concatenate lines
   1012     # ending in backslash; discard empty lines and comments.
   1013     toks = []
   1014     lineno = 0
   1015     nesting = 0
   1016     nesting_pats = []
   1017 
   1018     for line in f:
   1019         lineno += 1
   1020 
   1021         # Expand and strip spaces, to find indent.
   1022         line = line.rstrip()
   1023         line = line.expandtabs()
   1024         len1 = len(line)
   1025         line = line.lstrip()
   1026         len2 = len(line)
   1027 
   1028         # Discard comments
   1029         end = line.find('#')
   1030         if end >= 0:
   1031             line = line[:end]
   1032 
   1033         t = line.split()
   1034         if len(toks) != 0:
   1035             # Next line after continuation
   1036             toks.extend(t)
   1037         else:
   1038             # Allow completely blank lines.
   1039             if len1 == 0:
   1040                 continue
   1041             indent = len1 - len2
   1042             # Empty line due to comment.
   1043             if len(t) == 0:
   1044                 # Indentation must be correct, even for comment lines.
   1045                 if indent != nesting:
   1046                     error(lineno, 'indentation ', indent, ' != ', nesting)
   1047                 continue
   1048             start_lineno = lineno
   1049             toks = t
   1050 
   1051         # Continuation?
   1052         if toks[-1] == '\\':
   1053             toks.pop()
   1054             continue
   1055 
   1056         name = toks[0]
   1057         del toks[0]
   1058 
   1059         # End nesting?
   1060         if name == '}' or name == ']':
   1061             if len(toks) != 0:
   1062                 error(start_lineno, 'extra tokens after close brace')
   1063 
   1064             # Make sure { } and [ ] nest properly.
   1065             if (name == '}') != isinstance(parent_pat, IncMultiPattern):
   1066                 error(lineno, 'mismatched close brace')
   1067 
   1068             try:
   1069                 parent_pat = nesting_pats.pop()
   1070             except:
   1071                 error(lineno, 'extra close brace')
   1072 
   1073             nesting -= 2
   1074             if indent != nesting:
   1075                 error(lineno, 'indentation ', indent, ' != ', nesting)
   1076 
   1077             toks = []
   1078             continue
   1079 
   1080         # Everything else should have current indentation.
   1081         if indent != nesting:
   1082             error(start_lineno, 'indentation ', indent, ' != ', nesting)
   1083 
   1084         # Start nesting?
   1085         if name == '{' or name == '[':
   1086             if len(toks) != 0:
   1087                 error(start_lineno, 'extra tokens after open brace')
   1088 
   1089             if name == '{':
   1090                 nested_pat = IncMultiPattern(start_lineno)
   1091             else:
   1092                 nested_pat = ExcMultiPattern(start_lineno)
   1093             parent_pat.pats.append(nested_pat)
   1094             nesting_pats.append(parent_pat)
   1095             parent_pat = nested_pat
   1096 
   1097             nesting += 2
   1098             toks = []
   1099             continue
   1100 
   1101         # Determine the type of object needing to be parsed.
   1102         if re.fullmatch(re_fld_ident, name):
   1103             parse_field(start_lineno, name[1:], toks)
   1104         elif re.fullmatch(re_arg_ident, name):
   1105             parse_arguments(start_lineno, name[1:], toks)
   1106         elif re.fullmatch(re_fmt_ident, name):
   1107             parse_generic(start_lineno, None, name[1:], toks)
   1108         elif re.fullmatch(re_pat_ident, name):
   1109             parse_generic(start_lineno, parent_pat, name, toks)
   1110         else:
   1111             error(lineno, f'invalid token "{name}"')
   1112         toks = []
   1113 
   1114     if nesting != 0:
   1115         error(lineno, 'missing close brace')
   1116 # end parse_file
   1117 
   1118 
   1119 class SizeTree:
   1120     """Class representing a node in a size decode tree"""
   1121 
   1122     def __init__(self, m, w):
   1123         self.mask = m
   1124         self.subs = []
   1125         self.base = None
   1126         self.width = w
   1127 
   1128     def str1(self, i):
   1129         ind = str_indent(i)
   1130         r = ind + whex(self.mask) + ' [\n'
   1131         for (b, s) in self.subs:
   1132             r += ind + f'  {whex(b)}:\n'
   1133             r += s.str1(i + 4) + '\n'
   1134         r += ind + ']'
   1135         return r
   1136 
   1137     def __str__(self):
   1138         return self.str1(0)
   1139 
   1140     def output_code(self, i, extracted, outerbits, outermask):
   1141         ind = str_indent(i)
   1142 
   1143         # If we need to load more bytes to test, do so now.
   1144         if extracted < self.width:
   1145             output(ind, f'insn = {decode_function}_load_bytes',
   1146                    f'(ctx, insn, {extracted // 8}, {self.width // 8});\n')
   1147             extracted = self.width
   1148 
   1149         # Attempt to aid the compiler in producing compact switch statements.
   1150         # If the bits in the mask are contiguous, extract them.
   1151         sh = is_contiguous(self.mask)
   1152         if sh > 0:
   1153             # Propagate SH down into the local functions.
   1154             def str_switch(b, sh=sh):
   1155                 return f'(insn >> {sh}) & {b >> sh:#x}'
   1156 
   1157             def str_case(b, sh=sh):
   1158                 return hex(b >> sh)
   1159         else:
   1160             def str_switch(b):
   1161                 return f'insn & {whexC(b)}'
   1162 
   1163             def str_case(b):
   1164                 return whexC(b)
   1165 
   1166         output(ind, 'switch (', str_switch(self.mask), ') {\n')
   1167         for b, s in sorted(self.subs):
   1168             innermask = outermask | self.mask
   1169             innerbits = outerbits | b
   1170             output(ind, 'case ', str_case(b), ':\n')
   1171             output(ind, '    /* ',
   1172                    str_match_bits(innerbits, innermask), ' */\n')
   1173             s.output_code(i + 4, extracted, innerbits, innermask)
   1174         output(ind, '}\n')
   1175         output(ind, 'return insn;\n')
   1176 # end SizeTree
   1177 
   1178 class SizeLeaf:
   1179     """Class representing a leaf node in a size decode tree"""
   1180 
   1181     def __init__(self, m, w):
   1182         self.mask = m
   1183         self.width = w
   1184 
   1185     def str1(self, i):
   1186         return str_indent(i) + whex(self.mask)
   1187 
   1188     def __str__(self):
   1189         return self.str1(0)
   1190 
   1191     def output_code(self, i, extracted, outerbits, outermask):
   1192         global decode_function
   1193         ind = str_indent(i)
   1194 
   1195         # If we need to load more bytes, do so now.
   1196         if extracted < self.width:
   1197             output(ind, f'insn = {decode_function}_load_bytes',
   1198                    f'(ctx, insn, {extracted // 8}, {self.width // 8});\n')
   1199             extracted = self.width
   1200         output(ind, 'return insn;\n')
   1201 # end SizeLeaf
   1202 
   1203 
   1204 def build_size_tree(pats, width, outerbits, outermask):
   1205     global insnwidth
   1206 
   1207     # Collect the mask of bits that are fixed in this width
   1208     innermask = 0xff << (insnwidth - width)
   1209     innermask &= ~outermask
   1210     minwidth = None
   1211     onewidth = True
   1212     for i in pats:
   1213         innermask &= i.fixedmask
   1214         if minwidth is None:
   1215             minwidth = i.width
   1216         elif minwidth != i.width:
   1217             onewidth = False;
   1218             if minwidth < i.width:
   1219                 minwidth = i.width
   1220 
   1221     if onewidth:
   1222         return SizeLeaf(innermask, minwidth)
   1223 
   1224     if innermask == 0:
   1225         if width < minwidth:
   1226             return build_size_tree(pats, width + 8, outerbits, outermask)
   1227 
   1228         pnames = []
   1229         for p in pats:
   1230             pnames.append(p.name + ':' + p.file + ':' + str(p.lineno))
   1231         error_with_file(pats[0].file, pats[0].lineno,
   1232                         f'overlapping patterns size {width}:', pnames)
   1233 
   1234     bins = {}
   1235     for i in pats:
   1236         fb = i.fixedbits & innermask
   1237         if fb in bins:
   1238             bins[fb].append(i)
   1239         else:
   1240             bins[fb] = [i]
   1241 
   1242     fullmask = outermask | innermask
   1243     lens = sorted(bins.keys())
   1244     if len(lens) == 1:
   1245         b = lens[0]
   1246         return build_size_tree(bins[b], width + 8, b | outerbits, fullmask)
   1247 
   1248     r = SizeTree(innermask, width)
   1249     for b, l in bins.items():
   1250         s = build_size_tree(l, width, b | outerbits, fullmask)
   1251         r.subs.append((b, s))
   1252     return r
   1253 # end build_size_tree
   1254 
   1255 
   1256 def prop_size(tree):
   1257     """Propagate minimum widths up the decode size tree"""
   1258 
   1259     if isinstance(tree, SizeTree):
   1260         min = None
   1261         for (b, s) in tree.subs:
   1262             width = prop_size(s)
   1263             if min is None or min > width:
   1264                 min = width
   1265         assert min >= tree.width
   1266         tree.width = min
   1267     else:
   1268         min = tree.width
   1269     return min
   1270 # end prop_size
   1271 
   1272 
   1273 def main():
   1274     global arguments
   1275     global formats
   1276     global allpatterns
   1277     global translate_scope
   1278     global translate_prefix
   1279     global output_fd
   1280     global output_file
   1281     global input_file
   1282     global insnwidth
   1283     global insntype
   1284     global insnmask
   1285     global decode_function
   1286     global bitop_width
   1287     global variablewidth
   1288     global anyextern
   1289 
   1290     decode_scope = 'static '
   1291 
   1292     long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=',
   1293                  'static-decode=', 'varinsnwidth=']
   1294     try:
   1295         (opts, args) = getopt.gnu_getopt(sys.argv[1:], 'o:vw:', long_opts)
   1296     except getopt.GetoptError as err:
   1297         error(0, err)
   1298     for o, a in opts:
   1299         if o in ('-o', '--output'):
   1300             output_file = a
   1301         elif o == '--decode':
   1302             decode_function = a
   1303             decode_scope = ''
   1304         elif o == '--static-decode':
   1305             decode_function = a
   1306         elif o == '--translate':
   1307             translate_prefix = a
   1308             translate_scope = ''
   1309         elif o in ('-w', '--insnwidth', '--varinsnwidth'):
   1310             if o == '--varinsnwidth':
   1311                 variablewidth = True
   1312             insnwidth = int(a)
   1313             if insnwidth == 16:
   1314                 insntype = 'uint16_t'
   1315                 insnmask = 0xffff
   1316             elif insnwidth == 64:
   1317                 insntype = 'uint64_t'
   1318                 insnmask = 0xffffffffffffffff
   1319                 bitop_width = 64
   1320             elif insnwidth != 32:
   1321                 error(0, 'cannot handle insns of width', insnwidth)
   1322         else:
   1323             assert False, 'unhandled option'
   1324 
   1325     if len(args) < 1:
   1326         error(0, 'missing input file')
   1327 
   1328     toppat = ExcMultiPattern(0)
   1329 
   1330     for filename in args:
   1331         input_file = filename
   1332         f = open(filename, 'rt', encoding='utf-8')
   1333         parse_file(f, toppat)
   1334         f.close()
   1335 
   1336     # We do not want to compute masks for toppat, because those masks
   1337     # are used as a starting point for build_tree.  For toppat, we must
   1338     # insist that decode begins from naught.
   1339     for i in toppat.pats:
   1340         i.prop_masks()
   1341 
   1342     toppat.build_tree()
   1343     toppat.prop_format()
   1344 
   1345     if variablewidth:
   1346         for i in toppat.pats:
   1347             i.prop_width()
   1348         stree = build_size_tree(toppat.pats, 8, 0, 0)
   1349         prop_size(stree)
   1350 
   1351     if output_file:
   1352         output_fd = open(output_file, 'wt', encoding='utf-8')
   1353     else:
   1354         output_fd = io.TextIOWrapper(sys.stdout.buffer,
   1355                                      encoding=sys.stdout.encoding,
   1356                                      errors="ignore")
   1357 
   1358     output_autogen()
   1359     for n in sorted(arguments.keys()):
   1360         f = arguments[n]
   1361         f.output_def()
   1362 
   1363     # A single translate function can be invoked for different patterns.
   1364     # Make sure that the argument sets are the same, and declare the
   1365     # function only once.
   1366     #
   1367     # If we're sharing formats, we're likely also sharing trans_* functions,
   1368     # but we can't tell which ones.  Prevent issues from the compiler by
   1369     # suppressing redundant declaration warnings.
   1370     if anyextern:
   1371         output("#pragma GCC diagnostic push\n",
   1372                "#pragma GCC diagnostic ignored \"-Wredundant-decls\"\n",
   1373                "#ifdef __clang__\n"
   1374                "#  pragma GCC diagnostic ignored \"-Wtypedef-redefinition\"\n",
   1375                "#endif\n\n")
   1376 
   1377     out_pats = {}
   1378     for i in allpatterns:
   1379         if i.name in out_pats:
   1380             p = out_pats[i.name]
   1381             if i.base.base != p.base.base:
   1382                 error(0, i.name, ' has conflicting argument sets')
   1383         else:
   1384             i.output_decl()
   1385             out_pats[i.name] = i
   1386     output('\n')
   1387 
   1388     if anyextern:
   1389         output("#pragma GCC diagnostic pop\n\n")
   1390 
   1391     for n in sorted(formats.keys()):
   1392         f = formats[n]
   1393         f.output_extract()
   1394 
   1395     output(decode_scope, 'bool ', decode_function,
   1396            '(DisasContext *ctx, ', insntype, ' insn)\n{\n')
   1397 
   1398     i4 = str_indent(4)
   1399 
   1400     if len(allpatterns) != 0:
   1401         output(i4, 'union {\n')
   1402         for n in sorted(arguments.keys()):
   1403             f = arguments[n]
   1404             output(i4, i4, f.struct_name(), ' f_', f.name, ';\n')
   1405         output(i4, '} u;\n\n')
   1406         toppat.output_code(4, False, 0, 0)
   1407 
   1408     output(i4, 'return false;\n')
   1409     output('}\n')
   1410 
   1411     if variablewidth:
   1412         output('\n', decode_scope, insntype, ' ', decode_function,
   1413                '_load(DisasContext *ctx)\n{\n',
   1414                '    ', insntype, ' insn = 0;\n\n')
   1415         stree.output_code(4, 0, 0, 0)
   1416         output('}\n')
   1417 
   1418     if output_file:
   1419         output_fd.close()
   1420 # end main
   1421 
   1422 
   1423 if __name__ == '__main__':
   1424     main()