#!/usr/bin/env python
# encoding: utf-8
# Thomas Nagy, 2006-2018 (ita)

"""
C/C++ preprocessor for finding dependencies

Reasons for using the Waf preprocessor by default

#. Some c/c++ extensions (Qt) require a custom preprocessor for obtaining the dependencies (.moc files)
#. Not all compilers provide .d files for obtaining the dependencies (portability)
#. A naive file scanner will not catch constructs such as "#include foo()"
#. A naive file scanner will catch unnecessary dependencies (change an unused header -> recompile everything)

Regarding the speed concerns:

* the preprocessing is performed only when files must be compiled
* the macros are evaluated only for #if/#elif/#include
* system headers are not scanned by default

Now if you do not want the Waf preprocessor, the tool *gccdeps* uses the .d files produced
during the compilation to track the dependencies (useful when used with the boost libraries).
It only works with gcc >= 4.4 though.

A dumb preprocessor is also available in the tool *c_dumbpreproc*
"""
# TODO: more varargs, pragma once

import re, string, traceback
from waflib import Logs, Utils, Errors

class PreprocError(Errors.WafError):
	pass

FILE_CACHE_SIZE = 100000
LINE_CACHE_SIZE = 100000

POPFILE = '-'
"Constant representing a special token used in :py:meth:`waflib.Tools.c_preproc.c_parser.start` iteration to switch to a header read previously"

recursion_limit = 150
"Limit on the amount of files to read in the dependency scanner"

go_absolute = False
"Set to True to track headers on files in /usr/include, else absolute paths are ignored (but it becomes very slow)"

standard_includes = ['/usr/local/include', '/usr/include']
if Utils.is_win32:
	standard_includes = []

use_trigraphs = 0
"""Apply trigraph rules (False by default)"""

# obsolete, do not use
strict_quotes = 0

g_optrans = {
	'not':'!',
	'not_eq':'!',
	'and':'&&',
	'and_eq':'&=',
	'or':'||',
	'or_eq':'|=',
	'xor':'^',
	'xor_eq':'^=',
	'bitand':'&',
	'bitor':'|',
	'compl':'~',
}
"""Operators such as and/or/xor for c++.
Set to an empty dict to disable."""

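# Illustration: the keys above make the tokenizer classify the C++ alternative
# operator spellings as operator tokens. Note that only the token *type* is
# changed; the value is kept as-is and handled by reduce_nums() further below:
#
#   >>> tokenize('a and b')
#   [('T', 'a'), ('O', 'and'), ('T', 'b')]
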
# ignore #warning and #error
re_lines = re.compile(
	'^[ \t]*(?:#|%:)[ \t]*(ifdef|ifndef|if|else|elif|endif|include|import|define|undef|pragma)[ \t]*(.*)\r*$',
	re.IGNORECASE | re.MULTILINE)
"""Match the preprocessor directives handled by the scanner (#if, #include, #define, ...)"""

re_mac = re.compile(r"^[a-zA-Z_]\w*")
"""Match the identifier (macro name) at the start of a line"""

re_fun = re.compile('^[a-zA-Z_][a-zA-Z0-9_]*[(]')
"""Match function-like macro definitions"""

re_pragma_once = re.compile(r'^\s*once\s*', re.IGNORECASE)
"""Match #pragma once statements"""

re_nl = re.compile('\\\\\r*\n', re.MULTILINE)
"""Match backslash line continuations"""

re_cpp = re.compile(r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"', re.DOTALL | re.MULTILINE)
"""Match C/C++ comments and string/char literals (comments are blanked out by :py:func:`repl`)"""

trig_def = [('??'+a, b) for a, b in zip("=-/!'()<>", r'#~\|^[]{}')]
"""Trigraph definitions"""

chr_esc = {'0':0, 'a':7, 'b':8, 't':9, 'n':10, 'v':11, 'f':12, 'r':13, '\\':92, "'":39}
"""Escape characters (\\v is vertical tab 11, \\f is form feed 12)"""

NUM = 'i'
"""Number token"""

OP = 'O'
"""Operator token"""

IDENT = 'T'
"""Identifier token"""

STR = 's'
"""String token"""

CHAR = 'c'
"""Character token"""

tok_types = [NUM, STR, IDENT, OP]
"""Token types"""

exp_types = [
	r"""0[xX](?P<hex>[a-fA-F0-9]+)(?P<qual1>[uUlL]*)|L*?'(?P<char>(\\.|[^\\'])+)'|(?P<n1>\d+)[Ee](?P<exp0>[+-]*?\d+)(?P<float0>[fFlL]*)|(?P<n2>\d*\.\d+)([Ee](?P<exp1>[+-]*?\d+))?(?P<float1>[fFlL]*)|(?P<n4>\d+\.\d*)([Ee](?P<exp2>[+-]*?\d+))?(?P<float2>[fFlL]*)|(?P<oct>0*)(?P<n0>\d+)(?P<qual2>[uUlL]*)""",
	r'L?"([^"\\]|\\.)*"',
	r'[a-zA-Z_]\w*',
	r'%:%:|<<=|>>=|\.\.\.|<<|<%|<:|<=|>>|>=|\+\+|\+=|--|->|-=|\*=|/=|%:|%=|%>|==|&&|&=|\|\||\|=|\^=|:>|!=|##|[\(\)\{\}\[\]<>\?\|\^\*\+&=:!#;,%/\-\?\~\.]',
]
"""Expression types"""

re_clexer = re.compile('|'.join(["(?P<%s>%s)" % (name, part) for name, part in zip(tok_types, exp_types)]), re.M)
"""Match expressions into tokens"""

accepted = 'a'
"""Parser state is *accepted*"""

ignored = 'i'
"""Parser state is *ignored*, for example preprocessor lines in an #if 0 block"""

undefined = 'u'
"""Parser state is *undefined* at the moment"""

skipped = 's'
"""Parser state is *skipped*, for example preprocessor lines in a #elif 0 block"""

def repl(m):
	"""Replace function used with :py:attr:`waflib.Tools.c_preproc.re_cpp`: blank out comments, keep string literals"""
	s = m.group()
	if s[0] == '/':
		return ' '
	return s

prec = {}
"""
Operator precedence rules required for parsing expressions of the form::

	#if 1 && 2 != 0
"""
ops = ['* / %', '+ -', '<< >>', '< <= >= >', '== !=', '& | ^', '&& ||', ',']
for x, syms in enumerate(ops):
	for u in syms.split():
		prec[u] = x

def reduce_nums(val_1, val_2, val_op):
	"""
	Apply arithmetic rules to compute a result

	:param val_1: input parameter
	:type val_1: int or string
	:param val_2: input parameter
	:type val_2: int or string
	:param val_op: C operator in *+*, */*, *-*, etc
	:type val_op: string
	:rtype: int
	"""
	#print val_1, val_2, val_op

	# now perform the operation, make certain a and b are numeric
	try:
		a = 0 + val_1
	except TypeError:
		a = int(val_1)
	try:
		b = 0 + val_2
	except TypeError:
		b = int(val_2)

	d = val_op
	if d == '%':
		c = a % b
	elif d=='+':
		c = a + b
	elif d=='-':
		c = a - b
	elif d=='*':
		c = a * b
	elif d=='/':
		c = a / b
	elif d=='^':
		c = a ^ b
	elif d=='==':
		c = int(a == b)
	elif d=='|' or d == 'bitor':
		c = a | b
	elif d=='||' or d == 'or':
		c = int(a or b)
	elif d=='&' or d == 'bitand':
		c = a & b
	elif d=='&&' or d == 'and':
		c = int(a and b)
	elif d=='!=' or d == 'not_eq':
		c = int(a != b)
	elif d=='^' or d == 'xor':
		c = int(a^b)
	elif d=='<=':
		c = int(a <= b)
	elif d=='<':
		c = int(a < b)
	elif d=='>':
		c = int(a > b)
	elif d=='>=':
		c = int(a >= b)
	elif d=='<<':
		c = a << b
	elif d=='>>':
		c = a >> b
	else:
		c = 0
	return c

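# A quick illustration of reduce_nums: the operands may still be strings as
# produced by the tokenizer, and logical operators collapse to 0/1:
#
#   >>> reduce_nums('7', 2, '%')
#   1
#   >>> reduce_nums(1, 0, '&&')
#   0
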
def get_num(lst):
	"""
	Try to obtain a number from a list of tokens. The token types are defined in :py:attr:`waflib.Tools.c_preproc.tok_types`.

	:param lst: list of preprocessor tokens
	:type lst: list of tuple (tokentype, value)
	:return: a pair containing the number and the rest of the list
	:rtype: tuple(value, list)
	"""
	if not lst:
		raise PreprocError('empty list for get_num')
	(p, v) = lst[0]
	if p == OP:
		if v == '(':
			count_par = 1
			i = 1
			while i < len(lst):
				(p, v) = lst[i]

				if p == OP:
					if v == ')':
						count_par -= 1
						if count_par == 0:
							break
					elif v == '(':
						count_par += 1
				i += 1
			else:
				raise PreprocError('rparen expected %r' % lst)

			(num, _) = get_term(lst[1:i])
			return (num, lst[i+1:])

		elif v == '+':
			return get_num(lst[1:])
		elif v == '-':
			num, lst = get_num(lst[1:])
			return (reduce_nums('-1', num, '*'), lst)
		elif v == '!':
			num, lst = get_num(lst[1:])
			return (int(not int(num)), lst)
		elif v == '~':
			num, lst = get_num(lst[1:])
			return (~ int(num), lst)
		else:
			raise PreprocError('Invalid op token %r for get_num' % lst)
	elif p == NUM:
		return v, lst[1:]
	elif p == IDENT:
		# all macros should have been replaced, remaining identifiers eval to 0
		return 0, lst[1:]
	else:
		raise PreprocError('Invalid token %r for get_num' % lst)

def get_term(lst):
	"""
	Evaluate an expression recursively, for example::

		1+1+1 -> 2+1 -> 3

	:param lst: list of tokens
	:type lst: list of tuple(token, value)
	:return: the value and the remaining tokens
	:rtype: value, list
	"""

	if not lst:
		raise PreprocError('empty list for get_term')
	num, lst = get_num(lst)
	if not lst:
		return (num, [])
	(p, v) = lst[0]
	if p == OP:
		if v == ',':
			# skip
			return get_term(lst[1:])
		elif v == '?':
			count_par = 0
			i = 1
			while i < len(lst):
				(p, v) = lst[i]

				if p == OP:
					if v == ')':
						count_par -= 1
					elif v == '(':
						count_par += 1
					elif v == ':':
						if count_par == 0:
							break
				i += 1
			else:
				raise PreprocError('rparen expected %r' % lst)

			if int(num):
				return get_term(lst[1:i])
			else:
				return get_term(lst[i+1:])

		else:
			num2, lst = get_num(lst[1:])

			if not lst:
				# no more tokens to process
				num2 = reduce_nums(num, num2, v)
				return get_term([(NUM, num2)] + lst)

			# operator precedence
			p2, v2 = lst[0]
			if p2 != OP:
				raise PreprocError('op expected %r' % lst)

			if prec[v2] >= prec[v]:
				num2 = reduce_nums(num, num2, v)
				return get_term([(NUM, num2)] + lst)
			else:
				num3, lst = get_num(lst[1:])
				num3 = reduce_nums(num2, num3, v2)
				return get_term([(NUM, num), (p, v), (NUM, num3)] + lst)

	raise PreprocError('cannot reduce %r' % lst)

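# Evaluation sketch: get_term() consumes tokens left to right and uses the
# prec table above to decide when to fold a sub-expression (tokenize is
# defined later in this file):
#
#   >>> get_term(tokenize('1 + 2 * 3'))
#   (7, [])
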
def reduce_eval(lst):
	"""
	Take a list of tokens and output true or false for #if/#elif conditions.

	:param lst: a list of tokens
	:type lst: list of tuple(token, value)
	:return: a token
	:rtype: tuple(NUM, int)
	"""
	num, lst = get_term(lst)
	return (NUM, num)

def stringize(lst):
	"""
	Merge a list of tokens into a string

	:param lst: a list of tokens
	:type lst: list of tuple(token, value)
	:rtype: string
	"""
	lst = [str(v2) for (p2, v2) in lst]
	return "".join(lst)

def paste_tokens(t1, t2):
	"""
	Token pasting works between identifiers, particular operators, and identifiers and numbers::

		a ## b  ->  ab
		> ## =  ->  >=
		a ## 2  ->  a2

	:param t1: token
	:type t1: tuple(type, value)
	:param t2: token
	:type t2: tuple(type, value)
	"""
	p1 = None
	if t1[0] == OP and t2[0] == OP:
		p1 = OP
	elif t1[0] == IDENT and (t2[0] == IDENT or t2[0] == NUM):
		p1 = IDENT
	elif t1[0] == NUM and t2[0] == NUM:
		p1 = NUM
	if not p1:
		raise PreprocError('tokens do not make a valid paste %r and %r' % (t1, t2))
	return (p1, t1[1] + t2[1])

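# Token pasting in practice; pasting an identifier with a number yields an
# identifier token:
#
#   >>> paste_tokens((IDENT, 'a'), (NUM, '2'))
#   ('T', 'a2')
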
def reduce_tokens(lst, defs, ban=[]):
	"""
	Replace the tokens in lst, using the macros provided in defs, and a list of macros that cannot be re-applied

	:param lst: list of tokens
	:type lst: list of tuple(token, value)
	:param defs: macro definitions
	:type defs: dict
	:param ban: macros that cannot be substituted (recursion is not allowed)
	:type ban: list of string
	:return: None; the list lst is modified in place
	"""

	i = 0
	while i < len(lst):
		(p, v) = lst[i]

		if p == IDENT and v == "defined":
			del lst[i]
			if i < len(lst):
				(p2, v2) = lst[i]
				if p2 == IDENT:
					if v2 in defs:
						lst[i] = (NUM, 1)
					else:
						lst[i] = (NUM, 0)
				elif p2 == OP and v2 == '(':
					del lst[i]
					(p2, v2) = lst[i]
					del lst[i] # remove the ident, and replace the ')' with the value
					if v2 in defs:
						lst[i] = (NUM, 1)
					else:
						lst[i] = (NUM, 0)
				else:
					raise PreprocError('Invalid define expression %r' % lst)

		elif p == IDENT and v in defs:

			if isinstance(defs[v], str):
				a, b = extract_macro(defs[v])
				defs[v] = b
			macro_def = defs[v]
			to_add = macro_def[1]

			if isinstance(macro_def[0], list):
				# macro without arguments
				del lst[i]
				accu = to_add[:]
				reduce_tokens(accu, defs, ban+[v])
				for tmp in accu:
					lst.insert(i, tmp)
					i += 1
			else:
				# collect the arguments for the funcall

				args = []
				del lst[i]

				if i >= len(lst):
					raise PreprocError('expected ( after %r (got nothing)' % v)

				(p2, v2) = lst[i]
				if p2 != OP or v2 != '(':
					raise PreprocError('expected ( after %r' % v)

				del lst[i]

				one_param = []
				count_paren = 0
				while i < len(lst):
					p2, v2 = lst[i]

					del lst[i]
					if p2 == OP and count_paren == 0:
						if v2 == '(':
							one_param.append((p2, v2))
							count_paren += 1
						elif v2 == ')':
							if one_param:
								args.append(one_param)
							break
						elif v2 == ',':
							if not one_param:
								raise PreprocError('empty param in funcall %r' % v)
							args.append(one_param)
							one_param = []
						else:
							one_param.append((p2, v2))
					else:
						one_param.append((p2, v2))
						if v2 == '(':
							count_paren += 1
						elif v2 == ')':
							count_paren -= 1
				else:
					raise PreprocError('malformed macro')

				# substitute the arguments within the define expression
				accu = []
				arg_table = macro_def[0]
				j = 0
				while j < len(to_add):
					(p2, v2) = to_add[j]

					if p2 == OP and v2 == '#':
						# stringize is for arguments only
						if j+1 < len(to_add) and to_add[j+1][0] == IDENT and to_add[j+1][1] in arg_table:
							toks = args[arg_table[to_add[j+1][1]]]
							accu.append((STR, stringize(toks)))
							j += 1
						else:
							accu.append((p2, v2))
					elif p2 == OP and v2 == '##':
						# token pasting, how can man invent such a complicated system?
						if accu and j+1 < len(to_add):
							# we have at least two tokens

							t1 = accu[-1]

							if to_add[j+1][0] == IDENT and to_add[j+1][1] in arg_table:
								toks = args[arg_table[to_add[j+1][1]]]

								if toks:
									accu[-1] = paste_tokens(t1, toks[0]) #(IDENT, accu[-1][1] + toks[0][1])
									accu.extend(toks[1:])
								else:
									# error, case "a##"
									accu.append((p2, v2))
									accu.extend(toks)
							elif to_add[j+1][0] == IDENT and to_add[j+1][1] == '__VA_ARGS__':
								# first collect the tokens
								va_toks = []
								st = len(macro_def[0])
								pt = len(args)
								for x in args[pt-st+1:]:
									va_toks.extend(x)
									va_toks.append((OP, ','))
								if va_toks:
									va_toks.pop() # extra comma
								if len(accu)>1:
									(p3, v3) = accu[-1]
									(p4, v4) = accu[-2]
									if v3 == '##':
										# remove the token paste
										accu.pop()
										if v4 == ',' and pt < st:
											# remove the comma
											accu.pop()
								accu += va_toks
							else:
								accu[-1] = paste_tokens(t1, to_add[j+1])

							j += 1
						else:
							# Invalid paste, case "##a" or "b##"
							accu.append((p2, v2))

					elif p2 == IDENT and v2 in arg_table:
						toks = args[arg_table[v2]]
						reduce_tokens(toks, defs, ban+[v])
						accu.extend(toks)
					else:
						accu.append((p2, v2))

					j += 1

				reduce_tokens(accu, defs, ban+[v])

				for x in range(len(accu)-1, -1, -1):
					lst.insert(i, accu[x])

		i += 1

def eval_macro(lst, defs):
	"""
	Reduce the tokens by :py:func:`waflib.Tools.c_preproc.reduce_tokens` and try to return a 0/1 result by :py:func:`waflib.Tools.c_preproc.reduce_eval`.

	:param lst: list of tokens
	:type lst: list of tuple(token, value)
	:param defs: macro definitions
	:type defs: dict
	:rtype: int
	"""
	reduce_tokens(lst, defs, [])
	if not lst:
		raise PreprocError('missing tokens to evaluate')

	p, v = reduce_eval(lst)
	return int(v) != 0

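# End-to-end evaluation of an #if expression, with a macro substituted from
# defs; as in c_parser.start() below, defs maps a macro name to its raw
# "#define" body, here for a hypothetical "#define VER 3":
#
#   >>> eval_macro(tokenize('VER >= 2'), {'VER': 'VER 3'})
#   True
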
def extract_macro(txt):
	"""
	Process a macro definition of the form::

		#define f(x, y) x * y

	into a function or a simple macro without arguments

	:param txt: expression to extract a macro definition from
	:type txt: string
	:return: a tuple containing the name, the list of arguments and the replacement
	:rtype: tuple(string, [list, list])
	"""
	t = tokenize(txt)
	if re_fun.search(txt):
		p, name = t[0]

		p, v = t[1]
		if p != OP:
			raise PreprocError('expected (')

		i = 1
		pindex = 0
		params = {}
		prev = '('

		while 1:
			i += 1
			p, v = t[i]

			if prev == '(':
				if p == IDENT:
					params[v] = pindex
					pindex += 1
					prev = p
				elif p == OP and v == ')':
					break
				else:
					raise PreprocError('unexpected token (3)')
			elif prev == IDENT:
				if p == OP and v == ',':
					prev = v
				elif p == OP and v == ')':
					break
				else:
					raise PreprocError('comma or ... expected')
			elif prev == ',':
				if p == IDENT:
					params[v] = pindex
					pindex += 1
					prev = p
				elif p == OP and v == '...':
					raise PreprocError('not implemented (1)')
				else:
					raise PreprocError('comma or ... expected (2)')
			elif prev == '...':
				raise PreprocError('not implemented (2)')
			else:
				raise PreprocError('unexpected else')

		#~ print (name, [params, t[i+1:]])
		return (name, [params, t[i+1:]])
	else:
		(p, v) = t[0]
		if len(t) > 1:
			return (v, [[], t[1:]])
		else:
			# empty define, assign an empty token
			return (v, [[], [('T','')]])

re_include = re.compile(r'^\s*(<(?:.*)>|"(?:.*)")')
def extract_include(txt, defs):
	"""
	Process a line in the form::

		#include foo

	:param txt: include line to process
	:type txt: string
	:param defs: macro definitions
	:type defs: dict
	:return: the file name
	:rtype: string
	"""
	m = re_include.search(txt)
	if m:
		txt = m.group(1)
		return txt[0], txt[1:-1]

	# perform preprocessing and look at the result, it must match an include
	toks = tokenize(txt)
	reduce_tokens(toks, defs, ['waf_include'])

	if not toks:
		raise PreprocError('could not parse include %r' % txt)

	if len(toks) == 1:
		if toks[0][0] == STR:
			return '"', toks[0][1]
	else:
		if toks[0][1] == '<' and toks[-1][1] == '>':
			ret = '<', stringize(toks).lstrip('<').rstrip('>')
			return ret

	raise PreprocError('could not parse include %r' % txt)

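# What the two extractors return, on plain strings:
#
#   >>> extract_macro('f(x, y) x * y')
#   ('f', [{'x': 0, 'y': 1}, [('T', 'x'), ('O', '*'), ('T', 'y')]])
#   >>> extract_include('<stdio.h>', {})
#   ('<', 'stdio.h')
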
def parse_char(txt):
	"""
	Parse a c character literal

	:param txt: character to parse
	:type txt: string
	:return: the character value (ordinal)
	:rtype: int
	"""

	if not txt:
		raise PreprocError('attempted to parse a null char')
	if txt[0] != '\\':
		return ord(txt)
	c = txt[1]
	if c == 'x':
		# hexadecimal escape such as \x41
		return int(txt[2:], 16)
	elif c.isdigit():
		# octal escape such as \101
		if c == '0' and len(txt)==2:
			return 0
		for i in 3, 2, 1:
			if len(txt) > i and txt[1:1+i].isdigit():
				return int(txt[1:1+i], 8)
	else:
		try:
			return chr_esc[c]
		except KeyError:
			raise PreprocError('could not parse char literal %r' % txt)

def tokenize(s):
	"""
	Convert a string into a list of tokens (shlex.split does not apply to c/c++/d)

	:param s: input to tokenize
	:type s: string
	:return: a list of tokens
	:rtype: list of tuple(token, value)
	"""
	return tokenize_private(s)[:] # force a copy of the results

def tokenize_private(s):
	ret = []
	for match in re_clexer.finditer(s):
		m = match.group
		for name in tok_types:
			v = m(name)
			if v:
				if name == IDENT:
					if v in g_optrans:
						name = OP
					elif v.lower() == "true":
						v = 1
						name = NUM
					elif v.lower() == "false":
						v = 0
						name = NUM
				elif name == NUM:
					if m('oct'):
						v = int(v, 8)
					elif m('hex'):
						v = int(m('hex'), 16)
					elif m('n0'):
						v = m('n0')
					else:
						v = m('char')
						if v:
							v = parse_char(v)
						else:
							v = m('n2') or m('n4')
				elif name == OP:
					if v == '%:':
						v = '#'
					elif v == '%:%:':
						v = '##'
				elif name == STR:
					# remove the quotes around the string
					v = v[1:-1]
				ret.append((name, v))
				break
	return ret

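# Sample tokenizer output; note that decimal numbers keep their string value
# while hex and char literals are converted to int right away:
#
#   >>> tokenize(r"'\n' + 0x10 + 2")
#   [('i', 10), ('O', '+'), ('i', 16), ('O', '+'), ('i', '2')]
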
def format_defines(lst):
	"""
	Convert command-line defines such as ``FOO`` or ``FOO=1`` into ``#define``-style bodies (``FOO``, ``FOO 1``)
	"""
	ret = []
	for y in lst:
		if y:
			pos = y.find('=')
			if pos == -1:
				# "-DFOO" should give "#define FOO 1"
				ret.append(y)
			elif pos > 0:
				# all others are assumed to be -DX=Y
				ret.append('%s %s' % (y[:pos], y[pos+1:]))
			else:
				raise ValueError('Invalid define expression %r' % y)
	return ret

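# For instance, with defines collected from env.DEFINES = ['FOO', 'BAR=1']:
#
#   >>> format_defines(['FOO', 'BAR=1'])
#   ['FOO', 'BAR 1']
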
class c_parser(object):
	"""
	Used by :py:func:`waflib.Tools.c_preproc.scan` to parse c/h files. Note that by default,
	only project headers are parsed.
	"""
	def __init__(self, nodepaths=None, defines=None):
		self.lines = []
		"""list of lines read"""

		if defines is None:
			self.defs = {}
		else:
			self.defs = dict(defines) # make a copy
		self.state = []

		self.count_files = 0
		self.currentnode_stack = []

		self.nodepaths = nodepaths or []
		"""Include paths"""

		self.nodes = []
		"""List of :py:class:`waflib.Node.Node` found so far"""

		self.names = []
		"""List of file names that could not be matched by any file"""

		self.curfile = ''
		"""Current file"""

		self.ban_includes = set()
		"""Includes that must not be read (#pragma once)"""

		self.listed = set()
		"""Include nodes/names already listed to avoid duplicates in self.nodes/self.names"""

	def cached_find_resource(self, node, filename):
		"""
		Find a file from the input directory

		:param node: directory
		:type node: :py:class:`waflib.Node.Node`
		:param filename: header to find
		:type filename: string
		:return: the node if found, or None
		:rtype: :py:class:`waflib.Node.Node`
		"""
		try:
			cache = node.ctx.preproc_cache_node
		except AttributeError:
			cache = node.ctx.preproc_cache_node = Utils.lru_cache(FILE_CACHE_SIZE)

		key = (node, filename)
		try:
			return cache[key]
		except KeyError:
			ret = node.find_resource(filename)
			if ret:
				if getattr(ret, 'children', None):
					ret = None
				elif ret.is_child_of(node.ctx.bldnode):
					tmp = node.ctx.srcnode.search_node(ret.path_from(node.ctx.bldnode))
					if tmp and getattr(tmp, 'children', None):
						ret = None
			cache[key] = ret
			return ret

	def tryfind(self, filename, kind='"', env=None):
		"""
		Try to obtain a node from the filename based from the include paths. Will add
		the node found to :py:attr:`waflib.Tools.c_preproc.c_parser.nodes` or the file name to
		:py:attr:`waflib.Tools.c_preproc.c_parser.names` if no corresponding file is found. Called by
		:py:attr:`waflib.Tools.c_preproc.c_parser.start`.

		:param filename: header to find
		:type filename: string
		:param kind: include type, '"' for quoted includes or '<' for system includes
		:type kind: string
		:param env: config set, used to detect the MSVC quoted-include lookup order
		:type env: :py:class:`waflib.ConfigSet.ConfigSet`
		:return: the node if found
		:rtype: :py:class:`waflib.Node.Node`
		"""
		if filename.endswith('.moc'):
			# we could let the qt4 module use a subclass, but then the function "scan" below must be duplicated
			# in the qt4 and in the qt5 classes. So we have two lines here and it is sufficient.
			self.names.append(filename)
			return None

		self.curfile = filename

		found = None
		if kind == '"':
			if env.MSVC_VERSION:
				for n in reversed(self.currentnode_stack):
					found = self.cached_find_resource(n, filename)
					if found:
						break
			else:
				found = self.cached_find_resource(self.currentnode_stack[-1], filename)

		if not found:
			for n in self.nodepaths:
				found = self.cached_find_resource(n, filename)
				if found:
					break

		listed = self.listed
		if found and not found in self.ban_includes:
			if found not in listed:
				listed.add(found)
				self.nodes.append(found)
			self.addlines(found)
		else:
			if filename not in listed:
				listed.add(filename)
				self.names.append(filename)
		return found

	def filter_comments(self, node):
		"""
		Filter the comments from a c/h file, and return the preprocessor lines.
		The regexps :py:attr:`waflib.Tools.c_preproc.re_cpp`, :py:attr:`waflib.Tools.c_preproc.re_nl` and :py:attr:`waflib.Tools.c_preproc.re_lines` are used internally.

		:return: the preprocessor directives as a list of (keyword, line)
		:rtype: a list of string pairs
		"""
		# return a list of tuples : keyword, line
		code = node.read()
		if use_trigraphs:
			for (a, b) in trig_def:
				code = b.join(code.split(a))
		code = re_nl.sub('', code)
		code = re_cpp.sub(repl, code)
		return re_lines.findall(code)

	def parse_lines(self, node):
		try:
			cache = node.ctx.preproc_cache_lines
		except AttributeError:
			cache = node.ctx.preproc_cache_lines = Utils.lru_cache(LINE_CACHE_SIZE)
		try:
			return cache[node]
		except KeyError:
			cache[node] = lines = self.filter_comments(node)
			lines.append((POPFILE, ''))
			lines.reverse()
			return lines

	def addlines(self, node):
		"""
		Add the lines from a header to the list of preprocessor lines to parse

		:param node: header
		:type node: :py:class:`waflib.Node.Node`
		"""

		self.currentnode_stack.append(node.parent)

		self.count_files += 1
		if self.count_files > recursion_limit:
			# issue #812
			raise PreprocError('recursion limit exceeded')

		if Logs.verbose:
			Logs.debug('preproc: reading file %r', node)
		try:
			lines = self.parse_lines(node)
		except EnvironmentError:
			raise PreprocError('could not read the file %r' % node)
		except Exception:
			if Logs.verbose > 0:
				Logs.error('parsing %r failed %s', node, traceback.format_exc())
		else:
			self.lines.extend(lines)

	def start(self, node, env):
		"""
		Preprocess a source file to obtain the dependencies, which are accumulated to :py:attr:`waflib.Tools.c_preproc.c_parser.nodes`
		and :py:attr:`waflib.Tools.c_preproc.c_parser.names`.

		:param node: source file
		:type node: :py:class:`waflib.Node.Node`
		:param env: config set containing additional defines to take into account
		:type env: :py:class:`waflib.ConfigSet.ConfigSet`
		"""
		Logs.debug('preproc: scanning %s (in %s)', node.name, node.parent.name)

		self.current_file = node
		self.addlines(node)

		# macros may be defined on the command-line, so they must be parsed as if they were part of the file
		if env.DEFINES:
			lst = format_defines(env.DEFINES)
			lst.reverse()
			self.lines.extend([('define', x) for x in lst])

		while self.lines:
			(token, line) = self.lines.pop()
			if token == POPFILE:
				self.count_files -= 1
				self.currentnode_stack.pop()
				continue

			try:
				state = self.state

				# make certain we define the state if we are about to enter in an if block
				if token[:2] == 'if':
					state.append(undefined)
				elif token == 'endif':
					state.pop()

				# skip lines when in a dead 'if' branch, wait for the endif
				if token[0] != 'e':
					if skipped in self.state or ignored in self.state:
						continue

				if token == 'if':
					ret = eval_macro(tokenize(line), self.defs)
					if ret:
						state[-1] = accepted
					else:
						state[-1] = ignored
				elif token == 'ifdef':
					m = re_mac.match(line)
					if m and m.group() in self.defs:
						state[-1] = accepted
					else:
						state[-1] = ignored
				elif token == 'ifndef':
					m = re_mac.match(line)
					if m and m.group() in self.defs:
						state[-1] = ignored
					else:
						state[-1] = accepted
				elif token == 'include' or token == 'import':
					(kind, inc) = extract_include(line, self.defs)
					self.current_file = self.tryfind(inc, kind, env)
					if token == 'import':
						self.ban_includes.add(self.current_file)
				elif token == 'elif':
					if state[-1] == accepted:
						state[-1] = skipped
					elif state[-1] == ignored:
						if eval_macro(tokenize(line), self.defs):
							state[-1] = accepted
				elif token == 'else':
					if state[-1] == accepted:
						state[-1] = skipped
					elif state[-1] == ignored:
						state[-1] = accepted
				elif token == 'define':
					try:
						self.defs[self.define_name(line)] = line
					except AttributeError:
						raise PreprocError('Invalid define line %r' % line)
				elif token == 'undef':
					m = re_mac.match(line)
					if m and m.group() in self.defs:
						self.defs.__delitem__(m.group())
						#print "undef %s" % name
				elif token == 'pragma':
					if re_pragma_once.match(line.lower()):
						self.ban_includes.add(self.current_file)
			except Exception as e:
				if Logs.verbose:
					Logs.debug('preproc: line parsing failed (%s): %s %s', e, line, traceback.format_exc())

	def define_name(self, line):
		"""
		:param line: define line
		:type line: string
		:rtype: string
		:return: the define name
		"""
		return re_mac.match(line).group()

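# A minimal usage sketch, assuming a configured build context "bld" inside a
# wscript; the 'include' and 'src/main.c' paths are hypothetical:
#
#   parser = c_parser(nodepaths=[bld.path.find_dir('include')])
#   parser.start(bld.path.find_node('src/main.c'), bld.env)
#   parser.nodes   # headers resolved to waflib.Node.Node objects
#   parser.names   # include names that could not be resolved
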
def scan(task):
	"""
	Get the dependencies using a c/c++ preprocessor; this is required for finding dependencies of the kind::

		#include some_macro()

	This function is bound as a task method on :py:class:`waflib.Tools.c.c` and :py:class:`waflib.Tools.cxx.cxx` for example
	"""
	try:
		incn = task.generator.includes_nodes
	except AttributeError:
		raise Errors.WafError('%r is missing a feature such as "c", "cxx" or "includes"' % task.generator)

	if go_absolute:
		nodepaths = incn + [task.generator.bld.root.find_dir(x) for x in standard_includes]
	else:
		nodepaths = [x for x in incn if x.is_child_of(x.ctx.srcnode) or x.is_child_of(x.ctx.bldnode)]

	tmp = c_parser(nodepaths)
	tmp.start(task.inputs[0], task.env)
	return (tmp.nodes, tmp.names)
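
# Configuration sketch: to also scan system headers (slow, see the note on
# go_absolute near the top of this file), a wscript may flip the module-level
# flag before the build:
#
#   from waflib.Tools import c_preproc
#   c_preproc.go_absolute = True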