waf

FORK: waf with some random patches
git clone https://git.neptards.moe/neptards/waf.git

c_preproc.py (27592B)


      1 #!/usr/bin/env python
      2 # encoding: utf-8
      3 # Thomas Nagy, 2006-2018 (ita)
      4 
      5 """
      6 C/C++ preprocessor for finding dependencies
      7 
      8 Reasons for using the Waf preprocessor by default
      9 
     10 #. Some c/c++ extensions (Qt) require a custom preprocessor for obtaining the dependencies (.moc files)
     11 #. Not all compilers provide .d files for obtaining the dependencies (portability)
     12 #. A naive file scanner will not catch constructs such as "#include foo()"
     13 #. A naive file scanner will catch unnecessary dependencies (change an unused header -> recompile everything)
     14 
     15 Regarding the speed concerns:
     16 
     17 * the preprocessing is performed only when files must be compiled
     18 * the macros are evaluated only for #if/#elif/#include
     19 * system headers are not scanned by default
     20 
     21 Now if you do not want the Waf preprocessor, the tool *gccdeps* uses the .d files produced
     22 during the compilation to track the dependencies (useful when used with the boost libraries).
     23 It only works with gcc >= 4.4 though (a configuration sketch follows below).
     24 
     25 A dumb preprocessor is also available in the tool *c_dumbpreproc*
     26 """
     27 # TODO: more varargs
     28 
     29 import re, string, traceback
     30 from waflib import Logs, Utils, Errors
     31 
     32 class PreprocError(Errors.WafError):
     33 	pass
     34 
     35 FILE_CACHE_SIZE = 100000
     36 LINE_CACHE_SIZE = 100000
     37 
     38 POPFILE = '-'
     39 "Constant representing a special token used in :py:meth:`waflib.Tools.c_preproc.c_parser.start` iteration to switch to a header read previously"
     40 
     41 recursion_limit = 150
     42 "Limit on the amount of files to read in the dependency scanner"
     43 
     44 go_absolute = False
     45 "Set to True to track headers on files in /usr/include, else absolute paths are ignored (but it becomes very slow)"
     46 
     47 standard_includes = ['/usr/local/include', '/usr/include']
     48 if Utils.is_win32:
     49 	standard_includes = []
     50 
     51 use_trigraphs = 0
     52 """Apply trigraph rules (False by default)"""
     53 
     54 # obsolete, do not use
     55 strict_quotes = 0
     56 
     57 g_optrans = {
     58 'not':'!',
     59 'not_eq':'!=',
     60 'and':'&&',
     61 'and_eq':'&=',
     62 'or':'||',
     63 'or_eq':'|=',
     64 'xor':'^',
     65 'xor_eq':'^=',
     66 'bitand':'&',
     67 'bitor':'|',
     68 'compl':'~',
     69 }
     70 """Operators such as and/or/xor for c++. Set an empty dict to disable."""
     71 
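# For illustration: tokenize() below only switches the token *type* for these
# spellings, e.g. tokenize('a and b')[1] == ('O', 'and'); reduce_nums() then
# matches the alternative spellings such as 'and' or 'bitor' directly.
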
     72 # ignore #warning and #error
     73 re_lines = re.compile(
     74 	'^[ \t]*(?:#|%:)[ \t]*(ifdef|ifndef|if|else|elif|endif|include|import|define|undef|pragma)[ \t]*(.*)\r*$',
     75 	re.IGNORECASE | re.MULTILINE)
     76 """Match #include lines"""
     77 
     78 re_mac = re.compile(r"^[a-zA-Z_]\w*")
     79 """Match macro definitions"""
     80 
     81 re_fun = re.compile('^[a-zA-Z_][a-zA-Z0-9_]*[(]')
     82 """Match macro functions"""
     83 
     84 re_pragma_once = re.compile(r'^\s*once\s*', re.IGNORECASE)
     85 """Match #pragma once statements"""
     86 
     87 re_nl = re.compile('\\\\\r*\n', re.MULTILINE)
     88 """Match newlines"""
     89 
     90 re_cpp = re.compile(r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"', re.DOTALL | re.MULTILINE )
     91 """Filter C/C++ comments"""
     92 
     93 trig_def = [('??'+a, b) for a, b in zip("=-/!'()<>", r'#~\|^[]{}')]
     94 """Trigraph definitions"""
     95 
     96 chr_esc = {'0':0, 'a':7, 'b':8, 't':9, 'n':10, 'v':11, 'f':12, 'r':13, '\\':92, "'":39}
     97 """Escape characters"""
     98 
     99 NUM   = 'i'
    100 """Number token"""
    101 
    102 OP    = 'O'
    103 """Operator token"""
    104 
    105 IDENT = 'T'
    106 """Identifier token"""
    107 
    108 STR   = 's'
    109 """String token"""
    110 
    111 CHAR  = 'c'
    112 """Character token"""
    113 
    114 tok_types = [NUM, STR, IDENT, OP]
    115 """Token types"""
    116 
    117 exp_types = [
    118 	r"""0[xX](?P<hex>[a-fA-F0-9]+)(?P<qual1>[uUlL]*)|L*?'(?P<char>(\\.|[^\\'])+)'|(?P<n1>\d+)[Ee](?P<exp0>[+-]*?\d+)(?P<float0>[fFlL]*)|(?P<n2>\d*\.\d+)([Ee](?P<exp1>[+-]*?\d+))?(?P<float1>[fFlL]*)|(?P<n4>\d+\.\d*)([Ee](?P<exp2>[+-]*?\d+))?(?P<float2>[fFlL]*)|(?P<oct>0*)(?P<n0>\d+)(?P<qual2>[uUlL]*)""",
    119 	r'L?"([^"\\]|\\.)*"',
    120 	r'[a-zA-Z_]\w*',
    121 	r'%:%:|<<=|>>=|\.\.\.|<<|<%|<:|<=|>>|>=|\+\+|\+=|--|->|-=|\*=|/=|%:|%=|%>|==|&&|&=|\|\||\|=|\^=|:>|!=|##|[\(\)\{\}\[\]<>\?\|\^\*\+&=:!#;,%/\-\?\~\.]',
    122 ]
    123 """Expression types"""
    124 
    125 re_clexer = re.compile('|'.join(["(?P<%s>%s)" % (name, part) for name, part in zip(tok_types, exp_types)]), re.M)
    126 """Match expressions into tokens"""
    127 
    128 accepted  = 'a'
    129 """Parser state is *accepted*"""
    130 
    131 ignored   = 'i'
    132 """Parser state is *ignored*, for example preprocessor lines in an #if 0 block"""
    133 
    134 undefined = 'u'
    135 """Parser state is *undefined* at the moment"""
    136 
    137 skipped   = 's'
    138 """Parser state is *skipped*, for example preprocessor lines in a #elif 0 block"""
    139 
    140 def repl(m):
    141 	"""Replace function used with :py:attr:`waflib.Tools.c_preproc.re_cpp`"""
    142 	s = m.group()
    143 	if s[0] == '/':
    144 		return ' '
    145 	return s
    146 
    147 prec = {}
    148 """
    149 Operator precedence rules required for parsing expressions of the form::
    150 
    151 	#if 1 && 2 != 0
    152 """
    153 ops = ['* / %', '+ -', '<< >>', '< <= >= >', '== !=', '& | ^', '&& ||', ',']
    154 for x, syms in enumerate(ops):
    155 	for u in syms.split():
    156 		prec[u] = x
    157 
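# e.g. prec['*'] == 0 and prec['+'] == 1; a lower level binds tighter in get_term()
# below. Note that '& | ^' share a single level here, a simplification of the C rules.
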
    158 def reduce_nums(val_1, val_2, val_op):
    159 	"""
    160 	Apply arithmetic rules to compute a result
    161 
    162 	:param val_1: input parameter
    163 	:type val_1: int or string
    164 	:param val_2: input parameter
    165 	:type val_2: int or string
    166 	:param val_op: C operator in *+*, */*, *-*, etc
    167 	:type val_op: string
    168 	:rtype: int
    169 	"""
    170 	#print val_1, val_2, val_op
    171 
    172 	# now perform the operation, make certain a and b are numeric
    173 	try:
    174 		a = 0 + val_1
    175 	except TypeError:
    176 		a = int(val_1)
    177 	try:
    178 		b = 0 + val_2
    179 	except TypeError:
    180 		b = int(val_2)
    181 
    182 	d = val_op
    183 	if d == '%':
    184 		c = a % b
    185 	elif d=='+':
    186 		c = a + b
    187 	elif d=='-':
    188 		c = a - b
    189 	elif d=='*':
    190 		c = a * b
    191 	elif d=='/':
    192 		c = int(a / b) # truncate toward zero, as C integer division does
    193 	elif d=='^':
    194 		c = a ^ b
    195 	elif d=='==':
    196 		c = int(a == b)
    197 	elif d=='|'  or d == 'bitor':
    198 		c = a | b
    199 	elif d=='||' or d == 'or' :
    200 		c = int(a or b)
    201 	elif d=='&'  or d == 'bitand':
    202 		c = a & b
    203 	elif d=='&&' or d == 'and':
    204 		c = int(a and b)
    205 	elif d=='!=' or d == 'not_eq':
    206 		c = int(a != b)
    207 	elif d=='^'  or d == 'xor':
    208 		c = int(a^b)
    209 	elif d=='<=':
    210 		c = int(a <= b)
    211 	elif d=='<':
    212 		c = int(a < b)
    213 	elif d=='>':
    214 		c = int(a > b)
    215 	elif d=='>=':
    216 		c = int(a >= b)
    217 	elif d=='<<':
    218 		c = a << b
    219 	elif d=='>>':
    220 		c = a >> b
    221 	else:
    222 		c = 0
    223 	return c
    224 
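# e.g. reduce_nums('2', '3', '*') == 6; string operands, as produced by the
# lexer, are coerced with int() first, and unknown operators yield 0
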
    225 def get_num(lst):
    226 	"""
    227 	Try to obtain a number from a list of tokens. The token types are defined in :py:attr:`waflib.Tools.c_preproc.tok_types`.
    228 
    229 	:param lst: list of preprocessor tokens
    230 	:type lst: list of tuple (tokentype, value)
    231 	:return: a pair containing the number and the rest of the list
    232 	:rtype: tuple(value, list)
    233 	"""
    234 	if not lst:
    235 		raise PreprocError('empty list for get_num')
    236 	(p, v) = lst[0]
    237 	if p == OP:
    238 		if v == '(':
    239 			count_par = 1
    240 			i = 1
    241 			while i < len(lst):
    242 				(p, v) = lst[i]
    243 
    244 				if p == OP:
    245 					if v == ')':
    246 						count_par -= 1
    247 						if count_par == 0:
    248 							break
    249 					elif v == '(':
    250 						count_par += 1
    251 				i += 1
    252 			else:
    253 				raise PreprocError('rparen expected %r' % lst)
    254 
    255 			(num, _) = get_term(lst[1:i])
    256 			return (num, lst[i+1:])
    257 
    258 		elif v == '+':
    259 			return get_num(lst[1:])
    260 		elif v == '-':
    261 			num, lst = get_num(lst[1:])
    262 			return (reduce_nums('-1', num, '*'), lst)
    263 		elif v == '!':
    264 			num, lst = get_num(lst[1:])
    265 			return (int(not int(num)), lst)
    266 		elif v == '~':
    267 			num, lst = get_num(lst[1:])
    268 			return (~ int(num), lst)
    269 		else:
    270 			raise PreprocError('Invalid op token %r for get_num' % lst)
    271 	elif p == NUM:
    272 		return v, lst[1:]
    273 	elif p == IDENT:
    274 		# all macros should have been replaced, remaining identifiers eval to 0
    275 		return 0, lst[1:]
    276 	else:
    277 		raise PreprocError('Invalid token %r for get_num' % lst)
    278 
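# e.g. get_num(tokenize('!0')) == (1, []) and get_num(tokenize('~0')) == (-1, [])
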
    279 def get_term(lst):
    280 	"""
    281 	Evaluate an expression recursively, for example::
    282 
    283 		1+1+1 -> 2+1 -> 3
    284 
    285 	:param lst: list of tokens
    286 	:type lst: list of tuple(token, value)
    287 	:return: the value and the remaining tokens
    288 	:rtype: value, list
    289 	"""
    290 
    291 	if not lst:
    292 		raise PreprocError('empty list for get_term')
    293 	num, lst = get_num(lst)
    294 	if not lst:
    295 		return (num, [])
    296 	(p, v) = lst[0]
    297 	if p == OP:
    298 		if v == ',':
    299 			# skip
    300 			return get_term(lst[1:])
    301 		elif v == '?':
    302 			count_par = 0
    303 			i = 1
    304 			while i < len(lst):
    305 				(p, v) = lst[i]
    306 
    307 				if p == OP:
    308 					if v == ')':
    309 						count_par -= 1
    310 					elif v == '(':
    311 						count_par += 1
    312 					elif v == ':':
    313 						if count_par == 0:
    314 							break
    315 				i += 1
    316 			else:
    317 				raise PreprocError('rparen expected %r' % lst)
    318 
    319 			if int(num):
    320 				return get_term(lst[1:i])
    321 			else:
    322 				return get_term(lst[i+1:])
    323 
    324 		else:
    325 			num2, lst = get_num(lst[1:])
    326 
    327 			if not lst:
    328 				# no more tokens to process
    329 				num2 = reduce_nums(num, num2, v)
    330 				return get_term([(NUM, num2)] + lst)
    331 
    332 			# operator precedence
    333 			p2, v2 = lst[0]
    334 			if p2 != OP:
    335 				raise PreprocError('op expected %r' % lst)
    336 
    337 			if prec[v2] >= prec[v]:
    338 				num2 = reduce_nums(num, num2, v)
    339 				return get_term([(NUM, num2)] + lst)
    340 			else:
    341 				num3, lst = get_num(lst[1:])
    342 				num3 = reduce_nums(num2, num3, v2)
    343 				return get_term([(NUM, num), (p, v), (NUM, num3)] + lst)
    344 
    345 
    346 	raise PreprocError('cannot reduce %r' % lst)
    347 
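# e.g. get_term(tokenize('1 + 2 * 3')) == (7, []); the '?' branch above selects
# the tokens between '?' and ':' when the condition evaluates to a non-zero value
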
    348 def reduce_eval(lst):
    349 	"""
    350 	Take a list of tokens and output true or false for #if/#elif conditions.
    351 
    352 	:param lst: a list of tokens
    353 	:type lst: list of tuple(token, value)
    354 	:return: a token
    355 	:rtype: tuple(NUM, int)
    356 	"""
    357 	num, lst = get_term(lst)
    358 	return (NUM, num)
    359 
    360 def stringize(lst):
    361 	"""
    362 	Merge a list of tokens into a string
    363 
    364 	:param lst: a list of tokens
    365 	:type lst: list of tuple(token, value)
    366 	:rtype: string
    367 	"""
    368 	lst = [str(v2) for (p2, v2) in lst]
    369 	return "".join(lst)
    370 
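# e.g. stringize(tokenize('a + b')) == 'a+b'
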
    371 def paste_tokens(t1, t2):
    372 	"""
    373 	Token pasting is valid between two operators, between an identifier and another identifier or a number, and between two numbers::
    374 
    375 		a ## b  ->  ab
    376 		> ## =  ->  >=
    377 		a ## 2  ->  a2
    378 
    379 	:param t1: token
    380 	:type t1: tuple(type, value)
    381 	:param t2: token
    382 	:type t2: tuple(type, value)
    383 	"""
    384 	p1 = None
    385 	if t1[0] == OP and t2[0] == OP:
    386 		p1 = OP
    387 	elif t1[0] == IDENT and (t2[0] == IDENT or t2[0] == NUM):
    388 		p1 = IDENT
    389 	elif t1[0] == NUM and t2[0] == NUM:
    390 		p1 = NUM
    391 	if not p1:
    392 		raise PreprocError('tokens do not make a valid paste %r and %r' % (t1, t2))
    393 	return (p1, t1[1] + t2[1])
    394 
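# e.g. paste_tokens(('T', 'a'), ('i', '2')) == ('T', 'a2')
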
    395 def reduce_tokens(lst, defs, ban=[]):
    396 	"""
    397 	Replace the tokens in lst, using the macros provided in defs, and a list of macros that cannot be re-applied
    398 
    399 	:param lst: list of tokens
    400 	:type lst: list of tuple(token, value)
    401 	:param defs: macro definitions
    402 	:type defs: dict
    403 	:param ban: macros that cannot be substituted (recursion is not allowed)
    404 	:type ban: list of string
    405 	:return: nothing; the token list ``lst`` is modified in place
    406 	:rtype: None
    407 	"""
    408 
    409 	i = 0
    410 	while i < len(lst):
    411 		(p, v) = lst[i]
    412 
    413 		if p == IDENT and v == "defined":
    414 			del lst[i]
    415 			if i < len(lst):
    416 				(p2, v2) = lst[i]
    417 				if p2 == IDENT:
    418 					if v2 in defs:
    419 						lst[i] = (NUM, 1)
    420 					else:
    421 						lst[i] = (NUM, 0)
    422 				elif p2 == OP and v2 == '(':
    423 					del lst[i]
    424 					(p2, v2) = lst[i]
    425 					del lst[i] # remove the ident, and change the ) for the value
    426 					if v2 in defs:
    427 						lst[i] = (NUM, 1)
    428 					else:
    429 						lst[i] = (NUM, 0)
    430 				else:
    431 					raise PreprocError('Invalid define expression %r' % lst)
    432 
    433 		elif p == IDENT and v in defs:
    434 
    435 			if isinstance(defs[v], str):
    436 				a, b = extract_macro(defs[v])
    437 				defs[v] = b
    438 			macro_def = defs[v]
    439 			to_add = macro_def[1]
    440 
    441 			if isinstance(macro_def[0], list):
    442 				# macro without arguments
    443 				del lst[i]
    444 				accu = to_add[:]
    445 				reduce_tokens(accu, defs, ban+[v])
    446 				for tmp in accu:
    447 					lst.insert(i, tmp)
    448 					i += 1
    449 			else:
    450 				# collect the arguments for the funcall
    451 
    452 				args = []
    453 				del lst[i]
    454 
    455 				if i >= len(lst):
    456 					raise PreprocError('expected ( after %r (got nothing)' % v)
    457 
    458 				(p2, v2) = lst[i]
    459 				if p2 != OP or v2 != '(':
    460 					raise PreprocError('expected ( after %r' % v)
    461 
    462 				del lst[i]
    463 
    464 				one_param = []
    465 				count_paren = 0
    466 				while i < len(lst):
    467 					p2, v2 = lst[i]
    468 
    469 					del lst[i]
    470 					if p2 == OP and count_paren == 0:
    471 						if v2 == '(':
    472 							one_param.append((p2, v2))
    473 							count_paren += 1
    474 						elif v2 == ')':
    475 							if one_param:
    476 								args.append(one_param)
    477 							break
    478 						elif v2 == ',':
    479 							if not one_param:
    480 								raise PreprocError('empty param in funcall %r' % v)
    481 							args.append(one_param)
    482 							one_param = []
    483 						else:
    484 							one_param.append((p2, v2))
    485 					else:
    486 						one_param.append((p2, v2))
    487 						if   v2 == '(':
    488 							count_paren += 1
    489 						elif v2 == ')':
    490 							count_paren -= 1
    491 				else:
    492 					raise PreprocError('malformed macro')
    493 
    494 				# substitute the arguments within the define expression
    495 				accu = []
    496 				arg_table = macro_def[0]
    497 				j = 0
    498 				while j < len(to_add):
    499 					(p2, v2) = to_add[j]
    500 
    501 					if p2 == OP and v2 == '#':
    502 						# stringize is for arguments only
    503 						if j+1 < len(to_add) and to_add[j+1][0] == IDENT and to_add[j+1][1] in arg_table:
    504 							toks = args[arg_table[to_add[j+1][1]]]
    505 							accu.append((STR, stringize(toks)))
    506 							j += 1
    507 						else:
    508 							accu.append((p2, v2))
    509 					elif p2 == OP and v2 == '##':
    510 						# token pasting, how can man invent such a complicated system?
    511 						if accu and j+1 < len(to_add):
    512 							# we have at least two tokens
    513 
    514 							t1 = accu[-1]
    515 
    516 							if to_add[j+1][0] == IDENT and to_add[j+1][1] in arg_table:
    517 								toks = args[arg_table[to_add[j+1][1]]]
    518 
    519 								if toks:
    520 									accu[-1] = paste_tokens(t1, toks[0]) #(IDENT, accu[-1][1] + toks[0][1])
    521 									accu.extend(toks[1:])
    522 								else:
    523 									# error, case "a##"
    524 									accu.append((p2, v2))
    525 									accu.extend(toks)
    526 							elif to_add[j+1][0] == IDENT and to_add[j+1][1] == '__VA_ARGS__':
    527 								# first collect the tokens
    528 								va_toks = []
    529 								st = len(macro_def[0])
    530 								pt = len(args)
    531 								for x in args[pt-st+1:]:
    532 									va_toks.extend(x)
    533 									va_toks.append((OP, ','))
    534 								if va_toks:
    535 									va_toks.pop() # extra comma
    536 								if len(accu)>1:
    537 									(p3, v3) = accu[-1]
    538 									(p4, v4) = accu[-2]
    539 									if v3 == '##':
    540 										# remove the token paste
    541 										accu.pop()
    542 										if v4 == ',' and pt < st:
    543 											# remove the comma
    544 											accu.pop()
    545 								accu += va_toks
    546 							else:
    547 								accu[-1] = paste_tokens(t1, to_add[j+1])
    548 
    549 							j += 1
    550 						else:
    551 							# Invalid paste, case    "##a" or "b##"
    552 							accu.append((p2, v2))
    553 
    554 					elif p2 == IDENT and v2 in arg_table:
    555 						toks = args[arg_table[v2]]
    556 						reduce_tokens(toks, defs, ban+[v])
    557 						accu.extend(toks)
    558 					else:
    559 						accu.append((p2, v2))
    560 
    561 					j += 1
    562 
    563 
    564 				reduce_tokens(accu, defs, ban+[v])
    565 
    566 				for x in range(len(accu)-1, -1, -1):
    567 					lst.insert(i, accu[x])
    568 
    569 		i += 1
    570 
    571 
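# For illustration, with defs = {'SQR': 'SQR(x) ((x)*(x))'} (the form stored by
# c_parser.start below for '#define SQR(x) ((x)*(x))'):
#   lst = tokenize('SQR(3)'); reduce_tokens(lst, defs, [])
# leaves lst holding the tokens of '((3)*(3))'
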
    572 def eval_macro(lst, defs):
    573 	"""
    574 	Reduce the tokens by :py:func:`waflib.Tools.c_preproc.reduce_tokens` and try to return a 0/1 result by :py:func:`waflib.Tools.c_preproc.reduce_eval`.
    575 
    576 	:param lst: list of tokens
    577 	:type lst: list of tuple(token, value)
    578 	:param defs: macro definitions
    579 	:type defs: dict
    580 	:rtype: int
    581 	"""
    582 	reduce_tokens(lst, defs, [])
    583 	if not lst:
    584 		raise PreprocError('missing tokens to evaluate')
    585 
    586 	p, v = reduce_eval(lst)
    587 	return int(v) != 0
    588 
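# e.g. with defs = {'FOO': 'FOO 2'} (as stored for '#define FOO 2'),
# eval_macro(tokenize('defined(FOO) && FOO > 1'), defs) returns True
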
    589 def extract_macro(txt):
    590 	"""
    591 	Process a macro definition of the form::
    592 		 #define f(x, y) x * y
    593 
    594 	into a function or a simple macro without arguments
    595 
    596 	:param txt: expression to extract a macro definition from
    597 	:type txt: string
    598 	:return: a tuple containing the name, the list of arguments and the replacement
    599 	:rtype: tuple(string, [list, list])
    600 	"""
    601 	t = tokenize(txt)
    602 	if re_fun.search(txt):
    603 		p, name = t[0]
    604 
    605 		p, v = t[1]
    606 		if p != OP:
    607 			raise PreprocError('expected (')
    608 
    609 		i = 1
    610 		pindex = 0
    611 		params = {}
    612 		prev = '('
    613 
    614 		while 1:
    615 			i += 1
    616 			p, v = t[i]
    617 
    618 			if prev == '(':
    619 				if p == IDENT:
    620 					params[v] = pindex
    621 					pindex += 1
    622 					prev = p
    623 				elif p == OP and v == ')':
    624 					break
    625 				else:
    626 					raise PreprocError('unexpected token (3)')
    627 			elif prev == IDENT:
    628 				if p == OP and v == ',':
    629 					prev = v
    630 				elif p == OP and v == ')':
    631 					break
    632 				else:
    633 					raise PreprocError('comma or ... expected')
    634 			elif prev == ',':
    635 				if p == IDENT:
    636 					params[v] = pindex
    637 					pindex += 1
    638 					prev = p
    639 				elif p == OP and v == '...':
    640 					raise PreprocError('not implemented (1)')
    641 				else:
    642 					raise PreprocError('comma or ... expected (2)')
    643 			elif prev == '...':
    644 				raise PreprocError('not implemented (2)')
    645 			else:
    646 				raise PreprocError('unexpected else')
    647 
    648 		#~ print (name, [params, t[i+1:]])
    649 		return (name, [params, t[i+1:]])
    650 	else:
    651 		(p, v) = t[0]
    652 		if len(t) > 1:
    653 			return (v, [[], t[1:]])
    654 		else:
    655 			# empty define, assign an empty token
    656 			return (v, [[], [('T','')]])
    657 
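# e.g. extract_macro('f(x, y) x*y') == ('f', [{'x': 0, 'y': 1}, <tokens of 'x*y'>])
# and extract_macro('FOO 2') == ('FOO', [[], [('i', '2')]])
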
    658 re_include = re.compile(r'^\s*(<(?:.*)>|"(?:.*)")')
    659 def extract_include(txt, defs):
    660 	"""
    661 	Process a line in the form::
    662 
    663 		#include foo
    664 
    665 	:param txt: include line to process
    666 	:type txt: string
    667 	:param defs: macro definitions
    668 	:type defs: dict
    669 	:return: the file name
    670 	:rtype: string
    671 	"""
    672 	m = re_include.search(txt)
    673 	if m:
    674 		txt = m.group(1)
    675 		return txt[0], txt[1:-1]
    676 
    677 	# perform preprocessing and look at the result, it must match an include
    678 	toks = tokenize(txt)
    679 	reduce_tokens(toks, defs, ['waf_include'])
    680 
    681 	if not toks:
    682 		raise PreprocError('could not parse include %r' % txt)
    683 
    684 	if len(toks) == 1:
    685 		if toks[0][0] == STR:
    686 			return '"', toks[0][1]
    687 	else:
    688 		if toks[0][1] == '<' and toks[-1][1] == '>':
    689 			ret = '<', stringize(toks).lstrip('<').rstrip('>')
    690 			return ret
    691 
    692 	raise PreprocError('could not parse include %r' % txt)
    693 
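# e.g. extract_include('<stdio.h>', {}) == ('<', 'stdio.h')
# and extract_include('"config.h"', {}) == ('"', 'config.h')
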
    694 def parse_char(txt):
    695 	"""
    696 	Parse a c character
    697 
    698 	:param txt: character to parse
    699 	:type txt: string
    700 	:return: the character code as an integer
    701 	:rtype: int
    702 	"""
    703 
    704 	if not txt:
    705 		raise PreprocError('attempted to parse a null char')
    706 	if txt[0] != '\\':
    707 		return ord(txt)
    708 	c = txt[1]
    709 	if c == 'x':
    710 		# hexadecimal escape such as \x41: everything after the 'x' is
    711 		# interpreted as a hexadecimal number
    712 		return int(txt[2:], 16)
    713 	elif c.isdigit():
    714 		if c == '0' and len(txt)==2:
    715 			return 0
    716 		for i in 3, 2, 1:
    717 			if len(txt) > i and txt[1:1+i].isdigit():
    718 				return int(txt[1:1+i], 8)
    719 	else:
    720 		try:
    721 			return chr_esc[c]
    722 		except KeyError:
    723 			raise PreprocError('could not parse char literal %r' % txt)
    724 
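# e.g. parse_char('A') == 65, parse_char(r'\n') == 10 and parse_char(r'\x41') == 65
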
    725 def tokenize(s):
    726 	"""
    727 	Convert a string into a list of tokens (shlex.split does not apply to c/c++/d)
    728 
    729 	:param s: input to tokenize
    730 	:type s: string
    731 	:return: a list of tokens
    732 	:rtype: list of tuple(token, value)
    733 	"""
    734 	return tokenize_private(s)[:] # force a copy of the results
    735 
    736 def tokenize_private(s):
    737 	ret = []
    738 	for match in re_clexer.finditer(s):
    739 		m = match.group
    740 		for name in tok_types:
    741 			v = m(name)
    742 			if v:
    743 				if name == IDENT:
    744 					if v in g_optrans:
    745 						name = OP
    746 					elif v.lower() == "true":
    747 						v = 1
    748 						name = NUM
    749 					elif v.lower() == "false":
    750 						v = 0
    751 						name = NUM
    752 				elif name == NUM:
    753 					if m('oct'):
    754 						v = int(v, 8)
    755 					elif m('hex'):
    756 						v = int(m('hex'), 16)
    757 					elif m('n0'):
    758 						v = m('n0')
    759 					else:
    760 						v = m('char')
    761 						if v:
    762 							v = parse_char(v)
    763 						else:
    764 							v = m('n2') or m('n4')
    765 				elif name == OP:
    766 					if v == '%:':
    767 						v = '#'
    768 					elif v == '%:%:':
    769 						v = '##'
    770 				elif name == STR:
    771 					# remove the quotes around the string
    772 					v = v[1:-1]
    773 				ret.append((name, v))
    774 				break
    775 	return ret
    776 
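# e.g. tokenize("'a' + 2") == [('i', 97), ('O', '+'), ('i', '2')] (character
# literals become numbers, plain integers keep their string representation)
# and tokenize('true') == [('i', 1)]
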
    777 def format_defines(lst):
    778 	ret = []
    779 	for y in lst:
    780 		if y:
    781 			pos = y.find('=')
    782 			if pos == -1:
    783 				# "-DFOO" should give "#define FOO 1"
    784 				ret.append(y)
    785 			elif pos > 0:
    786 				# all others are assumed to be -DX=Y
    787 				ret.append('%s %s' % (y[:pos], y[pos+1:]))
    788 			else:
    789 				raise ValueError('Invalid define expression %r' % y)
    790 	return ret
    791 
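# e.g. format_defines(['DEBUG', 'VERSION=3']) == ['DEBUG', 'VERSION 3']
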
    792 class c_parser(object):
    793 	"""
    794 	Used by :py:func:`waflib.Tools.c_preproc.scan` to parse c/h files. Note that by default,
    795 	only project headers are parsed.
    796 	"""
    797 	def __init__(self, nodepaths=None, defines=None):
    798 		self.lines = []
    799 		"""list of lines read"""
    800 
    801 		if defines is None:
    802 			self.defs  = {}
    803 		else:
    804 			self.defs  = dict(defines) # make a copy
    805 		self.state = []
    806 
    807 		self.count_files = 0
    808 		self.currentnode_stack = []
    809 
    810 		self.nodepaths = nodepaths or []
    811 		"""Include paths"""
    812 
    813 		self.nodes = []
    814 		"""List of :py:class:`waflib.Node.Node` found so far"""
    815 
    816 		self.names = []
    817 		"""List of file names that could not be matched by any file"""
    818 
    819 		self.curfile = ''
    820 		"""Current file"""
    821 
    822 		self.ban_includes = set()
    823 		"""Includes that must not be read (#pragma once)"""
    824 
    825 		self.listed = set()
    826 		"""Include nodes/names already listed to avoid duplicates in self.nodes/self.names"""
    827 
    828 	def cached_find_resource(self, node, filename):
    829 		"""
    830 		Find a file from the input directory
    831 
    832 		:param node: directory
    833 		:type node: :py:class:`waflib.Node.Node`
    834 		:param filename: header to find
    835 		:type filename: string
    836 		:return: the node if found, or None
    837 		:rtype: :py:class:`waflib.Node.Node`
    838 		"""
    839 		try:
    840 			cache = node.ctx.preproc_cache_node
    841 		except AttributeError:
    842 			cache = node.ctx.preproc_cache_node = Utils.lru_cache(FILE_CACHE_SIZE)
    843 
    844 		key = (node, filename)
    845 		try:
    846 			return cache[key]
    847 		except KeyError:
    848 			ret = node.find_resource(filename)
    849 			if ret:
    850 				if getattr(ret, 'children', None):
    851 					ret = None
    852 				elif ret.is_child_of(node.ctx.bldnode):
    853 					tmp = node.ctx.srcnode.search_node(ret.path_from(node.ctx.bldnode))
    854 					if tmp and getattr(tmp, 'children', None):
    855 						ret = None
    856 			cache[key] = ret
    857 			return ret
    858 
    859 	def tryfind(self, filename, kind='"', env=None):
    860 		"""
    861 		Try to obtain a node from the filename based from the include paths. Will add
    862 		the node found to :py:attr:`waflib.Tools.c_preproc.c_parser.nodes` or the file name to
    863 		:py:attr:`waflib.Tools.c_preproc.c_parser.names` if no corresponding file is found. Called by
    864 		:py:attr:`waflib.Tools.c_preproc.c_parser.start`.
    865 
    866 		:param filename: header to find
    867 		:type filename: string
    868 		:return: the node if found
    869 		:rtype: :py:class:`waflib.Node.Node`
    870 		"""
    871 		if filename.endswith('.moc'):
    872 			# we could let the qt4 module use a subclass, but then the function "scan" below would have to be
    873 			# duplicated in both the qt4 and the qt5 classes; these two lines are sufficient
    874 			self.names.append(filename)
    875 			return None
    876 
    877 		self.curfile = filename
    878 
    879 		found = None
    880 		if kind == '"':
    881 			if env.MSVC_VERSION:
    882 				for n in reversed(self.currentnode_stack):
    883 					found = self.cached_find_resource(n, filename)
    884 					if found:
    885 						break
    886 			else:
    887 				found = self.cached_find_resource(self.currentnode_stack[-1], filename)
    888 
    889 		if not found:
    890 			for n in self.nodepaths:
    891 				found = self.cached_find_resource(n, filename)
    892 				if found:
    893 					break
    894 
    895 		listed = self.listed
    896 		if found and found not in self.ban_includes:
    897 			if found not in listed:
    898 				listed.add(found)
    899 				self.nodes.append(found)
    900 			self.addlines(found)
    901 		else:
    902 			if filename not in listed:
    903 				listed.add(filename)
    904 				self.names.append(filename)
    905 		return found
    906 
    907 	def filter_comments(self, node):
    908 		"""
    909 		Filter the comments from a c/h file, and return the preprocessor lines.
    910 		The regexps :py:attr:`waflib.Tools.c_preproc.re_cpp`, :py:attr:`waflib.Tools.c_preproc.re_nl` and :py:attr:`waflib.Tools.c_preproc.re_lines` are used internally.
    911 
    912 		:return: the preprocessor directives as a list of (keyword, line)
    913 		:rtype: a list of string pairs
    914 		"""
    915 		# return a list of tuples : keyword, line
    916 		code = node.read()
    917 		if use_trigraphs:
    918 			for (a, b) in trig_def:
    919 				code = b.join(code.split(a))
    920 		code = re_nl.sub('', code)
    921 		code = re_cpp.sub(repl, code)
    922 		return re_lines.findall(code)
    923 
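	# e.g. a file consisting of '#include "a.h"' yields [('include', '"a.h"')]:
	# comments are replaced by spaces first, then the directives are collected
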
    924 	def parse_lines(self, node):
    925 		try:
    926 			cache = node.ctx.preproc_cache_lines
    927 		except AttributeError:
    928 			cache = node.ctx.preproc_cache_lines = Utils.lru_cache(LINE_CACHE_SIZE)
    929 		try:
    930 			return cache[node]
    931 		except KeyError:
    932 			cache[node] = lines = self.filter_comments(node)
    933 			lines.append((POPFILE, ''))
    934 			lines.reverse()
    935 			return lines
    936 
    937 	def addlines(self, node):
    938 		"""
    939 		Add the lines from a header in the list of preprocessor lines to parse
    940 
    941 		:param node: header
    942 		:type node: :py:class:`waflib.Node.Node`
    943 		"""
    944 
    945 		self.currentnode_stack.append(node.parent)
    946 
    947 		self.count_files += 1
    948 		if self.count_files > recursion_limit:
    949 			# issue #812
    950 			raise PreprocError('recursion limit exceeded')
    951 
    952 		if Logs.verbose:
    953 			Logs.debug('preproc: reading file %r', node)
    954 		try:
    955 			lines = self.parse_lines(node)
    956 		except EnvironmentError:
    957 			raise PreprocError('could not read the file %r' % node)
    958 		except Exception:
    959 			if Logs.verbose > 0:
    960 				Logs.error('parsing %r failed %s', node, traceback.format_exc())
    961 		else:
    962 			self.lines.extend(lines)
    963 
    964 	def start(self, node, env):
    965 		"""
    966 		Preprocess a source file to obtain the dependencies, which are accumulated to :py:attr:`waflib.Tools.c_preproc.c_parser.nodes`
    967 		and :py:attr:`waflib.Tools.c_preproc.c_parser.names`.
    968 
    969 		:param node: source file
    970 		:type node: :py:class:`waflib.Node.Node`
    971 		:param env: config set containing additional defines to take into account
    972 		:type env: :py:class:`waflib.ConfigSet.ConfigSet`
    973 		"""
    974 		Logs.debug('preproc: scanning %s (in %s)', node.name, node.parent.name)
    975 
    976 		self.current_file = node
    977 		self.addlines(node)
    978 
    979 		# macros may be defined on the command-line, so they must be parsed as if they were part of the file
    980 		if env.DEFINES:
    981 			lst = format_defines(env.DEFINES)
    982 			lst.reverse()
    983 			self.lines.extend([('define', x) for x in lst])
    984 
    985 		while self.lines:
    986 			(token, line) = self.lines.pop()
    987 			if token == POPFILE:
    988 				self.count_files -= 1
    989 				self.currentnode_stack.pop()
    990 				continue
    991 
    992 			try:
    993 				state = self.state
    994 
    995 				# make certain we define the state if we are about to enter in an if block
    996 				if token[:2] == 'if':
    997 					state.append(undefined)
    998 				elif token == 'endif':
    999 					state.pop()
   1000 
   1001 				# skip lines when in a dead 'if' branch, wait for the endif
   1002 				if token[0] != 'e':
   1003 					if skipped in self.state or ignored in self.state:
   1004 						continue
   1005 
   1006 				if token == 'if':
   1007 					ret = eval_macro(tokenize(line), self.defs)
   1008 					if ret:
   1009 						state[-1] = accepted
   1010 					else:
   1011 						state[-1] = ignored
   1012 				elif token == 'ifdef':
   1013 					m = re_mac.match(line)
   1014 					if m and m.group() in self.defs:
   1015 						state[-1] = accepted
   1016 					else:
   1017 						state[-1] = ignored
   1018 				elif token == 'ifndef':
   1019 					m = re_mac.match(line)
   1020 					if m and m.group() in self.defs:
   1021 						state[-1] = ignored
   1022 					else:
   1023 						state[-1] = accepted
   1024 				elif token == 'include' or token == 'import':
   1025 					(kind, inc) = extract_include(line, self.defs)
   1026 					self.current_file = self.tryfind(inc, kind, env)
   1027 					if token == 'import':
   1028 						self.ban_includes.add(self.current_file)
   1029 				elif token == 'elif':
   1030 					if state[-1] == accepted:
   1031 						state[-1] = skipped
   1032 					elif state[-1] == ignored:
   1033 						if eval_macro(tokenize(line), self.defs):
   1034 							state[-1] = accepted
   1035 				elif token == 'else':
   1036 					if state[-1] == accepted:
   1037 						state[-1] = skipped
   1038 					elif state[-1] == ignored:
   1039 						state[-1] = accepted
   1040 				elif token == 'define':
   1041 					try:
   1042 						self.defs[self.define_name(line)] = line
   1043 					except AttributeError:
   1044 						raise PreprocError('Invalid define line %r' % line)
   1045 				elif token == 'undef':
   1046 					m = re_mac.match(line)
   1047 					if m and m.group() in self.defs:
   1048 						self.defs.__delitem__(m.group())
   1049 						#print "undef %s" % name
   1050 				elif token == 'pragma':
   1051 					if re_pragma_once.match(line.lower()):
   1052 						self.ban_includes.add(self.current_file)
   1053 			except Exception as e:
   1054 				if Logs.verbose:
   1055 					Logs.debug('preproc: line parsing failed (%s): %s %s', e, line, traceback.format_exc())
   1056 
   1057 	def define_name(self, line):
   1058 		"""
   1059 		:param line: define line
   1060 		:type line: string
   1061 		:rtype: string
   1062 		:return: the define name
   1063 		"""
   1064 		return re_mac.match(line).group()
   1065 
   1066 def scan(task):
   1067 	"""
   1068 	Get the dependencies using a c/c++ preprocessor, this is required for finding dependencies of the kind::
   1069 
   1070 		#include some_macro()
   1071 
   1072 	This function is bound as a task method on :py:class:`waflib.Tools.c.c` and :py:class:`waflib.Tools.cxx.cxx` for example
   1073 	"""
   1074 	try:
   1075 		incn = task.generator.includes_nodes
   1076 	except AttributeError:
   1077 		raise Errors.WafError('%r is missing a feature such as "c", "cxx" or "includes"' % task.generator)
   1078 
   1079 	if go_absolute:
   1080 		nodepaths = incn + [task.generator.bld.root.find_dir(x) for x in standard_includes]
   1081 	else:
   1082 		nodepaths = [x for x in incn if x.is_child_of(x.ctx.srcnode) or x.is_child_of(x.ctx.bldnode)]
   1083 
   1084 	tmp = c_parser(nodepaths)
   1085 	tmp.start(task.inputs[0], task.env)
   1086 	return (tmp.nodes, tmp.names)
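
# A build that also wants system headers tracked could set, for example:
#
#   from waflib.Tools import c_preproc
#   c_preproc.go_absolute = True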