
/python/helpers/profiler/ply/cpp.py

http://github.com/JetBrains/intellij-community
# -----------------------------------------------------------------------------
# cpp.py
#
# Author:  David Beazley (http://www.dabeaz.com)
# Copyright (C) 2007
# All rights reserved
#
# This module implements an ANSI-C style lexical preprocessor for PLY.
# -----------------------------------------------------------------------------
from __future__ import generators

# -----------------------------------------------------------------------------
# Default preprocessor lexer definitions.  These tokens are enough to get
# a basic preprocessor working.  Other modules may import these if they want.
# -----------------------------------------------------------------------------

tokens = (
    'CPP_ID', 'CPP_INTEGER', 'CPP_FLOAT', 'CPP_STRING', 'CPP_CHAR', 'CPP_WS',
    'CPP_COMMENT1', 'CPP_COMMENT2', 'CPP_POUND', 'CPP_DPOUND'
)

literals = "+-*/%|&~^<>=!?()[]{}.,;:\\\'\""
# Whitespace
def t_CPP_WS(t):
    r'\s+'
    t.lexer.lineno += t.value.count("\n")
    return t

t_CPP_POUND = r'\#'
t_CPP_DPOUND = r'\#\#'

# Identifier
t_CPP_ID = r'[A-Za-z_][\w_]*'

# Integer literal
def CPP_INTEGER(t):
    r'(((((0x)|(0X))[0-9a-fA-F]+)|(\d+))([uU][lL]|[lL][uU]|[uU]|[lL])?)'
    return t

t_CPP_INTEGER = CPP_INTEGER
# Floating literal
t_CPP_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'

# String literal
def t_CPP_STRING(t):
    r'\"([^\\\n]|(\\(.|\n)))*?\"'
    t.lexer.lineno += t.value.count("\n")
    return t

# Character constant 'c' or L'c'
def t_CPP_CHAR(t):
    r'(L)?\'([^\\\n]|(\\(.|\n)))*?\''
    t.lexer.lineno += t.value.count("\n")
    return t
# Comment
def t_CPP_COMMENT1(t):
    r'(/\*(.|\n)*?\*/)'
    ncr = t.value.count("\n")
    t.lexer.lineno += ncr
    # replace with one space or a number of '\n'
    t.type = 'CPP_WS'; t.value = '\n' * ncr if ncr else ' '
    return t

# Line comment
def t_CPP_COMMENT2(t):
    r'(//.*?(\n|$))'
    # replace with '\n'; the return was missing, which silently dropped
    # line comments (and their newline) instead of turning them into whitespace
    t.type = 'CPP_WS'; t.value = '\n'
    return t

def t_error(t):
    t.type = t.value[0]
    t.value = t.value[0]
    t.lexer.skip(1)
    return t
import re
import copy
import time
import os.path
# -----------------------------------------------------------------------------
# trigraph()
#
# Given an input string, this function replaces all trigraph sequences.
# The following mapping is used:
#
#     ??=    #
#     ??/    \
#     ??'    ^
#     ??(    [
#     ??)    ]
#     ??!    |
#     ??<    {
#     ??>    }
#     ??-    ~
# -----------------------------------------------------------------------------

_trigraph_pat = re.compile(r'''\?\?[=/\'\(\)\!<>\-]''')
_trigraph_rep = {
    '=': '#',
    '/': '\\',
    "'": '^',
    '(': '[',
    ')': ']',
    '!': '|',
    '<': '{',
    '>': '}',
    '-': '~'
}

def trigraph(input):
    return _trigraph_pat.sub(lambda g: _trigraph_rep[g.group()[-1]], input)
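
# Example (illustrative, not part of the original module): trigraph() is a
# plain textual substitution that runs before any tokenizing, e.g.
#
#     trigraph("??=define ARR(x) x??(0??)")
#     # -> "#define ARR(x) x[0]"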
# ------------------------------------------------------------------
# Macro object
#
# This object holds information about preprocessor macros
#
#     .name     - Macro name (string)
#     .value    - Macro value (a list of tokens)
#     .arglist  - List of argument names
#     .variadic - Boolean indicating whether or not variadic macro
#     .vararg   - Name of the variadic parameter
#
# When a macro is created, the macro replacement token sequence is
# pre-scanned and used to create patch lists that are later used
# during macro expansion
# ------------------------------------------------------------------

class Macro(object):
    def __init__(self, name, value, arglist=None, variadic=False):
        self.name = name
        self.value = value
        self.arglist = arglist
        self.variadic = variadic
        if variadic:
            self.vararg = arglist[-1]
        self.source = None
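
# Example (illustrative sketch; 'pp' stands for some Preprocessor instance,
# defined below): Macro objects are normally built by Preprocessor.define(),
# which also runs macro_prescan() to attach the .patch/.str_patch/
# .var_comma_patch lists used during expansion:
#
#     m  = Macro("PI", pp.tokenize("3.14159"))              # object-like
#     m2 = Macro("MAX", pp.tokenize("((a)>(b)?(a):(b))"),
#                arglist=["a", "b"])                        # function-like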
# ------------------------------------------------------------------
# Preprocessor object
#
# Object representing a preprocessor.  Contains macro definitions,
# include directories, and other information
# ------------------------------------------------------------------

class Preprocessor(object):
    def __init__(self, lexer=None):
        if lexer is None:
            # Fall back to the most recently built PLY lexer; this assumes
            # the caller has already imported and invoked ply.lex
            lexer = lex.lexer
        self.lexer = lexer
        self.macros = {}
        self.path = []
        self.temp_path = []

        # Probe the lexer for selected tokens
        self.lexprobe()

        tm = time.localtime()
        self.define("__DATE__ \"%s\"" % time.strftime("%b %d %Y", tm))
        self.define("__TIME__ \"%s\"" % time.strftime("%H:%M:%S", tm))
        self.parser = None
    # -----------------------------------------------------------------------------
    # tokenize()
    #
    # Utility function. Given a string of text, tokenize it into a list of tokens
    # -----------------------------------------------------------------------------

    def tokenize(self, text):
        tokens = []
        self.lexer.input(text)
        while True:
            tok = self.lexer.token()
            if not tok:
                break
            tokens.append(tok)
        return tokens
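
    # Example (illustrative): with the default lexer definitions above one
    # would expect:
    #
    #     toks = pp.tokenize("x + 42")
    #     [t.type for t in toks]
    #     # -> ['CPP_ID', 'CPP_WS', '+', 'CPP_WS', 'CPP_INTEGER']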
    # ---------------------------------------------------------------------
    # error()
    #
    # Report a preprocessor error/warning of some kind
    # ----------------------------------------------------------------------

    def error(self, file, line, msg):
        print("%s:%d %s" % (file, line, msg))
    # ----------------------------------------------------------------------
    # lexprobe()
    #
    # This method probes the preprocessor lexer object to discover
    # the token types of symbols that are important to the preprocessor.
    # If this works right, the preprocessor will simply "work"
    # with any suitable lexer regardless of how tokens have been named.
    # ----------------------------------------------------------------------

    def lexprobe(self):
        # Determine the token type for identifiers
        self.lexer.input("identifier")
        tok = self.lexer.token()
        if not tok or tok.value != "identifier":
            print("Couldn't determine identifier type")
        else:
            self.t_ID = tok.type

        # Determine the token type for integers
        self.lexer.input("12345")
        tok = self.lexer.token()
        if not tok or int(tok.value) != 12345:
            print("Couldn't determine integer type")
        else:
            self.t_INTEGER = tok.type
            self.t_INTEGER_TYPE = type(tok.value)

        # Determine the token type for strings enclosed in double quotes
        self.lexer.input("\"filename\"")
        tok = self.lexer.token()
        if not tok or tok.value != "\"filename\"":
            print("Couldn't determine string type")
        else:
            self.t_STRING = tok.type

        # Determine the token type for whitespace--if any
        self.lexer.input(" ")
        tok = self.lexer.token()
        if not tok or tok.value != " ":
            self.t_SPACE = None
        else:
            self.t_SPACE = tok.type

        # Determine the token type for newlines
        self.lexer.input("\n")
        tok = self.lexer.token()
        if not tok or tok.value != "\n":
            self.t_NEWLINE = None
            print("Couldn't determine token for newlines")
        else:
            self.t_NEWLINE = tok.type

        self.t_WS = (self.t_SPACE, self.t_NEWLINE)

        # Check for other characters used by the preprocessor
        chars = ['<', '>', '#', '##', '\\', '(', ')', ',', '.']
        for c in chars:
            self.lexer.input(c)
            tok = self.lexer.token()
            if not tok or tok.value != c:
                print("Unable to lex '%s' required for preprocessor" % c)
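
    # After a successful probe the preprocessor refers to token types only
    # through the attributes discovered here; with the default lexer above,
    # t_ID == 'CPP_ID', t_INTEGER == 'CPP_INTEGER', t_STRING == 'CPP_STRING'
    # and t_WS == ('CPP_WS', 'CPP_WS').  That indirection is what lets a
    # differently-named lexer "just work".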
    # ----------------------------------------------------------------------
    # add_path()
    #
    # Adds a search path to the preprocessor.
    # ----------------------------------------------------------------------

    def add_path(self, path):
        self.path.append(path)
    # ----------------------------------------------------------------------
    # group_lines()
    #
    # Given an input string, this function splits it into lines.  Trailing whitespace
    # is removed.  Any line ending with \ is grouped with the next line.  This
    # function forms the lowest level of the preprocessor---grouping text into
    # a line-by-line format.
    # ----------------------------------------------------------------------

    def group_lines(self, input):
        lex = self.lexer.clone()
        lines = [x.rstrip() for x in input.splitlines()]
        for i in range(len(lines)):   # range, not the Python-2-only xrange
            j = i + 1
            while lines[i].endswith('\\') and (j < len(lines)):
                lines[i] = lines[i][:-1] + lines[j]
                lines[j] = ""
                j += 1

        input = "\n".join(lines)
        lex.input(input)
        lex.lineno = 1

        current_line = []
        while True:
            tok = lex.token()
            if not tok:
                break
            current_line.append(tok)
            if tok.type in self.t_WS and '\n' in tok.value:
                yield current_line
                current_line = []

        if current_line:
            yield current_line
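
    # Example (illustrative): group_lines() first splices continuation lines,
    # then yields one token list per resulting line, so for
    #
    #     "#define X \\\n   1\nint y;\n"
    #
    # the first yielded list holds the tokens of '#define X    1' and a later
    # one holds the tokens of 'int y;'.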
    # ----------------------------------------------------------------------
    # tokenstrip()
    #
    # Remove leading/trailing whitespace tokens from a token list
    # ----------------------------------------------------------------------

    def tokenstrip(self, tokens):
        i = 0
        while i < len(tokens) and tokens[i].type in self.t_WS:
            i += 1
        del tokens[:i]
        i = len(tokens) - 1
        while i >= 0 and tokens[i].type in self.t_WS:
            i -= 1
        del tokens[i+1:]
        return tokens
    # ----------------------------------------------------------------------
    # collect_args()
    #
    # Collects comma separated arguments from a list of tokens.  The arguments
    # must be enclosed in parentheses.  Returns a tuple (tokencount,args,positions)
    # where tokencount is the number of tokens consumed, args is a list of arguments,
    # and positions is a list of integers containing the starting index of each
    # argument.  Each argument is represented by a list of tokens.
    #
    # When collecting arguments, leading and trailing whitespace is removed
    # from each argument.
    #
    # This function properly handles nested parentheses and commas---these do not
    # define new arguments.
    # ----------------------------------------------------------------------

    def collect_args(self, tokenlist):
        args = []
        positions = []
        current_arg = []
        nesting = 1
        tokenlen = len(tokenlist)

        # Search for the opening '('.
        i = 0
        while (i < tokenlen) and (tokenlist[i].type in self.t_WS):
            i += 1

        if (i < tokenlen) and (tokenlist[i].value == '('):
            positions.append(i+1)
        else:
            self.error(self.source, tokenlist[0].lineno, "Missing '(' in macro arguments")
            return 0, [], []

        i += 1
        while i < tokenlen:
            t = tokenlist[i]
            if t.value == '(':
                current_arg.append(t)
                nesting += 1
            elif t.value == ')':
                nesting -= 1
                if nesting == 0:
                    if current_arg:
                        args.append(self.tokenstrip(current_arg))
                        positions.append(i)
                    return i+1, args, positions
                current_arg.append(t)
            elif t.value == ',' and nesting == 1:
                args.append(self.tokenstrip(current_arg))
                positions.append(i+1)
                current_arg = []
            else:
                current_arg.append(t)
            i += 1

        # Missing end argument
        self.error(self.source, tokenlist[-1].lineno, "Missing ')' in macro arguments")
        return 0, [], []
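
    # Example (illustrative): nested parentheses do not split arguments, so
    # for the token list of "(a, f(b, c), d)" collect_args() returns three
    # arguments: the tokens of 'a', of 'f(b, c)', and of 'd'.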
    # ----------------------------------------------------------------------
    # macro_prescan()
    #
    # Examine the macro value (token sequence) and identify patch points
    # This is used to speed up macro expansion later on---we'll know
    # right away where to apply patches to the value to form the expansion
    # ----------------------------------------------------------------------

    def macro_prescan(self, macro):
        macro.patch = []              # Standard macro arguments
        macro.str_patch = []          # String conversion expansion
        macro.var_comma_patch = []    # Variadic macro comma patch
        i = 0
        while i < len(macro.value):
            if macro.value[i].type == self.t_ID and macro.value[i].value in macro.arglist:
                argnum = macro.arglist.index(macro.value[i].value)
                # Conversion of argument to a string
                if i > 0 and macro.value[i-1].value == '#':
                    macro.value[i] = copy.copy(macro.value[i])
                    macro.value[i].type = self.t_STRING
                    del macro.value[i-1]
                    macro.str_patch.append((argnum, i-1))
                    continue
                # Concatenation
                elif (i > 0 and macro.value[i-1].value == '##'):
                    macro.patch.append(('c', argnum, i-1))
                    del macro.value[i-1]
                    continue
                elif ((i+1) < len(macro.value) and macro.value[i+1].value == '##'):
                    macro.patch.append(('c', argnum, i))
                    i += 1
                    continue
                # Standard expansion
                else:
                    macro.patch.append(('e', argnum, i))
            elif macro.value[i].value == '##':
                if macro.variadic and (i > 0) and (macro.value[i-1].value == ',') and \
                        ((i+1) < len(macro.value)) and (macro.value[i+1].type == self.t_ID) and \
                        (macro.value[i+1].value == macro.vararg):
                    macro.var_comma_patch.append(i-1)
            i += 1
        macro.patch.sort(key=lambda x: x[2], reverse=True)
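
    # Example (illustrative): for '#define STR(x) #x' the prescan records the
    # argument in str_patch (stringification), while for '#define CAT(a,b) a##b'
    # both argument positions land in .patch as ('c', argnum, pos) entries,
    # marking them for unexpanded pasting instead of normal ('e') expansion.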
    # ----------------------------------------------------------------------
    # macro_expand_args()
    #
    # Given a Macro and list of arguments (each a token list), this method
    # returns an expanded version of a macro.  The return value is a token sequence
    # representing the replacement macro tokens
    # ----------------------------------------------------------------------

    def macro_expand_args(self, macro, args):
        # Make a copy of the macro token sequence
        rep = [copy.copy(_x) for _x in macro.value]

        # Make string expansion patches.  These do not alter the length of the replacement sequence
        str_expansion = {}
        for argnum, i in macro.str_patch:
            if argnum not in str_expansion:
                str_expansion[argnum] = ('"%s"' % "".join([x.value for x in args[argnum]])).replace("\\", "\\\\")
            rep[i] = copy.copy(rep[i])
            rep[i].value = str_expansion[argnum]

        # Make the variadic macro comma patch.  If the variadic macro argument
        # is empty, we get rid of the comma that precedes it in the replacement.
        comma_patch = False
        if macro.variadic and not args[-1]:
            for i in macro.var_comma_patch:
                rep[i] = None
                comma_patch = True

        # Make all other patches.  The order of these matters.  It is assumed that the patch list
        # has been sorted in reverse order of patch location since replacements will cause the
        # size of the replacement sequence to expand from the patch point.
        expanded = {}
        for ptype, argnum, i in macro.patch:
            # Concatenation.  Argument is left unexpanded
            if ptype == 'c':
                rep[i:i+1] = args[argnum]
            # Normal expansion.  Argument is macro expanded first
            elif ptype == 'e':
                if argnum not in expanded:
                    expanded[argnum] = self.expand_macros(args[argnum])
                rep[i:i+1] = expanded[argnum]

        # Get rid of removed comma if necessary
        if comma_patch:
            rep = [_i for _i in rep if _i]

        return rep
    # ----------------------------------------------------------------------
    # expand_macros()
    #
    # Given a list of tokens, this function performs macro expansion.
    # The expanded argument is a dictionary that contains macros already
    # expanded.  This is used to prevent infinite recursion.
    # ----------------------------------------------------------------------

    def expand_macros(self, tokens, expanded=None):
        if expanded is None:
            expanded = {}
        i = 0
        while i < len(tokens):
            t = tokens[i]
            if t.type == self.t_ID:
                if t.value in self.macros and t.value not in expanded:
                    # Yes, we found a macro match
                    expanded[t.value] = True
                    m = self.macros[t.value]
                    if not m.arglist:
                        # A simple macro
                        ex = self.expand_macros([copy.copy(_x) for _x in m.value], expanded)
                        for e in ex:
                            e.lineno = t.lineno
                        tokens[i:i+1] = ex
                        i += len(ex)
                    else:
                        # A macro with arguments
                        j = i + 1
                        while j < len(tokens) and tokens[j].type in self.t_WS:
                            j += 1
                        # Bounds check added: a function-like macro name at the
                        # very end of the token list is simply left unexpanded
                        if j < len(tokens) and tokens[j].value == '(':
                            tokcount, args, positions = self.collect_args(tokens[j:])
                            if not m.variadic and len(args) != len(m.arglist):
                                self.error(self.source, t.lineno, "Macro %s requires %d arguments" % (t.value, len(m.arglist)))
                                i = j + tokcount
                            elif m.variadic and len(args) < len(m.arglist)-1:
                                if len(m.arglist) > 2:
                                    self.error(self.source, t.lineno, "Macro %s must have at least %d arguments" % (t.value, len(m.arglist)-1))
                                else:
                                    self.error(self.source, t.lineno, "Macro %s must have at least %d argument" % (t.value, len(m.arglist)-1))
                                i = j + tokcount
                            else:
                                if m.variadic:
                                    if len(args) == len(m.arglist)-1:
                                        args.append([])
                                    else:
                                        args[len(m.arglist)-1] = tokens[j+positions[len(m.arglist)-1]:j+tokcount-1]
                                        del args[len(m.arglist):]

                                # Get macro replacement text
                                rep = self.macro_expand_args(m, args)
                                rep = self.expand_macros(rep, expanded)
                                for r in rep:
                                    r.lineno = t.lineno
                                tokens[i:j+tokcount] = rep
                                i += len(rep)
                    del expanded[t.value]
                    continue
                elif t.value == '__LINE__':
                    t.type = self.t_INTEGER
                    t.value = self.t_INTEGER_TYPE(t.lineno)
            i += 1
        return tokens
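
    # Example (illustrative): given '#define SQR(x) ((x)*(x))', passing the
    # tokens of "SQR(n+1)" through expand_macros() rewrites the list in place
    # into the tokens of "((n+1)*(n+1))".  The 'expanded' dict is what keeps a
    # self-referential macro such as '#define X X' from recursing forever.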
    # ----------------------------------------------------------------------
    # evalexpr()
    #
    # Evaluate an expression token sequence for the purposes of evaluating
    # integral expressions.
    # ----------------------------------------------------------------------

    def evalexpr(self, tokens):
        # tokens = tokenize(line)
        # Search for defined macros
        i = 0
        while i < len(tokens):
            if tokens[i].type == self.t_ID and tokens[i].value == 'defined':
                j = i + 1
                needparen = False
                result = "0L"
                while j < len(tokens):
                    if tokens[j].type in self.t_WS:
                        j += 1
                        continue
                    elif tokens[j].type == self.t_ID:
                        if tokens[j].value in self.macros:
                            result = "1L"
                        else:
                            result = "0L"
                        if not needparen:
                            break
                    elif tokens[j].value == '(':
                        needparen = True
                    elif tokens[j].value == ')':
                        break
                    else:
                        self.error(self.source, tokens[i].lineno, "Malformed defined()")
                    j += 1
                tokens[i].type = self.t_INTEGER
                tokens[i].value = self.t_INTEGER_TYPE(result)
                del tokens[i+1:j+1]
            i += 1

        tokens = self.expand_macros(tokens)
        for i, t in enumerate(tokens):
            if t.type == self.t_ID:
                tokens[i] = copy.copy(t)
                tokens[i].type = self.t_INTEGER
                tokens[i].value = self.t_INTEGER_TYPE("0L")
            elif t.type == self.t_INTEGER:
                tokens[i] = copy.copy(t)
                # Strip off any trailing suffixes
                tokens[i].value = str(tokens[i].value)
                while tokens[i].value[-1] not in "0123456789abcdefABCDEF":
                    tokens[i].value = tokens[i].value[:-1]

        expr = "".join([str(x.value) for x in tokens])
        expr = expr.replace("&&", " and ")
        expr = expr.replace("||", " or ")
        expr = expr.replace("!", " not ")
        try:
            result = eval(expr)
        except Exception:   # StandardError no longer exists in Python 3
            self.error(self.source, tokens[0].lineno, "Couldn't evaluate expression")
            result = 0
        return result
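
    # Example (illustrative): for '#if defined(FOO) && BAR > 2', with FOO
    # defined and BAR defined as 3, the token list reduces step by step to
    # the string "1 and 3 > 2" (the defined() test becomes 0L/1L, integer
    # suffixes are stripped, and && / || / ! become and / or / not) before
    # being handed to Python's eval().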
    # ----------------------------------------------------------------------
    # parsegen()
    #
    # Parse an input string.
    # ----------------------------------------------------------------------

    def parsegen(self, input, source=None):
        # Replace trigraph sequences
        t = trigraph(input)
        lines = self.group_lines(t)

        if not source:
            source = ""

        self.define("__FILE__ \"%s\"" % source)
        self.source = source

        chunk = []
        enable = True
        iftrigger = False
        ifstack = []

        for x in lines:
            for i, tok in enumerate(x):
                if tok.type not in self.t_WS:
                    break
            if tok.value == '#':
                # Preprocessor directive

                # insert necessary whitespace instead of eaten tokens
                for tok in x:
                    if tok.type in self.t_WS and '\n' in tok.value:
                        chunk.append(tok)

                dirtokens = self.tokenstrip(x[i+1:])
                if dirtokens:
                    name = dirtokens[0].value
                    args = self.tokenstrip(dirtokens[1:])
                else:
                    name = ""
                    args = []

                if name == 'define':
                    if enable:
                        for tok in self.expand_macros(chunk):
                            yield tok
                        chunk = []
                        self.define(args)
                elif name == 'include':
                    if enable:
                        for tok in self.expand_macros(chunk):
                            yield tok
                        chunk = []
                        oldfile = self.macros['__FILE__']
                        for tok in self.include(args):
                            yield tok
                        self.macros['__FILE__'] = oldfile
                        self.source = source
                elif name == 'undef':
                    if enable:
                        for tok in self.expand_macros(chunk):
                            yield tok
                        chunk = []
                        self.undef(args)
                elif name == 'ifdef':
                    ifstack.append((enable, iftrigger))
                    if enable:
                        if not args[0].value in self.macros:
                            enable = False
                            iftrigger = False
                        else:
                            iftrigger = True
                elif name == 'ifndef':
                    ifstack.append((enable, iftrigger))
                    if enable:
                        if args[0].value in self.macros:
                            enable = False
                            iftrigger = False
                        else:
                            iftrigger = True
                elif name == 'if':
                    ifstack.append((enable, iftrigger))
                    if enable:
                        result = self.evalexpr(args)
                        if not result:
                            enable = False
                            iftrigger = False
                        else:
                            iftrigger = True
                elif name == 'elif':
                    if ifstack:
                        if ifstack[-1][0]:      # We only pay attention if outer "if" allows this
                            if enable:          # If already true, we flip enable False
                                enable = False
                            elif not iftrigger: # If False, but not triggered yet, we'll check expression
                                result = self.evalexpr(args)
                                if result:
                                    enable = True
                                    iftrigger = True
                    else:
                        self.error(self.source, dirtokens[0].lineno, "Misplaced #elif")
                elif name == 'else':
                    if ifstack:
                        if ifstack[-1][0]:
                            if enable:
                                enable = False
                            elif not iftrigger:
                                enable = True
                                iftrigger = True
                    else:
                        self.error(self.source, dirtokens[0].lineno, "Misplaced #else")
                elif name == 'endif':
                    if ifstack:
                        enable, iftrigger = ifstack.pop()
                    else:
                        self.error(self.source, dirtokens[0].lineno, "Misplaced #endif")
                else:
                    # Unknown preprocessor directive
                    pass
            else:
                # Normal text
                if enable:
                    chunk.extend(x)

        for tok in self.expand_macros(chunk):
            yield tok
        chunk = []
    # ----------------------------------------------------------------------
    # include()
    #
    # Implementation of file-inclusion
    # ----------------------------------------------------------------------

    def include(self, tokens):
        # Try to extract the filename and then process an include file
        if not tokens:
            return
        if tokens[0].value != '<' and tokens[0].type != self.t_STRING:
            tokens = self.expand_macros(tokens)

        if tokens[0].value == '<':
            # Include <...>
            i = 1
            while i < len(tokens):
                if tokens[i].value == '>':
                    break
                i += 1
            else:
                print("Malformed #include <...>")
                return
            filename = "".join([x.value for x in tokens[1:i]])
            path = self.path + [""] + self.temp_path
        elif tokens[0].type == self.t_STRING:
            filename = tokens[0].value[1:-1]
            path = self.temp_path + [""] + self.path
        else:
            print("Malformed #include statement")
            return

        for p in path:
            iname = os.path.join(p, filename)
            try:
                with open(iname, "r") as f:
                    data = f.read()
                dname = os.path.dirname(iname)
                if dname:
                    self.temp_path.insert(0, dname)
                for tok in self.parsegen(data, filename):
                    yield tok
                if dname:
                    del self.temp_path[0]
                break
            except IOError:
                pass
        else:
            print("Couldn't find '%s'" % filename)
    # ----------------------------------------------------------------------
    # define()
    #
    # Define a new macro
    # ----------------------------------------------------------------------

    def define(self, tokens):
        if isinstance(tokens, str):   # Python 3: 'unicode' is gone; str covers it
            tokens = self.tokenize(tokens)

        linetok = tokens
        try:
            name = linetok[0]
            if len(linetok) > 1:
                mtype = linetok[1]
            else:
                mtype = None
            if not mtype:
                m = Macro(name.value, [])
                self.macros[name.value] = m
            elif mtype.type in self.t_WS:
                # A normal macro
                m = Macro(name.value, self.tokenstrip(linetok[2:]))
                self.macros[name.value] = m
            elif mtype.value == '(':
                # A macro with arguments
                tokcount, args, positions = self.collect_args(linetok[1:])
                variadic = False
                for a in args:
                    if variadic:
                        print("No more arguments may follow a variadic argument")
                        break
                    astr = "".join([str(_i.value) for _i in a])
                    if astr == "...":
                        variadic = True
                        a[0].type = self.t_ID
                        a[0].value = '__VA_ARGS__'
                        del a[1:]
                        continue
                    elif astr[-3:] == "..." and a[0].type == self.t_ID:
                        variadic = True
                        del a[1:]
                        # If, for some reason, "." is part of the identifier, strip off the name for the purposes
                        # of macro expansion
                        if a[0].value[-3:] == '...':
                            a[0].value = a[0].value[:-3]
                        continue
                    if len(a) > 1 or a[0].type != self.t_ID:
                        print("Invalid macro argument")
                        break
                else:
                    mvalue = self.tokenstrip(linetok[1+tokcount:])
                    i = 0
                    while i < len(mvalue):
                        if i+1 < len(mvalue):
                            if mvalue[i].type in self.t_WS and mvalue[i+1].value == '##':
                                del mvalue[i]
                                continue
                            elif mvalue[i].value == '##' and mvalue[i+1].type in self.t_WS:
                                del mvalue[i+1]
                        i += 1
                    m = Macro(name.value, mvalue, [x[0].value for x in args], variadic)
                    self.macro_prescan(m)
                    self.macros[name.value] = m
            else:
                print("Bad macro definition")
        except LookupError:
            print("Bad macro definition")
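
    # Example (illustrative): every form of macro definition funnels through
    # define(); strings (as used in __init__) are tokenized first:
    #
    #     pp.define("DEBUG")                        # object-like, empty value
    #     pp.define("PI 3.14159")                   # object-like with a value
    #     pp.define("MAX(a,b) ((a)>(b)?(a):(b))")   # function-like macro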
    # ----------------------------------------------------------------------
    # undef()
    #
    # Undefine a macro
    # ----------------------------------------------------------------------

    def undef(self, tokens):
        id = tokens[0].value
        try:
            del self.macros[id]
        except LookupError:
            pass
    # ----------------------------------------------------------------------
    # parse()
    #
    # Parse input text.
    # ----------------------------------------------------------------------

    def parse(self, input, source=None, ignore={}):
        self.ignore = ignore
        self.parser = self.parsegen(input, source)

    # ----------------------------------------------------------------------
    # token()
    #
    # Method to return individual tokens
    # ----------------------------------------------------------------------

    def token(self):
        try:
            while True:
                tok = next(self.parser)
                if tok.type not in self.ignore:
                    return tok
        except StopIteration:
            self.parser = None
        return None
if __name__ == '__main__':
    import ply.lex as lex
    lexer = lex.lex()

    # Run a preprocessor
    import sys
    f = open(sys.argv[1])
    input = f.read()
    f.close()

    p = Preprocessor(lexer)
    p.parse(input, sys.argv[1])
    while True:
        tok = p.token()
        if not tok:
            break
        print(p.source, tok)