PageRenderTime 56ms CodeModel.GetById 20ms RepoModel.GetById 1ms app.codeStats 0ms

/thirtytwo-build.py

https://bitbucket.org/lifthrasiir/parang-thirtytwo
Python | 1054 lines | 954 code | 56 blank | 44 comment | 153 complexity | a2ea87ed60276ef46812a9a36a96199d MD5 | raw file
  1. # thirtytwo-build.py -- mechanically translates C header into Python
  2. # Copyright (c) 2009, Kang Seonghoon. See thirtytwo package for full license.
  3. #
  4. # Mainly used for machine generation of thirtytwo modules: thirtytwo._consts,
  5. # thirtytwo._types, thirtytwo._funcs. They are accompanied with hand-written
  6. # additional routines at thirtytwo.consts etc.
  7. #
  8. # Features and limitations:
  9. # - Intended to parse MinGW w32api headers.
  10. # - Requires Windows environment, the most recent version possible.
  11. # - Parses a certain subset of the C language.
  12. # - Handles both object-like and function-like macros.
  13. # - Handles conditional preprocessor blocks correctly, and can even be
  14. # configured to ignore certain blocks entirely.
  15. # - Handles "#include" and "#pragma pack()" directives as well.
  16. # - Has a partial support for ## operator and no support for # operator.
  17. # - Parses almost all C type definitions and function prototypes correctly,
  18. # as long as there are no duplicated symbols.
  19. # - Translates majority of C header code into correct Python code.
  20. # - Simple C expression with numbers, strings, type casts, arithmetic/bitwise
  21. # operators, comparisons and function call can be translated.
  22. # - Typedefs, macros for types, structures, unions and enums are translated
  23. # into appropriate definition code.
  24. # - Function definition is replaced with import code: correct library is
  25. # searched from predefined list, so recent Windows is required for
  26. # generation. (Well I'm using Windows XP however.)
  27. # - Incorrectly generated Python code is commented and reported for
  28. # later inspection.
  29. # - Complex C symbols (i.e. type of anonymous struct, separate namespace for
  30. # struct/union/enum etc.) are mangled. Typedef'ed ones are not affected.
  31. import os
  32. import sys
  33. import re
  34. import ctypes
  35. import cStringIO as stringio
  36. TOKEN_PATTERN = re.compile(r'''
  37. # multicharacter operator or punctuator
  38. (?: [+\-*/%&|^<=>!]= | && | \|\| | <<=? | >>=? |
  39. \+\+ | -- | -> | \.\.\. | \#\# ) |
  40. # numbers
  41. \.?[0-9](?:[eE][-+]|[A-Za-z0-9.])* |
  42. # character & string literals
  43. L?'(?:\\["'?\\abfnrtv]|\\[0-7]{1,3}|\\x[0-9a-fA-F]+|[^'"\\])+' |
  44. L?"(?:\\["'?\\abfnrtv]|\\[0-7]{1,3}|\\x[0-9a-fA-F]+|[^'"\\])*" |
  45. # identifier & keywords
  46. [A-Za-z_][A-Za-z_0-9]* |
  47. # newline (only for pp directive, otherwise ignored)
  48. \n+ |
  49. # whitespaces and comments (only for pp directive, otherwise ignored)
  50. (?: //[^\n]* | /\*.*?\*/ | [ \t\v]+ )+ |
  51. # other characters
  52. .
  53. ''', re.X | re.S)
  54. BINARY_OPERATORS = set([
  55. '+', '-', '*', '/', '%', '&', '|', '^', '<<', '>>',
  56. '=', '+=', '-=', '*=', '/=', '%=', '&=', '|=', '^=', '<<=', '>>=',
  57. '==', '!=', '<', '<=', '>', '>=', 'or', 'and', ',',
  58. # this list also includes ternary operators which can be thought as
  59. # set of binary operators only seen in the certain form.
  60. '?', ':',
  61. ])
  62. def is_ident(s):
  63. return (s != '' and not '0'<=s[0]<='9' and
  64. all('A'<=i<='Z' or 'a'<=i<='z' or '0'<=i<='9' or i=='_' for i in s))
  65. # type placeholders
  66. VOID = '*void*'
  67. VARARG = '*vararg*'
  68. ANYFUNCTION = '*anyfunc*'
  69. PRIMTYPE_MAPPINGS = dict((tuple(sorted(k)), v) for k, v in [
  70. (('bool',), '_X_bool'),
  71. (('char',), '_X_char'),
  72. (('signed', 'char'), '_X_char'),
  73. (('unsigned', 'char'), '_X_ubyte'), # no c_uchar
  74. (('wchar_t',), '_X_wchar'),
  75. (('int',), '_X_int'),
  76. (('signed',), '_X_int'),
  77. (('signed', 'int'), '_X_int'),
  78. (('unsigned',), '_X_uint'),
  79. (('unsigned', 'int'), '_X_uint'),
  80. (('short',), '_X_short'),
  81. (('short', 'int'), '_X_short'),
  82. (('signed', 'short'), '_X_short'),
  83. (('signed', 'short', 'int'), '_X_short'),
  84. (('unsigned', 'short'), '_X_ushort'),
  85. (('unsigned', 'short', 'int'), '_X_ushort'),
  86. (('long',), '_X_long'),
  87. (('long', 'int'), '_X_long'),
  88. (('signed', 'long'), '_X_long'),
  89. (('signed', 'long', 'int'), '_X_long'),
  90. (('unsigned', 'long'), '_X_ulong'),
  91. (('unsigned', 'long', 'int'), '_X_ulong'),
  92. (('long', 'long'), '_X_longlong'),
  93. (('long', 'long', 'int'), '_X_longlong'),
  94. (('signed', 'long', 'long'), '_X_longlong'),
  95. (('signed', 'long', 'long', 'int'), '_X_longlong'),
  96. (('unsigned', 'long', 'long'), '_X_ulonglong'),
  97. (('unsigned', 'long', 'long', 'int'), '_X_ulonglong'),
  98. (('float',), '_X_float'),
  99. (('double',), '_X_double'),
  100. (('long', 'double'), '_X_longdouble'),
  101. (('size_t',), '_X_size_t'),
  102. (('int8_t',), '_X_byte'),
  103. (('int16_t',), '_X_int16'),
  104. (('int32_t',), '_X_int32'),
  105. (('int64_t',), '_X_int64'),
  106. (('uint8_t',), '_X_ubyte'),
  107. (('uint16_t',), '_X_uint16'),
  108. (('uint32_t',), '_X_uint32'),
  109. (('uint64_t',), '_X_uint64'),
  110. (('__int64',), '_X_int64'),
  111. (('signed', '__int64'), '_X_int64'),
  112. (('unsigned', '__int64'), '_X_uint64'),
  113. (('void',), VOID),
  114. (('va_list',), '_X_void_p'),
  115. ((), None), # only storage class or CV; possible for complex type
  116. ])
  117. class bufferiter(object):
  118. def __init__(self, iterable):
  119. self.next = iter(iterable).next
  120. self.buffer = []
  121. def __call__(self):
  122. if self.buffer:
  123. return self.buffer.pop()
  124. else:
  125. return self.next()
  126. def peek(self):
  127. try:
  128. next = self()
  129. self.putback(next)
  130. return next
  131. except StopIteration:
  132. return None
  133. def putback(self, value):
  134. self.buffer.append(value)
  135. def skip(self, func=None):
  136. next = self()
  137. if func is None:
  138. while not next: next = self()
  139. else:
  140. while func(next): next = self()
  141. return next
  142. def until(self, func):
  143. result = [self()]
  144. while func(result[-1]): result.append(self())
  145. self.putback(result[-1])
  146. return result[:-1]
  147. class Processor(object):
  148. def __init__(self, paths, allowed_headers, allowed_dlls):
  149. self.paths = paths
  150. self.allowed_headers = allowed_headers
  151. self.allowed_dlls = {}
  152. for dll in allowed_dlls:
  153. try:
  154. self.allowed_dlls[dll] = getattr(ctypes.windll, dll)
  155. except:
  156. print >>sys.stderr, 'Warning: Library %r doesn\'t exist.' % dll
  157. self.ppsymbols = {} # (as_defined, as_undefined, args or None, tokens)
  158. self.symbols = {} # (type, defn)
  159. self.init_output('thirtytwo/')
  160. def init_output(self, prefix):
  161. # thirtytwo._consts module: almost all #define, enums
  162. self.consts_ctx = {}
  163. self.consts_buf = open(os.path.join(prefix, '_consts.py'), 'wb')
  164. self.consts_buf.write('# generated by thirtytwo-build. DO NOT MODIFY BY HAND!\n'
  165. 'from thirtytwo._support import *\n'
  166. 'from thirtytwo._types import *\n\n')
  167. # thirtytwo._types module: typedef, struct, union
  168. self.types_ctx = {}
  169. self.types_buf = open(os.path.join(prefix, '_types.py'), 'wb')
  170. self.types_buf.write('# generated by thirtytwo-build. DO NOT MODIFY BY HAND!\n'
  171. 'from thirtytwo._support import *\n\n')
  172. # thirtytwo._funcs module: function decl, function-wrapping #define
  173. self.funcs_ctx = {}
  174. self.funcs_buf = open(os.path.join(prefix, '_funcs.py'), 'wb')
  175. self.funcs_buf.write('# generated by thirtytwo-build. DO NOT MODIFY BY HAND!\n'
  176. 'from thirtytwo._support import *\n'
  177. 'from thirtytwo._consts import *\n'
  178. 'from thirtytwo._types import *\n\n')
  179. exec 'from thirtytwo._support import *\n' in self.consts_ctx, self.consts_ctx
  180. self.types_ctx.update(self.consts_ctx)
  181. self.funcs_ctx.update(self.consts_ctx)
  182. def emit_py(self, line, ctx, buf):
  183. # to treat "#" or "##" symbol outside the string as error,
  184. # we replaces them with "\#" or "\#\#" so it doesn't affect
  185. # string. (unless "\#" sequence is already present in the
  186. # string, but we can make sure it cannot happen)
  187. line = line[:1] + line[1:].replace('#', '\\#')
  188. try:
  189. exec line in ctx
  190. buf.write(line + '\n')
  191. except Exception, e:
  192. buf.write('##%s[%s]: %s\n' % (e.__class__.__name__,
  193. ' '.join(str(e).split()), line))
  194. def emit_const(self, line):
  195. self.emit_py(line, self.consts_ctx, self.consts_buf)
  196. self.funcs_ctx.update(self.consts_ctx)
  197. def emit_type(self, line):
  198. self.emit_py(line, self.types_ctx, self.types_buf)
  199. self.consts_ctx.update(self.types_ctx)
  200. self.funcs_ctx.update(self.types_ctx)
  201. def emit_func(self, line):
  202. self.emit_py(line, self.funcs_ctx, self.funcs_buf)
  203. def add_ppsymbol(self, name, tokens, tokens2=None,
  204. as_defined=True, as_undefined=None):
  205. args = None
  206. if tokens2 is not None:
  207. args = tokens
  208. tokens = tokens2
  209. self.ppsymbols[name] = (as_defined, as_undefined, args,
  210. filter(None, self.tokenize(tokens)))
  211. def to_pyname(self, name):
  212. if ' ' in name:
  213. parts = name.split()
  214. counter = parts.pop() if parts[-1].isdigit() else '0'
  215. name = '_X%sX_%s' % ({'*global*': 'g', 'struct': 's',
  216. 'union': 'u', 'enum': 'e'}[parts[0]], counter)
  217. if len(parts) > 1: name += parts[1]
  218. return name
  219. elif name.startswith('_X') and not name.startswith('_X_'):
  220. return '_XxX' + name[2:]
  221. else:
  222. return name
# Build the Python source text of the type described by a parsed C
# declaration.
#   primtype -- base type name, or the VOID placeholder
#   props    -- declarator properties, each one of ('*',),
#               ('()', convention, args) or ('[]', tokens); they are
#               applied starting from the last one
#   context  -- namespace handed to to_pyexpr() when an array length is
#               translated
# Returns the type expression as a string. The ANYFUNCTION placeholder
# stands for a function whose prototype is missing or has varargs.
  223. def to_pytype(self, primtype, props, context):
  224. if primtype is VOID:
  225. # this is mainly for function return type.
  226. pytype = 'None'
  227. else:
  228. pytype = self.to_pyname(primtype)
# skipptr is consulted by the '*' branch: while it is set, a pointer
# property does not wrap the type in _X_POINTER(...).
  229. skipptr = False
  230. for prop in reversed(props):
  231. if prop[0] == '*':
# a pointer to an untyped function degrades to a plain void pointer
  232. if pytype is ANYFUNCTION:
  233. pytype = '_X_void_p'
  234. elif not skipptr:
  235. pytype = '_X_POINTER(%s)' % pytype
  236. skipptr = False
  237. elif prop[0] == '()':
  238. # sometimes the exact prototype is not available in ctypes...
  239. if prop[2] is None or any(t is VARARG for _,t,_ in prop[2]):
  240. pytype = ANYFUNCTION
  241. else:
# prop[1] is the calling convention recorded by the parser
  242. if prop[1] == 'stdcall':
  243. ctor = '_X_WINFUNCTYPE'
  244. else:
  245. ctor = '_X_CFUNCTYPE'
# the type built so far is the return type; argument types follow it
  246. argtypes = [pytype]
  247. for aname, aprimtype, aprops in prop[2]:
  248. argtypes.append(self.to_pytype(aprimtype, aprops, context))
  249. pytype = '%s(%s)' % (ctor, ', '.join(argtypes))
  250. skipptr = True
  251. elif prop[0] == '[]':
# array: element type multiplied by the parenthesized length expression
  252. pytype = '%s*%s' % (pytype,
  253. self.to_pyexpr(['('] + prop[1] + [')'], context))
  254. else:
  255. assert False
# a pending skipptr means a function property was not followed by a pointer
  256. assert not skipptr
  257. return pytype
# Translate a list of C expression tokens into Python expression text.
#   tokens  -- C tokens (not modified; a copy is edited while casts are
#              rewritten)
#   context -- namespace in which the result is test-evaluated and which is
#              handed to to_pytype() for cast types
# Returns a string: repr() of the evaluated value when evaluation succeeds
# and yields a number/string (or an object whose .value is one), otherwise
# the translated expression text itself.
  258. def to_pyexpr(self, tokens, context):
  259. tokens = tokens[:] # type cast conversion changes tokens.
  260. expr = []
  261. i = 0
# first pass: map each C token to its Python spelling
  262. while i < len(tokens):
  263. if tokens[i] == '&&':
  264. expr.append('and')
  265. elif tokens[i] == '||':
  266. expr.append('or')
  267. elif tokens[i] == '!':
  268. expr.append('not')
  269. elif tokens[i] == '->':
  270. expr.append('.')
# character literals become their integer code, string literals their repr;
# the C literal is evaluated as a Python literal (with a "u" prefix in
# place of "L" for wide ones)
  271. elif tokens[i].startswith("'"):
  272. expr.append(repr(ord(eval(tokens[i]))))
  273. elif tokens[i].startswith('"'):
  274. expr.append(repr(eval(tokens[i])))
  275. elif tokens[i].startswith("L'"):
  276. expr.append(repr(ord(eval('u' + tokens[i][1:]))))
  277. elif tokens[i].startswith('L"'):
  278. expr.append(repr(eval('u' + tokens[i][1:])))
# NOTE(review): this compares the whole token string, so a multi-character
# token that starts with '9' (e.g. '99') sorts above '9' and does not take
# this branch -- confirm whether tokens[i][0] was intended.
  279. elif '0' <= tokens[i] <= '9':
# numeric literal: lower-case it and strip the C suffix characters
  280. number = tokens[i].lower()
  281. if '.' in number:
  282. number = number.rstrip('f')
  283. if number.startswith('0x'):
  284. number = repr(float.fromhex(number))
  285. else:
  286. number = number.rstrip('sul')
  287. expr.append(number)
  288. elif tokens[i] == '(': # can be type cast
# try to parse what follows "(" as a type name; any failure means this is
# an ordinary parenthesis
  289. try:
  290. iter = bufferiter(tokens[i+1:])
  291. primtype, storage, props, name = \
  292. self.parse_type(iter, declaration=False)
  293. except Exception:
  294. iscast = False
  295. else:
  296. assert not storage and not name
  297. istypesym = lambda t: self.symbols.get(t, (None,))[0] in \
  298. ('typedef', 'struct', 'union',
  299. 'enum', 'builtin')
# a cast needs ")" right after the type, and the type must be a known type
# symbol -- or a bare "void", whose cast is dropped altogether
  300. iscast = iter.peek() == ')'
  301. if iscast:
  302. ignorecast = primtype is VOID and not props
  303. iscast = istypesym(primtype) or ignorecast
  304. if iscast:
  305. # type cast expr ends at the next unmatching )/]/} or
  306. # binary/ternary operators whichever comes first.
# typeend: index of the ")" closing the type; exprend: index just past the
# operand of the cast
  307. depth = 0
  308. typeend = None
  309. exprend = i
  310. while exprend < len(tokens):
  311. if tokens[exprend] in '([{':
  312. depth += 1
  313. elif tokens[exprend] in ')]}':
  314. depth -= 1
  315. if depth == 0 and typeend is None: typeend = exprend
  316. if depth < 0: break
  317. elif depth == 0:
  318. # strictly binary operator only
# NOTE(review): the tokens scanned here are still in their C spelling, and
# BINARY_OPERATORS lists 'and'/'or' rather than '&&'/'||', so those two do
# not end the cast operand -- confirm whether that is intended.
# An operator directly after the type's ")" is taken as unary and skipped.
  319. if tokens[exprend] in BINARY_OPERATORS and \
  320. exprend - 1 > typeend:
  321. break
  322. exprend += 1
  323. assert depth <= 0
# a cast with nothing after the type is not a cast
  324. iscast = typeend is not None and typeend + 1 < exprend
  325. if iscast:
  326. # XXX we are "pretending" pytype as C syntax!
  327. if not ignorecast: # ignore (void)
  328. expr.append('_X_cast')
  329. expr.append('(')
  330. expr += self.tokenize(self.to_pytype(primtype, props, context))
  331. expr.append(',')
# close the call after the operand, then continue right after the type
  332. tokens.insert(exprend, ')')
  333. i = typeend
  334. else:
  335. expr.append(tokens[i])
  336. else:
  337. expr.append(tokens[i])
  338. i += 1
# second pass: join the tokens, inserting spaces between adjacent
# identifiers and around binary operators (no space before a comma, none
# after a minus that directly follows an opening bracket)
  339. exprstr = []
  340. prevtoken = pprevtoken = '@'
  341. for token in expr:
  342. if is_ident(prevtoken) and is_ident(token):
  343. exprstr.append(' ')
  344. elif prevtoken in BINARY_OPERATORS and \
  345. not (prevtoken == '-' and pprevtoken in '([{'):
  346. exprstr.append(' ')
  347. elif token in BINARY_OPERATORS and token != ',':
  348. exprstr.append(' ')
  349. exprstr.append(token)
  350. pprevtoken = prevtoken
  351. prevtoken = token
  352. exprstr = ''.join(exprstr)
# constant folding: any failure to evaluate falls back to the raw text
  353. try:
  354. result = eval(exprstr, context, context)
  355. if isinstance(result, (int, long, float, str, unicode)) or \
  356. isinstance(result.value, (int, long, float, str, unicode)):
  357. return repr(result)
  358. except:
  359. pass
  360. return exprstr
  361. def search_dll_by_func(self, fname):
  362. for k, v in self.allowed_dlls.items():
  363. try:
  364. getattr(v, fname)
  365. return k
  366. except AttributeError:
  367. pass
  368. return '_unknown_'
  369. def handle_ppsymbol(self, name, args, tokens):
  370. self.ppsymbols[name] = (True, False, args, tokens)
  371. if tokens:
  372. expr = self.to_pyexpr(tokens, self.consts_ctx)
  373. if args is None:
  374. self.emit_const('%s = %s' % (name, expr))
  375. else:
  376. self.emit_const('%s = lambda %s: %s' %
  377. (name, ', '.join(args), expr))
# Record a parsed C symbol and emit the Python code that defines it.
#   name -- symbol name (may be a mangled "struct foo"-style name)
#   type -- 'struct', 'union', 'typedef', 'enum', or the storage class of a
#           plain declaration ('' when none was given)
#   defn -- member list for struct/union/enum, (primtype, props) otherwise
  378. def handle_symbol(self, name, type, defn):
  379. self.symbols[name] = (type, defn)
  380. if type == 'struct' or type == 'union':
  381. fields = []
  382. anons = []
  383. for mname, mprimtype, mprops, mbitsz in defn:
  384. mpytype = self.to_pytype(mprimtype, mprops, self.types_ctx)
# unnamed members get a generated name and are listed in _anonymous_
  385. if mname is None:
  386. mname = '_XaX_%d' % (len(anons) + 1)
  387. anons.append(mname)
# a bit-field carries its width as a third tuple element
  388. if mbitsz is None:
  389. fields.append('(%r, %s)' % (mname, mpytype))
  390. else:
  391. fields.append('(%r, %s, %s)' % (mname, mpytype,
  392. self.to_pyexpr(mbitsz, self.types_ctx)))
  393. pyname = self.to_pyname(name)
  394. head = 'class %s(%s): ' % (pyname,
  395. '_X_Structure' if type == 'struct' else '_X_Union')
# "@" is a placeholder in front of each attribute name; it is replaced by
# nothing (inside the class body) or by "<pyname>." (assignments made after
# the class statement)
  396. body = '@_fields_ = [%s]' % ', '.join(fields)
# the innermost "#pragma pack" value, if any, becomes _pack_
  397. if self.alignstack[-1] is not None:
  398. body = '@_pack_ = %d; ' % self.alignstack[-1] + body
  399. if anons:
  400. body += '; @_anonymous_ = %r' % anons
  401. if pyname in body: # self-referential: split declaration
  402. self.emit_type(head + 'pass')
  403. self.emit_type(body.replace('@', pyname + '.'))
  404. else:
  405. self.emit_type(head + body.replace('@', ''))
  406. return
  407. elif type == 'typedef':
  408. assert ' ' not in name
  409. self.emit_type('%s = %s' % (name, self.to_pytype(defn[0], defn[1],
  410. self.types_ctx)))
  411. return
  412. elif type == 'enum':
# the enum type itself is an alias of _X_int; every member is emitted as
# "<base expression> + <offset from the last member with an explicit value>"
  413. pyname = self.to_pyname(name)
  414. self.emit_const('%s = _X_int' % pyname)
  415. mbase = ['0']
  416. offset = 0
  417. for mname, mvalue in defn:
  418. if mvalue:
  419. mbase = mvalue
  420. offset = 0
  421. else:
  422. offset += 1
  423. self.emit_const('%s = %s(%s)' % (mname, pyname,
  424. self.to_pyexpr(['('] + mbase + [')', '+', str(offset)],
  425. self.consts_ctx)))
  426. return
  427. elif type == '':
  428. primtype, props = defn
# a declaration whose last property is '()' is a function: look up the
# library exporting it and bind it
  429. if props and props[-1][0] == '()':
  430. dll = self.search_dll_by_func(name)
# the extra leading pointer property is processed last by to_pytype()
  431. pytype = self.to_pytype(primtype, [('*',)] + props, self.funcs_ctx)
# NOTE(review): to_pytype() turns ANYFUNCTION into '_X_void_p' when it meets
# a '*' property, and one is always prepended above, so this branch looks
# unreachable -- confirm.
  432. if pytype is ANYFUNCTION:
  433. self.emit_func('%s = _X_windll.%s.%s' % (name, dll, name))
  434. else:
  435. self.emit_func('%s = %s((%r, _X_windll.%s))' %
  436. (name, pytype, name, dll))
  437. return
  438. else:
# unhandled symbol: report it on stdout
  439. print '# %s := %s %s' % (name, type, defn)
# Expand preprocessor macros in a token list and return the new list.
#   tokens -- tokens to expand (not modified)
#   ifcond -- true when expanding the condition of #if/#elif: "defined X"
#             and "defined(X)" are replaced by '1' or '0', and every other
#             unknown identifier by '0'
# Replacement tokens stored for a macro are inserted as they are; arguments
# of a function-like macro are expanded recursively before insertion.
  440. def subst_pp(self, tokens, ifcond=False):
  441. ntokens = len(tokens)
  442. result = []
  443. i = 0
  444. while i < ntokens:
  445. token = tokens[i]
  446. i += 1
# only identifiers can name a macro
  447. if not is_ident(token):
  448. result.append(token)
  449. continue
  450. if token in self.ppsymbols:
# spec is (as_defined, as_undefined, args or None, tokens)
  451. spec = self.ppsymbols[token]
  452. if spec[2] is None: # object-like
  453. result.extend(spec[3])
  454. elif i < ntokens and tokens[i] == '(': # function-like
# collect the arguments: each one runs up to the next "," or ")" that is
# not nested inside parentheses
# NOTE(review): for an empty argument list "()" this loop appears to
# collect one empty argument, which fails the length assertion below for a
# macro declared without parameters -- confirm.
  455. args = []
  456. while tokens[i] != ')':
  457. i += 1
  458. start = i
  459. depth = 0
  460. while depth > 0 or (tokens[i] != ',' and tokens[i] != ')'):
  461. if tokens[i] == '(': depth += 1
  462. elif tokens[i] == ')': depth -= 1
  463. i += 1
  464. args.append(self.subst_pp(tokens[start:i], ifcond))
  465. i += 1
  466. assert len(args) == len(spec[2])
# body tokens that name a parameter are replaced by the matching argument;
# all other tokens are copied
  467. iresult = []
  468. for token in spec[3]:
  469. try:
  470. iresult.extend(args[spec[2].index(token)])
  471. except:
  472. iresult.append(token)
  473. result.extend(iresult)
  474. else:
# name of a function-like macro without a call: left as it is
  475. result.append(token)
  476. elif ifcond: # as #if's condition expression
  477. if token == 'defined': # special case
# step to the operand, which is either X or ( X )
  478. i += 1
  479. token = tokens[i-1]
  480. if token == '(':
  481. i += 2
  482. assert tokens[i-1] == ')'
  483. token = tokens[i-2]
  484. result.append('1' if token in self.ppsymbols else '0')
  485. else:
  486. result.append('0')
  487. else:
  488. result.append(token)
  489. return result
  490. def concat_pp(self, tokens):
  491. result = []
  492. concatnext = False
  493. for token in tokens:
  494. if token == '##':
  495. concatnext = True
  496. elif concatnext: # XXX can produce invalid token
  497. result[-1] += token
  498. concatnext = False
  499. else:
  500. result.append(token)
  501. return result
  502. def eval_cond_pp(self, tokens):
  503. tokens = self.subst_pp(tokens, True)
  504. expr = []
  505. i = 0
  506. while i < len(tokens):
  507. if tokens[i] == '&&':
  508. expr.append('and')
  509. elif tokens[i] == '||':
  510. expr.append('or')
  511. elif tokens[i] == '!':
  512. expr.append('not')
  513. elif tokens[i].startswith("'") or tokens[i].startswith("L'"):
  514. expr.append(ord(tokens[i].split("'")[1]))
  515. else:
  516. expr.append(tokens[i])
  517. i += 1
  518. return bool(eval(' '.join(expr), {}, {}))
  519. def process_pp(self, tokens):
  520. if not tokens: return
  521. if tokens[0] == 'if':
  522. cond = self.eval_cond_pp(tokens[1:])
  523. self.ppblocks.append((self.ppblocks[-1][0] and cond,
  524. self.ppblocks[-1][0] and not cond))
  525. return
  526. if tokens[0] == 'ifdef':
  527. assert len(tokens) == 2
  528. try:
  529. blockstat = self.ppsymbols[tokens[1]][0:2]
  530. except:
  531. blockstat = (False, True)
  532. self.ppblocks.append((self.ppblocks[-1][0] and blockstat[0],
  533. self.ppblocks[-1][0] and blockstat[1]))
  534. return
  535. if tokens[0] == 'ifndef':
  536. assert len(tokens) == 2
  537. try:
  538. blockstat = self.ppsymbols[tokens[1]][0:2][::-1]
  539. except:
  540. blockstat = (True, False)
  541. self.ppblocks.append((self.ppblocks[-1][0] and blockstat[0],
  542. self.ppblocks[-1][0] and blockstat[1]))
  543. return
  544. if tokens[0] == 'elif':
  545. cond = self.eval_cond_pp(tokens[1:])
  546. self.ppblocks[-1] = (self.ppblocks[-1][1] and cond,
  547. self.ppblocks[-1][1] and not cond)
  548. return
  549. if tokens[0] == 'else':
  550. assert len(tokens) == 1
  551. self.ppblocks[-1] = self.ppblocks[-1][1:]
  552. return
  553. if tokens[0] == 'endif':
  554. assert len(tokens) == 1
  555. self.ppblocks.pop()
  556. return
  557. if not self.ppblocks[-1][0]: # current block is ignored
  558. return
  559. if tokens[0] == 'define':
  560. assert len(tokens) >= 2
  561. substed = self.concat_pp(self.subst_pp(tokens[2:]))
  562. self.handle_ppsymbol(tokens[1], None, substed)
  563. return
  564. if tokens[0] == 'define()':
  565. assert len(tokens) >= 4 # "define()", "MACRONAME", "(", ")"
  566. assert tokens[2] == '('
  567. argnames = []
  568. if tokens[3] == ')':
  569. body = tokens[4:]
  570. else:
  571. for i in xrange(3, len(tokens), 2):
  572. argnames.append(tokens[i])
  573. if tokens[i+1] == ')':
  574. body = tokens[i+2:]
  575. break
  576. assert tokens[i+1] == ','
  577. else:
  578. assert False
  579. substed = self.subst_pp(body)
  580. self.handle_ppsymbol(tokens[1], argnames, substed)
  581. return
  582. if tokens[0] == 'undef':
  583. assert len(tokens) == 2
  584. try: del self.ppsymbols[tokens[1]]
  585. except: pass
  586. return
  587. if tokens[0] == 'pragma' and tokens[1] == 'pack':
  588. assert tokens[2] == '(' and tokens[-1] == ')'
  589. if tokens[3] == 'push':
  590. assert len(tokens) == 7
  591. assert tokens[4] == ',' and tokens[5].isdigit()
  592. self.alignstack.append(int(tokens[5]))
  593. return
  594. if tokens[3] == 'pop':
  595. assert len(tokens) == 5
  596. self.alignstack.pop()
  597. return
  598. if tokens[0] == 'include':
  599. if len(tokens) == 2:
  600. tokens[1:] = self.concat_pp(self.subst_pp(tokens[1:]))
  601. assert ((tokens[1] == '<' and tokens[-1] == '>') or
  602. (tokens[1] == '"' and tokens[-1] == '"'))
  603. filename = ''.join(tokens[2:-1])
  604. self.process_internal(filename)
  605. return
  606. print '# unknown PP directive: ' + ' '.join(tokens)
  607. def tokenize(self, s):
  608. return ['\n' if token[0] == '\n' else
  609. '' if token[0].isspace() or token[:2] in ('//', '/*') else token
  610. for token in TOKEN_PATTERN.findall(s.replace('\\\n', ''))]
# Generator: run the preprocessor over the source text s.
# Lines starting with "#" are handed to process_pp(); the tokens of all
# other lines that lie in an active conditional block are collected,
# macro-expanded (subst_pp + concat_pp) and yielded.
  611. def preprocess(self, s):
  612. tokens = self.tokenize(s)
  613. tokens.append('\n') # the last empty line is to be ignored
  614. iter = bufferiter(tokens)
  615. try:
# buffer holds the not-yet-expanded tokens of ordinary lines
  616. buffer = []
  617. while True:
# skip() drops the empty strings that stand for whitespace and comments
  618. token = iter.skip()
  619. if token == '#': # preprocessor line
  620. # flush current buffer
  621. for token in self.concat_pp(self.subst_pp(buffer)):
  622. yield token
  623. buffer = []
  624. token = iter.skip()
  625. line = [token]
  626. if token == 'define':
  627. macroname = iter.skip()
  628. if macroname != '\n':
  629. line.append(macroname)
# the token directly after the macro name is read without skipping
# whitespace, so "(" only counts when it immediately follows the name
  630. line.append(iter())
  631. if line[-1] == '(':
  632. # define pseudo-directive '#define()' in this case
  633. line[0] = 'define()'
  634. if line[-1] == '\n':
  635. del line[-1]
  636. else:
# take the rest of the line and consume its terminating newline
  637. line += iter.until(lambda t: t != '\n')
  638. token = iter.skip()
  639. assert token == '\n'
  640. elif token != '\n':
  641. line += iter.until(lambda t: t != '\n')
  642. token = iter.skip()
  643. assert token == '\n'
# whitespace placeholders are dropped before the directive is executed
  644. self.process_pp(filter(None, line))
  645. elif token == '\n':
  646. pass
  647. elif not self.ppblocks[-1][0]: # ignored line
  648. iter.skip(lambda t: t != '\n')
  649. else:
# ordinary line in an active block: collect it up to the newline
  650. buffer.append(token)
  651. buffer += filter(None, iter.until(lambda t: t != '\n'))
  652. token = iter.skip()
  653. assert token == '\n'
# running out of tokens ends the loop
  654. except StopIteration:
  655. pass
# flush whatever is left after the last directive
  656. for token in self.concat_pp(self.subst_pp(buffer)):
  657. yield token
  658. def parse_primtype(self, iter):
  659. token = iter()
  660. storage = ''
  661. const = volatile = restrict = False
  662. typename = []
  663. while True:
  664. if token == 'const':
  665. assert not const
  666. const = True
  667. elif token == 'volatile':
  668. assert not volatile
  669. volatile = True
  670. elif token == 'restrict':
  671. assert not restrict
  672. restrict = True
  673. elif token == 'extern' or token == 'static':
  674. assert not storage
  675. storage = token
  676. elif tuple(sorted(typename + [token])) in PRIMTYPE_MAPPINGS:
  677. typename.append(token)
  678. else:
  679. break
  680. token = iter()
  681. iter.putback(token)
  682. typename = tuple(sorted(typename))
  683. return PRIMTYPE_MAPPINGS[typename], storage
# Parse one C type together with its declarator.
#   iter        -- bufferiter delivering the tokens
#   prev        -- (primtype, storage) of the preceding declarator when this
#                  one shares its base type ("int a, *b;"), else None
#   declaration -- when false (used for type names inside casts), no
#                  struct/union/enum body and no declared name are read
# Returns (primtype, storage, props, name); the token following the parsed
# type is put back. struct/union/enum bodies met on the way are reported
# through handle_symbol().
  684. def parse_type(self, iter, prev=None, declaration=True):
  685. if prev is None:
  686. primtype, storage = self.parse_primtype(iter)
  687. else: # e.g. int a, *b;
  688. primtype, storage = prev
  689. token = iter()
# no builtin type keywords were found: the base type is either a
# struct/union/enum or a previously known type name
  690. if primtype is None:
  691. if token in ('struct', 'union', 'enum'):
  692. # storage class or CV + struct/union/enum
  693. assert primtype is None
  694. complextype = token
  695. complexname = None
  696. token = iter()
  697. if is_ident(token):
  698. complexname = token
  699. token = iter()
  700. assert (declaration and token == '{') or complexname is not None
# untagged types are named after the enclosing type (or "*global*") plus a
# running counter
  701. if complexname is None:
  702. primtype = '%s %d' % (self.anonprefix or '*global*',
  703. self.anoncounter)
  704. self.anoncounter += 1
  705. else:
  706. primtype = '%s %s' % (complextype, complexname)
  707. if declaration and token == '{':
  708. members = []
# the outermost type with a body becomes the prefix for anonymous types
# nested in it; the counter restarts inside the body
  709. if self.anonprefix is None:
  710. self.anonprefix = primtype
  711. prevcounter = self.anoncounter
  712. self.anoncounter = 1
  713. if complextype == 'enum': # declarator-like
# members are collected as (name, value tokens); value is [] when absent
  714. while True:
  715. name = iter()
  716. assert is_ident(name)
  717. token = iter()
  718. if token == '=':
  719. value = iter.until(lambda t: t != ',' and
  720. t != '}')
  721. token = iter()
  722. else:
  723. value = []
  724. members.append((name, value))
  725. if token == '}': break
  726. assert token == ','
  727. else: # declaration-like
# one iteration per member declaration, ending at the closing brace
  728. while True:
  729. token = iter()
  730. if token == '}': break
  731. iter.putback(token)
  732. prev = None
# one iteration per declarator of the member declaration
  733. while True:
  734. mprimtype, mstorage, mprops, mname = \
  735. self.parse_type(iter, prev)
# only struct/union members may be unnamed
  736. assert not mstorage and (
  737. (isinstance(mprimtype, str) and
  738. (mprimtype.startswith('struct ') or
  739. mprimtype.startswith('union '))) or
  740. mname is not None)
  741. token = iter()
# ":" introduces a bit-field width
  742. if token == ':':
  743. mbitsz = iter.until(lambda t: t != ';' and
  744. t != ',')
  745. assert mbitsz
  746. token = iter()
  747. else:
  748. mbitsz = None
  749. members.append((mname, mprimtype, mprops, mbitsz))
  750. if token == ';': break
  751. assert token == ','
  752. prev = (mprimtype, mstorage)
  753. self.handle_symbol(primtype, complextype, members)
# restore the anonymous-name state saved before the body
  754. if self.anonprefix == primtype:
  755. self.anonprefix = None
  756. self.anoncounter = prevcounter
  757. token = iter()
  758. else:
  759. # pre-existing type
  760. assert is_ident(token)
  761. primtype = token
  762. token = iter()
# declarator, prefix part: parentheses, qualifiers, calling convention and
# pointers in front of the name. One list of pointer properties and one
# calling-convention slot are kept per open parenthesis.
  763. name = None
  764. props = []
  765. lpropstack = [[]]
  766. ldeclstack = [None]
  767. while True:
  768. if token == '(':
  769. lpropstack.append([])
  770. ldeclstack.append(None)
  771. elif token in ('const', 'volatile', 'inline', '__inline__'):
  772. pass # unused
  773. elif token == '__stdcall': # calling convention
  774. ldeclstack[-1] = 'stdcall'
  775. elif token == '*': # pointer
  776. while token == '*':
  777. lpropstack[-1].append(('*',))
  778. token = iter()
  779. iter.putback(token)
  780. else:
  781. break
  782. token = iter()
  783. if declaration and is_ident(token):
  784. name = token
  785. token = iter()
# declarator, suffix part: parameter lists, array bounds and the closing
# parentheses of groups opened in the prefix part
  786. rprops = []
  787. while True:
  788. if token == '(': # function
# args: list of (name, primtype, props); None when prototypeless
  789. args = []
  790. token = iter()
  791. if token == 'void' and iter.peek() == ')': # no arguments
  792. token = iter()
  793. assert token == ')'
  794. elif token == ')': # prototypeless
  795. args = None
  796. else:
  797. iter.putback(token)
  798. while True:
  799. token = iter()
  800. if token == '...': # vararg
  801. aname = astorage = None
  802. aprimtype = VARARG
  803. aprops = []
  804. else:
  805. iter.putback(token)
  806. aprimtype, astorage, aprops, aname = self.parse_type(iter)
  807. assert not astorage
  808. args.append((aname, aprimtype, aprops))
  809. token = iter()
  810. if token == ')': break
  811. assert token == ','
# a parameter of plain type void is only valid as the sole "(void)"
  812. assert not any(t is VOID and not p for n,t,p in args)
# the calling convention seen at this nesting level goes with the function
  813. rprops.append(('()', ldeclstack[-1], args))
  814. ldeclstack[-1] = None
  815. elif token == '[': # array indices
  816. tokens = iter.until(lambda t: t != ']')
  817. rprops.append(('[]', tokens))
  818. token = iter()
  819. assert token == ']'
  820. elif len(lpropstack) > 1 and token == ')':
# a parenthesized group closes: its pointer properties (reversed) and the
# suffix properties collected so far are appended to the result
# NOTE(review): rprops is not cleared here, so suffix properties gathered
# inside the group appear to be appended again later -- confirm.
  821. props += lpropstack.pop()[::-1]
  822. props += rprops
# an unused calling convention moves out to the enclosing level
  823. decl = ldeclstack.pop()
  824. if decl is not None:
  825. assert not ldeclstack[-1]
  826. ldeclstack[-1] = decl
  827. else:
  828. break
  829. token = iter()
# all groups must be closed and every calling convention used up
  830. assert len(lpropstack) == len(ldeclstack) == 1
  831. assert not ldeclstack[0]
  832. iter.putback(token)
  833. props += lpropstack[0][::-1]
  834. props += rprops
# a trailing pointer property on char, wchar or void is folded into the
# dedicated pointer type name
  835. if props and props[-1][0] == '*': # special types
  836. if primtype == '_X_char':
  837. primtype = '_X_char_p'
  838. props.pop()
  839. elif primtype == '_X_wchar':
  840. primtype = '_X_wchar_p'
  841. props.pop()
  842. elif primtype is VOID:
  843. primtype = '_X_void_p'
  844. props.pop()
  845. return primtype, storage, props, name
  846. def parse(self, tokens):
  847. try:
  848. iter = bufferiter(tokens)
  849. while True:
  850. token = iter()
  851. if token == 'typedef':
  852. prev = None
  853. while True:
  854. primtype, storage, props, name = self.parse_type(iter, prev)
  855. assert name is not None and not storage
  856. self.handle_symbol(name, 'typedef', (primtype, props))
  857. token = iter()
  858. if token == ';': break
  859. assert token == ','
  860. prev = (primtype, storage)
  861. else:
  862. iter.putback(token)
  863. prev = None
  864. while True:
  865. primtype, storage, props, name = self.parse_type(iter, prev)
  866. if name is not None:
  867. self.handle_symbol(name, storage, (primtype, props))
  868. else:
  869. assert (isinstance(primtype, str) and
  870. (primtype.startswith('struct ') or
  871. primtype.startswith('union ') or
  872. primtype.startswith('enum ')))
  873. token = iter()
  874. if token == ';': break
  875. assert token == ','
  876. prev = (primtype, storage)
  877. except StopIteration:
  878. pass
  879. def process_internal(self, filename):
  880. if filename not in self.allowed_headers:
  881. print >>sys.stderr, 'skipping %s...' % filename
  882. return
  883. for ipath in self.paths:
  884. path = os.path.join(ipath, filename)
  885. try:
  886. fp = open(path, 'rU')
  887. break
  888. except:
  889. pass
  890. print >>sys.stderr, 'processing %s...' % filename
  891. self.parse(self.preprocess(fp.read()))
  892. def process(self, filename):
  893. self.ppblocks = [(True, False)]
  894. self.alignstack = [None]
  895. self.anonprefix = None
  896. self.anoncounter = 1
  897. self.process_internal(filename)
  898. assert len(self.ppblocks) == 1
  899. if __name__ == '__main__':
  900. ALLOWED_HEADERS = set([
  901. # windows.h with WIN32_LEAN_AND_MEAN
  902. 'windows.h', 'windef.h', 'wincon.h', 'winbase.h', 'wingdi.h',
  903. 'winuser.h', 'winnls.h', 'winver.h', 'winnetwk.h', 'winreg.h',
  904. 'winsvc.h', 'winnt.h', 'winerror.h', 'basetsd.h',
  905. # windows.h without WIN32_LEAN_AND_MEAN
  906. 'cderr.h', 'dde.h', 'ddeml.h', 'dlgs.h', 'imm.h', 'lzexpand.h',
  907. 'mmsystem.h', 'nb30.h', 'rpc.h', 'rpcdce.h', 'rpcdcep.h',
  908. 'rpcnsi.h', 'rpcnterr.h', 'rpcndr.h', 'rpcnsip.h', 'shellapi.h',
  909. 'winperf.h', 'commdlg.h', 'unknwn.h', 'objfwd.h', 'basetyps.h',
  910. 'wtypes.h', 'winspool.h', 'ole2.h', 'winsock2.h',
  911. # additional
  912. 'commctrl.h', 'prsht.h',
  913. # #pragma pack() headers
  914. 'pshpack1.h', 'pshpack2.h', 'pshpack4.h', 'pshpack8.h', 'poppack.h',
  915. ])
  916. ALLOWED_DLLS = set([
  917. 'kernel32', 'user32', 'gdi32', 'advapi32', 'comctl32', 'comdlg32',
  918. 'shell32', 'wsock32', 'ws2_32', 'imm32', 'winmm', 'opengl32',
  919. 'version', 'winspool', 'rpcrt4', 'rpcns4', 'mpr',
  920. ])
  921. proc = Processor(paths=sys.argv[1:],
  922. allowed_headers=ALLOWED_HEADERS, allowed_dlls=ALLOWED_DLLS)
  923. proc.add_ppsymbol('_X86_', '')
  924. proc.add_ppsymbol('NULL', 'None')
  925. proc.add_ppsymbol('UNICODE', '')
  926. proc.add_ppsymbol('NO_STRICT', '')
  927. proc.add_ppsymbol('NOMINMAX', '')
  928. proc.add_ppsymbol('WIN32_LEAN_AND_MEAN', '')
  929. proc.add_ppsymbol('WINVER', '0x0501')
  930. proc.add_ppsymbol('DECLSPEC_NORETURN', '')
  931. proc.add_ppsymbol('DECLARE_STDCALL_P', ['type'], 'type __stdcall')
  932. proc.process('windows.h')
  933. proc.process('commctrl.h')