PageRenderTime 65ms CodeModel.GetById 30ms RepoModel.GetById 1ms app.codeStats 0ms

/lib_pypy/cffi/cparser.py

https://bitbucket.org/halgari/pypy
Python | 584 lines | 497 code | 23 blank | 64 comment | 125 complexity | a6b3c1219063bf0372cddbc34911e697 MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0, AGPL-3.0
  1. from . import api, model
  2. from .commontypes import COMMON_TYPES, resolve_common_type
  3. try:
  4. from . import _pycparser as pycparser
  5. except ImportError:
  6. import pycparser
  7. import weakref, re, sys
  8. try:
  9. if sys.version_info < (3,):
  10. import thread as _thread
  11. else:
  12. import _thread
  13. lock = _thread.allocate_lock()
  14. except ImportError:
  15. lock = None
  16. _r_comment = re.compile(r"/\*.*?\*/|//.*?$", re.DOTALL | re.MULTILINE)
  17. _r_define = re.compile(r"^\s*#\s*define\s+([A-Za-z_][A-Za-z_0-9]*)\s+(.*?)$",
  18. re.MULTILINE)
  19. _r_partial_enum = re.compile(r"=\s*\.\.\.\s*[,}]|\.\.\.\s*\}")
  20. _r_enum_dotdotdot = re.compile(r"__dotdotdot\d+__$")
  21. _r_partial_array = re.compile(r"\[\s*\.\.\.\s*\]")
  22. _r_words = re.compile(r"\w+|\S")
  23. _parser_cache = None
  24. _r_int_literal = re.compile(r"^0?x?[0-9a-f]+u?l?$", re.IGNORECASE)
  25. def _get_parser():
  26. global _parser_cache
  27. if _parser_cache is None:
  28. _parser_cache = pycparser.CParser()
  29. return _parser_cache
  30. def _preprocess(csource):
  31. # Remove comments. NOTE: this only work because the cdef() section
  32. # should not contain any string literal!
  33. csource = _r_comment.sub(' ', csource)
  34. # Remove the "#define FOO x" lines
  35. macros = {}
  36. for match in _r_define.finditer(csource):
  37. macroname, macrovalue = match.groups()
  38. macros[macroname] = macrovalue
  39. csource = _r_define.sub('', csource)
  40. # Replace "[...]" with "[__dotdotdotarray__]"
  41. csource = _r_partial_array.sub('[__dotdotdotarray__]', csource)
  42. # Replace "...}" with "__dotdotdotNUM__}". This construction should
  43. # occur only at the end of enums; at the end of structs we have "...;}"
  44. # and at the end of vararg functions "...);". Also replace "=...[,}]"
  45. # with ",__dotdotdotNUM__[,}]": this occurs in the enums too, when
  46. # giving an unknown value.
  47. matches = list(_r_partial_enum.finditer(csource))
  48. for number, match in enumerate(reversed(matches)):
  49. p = match.start()
  50. if csource[p] == '=':
  51. p2 = csource.find('...', p, match.end())
  52. assert p2 > p
  53. csource = '%s,__dotdotdot%d__ %s' % (csource[:p], number,
  54. csource[p2+3:])
  55. else:
  56. assert csource[p:p+3] == '...'
  57. csource = '%s __dotdotdot%d__ %s' % (csource[:p], number,
  58. csource[p+3:])
  59. # Replace all remaining "..." with the same name, "__dotdotdot__",
  60. # which is declared with a typedef for the purpose of C parsing.
  61. return csource.replace('...', ' __dotdotdot__ '), macros
  62. def _common_type_names(csource):
  63. # Look in the source for what looks like usages of types from the
  64. # list of common types. A "usage" is approximated here as the
  65. # appearance of the word, minus a "definition" of the type, which
  66. # is the last word in a "typedef" statement. Approximative only
  67. # but should be fine for all the common types.
  68. look_for_words = set(COMMON_TYPES)
  69. look_for_words.add(';')
  70. look_for_words.add('typedef')
  71. words_used = set()
  72. is_typedef = False
  73. previous_word = ''
  74. for word in _r_words.findall(csource):
  75. if word in look_for_words:
  76. if word == ';':
  77. if is_typedef:
  78. words_used.discard(previous_word)
  79. look_for_words.discard(previous_word)
  80. is_typedef = False
  81. elif word == 'typedef':
  82. is_typedef = True
  83. else: # word in COMMON_TYPES
  84. words_used.add(word)
  85. previous_word = word
  86. return words_used
  87. class Parser(object):
  88. def __init__(self):
  89. self._declarations = {}
  90. self._anonymous_counter = 0
  91. self._structnode2type = weakref.WeakKeyDictionary()
  92. self._override = False
  93. self._packed = False
  94. self._int_constants = {}
  95. def _parse(self, csource):
  96. csource, macros = _preprocess(csource)
  97. # XXX: for more efficiency we would need to poke into the
  98. # internals of CParser... the following registers the
  99. # typedefs, because their presence or absence influences the
  100. # parsing itself (but what they are typedef'ed to plays no role)
  101. ctn = _common_type_names(csource)
  102. typenames = []
  103. for name in sorted(self._declarations):
  104. if name.startswith('typedef '):
  105. name = name[8:]
  106. typenames.append(name)
  107. ctn.discard(name)
  108. typenames += sorted(ctn)
  109. #
  110. csourcelines = ['typedef int %s;' % typename for typename in typenames]
  111. csourcelines.append('typedef int __dotdotdot__;')
  112. csourcelines.append(csource)
  113. csource = '\n'.join(csourcelines)
  114. if lock is not None:
  115. lock.acquire() # pycparser is not thread-safe...
  116. try:
  117. ast = _get_parser().parse(csource)
  118. except pycparser.c_parser.ParseError as e:
  119. self.convert_pycparser_error(e, csource)
  120. finally:
  121. if lock is not None:
  122. lock.release()
  123. # csource will be used to find buggy source text
  124. return ast, macros, csource
  125. def _convert_pycparser_error(self, e, csource):
  126. # xxx look for ":NUM:" at the start of str(e) and try to interpret
  127. # it as a line number
  128. line = None
  129. msg = str(e)
  130. if msg.startswith(':') and ':' in msg[1:]:
  131. linenum = msg[1:msg.find(':',1)]
  132. if linenum.isdigit():
  133. linenum = int(linenum, 10)
  134. csourcelines = csource.splitlines()
  135. if 1 <= linenum <= len(csourcelines):
  136. line = csourcelines[linenum-1]
  137. return line
  138. def convert_pycparser_error(self, e, csource):
  139. line = self._convert_pycparser_error(e, csource)
  140. msg = str(e)
  141. if line:
  142. msg = 'cannot parse "%s"\n%s' % (line.strip(), msg)
  143. else:
  144. msg = 'parse error\n%s' % (msg,)
  145. raise api.CDefError(msg)
  146. def parse(self, csource, override=False, packed=False):
  147. prev_override = self._override
  148. prev_packed = self._packed
  149. try:
  150. self._override = override
  151. self._packed = packed
  152. self._internal_parse(csource)
  153. finally:
  154. self._override = prev_override
  155. self._packed = prev_packed
  156. def _internal_parse(self, csource):
  157. ast, macros, csource = self._parse(csource)
  158. # add the macros
  159. self._process_macros(macros)
  160. # find the first "__dotdotdot__" and use that as a separator
  161. # between the repeated typedefs and the real csource
  162. iterator = iter(ast.ext)
  163. for decl in iterator:
  164. if decl.name == '__dotdotdot__':
  165. break
  166. #
  167. try:
  168. for decl in iterator:
  169. if isinstance(decl, pycparser.c_ast.Decl):
  170. self._parse_decl(decl)
  171. elif isinstance(decl, pycparser.c_ast.Typedef):
  172. if not decl.name:
  173. raise api.CDefError("typedef does not declare any name",
  174. decl)
  175. if (isinstance(decl.type.type, pycparser.c_ast.IdentifierType)
  176. and decl.type.type.names == ['__dotdotdot__']):
  177. realtype = model.unknown_type(decl.name)
  178. elif (isinstance(decl.type, pycparser.c_ast.PtrDecl) and
  179. isinstance(decl.type.type, pycparser.c_ast.TypeDecl) and
  180. isinstance(decl.type.type.type,
  181. pycparser.c_ast.IdentifierType) and
  182. decl.type.type.type.names == ['__dotdotdot__']):
  183. realtype = model.unknown_ptr_type(decl.name)
  184. else:
  185. realtype = self._get_type(decl.type, name=decl.name)
  186. self._declare('typedef ' + decl.name, realtype)
  187. else:
  188. raise api.CDefError("unrecognized construct", decl)
  189. except api.FFIError as e:
  190. msg = self._convert_pycparser_error(e, csource)
  191. if msg:
  192. e.args = (e.args[0] + "\n *** Err: %s" % msg,)
  193. raise
  194. def _add_constants(self, key, val):
  195. if key in self._int_constants:
  196. raise api.FFIError(
  197. "multiple declarations of constant: %s" % (key,))
  198. self._int_constants[key] = val
  199. def _process_macros(self, macros):
  200. for key, value in macros.items():
  201. value = value.strip()
  202. match = _r_int_literal.search(value)
  203. if match is not None:
  204. int_str = match.group(0).lower().rstrip("ul")
  205. # "010" is not valid oct in py3
  206. if (int_str.startswith("0") and
  207. int_str != "0" and
  208. not int_str.startswith("0x")):
  209. int_str = "0o" + int_str[1:]
  210. pyvalue = int(int_str, 0)
  211. self._add_constants(key, pyvalue)
  212. elif value == '...':
  213. self._declare('macro ' + key, value)
  214. else:
  215. raise api.CDefError('only supports the syntax "#define '
  216. '%s ..." (literally) or "#define '
  217. '%s 0x1FF" for now' % (key, key))
  218. def _parse_decl(self, decl):
  219. node = decl.type
  220. if isinstance(node, pycparser.c_ast.FuncDecl):
  221. tp = self._get_type(node, name=decl.name)
  222. assert isinstance(tp, model.RawFunctionType)
  223. tp = self._get_type_pointer(tp)
  224. self._declare('function ' + decl.name, tp)
  225. else:
  226. if isinstance(node, pycparser.c_ast.Struct):
  227. # XXX do we need self._declare in any of those?
  228. if node.decls is not None:
  229. self._get_struct_union_enum_type('struct', node)
  230. elif isinstance(node, pycparser.c_ast.Union):
  231. if node.decls is not None:
  232. self._get_struct_union_enum_type('union', node)
  233. elif isinstance(node, pycparser.c_ast.Enum):
  234. if node.values is not None:
  235. self._get_struct_union_enum_type('enum', node)
  236. elif not decl.name:
  237. raise api.CDefError("construct does not declare any variable",
  238. decl)
  239. #
  240. if decl.name:
  241. tp = self._get_type(node, partial_length_ok=True)
  242. if self._is_constant_globalvar(node):
  243. self._declare('constant ' + decl.name, tp)
  244. else:
  245. self._declare('variable ' + decl.name, tp)
  246. def parse_type(self, cdecl):
  247. ast, macros = self._parse('void __dummy(\n%s\n);' % cdecl)[:2]
  248. assert not macros
  249. exprnode = ast.ext[-1].type.args.params[0]
  250. if isinstance(exprnode, pycparser.c_ast.ID):
  251. raise api.CDefError("unknown identifier '%s'" % (exprnode.name,))
  252. return self._get_type(exprnode.type)
  253. def _declare(self, name, obj):
  254. if name in self._declarations:
  255. if self._declarations[name] is obj:
  256. return
  257. if not self._override:
  258. raise api.FFIError(
  259. "multiple declarations of %s (for interactive usage, "
  260. "try cdef(xx, override=True))" % (name,))
  261. assert '__dotdotdot__' not in name.split()
  262. self._declarations[name] = obj
  263. def _get_type_pointer(self, type, const=False):
  264. if isinstance(type, model.RawFunctionType):
  265. return type.as_function_pointer()
  266. if const:
  267. return model.ConstPointerType(type)
  268. return model.PointerType(type)
  269. def _get_type(self, typenode, name=None, partial_length_ok=False):
  270. # first, dereference typedefs, if we have it already parsed, we're good
  271. if (isinstance(typenode, pycparser.c_ast.TypeDecl) and
  272. isinstance(typenode.type, pycparser.c_ast.IdentifierType) and
  273. len(typenode.type.names) == 1 and
  274. ('typedef ' + typenode.type.names[0]) in self._declarations):
  275. type = self._declarations['typedef ' + typenode.type.names[0]]
  276. return type
  277. #
  278. if isinstance(typenode, pycparser.c_ast.ArrayDecl):
  279. # array type
  280. if typenode.dim is None:
  281. length = None
  282. else:
  283. length = self._parse_constant(
  284. typenode.dim, partial_length_ok=partial_length_ok)
  285. return model.ArrayType(self._get_type(typenode.type), length)
  286. #
  287. if isinstance(typenode, pycparser.c_ast.PtrDecl):
  288. # pointer type
  289. const = (isinstance(typenode.type, pycparser.c_ast.TypeDecl)
  290. and 'const' in typenode.type.quals)
  291. return self._get_type_pointer(self._get_type(typenode.type), const)
  292. #
  293. if isinstance(typenode, pycparser.c_ast.TypeDecl):
  294. type = typenode.type
  295. if isinstance(type, pycparser.c_ast.IdentifierType):
  296. # assume a primitive type. get it from .names, but reduce
  297. # synonyms to a single chosen combination
  298. names = list(type.names)
  299. if names != ['signed', 'char']: # keep this unmodified
  300. prefixes = {}
  301. while names:
  302. name = names[0]
  303. if name in ('short', 'long', 'signed', 'unsigned'):
  304. prefixes[name] = prefixes.get(name, 0) + 1
  305. del names[0]
  306. else:
  307. break
  308. # ignore the 'signed' prefix below, and reorder the others
  309. newnames = []
  310. for prefix in ('unsigned', 'short', 'long'):
  311. for i in range(prefixes.get(prefix, 0)):
  312. newnames.append(prefix)
  313. if not names:
  314. names = ['int'] # implicitly
  315. if names == ['int']: # but kill it if 'short' or 'long'
  316. if 'short' in prefixes or 'long' in prefixes:
  317. names = []
  318. names = newnames + names
  319. ident = ' '.join(names)
  320. if ident == 'void':
  321. return model.void_type
  322. if ident == '__dotdotdot__':
  323. raise api.FFIError(':%d: bad usage of "..."' %
  324. typenode.coord.line)
  325. return resolve_common_type(ident)
  326. #
  327. if isinstance(type, pycparser.c_ast.Struct):
  328. # 'struct foobar'
  329. return self._get_struct_union_enum_type('struct', type, name)
  330. #
  331. if isinstance(type, pycparser.c_ast.Union):
  332. # 'union foobar'
  333. return self._get_struct_union_enum_type('union', type, name)
  334. #
  335. if isinstance(type, pycparser.c_ast.Enum):
  336. # 'enum foobar'
  337. return self._get_struct_union_enum_type('enum', type, name)
  338. #
  339. if isinstance(typenode, pycparser.c_ast.FuncDecl):
  340. # a function type
  341. return self._parse_function_type(typenode, name)
  342. #
  343. # nested anonymous structs or unions end up here
  344. if isinstance(typenode, pycparser.c_ast.Struct):
  345. return self._get_struct_union_enum_type('struct', typenode, name,
  346. nested=True)
  347. if isinstance(typenode, pycparser.c_ast.Union):
  348. return self._get_struct_union_enum_type('union', typenode, name,
  349. nested=True)
  350. #
  351. raise api.FFIError(":%d: bad or unsupported type declaration" %
  352. typenode.coord.line)
  353. def _parse_function_type(self, typenode, funcname=None):
  354. params = list(getattr(typenode.args, 'params', []))
  355. ellipsis = (
  356. len(params) > 0 and
  357. isinstance(params[-1].type, pycparser.c_ast.TypeDecl) and
  358. isinstance(params[-1].type.type,
  359. pycparser.c_ast.IdentifierType) and
  360. params[-1].type.type.names == ['__dotdotdot__'])
  361. if ellipsis:
  362. params.pop()
  363. if not params:
  364. raise api.CDefError(
  365. "%s: a function with only '(...)' as argument"
  366. " is not correct C" % (funcname or 'in expression'))
  367. elif (len(params) == 1 and
  368. isinstance(params[0].type, pycparser.c_ast.TypeDecl) and
  369. isinstance(params[0].type.type, pycparser.c_ast.IdentifierType)
  370. and list(params[0].type.type.names) == ['void']):
  371. del params[0]
  372. args = [self._as_func_arg(self._get_type(argdeclnode.type))
  373. for argdeclnode in params]
  374. result = self._get_type(typenode.type)
  375. return model.RawFunctionType(tuple(args), result, ellipsis)
  376. def _as_func_arg(self, type):
  377. if isinstance(type, model.ArrayType):
  378. return model.PointerType(type.item)
  379. elif isinstance(type, model.RawFunctionType):
  380. return type.as_function_pointer()
  381. else:
  382. return type
  383. def _is_constant_globalvar(self, typenode):
  384. if isinstance(typenode, pycparser.c_ast.PtrDecl):
  385. return 'const' in typenode.quals
  386. if isinstance(typenode, pycparser.c_ast.TypeDecl):
  387. return 'const' in typenode.quals
  388. return False
  389. def _get_struct_union_enum_type(self, kind, type, name=None, nested=False):
  390. # First, a level of caching on the exact 'type' node of the AST.
  391. # This is obscure, but needed because pycparser "unrolls" declarations
  392. # such as "typedef struct { } foo_t, *foo_p" and we end up with
  393. # an AST that is not a tree, but a DAG, with the "type" node of the
  394. # two branches foo_t and foo_p of the trees being the same node.
  395. # It's a bit silly but detecting "DAG-ness" in the AST tree seems
  396. # to be the only way to distinguish this case from two independent
  397. # structs. See test_struct_with_two_usages.
  398. try:
  399. return self._structnode2type[type]
  400. except KeyError:
  401. pass
  402. #
  403. # Note that this must handle parsing "struct foo" any number of
  404. # times and always return the same StructType object. Additionally,
  405. # one of these times (not necessarily the first), the fields of
  406. # the struct can be specified with "struct foo { ...fields... }".
  407. # If no name is given, then we have to create a new anonymous struct
  408. # with no caching; in this case, the fields are either specified
  409. # right now or never.
  410. #
  411. force_name = name
  412. name = type.name
  413. #
  414. # get the type or create it if needed
  415. if name is None:
  416. # 'force_name' is used to guess a more readable name for
  417. # anonymous structs, for the common case "typedef struct { } foo".
  418. if force_name is not None:
  419. explicit_name = '$%s' % force_name
  420. else:
  421. self._anonymous_counter += 1
  422. explicit_name = '$%d' % self._anonymous_counter
  423. tp = None
  424. else:
  425. explicit_name = name
  426. key = '%s %s' % (kind, name)
  427. tp = self._declarations.get(key, None)
  428. #
  429. if tp is None:
  430. if kind == 'struct':
  431. tp = model.StructType(explicit_name, None, None, None)
  432. elif kind == 'union':
  433. tp = model.UnionType(explicit_name, None, None, None)
  434. elif kind == 'enum':
  435. tp = self._build_enum_type(explicit_name, type.values)
  436. else:
  437. raise AssertionError("kind = %r" % (kind,))
  438. if name is not None:
  439. self._declare(key, tp)
  440. else:
  441. if kind == 'enum' and type.values is not None:
  442. raise NotImplementedError(
  443. "enum %s: the '{}' declaration should appear on the first "
  444. "time the enum is mentioned, not later" % explicit_name)
  445. if not tp.forcename:
  446. tp.force_the_name(force_name)
  447. if tp.forcename and '$' in tp.name:
  448. self._declare('anonymous %s' % tp.forcename, tp)
  449. #
  450. self._structnode2type[type] = tp
  451. #
  452. # enums: done here
  453. if kind == 'enum':
  454. return tp
  455. #
  456. # is there a 'type.decls'? If yes, then this is the place in the
  457. # C sources that declare the fields. If no, then just return the
  458. # existing type, possibly still incomplete.
  459. if type.decls is None:
  460. return tp
  461. #
  462. if tp.fldnames is not None:
  463. raise api.CDefError("duplicate declaration of struct %s" % name)
  464. fldnames = []
  465. fldtypes = []
  466. fldbitsize = []
  467. for decl in type.decls:
  468. if (isinstance(decl.type, pycparser.c_ast.IdentifierType) and
  469. ''.join(decl.type.names) == '__dotdotdot__'):
  470. # XXX pycparser is inconsistent: 'names' should be a list
  471. # of strings, but is sometimes just one string. Use
  472. # str.join() as a way to cope with both.
  473. self._make_partial(tp, nested)
  474. continue
  475. if decl.bitsize is None:
  476. bitsize = -1
  477. else:
  478. bitsize = self._parse_constant(decl.bitsize)
  479. self._partial_length = False
  480. type = self._get_type(decl.type, partial_length_ok=True)
  481. if self._partial_length:
  482. self._make_partial(tp, nested)
  483. if isinstance(type, model.StructType) and type.partial:
  484. self._make_partial(tp, nested)
  485. fldnames.append(decl.name or '')
  486. fldtypes.append(type)
  487. fldbitsize.append(bitsize)
  488. tp.fldnames = tuple(fldnames)
  489. tp.fldtypes = tuple(fldtypes)
  490. tp.fldbitsize = tuple(fldbitsize)
  491. if fldbitsize != [-1] * len(fldbitsize):
  492. if isinstance(tp, model.StructType) and tp.partial:
  493. raise NotImplementedError("%s: using both bitfields and '...;'"
  494. % (tp,))
  495. tp.packed = self._packed
  496. return tp
  497. def _make_partial(self, tp, nested):
  498. if not isinstance(tp, model.StructOrUnion):
  499. raise api.CDefError("%s cannot be partial" % (tp,))
  500. if not tp.has_c_name() and not nested:
  501. raise NotImplementedError("%s is partial but has no C name" %(tp,))
  502. tp.partial = True
  503. def _parse_constant(self, exprnode, partial_length_ok=False):
  504. # for now, limited to expressions that are an immediate number
  505. # or negative number
  506. if isinstance(exprnode, pycparser.c_ast.Constant):
  507. return int(exprnode.value, 0)
  508. #
  509. if (isinstance(exprnode, pycparser.c_ast.UnaryOp) and
  510. exprnode.op == '-'):
  511. return -self._parse_constant(exprnode.expr)
  512. # load previously defined int constant
  513. if (isinstance(exprnode, pycparser.c_ast.ID) and
  514. exprnode.name in self._int_constants):
  515. return self._int_constants[exprnode.name]
  516. #
  517. if partial_length_ok:
  518. if (isinstance(exprnode, pycparser.c_ast.ID) and
  519. exprnode.name == '__dotdotdotarray__'):
  520. self._partial_length = True
  521. return '...'
  522. #
  523. raise api.FFIError(":%d: unsupported expression: expected a "
  524. "simple numeric constant" % exprnode.coord.line)
  525. def _build_enum_type(self, explicit_name, decls):
  526. if decls is not None:
  527. enumerators1 = [enum.name for enum in decls.enumerators]
  528. enumerators = [s for s in enumerators1
  529. if not _r_enum_dotdotdot.match(s)]
  530. partial = len(enumerators) < len(enumerators1)
  531. enumerators = tuple(enumerators)
  532. enumvalues = []
  533. nextenumvalue = 0
  534. for enum in decls.enumerators[:len(enumerators)]:
  535. if enum.value is not None:
  536. nextenumvalue = self._parse_constant(enum.value)
  537. enumvalues.append(nextenumvalue)
  538. self._add_constants(enum.name, nextenumvalue)
  539. nextenumvalue += 1
  540. enumvalues = tuple(enumvalues)
  541. tp = model.EnumType(explicit_name, enumerators, enumvalues)
  542. tp.partial = partial
  543. else: # opaque enum
  544. tp = model.EnumType(explicit_name, (), ())
  545. return tp
  546. def include(self, other):
  547. for name, tp in other._declarations.items():
  548. kind = name.split(' ', 1)[0]
  549. if kind in ('typedef', 'struct', 'union', 'enum'):
  550. self._declare(name, tp)
  551. for k, v in other._int_constants.items():
  552. self._add_constants(k, v)