/lib_pypy/cffi/cparser.py
Python | 584 lines | 497 code | 23 blank | 64 comment | 125 complexity | a6b3c1219063bf0372cddbc34911e697 MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0, AGPL-3.0
- from . import api, model
- from .commontypes import COMMON_TYPES, resolve_common_type
- try:
- from . import _pycparser as pycparser
- except ImportError:
- import pycparser
- import weakref, re, sys
# pycparser is not thread-safe, so calls into it are serialised through
# this module-level lock.  When no thread module can be imported the lock
# is None and callers skip the locking.
try:
    if sys.version_info >= (3,):
        import _thread
    else:
        import thread as _thread
except ImportError:
    lock = None
else:
    lock = _thread.allocate_lock()

# C comments: "/* ... */" (possibly spanning lines) and "// ..." to end of line.
_r_comment = re.compile(r"/\*.*?\*/|//.*?$", re.MULTILINE | re.DOTALL)
# A "#define NAME value" line; groups are (NAME, value).
_r_define = re.compile(
    r"^\s*#\s*define\s+([A-Za-z_][A-Za-z_0-9]*)\s+(.*?)$",
    re.MULTILINE)
# The value of a "#define" that looks like an integer literal.
_r_int_literal = re.compile(r"^0?x?[0-9a-f]+u?l?$", re.IGNORECASE)
# "= ..." followed by "," or "}", or "..." directly followed by "}".
_r_partial_enum = re.compile(r"=\s*\.\.\.\s*[,}]|\.\.\.\s*\}")
# Placeholder enumerator names generated for the matches above.
_r_enum_dotdotdot = re.compile(r"__dotdotdot\d+__$")
# An array length written as "[...]".
_r_partial_array = re.compile(r"\[\s*\.\.\.\s*\]")
# Tokeniser: a run of word characters, or any single non-blank character.
_r_words = re.compile(r"\w+|\S")

# Lazily created pycparser.CParser instance, see _get_parser().
_parser_cache = None
def _get_parser():
    """Return the module-wide pycparser.CParser, creating it on first use."""
    global _parser_cache
    parser = _parser_cache
    if parser is None:
        parser = _parser_cache = pycparser.CParser()
    return parser
def _preprocess(csource):
    """Rewrite cdef() source into something pycparser can parse.

    Returns a pair ``(new_source, macros)`` where ``macros`` maps the name
    of every "#define NAME value" line to its (unparsed) value string.
    """
    # Strip the comments.  NOTE: this only works because the cdef() section
    # is not supposed to contain any string literal.
    csource = _r_comment.sub(' ', csource)

    # Collect the "#define FOO x" lines, then drop them from the source.
    # A name defined twice keeps its last value.
    macros = dict(found.groups() for found in _r_define.finditer(csource))
    csource = _r_define.sub('', csource)

    # "[...]" becomes "[__dotdotdotarray__]".
    csource = _r_partial_array.sub('[__dotdotdotarray__]', csource)

    # "...}" becomes "__dotdotdotNUM__}".  This should only occur at the end
    # of enums: structs end with "...;}" and vararg functions with "...);".
    # Likewise "=...[,}]" becomes ",__dotdotdotNUM__[,}]", which is how an
    # enumerator with an unknown value is written.
    # The matches are handled last-to-first so that the offsets of the
    # ones still to be processed stay valid while the string is edited.
    hits = list(_r_partial_enum.finditer(csource))
    hits.reverse()
    for counter, hit in enumerate(hits):
        start = hit.start()
        head = csource[:start]
        if csource[start] != '=':
            assert csource[start:start+3] == '...'
            tail = csource[start+3:]
            csource = '%s __dotdotdot%d__ %s' % (head, counter, tail)
        else:
            dots = csource.find('...', start, hit.end())
            assert dots > start
            tail = csource[dots+3:]
            csource = '%s,__dotdotdot%d__ %s' % (head, counter, tail)

    # Every "..." still left gets one common name, "__dotdotdot__", which
    # is declared with a typedef for the purpose of C parsing.
    csource = csource.replace('...', ' __dotdotdot__ ')
    return csource, macros
def _common_type_names(csource):
    """Return the set of COMMON_TYPES names that csource appears to use.

    A "use" is approximated as any occurrence of the word, except when the
    word is the last one before the ';' of a "typedef" statement (i.e. the
    source defines that name itself).  This is approximative only, but
    should be good enough for the common types.
    """
    interesting = set(COMMON_TYPES)
    interesting.update((';', 'typedef'))
    used = set()
    in_typedef = False
    last_token = ''
    for token in _r_words.findall(csource):
        if token in interesting:
            if token == 'typedef':
                in_typedef = True
            elif token == ';':
                if in_typedef:
                    # the name just defined is neither "used" nor to be
                    # looked for any more
                    used.discard(last_token)
                    interesting.discard(last_token)
                    in_typedef = False
            else:
                # one of the COMMON_TYPES
                used.add(token)
        last_token = token
    return used
class Parser(object):
    """Parse cdef() source text and accumulate the resulting declarations.

    Declarations are kept in ``self._declarations``, keyed by strings of the
    form "<kind> <name>" ("typedef foo_t", "struct foo", "function bar",
    "variable x", "constant y", "macro Z", "anonymous foo").  Integer
    constants coming from "#define" lines and from enumerators are kept
    separately in ``self._int_constants``.
    """

    def __init__(self):
        self._declarations = {}
        self._anonymous_counter = 0
        # maps pycparser struct/union/enum AST nodes to the model type
        # built for them, see _get_struct_union_enum_type()
        self._structnode2type = weakref.WeakKeyDictionary()
        self._override = False
        self._packed = False
        self._int_constants = {}

    def _parse(self, csource):
        """Preprocess csource and run pycparser on it.

        Returns ``(ast, macros, full_source)`` where ``full_source`` is the
        text that was really given to pycparser (csource preceded by
        generated typedef lines).  Parse errors are re-raised as
        api.CDefError by convert_pycparser_error().
        """
        csource, macros = _preprocess(csource)
        # XXX: for more efficiency we would need to poke into the
        # internals of CParser...  the following registers the
        # typedefs, because their presence or absence influences the
        # parsing itself (but what they are typedef'ed to plays no role)
        ctn = _common_type_names(csource)
        typenames = []
        for name in sorted(self._declarations):
            if name.startswith('typedef '):
                name = name[8:]
                typenames.append(name)
                ctn.discard(name)
        typenames += sorted(ctn)
        #
        csourcelines = ['typedef int %s;' % typename for typename in typenames]
        # this typedef also marks the end of the generated prologue,
        # see _internal_parse()
        csourcelines.append('typedef int __dotdotdot__;')
        csourcelines.append(csource)
        csource = '\n'.join(csourcelines)
        if lock is not None:
            lock.acquire()     # pycparser is not thread-safe...
        try:
            ast = _get_parser().parse(csource)
        except pycparser.c_parser.ParseError as e:
            # always raises, so 'ast' is bound whenever we get past here
            self.convert_pycparser_error(e, csource)
        finally:
            if lock is not None:
                lock.release()
        # csource will be used to find buggy source text
        return ast, macros, csource

    def _convert_pycparser_error(self, e, csource):
        """Return the line of csource that the error 'e' points to, or None.

        The line number is read from a ":NUM:" prefix of str(e); None is
        returned if there is no such prefix or if NUM is out of range.
        """
        line = None
        msg = str(e)
        if msg.startswith(':') and ':' in msg[1:]:
            linenum = msg[1:msg.find(':', 1)]
            if linenum.isdigit():
                linenum = int(linenum, 10)
                csourcelines = csource.splitlines()
                if 1 <= linenum <= len(csourcelines):
                    line = csourcelines[linenum-1]
        return line

    def convert_pycparser_error(self, e, csource):
        """Raise api.CDefError describing the parse error 'e'.

        The message quotes the offending source line when it can be found.
        """
        line = self._convert_pycparser_error(e, csource)
        msg = str(e)
        if line:
            msg = 'cannot parse "%s"\n%s' % (line.strip(), msg)
        else:
            msg = 'parse error\n%s' % (msg,)
        raise api.CDefError(msg)

    def parse(self, csource, override=False, packed=False):
        """Parse csource and record its declarations.

        'override' allows redeclaring existing names (see _declare());
        'packed' is stored on the struct/union types whose fields are
        declared by csource.  Both settings are restored afterwards.
        """
        prev_override = self._override
        prev_packed = self._packed
        try:
            self._override = override
            self._packed = packed
            self._internal_parse(csource)
        finally:
            self._override = prev_override
            self._packed = prev_packed

    def _internal_parse(self, csource):
        """Parse csource and process every top-level declaration in it."""
        ast, macros, csource = self._parse(csource)
        # add the macros
        self._process_macros(macros)
        # find the first "__dotdotdot__" and use that as a separator
        # between the repeated typedefs and the real csource
        iterator = iter(ast.ext)
        for decl in iterator:
            if decl.name == '__dotdotdot__':
                break
        #
        try:
            for decl in iterator:
                if isinstance(decl, pycparser.c_ast.Decl):
                    self._parse_decl(decl)
                elif isinstance(decl, pycparser.c_ast.Typedef):
                    if not decl.name:
                        raise api.CDefError("typedef does not declare any name",
                                            decl)
                    if (isinstance(decl.type.type, pycparser.c_ast.IdentifierType)
                            and decl.type.type.names == ['__dotdotdot__']):
                        # "typedef ... foo_t;"
                        realtype = model.unknown_type(decl.name)
                    elif (isinstance(decl.type, pycparser.c_ast.PtrDecl) and
                          isinstance(decl.type.type, pycparser.c_ast.TypeDecl) and
                          isinstance(decl.type.type.type,
                                     pycparser.c_ast.IdentifierType) and
                          decl.type.type.type.names == ['__dotdotdot__']):
                        # "typedef ... *foo_p;"
                        realtype = model.unknown_ptr_type(decl.name)
                    else:
                        realtype = self._get_type(decl.type, name=decl.name)
                    self._declare('typedef ' + decl.name, realtype)
                else:
                    raise api.CDefError("unrecognized construct", decl)
        except api.FFIError as e:
            # if the message starts with ":NUM:", append the source line
            msg = self._convert_pycparser_error(e, csource)
            if msg:
                e.args = (e.args[0] + "\n *** Err: %s" % msg,)
            raise

    @staticmethod
    def _parse_int_literal(text):
        """Convert the text of a C integer literal to a Python int.

        Handles decimal, "0x" hexadecimal and C-style octal ("010"), with
        optional trailing 'u'/'l' suffixes in any case.  Raises ValueError
        if the text is not such a literal.
        """
        digits = text.strip().lower().rstrip('ul')
        # "010" is not valid octal for int(..., 0) on Python 3
        if len(digits) > 1 and digits[0] == '0' and digits[1:].isdigit():
            digits = '0o' + digits[1:]
        return int(digits, 0)

    def _add_constants(self, key, val):
        """Record the integer constant key = val; it must be new."""
        if key in self._int_constants:
            raise api.FFIError(
                "multiple declarations of constant: %s" % (key,))
        self._int_constants[key] = val

    def _process_macros(self, macros):
        """Register the "#define" lines collected by _preprocess().

        An integer value becomes an integer constant; the literal value
        "..." becomes a 'macro NAME' declaration.  Anything else raises
        api.CDefError.
        """
        for key, value in macros.items():
            value = value.strip()
            pyvalue = None
            if _r_int_literal.search(value) is not None:
                try:
                    pyvalue = self._parse_int_literal(value)
                except ValueError:
                    # the regex is loose (it accepts e.g. "bad" or "x12");
                    # such values are reported as unsupported below
                    pass
            if pyvalue is not None:
                self._add_constants(key, pyvalue)
            elif value == '...':
                self._declare('macro ' + key, value)
            else:
                raise api.CDefError('only supports the syntax "#define '
                                    '%s ..." (literally) or "#define '
                                    '%s 0x1FF" for now' % (key, key))

    def _parse_decl(self, decl):
        """Process one pycparser Decl node: a function, a global variable
        or constant, or a bare struct/union/enum declaration."""
        node = decl.type
        if isinstance(node, pycparser.c_ast.FuncDecl):
            tp = self._get_type(node, name=decl.name)
            assert isinstance(tp, model.RawFunctionType)
            tp = self._get_type_pointer(tp)
            self._declare('function ' + decl.name, tp)
        else:
            if isinstance(node, pycparser.c_ast.Struct):
                # XXX do we need self._declare in any of those?
                if node.decls is not None:
                    self._get_struct_union_enum_type('struct', node)
            elif isinstance(node, pycparser.c_ast.Union):
                if node.decls is not None:
                    self._get_struct_union_enum_type('union', node)
            elif isinstance(node, pycparser.c_ast.Enum):
                if node.values is not None:
                    self._get_struct_union_enum_type('enum', node)
            elif not decl.name:
                raise api.CDefError("construct does not declare any variable",
                                    decl)
            #
            if decl.name:
                tp = self._get_type(node, partial_length_ok=True)
                if self._is_constant_globalvar(node):
                    self._declare('constant ' + decl.name, tp)
                else:
                    self._declare('variable ' + decl.name, tp)

    def parse_type(self, cdecl):
        """Parse the C type expression 'cdecl' and return its model type."""
        # the type is parsed as the single parameter of a dummy function
        ast, macros = self._parse('void __dummy(\n%s\n);' % cdecl)[:2]
        assert not macros
        exprnode = ast.ext[-1].type.args.params[0]
        if isinstance(exprnode, pycparser.c_ast.ID):
            raise api.CDefError("unknown identifier '%s'" % (exprnode.name,))
        return self._get_type(exprnode.type)

    def _declare(self, name, obj):
        """Record obj under the key 'name'.

        Redeclaring a name with a different object raises api.FFIError
        unless the parser is in override mode.
        """
        if name in self._declarations:
            if self._declarations[name] is obj:
                return
            if not self._override:
                raise api.FFIError(
                    "multiple declarations of %s (for interactive usage, "
                    "try cdef(xx, override=True))" % (name,))
        assert '__dotdotdot__' not in name.split()
        self._declarations[name] = obj

    def _get_type_pointer(self, type, const=False):
        """Return the model type for a pointer to 'type'."""
        if isinstance(type, model.RawFunctionType):
            return type.as_function_pointer()
        if const:
            return model.ConstPointerType(type)
        return model.PointerType(type)

    def _get_type(self, typenode, name=None, partial_length_ok=False):
        """Convert the pycparser type node 'typenode' to a model type.

        'name' is passed on as the suggested name for anonymous
        struct/union/enum types and as the function name in error messages.
        'partial_length_ok' allows an array length written as "[...]".
        Raises api.FFIError for unsupported declarations.
        """
        # first, dereference typedefs, if we have it already parsed, we're good
        if (isinstance(typenode, pycparser.c_ast.TypeDecl) and
                isinstance(typenode.type, pycparser.c_ast.IdentifierType) and
                len(typenode.type.names) == 1 and
                ('typedef ' + typenode.type.names[0]) in self._declarations):
            return self._declarations['typedef ' + typenode.type.names[0]]
        #
        if isinstance(typenode, pycparser.c_ast.ArrayDecl):
            # array type
            if typenode.dim is None:
                length = None
            else:
                length = self._parse_constant(
                    typenode.dim, partial_length_ok=partial_length_ok)
            return model.ArrayType(self._get_type(typenode.type), length)
        #
        if isinstance(typenode, pycparser.c_ast.PtrDecl):
            # pointer type
            const = (isinstance(typenode.type, pycparser.c_ast.TypeDecl)
                     and 'const' in typenode.type.quals)
            return self._get_type_pointer(self._get_type(typenode.type), const)
        #
        if isinstance(typenode, pycparser.c_ast.TypeDecl):
            type = typenode.type
            if isinstance(type, pycparser.c_ast.IdentifierType):
                # assume a primitive type.  get it from .names, but reduce
                # synonyms to a single chosen combination
                names = list(type.names)
                if names != ['signed', 'char']:    # keep this unmodified
                    prefixes = {}
                    while names:
                        word = names[0]
                        if word in ('short', 'long', 'signed', 'unsigned'):
                            prefixes[word] = prefixes.get(word, 0) + 1
                            del names[0]
                        else:
                            break
                    # ignore the 'signed' prefix below, and reorder the others
                    newnames = []
                    for prefix in ('unsigned', 'short', 'long'):
                        newnames.extend([prefix] * prefixes.get(prefix, 0))
                    if not names:
                        names = ['int']    # implicitly
                    if names == ['int']:   # but kill it if 'short' or 'long'
                        if 'short' in prefixes or 'long' in prefixes:
                            names = []
                    names = newnames + names
                ident = ' '.join(names)
                if ident == 'void':
                    return model.void_type
                if ident == '__dotdotdot__':
                    raise api.FFIError(':%d: bad usage of "..."' %
                                       typenode.coord.line)
                return resolve_common_type(ident)
            #
            if isinstance(type, pycparser.c_ast.Struct):
                # 'struct foobar'
                return self._get_struct_union_enum_type('struct', type, name)
            #
            if isinstance(type, pycparser.c_ast.Union):
                # 'union foobar'
                return self._get_struct_union_enum_type('union', type, name)
            #
            if isinstance(type, pycparser.c_ast.Enum):
                # 'enum foobar'
                return self._get_struct_union_enum_type('enum', type, name)
        #
        if isinstance(typenode, pycparser.c_ast.FuncDecl):
            # a function type
            return self._parse_function_type(typenode, name)
        #
        # nested anonymous structs or unions end up here
        if isinstance(typenode, pycparser.c_ast.Struct):
            return self._get_struct_union_enum_type('struct', typenode, name,
                                                    nested=True)
        if isinstance(typenode, pycparser.c_ast.Union):
            return self._get_struct_union_enum_type('union', typenode, name,
                                                    nested=True)
        #
        raise api.FFIError(":%d: bad or unsupported type declaration" %
                           typenode.coord.line)

    def _parse_function_type(self, typenode, funcname=None):
        """Convert a pycparser FuncDecl node to a model.RawFunctionType.

        'funcname' is only used in the error message.
        """
        params = list(getattr(typenode.args, 'params', []))
        # a trailing "..." was turned into a parameter of type __dotdotdot__
        ellipsis = (
            len(params) > 0 and
            isinstance(params[-1].type, pycparser.c_ast.TypeDecl) and
            isinstance(params[-1].type.type,
                       pycparser.c_ast.IdentifierType) and
            params[-1].type.type.names == ['__dotdotdot__'])
        if ellipsis:
            params.pop()
            if not params:
                raise api.CDefError(
                    "%s: a function with only '(...)' as argument"
                    " is not correct C" % (funcname or 'in expression'))
        elif (len(params) == 1 and
              isinstance(params[0].type, pycparser.c_ast.TypeDecl) and
              isinstance(params[0].type.type, pycparser.c_ast.IdentifierType)
              and list(params[0].type.type.names) == ['void']):
            # "f(void)": no argument at all
            del params[0]
        args = [self._as_func_arg(self._get_type(argdeclnode.type))
                for argdeclnode in params]
        result = self._get_type(typenode.type)
        return model.RawFunctionType(tuple(args), result, ellipsis)

    def _as_func_arg(self, type):
        """Adjust 'type' for use as a function argument: arrays become
        pointers to their item type, functions become function pointers."""
        if isinstance(type, model.ArrayType):
            return model.PointerType(type.item)
        elif isinstance(type, model.RawFunctionType):
            return type.as_function_pointer()
        else:
            return type

    def _is_constant_globalvar(self, typenode):
        """Tell whether the PtrDecl or TypeDecl node carries 'const' in
        its own qualifiers; False for any other kind of node."""
        if isinstance(typenode, pycparser.c_ast.PtrDecl):
            return 'const' in typenode.quals
        if isinstance(typenode, pycparser.c_ast.TypeDecl):
            return 'const' in typenode.quals
        return False

    def _get_struct_union_enum_type(self, kind, type, name=None, nested=False):
        """Return the model type for the struct/union/enum AST node 'type'.

        'kind' is 'struct', 'union' or 'enum'.  'name' is a suggested name
        used when the C declaration is anonymous.  'nested' is passed on to
        _make_partial().  If the node declares fields, they are filled in
        on the returned type.
        """
        # First, a level of caching on the exact 'type' node of the AST.
        # This is obscure, but needed because pycparser "unrolls" declarations
        # such as "typedef struct { } foo_t, *foo_p" and we end up with
        # an AST that is not a tree, but a DAG, with the "type" node of the
        # two branches foo_t and foo_p of the trees being the same node.
        # It's a bit silly but detecting "DAG-ness" in the AST tree seems
        # to be the only way to distinguish this case from two independent
        # structs.  See test_struct_with_two_usages.
        try:
            return self._structnode2type[type]
        except KeyError:
            pass
        #
        # Note that this must handle parsing "struct foo" any number of
        # times and always return the same StructType object.  Additionally,
        # one of these times (not necessarily the first), the fields of
        # the struct can be specified with "struct foo { ...fields... }".
        # If no name is given, then we have to create a new anonymous struct
        # with no caching; in this case, the fields are either specified
        # right now or never.
        #
        force_name = name
        name = type.name
        #
        # get the type or create it if needed
        if name is None:
            # 'force_name' is used to guess a more readable name for
            # anonymous structs, for the common case "typedef struct { } foo".
            if force_name is not None:
                explicit_name = '$%s' % force_name
            else:
                self._anonymous_counter += 1
                explicit_name = '$%d' % self._anonymous_counter
            tp = None
        else:
            explicit_name = name
            key = '%s %s' % (kind, name)
            tp = self._declarations.get(key, None)
        #
        if tp is None:
            if kind == 'struct':
                tp = model.StructType(explicit_name, None, None, None)
            elif kind == 'union':
                tp = model.UnionType(explicit_name, None, None, None)
            elif kind == 'enum':
                tp = self._build_enum_type(explicit_name, type.values)
            else:
                raise AssertionError("kind = %r" % (kind,))
            if name is not None:
                self._declare(key, tp)
        else:
            if kind == 'enum' and type.values is not None:
                raise NotImplementedError(
                    "enum %s: the '{}' declaration should appear on the first "
                    "time the enum is mentioned, not later" % explicit_name)
        if not tp.forcename:
            tp.force_the_name(force_name)
        if tp.forcename and '$' in tp.name:
            self._declare('anonymous %s' % tp.forcename, tp)
        #
        self._structnode2type[type] = tp
        #
        # enums: done here
        if kind == 'enum':
            return tp
        #
        # is there a 'type.decls'?  If yes, then this is the place in the
        # C sources that declare the fields.  If no, then just return the
        # existing type, possibly still incomplete.
        if type.decls is None:
            return tp
        #
        if tp.fldnames is not None:
            raise api.CDefError("duplicate declaration of %s %s"
                                % (kind, name))
        fldnames = []
        fldtypes = []
        fldbitsize = []
        for decl in type.decls:
            if (isinstance(decl.type, pycparser.c_ast.IdentifierType) and
                    ''.join(decl.type.names) == '__dotdotdot__'):
                # XXX pycparser is inconsistent: 'names' should be a list
                # of strings, but is sometimes just one string.  Use
                # str.join() as a way to cope with both.
                self._make_partial(tp, nested)
                continue
            if decl.bitsize is None:
                bitsize = -1
            else:
                bitsize = self._parse_constant(decl.bitsize)
            # _parse_constant() sets this flag when it meets a "[...]" length
            self._partial_length = False
            fldtype = self._get_type(decl.type, partial_length_ok=True)
            if self._partial_length:
                self._make_partial(tp, nested)
            if isinstance(fldtype, model.StructType) and fldtype.partial:
                self._make_partial(tp, nested)
            fldnames.append(decl.name or '')
            fldtypes.append(fldtype)
            fldbitsize.append(bitsize)
        tp.fldnames = tuple(fldnames)
        tp.fldtypes = tuple(fldtypes)
        tp.fldbitsize = tuple(fldbitsize)
        if fldbitsize != [-1] * len(fldbitsize):
            if isinstance(tp, model.StructType) and tp.partial:
                raise NotImplementedError("%s: using both bitfields and '...;'"
                                          % (tp,))
        tp.packed = self._packed
        return tp

    def _make_partial(self, tp, nested):
        """Mark the struct or union 'tp' as partial.

        Raises api.CDefError if tp is not a struct or union, and
        NotImplementedError if it has no C name and is not nested.
        """
        if not isinstance(tp, model.StructOrUnion):
            raise api.CDefError("%s cannot be partial" % (tp,))
        if not tp.has_c_name() and not nested:
            raise NotImplementedError("%s is partial but has no C name" %(tp,))
        tp.partial = True

    def _parse_constant(self, exprnode, partial_length_ok=False):
        """Evaluate the constant expression node 'exprnode'.

        Supported are integer literals, their negation, and names of
        previously recorded integer constants.  With 'partial_length_ok',
        the "[...]" placeholder is accepted too: self._partial_length is
        then set to True and the string '...' is returned.  Anything else
        raises api.FFIError.
        """
        # for now, limited to expressions that are an immediate number
        # or negative number
        if isinstance(exprnode, pycparser.c_ast.Constant):
            try:
                return self._parse_int_literal(exprnode.value)
            except ValueError:
                pass     # not an integer literal: reported at the end
        #
        if (isinstance(exprnode, pycparser.c_ast.UnaryOp) and
                exprnode.op == '-'):
            return -self._parse_constant(exprnode.expr)
        # load previously defined int constant
        if (isinstance(exprnode, pycparser.c_ast.ID) and
                exprnode.name in self._int_constants):
            return self._int_constants[exprnode.name]
        #
        if partial_length_ok:
            if (isinstance(exprnode, pycparser.c_ast.ID) and
                    exprnode.name == '__dotdotdotarray__'):
                self._partial_length = True
                return '...'
        #
        raise api.FFIError(":%d: unsupported expression: expected a "
                           "simple numeric constant" % exprnode.coord.line)

    def _build_enum_type(self, explicit_name, decls):
        """Build the model.EnumType named 'explicit_name'.

        'decls' is the enumerator list node of the enum, or None for an
        opaque enum.  Every real enumerator is also recorded as an integer
        constant.  The "__dotdotdotNUM__" placeholders generated by
        _preprocess() are skipped and mark the enum as partial.
        """
        if decls is not None:
            partial = False
            enumerators = []
            enumvalues = []
            nextenumvalue = 0
            for enum in decls.enumerators:
                # placeholders may appear anywhere in the list, e.g. right
                # after an enumerator that was written "A = ..."
                if _r_enum_dotdotdot.match(enum.name):
                    partial = True
                    continue
                if enum.value is not None:
                    nextenumvalue = self._parse_constant(enum.value)
                enumerators.append(enum.name)
                enumvalues.append(nextenumvalue)
                self._add_constants(enum.name, nextenumvalue)
                nextenumvalue += 1
            tp = model.EnumType(explicit_name, tuple(enumerators),
                                tuple(enumvalues))
            tp.partial = partial
        else:   # opaque enum
            tp = model.EnumType(explicit_name, (), ())
        return tp

    def include(self, other):
        """Copy the typedef, struct, union and enum declarations and the
        integer constants of the Parser 'other' into this one."""
        for name, tp in other._declarations.items():
            kind = name.split(' ', 1)[0]
            if kind in ('typedef', 'struct', 'union', 'enum'):
                self._declare(name, tp)
        for k, v in other._int_constants.items():
            self._add_constants(k, v)