/thirtytwo-build.py
Python | 1054 lines | 954 code | 56 blank | 44 comment | 153 complexity | a2ea87ed60276ef46812a9a36a96199d MD5 | raw file
- # thirtytwo-build.py -- mechanically translates C header into Python
- # Copyright (c) 2009, Kang Seonghoon. See thirtytwo package for full license.
- #
- # Mainly used for machine generation of thirtytwo modules: thirtytwo._consts,
- # thirtytwo._types, thirtytwo._funcs. They are accompanied with hand-written
- # additional routines at thirtytwo.consts etc.
- #
- # Features and limitations:
- # - Intended to parse MinGW w32api headers.
- # - Requires Windows environment, the most recent version possible.
- # - Parses a certain subset of the C language.
- # - Handles both object-like and function-like macros.
- # - Handles conditional preprocessor blocks correctly, and can even be
- # configured to ignore certain blocks entirely.
- # - Handles "#include" and "#pragma pack()" directives as well.
- # - Has a partial support for ## operator and no support for # operator.
- # - Parses almost all C type definitions and function prototypes correctly,
- # as long as there are no duplicated symbols.
- # - Translates majority of C header code into correct Python code.
- # - Simple C expression with numbers, strings, type casts, arithmetic/bitwise
- # operators, comparisons and function call can be translated.
- # - Typedefs, macros for types, structures, unions and enums are translated
- # into appropriate definition code.
- # - Function definition is replaced with import code: correct library is
- # searched from predefined list, so recent Windows is required for
- # generation. (Well I'm using Windows XP however.)
- # - Incorrectly generated Python code is commented and reported for
- # later inspection.
- # - Complex C symbols (i.e. type of anonymous struct, separate namespace for
- # struct/union/enum etc.) are mangled. Typedef'ed ones are not affected.
- import os
- import sys
- import re
- import ctypes
- import cStringIO as stringio
# Tokenizer for the C subset found in the headers.  Alternatives are tried in
# the order written at every position, so multi-character operators and
# literals are recognised before the single-character fallback at the end.
# There are no capturing groups, so findall() returns the matched text itself.
#
# A character literal may contain an unescaped double quote and a string
# literal may contain an unescaped apostrophe; neither may span a raw newline
# (line continuations are removed before this pattern is applied), which also
# keeps a stray apostrophe in free text from swallowing the following lines.
TOKEN_PATTERN = re.compile(r'''
    # multicharacter operator or punctuator
    (?: [+\-*/%&|^<=>!]= | && | \|\| | <<=? | >>=? |
        \+\+ | -- | -> | \.\.\. | \#\# ) |
    # numbers
    \.?[0-9](?:[eE][-+]|[A-Za-z0-9.])* |
    # character & string literals
    L?'(?:\\["'?\\abfnrtv]|\\[0-7]{1,3}|\\x[0-9a-fA-F]+|[^'\\\n])+' |
    L?"(?:\\["'?\\abfnrtv]|\\[0-7]{1,3}|\\x[0-9a-fA-F]+|[^"\\\n])*" |
    # identifier & keywords
    [A-Za-z_][A-Za-z_0-9]* |
    # newline (only for pp directive, otherwise ignored)
    \n+ |
    # whitespaces and comments (only for pp directive, otherwise ignored)
    (?: //[^\n]* | /\*.*?\*/ | [ \t\v]+ )+ |
    # other characters
    .
''', re.X | re.S)
# Operator tokens (already in their Python spelling, hence 'or' / 'and') that
# to_pyexpr treats as binary: they end the operand of a type cast and get
# spaces around them in the generated expression.  '?' and ':' are listed too,
# since the ternary operator can be thought of as a pair of binary operators
# that only appear in one fixed form.
BINARY_OPERATORS = set(
    '+ - * / % & | ^ << >> '
    '= += -= *= /= %= &= |= ^= <<= >>= '
    '== != < <= > >= or and , '
    '? :'.split())
def is_ident(s):
    """Return True if *s* is spelled like a C identifier.

    That is: non-empty, not starting with a digit, and made only of ASCII
    letters, digits and underscores.  Keywords are not distinguished.
    """
    if s == '' or '0' <= s[0] <= '9':
        return False
    for ch in s:
        if not ('A' <= ch <= 'Z' or 'a' <= ch <= 'z' or
                '0' <= ch <= '9' or ch == '_'):
            return False
    return True
# type placeholders (compared by identity elsewhere, so keep them unique)
VOID = '*void*'
VARARG = '*vararg*'
ANYFUNCTION = '*anyfunc*'

# Maps the *sorted* tuple of words that make up a primitive C type name to the
# name used for it in the generated code.  Sorting makes the lookup independent
# of the order in which the words were written ("long unsigned int" etc.).
# The empty tuple stands for "no primitive type words seen" (only storage class
# or CV qualifiers), which is possible when a complex type follows.
PRIMTYPE_MAPPINGS = {}
for _pytype, _spellings in [
    ('_X_bool', ['bool']),
    ('_X_char', ['char', 'signed char']),
    ('_X_ubyte', ['unsigned char', 'uint8_t']),  # no c_uchar
    ('_X_wchar', ['wchar_t']),
    ('_X_int', ['int', 'signed', 'signed int']),
    ('_X_uint', ['unsigned', 'unsigned int']),
    ('_X_short', ['short', 'short int', 'signed short',
                  'signed short int']),
    ('_X_ushort', ['unsigned short', 'unsigned short int']),
    ('_X_long', ['long', 'long int', 'signed long', 'signed long int']),
    ('_X_ulong', ['unsigned long', 'unsigned long int']),
    ('_X_longlong', ['long long', 'long long int', 'signed long long',
                     'signed long long int']),
    ('_X_ulonglong', ['unsigned long long', 'unsigned long long int']),
    ('_X_float', ['float']),
    ('_X_double', ['double']),
    ('_X_longdouble', ['long double']),
    ('_X_size_t', ['size_t']),
    ('_X_byte', ['int8_t']),
    ('_X_int16', ['int16_t']),
    ('_X_int32', ['int32_t']),
    ('_X_int64', ['int64_t', '__int64', 'signed __int64']),
    ('_X_uint16', ['uint16_t']),
    ('_X_uint32', ['uint32_t']),
    ('_X_uint64', ['uint64_t', 'unsigned __int64']),
    (VOID, ['void']),
    ('_X_void_p', ['va_list']),
    (None, ['']),  # only storage class or CV; possible for complex type
]:
    for _spelling in _spellings:
        PRIMTYPE_MAPPINGS[tuple(sorted(_spelling.split()))] = _pytype
del _pytype, _spellings, _spelling
class bufferiter(object):
    """Callable reader over an iterable with unlimited push-back.

    Calling the instance returns the next item; items handed to putback()
    are returned first, most recently pushed first.  StopIteration from the
    underlying iterator propagates to the caller (except in peek()).
    """

    def __init__(self, iterable):
        source = iter(iterable)
        self.next = lambda: next(source)
        self.buffer = []

    def __call__(self):
        """Return the next item, preferring pushed-back ones."""
        if not self.buffer:
            return self.next()
        return self.buffer.pop()

    def peek(self):
        """Return the next item without consuming it, or None at the end."""
        try:
            upcoming = self()
        except StopIteration:
            return None
        self.putback(upcoming)
        return upcoming

    def putback(self, value):
        """Push *value* back so that it is the next item returned."""
        self.buffer.append(value)

    def skip(self, func=None):
        """Consume items while func(item) is true; return the first that isn't.

        Without *func*, falsy items are skipped.  The returned item is
        consumed as well.
        """
        if func is None:
            func = lambda item: not item
        item = self()
        while func(item):
            item = self()
        return item

    def until(self, func):
        """Return the list of leading items for which func(item) is true.

        The first item that fails the test is pushed back, not consumed.
        """
        taken = []
        item = self()
        while func(item):
            taken.append(item)
            item = self()
        self.putback(item)
        return taken
- class Processor(object):
def __init__(self, paths, allowed_headers, allowed_dlls):
    """Set up the processor state and open the output modules.

    paths -- directories searched, in order, for header files.
    allowed_headers -- container of header file names that may be processed;
        any other requested header is skipped.
    allowed_dlls -- names of libraries to load through ctypes.windll; the
        loaded libraries are later searched for exported functions.
    """
    self.paths = paths
    self.allowed_headers = allowed_headers
    self.allowed_dlls = {}
    for dll in allowed_dlls:
        try:
            self.allowed_dlls[dll] = getattr(ctypes.windll, dll)
        except (AttributeError, OSError):
            # OSError (WindowsError) when the library cannot be loaded,
            # AttributeError when ctypes has no windll at all.  Anything
            # else, KeyboardInterrupt included, is not ours to swallow.
            sys.stderr.write('Warning: Library %r doesn\'t exist.\n' % dll)
    self.ppsymbols = {} # (as_defined, as_undefined, args or None, tokens)
    self.symbols = {} # (type, defn)
    self.init_output('thirtytwo/')
def init_output(self, prefix):
    """Open the three generated modules under *prefix* and write their headers.

    For each of _consts.py, _types.py and _funcs.py an output file and an
    execution namespace (the *_ctx dict in which emitted lines are test-run)
    are created.  All three namespaces start out with the names exported by
    thirtytwo._support.
    """
    banner = '# generated by thirtytwo-build. DO NOT MODIFY BY HAND!\n'
    use_support = 'from thirtytwo._support import *\n'
    use_consts = 'from thirtytwo._consts import *\n'
    use_types = 'from thirtytwo._types import *\n'

    # thirtytwo._consts module: almost all #define, enums
    self.consts_ctx = {}
    self.consts_buf = open(os.path.join(prefix, '_consts.py'), 'wb')
    self.consts_buf.write(banner + use_support + use_types + '\n')

    # thirtytwo._types module: typedef, struct, union
    self.types_ctx = {}
    self.types_buf = open(os.path.join(prefix, '_types.py'), 'wb')
    self.types_buf.write(banner + use_support + '\n')

    # thirtytwo._funcs module: function decl, function-wrapping #define
    self.funcs_ctx = {}
    self.funcs_buf = open(os.path.join(prefix, '_funcs.py'), 'wb')
    self.funcs_buf.write(banner + use_support + use_consts + use_types + '\n')

    # seed one namespace with the support names, then copy to the others
    exec(use_support, self.consts_ctx, self.consts_ctx)
    for ctx in (self.types_ctx, self.funcs_ctx):
        ctx.update(self.consts_ctx)
def emit_py(self, line, ctx, buf):
    """Test-run one generated line in *ctx* and write it to *buf*.

    If executing the line raises, the line is written as a '##'-prefixed
    comment together with the exception class name and message instead.
    """
    # A "#" or "##" left over outside a string must count as an error, so
    # every '#' after the first character becomes '\#'.  (The first character
    # is left alone.)  Inside a string this is harmless unless the string
    # already contains "\#", which the generator makes sure cannot happen.
    escaped = line[:1] + line[1:].replace('#', '\\#')
    try:
        exec(escaped, ctx)
        buf.write(escaped + '\n')
    except Exception as err:
        reason = ' '.join(str(err).split())
        buf.write('##%s[%s]: %s\n' % (err.__class__.__name__, reason, escaped))
def emit_const(self, line):
    """Emit *line* into the _consts module and share new names with _funcs."""
    consts = self.consts_ctx
    self.emit_py(line, consts, self.consts_buf)
    self.funcs_ctx.update(consts)
def emit_type(self, line):
    """Emit *line* into the _types module.

    The names it defines are then copied into the _consts and _funcs
    namespaces, in that order.
    """
    types = self.types_ctx
    self.emit_py(line, types, self.types_buf)
    for dependent in (self.consts_ctx, self.funcs_ctx):
        dependent.update(types)
def emit_func(self, line):
    """Emit *line* into the _funcs module (nothing is propagated further)."""
    self.emit_py(line, ctx=self.funcs_ctx, buf=self.funcs_buf)
def add_ppsymbol(self, name, tokens, tokens2=None,
                 as_defined=True, as_undefined=None):
    """Predefine a preprocessor macro without emitting any Python for it.

    add_ppsymbol(name, body) defines an object-like macro;
    add_ppsymbol(name, args, body) defines a function-like macro whose
    parameter names are given by the list *args*.  *body* is C source text.

    as_defined / as_undefined are stored with the macro; #ifdef and #ifndef
    use them to decide whether the "defined" and the "not defined" side of
    the conditional is processed.
    """
    if tokens2 is None:
        args, body = None, tokens
    else:
        args, body = tokens, tokens2
    # drop the empty strings the tokenizer leaves for whitespace and comments
    body_tokens = [tok for tok in self.tokenize(body) if tok]
    self.ppsymbols[name] = (as_defined, as_undefined, args, body_tokens)
def to_pyname(self, name):
    """Return the Python identifier used for the C symbol *name*.

    Names containing a space ("struct foo", "*global* 3", "union bar 2")
    are mangled to '_X<k>X_<counter><tag>', where <k> is g/s/u/e for
    *global*/struct/union/enum, <counter> is the trailing number ('0' if
    there is none) and <tag> is the second word, if any.  A plain name
    that starts with '_X' but not '_X_' is rewritten to start with '_XxX'.
    Every other name is returned unchanged.
    """
    if ' ' not in name:
        if name.startswith('_X') and not name.startswith('_X_'):
            return '_XxX' + name[2:]
        return name

    parts = name.split()
    if parts[-1].isdigit():
        counter = parts.pop()
    else:
        counter = '0'
    kind_code = {'*global*': 'g', 'struct': 's',
                 'union': 'u', 'enum': 'e'}[parts[0]]
    mangled = '_X%sX_%s' % (kind_code, counter)
    if len(parts) > 1:
        mangled += parts[1]
    return mangled
def to_pytype(self, primtype, props, context):
    """Build the Python (ctypes-style) type expression for a parsed C type.

    primtype -- base type name, or the VOID placeholder.
    props -- declarator properties, applied from the last one to the first:
        ('*',) pointer, ('()', callconv, args-or-None) function,
        ('[]', tokens) array with the given size expression.
    context -- namespace passed on to to_pyexpr for array sizes.

    Returns the expression as a string, or the ANYFUNCTION placeholder when
    the outermost type is a function without a usable prototype.
    """
    # VOID only makes sense here as a function return type.
    pytype = 'None' if primtype is VOID else self.to_pyname(primtype)
    # Set right after a function property: the function type constructors
    # already stand for a pointer, so the next '*' must not wrap it again.
    pointer_absorbed = False
    for prop in reversed(props):
        kind = prop[0]
        if kind == '*':
            if pytype is ANYFUNCTION:
                pytype = '_X_void_p'
            elif not pointer_absorbed:
                pytype = '_X_POINTER(%s)' % pytype
            pointer_absorbed = False
        elif kind == '()':
            callconv, params = prop[1], prop[2]
            # sometimes the exact prototype is not available in ctypes...
            if params is None or any(t is VARARG for _, t, _ in params):
                pytype = ANYFUNCTION
            else:
                if callconv == 'stdcall':
                    ctor = '_X_WINFUNCTYPE'
                else:
                    ctor = '_X_CFUNCTYPE'
                signature = [pytype]
                signature.extend(self.to_pytype(ptype, pprops, context)
                                 for _, ptype, pprops in params)
                pytype = '%s(%s)' % (ctor, ', '.join(signature))
            pointer_absorbed = True
        elif kind == '[]':
            size = self.to_pyexpr(['('] + prop[1] + [')'], context)
            pytype = '%s*%s' % (pytype, size)
        else:
            assert False
    assert not pointer_absorbed
    return pytype
def to_pyexpr(self, tokens, context):
    """Translate C expression tokens into a Python expression string.

    tokens -- list of C tokens (not modified; a copy is edited).
    context -- namespace in which the result is tentatively evaluated.

    Operators are respelled (&& -> and, || -> or, ! -> not, -> -> .),
    character and string literals are converted to their Python value,
    C suffixes are removed from numbers, and "(type) expr" casts become
    "_X_cast(type, expr)" ("(void) expr" is simply dropped).  If the
    resulting expression evaluates in *context* to a number or string (or
    to an object whose .value is one), repr() of that result is returned;
    otherwise the untouched expression text is returned.

    NOTE: literals and the final expression go through eval(); this is only
    acceptable because the input is the local, trusted header set.
    """
    tokens = tokens[:] # type cast conversion changes tokens.
    expr = []
    i = 0
    while i < len(tokens):
        tok = tokens[i]
        if tok == '&&':
            expr.append('and')
        elif tok == '||':
            expr.append('or')
        elif tok == '!':
            expr.append('not')
        elif tok == '->':
            expr.append('.')
        elif tok.startswith("'"):
            expr.append(repr(ord(eval(tok))))
        elif tok.startswith('"'):
            expr.append(repr(eval(tok)))
        elif tok.startswith("L'"):
            expr.append(repr(ord(eval('u' + tok[1:]))))
        elif tok.startswith('L"'):
            expr.append(repr(eval('u' + tok[1:])))
        elif '0' <= tok[:1] <= '9' or \
             (tok[:1] == '.' and '0' <= tok[1:2] <= '9'):
            # Look at the first character only: comparing the whole token
            # with '9' misses every multi-character number starting with 9.
            number = tok.lower()
            if '.' in number:
                number = number.rstrip('f')
                if number.startswith('0x'):
                    number = repr(float.fromhex(number))
            else:
                number = number.rstrip('sul')
            expr.append(number)
        elif tok == '(': # can be type cast
            try:
                lookahead = bufferiter(tokens[i+1:])
                primtype, storage, props, name = \
                    self.parse_type(lookahead, declaration=False)
            except Exception:
                # whatever follows '(' does not parse as a type
                iscast = False
            else:
                assert not storage and not name
                istypesym = lambda t: self.symbols.get(t, (None,))[0] in \
                                      ('typedef', 'struct', 'union',
                                       'enum', 'builtin')
                iscast = lookahead.peek() == ')'
                if iscast:
                    ignorecast = primtype is VOID and not props
                    iscast = istypesym(primtype) or ignorecast
            if iscast:
                # type cast expr ends at the next unmatching )/]/} or
                # binary/ternary operators whichever comes first.
                depth = 0
                typeend = None
                exprend = i
                while exprend < len(tokens):
                    if tokens[exprend] in '([{':
                        depth += 1
                    elif tokens[exprend] in ')]}':
                        depth -= 1
                        if depth == 0 and typeend is None: typeend = exprend
                        if depth < 0: break
                    elif depth == 0:
                        # strictly binary operator only: an operator right
                        # after the ')' of the cast is a unary one
                        if tokens[exprend] in BINARY_OPERATORS and \
                           exprend - 1 > typeend:
                            break
                    exprend += 1
                assert depth <= 0
                # a cast needs at least one token to apply to
                iscast = typeend is not None and typeend + 1 < exprend
            if iscast:
                # XXX we are "pretending" pytype as C syntax!
                if not ignorecast: # ignore (void)
                    expr.append('_X_cast')
                    expr.append('(')
                    expr += self.tokenize(self.to_pytype(primtype, props, context))
                    expr.append(',')
                    # closes the _X_cast( call after the casted operand
                    tokens.insert(exprend, ')')
                i = typeend
            else:
                expr.append(tok)
        else:
            expr.append(tok)
        i += 1

    # Join the tokens, inserting spaces only where they are required
    # (between words) or customary (around binary operators).
    exprstr = []
    prevtoken = pprevtoken = '@'
    for token in expr:
        if is_ident(prevtoken) and is_ident(token):
            exprstr.append(' ')
        elif prevtoken == 'not':
            # "not" followed by a number would otherwise fuse into "not0"
            exprstr.append(' ')
        elif prevtoken in BINARY_OPERATORS and \
             not (prevtoken == '-' and pprevtoken in '([{'):
            exprstr.append(' ')
        elif token in BINARY_OPERATORS and token != ',':
            exprstr.append(' ')
        exprstr.append(token)
        pprevtoken = prevtoken
        prevtoken = token
    exprstr = ''.join(exprstr)

    # Constant-fold when possible; this is strictly best effort.
    try:
        result = eval(exprstr, context, context)
        if isinstance(result, (int, long, float, str, unicode)) or \
           isinstance(result.value, (int, long, float, str, unicode)):
            return repr(result)
    except Exception:
        pass
    return exprstr
def search_dll_by_func(self, fname):
    """Return the name of a loaded library that has the attribute *fname*.

    The libraries in self.allowed_dlls are tried in dictionary order; the
    string '_unknown_' is returned when none of them provides the name.
    """
    for dllname, library in self.allowed_dlls.items():
        try:
            getattr(library, fname)
        except AttributeError:
            continue
        return dllname
    return '_unknown_'
def handle_ppsymbol(self, name, args, tokens):
    """Record a #define and emit its Python counterpart.

    name -- macro name.
    args -- list of parameter names, or None for an object-like macro.
    tokens -- replacement tokens; when empty, the macro is only recorded.

    An object-like macro becomes "name = expr", a function-like one
    "name = lambda args: expr", both in the _consts module.
    """
    self.ppsymbols[name] = (True, False, args, tokens)
    if not tokens:
        return
    expr = self.to_pyexpr(tokens, self.consts_ctx)
    if args is None:
        statement = '%s = %s' % (name, expr)
    else:
        statement = '%s = lambda %s: %s' % (name, ', '.join(args), expr)
    self.emit_const(statement)
def handle_symbol(self, name, type, defn):
    """Record a parsed C symbol and emit the Python code that defines it.

    name -- symbol name (possibly a spaced name such as "struct foo").
    type -- 'struct', 'union', 'typedef', 'enum', or the storage class of
        a plain declaration ('' when there is none).
    defn -- depends on *type*:
        struct/union: iterable of (member name or None, primtype, props,
            bit-size tokens or None);
        typedef and plain declarations: (primtype, props);
        enum: iterable of (member name, value tokens; empty if implicit).

    Plain declarations are only emitted when they declare a function; any
    *type* not listed above is just reported on standard output.
    """
    self.symbols[name] = (type, defn)
    if type == 'struct' or type == 'union':
        fields = []
        anons = []
        for mname, mprimtype, mprops, mbitsz in defn:
            mpytype = self.to_pytype(mprimtype, mprops, self.types_ctx)
            if mname is None:
                # unnamed member: invent a name and list it as anonymous
                mname = '_XaX_%d' % (len(anons) + 1)
                anons.append(mname)
            if mbitsz is None:
                fields.append('(%r, %s)' % (mname, mpytype))
            else:
                fields.append('(%r, %s, %s)' % (mname, mpytype,
                        self.to_pyexpr(mbitsz, self.types_ctx)))
        pyname = self.to_pyname(name)
        head = 'class %s(%s): ' % (pyname,
                '_X_Structure' if type == 'struct' else '_X_Union')
        # '@' marks where an attribute name starts; it becomes "pyname."
        # for assignments after the class, or nothing inside the class body.
        body = '@_fields_ = [%s]' % ', '.join(fields)
        if self.alignstack[-1] is not None:
            body = '@_pack_ = %d; ' % self.alignstack[-1] + body
        if anons:
            body += '; @_anonymous_ = %r' % anons
        if pyname in body: # self-referential: split declaration
            self.emit_type(head + 'pass')
            self.emit_type(body.replace('@', pyname + '.'))
        else:
            self.emit_type(head + body.replace('@', ''))
        return
    elif type == 'typedef':
        assert ' ' not in name
        self.emit_type('%s = %s' % (name, self.to_pytype(defn[0], defn[1],
                                                         self.types_ctx)))
        return
    elif type == 'enum':
        pyname = self.to_pyname(name)
        self.emit_const('%s = _X_int' % pyname)
        # Each value is "(last explicit value) + distance from it".  The
        # distance starts at -1 so that the first enumerator, when it has no
        # explicit value, comes out as (0) + 0 as C requires.
        mbase = ['0']
        offset = -1
        for mname, mvalue in defn:
            if mvalue:
                mbase = mvalue
                offset = 0
            else:
                offset += 1
            self.emit_const('%s = %s(%s)' % (mname, pyname,
                    self.to_pyexpr(['('] + mbase + [')', '+', str(offset)],
                                   self.consts_ctx)))
        return
    elif type == '':
        primtype, props = defn
        if props and props[-1][0] == '()':
            dll = self.search_dll_by_func(name)
            pytype = self.to_pytype(primtype, [('*',)] + props, self.funcs_ctx)
            if pytype is ANYFUNCTION:
                self.emit_func('%s = _X_windll.%s.%s' % (name, dll, name))
            else:
                self.emit_func('%s = %s((%r, _X_windll.%s))' %
                               (name, pytype, name, dll))
            return
    else:
        sys.stdout.write('# %s := %s %s\n' % (name, type, defn))
def subst_pp(self, tokens, ifcond=False):
    """Return *tokens* with the known preprocessor macros expanded.

    tokens -- list of tokens (not modified).
    ifcond -- true when expanding the condition of #if / #elif: then
        "defined X" and "defined(X)" become '1' or '0', and every other
        identifier that is not a macro becomes '0'.

    Object-like macros are replaced by their stored tokens.  Function-like
    macros are expanded only when followed by '('; their arguments are
    expanded first and then substituted for the parameter names.
    """
    ntokens = len(tokens)
    result = []
    i = 0
    while i < ntokens:
        token = tokens[i]
        i += 1
        if not is_ident(token):
            result.append(token)
            continue
        if token in self.ppsymbols:
            spec = self.ppsymbols[token]
            params, body = spec[2], spec[3]
            if params is None: # object-like
                result.extend(body)
            elif i < ntokens and tokens[i] == '(': # function-like
                args = []
                while tokens[i] != ')':
                    i += 1
                    start = i
                    depth = 0
                    # an argument runs up to the next ',' or ')' that is
                    # not nested inside parentheses
                    while depth > 0 or (tokens[i] != ',' and tokens[i] != ')'):
                        if tokens[i] == '(': depth += 1
                        elif tokens[i] == ')': depth -= 1
                        i += 1
                    args.append(self.subst_pp(tokens[start:i], ifcond))
                i += 1
                if not params and args == [[]]:
                    # "FOO()" for a macro without parameters: the loop above
                    # always collects one (here empty) argument
                    args = []
                assert len(args) == len(params)
                for btoken in body:
                    try:
                        result.extend(args[params.index(btoken)])
                    except ValueError: # not a parameter name
                        result.append(btoken)
            else:
                result.append(token)
        elif ifcond: # as #if's condition expression
            if token == 'defined': # special case
                i += 1
                token = tokens[i-1]
                if token == '(':
                    i += 2
                    assert tokens[i-1] == ')'
                    token = tokens[i-2]
                result.append('1' if token in self.ppsymbols else '0')
            else:
                result.append('0')
        else:
            result.append(token)
    return result
def concat_pp(self, tokens):
    """Apply the ## operator: glue each token after '##' onto the one before.

    Returns a new list; *tokens* is not modified.  A '##' with nothing in
    front of it (for instance when its left operand expanded to nothing) is
    dropped and the following token is kept as it is.  A trailing '##' is
    dropped as well.
    """
    result = []
    concatnext = False
    for token in tokens:
        if token == '##':
            concatnext = True
        elif concatnext and result: # XXX can produce invalid token
            result[-1] += token
            concatnext = False
        else:
            result.append(token)
            concatnext = False
    return result
def eval_cond_pp(self, tokens):
    """Evaluate the condition tokens of an #if / #elif and return a bool.

    Macros are expanded first (see subst_pp with ifcond=True), the logical
    operators are respelled for Python, character constants are replaced by
    their character code and the U/L suffixes of integer constants are
    removed; the result is then evaluated as a Python expression in an
    empty namespace.
    """
    expr = []
    for token in self.subst_pp(tokens, True):
        if token == '&&':
            expr.append('and')
        elif token == '||':
            expr.append('or')
        elif token == '!':
            expr.append('not')
        elif token.startswith("'") or token.startswith("L'"):
            # must be text: the pieces are joined into one string below
            expr.append(str(ord(token.split("'")[1])))
        elif '0' <= token[:1] <= '9':
            expr.append(token.rstrip('uUlL'))
        else:
            expr.append(token)
    return bool(eval(' '.join(expr), {}, {}))
def process_pp(self, tokens):
    """Carry out one preprocessor directive.

    tokens -- the directive line without the leading '#' and without empty
        tokens; tokens[0] is the directive name.  'define()' is the pseudo
        name under which function-like macro definitions arrive.

    self.ppblocks is the stack of conditional blocks; each entry starts with
    "is this branch active" followed (until #else) by "may a later branch
    still become active".  Conditional directives are tracked even inside
    inactive blocks; every other directive is ignored there.
    """
    if not tokens: return
    directive = tokens[0]
    outer_active = self.ppblocks[-1][0]
    if directive == 'if':
        cond = self.eval_cond_pp(tokens[1:])
        self.ppblocks.append((outer_active and cond,
                              outer_active and not cond))
        return
    if directive == 'ifdef' or directive == 'ifndef':
        assert len(tokens) == 2
        try:
            blockstat = self.ppsymbols[tokens[1]][0:2]
        except KeyError: # not a known macro
            blockstat = (False, True)
        if directive == 'ifndef':
            blockstat = blockstat[::-1]
        self.ppblocks.append((outer_active and blockstat[0],
                              outer_active and blockstat[1]))
        return
    if directive == 'elif':
        cond = self.eval_cond_pp(tokens[1:])
        pending = self.ppblocks[-1][1]
        self.ppblocks[-1] = (pending and cond, pending and not cond)
        return
    if directive == 'else':
        assert len(tokens) == 1
        self.ppblocks[-1] = self.ppblocks[-1][1:]
        return
    if directive == 'endif':
        assert len(tokens) == 1
        self.ppblocks.pop()
        return
    if not self.ppblocks[-1][0]: # current block is ignored
        return
    if directive == 'define':
        assert len(tokens) >= 2
        substed = self.concat_pp(self.subst_pp(tokens[2:]))
        self.handle_ppsymbol(tokens[1], None, substed)
        return
    if directive == 'define()':
        assert len(tokens) >= 4 # "define()", "MACRONAME", "(", ")"
        assert tokens[2] == '('
        argnames = []
        if tokens[3] == ')':
            body = tokens[4:]
        else:
            # parameter names sit at every other position after the '('
            for i in range(3, len(tokens), 2):
                argnames.append(tokens[i])
                if tokens[i+1] == ')':
                    body = tokens[i+2:]
                    break
                assert tokens[i+1] == ','
            else:
                assert False
        substed = self.subst_pp(body)
        self.handle_ppsymbol(tokens[1], argnames, substed)
        return
    if directive == 'undef':
        assert len(tokens) == 2
        self.ppsymbols.pop(tokens[1], None)
        return
    if directive == 'pragma' and tokens[1:2] == ['pack']:
        assert tokens[2] == '(' and tokens[-1] == ')'
        if tokens[3] == 'push':
            assert len(tokens) == 7
            assert tokens[4] == ',' and tokens[5].isdigit()
            self.alignstack.append(int(tokens[5]))
            return
        if tokens[3] == 'pop':
            assert len(tokens) == 5
            self.alignstack.pop()
            return
        # other forms of pack() fall through and are reported below
    if directive == 'include':
        if len(tokens) == 2:
            tokens[1:] = self.concat_pp(self.subst_pp(tokens[1:]))
        if (len(tokens) == 2 and len(tokens[1]) >= 2 and
                tokens[1][0] == '"' and tokens[1][-1] == '"'):
            # "file.h" is tokenized as one string literal
            filename = tokens[1][1:-1]
        else:
            assert ((tokens[1] == '<' and tokens[-1] == '>') or
                    (tokens[1] == '"' and tokens[-1] == '"'))
            filename = ''.join(tokens[2:-1])
        self.process_internal(filename)
        return
    sys.stdout.write('# unknown PP directive: ' + ' '.join(tokens) + '\n')
def tokenize(self, s):
    """Split C source text into a list of tokens.

    Backslash-newline line continuations are removed first.  A run of
    newlines yields a single '\\n' token, other whitespace and comments
    yield an empty string (so callers can tell that something separated
    two tokens), and everything else is returned as matched.
    """
    tokens = []
    for raw in TOKEN_PATTERN.findall(s.replace('\\\n', '')):
        if raw[0] == '\n':
            tokens.append('\n')
        elif raw[0].isspace() or raw[:2] in ('//', '/*'):
            tokens.append('')
        else:
            tokens.append(raw)
    return tokens
def preprocess(self, s):
    """Generate the macro-expanded tokens of the C source text *s*.

    Lines whose first token is '#' are handed to process_pp (after any
    ordinary tokens collected so far have been expanded and yielded);
    lines inside an inactive conditional block are dropped; all other
    tokens are collected, expanded with subst_pp/concat_pp and yielded.
    """
    not_eol = lambda t: t != '\n'
    tokens = self.tokenize(s)
    tokens.append('\n') # the last empty line is to be ignored
    stream = bufferiter(tokens)
    pending = []
    try:
        while True:
            token = stream.skip()
            if token == '#': # preprocessor line
                # flush what has been collected so far
                for expanded in self.concat_pp(self.subst_pp(pending)):
                    yield expanded
                pending = []
                token = stream.skip()
                line = [token]
                if token == 'define':
                    macroname = stream.skip()
                    if macroname != '\n':
                        line.append(macroname)
                        # the token right after the name, unskipped: only a
                        # '(' directly attached makes the macro function-like
                        line.append(stream())
                        if line[-1] == '(':
                            # define pseudo-directive '#define()' in this case
                            line[0] = 'define()'
                        if line[-1] == '\n':
                            del line[-1]
                        else:
                            line += stream.until(not_eol)
                            token = stream.skip()
                            assert token == '\n'
                elif token != '\n':
                    line += stream.until(not_eol)
                    token = stream.skip()
                    assert token == '\n'
                self.process_pp([t for t in line if t])
            elif token == '\n':
                pass
            elif not self.ppblocks[-1][0]: # ignored line
                stream.skip(not_eol)
            else:
                pending.append(token)
                pending += [t for t in stream.until(not_eol) if t]
                token = stream.skip()
                assert token == '\n'
    except StopIteration:
        pass
    for expanded in self.concat_pp(self.subst_pp(pending)):
        yield expanded
def parse_primtype(self, iter):
    """Read the leading specifiers of a declaration from *iter*.

    Consumes const/volatile/restrict (each at most once), one optional
    extern/static, and as many words as still form a known primitive type
    name.  The first token that fits none of these is pushed back.

    Returns (type, storage): *type* is the PRIMTYPE_MAPPINGS entry for the
    words read (None when there were none), *storage* is 'extern',
    'static' or ''.
    """
    storage = ''
    qualifiers = set()
    words = []
    token = iter()
    while True:
        if token in ('const', 'volatile', 'restrict'):
            assert token not in qualifiers
            qualifiers.add(token)
        elif token in ('extern', 'static'):
            assert not storage
            storage = token
        elif tuple(sorted(words + [token])) in PRIMTYPE_MAPPINGS:
            words.append(token)
        else:
            break
        token = iter()
    iter.putback(token)
    return PRIMTYPE_MAPPINGS[tuple(sorted(words))], storage
def parse_type(self, iter, prev=None, declaration=True):
    """Parse one declarator (with its type specifiers unless *prev* is given).

    iter -- bufferiter over the tokens; the token that ends the declarator
        (',', ';', ')', ...) is pushed back.
    prev -- (primtype, storage) of the previous declarator of the same
        declaration ("int a, *b;"), or None to read the specifiers first.
    declaration -- when false (type names in casts), no struct/union/enum
        body and no declared name are accepted.

    Returns (primtype, storage, props, name).  *props* is a list of
    ('*',), ('()', calling convention or None, args or None) and
    ('[]', size tokens) tuples; each function argument is
    (name, primtype, props).  struct/union/enum bodies met on the way are
    reported through handle_symbol.
    """
    if prev is None:
        primtype, storage = self.parse_primtype(iter)
    else: # e.g. int a, *b;
        primtype, storage = prev
    token = iter()
    if primtype is None:
        if token in ('struct', 'union', 'enum'):
            # storage class or CV + struct/union/enum
            assert primtype is None
            complextype = token
            complexname = None
            token = iter()
            if is_ident(token):
                complexname = token
                token = iter()
            assert (declaration and token == '{') or complexname is not None
            if complexname is None:
                # anonymous: numbered within the enclosing named type
                primtype = '%s %d' % (self.anonprefix or '*global*',
                                      self.anoncounter)
                self.anoncounter += 1
            else:
                primtype = '%s %s' % (complextype, complexname)
            if declaration and token == '{':
                members = []
                if self.anonprefix is None:
                    # outermost body: nested anonymous types are numbered
                    # from 1 under this type's name
                    self.anonprefix = primtype
                    prevcounter = self.anoncounter
                    self.anoncounter = 1
                if complextype == 'enum': # declarator-like
                    while True:
                        name = iter()
                        assert is_ident(name)
                        token = iter()
                        if token == '=':
                            value = iter.until(lambda t: t != ',' and
                                                         t != '}')
                            token = iter()
                        else:
                            value = []
                        members.append((name, value))
                        if token == '}': break
                        assert token == ','
                else: # declaration-like
                    while True:
                        token = iter()
                        if token == '}': break
                        iter.putback(token)
                        prev = None
                        while True:
                            mprimtype, mstorage, mprops, mname = \
                                self.parse_type(iter, prev)
                            # only struct/union members may be unnamed
                            assert not mstorage and (
                                (isinstance(mprimtype, str) and
                                 (mprimtype.startswith('struct ') or
                                  mprimtype.startswith('union '))) or
                                mname is not None)
                            token = iter()
                            if token == ':':
                                mbitsz = iter.until(lambda t: t != ';' and
                                                              t != ',')
                                assert mbitsz
                                token = iter()
                            else:
                                mbitsz = None
                            members.append((mname, mprimtype, mprops, mbitsz))
                            if token == ';': break
                            assert token == ','
                            prev = (mprimtype, mstorage)
                self.handle_symbol(primtype, complextype, members)
                if self.anonprefix == primtype:
                    self.anonprefix = None
                    self.anoncounter = prevcounter
                token = iter()
        else:
            # pre-existing type
            assert is_ident(token)
            primtype = token
            token = iter()

    name = None
    props = []
    # One entry per open parenthesis level of the declarator: the pointers
    # seen at that level, and the calling convention given at that level.
    lpropstack = [[]]
    ldeclstack = [None]
    while True:
        if token == '(':
            lpropstack.append([])
            ldeclstack.append(None)
        elif token in ('const', 'volatile', 'inline', '__inline__'):
            pass # unused
        elif token == '__stdcall': # calling convention
            ldeclstack[-1] = 'stdcall'
        elif token == '*': # pointer
            while token == '*':
                lpropstack[-1].append(('*',))
                token = iter()
            iter.putback(token)
        else:
            break
        token = iter()
    if declaration and is_ident(token):
        name = token
        token = iter()

    rprops = []
    while True:
        if token == '(': # function
            args = []
            token = iter()
            if token == 'void' and iter.peek() == ')': # no arguments
                token = iter()
                assert token == ')'
            elif token == ')': # prototypeless
                args = None
            else:
                iter.putback(token)
                while True:
                    token = iter()
                    if token == '...': # vararg
                        aname = astorage = None
                        aprimtype = VARARG
                        aprops = []
                    else:
                        iter.putback(token)
                        aprimtype, astorage, aprops, aname = self.parse_type(iter)
                        assert not astorage
                    args.append((aname, aprimtype, aprops))
                    token = iter()
                    if token == ')': break
                    assert token == ','
                assert not any(t is VOID and not p for n,t,p in args)
            rprops.append(('()', ldeclstack[-1], args))
            ldeclstack[-1] = None
        elif token == '[': # array indices
            tokens = iter.until(lambda t: t != ']')
            rprops.append(('[]', tokens))
            token = iter()
            assert token == ']'
        elif len(lpropstack) > 1 and token == ')':
            # close one parenthesis level: fold its properties into props
            props += lpropstack.pop()[::-1]
            props += rprops
            # they are recorded now; without this reset they would be
            # appended a second time at the next fold or at the end
            rprops = []
            decl = ldeclstack.pop()
            if decl is not None:
                # a calling convention written inside the parentheses
                # belongs to the function suffix that follows them
                assert not ldeclstack[-1]
                ldeclstack[-1] = decl
        else:
            break
        token = iter()
    assert len(lpropstack) == len(ldeclstack) == 1
    assert not ldeclstack[0]
    iter.putback(token)
    props += lpropstack[0][::-1]
    props += rprops

    if props and props[-1][0] == '*': # special types
        if primtype == '_X_char':
            primtype = '_X_char_p'
            props.pop()
        elif primtype == '_X_wchar':
            primtype = '_X_wchar_p'
            props.pop()
        elif primtype is VOID:
            primtype = '_X_void_p'
            props.pop()
    return primtype, storage, props, name
def parse(self, tokens):
    """Parse a stream of preprocessed tokens as a sequence of declarations.

    Every declarator of a "typedef ...;" is reported to handle_symbol with
    type 'typedef'; every named declarator of another declaration is
    reported with its storage class as the type.  A declaration without a
    name must be a struct, union or enum.  Parsing ends silently when the
    tokens run out.
    """
    stream = bufferiter(tokens)
    try:
        while True:
            token = stream()
            is_typedef = token == 'typedef'
            if not is_typedef:
                stream.putback(token)
            prev = None
            while True:
                primtype, storage, props, name = self.parse_type(stream, prev)
                if is_typedef:
                    assert name is not None and not storage
                    self.handle_symbol(name, 'typedef', (primtype, props))
                elif name is not None:
                    self.handle_symbol(name, storage, (primtype, props))
                else:
                    assert (isinstance(primtype, str) and
                            (primtype.startswith('struct ') or
                             primtype.startswith('union ') or
                             primtype.startswith('enum ')))
                token = stream()
                if token == ';': break
                assert token == ','
                # further declarators share the specifiers: "int a, *b;"
                prev = (primtype, storage)
    except StopIteration:
        pass
def process_internal(self, filename):
    """Preprocess and parse one header, if it is on the allowed list.

    filename -- header name as written in #include; it is looked up in
        each directory of self.paths, in order.

    Headers not in self.allowed_headers are skipped with a message on
    standard error.  Raises IOError when an allowed header cannot be opened
    in any of the directories.
    """
    if filename not in self.allowed_headers:
        sys.stderr.write('skipping %s...\n' % filename)
        return
    fp = None
    for ipath in self.paths:
        try:
            fp = open(os.path.join(ipath, filename), 'rU')
        except IOError:
            continue # not in this directory; try the next one
        break
    if fp is None:
        raise IOError('header %r not found in any of %r' %
                      (filename, self.paths))
    sys.stderr.write('processing %s...\n' % filename)
    try:
        source = fp.read()
    finally:
        fp.close()
    self.parse(self.preprocess(source))
def process(self, filename):
    """Process *filename* as a top-level header with fresh parsing state.

    Resets the conditional-block stack, the #pragma pack stack and the
    anonymous-type numbering, then processes the header.  Afterwards every
    conditional block must have been closed.
    """
    # numbering of anonymous struct/union/enum types
    self.anonprefix = None
    self.anoncounter = 1
    # no packing in effect
    self.alignstack = [None]
    # the file itself counts as one active block
    self.ppblocks = [(True, False)]
    self.process_internal(filename)
    assert len(self.ppblocks) == 1
if __name__ == '__main__':
    # Usage: thirtytwo-build.py INCLUDE_DIR...
    ALLOWED_HEADERS = set(
        # windows.h with WIN32_LEAN_AND_MEAN
        ['windows.h', 'windef.h', 'wincon.h', 'winbase.h', 'wingdi.h',
         'winuser.h', 'winnls.h', 'winver.h', 'winnetwk.h', 'winreg.h',
         'winsvc.h', 'winnt.h', 'winerror.h', 'basetsd.h'] +
        # windows.h without WIN32_LEAN_AND_MEAN
        ['cderr.h', 'dde.h', 'ddeml.h', 'dlgs.h', 'imm.h', 'lzexpand.h',
         'mmsystem.h', 'nb30.h', 'rpc.h', 'rpcdce.h', 'rpcdcep.h',
         'rpcnsi.h', 'rpcnterr.h', 'rpcndr.h', 'rpcnsip.h', 'shellapi.h',
         'winperf.h', 'commdlg.h', 'unknwn.h', 'objfwd.h', 'basetyps.h',
         'wtypes.h', 'winspool.h', 'ole2.h', 'winsock2.h'] +
        # additional
        ['commctrl.h', 'prsht.h'] +
        # #pragma pack() headers
        ['pshpack1.h', 'pshpack2.h', 'pshpack4.h', 'pshpack8.h', 'poppack.h'])

    ALLOWED_DLLS = set(
        ['kernel32', 'user32', 'gdi32', 'advapi32', 'comctl32', 'comdlg32',
         'shell32', 'wsock32', 'ws2_32', 'imm32', 'winmm', 'opengl32',
         'version', 'winspool', 'rpcrt4', 'rpcns4', 'mpr'])

    proc = Processor(paths=sys.argv[1:],
                     allowed_headers=ALLOWED_HEADERS,
                     allowed_dlls=ALLOWED_DLLS)

    # macros assumed to be predefined, as (name, replacement text)
    for symbol, replacement in [
        ('_X86_', ''),
        ('NULL', 'None'),
        ('UNICODE', ''),
        ('NO_STRICT', ''),
        ('NOMINMAX', ''),
        ('WIN32_LEAN_AND_MEAN', ''),
        ('WINVER', '0x0501'),
        ('DECLSPEC_NORETURN', ''),
    ]:
        proc.add_ppsymbol(symbol, replacement)
    proc.add_ppsymbol('DECLARE_STDCALL_P', ['type'], 'type __stdcall')

    for header in ('windows.h', 'commctrl.h'):
        proc.process(header)