thirtytwo-build.py - Copyright (c) 2009, Kang Seonghoon. Se…

/thirtytwo-build.py

https://bitbucket.org/lifthrasiir/parang-thirtytwo · Python · 1054 lines · 879 code · 121 blank · 54 comment · 292 complexity · a2ea87ed60276ef46812a9a36a96199d MD5 · raw file

# thirtytwo-build.py -- mechanically translates C header into Python
# Copyright (c) 2009, Kang Seonghoon. See thirtytwo package for full license.
#
# Mainly used for machine generation of thirtytwo modules: thirtytwo._consts,
# thirtytwo._types, thirtytwo._funcs. They are accompanied with hand-written
# additional routines at thirtytwo.consts etc.
#
# Features and limitations:
# - Intended to parse MinGW w32api headers.
# - Requires Windows environment, the most recent version possible.
# - Parses a certain subset of the C language.
#   - Handles both object-like and function-like macros.
#   - Handles conditional preprocessor blocks correctly, and can even be
#     configured to ignore certain blocks entirely.
#   - Handles "#include" and "#pragma pack()" directives as well.
#   - Has partial support for the ## operator and no support for the # operator.
#   - Parses almost all C type definitions and function prototypes correctly,
#     as long as there are no duplicated symbols.
# - Translates majority of C header code into correct Python code.
#   - Simple C expression with numbers, strings, type casts, arithmetic/bitwise
#     operators, comparisons and function call can be translated.
#   - Typedefs, macros for types, structures, unions and enums are translated
#     into appropriate definition code.
#   - Function definition is replaced with import code: correct library is
#     searched from predefined list, so recent Windows is required for
#     generation. (Well I'm using Windows XP however.)
#   - Incorrectly generated Python code is commented and reported for
#     later inspection.
#   - Complex C symbols (i.e. type of anonymous struct, separate namespace for
#     struct/union/enum etc.) are mangled. Typedef'ed ones are not affected.

import os
import sys
import re
import ctypes
import cStringIO as stringio

# Tokenizer for the C subset handled by this script.  The pattern has no
# capturing groups, so findall() returns the full text of every token.
# Alternatives are tried in the order written: multi-character operators,
# numbers (preprocessing-number style, suffixes included), character and
# string literals, identifiers, runs of newlines, runs of blanks/comments,
# and finally any single character.  re.S lets /* ... */ comments span lines.
# NOTE(review): the literal alternatives exclude an unescaped " inside '...'
# and an unescaped ' inside "...", so such literals fall through to the
# single-character alternative -- confirm this is intended.
TOKEN_PATTERN = re.compile(r'''
        # multicharacter operator or punctuator
            (?: [+\-*/%&|^<=>!]= | && | \|\| | <<=? | >>=? |
                \+\+ | -- | -> | \.\.\. | \#\# ) |
        # numbers
            \.?[0-9](?:[eE][-+]|[A-Za-z0-9.])* |
        # character & string literals
            L?'(?:\\["'?\\abfnrtv]|\\[0-7]{1,3}|\\x[0-9a-fA-F]+|[^'"\\])+' |
            L?"(?:\\["'?\\abfnrtv]|\\[0-7]{1,3}|\\x[0-9a-fA-F]+|[^'"\\])*" |
        # identifier & keywords
            [A-Za-z_][A-Za-z_0-9]* |
        # newline (only for pp directive, otherwise ignored)
            \n+ |
        # whitespaces and comments (only for pp directive, otherwise ignored)
            (?: //[^\n]* | /\*.*?\*/ | [ \t\v]+ )+ |
        # other characters
            .
        ''', re.X | re.S)

# Operator tokens that sit between two operands in the generated Python
# expression ('or'/'and' are the already-translated forms of || and &&).
BINARY_OPERATORS = set((
    # arithmetic, bitwise and shift operators
    '+ - * / % & | ^ << >> '
    # plain and compound assignment
    '= += -= *= /= %= &= |= ^= <<= >>= '
    # comparisons, logical operators and the comma operator
    '== != < <= > >= or and , '
    # the two halves of the ternary operator, which can be thought of as
    # a pair of binary operators that only appear in one fixed form
    '? :'
).split())

def is_ident(s):
    """Tell whether `s` is spelled like a C identifier.

    True only for a non-empty string made of ASCII letters, digits and
    underscores whose first character is not a digit.
    """
    # \Z (not $) so that a trailing newline is rejected as well.
    return re.match(r'[A-Za-z_][A-Za-z_0-9]*\Z', s) is not None

# Type placeholders.  These are sentinels rather than real Python type names;
# the translator tests for them with "is".
VOID = '*void*'
VARARG = '*vararg*'
ANYFUNCTION = '*anyfunc*'

# Maps a primitive C type to the name of its Python counterpart.  Keys are
# the sorted tuple of the words making up the type, so the words may be
# looked up regardless of the order they were written in.
PRIMTYPE_MAPPINGS = {}
for _spelling, _pytype in [
    ('bool', '_X_bool'),
    ('char', '_X_char'),
    ('signed char', '_X_char'),
    ('unsigned char', '_X_ubyte'), # no c_uchar
    ('wchar_t', '_X_wchar'),
    ('int', '_X_int'),
    ('signed', '_X_int'),
    ('signed int', '_X_int'),
    ('unsigned', '_X_uint'),
    ('unsigned int', '_X_uint'),
    ('short', '_X_short'),
    ('short int', '_X_short'),
    ('signed short', '_X_short'),
    ('signed short int', '_X_short'),
    ('unsigned short', '_X_ushort'),
    ('unsigned short int', '_X_ushort'),
    ('long', '_X_long'),
    ('long int', '_X_long'),
    ('signed long', '_X_long'),
    ('signed long int', '_X_long'),
    ('unsigned long', '_X_ulong'),
    ('unsigned long int', '_X_ulong'),
    ('long long', '_X_longlong'),
    ('long long int', '_X_longlong'),
    ('signed long long', '_X_longlong'),
    ('signed long long int', '_X_longlong'),
    ('unsigned long long', '_X_ulonglong'),
    ('unsigned long long int', '_X_ulonglong'),
    ('float', '_X_float'),
    ('double', '_X_double'),
    ('long double', '_X_longdouble'),
    ('size_t', '_X_size_t'),
    ('int8_t', '_X_byte'),
    ('int16_t', '_X_int16'),
    ('int32_t', '_X_int32'),
    ('int64_t', '_X_int64'),
    ('uint8_t', '_X_ubyte'),
    ('uint16_t', '_X_uint16'),
    ('uint32_t', '_X_uint32'),
    ('uint64_t', '_X_uint64'),
    ('__int64', '_X_int64'),
    ('signed __int64', '_X_int64'),
    ('unsigned __int64', '_X_uint64'),
    ('void', VOID),
    ('va_list', '_X_void_p'),
    ('', None), # only storage class or CV; possible for complex type
]:
    PRIMTYPE_MAPPINGS[tuple(sorted(_spelling.split()))] = _pytype
del _spelling, _pytype

class bufferiter(object):
    """Callable token source with push-back support.

    Calling the instance returns the next item: the most recently pushed
    back item if there is one, otherwise the next item of the wrapped
    iterable.  StopIteration is raised once both are exhausted.
    """

    def __init__(self, iterable):
        source = iter(iterable)
        # Use the next() builtin (Python 2.6+) instead of the iterator's
        # Python-2-only ".next" method; "self.next" stays a zero-argument
        # callable as before.
        self.next = lambda: next(source)
        self.buffer = []    # pushed-back items; the last one is returned first

    def __call__(self):
        """Return the next item, preferring pushed-back ones."""
        if self.buffer:
            return self.buffer.pop()
        return self.next()

    def peek(self):
        """Return the next item without consuming it, or None at the end."""
        try:
            value = self()
        except StopIteration:
            return None
        self.putback(value)
        return value

    def putback(self, value):
        """Push `value` back so that the next call returns it."""
        self.buffer.append(value)

    def skip(self, func=None):
        """Consume items while they are to be skipped; return the first kept.

        Without `func`, falsy items are skipped; otherwise items for which
        func(item) is true are skipped.  The returned item is consumed.
        """
        value = self()
        if func is None:
            while not value:
                value = self()
        else:
            while func(value):
                value = self()
        return value

    def until(self, func):
        """Return the list of leading items for which func(item) is true.

        The first item failing the test is pushed back, so it is what the
        next call returns.
        """
        result = [self()]
        while func(result[-1]):
            result.append(self())
        self.putback(result[-1])
        return result[:-1]

class Processor(object):
    def __init__(self, paths, allowed_headers, allowed_dlls):
        """Set up the processor state and open the output modules.

        paths and allowed_headers are stored as given.  Each name in
        allowed_dlls is looked up as an attribute of ctypes.windll; names
        that cannot be loaded are reported on stderr and left out.
        """
        self.paths = paths
        self.allowed_headers = allowed_headers
        self.allowed_dlls = {}
        for dll in allowed_dlls:
            try:
                self.allowed_dlls[dll] = getattr(ctypes.windll, dll)
            except Exception:
                # Still best-effort, but no longer swallows
                # KeyboardInterrupt/SystemExit like a bare "except:" does.
                print >>sys.stderr, 'Warning: Library %r doesn\'t exist.' % dll

        self.ppsymbols = {} # (as_defined, as_undefined, args or None, tokens)
        self.symbols = {} # (type, defn)

        self.init_output('thirtytwo/')

    def init_output(self, prefix):
        # thirtytwo._consts module: almost all #define, enums
        self.consts_ctx = {}
        self.consts_buf = open(os.path.join(prefix, '_consts.py'), 'wb')
        self.consts_buf.write('# generated by thirtytwo-build. DO NOT MODIFY BY HAND!\n'
                              'from thirtytwo._support import *\n'
                              'from thirtytwo._types import *\n\n')

        # thirtytwo._types module: typedef, struct, union
        self.types_ctx = {}
        self.types_buf = open(os.path.join(prefix, '_types.py'), 'wb')
        self.types_buf.write('# generated by thirtytwo-build. DO NOT MODIFY BY HAND!\n'
                             'from thirtytwo._support import *\n\n')

        # thirtytwo._funcs module: function decl, function-wrapping #define
        self.funcs_ctx = {}
        self.funcs_buf = open(os.path.join(prefix, '_funcs.py'), 'wb')
        self.funcs_buf.write('# generated by thirtytwo-build. DO NOT MODIFY BY HAND!\n'
                             'from thirtytwo._support import *\n'
                             'from thirtytwo._consts import *\n'
                             'from thirtytwo._types import *\n\n')

        exec 'from thirtytwo._support import *\n' in self.consts_ctx, self.consts_ctx
        self.types_ctx.update(self.consts_ctx)
        self.funcs_ctx.update(self.consts_ctx)

    def emit_py(self, line, ctx, buf):
        # to treat "#" or "##" symbol outside the string as error,
        # we replaces them with "\#" or "\#\#" so it doesn't affect
        # string. (unless "\#" sequence is already present in the
        # string, but we can make sure it cannot happen)
        line = line[:1] + line[1:].replace('#', '\\#')

        try:
            exec line in ctx
            buf.write(line + '\n')
        except Exception, e:
            buf.write('##%s[%s]: %s\n' % (e.__class__.__name__,
                                          ' '.join(str(e).split()), line))

    def emit_const(self, line):
        self.emit_py(line, self.consts_ctx, self.consts_buf)
        self.funcs_ctx.update(self.consts_ctx)

    def emit_type(self, line):
        self.emit_py(line, self.types_ctx, self.types_buf)
        self.consts_ctx.update(self.types_ctx)
        self.funcs_ctx.update(self.types_ctx)

    def emit_func(self, line):
        self.emit_py(line, self.funcs_ctx, self.funcs_buf)

    def add_ppsymbol(self, name, tokens, tokens2=None,
                     as_defined=True, as_undefined=None):
        """Register a preprocessor symbol directly.

        With two positional values after `name` the first one is stored
        as the macro's arguments and the second one is its body; with one
        value the symbol is object-like (arguments are None).  The body
        is tokenized and empty tokens are dropped before storing.
        """
        if tokens2 is None:
            args, body = None, tokens
        else:
            args, body = tokens, tokens2
        body_tokens = [tok for tok in self.tokenize(body) if tok]
        self.ppsymbols[name] = (as_defined, as_undefined, args, body_tokens)

    def to_pyname(self, name):
        if ' ' in name:
            parts = name.split()
            counter = parts.pop() if parts[-1].isdigit() else '0'
            name = '_X%sX_%s' % ({'*global*': 'g', 'struct': 's',
                                  'union': 'u', 'enum': 'e'}[parts[0]], counter)
            if len(parts) > 1: name += parts[1]
            return name
        elif name.startswith('_X') and not name.startswith('_X_'):
            return '_XxX' + name[2:]
        else:
            return name

    def to_pytype(self, primtype, props, context):
        """Build the Python type expression for a C type.

        primtype is the base type name (or the VOID placeholder) and props
        is the list of declarator properties, applied from the last one to
        the first: ('*',) for a pointer, ('()', convention, args) for a
        function and ('[]', size_tokens) for an array.  context is passed
        on to to_pyexpr() for array sizes.

        Returns the expression as a string, or the ANYFUNCTION placeholder
        when the last applied property is a function whose prototype is
        unknown or variadic.
        """
        if primtype is VOID:
            # this is mainly for function return type.
            pytype = 'None'
        else:
            pytype = self.to_pyname(primtype)

        # set right after a function type has been built: the pointer that
        # is applied next is dropped instead of wrapped in _X_POINTER().
        skipptr = False
        for prop in reversed(props):
            if prop[0] == '*':
                if pytype is ANYFUNCTION:
                    # pointer to a function without usable prototype
                    pytype = '_X_void_p'
                elif not skipptr:
                    pytype = '_X_POINTER(%s)' % pytype
                skipptr = False
            elif prop[0] == '()':
                # sometimes the exact prototype is not available in ctypes...
                if prop[2] is None or any(t is VARARG for _,t,_ in prop[2]):
                    pytype = ANYFUNCTION
                else:
                    if prop[1] == 'stdcall':
                        ctor = '_X_WINFUNCTYPE'
                    else:
                        ctor = '_X_CFUNCTYPE'
                    # the type built so far is the return type; argument
                    # types follow it.
                    argtypes = [pytype]
                    for aname, aprimtype, aprops in prop[2]:
                        argtypes.append(self.to_pytype(aprimtype, aprops, context))
                    pytype = '%s(%s)' % (ctor, ', '.join(argtypes))
                    skipptr = True
            elif prop[0] == '[]':
                # array: element type multiplied by the (parenthesized) size
                pytype = '%s*%s' % (pytype,
                                    self.to_pyexpr(['('] + prop[1] + [')'], context))
            else:
                assert False
        # a prototyped function type must have been followed by a pointer
        assert not skipptr

        return pytype

    def to_pyexpr(self, tokens, context):
        """Translate a list of C expression tokens into Python source text.

        Logical operators, "->", character/string literals, numeric
        literals and type casts are rewritten; every other token is copied
        as is.  The resulting text is then evaluated in `context`: when
        that yields an int, long, float, str or unicode (or an object whose
        "value" attribute is one), the repr() of the result is returned,
        otherwise the translated text itself.

        `tokens` is not modified.
        """
        tokens = tokens[:] # type cast conversion changes tokens.

        expr = []
        i = 0
        while i < len(tokens):
            if tokens[i] == '&&':
                expr.append('and')
            elif tokens[i] == '||':
                expr.append('or')
            elif tokens[i] == '!':
                expr.append('not')
            elif tokens[i] == '->':
                expr.append('.')
            elif tokens[i].startswith("'"):
                expr.append(repr(ord(eval(tokens[i]))))
            elif tokens[i].startswith('"'):
                expr.append(repr(eval(tokens[i])))
            elif tokens[i].startswith("L'"):
                expr.append(repr(ord(eval('u' + tokens[i][1:]))))
            elif tokens[i].startswith('L"'):
                expr.append(repr(eval('u' + tokens[i][1:])))
            elif ('0' <= tokens[i][:1] <= '9' or
                  (tokens[i][:1] == '.' and '0' <= tokens[i][1:2] <= '9')):
                # A number is recognized by its first character(s).
                # Comparing the whole token against '0'..'9' missed every
                # multi-character number starting with 9 (e.g. "95UL") as
                # well as ".5f", which then kept their C suffix.
                number = tokens[i].lower()
                if '.' in number:
                    number = number.rstrip('f')
                    if number.startswith('0x'):
                        number = repr(float.fromhex(number))
                else:
                    number = number.rstrip('sul')
                expr.append(number)
            elif tokens[i] == '(': # can be type cast
                try:
                    iter = bufferiter(tokens[i+1:])
                    primtype, storage, props, name = \
                            self.parse_type(iter, declaration=False)
                except Exception:
                    iscast = False
                else:
                    assert not storage and not name
                    istypesym = lambda t: self.symbols.get(t, (None,))[0] in \
                                          ('typedef', 'struct', 'union',
                                           'enum', 'builtin')
                    iscast = iter.peek() == ')'
                    if iscast:
                        ignorecast = primtype is VOID and not props
                        iscast = istypesym(primtype) or ignorecast

                if iscast:
                    # type cast expr ends at the next unmatching )/]/} or
                    # binary/ternary operators whichever comes first.
                    depth = 0
                    typeend = None
                    exprend = i
                    while exprend < len(tokens):
                        if tokens[exprend] in '([{':
                            depth += 1
                        elif tokens[exprend] in ')]}':
                            depth -= 1
                            if depth == 0 and typeend is None: typeend = exprend
                            if depth < 0: break
                        elif depth == 0:
                            # strictly binary operator only
                            if tokens[exprend] in BINARY_OPERATORS and \
                                    exprend - 1 > typeend:
                                break
                        exprend += 1
                    assert depth <= 0
                    # "(type)" with nothing after it is not a cast
                    iscast = typeend is not None and typeend + 1 < exprend

                if iscast:
                    # XXX we are "pretending" pytype as C syntax!
                    if not ignorecast: # ignore (void)
                        expr.append('_X_cast')
                        expr.append('(')
                        expr += self.tokenize(self.to_pytype(primtype, props, context))
                        expr.append(',')
                        # close the _X_cast( call after the cast operand
                        tokens.insert(exprend, ')')
                    i = typeend
                else:
                    expr.append(tokens[i])
            else:
                expr.append(tokens[i])
            i += 1

        # join the tokens, putting spaces between adjacent words and around
        # binary operators (but not after a "-" that follows a bracket).
        exprstr = []
        prevtoken = pprevtoken = '@'
        for token in expr:
            if is_ident(prevtoken) and is_ident(token):
                exprstr.append(' ')
            elif prevtoken in BINARY_OPERATORS and \
                    not (prevtoken == '-' and pprevtoken in '([{'):
                exprstr.append(' ')
            elif token in BINARY_OPERATORS and token != ',':
                exprstr.append(' ')
            exprstr.append(token)
            pprevtoken = prevtoken
            prevtoken = token
        exprstr = ''.join(exprstr)

        # constant folding is best-effort: any failure keeps the text form.
        try:
            result = eval(exprstr, context, context)
            if isinstance(result, (int, long, float, str, unicode)) or \
               isinstance(result.value, (int, long, float, str, unicode)):
                return repr(result)
        except Exception:
            pass
        return exprstr

    def search_dll_by_func(self, fname):
        for k, v in self.allowed_dlls.items():
            try:
                getattr(v, fname)
                return k
            except AttributeError:
                pass
        return '_unknown_'

    def handle_ppsymbol(self, name, args, tokens):
        self.ppsymbols[name] = (True, False, args, tokens)

        if tokens:
            expr = self.to_pyexpr(tokens, self.consts_ctx)
            if args is None:
                self.emit_const('%s = %s' % (name, expr))
            else:
                self.emit_const('%s = lambda %s: %s' %
                                (name, ', '.join(args), expr))

    def handle_symbol(self, name, type, defn):
        self.symbols[name] = (type, defn)

        if type == 'struct' or type == 'union':
            fields = []
            anons = []
            for mname, mprimtype, mprops, mbitsz in defn:
                mpytype = self.to_pytype(mprimtype, mprops, self.types_ctx)
                if mname is None:
                    mname = '_XaX_%d' % (len(anons) + 1)
                    anons.append(mname)
                if mbitsz is None:
                    fields.append('(%r, %s)' % (mname, mpytype))
                else:
                    fields.append('(%r, %s, %s)' % (mname, mpytype,
                                                    self.to_pyexpr(mbitsz, self.types_ctx)))

            pyname = self.to_pyname(name)
            head = 'class %s(%s): ' % (pyname,
                    '_X_Structure' if type == 'struct' else '_X_Union')
            body = '@_fields_ = [%s]' % ', '.join(fields)
            if self.alignstack[-1] is not None:
                body = '@_pack_ = %d; ' % self.alignstack[-1] + body
            if anons:
                body += '; @_anonymous_ = %r' % anons

            if pyname in body: # self-referential: split declaration
                self.emit_type(head + 'pass')
                self.emit_type(body.replace('@', pyname + '.'))
            else:
                self.emit_type(head + body.replace('@', ''))
            return

        elif type == 'typedef':
            assert ' ' not in name
            self.emit_type('%s = %s' % (name, self.to_pytype(defn[0], defn[1],
                                                             self.types_ctx)))
            return

        elif type == 'enum':
            pyname = self.to_pyname(name)
            self.emit_const('%s = _X_int' % pyname)
            mbase = ['0']
            offset = 0
            for mname, mvalue in defn:
                if mvalue:
                    mbase = mvalue
                    offset = 0
                else:
                    offset += 1
                self.emit_const('%s = %s(%s)' % (mname, pyname,
                        self.to_pyexpr(['('] + mbase + [')', '+', str(offset)],
                                       self.consts_ctx)))
            return

        elif type == '':
            primtype, props = defn
            if props and props[-1][0] == '()':
                dll = self.search_dll_by_func(name)
                pytype = self.to_pytype(primtype, [('*',)] + props, self.funcs_ctx)
                if pytype is ANYFUNCTION:
                    self.emit_func('%s = _X_windll.%s.%s' % (name, dll, name))
                else:
                    self.emit_func('%s = %s((%r, _X_windll.%s))' %
                                 (name, pytype, name, dll))
                return

        else:
            print '# %s := %s %s' % (name, type, defn)

    def subst_pp(self, tokens, ifcond=False):
        """Expand the known preprocessor macros in a token list.

        Object-like macros are replaced by their body.  Function-like
        macros are expanded only when followed by "("; their arguments are
        expanded recursively and substituted for the parameter names in the
        body.  With ifcond set (the expression of #if/#elif), "defined X"
        and "defined(X)" become '1' or '0' and every other unknown
        identifier becomes '0'.

        Returns a new list of tokens.
        """
        ntokens = len(tokens)
        result = []

        i = 0
        while i < ntokens:
            token = tokens[i]
            i += 1
            if not is_ident(token):
                result.append(token)
                continue

            if token in self.ppsymbols:
                spec = self.ppsymbols[token]
                if spec[2] is None: # object-like
                    result.extend(spec[3])
                elif i < ntokens and tokens[i] == '(': # function-like
                    # split the argument list at top-level commas
                    args = []
                    while tokens[i] != ')':
                        i += 1
                        start = i
                        depth = 0
                        while depth > 0 or (tokens[i] != ',' and tokens[i] != ')'):
                            if tokens[i] == '(': depth += 1
                            elif tokens[i] == ')': depth -= 1
                            i += 1
                        args.append(self.subst_pp(tokens[start:i], ifcond))
                    i += 1

                    # "MACRO()" is read as one empty argument above; for a
                    # macro without parameters that means no arguments.
                    if not spec[2] and args == [[]]:
                        args = []
                    assert len(args) == len(spec[2])
                    for btoken in spec[3]:
                        try:
                            argpos = spec[2].index(btoken)
                        except ValueError:
                            # not a parameter name: copied unchanged
                            result.append(btoken)
                        else:
                            result.extend(args[argpos])
                else:
                    result.append(token)

            elif ifcond: # as #if's condition expression
                if token == 'defined': # special case
                    i += 1
                    token = tokens[i-1]
                    if token == '(':
                        i += 2
                        assert tokens[i-1] == ')'
                        token = tokens[i-2]
                    result.append('1' if token in self.ppsymbols else '0')
                else:
                    result.append('0')

            else:
                result.append(token)

        return result

    def concat_pp(self, tokens):
        result = []
        concatnext = False
        for token in tokens:
            if token == '##':
                concatnext = True
            elif concatnext: # XXX can produce invalid token
                result[-1] += token
                concatnext = False
            else:
                result.append(token)
        return result

    def eval_cond_pp(self, tokens):
        """Evaluate the expression of an #if/#elif directive.

        Macros are expanded first (unknown identifiers count as 0), the
        logical operators are translated to Python and character constants
        are replaced by their character code.  Returns a bool.
        """
        expr = []
        for token in self.subst_pp(tokens, True):
            if token == '&&':
                expr.append('and')
            elif token == '||':
                expr.append('or')
            elif token == '!':
                expr.append('not')
            elif token.startswith("'"):
                # Everything joined below has to be a string (an int here
                # made ' '.join() fail).  The literal is decoded the same
                # way to_pyexpr() does, so escapes such as '\n' work too.
                # NOTE: eval() on header text -- only use trusted headers.
                expr.append(repr(ord(eval(token))))
            elif token.startswith("L'"):
                expr.append(repr(ord(eval('u' + token[1:]))))
            else:
                expr.append(token)

        return bool(eval(' '.join(expr), {}, {}))

    def process_pp(self, tokens):
        if not tokens: return

        if tokens[0] == 'if':
            cond = self.eval_cond_pp(tokens[1:])
            self.ppblocks.append((self.ppblocks[-1][0] and cond,
                                  self.ppblocks[-1][0] and not cond))
            return

        if tokens[0] == 'ifdef':
            assert len(tokens) == 2
            try:
                blockstat = self.ppsymbols[tokens[1]][0:2]
            except:
                blockstat = (False, True)
            self.ppblocks.append((self.ppblocks[-1][0] and blockstat[0],
                                  self.ppblocks[-1][0] and blockstat[1]))
            return

        if tokens[0] == 'ifndef':
            assert len(tokens) == 2
            try:
                blockstat = self.ppsymbols[tokens[1]][0:2][::-1]
            except:
                blockstat = (True, False)
            self.ppblocks.append((self.ppblocks[-1][0] and blockstat[0],
                                  self.ppblocks[-1][0] and blockstat[1]))
            return

        if tokens[0] == 'elif':
            cond = self.eval_cond_pp(tokens[1:])
            self.ppblocks[-1] = (self.ppblocks[-1][1] and cond,
                                 self.ppblocks[-1][1] and not cond)
            return

        if tokens[0] == 'else':
            assert len(tokens) == 1
            self.ppblocks[-1] = self.ppblocks[-1][1:]
            return

        if tokens[0] == 'endif':
            assert len(tokens) == 1
            self.ppblocks.pop()
            return

        if not self.ppblocks[-1][0]: # current block is ignored
            return

        if tokens[0] == 'define':
            assert len(tokens) >= 2
            substed = self.concat_pp(self.subst_pp(tokens[2:]))
            self.handle_ppsymbol(tokens[1], None, substed)
            return

        if tokens[0] == 'define()':
            assert len(tokens) >= 4 # "define()", "MACRONAME", "(", ")"
            assert tokens[2] == '('

            argnames = []
            if tokens[3] == ')':
                body = tokens[4:]
            else:
                for i in xrange(3, len(tokens), 2):
                    argnames.append(tokens[i])
                    if tokens[i+1] == ')':
                        body = tokens[i+2:]
                        break
                    assert tokens[i+1] == ','
                else:
                    assert False

            substed = self.subst_pp(body)
            self.handle_ppsymbol(tokens[1], argnames, substed)
            return

        if tokens[0] == 'undef':
            assert len(tokens) == 2
            try: del self.ppsymbols[tokens[1]]
            except: pass
            return

        if tokens[0] == 'pragma' and tokens[1] == 'pack':
            assert tokens[2] == '(' and tokens[-1] == ')'
            if tokens[3] == 'push':
                assert len(tokens) == 7
                assert tokens[4] == ',' and tokens[5].isdigit()
                self.alignstack.append(int(tokens[5]))
                return
            if tokens[3] == 'pop':
                assert len(tokens) == 5
                self.alignstack.pop()
                return

        if tokens[0] == 'include':
            if len(tokens) == 2:
                tokens[1:] = self.concat_pp(self.subst_pp(tokens[1:]))
            assert ((tokens[1] == '<' and tokens[-1] == '>') or
                    (tokens[1] == '"' and tokens[-1] == '"'))
            filename = ''.join(tokens[2:-1])
            self.process_internal(filename)
            return

        print '# unknown PP directive: ' + ' '.join(tokens)

    def tokenize(self, s):
        """Split C source text into a list of tokens.

        Backslash-newline pairs are removed first.  A run of newlines is
        reduced to a single '\\n' token; blanks and comments become empty
        strings; every other token is returned as matched.
        """
        joined = s.replace('\\\n', '')
        tokens = []
        for raw in TOKEN_PATTERN.findall(joined):
            lead = raw[0]
            if lead == '\n':
                tokens.append('\n')
            elif lead.isspace() or raw[:2] in ('//', '/*'):
                tokens.append('')
            else:
                tokens.append(raw)
        return tokens

    def preprocess(self, s):
        """Preprocess the text of one header, yielding the resulting tokens.

        This is a generator.  Lines starting with "#" are handed to
        process_pp(); lines inside an inactive conditional block are
        dropped; the tokens of all other lines are collected and yielded
        after macro substitution and ## concatenation.
        """
        tokens = self.tokenize(s)
        tokens.append('\n') # the last empty line is to be ignored

        iter = bufferiter(tokens)
        try:
            # tokens of ordinary lines seen since the last directive
            buffer = []
            while True:
                # first non-empty token of the line (blanks and comments
                # are empty strings and get skipped)
                token = iter.skip()

                if token == '#': # preprocessor line
                    # flush current buffer
                    for token in self.concat_pp(self.subst_pp(buffer)):
                        yield token
                    buffer = []

                    token = iter.skip()
                    line = [token]
                    if token == 'define':
                        macroname = iter.skip()
                        if macroname != '\n':
                            line.append(macroname)
                            # the token right after the name is taken
                            # without skipping blanks: only a "(" directly
                            # attached to the name starts a parameter list
                            line.append(iter())
                            if line[-1] == '(':
                                # define pseudo-directive '#define()' in this case
                                line[0] = 'define()'
                            if line[-1] == '\n':
                                del line[-1]
                            else:
                                line += iter.until(lambda t: t != '\n')
                                token = iter.skip()
                                assert token == '\n'
                    elif token != '\n':
                        # any other directive: take the rest of the line
                        line += iter.until(lambda t: t != '\n')
                        token = iter.skip()
                        assert token == '\n'

                    # empty (blank/comment) tokens are dropped here
                    self.process_pp(filter(None, line))

                elif token == '\n':
                    pass

                elif not self.ppblocks[-1][0]: # ignored line
                    # consume everything up to and including the newline
                    iter.skip(lambda t: t != '\n')

                else:
                    buffer.append(token)
                    buffer += filter(None, iter.until(lambda t: t != '\n'))
                    token = iter.skip()
                    assert token == '\n'

        except StopIteration:
            # raised by the token source once the input is used up
            pass

        # flush whatever followed the last directive
        for token in self.concat_pp(self.subst_pp(buffer)):
            yield token

    def parse_primtype(self, iter):
        """Read storage class, qualifiers and primitive type words.

        Tokens are consumed from `iter` as long as they are a qualifier
        (const/volatile/restrict, each at most once), a storage class
        (extern or static, at most one) or extend the words read so far to
        a key of PRIMTYPE_MAPPINGS.  The first other token is put back.

        Returns (mapped type, storage class); the mapped type is None when
        no type word was read and storage class is '' when absent.
        """
        storage = ''
        qualifiers = set()  # const/volatile/restrict seen so far
        words = []          # primitive type words seen so far

        token = iter()
        while True:
            if token in ('const', 'volatile', 'restrict'):
                assert token not in qualifiers
                qualifiers.add(token)
            elif token in ('extern', 'static'):
                assert not storage
                storage = token
            elif tuple(sorted(words + [token])) in PRIMTYPE_MAPPINGS:
                words.append(token)
            else:
                break
            token = iter()

        iter.putback(token)
        return PRIMTYPE_MAPPINGS[tuple(sorted(words))], storage

    def parse_type(self, iter, prev=None, declaration=True):
        if prev is None:
            primtype, storage = self.parse_primtype(iter)
        else: # e.g. int a, *b;
            primtype, storage = prev

        token = iter()

        if primtype is None:
            if token in ('struct', 'union', 'enum'):
                # storage class or CV + struct/union/enum
                assert primtype is None
                complextype = token
                complexname = None
                token = iter()
                if is_ident(token):
                    complexname = token
                    token = iter()

                assert (declaration and token == '{') or complexname is not None
                if complexname is None:
                    primtype = '%s %d' % (self.anonprefix or '*global*',
                                          self.anoncounter)
                    self.anoncounter += 1
                else:
                    primtype = '%s %s' % (complextype, complexname)

                if declaration and token == '{':
                    members = []
                    if self.anonprefix is None:
                        self.anonprefix = primtype
                        prevcounter = self.anoncounter
                        self.anoncounter = 1

                    if complextype == 'enum': # declarator-like
                        while True:
                            name = iter()
                            assert is_ident(name)
                            token = iter()
                            if token == '=':
                                value = iter.until(lambda t: t != ',' and
                                                             t != '}')
                                token = iter()
                            else:
                                value = []
                            members.append((name, value))
                            if token == '}': break
                            assert token == ','

                    else: # declaration-like
                        while True:
                            token = iter()
                            if token == '}': break
                            iter.putback(token)

                            prev = None
                            while True:
                                mprimtype, mstorage, mprops, mname = \
                                        self.parse_type(iter, prev)
                                assert not mstorage and (
                                        (isinstance(mprimtype, str) and
                                         (mprimtype.startswith('struct ') or
                                          mprimtype.startswith('union '))) or
                                        mname is not None)
                                token = iter()
                                if token == ':':
                                    mbitsz = iter.until(lambda t: t != ';' and
                                                                  t != ',')
                                    assert mbitsz
                                    token = iter()
                                else:
                                    mbitsz = None
                                members.append((mname, mprimtype, mprops, mbitsz))
                                if token == ';': break
                                assert token == ','
                                prev = (mprimtype, mstorage)

                    self.handle_symbol(primtype, complextype, members)
                    if self.anonprefix == primtype:
                        self.anonprefix = None
                        self.anoncounter = prevcounter

                    token = iter()

            else:
                # pre-existing type
                assert is_ident(token)
                primtype = token
                token = iter()

        name = None
        props = []

        lpropstack = [[]]
        ldeclstack = [None]
        while True:
            if token == '(':
                lpropstack.append([])
                ldeclstack.append(None)
            elif token in ('const', 'volatile', 'inline', '__inline__'):
                pass # unused
            elif token == '__stdcall': # calling convention
                ldeclstack[-1] = 'stdcall'
            elif token == '*': # pointer
                while token == '*':
                    lpropstack[-1].append(('*',))
                    token = iter()
                iter.putback(token)
            else:
                break
            token = iter()

        if declaration and is_ident(token):
            name = token
            token = iter()

        rprops = []
        while True:
            if token == '(': # function
                args = []
                token = iter()
                if token == 'void' and iter.peek() == ')': # no arguments
                    token = iter()
                    assert token == ')'
                elif token == ')': # prototypeless
                    args = None
                else:
                    iter.putback(token)
                    while True:
                        token = iter()
                        if token == '...': # vararg
                            aname = astorage = None
                            aprimtype = VARARG
                            aprops = []
                        else:
                            iter.putback(token)
                            aprimtype, astorage, aprops, aname = self.parse_type(iter)
                        assert not astorage
                        args.append((aname, aprimtype, aprops))
                        token = iter()
                        if token == ')': break
                        assert token == ','
                    assert not any(t is VOID and not p for n,t,p in args)
                rprops.append(('()', ldeclstack[-1], args))
                ldeclstack[-1] = None
            elif token == '[': # array indices
                tokens = iter.until(lambda t: t != ']')
                rprops.append(('[]', tokens))
                token = iter()
                assert token == ']'
            elif len(lpropstack) > 1 and token == ')':
                props += lpropstack.pop()[::-1]
                props += rprops
                decl = ldeclstack.pop()
                if decl is not None:
                    assert not ldeclstack[-1]
                    ldeclstack[-1] = decl
            else:
                break
            token = iter()

        assert len(lpropstack) == len(ldeclstack) == 1
        assert not ldeclstack[0]
        iter.putback(token)
        props += lpropstack[0][::-1]
        props += rprops

        if props and props[-1][0] == '*': # special types
            if primtype == '_X_char':
                primtype = '_X_char_p'
                props.pop()
            elif primtype == '_X_wchar':
                primtype = '_X_wchar_p'
                props.pop()
            elif primtype is VOID:
                primtype = '_X_void_p'
                props.pop()

        return primtype, storage, props, name

    def parse(self, tokens):
        """Walk a token stream and report every declared symbol.

        `tokens` is wrapped in a `bufferiter` so tokens can be pushed back.
        The stream is read as a sequence of statements, each a
        comma-separated list of declarators closed by ';':

        - a statement opened by 'typedef' registers each declarator through
          `handle_symbol(name, 'typedef', (primtype, props))`;
        - any other statement registers each named declarator through
          `handle_symbol(name, storage, (primtype, props))`; a nameless
          declarator is only accepted when its type is a struct, union or
          enum.

        Parsing ends silently when the stream raises StopIteration.
        """
        complex_prefixes = ('struct ', 'union ', 'enum ')
        try:
            stream = bufferiter(tokens)
            while True:
                head = stream()
                is_typedef = (head == 'typedef')
                if not is_typedef:
                    # the token already belongs to the declaration itself
                    stream.putback(head)

                # (primtype, storage) of the previous declarator; handed to
                # parse_type for the declarators following a comma
                carried = None
                while True:
                    primtype, storage, props, name = \
                            self.parse_type(stream, carried)
                    if is_typedef:
                        assert name is not None and not storage
                        self.handle_symbol(name, 'typedef', (primtype, props))
                    elif name is not None:
                        self.handle_symbol(name, storage, (primtype, props))
                    else:
                        # bare "struct foo {...};" style declaration
                        assert (isinstance(primtype, str) and
                                primtype.startswith(complex_prefixes))

                    separator = stream()
                    if separator == ';':
                        break
                    assert separator == ','
                    carried = (primtype, storage)

        except StopIteration:
            pass

    def process_internal(self, filename):
        """Locate, preprocess and parse a single header file.

        A header that is not listed in `self.allowed_headers` is reported on
        stderr and skipped. Otherwise the directories in `self.paths` are
        tried in order and the first one from which the header can be opened
        is used; its whole text is passed through `self.preprocess` and the
        result is handed to `self.parse`.

        Raises IOError when the header cannot be opened from any directory
        in `self.paths` (including the case where `self.paths` is empty).
        """
        if filename not in self.allowed_headers:
            print >>sys.stderr, 'skipping %s...' % filename
            return

        for ipath in self.paths:
            path = os.path.join(ipath, filename)
            try:
                fp = open(path, 'rU')
            except IOError:
                # not readable from this directory; try the next one
                continue
            break
        else:
            # no directory produced the file: fail with a clear message
            # instead of tripping over an unbound `fp` further down
            raise IOError('cannot find %s in include paths %r' %
                          (filename, self.paths))

        print >>sys.stderr, 'processing %s...' % filename

        # Read everything up front so the handle is released before parsing
        # starts, rather than staying open for the rest of the run.
        try:
            text = fp.read()
        finally:
            fp.close()

        self.parse(self.preprocess(text))

    def process(self, filename):
        """Process one top-level header starting from a clean state.

        Resets the conditional-block stack, the alignment stack and the
        anonymous-type naming state, then delegates to `process_internal`.
        Afterwards the conditional-block stack must be back to its single
        initial entry.
        """
        # anonymous-type naming: no enclosing prefix, numbering from 1
        self.anonprefix, self.anoncounter = None, 1
        # one initial entry on each stack
        self.alignstack = [None]
        self.ppblocks = [(True, False)]

        self.process_internal(filename)

        remaining = len(self.ppblocks)
        assert remaining == 1

if __name__ == '__main__':
    # Script entry point: every command-line argument is an include
    # directory handed to the Processor as a search path.

    # Header names passed to the Processor as `allowed_headers`.
    ALLOWED_HEADERS = set([
        # windows.h with WIN32_LEAN_AND_MEAN
        'windows.h', 'windef.h', 'wincon.h', 'winbase.h', 'wingdi.h',
        'winuser.h', 'winnls.h', 'winver.h', 'winnetwk.h', 'winreg.h',
        'winsvc.h', 'winnt.h', 'winerror.h', 'basetsd.h',

        # windows.h without WIN32_LEAN_AND_MEAN
        'cderr.h', 'dde.h', 'ddeml.h', 'dlgs.h', 'imm.h', 'lzexpand.h',
        'mmsystem.h', 'nb30.h', 'rpc.h', 'rpcdce.h', 'rpcdcep.h',
        'rpcnsi.h', 'rpcnterr.h', 'rpcndr.h', 'rpcnsip.h', 'shellapi.h',
        'winperf.h', 'commdlg.h', 'unknwn.h', 'objfwd.h', 'basetyps.h',
        'wtypes.h', 'winspool.h', 'ole2.h', 'winsock2.h',

        # additional
        'commctrl.h', 'prsht.h',

        # #pragma pack() headers
        'pshpack1.h', 'pshpack2.h', 'pshpack4.h', 'pshpack8.h', 'poppack.h',
    ])

    # Library names passed to the Processor as `allowed_dlls`
    # (presumably the list searched when resolving function imports).
    ALLOWED_DLLS = set([
        'kernel32', 'user32', 'gdi32', 'advapi32', 'comctl32', 'comdlg32',
        'shell32', 'wsock32', 'ws2_32', 'imm32', 'winmm', 'opengl32',
        'version', 'winspool', 'rpcrt4', 'rpcns4', 'mpr',
    ])

    include_dirs = sys.argv[1:]
    proc = Processor(paths=include_dirs,
                     allowed_headers=ALLOWED_HEADERS,
                     allowed_dlls=ALLOWED_DLLS)

    # Two-argument (name, value) preprocessor symbols, in definition order.
    predefined = (
        ('_X86_', ''),
        ('NULL', 'None'),
        ('UNICODE', ''),
        ('NO_STRICT', ''),
        ('NOMINMAX', ''),
        ('WIN32_LEAN_AND_MEAN', ''),
        ('WINVER', '0x0501'),
        ('DECLSPEC_NORETURN', ''),
    )
    for symbol, value in predefined:
        proc.add_ppsymbol(symbol, value)

    # Three-argument form: symbol, parameter list, replacement text
    # (presumably a function-like macro with the single parameter 'type').
    proc.add_ppsymbol('DECLARE_STDCALL_P', ['type'], 'type __stdcall')

    for header in ('windows.h', 'commctrl.h'):
        proc.process(header)
Tech Fingerprint

Alerts (64)

'def' Ensure functions have docstrings for documentation
67 136 144 147 155 177 203 217 221 226 229 238 251 289 398 407 418 488 545 558 578 682 687 744 773 953 988 1005
Complexity hotspot; lines 68 to 69 (total complexity: 5)
68 69
'except:' Avoid catching all exceptions; specify exception types to catch only expected errors
169 394 522 591 601 656 998
'open(' Use 'with open()' to ensure files are properly closed
180 187 996
'import *' Avoid it to prevent namespace pollution; import specific names or use aliases
182 183 189 195 196 197 199
'eval(' Avoid due to security risks; use ast.literal_eval for safer evaluation of literals
304 310 390 576
'except Exception:' Catch specific exceptions instead of Exception to avoid masking bugs
325
'lambda' Avoid complex 'lambda' functions; prefer named functions for clarity and debugging
329
'isinstance(' Overuse may indicate design issues; consider polymorphism
391 392
Complexity hotspot; lines 510 to 512 (total complexity: 5)
510 511 512
Complexity hotspot; lines 683 to 685 (total complexity: 5)
683 684 685
'try:' Ensure try blocks have corresponding except or finally blocks
692 954
'del' Avoid unless necessary; Python's garbage collector typically handles object deletion
714