webagent.py | searchcode

/Python/webagent.py

https://bitbucket.org/soulofmyshoe/code
Python | 71 lines | 64 code | 7 blank | 0 comment | 0 complexity | 8861882b51845c1b0f90204d831b8f0f MD5 | raw file

import re



# Scanner states for extract_variables(): in WEBAGENT_MODE it reads text up to
# the next '<%', in OUTPUT_MODE it reads text up to the next '%>'.
WEBAGENT_MODE, OUTPUT_MODE = 0, 1



# Lower-case words that extract_variables() never reports as variables.
# Membership is tested against the lower-cased token, so matching is
# case-insensitive.
RESERVED_WORDS = {
    'add', 'all', 'and', 'any',
    'by',
    'compress', 'compute', 'condition',
    'data', 'decide', 'define', 'divide',
    'else', 'end-decide', 'end-define', 'end-for', 'end-if', 'end-repeat',
    'eq', 'equal', 'every', 'examine',
    'false', 'first', 'for', 'from',
    'ge', 'giving', 'gt',
    'if', 'ignore', 'include', 'into', 'invoke', 'is',
    'le', 'leaving', 'len', 'lower', 'lt',
    'mask', 'move', 'multiply',
    'name', 'ne', 'no', 'none', 'not',
    'on', 'or',
    'perform',
    'redefine', 'repeat', 'replace', 'reset', 'rounded',
    'scan', 'space', 'subtract',
    'terminate', 'to', 'translate', 'true',
    'until', 'upper', 'using',
    'val', 'value',
    'when', 'while', 'wpage', 'write',
}



# A comment: '/*' and everything after it on the same line, including the
# line terminator that ends it.
COMMENT_REGEX = re.compile(r'/\*(.*(?:\r|\n|\r\n))')

# A literal: a single-quoted string, a number (optional sign, optional
# fraction) that is not glued to a preceding identifier character and is
# followed by an operator, whitespace or end of input, or the words
# true/false. Case-insensitive.
LITERAL_REGEX = re.compile(r"('[^']*'|(?<![#a-z0-9_\.-])(?:\+|-)?\d+(?:\.\d+)?(?=[-\+/\*\s\r\n]|$)|\btrue\b|\bfalse\b)", re.IGNORECASE)

# One or more whitespace characters.
WHITESPACE_REGEX = re.compile(r'(?:\r|\n|\s)+')

# A '#' or a letter followed by letters, digits, '_' or '-'. Case-insensitive.
LOCAL_VAR_REGEX = re.compile(r'\b[#a-z][a-z0-9_-]*\b', re.IGNORECASE)

# Token separators: arithmetic/comparison operators, parentheses and runs of
# whitespace. '-' may appear inside names (e.g. 'end-if'), so it only counts
# as an operator when a whitespace character follows it. The lookahead is a
# single-character test; the previous form '(?=\s\r\n)' demanded three
# consecutive characters and so never matched an ordinary 'a - b'.
WHITESPACE_AND_OP_REGEX = re.compile(r'/|\*|-(?=\s)|\+|\(|\)|\^?=|>=?|<=?|[\r\n\s]+')

# A single carriage return or line feed.
NEWLINE_REGEX = re.compile(r'[\r\n]')



# A reference inside output text: '@', then a lazy run of characters from
# [#a-z0-9_\s^-], then a closing '|'.
# NOTE(review): '(?!=@)' is a negative lookahead for the two characters '=@'.
# If the intent was to skip a doubled '@@', it should probably be '(?!@)' -
# confirm against the output syntax.
# NOTE(review): no re.IGNORECASE here (unlike LOCAL_VAR_REGEX), so upper-case
# letters do not match - confirm this is intended.
OUTPUT_VAR_REF_REGEX = re.compile(r'@(?!=@)[#a-z0-9_\s^-]*?\|')



# Separator pattern: start of string, or a run of whitespace.
# NOTE(review): the unescaped '^' is a start-of-string anchor, not a literal
# caret. If this is meant to split a reference on '^' or whitespace, the
# pattern should presumably be r'\^|\s+' - confirm.
OUTPUT_VAR_REF_DELIMITER_REGEX = re.compile(r'^|\s+')



def extract_variables(code, mode=OUTPUT_MODE):
    """Split *code* into WebAgent and output parts and collect variable names.

    The text is consumed as alternating sections. In OUTPUT_MODE the section
    runs up to the next '%>' and is recorded as an output part; in
    WEBAGENT_MODE it runs up to the next '<%' and is recorded as a WebAgent
    part. Empty sections are not recorded.

    If the last '/*' of a WebAgent section has no line break after it inside
    that section, the '<%' that ended the section sat inside that comment.
    The comment is then cut off the section and the remainder of that source
    line is skipped.

    Args:
        code: The source text to scan.
        mode: The mode the scan starts in, WEBAGENT_MODE or OUTPUT_MODE.

    Returns:
        A tuple ``(webagent_parts, output_parts, variables)``. The first two
        are lists of strings in source order. ``variables`` is the set of
        tokens left in the WebAgent parts after comments and literals are
        removed, the text is split on whitespace and operators, and reserved
        words (compared case-insensitively) are dropped.
    """
    output_parts = []
    webagent_parts = []

    rest = code
    while rest:
        # '==' rather than 'is': the mode is an int, and identity of equal
        # ints is an implementation detail.
        if mode == OUTPUT_MODE:
            output, _, rest = rest.partition('%>')
            if output:
                output_parts.append(output)
            mode = WEBAGENT_MODE
            continue

        webagent, _, rest = rest.partition('<%')

        last_comment_index = webagent.rfind('/*')
        if (last_comment_index != -1
                and NEWLINE_REGEX.search(webagent[last_comment_index:]) is None):
            # The comment is still open at the cut, so the '<%' was part of
            # it: drop the comment and the rest of its line.
            # NOTE(review): the scan still switches to OUTPUT_MODE below even
            # though that '<%' was commented out - confirm this is intended.
            webagent = webagent[:last_comment_index]
            _, _, rest = rest.partition('\n')

        if webagent:
            webagent_parts.append(webagent)
        mode = OUTPUT_MODE

    # Comment/literal stripping and the reserved-word list describe WebAgent
    # code, so the names are collected from the WebAgent parts (the loop used
    # to walk output_parts, tokenizing output text as if it were code).
    variables = set()
    for webagent in webagent_parts:
        stripped = LITERAL_REGEX.sub('', COMMENT_REGEX.sub('', webagent))
        for word in WHITESPACE_AND_OP_REGEX.split(stripped):
            if word and word.lower() not in RESERVED_WORDS:
                variables.add(word)

    return (webagent_parts, output_parts, variables)