# /Python/webagent.py
# Python | 71 lines | 64 code | 7 blank | 0 comment | 0 complexity | 8861882b51845c1b0f90204d831b8f0f MD5 | raw file
import re

# Scanner states used by extract_variables.
# In WEBAGENT_MODE the scanner reads up to the next '<%'.
WEBAGENT_MODE = 0
# In OUTPUT_MODE the scanner reads up to the next '%>'.
OUTPUT_MODE = 1

# Words that extract_variables never reports as variables. Candidates are
# lower-cased before the lookup, so every entry here must be lower case.
RESERVED_WORDS = {
    'add', 'all', 'and', 'any', 'by', 'compress', 'compute',
    'condition', 'data', 'decide', 'define', 'divide', 'else', 'end-define',
    'end-decide', 'end-for', 'end-if', 'end-repeat', 'eq', 'equal', 'every',
    'examine', 'false', 'first', 'for', 'from', 'ge', 'giving',
    'gt', 'if', 'ignore', 'include', 'into', 'invoke', 'is', 'le', 'leaving',
    'len', 'lower', 'lt', 'mask', 'move', 'multiply', 'name', 'ne', 'no',
    'none', 'not', 'on', 'or', 'perform', 'redefine', 'repeat',
    'replace', 'reset', 'rounded', 'scan', 'space', 'subtract', 'terminate',
    'to', 'translate', 'true', 'until', 'upper', 'using', 'val', 'value',
    'when', 'while', 'wpage', 'write',
}

# A `/*` comment runs to the end of its line. The body is matched with
# [^\r\n]* rather than `.` so that a bare '\r' line ending terminates the
# comment too, and the `$` alternative lets a comment on a final line without
# a trailing newline match as well. The capture group is kept from the
# original pattern.
COMMENT_REGEX = re.compile(r'/\*([^\r\n]*(?:\r\n|\r|\n|$))')
# Literals: single-quoted strings, standalone numbers (optional sign, optional
# decimal part) and the words true/false, case-insensitively.
LITERAL_REGEX = re.compile(r"('[^']*'|(?<![#a-z0-9_\.-])(?:\+|-)?\d+(?:\.\d+)?(?=[-\+/\*\s\r\n]|$)|\btrue\b|\bfalse\b)", re.IGNORECASE)
WHITESPACE_REGEX = re.compile(r'(?:\r|\n|\s)+')
# NOTE(review): the leading \b cannot match directly in front of '#' unless a
# word character precedes it, so "#name" after whitespace matches only as
# "name". Confirm whether the '#' prefix was meant to be captured.
LOCAL_VAR_REGEX = re.compile(r'\b[#a-z][a-z0-9_-]*\b', re.IGNORECASE)
# Token separators: operators, parentheses and whitespace runs. '-' counts as
# an operator only when whitespace follows it, because it is also allowed
# inside words such as 'end-define'.
WHITESPACE_AND_OP_REGEX = re.compile(r'/|\*|-(?=\s)|\+|\(|\)|\^?=|>=?|<=?|[\r\n\s]+')

NEWLINE_REGEX = re.compile(r'[\r\n]')

# A reference of the form `@...|`. This pattern is compiled without
# re.IGNORECASE, so only lower-case letters are accepted.
# NOTE(review): `(?!=@)` is a negative lookahead for the literal text "=@";
# this may be a typo for a different assertion - confirm the intent.
OUTPUT_VAR_REF_REGEX = re.compile(r'@(?!=@)[#a-z0-9_\s^-]*?\|')

# Separators inside a reference: a literal caret or a whitespace run. The
# caret is escaped; unescaped it would be a start-of-string anchor.
OUTPUT_VAR_REF_DELIMITER_REGEX = re.compile(r'\^|\s+')

def extract_variables(code, mode=OUTPUT_MODE):
    """Split *code* into WebAgent and output chunks and collect variable names.

    The text is scanned left to right, alternating between two modes. In
    ``OUTPUT_MODE`` everything up to the next ``%>`` becomes an output chunk;
    in ``WEBAGENT_MODE`` everything up to the next ``<%`` becomes a WebAgent
    chunk. The delimiters are dropped and empty chunks are not recorded.

    Args:
        code: The text to scan.
        mode: The mode the scan starts in, ``OUTPUT_MODE`` (the default) or
            ``WEBAGENT_MODE``.

    Returns:
        A tuple ``(webagent_parts, output_parts, variables)``. The first two
        are lists of chunk strings in order of appearance. ``variables`` is
        the set of words left in the output chunks once comments and literals
        are removed, the text is split on whitespace and operators, and
        reserved words (compared lower-cased) are discarded.
    """
    output_parts = []
    webagent_parts = []

    rest = code
    while rest:
        # Compare by value: identity comparison of ints only works by accident
        # of interpreter caching and fails for equal values such as True.
        if mode == OUTPUT_MODE:
            output, _, rest = rest.partition('%>')
            if output:
                output_parts.append(output)
            mode = WEBAGENT_MODE
            continue

        webagent, _, rest = rest.partition('<%')

        # If the last `/*` in this chunk has no newline after it, the chunk is
        # cut at that `/*` and the remaining text is skipped up to and
        # including the next '\n'.
        comment_start = webagent.rfind('/*')
        if (comment_start != -1
                and NEWLINE_REGEX.search(webagent, comment_start) is None):
            webagent = webagent[:comment_start]
            _, _, rest = rest.partition('\n')

        if webagent:
            webagent_parts.append(webagent)
        mode = OUTPUT_MODE

    # NOTE(review): this pass reads output_parts, although comment/literal
    # stripping and the reserved-word filter look aimed at WebAgent code.
    # Confirm whether webagent_parts was intended; behaviour is kept as found.
    variables = set()
    for chunk in output_parts:
        stripped = LITERAL_REGEX.sub('', COMMENT_REGEX.sub('', chunk))
        for word in WHITESPACE_AND_OP_REGEX.split(stripped):
            if word and word.lower() not in RESERVED_WORDS:
                variables.add(word)

    return (webagent_parts, output_parts, variables)