PageRenderTime 44ms CodeModel.GetById 19ms RepoModel.GetById 1ms app.codeStats 0ms

/Python/webagent.py

https://bitbucket.org/soulofmyshoe/code
Python | 71 lines | 64 code | 7 blank | 0 comment | 0 complexity | 8861882b51845c1b0f90204d831b8f0f MD5 | raw file
  1. import re
  2. WEBAGENT_MODE = 0
  3. OUTPUT_MODE = 1
  4. RESERVED_WORDS = set([
  5. 'add', 'all', 'and', 'any', 'by', 'by', 'compress', 'compute',
  6. 'condition', 'data', 'decide', 'define', 'divide', 'else', 'end-define',
  7. 'end-decide', 'end-for', 'end-if', 'end-repeat', 'eq', 'equal', 'every',
  8. 'examine', 'false', 'first', 'for', 'for', 'for', 'from', 'ge', 'giving',
  9. 'gt', 'if', 'ignore', 'include', 'into', 'invoke', 'is', 'le', 'leaving',
  10. 'len', 'lower', 'lt', 'mask', 'move', 'multiply', 'name', 'ne', 'no',
  11. 'none', 'not', 'on', 'on', 'or', 'perform', 'redefine', 'repeat',
  12. 'replace', 'reset', 'rounded', 'scan', 'space', 'subtract', 'terminate',
  13. 'to', 'translate', 'true', 'until', 'upper', 'using', 'val', 'value',
  14. 'when', 'while', 'wpage', 'write',
  15. ])
  16. COMMENT_REGEX = re.compile(r'/\*(.*(?:\r|\n|\r\n))')
  17. LITERAL_REGEX = re.compile(r"('[^']*'|(?<![#a-z0-9_\.-])(?:\+|-)?\d+(?:\.\d+)?(?=[-\+/\*\s\r\n]|$)|\btrue\b|\bfalse\b)", re.IGNORECASE)
  18. WHITESPACE_REGEX = re.compile(r'(?:\r|\n|\s)+')
  19. LOCAL_VAR_REGEX = re.compile(r'\b[#a-z][a-z0-9_-]*\b', re.IGNORECASE)
  20. WHITESPACE_AND_OP_REGEX = re.compile(r'/|\*|-(?=\s\r\n)|\+|\(|\)|\^?=|>=?|<=?|[\r\n\s]+')
  21. NEWLINE_REGEX = re.compile(r'[\r\n]')
  22. OUTPUT_VAR_REF_REGEX = re.compile(r'@(?!=@)[#a-z0-9_\s^-]*?\|')
  23. OUTPUT_VAR_REF_DELIMITER_REGEX = re.compile(r'^|\s+')
  24. def extract_variables(code, mode=OUTPUT_MODE):
  25. def strip_comments(text):
  26. return COMMENT_REGEX.sub('', text)
  27. def strip_literals(text):
  28. return LITERAL_REGEX.sub('', text)
  29. output_parts = []
  30. webagent_parts = []
  31. rest = code
  32. while rest:
  33. if mode is OUTPUT_MODE:
  34. output, _, rest = rest.partition('%>')
  35. if output:
  36. output_parts.append(output)
  37. mode = WEBAGENT_MODE
  38. continue
  39. webagent, _, rest = rest.partition('<%')
  40. last_comment_index = webagent.rfind('/*')
  41. if last_comment_index != -1:
  42. if NEWLINE_REGEX.search(webagent[last_comment_index:]) is None:
  43. webagent = webagent[:last_comment_index]
  44. _, _, rest = rest.partition('\n')
  45. if webagent:
  46. webagent_parts.append(webagent)
  47. mode = OUTPUT_MODE
  48. vars = set()
  49. for webagent in output_parts:
  50. webagent = strip_literals(strip_comments(webagent))
  51. words = WHITESPACE_AND_OP_REGEX.split(webagent)
  52. for word in words:
  53. if word and word.lower() not in RESERVED_WORDS:
  54. vars.add(word)
  55. return (webagent_parts, output_parts, vars)