PageRenderTime 45ms CodeModel.GetById 12ms RepoModel.GetById 0ms app.codeStats 0ms

/wk7/mon/view.py

https://github.com/cwallenpoole/PyFram-Tutorial
Python | 404 lines | 372 code | 9 blank | 23 comment | 3 complexity | 37c717d572423dab681c70d6ef8e242c MD5 | raw file
  1. import warnings
  2. __all__ = ["BasicRenderer", "HTMLRenderer", "RequestRenderer", "HTMLDocumentRenderer", "ErrorRender", "HTMLCleaner"]
  3. ################################################################################
  4. #
  5. # Cleaner
  6. #
  7. ################################################################################
  8. import html.parser as hparse
# Tags that HTMLCleaner lets through when the caller does not supply its own
# list of allowed tags.
DEFAULT_ALLOWED_TAGS = ['b','strong','em','i','u','a']
import re
# Lenient attribute matcher used by HTMLCleaner.parse_starttag when the parser
# is not in strict mode. It accepts an optional leading comma and whitespace,
# then an attribute name (group 1), optionally followed by "= value" (group 2)
# where the value (group 3) is single-quoted, double-quoted, or an unquoted
# run of characters up to whitespace or '>'.
attrfind_tolerant = re.compile(
    r',?\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
    r'(\'[^\']*\'|"[^"]*"|[^>\s]*))?')
  14. class HTMLCleaner(hparse.HTMLParser):
  15. '''This class is specifically designed to prevent XSS insertion and inputs which
  16. could otherwise break the DOM. It is meant to parallel the strip_tags function
  17. that is in PHP.
  18. '''
  19. # We don't want anything to get escaped like CDATA in this class. Everything
  20. # should be fair game
  21. CDATA_CONTENT_ELEMENTS = (None,)
  22. def __init__(self, strict = False, escape = True,
  23. allowed = DEFAULT_ALLOWED_TAGS, feed = None):
  24. '''Constructor
  25. Keyword arguments:
  26. strict -- whether this is strict mode (see html.parser.HTMLParser) ***NOTE***
  27. the default has been set to False as that seems like it would be
  28. better for the use-case.
  29. allowed -- The list of allowed tags.
  30. escape -- should this escape invalid tags or cause an error? (only usable in
  31. non-strict mode)
  32. feed -- if not None, this will be fed directly into self.feed.
  33. '''
  34. self.allowed_tags = allowed
  35. self.escape = escape
  36. super(HTMLCleaner, self).__init__(strict)
  37. if feed is not None:
  38. self.feed(feed)
  39. def __str__(self):
  40. return self.cleaned_data
  41. def _assert_valid_tag(self,tag):
  42. '''
  43. Tests to see if the provided tag is in the allowed tag set. If in strict mode
  44. it will cause an error if it isn't. Otherwise it will return True if it is,
  45. False if it isn't.
  46. '''
  47. if not (tag in self.allowed_tags):
  48. if not self.strict:
  49. return False
  50. self.error('{0} is not in the list of allowed tags: {1}'.\
  51. format(tag,self.allowed_tags))
  52. return True
  53. def _format_attrs(self, attrs):
  54. '''
  55. This converts the provided attributes to HTML valid attributes. It is used
  56. in the starttag and startendtag methods.
  57. Attributes come in the form [(key, value)...]. If there is no value, then it is
  58. a boolean, so it does not get an ="", but it simply is declared.
  59. '''
  60. result = ['{0}="{1}"'.format(*x) if len(x) == 2 else x for x in attrs]
  61. return ' '.join(result)
  62. def reset(self):
  63. self.open_tags = []
  64. self.source = ""
  65. self.cleaned_data = ""
  66. super(HTMLCleaner, self).reset()
  67. def feed(self,data):
  68. data = data.strip()
  69. self.source = data
  70. super(HTMLCleaner,self).feed(data)
  71. if self.open_tags and self.strict:
  72. self.error("The following tag(s) remain unclosed: {0}".\
  73. format(', '.join(self.open_tags)))
  74. def handle_invalid_startendtag(self,tag,attr):
  75. self.cleaned_data += '<{0} {1}/>'.format(tag, self._format_attrs(attrs))\
  76. if attrs else '<{0}>'.format(tag)
  77. def handle_invalid_end(self,tag):
  78. self.cleaned_data += '&lt;/{0}&gt;'.format(tag)
  79. def handle_invalid_starttag(self,tag,attrs):
  80. self.cleaned_data += '&lt;{0} {1}&gt;'.format(tag, self._format_attrs(attrs))\
  81. if attrs else '&lt;{0}&gt;'.format(tag)
  82. def handle_data(self, data):
  83. # passthrough... no need to do anything here.
  84. self.cleaned_data += data
  85. def handle_startendtag(self,tag,attrs):
  86. if self._assert_valid_tag(tag):
  87. self.cleaned_data += '<{0} {1}/>'.format(tag, self._format_attrs(attrs))
  88. elif self.escape:
  89. self.handle_invalid_startendtag(tag,attrs)
  90. def handle_starttag(self,tag,attrs):
  91. if not self._assert_valid_tag(tag):
  92. if self.escape:
  93. self.handle_invalid_starttag(tag,attrs)
  94. return
  95. self.open_tags.append(tag)
  96. self.cleaned_data += '<{0} {1}>'.format(tag, self._format_attrs(attrs))\
  97. if attrs else '<{0}>'.format(tag)
  98. def handle_endtag(self,tag):
  99. if not self._assert_valid_tag(tag):
  100. if self.escape:
  101. self.handle_invalid_end(tag)
  102. return
  103. if self.strict and (not self.open_tags or self.open_tags[-1] != tag):
  104. self.error("The end tag {0} does not match the latest start tag {1}".\
  105. format(tag, self.open_tags[-1]))
  106. self.cleaned_data += '</{0}>'.format(self.open_tags.pop() if self.open_tags else tag)
  107. def handle_charref(self, name):
  108. self.handle_entityref("#"+name)
  109. def handle_entityref(self, name):
  110. self.handle_data("&"+name)
  111. def handle_comment(self, data):
  112. '''
  113. handle comment
  114. Overridden to hook into handle_disallowed_type
  115. '''
  116. self.handle_disallowed_type(data)
  117. def handle_decl(self, decl):
  118. '''
  119. handle declaration
  120. Overridden to hook into handle_disallowed_type
  121. '''
  122. self.handle_disallowed_type(decl)
  123. def handle_pi(self, data):
  124. '''
  125. handle processing instruction
  126. Overridden to hook into handle_disallowed_type
  127. '''
  128. self.handle_disallowed_type(data)
  129. def unknown_decl(self, data):
  130. '''
  131. handle unknown declaration
  132. Overridden to hook into handle_disallowed_type
  133. '''
  134. self.handle_disallowed_type(data)
  135. def handle_disallowed_type(self,val):
  136. '''
  137. Comment, pi, decl, and unknown decl all route into this function.
  138. By design, it does nothing. It is meant to be overridden in a
  139. descendant class if necessary.
  140. '''
  141. pass
  142. # This has to be included to handle http://bugs.python.org/issue13273
  143. def parse_starttag(self, i):
  144. self.__starttag_text = None
  145. endpos = self.check_for_whole_start_tag(i)
  146. if endpos < 0:
  147. return endpos
  148. rawdata = self.rawdata
  149. self.__starttag_text = rawdata[i:endpos]
  150. # Now parse the data between i+1 and j into a tag and attrs
  151. attrs = []
  152. match = hparse.tagfind.match(rawdata, i+1)
  153. assert match, 'unexpected call to parse_starttag()'
  154. k = match.end()
  155. self.lasttag = tag = rawdata[i+1:k].lower()
  156. while k < endpos:
  157. if self.strict:
  158. m = hparse.attrfind.match(rawdata, k)
  159. else:
  160. # bug fix... sigh...
  161. m = attrfind_tolerant.match(rawdata, k)
  162. if not m:
  163. break
  164. attrname, rest, attrvalue = m.group(1, 2, 3)
  165. if not rest:
  166. attrvalue = None
  167. elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
  168. attrvalue[:1] == '"' == attrvalue[-1:]:
  169. attrvalue = attrvalue[1:-1]
  170. attrvalue = self.unescape(attrvalue)
  171. attrs.append((attrname.lower(), attrvalue))
  172. k = m.end()
  173. end = rawdata[k:endpos].strip()
  174. if end not in (">", "/>"):
  175. lineno, offset = self.getpos()
  176. if "\n" in self.__starttag_text:
  177. lineno = lineno + self.__starttag_text.count("\n")
  178. offset = len(self.__starttag_text) \
  179. - self.__starttag_text.rfind("\n")
  180. else:
  181. offset = offset + len(self.__starttag_text)
  182. if self.strict:
  183. self.error("junk characters in start tag: %r"
  184. % (rawdata[k:endpos][:20],))
  185. self.handle_data(rawdata[i:endpos])
  186. return endpos
  187. if end.endswith('/>'):
  188. # XHTML-style empty tag: <span attr="value" />
  189. self.handle_startendtag(tag, attrs)
  190. else:
  191. self.handle_starttag(tag, attrs)
  192. if tag in self.CDATA_CONTENT_ELEMENTS:
  193. self.set_cdata_mode()
  194. return endpos
  195. ################################################################################
  196. #
  197. # Renderer
  198. #
  199. ################################################################################
  200. def _str_print(line):
  201. print(str(line))
  202. class BasicRenderer:
  203. '''
  204. A simple way to manage output to the browser, this will be the basis of
  205. all of the view classes which will be used in PyFram. The use is simple:
  206. renderer = BasicRenderer()
  207. renderer.output()
  208. '''
  209. def __init__(self, body = None, output = _str_print):
  210. '''
  211. Constructor of a BasicRender
  212. body - the body part of the request (must be iterable)
  213. (default [])
  214. output - the function used to output the value
  215. (default print(str(value)))
  216. '''
  217. # if body is not none use body, else use a new list
  218. self.body = body if body else []
  219. # assert hasattr(self.body,'__iter__'), \
  220. # 'body must be iterable'
  221. # pass in the default output method
  222. self.output = output
  223. assert callable(output), 'output must be callable'
  224. def append(self, line):
  225. ''' an easy means of adding data (generally a list) to the body '''
  226. self.body += line
  227. def render(self,output = None):
  228. ''' passes each line of output to the output function '''
  229. # remember, that first line after the headers needs to be empty
  230. # whether there have been headers output or not!
  231. output("")
  232. for ln in self.body:
  233. output(ln)
  234. class RequestRenderer(BasicRenderer):
  235. def __init__(self, headers = None, body = None, output = None):
  236. '''
  237. Like BasicRender, only adds headers.
  238. headers - the headers part of the request (everything which
  239. is returned as an HTML header) (must have a keys method)
  240. (default {})
  241. body - the body part of the request (must be iterable)
  242. (default [])
  243. output - the function used to output the value
  244. (default print(str(value)))
  245. '''
  246. # if headers is not none use headers, use a new dictionary
  247. self.headers = headers if headers else {"Content-Type":"text"}
  248. # this is a helper to make sure that
  249. '''assert callable(getattr(self.headers, 'keys', None)), \
  250. 'Headers must have a keys method'
  251. '''
  252. super(RequestRenderer, self).__init__(body,output)
  253. def setHeader(self,name,value):
  254. ''' allows for easy means of setting a header '''
  255. self.headers[name] = value
  256. def getHeader(self,name):
  257. ''' allows for an easy means of getting the header set '''
  258. return self.headers[name]
  259. def render(self,output = None):
  260. ''' passes each line of output to the output function '''
  261. # make it as easy as possible to pass in a new output
  262. output = output if output is not None else self.output
  263. if not (self.headers or self.body):
  264. # if neither, make sure that the script knows that it needs
  265. # to output *something* otherwise we'll get an error!
  266. output('')
  267. return
  268. for key,val in self.headers.items():
  269. output("{0}: {1}".format(key, val))
  270. output("")
  271. try:
  272. self.body.render(output)
  273. except:
  274. # remember, that first line after the headers needs to be empty
  275. # whether there have been headers output or not!
  276. for ln in self.body:
  277. output(ln)
  278. class HTMLDocumentRenderer(BasicRenderer):
  279. def __init__(self, head = None, body = None, doctype = '''<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
  280. "http://www.w3.org/TR/html4/strict.dtd">''', output = _str_print):
  281. self.doctype = doctype
  282. self.body = body if body is not None else []
  283. self.head = head if head is not None else []
  284. def append(self, line):
  285. # issue a warning, something which is annoying to look at, but
  286. # does not prevent action
  287. warnings.warn("HTMLDocumentRenderer should not have append " +
  288. "called on it directly")
  289. self.appendBody(line)
  290. def appendBody(self, line):
  291. self.body += line
  292. def appendHead(self, line):
  293. self.head += line
  294. def render(self,output = None):
  295. tmp = []
  296. # this actually performs better in benchmarks
  297. tmp.append(self.doctype + '<html><head>')
  298. tmp.extend(self.head)
  299. tmp.append('</head><body>')
  300. tmp.extend(self.body)
  301. tmp.append('</body></html>')
  302. output = output if output else self.output
  303. for ln in tmp:
  304. output(ln)
  305. class HTMLRenderer(RequestRenderer):
  306. ''' class designed for specialization in HTML requestions '''
  307. def __init__(self, headers = None, head = None, body = None, output = _str_print):
  308. body = body if body else HTMLDocumentRenderer(head,body)
  309. super(HTMLRenderer, self).__init__(headers,body,output)
  310. self.document = self.body
  311. def append(self, line):
  312. self.document.appendBody(line)
  313. def render(self, output=None):
  314. output = output if output else self.output
  315. if not (self.headers or self.body):
  316. # if neither, make sure that the script knows that it needs
  317. # to output *something* otherwise we'll get an error!
  318. output('')
  319. return
  320. for key,val in self.headers.items():
  321. output("{0}: {1}".format(key, val))
  322. output('');
  323. self.document.render(output)
  324. import cgitb
  325. class ErrorRenderer(HTMLRenderer):
  326. ''' class which is designed for use with the cgitb.enable method (and eventually
  327. with the PyFram handler '''
  328. def __init__(self, headers = None, body = None, output = _str_print):
  329. super(ErrorRenderer,self).__init__(headers,None,body,output)
  330. self.append(['<pre>'])
  331. def write(self, line = None):
  332. # did we close the last pre tag? Then open a new one.
  333. if self.body[-1] == '</pre>':
  334. self.append(['<pre>'])
  335. self.append([line])
  336. def flush(self):
  337. self.append(['</pre>'])
  338. self.render()