/wk7/mon/view.py
Python | 404 lines | 372 code | 9 blank | 23 comment | 3 complexity | 37c717d572423dab681c70d6ef8e242c MD5 | raw file
- import warnings
# Public names exported by ``from <module> import *``.  Every entry must match
# a class defined in this module, otherwise the star-import itself fails.
__all__ = [
    "BasicRenderer",
    "HTMLRenderer",
    "RequestRenderer",
    "HTMLDocumentRenderer",
    "ErrorRenderer",
    "HTMLCleaner",
]
- ################################################################################
- #
- # Cleaner
- #
- ################################################################################
- import html.parser as hparse
# Tags HTMLCleaner lets through when the caller does not supply its own list.
DEFAULT_ALLOWED_TAGS = [
    'b',
    'strong',
    'em',
    'i',
    'u',
    'a',
]
- import re
# Lenient attribute matcher: group 1 is the attribute name, group 2 the
# optional "= value" part, group 3 the value itself (quotes included).
attrfind_tolerant = re.compile(
    r',?\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)'
    r'(\s*=\s*'
    r'(\'[^\']*\'|"[^"]*"|[^>\s]*))?'
)
from html import escape as _escape_html

try:
    # Older Python 3 releases export this exception from html.parser.
    from html.parser import HTMLParseError
except ImportError:
    class HTMLParseError(Exception):
        '''Raised by HTMLCleaner in strict mode when the markup is rejected.'''

        def __init__(self, msg, position=(None, None)):
            super(HTMLParseError, self).__init__(msg)
            self.msg = msg
            self.lineno, self.offset = position

        def __str__(self):
            result = self.msg
            if self.lineno is not None:
                result += ", at line %d" % self.lineno
            if self.offset is not None:
                result += ", column %d" % (self.offset + 1)
            return result


class HTMLCleaner(hparse.HTMLParser):
    '''Strip or escape every tag that is not explicitly allowed.

    This class is specifically designed to prevent XSS insertion and inputs
    which could otherwise break the DOM.  It is meant to parallel the
    strip_tags function that is in PHP.

    The cleaned markup accumulates in ``cleaned_data`` and is what ``str()``
    returns.  Strict mode is implemented by this class itself (``self.strict``
    and ``self.error``) and the stock ``parse_starttag`` of the base class is
    used, so the class does not depend on parser internals.

    NOTE(review): attributes of *allowed* tags are re-emitted (escaped, but
    otherwise unfiltered), so e.g. ``href``/``on*`` values on an allowed tag
    still reach the output.
    '''

    # We don't want anything to get escaped like CDATA in this class.
    # Everything should be fair game.
    CDATA_CONTENT_ELEMENTS = (None,)

    def __init__(self, strict=False, escape=True, allowed=None, feed=None):
        '''Constructor

        Keyword arguments:
        strict  -- when True, disallowed tags, mismatched end tags and tags
                   left open after feed() raise HTMLParseError.
        escape  -- in non-strict mode, True writes disallowed tags to the
                   output in escaped form (&lt;tag&gt;); False drops them.
        allowed -- The list of allowed tags.  None selects a copy of
                   DEFAULT_ALLOWED_TAGS.
        feed    -- if not None, this will be fed directly into self.feed.
        '''
        if allowed is None:
            # Resolved at call time and copied so that editing one cleaner's
            # list cannot change the module-wide default.
            allowed = list(DEFAULT_ALLOWED_TAGS)
        self.allowed_tags = allowed
        self.escape = escape

        # Character/entity references must reach handle_charref and
        # handle_entityref untouched; letting the parser decode them would
        # turn "&lt;" in the input into a live "<" in the data stream.
        super(HTMLCleaner, self).__init__(convert_charrefs=False)
        self.strict = strict
        if feed is not None:
            self.feed(feed)

    def __str__(self):
        return self.cleaned_data

    def error(self, message):
        '''Raise HTMLParseError for `message` at the current parser position.'''
        raise HTMLParseError(message, self.getpos())

    def _assert_valid_tag(self, tag):
        '''
        Tests to see if the provided tag is in the allowed tag set.  If in
        strict mode it will cause an error if it isn't.  Otherwise it will
        return True if it is, False if it isn't.
        '''
        if tag in self.allowed_tags:
            return True
        if self.strict:
            self.error('{0} is not in the list of allowed tags: {1}'.format(
                tag, self.allowed_tags))
        return False

    def _format_attrs(self, attrs):
        '''
        Convert parsed attributes back into HTML attribute text.

        Attributes come in the form [(key, value)...].  A value of None marks
        a boolean attribute, which is written as the bare name.  Names and
        values are escaped so that a quote or angle bracket inside them
        cannot terminate the attribute or the tag.
        '''
        parts = []
        for name, value in attrs:
            safe_name = _escape_html(name, quote=True)
            if value is None:
                parts.append(safe_name)
            else:
                parts.append('{0}="{1}"'.format(
                    safe_name, _escape_html(value, quote=True)))
        return ' '.join(parts)

    def reset(self):
        '''Clear the collected output and open-tag stack, then reset the parser.'''
        self.open_tags = []
        self.source = ""
        self.cleaned_data = ""
        super(HTMLCleaner, self).reset()

    def feed(self, data):
        '''
        Parse `data` (stripped of surrounding whitespace) and append the
        cleaned result to ``cleaned_data``.  In strict mode, allowed tags that
        are still open afterwards cause an error.
        '''
        data = data.strip()
        self.source = data
        super(HTMLCleaner, self).feed(data)
        if self.open_tags and self.strict:
            self.error("The following tag(s) remain unclosed: {0}".format(
                ', '.join(self.open_tags)))

    def handle_invalid_startendtag(self, tag, attr):
        '''Write a disallowed self-closing tag to the output in escaped form.'''
        self.cleaned_data += '&lt;{0} {1}/&gt;'.format(
            _escape_html(tag, quote=True), self._format_attrs(attr))

    def handle_invalid_end(self, tag):
        '''Write a disallowed end tag to the output in escaped form.'''
        self.cleaned_data += '&lt;/{0}&gt;'.format(_escape_html(tag, quote=True))

    def handle_invalid_starttag(self, tag, attrs):
        '''Write a disallowed start tag to the output in escaped form.'''
        safe_tag = _escape_html(tag, quote=True)
        if attrs:
            self.cleaned_data += '&lt;{0} {1}&gt;'.format(
                safe_tag, self._format_attrs(attrs))
        else:
            self.cleaned_data += '&lt;{0}&gt;'.format(safe_tag)

    def handle_data(self, data):
        # Text is passed through, but angle brackets are neutralised: the
        # parser hands malformed markup to this method as plain data, and it
        # must not come out the other side as a tag.
        self.cleaned_data += data.replace('<', '&lt;').replace('>', '&gt;')

    def handle_startendtag(self, tag, attrs):
        if self._assert_valid_tag(tag):
            self.cleaned_data += '<{0} {1}/>'.format(
                tag, self._format_attrs(attrs))
        elif self.escape:
            self.handle_invalid_startendtag(tag, attrs)

    def handle_starttag(self, tag, attrs):
        if not self._assert_valid_tag(tag):
            if self.escape:
                self.handle_invalid_starttag(tag, attrs)
            return
        self.open_tags.append(tag)
        if attrs:
            self.cleaned_data += '<{0} {1}>'.format(
                tag, self._format_attrs(attrs))
        else:
            self.cleaned_data += '<{0}>'.format(tag)

    def handle_endtag(self, tag):
        if not self._assert_valid_tag(tag):
            if self.escape:
                self.handle_invalid_end(tag)
            return
        if self.strict and (not self.open_tags or self.open_tags[-1] != tag):
            # open_tags may be empty here, so do not index it blindly.
            latest = self.open_tags[-1] if self.open_tags else None
            self.error(
                "The end tag {0} does not match the latest start tag {1}".format(
                    tag, latest))
        # Outside strict mode the most recently opened tag is the one that
        # gets closed, whatever name the end tag carried.
        self.cleaned_data += '</{0}>'.format(
            self.open_tags.pop() if self.open_tags else tag)

    def handle_charref(self, name):
        self.handle_entityref("#" + name)

    def handle_entityref(self, name):
        # Re-emit the reference complete with its terminating semicolon.
        self.handle_data("&" + name + ";")

    def handle_comment(self, data):
        '''
        handle comment
        Overridden to hook into handle_disallowed_type
        '''
        self.handle_disallowed_type(data)

    def handle_decl(self, decl):
        '''
        handle declaration
        Overridden to hook into handle_disallowed_type
        '''
        self.handle_disallowed_type(decl)

    def handle_pi(self, data):
        '''
        handle processing instruction
        Overridden to hook into handle_disallowed_type
        '''
        self.handle_disallowed_type(data)

    def unknown_decl(self, data):
        '''
        handle unknown declaration
        Overridden to hook into handle_disallowed_type
        '''
        self.handle_disallowed_type(data)

    def handle_disallowed_type(self, val):
        '''
        Comment, pi, decl, and unknown decl all route into this function.
        By design, it does nothing.  It is meant to be overridden in a
        descendant class if necessary.
        '''
        pass
- ################################################################################
- #
- # Renderer
- #
- ################################################################################
def _str_print(line):
    '''Default output callable: print the string form of `line`.'''
    text = str(line)
    print(text)
class BasicRenderer:
    '''
    A simple way to manage output to the browser, this will be the basis of
    all of the view classes which will be used in PyFram.  The use is simple:
        renderer = BasicRenderer()
        renderer.render()
    '''

    def __init__(self, body=None, output=None):
        '''
        Constructor of a BasicRenderer

        body   - the body part of the request (must be iterable)
                 (default [])
        output - the callable each line is passed to
                 (default, also selected by None: print(str(value)))

        Raises TypeError if output is neither None nor callable.
        '''
        # A missing or empty body is replaced by a fresh list.
        self.body = body if body else []
        if output is None:
            # Looked up at call time so subclasses may simply forward None.
            output = _str_print
        if not callable(output):
            raise TypeError('output must be callable')
        self.output = output

    def append(self, line):
        ''' an easy means of adding data (generally a list) to the body '''
        if isinstance(line, str):
            # "+=" on a list would add the string one character at a time.
            line = [line]
        self.body += line

    def render(self, output=None):
        '''
        passes each line of output to the output function

        output - overrides the callable given to the constructor for this
                 call only.
        '''
        if output is None:
            output = self.output
        # remember, that first line after the headers needs to be empty
        # whether there have been headers output or not!
        output("")
        for ln in self.body:
            output(ln)
-
class RequestRenderer(BasicRenderer):
    '''Renderer that writes header lines, a blank line, then the body.'''

    def __init__(self, headers=None, body=None, output=None):
        '''
        Like BasicRenderer, only adds headers.

        headers - the headers part of the request (everything which
                  is returned as an HTML header) (must have an items method)
                  (default {"Content-Type": "text"})
        body    - the body part of the request: an iterable of lines, or an
                  object with its own render(output) method
                  (default [])
        output  - the function used to output the value
                  (default print(str(value)))
        '''
        # A missing or empty headers mapping is replaced by the default.
        # NOTE(review): "text" is not a complete media type; left unchanged
        # here because subclasses inherit this default.
        self.headers = headers if headers else {"Content-Type": "text"}
        if output is None:
            # The base class requires a callable, so resolve the default here.
            output = _str_print
        super(RequestRenderer, self).__init__(body, output)

    def setHeader(self, name, value):
        ''' allows for easy means of setting a header '''
        self.headers[name] = value

    def getHeader(self, name):
        ''' allows for an easy means of getting the header set '''
        return self.headers[name]

    def render(self, output=None):
        ''' passes each line of output to the output function '''
        # make it as easy as possible to pass in a new output
        output = output if output is not None else self.output
        if not (self.headers or self.body):
            # if neither, make sure that the script knows that it needs
            # to output *something* otherwise we'll get an error!
            output('')
            return

        for key, val in self.headers.items():
            output("{0}: {1}".format(key, val))

        # the first line after the headers needs to be empty
        output("")

        # A body that can render itself does so; anything else is treated as
        # an iterable of lines.  Errors raised while rendering propagate
        # instead of being silently retried as a plain iteration.
        render_body = getattr(self.body, 'render', None)
        if callable(render_body):
            render_body(output)
        else:
            for ln in self.body:
                output(ln)
class HTMLDocumentRenderer(BasicRenderer):
    '''Renders a doctype, a <head> section and a <body> section.'''

    def __init__(self, head=None, body=None,
                 doctype='<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"\n'
                         '"http://www.w3.org/TR/html4/strict.dtd">',
                 output=_str_print):
        '''
        head    - list of lines placed inside <head> (default [])
        body    - list of lines placed inside <body> (default [])
        doctype - text emitted before the opening <html> tag
        output  - the callable used by render() when none is passed to it
        '''
        if output is None:
            output = _str_print
        # The base constructor stores `output`, which render() falls back on.
        super(HTMLDocumentRenderer, self).__init__(body, output)
        self.doctype = doctype
        # Keep the caller's own list objects, even when they are empty.
        self.body = body if body is not None else []
        self.head = head if head is not None else []

    def append(self, line):
        # issue a warning, something which is annoying to look at, but
        # does not prevent action
        warnings.warn("HTMLDocumentRenderer should not have append " +
                      "called on it directly")
        self.appendBody(line)

    def appendBody(self, line):
        ''' add lines (generally a list) to the body section '''
        if isinstance(line, str):
            # "+=" on a list would add the string one character at a time.
            line = [line]
        self.body += line

    def appendHead(self, line):
        ''' add lines (generally a list) to the head section '''
        if isinstance(line, str):
            line = [line]
        self.head += line

    def render(self, output=None):
        ''' passes the assembled document, piece by piece, to output '''
        tmp = []
        # this actually performs better in benchmarks
        tmp.append(self.doctype + '<html><head>')
        tmp.extend(self.head)
        tmp.append('</head><body>')
        tmp.extend(self.body)
        tmp.append('</body></html>')

        output = output if output is not None else self.output
        for ln in tmp:
            output(ln)
class HTMLRenderer(RequestRenderer):
    ''' class designed for specialization in HTML requests '''

    def __init__(self, headers=None, head=None, body=None, output=_str_print):
        '''
        headers - mapping of response headers
                  (default {"Content-Type": "text/html"})
        head    - lines for the document head; only used when a new document
                  is created
        body    - either a ready-made document object (anything with an
                  appendBody method) or the lines for a new document's body
        output  - the function used to output the value
        '''
        if hasattr(body, 'appendBody'):
            document = body
        else:
            # Plain lines (or nothing) are wrapped so that append() and
            # render() always have a document to work with.
            document = HTMLDocumentRenderer(head, body)
        if not headers:
            # This renderer emits an HTML document, so label it as one.
            headers = {"Content-Type": "text/html"}
        super(HTMLRenderer, self).__init__(headers, document, output)
        self.body = document
        self.document = document

    def append(self, line):
        ''' add lines to the body section of the document '''
        self.document.appendBody(line)

    def render(self, output=None):
        ''' writes the headers, a blank line, then the document '''
        output = output if output is not None else self.output
        if not (self.headers or self.body):
            # if neither, make sure that the script knows that it needs
            # to output *something* otherwise we'll get an error!
            output('')
            return
        for key, val in self.headers.items():
            output("{0}: {1}".format(key, val))
        output('')
        self.document.render(output)
-
- import cgitb
class ErrorRenderer(HTMLRenderer):
    ''' class which is designed for use with the cgitb.enable method (and
    eventually with the PyFram handler).  It offers the write()/flush() pair
    of a file-like object and wraps what is written in <pre> blocks. '''

    def __init__(self, headers=None, body=None, output=_str_print):
        super(ErrorRenderer, self).__init__(headers, None, body, output)
        self.append(['<pre>'])

    def write(self, line=None):
        ''' add `line` to the document, reopening <pre> after a flush '''
        # did we close the last pre tag? Then open a new one.
        # The lines live on the document object, not on self.body.
        lines = self.document.body
        if lines and lines[-1] == '</pre>':
            self.append(['<pre>'])
        self.append([line])

    def flush(self):
        ''' close the current <pre> block and render the whole response '''
        self.append(['</pre>'])
        self.render()