/utils.py
Python | 103 lines | 76 code | 15 blank | 12 comment | 5 complexity | afe1c675d4a739d68efcee9b65e06bfd MD5 | raw file
- # -*- coding: utf-8 -*-
- import base64
- import uuid
- import httplib
- import pycurl
- import urllib
- import re
- import simplejson as json
- from pygments import highlight
- from pygments.lexers import get_lexer_for_mimetype, get_lexer_by_name, guess_lexer
- from pygments.formatters import HtmlFormatter
- from pygments.util import ClassNotFound
# Default value for the ``useragent`` parameter of httplib_request().
# The two literals are joined by implicit string concatenation; separating
# them with a comma would silently turn the constant into a 2-tuple.
USER_AGENT = (
    'Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en-US; rv:1.8.1.14) '
    'Gecko/20080404 Firefox/2.0.0.14'
)
def generate_cookie_secret():
    """
    returns a new random secret: the bytes of two freshly generated
    uuid4 values (32 bytes in total), base64-encoded.
    """
    first_half = uuid.uuid4().bytes
    second_half = uuid.uuid4().bytes
    return base64.b64encode(first_half + second_half)
def get_hostname_from_url(url):
    """
    returns the network-location part of ``url`` (host, plus any
    ``user:pass@`` prefix or ``:port`` suffix present in the URL),
    or None when the URL has no network location (e.g. a bare path).
    """
    # urlsplit is the documented URL parser; it lives in a different
    # module on Python 2 and Python 3, so import it locally.
    try:
        from urllib.parse import urlsplit  # Python 3
    except ImportError:
        from urlparse import urlsplit  # Python 2

    # netloc is '' when absent; callers historically received None.
    return urlsplit(url).netloc or None
def httplib_request(url, method, useragent=USER_AGENT):
    """
    fetches ``url`` over plain HTTP using httplib (not used yet).

    url = URL to request; its host part selects the server to connect to
    method = HTTP method handed to HTTPConnection.request (e.g. 'GET')
    useragent = value sent in the User-Agent request header

    returns (data, headers, content_type):
    data = response body
    headers = list of raw response header lines
    content_type = Content-Type header, 'text/html' when missing
    """
    # A header value must be a single string; join a sequence of fragments
    # rather than letting httplib stringify the container.
    if isinstance(useragent, (tuple, list)):
        useragent = ' '.join(useragent)

    host = get_hostname_from_url(url)
    conn = httplib.HTTPConnection(host)
    try:
        conn.request(method, url, headers={'User-Agent': useragent})
        response = conn.getresponse()
        headers = response.msg.headers
        content_type = response.getheader('content-type') or 'text/html'
        data = response.read()
    finally:
        # Release the socket even if the request or the read fails.
        conn.close()
    return data, headers, content_type
def urllib_request(url, method='GET'):
    """
    fetches ``url`` with urllib.urlopen.

    url = URL to open
    method = accepted but currently ignored; the request is whatever
             urllib.urlopen(url) issues

    returns (data, headers, content_type):
    data = response body
    headers = list of raw response header lines
    content_type = Content-Type header, 'text/html' when missing
    """
    resp = urllib.urlopen(url)
    try:
        headers = resp.headers.headers
        content_type = resp.headers.get('content-type', 'text/html')
        data = resp.read()
    finally:
        # Close the response so the underlying connection is not leaked.
        resp.close()
    return data, headers, content_type
def highlight_output(data, headers, content_type):
    """
    returns syntax-highlighted data, expects:
    data = text blob (json, html, xml)
    headers = list of raw headers
    content_type = self-explanatory

    The result is the HTML produced by pretty_print_headers(headers)
    followed by the pygments-highlighted body. Bodies whose content type
    mentions json or javascript are re-indented first when they parse as
    JSON and are highlighted with the javascript lexer; any other content
    type uses the lexer registered for that mimetype, falling back to the
    html lexer when pygments knows none.
    """
    if re.search('json|javascript', content_type):
        print('pretty printing json/javascript')
        try:
            data = pretty_print_json(data)
        except ValueError:
            # The content type also matches plain JavaScript, which is not
            # valid JSON (and JSON itself may be malformed): highlight the
            # body unformatted instead of failing the whole render.
            pass
        lexer = get_lexer_by_name('javascript', stripall=True)
    else:
        try:
            lexer = get_lexer_for_mimetype(content_type, stripall=True)
        except ClassNotFound:
            lexer = get_lexer_by_name('html', stripall=True)

    formatter = HtmlFormatter(linenos=False, cssclass="source")
    result = highlight(data, lexer, formatter)
    pretty_headers = pretty_print_headers(headers)
    return pretty_headers + result
def pretty_print_headers(headers):
    """
    renders raw header lines as an HTML fragment.

    headers = list of raw header lines, e.g.
    ['Date: Tue, 16 Feb 2010 01:46:07 GMT\\r\\n', 'Server: Apache/2.0.54\\r\\n',
     'Content-Length: 11770\\r\\n', 'Content-Type: text/html\\r\\n']

    Lines of the form ``name:value`` become a "nt" span (name) and a "s"
    span (value); any other line is wrapped whole in a "nf" span. The
    spans are returned inside <div class="highlight"><pre>...</pre></div>.
    """
    # Header text comes from a remote server, so &, < and > are escaped
    # before being placed in the markup. Quotes are left alone: the text
    # only ever lands in element content, never in an attribute.
    try:
        from html import escape  # Python 3
    except ImportError:
        from cgi import escape  # Python 2

    header_re = re.compile(r"^(.+?):(.+)$")
    out = []
    for line in headers:
        match = header_re.match(line)
        if match:
            name, value = match.groups()
            out.append(
                '<span class="nt">%s</span>:<span class="s">%s</span>'
                % (escape(name, False), escape(value, False))
            )
        else:
            out.append('<span class="nf">%s</span>' % escape(line, False))
    return '<div class="highlight"><pre>%s</pre></div>' % ''.join(out)
def pretty_print_json(data):
    """
    re-serialises a json document for reading: keys sorted, 4-space
    indentation, and no trailing whitespace on any line.
    """
    parsed = json.loads(data)
    indented = json.dumps(parsed, sort_keys=True, indent=4)
    # The serialiser may leave a space after separators at line ends.
    trimmed_rows = (row.rstrip() for row in indented.splitlines())
    return '\n'.join(trimmed_rows)
# Names exported by ``from utils import *``.
__all__ = [
    'pretty_print_json',
    'pretty_print_headers',
    'highlight_output',
    'urllib_request',
]