
/utils.py

https://bitbucket.org/socal.piggies/pwc/
Python | 103 lines
# -*- coding: utf-8 -*-
import base64
import uuid
import httplib
import pycurl
import urllib
import re

import simplejson as json

from pygments import highlight
from pygments.lexers import get_lexer_for_mimetype, get_lexer_by_name, guess_lexer
from pygments.formatters import HtmlFormatter
from pygments.util import ClassNotFound

# The original trailing comma made this a tuple of two strings; it is meant to
# be a single User-Agent string.
USER_AGENT = ('Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en-US; rv:1.8.1.14) '
              'Gecko/20080404 Firefox/2.0.0.14')

def generate_cookie_secret():
    # 32 random bytes, base64-encoded; usable as a cookie-signing secret.
    return base64.b64encode(uuid.uuid4().bytes + uuid.uuid4().bytes)


def get_hostname_from_url(url):
    # this usually works.
    return urllib.splithost(urllib.splittype(url)[1])[0]

def httplib_request(url, method, useragent=USER_AGENT):
    """not used yet"""
    host = get_hostname_from_url(url)
    conn = httplib.HTTPConnection(host)
    # the original referenced an undefined name `ua`; use the keyword argument
    conn.request(method, url, headers={'User-Agent': useragent})
    response = conn.getresponse()
    headers = response.msg.headers
    content_type = response.getheader('content-type') or 'text/html'
    data = response.read()
    conn.close()
    return data, headers, content_type

def urllib_request(url, method='GET'):
    resp = urllib.urlopen(url)
    #header_dict = resp.headers
    headers = resp.headers.headers
    content_type = resp.headers.get('content-type', 'text/html')
    data = resp.read()
    return data, headers, content_type

def highlight_output(data, headers, content_type):
    """
    returns syntax-highlighted data, expects:
        data = text blob (json, html, xml)
        headers = list of raw headers
        content_type = self-explanatory
    """
    if re.search('json|javascript', content_type):
        print 'pretty printing json/javascript'
        data = pretty_print_json(data)
        lexer = get_lexer_by_name('javascript', stripall=True)
    else:
        try:
            lexer = get_lexer_for_mimetype(content_type, stripall=True)
        except ClassNotFound:
            lexer = get_lexer_by_name('html', stripall=True)
    formatter = HtmlFormatter(linenos=False, cssclass="source")
    result = highlight(data, lexer, formatter)
    pretty_headers = pretty_print_headers(headers)
    #return result
    return pretty_headers + result

def pretty_print_headers(headers):
    """
    headers = ['Date: Tue, 16 Feb 2010 01:46:07 GMT\r\n', 'Server: Apache/2.0.54\r\n', 'Last-Modified: Mon, 25 Feb 2008 20:57:31 GMT\r\n', 'ETag: "7d0dec5-2dfa-d20624c0"\r\n', 'Accept-Ranges: bytes\r\n', 'Content-Length: 11770\r\n', 'Vary: Accept-Encoding,User-Agent\r\n', 'Connection: close\r\n', 'Content-Type: text/html\r\n']
    """
    pat = r"^(.+?):(.+)$"
    header_re = re.compile(pat)
    out = list()
    for line in headers:
        match = header_re.match(line)
        if match:
            out.append('<span class="nt">%s</span>:<span class="s">%s</span>' % match.groups())
        else:
            out.append('<span class="nf">%s</span>' % line)
    ret = '<div class="highlight"><pre>%s</pre></div>' % ''.join(out)
    return ret

def pretty_print_json(data):
    """
    makes json human-readable by adding indentation and line breaks.
    """
    j = json.loads(data)
    s = json.dumps(j, sort_keys=True, indent=4)
    ret = '\n'.join([l.rstrip() for l in s.splitlines()])
    return ret


__all__ = ['pretty_print_json', 'pretty_print_headers', 'highlight_output', 'urllib_request']
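
A minimal usage sketch (not part of the original file; it assumes network access and that pygments and simplejson are installed, and the URL is illustrative only), showing how urllib_request and highlight_output fit together:

# hypothetical example, not in the original module
if __name__ == '__main__':
    data, headers, content_type = urllib_request('http://example.com/data.json')
    # headers is the list of raw header lines; the result is an HTML fragment
    print highlight_output(data, headers, content_type)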