PageRenderTime 50ms CodeModel.GetById 24ms RepoModel.GetById 1ms app.codeStats 0ms

/apps/links/utils.py

https://github.com/theinterned/batucada
Python | 116 lines | 82 code | 19 blank | 15 comment | 31 complexity | f09865f70fd3c14d5383cb96a1e2e6c8 MD5 | raw file
  1. import urlparse
  2. from xml import sax
  3. try:
  4. from cStringIO import StringIO
  5. except ImportError:
  6. from StringIO import StringIO
  7. from BeautifulSoup import BeautifulSoup
  8. from django.conf import settings
  9. def normalize_url(url, base_url):
  10. """Try to detect relative URLs and convert them into absolute URLs."""
  11. parts = urlparse.urlparse(url)
  12. if parts.scheme and parts.netloc:
  13. return url # looks fine
  14. if not base_url:
  15. return url
  16. base_parts = urlparse.urlparse(base_url)
  17. server = '://'.join((base_parts.scheme, base_parts.netloc))
  18. if server[-1] != '/' and url[0] != '/':
  19. server = server + '/'
  20. if server[-1] == '/' and url[0] == '/':
  21. server = server[:-1]
  22. return server + url
  23. class FeedHandler(sax.ContentHandler):
  24. """Parse RSS and Atom feeds and look for a PubSubHubbub hub."""
  25. href = None
  26. def startElementNS(self, name, qname, attrs):
  27. """Return href of link element with a rel attribute of 'hub'."""
  28. # stop processing if we encounter entries or items.
  29. if name == ('', 'item'):
  30. raise sax.SAXException('encountered item element')
  31. if name == ('http://www.w3.org/2005/Atom', 'entry'):
  32. raise sax.SAXException('encountered entry element')
  33. # only elements we're concerned with now are links
  34. if name != ('http://www.w3.org/2005/Atom', 'link'):
  35. return
  36. # drop namespace from attr names, build a dictionary of
  37. # local attribute name = value.
  38. fixed = {}
  39. for name, value in attrs.items():
  40. (namespace, local) = name
  41. fixed[local] = value
  42. # only concerned with links with 'hub' rel and an href attr.
  43. if not ('rel' in fixed and fixed['rel'] == 'hub'):
  44. return
  45. if not 'href' in fixed:
  46. return
  47. self.href = fixed['href']
  48. raise sax.SAXException('done') # hacky way to signal that we're done.
  49. def parse_feed_url(content, url=None):
  50. """
  51. Parse the provided html and return the first Atom or RSS feed we find.
  52. Note that a preference is given to Atom if the HTML contains links to
  53. both.
  54. """
  55. soup = BeautifulSoup(content)
  56. links = soup.findAll('link')
  57. # BeautifulSoup instances are not actually dictionaries, so
  58. # we can't use the more proper 'key in dict' syntax and
  59. # must instead use the deprecated 'has_key()' method.
  60. alternates = [link for link in links
  61. if link.has_key('rel') and link['rel'] == 'alternate']
  62. get_by_type = lambda t, links: [l for l in links
  63. if l.has_key('type') and l['type'] == t]
  64. get_hrefs = lambda links: [l['href'] for l in links if l.has_key('href')]
  65. atom = get_by_type('application/atom+xml', alternates)
  66. if atom:
  67. hrefs = get_hrefs(atom)
  68. if hrefs:
  69. return normalize_url(hrefs[0], url)
  70. rss = get_by_type('application/rss+xml', alternates)
  71. if rss:
  72. hrefs = get_hrefs(rss)
  73. if hrefs:
  74. return normalize_url(hrefs[0], url)
  75. return None
  76. def parse_hub_url(content, base_url=None):
  77. """Parse the provided xml and find a hub link."""
  78. handler = FeedHandler()
  79. parser = sax.make_parser()
  80. parser.setContentHandler(handler)
  81. parser.setFeature(sax.handler.feature_namespaces, 1)
  82. inpsrc = sax.xmlreader.InputSource()
  83. inpsrc.setByteStream(StringIO(content))
  84. try:
  85. parser.parse(inpsrc)
  86. except sax.SAXException:
  87. pass
  88. if handler.href is None:
  89. return handler.href
  90. return normalize_url(handler.href, base_url)
  91. def hub_credentials(hub_url):
  92. """Credentials callback for django_push.subscribers"""
  93. if hub_url == settings.SUPERFEEDR_URL:
  94. return (settings.SUPERFEEDR_USERNAME, settings.SUPERFEEDR_PASSWORD)
  95. return None