/Demo/xml/rss2html.py

http://unladen-swallow.googlecode.com/ · Python · 91 lines · 56 code · 19 blank · 16 comment · 13 complexity · 96f910edd622e18a228ef9f18be052cc MD5 · raw file

  1. import sys
  2. from xml.sax import make_parser, handler
  3. # --- Templates
  4. top = \
  5. """
  6. <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
  7. <HTML>
  8. <HEAD>
  9. <TITLE>%s</TITLE>
  10. </HEAD>
  11. <BODY>
  12. <H1>%s</H1>
  13. """
  14. bottom = \
  15. """
  16. </ul>
  17. <HR>
  18. <ADDRESS>
  19. Converted to HTML by sax_rss2html.py.
  20. </ADDRESS>
  21. </BODY>
  22. </HTML>
  23. """
  24. # --- The ContentHandler
  25. class RSSHandler(handler.ContentHandler):
  26. def __init__(self, out = sys.stdout):
  27. handler.ContentHandler.__init__(self)
  28. self._out = out
  29. self._text = ""
  30. self._parent = None
  31. self._list_started = 0
  32. self._title = None
  33. self._link = None
  34. self._descr = ""
  35. # ContentHandler methods
  36. def startElement(self, name, attrs):
  37. if name == "channel" or name == "image" or name == "item":
  38. self._parent = name
  39. self._text = ""
  40. def endElement(self, name):
  41. if self._parent == "channel":
  42. if name == "title":
  43. self._out.write(top % (self._text, self._text))
  44. elif name == "description":
  45. self._out.write("<p>%s</p>\n" % self._text)
  46. elif self._parent == "item":
  47. if name == "title":
  48. self._title = self._text
  49. elif name == "link":
  50. self._link = self._text
  51. elif name == "description":
  52. self._descr = self._text
  53. elif name == "item":
  54. if not self._list_started:
  55. self._out.write("<ul>\n")
  56. self._list_started = 1
  57. self._out.write(' <li><a href="%s">%s</a> %s\n' %
  58. (self._link, self._title, self._descr))
  59. self._title = None
  60. self._link = None
  61. self._descr = ""
  62. if name == "rss":
  63. self._out.write(bottom)
  64. def characters(self, content):
  65. self._text = self._text + content
  66. # --- Main program
  67. parser = make_parser()
  68. parser.setContentHandler(RSSHandler())
  69. parser.parse(sys.argv[1])