PageRenderTime 28ms CodeModel.GetById 33ms RepoModel.GetById 0ms app.codeStats 0ms

/modules/internets/api/feed.py

https://bitbucket.org/milos07p/pypsd-nao-on-git
Python | 194 lines | 187 code | 7 blank | 0 comment | 13 complexity | eb8aa6c3ea8679923e691d600b7fbda2 MD5 | raw file
  1. import httplib
  2. import json
  3. import socket
  4. import urllib2
  5. import xpath
  6. from BaseHTTPServer import BaseHTTPRequestHandler
  7. from BeautifulSoup import BeautifulSoup
  8. from decimal import Decimal
  9. from StringIO import StringIO
  10. from urlparse import urlparse
  11. from xml.dom.minidom import Element, Document
  12. from xml.dom.minidom import parse
  13. class InputError(Exception):
  14. def __init__(self, msg):
  15. self.msg = msg
  16. def __str__(self):
  17. return str(self.msg)
  18. class FeedError(Exception):
  19. def __init__(self, e):
  20. if hasattr(e, 'code'):
  21. c = e.code
  22. if c == 404:
  23. self.msg = 'not found.'
  24. elif c == 406:
  25. self.msg = 'this resource is unavailable.'
  26. elif c == 500:
  27. self.msg = 'the server has encountered an unexpected error.'
  28. elif c == 502:
  29. self.msg = 'invalid response from the server. Try again later.'
  30. elif c == 503:
  31. self.msg = 'this resource is temporarily unavailable. Try again later.'
  32. elif c == 512:
  33. self.msg = 'this resource is not supported.'
  34. else:
  35. self.msg = 'something went wrong while connecting (%s)' % BaseHTTPRequestHandler.responses[e.code][0]
  36. self.code = c
  37. self.url = e.url
  38. elif hasattr(e, 'reason'):
  39. r = str(e.reason)
  40. if r == 'timed out':
  41. self.msg = 'connection timed out. Try again later.'
  42. else:
  43. self.msg = r
  44. self.code = None
  45. self.url = None
  46. elif hasattr(e, 'message'):
  47. if e.message == '':
  48. self.msg = 'invalid response from the server. Try again later.'
  49. self.code = None
  50. self.url = None
  51. else:
  52. pass
  53. else:
  54. pass #???
  55. def __str__(self):
  56. return self.msg
  57. class HtmlFeed:
  58. def __init__(self, value, fake_ua=False):
  59. if value == None:
  60. raise InputError('Invalid feed input.')
  61. if isinstance(value, str) or isinstance(value, unicode):
  62. try:
  63. opener = urllib2.build_opener()
  64. if fake_ua:
  65. opener.addheaders = [('User-Agent', 'User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64; rv:7.0.1) Gecko/20100101 Firefox/7.0.1')]
  66. else:
  67. opener.addheaders = [('User-Agent', 'Rizon Internets bot - www.rizon.net')]
  68. feed = opener.open(value.replace(' ', '%20'), timeout=20)
  69. self._html = feed.read()
  70. feed.close()
  71. except urllib2.URLError, e:
  72. raise FeedError(e)
  73. except httplib.BadStatusLine, e:
  74. raise FeedError(e)
  75. else:
  76. raise InputError('Invalid feed input type.')
  77. def html(self):
  78. return self._html
  79. def get_soup(self):
  80. return BeautifulSoup(self._html, convertEntities=BeautifulSoup.HTML_ENTITIES)
  81. def get_json(value):
  82. if value == None:
  83. raise InputError('Invalid feed input.')
  84. if isinstance(value, basestring):
  85. feed = HtmlFeed(value)
  86. return json.load(StringIO(feed.html()))
  87. else:
  88. raise InputError('Invalid feed input type.')
  89. class XmlFeed:
  90. def __init__(self, value, namespaces = None):
  91. if value == None:
  92. raise InputError('Invalid feed input.')
  93. self.namespaces = namespaces
  94. if isinstance(value, basestring):
  95. feed = HtmlFeed(value)
  96. self._element = parse(StringIO(feed.html()))
  97. elif isinstance(value, Element) or isinstance(value, Document):
  98. self._element = value
  99. else:
  100. raise InputError('Invalid feed input type.')
  101. error = xpath.findvalue('/error/message', self._element)
  102. if error != None:
  103. raise FeedError(error)
  104. def elements(self, query):
  105. return [XmlFeed(x, self.namespaces) for x in xpath.find(query, self._element, namespaces=self.namespaces)]
  106. def text(self, query, default=None):
  107. result = xpath.findvalue(query, self._element, namespaces=self.namespaces)
  108. if not result:
  109. value = default
  110. else:
  111. value = result.strip()
  112. if isinstance(value, unicode):
  113. try:
  114. value = value.encode('latin-1').decode('utf-8')
  115. except:
  116. pass
  117. return value
  118. def int(self, query, default = None):
  119. result = self.text(query, None)
  120. if result == None:
  121. return default
  122. try:
  123. return int(result)
  124. except:
  125. return default
  126. def decimal(self, query, default = None):
  127. result = self.text(query, None)
  128. if result == None:
  129. return default
  130. try:
  131. return Decimal(result)
  132. except:
  133. return default
  134. def bool(self, query, default = None):
  135. result = self.text(query, None)
  136. if result == None:
  137. return default
  138. if 'true' in result.lower() or result == '1':
  139. return True
  140. elif 'false' in result.lower() or result == '0':
  141. return False
  142. else:
  143. try:
  144. return int(result) > 0
  145. except:
  146. return default
  147. def attribute(self, query, attr, default = None, checkEveryOccurrence = False):
  148. elements = xpath.find(query, self._element)
  149. if len(elements) > 0 and not checkEveryOccurrence:
  150. if elements[0].hasAttribute(attr):
  151. return elements[0].getAttribute(attr)
  152. else:
  153. return None
  154. else:
  155. for e in elements:
  156. if e.hasAttribute(attr):
  157. return e.getAttribute(attr)
  158. return None