PageRenderTime 89ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/ZenPacks.chudler.GoogleAppEngine/ZenPacks/chudler/GoogleAppEngine/google_soup.py

https://github.com/chudler/Community-Zenpacks
Python | 221 lines | 201 code | 9 blank | 11 comment | 4 complexity | 3e7d5a443c9f7d4629f51c3348cfb5f4 MD5 | raw file
  1. #!/usr/bin/env python
  2. """
  3. Simulate a browser.
  4. Log in to Google App Engine, parse several web pages into data structures
  5. and return them to the caller.
  6. """
  7. import re
  8. import random
  9. import logging
  10. import logging.handlers
  11. from BeautifulSoup import BeautifulSoup
  12. from ClientForm import *
  13. from mechanize import Browser
  14. class SoupLoginError(Exception):
  15. def __init__(self, value):
  16. self.value = value
  17. def __str__(self):
  18. return repr(self.value)
  19. class GoogleSoup:
  20. def __init__(self, username, password, logSeverity=logging.INFO):
  21. LOG_FILENAME = '/tmp/soup.out'
  22. # Set up a specific logger with our desired output level
  23. self.log = logging.getLogger('google_soup')
  24. self.log.setLevel( logSeverity )
  25. self.username = username
  26. self.password = password
  27. # Add the log message handler to the logger
  28. handler = logging.handlers.RotatingFileHandler( LOG_FILENAME, maxBytes=20, backupCount=5)
  29. self.log.addHandler(handler)
  30. self.br = Browser()
  31. def login(self, username, password):
  32. self.username = username
  33. self.password = password
  34. self.br.open("http://appengine.google.com/")
  35. self.br.select_form(nr=0)
  36. # sometimes google "suggests" a email and requires us to put the
  37. # password in for it
  38. if self.br["Email"] is None or self.br["Email"] == '':
  39. self.br["Email"] = username
  40. self.br["Passwd"] = password
  41. response = self.br.submit()
  42. page_title = self.br.title()
  43. self.log.debug('On web Page %s, %s' % (response.geturl(), page_title))
  44. self.ensureMainPage()
  45. return response
  46. def ensureMainPage(self):
  47. page_title = self.br.title()
  48. self.log.debug('On web Page %s' % (page_title))
  49. if page_title == 'Google Accounts' or page_title != 'Applications Overview':
  50. raise SoupLoginError(self.username)
  51. def resetClient(self):
  52. return self.login(self.username, self.password)
  53. def strip_tags(self, element):
  54. text_accumulator = element.contents[0]
  55. for tag in element.findAll():
  56. tag_text = tag.renderContents()
  57. text_accumulator += tag_text
  58. text_accumulator = re.sub('\n', ' ', text_accumulator)
  59. text_accumulator = re.sub(' ', ' ', text_accumulator)
  60. text_accumulator = text_accumulator.strip()
  61. return text_accumulator
  62. def extract_headings(self, table):
  63. table_headings = []
  64. for header in table.findAll('th'):
  65. text = self.strip_tags(header)
  66. table_headings.append(text)
  67. return table_headings
  68. def findApplications(self):
  69. main_apps = BeautifulSoup(self.resetClient())
  70. column_headers = []
  71. apps = []
  72. for table in main_apps.findAll('table', limit=1):
  73. # table is a BeautifulSoup.Tag object
  74. column_headers = self.extract_headings(table)
  75. for app_row in table.tbody.findAll('tr'):
  76. column = 0
  77. basic_app_data = {}
  78. for app_data in app_row.findAllNext('td', limit=len(column_headers)):
  79. data_value = app_data.renderContents()
  80. attribute_name = column_headers[column]
  81. if 'app_id' in data_value:
  82. app_link = app_data.find('a', href=re.compile('.*app_id='))
  83. app_url = app_link['href'] #.attrs
  84. app_link.renderContents()
  85. basic_app_data.update({'url':app_url})
  86. basic_app_data.update({'name':self.strip_tags(app_data)})
  87. if 'ae-ext-link' in data_value:
  88. data_value = self.strip_tags(app_data)
  89. basic_app_data.update({attribute_name:data_value})
  90. column += 1
  91. apps.append(basic_app_data)
  92. return apps
  93. def decompose_reservation(self, stat_name, usage):
  94. usage = re.sub('\n', ' ', usage)
  95. if ' of ' in usage:
  96. parts = re.search('([0-9\.]*).of.([0-9\.]*)([^ ]*.*)', usage, re.MULTILINE)
  97. if parts:
  98. used, remaining, metric = parts.groups()
  99. metric = metric or stat_name
  100. return { 'metric': metric, 'used':used, 'remaining':remaining }
  101. else:
  102. return None
  103. def queryApplicationPerfs(self, applicationNames):
  104. self.resetClient()
  105. if applicationNames == '__ALL__':
  106. applicationLinks = self.br.links(url_regex='app_id=')
  107. else:
  108. for applicationName in applicationNames:
  109. applicationLinks = self.br.find_link(url_regex='app_id=%s' % applicationName)
  110. quota_stats = {}
  111. self.log.debug( 'Looking for application %s ' % applicationNames)
  112. for applicationLink in applicationLinks:
  113. response = self.br.follow_link(applicationLink)
  114. response = self.br.follow_link(text_regex=r'Quota Details')
  115. current_url = response.geturl()
  116. app_id_match = re.search('app_id=([^&]*)', current_url)
  117. if app_id_match:
  118. app_id = app_id_match.groups()[0]
  119. else:
  120. app_id = 'unknown_app'
  121. self.log.debug('Taking stats for application %s ' % app_id)
  122. quota_stats[app_id] = {}
  123. quota_details = response.read()
  124. quota_fix = re.compile('.*Why is My App Over Quota', re.DOTALL|re.MULTILINE)
  125. quota_details = re.sub(quota_fix, '', quota_details)
  126. quota_soup = BeautifulSoup(quota_details)
  127. quota_section = quota_soup.find(attrs={'id':'ae-quota-details'})
  128. if quota_section:
  129. for quota_table in quota_section.findAll('table'):
  130. for stat_row in quota_table.tbody.findAll('tr', recursive=False):
  131. stat_name = self.strip_tags(stat_row.find('td'))
  132. for usage in stat_row.findAll('td', text=re.compile('%')):
  133. usage_string = usage.strip('\n')
  134. usage_string = int(usage_string.rstrip('%'))
  135. reservation = self.strip_tags(usage.parent.findNextSibling())
  136. reservation_components = self.decompose_reservation(stat_name, reservation)
  137. # just send random stuff for testing!!!!
  138. usage_string = str(random.randint(1000, 100000))
  139. quota_stats[app_id][stat_name + '_usage'] = usage_string
  140. quota_stats[app_id][stat_name + '_quota'] = reservation_components['used']
  141. quota_stats[app_id][stat_name + '_remaining'] = reservation_components['remaining']
  142. return quota_stats
  143. def parse_load(self):
  144. app_main = open('/tmp/dashboard.html', 'r').read()
  145. app_soup = BeautifulSoup(app_main)
  146. load_section = app_soup.find(text=re.compile('Current Load'))
  147. current_load = []
  148. if load_section:
  149. load_section = load_section.findParent('table')
  150. if load_section:
  151. column_headers = self.extract_headings(load_section)
  152. column = 0
  153. for stat_row in load_section.findAll('tr'):
  154. load_data = {}
  155. column = 0
  156. for stat_data in stat_row.findAll('td', limit=len(column_headers)):
  157. stat_name = column_headers[column]
  158. column += 1
  159. if 'URI' in stat_name:
  160. load_data['uri'] = stat_data.find('a')
  161. load_data['name'] = stat_data.find('a').string
  162. else:
  163. load_data[stat_name] = self.strip_tags(stat_data)
  164. current_load.append(load_data)
  165. def parse_errors(self):
  166. errors_section = app_soup.find(id='ae-dash-errors-count-col')
  167. errors = []
  168. if errors_section:
  169. errors_section = errors_section.findParent('table')
  170. if errors_section:
  171. column_headers = self.extract_headings(errors_section)
  172. column = 0
  173. for stat_row in errors_section.findAll('tr'):
  174. error_data = {}
  175. column = 0
  176. for stat_data in stat_row.findAll('td', limit=len(column_headers)):
  177. stat_name = column_headers[column]
  178. column += 1
  179. if 'URI' in stat_name:
  180. error_data['uri'] = stat_data.find('a')
  181. error_data['name'] = stat_data.find('a').string
  182. else:
  183. error_data[stat_name] = self.strip_tags(stat_data)
  184. errors.append(error_data)
  185. self.log.error(errors)
  186. if __name__ == '__main__':
  187. pass
  188. # for testing
  189. # g = GoogleSoup('username', 'password')
  190. # print g.findApplications()