/cron/parse_builds_3_sites.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 53 lines · 44 code · 5 blank · 4 comment · 9 complexity · 65bfc0cd9631748d601469039cbd6caa MD5 · raw file

#!/usr/bin/env python
"""
Connects to sites and determines which builds are available at each.
"""
import sys
import urllib

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2

if sys.version_info[:2] >= ( 2, 5 ):
    import xml.etree.ElementTree as ElementTree
else:
    from galaxy import eggs
    import pkg_resources; pkg_resources.require( "elementtree" )
    from elementtree import ElementTree
  13. sites = ['http://genome.ucsc.edu/cgi-bin/',
  14. 'http://archaea.ucsc.edu/cgi-bin/',
  15. 'http://genome-test.cse.ucsc.edu/cgi-bin/'
  16. ]
  17. names = ['main',
  18. 'archaea',
  19. 'test'
  20. ]
  21. def main():
  22. for i in range(len(sites)):
  23. site = sites[i]+"das/dsn"
  24. trackurl = sites[i]+"hgTracks?"
  25. builds = []
  26. try:
  27. page = urllib.urlopen(site)
  28. except:
  29. print "#Unable to connect to " + site
  30. continue
  31. text = page.read()
  32. try:
  33. tree = ElementTree.fromstring(text)
  34. except:
  35. print "#Invalid xml passed back from " + site
  36. continue
  37. print "#Harvested from",site
  38. for dsn in tree:
  39. build = dsn.find("SOURCE").attrib['id']
  40. builds.append(build)
  41. build_dict = {}
  42. for build in builds:
  43. build_dict[build]=0
  44. builds = build_dict.keys()
  45. yield [names[i],trackurl,builds]
  46. if __name__ == "__main__":
  47. for site in main():
  48. print site[0]+"\t"+site[1]+"\t"+",".join(site[2])