/cron/parse_builds.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 57 lines · 42 code · 9 blank · 6 comment · 11 complexity · efc66d823b11e97c575c2e6f8a545dc7 MD5 · raw file

  1. #!/usr/bin/env python
  2. """
  3. Connects to the URL specified and outputs builds available at that
  4. DSN in tabular format. The UCSC test gateway is used as the default.
  5. Each output line has the form: build<TAB>description (build)
  6. """
  7. import sys
  8. import urllib
  9. if sys.version_info[:2] >= ( 2, 5 ):
  10. import xml.etree.ElementTree as ElementTree
  11. else:
  12. from galaxy import eggs
  13. import pkg_resources; pkg_resources.require( "elementtree" )
  14. from elementtree import ElementTree
  15. URL = "http://genome-test.cse.ucsc.edu/cgi-bin/das/dsn"
  16. def getbuilds(url):
  17. try:
  18. page = urllib.urlopen(URL)
  19. except:
  20. print "#Unable to open " + URL
  21. print "?\tunspecified (?)"
  22. sys.exit(1)
  23. text = page.read()
  24. try:
  25. tree = ElementTree.fromstring(text)
  26. except:
  27. print "#Invalid xml passed back from " + URL
  28. print "?\tunspecified (?)"
  29. sys.exit(1)
  30. print "#Harvested from http://genome-test.cse.ucsc.edu/cgi-bin/das/dsn"
  31. print "?\tunspecified (?)"
  32. for dsn in tree:
  33. build = dsn.find("SOURCE").attrib['id']
  34. description = dsn.find("DESCRIPTION").text.replace(" - Genome at UCSC","").replace(" Genome at UCSC","")
  35. fields = description.split(" ")
  36. temp = fields[0]
  37. for i in range(len(fields)-1):
  38. if temp == fields[i+1]:
  39. fields.pop(i+1)
  40. else:
  41. temp = fields[i+1]
  42. description = " ".join(fields)
  43. yield [build,description]
  44. if __name__ == "__main__":
  45. if len(sys.argv) > 1:
  46. URL = sys.argv[1]
  47. for build in getbuilds(URL):
  48. print build[0]+"\t"+build[1]+" ("+build[0]+")"