PageRenderTime 20ms CodeModel.GetById 15ms app.highlight 2ms RepoModel.GetById 1ms app.codeStats 1ms

/cron/parse_builds_3_sites.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 53 lines | 44 code | 5 blank | 4 comment | 10 complexity | 65bfc0cd9631748d601469039cbd6caa MD5 | raw file
 1#!/usr/bin/env python
 2"""
 3Connects to sites and determines which builds are available at each.
 4"""
 5
 6import sys
 7import urllib
 8if sys.version_info[:2] >= ( 2, 5 ):
 9    import xml.etree.ElementTree as ElementTree
10else:
11    from galaxy import eggs
12    import pkg_resources; pkg_resources.require( "elementtree" )
13    from elementtree import ElementTree
14
# Base CGI URLs of the UCSC browser installations that get queried.
sites = [
    'http://genome.ucsc.edu/cgi-bin/',
    'http://archaea.ucsc.edu/cgi-bin/',
    'http://genome-test.cse.ucsc.edu/cgi-bin/',
]

# Short label for each entry of `sites`; the two lists are paired by index.
names = [
    'main',
    'archaea',
    'test',
]
23
def _unique_builds(tree):
    """Return the build ids listed under *tree*, de-duplicated in first-seen order.

    Each child of *tree* is expected to carry a ``SOURCE`` element whose
    ``id`` attribute is the build name.  Children without a ``SOURCE``
    element or without an ``id`` are skipped rather than aborting the run.
    """
    seen = set()
    ordered = []
    for dsn in tree:
        source = dsn.find("SOURCE")
        if source is None:
            continue
        build = source.attrib.get('id')
        if build is None or build in seen:
            continue
        seen.add(build)
        ordered.append(build)
    return ordered


def main(site_urls=None, site_names=None, opener=None):
    """Yield ``[name, trackurl, builds]`` for every site that answers with valid XML.

    For each site the ``das/dsn`` document below its base URL is fetched and
    parsed; the ``id`` of every ``SOURCE`` element becomes one build name.
    Sites that cannot be fetched or that return unparsable XML are reported
    on stdout with a ``#``-prefixed line and skipped.

    Parameters:
        site_urls:  base URLs (each ending in ``/``); defaults to the
                    module-level ``sites`` list.
        site_names: labels paired positionally with ``site_urls``; defaults
                    to the module-level ``names`` list.  Pairing stops at the
                    shorter of the two sequences.
        opener:     callable taking a URL and returning an object with
                    ``read()`` and ``close()``; defaults to ``urllib.urlopen``.

    Yields:
        A three-item list: the site label, the site's ``hgTracks?`` URL and
        the list of unique build ids in the order they first appeared.
    """
    if site_urls is None:
        site_urls = sites
    if site_names is None:
        site_names = names
    if opener is None:
        opener = urllib.urlopen

    for name, base in zip(site_names, site_urls):
        site = base + "das/dsn"
        trackurl = base + "hgTracks?"

        # Fetch the document; a failure while reading is treated like a
        # failure to connect so one bad site never aborts the whole harvest.
        try:
            page = opener(site)
            try:
                text = page.read()
            finally:
                # Always release the connection, even if read() raised.
                page.close()
        except Exception:
            print("#Unable to connect to " + site)
            continue

        # The parse error class differs between ElementTree releases, so
        # catch Exception (but no longer KeyboardInterrupt/SystemExit).
        try:
            tree = ElementTree.fromstring(text)
        except Exception:
            print("#Invalid xml passed back from " + site)
            continue

        print("#Harvested from " + site)
        yield [name, trackurl, _unique_builds(tree)]
50
# Script entry point: emit one tab-separated line per harvested site
# (label, hgTracks URL, comma-joined build ids).
if __name__ == "__main__":
    for label, trackurl, builds in main():
        print("\t".join([label, trackurl, ",".join(builds)]))