PageRenderTime 26ms CodeModel.GetById 8ms app.highlight 13ms RepoModel.GetById 2ms app.codeStats 0ms

/cron/parse_builds.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 57 lines | 45 code | 6 blank | 6 comment | 3 complexity | efc66d823b11e97c575c2e6f8a545dc7 MD5 | raw file
 1#!/usr/bin/env python
 2
 3"""
Connects to the specified URL and outputs the builds available at that
DSN in tabular format.  The UCSC test gateway is used as the default.
 6build   description
 7"""
 8
 9import sys
10import urllib
11if sys.version_info[:2] >= ( 2, 5 ):
12    import xml.etree.ElementTree as ElementTree
13else:
14    from galaxy import eggs
15    import pkg_resources; pkg_resources.require( "elementtree" )
16    from elementtree import ElementTree
17
# Default DAS DSN endpoint (the UCSC test gateway); the script entry point
# replaces it with the first command-line argument when one is supplied.
URL = "http://genome-test.cse.ucsc.edu/cgi-bin/das/dsn"
19
def _collapse_repeated_words(text):
    """Return *text* with runs of identical space-separated words reduced to one."""
    words = []
    for word in text.split(" "):
        # Keep a word only when it differs from the word kept just before it.
        if not words or word != words[-1]:
            words.append(word)
    return " ".join(words)


def getbuilds(url):
    """Yield ``[build, description]`` pairs from the DAS DSN listing at *url*.

    The document at *url* is fetched and parsed as XML.  A ``#Harvested from``
    banner and a placeholder ``?`` row are printed first; then, for every
    child element of the document root, the ``id`` attribute of its
    ``SOURCE`` element and the cleaned-up text of its ``DESCRIPTION`` element
    are yielded.  Cleaning strips the trailing "Genome at UCSC" marker and
    collapses consecutive repeated words.

    This is a generator, so nothing happens until it is first iterated.

    Args:
        url: Location of the DSN document to fetch.

    Yields:
        Two-item lists ``[build, description]``.

    Raises:
        SystemExit: with status 1, after printing a ``#`` diagnostic line and
            the placeholder row, when the URL cannot be opened or the
            response is not well-formed XML.
    """
    # Python 2 exposes urlopen on ``urllib`` itself; Python 3 moved it.
    try:
        urlopen = urllib.urlopen
    except AttributeError:
        from urllib.request import urlopen

    # Single-argument print(...) calls behave identically on Python 2 and 3.
    try:
        page = urlopen(url)
    except Exception:
        print("#Unable to open " + url)
        print("?\tunspecified (?)")
        sys.exit(1)

    try:
        text = page.read()
    finally:
        # Release the connection even if reading fails.
        page.close()

    try:
        tree = ElementTree.fromstring(text)
    except Exception:
        print("#Invalid xml passed back from " + url)
        print("?\tunspecified (?)")
        sys.exit(1)

    # Report the URL that was actually fetched, not just the default one.
    print("#Harvested from " + url)
    print("?\tunspecified (?)")
    for dsn in tree:
        source = dsn.find("SOURCE")
        if source is None or "id" not in source.attrib:
            # Without a build id there is nothing usable to emit for this entry.
            continue
        build = source.attrib["id"]
        # A missing or empty DESCRIPTION is treated as an empty description.
        description = dsn.findtext("DESCRIPTION") or ""
        description = description.replace(" - Genome at UCSC", "").replace(" Genome at UCSC", "")
        yield [build, _collapse_repeated_words(description)]
51
if __name__ == "__main__":
    # An optional first command-line argument overrides the default URL.
    cli_args = sys.argv[1:]
    if cli_args:
        URL = cli_args[0]
    # Emit one tab-separated row per build: "<id>\t<description> (<id>)".
    for build_id, description in getbuilds(URL):
        row = build_id + "\t" + description + " (" + build_id + ")"
        print(row)
57