PageRenderTime 35ms CodeModel.GetById 1ms app.highlight 28ms RepoModel.GetById 1ms app.codeStats 0ms

/tools/data_source/microbial_import_code.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 154 lines | 137 code | 7 blank | 10 comment | 32 complexity | b23f4367caf4c357fbd2bd548c7e11ba MD5 | raw file
  1
  2def load_microbial_data( GALAXY_DATA_INDEX_DIR, sep='\t' ):
  3    # FIXME: this function is duplicated in the DynamicOptions class.  It is used here only to
  4    # set data.name in exec_after_process(). 
  5    microbe_info= {}
  6    orgs = {}
  7    
  8    filename = "%s/microbial_data.loc" % GALAXY_DATA_INDEX_DIR
  9    for i, line in enumerate( open( filename ) ):
 10        line = line.rstrip( '\r\n' )
 11        if line and not line.startswith( '#' ):
 12            fields = line.split( sep )
 13            #read each line, if not enough fields, go to next line
 14            try:
 15                info_type = fields.pop(0)
 16                if info_type.upper() == "ORG":
 17                    #ORG     12521   Clostridium perfringens SM101   bacteria        Firmicutes      CP000312,CP000313,CP000314,CP000315     http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=genomeprj&cmd=Retrieve&dopt=Overview&list_uids=12521
 18                    org_num = fields.pop(0)
 19                    name = fields.pop(0)
 20                    kingdom = fields.pop(0)
 21                    group = fields.pop(0)
 22                    chromosomes = fields.pop(0)
 23                    info_url = fields.pop(0)
 24                    link_site = fields.pop(0)
 25                    if org_num not in orgs:
 26                        orgs[ org_num ] = {}
 27                        orgs[ org_num ][ 'chrs' ] = {}
 28                    orgs[ org_num ][ 'name' ] = name
 29                    orgs[ org_num ][ 'kingdom' ] = kingdom
 30                    orgs[ org_num ][ 'group' ] = group
 31                    orgs[ org_num ][ 'chromosomes' ] = chromosomes
 32                    orgs[ org_num ][ 'info_url' ] = info_url
 33                    orgs[ org_num ][ 'link_site' ] = link_site
 34                elif info_type.upper() == "CHR":
 35                    #CHR     12521   CP000315        Clostridium perfringens phage phiSM101, complete genome 38092   110684521       CP000315.1
 36                    org_num = fields.pop(0)
 37                    chr_acc = fields.pop(0)
 38                    name = fields.pop(0)
 39                    length = fields.pop(0)
 40                    gi = fields.pop(0)
 41                    gb = fields.pop(0)
 42                    info_url = fields.pop(0)
 43                    chr = {}
 44                    chr[ 'name' ] = name
 45                    chr[ 'length' ] = length
 46                    chr[ 'gi' ] = gi
 47                    chr[ 'gb' ] = gb
 48                    chr[ 'info_url' ] = info_url
 49                    if org_num not in orgs:
 50                        orgs[ org_num ] = {}
 51                        orgs[ org_num ][ 'chrs' ] = {}
 52                    orgs[ org_num ][ 'chrs' ][ chr_acc ] = chr
 53                elif info_type.upper() == "DATA":
 54                    #DATA    12521_12521_CDS 12521   CP000315        CDS     bed     /home/djb396/alignments/playground/bacteria/12521/CP000315.CDS.bed
 55                    uid = fields.pop(0)
 56                    org_num = fields.pop(0)
 57                    chr_acc = fields.pop(0)
 58                    feature = fields.pop(0)
 59                    filetype = fields.pop(0)
 60                    path = fields.pop(0)
 61                    data = {}
 62                    data[ 'filetype' ] = filetype
 63                    data[ 'path' ] = path
 64                    data[ 'feature' ] = feature
 65
 66                    if org_num not in orgs:
 67                        orgs[ org_num ] = {}
 68                        orgs[ org_num ][ 'chrs' ] = {}
 69                    if 'data' not in orgs[ org_num ][ 'chrs' ][ chr_acc ]:
 70                        orgs[ org_num ][ 'chrs' ][ chr_acc ][ 'data' ] = {}
 71                    orgs[ org_num ][ 'chrs' ][ chr_acc ][ 'data' ][ uid ] = data
 72                else: continue
 73            except: continue
 74    for org_num in orgs:
 75        org = orgs[ org_num ]
 76        if org[ 'kingdom' ] not in microbe_info:
 77            microbe_info[ org[ 'kingdom' ] ] = {}
 78        if org_num not in microbe_info[ org[ 'kingdom' ] ]:
 79            microbe_info[ org[ 'kingdom' ] ][org_num] = org
 80    return microbe_info
 81
# Post-processing: set the build (dbkey) for the data and add additional datasets to the history.
 83from galaxy import datatypes, config, tools
 84from shutil import copyfile
 85
 86def exec_after_process(app, inp_data, out_data, param_dict, tool, stdout, stderr):
 87    base_dataset = out_data.items()[0][1]
 88    history = base_dataset.history
 89    if history == None:
 90        print "unknown history!"
 91        return
 92    kingdom = param_dict.get( 'kingdom', None )
 93    #group = param_dict.get( 'group', None )
 94    org = param_dict.get( 'org', None )
 95    
 96    #if not (kingdom or group or org):
 97    if not (kingdom or org):
 98        print "Parameters are not available."
 99    #workflow passes galaxy.tools.parameters.basic.UnvalidatedValue instead of values
100    if isinstance( kingdom, tools.parameters.basic.UnvalidatedValue ):
101        kingdom = kingdom.value
102    if isinstance( org, tools.parameters.basic.UnvalidatedValue ):
103        org = org.value
104    
105    GALAXY_DATA_INDEX_DIR = app.config.tool_data_path
106    microbe_info = load_microbial_data( GALAXY_DATA_INDEX_DIR, sep='\t' )
107    new_stdout = ""
108    split_stdout = stdout.split("\n")
109    basic_name = ""
110    for line in split_stdout:
111        fields = line.split("\t")
112        if fields[0] == "#File1":
113            description = fields[1]
114            chr = fields[2]
115            dbkey = fields[3]
116            file_type = fields[4]
117            name, data = out_data.items()[0]
118            data.set_size()
119            basic_name = data.name
120            data.name = data.name + " (" + microbe_info[kingdom][org]['chrs'][chr]['data'][description]['feature'] +" for " + microbe_info[kingdom][org]['name'] + ":" + chr + ")"
121            data.dbkey = dbkey
122            data.info = data.name
123            data = app.datatypes_registry.change_datatype( data, file_type )
124            data.init_meta()
125            data.set_peek()
126            app.model.context.add( data )
127            app.model.context.flush()
128        elif fields[0] == "#NewFile":
129            description = fields[1]
130            chr = fields[2]
131            dbkey = fields[3]
132            filepath = fields[4]
133            file_type = fields[5]
134            newdata = app.model.HistoryDatasetAssociation( create_dataset = True, sa_session = app.model.context ) #This import should become a library
135            newdata.set_size()
136            newdata.extension = file_type
137            newdata.name = basic_name + " (" + microbe_info[kingdom][org]['chrs'][chr]['data'][description]['feature'] +" for "+microbe_info[kingdom][org]['name']+":"+chr + ")"
138            app.model.context.add( newdata )
139            app.model.context.flush()
140            app.security_agent.copy_dataset_permissions( base_dataset.dataset, newdata.dataset )
141            history.add_dataset( newdata )
142            app.model.context.add( history )
143            app.model.context.flush()
144            try:
145                copyfile(filepath,newdata.file_name)
146                newdata.info = newdata.name
147                newdata.state = newdata.states.OK
148            except:
149                newdata.info = "The requested file is missing from the system."
150                newdata.state = newdata.states.ERROR
151            newdata.dbkey = dbkey
152            newdata.init_meta()
153            newdata.set_peek()
154            app.model.context.flush()