PageRenderTime 42ms CodeModel.GetById 27ms app.highlight 10ms RepoModel.GetById 2ms app.codeStats 0ms

/tools/data_source/ucsc_filter.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 68 lines | 57 code | 6 blank | 5 comment | 22 complexity | 7b6a192f88566f421b41d5c7fb21401f MD5 | raw file
 1# runs after the job (and after the default post-filter)
 2from galaxy import datatypes, jobs
 3
 4def validate(incoming):
 5    """Validator"""
 6    #raise Exception, 'not quite right'
 7    pass
 8
 9def exec_before_job( app, inp_data, out_data, param_dict, tool=None):
10    """Sets the name of the data"""
11    outputType = param_dict.get( 'hgta_outputType', None )
12    if isinstance(outputType, list) and len(outputType)>0: outputType = outputType[-1]
13    items = out_data.items()
14    
15    for name, data in items:
16        data.name  = param_dict.get('display', data.name)
17        data.dbkey = param_dict.get('dbkey', '???')
18
19        if outputType == 'wigData':
20            ext = "wig"
21        elif outputType == 'maf':
22            ext = "maf"
23        elif outputType == 'gff':
24            ext = "gff"
25        elif outputType == 'gff3':
26            ext = "gff3"
27        else:
28            if 'hgta_doPrintSelectedFields' in param_dict:
29                ext = "interval"
30            elif 'hgta_doGetBed' in param_dict:
31                ext = "bed"
32            elif 'hgta_doGenomicDna' in param_dict:
33                ext = "fasta"
34            elif 'hgta_doGenePredSequence' in param_dict:
35                ext = "fasta"
36            else:
37                ext = "interval"
38        
39        data = app.datatypes_registry.change_datatype(data, ext)
40        out_data[name] = data
41        
def exec_after_process( app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None):
    """Verify each output dataset after the run and record any problems.

    For every dataset in ``out_data``:

    * its size is refreshed via ``set_size()``;
    * its file is scanned for lines beginning with ``-``, which are
      collected into an ``Errors:`` message together with their line
      numbers;
    * a ``-`` line located exactly three lines before the end of the file,
      with no earlier flagged line, is reported instead as a warning that
      UCSC truncated the results, and scanning stops there;
    * its peek is set via ``set_peek()``;
    * if it is an interval dataset with missing metadata it is converted to
      ``tabular`` and the converted object replaces it in ``out_data``.

    Nothing is raised to the caller.  Any exception from the steps after
    ``set_size()`` -- including the one raised deliberately for flagged
    lines -- is appended to ``data.info`` and ``data.blurb`` is set to
    ``"error"``.
    """
    # Snapshot the items so reassigning entries below cannot disturb iteration.
    for name, data in list(out_data.items()):
        data.set_size()
        try:
            err_msg, err_flag = 'Errors:', False

            # First pass: count lines by streaming, so the whole file is
            # never held in memory and the handle is closed promptly.
            with open(data.file_name) as handle:
                num_lines = sum(1 for _ in handle)

            # Second pass: look for flagged lines.
            with open(data.file_name) as handle:
                for line_count, line in enumerate(handle, 1):
                    if not line.startswith('-'):
                        continue
                    if line_count + 3 == num_lines and not err_flag:
                        err_flag = True
                        err_msg = "Warning: It appears that your results have been truncated by UCSC. View the bottom of your result file for details."
                        break
                    err_flag = True
                    err_msg = err_msg + " (line " + str(line_count) + ")" + line

            data.set_peek()
            if isinstance(data.datatype, datatypes.interval.Interval) and data.missing_meta():
                data = app.datatypes_registry.change_datatype(data, 'tabular')
                out_data[name] = data
            if err_flag:
                raise Exception(err_msg)
        except Exception as exc:
            # Deliberately broad: every failure is reported on the dataset
            # rather than propagated.  An unset info field is treated as
            # empty so this handler cannot itself raise a TypeError.
            data.info = (data.info or "") + "\n" + str(exc)
            data.blurb = "error"