PageRenderTime 11ms CodeModel.GetById 1ms app.highlight 7ms RepoModel.GetById 1ms app.codeStats 0ms

/tools/filters/convert_characters.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 52 lines | 40 code | 10 blank | 2 comment | 5 complexity | ff720cb1597c5109ced491e95170b523 MD5 | raw file
 1#!/usr/bin/env python
 2#By, Guruprasad Ananda.
 3
 4from galaxy import eggs
 5import sys, re
 6
 7def stop_err(msg):
 8    sys.stderr.write(msg)
 9    sys.exit()
10    
def main():
    """Convert runs of a chosen delimiter in a text file into single tabs.

    Command line: ``convert_characters infile from_char outfile``

    ``from_char`` is one of the short codes below; every run of one or more
    of the corresponding characters in a line is replaced by one tab:

        T  tab          s   whitespace   Dt  dot
        C  comma        D   dash         U   underscore
        P  pipe         Co  colon        Sc  semicolon

    Each input line has its leading and trailing whitespace stripped before
    conversion and is written to ``outfile`` followed by a newline.  Lines
    that raise while being converted or written are counted and the count
    is reported on stdout instead of aborting the run.

    Usage errors, an unknown ``from_char`` code and files that cannot be
    opened are reported through ``stop_err``, which terminates the script.
    """
    if len(sys.argv) != 4:
        stop_err("usage: convert_characters infile from_char outfile")

    # Regex fragment for each delimiter code.  Raw strings keep the
    # backslashes intact for the regex engine instead of relying on Python
    # passing unknown string escapes through unchanged.
    char_dict = {
        'T': '\t',
        's': r'\s',
        'Dt': r'\.',
        'C': ',',
        'D': '-',
        'U': '_',
        'P': r'\|',
        'Co': ':',
        'Sc': ';',
    }

    # Validate the code before touching any file, so a bad code neither
    # produces a KeyError traceback nor truncates the output file.
    from_char = sys.argv[2]
    if from_char not in char_dict:
        stop_err("Unknown character code '%s'; expected one of: %s"
                 % (from_char, ", ".join(sorted(char_dict))))

    # Trailing '+' makes the pattern match one or more occurrences, so a run
    # of delimiters collapses into a single tab.  Compiled once, used per line.
    pattern = re.compile(char_dict[from_char] + '+')

    try:
        fin = open(sys.argv[1], 'r')
    except (IOError, OSError):
        stop_err("Input file cannot be opened for reading.")

    skipped = 0
    # try/finally (rather than `with`) keeps this valid on the old Python 2
    # interpreters this script targets while still closing both files.
    try:
        try:
            fout = open(sys.argv[3], 'w')
        except (IOError, OSError):
            stop_err("Output file cannot be opened for writing.")

        try:
            for line in fin:
                line = line.strip()
                try:
                    fout.write("%s\n" % pattern.sub('\t', line))
                except Exception:
                    # Best effort: count the failing line and keep going, but
                    # let KeyboardInterrupt/SystemExit propagate.
                    skipped += 1
        finally:
            fout.close()
    finally:
        fin.close()

    if skipped:
        # Same bytes as the former `print` statement, but valid syntax on
        # both Python 2 and Python 3.
        sys.stdout.write("Skipped %d lines as invalid.\n" % skipped)
50    
# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__": 
    main()