/tools/filters/convert_characters.py
https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 52 lines · 40 code · 10 blank · 2 comment · 7 complexity · ff720cb1597c5109ced491e95170b523 MD5 · raw file
- #!/usr/bin/env python
- #By, Guruprasad Ananda.
- from galaxy import eggs
- import sys, re
def stop_err(msg):
    """Report a fatal error and terminate the script.

    Writes ``msg`` to standard error exactly as given (no newline is
    appended) and then raises ``SystemExit`` with exit status 1.

    Args:
        msg: Text to write to ``sys.stderr``.

    Raises:
        SystemExit: Always.
    """
    sys.stderr.write(msg)
    # A bare sys.exit() exits with status 0, which tells the calling process
    # that the run succeeded; use a non-zero status for the failure path.
    sys.exit(1)
-
def _convert_lines(fin, fout, pattern):
    """Copy lines from ``fin`` to ``fout``, turning ``pattern`` matches into tabs.

    Each input line is stripped of leading and trailing whitespace before
    substitution, and is written followed by a single newline.

    Args:
        fin: Iterable of text lines (an open input file).
        fout: Object with a ``write`` method (an open output file).
        pattern: Compiled regular expression whose matches are each replaced
            by one tab character.

    Returns:
        The number of lines that could not be converted or written.
    """
    skipped = 0
    for line in fin:
        line = line.strip()
        try:
            fout.write("%s\n" % pattern.sub('\t', line))
        except Exception:
            # Best effort: count the failed line and keep going, but let
            # SystemExit / KeyboardInterrupt propagate (a bare except would
            # have swallowed those too).
            skipped += 1
    return skipped


def main():
    """Convert runs of a chosen delimiter in a text file into single tabs.

    Command line: ``convert_characters infile from_char outfile``

    ``from_char`` is a short code selecting the delimiter to replace:
    ``T`` tab, ``s`` whitespace, ``Dt`` dot, ``C`` comma, ``D`` dash,
    ``U`` underscore, ``P`` pipe, ``Co`` colon, ``Sc`` semicolon.
    One or more consecutive occurrences of the delimiter become one tab.

    Problems with the arguments or with opening the files are reported
    through ``stop_err``. If any lines could not be written, a summary count
    is printed to standard output.
    """
    if len(sys.argv) != 4:
        stop_err("usage: convert_characters infile from_char outfile")

    # Regular-expression fragment for each delimiter code. Raw strings keep
    # the backslashes intact for the regex engine.
    char_dict = {
        'T': '\t',
        's': r'\s',
        'Dt': r'\.',
        'C': ',',
        'D': '-',
        'U': '_',
        'P': r'\|',
        'Co': ':',
        'Sc': ';',
    }

    from_char = sys.argv[2]
    # Validate the code before touching any file, so that a bad code neither
    # produces a KeyError traceback nor truncates the output file.
    if from_char not in char_dict:
        stop_err("Unknown character code '%s'; expected one of: %s"
                 % (from_char, ", ".join(sorted(char_dict))))

    # The trailing '+' makes the pattern match one or more occurrences.
    pattern = re.compile(char_dict[from_char] + '+')

    try:
        fin = open(sys.argv[1], 'r')
    except (IOError, OSError):
        stop_err("Input file cannot be opened for reading.")

    try:
        try:
            fout = open(sys.argv[3], 'w')
        except (IOError, OSError):
            stop_err("Output file cannot be opened for writing.")
        try:
            skipped = _convert_lines(fin, fout, pattern)
        finally:
            fout.close()
    finally:
        # Runs even when stop_err() exits above, so the input handle is
        # always released.
        fin.close()

    if skipped:
        # Parenthesised single-argument form prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("Skipped %d lines as invalid." % skipped)


if __name__ == "__main__":
    main()