/tools/filters/convert_characters.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 52 lines · 40 code · 10 blank · 2 comment · 7 complexity · ff720cb1597c5109ced491e95170b523 MD5 · raw file

  1. #!/usr/bin/env python
  2. #By, Guruprasad Ananda.
  3. from galaxy import eggs
  4. import sys, re
  5. def stop_err(msg):
  6. sys.stderr.write(msg)
  7. sys.exit()
  8. def main():
  9. if len(sys.argv) != 4:
  10. stop_err("usage: convert_characters infile from_char outfile")
  11. try:
  12. fin = open(sys.argv[1],'r')
  13. except:
  14. stop_err("Input file cannot be opened for reading.")
  15. from_char = sys.argv[2]
  16. try:
  17. fout = open(sys.argv[3],'w')
  18. except:
  19. stop_err("Output file cannot be opened for writing.")
  20. char_dict = {
  21. 'T': '\t',
  22. 's': '\s',
  23. 'Dt': '\.',
  24. 'C': ',',
  25. 'D': '-',
  26. 'U': '_',
  27. 'P': '\|',
  28. 'Co': ':',
  29. 'Sc': ';'
  30. }
  31. from_ch = char_dict[from_char] + '+' #making an RE to match 1 or more occurences.
  32. skipped = 0
  33. for line in fin:
  34. line = line.strip()
  35. try:
  36. fout.write("%s\n" %(re.sub(from_ch,'\t',line)))
  37. except:
  38. skipped += 1
  39. if skipped:
  40. print "Skipped %d lines as invalid." %skipped
  41. if __name__ == "__main__":
  42. main()