/tools/filters/sorter.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 59 lines · 24 code · 11 blank · 24 comment · 4 complexity · 1c66b338b63136c2dc030fa02523ba0f MD5 · raw file

  1. """
  2. Sorts tabular data on one or more columns. All comments of the file are collected
  3. and placed at the beginning of the sorted output file.
  4. usage: sorter.py [options]
  5. -i, --input: Tabular file to be sorted
  6. -o, --output: Sorted output file
  7. -k, --key: Key (see manual for bash/sort)
  8. example: sorter.py -i input.tsv -o sorted.tsv -k 1,1 -k 2,2n
  9. """
  10. # 03/05/2013 guerler
  11. # imports
  12. import os, re, string, sys
  13. from optparse import OptionParser
  14. # error
  15. def stop_err( msg ):
  16. sys.stderr.write( "%s\n" % msg )
  17. sys.exit()
  18. # main
  19. def main():
  20. # define options
  21. parser = OptionParser()
  22. parser.add_option("-i", "--input")
  23. parser.add_option("-o", "--output")
  24. parser.add_option("-k", "--key", action="append")
  25. # parse
  26. options, args = parser.parse_args()
  27. try:
  28. # retrieve options
  29. input = options.input
  30. output = options.output
  31. key = [" -k" + k for k in options.key]
  32. # grep comments
  33. grep_comments = "(grep '^#' %s) > %s" % (input, output)
  34. #print grep_comments
  35. # grep and sort columns
  36. sort_columns = "(grep '^[^#]' %s | sort -f -t '\t' %s) >> %s" % (input, ' '.join(key), output)
  37. #print sort_columns
  38. # execute
  39. os.system(grep_comments)
  40. os.system(sort_columns)
  41. except Exception, ex:
  42. stop_err('Error running sorter.py\n' + str(ex))
  43. # exit
  44. sys.exit(0)
  45. if __name__ == "__main__":
  46. main()