/tools/plotting/scatterplot.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 79 lines · 67 code · 10 blank · 2 comment · 19 complexity · 5cc7d8049bfef69026ef84f5e0e96663 MD5 · raw file

  1. #!/usr/bin/env python
  2. #Greg Von Kuster
  3. import sys
  4. from rpy import *
  5. def stop_err(msg):
  6. sys.stderr.write(msg)
  7. sys.exit()
  8. def main():
  9. in_fname = sys.argv[1]
  10. out_fname = sys.argv[2]
  11. try:
  12. columns = int( sys.argv[3] ) - 1, int( sys.argv[4] ) - 1
  13. except:
  14. stop_err( "Columns not specified, your query does not contain a column of numerical data." )
  15. title = sys.argv[5]
  16. xlab = sys.argv[6]
  17. ylab = sys.argv[7]
  18. matrix = []
  19. skipped_lines = 0
  20. first_invalid_line = 0
  21. invalid_value = ''
  22. invalid_column = 0
  23. i = 0
  24. for i, line in enumerate( file( in_fname ) ):
  25. valid = True
  26. line = line.rstrip( '\r\n' )
  27. if line and not line.startswith( '#' ):
  28. row = []
  29. fields = line.split( "\t" )
  30. for column in columns:
  31. try:
  32. val = fields[column]
  33. if val.lower() == "na":
  34. row.append( float( "nan" ) )
  35. else:
  36. row.append( float( fields[column] ) )
  37. except:
  38. valid = False
  39. skipped_lines += 1
  40. if not first_invalid_line:
  41. first_invalid_line = i + 1
  42. try:
  43. invalid_value = fields[column]
  44. except:
  45. invalid_value = ''
  46. invalid_column = column + 1
  47. break
  48. else:
  49. valid = False
  50. skipped_lines += 1
  51. if not first_invalid_line:
  52. first_invalid_line = i+1
  53. if valid:
  54. matrix.append( row )
  55. if skipped_lines < i:
  56. try:
  57. r.pdf( out_fname, 8, 8 )
  58. r.plot( array( matrix ), type="p", main=title, xlab=xlab, ylab=ylab, col="blue", pch=19 )
  59. r.dev_off()
  60. except Exception, exc:
  61. stop_err( "%s" %str( exc ) )
  62. else:
  63. stop_err( "All values in both columns %s and %s are non-numeric or empty." % ( sys.argv[3], sys.argv[4] ) )
  64. print "Scatter plot on columns %s, %s. " % ( sys.argv[3], sys.argv[4] )
  65. if skipped_lines > 0:
  66. print "Skipped %d lines starting with line #%d, value '%s' in column %d is not numeric." % ( skipped_lines, first_invalid_line, invalid_value, invalid_column )
  67. r.quit( save="no" )
  68. if __name__ == "__main__":
  69. main()