/tools/plotting/bar_chart.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 144 lines · 89 code · 28 blank · 27 comment · 23 complexity · 13a25ac087b0dffb0d179f572db0d453 MD5 · raw file

  1. #!/usr/bin/env python
  2. """
  3. histogram_gnuplot.py <datafile> <xtic column> <column_list> <title> <ylabel> <yrange_min> <yrange_max> <graph_file> <img_size>
  4. a generic histogram builder based on gnuplot backend
  5. data_file - tab delimited file with data
  6. xtic_column - column containing labels for x ticks [integer, 0 means no ticks]
  7. column_list - comma separated list of columns to plot
  8. title - title for the entire histogram
  9. ylabel - y axis label
  10. yrange_min - minimal value at the y axis (integer)
  11. yrange_max - maximal value at the y_axis (integer)
  12. to set yrange to autoscaling assign 0 to yrange_min and yrange_max
  13. graph_file - file to write histogram image to
  14. img_size - as X,Y pair in pixels (e.g., 800,600 or 600,800 etc.)
  15. This tool requires gnuplot and gnuplot.py
  16. anton nekrutenko | anton@bx.psu.edu
  17. """
  18. import Gnuplot, Gnuplot.funcutils
  19. import sys, string, tempfile, os
  20. assert sys.version_info[:2] >= ( 2, 4 )
  21. def stop_err(msg):
  22. sys.stderr.write(msg)
  23. sys.exit()
  24. def main(tmpFileName):
  25. skipped_lines_count = 0
  26. skipped_lines_index = []
  27. gf = open(tmpFileName, 'w')
  28. try:
  29. in_file = open( sys.argv[1], 'r' )
  30. xtic = int( sys.argv[2] )
  31. col_list = string.split( sys.argv[3],"," )
  32. title = 'set title "' + sys.argv[4] + '"'
  33. ylabel = 'set ylabel "' + sys.argv[5] + '"'
  34. ymin = sys.argv[6]
  35. ymax = sys.argv[7]
  36. img_file = sys.argv[8]
  37. img_size = sys.argv[9]
  38. except:
  39. stop_err("Check arguments\n")
  40. try:
  41. int( col_list[0] )
  42. except:
  43. stop_err('You forgot to set columns for plotting\n')
  44. for i, line in enumerate( in_file ):
  45. valid = True
  46. line = line.rstrip('\r\n')
  47. if line and not line.startswith( '#' ):
  48. row = []
  49. try:
  50. fields = line.split( '\t' )
  51. for col in col_list:
  52. row.append( str( float( fields[int( col )-1] ) ) )
  53. except:
  54. valid = False
  55. skipped_lines_count += 1
  56. skipped_lines_index.append(i)
  57. else:
  58. valid = False
  59. skipped_lines_count += 1
  60. skipped_lines_index.append(i)
  61. if valid and xtic > 0:
  62. row.append( fields[xtic-1] )
  63. elif valid and xtic == 0:
  64. row.append( str( i ) )
  65. if valid:
  66. gf.write( '\t'.join( row ) )
  67. gf.write( '\n' )
  68. if skipped_lines_count < i:
  69. #prepare 'using' clause of plot statement
  70. g_plot_command = ' ';
  71. #set the first column
  72. if xtic > 0:
  73. g_plot_command = "'%s' using 1:xticlabels(%s) ti 'Column %s', " % ( tmpFileName, str( len( row ) ), col_list[0] )
  74. else:
  75. g_plot_command = "'%s' using 1 ti 'Column %s', " % ( tmpFileName, col_list[0] )
  76. #set subsequent columns
  77. for i in range(1,len(col_list)):
  78. g_plot_command += "'%s' using %s t 'Column %s', " % ( tmpFileName, str( i+1 ), col_list[i] )
  79. g_plot_command = g_plot_command.rstrip( ', ' )
  80. yrange = 'set yrange [' + ymin + ":" + ymax + ']'
  81. try:
  82. g = Gnuplot.Gnuplot()
  83. g('reset')
  84. g('set boxwidth 0.9 absolute')
  85. g('set style fill solid 1.00 border -1')
  86. g('set style histogram clustered gap 5 title offset character 0, 0, 0')
  87. g('set xtics border in scale 1,0.5 nomirror rotate by 90 offset character 0, 0, 0')
  88. g('set key invert reverse Left outside')
  89. if xtic == 0: g('unset xtics')
  90. g(title)
  91. g(ylabel)
  92. g_term = 'set terminal png tiny size ' + img_size
  93. g(g_term)
  94. g_out = 'set output "' + img_file + '"'
  95. if ymin != ymax:
  96. g(yrange)
  97. g(g_out)
  98. g('set style data histograms')
  99. g.plot(g_plot_command)
  100. except:
  101. stop_err("Gnuplot error: Data cannot be plotted")
  102. else:
  103. sys.stderr.write('Column(s) %s of your dataset do not contain valid numeric data' %sys.argv[3] )
  104. if skipped_lines_count > 0:
  105. sys.stdout.write('\nWARNING. You dataset contain(s) %d invalid lines starting with line #%d. These lines were skipped while building the graph.\n' % ( skipped_lines_count, skipped_lines_index[0]+1 ) )
  106. if __name__ == "__main__":
  107. # The tempfile initialization is here because while inside the main() it seems to create a condition
  108. # when the file is removed before gnuplot has a chance of accessing it
  109. gp_data_file = tempfile.NamedTemporaryFile('w')
  110. Gnuplot.gp.GnuplotOpts.default_term = 'png'
  111. main(gp_data_file.name)