/tools/plotting/bar_chart.py
https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 144 lines · 89 code · 28 blank · 27 comment · 23 complexity · 13a25ac087b0dffb0d179f572db0d453 MD5 · raw file
- #!/usr/bin/env python
- """
- histogram_gnuplot.py <datafile> <xtic column> <column_list> <title> <ylabel> <yrange_min> <yrange_max> <graph_file> <img_size>
- a generic histogram builder based on gnuplot backend
- data_file - tab delimited file with data
- xtic_column - column containing labels for x ticks [integer, 0 means no ticks]
- column_list - comma separated list of columns to plot
- title - title for the entire histogram
- ylabel - y axis label
- yrange_min - minimal value at the y axis (integer)
- yrange_max - maximal value at the y_axis (integer)
- to set yrange to autoscaling assign 0 to yrange_min and yrange_max
- graph_file - file to write histogram image to
- img_size - as X,Y pair in pixels (e.g., 800,600 or 600,800 etc.)
-
-
- This tool requires gnuplot and gnuplot.py
- anton nekrutenko | anton@bx.psu.edu
- """
- import Gnuplot, Gnuplot.funcutils
- import sys, string, tempfile, os
# Refuse to run on interpreters older than Python 2.4.
assert not sys.version_info[:2] < (2, 4)
def stop_err(msg):
    """Report a fatal error and terminate the script.

    Writes *msg* verbatim to stderr (no newline is appended) and raises
    SystemExit with exit status 1, so that callers inspecting the process
    return code see the failure instead of a successful exit.
    """
    sys.stderr.write(msg)
    # A bare sys.exit() would exit with status 0, i.e. "success".
    sys.exit(1)
def main(tmpFileName):
    """Render a clustered histogram PNG from a tab-delimited file via gnuplot.

    All settings come from the command line:

        sys.argv[1]  input data file (tab delimited)
        sys.argv[2]  1-based column holding x tick labels; 0 disables ticks
        sys.argv[3]  comma separated list of 1-based columns to plot
        sys.argv[4]  plot title
        sys.argv[5]  y axis label
        sys.argv[6]  y range minimum
        sys.argv[7]  y range maximum (the range is only applied when the
                     minimum and maximum differ)
        sys.argv[8]  output image file
        sys.argv[9]  image size as "X,Y" in pixels

    tmpFileName is the path of a scratch file. The numeric columns selected
    from the input are written there (one column per entry of the column
    list, followed by a label column when sys.argv[2] >= 0) and gnuplot
    plots from that file.

    Blank lines, lines starting with '#', and lines whose selected columns
    are missing or not numeric are skipped; a warning with their count is
    written to stdout. Fatal problems are reported through stop_err().
    """
    skipped_lines_count = 0
    skipped_lines_index = []
    valid_lines_count = 0

    try:
        in_file = open(sys.argv[1], 'r')
        xtic = int(sys.argv[2])
        col_list = sys.argv[3].split(",")
        title = 'set title "' + sys.argv[4] + '"'
        ylabel = 'set ylabel "' + sys.argv[5] + '"'
        ymin = sys.argv[6]
        ymax = sys.argv[7]
        img_file = sys.argv[8]
        img_size = sys.argv[9]
    except Exception:
        stop_err("Check arguments\n")

    try:
        int(col_list[0])
    except ValueError:
        in_file.close()
        stop_err('You forgot to set columns for plotting\n')

    gf = open(tmpFileName, 'w')
    try:
        for i, line in enumerate(in_file):
            line = line.rstrip('\r\n')
            row = None
            if line and not line.startswith('#'):
                try:
                    fields = line.split('\t')
                    row = [str(float(fields[int(col) - 1])) for col in col_list]
                    # The label lookup is inside the try so that a line too
                    # short to hold the label column is skipped, not fatal.
                    if xtic > 0:
                        row.append(fields[xtic - 1])
                    elif xtic == 0:
                        row.append(str(i))
                except (ValueError, IndexError):
                    row = None

            if row is None:
                skipped_lines_count += 1
                skipped_lines_index.append(i)
                continue

            gf.write('\t'.join(row) + '\n')
            valid_lines_count += 1
    finally:
        # Close (and thereby flush) the data file before gnuplot is asked to
        # read it; otherwise buffered rows may not be on disk yet.
        gf.close()
        in_file.close()

    if valid_lines_count > 0:
        # Prepare the 'using' clauses of the plot statement. In the scratch
        # file the plotted values occupy columns 1..len(col_list) and the
        # tick labels, when present, the column right after them.
        if xtic > 0:
            first_clause = "'%s' using 1:xticlabels(%s) ti 'Column %s'" % (
                tmpFileName, len(col_list) + 1, col_list[0])
        else:
            first_clause = "'%s' using 1 ti 'Column %s'" % (
                tmpFileName, col_list[0])

        clauses = [first_clause]
        for idx in range(1, len(col_list)):
            clauses.append("'%s' using %s t 'Column %s'" % (
                tmpFileName, idx + 1, col_list[idx]))
        g_plot_command = ', '.join(clauses)

        yrange = 'set yrange [' + ymin + ":" + ymax + ']'

        try:
            g = Gnuplot.Gnuplot()
            g('reset')
            g('set boxwidth 0.9 absolute')
            g('set style fill solid 1.00 border -1')
            g('set style histogram clustered gap 5 title offset character 0, 0, 0')
            g('set xtics border in scale 1,0.5 nomirror rotate by 90 offset character 0, 0, 0')
            g('set key invert reverse Left outside')
            if xtic == 0:
                g('unset xtics')
            g(title)
            g(ylabel)
            g('set terminal png tiny size ' + img_size)
            # Equal min and max (e.g. both 0) means: leave y on autoscale.
            if ymin != ymax:
                g(yrange)
            g('set output "' + img_file + '"')
            g('set style data histograms')
            g.plot(g_plot_command)
        except Exception:
            stop_err("Gnuplot error: Data cannot be plotted")
    else:
        sys.stderr.write('Column(s) %s of your dataset do not contain valid numeric data' % sys.argv[3])

    if skipped_lines_count > 0:
        sys.stdout.write('\nWARNING. You dataset contain(s) %d invalid lines starting with line #%d. These lines were skipped while building the graph.\n' % (skipped_lines_count, skipped_lines_index[0] + 1))
-
if __name__ == "__main__":
    # Make PNG the default terminal for Gnuplot.py before anything is plotted.
    Gnuplot.gp.GnuplotOpts.default_term = 'png'
    # The scratch file is created at this level rather than inside main():
    # creating it there seemed to let the file be removed before gnuplot had
    # a chance to access it.
    scratch_file = tempfile.NamedTemporaryFile('w')
    main(scratch_file.name)