PageRenderTime 18ms CodeModel.GetById 2ms app.highlight 13ms RepoModel.GetById 1ms app.codeStats 0ms

/tools/plotting/bar_chart.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 144 lines | 89 code | 28 blank | 27 comment | 19 complexity | 13a25ac087b0dffb0d179f572db0d453 MD5 | raw file
  1#!/usr/bin/env python
  2
  3
  4"""
histogram_gnuplot.py <datafile> <xtic column> <column_list> <title> <ylabel> <yrange_min> <yrange_max> <graph_file> <img_size>
  6a generic histogram builder based on gnuplot backend
  7
  8   data_file    - tab delimited file with data
  9   xtic_column  - column containing labels for x ticks [integer, 0 means no ticks]
 10   column_list  - comma separated list of columns to plot
   title        - title for the entire histogram
 12   ylabel       - y axis label
   yrange_min   - minimal value at the y axis (integer)
 14   yrange_max   - maximal value at the y_axis (integer) 
 15                  to set yrange to autoscaling assign 0 to yrange_min and yrange_max
 16   graph_file   - file to write histogram image to
 17   img_size     - as X,Y pair in pixels (e.g., 800,600 or 600,800 etc.)
 18   
 19   
   This tool requires gnuplot and gnuplot.py
 21
 22anton nekrutenko | anton@bx.psu.edu
 23
 24"""
 25
 26import Gnuplot, Gnuplot.funcutils
 27import sys, string, tempfile, os
 28
# Refuse to run on interpreters older than Python 2.4.
# NOTE(review): assert statements are stripped when Python runs with -O,
# so this guard is not enforced in optimized mode.
assert sys.version_info[:2] >= ( 2, 4 )
 30
 31def stop_err(msg):
 32    sys.stderr.write(msg)
 33    sys.exit()
 34
 35def main(tmpFileName):
 36    skipped_lines_count = 0
 37    skipped_lines_index = []
 38    gf = open(tmpFileName, 'w')
 39    
 40    
 41    try:
 42        in_file   = open( sys.argv[1], 'r' )
 43        xtic      = int( sys.argv[2] )
 44        col_list  = string.split( sys.argv[3],"," )
 45        title     = 'set title "' + sys.argv[4] + '"'
 46        ylabel    = 'set ylabel "' + sys.argv[5] + '"'
 47        ymin      = sys.argv[6]
 48        ymax      = sys.argv[7]
 49        img_file  = sys.argv[8]
 50        img_size  = sys.argv[9]
 51    except:
 52        stop_err("Check arguments\n")
 53        
 54    try:
 55        int( col_list[0] )
 56    except:
 57        stop_err('You forgot to set columns for plotting\n')    
 58    
 59       
 60    for i, line in enumerate( in_file ):
 61        valid = True
 62        line = line.rstrip('\r\n')
 63        if line and not line.startswith( '#' ):
 64            row = []
 65            try:
 66                fields = line.split( '\t' )
 67                for col in col_list:
 68                    row.append( str( float( fields[int( col )-1] ) ) )
 69                    
 70            except:
 71                valid = False
 72                skipped_lines_count += 1
 73                skipped_lines_index.append(i)
 74                    
 75        else:
 76            valid = False
 77            skipped_lines_count += 1
 78            skipped_lines_index.append(i)
 79            
 80        if valid and xtic > 0:
 81            row.append( fields[xtic-1] )
 82        elif valid and xtic == 0:
 83            row.append( str( i ) )    
 84            
 85        if valid:
 86            gf.write( '\t'.join( row ) )
 87            gf.write( '\n' )  
 88             
 89    if skipped_lines_count < i:
 90        
 91        #prepare 'using' clause of plot statement
 92        
 93        g_plot_command = ' ';
 94        
 95        #set the first column
 96        if xtic > 0:
 97            g_plot_command = "'%s' using 1:xticlabels(%s) ti 'Column %s', " % ( tmpFileName, str( len( row ) ), col_list[0] )
 98        else:
 99            g_plot_command = "'%s' using 1 ti 'Column %s', " % ( tmpFileName, col_list[0] )
100        
101        #set subsequent columns
102        
103        for i in range(1,len(col_list)):
104            g_plot_command += "'%s' using %s t 'Column %s', " % ( tmpFileName, str( i+1 ), col_list[i] )
105        
106        g_plot_command = g_plot_command.rstrip( ', ' )
107        
108        yrange = 'set yrange [' + ymin + ":" + ymax + ']'
109                    
110        try:
111            g = Gnuplot.Gnuplot()
112            g('reset')
113            g('set boxwidth 0.9 absolute')
114            g('set style fill  solid 1.00 border -1')
115            g('set style histogram clustered gap 5 title  offset character 0, 0, 0')
116            g('set xtics border in scale 1,0.5 nomirror rotate by 90 offset character 0, 0, 0')
117            g('set key invert reverse Left outside')
118            if xtic == 0:  g('unset xtics')
119            g(title) 
120            g(ylabel)
121            g_term = 'set terminal png tiny size ' + img_size
122            g(g_term)
123            g_out = 'set output "' + img_file + '"'
124            if ymin != ymax:
125                g(yrange)
126            g(g_out)
127            g('set style data histograms')
128            g.plot(g_plot_command)
129        except:
130            stop_err("Gnuplot error: Data cannot be plotted")
131    else:
132        sys.stderr.write('Column(s) %s of your dataset do not contain valid numeric data' %sys.argv[3] )
133        
134    if skipped_lines_count > 0:
135        sys.stdout.write('\nWARNING. You dataset contain(s) %d invalid lines starting with line #%d.  These lines were skipped while building the graph.\n' % ( skipped_lines_count, skipped_lines_index[0]+1 ) )
136    
137
if __name__ == "__main__":
    # The temporary data file is created here, at module level, rather than
    # inside main(): when it was created inside main() the file appeared to be
    # removed before gnuplot had a chance to read it.  Keeping the
    # NamedTemporaryFile object referenced from this scope keeps the file on
    # disk for as long as the script is running.
    gp_data_file = tempfile.NamedTemporaryFile('w')
    # Make PNG the default gnuplot terminal for this run.
    Gnuplot.gp.GnuplotOpts.default_term = 'png'
    # main() re-opens the file by name and writes the plot data into it.
    main(gp_data_file.name)
144