#!/usr/bin/env python
# Greg Von Kuster
#
# Source: /tools/plotting/scatterplot.py
# Repository: https://bitbucket.org/cistrome/cistrome-harvard/

import sys

from rpy import *

 7def stop_err(msg):
 8    sys.stderr.write(msg)
 9    sys.exit()
10
def _read_numeric_columns(in_fname, columns):
    """Parse the selected columns of a tab-separated file into rows of floats.

    Args:
        in_fname: path of the tab-separated input file.
        columns: iterable of 0-based column indexes to extract from each line.

    Returns:
        A tuple ``(matrix, skipped_lines, first_invalid_line, invalid_value,
        invalid_column)`` where ``matrix`` is a list of rows (one float per
        requested column), ``skipped_lines`` counts every line that did not
        yield a row, ``first_invalid_line`` is the 1-based number of the first
        skipped line (0 if none), and ``invalid_value`` / ``invalid_column``
        describe the offending field of that first skipped line when it was
        skipped because of a bad or missing value ('' and 0 otherwise).
    """
    matrix = []
    skipped_lines = 0
    first_invalid_line = 0
    invalid_value = ''
    invalid_column = 0

    # The context manager guarantees the input file is closed.
    with open(in_fname) as in_file:
        for i, line in enumerate(in_file):
            line = line.rstrip('\r\n')

            # Blank lines and '#' comment lines are counted as skipped lines.
            if not line or line.startswith('#'):
                skipped_lines += 1
                if not first_invalid_line:
                    first_invalid_line = i + 1
                continue

            fields = line.split("\t")
            row = []
            for column in columns:
                try:
                    val = fields[column]
                    # "NA" (any case) is kept as a NaN placeholder instead of
                    # causing the whole line to be rejected.
                    if val.lower() == "na":
                        row.append(float("nan"))
                    else:
                        row.append(float(val))
                except (IndexError, ValueError):
                    # Missing column or non-numeric value: drop the whole line.
                    skipped_lines += 1
                    if not first_invalid_line:
                        first_invalid_line = i + 1
                        try:
                            invalid_value = fields[column]
                        except IndexError:
                            invalid_value = ''
                        invalid_column = column + 1
                    break
            else:
                # Only reached when every requested column parsed cleanly.
                matrix.append(row)

    return matrix, skipped_lines, first_invalid_line, invalid_value, invalid_column


def main():
    """Draw a scatter plot of two columns of a tab-separated file into a PDF.

    Command-line arguments (read from ``sys.argv``):
        1: input file path
        2: output file path handed to ``r.pdf``
        3, 4: 1-based numbers of the two columns to plot
        5: plot title
        6: x-axis label
        7: y-axis label

    Lines that are blank, start with '#', or hold a missing/non-numeric value
    in either column are skipped; a summary of skipped lines is printed to
    standard output. Any failure is reported through ``stop_err``.
    """
    in_fname = sys.argv[1]
    out_fname = sys.argv[2]
    try:
        # Convert the 1-based column numbers from the command line to indexes.
        columns = int(sys.argv[3]) - 1, int(sys.argv[4]) - 1
    except (IndexError, ValueError):
        stop_err("Columns not specified, your query does not contain a column of numerical data.")
    title = sys.argv[5]
    xlab = sys.argv[6]
    ylab = sys.argv[7]

    (matrix, skipped_lines, first_invalid_line,
     invalid_value, invalid_column) = _read_numeric_columns(in_fname, columns)

    # Decide on the parsed rows themselves: comparing the skipped count with
    # the last line index wrongly rejected files that had one usable row.
    if not matrix:
        stop_err("All values in both columns %s and %s are non-numeric or empty." % (sys.argv[3], sys.argv[4]))

    try:
        # NOTE(review): 8, 8 are presumably the PDF width and height in inches.
        r.pdf(out_fname, 8, 8)
        r.plot(array(matrix), type="p", main=title, xlab=xlab, ylab=ylab, col="blue", pch=19)
        r.dev_off()
    except Exception as exc:
        stop_err("%s" % str(exc))

    print("Scatter plot on columns %s, %s. " % (sys.argv[3], sys.argv[4]))
    if skipped_lines > 0:
        print("Skipped %d lines starting with line #%d, value '%s' in column %d is not numeric." % (skipped_lines, first_invalid_line, invalid_value, invalid_column))

    r.quit(save="no")

# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
    main()