/tools/solid_tools/qualsolid_boxplot_graph.sh
Shell | 94 lines | 64 code | 9 blank | 21 comment | 5 complexity | 46865d454f6544f0e80e8662b4fd90d5 MD5 | raw file
#!/bin/sh

# Modified fastq_quality_boxplot_graph.sh from FASTX-toolkit - FASTA/FASTQ preprocessing tools.
# Copyright (C) 2009 A. Gordon (gordon@cshl.edu)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# Purpose:
#   Reads a per-position quality statistics table (-i) and pipes a gnuplot
#   script to "gnuplot" that draws one candlestick per read position.
#   The image is written to the file given with -o (default: stdout),
#   as PNG unless -p selects PostScript.
#
# Exit status:
#   0 - help text was printed (also when -i is missing), or gnuplot succeeded
#   1 - bad command-line option, or the input file is not readable
#   otherwise the exit status of gnuplot.

# Print the help text to stdout and terminate the script with status 0.
usage()
{
  echo "SOLiD-Quality BoxPlot plotter"
  echo "Generates a SOLiD quality score box-plot graph "
  echo
  echo "Usage: $0 [-i INPUT.TXT] [-t TITLE] [-p] [-o OUTPUT]"
  echo
  echo " [-p] - Generate PostScript (.PS) file. Default is PNG image."
  echo " [-i INPUT.TXT] - Input file. Should be the output of \"solid_qual_stats\" program."
  echo " [-o OUTPUT] - Output file name. default is STDOUT."
  echo " [-t TITLE] - Title (usually the solid file name) - will be plotted on the graph."
  echo
  exit 0
}

#
# Input Data columns: #pos cnt min max sum mean Q1 med Q3 IQR lW rW
# As produced by "solid_qual_stats" program
#

TITLE=""                                # default title is empty
FILENAME=""
OUTPUTTERM="set term png size 800,600"
OUTPUTFILE="/dev/stdout"                # Default output file is simply "stdout"

# The leading ':' in the option string puts getopts in silent mode:
# a missing option argument is reported as ':' and an unknown option as '?',
# so this script prints its own diagnostics.
while getopts ":t:i:o:ph" Option
do
  case "$Option" in
    t ) TITLE="for $OPTARG" ;;
    i ) FILENAME=$OPTARG ;;
    o ) OUTPUTFILE="$OPTARG" ;;
    p ) OUTPUTTERM="set term postscript enhanced color \"Helvetica\" 4" ;;
    h ) usage ;;
    : ) echo "option -$OPTARG requires an argument. use '-h' for usage information." >&2
        exit 1 ;;
    * ) echo "unrecognized argument. use '-h' for usage information." >&2
        exit 1 ;;
  esac
done
shift $((OPTIND - 1))

# An input file is mandatory; without one just show the help text.
if [ -z "$FILENAME" ]; then
  usage
fi

if [ ! -r "$FILENAME" ]; then
  # Report the -i value: the positional parameters were shifted away above,
  # so "$1" no longer has anything to do with the input file.
  echo "Error: can't open input file ($FILENAME)." >&2
  exit 1
fi

#Read number of cycles from the stats file (each line is a cycle, minus the header line)
#But for the graph, I want xrange to reach (num_cycles+1), so I don't subtract 1 now.
NUM_CYCLES=$(wc -l < "$FILENAME")

# gnuplot script. Per the column list above, the first plot uses
# 1:7:11:12:9 = pos : Q1 : lW : rW : Q3, and the second draws a
# zero-height candlestick at column 8 (med) to mark the median.
# The backslash-newline inside the double quotes is removed by the shell,
# so gnuplot receives the "plot" command as a single line.
GNUPLOTCMD="
$OUTPUTTERM
set boxwidth 0.8
set size 1,1
set key Left inside
set xlabel \"read position\"
set ylabel \"Quality Score \"
set title \"Quality Scores $TITLE\"
#set auto x
set bars 4.0
set xrange [ 0: $NUM_CYCLES ]
set yrange [-2:45]
set y2range [-2:45]
set xtics 1
set x2tics 1
set ytics 2
set y2tics 2
set tics out
set grid ytics
set style fill empty
plot '$FILENAME' using 1:7:11:12:9 with candlesticks lt 1 lw 1 title 'Quartiles' whiskerbars, \
 '' using 1:8:8:8:8 with candlesticks lt -1 lw 2 title 'Medians'
"

# printf instead of echo: some /bin/sh echo implementations interpret
# backslash sequences, which would corrupt titles or paths containing '\'.
printf '%s\n' "$GNUPLOTCMD" | gnuplot > "$OUTPUTFILE"