/tools/solid_tools/qualsolid_boxplot_graph.sh

https://bitbucket.org/cistrome/cistrome-harvard/ · Shell · 94 lines · 64 code · 9 blank · 21 comment · 5 complexity · 46865d454f6544f0e80e8662b4fd90d5 MD5 · raw file

  1. #!/bin/sh
  2. # Modified fastq_quality_boxplot_graph.sh from FASTX-toolkit - FASTA/FASTQ preprocessing tools.
  3. # Copyright (C) 2009 A. Gordon (gordon@cshl.edu)
  4. #
  5. # This program is free software: you can redistribute it and/or modify
  6. # it under the terms of the GNU Affero General Public License as
  7. # published by the Free Software Foundation, either version 3 of the
  8. # License, or (at your option) any later version.
  9. #
  10. # This program is distributed in the hope that it will be useful,
  11. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. # GNU Affero General Public License for more details.
  14. #
  15. # You should have received a copy of the GNU Affero General Public License
  16. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. function usage()
  18. {
  19. echo "SOLiD-Quality BoxPlot plotter"
  20. echo "Generates a SOLiD quality score box-plot graph "
  21. echo
  22. echo "Usage: $0 [-i INPUT.TXT] [-t TITLE] [-p] [-o OUTPUT]"
  23. echo
  24. echo " [-p] - Generate PostScript (.PS) file. Default is PNG image."
  25. echo " [-i INPUT.TXT] - Input file. Should be the output of \"solid_qual_stats\" program."
  26. echo " [-o OUTPUT] - Output file name. default is STDOUT."
  27. echo " [-t TITLE] - Title (usually the solid file name) - will be plotted on the graph."
  28. echo
  29. exit
  30. }
  31. #
  32. # Input Data columns: #pos cnt min max sum mean Q1 med Q3 IQR lW rW
  33. # As produced by "solid_qual_stats" program
  34. TITLE="" # default title is empty
  35. FILENAME=""
  36. OUTPUTTERM="set term png size 800,600"
  37. OUTPUTFILE="/dev/stdout" # Default output file is simply "stdout"
  38. while getopts ":t:i:o:ph" Option
  39. do
  40. case $Option in
  41. # w ) CMD=$OPTARG; FILENAME="PIMSLogList.txt"; TARGET="logfiles"; ;;
  42. t ) TITLE="for $OPTARG" ;;
  43. i ) FILENAME=$OPTARG ;;
  44. o ) OUTPUTFILE="$OPTARG" ;;
  45. p ) OUTPUTTERM="set term postscript enhanced color \"Helvetica\" 4" ;;
  46. h ) usage ;;
  47. * ) echo "unrecognized argument. use '-h' for usage information."; exit -1 ;;
  48. esac
  49. done
  50. shift $(($OPTIND - 1))
  51. if [ "$FILENAME" == "" ]; then
  52. usage
  53. fi
  54. if [ ! -r "$FILENAME" ]; then
  55. echo "Error: can't open input file ($1)." >&2
  56. exit 1
  57. fi
  58. #Read number of cycles from the stats file (each line is a cycle, minus the header line)
  59. #But for the graph, I want xrange to reach (num_cycles+1), so I don't subtract 1 now.
  60. NUM_CYCLES=$(cat "$FILENAME" | wc -l)
  61. GNUPLOTCMD="
  62. $OUTPUTTERM
  63. set boxwidth 0.8
  64. set size 1,1
  65. set key Left inside
  66. set xlabel \"read position\"
  67. set ylabel \"Quality Score \"
  68. set title \"Quality Scores $TITLE\"
  69. #set auto x
  70. set bars 4.0
  71. set xrange [ 0: $NUM_CYCLES ]
  72. set yrange [-2:45]
  73. set y2range [-2:45]
  74. set xtics 1
  75. set x2tics 1
  76. set ytics 2
  77. set y2tics 2
  78. set tics out
  79. set grid ytics
  80. set style fill empty
  81. plot '$FILENAME' using 1:7:11:12:9 with candlesticks lt 1 lw 1 title 'Quartiles' whiskerbars, \
  82. '' using 1:8:8:8:8 with candlesticks lt -1 lw 2 title 'Medians'
  83. "
  84. echo "$GNUPLOTCMD" | gnuplot > "$OUTPUTFILE"