PageRenderTime 50ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/Bio/Pipeline/Utils/Dumper/blastscore.pm

https://github.com/bioperl/bioperl-pipeline
Perl | 278 lines | 217 code | 48 blank | 13 comment | 36 complexity | 960fa9b10d1fb12dee51e37f49547678 MD5 | raw file
Possible License(s): LGPL-2.0
  1. #
  2. # BioPerl module for Bio::Pipeline::Utils::Dumper::blastscore
  3. #
  4. # Please direct questions and support issues to <bioperl-l@bioperl.org>
  5. #
  6. # Cared for by Shawn Hoon <shawnh@fugu-sg.org>
  7. #
  8. #
  9. # You may distribute this module under the same terms as perl itself
  10. #
  11. # POD documentation - main docs before the code
  12. #
  13. =head1 NAME
  14. Bio::Pipeline::Utils::Dumper::blastscore
  15. =head1 SYNOPSIS
  16. use Bio::Pipeline::Dumper;
  17. use Bio::SearchIO;
  18. my $dumper = Bio::Pipeline::Dumper->new(-module=>'BlastScore',
  19. -format=>"score",
  20. -file=>">shawn.out",
  21. -significance=>"<0.001",
  22. -query_frac_identical=>">0.21");
  23. my $searchio = Bio::SearchIO->new ('-format' => 'blast',
  24. '-file' => "blast.report");
  25. my @hit;
  26. while (my $r = $searchio->next_result){
  27. while(my $hit = $r->next_hit){
  28. push @hit, $hit;
  29. }
  30. }
  31. $dumper->dump(@hit);
  32. =head1 DESCRIPTION
  33. Dumps BlastHits Scores into tab delimited files
  34. This module will evolve to become more flexible to dump out blast scores
  35. in some flat file format. This is for use in various reciprocal blast type
  36. pipeline that just need the plain score in a file format. It will enable
  37. one to filter the blast score to output using the various
  38. parameters that are available in Bio::Search::Hit::HitI
  39. -length the length of the hit
  40. -raw_score Gets the "raw score" generated by the algorithm.
  41. -significance Used to obtain the E or P value of a hit,
  42. -bits the bit score of the best HSP for the current hit
  43. -n This is the number of HSPs in the set which was ascribed
  44. the lowest P-value
  45. -p the P-value for the best HSP of the given BLAST hit
  46. -query_length_aln total length of the aligned region for query
  47. -subject_length_aln total length of the aligned region for subject
  48. -query_gaps the number of gaps in the aligned query
  49. -subject_gaps the number of gaps in the aligned subject
  50. -id_matches Get the total number of identical matches
  51. -cons_matches Get the total number of conserved matches
  52. -query_frac_identical the overall fraction of identical positions across all HSPs.
  53. -subject_frac_identical the overall fraction of identical positions across all HSPs.
  54. -query_frac_conserved the overall fraction of conserved positions across all HSPs.
  55. -subject_frac_conserved the overall fraction of conserved positions across all HSPs
  56. -frac_aligned_query the fraction of the query sequence which has been aligned
  57. -frac_aligned_subject the fraction of the hit (sbjct) sequence which has been aligned
  58. -rank the rank of this Hit in the Query search list
  59. The operation to be used is specified via appending '>', '<', '=' to the value provided.
  60. e.g -length => '>100' means dump hits with length greater than 100.
  61. =head1 FEEDBACK
  62. =head2 Mailing Lists
  63. User feedback is an integral part of the evolution of this and other
  64. Bioperl modules. Send your comments and suggestions preferably to one
  65. of the Bioperl mailing lists. Your participation is much appreciated.
  66. bioperl-l@bioperl.org - General discussion
  67. http://bioperl.org/wiki/Mailing_lists - About the mailing lists
  68. =head2 Support
  69. Please direct usage questions or support issues to the mailing list:
  70. L<bioperl-l@bioperl.org>
  71. rather than to the module maintainer directly. Many experienced and
  72. reponsive experts will be able look at the problem and quickly
  73. address it. Please include a thorough description of the problem
  74. with code and data examples if at all possible.
  75. =head2 Reporting Bugs
  76. Report bugs to the Bioperl bug tracking system to help us keep track
  77. the bugs and their resolution. Bug reports can be submitted via email
  78. or the web:
  79. bioperl-bugs@bio.perl.org
  80. http://bio.perl.org/bioperl-bugs/
  81. =head1 AUTHOR - Shawn Hoon
  82. Email shawnh@fugu-sg.org
  83. =head1 APPENDIX
  84. The rest of the documentation details each of the object methods. Internal metho
  85. ds are usually preceded with a _
  86. =cut
  87. package Bio::Pipeline::Utils::Dumper::blastscore;
  88. use vars qw(@ISA @BS_PARAMS %OK_FIELD $AUTOLOAD);
  89. use strict;
  90. use Bio::Root::Root;
  91. use Bio::Pipeline::Utils::Dumper;
  92. @ISA = qw(Bio::Pipeline::Utils::Dumper);
  93. BEGIN {
  94. @BS_PARAMS = qw(FORMAT LENGTH RAW_SCORE SIGNIFICANCE BITS N P QUERY_LENGTH_ALN
  95. SUBJECT_LENGTH_ALN QUERY_GAPS SUBJECT_GAPS ID_MATCHES
  96. CONS_MATCHES QUERY_FRAC_IDENTICAL SUBJECT_FRAC_IDENTICAL
  97. QUERY_FRAC_CONSERVED SUBJECT_FRAC_CONSERVED FRAC_ALIGNED_QUERY
  98. FRAC_ALIGNED_SUBJECT RANK);
  99. foreach my $attr ( @BS_PARAMS) { $OK_FIELD{$attr}++; }
  100. }
  101. sub _initialize {
  102. my($self,@args) = @_;
  103. $self->SUPER::_initialize(@args);
  104. while(@args){
  105. my $attr = shift @args;
  106. $attr=~s/-//;
  107. my $value = shift @args;
  108. $self->$attr($value);
  109. }
  110. }
  111. sub AUTOLOAD {
  112. my $self = shift;
  113. my $attr = $AUTOLOAD;
  114. $attr =~ s/.*:://;
  115. $attr = uc $attr;
  116. $self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr};
  117. $self->{$attr} = shift if @_;
  118. return $self->{$attr};
  119. }
  120. =head2 dump
  121. Title : dump
  122. Usage : $dumper->dump(@hit);
  123. Function: dumps the hit to the flat file
  124. Returns :
  125. Args : an array of Bio::Search::Hit::HitI
  126. =cut
  127. sub dump {
  128. my ($self,@hit) = @_;
  129. my $outfile = $self->file;
  130. my $format = $self->format;
  131. foreach my $hit(@hit){
  132. $hit->isa("Bio::Search::Hit::GenericHit") || $self->throw("Not a Bio::Search::Hit::GenericHit object!");
  133. if($self->_include($hit)){
  134. my ($hsp) = $hit->hsps;
  135. if($format =~/gff/){
  136. $self->_print($hsp->query->gff_string."\n");
  137. $self->_print($hsp->subject->gff_string."\n");
  138. } elsif($format=~/tribe/){
  139. my $sig = $hit->significance;
  140. $sig =~s/^e-/1e-/g;
  141. my $expect = sprintf("%e",$sig);
  142. if ($expect==0){
  143. my $wt = 200; #hardcode
  144. $expect=sprintf("%e","1e-$wt");
  145. }
  146. my $first=(split("e-",$expect))[0];
  147. my $second=(split("e-",$expect))[1];
  148. $self->_print(join("\t",$hsp->feature1->seq_id,$hsp->feature2->seq_id,int($first),int($second)),"\n");
  149. } else {
  150. $self->_print($hsp->feature1->seq_id."\t".$hsp->feature2->seq_id."\t".$hit->significance."\n");
  151. }
  152. }
  153. }
  154. }
  155. #filter blasts
  156. sub _include {
  157. my ($self,$hit) = @_;
  158. foreach my $attr (keys %OK_FIELD){
  159. my $attr = lc $attr;
  160. my $val = $self->$attr;
  161. $val || next;
  162. my @op = $val=~/([\<\>\=])(\S+)/;
  163. my $thresh;
  164. my $op;
  165. if($#op > 0){
  166. $op = $op[0];
  167. $thresh = $op[1];
  168. }
  169. else {
  170. $op='>';
  171. $thresh = $op[0];
  172. }
  173. if($op eq '>'){
  174. if ($hit->can($attr)){
  175. if ($hit->$attr < $thresh){
  176. return 0;
  177. }
  178. next;
  179. }
  180. else {
  181. $attr=~/([a-zA-Z]+)_(\w+)/;
  182. my $type = $1;
  183. my $method = lc $2;
  184. $hit->can($method) || next;
  185. if($hit->$method($type) < $thresh){
  186. return 0;
  187. }
  188. next;
  189. }
  190. }
  191. if($op eq '<'){
  192. if ($hit->can($attr)){
  193. if ($hit->$attr > $thresh){
  194. return 0;
  195. }
  196. next;
  197. }
  198. else {
  199. $attr=~/([a-zA-Z]+)_(\w+)/;
  200. my $type = $1;
  201. my $method = lc $2;
  202. $hit->can($method) || next;
  203. if($hit->$method($type) > $thresh){
  204. return 0;
  205. }
  206. next;
  207. }
  208. }
  209. if($op eq '='){
  210. if ($hit->can($attr)){
  211. if ($hit->$attr == $thresh){
  212. return 0;
  213. }
  214. next;
  215. }
  216. else {
  217. $attr=~/([a-zA-Z]+)_(\w+)/;
  218. my $type = $1;
  219. my $method = lc $2;
  220. $hit->can($method) || next;
  221. if($hit->$method($type) == $thresh){
  222. return 0;
  223. }
  224. next;
  225. }
  226. }
  227. }
  228. return 1;
  229. }
  230. 1;