/Bio/Pipeline/Utils/Dumper/blastscore.pm
Perl | 278 lines | 217 code | 48 blank | 13 comment | 36 complexity | 960fa9b10d1fb12dee51e37f49547678 MD5 | raw file
Possible License(s): LGPL-2.0
- #
- # BioPerl module for Bio::Pipeline::Utils::Dumper::blastscore
- #
- # Please direct questions and support issues to <bioperl-l@bioperl.org>
- #
- # Cared for by Shawn Hoon <shawnh@fugu-sg.org>
- #
- #
- # You may distribute this module under the same terms as perl itself
- #
- # POD documentation - main docs before the code
- #
- =head1 NAME
- Bio::Pipeline::Utils::Dumper::blastscore
- =head1 SYNOPSIS
- use Bio::Pipeline::Dumper;
- use Bio::SearchIO;
- my $dumper = Bio::Pipeline::Dumper->new(-module=>'BlastScore',
- -format=>"score",
- -file=>">shawn.out",
- -significance=>"<0.001",
- -query_frac_identical=>">0.21");
- my $searchio = Bio::SearchIO->new ('-format' => 'blast',
- '-file' => "blast.report");
- my @hit;
- while (my $r = $searchio->next_result){
- while(my $hit = $r->next_hit){
- push @hit, $hit;
- }
- }
- $dumper->dump(@hit);
- =head1 DESCRIPTION
- Dumps BlastHits Scores into tab delimited files
- This module will evolve to become more flexible to dump out blast scores
- in some flat file format. This is for use in various reciprocal blast type
- pipeline that just need the plain score in a file format. It will enable
- one to filter the blast score to output using the various
- parameters that are available in Bio::Search::Hit::HitI
- -length the length of the hit
- -raw_score Gets the "raw score" generated by the algorithm.
- -significance Used to obtain the E or P value of a hit,
- -bits the bit score of the best HSP for the current hit
- -n This is the number of HSPs in the set which was ascribed
- the lowest P-value
- -p the P-value for the best HSP of the given BLAST hit
- -query_length_aln total length of the aligned region for query
- -subject_length_aln total length of the aligned region for subject
- -query_gaps the number of gaps in the aligned query
- -subject_gaps the number of gaps in the aligned subject
- -id_matches Get the total number of identical matches
- -cons_matches Get the total number of conserved matches
- -query_frac_identical the overall fraction of identical positions across all HSPs.
- -subject_frac_identical the overall fraction of identical positions across all HSPs.
- -query_frac_conserved the overall fraction of conserved positions across all HSPs.
- -subject_frac_conserved the overall fraction of conserved positions across all HSPs
- -frac_aligned_query the fraction of the query sequence which has been aligned
- -frac_aligned_subject the fraction of the hit (sbjct) sequence which has been aligned
- -rank the rank of this Hit in the Query search list
- The operation to be used is specified via appending '>', '<', '=' to the value provided.
- e.g -length => '>100' means dump hits with length greater than 100.
- =head1 FEEDBACK
- =head2 Mailing Lists
- User feedback is an integral part of the evolution of this and other
- Bioperl modules. Send your comments and suggestions preferably to one
- of the Bioperl mailing lists. Your participation is much appreciated.
- bioperl-l@bioperl.org - General discussion
- http://bioperl.org/wiki/Mailing_lists - About the mailing lists
- =head2 Support
-
- Please direct usage questions or support issues to the mailing list:
-
- L<bioperl-l@bioperl.org>
-
- rather than to the module maintainer directly. Many experienced and
- reponsive experts will be able look at the problem and quickly
- address it. Please include a thorough description of the problem
- with code and data examples if at all possible.
- =head2 Reporting Bugs
- Report bugs to the Bioperl bug tracking system to help us keep track
- the bugs and their resolution. Bug reports can be submitted via email
- or the web:
- bioperl-bugs@bio.perl.org
- http://bio.perl.org/bioperl-bugs/
- =head1 AUTHOR - Shawn Hoon
- Email shawnh@fugu-sg.org
- =head1 APPENDIX
- The rest of the documentation details each of the object methods. Internal metho
- ds are usually preceded with a _
- =cut
- package Bio::Pipeline::Utils::Dumper::blastscore;
- use vars qw(@ISA @BS_PARAMS %OK_FIELD $AUTOLOAD);
- use strict;
- use Bio::Root::Root;
- use Bio::Pipeline::Utils::Dumper;
- @ISA = qw(Bio::Pipeline::Utils::Dumper);
- BEGIN {
- @BS_PARAMS = qw(FORMAT LENGTH RAW_SCORE SIGNIFICANCE BITS N P QUERY_LENGTH_ALN
- SUBJECT_LENGTH_ALN QUERY_GAPS SUBJECT_GAPS ID_MATCHES
- CONS_MATCHES QUERY_FRAC_IDENTICAL SUBJECT_FRAC_IDENTICAL
- QUERY_FRAC_CONSERVED SUBJECT_FRAC_CONSERVED FRAC_ALIGNED_QUERY
- FRAC_ALIGNED_SUBJECT RANK);
- foreach my $attr ( @BS_PARAMS) { $OK_FIELD{$attr}++; }
- }
- sub _initialize {
- my($self,@args) = @_;
- $self->SUPER::_initialize(@args);
- while(@args){
- my $attr = shift @args;
- $attr=~s/-//;
- my $value = shift @args;
- $self->$attr($value);
- }
- }
- sub AUTOLOAD {
- my $self = shift;
- my $attr = $AUTOLOAD;
- $attr =~ s/.*:://;
- $attr = uc $attr;
- $self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr};
- $self->{$attr} = shift if @_;
- return $self->{$attr};
- }
- =head2 dump
- Title : dump
- Usage : $dumper->dump(@hit);
- Function: dumps the hit to the flat file
- Returns :
- Args : an array of Bio::Search::Hit::HitI
- =cut
- sub dump {
- my ($self,@hit) = @_;
- my $outfile = $self->file;
- my $format = $self->format;
- foreach my $hit(@hit){
- $hit->isa("Bio::Search::Hit::GenericHit") || $self->throw("Not a Bio::Search::Hit::GenericHit object!");
- if($self->_include($hit)){
- my ($hsp) = $hit->hsps;
- if($format =~/gff/){
- $self->_print($hsp->query->gff_string."\n");
- $self->_print($hsp->subject->gff_string."\n");
- } elsif($format=~/tribe/){
- my $sig = $hit->significance;
- $sig =~s/^e-/1e-/g;
- my $expect = sprintf("%e",$sig);
- if ($expect==0){
- my $wt = 200; #hardcode
- $expect=sprintf("%e","1e-$wt");
- }
- my $first=(split("e-",$expect))[0];
- my $second=(split("e-",$expect))[1];
- $self->_print(join("\t",$hsp->feature1->seq_id,$hsp->feature2->seq_id,int($first),int($second)),"\n");
- } else {
- $self->_print($hsp->feature1->seq_id."\t".$hsp->feature2->seq_id."\t".$hit->significance."\n");
- }
- }
- }
- }
- #filter blasts
- sub _include {
- my ($self,$hit) = @_;
- foreach my $attr (keys %OK_FIELD){
- my $attr = lc $attr;
- my $val = $self->$attr;
- $val || next;
- my @op = $val=~/([\<\>\=])(\S+)/;
- my $thresh;
- my $op;
- if($#op > 0){
- $op = $op[0];
- $thresh = $op[1];
- }
- else {
- $op='>';
- $thresh = $op[0];
- }
- if($op eq '>'){
- if ($hit->can($attr)){
- if ($hit->$attr < $thresh){
- return 0;
- }
- next;
- }
- else {
- $attr=~/([a-zA-Z]+)_(\w+)/;
- my $type = $1;
- my $method = lc $2;
- $hit->can($method) || next;
- if($hit->$method($type) < $thresh){
- return 0;
- }
- next;
- }
- }
- if($op eq '<'){
- if ($hit->can($attr)){
- if ($hit->$attr > $thresh){
- return 0;
- }
- next;
- }
- else {
- $attr=~/([a-zA-Z]+)_(\w+)/;
- my $type = $1;
- my $method = lc $2;
- $hit->can($method) || next;
- if($hit->$method($type) > $thresh){
- return 0;
- }
- next;
- }
- }
- if($op eq '='){
- if ($hit->can($attr)){
- if ($hit->$attr == $thresh){
- return 0;
- }
- next;
- }
- else {
- $attr=~/([a-zA-Z]+)_(\w+)/;
- my $type = $1;
- my $method = lc $2;
- $hit->can($method) || next;
- if($hit->$method($type) == $thresh){
- return 0;
- }
- next;
- }
- }
- }
- return 1;
- }
- 1;