setup_genewise.pm | searchcode

/Bio/Pipeline/InputCreate/setup_genewise.pm

https://github.com/bioperl/bioperl-pipeline
Perl | 289 lines | 197 code | 74 blank | 18 comment | 27 complexity | 9d7c7eef3cdde9e1105fce262ec08333 MD5 | raw file
Possible License(s): LGPL-2.0

#
# BioPerl module for Bio::Pipeline::InputCreate::setup_genewise
#
# Please direct questions and support issues to <bioperl-l@bioperl.org> 
#
# Cared for by Shawn Hoon <shawnh@fugu-sg.org>
#
#
# You may distribute this module under the same terms as perl itself
#
# POD documentation - main docs before the code
#
=head1 NAME

Bio::Pipeline::Input::setup_genewise

=head1 SYNOPSIS

  my $inc = Bio::Pipeline::Input::setup_genewise->
      new(-slice_ioh=>$slice_ioh,
          -protein_ioh=>$pioh,
          -dh_ioh     =>$dh_ioh,
          -padding => 1000);
  $inc->run;

=head1 DESCRIPTION

This module setsup genewise jobs. It is really meant to work with
ensembl objects for it takes in DnaPepAlignFeatures, group them by the
hseqname and creates one genewise job per hit. Each job will consist
of two inputs, a target dna and a peptide query. The target dna is a
slice and its defined as the range of the peptide hit + padding on
either side.

=head1 FEEDBACK

=head2 Mailing Lists

User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to one
of the Bioperl mailing lists.  Your participation is much appreciated.

  bioperl-l@bioperl.org                  - General discussion
  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists

=head2 Support 
 
Please direct usage questions or support issues to the mailing list:
  
L<bioperl-l@bioperl.org>
  
rather than to the module maintainer directly. Many experienced and 
reponsive experts will be able look at the problem and quickly 
address it. Please include a thorough description of the problem 
with code and data examples if at all possible.

=head2 Reporting Bugs

Report bugs to the Bioperl bug tracking system to help us keep track
the bugs and their resolution.  Bug reports can be submitted via email
or the web:

  bioperl-bugs@bio.perl.org
  http://bio.perl.org/bioperl-bugs/

=head1 AUTHOR - Shawn Hoon

Email shawnh@fugu-sg.org

=head1 APPENDIX

The rest of the documentation details each of the object
methods. Internal metho ds are usually preceded with a _

=cut

package Bio::Pipeline::InputCreate::setup_genewise;

use vars qw(@ISA);

use strict;
use Bio::Pipeline::InputCreate;
use Bio::Pipeline::DataType;

@ISA = qw(Bio::Pipeline::InputCreate);

sub _initialize {
    my ($self,@args) = @_;
    $self->SUPER::_initialize(@args);

    my ($slice_ioh,$protein_ioh,$dh_name,$padding) = $self->_rearrange([qw(SLICE_IOH PROTEIN_IOH DYN_ARG_DATAHANDLER_NAME PADDING)],@args);

    $slice_ioh || $self->throw("Need an iohandler for the slice");
    $self->slice_ioh($slice_ioh);

    $protein_ioh || $self->throw("Need an iohandler for the protein");
    $self->protein_ioh($protein_ioh);

    $dh_name || $self->throw("Need an datahandler id for fetch_contig_start_end method");
    $self->dhid($dh_name);

    $padding = $padding || 1000;
    $self->padding($padding);

}

=head2 padding

  Title   : padding
  Usage   : $self->padding()
  Function: get/sets of the padding on each side of sequence 
            to pad before passing to genewise 
  Returns :  
  Args    :

=cut

sub padding{
    my ($self,$arg) = @_;
    if($arg){
        $self->{'_padding'} = $arg;
    }
    return $self->{'_padding'};
}


sub slice_ioh {
    my ($self,$arg) = @_;
    if($arg){
        $self->{'_slice_ioh'} = $arg;
    }
    return $self->{'_slice_ioh'};
}
sub genewise_ioh {
    my ($self,$arg) = @_;
    if($arg){
        $self->{'_genewise_ioh'} = $arg;
    }
    return $self->{'_genewise_ioh'};
}

=head2 protein_ioh

  Title   : protein_ioh
  Usage   : $self->protein_ioh()
  Function: get/set of the iohandler id for fetching the protein  sequence
  Returns :
  Args    :

=cut

sub protein_ioh {
    my ($self,$arg) = @_;
    if($arg){
        $self->{'_protein_ioh'} = $arg;
    }
    return $self->{'_protein_ioh'};
}

=head2 dhid

  Title   : dhid
  Usage   : $self->dhid()
  Function: get/set of the datahandler id for dynamic arguments
  Returns :
  Args    :

=cut

sub dhid {
    my ($self,$arg) = @_;
    if($arg){
        $self->{'_dhid'} = $arg;
    }
    return $self->{'_dhid'};
}

=head2 datatypes

  Title   : datatypes
  Usage   : $self->datatypes()
  Function: get/set of the datatypes required for this input create
  Returns :
  Args    :

=cut

sub datatypes {
    my ($self) = @_;
    my $dt = Bio::Pipeline::DataType->new('-object_type'=>'Bio::SeqFeatureI',
                                          '-name'=>'sequence',
                                          '-reftype'=>'ARRAY');

    my %dts;
    $dts{input} = $dt;
    return %dts;
}

=head2 run

  Title   : run
  Usage   : $self->run($next_anal,$input)
  Function: creates the jobs for genewise 
  Returns :
  Args    : L<Bio::Pipeline::Analysis>, Hash reference

=cut

sub run {
    my ($self,$next_anal,$input) = @_;

    my $ioh = $self->dbadaptor->get_IOHandlerAdaptor->fetch_by_dbID($self->slice_ioh);
    my $dh_name = $self->dhid;
IOH:    foreach my $dh($ioh->datahandlers){
      if($dh->method eq $dh_name){
        $self->dhid($dh->dbID);
         last IOH;
      }
    }
    my @gw_ioh = @{$next_anal->iohandler};

    (ref($input) eq "HASH") || $self->throw("Expecting a hash reference");
    keys %{$input} > 1 ? $self->throw("Expecting only one entry for setup_genewise"):{};

    my ($key) = keys %{$input};
    my @hits= ref ($input->{$key}) eq 'ARRAY' ?  @{$input->{$key}} : $input->{$key};
    my %hash;

    #group the hsps by hit seqname (one gw job per hit)
    #foreach my $hsp (@output){
     # push @{$hash{$hsp->hseqname}},$hsp;
    #}
 
    $#hits>= 0 || return;
    ref $hits[0] || return;
    $hits[0]->isa("Bio::SeqFeatureI") || $self->throw("Need a SeqFeatureI object to setup_genewise");
    my @hsps = $hits[0]->get_SeqFeatures; 
    my $chr_name = $hsps[0]->entire_seq->chr_name;
    my $chr_start = $hsps[0]->entire_seq->chr_start;
    my $slice_length = $hsps[0]->entire_seq->length;
    my $padding = $self->padding; 
    #shawn to fix--creating input for contig_start_end
    foreach my $hit(@hits){
        my @hsps = $hit->get_SeqFeatures;
        my $slice_start = $hit->start;
        if ($slice_start > $padding){
            $slice_start -= $padding;
        }
        else {
            $slice_start = 1;
        }
        my $slice_end   = $hit->end;
        if ($slice_end < ($slice_length - $padding)){
            $slice_end += $padding;
        }

       #offset to change to chromosomal coordinates
        $slice_start += $chr_start;
        $slice_end   += $chr_start;

        my $protein_id   = $hsps[0]->hseqname;
        my $strand       = $hsps[0]->strand;
        
        my $input1 = Bio::Pipeline::Input->new(-name=>$protein_id,-tag=>"query_pep",-input_handler=>$self->protein_ioh);
        my $strand = $hit->strand || 1;
        my $input3 = Bio::Pipeline::Input->new(-name=>$strand,-tag=>"genewise_strand");

        my @arg;
        my $arg = Bio::Pipeline::Argument->new(-rank=>1,-value=>$slice_start,-dhid=>$self->dhid,-type=>"SCALAR");
        push @arg, $arg;
        $arg = Bio::Pipeline::Argument->new(-rank=>2,-value=>$slice_end,-dhid=>$self->dhid,-type=>"SCALAR");
        push @arg, $arg;

        my $input2 = Bio::Pipeline::Input->new(-name=>$chr_name,-tag=>"target_dna",-input_handler=>$self->slice_ioh,-dynamic_arguments=>\@arg);
        my $job = $self->create_job($next_anal,[$input1,$input2,$input3]);
        $job->status('HOLD');

        $self->dbadaptor->get_JobAdaptor->store($job);
        
    }
        
    
}

1;