PageRenderTime 46ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/Bio/Pipeline/InputCreate/setup_genewise.pm

https://github.com/bioperl/bioperl-pipeline
Perl | 289 lines | 197 code | 74 blank | 18 comment | 27 complexity | 9d7c7eef3cdde9e1105fce262ec08333 MD5 | raw file
Possible License(s): LGPL-2.0
  1. #
  2. # BioPerl module for Bio::Pipeline::InputCreate::setup_genewise
  3. #
  4. # Please direct questions and support issues to <bioperl-l@bioperl.org>
  5. #
  6. # Cared for by Shawn Hoon <shawnh@fugu-sg.org>
  7. #
  8. #
  9. # You may distribute this module under the same terms as perl itself
  10. #
  11. # POD documentation - main docs before the code
  12. #
  13. =head1 NAME
  14. Bio::Pipeline::Input::setup_genewise
  15. =head1 SYNOPSIS
  16. my $inc = Bio::Pipeline::Input::setup_genewise->
  17. new(-slice_ioh=>$slice_ioh,
  18. -protein_ioh=>$pioh,
  19. -dh_ioh =>$dh_ioh,
  20. -padding => 1000);
  21. $inc->run;
  22. =head1 DESCRIPTION
  23. This module setsup genewise jobs. It is really meant to work with
  24. ensembl objects for it takes in DnaPepAlignFeatures, group them by the
  25. hseqname and creates one genewise job per hit. Each job will consist
  26. of two inputs, a target dna and a peptide query. The target dna is a
  27. slice and its defined as the range of the peptide hit + padding on
  28. either side.
  29. =head1 FEEDBACK
  30. =head2 Mailing Lists
  31. User feedback is an integral part of the evolution of this and other
  32. Bioperl modules. Send your comments and suggestions preferably to one
  33. of the Bioperl mailing lists. Your participation is much appreciated.
  34. bioperl-l@bioperl.org - General discussion
  35. http://bioperl.org/wiki/Mailing_lists - About the mailing lists
  36. =head2 Support
  37. Please direct usage questions or support issues to the mailing list:
  38. L<bioperl-l@bioperl.org>
  39. rather than to the module maintainer directly. Many experienced and
  40. reponsive experts will be able look at the problem and quickly
  41. address it. Please include a thorough description of the problem
  42. with code and data examples if at all possible.
  43. =head2 Reporting Bugs
  44. Report bugs to the Bioperl bug tracking system to help us keep track
  45. the bugs and their resolution. Bug reports can be submitted via email
  46. or the web:
  47. bioperl-bugs@bio.perl.org
  48. http://bio.perl.org/bioperl-bugs/
  49. =head1 AUTHOR - Shawn Hoon
  50. Email shawnh@fugu-sg.org
  51. =head1 APPENDIX
  52. The rest of the documentation details each of the object
  53. methods. Internal metho ds are usually preceded with a _
  54. =cut
  55. package Bio::Pipeline::InputCreate::setup_genewise;
  56. use vars qw(@ISA);
  57. use strict;
  58. use Bio::Pipeline::InputCreate;
  59. use Bio::Pipeline::DataType;
  60. @ISA = qw(Bio::Pipeline::InputCreate);
  61. sub _initialize {
  62. my ($self,@args) = @_;
  63. $self->SUPER::_initialize(@args);
  64. my ($slice_ioh,$protein_ioh,$dh_name,$padding) = $self->_rearrange([qw(SLICE_IOH PROTEIN_IOH DYN_ARG_DATAHANDLER_NAME PADDING)],@args);
  65. $slice_ioh || $self->throw("Need an iohandler for the slice");
  66. $self->slice_ioh($slice_ioh);
  67. $protein_ioh || $self->throw("Need an iohandler for the protein");
  68. $self->protein_ioh($protein_ioh);
  69. $dh_name || $self->throw("Need an datahandler id for fetch_contig_start_end method");
  70. $self->dhid($dh_name);
  71. $padding = $padding || 1000;
  72. $self->padding($padding);
  73. }
  74. =head2 padding
  75. Title : padding
  76. Usage : $self->padding()
  77. Function: get/sets of the padding on each side of sequence
  78. to pad before passing to genewise
  79. Returns :
  80. Args :
  81. =cut
  82. sub padding{
  83. my ($self,$arg) = @_;
  84. if($arg){
  85. $self->{'_padding'} = $arg;
  86. }
  87. return $self->{'_padding'};
  88. }
  89. sub slice_ioh {
  90. my ($self,$arg) = @_;
  91. if($arg){
  92. $self->{'_slice_ioh'} = $arg;
  93. }
  94. return $self->{'_slice_ioh'};
  95. }
  96. sub genewise_ioh {
  97. my ($self,$arg) = @_;
  98. if($arg){
  99. $self->{'_genewise_ioh'} = $arg;
  100. }
  101. return $self->{'_genewise_ioh'};
  102. }
  103. =head2 protein_ioh
  104. Title : protein_ioh
  105. Usage : $self->protein_ioh()
  106. Function: get/set of the iohandler id for fetching the protein sequence
  107. Returns :
  108. Args :
  109. =cut
  110. sub protein_ioh {
  111. my ($self,$arg) = @_;
  112. if($arg){
  113. $self->{'_protein_ioh'} = $arg;
  114. }
  115. return $self->{'_protein_ioh'};
  116. }
  117. =head2 dhid
  118. Title : dhid
  119. Usage : $self->dhid()
  120. Function: get/set of the datahandler id for dynamic arguments
  121. Returns :
  122. Args :
  123. =cut
  124. sub dhid {
  125. my ($self,$arg) = @_;
  126. if($arg){
  127. $self->{'_dhid'} = $arg;
  128. }
  129. return $self->{'_dhid'};
  130. }
  131. =head2 datatypes
  132. Title : datatypes
  133. Usage : $self->datatypes()
  134. Function: get/set of the datatypes required for this input create
  135. Returns :
  136. Args :
  137. =cut
  138. sub datatypes {
  139. my ($self) = @_;
  140. my $dt = Bio::Pipeline::DataType->new('-object_type'=>'Bio::SeqFeatureI',
  141. '-name'=>'sequence',
  142. '-reftype'=>'ARRAY');
  143. my %dts;
  144. $dts{input} = $dt;
  145. return %dts;
  146. }
  147. =head2 run
  148. Title : run
  149. Usage : $self->run($next_anal,$input)
  150. Function: creates the jobs for genewise
  151. Returns :
  152. Args : L<Bio::Pipeline::Analysis>, Hash reference
  153. =cut
  154. sub run {
  155. my ($self,$next_anal,$input) = @_;
  156. my $ioh = $self->dbadaptor->get_IOHandlerAdaptor->fetch_by_dbID($self->slice_ioh);
  157. my $dh_name = $self->dhid;
  158. IOH: foreach my $dh($ioh->datahandlers){
  159. if($dh->method eq $dh_name){
  160. $self->dhid($dh->dbID);
  161. last IOH;
  162. }
  163. }
  164. my @gw_ioh = @{$next_anal->iohandler};
  165. (ref($input) eq "HASH") || $self->throw("Expecting a hash reference");
  166. keys %{$input} > 1 ? $self->throw("Expecting only one entry for setup_genewise"):{};
  167. my ($key) = keys %{$input};
  168. my @hits= ref ($input->{$key}) eq 'ARRAY' ? @{$input->{$key}} : $input->{$key};
  169. my %hash;
  170. #group the hsps by hit seqname (one gw job per hit)
  171. #foreach my $hsp (@output){
  172. # push @{$hash{$hsp->hseqname}},$hsp;
  173. #}
  174. $#hits>= 0 || return;
  175. ref $hits[0] || return;
  176. $hits[0]->isa("Bio::SeqFeatureI") || $self->throw("Need a SeqFeatureI object to setup_genewise");
  177. my @hsps = $hits[0]->get_SeqFeatures;
  178. my $chr_name = $hsps[0]->entire_seq->chr_name;
  179. my $chr_start = $hsps[0]->entire_seq->chr_start;
  180. my $slice_length = $hsps[0]->entire_seq->length;
  181. my $padding = $self->padding;
  182. #shawn to fix--creating input for contig_start_end
  183. foreach my $hit(@hits){
  184. my @hsps = $hit->get_SeqFeatures;
  185. my $slice_start = $hit->start;
  186. if ($slice_start > $padding){
  187. $slice_start -= $padding;
  188. }
  189. else {
  190. $slice_start = 1;
  191. }
  192. my $slice_end = $hit->end;
  193. if ($slice_end < ($slice_length - $padding)){
  194. $slice_end += $padding;
  195. }
  196. #offset to change to chromosomal coordinates
  197. $slice_start += $chr_start;
  198. $slice_end += $chr_start;
  199. my $protein_id = $hsps[0]->hseqname;
  200. my $strand = $hsps[0]->strand;
  201. my $input1 = Bio::Pipeline::Input->new(-name=>$protein_id,-tag=>"query_pep",-input_handler=>$self->protein_ioh);
  202. my $strand = $hit->strand || 1;
  203. my $input3 = Bio::Pipeline::Input->new(-name=>$strand,-tag=>"genewise_strand");
  204. my @arg;
  205. my $arg = Bio::Pipeline::Argument->new(-rank=>1,-value=>$slice_start,-dhid=>$self->dhid,-type=>"SCALAR");
  206. push @arg, $arg;
  207. $arg = Bio::Pipeline::Argument->new(-rank=>2,-value=>$slice_end,-dhid=>$self->dhid,-type=>"SCALAR");
  208. push @arg, $arg;
  209. my $input2 = Bio::Pipeline::Input->new(-name=>$chr_name,-tag=>"target_dna",-input_handler=>$self->slice_ioh,-dynamic_arguments=>\@arg);
  210. my $job = $self->create_job($next_anal,[$input1,$input2,$input3]);
  211. $job->status('HOLD');
  212. $self->dbadaptor->get_JobAdaptor->store($job);
  213. }
  214. }
  215. 1;