PageRenderTime 24ms CodeModel.GetById 23ms RepoModel.GetById 1ms app.codeStats 0ms

/Bio/Pipeline/InputCreate/setup_cdna2genome.pm

https://github.com/bioperl/bioperl-pipeline
Perl | 245 lines | 164 code | 65 blank | 16 comment | 18 complexity | b19ffef826f8e31ecd0e07a8a3adbe8b MD5 | raw file
Possible License(s): LGPL-2.0
  1. #
  2. # BioPerl module for Bio::Pipeline::InputCreate::setup_cdna2genome
  3. #
  4. # Please direct questions and support issues to <bioperl-l@bioperl.org>
  5. #
  6. # Cared for by Shawn Hoon <shawnh@fugu-sg.org>
  7. #
  8. #
  9. # You may distribute this module under the same terms as perl itself
  10. #
  11. # POD documentation - main docs before the code
  12. #
  13. =head1 NAME
  14. Bio::Pipeline::InputCreate::setup_cdna2genome
  15. =head1 SYNOPSIS
  16. my $inc = Bio::Pipeline::InputCreate::setup_cdna2genome->new(
  17. -cdna_ioh => $cdna_ioh,
  18. -genome_ioh => $genome_ioh,
  19. -padding => 1000);
  20. $inc->run;
  21. =head1 DESCRIPTION
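  22. Input creator that parses a BLAST report and, for the top-scoring pair of each query, creates a job for the next analysis with the cdna and the genome sequence as its two inputs.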
  23. =head1 FEEDBACK
  24. =head2 Mailing Lists
  25. User feedback is an integral part of the evolution of this and other
  26. Bioperl modules. Send your comments and suggestions preferably to one
  27. of the Bioperl mailing lists. Your participation is much appreciated.
  28. bioperl-l@bioperl.org - General discussion
  29. http://bioperl.org/wiki/Mailing_lists - About the mailing lists
  30. =head2 Support
  31. Please direct usage questions or support issues to the mailing list:
  32. L<bioperl-l@bioperl.org>
  33. rather than to the module maintainer directly. Many experienced and
  34. responsive experts will be able to look at the problem and quickly
  35. address it. Please include a thorough description of the problem
  36. with code and data examples if at all possible.
  37. =head2 Reporting Bugs
  38. Report bugs to the Bioperl bug tracking system to help us keep track
  39. of the bugs and their resolution. Bug reports can be submitted via email
  40. or the web:
  41. bioperl-bugs@bio.perl.org
  42. http://bio.perl.org/bioperl-bugs/
  43. =head1 AUTHOR - Shawn Hoon
  44. Email shawnh@fugu-sg.org
  45. =head1 APPENDIX
  46. The rest of the documentation details each of the object methods.
  47. Internal methods are usually preceded with a _
  48. =cut
  49. package Bio::Pipeline::InputCreate::setup_cdna2genome;
  50. use vars qw(@ISA);
  51. use strict;
  52. use Bio::Pipeline::InputCreate;
  53. use Bio::Pipeline::DataType;
  54. use Bio::Root::IO;
  55. use Bio::SearchIO;
  56. @ISA = qw(Bio::Pipeline::InputCreate);
  # Constructor hook: validates and stores the two required iohandlers and,
  # when supplied, the optional padding.
  #
  # Args   : -cdna_ioh   => iohandler for fetching the cdna sequence (required)
  #          -genome_ioh => iohandler for fetching the genome sequence (required)
  #          -padding    => optional padding, stored through padding()
  # Returns: nothing
  # Throws : if either iohandler is missing
  sub _initialize {
      my ($self, @args) = @_;
      $self->SUPER::_initialize(@args);

      my ($cdna_ioh, $genome_ioh, $padding) =
          $self->_rearrange([qw(CDNA_IOH GENOME_IOH PADDING)], @args);

      # Same check order as before, so the genome error is still reported first.
      $genome_ioh || $self->throw("Need an iohandler for the genome");
      $self->genome_ioh($genome_ioh);
      $cdna_ioh || $self->throw("Need an iohandler for the cdna");
      $self->cdna_ioh($cdna_ioh);

      # -padding used to be silently discarded here even though the SYNOPSIS
      # passes it; keep it when the caller provided one.
      $self->padding($padding) if defined $padding;

      return;
  }
  67. =head2 padding
  68. Title : padding
  69. Usage : $self->padding()
  70. Function: get/set the padding added on each side of the sequence
  71. before passing it to est2genome
  72. Returns :
  73. Args :
  74. =cut
  # Get/set the padding value.
  #
  # Args   : optional new value; any defined value, including 0, is stored
  # Returns: the currently stored padding (undef if none has been set)
  sub padding {
      my ($self, $arg) = @_;
      # Test definedness, not truth: a padding of 0 is a legitimate setting
      # and must not be mistaken for "no argument given".
      if (defined $arg) {
          $self->{'_padding'} = $arg;
      }
      return $self->{'_padding'};
  }
  82. =head2 genome_ioh
  83. Title : genome_ioh
  84. Usage : $self->genome_ioh()
  85. Function: get/set of the iohandler id for fetching the genome sequence
  86. Returns :
  87. Args :
  88. =cut
  # Accessor for the iohandler used to fetch the genome sequence.
  #
  # Args   : optional new iohandler; ignored when false
  # Returns: the stored iohandler (undef if none has been set)
  sub genome_ioh {
      my $self    = shift;
      my $new_ioh = shift;
      $self->{'_genome_ioh'} = $new_ioh if $new_ioh;
      return $self->{'_genome_ioh'};
  }
  96. =head2 blast_dir
  97. Title : blast_dir
  98. Usage : $self->blast_dir()
  99. Function: get/set of the blast directory
  100. Returns :
  101. Args :
  102. =cut
  # Accessor for the blast directory.
  #
  # Args   : optional new directory; ignored when false
  # Returns: the stored directory (undef if none has been set)
  sub blast_dir {
      my ($self, $dir) = @_;
      # Pure getter when no (true) argument is supplied.
      return $self->{'_blast_dir'} unless $dir;
      $self->{'_blast_dir'} = $dir;
      return $self->{'_blast_dir'};
  }
  110. =head2 cdna_ioh
  111. Title : cdna_ioh
  112. Usage : $self->cdna_ioh()
  113. Function: get/set of the iohandler id for fetching the cdna sequence
  114. Returns :
  115. Args :
  116. =cut
  # Accessor for the iohandler used to fetch the cdna sequence.
  #
  # Args   : optional new iohandler; ignored when false
  # Returns: the stored iohandler (undef if none has been set)
  sub cdna_ioh {
      my ($self, $ioh) = @_;
      my $slot = '_cdna_ioh';
      if ($ioh) {
          $self->{$slot} = $ioh;
      }
      return $self->{$slot};
  }
  124. =head2 datatypes
  125. Title : datatypes
  126. Usage : $self->datatypes()
  127. Function: returns the datatypes required for this input create
  128. Returns :
  129. Args :
  130. =cut
  # Describe the datatypes this input create requires.
  #
  # Returns: a hash (as a list) with the single key 'input' mapped to a
  #          Bio::Pipeline::DataType built for an ARRAY of Bio::SeqFeatureI
  #          objects named 'sequence'
  sub datatypes {
      my $self = shift;

      my $seq_type = Bio::Pipeline::DataType->new(
          '-object_type' => 'Bio::SeqFeatureI',
          '-name'        => 'sequence',
          '-reftype'     => 'ARRAY',
      );

      my %datatype_for = (input => $seq_type);
      return %datatype_for;
  }
  # Parse a BLAST report and collect one query/subject pair per result.
  #
  # For each result the hits are walked in the order the parser returns them;
  # the first HSP found is recorded and the rest of that result is skipped.
  # A hit that yields no HSP falls through to the next hit.
  #
  # Args   : path of the BLAST report file
  # Returns: array reference of [query seq_id, subject seq_id] pairs
  sub _parse_top_hits {
      my ($self, $file) = @_;
      my $sio = Bio::SearchIO->new(-file => $file, -format => "blast");
      my @id;

      RESULT: while (my $r = $sio->next_result) {
          while (my $hi = $r->next_hit) {
              # Only the first HSP is ever wanted, so a single fetch replaces
              # the former loop that always exited on its first iteration.
              if (my $hs = $hi->next_hsp) {
                  push @id, [$hs->query->seq_id, $hs->subject->seq_id];
                  next RESULT;
              }
          }
      }

      return \@id;
  }
  157. =head2 run
  158. Title : run
  159. Usage : $self->run($next_anal,$input)
  160. Function: creates the jobs for est2genome
  161. Returns :
  162. Args : L<Bio::Pipeline::Analysis>, Hash reference
  163. =cut
  # Create and store one job per top BLAST pair found in the input file.
  #
  # Each job for $next_anal gets two inputs: the query id tagged "cdna" with
  # the cdna iohandler, and the subject id tagged "genome" with the genome
  # iohandler.
  #
  # Args   : $next_anal - the analysis the jobs are created for
  #          Any further argument is accepted for interface compatibility but
  #          not used: the report file is always taken from $self->infile.
  # Returns: 1
  # Throws : if the input file or either iohandler is not set
  sub run {
      # The third argument used to be unpacked into $infile and then masked by
      # a second "my $infile" in the same scope; it is no longer unpacked.
      my ($self, $next_anal) = @_;

      my $infile     = $self->infile     || $self->throw("Need an input file");
      my $cdna_ioh   = $self->cdna_ioh   || $self->throw("Need a cdna iohandler");
      my $genome_ioh = $self->genome_ioh || $self->throw("Need a genome iohandler");

      my @hits = @{ $self->_parse_top_hits($infile) };
      foreach my $hit (@hits) {
          # $hit is a [query seq_id, subject seq_id] pair.
          # NOTE(review): Bio::Pipeline::Input is not loaded by this file;
          # presumably a parent module pulls it in - confirm.
          my $in1 = Bio::Pipeline::Input->new(-name=>$hit->[0],-tag=>"cdna",-input_handler=>$cdna_ioh);
          my $in2 = Bio::Pipeline::Input->new(-name=>$hit->[1],-tag=>"genome",-input_handler=>$genome_ioh);
          my $job = $self->create_job($next_anal, [$in1, $in2]);
          $self->dbadaptor->get_JobAdaptor->store($job);
      }

      return 1;
  }
  180. 1;