PageRenderTime 87ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/Bio/Pipeline/InputCreate/setup_initial.pm

https://github.com/bioperl/bioperl-pipeline
Perl | 315 lines | 231 code | 71 blank | 13 comment | 22 complexity | 3b560dc87131850f998ce017bdb15182 MD5 | raw file
Possible License(s): LGPL-2.0
  1. #
  2. # BioPerl module for Bio::Pipeline::InputCreate::setup_initial
  3. #
  4. # Please direct questions and support issues to <bioperl-l@bioperl.org>
  5. #
  6. # Cared for by Shawn Hoon <shawnh@fugu-sg.org>
  7. #
  8. #
  9. # You may distribute this module under the same terms as perl itself
  10. #
  11. # POD documentation - main docs before the code
  12. #
  13. =head1 NAME
  14. Bio::Pipeline::InputCreate::setup_initial
  15. =head1 SYNOPSIS
  16. use Bio::Pipeline::InputCreate::setup_initial;
  17. my $inc = Bio::Pipeline::InputCreate::setup_initial->new('-protein_ioh'=>1,
  18. '-dna_ioh'=>2);
  19. $inc->run();
  20. =head1 DESCRIPTION
  21. The setup initial analysis takes in an array of ids and iohandler ids
  22. and creates inputs and jobs. Each input to the analysis is an array of
  23. input ids. Each array of input ids is associated with a given
  24. IOHandler. It has two modes of operation. It may either create one
  25. input per job or multiple inputs per job.
  26. For example in an xml snippet:
  27. 1 <analysis id="1">
  28. 2 <data_monger>
  29. 3 <initial/>
  30. 4 <input>
  31. 5 <name>gene1</name>
  32. 6 <iohandler>1</iohandler>
  33. 7 </input>
  34. 8 <input>
  35. 9 <name>gene2</name>
  36. 10 <iohandler>2</iohandler>
  37. 11 </input>
  38. 12 <input_create>
  39. 13 <module>setup_initial</module>
  40. 14 <rank>1</rank>
  41. 15 <argument>
  42. 16 <tag>group</tag>
  43. 17 <value>1</value>
  44. 18 </argument>
  45. 19 <argument>
  46. 20 <tag>gene2</tag>
  47. 21 <value>4</value>
  48. 22 </argument>
  49. 23 <argument>
  50. 24 <tag>gene1</tag>
  51. 25 <value>3</value>
  52. 26 </argument>
  53. 27 </input_create>
  54. 28 </data_monger>
  55. 29 <input_iohandler id="1"/>
  56. 30 <input_iohandler id="2"/>
  57. 31 </analysis>
  58. This specifies that there are two inputs (line 4-11) to the
  59. InputCreate job that uses the setup_initial module. Each input has its
  60. own iohandler which would return an array of input ids (line 6 and
  61. line 10). For example in this case gene1 may belong to genes from a
  62. human database and gene2 may belong to genes from a fugu database.
  63. Within the input_create arguments (line 12-27), we next specify how to
  64. map the input ids to its corresponding iohandler. In other words,
  65. given the gene input ids, how does one fetch the actual gene object?
  66. So for this case, input ids from gene1 are fetched using iohandler_id
  67. 3 (line 25) and input ids from gene2 are fetched using iohandler_id 4
  68. (line 21)
  69. We also specify that the inputs are grouped (line 15-18) meaning that
  70. each pair of input ids (assuming that the number of input ids is
  71. equal for gene1 and gene2) is passed to one job. So what you get:
  72. gene1_id-> fetched using iohandler id 3 ----> a single job of the next analysis
  73. gene2_id-> fetched using iohandler id 4
  74. If the group argument is not specified, the jobs are created as such:
  75. gene1_id-> fetched using iohandler id 3 ----> a job of the next analysis
  76. gene2_id-> fetched using iohandler id 4 ----> a job of the next analysis
  77. Currently it is assumed that the inputs are mapped based on object
  78. type to the inputs of the runnables.
  79. =head1 FEEDBACK
  80. =head2 Mailing Lists
  81. User feedback is an integral part of the evolution of this and other
  82. Bioperl modules. Send your comments and suggestions preferably to one
  83. of the Bioperl mailing lists. Your participation is much appreciated.
  84. bioperl-l@bioperl.org - General discussion
  85. http://bioperl.org/wiki/Mailing_lists - About the mailing lists
  86. =head2 Support
  87. Please direct usage questions or support issues to the mailing list:
  88. L<bioperl-l@bioperl.org>
  89. rather than to the module maintainer directly. Many experienced and
  90. responsive experts will be able to look at the problem and quickly
  91. address it. Please include a thorough description of the problem
  92. with code and data examples if at all possible.
  93. =head2 Reporting Bugs
  94. Report bugs to the Bioperl bug tracking system to help us keep track
  95. of the bugs and their resolution. Bug reports can be submitted via email
  96. or the web:
  97. bioperl-bugs@bio.perl.org
  98. http://bugzilla.open-bio.org/
  99. =head1 AUTHOR - Shawn Hoon
  100. Email shawnh@fugu-sg.org
  101. =head1 APPENDIX
  102. The rest of the documentation details each of the object methods. Internal methods
  103. are usually preceded with a _
  104. =cut
  105. package Bio::Pipeline::InputCreate::setup_initial;
  106. use vars qw(@ISA);
  107. use strict;
  108. use Bio::Pipeline::InputCreate;
  109. use Bio::Pipeline::DataType;
  110. @ISA = qw(Bio::Pipeline::InputCreate);
  # Initialisation hook: delegates to the parent class, records the optional
  # GROUP and TEST settings, and stores the whole argument list as the
  # input-name => iohandler-id map.
  # Throws if fewer than two arguments were supplied.
  sub _initialize {
      my ($self, @args) = @_;
      $self->SUPER::_initialize(@args);

      my ($grouping, $limit) = $self->_rearrange([qw(GROUP TEST)], @args);
      if ($grouping) {
          $self->group($grouping);
      }
      if ($limit) {
          $self->test($limit);
      }

      # From here on, every argument is treated as part of the iohandler map.
      unless ($#args > 0) {
          $self->throw("Need iohandlers to setup initial jobs");
      }
      my %handler_for = @args;

      # Add a lowercased alias for every key; the original keys are kept too.
      # Keys and values are snapshotted before the slice assignment.
      my @names = keys %handler_for;
      my @ids   = values %handler_for;
      @handler_for{ map { lc $_ } @names } = @ids;

      return $self->iohandler_map(\%handler_for);
  }
  123. =head2 iohandler_map
  124. Title : iohandler_map
  125. Usage : $self->iohandler_map()
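  126. Function: get/set the iohandler map hash
  127. Returns : the stored hash reference (undef if none has been set)
  128. Args : (optional) a hash reference mapping input names to iohandler ids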
  129. =cut
  # Get/set accessor for the iohandler map.
  # A true argument replaces the stored value; the current value is returned.
  sub iohandler_map {
      my ($self, $map) = @_;
      $self->{'_iohandler_map'} = $map if $map;
      return $self->{'_iohandler_map'};
  }
  137. =head2 test
  138. Title : test
  139. Usage : $self->test()
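  140. Function: get/set the value of the test argument (used to limit how many jobs are created)
  141. Returns : the stored value (undef if none has been set)
  142. Args : (optional) the new value; false values are ignored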
  143. =cut
  # Get/set accessor for the test setting.
  # A true argument replaces the stored value; the current value is returned.
  sub test {
      my $self  = shift;
      my $limit = shift;
      if ($limit) {
          $self->{'_test'} = $limit;
      }
      return $self->{'_test'};
  }
  151. =head2 group
  152. Title : group
  153. Usage : $self->group()
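  154. Function: get/set the value of the group argument; when true, run() creates grouped jobs
  155. Returns : the stored value (undef if none has been set)
  156. Args : (optional) the new value; false values are ignored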
  157. =cut
  # Get/set accessor for the group setting.
  # A true argument replaces the stored value; the current value is returned.
  sub group {
      my ($self, $flag) = @_;
      $flag and $self->{'_group'} = $flag;
      return $self->{'_group'};
  }
  165. =head2 datatypes
  166. Title : datatypes
  167. Usage : $self->datatypes()
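  168. Function: describes the datatypes of the inputs this module accepts
  169. Returns : a hash with one key, 'input', holding a Bio::Pipeline::DataType named 'ids' with reftype ARRAY
  170. Args : none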
  171. =cut
  # Describes the expected input: a single 'input' entry whose datatype is an
  # ARRAY named 'ids' with an empty object type. Returns the hash itself.
  sub datatypes {
      my ($self) = @_;
      my $id_list_type = Bio::Pipeline::DataType->new(
          '-object_type' => '',
          '-name'        => 'ids',
          '-reftype'     => 'ARRAY',
      );
      my %types = (input => $id_list_type);
      return %types;
  }
  181. =head2 run
  182. Title : run
  183. Usage : $self->run()
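  184. Function: run the input create, creating and storing jobs for the next analysis
  185. Returns :
  186. Args : the next analysis, and a hash reference of inputs keyed by input name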
  187. =cut
  # Entry point: validates that $input is a hash reference, then hands off to
  # the grouped or the one-job-per-input creator depending on the group setting.
  # Throws "Expecting a hash reference" for any other kind of $input.
  sub run {
      my ($self, $next_anal, $input) = @_;
      $self->throw("Expecting a hash reference") unless ref($input) eq "HASH";

      my $ioh_map = $self->iohandler_map;
      my $creator = $self->group ? '_create_by_group' : '_create_single';
      return $self->$creator($next_anal, $input, $ioh_map);
  }
  # Creates and stores one job per input id.
  #
  # Args   : $next_anal - the analysis the new jobs belong to
  #          $input     - hash ref: input name => a single id or an array ref of ids
  #          $ioh_map   - hash ref: input name => iohandler id used for that input
  # Throws : if the value stored for an input name is false
  #
  # When the test setting is true it caps the TOTAL number of jobs created
  # across all input names. Previously the limit only stopped the current
  # input name, so every later name still produced one extra job.
  sub _create_single {
      my ($self,$next_anal,$input,$ioh_map) = @_;
      my $limit   = $self->test;
      my $created = 0;

      KEY:
      foreach my $key (keys %{$input}) {
          my $ioh = $ioh_map->{$key};
          if (!$input->{$key}) {
              $self->throw("Iohandler map for $key does not have inputs");
          }

          # Accept either a list of ids or a single bare id.
          my @ids = ref $input->{$key} eq "ARRAY"
                  ? @{$input->{$key}}
                  : ($input->{$key});

          foreach my $id (@ids) {
              my $job_input = $self->create_input($id,$ioh);
              my $job       = $self->create_job($next_anal,[$job_input]);
              $self->dbadaptor->get_JobAdaptor->store($job);
              $created++;
              # Leave both loops so the cap applies to the whole run.
              last KEY if $limit && $created >= $limit;
          }
      }
      return;
  }
  # Creates and stores one job per index position, bundling the id found at
  # that position under every input name into a single job.
  #
  # Args   : $next_anal - the analysis the new jobs belong to
  #          $input     - hash ref: input name => array ref of ids
  #          $ioh_map   - hash ref: input name => iohandler id used for that input
  #
  # The number of jobs is taken from the length of the first input's id list.
  # The loop previously ran from 0 to that length inclusive, which produced
  # one extra job whose inputs were all undefined. An empty $input now
  # creates no jobs. When the test setting is true it caps the number of
  # jobs created.
  sub _create_by_group {
      my ($self,$next_anal,$input,$ioh_map) = @_;
      my @keys = keys %{$input};
      return unless @keys;

      my $limit     = $self->test;
      my $group_cnt = scalar @{ $input->{$keys[0]} };
      my $created   = 0;

      for my $i (0 .. $group_cnt - 1) {
          my @job_inputs;
          foreach my $key (@keys) {
              my $ioh = $ioh_map->{$key};
              my $id  = $input->{$key}->[$i];
              push @job_inputs, $self->create_input($id,$ioh);
          }
          my $job = $self->create_job($next_anal,\@job_inputs);
          $self->dbadaptor->get_JobAdaptor->store($job);
          $created++;
          last if $limit && $created >= $limit;
      }
      return;
  }
  244. 1;