PageRenderTime 50ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/Bio/AlignIO/meme.pm

http://github.com/bioperl/bioperl-live
Perl | 236 lines | 146 code | 51 blank | 39 comment | 14 complexity | 8b1b406a32335ab32a277ec51a8cfac5 MD5 | raw file
Possible License(s): GPL-3.0, AGPL-1.0
  1. #
  2. # BioPerl module for Bio::AlignIO::meme
  3. # Based on the Bio::SeqIO modules
  4. # by Ewan Birney <birney@ebi.ac.uk>
  5. # and Lincoln Stein <lstein@cshl.org>
  6. # and the SimpleAlign.pm module of Ewan Birney
  7. #
  8. # Copyright Benjamin Berman
  9. #
  10. # You may distribute this module under the same terms as perl itself
  11. =head1 NAME
  12. Bio::AlignIO::meme - meme sequence input/output stream
  13. =head1 SYNOPSIS
  14. Do not use this module directly. Use it via the Bio::AlignIO class.
  15. use Bio::AlignIO;
  16. # read in an alignment from meme
  17. my $in = Bio::AlignIO->new(-format => 'meme',
  18. -file => 'meme.out');
  19. while( my $aln = $in->next_aln ) {
  20. # do something with the alignment
  21. }
  22. =head1 DESCRIPTION
  23. This object transforms the "sites sorted by position p-value" sections
  24. of a meme (text) output file into a series of Bio::SimpleAlign
  25. objects. Each SimpleAlign object contains Bio::LocatableSeq
  26. objects which represent the individual aligned sites as defined by
  27. the central portion of the "site" field in the meme file. The start
  28. and end coordinates are derived from the "Start" field. See
  29. L<Bio::SimpleAlign> and L<Bio::LocatableSeq> for more information.
  30. This module can only parse MEME version 3 and 4. Previous
  31. versions have output formats that are more difficult to parse
  32. correctly. If the meme output file is not version 3.0 or greater
  33. we signal an error.
  34. =head1 FEEDBACK
  35. =head2 Support
  36. Please direct usage questions or support issues to the mailing list:
  37. I<bioperl-l@bioperl.org>
  38. rather than to the module maintainer directly. Many experienced and
  39. reponsive experts will be able look at the problem and quickly
  40. address it. Please include a thorough description of the problem
  41. with code and data examples if at all possible.
  42. =head2 Reporting Bugs
  43. Report bugs to the Bioperl bug tracking system to help us keep track
  44. the bugs and their resolution. Bug reports can be submitted via the
  45. web:
  46. https://github.com/bioperl/bioperl-live/issues
  47. =head1 AUTHORS - Benjamin Berman
  48. Bbased on the Bio::SeqIO modules by Ewan Birney and others
  49. Email: benb@fruitfly.berkeley.edu
  50. =head1 APPENDIX
  51. The rest of the documentation details each of the object
  52. methods. Internal methods are usually preceded with an
  53. underscore.
  54. =cut
  55. # Let the code begin...
  56. package Bio::AlignIO::meme;
  57. use strict;
  58. use Bio::LocatableSeq;
  59. use base qw(Bio::AlignIO);
  60. # Constants
  61. my $MEME_VERS_ERR =
  62. "MEME output file must be generated by version 3.0 or higher";
  63. my $MEME_NO_HEADER_ERR =
  64. "MEME output file contains no header line (ex: MEME version 3.0)";
  65. my $HTML_VERS_ERR = "MEME output file must be generated with the -text option";
  66. =head2 next_aln
  67. Title : next_aln
  68. Usage : $aln = $stream->next_aln()
  69. Function: returns the next alignment in the stream
  70. Returns : Bio::SimpleAlign object with the score() set to the evalue of the
  71. motif.
  72. Args : NONE
  73. =cut
  74. sub next_aln {
  75. my ($self) = @_;
  76. my $aln = Bio::SimpleAlign->new( -source => 'meme' );
  77. my $line;
  78. my $good_align_sec = 0;
  79. my $in_align_sec = 0;
  80. my $evalue;
  81. while ( !$good_align_sec && defined( $line = $self->_readline() ) ) {
  82. if ( !$in_align_sec ) {
  83. # Check for the meme header
  84. if ( $line =~ /^\s*MEME\s+version\s+(\S+)/ ) {
  85. $self->{'meme_vers'} = $1;
  86. my ($vers) = $self->{'meme_vers'} =~ /^(\d)/;
  87. $self->throw($MEME_VERS_ERR) unless ( $vers >= 3 );
  88. $self->{'seen_header'} = 1;
  89. }
  90. # Check if they've output the HTML version
  91. if ( $line =~ /\<TITLE\>/i ) {
  92. $self->throw($HTML_VERS_ERR);
  93. }
  94. # Grab the evalue
  95. if ( $line =~ /MOTIF\s+\d+\s+width.+E-value = (\S+)/ ) {
  96. $self->throw($MEME_NO_HEADER_ERR)
  97. unless ( $self->{'seen_header'} );
  98. $evalue = $1;
  99. }
  100. # Check if we're going into an alignment section
  101. if ( $line =~ /sites sorted by position/ ) {
  102. $self->throw($MEME_NO_HEADER_ERR)
  103. unless ( $self->{'seen_header'} );
  104. $in_align_sec = 1;
  105. }
  106. }
  107. # The first regexp is for version 3, the second is for version 4
  108. elsif ( $line =~ /^(\S+)\s+([+-]?)\s+(\d+)\s+
  109. \S+\s+[.A-Z\-]*\s+([A-Z\-]+)\s+
  110. ([.A-Z\-]*)/xi
  111. ||
  112. $line =~ /^(\S+)\s+([+-]?)\s+(\d+)\s+
  113. \S+\s+\.\s+([A-Z\-]+)/xi
  114. )
  115. {
  116. # Got a sequence line
  117. my $seq_name = $1;
  118. my $strand = ( $2 eq '-' ) ? -1 : 1;
  119. my $start_pos = $3;
  120. my $central = uc($4);
  121. # my $p_val = $4;
  122. # my $left_flank = uc($5);
  123. # my $right_flank = uc($7);
  124. # Info about the flanking sequence
  125. # my $start_len = ($strand > 0) ? length($left_flank) :
  126. # length($right_flank);
  127. # my $end_len = ($strand > 0) ? length($right_flank) :
  128. # length($left_flank);
  129. # Make the sequence. Meme gives the start coordinate at the left
  130. # hand side of the motif relative to the INPUT sequence.
  131. my $end_pos = $start_pos + length($central) - 1;
  132. my $seq = Bio::LocatableSeq->new(
  133. -seq => $central,
  134. -display_id => $seq_name,
  135. -start => $start_pos,
  136. -end => $end_pos,
  137. -strand => $strand,
  138. -alphabet => $self->alphabet,
  139. );
  140. # Add the sequence motif to the alignment
  141. $aln->add_seq($seq);
  142. }
  143. elsif ( ( $line =~ /^\-/ ) || ( $line =~ /Sequence name/ ) ) {
  144. # These are acceptable things to be in the site section
  145. }
  146. elsif ( $line =~ /^\s*$/ ) {
  147. # This ends the site section
  148. $in_align_sec = 0;
  149. $good_align_sec = 1;
  150. }
  151. else {
  152. $self->warn("Unrecognized format:\n$line");
  153. return;
  154. }
  155. }
  156. # Signal an error if we didn't find a header section
  157. $self->throw($MEME_NO_HEADER_ERR) unless ( $self->{'seen_header'} );
  158. if ($good_align_sec) {
  159. $aln->score($evalue);
  160. return $aln;
  161. }
  162. return;
  163. }
  164. =head2 write_aln
  165. Title : write_aln
  166. Usage : $stream->write_aln(@aln)
  167. Function: Not implemented
  168. Returns : 1 for success and 0 for error
  169. Args : Bio::SimpleAlign object
  170. =cut
  171. sub write_aln {
  172. my ( $self, @aln ) = @_;
  173. $self->throw_not_implemented();
  174. }
  175. # ----------------------------------------
  176. # - Private methods
  177. # ----------------------------------------
  178. sub _initialize {
  179. my ( $self, @args ) = @_;
  180. # Call into our base version
  181. $self->SUPER::_initialize(@args);
  182. # Then initialize our data variables
  183. $self->{'seen_header'} = 0;
  184. }
  185. 1;