PageRenderTime 36ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 0ms

/Bio/Motif/__init__.py

https://github.com/chapmanb/biopython
Python | 171 lines | 133 code | 10 blank | 28 comment | 0 complexity | 4563dddd47eb12e62ded1ba7ea857f31 MD5 | raw file
  1. # Copyright 2003-2009 by Bartek Wilczynski. All rights reserved.
  2. # This code is part of the Biopython distribution and governed by its
  3. # license. Please see the LICENSE file that should have been included
  4. # as part of this package.
  5. """Tools for sequence motif analysis (OBSOLETE, see Bio.motifs instead).
  6. This module (Bio.Motif) is now obsolete, and will be deprecated and
  7. removed in a future release of Biopython. Please use the
  8. new module Bio.motifs instead.
  9. This contains the core Motif class containing various I/O methods as
  10. well as methods for motif comparisons and motif searching in sequences.
  11. It also includes functionality for parsing the output of the AlignACE and MEME programs.
  12. """
  13. import warnings
  14. warnings.warn("The module Bio.Motif is now obsolete, and will be"
  15. "deprecated and removed in a future release of"
  16. "release of Biopython. As a replacement for Bio.Motif,"
  17. "please use the new module Bio.motifs instead. Please"
  18. "be aware that though the functionality of Bio.Motif"
  19. "is retained (and extended) in Bio.motifs, usage may"
  20. "be different.",
  21. PendingDeprecationWarning)
  22. from Bio.Motif._Motif import Motif
  23. from Bio.Motif.Parsers.AlignAce import read as _AlignAce_read
  24. from Bio.Motif.Parsers.MEME import read as _MEME_read
  25. from Bio.Motif.Thresholds import ScoreDistribution
  26. _parsers={"AlignAce" : _AlignAce_read,
  27. "MEME" : _MEME_read,
  28. }
  29. def _from_pfm(handle):
  30. return Motif()._from_jaspar_pfm(handle)
  31. def _from_sites(handle):
  32. return Motif()._from_jaspar_sites(handle)
  33. _readers={"jaspar-pfm": _from_pfm,
  34. "jaspar-sites": _from_sites
  35. }
  36. def parse(handle,format):
  37. """Parses an output file of motif finding programs.
  38. Currently supported formats:
  39. - AlignAce
  40. - MEME
  41. You can also use single-motif formats, although the Bio.Motif.read()
  42. function is simpler to use in this situation.
  43. - jaspar-pfm
  44. - jaspar-sites
  45. For example:
  46. >>> from Bio import Motif
  47. >>> for motif in Motif.parse(open("Motif/alignace.out"),"AlignAce"):
  48. ... print motif.consensus()
  49. TCTACGATTGAG
  50. CTGCACCTAGCTACGAGTGAG
  51. GTGCCCTAAGCATACTAGGCG
  52. GCCACTAGCAGAGCAGGGGGC
  53. CGACTCAGAGGTT
  54. CCACGCTAAGAGAAGTGCCGGAG
  55. GCACGTCCCTGAGCA
  56. GTCCATCGCAAAGCGTGGGGC
  57. GAGATCAGAGGGCCG
  58. TGGACGCGGGG
  59. GACCAGAGCCTCGCATGGGGG
  60. AGCGCGCGTG
  61. GCCGGTTGCTGTTCATTAGG
  62. ACCGACGGCAGCTAAAAGGG
  63. GACGCCGGGGAT
  64. CGACTCGCGCTTACAAGG
  65. """
  66. try:
  67. parser=_parsers[format]
  68. except KeyError:
  69. try: #not a true parser, try reader formats
  70. reader=_readers[format]
  71. except:
  72. raise ValueError("Wrong parser format")
  73. else: #we have a proper reader
  74. yield reader(handle)
  75. else: # we have a proper reader
  76. for m in parser(handle).motifs:
  77. yield m
  78. def read(handle,format):
  79. """Reads a motif from a handle using a specified file-format.
  80. This supports the same formats as Bio.Motif.parse(), but
  81. only for files containing exactly one record. For example,
  82. reading a pfm file:
  83. >>> from Bio import Motif
  84. >>> motif = Motif.read(open("Motif/SRF.pfm"),"jaspar-pfm")
  85. >>> motif.consensus()
  86. Seq('GCCCATATATGG', IUPACUnambiguousDNA())
  87. Or a single-motif MEME file,
  88. >>> from Bio import Motif
  89. >>> motif = Motif.read(open("Motif/meme.out"),"MEME")
  90. >>> motif.consensus()
  91. Seq('CTCAATCGTA', IUPACUnambiguousDNA())
  92. If the handle contains no records, or more than one record,
  93. an exception is raised:
  94. >>> from Bio import Motif
  95. >>> motif = Motif.read(open("Motif/alignace.out"),"AlignAce")
  96. Traceback (most recent call last):
  97. ...
  98. ValueError: More than one motif found in handle
  99. If however you want the first record from a file containing
  100. multiple records this function would raise an exception (as
  101. shown in the example above). Instead use:
  102. >>> from Bio import Motif
  103. >>> motif = Motif.parse(open("Motif/alignace.out"),"AlignAce").next()
  104. >>> motif.consensus()
  105. Seq('TCTACGATTGAG', IUPACUnambiguousDNA())
  106. Use the Bio.Motif.parse(handle, format) function if you want
  107. to read multiple records from the handle.
  108. """
  109. iterator = parse(handle, format)
  110. try:
  111. first = iterator.next()
  112. except StopIteration:
  113. first = None
  114. if first is None:
  115. raise ValueError("No motifs found in handle")
  116. try:
  117. second = iterator.next()
  118. except StopIteration:
  119. second = None
  120. if second is not None:
  121. raise ValueError("More than one motif found in handle")
  122. return first
  123. def _test():
  124. """Run the Bio.Motif module's doctests.
  125. This will try and locate the unit tests directory, and run the doctests
  126. from there in order that the relative paths used in the examples work.
  127. """
  128. import doctest
  129. import os
  130. if os.path.isdir(os.path.join("..","..","Tests")):
  131. print "Runing doctests..."
  132. cur_dir = os.path.abspath(os.curdir)
  133. os.chdir(os.path.join("..","..","Tests"))
  134. doctest.testmod()
  135. os.chdir(cur_dir)
  136. del cur_dir
  137. print "Done"
  138. if __name__ == "__main__":
  139. #Run the doctests
  140. _test()