/lib/galaxy/datatypes/anvio.py

https://github.com/nekrut/galaxy
Python | 165 lines | 140 code | 0 blank | 25 comment | 10 complexity | 9685e3df7f238c8ba14a45436d1698d9 MD5 | raw file
  1. """
  2. Datatypes for Anvi'o
  3. https://github.com/merenlab/anvio
  4. """
  5. import glob
  6. import logging
  7. import os
  8. import sys
  9. from typing import Optional
  10. from galaxy.datatypes.metadata import MetadataElement
  11. from galaxy.datatypes.text import Html
  12. log = logging.getLogger(__name__)
  13. class AnvioComposite(Html):
  14. """
  15. Base class to use for Anvi'o composite datatypes.
  16. Generally consist of a sqlite database, plus optional additional files
  17. """
  18. file_ext = "anvio_composite"
  19. composite_type = 'auto_primary_file'
  20. def generate_primary_file(self, dataset=None):
  21. """
  22. This is called only at upload to write the html file
  23. cannot rename the datasets here - they come with the default unfortunately
  24. """
  25. defined_files = self.get_composite_files(dataset=dataset).items()
  26. rval = [f"<html><head><title>Files for Anvi'o Composite Dataset ({self.file_ext})</title></head>"]
  27. if defined_files:
  28. rval.append("<p/>This composite dataset is composed of the following defined files:<p/><ul>")
  29. for composite_name, composite_file in defined_files:
  30. opt_text = ''
  31. if composite_file.optional:
  32. opt_text = ' (optional)'
  33. missing_text = ''
  34. if not os.path.exists(os.path.join(dataset.extra_files_path, composite_name)):
  35. missing_text = ' (missing)'
  36. rval.append(f'<li><a href="{composite_name}">{composite_name}</a>{opt_text}{missing_text}</li>')
  37. rval.append("</ul>")
  38. defined_files = map(lambda x: x[0], defined_files)
  39. extra_files = []
  40. for dirpath, _dirnames, filenames in os.walk(dataset.extra_files_path, followlinks=True):
  41. for filename in filenames:
  42. rel_path = os.path.relpath(os.path.join(dirpath, filename), dataset.extra_files_path)
  43. if rel_path not in defined_files:
  44. extra_files.append(rel_path)
  45. if extra_files:
  46. rval.append("<p/>This composite dataset contains these undefined files:<p/><ul>")
  47. for rel_path in extra_files:
  48. rval.append(f'<li><a href="{rel_path}">{rel_path}</a></li>')
  49. rval.append('</ul>')
  50. if not (defined_files or extra_files):
  51. rval.append("<p/>This composite dataset does not contain any files!<p/><ul>")
  52. rval.append('</html>')
  53. return "\n".join(rval)
  54. def get_mime(self):
  55. """Returns the mime type of the datatype"""
  56. return 'text/html'
  57. def set_peek(self, dataset, is_multi_byte=False):
  58. """Set the peek and blurb text"""
  59. if not dataset.dataset.purged:
  60. dataset.peek = 'Anvio database (multiple files)'
  61. dataset.blurb = 'Anvio database (multiple files)'
  62. else:
  63. dataset.peek = 'file does not exist'
  64. dataset.blurb = 'file purged from disk'
  65. def display_peek(self, dataset):
  66. """Create HTML content, used for displaying peek."""
  67. try:
  68. return dataset.peek
  69. except Exception:
  70. return "Anvio database (multiple files)"
  71. class AnvioDB(AnvioComposite):
  72. """Class for AnvioDB database files."""
  73. _anvio_basename: Optional[str] = None
  74. MetadataElement(name="anvio_basename", default=_anvio_basename, desc="Basename", readonly=True)
  75. file_ext = 'anvio_db'
  76. def __init__(self, *args, **kwd):
  77. super().__init__(*args, **kwd)
  78. if self._anvio_basename is not None:
  79. self.add_composite_file(self._anvio_basename, is_binary=True, optional=False)
  80. def set_meta(self, dataset, **kwd):
  81. """
  82. Set the anvio_basename based upon actual extra_files_path contents.
  83. """
  84. super().set_meta(dataset, **kwd)
  85. if dataset.metadata.anvio_basename is not None and os.path.exists(os.path.join(dataset.extra_files_path, dataset.metadata.anvio_basename)):
  86. return
  87. found = False
  88. for basename in [dataset.metadata.anvio_basename, self._anvio_basename]:
  89. if found:
  90. break
  91. if basename is not None and not os.path.exists(os.path.join(dataset.extra_files_path, basename)):
  92. for name in glob.glob(os.path.join(dataset.extra_files_path, f"*{basename}")):
  93. dataset.metadata.anvio_basename = os.path.basename(name)
  94. found = True
  95. break
  96. class AnvioStructureDB(AnvioDB):
  97. """Class for Anvio Structure DB database files."""
  98. _anvio_basename = 'STRUCTURE.db'
  99. MetadataElement(name="anvio_basename", default=_anvio_basename, desc="Basename", readonly=True)
  100. file_ext = 'anvio_structure_db'
  101. class AnvioGenomesDB(AnvioDB):
  102. """Class for Anvio Genomes DB database files."""
  103. _anvio_basename = '-GENOMES.db'
  104. MetadataElement(name="anvio_basename", default=_anvio_basename, desc="Basename", readonly=True)
  105. file_ext = 'anvio_genomes_db'
  106. class AnvioContigsDB(AnvioDB):
  107. """Class for Anvio Contigs DB database files."""
  108. _anvio_basename = 'CONTIGS.db'
  109. MetadataElement(name="anvio_basename", default=_anvio_basename, desc="Basename", readonly=True)
  110. file_ext = 'anvio_contigs_db'
  111. def __init__(self, *args, **kwd):
  112. super().__init__(*args, **kwd)
  113. self.add_composite_file('CONTIGS.h5', is_binary=True, optional=True)
  114. class AnvioProfileDB(AnvioDB):
  115. """Class for Anvio Profile DB database files."""
  116. _anvio_basename = 'PROFILE.db'
  117. MetadataElement(name="anvio_basename", default=_anvio_basename, desc="Basename", readonly=True)
  118. file_ext = 'anvio_profile_db'
  119. def __init__(self, *args, **kwd):
  120. super().__init__(*args, **kwd)
  121. self.add_composite_file('RUNINFO.cp', is_binary=True, optional=True)
  122. self.add_composite_file('RUNINFO.mcp', is_binary=True, optional=True)
  123. self.add_composite_file('AUXILIARY_DATA.db', is_binary=True, optional=True)
  124. self.add_composite_file('RUNLOG.txt', is_binary=False, optional=True)
  125. class AnvioPanDB(AnvioDB):
  126. """Class for Anvio Pan DB database files."""
  127. _anvio_basename = 'PAN.db'
  128. MetadataElement(name="anvio_basename", default=_anvio_basename, desc="Basename", readonly=True)
  129. file_ext = 'anvio_pan_db'
  130. class AnvioSamplesDB(AnvioDB):
  131. """Class for Anvio Samples DB database files."""
  132. _anvio_basename = 'SAMPLES.db'
  133. MetadataElement(name="anvio_basename", default=_anvio_basename, desc="Basename", readonly=True)
  134. file_ext = 'anvio_samples_db'
  135. if __name__ == '__main__':
  136. import doctest
  137. doctest.testmod(sys.modules[__name__])