PageRenderTime 46ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/galaxy/datatypes/registry.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 816 lines | 719 code | 8 blank | 89 comment | 87 complexity | 4f8e14aa843e3b2a0ef0581dff4fd74e MD5 | raw file
  1. """
  2. Provides mapping between extensions and datatypes, mime-types, etc.
  3. """
  4. import os
  5. import sys
  6. import tempfile
  7. import threading
  8. import logging
  9. import imp
  10. import data
  11. import tabular
  12. import interval
  13. import images
  14. import sequence
  15. import qualityscore
  16. import genetics
  17. import xml
  18. import coverage
  19. import tracks
  20. import chrominfo
  21. import binary
  22. import assembly
  23. import ngsindex
  24. import graph
  25. import galaxy.util
  26. from galaxy.util.odict import odict
  27. from display_applications.application import DisplayApplication
  28. class ConfigurationError( Exception ):
  29. pass
  30. class Registry( object ):
  31. def __init__( self ):
  32. self.log = logging.getLogger(__name__)
  33. self.log.addHandler( logging.NullHandler() )
  34. self.datatypes_by_extension = {}
  35. self.mimetypes_by_extension = {}
  36. self.datatype_converters = odict()
  37. # Converters defined in local datatypes_conf.xml
  38. self.converters = []
  39. # Converters defined in datatypes_conf.xml included in installed tool shed repositories.
  40. self.proprietary_converters = []
  41. self.converter_deps = {}
  42. self.available_tracks = []
  43. self.set_external_metadata_tool = None
  44. self.sniff_order = []
  45. self.upload_file_formats = []
  46. # Datatype elements defined in local datatypes_conf.xml that contain display applications.
  47. self.display_app_containers = []
  48. # Datatype elements in datatypes_conf.xml included in installed
  49. # tool shed repositories that contain display applications.
  50. self.proprietary_display_app_containers = []
  51. # Map a display application id to a display application
  52. self.display_applications = odict()
  53. # The following 2 attributes are used in the to_xml_file()
  54. # method to persist the current state into an xml file.
  55. self.display_path_attr = None
  56. self.converters_path_attr = None
  57. # The 'default' converters_path defined in local datatypes_conf.xml
  58. self.converters_path = None
  59. # The 'default' display_path defined in local datatypes_conf.xml
  60. self.display_applications_path = None
  61. self.inherit_display_application_by_class = []
  62. # Keep a list of imported proprietary datatype class modules.
  63. self.imported_modules = []
  64. self.datatype_elems = []
  65. self.sniffer_elems = []
  66. self.xml_filename = None
  67. def load_datatypes( self, root_dir=None, config=None, deactivate=False, override=True ):
  68. """
  69. Parse a datatypes XML file located at root_dir/config (if processing the Galaxy distributed config) or contained within
  70. an installed Tool Shed repository. If deactivate is True, an installed Tool Shed repository that includes custom datatypes
  71. is being deactivated or uninstalled, so appropriate loaded datatypes will be removed from the registry. The value of
  72. override will be False when a Tool Shed repository is being installed. Since installation is occurring after the datatypes
  73. registry has been initialized at server startup, it's contents cannot be overridden by newly introduced conflicting data types.
  74. """
  75. def __import_module( full_path, datatype_module, datatype_class_name ):
  76. open_file_obj, file_name, description = imp.find_module( datatype_module, [ full_path ] )
  77. imported_module = imp.load_module( datatype_class_name, open_file_obj, file_name, description )
  78. return imported_module
  79. if root_dir and config:
  80. # If handling_proprietary_datatypes is determined as True below, we'll have an elem that looks something like this:
  81. # <datatype display_in_upload="true"
  82. # extension="blastxml"
  83. # mimetype="application/xml"
  84. # proprietary_datatype_module="blast"
  85. # proprietary_path="[cloned repository path]"
  86. # type="galaxy.datatypes.blast:BlastXml" />
  87. handling_proprietary_datatypes = False
  88. # Parse datatypes_conf.xml
  89. tree = galaxy.util.parse_xml( config )
  90. root = tree.getroot()
  91. # Load datatypes and converters from config
  92. if deactivate:
  93. self.log.debug( 'Deactivating datatypes from %s' % config )
  94. else:
  95. self.log.debug( 'Loading datatypes from %s' % config )
  96. registration = root.find( 'registration' )
  97. # Set default paths defined in local datatypes_conf.xml.
  98. if not self.converters_path:
  99. self.converters_path_attr = registration.get( 'converters_path', 'lib/galaxy/datatypes/converters' )
  100. self.converters_path = os.path.join( root_dir, self.converters_path_attr )
  101. if not os.path.isdir( self.converters_path ):
  102. raise ConfigurationError( "Directory does not exist: %s" % self.converters_path )
  103. if not self.display_applications_path:
  104. self.display_path_attr = registration.get( 'display_path', 'display_applications' )
  105. self.display_applications_path = os.path.join( root_dir, self.display_path_attr )
  106. # Proprietary datatype's <registration> tag may have special attributes, proprietary_converter_path and proprietary_display_path.
  107. proprietary_converter_path = registration.get( 'proprietary_converter_path', None )
  108. proprietary_display_path = registration.get( 'proprietary_display_path', None )
  109. if proprietary_converter_path is not None or proprietary_display_path is not None and not handling_proprietary_datatypes:
  110. handling_proprietary_datatypes = True
  111. for elem in registration.findall( 'datatype' ):
  112. # Keep a status of the process steps to enable stopping the process of handling the datatype if necessary.
  113. ok = True
  114. extension = elem.get( 'extension', None )
  115. dtype = elem.get( 'type', None )
  116. type_extension = elem.get( 'type_extension', None )
  117. mimetype = elem.get( 'mimetype', None )
  118. display_in_upload = galaxy.util.string_as_bool( elem.get( 'display_in_upload', False ) )
  119. # If make_subclass is True, it does not necessarily imply that we are subclassing a datatype that is contained
  120. # in the distribution.
  121. make_subclass = galaxy.util.string_as_bool( elem.get( 'subclass', False ) )
  122. # Proprietary datatypes included in installed tool shed repositories will include two special attributes
  123. # (proprietary_path and proprietary_datatype_module) if they depend on proprietary datatypes classes.
  124. # The value of proprietary_path is the path to the cloned location of the tool shed repository's contained
  125. # datatypes_conf.xml file.
  126. proprietary_path = elem.get( 'proprietary_path', None )
  127. proprietary_datatype_module = elem.get( 'proprietary_datatype_module', None )
  128. if proprietary_path is not None or proprietary_datatype_module is not None and not handling_proprietary_datatypes:
  129. handling_proprietary_datatypes = True
  130. if deactivate:
  131. # We are deactivating or uninstalling an installed tool shed repository, so eliminate the datatype
  132. # elem from the in-memory list of datatype elems.
  133. for in_memory_elem in self.datatype_elems:
  134. in_memory_extension = in_memory_elem.get( 'extension', None )
  135. if in_memory_extension == extension:
  136. in_memory_dtype = elem.get( 'type', None )
  137. in_memory_type_extension = elem.get( 'type_extension', None )
  138. in_memory_mimetype = elem.get( 'mimetype', None )
  139. in_memory_display_in_upload = galaxy.util.string_as_bool( elem.get( 'display_in_upload', False ) )
  140. in_memory_make_subclass = galaxy.util.string_as_bool( elem.get( 'subclass', False ) )
  141. if in_memory_dtype == dtype and \
  142. in_memory_type_extension == type_extension and \
  143. in_memory_mimetype == mimetype and \
  144. in_memory_display_in_upload == display_in_upload and \
  145. in_memory_make_subclass == make_subclass:
  146. self.datatype_elems.remove( in_memory_elem )
  147. if extension is not None and extension in self.datatypes_by_extension:
  148. # We are deactivating or uninstalling an installed tool shed repository, so eliminate the datatype
  149. # from the registry. TODO: Handle deactivating datatype converters, etc before removing from
  150. # self.datatypes_by_extension.
  151. del self.datatypes_by_extension[ extension ]
  152. if extension in self.upload_file_formats:
  153. self.upload_file_formats.remove( extension )
  154. self.log.debug( "Removed datatype with extension '%s' from the registry." % extension )
  155. else:
  156. # We are loading new datatype, so we'll make sure it is correctly defined before proceeding.
  157. can_process_datatype = False
  158. if extension is not None:
  159. if dtype is not None or type_extension is not None:
  160. if override or extension not in self.datatypes_by_extension:
  161. can_process_datatype = True
  162. if can_process_datatype:
  163. if dtype is not None:
  164. try:
  165. fields = dtype.split( ':' )
  166. datatype_module = fields[ 0 ]
  167. datatype_class_name = fields[ 1 ]
  168. except Exception, e:
  169. self.log.exception( 'Error parsing datatype definition for dtype %s: %s' % ( str( dtype ), str( e ) ) )
  170. ok = False
  171. if ok:
  172. datatype_class = None
  173. if proprietary_path and proprietary_datatype_module and datatype_class_name:
  174. # We need to change the value of sys.path, so do it in a way that is thread-safe.
  175. lock = threading.Lock()
  176. lock.acquire( True )
  177. try:
  178. imported_module = __import_module( proprietary_path,
  179. proprietary_datatype_module,
  180. datatype_class_name )
  181. if imported_module not in self.imported_modules:
  182. self.imported_modules.append( imported_module )
  183. if hasattr( imported_module, datatype_class_name ):
  184. datatype_class = getattr( imported_module, datatype_class_name )
  185. except Exception, e:
  186. full_path = os.path.join( proprietary_path, proprietary_datatype_module )
  187. self.log.debug( "Exception importing proprietary code file %s: %s" % ( str( full_path ), str( e ) ) )
  188. finally:
  189. lock.release()
  190. # Either the above exception was thrown because the proprietary_datatype_module is not derived from a class
  191. # in the repository, or we are loading Galaxy's datatypes. In either case we'll look in the registry.
  192. if datatype_class is None:
  193. try:
  194. # The datatype class name must be contained in one of the datatype modules in the Galaxy distribution.
  195. fields = datatype_module.split( '.' )
  196. module = __import__( fields.pop( 0 ) )
  197. for mod in fields:
  198. module = getattr( module, mod )
  199. datatype_class = getattr( module, datatype_class_name )
  200. self.log.debug( 'Retrieved datatype module %s from the datatype registry.' % str( datatype_module ) )
  201. except Exception, e:
  202. self.log.exception( 'Error importing datatype module %s: %s' % ( str( datatype_module ), str( e ) ) )
  203. ok = False
  204. elif type_extension is not None:
  205. try:
  206. datatype_class = self.datatypes_by_extension[ type_extension ].__class__
  207. except Exception, e:
  208. self.log.exception( 'Error determining datatype_class for type_extension %s: %s' % ( str( type_extension ), str( e ) ) )
  209. ok = False
  210. if ok:
  211. if not deactivate:
  212. # A new tool shed repository that contains custom datatypes is being installed, and since installation is
  213. # occurring after the datatypes registry has been initialized at server startup, its contents cannot be
  214. # overridden by new introduced conflicting data types unless the value of override is True.
  215. if extension in self.datatypes_by_extension:
  216. # Because of the way that the value of can_process_datatype was set above, we know that the value of
  217. # override is True.
  218. self.log.debug( "Overriding conflicting datatype with extension '%s', using datatype from %s." % \
  219. ( str( extension ), str( config ) ) )
  220. if make_subclass:
  221. datatype_class = type( datatype_class_name, ( datatype_class, ), {} )
  222. self.datatypes_by_extension[ extension ] = datatype_class()
  223. if mimetype is None:
  224. # Use default mimetype per datatype specification.
  225. mimetype = self.datatypes_by_extension[ extension ].get_mime()
  226. self.mimetypes_by_extension[ extension ] = mimetype
  227. if datatype_class.track_type:
  228. self.available_tracks.append( extension )
  229. if display_in_upload and extension not in self.upload_file_formats:
  230. self.upload_file_formats.append( extension )
  231. # Max file size cut off for setting optional metadata.
  232. self.datatypes_by_extension[ extension ].max_optional_metadata_filesize = elem.get( 'max_optional_metadata_filesize', None )
  233. for converter in elem.findall( 'converter' ):
  234. # Build the list of datatype converters which will later be loaded into the calling app's toolbox.
  235. converter_config = converter.get( 'file', None )
  236. target_datatype = converter.get( 'target_datatype', None )
  237. depends_on = converter.get( 'depends_on', None )
  238. if depends_on is not None and target_datatype is not None:
  239. if extension not in self.converter_deps:
  240. self.converter_deps[ extension ] = {}
  241. self.converter_deps[ extension ][ target_datatype ] = depends_on.split( ',' )
  242. if converter_config and target_datatype:
  243. if proprietary_converter_path:
  244. self.proprietary_converters.append( ( converter_config, extension, target_datatype ) )
  245. else:
  246. self.converters.append( ( converter_config, extension, target_datatype ) )
  247. # Add composite files.
  248. for composite_file in elem.findall( 'composite_file' ):
  249. name = composite_file.get( 'name', None )
  250. if name is None:
  251. self.log.warning( "You must provide a name for your composite_file (%s)." % composite_file )
  252. optional = composite_file.get( 'optional', False )
  253. mimetype = composite_file.get( 'mimetype', None )
  254. self.datatypes_by_extension[ extension ].add_composite_file( name, optional=optional, mimetype=mimetype )
  255. for display_app in elem.findall( 'display' ):
  256. if proprietary_display_path:
  257. if elem not in self.proprietary_display_app_containers:
  258. self.proprietary_display_app_containers.append( elem )
  259. else:
  260. if elem not in self.display_app_containers:
  261. self.display_app_containers.append( elem )
  262. # Processing the new datatype elem is now complete, so make sure the element defining it is retained by appending
  263. # the new datatype to the in-memory list of datatype elems to enable persistence.
  264. self.datatype_elems.append( elem )
  265. else:
  266. if extension is not None:
  267. if dtype is not None or type_extension is not None:
  268. if extension in self.datatypes_by_extension:
  269. if not override:
  270. # Do not load the datatype since it conflicts with an existing datatype which we are not supposed
  271. # to override.
  272. self.log.debug( "Ignoring conflicting datatype with extension '%s' from %s." % ( extension, config ) )
  273. # Load datatype sniffers from the config - we'll do this even if one or more datatypes were not properly processed in the config
  274. # since sniffers are not tightly coupled with datatypes.
  275. self.load_datatype_sniffers( root,
  276. deactivate=deactivate,
  277. handling_proprietary_datatypes=handling_proprietary_datatypes,
  278. override=override )
  279. self.upload_file_formats.sort()
  280. # Persist the xml form of the registry into a temporary file so that it can be loaded from the command line by tools and
  281. # set_metadata processing.
  282. self.to_xml_file()
  283. self.set_default_values()
  284. def append_to_sniff_order():
  285. # Just in case any supported data types are not included in the config's sniff_order section.
  286. for ext in self.datatypes_by_extension:
  287. datatype = self.datatypes_by_extension[ ext ]
  288. included = False
  289. for atype in self.sniff_order:
  290. if isinstance( atype, datatype.__class__ ):
  291. included = True
  292. break
  293. if not included:
  294. self.sniff_order.append( datatype )
  295. append_to_sniff_order()
  296. def load_datatype_sniffers( self, root, deactivate=False, handling_proprietary_datatypes=False, override=False ):
  297. """
  298. Process the sniffers element from a parsed a datatypes XML file located at root_dir/config (if processing the Galaxy
  299. distributed config) or contained within an installed Tool Shed repository. If deactivate is True, an installed Tool
  300. Shed repository that includes custom sniffers is being deactivated or uninstalled, so appropriate loaded sniffers will
  301. be removed from the registry. The value of override will be False when a Tool Shed repository is being installed.
  302. Since installation is occurring after the datatypes registry has been initialized at server startup, it's contents
  303. cannot be overridden by newly introduced conflicting sniffers.
  304. """
  305. sniffer_elem_classes = [ e.attrib[ 'type' ] for e in self.sniffer_elems ]
  306. sniffers = root.find( 'sniffers' )
  307. if sniffers:
  308. for elem in sniffers.findall( 'sniffer' ):
  309. # Keep a status of the process steps to enable stopping the process of handling the sniffer if necessary.
  310. ok = True
  311. dtype = elem.get( 'type', None )
  312. if dtype is not None:
  313. try:
  314. fields = dtype.split( ":" )
  315. datatype_module = fields[ 0 ]
  316. datatype_class_name = fields[ 1 ]
  317. module = None
  318. except Exception, e:
  319. self.log.exception( 'Error determining datatype class or module for dtype %s: %s' % ( str( dtype ), str( e ) ) )
  320. ok = False
  321. if ok:
  322. if handling_proprietary_datatypes:
  323. # See if one of the imported modules contains the datatype class name.
  324. for imported_module in self.imported_modules:
  325. if hasattr( imported_module, datatype_class_name ):
  326. module = imported_module
  327. break
  328. if module is None:
  329. try:
  330. # The datatype class name must be contained in one of the datatype modules in the Galaxy distribution.
  331. module = __import__( datatype_module )
  332. for comp in datatype_module.split( '.' )[ 1: ]:
  333. module = getattr( module, comp )
  334. except Exception, e:
  335. self.log.exception( "Error importing datatype class for '%s': %s" % ( str( dtype ), str( e ) ) )
  336. ok = False
  337. if ok:
  338. try:
  339. aclass = getattr( module, datatype_class_name )()
  340. except Exception, e:
  341. self.log.exception( 'Error calling method %s from class %s: %s', str( datatype_class_name ), str( module ), str( e ) )
  342. ok = False
  343. if ok:
  344. if deactivate:
  345. # We are deactivating or uninstalling an installed Tool Shed repository, so eliminate the appropriate sniffers.
  346. sniffer_class = elem.get( 'type', None )
  347. if sniffer_class is not None:
  348. for index, s_e_c in enumerate( sniffer_elem_classes ):
  349. if sniffer_class == s_e_c:
  350. del self.sniffer_elems[ index ]
  351. sniffer_elem_classes = [ e.attrib[ 'type' ] for e in self.sniffer_elems ]
  352. self.log.debug( "Removed sniffer element for datatype '%s'" % str( dtype ) )
  353. break
  354. for sniffer_class in self.sniff_order:
  355. if sniffer_class.__class__ == aclass.__class__:
  356. self.sniff_order.remove( sniffer_class )
  357. self.log.debug( "Removed sniffer class for datatype '%s' from sniff order" % str( dtype ) )
  358. break
  359. else:
  360. # We are loading new sniffer, so see if we have a conflicting sniffer already loaded.
  361. conflict = False
  362. for conflict_loc, sniffer_class in enumerate( self.sniff_order ):
  363. if sniffer_class.__class__ == aclass.__class__:
  364. # We have a conflicting sniffer, so replace the one previously loaded.
  365. conflict = True
  366. if override:
  367. del self.sniff_order[ conflict_loc ]
  368. self.log.debug( "Removed conflicting sniffer for datatype '%s'" % dtype )
  369. break
  370. if conflict:
  371. if override:
  372. self.sniff_order.append( aclass )
  373. self.log.debug( "Loaded sniffer for datatype '%s'" % dtype )
  374. else:
  375. self.sniff_order.append( aclass )
  376. self.log.debug( "Loaded sniffer for datatype '%s'" % dtype )
  377. # Processing the new sniffer elem is now complete, so make sure the element defining it is loaded if necessary.
  378. sniffer_class = elem.get( 'type', None )
  379. if sniffer_class is not None:
  380. if sniffer_class not in sniffer_elem_classes:
  381. self.sniffer_elems.append( elem )
  382. def get_datatype_class_by_name( self, name ):
  383. """
  384. Return the datatype class where the datatype's `type` attribute
  385. (as defined in the datatype_conf.xml file) contains `name`.
  386. """
  387. #TODO: too roundabout - would be better to generate this once as a map and store in this object
  388. found_class = None
  389. for ext, datatype_obj in self.datatypes_by_extension.items():
  390. datatype_obj_class = datatype_obj.__class__
  391. datatype_obj_class_str = str( datatype_obj_class )
  392. #print datatype_obj_class_str
  393. if name in datatype_obj_class_str:
  394. return datatype_obj_class
  395. return None
  396. # these seem to be connected to the dynamic classes being generated in this file, lines 157-158
  397. # they appear when a one of the three are used in inheritance with subclass="True"
  398. #TODO: a possible solution is to def a fn in datatypes __init__ for creating the dynamic classes
  399. #remap = {
  400. # 'galaxy.datatypes.registry.Tabular' : galaxy.datatypes.tabular.Tabular,
  401. # 'galaxy.datatypes.registry.Text' : galaxy.datatypes.data.Text,
  402. # 'galaxy.datatypes.registry.Binary' : galaxy.datatypes.binary.Binary
  403. #}
  404. #datatype_str = str( datatype )
  405. #if datatype_str in remap:
  406. # datatype = remap[ datatype_str ]
  407. #
  408. #return datatype
  409. def get_available_tracks( self ):
  410. return self.available_tracks
  411. def get_mimetype_by_extension( self, ext, default='application/octet-stream' ):
  412. """Returns a mimetype based on an extension"""
  413. try:
  414. mimetype = self.mimetypes_by_extension[ ext ]
  415. except KeyError:
  416. #datatype was never declared
  417. mimetype = default
  418. self.log.warning( 'unknown mimetype in data factory %s' % str( ext ) )
  419. return mimetype
  420. def get_datatype_by_extension( self, ext ):
  421. """Returns a datatype based on an extension"""
  422. try:
  423. builder = self.datatypes_by_extension[ ext ]
  424. except KeyError:
  425. builder = data.Text()
  426. return builder
  427. def change_datatype( self, data, ext ):
  428. data.extension = ext
  429. # call init_meta and copy metadata from itself. The datatype
  430. # being converted *to* will handle any metadata copying and
  431. # initialization.
  432. if data.has_data():
  433. data.set_size()
  434. data.init_meta( copy_from=data )
  435. return data
  436. def old_change_datatype( self, data, ext ):
  437. """Creates and returns a new datatype based on an existing data and an extension"""
  438. newdata = factory( ext )( id=data.id )
  439. for key, value in data.__dict__.items():
  440. setattr( newdata, key, value )
  441. newdata.ext = ext
  442. return newdata
  443. def load_datatype_converters( self, toolbox, installed_repository_dict=None, deactivate=False ):
  444. """
  445. If deactivate is False, add datatype converters from self.converters or self.proprietary_converters
  446. to the calling app's toolbox. If deactivate is True, eliminates relevant converters from the calling
  447. app's toolbox.
  448. """
  449. if installed_repository_dict:
  450. # Load converters defined by datatypes_conf.xml included in installed tool shed repository.
  451. converters = self.proprietary_converters
  452. else:
  453. # Load converters defined by local datatypes_conf.xml.
  454. converters = self.converters
  455. for elem in converters:
  456. tool_config = elem[ 0 ]
  457. source_datatype = elem[ 1 ]
  458. target_datatype = elem[ 2 ]
  459. if installed_repository_dict:
  460. converter_path = installed_repository_dict[ 'converter_path' ]
  461. else:
  462. converter_path = self.converters_path
  463. try:
  464. config_path = os.path.join( converter_path, tool_config )
  465. converter = toolbox.load_tool( config_path )
  466. if installed_repository_dict:
  467. # If the converter is included in an installed tool shed repository, set the tool
  468. # shed related tool attributes.
  469. converter.tool_shed = installed_repository_dict[ 'tool_shed' ]
  470. converter.repository_name = installed_repository_dict[ 'repository_name' ]
  471. converter.repository_owner = installed_repository_dict[ 'repository_owner' ]
  472. converter.installed_changeset_revision = installed_repository_dict[ 'installed_changeset_revision' ]
  473. converter.old_id = converter.id
  474. # The converter should be included in the list of tools defined in tool_dicts.
  475. tool_dicts = installed_repository_dict[ 'tool_dicts' ]
  476. for tool_dict in tool_dicts:
  477. if tool_dict[ 'id' ] == converter.id:
  478. converter.guid = tool_dict[ 'guid' ]
  479. converter.id = tool_dict[ 'guid' ]
  480. break
  481. if deactivate:
  482. if converter.id in toolbox.tools_by_id:
  483. del toolbox.tools_by_id[ converter.id ]
  484. if source_datatype in self.datatype_converters:
  485. if target_datatype in self.datatype_converters[ source_datatype ]:
  486. del self.datatype_converters[ source_datatype ][ target_datatype ]
  487. self.log.debug( "Deactivated converter: %s", converter.id )
  488. else:
  489. toolbox.tools_by_id[ converter.id ] = converter
  490. if source_datatype not in self.datatype_converters:
  491. self.datatype_converters[ source_datatype ] = odict()
  492. self.datatype_converters[ source_datatype ][ target_datatype ] = converter
  493. self.log.debug( "Loaded converter: %s", converter.id )
  494. except Exception, e:
  495. if deactivate:
  496. self.log.exception( "Error deactivating converter from (%s): %s" % ( converter_path, str( e ) ) )
  497. else:
  498. self.log.exception( "Error loading converter (%s): %s" % ( converter_path, str( e ) ) )
  499. def load_display_applications( self, installed_repository_dict=None, deactivate=False ):
  500. """
  501. If deactivate is False, add display applications from self.display_app_containers or
  502. self.proprietary_display_app_containers to appropriate datatypes. If deactivate is
  503. True, eliminates relevant display applications from appropriate datatypes.
  504. """
  505. if installed_repository_dict:
  506. # Load display applications defined by datatypes_conf.xml included in installed tool shed repository.
  507. datatype_elems = self.proprietary_display_app_containers
  508. else:
  509. # Load display applications defined by local datatypes_conf.xml.
  510. datatype_elems = self.display_app_containers
  511. for elem in datatype_elems:
  512. extension = elem.get( 'extension', None )
  513. for display_app in elem.findall( 'display' ):
  514. display_file = display_app.get( 'file', None )
  515. if installed_repository_dict:
  516. display_path = installed_repository_dict[ 'display_path' ]
  517. display_file_head, display_file_tail = os.path.split( display_file )
  518. config_path = os.path.join( display_path, display_file_tail )
  519. else:
  520. config_path = os.path.join( self.display_applications_path, display_file )
  521. try:
  522. inherit = galaxy.util.string_as_bool( display_app.get( 'inherit', 'False' ) )
  523. display_app = DisplayApplication.from_file( config_path, self )
  524. if display_app:
  525. if display_app.id in self.display_applications:
  526. if deactivate:
  527. del self.display_applications[ display_app.id ]
  528. else:
  529. # If we already loaded this display application, we'll use the first one loaded.
  530. display_app = self.display_applications[ display_app.id ]
  531. elif installed_repository_dict:
  532. # If the display application is included in an installed tool shed repository,
  533. # set the tool shed related tool attributes.
  534. display_app.tool_shed = installed_repository_dict[ 'tool_shed' ]
  535. display_app.repository_name = installed_repository_dict[ 'repository_name' ]
  536. display_app.repository_owner = installed_repository_dict[ 'repository_owner' ]
  537. display_app.installed_changeset_revision = installed_repository_dict[ 'installed_changeset_revision' ]
  538. display_app.old_id = display_app.id
  539. # The display application should be included in the list of tools defined in tool_dicts.
  540. tool_dicts = installed_repository_dict[ 'tool_dicts' ]
  541. for tool_dict in tool_dicts:
  542. if tool_dict[ 'id' ] == display_app.id:
  543. display_app.guid = tool_dict[ 'guid' ]
  544. display_app.id = tool_dict[ 'guid' ]
  545. break
  546. if deactivate:
  547. if display_app.id in self.display_applications:
  548. del self.display_applications[ display_app.id ]
  549. if extension in self.datatypes_by_extension:
  550. if display_app.id in self.datatypes_by_extension[ extension ].display_applications:
  551. del self.datatypes_by_extension[ extension ].display_applications[ display_app.id ]
  552. if inherit and ( self.datatypes_by_extension[ extension ], display_app ) in self.inherit_display_application_by_class:
  553. self.inherit_display_application_by_class.remove( ( self.datatypes_by_extension[ extension ], display_app ) )
  554. self.log.debug( "Deactivated display application '%s' for datatype '%s'." % ( display_app.id, extension ) )
  555. else:
  556. self.display_applications[ display_app.id ] = display_app
  557. self.datatypes_by_extension[ extension ].add_display_application( display_app )
  558. if inherit and ( self.datatypes_by_extension[ extension ], display_app ) not in self.inherit_display_application_by_class:
  559. self.inherit_display_application_by_class.append( ( self.datatypes_by_extension[ extension ], display_app ) )
  560. self.log.debug( "Loaded display application '%s' for datatype '%s', inherit=%s." % ( display_app.id, extension, inherit ) )
  561. except Exception, e:
  562. if deactivate:
  563. self.log.exception( "Error deactivating display application (%s): %s" % ( config_path, str( e ) ) )
  564. else:
  565. self.log.exception( "Error loading display application (%s): %s" % ( config_path, str( e ) ) )
  566. # Handle display_application subclass inheritance.
  567. for extension, d_type1 in self.datatypes_by_extension.iteritems():
  568. for d_type2, display_app in self.inherit_display_application_by_class:
  569. current_app = d_type1.get_display_application( display_app.id, None )
  570. if current_app is None and isinstance( d_type1, type( d_type2 ) ):
  571. self.log.debug( "Adding inherited display application '%s' to datatype '%s'" % ( display_app.id, extension ) )
  572. d_type1.add_display_application( display_app )
  573. def load_external_metadata_tool( self, toolbox ):
  574. """Adds a tool which is used to set external metadata"""
  575. # We need to be able to add a job to the queue to set metadata. The queue will currently only accept jobs with an associated
  576. # tool. We'll create a special tool to be used for Auto-Detecting metadata; this is less than ideal, but effective
  577. # Properly building a tool without relying on parsing an XML file is near impossible...so we'll create a temporary file
  578. tool_xml_text = """
  579. <tool id="__SET_METADATA__" name="Set External Metadata" version="1.0.1" tool_type="set_metadata">
  580. <type class="SetMetadataTool" module="galaxy.tools"/>
  581. <requirements>
  582. <requirement type="package">samtools</requirement>
  583. </requirements>
  584. <action module="galaxy.tools.actions.metadata" class="SetMetadataToolAction"/>
  585. <command>$__SET_EXTERNAL_METADATA_COMMAND_LINE__</command>
  586. <inputs>
  587. <param format="data" name="input1" type="data" label="File to set metadata on."/>
  588. <param name="__ORIGINAL_DATASET_STATE__" type="hidden" value=""/>
  589. <param name="__SET_EXTERNAL_METADATA_COMMAND_LINE__" type="hidden" value=""/>
  590. </inputs>
  591. </tool>
  592. """
  593. tmp_name = tempfile.NamedTemporaryFile()
  594. tmp_name.write( tool_xml_text )
  595. tmp_name.flush()
  596. set_meta_tool = toolbox.load_tool( tmp_name.name )
  597. toolbox.tools_by_id[ set_meta_tool.id ] = set_meta_tool
  598. self.set_external_metadata_tool = set_meta_tool
  599. self.log.debug( "Loaded external metadata tool: %s", self.set_external_metadata_tool.id )
  600. def set_default_values( self ):
  601. # Default values.
  602. if not self.datatypes_by_extension:
  603. self.datatypes_by_extension = {
  604. 'ab1' : binary.Ab1(),
  605. 'axt' : sequence.Axt(),
  606. 'bam' : binary.Bam(),
  607. 'bed' : interval.Bed(),
  608. 'coverage' : coverage.LastzCoverage(),
  609. 'customtrack' : interval.CustomTrack(),
  610. 'csfasta' : sequence.csFasta(),
  611. 'fasta' : sequence.Fasta(),
  612. 'eland' : tabular.Eland(),
  613. 'fastq' : sequence.Fastq(),
  614. 'fastqsanger' : sequence.FastqSanger(),
  615. 'gtf' : interval.Gtf(),
  616. 'gff' : interval.Gff(),
  617. 'gff3' : interval.Gff3(),
  618. 'genetrack' : tracks.GeneTrack(),
  619. 'interval' : interval.Interval(),
  620. 'laj' : images.Laj(),
  621. 'lav' : sequence.Lav(),
  622. 'maf' : sequence.Maf(),
  623. 'pileup' : tabular.Pileup(),
  624. 'qualsolid' : qualityscore.QualityScoreSOLiD(),
  625. 'qualsolexa' : qualityscore.QualityScoreSolexa(),
  626. 'qual454' : qualityscore.QualityScore454(),
  627. 'sam' : tabular.Sam(),
  628. 'scf' : binary.Scf(),
  629. 'sff' : binary.Sff(),
  630. 'tabular' : tabular.Tabular(),
  631. 'taxonomy' : tabular.Taxonomy(),
  632. 'txt' : data.Text(),
  633. 'wig' : interval.Wiggle(),
  634. 'xml' : xml.GenericXml(),
  635. 'cel.zip' : binary.CelZip(), # Cistrome Customized Datatype: CelZip for expression tools
  636. 'xys.zip' : binary.XysZip(), # Cistrome Customized Datatype: XysZip for nimblegen expression tools
  637. 'eset' : binary.Eset(), # Cistrome Customized Datatype: Eset file is the output from Bioconductor
  638. 'cel' : binary.Cel(), # Cistrome Customized Datatype: Cel file is required by MAT
  639. }
  640. self.mimetypes_by_extension = {
  641. 'ab1' : 'application/octet-stream',
  642. 'axt' : 'text/plain',
  643. 'bam' : 'application/octet-stream',
  644. 'bed' : 'text/plain',
  645. 'customtrack' : 'text/plain',
  646. 'csfasta' : 'text/plain',
  647. 'eland' : 'application/octet-stream',
  648. 'fasta' : 'text/plain',
  649. 'fastq' : 'text/plain',
  650. 'fastqsanger' : 'text/plain',
  651. 'gtf' : 'text/plain',
  652. 'gff' : 'text/plain',
  653. 'gff3' : 'text/plain',
  654. 'interval' : 'text/plain',
  655. 'laj' : 'text/plain',
  656. 'lav' : 'text/plain',
  657. 'maf' : 'text/plain',
  658. 'memexml' : 'application/xml',
  659. 'pileup' : 'text/plain',
  660. 'qualsolid' : 'text/plain',
  661. 'qualsolexa' : 'text/plain',
  662. 'qual454' : 'text/plain',
  663. 'sam' : 'text/plain',
  664. 'scf' : 'application/octet-stream',
  665. 'sff' : 'application/octet-stream',
  666. 'tabular' : 'text/plain',
  667. 'taxonomy' : 'text/plain',
  668. 'txt' : 'text/plain',
  669. 'wig' : 'text/plain',
  670. 'xml' : 'application/xml',
  671. }
  672. # super supertype fix for input steps in workflows.
  673. if 'data' not in self.datatypes_by_extension:
  674. self.datatypes_by_extension[ 'data' ] = data.Data()
  675. self.mimetypes_by_extension[ 'data' ] = 'application/octet-stream'
  676. # Default values - the order in which we attempt to determine data types is critical
  677. # because some formats are much more flexibly defined than others.
  678. if len( self.sniff_order ) < 1:
  679. self.sniff_order = [
  680. binary.Cel(), # Cistrome Customized Datatype: Cel file is required by MAT
  681. binary.Bam(),
  682. binary.Sff(),
  683. xml.GenericXml(),
  684. sequence.Maf(),
  685. sequence.Lav(),
  686. sequence.csFasta(),
  687. qualityscore.QualityScoreSOLiD(),
  688. qualityscore.QualityScore454(),
  689. sequence.Fasta(),
  690. sequence.Fastq(),
  691. interval.Wiggle(),
  692. images.Html(),
  693. sequence.Axt(),
  694. interval.Bed(),
  695. interval.CustomTrack(),
  696. interval.Gtf(),
  697. interval.Gff(),
  698. interval.Gff3(),
  699. tabular.Pileup(),
  700. interval.Interval(),
  701. tabular.Sam(),
  702. tabular.Eland()
  703. ]
  704. def get_converters_by_datatype( self, ext ):
  705. """Returns available converters by source type"""
  706. converters = odict()
  707. source_datatype = type( self.get_datatype_by_extension( ext ) )
  708. for ext2, dict in self.datatype_converters.items():
  709. converter_datatype = type( self.get_datatype_by_extension( ext2 ) )
  710. if issubclass( source_datatype, converter_datatype ):
  711. converters.update( dict )
  712. #Ensure ext-level converters are present
  713. if ext in self.datatype_converters.keys():
  714. converters.update( self.datatype_converters[ ext ] )
  715. return converters
  716. def get_converter_by_target_type( self, source_ext, target_ext ):
  717. """Returns a converter based on source and target datatypes"""
  718. converters = self.get_converters_by_datatype( source_ext )
  719. if target_ext in converters.keys():
  720. return converters[ target_ext ]
  721. return None
  722. def find_conversion_destination_for_dataset_by_extensions( self, dataset, accepted_formats, converter_safe=True ):
  723. """Returns ( target_ext, existing converted dataset )"""
  724. for convert_ext in self.get_converters_by_datatype( dataset.ext ):
  725. if self.get_datatype_by_extension( convert_ext ).matches_any( accepted_formats ):
  726. converted_dataset = dataset.get_converted_files_by_type( convert_ext )
  727. if converted_dataset:
  728. ret_data = converted_dataset
  729. elif not converter_safe:
  730. continue
  731. else:
  732. ret_data = None
  733. return ( convert_ext, ret_data )
  734. return ( None, None )
  735. def get_composite_extensions( self ):
  736. return [ ext for ( ext, d_type ) in self.datatypes_by_extension.iteritems() if d_type.composite_type is not None ]
  737. def get_upload_metadata_params( self, context, group, tool ):
  738. """Returns dict of case value:inputs for metadata conditional for upload tool"""
  739. rval = {}
  740. for ext, d_type in self.datatypes_by_extension.iteritems():
  741. inputs = []
  742. for meta_name, meta_spec in d_type.metadata_spec.iteritems():
  743. if meta_spec.set_in_upload:
  744. help_txt = meta_spec.desc
  745. if not help_txt or help_txt == meta_name:
  746. help_txt = ""
  747. inputs.append( '<param type="text" name="%s" label="Set metadata value for &quot;%s&quot;" value="%s" help="%s"/>' % ( meta_name, meta_name, meta_spec.default, help_txt ) )
  748. rval[ ext ] = "\n".join( inputs )
  749. if 'auto' not in rval and 'txt' in rval: #need to manually add 'auto' datatype
  750. rval[ 'auto' ] = rval[ 'txt' ]
  751. return rval
  752. @property
  753. def integrated_datatypes_configs( self ):
  754. if self.xml_filename and os.path.isfile( self.xml_filename ):
  755. return self.xml_filename
  756. self.to_xml_file()
  757. return self.xml_filename
  758. def to_xml_file( self ):
  759. if self.xml_filename is not None:
  760. # If persisted previously, attempt to remove the temporary file in which we were written.
  761. try:
  762. os.unlink( self.xml_filename )
  763. except:
  764. pass
  765. self.xml_filename = None
  766. fd, filename = tempfile.mkstemp()
  767. self.xml_filename = os.path.abspath( filename )
  768. if self.converters_path_attr:
  769. converters_path_str = ' converters_path="%s"' % self.converters_path_attr
  770. else:
  771. converters_path_str = ''
  772. if self.display_path_attr:
  773. display_path_str = ' display_path="%s"' % self.display_path_attr
  774. else:
  775. display_path_str = ''
  776. os.write( fd, '<?xml version="1.0"?>\n' )
  777. os.write( fd, '<datatypes>\n' )
  778. os.write( fd, '<registration%s%s>\n' % ( converters_path_str, display_path_str ) )
  779. for elem in self.datatype_elems:
  780. os.write( fd, '%s' % galaxy.util.xml_to_string( elem ) )
  781. os.write( fd, '</registration>\n' )
  782. os.write( fd, '<sniffers>\n' )
  783. for elem in self.sniffer_elems:
  784. os.write( fd, '%s' % galaxy.util.xml_to_string( elem ) )
  785. os.write( fd, '</sniffers>\n' )
  786. os.write( fd, '</datatypes>\n' )
  787. os.close( fd )
  788. os.chmod( self.xml_filename, 0644 )