PageRenderTime 142ms CodeModel.GetById 83ms app.highlight 49ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/galaxy/datatypes/registry.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 816 lines | 719 code | 8 blank | 89 comment | 120 complexity | 4f8e14aa843e3b2a0ef0581dff4fd74e MD5 | raw file

Large files are truncated, but you can click here to view the full file

  1"""
  2Provides mapping between extensions and datatypes, mime-types, etc.
  3"""
  4import os
  5import sys
  6import tempfile
  7import threading
  8import logging
  9import imp
 10import data
 11import tabular
 12import interval
 13import images
 14import sequence
 15import qualityscore
 16import genetics
 17import xml
 18import coverage
 19import tracks
 20import chrominfo
 21import binary
 22import assembly
 23import ngsindex
 24import graph
 25import galaxy.util
 26from galaxy.util.odict import odict
 27from display_applications.application import DisplayApplication
 28
 29
 30class ConfigurationError( Exception ):
 31    pass
 32
 33
 34class Registry( object ):
 35
 36    def __init__( self ):
 37        self.log = logging.getLogger(__name__)
 38        self.log.addHandler( logging.NullHandler() )
 39        self.datatypes_by_extension = {}
 40        self.mimetypes_by_extension = {}
 41        self.datatype_converters = odict()
 42        # Converters defined in local datatypes_conf.xml
 43        self.converters = []
 44        # Converters defined in datatypes_conf.xml included in installed tool shed repositories.
 45        self.proprietary_converters = []
 46        self.converter_deps = {}
 47        self.available_tracks = []
 48        self.set_external_metadata_tool = None
 49        self.sniff_order = []
 50        self.upload_file_formats = []
 51        # Datatype elements defined in local datatypes_conf.xml that contain display applications.
 52        self.display_app_containers = []
 53        # Datatype elements in datatypes_conf.xml included in installed
 54        # tool shed repositories that contain display applications.
 55        self.proprietary_display_app_containers = []
 56        # Map a display application id to a display application
 57        self.display_applications = odict()
 58        # The following 2 attributes are used in the to_xml_file()
 59        # method to persist the current state into an xml file.
 60        self.display_path_attr = None
 61        self.converters_path_attr = None
 62        # The 'default' converters_path defined in local datatypes_conf.xml
 63        self.converters_path = None
 64        # The 'default' display_path defined in local datatypes_conf.xml
 65        self.display_applications_path = None
 66        self.inherit_display_application_by_class = []
 67        # Keep a list of imported proprietary datatype class modules.
 68        self.imported_modules = []
 69        self.datatype_elems = []
 70        self.sniffer_elems = []
 71        self.xml_filename = None
 72
 73    def load_datatypes( self, root_dir=None, config=None, deactivate=False, override=True ):
 74        """
 75        Parse a datatypes XML file located at root_dir/config (if processing the Galaxy distributed config) or contained within
 76        an installed Tool Shed repository.  If deactivate is True, an installed Tool Shed repository that includes custom datatypes
 77        is being deactivated or uninstalled, so appropriate loaded datatypes will be removed from the registry.  The value of
 78        override will be False when a Tool Shed repository is being installed.  Since installation is occurring after the datatypes
 79        registry has been initialized at server startup, it's contents cannot be overridden by newly introduced conflicting data types.
 80        """
 81
 82        def __import_module( full_path, datatype_module, datatype_class_name ):
 83            open_file_obj, file_name, description = imp.find_module( datatype_module, [ full_path ] )
 84            imported_module = imp.load_module( datatype_class_name, open_file_obj, file_name, description )
 85            return imported_module
 86
 87        if root_dir and config:
 88            # If handling_proprietary_datatypes is determined as True below, we'll have an elem that looks something like this:
 89            # <datatype display_in_upload="true"
 90            #           extension="blastxml"
 91            #           mimetype="application/xml"
 92            #           proprietary_datatype_module="blast"
 93            #           proprietary_path="[cloned repository path]"
 94            #           type="galaxy.datatypes.blast:BlastXml" />
 95            handling_proprietary_datatypes = False
 96            # Parse datatypes_conf.xml
 97            tree = galaxy.util.parse_xml( config )
 98            root = tree.getroot()
 99            # Load datatypes and converters from config
100            if deactivate:
101                self.log.debug( 'Deactivating datatypes from %s' % config )
102            else:
103                self.log.debug( 'Loading datatypes from %s' % config )
104            registration = root.find( 'registration' )
105            # Set default paths defined in local datatypes_conf.xml.
106            if not self.converters_path:
107                self.converters_path_attr = registration.get( 'converters_path', 'lib/galaxy/datatypes/converters' )
108                self.converters_path = os.path.join( root_dir, self.converters_path_attr )
109                if not os.path.isdir( self.converters_path ):
110                    raise ConfigurationError( "Directory does not exist: %s" % self.converters_path )
111            if not self.display_applications_path:
112                self.display_path_attr = registration.get( 'display_path', 'display_applications' )
113                self.display_applications_path = os.path.join( root_dir, self.display_path_attr )
114            # Proprietary datatype's <registration> tag may have special attributes, proprietary_converter_path and proprietary_display_path.
115            proprietary_converter_path = registration.get( 'proprietary_converter_path', None )
116            proprietary_display_path = registration.get( 'proprietary_display_path', None )
117            if proprietary_converter_path is not None or proprietary_display_path is not None and not handling_proprietary_datatypes:
118                handling_proprietary_datatypes = True
119            for elem in registration.findall( 'datatype' ):
120                # Keep a status of the process steps to enable stopping the process of handling the datatype if necessary.
121                ok = True
122                extension = elem.get( 'extension', None )
123                dtype = elem.get( 'type', None )
124                type_extension = elem.get( 'type_extension', None )
125                mimetype = elem.get( 'mimetype', None )
126                display_in_upload = galaxy.util.string_as_bool( elem.get( 'display_in_upload', False ) )
127                # If make_subclass is True, it does not necessarily imply that we are subclassing a datatype that is contained
128                # in the distribution.
129                make_subclass = galaxy.util.string_as_bool( elem.get( 'subclass', False ) )
130                # Proprietary datatypes included in installed tool shed repositories will include two special attributes
131                # (proprietary_path and proprietary_datatype_module) if they depend on proprietary datatypes classes.
132                # The value of proprietary_path is the path to the cloned location of the tool shed repository's contained
133                # datatypes_conf.xml file.
134                proprietary_path = elem.get( 'proprietary_path', None )
135                proprietary_datatype_module = elem.get( 'proprietary_datatype_module', None )
136                if proprietary_path is not None or proprietary_datatype_module is not None and not handling_proprietary_datatypes:
137                    handling_proprietary_datatypes = True
138                if deactivate:
139                    # We are deactivating or uninstalling an installed tool shed repository, so eliminate the datatype
140                    # elem from the in-memory list of datatype elems.
141                    for in_memory_elem in self.datatype_elems:
142                        in_memory_extension = in_memory_elem.get( 'extension', None )
143                        if in_memory_extension == extension:
144                            in_memory_dtype = elem.get( 'type', None )
145                            in_memory_type_extension = elem.get( 'type_extension', None )
146                            in_memory_mimetype = elem.get( 'mimetype', None )
147                            in_memory_display_in_upload = galaxy.util.string_as_bool( elem.get( 'display_in_upload', False ) )
148                            in_memory_make_subclass = galaxy.util.string_as_bool( elem.get( 'subclass', False ) )
149                            if in_memory_dtype == dtype and \
150                                in_memory_type_extension == type_extension and \
151                                in_memory_mimetype == mimetype and \
152                                in_memory_display_in_upload == display_in_upload and \
153                                in_memory_make_subclass == make_subclass:
154                                self.datatype_elems.remove( in_memory_elem )
155                    if extension is not None and extension in self.datatypes_by_extension:
156                        # We are deactivating or uninstalling an installed tool shed repository, so eliminate the datatype
157                        # from the registry.  TODO: Handle deactivating datatype converters, etc before removing from
158                        # self.datatypes_by_extension.
159                        del self.datatypes_by_extension[ extension ]
160                        if extension in self.upload_file_formats:
161                            self.upload_file_formats.remove( extension )
162                        self.log.debug( "Removed datatype with extension '%s' from the registry." % extension )
163                else:
164                    # We are loading new datatype, so we'll make sure it is correctly defined before proceeding.
165                    can_process_datatype = False
166                    if extension is not None:
167                        if dtype is not None or type_extension is not None:
168                            if override or extension not in self.datatypes_by_extension:
169                                can_process_datatype = True
170                    if can_process_datatype:
171                        if dtype is not None:
172                            try:
173                                fields = dtype.split( ':' )
174                                datatype_module = fields[ 0 ]
175                                datatype_class_name = fields[ 1 ]
176                            except Exception, e:
177                                self.log.exception( 'Error parsing datatype definition for dtype %s: %s' % ( str( dtype ), str( e ) ) )
178                                ok = False
179                            if ok:
180                                datatype_class = None
181                                if proprietary_path and proprietary_datatype_module and datatype_class_name:
182                                    # We need to change the value of sys.path, so do it in a way that is thread-safe.
183                                    lock = threading.Lock()
184                                    lock.acquire( True )
185                                    try:
186                                        imported_module = __import_module( proprietary_path,
187                                                                           proprietary_datatype_module,
188                                                                           datatype_class_name )
189                                        if imported_module not in self.imported_modules:
190                                            self.imported_modules.append( imported_module )
191                                        if hasattr( imported_module, datatype_class_name ):
192                                            datatype_class = getattr( imported_module, datatype_class_name )
193                                    except Exception, e:
194                                        full_path = os.path.join( proprietary_path, proprietary_datatype_module )
195                                        self.log.debug( "Exception importing proprietary code file %s: %s" % ( str( full_path ), str( e ) ) )
196                                    finally:
197                                        lock.release()
198                                # Either the above exception was thrown because the proprietary_datatype_module is not derived from a class
199                                # in the repository, or we are loading Galaxy's datatypes. In either case we'll look in the registry.
200                                if datatype_class is None:
201                                    try:
202                                        # The datatype class name must be contained in one of the datatype modules in the Galaxy distribution.
203                                        fields = datatype_module.split( '.' )
204                                        module = __import__( fields.pop( 0 ) )
205                                        for mod in fields:
206                                            module = getattr( module, mod )
207                                        datatype_class = getattr( module, datatype_class_name )
208                                        self.log.debug( 'Retrieved datatype module %s from the datatype registry.' % str( datatype_module ) )
209                                    except Exception, e:
210                                        self.log.exception( 'Error importing datatype module %s: %s' % ( str( datatype_module ), str( e ) ) )
211                                        ok = False
212                        elif type_extension is not None:
213                            try:
214                                datatype_class = self.datatypes_by_extension[ type_extension ].__class__
215                            except Exception, e:
216                                self.log.exception( 'Error determining datatype_class for type_extension %s: %s' % ( str( type_extension ), str( e ) ) )
217                                ok = False
218                        if ok:
219                            if not deactivate:
220                                # A new tool shed repository that contains custom datatypes is being installed, and since installation is
221                                # occurring after the datatypes registry has been initialized at server startup, its contents cannot be
222                                # overridden by new introduced conflicting data types unless the value of override is True.
223                                if extension in self.datatypes_by_extension:
224                                    # Because of the way that the value of can_process_datatype was set above, we know that the value of
225                                    # override is True.
226                                    self.log.debug( "Overriding conflicting datatype with extension '%s', using datatype from %s." % \
227                                                      ( str( extension ), str( config ) ) )
228                                if make_subclass:
229                                    datatype_class = type( datatype_class_name, ( datatype_class, ), {} )
230                                self.datatypes_by_extension[ extension ] = datatype_class()
231                                if mimetype is None:
232                                    # Use default mimetype per datatype specification.
233                                    mimetype = self.datatypes_by_extension[ extension ].get_mime()
234                                self.mimetypes_by_extension[ extension ] = mimetype
235                                if datatype_class.track_type:
236                                    self.available_tracks.append( extension )
237                                if display_in_upload and extension not in self.upload_file_formats:
238                                    self.upload_file_formats.append( extension )
239                                # Max file size cut off for setting optional metadata.
240                                self.datatypes_by_extension[ extension ].max_optional_metadata_filesize = elem.get( 'max_optional_metadata_filesize', None )
241                                for converter in elem.findall( 'converter' ):
242                                    # Build the list of datatype converters which will later be loaded into the calling app's toolbox.
243                                    converter_config = converter.get( 'file', None )
244                                    target_datatype = converter.get( 'target_datatype', None )
245                                    depends_on = converter.get( 'depends_on', None )
246                                    if depends_on is not None and target_datatype is not None:
247                                        if extension not in self.converter_deps:
248                                            self.converter_deps[ extension ] = {}
249                                        self.converter_deps[ extension ][ target_datatype ] = depends_on.split( ',' )
250                                    if converter_config and target_datatype:
251                                        if proprietary_converter_path:
252                                            self.proprietary_converters.append( ( converter_config, extension, target_datatype ) )
253                                        else:
254                                            self.converters.append( ( converter_config, extension, target_datatype ) )
255                                # Add composite files.
256                                for composite_file in elem.findall( 'composite_file' ):
257                                    name = composite_file.get( 'name', None )
258                                    if name is None:
259                                        self.log.warning( "You must provide a name for your composite_file (%s)." % composite_file )
260                                    optional = composite_file.get( 'optional', False )
261                                    mimetype = composite_file.get( 'mimetype', None )
262                                    self.datatypes_by_extension[ extension ].add_composite_file( name, optional=optional, mimetype=mimetype )
263                                for display_app in elem.findall( 'display' ):
264                                    if proprietary_display_path:
265                                        if elem not in self.proprietary_display_app_containers:
266                                            self.proprietary_display_app_containers.append( elem )
267                                    else:
268                                        if elem not in self.display_app_containers:
269                                            self.display_app_containers.append( elem )
270                                # Processing the new datatype elem is now complete, so make sure the element defining it is retained by appending
271                                # the new datatype to the in-memory list of datatype elems to enable persistence.
272                                self.datatype_elems.append( elem )
273                    else:
274                        if extension is not None:
275                            if dtype is not None or type_extension is not None:
276                                if extension in self.datatypes_by_extension:
277                                    if not override:
278                                        # Do not load the datatype since it conflicts with an existing datatype which we are not supposed
279                                        # to override.
280                                        self.log.debug( "Ignoring conflicting datatype with extension '%s' from %s." % ( extension, config ) )
281            # Load datatype sniffers from the config - we'll do this even if one or more datatypes were not properly processed in the config
282            # since sniffers are not tightly coupled with datatypes.
283            self.load_datatype_sniffers( root,
284                                         deactivate=deactivate,
285                                         handling_proprietary_datatypes=handling_proprietary_datatypes,
286                                         override=override )
287            self.upload_file_formats.sort()
288            # Persist the xml form of the registry into a temporary file so that it can be loaded from the command line by tools and
289            # set_metadata processing.
290            self.to_xml_file()
291        self.set_default_values()
292
293        def append_to_sniff_order():
294            # Just in case any supported data types are not included in the config's sniff_order section.
295            for ext in self.datatypes_by_extension:
296                datatype = self.datatypes_by_extension[ ext ]
297                included = False
298                for atype in self.sniff_order:
299                    if isinstance( atype, datatype.__class__ ):
300                        included = True
301                        break
302                if not included:
303                    self.sniff_order.append( datatype )
304        append_to_sniff_order()
305
306    def load_datatype_sniffers( self, root, deactivate=False, handling_proprietary_datatypes=False, override=False ):
307        """
308        Process the sniffers element from a parsed a datatypes XML file located at root_dir/config (if processing the Galaxy
309        distributed config) or contained within an installed Tool Shed repository.  If deactivate is True, an installed Tool
310        Shed repository that includes custom sniffers is being deactivated or uninstalled, so appropriate loaded sniffers will
311        be removed from the registry.  The value of override will be False when a Tool Shed repository is being installed.
312        Since installation is occurring after the datatypes registry has been initialized at server startup, it's contents
313        cannot be overridden by newly introduced conflicting sniffers.
314        """
315        sniffer_elem_classes = [ e.attrib[ 'type' ] for e in self.sniffer_elems ]
316        sniffers = root.find( 'sniffers' )
317        if sniffers:
318            for elem in sniffers.findall( 'sniffer' ):
319                # Keep a status of the process steps to enable stopping the process of handling the sniffer if necessary.
320                ok = True
321                dtype = elem.get( 'type', None )
322                if dtype is not None:
323                    try:
324                        fields = dtype.split( ":" )
325                        datatype_module = fields[ 0 ]
326                        datatype_class_name = fields[ 1 ]
327                        module = None
328                    except Exception, e:
329                        self.log.exception( 'Error determining datatype class or module for dtype %s: %s' % ( str( dtype ), str( e ) ) )
330                        ok = False
331                    if ok:
332                        if handling_proprietary_datatypes:
333                            # See if one of the imported modules contains the datatype class name.
334                            for imported_module in self.imported_modules:
335                                if hasattr( imported_module, datatype_class_name ):
336                                    module = imported_module
337                                    break
338                        if module is None:
339                            try:
340                                # The datatype class name must be contained in one of the datatype modules in the Galaxy distribution.
341                                module = __import__( datatype_module )
342                                for comp in datatype_module.split( '.' )[ 1: ]:
343                                    module = getattr( module, comp )
344                            except Exception, e:
345                                self.log.exception( "Error importing datatype class for '%s': %s" % ( str( dtype ), str( e ) ) )
346                                ok = False
347                        if ok:
348                            try:
349                                aclass = getattr( module, datatype_class_name )()
350                            except Exception, e:
351                                self.log.exception( 'Error calling method %s from class %s: %s', str( datatype_class_name ), str( module ), str( e ) )
352                                ok = False
353                            if ok:
354                                if deactivate:
355                                    # We are deactivating or uninstalling an installed Tool Shed repository, so eliminate the appropriate sniffers.
356                                    sniffer_class = elem.get( 'type', None )
357                                    if sniffer_class is not None:
358                                        for index, s_e_c in enumerate( sniffer_elem_classes ):
359                                            if sniffer_class == s_e_c:
360                                                del self.sniffer_elems[ index ]
361                                                sniffer_elem_classes = [ e.attrib[ 'type' ] for e in self.sniffer_elems ]
362                                                self.log.debug( "Removed sniffer element for datatype '%s'" % str( dtype ) )
363                                                break
364                                        for sniffer_class in self.sniff_order:
365                                            if sniffer_class.__class__ == aclass.__class__:
366                                                self.sniff_order.remove( sniffer_class )
367                                                self.log.debug( "Removed sniffer class for datatype '%s' from sniff order" % str( dtype ) )
368                                                break
369                                else:
370                                    # We are loading new sniffer, so see if we have a conflicting sniffer already loaded.
371                                    conflict = False
372                                    for conflict_loc, sniffer_class in enumerate( self.sniff_order ):
373                                        if sniffer_class.__class__ == aclass.__class__:
374                                            # We have a conflicting sniffer, so replace the one previously loaded.
375                                            conflict = True
376                                            if override:
377                                                del self.sniff_order[ conflict_loc ]
378                                                self.log.debug( "Removed conflicting sniffer for datatype '%s'" % dtype )
379                                            break
380                                    if conflict:
381                                        if override:
382                                            self.sniff_order.append( aclass )
383                                            self.log.debug( "Loaded sniffer for datatype '%s'" % dtype )
384                                    else:
385                                        self.sniff_order.append( aclass )
386                                        self.log.debug( "Loaded sniffer for datatype '%s'" % dtype )
387                                    # Processing the new sniffer elem is now complete, so make sure the element defining it is loaded if necessary.
388                                    sniffer_class = elem.get( 'type', None )
389                                    if sniffer_class is not None:
390                                        if sniffer_class not in sniffer_elem_classes:
391                                            self.sniffer_elems.append( elem )
392
393    def get_datatype_class_by_name( self, name ):
394        """
395        Return the datatype class where the datatype's `type` attribute
396        (as defined in the datatype_conf.xml file) contains `name`.
397        """
398        #TODO: too roundabout - would be better to generate this once as a map and store in this object
399        found_class = None
400        for ext, datatype_obj in self.datatypes_by_extension.items():
401            datatype_obj_class = datatype_obj.__class__
402            datatype_obj_class_str = str( datatype_obj_class )
403            #print datatype_obj_class_str
404            if name in datatype_obj_class_str:
405                return datatype_obj_class
406        return None
407        # these seem to be connected to the dynamic classes being generated in this file, lines 157-158
408        #   they appear when a one of the three are used in inheritance with subclass="True"
409        #TODO: a possible solution is to def a fn in datatypes __init__ for creating the dynamic classes
410
411        #remap = {
412        #    'galaxy.datatypes.registry.Tabular'   : galaxy.datatypes.tabular.Tabular,
413        #    'galaxy.datatypes.registry.Text'      : galaxy.datatypes.data.Text,
414        #    'galaxy.datatypes.registry.Binary'    : galaxy.datatypes.binary.Binary
415        #}
416        #datatype_str = str( datatype )
417        #if datatype_str in remap:
418        #    datatype = remap[ datatype_str ]
419        #
420        #return datatype
421
422    def get_available_tracks( self ):
423        return self.available_tracks
424
425    def get_mimetype_by_extension( self, ext, default='application/octet-stream' ):
426        """Returns a mimetype based on an extension"""
427        try:
428            mimetype = self.mimetypes_by_extension[ ext ]
429        except KeyError:
430            #datatype was never declared
431            mimetype = default
432            self.log.warning( 'unknown mimetype in data factory %s' % str( ext ) )
433        return mimetype
434
435    def get_datatype_by_extension( self, ext ):
436        """Returns a datatype based on an extension"""
437        try:
438            builder = self.datatypes_by_extension[ ext ]
439        except KeyError:
440            builder = data.Text()
441        return builder
442
443    def change_datatype( self, data, ext ):
444        data.extension = ext
445        # call init_meta and copy metadata from itself.  The datatype
446        # being converted *to* will handle any metadata copying and
447        # initialization.
448        if data.has_data():
449            data.set_size()
450            data.init_meta( copy_from=data )
451        return data
452
453    def old_change_datatype( self, data, ext ):
454        """Creates and returns a new datatype based on an existing data and an extension"""
455        newdata = factory( ext )( id=data.id )
456        for key, value in data.__dict__.items():
457            setattr( newdata, key, value )
458        newdata.ext = ext
459        return newdata
460
461    def load_datatype_converters( self, toolbox, installed_repository_dict=None, deactivate=False ):
462        """
463        If deactivate is False, add datatype converters from self.converters or self.proprietary_converters
464        to the calling app's toolbox.  If deactivate is True, eliminates relevant converters from the calling
465        app's toolbox.
466        """
467        if installed_repository_dict:
468            # Load converters defined by datatypes_conf.xml included in installed tool shed repository.
469            converters = self.proprietary_converters
470        else:
471            # Load converters defined by local datatypes_conf.xml.
472            converters = self.converters
473        for elem in converters:
474            tool_config = elem[ 0 ]
475            source_datatype = elem[ 1 ]
476            target_datatype = elem[ 2 ]
477            if installed_repository_dict:
478                converter_path = installed_repository_dict[ 'converter_path' ]
479            else:
480                converter_path = self.converters_path
481            try:
482                config_path = os.path.join( converter_path, tool_config )
483                converter = toolbox.load_tool( config_path )
484                if installed_repository_dict:
485                    # If the converter is included in an installed tool shed repository, set the tool
486                    # shed related tool attributes.
487                    converter.tool_shed = installed_repository_dict[ 'tool_shed' ]
488                    converter.repository_name = installed_repository_dict[ 'repository_name' ]
489                    converter.repository_owner = installed_repository_dict[ 'repository_owner' ]
490                    converter.installed_changeset_revision = installed_repository_dict[ 'installed_changeset_revision' ]
491                    converter.old_id = converter.id
492                    # The converter should be included in the list of tools defined in tool_dicts.
493                    tool_dicts = installed_repository_dict[ 'tool_dicts' ]
494                    for tool_dict in tool_dicts:
495                        if tool_dict[ 'id' ] == converter.id:
496                            converter.guid = tool_dict[ 'guid' ]
497                            converter.id = tool_dict[ 'guid' ]
498                            break
499                if deactivate:
500                    if converter.id in toolbox.tools_by_id:
501                        del toolbox.tools_by_id[ converter.id ]
502                    if source_datatype in self.datatype_converters:
503                        if target_datatype in self.datatype_converters[ source_datatype ]:
504                            del self.datatype_converters[ source_datatype ][ target_datatype ]
505                    self.log.debug( "Deactivated converter: %s", converter.id )
506                else:
507                    toolbox.tools_by_id[ converter.id ] = converter
508                    if source_datatype not in self.datatype_converters:
509                        self.datatype_converters[ source_datatype ] = odict()
510                    self.datatype_converters[ source_datatype ][ target_datatype ] = converter
511                    self.log.debug( "Loaded converter: %s", converter.id )
512            except Exception, e:
513                if deactivate:
514                    self.log.exception( "Error deactivating converter from (%s): %s" % ( converter_path, str( e ) ) )
515                else:
516                    self.log.exception( "Error loading converter (%s): %s" % ( converter_path, str( e ) ) )
517
518    def load_display_applications( self, installed_repository_dict=None, deactivate=False ):
519        """
520        If deactivate is False, add display applications from self.display_app_containers or
521        self.proprietary_display_app_containers to appropriate datatypes.  If deactivate is
522        True, eliminates relevant display applications from appropriate datatypes.
523        """
524        if installed_repository_dict:
525            # Load display applications defined by datatypes_conf.xml included in installed tool shed repository.
526            datatype_elems = self.proprietary_display_app_containers
527        else:
528            # Load display applications defined by local datatypes_conf.xml.
529            datatype_elems = self.display_app_containers
530        for elem in datatype_elems:
531            extension = elem.get( 'extension', None )
532            for display_app in elem.findall( 'display' ):
533                display_file = display_app.get( 'file', None )
534                if installed_repository_dict:
535                    display_path = installed_repository_dict[ 'display_path' ]
536                    display_file_head, display_file_tail = os.path.split( display_file )
537                    config_path = os.path.join( display_path, display_file_tail )
538                else:
539                    config_path = os.path.join( self.display_applications_path, display_file )
540                try:
541                    inherit = galaxy.util.string_as_bool( display_app.get( 'inherit', 'False' ) )
542                    display_app = DisplayApplication.from_file( config_path, self )
543                    if display_app:
544                        if display_app.id in self.display_applications:
545                            if deactivate:
546                                del self.display_applications[ display_app.id ]
547                            else:
548                                # If we already loaded this display application, we'll use the first one loaded.
549                                display_app = self.display_applications[ display_app.id ]
550                        elif installed_repository_dict:
551                            # If the display application is included in an installed tool shed repository,
552                            # set the tool shed related tool attributes.
553                            display_app.tool_shed = installed_repository_dict[ 'tool_shed' ]
554                            display_app.repository_name = installed_repository_dict[ 'repository_name' ]
555                            display_app.repository_owner = installed_repository_dict[ 'repository_owner' ]
556                            display_app.installed_changeset_revision = installed_repository_dict[ 'installed_changeset_revision' ]
557                            display_app.old_id = display_app.id
558                            # The display application should be included in the list of tools defined in tool_dicts.
559                            tool_dicts = installed_repository_dict[ 'tool_dicts' ]
560                            for tool_dict in tool_dicts:
561                                if tool_dict[ 'id' ] == display_app.id:
562                                    display_app.guid = tool_dict[ 'guid' ]
563                                    display_app.id = tool_dict[ 'guid' ]
564                                    break
565                        if deactivate:
566                            if display_app.id in self.display_applications:
567                                del self.display_applications[ display_app.id ]
568                            if extension in self.datatypes_by_extension:
569                                if display_app.id in self.datatypes_by_extension[ extension ].display_applications:
570                                    del self.datatypes_by_extension[ extension ].display_applications[ display_app.id ]
571                            if inherit and ( self.datatypes_by_extension[ extension ], display_app ) in self.inherit_display_application_by_class:
572                                self.inherit_display_application_by_class.remove( ( self.datatypes_by_extension[ extension ], display_app ) )
573                            self.log.debug( "Deactivated display application '%s' for datatype '%s'." % ( display_app.id, extension ) )
574                        else:
575                            self.display_applications[ display_app.id ] = display_app
576                            self.datatypes_by_extension[ extension ].add_display_application( display_app )
577                            if inherit and ( self.datatypes_by_extension[ extension ], display_app ) not in self.inherit_display_application_by_class:
578                                self.inherit_display_application_by_class.append( ( self.datatypes_by_extension[ extension ], display_app ) )
579                            self.log.debug( "Loaded display application '%s' for datatype '%s', inherit=%s." % ( display_app.id, extension, inherit ) )
580                except Exception, e:
581                    if deactivate:
582                        self.log.exception( "Error deactivating display application (%s): %s" % ( config_path, str( e ) ) )
583                    else:
584                        self.log.exception( "Error loading display application (%s): %s" % ( config_path, str( e ) ) )
585        # Handle display_application subclass inheritance.
586        for extension, d_type1 in self.datatypes_by_extension.iteritems():
587            for d_type2, display_app in self.inherit_display_application_by_class:
588                current_app = d_type1.get_display_application( display_app.id, None )
589                if current_app is None and isinstance( d_type1, type( d_type2 ) ):
590                    self.log.debug( "Adding inherited display application '%s' to datatype '%s'" % ( display_app.id, extension ) )
591                    d_type1.add_display_application( display_app )
592
593    def load_external_metadata_tool( self, toolbox ):
594        """Adds a tool which is used to set external metadata"""
595        # We need to be able to add a job to the queue to set metadata. The queue will currently only accept jobs with an associated
596        # tool.  We'll create a special tool to be used for Auto-Detecting metadata; this is less than ideal, but effective
597        # Properly building a tool without relying on parsing an XML file is near impossible...so we'll create a temporary file
598        tool_xml_text = """
599            <tool id="__SET_METADATA__" name="Set External Metadata" version="1.0.1" tool_type="set_metadata">
600              <type class="SetMetadataTool" module="galaxy.tools"/>
601              <requirements>
602                  <requirement type="package">samtools</requirement>
603              </requirements>
604              <action module="galaxy.tools.actions.metadata" class="SetMetadataToolAction"/>
605              <command>$__SET_EXTERNAL_METADATA_COMMAND_LINE__</command>
606              <inputs>
607                <param format="data" name="input1" type="data" label="File to set metadata on."/>
608                <param name="__ORIGINAL_DATASET_STATE__" type="hidden" value=""/>
609                <param name="__SET_EXTERNAL_METADATA_COMMAND_LINE__" type="hidden" value=""/>
610              </inputs>
611            </tool>
612            """
613        tmp_name = tempfile.NamedTemporaryFile()
614        tmp_name.write( tool_xml_text )
615        tmp_name.flush()
616        set_meta_tool = toolbox.load_tool( tmp_name.name )
617        toolbox.tools_by_id[ set_meta_tool.id ] = set_meta_tool
618        self.set_external_metadata_tool = set_meta_tool
619        self.log.debug( "Loaded external metadata tool: %s", self.set_external_metadata_tool.id )
620
621    def set_default_values( self ):
622        # Default values.
623        if not self.datatypes_by_extension:
624            self.datatypes_by_extension = {
625                'ab1'         : binary.Ab1(),
626                'axt'         : sequence.Axt(),
627                'bam'         : binary.Bam(),
628                'bed'         : interval.Bed(),
629                'coverage'    : coverage.LastzCoverage(),
630                'customtrack' : interval.CustomTrack(),
631                'csfasta'     : sequence.csFasta(),
632                'fasta'       : sequence.Fasta(),
633                'eland'       : tabular.Eland(),
634                'fastq'       : sequence.Fastq(),
635                'fastqsanger' : sequence.FastqSanger(),
636                'gtf'         : interval.Gtf(),
637                'gff'         : interval.Gff(),
638                'gff3'        : interval.Gff3(),
639                'genetrack'   : tracks.GeneTrack(),
640                'interval'    : interval.Interval(),
641                'laj'         : images.Laj(),
642                'lav'         : sequence.Lav(),
643                'maf'         : sequence.Maf(),
644                'pileup'      : tabular.Pileup(),
645                'qualsolid'   : qualityscore.QualityScoreSOLiD(),
646                'qualsolexa'  : qualityscore.QualityScoreSolexa(),
647                'qual454'     : qualityscore.QualityScore454(),
648                'sam'         : tabular.Sam(),
649                'scf'         : binary.Scf(),
650                'sff'         : binary.Sff(),
651                'tabular'     : tabular.Tabular(),
652                'taxonomy'    : tabular.Taxonomy(),
653                'txt'         : data.Text(),
654                'wig'         : interval.Wiggle(),
655                'xml'         : xml.GenericXml(),
656                'cel.zip'     : binary.CelZip(), # Cistrome Customized Datatype: CelZip for expression tools
657                'xys.zip'     : binary.XysZip(), # Cistrome Customized Datatype: XysZip for nimblegen expression tools
658                'eset'        : binary.Eset(),   # Cistrome Customized Datatype: Eset file is the output from Bioconductor
659                'cel'         : binary.Cel(),    # Cistrome Customized Datatype: Cel file is required by MAT
660            }
661            self.mimetypes_by_extension = {
662                'ab1'         : 'application/octet-stream',
663                'axt'         : 'text/plain',
664                'bam'         : 'application/octet-stream',
665                'bed'         : 'text/plain',
666                'customtrack' : 'text/plain',
667                'csfasta'     : 'text/plain',
668                'eland'       : 'application/octet-stream',
669                'fasta'       : 'text/plain',
670                'fastq'       : 'text/plain',
671                'fastqsanger' : 'text/plain',
672                'gtf'         : 'text/plain',
673                'gff'         : 'text/plain',
674                'gff3'        : 'text/plain',
675                'interval'    : 'text/plain',
676                'laj'         : 'text/plain',
677                'lav'         : 'text/plain',
678                'maf'         : 'text/plain',
679                'memexml'     : 'application/xml',
680                'pileup'      : 'text/plain',
681                'qualsolid'   : 'text/plain',
682                'qualsolexa'  : 'text/plain',
683                'qual454'     : 'text/plain',
684                'sam'         : 'text/plain',
685                'scf'         : 'application/octet-stream',
686                'sff'         : 'application/octet-stream',
687                'tabular'     : 'text/plain',
688                'taxonomy'    : 'text/plain',
689                'txt'         : 'text/plain',
690                'wig'         : 'text/plain',
691                'xml'         : 'application/xml',
692            }
693        # super supertype fix for input steps in workflows.
694        if 'data' not in self.datatypes_by_extension:
695            self.datatypes_by_extension[ 'data' ] = data.Data()
696            self.mimetypes_by_extension[ 'data' ] = 'application/octet-stream'
697        # Default values - the order in which we attempt to determine data types is critical
698        # because some formats are much more flexibly defined than others.
699        if len( self.sniff_order ) < 1:
700            self.sniff_order = [
701                binary.Cel(),           # Cistrome Customized Datatype: Cel file is required by MAT
702                binary.Bam(),
703                binary.Sff(),
704                xml.GenericXml(),
705                sequence.Maf(),
706                sequence.Lav(),
707                sequence.csFasta(),
708                qualityscore.QualityScoreSOLiD(),
709                qualityscore.QualityScore454(),
710                sequence.Fasta(),
711                sequence.Fastq(),
712                interval.Wiggle(),
713                images.Html(),
714                sequence.Axt(),
715                interval.Bed(),
716                interval.CustomTrack(),
717                interval.Gtf(),
718                interval.Gff(),
719                interval.Gff3(),
720                tabular.Pileup(),
721                interval.Interval(),
722                tabular.Sam(),
723                tabular.Eland()
724            ]
725
726    def get_converters_by_datatype( self, ext ):
727        """Returns available converters by source type"""
728        converters = odict()
729        source_datatype = type( self.get_datatype_by_extension( ext ) )
730        for ext2, dict in self.datatype_converters.items():
731            converter_datatype = type( self.get_datatype_by_extension( ext2 ) )
732            if issubclass( source_datatype, converter_datatype ):
733                converters.update( dict )
734        #Ensure ext-level converters are present
735        if ext in self.datatype_converters.keys():
736            converters.update( self.datatype_converters[ ext ] )
737        return converters
738
739    def get_converter_by_target_type( self, source_ext, target_ext ):
740        """Returns a converter based on source and target datatypes"""
741        converters = self.get_converters_by_datatype( source_ext )
742        if target_ext in converters.keys():
743            return converters[ target_ext ]
744        return None
745
746    def find_conversion_destination_for_dataset_by_extensions( self, dataset, accepted_formats, converter_safe=True ):
747        """Returns ( target_ext, existing converted dataset )"""
748        for convert_ext in self.get_converters_by_datatype( dataset.ext ):
749            if self.get_datatype_by_extension( convert_ext ).matches_any( accepted_formats ):
750                converted_dataset = dataset.get_converted_files_by_type( convert_ext )
751                if converted_dataset:
752                    ret_data = converted_dataset
753                elif not converter_safe:
754                    continue
755                else:
756                    ret_data = None
757                return ( convert_ext, ret_data )
758        return ( None, None )
759
760    def get_composite_extensions( self ):
761        return [ ext for ( ext, d_type ) in self.datatypes_by_extension.iteritems() if d_type.composite_type is not None ]
762
763    def get_upload_metadata_params( self, context, group, tool ):
764        """Returns dict of case value:inputs for metadata conditional for upload tool"""
765        rval = {}
766        for ext, d_type in self.datatypes_by_extension.iteritems():
767            inputs = []
768            for meta_name, meta_spec in d_type.metadata_spec.iteritems():
769                if meta_spec.set_in_upload:
770                    help_txt = meta_spec.desc
771                    if not help_txt or help_txt == meta_name:
772                        help_txt = ""
773                    inputs.append( '<param type="text" name="%s" label="Set metadata value for &quot;%s&quot;" value="%s" help="%s"/>' % ( meta_name, meta_name, meta_spec.default, help_txt ) )
774            rval[ ext ] = "\n".join( inputs )
775        if 'auto' not in rval and 'txt' in rval: #need to manually add 'auto' datatype
776            rval[ 'auto' ] = rval[ 'txt' ]
777        return rval
778
779    @property
780    def integrated_datatypes_configs( self ):
781        if self.xml_filename and os.path.isfile( self.xml_filename ):
782            return self.xml_filename
783        self.to_xml_file()
784        return self.xml_filename
785
786    def to_xml_file( self ):
787        if self.xml_filename is not None:
788            # If persisted previously, attempt to remove the temporary file in which we were written.
789            try:
790                os.unlink( self.xml_filename )
791            except:
792                pass
793            self.xml_filename = None
794        fd, filename = tempfile.mkstemp()
795        self.xml_filename = os.path.abspath( filename )
796        if self.converters_path_attr:
797            converters_path_str = ' converters_path="%s"' % self.converters_path_attr
798        else:
799            converters_path_str = ''
800        if self.display_path_attr:
801            display_path_str = ' display_path="%s"' % self.display_path_attr
802        else:
803            display_path_str = ''
804        os.write( fd, '<?xml version="1.0"?>\n' )
805        os.write( fd, '<datatypes>\n' )
806        os.write( fd, '<registration%s%s>\n' % ( converters_path_str, display_p

Large files are truncated, but you can click here to view the full file