
/lib/galaxy/datatypes/metadata.py

https://bitbucket.org/cistrome/cistrome-harvard/
  1"""
  2Galaxy Metadata
  3
  4"""
  5
  6import copy
  7import cPickle
  8import json
  9import logging
 10import os
 11import shutil
 12import sys
 13import tempfile
 14import weakref
 15
 16from os.path import abspath
 17
 18import galaxy.model
 19from galaxy.util import listify, stringify_dictionary_keys, string_as_bool
 20from galaxy.util.odict import odict
 21from galaxy.util import in_directory
 22from galaxy.web import form_builder
 23from sqlalchemy.orm import object_session
 24
 25log = logging.getLogger(__name__)
 26
 27STATEMENTS = "__galaxy_statements__" #this is the name of the property in a Datatype class where new metadata spec element Statements are stored
 28
 29class Statement( object ):
 30    """
 31    This class inserts its target into a list in the surrounding
 32    class.  The data.Data class has a metaclass which executes these
 33    statements.  This is how we shove the metadata element spec into
 34    the class.
 35    """
 36    def __init__( self, target ):
 37        self.target = target
 38
 39    def __call__( self, *args, **kwargs ):
 40        class_locals = sys._getframe( 1 ).f_locals #get the locals dictionary of the frame object one down in the call stack (i.e. the Datatype class calling MetadataElement)
 41        statements = class_locals.setdefault( STATEMENTS, [] ) #get and set '__galaxy_statements__' to an empty list if not in locals dict
 42        statements.append( ( self, args, kwargs ) ) #add Statement containing info to populate a MetadataElementSpec
 43
 44    @classmethod
 45    def process( cls, element ):
 46        for statement, args, kwargs in getattr( element, STATEMENTS, [] ):
 47            statement.target( element, *args, **kwargs ) #statement.target is MetadataElementSpec, element is a Datatype class
 48
 49
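# ---------------------------------------------------------------------------
# Editor's sketch (not part of the original file): a minimal, self-contained
# illustration of the Statement mechanism above. Statement.__call__ records the
# call in the class body's local namespace via sys._getframe( 1 ).f_locals, and
# Statement.process() later replays each recorded call against the class. The
# names _ExampleSpec and _ExampleType are hypothetical.
def _example_statement_mechanism():
    recorded = []
    def _ExampleSpec( cls, **kwargs ):   # stands in for MetadataElementSpec
        recorded.append( ( cls, kwargs ) )
    _ExampleElement = Statement( _ExampleSpec )
    class _ExampleType( object ):        # stands in for a Datatype class
        _ExampleElement( name="columns", default=0 )   # recorded in __galaxy_statements__
    Statement.process( _ExampleType )    # replays the call: _ExampleSpec( _ExampleType, ... )
    return recorded                      # -> [ ( _ExampleType, { 'name': 'columns', 'default': 0 } ) ]
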
 50class MetadataCollection( object ):
 51    """
 52    MetadataCollection is not a collection at all, but rather a proxy
 53    to the real metadata, which is stored as a dictionary. This class
 54    handles processing the metadata elements when they are set and
 55    retrieved, returning default values in cases when metadata is not set.
 56    """
 57    def __init__(self, parent ):
 58        self.parent = parent
 59        #initialize dict if needed
 60        if self.parent._metadata is None:
 61            self.parent._metadata = {}
 62
 63    def get_parent( self ):
 64        if "_parent" in self.__dict__:
 65            return self.__dict__["_parent"]()
 66        return None
 67
 68    def set_parent( self, parent ):
 69        self.__dict__["_parent"] = weakref.ref( parent ) # use weakref to prevent a circular reference interfering with garbage collection: hda/lda (parent) <--> MetadataCollection (self) ; needs to be hashable, so cannot use proxy.
 70    parent = property( get_parent, set_parent )
 71
 72    @property
 73    def spec( self ):
 74        return self.parent.datatype.metadata_spec
 75
 76    def __iter__( self ):
 77        return self.parent._metadata.__iter__()
 78
 79    def get( self, key, default=None ):
 80        try:
 81            return self.__getattr__( key ) or default
 82        except:
 83            return default
 84
 85    def items(self):
 86        return iter( [ ( k, self.get( k ) ) for k in self.spec.iterkeys() ] )
 87
 88    def __str__(self):
 89        return dict( self.items() ).__str__()
 90
 91    def __nonzero__( self ):
 92        return bool( self.parent._metadata )
 93
 94    def __getattr__( self, name ):
 95        if name in self.spec:
 96            if name in self.parent._metadata:
 97                return self.spec[name].wrap( self.parent._metadata[name], object_session( self.parent ) )
 98            return self.spec[name].wrap( self.spec[name].default, object_session( self.parent ) )
 99        if name in self.parent._metadata:
100            return self.parent._metadata[name]
101
102    def __setattr__( self, name, value ):
103        if name == "parent":
104            return self.set_parent( value )
105        else:
106            if name in self.spec:
107                self.parent._metadata[name] = self.spec[name].unwrap( value )
108            else:
109                self.parent._metadata[name] = value
110
111    def element_is_set( self, name ):
112        return bool( self.parent._metadata.get( name, False ) )
113
114    def get_html_by_name( self, name, **kwd ):
115        if name in self.spec:
116            rval = self.spec[name].param.get_html( value=getattr( self, name ), context=self, **kwd )
117            if rval is None:
118                return self.spec[name].no_value
119            return rval
120
121    def make_dict_copy( self, to_copy ):
122        """Makes a deep copy of input iterable to_copy according to self.spec"""
123        rval = {}
124        for key, value in to_copy.items():
125            if key in self.spec:
126                rval[key] = self.spec[key].param.make_copy( value, target_context=self, source_context=to_copy )
127        return rval
128
129    def from_JSON_dict( self, filename ):
130        dataset = self.parent
131        log.debug( 'loading metadata from file for: %s %s' % ( dataset.__class__.__name__, dataset.id ) )
132        JSONified_dict = json.load( open( filename ) )
133        for name, spec in self.spec.items():
134            if name in JSONified_dict:
135                dataset._metadata[ name ] = spec.param.from_external_value( JSONified_dict[ name ], dataset )
136            elif name in dataset._metadata:
137                #if the metadata value is not found in our externally set metadata but it has a value in the 'old'
138                #metadata associated with our dataset, we'll delete it from our dataset's metadata dict
139                del dataset._metadata[ name ]
140
141    def to_JSON_dict( self, filename ):
142        #galaxy.model.customtypes.json_encoder.encode()
143        meta_dict = {}
144        dataset_meta_dict = self.parent._metadata
145        for name, spec in self.spec.items():
146            if name in dataset_meta_dict:
147                meta_dict[ name ] = spec.param.to_external_value( dataset_meta_dict[ name ] )
148        json.dump( meta_dict, open( filename, 'wb+' ) )
149
150    def __getstate__( self ):
151        return None #cannot pickle a weakref item (self._parent); when data._metadata_collection is None, it will be recreated on demand
152
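# ---------------------------------------------------------------------------
# Editor's sketch (not part of the original file): typical attribute-style use
# of a MetadataCollection. 'dataset' is assumed to be an HDA/LDDA-like object
# whose .metadata attribute is a MetadataCollection and whose datatype declares
# a 'columns' element; both names are illustrative, not guaranteed here.
def _example_metadata_access( dataset ):
    n_columns = dataset.metadata.columns                  # __getattr__: wrapped value, or the wrapped spec default if unset
    if not dataset.metadata.element_is_set( 'columns' ):  # True only if a truthy value has actually been stored
        n_columns = dataset.metadata.get( 'columns', 0 )  # .get() returns the default on errors or falsy values
    dataset.metadata.columns = n_columns                  # __setattr__: unwrapped via the spec before storage
    return n_columns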
153
154class MetadataSpecCollection( odict ):
155    """
156    A simple extension of dict which allows cleaner access to items
157    and allows the values to be iterated over directly as if it were a
158    list.  append() is also implemented for simplicity and does not
159    "append".
160    """
161    def __init__( self, dict = None ):
162        odict.__init__( self, dict = dict )
163
164    def append( self, item ):
165        self[item.name] = item
166
167    def iter( self ):
168        return self.itervalues()
169
170    def __getattr__( self, name ):
171        return self.get( name )
172
173    def __repr__( self ):
174        # force elements to draw with __str__ for sphinx-apidoc
175        return ', '.join([ item.__str__() for item in self.iter() ])
176
177
178class MetadataParameter( object ):
179    def __init__( self, spec ):
180        self.spec = spec
181
182    def get_html_field( self, value=None, context=None, other_values=None, **kwd ):
183        context = context or {}
184        other_values = other_values or {}
185        return form_builder.TextField( self.spec.name, value=value )
186
187    def get_html( self, value, context=None, other_values=None, **kwd ):
188        """
189        The "context" is simply the metadata collection/bunch holding
190        this piece of metadata. This is passed in to allow for
191        metadata to validate against each other (note: this could turn
192        into a huge, recursive mess if not done with care). For
193        example, a column assignment should validate against the
194        number of columns in the dataset.
195        """
196        context = context or {}
197        other_values = other_values or {}
198
199        if self.spec.get("readonly"):
200            return value
201        if self.spec.get("optional"):
202            checked = False
203            if value: checked = "true"
204            checkbox = form_builder.CheckboxField( "is_" + self.spec.name, checked=checked )
205            return checkbox.get_html() + self.get_html_field( value=value, context=context, other_values=other_values, **kwd ).get_html()
206        else:
207            return self.get_html_field( value=value, context=context, other_values=other_values, **kwd ).get_html()
208
209    def to_string( self, value ):
210        return str( value )
211
212    def make_copy( self, value, target_context = None, source_context = None ):
213        return copy.deepcopy( value )
214
215    @classmethod
216    def marshal ( cls, value ):
217        """
218        This method should/can be overridden to convert the incoming
219        value to whatever type it is supposed to be.
220        """
221        return value
222
223    def validate( self, value ):
224        """
225        Throw an exception if the value is invalid.
226        """
227        pass
228
229    def unwrap( self, form_value ):
230        """
231        Turns a value into its storable form.
232        """
233        value = self.marshal( form_value )
234        self.validate( value )
235        return value
236
237    def wrap( self, value, session ):
238        """
239        Turns a value into its usable form.
240        """
241        return value
242
243    def from_external_value( self, value, parent ):
244        """
245        Turns a value read from an external dict into its value to be pushed directly into the metadata dict.
246        """
247        return value
248
249    def to_external_value( self, value ):
250        """
251        Turns a value read from the metadata dict into the value to be pushed directly into the external dict.
252        """
253        return value
254
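# ---------------------------------------------------------------------------
# Editor's sketch (not part of the original file): how the unwrap()/wrap() hooks
# above are meant to be specialised. The subclass below is hypothetical; real
# parameters receive their spec from MetadataElementSpec rather than None.
def _example_custom_parameter():
    class _PositiveIntParameter( MetadataParameter ):
        @classmethod
        def marshal( cls, value ):
            return int( value )                   # coerce the incoming form value
        def validate( self, value ):
            if value < 0:
                raise ValueError( "value must be non-negative: %s" % value )
    param = _PositiveIntParameter( None )         # spec omitted purely for the sketch
    return param.unwrap( "42" )                   # unwrap() = marshal() + validate() -> 42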
255
256class MetadataElementSpec( object ):
257    """
258    Defines a metadata element and adds it to the metadata_spec (which
259    is a MetadataSpecCollection) of datatype.
260    """
261    def __init__( self, datatype,
262                  name=None, desc=None, param=MetadataParameter, default=None, no_value = None,
263                  visible=True, set_in_upload = False, **kwargs ):
264        self.name = name
265        self.desc = desc or name
266        self.default = default
267        self.no_value = no_value
268        self.visible = visible
269        self.set_in_upload = set_in_upload
270        # Catch-all, allows for extra attributes to be set
271        self.__dict__.update(kwargs)
272        # set up param last, as it uses values set above
273        self.param = param( self )
274        # add spec element to the spec
275        datatype.metadata_spec.append( self )
276
277    def get( self, name, default=None ):
278        return self.__dict__.get(name, default)
279
280    def wrap( self, value, session ):
281        """
282        Turns a stored value into its usable form.
283        """
284        return self.param.wrap( value, session )
285
286    def unwrap( self, value ):
287        """
288        Turns an incoming value into its storable form.
289        """
290        return self.param.unwrap( value )
291
292    def __str__( self ):
293        #TODO??: assuming param is the class of this MetadataElementSpec - add the plain class name for that
294        spec_dict = dict( param_class=self.param.__class__.__name__ )
295        spec_dict.update( self.__dict__ )
296        return ( "{name} ({param_class}): {desc}, defaults to '{default}'".format( **spec_dict ) )
297
298# create a statement class that, when called,
299#   will add a new MetadataElementSpec to a class's metadata_spec
300MetadataElement = Statement( MetadataElementSpec )
301
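# ---------------------------------------------------------------------------
# Editor's sketch (not part of the original file): roughly how a datatype class
# declares metadata with the MetadataElement statement defined above. In real
# Galaxy datatypes the metaclass on data.Data calls Statement.process() and sets
# up metadata_spec automatically; here both are done by hand so the sketch is
# self-contained. The class and element names are hypothetical.
def _example_datatype_declaration():
    class _ExampleTabular( object ):
        metadata_spec = MetadataSpecCollection()
        MetadataElement( name="columns", default=0, desc="Number of columns",
                         param=ColumnParameter, readonly=True, visible=False )
        MetadataElement( name="comment_lines", default=0, desc="Number of comment lines",
                         optional=True, no_value=0 )
    Statement.process( _ExampleTabular )           # instantiates the MetadataElementSpecs
    return _ExampleTabular.metadata_spec.columns   # -> the MetadataElementSpec for 'columns'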
302
303"""
304MetadataParameter sub-classes.
305"""
306
307class SelectParameter( MetadataParameter ):
308    def __init__( self, spec ):
309        MetadataParameter.__init__( self, spec )
310        self.values = self.spec.get( "values" )
311        self.multiple = string_as_bool( self.spec.get( "multiple" ) )
312
313    def to_string( self, value ):
314        if value in [ None, [] ]:
315            return str( self.spec.no_value )
316        if not isinstance( value, list ):
317            value = [value]
318        return ",".join( map( str, value ) )
319
320    def get_html_field( self, value=None, context=None, other_values=None, values=None, **kwd ):
321        context = context or {}
322        other_values = other_values or {}
323
324        field = form_builder.SelectField( self.spec.name, multiple=self.multiple, display=self.spec.get("display") )
325        if self.values:
326            value_list = self.values
327        elif values:
328            value_list = values
329        elif value:
330            value_list = [ ( v, v ) for v in listify( value )]
331        else:
332            value_list = []
333        for val, label in value_list:
334            try:
335                if ( self.multiple and val in value ) or ( not self.multiple and val == value ):
336                    field.add_option( label, val, selected=True )
337                else:
338                    field.add_option( label, val, selected=False )
339            except TypeError:
340                field.add_option( val, label, selected=False )
341        return field
342
343    def get_html( self, value, context=None, other_values=None, values=None, **kwd ):
344        context = context or {}
345        other_values = other_values or {}
346
347        if self.spec.get("readonly"):
348            if value in [ None, [] ]:
349                return str( self.spec.no_value )
350            return ", ".join( map( str, value ) )
351        return MetadataParameter.get_html( self, value, context=context, other_values=other_values, values=values, **kwd )
352
353    def wrap( self, value, session ):
354        value = self.marshal( value ) #do we really need this (wasteful)? - yes because we are not sure that all existing selects have been stored previously as lists. Also this will handle the case where defaults/no_values are specified and are single non-list values.
355        if self.multiple:
356            return value
357        elif value:
358            return value[0] #single select, only return the first value
359        return None
360
361    @classmethod
362    def marshal( cls, value ):
363        # Store select as list, even if single item
364        if value is None: return []
365        if not isinstance( value, list ): return [value]
366        return value
367
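# ---------------------------------------------------------------------------
# Editor's sketch (not part of the original file): SelectParameter.marshal()
# normalises every stored value to a list, even for a single selection; wrap()
# above then collapses the list back to a scalar when multiple=False.
def _example_select_marshal():
    assert SelectParameter.marshal( None ) == []
    assert SelectParameter.marshal( "hg19" ) == [ "hg19" ]
    assert SelectParameter.marshal( [ "hg19", "mm9" ] ) == [ "hg19", "mm9" ]
    return True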
368
369class DBKeyParameter( SelectParameter ):
370
371    def get_html_field( self, value=None, context=None, other_values=None, values=None, **kwd):
372        context = context or {}
373        other_values = other_values or {}
374        try:
375            values = kwd['trans'].db_builds
376        except KeyError:
377            pass
378        return super(DBKeyParameter, self).get_html_field( value, context, other_values, values, **kwd)
379
380    def get_html( self, value=None, context=None, other_values=None, values=None, **kwd):
381        context = context or {}
382        other_values = other_values or {}
383        try:
384            values = kwd['trans'].db_builds
385        except KeyError:
386            pass
387        return super(DBKeyParameter, self).get_html( value, context, other_values, values, **kwd)
388
389
390class RangeParameter( SelectParameter ):
391
392    def __init__( self, spec ):
393        SelectParameter.__init__( self, spec )
394        # The spec should supply min and max values; fall back to 1 when missing
395        self.min = spec.get( "min" ) or 1
396        self.max = spec.get( "max" ) or 1
397        self.step = self.spec.get( "step" ) or 1
398
399    def get_html_field( self, value=None, context=None, other_values=None, values=None, **kwd ):
400        context = context or {}
401        other_values = other_values or {}
402
403        if values is None:
404            values = zip( range( self.min, self.max, self.step ), range( self.min, self.max, self.step ))
405        return SelectParameter.get_html_field( self, value=value, context=context, other_values=other_values, values=values, **kwd )
406
407    def get_html( self, value, context=None, other_values=None, values=None, **kwd ):
408        context = context or {}
409        other_values = other_values or {}
410
411        if values is None:
412            values = zip( range( self.min, self.max, self.step ), range( self.min, self.max, self.step ))
413        return SelectParameter.get_html( self, value, context=context, other_values=other_values, values=values, **kwd )
414
415    @classmethod
416    def marshal( cls, value ):
417        value = SelectParameter.marshal( value )
418        values = [ int(x) for x in value ]
419        return values
420
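# ---------------------------------------------------------------------------
# Editor's sketch (not part of the original file): the (value, label) option
# pairs RangeParameter builds for its select box, mirroring the zip()/range()
# calls above. Note that range() excludes max itself, so the last option is
# max - step; the argument names below are illustrative only.
def _example_range_options( spec_min=1, spec_max=5, step=1 ):
    options = zip( range( spec_min, spec_max, step ), range( spec_min, spec_max, step ) )
    return options    # Python 2: [ (1, 1), (2, 2), (3, 3), (4, 4) ]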
421
422class ColumnParameter( RangeParameter ):
423
424    def get_html_field( self, value=None, context=None, other_values=None, values=None, **kwd ):
425        context = context or {}
426        other_values = other_values or {}
427
428        if values is None and context:
429            column_range = range( 1, ( context.columns or 0 ) + 1, 1 )
430            values = zip( column_range, column_range )
431        return RangeParameter.get_html_field( self, value=value, context=context, other_values=other_values, values=values, **kwd )
432
433    def get_html( self, value, context=None, other_values=None, values=None, **kwd ):
434        context = context or {}
435        other_values = other_values or {}
436
437        if values is None and context:
438            column_range = range( 1, ( context.columns or 0 ) + 1, 1 )
439            values = zip( column_range, column_range )
440        return RangeParameter.get_html( self, value, context=context, other_values=other_values, values=values, **kwd )
441
442
443class ColumnTypesParameter( MetadataParameter ):
444
445    def to_string( self, value ):
446        return ",".join( map( str, value ) )
447
448
449class ListParameter( MetadataParameter ):
450
451    def to_string( self, value ):
452        return ",".join( [str(x) for x in value] )
453
454
455class DictParameter( MetadataParameter ):
456
457    def to_string( self, value ):
458        return  json.dumps( value )
459
460
461class PythonObjectParameter( MetadataParameter ):
462
463    def to_string( self, value ):
464        if not value:
465            return self.spec._to_string( self.spec.no_value )
466        return self.spec._to_string( value )
467
468    def get_html_field( self, value=None, context=None, other_values=None, **kwd ):
469        context = context or {}
470        other_values = other_values or {}
471        return form_builder.TextField( self.spec.name, value=self._to_string( value ) )
472
473    def get_html( self, value=None, context=None, other_values=None, **kwd ):
474        context = context or {}
475        other_values = other_values or {}
476        return str( self )
477
478    @classmethod
479    def marshal( cls, value ):
480        return value
481
482
483class FileParameter( MetadataParameter ):
484
485    def to_string( self, value ):
486        if not value:
487            return str( self.spec.no_value )
488        return value.file_name
489
490    def get_html_field( self, value=None, context=None, other_values=None, **kwd ):
491        context = context or {}
492        other_values = other_values or {}
493        return form_builder.TextField( self.spec.name, value=str( value.id ) )
494
495    def get_html( self, value=None, context=None, other_values=None, **kwd ):
496        context = context or {}
497        other_values = other_values or {}
498        return "<div>No display available for Metadata Files</div>"
499
500    def wrap( self, value, session ):
501        if value is None:
502            return None
503        if isinstance( value, galaxy.model.MetadataFile ) or isinstance( value, MetadataTempFile ):
504            return value
505        mf = session.query( galaxy.model.MetadataFile ).get( value )
506        return mf
507
508    def make_copy( self, value, target_context, source_context ):
509        value = self.wrap( value, object_session( target_context.parent ) )
510        if value:
511            new_value = galaxy.model.MetadataFile( dataset = target_context.parent, name = self.spec.name )
512            object_session( target_context.parent ).add( new_value )
513            object_session( target_context.parent ).flush()
514            shutil.copy( value.file_name, new_value.file_name )
515            return self.unwrap( new_value )
516        return None
517
518    @classmethod
519    def marshal( cls, value ):
520        if isinstance( value, galaxy.model.MetadataFile ):
521            value = value.id
522        return value
523
524    def from_external_value( self, value, parent ):
525        """
526        Turns a value read from an external dict into its value to be pushed directly into the metadata dict.
527        """
528        if MetadataTempFile.is_JSONified_value( value ):
529            value = MetadataTempFile.from_JSON( value )
530        if isinstance( value, MetadataTempFile ):
531            mf = parent.metadata.get( self.spec.name, None)
532            if mf is None:
533                mf = self.new_file( dataset = parent, **value.kwds )
534            # Ensure the metadata file gets updated with content
535            parent.dataset.object_store.update_from_file( mf, file_name=value.file_name, extra_dir='_metadata_files', extra_dir_at_root=True, alt_name=os.path.basename(mf.file_name) )
536            os.unlink( value.file_name )
537            value = mf.id
538        return value
539
540    def to_external_value( self, value ):
541        """
542        Turns a value read from the metadata dict into the value to be pushed directly into the external dict.
543        """
544        if isinstance( value, galaxy.model.MetadataFile ):
545            value = value.id
546        elif isinstance( value, MetadataTempFile ):
547            value = MetadataTempFile.to_JSON( value )
548        return value
549
550    def new_file( self, dataset = None, **kwds ):
551        if object_session( dataset ):
552            mf = galaxy.model.MetadataFile( name = self.spec.name, dataset = dataset, **kwds )
553            object_session( dataset ).add( mf )
554            object_session( dataset ).flush() #flush to assign id
555            return mf
556        else:
557            #we need to make a tmp file that is accessible to the head node,
558            #we will be copying its contents into the MetadataFile objects filename after restoring from JSON
559            #we do not include 'dataset' in the kwds passed, as from_JSON_value() will handle this for us
560            return MetadataTempFile( **kwds )
561
562
563#This class is used when a database file connection is not available
564class MetadataTempFile( object ):
565    tmp_dir = 'database/tmp' #this should be overwritten as necessary in calling scripts
566
567    def __init__( self, **kwds ):
568        self.kwds = kwds
569        self._filename = None
570
571    @property
572    def file_name( self ):
573        if self._filename is None:
574            #we need to create a tmp file, accessible across all nodes/heads, save the name, and return it
575            self._filename = abspath( tempfile.NamedTemporaryFile( dir = self.tmp_dir, prefix = "metadata_temp_file_" ).name )
576            open( self._filename, 'wb+' ) #create an empty file, so it can't be reused using tempfile
577        return self._filename
578
579    def to_JSON( self ):
580        return { '__class__':self.__class__.__name__, 'filename':self.file_name, 'kwds':self.kwds }
581
582    @classmethod
583    def from_JSON( cls, json_dict ):
584        #need to ensure our keywords are not unicode
585        rval = cls( **stringify_dictionary_keys( json_dict['kwds'] ) )
586        rval._filename = json_dict['filename']
587        return rval
588
589    @classmethod
590    def is_JSONified_value( cls, value ):
591        return ( isinstance( value, dict ) and value.get( '__class__', None ) == cls.__name__ )
592
593    @classmethod
594    def cleanup_from_JSON_dict_filename( cls, filename ):
595        try:
596            for key, value in json.load( open( filename ) ).items():
597                if cls.is_JSONified_value( value ):
598                    value = cls.from_JSON( value )
599                if isinstance( value, cls ) and os.path.exists( value.file_name ):
600                    log.debug( 'Cleaning up abandoned MetadataTempFile file: %s' % value.file_name )
601                    os.unlink( value.file_name )
602        except Exception, e:
603            log.debug( 'Failed to cleanup MetadataTempFile temp files from %s: %s' % ( filename, e ) )
604
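# ---------------------------------------------------------------------------
# Editor's sketch (not part of the original file): the JSON round trip that
# MetadataTempFile uses to cross the job boundary. MetadataTempFile.tmp_dir
# must point at an existing, writable directory before file_name is first
# touched (it defaults to 'database/tmp').
def _example_metadata_temp_file_roundtrip():
    mtf = MetadataTempFile()
    as_dict = mtf.to_JSON()       # {'__class__': 'MetadataTempFile', 'filename': '...', 'kwds': {}}
    assert MetadataTempFile.is_JSONified_value( as_dict )
    restored = MetadataTempFile.from_JSON( as_dict )
    return restored.file_name == mtf.file_name    # True - both refer to the same temp file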
605
606#Class with methods allowing set_meta() to be called externally to the Galaxy head
607class JobExternalOutputMetadataWrapper( object ):
608    #this class allows access to external metadata filenames for all outputs associated with a job
609    #We will use JSON as the medium of exchange of information, except for the DatasetInstance object which will use pickle (in the future this could be JSONified as well)
610
611    def __init__( self, job ):
612        self.job_id = job.id
613
614    def get_output_filenames_by_dataset( self, dataset, sa_session ):
615        if isinstance( dataset, galaxy.model.HistoryDatasetAssociation ):
616            return sa_session.query( galaxy.model.JobExternalOutputMetadata ) \
617                             .filter_by( job_id = self.job_id, history_dataset_association_id = dataset.id ) \
618                             .first() #there should only be one or None
619        elif isinstance( dataset, galaxy.model.LibraryDatasetDatasetAssociation ):
620            return sa_session.query( galaxy.model.JobExternalOutputMetadata ) \
621                             .filter_by( job_id = self.job_id, library_dataset_dataset_association_id = dataset.id ) \
622                             .first() #there should only be one or None
623        return None
624
625    def get_dataset_metadata_key( self, dataset ):
626        # Set meta can be called on library items and history items,
627        # need to make different keys for them, since ids can overlap
628        return "%s_%d" % ( dataset.__class__.__name__, dataset.id )
629
630    def setup_external_metadata( self, datasets, sa_session, exec_dir=None, tmp_dir=None, dataset_files_path=None,
631                                 output_fnames=None, config_root=None, config_file=None, datatypes_config=None, job_metadata=None, compute_tmp_dir=None, kwds=None ):
632        kwds = kwds or {}
633        if tmp_dir is None:
634            tmp_dir = MetadataTempFile.tmp_dir
635
636        # path is calculated for Galaxy, may be different on compute - rewrite
637        # for the compute server.
638        def metadata_path_on_compute(path):
639            compute_path = path
640            log.info(compute_tmp_dir)
641            if compute_tmp_dir and tmp_dir and in_directory(path, tmp_dir):
642                path_relative = os.path.relpath(path, tmp_dir)
643                compute_path = os.path.join(compute_tmp_dir, path_relative)
644            return compute_path
645
646        #fill in metadata_files_dict and return the command with args required to set metadata
647        def __metadata_files_list_to_cmd_line( metadata_files ):
648            def __get_filename_override():
649                if output_fnames:
650                    for dataset_path in output_fnames:
651                        if dataset_path.false_path and dataset_path.real_path == metadata_files.dataset.file_name:
652                            return dataset_path.false_path
653                return ""
654            line = "%s,%s,%s,%s,%s,%s" % (
655                metadata_path_on_compute(metadata_files.filename_in),
656                metadata_path_on_compute(metadata_files.filename_kwds),
657                metadata_path_on_compute(metadata_files.filename_out),
658                metadata_path_on_compute(metadata_files.filename_results_code),
659                __get_filename_override(),
660                metadata_path_on_compute(metadata_files.filename_override_metadata),
661            )
662            log.info(line)
663            return line
664        if not isinstance( datasets, list ):
665            datasets = [ datasets ]
666        if exec_dir is None:
667            exec_dir = os.path.abspath( os.getcwd() )
668        if dataset_files_path is None:
669            dataset_files_path = galaxy.model.Dataset.file_path
670        if config_root is None:
671            config_root = os.path.abspath( os.getcwd() )
672        if datatypes_config is None:
673            raise Exception( 'In setup_external_metadata, the received datatypes_config is None.' )
674            datatypes_config = 'datatypes_conf.xml'
675        metadata_files_list = []
676        for dataset in datasets:
677            key = self.get_dataset_metadata_key( dataset )
678            #future note:
679            #wonkiness in job execution causes build command line to be called more than once
680            #when setting metadata externally, via 'auto-detect' button in edit attributes, etc.,
681            #we don't want to overwrite (losing the ability to cleanup) our existing dataset keys and files,
682            #so we will only populate the dictionary once
683            metadata_files = self.get_output_filenames_by_dataset( dataset, sa_session )
684            if not metadata_files:
685                metadata_files = galaxy.model.JobExternalOutputMetadata( dataset = dataset)
686                metadata_files.job_id = self.job_id
687                #we are using tempfile to create unique filenames, tempfile always returns an absolute path
688                #we will use pathnames relative to the galaxy root, to accommodate instances where the galaxy root
689                #is located differently, i.e. on a cluster node with a different filesystem structure
690
691                #file to store existing dataset
692                metadata_files.filename_in = abspath( tempfile.NamedTemporaryFile( dir = tmp_dir, prefix = "metadata_in_%s_" % key ).name )
693
694                #FIXME: HACK
695                #sqlalchemy introduced 'expire_on_commit' flag for sessionmaker at version 0.5x
697                #This may be causing the dataset attribute of the dataset_association object to no longer be loaded into memory when needed for pickling.
697                #For now, we'll simply 'touch' dataset_association.dataset to force it back into memory.
698                dataset.dataset #force dataset_association.dataset to be loaded before pickling
699                #A better fix could be setting 'expire_on_commit=False' on the session, or modifying where commits occur, or ?
700
701                cPickle.dump( dataset, open( metadata_files.filename_in, 'wb+' ) )
702                #file to store metadata results of set_meta()
703                metadata_files.filename_out = abspath( tempfile.NamedTemporaryFile( dir = tmp_dir, prefix = "metadata_out_%s_" % key ).name )
704                open( metadata_files.filename_out, 'wb+' ) # create the file on disk, so it cannot be reused by tempfile (unlikely, but possible)
705                #file to store a 'return code' indicating the results of the set_meta() call
706                #results code is like (True/False - if setting metadata was successful/failed , exception or string of reason of success/failure )
707                metadata_files.filename_results_code = abspath( tempfile.NamedTemporaryFile( dir = tmp_dir, prefix = "metadata_results_%s_" % key ).name )
708                json.dump( ( False, 'External set_meta() not called' ), open( metadata_files.filename_results_code, 'wb+' ) ) # create the file on disk, so it cannot be reused by tempfile (unlikely, but possible)
709                #file to store kwds passed to set_meta()
710                metadata_files.filename_kwds = abspath( tempfile.NamedTemporaryFile( dir = tmp_dir, prefix = "metadata_kwds_%s_" % key ).name )
711                json.dump( kwds, open( metadata_files.filename_kwds, 'wb+' ), ensure_ascii=True )
712                #existing metadata file parameters need to be overridden with cluster-writable file locations
713                metadata_files.filename_override_metadata = abspath( tempfile.NamedTemporaryFile( dir = tmp_dir, prefix = "metadata_override_%s_" % key ).name )
714                open( metadata_files.filename_override_metadata, 'wb+' ) # create the file on disk, so it cannot be reused by tempfile (unlikely, but possible)
715                override_metadata = []
716                for meta_key, spec_value in dataset.metadata.spec.iteritems():
717                    if isinstance( spec_value.param, FileParameter ) and dataset.metadata.get( meta_key, None ) is not None:
718                        metadata_temp = MetadataTempFile()
719                        shutil.copy( dataset.metadata.get( meta_key, None ).file_name, metadata_temp.file_name )
720                        override_metadata.append( ( meta_key, metadata_temp.to_JSON() ) )
721                json.dump( override_metadata, open( metadata_files.filename_override_metadata, 'wb+' ) )
722                #add to session and flush
723                sa_session.add( metadata_files )
724                sa_session.flush()
725            metadata_files_list.append( metadata_files )
726        #return command required to build
727        return "%s %s %s %s %s %s %s %s" % ( os.path.join( exec_dir, 'set_metadata.sh' ), dataset_files_path, tmp_dir, config_root, config_file, datatypes_config, job_metadata, " ".join( map( __metadata_files_list_to_cmd_line, metadata_files_list ) ) )
728
729    def external_metadata_set_successfully( self, dataset, sa_session ):
730        metadata_files = self.get_output_filenames_by_dataset( dataset, sa_session )
731        if not metadata_files:
732            return False # this file doesn't exist
733        rval, rstring = json.load( open( metadata_files.filename_results_code ) )
734        if not rval:
735            log.debug( 'setting metadata externally failed for %s %s: %s' % ( dataset.__class__.__name__, dataset.id, rstring ) )
736        return rval
737
738    def cleanup_external_metadata( self, sa_session ):
739        log.debug( 'Cleaning up external metadata files' )
740        for metadata_files in sa_session.query( galaxy.model.Job ).get( self.job_id ).external_output_metadata:
741            #we need to confirm that any MetadataTempFile files were removed, if not we need to remove them
742            #can occur if the job was stopped before completion, but a MetadataTempFile is used in the set_meta
743            MetadataTempFile.cleanup_from_JSON_dict_filename( metadata_files.filename_out )
744            dataset_key = self.get_dataset_metadata_key( metadata_files.dataset )
745            for key, fname in [ ( 'filename_in', metadata_files.filename_in ), ( 'filename_out', metadata_files.filename_out ), ( 'filename_results_code', metadata_files.filename_results_code ), ( 'filename_kwds', metadata_files.filename_kwds ), ( 'filename_override_metadata', metadata_files.filename_override_metadata ) ]:
746                try:
747                    os.remove( fname )
748                except Exception, e:
749                    log.debug( 'Failed to cleanup external metadata file (%s) for %s: %s' % ( key, dataset_key, e ) )
750
751    def set_job_runner_external_pid( self, pid, sa_session ):
752        for metadata_files in sa_session.query( galaxy.model.Job ).get( self.job_id ).external_output_metadata:
753            metadata_files.job_runner_external_pid = pid
754            sa_session.add( metadata_files )
755            sa_session.flush()
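
# ---------------------------------------------------------------------------
# Editor's note (not part of the original file): the string returned by
# setup_external_metadata() above is a shell command of the form (placeholder
# paths, purely illustrative):
#
#   <exec_dir>/set_metadata.sh <dataset_files_path> <tmp_dir> <config_root> \
#       <config_file> <datatypes_config> <job_metadata> \
#       <filename_in>,<filename_kwds>,<filename_out>,<filename_results_code>,<output_path_override>,<filename_override_metadata> ...
#
# with one comma-separated group per dataset, in the order built by
# __metadata_files_list_to_cmd_line(); the fifth field is empty when no
# false_path override applies.
# ---------------------------------------------------------------------------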