PageRenderTime 89ms CodeModel.GetById 10ms app.highlight 71ms RepoModel.GetById 1ms app.codeStats 1ms

/lib/galaxy/model/migrate/versions/0005_cleanup_datasets_fix.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 712 lines | 614 code | 47 blank | 51 comment | 119 complexity | fe40dcfd27bc0874625f71c5ab05bcb0 MD5 | raw file
import sys, logging, os, time, datetime, errno

# Stand-alone logger for this migration script: everything goes to stdout at
# DEBUG level so migration runs are fully traced.
log = logging.getLogger( __name__ )
log.setLevel(logging.DEBUG)
handler = logging.StreamHandler( sys.stdout )
# NOTE(review): `format` shadows the builtin of the same name at module level.
format = "%(name)s %(levelname)s %(asctime)s %(message)s"
formatter = logging.Formatter( format )
handler.setFormatter( formatter )
log.addHandler( handler )

from sqlalchemy import and_

from sqlalchemy import *
# Shorthand used as the default/onupdate callable for timestamp columns below.
now = datetime.datetime.utcnow
from sqlalchemy.orm import *

from migrate import *
from migrate.changeset import *

from galaxy.model.custom_types import *

from galaxy.util.bunch import Bunch


# Module-level SQLAlchemy metadata and a shared session; the model classes in
# this script add/flush through this `context` directly.
metadata = MetaData()
context = scoped_session( sessionmaker( autoflush=False, autocommit=True ) )
 27
 28
 29## classes
 30def get_permitted_actions( **kwds ):
 31    return Bunch()
 32
 33def directory_hash_id( id ):
 34    s = str( id )
 35    l = len( s )
 36    # Shortcut -- ids 0-999 go under ../000/
 37    if l < 4:
 38        return [ "000" ]
 39    # Pad with zeros until a multiple of three
 40    padded = ( ( 3 - len( s ) % 3 ) * "0" ) + s
 41    # Drop the last three digits -- 1000 files per directory
 42    padded = padded[:-3]
 43    # Break into chunks of three
 44    return [ padded[i*3:(i+1)*3] for i in range( len( padded ) // 3 ) ]
 45
 46
 47class Dataset( object ):
 48    states = Bunch( NEW = 'new',
 49                    UPLOAD = 'upload',
 50                    QUEUED = 'queued',
 51                    RUNNING = 'running',
 52                    OK = 'ok',
 53                    EMPTY = 'empty',
 54                    ERROR = 'error',
 55                    DISCARDED = 'discarded' )
 56    permitted_actions = get_permitted_actions( filter='DATASET' )
 57    file_path = "/tmp/"
 58    engine = None
 59    def __init__( self, id=None, state=None, external_filename=None, extra_files_path=None, file_size=None, purgable=True ):
 60        self.id = id
 61        self.state = state
 62        self.deleted = False
 63        self.purged = False
 64        self.purgable = purgable
 65        self.external_filename = external_filename
 66        self._extra_files_path = extra_files_path
 67        self.file_size = file_size
 68    def get_file_name( self ):
 69        if not self.external_filename:
 70            assert self.id is not None, "ID must be set before filename used (commit the object)"
 71            # First try filename directly under file_path
 72            filename = os.path.join( self.file_path, "dataset_%d.dat" % self.id )
 73            # Only use that filename if it already exists (backward compatibility),
 74            # otherwise construct hashed path
 75            if not os.path.exists( filename ):
 76                dir = os.path.join( self.file_path, *directory_hash_id( self.id ) )
 77                # Create directory if it does not exist
 78                try:
 79                    os.makedirs( dir )
 80                except OSError, e:
 81                    # File Exists is okay, otherwise reraise
 82                    if e.errno != errno.EEXIST:
 83                        raise
 84                # Return filename inside hashed directory
 85                return os.path.abspath( os.path.join( dir, "dataset_%d.dat" % self.id ) )
 86        else:
 87            filename = self.external_filename
 88        # Make filename absolute
 89        return os.path.abspath( filename )
 90    def set_file_name ( self, filename ):
 91        if not filename:
 92            self.external_filename = None
 93        else:
 94            self.external_filename = filename
 95    file_name = property( get_file_name, set_file_name )
 96    @property
 97    def extra_files_path( self ):
 98        if self._extra_files_path:
 99            path = self._extra_files_path
100        else:
101            path = os.path.join( self.file_path, "dataset_%d_files" % self.id )
102            #only use path directly under self.file_path if it exists
103            if not os.path.exists( path ):
104                path = os.path.join( os.path.join( self.file_path, *directory_hash_id( self.id ) ), "dataset_%d_files" % self.id )
105        # Make path absolute
106        return os.path.abspath( path )
107    def get_size( self ):
108        """Returns the size of the data on disk"""
109        if self.file_size:
110            return self.file_size
111        else:
112            try:
113                return os.path.getsize( self.file_name )
114            except OSError:
115                return 0
116    def set_size( self ):
117        """Returns the size of the data on disk"""
118        try:
119            if not self.file_size:
120                self.file_size = os.path.getsize( self.file_name )
121        except OSError:
122            self.file_size = 0
123    def has_data( self ):
124        """Detects whether there is any data"""
125        return self.get_size() > 0
126    def mark_deleted( self, include_children=True ):
127        self.deleted = True
128    # FIXME: sqlalchemy will replace this
129    def _delete(self):
130        """Remove the file that corresponds to this data"""
131        try:
132            os.remove(self.data.file_name)
133        except OSError, e:
134            log.critical('%s delete error %s' % (self.__class__.__name__, e))
135
class DatasetInstance( object ):
    """A base class for all 'dataset instances', HDAs, LDAs, etc.

    Wraps a Dataset (the file on disk) with naming/metadata/display state.
    NOTE(review): several methods depend on names supplied elsewhere in the
    application (`datatypes_registry`, `MetadataCollection`) and on
    mapper-provided attributes (`children`, `purged`,
    `implicitly_converted_datasets`) -- confirm availability before calling.
    """
    states = Dataset.states
    permitted_actions = Dataset.permitted_actions
    def __init__( self, id=None, hid=None, name=None, info=None, blurb=None, peek=None, extension=None,
                  dbkey=None, metadata=None, history=None, dataset=None, deleted=False, designation=None,
                  parent_id=None, validation_errors=None, visible=True, create_dataset = False ):
        self.name = name or "Unnamed dataset"
        self.id = id
        self.info = info
        self.blurb = blurb
        self.peek = peek
        self.extension = extension
        self.designation = designation
        self.metadata = metadata or dict()
        if dbkey: #dbkey is stored in metadata, only set if non-zero, or else we could clobber one supplied by input 'metadata'
            self.dbkey = dbkey
        self.deleted = deleted
        self.visible = visible
        # Relationships
        if not dataset and create_dataset:
            dataset = Dataset( state=Dataset.states.NEW )
            context.add( dataset )
            context.flush()
        self.dataset = dataset
        self.parent_id = parent_id
        self.validation_errors = validation_errors
    @property
    def ext( self ):
        return self.extension
    def get_dataset_state( self ):
        return self.dataset.state
    def set_dataset_state ( self, state ):
        self.dataset.state = state
        context.add( self.dataset )
        context.flush() #flush here, because hda.flush() won't flush the Dataset object
    state = property( get_dataset_state, set_dataset_state )
    def get_file_name( self ):
        return self.dataset.get_file_name()
    def set_file_name (self, filename):
        return self.dataset.set_file_name( filename )
    file_name = property( get_file_name, set_file_name )
    @property
    def extra_files_path( self ):
        return self.dataset.extra_files_path
    @property
    def datatype( self ):
        return datatypes_registry.get_datatype_by_extension( self.extension )
    def get_metadata( self ):
        if not hasattr( self, '_metadata_collection' ) or self._metadata_collection.parent != self: #using weakref to store parent (to prevent circ ref), does a context.clear() cause parent to be invalidated, while still copying over this non-database attribute?
            self._metadata_collection = MetadataCollection( self )
        return self._metadata_collection
    def set_metadata( self, bunch ):
        # Needs to accept a MetadataCollection, a bunch, or a dict
        self._metadata = self.metadata.make_dict_copy( bunch )
    metadata = property( get_metadata, set_metadata )
    # This provide backwards compatibility with using the old dbkey
    # field in the database.  That field now maps to "old_dbkey" (see mapping.py).
    def get_dbkey( self ):
        dbkey = self.metadata.dbkey
        if not isinstance(dbkey, list): dbkey = [dbkey]
        if dbkey in [[None], []]: return "?"
        return dbkey[0]
    def set_dbkey( self, value ):
        if "dbkey" in self.datatype.metadata_spec:
            if not isinstance(value, list):
                self.metadata.dbkey = [value]
            else:
                self.metadata.dbkey = value
    dbkey = property( get_dbkey, set_dbkey )
    def change_datatype( self, new_ext ):
        self.clear_associated_files()
        datatypes_registry.change_datatype( self, new_ext )
    def get_size( self ):
        """Returns the size of the data on disk"""
        return self.dataset.get_size()
    def set_size( self ):
        """Returns the size of the data on disk"""
        return self.dataset.set_size()
    def has_data( self ):
        """Detects whether there is any data"""
        return self.dataset.has_data()
    def get_raw_data( self ):
        """Returns the full data. To stream it open the file_name and read/write as needed"""
        return self.datatype.get_raw_data( self )
    def write_from_stream( self, stream ):
        """Writes data from a stream"""
        self.datatype.write_from_stream(self, stream)
    def set_raw_data( self, data ):
        """Saves the data on the disc"""
        self.datatype.set_raw_data(self, data)
    def get_mime( self ):
        """Returns the mime type of the data"""
        return datatypes_registry.get_mimetype_by_extension( self.extension.lower() )
    def set_peek( self, is_multi_byte=False ):
        return self.datatype.set_peek( self, is_multi_byte=is_multi_byte )
    def init_meta( self, copy_from=None ):
        return self.datatype.init_meta( self, copy_from=copy_from )
    def set_meta( self, **kwd ):
        self.clear_associated_files( metadata_safe = True )
        return self.datatype.set_meta( self, **kwd )
    def missing_meta( self, **kwd ):
        return self.datatype.missing_meta( self, **kwd )
    def as_display_type( self, type, **kwd ):
        return self.datatype.as_display_type( self, type, **kwd )
    def display_peek( self ):
        return self.datatype.display_peek( self )
    def display_name( self ):
        return self.datatype.display_name( self )
    def display_info( self ):
        return self.datatype.display_info( self )
    def get_converted_files_by_type( self, file_type ):
        """Return the datasets of implicit conversions to `file_type` that are not deleted."""
        valid = []
        for assoc in self.implicitly_converted_datasets:
            if not assoc.deleted and assoc.type == file_type:
                valid.append( assoc.dataset )
        return valid
    def clear_associated_files( self, metadata_safe = False, purge = False ):
        # Fixed: was `raise 'Unimplemented'` -- string exceptions were removed
        # in Python 2.6 (raising one produces a TypeError), so the intended
        # message was never surfaced. Raise a real exception instead;
        # subclasses override this method.
        raise Exception( 'Unimplemented' )
    def get_child_by_designation(self, designation):
        """Return the first child with the given designation, or None."""
        for child in self.children:
            if child.designation == designation:
                return child
        return None
    def get_converter_types(self):
        return self.datatype.get_converter_types( self, datatypes_registry)
    def find_conversion_destination( self, accepted_formats, **kwd ):
        """Returns ( target_ext, exisiting converted dataset )"""
        return self.datatype.find_conversion_destination( self, accepted_formats, datatypes_registry, **kwd )
    def add_validation_error( self, validation_error ):
        self.validation_errors.append( validation_error )
    def extend_validation_errors( self, validation_errors ):
        self.validation_errors.extend(validation_errors)
    def mark_deleted( self, include_children=True ):
        self.deleted = True
        if include_children:
            for child in self.children:
                child.mark_deleted()
    def mark_undeleted( self, include_children=True ):
        self.deleted = False
        if include_children:
            for child in self.children:
                child.mark_undeleted()
    def undeletable( self ):
        # Purged datasets have had their file removed and cannot come back.
        if self.purged:
            return False
        return True
    @property
    def source_library_dataset( self ):
        """Walk the copied-from chain to find the originating library dataset.

        Returns ( ldda, library_dataset ) when found, ( None, None ) otherwise.
        """
        def get_source( dataset ):
            if isinstance( dataset, LibraryDatasetDatasetAssociation ):
                if dataset.library_dataset:
                    return ( dataset, dataset.library_dataset )
            if dataset.copied_from_library_dataset_dataset_association:
                source = get_source( dataset.copied_from_library_dataset_dataset_association )
                if source:
                    return source
            if dataset.copied_from_history_dataset_association:
                source = get_source( dataset.copied_from_history_dataset_association )
                if source:
                    return source
            return ( None, None )
        return get_source( self )
299
300
class HistoryDatasetAssociation( DatasetInstance ):
    """A DatasetInstance that lives in a user's history (an HDA)."""
    def __init__( self,
                  hid = None,
                  history = None,
                  copied_from_history_dataset_association = None,
                  copied_from_library_dataset_dataset_association = None,
                  **kwd ):
        DatasetInstance.__init__( self, **kwd )
        self.hid = hid
        # Relationships
        self.history = history
        self.copied_from_history_dataset_association = copied_from_history_dataset_association
        self.copied_from_library_dataset_dataset_association = copied_from_library_dataset_dataset_association
    def copy( self, copy_children = False, parent_id = None, target_history = None ):
        """Copy this HDA (sharing the underlying Dataset) into target_history.

        Flush ordering matters throughout: the new HDA must be flushed before
        metadata is assigned, because MetadataFiles require dataset.id.
        """
        hda = HistoryDatasetAssociation( hid=self.hid,
                                         name=self.name,
                                         info=self.info,
                                         blurb=self.blurb,
                                         peek=self.peek,
                                         extension=self.extension,
                                         dbkey=self.dbkey,
                                         dataset = self.dataset,
                                         visible=self.visible,
                                         deleted=self.deleted,
                                         parent_id=parent_id,
                                         copied_from_history_dataset_association=self,
                                         history = target_history )
        context.add( hda )
        context.flush()
        hda.set_size()
        # Need to set after flushed, as MetadataFiles require dataset.id
        hda.metadata = self.metadata
        if copy_children:
            for child in self.children:
                child_copy = child.copy( copy_children = copy_children, parent_id = hda.id )
        if not self.datatype.copy_safe_peek:
            # In some instances peek relies on dataset_id, i.e. gmaj.zip for viewing MAFs
            hda.set_peek()
        context.flush()
        return hda
    def to_library_dataset_dataset_association( self, target_folder, replace_dataset=None, parent_id=None ):
        """Create an LDDA (and LibraryDataset, unless replacing) from this HDA.

        Children are converted recursively. As in copy(), metadata is only
        assigned after the LDDA has been flushed.
        """
        if replace_dataset:
            # The replace_dataset param ( when not None ) refers to a LibraryDataset that is being replaced with a new version.
            library_dataset = replace_dataset
        else:
            # If replace_dataset is None, the Library level permissions will be taken from the folder and applied to the new
            # LibraryDataset, and the current user's DefaultUserPermissions will be applied to the associated Dataset.
            library_dataset = LibraryDataset( folder=target_folder, name=self.name, info=self.info )
            context.add( library_dataset )
            context.flush()
        ldda = LibraryDatasetDatasetAssociation( name=self.name,
                                                 info=self.info,
                                                 blurb=self.blurb,
                                                 peek=self.peek,
                                                 extension=self.extension,
                                                 dbkey=self.dbkey,
                                                 dataset=self.dataset,
                                                 library_dataset=library_dataset,
                                                 visible=self.visible,
                                                 deleted=self.deleted,
                                                 parent_id=parent_id,
                                                 copied_from_history_dataset_association=self,
                                                 user=self.history.user )
        context.add( ldda )
        context.flush()
        # Permissions must be the same on the LibraryDatasetDatasetAssociation and the associated LibraryDataset
        # Must set metadata after ldda flushed, as MetadataFiles require ldda.id
        ldda.metadata = self.metadata
        if not replace_dataset:
            target_folder.add_library_dataset( library_dataset, genome_build=ldda.dbkey )
            context.add( target_folder )
            context.flush()
        # Point the LibraryDataset at this LDDA as its current version.
        library_dataset.library_dataset_dataset_association_id = ldda.id
        context.add( library_dataset )
        context.flush()
        for child in self.children:
            child_copy = child.to_library_dataset_dataset_association( target_folder=target_folder, replace_dataset=replace_dataset, parent_id=ldda.id )
        if not self.datatype.copy_safe_peek:
            # In some instances peek relies on dataset_id, i.e. gmaj.zip for viewing MAFs
            ldda.set_peek()
        context.flush()
        return ldda
    def clear_associated_files( self, metadata_safe = False, purge = False ):
        """Clear implicitly converted datasets associated with this HDA."""
        # metadata_safe = True means to only clear when assoc.metadata_safe == False
        for assoc in self.implicitly_converted_datasets:
            if not metadata_safe or not assoc.metadata_safe:
                assoc.clear( purge = purge )
388
389
390
class LibraryDatasetDatasetAssociation( DatasetInstance ):
    """A DatasetInstance that lives in a library (a version of a LibraryDataset)."""
    def __init__( self,
                  copied_from_history_dataset_association=None,
                  copied_from_library_dataset_dataset_association=None,
                  library_dataset=None,
                  user=None,
                  **kwd ):
        DatasetInstance.__init__( self, **kwd )
        self.copied_from_history_dataset_association = copied_from_history_dataset_association
        self.copied_from_library_dataset_dataset_association = copied_from_library_dataset_dataset_association
        self.library_dataset = library_dataset
        self.user = user
    def to_history_dataset_association( self, target_history, parent_id=None ):
        """Create an HDA in target_history from this LDDA (sharing the Dataset).

        Metadata is assigned only after the flush, as MetadataFiles require
        dataset.id; children are converted recursively.
        """
        hid = target_history._next_hid()
        hda = HistoryDatasetAssociation( name=self.name,
                                         info=self.info,
                                         blurb=self.blurb,
                                         peek=self.peek,
                                         extension=self.extension,
                                         dbkey=self.dbkey,
                                         dataset=self.dataset,
                                         visible=self.visible,
                                         deleted=self.deleted,
                                         parent_id=parent_id,
                                         copied_from_library_dataset_dataset_association=self,
                                         history=target_history,
                                         hid=hid )
        context.flush()
        hda.metadata = self.metadata #need to set after flushed, as MetadataFiles require dataset.id
        for child in self.children:
            child_copy = child.to_history_dataset_association( target_history=target_history, parent_id=hda.id )
        if not self.datatype.copy_safe_peek:
            hda.set_peek() #in some instances peek relies on dataset_id, i.e. gmaj.zip for viewing MAFs
        context.add( hda )
        context.flush()
        return hda
    def copy( self, copy_children = False, parent_id = None, target_folder = None ):
        """Copy this LDDA (sharing the underlying Dataset), optionally with children."""
        # NOTE(review): `folder=target_folder` is forwarded via **kwd to
        # DatasetInstance.__init__, which declares no such parameter -- calling
        # copy() would raise TypeError. Left unchanged; verify against callers.
        ldda = LibraryDatasetDatasetAssociation( name=self.name,
                                                 info=self.info,
                                                 blurb=self.blurb,
                                                 peek=self.peek,
                                                 extension=self.extension,
                                                 dbkey=self.dbkey,
                                                 dataset=self.dataset,
                                                 visible=self.visible,
                                                 deleted=self.deleted,
                                                 parent_id=parent_id,
                                                 copied_from_library_dataset_dataset_association=self,
                                                 folder=target_folder )
        context.add( ldda )
        context.flush()
        # Need to set after flushed, as MetadataFiles require dataset.id
        ldda.metadata = self.metadata
        if copy_children:
            for child in self.children:
                child_copy = child.copy( copy_children = copy_children, parent_id = ldda.id )
        if not self.datatype.copy_safe_peek:
            # In some instances peek relies on dataset_id, i.e. gmaj.zip for viewing MAFs
            ldda.set_peek()
        context.flush()
        return ldda
    def clear_associated_files( self, metadata_safe = False, purge = False ):
        # Library datasets keep their files; nothing to clear.
        return
    def get_library_item_info_templates( self, template_list=None, restrict=False ):
        """Collect info templates for this LDDA (and, via library_dataset, its parents).

        Fixed: the default used to be a shared mutable list (`template_list=[]`),
        which accumulated templates across calls. Passing a list explicitly
        still extends and returns that same list.
        """
        if template_list is None:
            template_list = []
        # If restrict is True, we'll return only those templates directly associated with this LibraryDatasetDatasetAssociation
        if self.library_dataset_dataset_info_template_associations:
            template_list.extend( [ lddita.library_item_info_template for lddita in self.library_dataset_dataset_info_template_associations if lddita.library_item_info_template not in template_list ] )
        self.library_dataset.get_library_item_info_templates( template_list, restrict )
        return template_list
460
461
462
class LibraryDataset( object ):
    # This class acts as a proxy to the currently selected LDDA
    def __init__( self, folder=None, order_id=None, name=None, info=None, library_dataset_dataset_association=None, **kwd ):
        self.folder = folder
        self.order_id = order_id
        self.name = name
        self.info = info
        self.library_dataset_dataset_association = library_dataset_dataset_association
    def set_library_dataset_dataset_association( self, ldda ):
        """Make `ldda` the current version of this LibraryDataset and link it back."""
        self.library_dataset_dataset_association = ldda
        ldda.library_dataset = self
        context.add_all( ( self, ldda ) )
        context.flush()
    def get_info( self ):
        # Proxy to the current LDDA when one is selected.
        if self.library_dataset_dataset_association:
            return self.library_dataset_dataset_association.info
        elif self._info:
            return self._info
        else:
            return 'no info'
    def set_info( self, info ):
        self._info = info
    info = property( get_info, set_info )
    def get_name( self ):
        # Proxy to the current LDDA when one is selected.
        if self.library_dataset_dataset_association:
            return self.library_dataset_dataset_association.name
        elif self._name:
            return self._name
        else:
            return 'Unnamed dataset'
    def set_name( self, name ):
        self._name = name
    name = property( get_name, set_name )
    def display_name( self ):
        # Fixed: the result was computed but discarded (missing `return`),
        # so callers always received None.
        return self.library_dataset_dataset_association.display_name()
    def get_purged( self ):
        return self.library_dataset_dataset_association.dataset.purged
    def set_purged( self, purged ):
        # Purging is one-way and is not performed through this proxy.
        if purged:
            raise Exception( "Not implemented" )
        if not purged and self.purged:
            raise Exception( "Cannot unpurge once purged" )
    purged = property( get_purged, set_purged )
    def get_library_item_info_templates( self, template_list=None, restrict=False ):
        """Collect info templates for this LibraryDataset (and its folder unless restricted).

        Fixed: the default used to be a shared mutable list (`template_list=[]`),
        which accumulated templates across calls. Passing a list explicitly
        still extends and returns that same list.
        """
        if template_list is None:
            template_list = []
        # If restrict is True, we'll return only those templates directly associated with this LibraryDataset
        if self.library_dataset_info_template_associations:
            template_list.extend( [ ldita.library_item_info_template for ldita in self.library_dataset_info_template_associations if ldita.library_item_info_template not in template_list ] )
        if restrict not in [ 'True', True ]:
            self.folder.get_library_item_info_templates( template_list, restrict )
        return template_list
513
514##tables
515
516
# Table for raw dataset files. `metadata` is the module-level SQLAlchemy
# MetaData object, not Galaxy dataset metadata; `now` supplies UTC timestamps.
Dataset.table = Table( "dataset", metadata,
    Column( "id", Integer, primary_key=True ),
    Column( "create_time", DateTime, default=now ),
    Column( "update_time", DateTime, index=True, default=now, onupdate=now ),
    Column( "state", TrimmedString( 64 ) ),
    Column( "deleted", Boolean, index=True, default=False ),
    Column( "purged", Boolean, index=True, default=False ),
    Column( "purgable", Boolean, default=True ),
    Column( "external_filename" , TEXT ),
    Column( "_extra_files_path", TEXT ),
    Column( 'file_size', Numeric( 15, 0 ) ) )
528
529
530
# HDA table: links a Dataset into a history; the copied_from_* columns record
# provenance and parent_id supports child (composite) datasets.
HistoryDatasetAssociation.table = Table( "history_dataset_association", metadata,
    Column( "id", Integer, primary_key=True ),
    Column( "dataset_id", Integer, ForeignKey( "dataset.id" ), index=True ),
    Column( "create_time", DateTime, default=now ),
    Column( "update_time", DateTime, default=now, onupdate=now ),
    Column( "copied_from_history_dataset_association_id", Integer, ForeignKey( "history_dataset_association.id" ), nullable=True ),
    Column( "copied_from_library_dataset_dataset_association_id", Integer, ForeignKey( "library_dataset_dataset_association.id" ), nullable=True ),
    Column( "hid", Integer ),
    Column( "name", TrimmedString( 255 ) ),
    Column( "info", TrimmedString( 255 ) ),
    Column( "blurb", TrimmedString( 255 ) ),
    Column( "peek" , TEXT ),
    Column( "extension", TrimmedString( 64 ) ),
    Column( "metadata", MetadataType(), key="_metadata" ),
    Column( "parent_id", Integer, ForeignKey( "history_dataset_association.id" ), nullable=True ),
    Column( "designation", TrimmedString( 255 ) ),
    Column( "deleted", Boolean, index=True, default=False ),
    Column( "visible", Boolean ) )
549
550
# LDDA table: a versioned association between a library dataset and a Dataset.
# The copied_from_* FKs use use_alter to break the circular dependency with
# the history_dataset_association table.
LibraryDatasetDatasetAssociation.table = Table( "library_dataset_dataset_association", metadata,
    Column( "id", Integer, primary_key=True ),
    Column( "library_dataset_id", Integer, ForeignKey( "library_dataset.id" ), index=True ),
    Column( "dataset_id", Integer, ForeignKey( "dataset.id" ), index=True ),
    Column( "create_time", DateTime, default=now ),
    Column( "update_time", DateTime, default=now, onupdate=now ),
    Column( "copied_from_history_dataset_association_id", Integer, ForeignKey( "history_dataset_association.id", use_alter=True, name='history_dataset_association_dataset_id_fkey' ), nullable=True ),
    Column( "copied_from_library_dataset_dataset_association_id", Integer, ForeignKey( "library_dataset_dataset_association.id", use_alter=True, name='library_dataset_dataset_association_id_fkey' ), nullable=True ),
    Column( "name", TrimmedString( 255 ) ),
    Column( "info", TrimmedString( 255 ) ),
    Column( "blurb", TrimmedString( 255 ) ),
    Column( "peek" , TEXT ),
    Column( "extension", TrimmedString( 64 ) ),
    Column( "metadata", MetadataType(), key="_metadata" ),
    Column( "parent_id", Integer, ForeignKey( "library_dataset_dataset_association.id" ), nullable=True ),
    Column( "designation", TrimmedString( 255 ) ),
    Column( "deleted", Boolean, index=True, default=False ),
    Column( "visible", Boolean ),
    Column( "message", TrimmedString( 255 ) ) )
570
# LibraryDataset table: a stable handle in a library folder whose
# library_dataset_dataset_association_id points at the current LDDA version.
LibraryDataset.table = Table( "library_dataset", metadata,
    Column( "id", Integer, primary_key=True ),
    Column( "library_dataset_dataset_association_id", Integer, ForeignKey( "library_dataset_dataset_association.id", use_alter=True, name="library_dataset_dataset_association_id_fk" ), nullable=True, index=True ),#current version of dataset, if null, there is not a current version selected
    Column( "order_id", Integer ),
    Column( "create_time", DateTime, default=now ),
    Column( "update_time", DateTime, default=now, onupdate=now ),
    Column( "name", TrimmedString( 255 ), key="_name" ), #when not None/null this will supercede display in library (but not when imported into user's history?)
    Column( "info", TrimmedString( 255 ),  key="_info" ), #when not None/null this will supercede display in library (but not when imported into user's history?)
    Column( "deleted", Boolean, index=True, default=False ) )
580
581
582
583##mappers
584
585
# Map Dataset with collections of its HDA/LDDA associations; the "active_*"
# variants filter out deleted associations in the join condition.
mapper( Dataset, Dataset.table,
    properties=dict(
        history_associations=relation(
            HistoryDatasetAssociation,
            primaryjoin=( Dataset.table.c.id == HistoryDatasetAssociation.table.c.dataset_id ) ),
        active_history_associations=relation(
            HistoryDatasetAssociation,
            primaryjoin=( ( Dataset.table.c.id == HistoryDatasetAssociation.table.c.dataset_id ) & ( HistoryDatasetAssociation.table.c.deleted == False ) ) ),
        library_associations=relation(
            LibraryDatasetDatasetAssociation,
            primaryjoin=( Dataset.table.c.id == LibraryDatasetDatasetAssociation.table.c.dataset_id ) ),
        active_library_associations=relation(
            LibraryDatasetDatasetAssociation,
            primaryjoin=( ( Dataset.table.c.id == LibraryDatasetDatasetAssociation.table.c.dataset_id ) & ( LibraryDatasetDatasetAssociation.table.c.deleted == False ) ) )
            ) )
601
602
# Map HDA with its eager-loaded Dataset, self-referential copied-from /
# parent-child relations, and a visible_children collection filtered on the
# `visible` flag.
mapper( HistoryDatasetAssociation, HistoryDatasetAssociation.table,
    properties=dict(
        dataset=relation(
            Dataset,
            primaryjoin=( Dataset.table.c.id == HistoryDatasetAssociation.table.c.dataset_id ), lazy=False ),
        # .history defined in History mapper
        copied_to_history_dataset_associations=relation(
            HistoryDatasetAssociation,
            primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_history_dataset_association_id == HistoryDatasetAssociation.table.c.id ),
            backref=backref( "copied_from_history_dataset_association", primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_history_dataset_association_id == HistoryDatasetAssociation.table.c.id ), remote_side=[HistoryDatasetAssociation.table.c.id], uselist=False ) ),
        copied_to_library_dataset_dataset_associations=relation(
            LibraryDatasetDatasetAssociation,
            primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ),
            backref=backref( "copied_from_history_dataset_association", primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id], uselist=False ) ),
        children=relation(
            HistoryDatasetAssociation,
            primaryjoin=( HistoryDatasetAssociation.table.c.parent_id == HistoryDatasetAssociation.table.c.id ),
            backref=backref( "parent", primaryjoin=( HistoryDatasetAssociation.table.c.parent_id == HistoryDatasetAssociation.table.c.id ), remote_side=[HistoryDatasetAssociation.table.c.id], uselist=False ) ),
        visible_children=relation(
            HistoryDatasetAssociation,
            primaryjoin=( ( HistoryDatasetAssociation.table.c.parent_id == HistoryDatasetAssociation.table.c.id ) & ( HistoryDatasetAssociation.table.c.visible == True ) ) )
            ) )
625
# Map LibraryDatasetDatasetAssociation.  Mirrors the HDA mapper: copy lineage,
# parent/child hierarchy, and a link back to the owning LibraryDataset.
mapper( LibraryDatasetDatasetAssociation, LibraryDatasetDatasetAssociation.table,
    properties=dict(
        dataset=relation( Dataset ),
        # the LibraryDataset folder entry this LDDA version belongs to
        library_dataset = relation( LibraryDataset,
        primaryjoin=( LibraryDatasetDatasetAssociation.table.c.library_dataset_id == LibraryDataset.table.c.id ) ),
        # LDDAs that were copied from this LDDA (self-referential copy lineage)
        copied_to_library_dataset_dataset_associations=relation(
            LibraryDatasetDatasetAssociation,
            primaryjoin=( LibraryDatasetDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ),
            backref=backref( "copied_from_library_dataset_dataset_association", primaryjoin=( LibraryDatasetDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id] ) ),
        # HDAs that were copied from this LDDA (cross-type copy lineage)
        copied_to_history_dataset_associations=relation(
            HistoryDatasetAssociation,
            primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ),
            backref=backref( "copied_from_library_dataset_dataset_association", primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id], uselist=False ) ),
        # self-referential parent/child composite-dataset hierarchy
        children=relation(
            LibraryDatasetDatasetAssociation,
            primaryjoin=( LibraryDatasetDatasetAssociation.table.c.parent_id == LibraryDatasetDatasetAssociation.table.c.id ),
            backref=backref( "parent", primaryjoin=( LibraryDatasetDatasetAssociation.table.c.parent_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id] ) ),
        # children additionally filtered to visible == True
        visible_children=relation(
            LibraryDatasetDatasetAssociation,
            primaryjoin=( ( LibraryDatasetDatasetAssociation.table.c.parent_id == LibraryDatasetDatasetAssociation.table.c.id ) & ( LibraryDatasetDatasetAssociation.table.c.visible == True ) ) )
        ) )
647
# Map LibraryDataset: one "current" LDDA via library_dataset_dataset_association,
# and a read-only (viewonly=True) collection of the superseded versions —
# expired_datasets is every LDDA of this LibraryDataset EXCEPT the current one
# (note the not_() in the join).
mapper( LibraryDataset, LibraryDataset.table,
    properties=dict(
        library_dataset_dataset_association=relation( LibraryDatasetDatasetAssociation, primaryjoin=( LibraryDataset.table.c.library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ) ),
        expired_datasets = relation( LibraryDatasetDatasetAssociation, foreign_keys=[LibraryDataset.table.c.id,LibraryDataset.table.c.library_dataset_dataset_association_id ], primaryjoin=( ( LibraryDataset.table.c.id == LibraryDatasetDatasetAssociation.table.c.library_dataset_id ) & ( not_( LibraryDataset.table.c.library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ) ) ), viewonly=True, uselist=True )
        ) )
653
654
def __guess_dataset_by_filename( filename ):
    """Return the Dataset whose id is encoded in ``filename``, or None.

    Dataset files are conventionally named ``dataset_<id>.dat``; parse the id
    out of the basename and look the Dataset up.  Any parsing or lookup
    failure yields None -- guessing is best-effort by design.
    """
    try:
        # os.path.split always returns a 2-tuple, so the basename is fields[-1];
        # the original truthiness check on the tuple was dead code and is gone.
        basename = os.path.split( filename )[-1]
        if basename.startswith( 'dataset_' ) and basename.endswith( '.dat' ): #dataset_%d.dat
            return Dataset.get( int( basename[ len( 'dataset_' ): -len( '.dat' ) ] ) )
    except Exception:
        # was a bare except:, which also swallowed SystemExit/KeyboardInterrupt
        pass #some parsing error, we can't guess Dataset
    return None
665
def upgrade(migrate_engine):
    """Run two data-cleanup passes over the dataset tables.

    Pass 1: a Dataset flagged deleted (but not purged) that still has at least
    one live HDA/LDDA association is un-deleted (and its file size restored if
    missing) -- fixes datasets wrongly deleted while still shared.

    Pass 2: Datasets whose external_filename points at another dataset's
    ``dataset_<id>.dat`` file (an artifact of sharing before HDAs existed) get
    their associations re-pointed at the real Dataset; the duplicate record is
    then marked deleted+purged without touching the file contents.
    """
    metadata.bind = migrate_engine
    log.debug( "Fixing a discrepancy concerning deleted shared history items." )
    affected_items = 0
    start_time = time.time()
    for dataset in context.query( Dataset ).filter( and_( Dataset.deleted == True, Dataset.purged == False ) ):
        for dataset_instance in dataset.history_associations + dataset.library_associations:
            if not dataset_instance.deleted:
                # at least one association is still live: the dataset must live too
                dataset.deleted = False
                if dataset.file_size in [ None, 0 ]:
                    dataset.set_size() #Restore filesize
                affected_items += 1
                break
    context.flush()
    log.debug( "%i items affected, and restored." % ( affected_items ) )
    log.debug( "Time elapsed: %s" % ( time.time() - start_time ) )

    #fix share before hda
    log.debug( "Fixing a discrepancy concerning cleaning up deleted history items shared before HDAs." )
    dataset_by_filename = {}  # cache: file_name -> guessed Dataset (or None)
    changed_associations = 0
    start_time = time.time()
    for dataset in context.query( Dataset ).filter( Dataset.external_filename.like( '%dataset_%.dat' ) ):
        if dataset.file_name in dataset_by_filename:
            guessed_dataset = dataset_by_filename[ dataset.file_name ]
        else:
            guessed_dataset = __guess_dataset_by_filename( dataset.file_name )
            # only trust the guess if it resolves to the exact same path
            if guessed_dataset and dataset.file_name != guessed_dataset.file_name:#not os.path.samefile( dataset.file_name, guessed_dataset.file_name ):
                guessed_dataset = None
            dataset_by_filename[ dataset.file_name ] = guessed_dataset

        if guessed_dataset is not None and guessed_dataset.id != dataset.id: #could we have a self referential dataset?
            for dataset_instance in dataset.history_associations + dataset.library_associations:
                dataset_instance.dataset = guessed_dataset
                changed_associations += 1
            #mark original Dataset as deleted and purged, it is no longer in use, but do not delete file_name contents
            dataset.deleted = True
            dataset.external_filename = "Dataset was result of share before HDA, and has been replaced: %s mapped to Dataset %s" % ( dataset.external_filename, guessed_dataset.id )
            dataset.purged = True #we don't really purge the file here, but we mark it as purged, since this dataset is now defunct
    context.flush()
    # BUGFIX: this summary previously reused pass 1's "items affected, and
    # restored" wording, which was wrong for re-pointed associations.
    log.debug( "%i dataset associations re-pointed to their real dataset." % ( changed_associations ) )
    log.debug( "Time elapsed: %s" % ( time.time() - start_time ) )
708
def downgrade(migrate_engine):
    """No-op: this migration is a one-way data fixup and cannot be reversed."""
    metadata.bind = migrate_engine
    log.debug( "Downgrade is not possible." )
712