/lib/galaxy/model/migrate/versions/0005_cleanup_datasets_fix.py
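- """
- Fix inconsistencies left by earlier dataset cleanup code: datasets that were
- marked deleted while still referenced by an undeleted history or library
- dataset association, and datasets created by sharing a history before HDAs
- existed, whose external_filename points at another dataset's primary file.
- A local copy of the model classes, tables, and mappers used by this script is
- defined below.
- """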
- import sys, logging, os, time, datetime, errno
- log = logging.getLogger( __name__ )
- log.setLevel(logging.DEBUG)
- handler = logging.StreamHandler( sys.stdout )
- format = "%(name)s %(levelname)s %(asctime)s %(message)s"
- formatter = logging.Formatter( format )
- handler.setFormatter( formatter )
- log.addHandler( handler )
- from sqlalchemy import and_
- from sqlalchemy import *
- from sqlalchemy.orm import *
- from migrate import *
- from migrate.changeset import *
- from galaxy.model.custom_types import *
- from galaxy.util.bunch import Bunch
- now = datetime.datetime.utcnow
- metadata = MetaData()
- context = scoped_session( sessionmaker( autoflush=False, autocommit=True ) )
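- # Note: this is a standalone session for the migration; with autocommit=True,
- # each flush() below persists its changes immediately.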
- ## classes
- def get_permitted_actions( **kwds ):
- return Bunch()
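- # Map a numeric dataset id onto a list of three-digit directory names so that
- # at most 1000 dataset files share a single directory on disk,
- # e.g. id 1234567 -> [ "001", "234" ] -> <file_path>/001/234/dataset_1234567.dat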
- def directory_hash_id( id ):
- s = str( id )
- l = len( s )
- # Shortcut -- ids 0-999 go under ../000/
- if l < 4:
- return [ "000" ]
- # Pad with zeros until a multiple of three
- padded = ( ( 3 - len( s ) % 3 ) * "0" ) + s
- # Drop the last three digits -- 1000 files per directory
- padded = padded[:-3]
- # Break into chunks of three
- return [ padded[i*3:(i+1)*3] for i in range( len( padded ) // 3 ) ]
- class Dataset( object ):
- states = Bunch( NEW = 'new',
- UPLOAD = 'upload',
- QUEUED = 'queued',
- RUNNING = 'running',
- OK = 'ok',
- EMPTY = 'empty',
- ERROR = 'error',
- DISCARDED = 'discarded' )
- permitted_actions = get_permitted_actions( filter='DATASET' )
- file_path = "/tmp/"
- engine = None
- def __init__( self, id=None, state=None, external_filename=None, extra_files_path=None, file_size=None, purgable=True ):
- self.id = id
- self.state = state
- self.deleted = False
- self.purged = False
- self.purgable = purgable
- self.external_filename = external_filename
- self._extra_files_path = extra_files_path
- self.file_size = file_size
- def get_file_name( self ):
- if not self.external_filename:
- assert self.id is not None, "ID must be set before filename used (commit the object)"
- # First try filename directly under file_path
- filename = os.path.join( self.file_path, "dataset_%d.dat" % self.id )
- # Only use that filename if it already exists (backward compatibility),
- # otherwise construct hashed path
- if not os.path.exists( filename ):
- dir = os.path.join( self.file_path, *directory_hash_id( self.id ) )
- # Create directory if it does not exist
- try:
- os.makedirs( dir )
- except OSError, e:
- # File Exists is okay, otherwise reraise
- if e.errno != errno.EEXIST:
- raise
- # Return filename inside hashed directory
- return os.path.abspath( os.path.join( dir, "dataset_%d.dat" % self.id ) )
- else:
- filename = self.external_filename
- # Make filename absolute
- return os.path.abspath( filename )
- def set_file_name ( self, filename ):
- if not filename:
- self.external_filename = None
- else:
- self.external_filename = filename
- file_name = property( get_file_name, set_file_name )
- @property
- def extra_files_path( self ):
- if self._extra_files_path:
- path = self._extra_files_path
- else:
- path = os.path.join( self.file_path, "dataset_%d_files" % self.id )
- #only use path directly under self.file_path if it exists
- if not os.path.exists( path ):
- path = os.path.join( os.path.join( self.file_path, *directory_hash_id( self.id ) ), "dataset_%d_files" % self.id )
- # Make path absolute
- return os.path.abspath( path )
- def get_size( self ):
- """Returns the size of the data on disk"""
- if self.file_size:
- return self.file_size
- else:
- try:
- return os.path.getsize( self.file_name )
- except OSError:
- return 0
- def set_size( self ):
- """Returns the size of the data on disk"""
- try:
- if not self.file_size:
- self.file_size = os.path.getsize( self.file_name )
- except OSError:
- self.file_size = 0
- def has_data( self ):
- """Detects whether there is any data"""
- return self.get_size() > 0
- def mark_deleted( self, include_children=True ):
- self.deleted = True
- # FIXME: sqlalchemy will replace this
- def _delete(self):
- """Remove the file that corresponds to this data"""
- try:
- os.remove( self.file_name )
- except OSError, e:
- log.critical('%s delete error %s' % (self.__class__.__name__, e))
- class DatasetInstance( object ):
- """A base class for all 'dataset instances', HDAs, LDAs, etc"""
- states = Dataset.states
- permitted_actions = Dataset.permitted_actions
- def __init__( self, id=None, hid=None, name=None, info=None, blurb=None, peek=None, extension=None,
- dbkey=None, metadata=None, history=None, dataset=None, deleted=False, designation=None,
- parent_id=None, validation_errors=None, visible=True, create_dataset = False ):
- self.name = name or "Unnamed dataset"
- self.id = id
- self.info = info
- self.blurb = blurb
- self.peek = peek
- self.extension = extension
- self.designation = designation
- self.metadata = metadata or dict()
- if dbkey: # dbkey is stored in metadata; only set if non-empty, or else we could clobber a value supplied by the input 'metadata'
- self.dbkey = dbkey
- self.deleted = deleted
- self.visible = visible
- # Relationships
- if not dataset and create_dataset:
- dataset = Dataset( state=Dataset.states.NEW )
- context.add( dataset )
- context.flush()
- self.dataset = dataset
- self.parent_id = parent_id
- self.validation_errors = validation_errors
- @property
- def ext( self ):
- return self.extension
- def get_dataset_state( self ):
- return self.dataset.state
- def set_dataset_state ( self, state ):
- self.dataset.state = state
- context.add( self.dataset )
- context.flush() #flush here, because hda.flush() won't flush the Dataset object
- state = property( get_dataset_state, set_dataset_state )
- def get_file_name( self ):
- return self.dataset.get_file_name()
- def set_file_name (self, filename):
- return self.dataset.set_file_name( filename )
- file_name = property( get_file_name, set_file_name )
- @property
- def extra_files_path( self ):
- return self.dataset.extra_files_path
- @property
- def datatype( self ):
- return datatypes_registry.get_datatype_by_extension( self.extension )
- def get_metadata( self ):
- if not hasattr( self, '_metadata_collection' ) or self._metadata_collection.parent != self: #using weakref to store parent (to prevent circ ref), does a context.clear() cause parent to be invalidated, while still copying over this non-database attribute?
- self._metadata_collection = MetadataCollection( self )
- return self._metadata_collection
- def set_metadata( self, bunch ):
- # Needs to accept a MetadataCollection, a bunch, or a dict
- self._metadata = self.metadata.make_dict_copy( bunch )
- metadata = property( get_metadata, set_metadata )
- # This provides backward compatibility with the old dbkey
- # field in the database. That field now maps to "old_dbkey" (see mapping.py).
- def get_dbkey( self ):
- dbkey = self.metadata.dbkey
- if not isinstance(dbkey, list): dbkey = [dbkey]
- if dbkey in [[None], []]: return "?"
- return dbkey[0]
- def set_dbkey( self, value ):
- if "dbkey" in self.datatype.metadata_spec:
- if not isinstance(value, list):
- self.metadata.dbkey = [value]
- else:
- self.metadata.dbkey = value
- dbkey = property( get_dbkey, set_dbkey )
- def change_datatype( self, new_ext ):
- self.clear_associated_files()
- datatypes_registry.change_datatype( self, new_ext )
- def get_size( self ):
- """Returns the size of the data on disk"""
- return self.dataset.get_size()
- def set_size( self ):
- """Returns the size of the data on disk"""
- return self.dataset.set_size()
- def has_data( self ):
- """Detects whether there is any data"""
- return self.dataset.has_data()
- def get_raw_data( self ):
- """Returns the full data. To stream it open the file_name and read/write as needed"""
- return self.datatype.get_raw_data( self )
- def write_from_stream( self, stream ):
- """Writes data from a stream"""
- self.datatype.write_from_stream(self, stream)
- def set_raw_data( self, data ):
- """Saves the data on the disc"""
- self.datatype.set_raw_data(self, data)
- def get_mime( self ):
- """Returns the mime type of the data"""
- return datatypes_registry.get_mimetype_by_extension( self.extension.lower() )
- def set_peek( self, is_multi_byte=False ):
- return self.datatype.set_peek( self, is_multi_byte=is_multi_byte )
- def init_meta( self, copy_from=None ):
- return self.datatype.init_meta( self, copy_from=copy_from )
- def set_meta( self, **kwd ):
- self.clear_associated_files( metadata_safe = True )
- return self.datatype.set_meta( self, **kwd )
- def missing_meta( self, **kwd ):
- return self.datatype.missing_meta( self, **kwd )
- def as_display_type( self, type, **kwd ):
- return self.datatype.as_display_type( self, type, **kwd )
- def display_peek( self ):
- return self.datatype.display_peek( self )
- def display_name( self ):
- return self.datatype.display_name( self )
- def display_info( self ):
- return self.datatype.display_info( self )
- def get_converted_files_by_type( self, file_type ):
- valid = []
- for assoc in self.implicitly_converted_datasets:
- if not assoc.deleted and assoc.type == file_type:
- valid.append( assoc.dataset )
- return valid
- def clear_associated_files( self, metadata_safe = False, purge = False ):
- raise Exception( 'Unimplemented' )
- def get_child_by_designation(self, designation):
- for child in self.children:
- if child.designation == designation:
- return child
- return None
- def get_converter_types(self):
- return self.datatype.get_converter_types( self, datatypes_registry)
- def find_conversion_destination( self, accepted_formats, **kwd ):
- """Returns ( target_ext, exisiting converted dataset )"""
- return self.datatype.find_conversion_destination( self, accepted_formats, datatypes_registry, **kwd )
- def add_validation_error( self, validation_error ):
- self.validation_errors.append( validation_error )
- def extend_validation_errors( self, validation_errors ):
- self.validation_errors.extend(validation_errors)
- def mark_deleted( self, include_children=True ):
- self.deleted = True
- if include_children:
- for child in self.children:
- child.mark_deleted()
- def mark_undeleted( self, include_children=True ):
- self.deleted = False
- if include_children:
- for child in self.children:
- child.mark_undeleted()
- def undeletable( self ):
- if self.purged:
- return False
- return True
- @property
- def source_library_dataset( self ):
- def get_source( dataset ):
- if isinstance( dataset, LibraryDatasetDatasetAssociation ):
- if dataset.library_dataset:
- return ( dataset, dataset.library_dataset )
- if dataset.copied_from_library_dataset_dataset_association:
- source = get_source( dataset.copied_from_library_dataset_dataset_association )
- if source:
- return source
- if dataset.copied_from_history_dataset_association:
- source = get_source( dataset.copied_from_history_dataset_association )
- if source:
- return source
- return ( None, None )
- return get_source( self )
- class HistoryDatasetAssociation( DatasetInstance ):
- def __init__( self,
- hid = None,
- history = None,
- copied_from_history_dataset_association = None,
- copied_from_library_dataset_dataset_association = None,
- **kwd ):
- DatasetInstance.__init__( self, **kwd )
- self.hid = hid
- # Relationships
- self.history = history
- self.copied_from_history_dataset_association = copied_from_history_dataset_association
- self.copied_from_library_dataset_dataset_association = copied_from_library_dataset_dataset_association
- def copy( self, copy_children = False, parent_id = None, target_history = None ):
- hda = HistoryDatasetAssociation( hid=self.hid,
- name=self.name,
- info=self.info,
- blurb=self.blurb,
- peek=self.peek,
- extension=self.extension,
- dbkey=self.dbkey,
- dataset = self.dataset,
- visible=self.visible,
- deleted=self.deleted,
- parent_id=parent_id,
- copied_from_history_dataset_association=self,
- history = target_history )
- context.add( hda )
- context.flush()
- hda.set_size()
- # Need to set after flushed, as MetadataFiles require dataset.id
- hda.metadata = self.metadata
- if copy_children:
- for child in self.children:
- child_copy = child.copy( copy_children = copy_children, parent_id = hda.id )
- if not self.datatype.copy_safe_peek:
- # In some instances peek relies on dataset_id, i.e. gmaj.zip for viewing MAFs
- hda.set_peek()
- context.flush()
- return hda
- def to_library_dataset_dataset_association( self, target_folder, replace_dataset=None, parent_id=None ):
- if replace_dataset:
- # The replace_dataset param ( when not None ) refers to a LibraryDataset that is being replaced with a new version.
- library_dataset = replace_dataset
- else:
- # If replace_dataset is None, the Library level permissions will be taken from the folder and applied to the new
- # LibraryDataset, and the current user's DefaultUserPermissions will be applied to the associated Dataset.
- library_dataset = LibraryDataset( folder=target_folder, name=self.name, info=self.info )
- context.add( library_dataset )
- context.flush()
- ldda = LibraryDatasetDatasetAssociation( name=self.name,
- info=self.info,
- blurb=self.blurb,
- peek=self.peek,
- extension=self.extension,
- dbkey=self.dbkey,
- dataset=self.dataset,
- library_dataset=library_dataset,
- visible=self.visible,
- deleted=self.deleted,
- parent_id=parent_id,
- copied_from_history_dataset_association=self,
- user=self.history.user )
- context.add( ldda )
- context.flush()
- # Permissions must be the same on the LibraryDatasetDatasetAssociation and the associated LibraryDataset
- # Must set metadata after ldda flushed, as MetadataFiles require ldda.id
- ldda.metadata = self.metadata
- if not replace_dataset:
- target_folder.add_library_dataset( library_dataset, genome_build=ldda.dbkey )
- context.add( target_folder )
- context.flush()
- library_dataset.library_dataset_dataset_association_id = ldda.id
- context.add( library_dataset )
- context.flush()
- for child in self.children:
- child_copy = child.to_library_dataset_dataset_association( target_folder=target_folder, replace_dataset=replace_dataset, parent_id=ldda.id )
- if not self.datatype.copy_safe_peek:
- # In some instances peek relies on dataset_id, i.e. gmaj.zip for viewing MAFs
- ldda.set_peek()
- context.flush()
- return ldda
- def clear_associated_files( self, metadata_safe = False, purge = False ):
- # metadata_safe = True means to only clear when assoc.metadata_safe == False
- for assoc in self.implicitly_converted_datasets:
- if not metadata_safe or not assoc.metadata_safe:
- assoc.clear( purge = purge )
- class LibraryDatasetDatasetAssociation( DatasetInstance ):
- def __init__( self,
- copied_from_history_dataset_association=None,
- copied_from_library_dataset_dataset_association=None,
- library_dataset=None,
- user=None,
- **kwd ):
- DatasetInstance.__init__( self, **kwd )
- self.copied_from_history_dataset_association = copied_from_history_dataset_association
- self.copied_from_library_dataset_dataset_association = copied_from_library_dataset_dataset_association
- self.library_dataset = library_dataset
- self.user = user
- def to_history_dataset_association( self, target_history, parent_id=None ):
- hid = target_history._next_hid()
- hda = HistoryDatasetAssociation( name=self.name,
- info=self.info,
- blurb=self.blurb,
- peek=self.peek,
- extension=self.extension,
- dbkey=self.dbkey,
- dataset=self.dataset,
- visible=self.visible,
- deleted=self.deleted,
- parent_id=parent_id,
- copied_from_library_dataset_dataset_association=self,
- history=target_history,
- hid=hid )
- context.add( hda )
- context.flush()
- hda.metadata = self.metadata #need to set after flushed, as MetadataFiles require dataset.id
- for child in self.children:
- child_copy = child.to_history_dataset_association( target_history=target_history, parent_id=hda.id )
- if not self.datatype.copy_safe_peek:
- hda.set_peek() #in some instances peek relies on dataset_id, i.e. gmaj.zip for viewing MAFs
- context.add( hda )
- context.flush()
- return hda
- def copy( self, copy_children = False, parent_id = None, target_folder = None ):
- ldda = LibraryDatasetDatasetAssociation( name=self.name,
- info=self.info,
- blurb=self.blurb,
- peek=self.peek,
- extension=self.extension,
- dbkey=self.dbkey,
- dataset=self.dataset,
- visible=self.visible,
- deleted=self.deleted,
- parent_id=parent_id,
- copied_from_library_dataset_dataset_association=self,
- folder=target_folder )
- context.add( ldda )
- context.flush()
- # Need to set after flushed, as MetadataFiles require dataset.id
- ldda.metadata = self.metadata
- if copy_children:
- for child in self.children:
- child_copy = child.copy( copy_children = copy_children, parent_id = ldda.id )
- if not self.datatype.copy_safe_peek:
- # In some instances peek relies on dataset_id, i.e. gmaj.zip for viewing MAFs
- ldda.set_peek()
- context.flush()
- return ldda
- def clear_associated_files( self, metadata_safe = False, purge = False ):
- return
- def get_library_item_info_templates( self, template_list=None, restrict=False ):
- # If restrict is True, we'll return only those templates directly associated with this LibraryDatasetDatasetAssociation
- if template_list is None:
- template_list = []
- if self.library_dataset_dataset_info_template_associations:
- template_list.extend( [ lddita.library_item_info_template for lddita in self.library_dataset_dataset_info_template_associations if lddita.library_item_info_template not in template_list ] )
- self.library_dataset.get_library_item_info_templates( template_list, restrict )
- return template_list
- class LibraryDataset( object ):
- # This class acts as a proxy to the currently selected LDDA
- def __init__( self, folder=None, order_id=None, name=None, info=None, library_dataset_dataset_association=None, **kwd ):
- self.folder = folder
- self.order_id = order_id
- self.name = name
- self.info = info
- self.library_dataset_dataset_association = library_dataset_dataset_association
- def set_library_dataset_dataset_association( self, ldda ):
- self.library_dataset_dataset_association = ldda
- ldda.library_dataset = self
- context.add_all( ( self, ldda ) )
- context.flush()
- def get_info( self ):
- if self.library_dataset_dataset_association:
- return self.library_dataset_dataset_association.info
- elif self._info:
- return self._info
- else:
- return 'no info'
- def set_info( self, info ):
- self._info = info
- info = property( get_info, set_info )
- def get_name( self ):
- if self.library_dataset_dataset_association:
- return self.library_dataset_dataset_association.name
- elif self._name:
- return self._name
- else:
- return 'Unnamed dataset'
- def set_name( self, name ):
- self._name = name
- name = property( get_name, set_name )
- def display_name( self ):
- return self.library_dataset_dataset_association.display_name()
- def get_purged( self ):
- return self.library_dataset_dataset_association.dataset.purged
- def set_purged( self, purged ):
- if purged:
- raise Exception( "Not implemented" )
- if not purged and self.purged:
- raise Exception( "Cannot unpurge once purged" )
- purged = property( get_purged, set_purged )
- def get_library_item_info_templates( self, template_list=None, restrict=False ):
- # If restrict is True, we'll return only those templates directly associated with this LibraryDataset
- if template_list is None:
- template_list = []
- if self.library_dataset_info_template_associations:
- template_list.extend( [ ldita.library_item_info_template for ldita in self.library_dataset_info_template_associations if ldita.library_item_info_template not in template_list ] )
- if restrict not in [ 'True', True ]:
- self.folder.get_library_item_info_templates( template_list, restrict )
- return template_list
- ##tables
- Dataset.table = Table( "dataset", metadata,
- Column( "id", Integer, primary_key=True ),
- Column( "create_time", DateTime, default=now ),
- Column( "update_time", DateTime, index=True, default=now, onupdate=now ),
- Column( "state", TrimmedString( 64 ) ),
- Column( "deleted", Boolean, index=True, default=False ),
- Column( "purged", Boolean, index=True, default=False ),
- Column( "purgable", Boolean, default=True ),
- Column( "external_filename" , TEXT ),
- Column( "_extra_files_path", TEXT ),
- Column( 'file_size', Numeric( 15, 0 ) ) )
- HistoryDatasetAssociation.table = Table( "history_dataset_association", metadata,
- Column( "id", Integer, primary_key=True ),
- Column( "dataset_id", Integer, ForeignKey( "dataset.id" ), index=True ),
- Column( "create_time", DateTime, default=now ),
- Column( "update_time", DateTime, default=now, onupdate=now ),
- Column( "copied_from_history_dataset_association_id", Integer, ForeignKey( "history_dataset_association.id" ), nullable=True ),
- Column( "copied_from_library_dataset_dataset_association_id", Integer, ForeignKey( "library_dataset_dataset_association.id" ), nullable=True ),
- Column( "hid", Integer ),
- Column( "name", TrimmedString( 255 ) ),
- Column( "info", TrimmedString( 255 ) ),
- Column( "blurb", TrimmedString( 255 ) ),
- Column( "peek" , TEXT ),
- Column( "extension", TrimmedString( 64 ) ),
- Column( "metadata", MetadataType(), key="_metadata" ),
- Column( "parent_id", Integer, ForeignKey( "history_dataset_association.id" ), nullable=True ),
- Column( "designation", TrimmedString( 255 ) ),
- Column( "deleted", Boolean, index=True, default=False ),
- Column( "visible", Boolean ) )
- LibraryDatasetDatasetAssociation.table = Table( "library_dataset_dataset_association", metadata,
- Column( "id", Integer, primary_key=True ),
- Column( "library_dataset_id", Integer, ForeignKey( "library_dataset.id" ), index=True ),
- Column( "dataset_id", Integer, ForeignKey( "dataset.id" ), index=True ),
- Column( "create_time", DateTime, default=now ),
- Column( "update_time", DateTime, default=now, onupdate=now ),
- Column( "copied_from_history_dataset_association_id", Integer, ForeignKey( "history_dataset_association.id", use_alter=True, name='history_dataset_association_dataset_id_fkey' ), nullable=True ),
- Column( "copied_from_library_dataset_dataset_association_id", Integer, ForeignKey( "library_dataset_dataset_association.id", use_alter=True, name='library_dataset_dataset_association_id_fkey' ), nullable=True ),
- Column( "name", TrimmedString( 255 ) ),
- Column( "info", TrimmedString( 255 ) ),
- Column( "blurb", TrimmedString( 255 ) ),
- Column( "peek" , TEXT ),
- Column( "extension", TrimmedString( 64 ) ),
- Column( "metadata", MetadataType(), key="_metadata" ),
- Column( "parent_id", Integer, ForeignKey( "library_dataset_dataset_association.id" ), nullable=True ),
- Column( "designation", TrimmedString( 255 ) ),
- Column( "deleted", Boolean, index=True, default=False ),
- Column( "visible", Boolean ),
- Column( "message", TrimmedString( 255 ) ) )
- LibraryDataset.table = Table( "library_dataset", metadata,
- Column( "id", Integer, primary_key=True ),
- Column( "library_dataset_dataset_association_id", Integer, ForeignKey( "library_dataset_dataset_association.id", use_alter=True, name="library_dataset_dataset_association_id_fk" ), nullable=True, index=True ),#current version of dataset, if null, there is not a current version selected
- Column( "order_id", Integer ),
- Column( "create_time", DateTime, default=now ),
- Column( "update_time", DateTime, default=now, onupdate=now ),
- Column( "name", TrimmedString( 255 ), key="_name" ), #when not None/null this will supercede display in library (but not when imported into user's history?)
- Column( "info", TrimmedString( 255 ), key="_info" ), #when not None/null this will supercede display in library (but not when imported into user's history?)
- Column( "deleted", Boolean, index=True, default=False ) )
- ##mappers
- mapper( Dataset, Dataset.table,
- properties=dict(
- history_associations=relation(
- HistoryDatasetAssociation,
- primaryjoin=( Dataset.table.c.id == HistoryDatasetAssociation.table.c.dataset_id ) ),
- active_history_associations=relation(
- HistoryDatasetAssociation,
- primaryjoin=( ( Dataset.table.c.id == HistoryDatasetAssociation.table.c.dataset_id ) & ( HistoryDatasetAssociation.table.c.deleted == False ) ) ),
- library_associations=relation(
- LibraryDatasetDatasetAssociation,
- primaryjoin=( Dataset.table.c.id == LibraryDatasetDatasetAssociation.table.c.dataset_id ) ),
- active_library_associations=relation(
- LibraryDatasetDatasetAssociation,
- primaryjoin=( ( Dataset.table.c.id == LibraryDatasetDatasetAssociation.table.c.dataset_id ) & ( LibraryDatasetDatasetAssociation.table.c.deleted == False ) ) )
- ) )
- mapper( HistoryDatasetAssociation, HistoryDatasetAssociation.table,
- properties=dict(
- dataset=relation(
- Dataset,
- primaryjoin=( Dataset.table.c.id == HistoryDatasetAssociation.table.c.dataset_id ), lazy=False ),
- # .history defined in History mapper
- copied_to_history_dataset_associations=relation(
- HistoryDatasetAssociation,
- primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_history_dataset_association_id == HistoryDatasetAssociation.table.c.id ),
- backref=backref( "copied_from_history_dataset_association", primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_history_dataset_association_id == HistoryDatasetAssociation.table.c.id ), remote_side=[HistoryDatasetAssociation.table.c.id], uselist=False ) ),
- copied_to_library_dataset_dataset_associations=relation(
- LibraryDatasetDatasetAssociation,
- primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ),
- backref=backref( "copied_from_history_dataset_association", primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id], uselist=False ) ),
- children=relation(
- HistoryDatasetAssociation,
- primaryjoin=( HistoryDatasetAssociation.table.c.parent_id == HistoryDatasetAssociation.table.c.id ),
- backref=backref( "parent", primaryjoin=( HistoryDatasetAssociation.table.c.parent_id == HistoryDatasetAssociation.table.c.id ), remote_side=[HistoryDatasetAssociation.table.c.id], uselist=False ) ),
- visible_children=relation(
- HistoryDatasetAssociation,
- primaryjoin=( ( HistoryDatasetAssociation.table.c.parent_id == HistoryDatasetAssociation.table.c.id ) & ( HistoryDatasetAssociation.table.c.visible == True ) ) )
- ) )
- mapper( LibraryDatasetDatasetAssociation, LibraryDatasetDatasetAssociation.table,
- properties=dict(
- dataset=relation( Dataset ),
- library_dataset = relation( LibraryDataset,
- primaryjoin=( LibraryDatasetDatasetAssociation.table.c.library_dataset_id == LibraryDataset.table.c.id ) ),
- copied_to_library_dataset_dataset_associations=relation(
- LibraryDatasetDatasetAssociation,
- primaryjoin=( LibraryDatasetDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ),
- backref=backref( "copied_from_library_dataset_dataset_association", primaryjoin=( LibraryDatasetDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id] ) ),
- copied_to_history_dataset_associations=relation(
- HistoryDatasetAssociation,
- primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ),
- backref=backref( "copied_from_library_dataset_dataset_association", primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id], uselist=False ) ),
- children=relation(
- LibraryDatasetDatasetAssociation,
- primaryjoin=( LibraryDatasetDatasetAssociation.table.c.parent_id == LibraryDatasetDatasetAssociation.table.c.id ),
- backref=backref( "parent", primaryjoin=( LibraryDatasetDatasetAssociation.table.c.parent_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id] ) ),
- visible_children=relation(
- LibraryDatasetDatasetAssociation,
- primaryjoin=( ( LibraryDatasetDatasetAssociation.table.c.parent_id == LibraryDatasetDatasetAssociation.table.c.id ) & ( LibraryDatasetDatasetAssociation.table.c.visible == True ) ) )
- ) )
- mapper( LibraryDataset, LibraryDataset.table,
- properties=dict(
- library_dataset_dataset_association=relation( LibraryDatasetDatasetAssociation, primaryjoin=( LibraryDataset.table.c.library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ) ),
- expired_datasets = relation( LibraryDatasetDatasetAssociation, foreign_keys=[LibraryDataset.table.c.id,LibraryDataset.table.c.library_dataset_dataset_association_id ], primaryjoin=( ( LibraryDataset.table.c.id == LibraryDatasetDatasetAssociation.table.c.library_dataset_id ) & ( not_( LibraryDataset.table.c.library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ) ) ), viewonly=True, uselist=True )
- ) )
- def __guess_dataset_by_filename( filename ):
- """Return a guessed dataset by filename"""
- try:
- fields = os.path.split( filename )
- if fields:
- if fields[-1].startswith( 'dataset_' ) and fields[-1].endswith( '.dat' ): #dataset_%d.dat
- return context.query( Dataset ).get( int( fields[-1][ len( 'dataset_' ): -len( '.dat' ) ] ) )
- except:
- pass #some parsing error, we can't guess Dataset
- return None
- def upgrade(migrate_engine):
- metadata.bind = migrate_engine
- log.debug( "Fixing a discrepancy concerning deleted shared history items." )
- affected_items = 0
- start_time = time.time()
- for dataset in context.query( Dataset ).filter( and_( Dataset.deleted == True, Dataset.purged == False ) ):
- for dataset_instance in dataset.history_associations + dataset.library_associations:
- if not dataset_instance.deleted:
- dataset.deleted = False
- if dataset.file_size in [ None, 0 ]:
- dataset.set_size() #Restore filesize
- affected_items += 1
- break
- context.flush()
- log.debug( "%i items affected, and restored." % ( affected_items ) )
- log.debug( "Time elapsed: %s" % ( time.time() - start_time ) )
- #fix share before hda
- log.debug( "Fixing a discrepancy concerning cleaning up deleted history items shared before HDAs." )
- dataset_by_filename = {}
- changed_associations = 0
- start_time = time.time()
- for dataset in context.query( Dataset ).filter( Dataset.external_filename.like( '%dataset_%.dat' ) ):
- if dataset.file_name in dataset_by_filename:
- guessed_dataset = dataset_by_filename[ dataset.file_name ]
- else:
- guessed_dataset = __guess_dataset_by_filename( dataset.file_name )
- if guessed_dataset and dataset.file_name != guessed_dataset.file_name:#not os.path.samefile( dataset.file_name, guessed_dataset.file_name ):
- guessed_dataset = None
- dataset_by_filename[ dataset.file_name ] = guessed_dataset
- if guessed_dataset is not None and guessed_dataset.id != dataset.id: #could we have a self referential dataset?
- for dataset_instance in dataset.history_associations + dataset.library_associations:
- dataset_instance.dataset = guessed_dataset
- changed_associations += 1
- #mark original Dataset as deleted and purged, it is no longer in use, but do not delete file_name contents
- dataset.deleted = True
- dataset.external_filename = "Dataset was result of share before HDA, and has been replaced: %s mapped to Dataset %s" % ( dataset.external_filename, guessed_dataset.id )
- dataset.purged = True #we don't really purge the file here, but we mark it as purged, since this dataset is now defunct
- context.flush()
- log.debug( "%i items affected, and restored." % ( changed_associations ) )
- log.debug( "Time elapsed: %s" % ( time.time() - start_time ) )
- def downgrade(migrate_engine):
- metadata.bind = migrate_engine
- log.debug( "Downgrade is not possible." )