/lib/galaxy/model/__init__.py
- """
- Galaxy data model classes
- Naming: try to use class names that have a distinct plural form so that
- the relationship cardinalities are obvious (e.g. prefer Dataset to Data)
- """
- from galaxy import eggs
- eggs.require("pexpect")
- import codecs
- import errno
- import logging
- import operator
- import os
- import pexpect
- import json
- import socket
- import time
- from uuid import UUID, uuid4
- from string import Template
- from itertools import ifilter
- from itertools import chain
- import galaxy.datatypes
- import galaxy.datatypes.registry
- import galaxy.security.passwords
- from galaxy.datatypes.metadata import MetadataCollection
- from galaxy.model.item_attrs import Dictifiable, UsesAnnotations
- from galaxy.security import get_permitted_actions
- from galaxy.util import is_multi_byte, nice_size, Params, restore_text, send_mail
- from galaxy.util import ready_name_for_url
- from galaxy.util.bunch import Bunch
- from galaxy.util.hash_util import new_secure_hash
- from galaxy.util.directory_hash import directory_hash_id
- from galaxy.web.framework.helpers import to_unicode
- from galaxy.web.form_builder import (AddressField, CheckboxField, HistoryField,
- PasswordField, SelectField, TextArea, TextField, WorkflowField,
- WorkflowMappingField)
- from sqlalchemy.orm import object_session
- from sqlalchemy.orm import joinedload
- from sqlalchemy.sql.expression import func
- from sqlalchemy import not_
- log = logging.getLogger( __name__ )
- datatypes_registry = galaxy.datatypes.registry.Registry()
- # Default Value Required for unit tests
- datatypes_registry.load_datatypes()
- # When constructing filters with IN for a fixed set of ids, this is the
- # maximum number of items to place in the IN statement. Different databases
- # have different limits, so it is best not to let this be unlimited -
- # filter in Python if over this limit.
- MAX_IN_FILTER_LENGTH = 100
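- # Illustrative sketch (not part of the original module): one way a caller
- # might honour MAX_IN_FILTER_LENGTH when selecting mapped objects by id.
- # The names `session`, `model_class` and `ids` are assumed to be supplied
- # by the caller; the helper itself is hypothetical.
- def _example_query_by_ids( session, model_class, ids ):
-     if len( ids ) < MAX_IN_FILTER_LENGTH:
-         # small enough set - let the database handle it with an IN clause
-         return session.query( model_class ).filter( model_class.id.in_( ids ) ).all()
-     # too many ids for a portable IN clause - filter in Python instead
-     return [ obj for obj in session.query( model_class ) if obj.id in ids ]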
- class NoConverterException(Exception):
- def __init__(self, value):
- self.value = value
- def __str__(self):
- return repr(self.value)
- class ConverterDependencyException(Exception):
- def __init__(self, value):
- self.value = value
- def __str__(self):
- return repr(self.value)
- def set_datatypes_registry( d_registry ):
- """
- Set up datatypes_registry
- """
- global datatypes_registry
- datatypes_registry = d_registry
- class HasName:
- def get_display_name( self ):
- """
- These objects have a name attribute that can be either a string or a
- unicode object. If it is a string, convert it to unicode assuming 'utf-8' encoding.
- """
- name = self.name
- if isinstance(name, str):
- name = unicode(name, 'utf-8')
- return name
- class User( object, Dictifiable ):
- """
- Data for a Galaxy user or admin and relations to their
- histories, credentials, and roles.
- """
- use_pbkdf2 = True
- # attributes that will be accessed and returned when calling to_dict( view='collection' )
- dict_collection_visible_keys = ( 'id', 'email' )
- # attributes that will be accessed and returned when calling to_dict( view='element' )
- dict_element_visible_keys = ( 'id', 'email', 'username', 'total_disk_usage', 'nice_total_disk_usage' )
- def __init__( self, email=None, password=None ):
- self.email = email
- self.password = password
- self.external = False
- self.deleted = False
- self.purged = False
- self.active = False
- self.activation_token = None
- self.username = None
- # Relationships
- self.histories = []
- self.credentials = []
- #? self.roles = []
- def set_password_cleartext( self, cleartext ):
- """
- Set user password to the digest of `cleartext`.
- """
- if User.use_pbkdf2:
- self.password = galaxy.security.passwords.hash_password( cleartext )
- else:
- self.password = new_secure_hash( text_type=cleartext )
- def check_password( self, cleartext ):
- """
- Check if `cleartext` matches user password when hashed.
- """
- return galaxy.security.passwords.check_password( cleartext, self.password )
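- # Illustrative round trip (a sketch, assuming the default pbkdf2 hashing
- # enabled by use_pbkdf2 above):
- #
- #     user = User( email='test@example.org' )
- #     user.set_password_cleartext( 'changeme' )
- #     user.check_password( 'changeme' )   # -> True
- #     user.check_password( 'wrong' )      # -> False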
- def all_roles( self ):
- """
- Return a unique list of Roles associated with this user or any of their groups.
- """
- roles = [ ura.role for ura in self.roles ]
- for group in [ uga.group for uga in self.groups ]:
- for role in [ gra.role for gra in group.roles ]:
- if role not in roles:
- roles.append( role )
- return roles
- def get_disk_usage( self, nice_size=False ):
- """
- Return byte count of disk space used by user or a human-readable
- string if `nice_size` is `True`.
- """
- rval = 0
- if self.disk_usage is not None:
- rval = self.disk_usage
- if nice_size:
- rval = galaxy.datatypes.data.nice_size( rval )
- return rval
- def set_disk_usage( self, bytes ):
- """
- Manually set the disk space used by a user to `bytes`.
- """
- self.disk_usage = bytes
- total_disk_usage = property( get_disk_usage, set_disk_usage )
- @property
- def nice_total_disk_usage( self ):
- """
- Return byte count of disk space used in a human-readable string.
- """
- return self.get_disk_usage( nice_size=True )
- def calculate_disk_usage( self ):
- """
- Return byte count total of disk space used by all non-purged, non-library
- HDAs in non-purged histories.
- """
- # maintain a list so that we don't double count
- dataset_ids = []
- total = 0
- # this can be a huge number and can run out of memory, so we avoid the mappers
- db_session = object_session( self )
- for history in db_session.query( History ).enable_eagerloads( False ).filter_by( user_id=self.id, purged=False ).yield_per( 1000 ):
- for hda in db_session.query( HistoryDatasetAssociation ).enable_eagerloads( False ).filter_by( history_id=history.id, purged=False ).yield_per( 1000 ):
- #TODO: def hda.counts_toward_disk_usage():
- # return ( not self.dataset.purged and not self.dataset.library_associations )
- if hda.dataset.id not in dataset_ids and not hda.dataset.purged and not hda.dataset.library_associations:
- dataset_ids.append( hda.dataset.id )
- total += hda.dataset.get_total_size()
- return total
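- # Usage sketch (illustrative, not part of the original module): an admin
- # script might recompute and persist the cached `disk_usage` column like so,
- # assuming `sa_session` is the SQLAlchemy session the user is attached to:
- #
- #     recalculated = user.calculate_disk_usage()
- #     if recalculated != user.get_disk_usage():
- #         user.set_disk_usage( recalculated )
- #         sa_session.add( user )
- #         sa_session.flush()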
- @staticmethod
- def user_template_environment( user ):
- """
- >>> env = User.user_template_environment(None)
- >>> env['__user_email__']
- 'Anonymous'
- >>> env['__user_id__']
- 'Anonymous'
- >>> user = User('foo@example.com')
- >>> user.id = 6
- >>> user.username = 'foo2'
- >>> env = User.user_template_environment(user)
- >>> env['__user_id__']
- '6'
- >>> env['__user_name__']
- 'foo2'
- """
- if user:
- user_id = '%d' % user.id
- user_email = str( user.email )
- user_name = str( user.username )
- else:
- user = None
- user_id = 'Anonymous'
- user_email = 'Anonymous'
- user_name = 'Anonymous'
- environment = {}
- environment[ '__user__' ] = user
- environment[ '__user_id__' ] = environment[ 'userId' ] = user_id
- environment[ '__user_email__' ] = environment[ 'userEmail' ] = user_email
- environment[ '__user_name__' ] = user_name
- return environment
- @staticmethod
- def expand_user_properties( user, in_string ):
- """
- """
- environment = User.user_template_environment( user )
- return Template( in_string ).safe_substitute( environment )
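- # Illustrative sketch (not part of the original module): expanding a
- # `${}`-style template against the environment built by
- # `user_template_environment`.  The helper and greeting template below are
- # made up for illustration.
- def _example_user_greeting( user ):
-     template = "Hello ${__user_name__} (id ${__user_id__}, ${__user_email__})"
-     return User.expand_user_properties( user, template )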
- class Job( object, Dictifiable ):
- """
- A job represents a request to run a tool given input datasets, tool
- parameters, and output datasets.
- """
- dict_collection_visible_keys = [ 'id', 'state', 'exit_code', 'update_time', 'create_time' ]
- dict_element_visible_keys = [ 'id', 'state', 'exit_code', 'update_time', 'create_time' ]
- states = Bunch( NEW = 'new',
- UPLOAD = 'upload',
- WAITING = 'waiting',
- QUEUED = 'queued',
- RUNNING = 'running',
- OK = 'ok',
- ERROR = 'error',
- PAUSED = 'paused',
- DELETED = 'deleted',
- DELETED_NEW = 'deleted_new' )
- # Please include an accessor (get/set pair) for any new columns/members.
- def __init__( self ):
- self.session_id = None
- self.user_id = None
- self.tool_id = None
- self.tool_version = None
- self.command_line = None
- self.param_filename = None
- self.parameters = []
- self.input_datasets = []
- self.output_datasets = []
- self.input_library_datasets = []
- self.output_library_datasets = []
- self.state = Job.states.NEW
- self.info = None
- self.job_runner_name = None
- self.job_runner_external_id = None
- self.destination_id = None
- self.destination_params = None
- self.post_job_actions = []
- self.imported = False
- self.handler = None
- self.exit_code = None
- @property
- def finished( self ):
- states = self.states
- return self.state in [
- states.OK,
- states.ERROR,
- states.DELETED,
- states.DELETED_NEW,
- ]
- # TODO: Add accessors for members defined in SQL Alchemy for the Job table and
- # for the mapper defined to the Job table.
- def get_external_output_metadata( self ):
- """
- The external_output_metadata is currently a reference from Job to
- JobExternalOutputMetadata. It exists for a job but not a task.
- """
- return self.external_output_metadata
- def get_session_id( self ):
- return self.session_id
- def get_user_id( self ):
- return self.user_id
- def get_tool_id( self ):
- return self.tool_id
- def get_tool_version( self ):
- return self.tool_version
- def get_command_line( self ):
- return self.command_line
- def get_param_filename( self ):
- return self.param_filename
- def get_parameters( self ):
- return self.parameters
- def get_input_datasets( self ):
- return self.input_datasets
- def get_output_datasets( self ):
- return self.output_datasets
- def get_input_library_datasets( self ):
- return self.input_library_datasets
- def get_output_library_datasets( self ):
- return self.output_library_datasets
- def get_state( self ):
- return self.state
- def get_info( self ):
- return self.info
- def get_job_runner_name( self ):
- # This differs from the Task class in that job_runner_name is
- # accessed instead of task_runner_name. Note that the field
- # runner_name is not the same thing.
- return self.job_runner_name
- def get_job_runner_external_id( self ):
- # This differs from the Task class only in the member accessed:
- return self.job_runner_external_id
- def get_post_job_actions( self ):
- return self.post_job_actions
- def get_imported( self ):
- return self.imported
- def get_handler( self ):
- return self.handler
- def get_params( self ):
- return self.params
- def get_user( self ):
- # This is defined in the SQL Alchemy mapper as a relation to the User.
- return self.user
- def get_id( self ):
- # This is defined in the SQL Alchemy's Job table (and not in the model).
- return self.id
- def get_tasks( self ):
- # The tasks member is part of a reference in the SQL Alchemy schema:
- return self.tasks
- def get_id_tag( self ):
- """
- Return a tag that can be useful in identifying a Job.
- This returns the Job's id as a string.
- """
- return "%s" % self.id
- def set_session_id( self, session_id ):
- self.session_id = session_id
- def set_user_id( self, user_id ):
- self.user_id = user_id
- def set_tool_id( self, tool_id ):
- self.tool_id = tool_id
- def set_tool_version( self, tool_version ):
- self.tool_version = tool_version
- def set_command_line( self, command_line ):
- self.command_line = command_line
- def set_param_filename( self, param_filename ):
- self.param_filename = param_filename
- def set_parameters( self, parameters ):
- self.parameters = parameters
- def set_input_datasets( self, input_datasets ):
- self.input_datasets = input_datasets
- def set_output_datasets( self, output_datasets ):
- self.output_datasets = output_datasets
- def set_input_library_datasets( self, input_library_datasets ):
- self.input_library_datasets = input_library_datasets
- def set_output_library_datasets( self, output_library_datasets ):
- self.output_library_datasets = output_library_datasets
- def set_info( self, info ):
- self.info = info
- def set_runner_name( self, job_runner_name ):
- self.job_runner_name = job_runner_name
- def set_runner_external_id( self, job_runner_external_id ):
- self.job_runner_external_id = job_runner_external_id
- def set_post_job_actions( self, post_job_actions ):
- self.post_job_actions = post_job_actions
- def set_imported( self, imported ):
- self.imported = imported
- def set_handler( self, handler ):
- self.handler = handler
- def set_params( self, params ):
- self.params = params
- def add_parameter( self, name, value ):
- self.parameters.append( JobParameter( name, value ) )
- def add_input_dataset( self, name, dataset ):
- self.input_datasets.append( JobToInputDatasetAssociation( name, dataset ) )
- def add_output_dataset( self, name, dataset ):
- self.output_datasets.append( JobToOutputDatasetAssociation( name, dataset ) )
- def add_input_library_dataset( self, name, dataset ):
- self.input_library_datasets.append( JobToInputLibraryDatasetAssociation( name, dataset ) )
- def add_output_library_dataset( self, name, dataset ):
- self.output_library_datasets.append( JobToOutputLibraryDatasetAssociation( name, dataset ) )
- def add_post_job_action(self, pja):
- self.post_job_actions.append( PostJobActionAssociation( pja, self ) )
- def set_state( self, state ):
- """
- This is the only set method that performs extra work. In this case, the
- state is propagated down to datasets.
- """
- self.state = state
- # For historical reasons state propagates down to datasets
- for da in self.output_datasets:
- da.dataset.state = state
- def get_param_values( self, app, ignore_errors=False ):
- """
- Read encoded parameter values from the database and turn back into a
- dict of tool parameter values.
- """
- param_dict = dict( [ ( p.name, p.value ) for p in self.parameters ] )
- tool = app.toolbox.get_tool( self.tool_id )
- param_dict = tool.params_from_strings( param_dict, app, ignore_errors=ignore_errors )
- return param_dict
- def check_if_output_datasets_deleted( self ):
- """
- Return true if all of the output datasets associated with this job are
- in the deleted state
- """
- for dataset_assoc in self.output_datasets:
- dataset = dataset_assoc.dataset
- # only the originator of the job can delete a dataset to cause
- # cancellation of the job, no need to loop through history_associations
- if not dataset.deleted:
- return False
- return True
- def mark_deleted( self, track_jobs_in_database=False ):
- """
- Mark this job as deleted, and mark any output datasets as discarded.
- """
- if track_jobs_in_database:
- self.state = Job.states.DELETED_NEW
- else:
- self.state = Job.states.DELETED
- self.info = "Job output deleted by user before job completed."
- for dataset_assoc in self.output_datasets:
- dataset = dataset_assoc.dataset
- dataset.deleted = True
- dataset.state = dataset.states.DISCARDED
- for dataset in dataset.dataset.history_associations:
- # propagate info across shared datasets
- dataset.deleted = True
- dataset.blurb = 'deleted'
- dataset.peek = 'Job deleted'
- dataset.info = 'Job output deleted by user before job completed'
- def to_dict( self, view='collection' ):
- rval = super( Job, self ).to_dict( view=view )
- rval['tool_id'] = self.tool_id
- if view == 'element':
- param_dict = dict( [ ( p.name, p.value ) for p in self.parameters ] )
- rval['params'] = param_dict
- input_dict = {}
- for i in self.input_datasets:
- if i.dataset is not None:
- input_dict[i.name] = {"id" : i.dataset.id, "src" : "hda"}
- for i in self.input_library_datasets:
- if i.dataset is not None:
- input_dict[i.name] = {"id" : i.dataset.id, "src" : "ldda"}
- for k in input_dict:
- if k in param_dict:
- del param_dict[k]
- rval['inputs'] = input_dict
- output_dict = {}
- for i in self.output_datasets:
- if i.dataset is not None:
- output_dict[i.name] = {"id" : i.dataset.id, "src" : "hda"}
- for i in self.output_library_datasets:
- if i.dataset is not None:
- output_dict[i.name] = {"id" : i.dataset.id, "src" : "ldda"}
- rval['outputs'] = output_dict
- return rval
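- # For reference, an 'element' view produced by to_dict() above has roughly
- # this shape (illustrative values only; update/create times omitted):
- #
- #     { 'id': 42, 'state': 'ok', 'exit_code': 0, 'tool_id': 'cat1',
- #       'params': { ...remaining tool parameters... },
- #       'inputs':  { 'input1': { 'id': 7, 'src': 'hda' } },
- #       'outputs': { 'out_file1': { 'id': 8, 'src': 'hda' } } }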
- class Task( object ):
- """
- A task represents a single component of a job.
- """
- states = Bunch( NEW = 'new',
- WAITING = 'waiting',
- QUEUED = 'queued',
- RUNNING = 'running',
- OK = 'ok',
- ERROR = 'error',
- DELETED = 'deleted' )
- # Please include an accessor (get/set pair) for any new columns/members.
- def __init__( self, job, working_directory, prepare_files_cmd ):
- self.command_line = None
- self.parameters = []
- self.state = Task.states.NEW
- self.info = None
- self.working_directory = working_directory
- self.task_runner_name = None
- self.task_runner_external_id = None
- self.job = job
- self.stdout = ""
- self.stderr = ""
- self.exit_code = None
- self.prepare_input_files_cmd = prepare_files_cmd
- def get_param_values( self, app ):
- """
- Read encoded parameter values from the database and turn back into a
- dict of tool parameter values.
- """
- param_dict = dict( [ ( p.name, p.value ) for p in self.parent_job.parameters ] )
- tool = app.toolbox.get_tool( self.tool_id )
- param_dict = tool.params_from_strings( param_dict, app )
- return param_dict
- def get_id( self ):
- # This is defined in the SQL Alchemy schema:
- return self.id
- def get_id_tag( self ):
- """
- Return an id tag suitable for identifying the task.
- This combines the task's job id and the task's own id.
- """
- return "%s_%s" % ( self.job.get_id(), self.get_id() )
- def get_command_line( self ):
- return self.command_line
- def get_parameters( self ):
- return self.parameters
- def get_state( self ):
- return self.state
- def get_info( self ):
- return self.info
- def get_working_directory( self ):
- return self.working_directory
- def get_task_runner_name( self ):
- return self.task_runner_name
- def get_task_runner_external_id( self ):
- return self.task_runner_external_id
- def get_job( self ):
- return self.job
- def get_stdout( self ):
- return self.stdout
- def get_stderr( self ):
- return self.stderr
- def get_prepare_input_files_cmd( self ):
- return self.prepare_input_files_cmd
- # The following accessors are for members that are in the Job class but
- # not in the Task class. So they can either refer to the parent Job
- # or return None, depending on whether Tasks need to point to the parent
- # (e.g., for a session) or never use the member (e.g., external output
- # metadata). These can be filled in as needed.
- def get_external_output_metadata( self ):
- """
- The external_output_metadata is currently a backref to
- JobExternalOutputMetadata. It exists for a job but not a task,
- and when a task is cancelled its corresponding parent Job will
- be cancelled. So None is returned now, but that could be changed
- to self.get_job().get_external_output_metadata().
- """
- return None
- def get_job_runner_name( self ):
- """
- Since runners currently access Tasks the same way they access Jobs,
- this method just refers to *this* instance's runner.
- """
- return self.task_runner_name
- def get_job_runner_external_id( self ):
- """
- Runners will use the same methods to get information about the Task
- class as they will about the Job class, so this method just returns
- the task's external id.
- """
- # TODO: Merge into get_runner_external_id.
- return self.task_runner_external_id
- def get_session_id( self ):
- # A Task shares its parent Job's galaxy session, so return the
- # session id of the parent Job.
- return self.get_job().get_session_id()
- def set_id( self, id ):
- # This is defined in the SQL Alchemy's mapper and not here.
- # This should never be called.
- self.id = id
- def set_command_line( self, command_line ):
- self.command_line = command_line
- def set_parameters( self, parameters ):
- self.parameters = parameters
- def set_state( self, state ):
- self.state = state
- def set_info( self, info ):
- self.info = info
- def set_working_directory( self, working_directory ):
- self.working_directory = working_directory
- def set_task_runner_name( self, task_runner_name ):
- self.task_runner_name = task_runner_name
- def set_job_runner_external_id( self, task_runner_external_id ):
- # This method is available for runners that do not want/need to
- # differentiate between the kinds of Runnable things (Jobs and Tasks)
- # that they're using.
- log.debug( "Task %d: Set external id to %s"
- % ( self.id, task_runner_external_id ) )
- self.task_runner_external_id = task_runner_external_id
- def set_task_runner_external_id( self, task_runner_external_id ):
- self.task_runner_external_id = task_runner_external_id
- def set_job( self, job ):
- self.job = job
- def set_stdout( self, stdout ):
- self.stdout = stdout
- def set_stderr( self, stderr ):
- self.stderr = stderr
- def set_prepare_input_files_cmd( self, prepare_input_files_cmd ):
- self.prepare_input_files_cmd = prepare_input_files_cmd
- class JobParameter( object ):
- def __init__( self, name, value ):
- self.name = name
- self.value = value
- class JobToInputDatasetAssociation( object ):
- def __init__( self, name, dataset ):
- self.name = name
- self.dataset = dataset
- class JobToOutputDatasetAssociation( object ):
- def __init__( self, name, dataset ):
- self.name = name
- self.dataset = dataset
- class JobToInputLibraryDatasetAssociation( object ):
- def __init__( self, name, dataset ):
- self.name = name
- self.dataset = dataset
- class JobToOutputLibraryDatasetAssociation( object ):
- def __init__( self, name, dataset ):
- self.name = name
- self.dataset = dataset
- class PostJobAction( object ):
- def __init__( self, action_type, workflow_step, output_name = None, action_arguments = None):
- self.action_type = action_type
- self.output_name = output_name
- self.action_arguments = action_arguments
- self.workflow_step = workflow_step
- class PostJobActionAssociation( object ):
- def __init__(self, pja, job):
- self.job = job
- self.post_job_action = pja
- class JobExternalOutputMetadata( object ):
- def __init__( self, job = None, dataset = None ):
- self.job = job
- if isinstance( dataset, galaxy.model.HistoryDatasetAssociation ):
- self.history_dataset_association = dataset
- elif isinstance( dataset, galaxy.model.LibraryDatasetDatasetAssociation ):
- self.library_dataset_dataset_association = dataset
- @property
- def dataset( self ):
- if self.history_dataset_association:
- return self.history_dataset_association
- elif self.library_dataset_dataset_association:
- return self.library_dataset_dataset_association
- return None
- class JobExportHistoryArchive( object ):
- def __init__( self, job=None, history=None, dataset=None, compressed=False, \
- history_attrs_filename=None, datasets_attrs_filename=None,
- jobs_attrs_filename=None ):
- self.job = job
- self.history = history
- self.dataset = dataset
- self.compressed = compressed
- self.history_attrs_filename = history_attrs_filename
- self.datasets_attrs_filename = datasets_attrs_filename
- self.jobs_attrs_filename = jobs_attrs_filename
- @property
- def up_to_date( self ):
- """ Return False, if a new export should be generated for corresponding
- history.
- """
- job = self.job
- return job.state not in [ Job.states.ERROR, Job.states.DELETED ] \
- and job.update_time > self.history.update_time
- @property
- def ready( self ):
- return self.job.state == Job.states.OK
- @property
- def preparing( self ):
- return self.job.state in [ Job.states.RUNNING, Job.states.QUEUED, Job.states.WAITING ]
- @property
- def export_name( self ):
- # Stream archive.
- hname = ready_name_for_url( self.history.name )
- hname = "Galaxy-History-%s.tar" % ( hname )
- if self.compressed:
- hname += ".gz"
- return hname
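- # Illustrative example (names are made up): for a history called
- # "RNA-seq run 1", ready_name_for_url() yields a URL-safe variant of the
- # name (for example "RNA-seq-run-1"), so a compressed export would be
- # streamed as "Galaxy-History-RNA-seq-run-1.tar.gz".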
- class JobImportHistoryArchive( object ):
- def __init__( self, job=None, history=None, archive_dir=None ):
- self.job = job
- self.history = history
- self.archive_dir=archive_dir
- class GenomeIndexToolData( object ):
- def __init__( self, job=None, params=None, dataset=None, deferred_job=None, \
- transfer_job=None, fasta_path=None, created_time=None, modified_time=None, \
- dbkey=None, user=None, indexer=None ):
- self.job = job
- self.dataset = dataset
- self.fasta_path = fasta_path
- self.user = user
- self.indexer = indexer
- self.created_time = created_time
- self.modified_time = modified_time
- self.deferred = deferred_job
- self.transfer = transfer_job
- class DeferredJob( object ):
- states = Bunch( NEW = 'new',
- WAITING = 'waiting',
- QUEUED = 'queued',
- RUNNING = 'running',
- OK = 'ok',
- ERROR = 'error' )
- def __init__( self, state=None, plugin=None, params=None ):
- self.state = state
- self.plugin = plugin
- self.params = params
- def get_check_interval( self ):
- if not hasattr( self, '_check_interval' ):
- self._check_interval = None
- return self._check_interval
- def set_check_interval( self, seconds ):
- self._check_interval = seconds
- check_interval = property( get_check_interval, set_check_interval )
- def get_last_check( self ):
- if not hasattr( self, '_last_check' ):
- self._last_check = 0
- return self._last_check
- def set_last_check( self, seconds ):
- try:
- self._last_check = int( seconds )
- except ( ValueError, TypeError ):
- self._last_check = time.time()
- last_check = property( get_last_check, set_last_check )
- @property
- def is_check_time( self ):
- if self.check_interval is None:
- return True
- elif ( int( time.time() ) - self.last_check ) > self.check_interval:
- return True
- else:
- return False
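- # Illustrative sketch (not part of the original module): how a polling loop
- # might use is_check_time and last_check so that check_interval is honoured.
- # The helper name is hypothetical.
- def _example_check_deferred_job( deferred_job ):
-     if not deferred_job.is_check_time:
-         return False
-     # record when this check happened so the next one waits check_interval
-     deferred_job.last_check = int( time.time() )
-     return True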
- class Group( object, Dictifiable ):
- dict_collection_visible_keys = ( 'id', 'name' )
- dict_element_visible_keys = ( 'id', 'name' )
- def __init__( self, name = None ):
- self.name = name
- self.deleted = False
- class UserGroupAssociation( object ):
- def __init__( self, user, group ):
- self.user = user
- self.group = group
- class History( object, Dictifiable, UsesAnnotations, HasName ):
- dict_collection_visible_keys = ( 'id', 'name', 'published', 'deleted' )
- dict_element_visible_keys = ( 'id', 'name', 'published', 'deleted', 'genome_build', 'purged', 'importable', 'slug' )
- default_name = 'Unnamed history'
- def __init__( self, id=None, name=None, user=None ):
- self.id = id
- self.name = name or History.default_name
- self.deleted = False
- self.purged = False
- self.importing = False
- self.genome_build = None
- self.published = False
- # Relationships
- self.user = user
- self.datasets = []
- self.galaxy_sessions = []
- self.tags = []
- def _next_hid( self ):
- # this is overridden in mapping.py db_next_hid() method
- if len( self.datasets ) == 0:
- return 1
- else:
- last_hid = 0
- for dataset in self.datasets:
- if dataset.hid > last_hid:
- last_hid = dataset.hid
- return last_hid + 1
- def add_galaxy_session( self, galaxy_session, association=None ):
- if association is None:
- self.galaxy_sessions.append( GalaxySessionToHistoryAssociation( galaxy_session, self ) )
- else:
- self.galaxy_sessions.append( association )
- def add_dataset( self, dataset, parent_id=None, genome_build=None, set_hid=True, quota=True ):
- if isinstance( dataset, Dataset ):
- dataset = HistoryDatasetAssociation(dataset=dataset)
- object_session( self ).add( dataset )
- object_session( self ).flush()
- elif not isinstance( dataset, HistoryDatasetAssociation ):
- raise TypeError, ( "You can only add Dataset and HistoryDatasetAssociation instances to a history" +
- " ( you tried to add %s )." % str( dataset ) )
- if parent_id:
- for data in self.datasets:
- if data.id == parent_id:
- dataset.hid = data.hid
- break
- else:
- if set_hid:
- dataset.hid = self._next_hid()
- else:
- if set_hid:
- dataset.hid = self._next_hid()
- if quota and self.user:
- self.user.total_disk_usage += dataset.quota_amount( self.user )
- dataset.history = self
- if genome_build not in [None, '?']:
- self.genome_build = genome_build
- self.datasets.append( dataset )
- return dataset
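- # Usage sketch (illustrative, not part of the original module): add_dataset()
- # accepts either a bare Dataset (which it wraps in a new HDA and flushes) or
- # an existing HistoryDatasetAssociation, for example:
- #
- #     history.add_dataset( Dataset(), genome_build='hg19' )      # wrapped into a new HDA
- #     history.add_dataset( existing_hda, set_hid=False, quota=False )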
- def copy( self, name=None, target_user=None, activatable=False, all_datasets=False ):
- """
- Return a copy of this history using the given `name` and `target_user`.
- If `activatable`, copy only non-deleted datasets. If `all_datasets`, copy
- non-deleted, deleted, and purged datasets.
- """
- # Create new history.
- if not name:
- name = self.name
- if not target_user:
- target_user = self.user
- quota = True
- if target_user == self.user:
- quota = False
- new_history = History( name=name, user=target_user )
- db_session = object_session( self )
- db_session.add( new_history )
- db_session.flush()
- # Copy annotation.
- self.copy_item_annotation( db_session, self.user, self, target_user, new_history )
- # Copy Tags
- new_history.copy_tags_from(target_user=target_user, source_history=self)
- # Copy HDAs.
- if activatable:
- hdas = self.activatable_datasets
- elif all_datasets:
- hdas = self.datasets
- else:
- hdas = self.active_datasets
- for hda in hdas:
- # Copy HDA.
- new_hda = hda.copy( copy_children=True )
- new_history.add_dataset( new_hda, set_hid = False, quota=quota )
- db_session.add( new_hda )
- db_session.flush()
- # Copy annotation.
- self.copy_item_annotation( db_session, self.user, hda, target_user, new_hda )
- new_history.hid_counter = self.hid_counter
- db_session.add( new_history )
- db_session.flush()
- return new_history
- @property
- def activatable_datasets( self ):
- # This needs to be a list
- return [ hda for hda in self.datasets if not hda.dataset.deleted ]
- def to_dict( self, view='collection', value_mapper = None ):
- # Get basic value.
- rval = super( History, self ).to_dict( view=view, value_mapper=value_mapper )
- # Add tags.
- tags_str_list = []
- for tag in self.tags:
- tag_str = tag.user_tname
- if tag.value is not None:
- tag_str += ":" + tag.user_value
- tags_str_list.append( tag_str )
- rval[ 'tags' ] = tags_str_list
- return rval
- def set_from_dict( self, new_data ):
- """
- Set object attributes to the values in the dictionary `new_data`, limited
- to the keys in dict_element_visible_keys (a.k.a. set_api_value).
- Returns a dictionary of the keys and values that have been changed.
- """
- # precondition: keys are proper, values are parsed and validated
- changed = {}
- # unknown keys are ignored here
- for key in [ k for k in new_data.keys() if k in self.dict_element_visible_keys ]:
- new_val = new_data[ key ]
- old_val = self.__getattribute__( key )
- if new_val == old_val:
- continue
- self.__setattr__( key, new_val )
- changed[ key ] = new_val
- return changed
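- # Illustrative example: only keys listed in dict_element_visible_keys are
- # applied, and the return value reports what actually changed (assuming the
- # history was not already named 'New name'):
- #
- #     history.set_from_dict( { 'name': 'New name', 'bogus_key': 1 } )
- #     # -> { 'name': 'New name' }   ('bogus_key' is ignored)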
- @property
- def latest_export( self ):
- exports = self.exports
- return exports and exports[ 0 ]
- @property
- def get_disk_size_bytes( self ):
- return self.get_disk_size( nice_size=False )
- def unhide_datasets( self ):
- for dataset in self.datasets:
- dataset.mark_unhidden()
- def resume_paused_jobs( self ):
- for dataset in self.datasets:
- job = dataset.creating_job
- if job is not None and job.state == Job.states.PAUSED:
- job.set_state(Job.states.NEW)
- def get_disk_size( self, nice_size=False ):
- # unique datasets only
- db_session = object_session( self )
- rval = db_session.query(
- func.sum( db_session.query( HistoryDatasetAssociation.dataset_id, Dataset.total_size ).join( Dataset )
- .filter( HistoryDatasetAssociation.table.c.history_id == self.id )
- .filter( HistoryDatasetAssociation.purged != True )
- .filter( Dataset.purged != True )
- .distinct().subquery().c.total_size ) ).first()[0]
- if rval is None:
- rval = 0
- if nice_size:
- rval = galaxy.datatypes.data.nice_size( rval )
- return rval
- @property
- def active_datasets_children_and_roles( self ):
- if not hasattr(self, '_active_datasets_children_and_roles'):
- db_session = object_session( self )
- query = db_session.query( HistoryDatasetAssociation ).filter( HistoryDatasetAssociation.table.c.history_id == self.id ). \
- filter( not_( HistoryDatasetAssociation.deleted ) ). \
- order_by( HistoryDatasetAssociation.table.c.hid.asc() ). \
- options(
- joinedload("children"),
- joinedload("dataset"),
- joinedload("dataset.actions"),
- joinedload("dataset.actions.role"),
- )
- self._active_datasets_children_and_roles = query.all()
- return self._active_datasets_children_and_roles
- def contents_iter( self, **kwds ):
- """
- Fetch filtered list of contents of history.
- """
- default_contents_types = [
- 'dataset',
- ]
- types = kwds.get('types', default_contents_types)
- iters = []
- if 'dataset' in types:
- iters.append( self.__dataset_contents_iter( **kwds ) )
- return galaxy.util.merge_sorted_iterables( operator.attrgetter( "hid" ), *iters )
- def __dataset_contents_iter(self, **kwds):
- return self.__filter_contents( HistoryDatasetAssociation, **kwds )
- def __filter_contents( self, content_class, **kwds ):
- db_session = object_session( self )
- assert db_session is not None
- query = db_session.query( content_class ).filter( content_class.table.c.history_id == self.id )
- query = query.order_by( content_class.table.c.hid.asc() )
- python_filter = None
- deleted = galaxy.util.string_as_bool_or_none( kwds.get( 'deleted', None ) )
- if deleted is not None:
- query = query.filter( content_class.deleted == deleted )
- visible = galaxy.util.string_as_bool_or_none( kwds.get( 'visible', None ) )
- if visible is not None:
- query = query.filter( content_class.visible == visible )
- if 'ids' in kwds:
- ids = kwds['ids']
- max_in_filter_length = kwds.get('max_in_filter_length', MAX_IN_FILTER_LENGTH)
- if len(ids) < max_in_filter_length:
- query = query.filter( content_class.id.in_(ids) )
- else:
- python_filter = lambda content: content.id in ids
- if python_filter:
- return ifilter(python_filter, query)
- else:
- return query
- def copy_tags_from(self,target_user,source_history):
- for src_shta in source_history.tags:
- new_shta = src_shta.copy()
- new_shta.user = target_user
- self.tags.append(new_shta)
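- # Illustrative sketch (not part of the original module): contents_iter()
- # filters on the keyword arguments handled by __filter_contents above.
- # The helper name is hypothetical.
- def _example_visible_history_contents( history ):
-     # only non-deleted, visible datasets, yielded in hid order
-     return list( history.contents_iter( types=[ 'dataset' ], deleted=False, visible=True ) )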
- class HistoryUserShareAssociation( object ):
- def __init__( self ):
- self.history = None
- self.user = None
- class UserRoleAssociation( object ):
- def __init__( self, user, role ):
- self.user = user
- self.role = role
- class GroupRoleAssociation( object ):
- def __init__( self, group, role ):
- self.group = group
- self.role = role
- class Role( object, Dictifiable ):
- dict_collection_visible_keys = ( 'id', 'name' )
- dict_element_visible_keys = ( 'id', 'name', 'description', 'type' )
- private_id = None
- types = Bunch(
- PRIVATE = 'private',
- SYSTEM = 'system',
- USER = 'user',
- ADMIN = 'admin',
- SHARING = 'sharing'
- )
- def __init__( self, name="", description="", type="system", deleted=False ):
- self.name = name
- self.description = description
- self.type = type
- self.deleted = deleted
- class UserQuotaAssociation( object, Dictifiable ):
- dict_element_visible_keys = ( 'user', )
- def __init__( self, user, quota ):
- self.user = user
- self.quota = quota
- class GroupQuotaAssociation( object, Dictifiable ):
- dict_element_visible_keys = ( 'group', )
- def __init__( self, group, quota ):
- self.group = group
- self.quota = quota
- class Quota( object, Dictifiable ):
- dict_collection_visible_keys = ( 'id', 'name' )
- dict_element_visible_keys = ( 'id', 'name', 'description', 'bytes', 'operation', 'display_amount', 'default', 'users', 'groups' )
- valid_operations = ( '+', '-', '=' )
- def __init__( self, name="", description="", amount=0, operation="=" ):
- self.name = name
- self.description = description
- if amount is None:
- self.bytes = -1
- else:
- self.bytes = amount
- self.operation = operation
- def get_amount( self ):
- if self.bytes == -1:
- return None
- return self.bytes
- def set_amount( self, amount ):
- if amount is None:
- self.bytes = -1
- else:
- self.bytes = amount
- amount = property( get_amount, set_amount )
- @property
- def display_amount( self ):
- if self.bytes == -1:
- return "unlimited"
- else:
- return nice_size( self.bytes )
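- # Illustrative examples of the amount/bytes convention used above (exact
- # formatting of the last value is decided by nice_size()):
- #
- #     Quota( name='default', amount=None ).display_amount          # -> 'unlimited'
- #     Quota( name='standard', amount=1073741824 ).amount           # -> 1073741824
- #     Quota( name='standard', amount=1073741824 ).display_amount   # e.g. '1.0 GB'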
- class DefaultQuotaAssociation( Quota, Dictifiable ):
- dict_element_visible_keys = ( 'type', )
- types = Bunch(
- UNREGISTERED = 'unregistered',
- REGISTERED = 'registered'
- )
- def __init__( self, type, quota ):
- assert type in self.types.__dict__.values(), 'Invalid type'
- self.type = type
- self.quota = quota
- class DatasetPermissions( object ):
- def __init__( self, action, dataset, role ):
- self.action = action
- self.dataset = dataset
- self.role = role
- class LibraryPermissions( object ):
- def __init__( self, action, library_item, role ):
- self.action = action
- if isinstance( library_item, Library ):
- self.library = library_item
- else:
- raise "Invalid Library specified: %s" % library_item.__class__.__name__
- self.role = role
- class LibraryFolderPermissions( object ):
- def __init__( self, action, library_item, role ):
- self.action = action
- if isinstance( library_item, LibraryFolder ):
- self.folder = library_item
- else:
- raise "Invalid LibraryFolder specified: %s" % library_item.__class__.__name__
- self.role = role
- class LibraryDatasetPermissions( object ):
- def __init__( self, action, library_item, role ):
- self.action = action
- if isinstance( library_item, LibraryDataset ):
- self.library_dataset = library_item
- else:
- raise "Invalid LibraryDataset specified: %s" % library_item.__class__.__name__
- self.role = role
- class LibraryDatasetDatasetAssociationPermissions( object ):
- def __init__( self, action, library_item, role ):
- self.action = action
- if isinstance( library_item, LibraryDatasetDatasetAssociation ):
- self.library_dataset_dataset_association = library_item
- else:
- raise "Invalid LibraryDatasetDatasetAssociation specified: %s" % library_item.__class__.__name__
- self.role = role
- class DefaultUserPermissions( object ):
- def __init__( self, user, action, role ):
- self.user = user
- self.action = action
- self.role = role
- class DefaultHistoryPermissions( object ):
- def __init__( self, history, action, role ):
- self.history = history
- self.action = action
- self.role = role
- class Dataset( object ):
- states = Bunch( NEW = 'new',
- UPLOAD = 'upload',
- QUEUED = 'queued',
- RUNNING = 'running',
- OK = 'ok',
- EMPTY = 'empty',
- ERROR = 'error',
- DISCARDED = 'discarded',
- PAUSED = 'paused',
- SETTING_METADATA = 'setting_metadata',
- FAILED_METADATA = 'failed_metadata' )
- conversion_messages = Bunch( PENDING = "pending",
- NO_DATA = "no data",
- NO_CHROMOSOME = "no chromosome",
- NO_CONVERTER = "no converter",
- NO_TOOL = "no tool",
- DATA = "data",
- ERROR = "error",
- OK = "ok" )
- permitted_actions = get_permitted_actions( filter='DATASET' )
- file_path = "/tmp/"
- object_store = None # This get initialized in mapping.py (method init) by app.py
- engine = None
- def __init__( self, id=None, state=None, external_filename=None, extra_files_path=None, file_size=None, purgable=True, uuid=None ):
- self.id = id
- self.state = state
- self.deleted = False
- self.purged = False
- self.purgable = purgable
- self.external_filename = external_filename
- self._extra_files_path = extra_files_path
- self.file_size = file_size
- if uuid is None:
- self.uuid = uuid4()
- else:
- self.uuid = UUID(str(uuid))
- def get_file_name( self ):
- if not self.external_filename:
- assert self.id is not None, "ID must be set before filename used (commit the object)"
- assert self.object_store is not None, "Object Store has not been initialized for dataset %s" % self.id
- filename = self.object_store.get_filename( self )
- return filename
- else:
- filename = self.external_filename
- # Make filename absolute
- return os.path.abspath( filename )
- def set_file_name ( self, filename ):
- if not filename:
- self.external_filename = None
- else:
- self.external_filename = filename
- file_name = property( get_file_name, set_file_name )
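- # Note on resolution order (from the accessors above): when external_filename
- # is unset the path is asked of the object store, which requires both an id
- # and an initialized object_store; otherwise the external path is simply made
- # absolute.  Illustrative sketch:
- #
- #     dataset.file_name                       # -> object_store.get_filename( dataset )
- #     dataset.file_name = '/data/imported.bed'
- #     dataset.file_name                       # -> '/data/imported.bed'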
- @property
- def extra_files_path( self ):
- return self.object_store.get_filename( self, dir_only=True, extra_dir=self._extra_files_path or "dataset_%d_files" % self.id )
- def _calculate_size( self ):
- if self.external_filename:
- try:
- return os.path.getsize(self.external_filename)
- except OSError:
- return 0
- else:
- return self.object_store.size(self)
- def get_size( self, nice_size=False ):
- """Returns the size of the data on disk"""
- if self.file_size:
- if nice_size:
- return galaxy.datatypes.data.nice_size( self.file_size )
- else:
- return self.file_size
- else:
- if nice_size:
- return galaxy.datatypes.data.nice_size( self._calculate_size() )
- else:
- return self._calculate_size()
- def set_size( self ):
- """Returns the size of the data on disk"""
- if not self.file_size:
- self.file_size = self._calculate_size()
- def get_total_size( self ):
- if self.total_size is not None:
- return self.total_size
- if self.file_size:
- # for backwards compatibility, set if unset
- self.set_total_size()
- db_session = object_session( self )
- db_session.flush()
- return self.total_size
- return 0
- def set_total_size( self ):
- if self.file_size is None:
- self.set_size()
- self.total_size = self.file_size or 0
- if self.object_store.exists(self, extra_dir=self._extra_files_path or "dataset_%d_files" % self.id, dir_only=True):
- for root, dirs, files in os.walk( self.extra_files_path ):
- self.total_size += sum( [ os.path.getsize( os.path.join( root, file ) ) for file in files if os.path.exists( os.path.join( root, file ) ) ] )
- def has_data( self ):
- """Detects whether there is any data"""
- return self.get_size() > 0
- def mark_deleted( self, include_children=True ):
- self.deleted = True
- def is_multi_byte( self ):
- if not self.has_data():
- return False
- try:
- return is_multi_byte( codecs.open( self.file_name, 'r', 'utf-8' ).read( 100 ) )
- except UnicodeDecodeError:
- return False
- # FIXME: sqlalchemy will replace this
- def _delete(self):
- """Remove the file that corresponds to this data"""
- self.object_store.delete(self)
- @property
- def user_can_purge( self ):
- return not self.purged \
- and not bool( self.library_associations ) \
- and len( self.history_associations ) == len( self.purged_history_associations )
- def full_delete( self ):
- """Remove the file and extra files, marks deleted and purged"""
- # os.unlink( self.file_name )
- self.object_store.delete(self)
- if self.object_store.exists(self, extra_dir=self._extra_files_path or "dataset_%d_files" % self.id, dir_only=True):
- self.object_store.delete(self, entire_dir=True, extra_dir=self._extra_files_path or "dataset_%d_files" % self.id, dir_only=True)
- # if os.path.exists( self.extra_files_path ):
- # shutil.rmtree( self.extra_files_path )
- # TODO: purge metadata files
- self.deleted = True
- self.purged = True
- def get_access_roles( self, trans ):
- roles = []
- for dp in self.actions:
- if dp.action == trans.app.security_agent.permitted_actions.DATASET_ACCESS.action:
- roles.append( dp.role )
- return roles
- def get_manage_permissions_roles( self, trans ):
- roles = []
- for dp in self.actions:
- if dp.action == trans.app.security_agent.permitted_actions.DATASET_MANAGE_PERMISSIONS.action:
- roles.append( dp.role )
- return roles
- def has_manage_permissions_roles( self, trans ):
- for dp in self.actions:
- if dp.action == trans.app.security_agent.permitted_actions.DATASET_MANAGE_PERMISSIONS.action:
- return True
- return False
- class DatasetInst…