/lib/galaxy/model/__init__.py
- """
- Galaxy data model classes
- Naming: try to use class names that have a distinct plural form so that
- the relationship cardinalities are obvious (e.g. prefer Dataset to Data)
- """
- from galaxy import eggs
- eggs.require("pexpect")
- import codecs
- import errno
- import logging
- import operator
- import os
- import pexpect
- import json
- import socket
- import time
- from uuid import UUID, uuid4
- from string import Template
- from itertools import ifilter
- from itertools import chain
- import galaxy.datatypes
- import galaxy.datatypes.registry
- import galaxy.security.passwords
- from galaxy.datatypes.metadata import MetadataCollection
- from galaxy.model.item_attrs import Dictifiable, UsesAnnotations
- from galaxy.security import get_permitted_actions
- from galaxy.util import is_multi_byte, nice_size, Params, restore_text, send_mail
- from galaxy.util import ready_name_for_url
- from galaxy.util.bunch import Bunch
- from galaxy.util.hash_util import new_secure_hash
- from galaxy.util.directory_hash import directory_hash_id
- from galaxy.web.framework.helpers import to_unicode
- from galaxy.web.form_builder import (AddressField, CheckboxField, HistoryField,
- PasswordField, SelectField, TextArea, TextField, WorkflowField,
- WorkflowMappingField)
- from sqlalchemy.orm import object_session
- from sqlalchemy.orm import joinedload
- from sqlalchemy.sql.expression import func
- from sqlalchemy import not_
- log = logging.getLogger( __name__ )
- datatypes_registry = galaxy.datatypes.registry.Registry()
- # Default Value Required for unit tests
- datatypes_registry.load_datatypes()
- # When constructing filters with IN for a fixed set of ids, this is the
- # maximum number of items to place in the IN statement. Different databases
- # have different limits, so it is best not to let this be unlimited -
- # filter in Python if over this limit.
- MAX_IN_FILTER_LENGTH = 100
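- # Illustrative sketch (not part of the original module): one way a caller
- # might honour MAX_IN_FILTER_LENGTH when selecting mapped objects by id.
- # The names `session`, `model_class` and `ids` are assumed to be supplied
- # by the caller; the helper itself is hypothetical.
- def _example_query_by_ids( session, model_class, ids ):
-     if len( ids ) < MAX_IN_FILTER_LENGTH:
-         # small enough set - let the database handle it with an IN clause
-         return session.query( model_class ).filter( model_class.id.in_( ids ) ).all()
-     # too many ids for a portable IN clause - filter in Python instead
-     return [ obj for obj in session.query( model_class ) if obj.id in ids ]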
- class NoConverterException(Exception):
- def __init__(self, value):
- self.value = value
- def __str__(self):
- return repr(self.value)
- class ConverterDependencyException(Exception):
- def __init__(self, value):
- self.value = value
- def __str__(self):
- return repr(self.value)
- def set_datatypes_registry( d_registry ):
- """
- Set up datatypes_registry
- """
- global datatypes_registry
- datatypes_registry = d_registry
- class HasName:
- def get_display_name( self ):
- """
- These objects have a name attribute that can be either a string or a
- unicode object. If it is a string, convert it to unicode assuming 'utf-8' encoding.
- """
- name = self.name
- if isinstance(name, str):
- name = unicode(name, 'utf-8')
- return name
- class User( object, Dictifiable ):
- """
- Data for a Galaxy user or admin and relations to their
- histories, credentials, and roles.
- """
- use_pbkdf2 = True
- # attributes that will be accessed and returned when calling to_dict( view='collection' )
- dict_collection_visible_keys = ( 'id', 'email' )
- # attributes that will be accessed and returned when calling to_dict( view='element' )
- dict_element_visible_keys = ( 'id', 'email', 'username', 'total_disk_usage', 'nice_total_disk_usage' )
- def __init__( self, email=None, password=None ):
- self.email = email
- self.password = password
- self.external = False
- self.deleted = False
- self.purged = False
- self.active = False
- self.activation_token = None
- self.username = None
- # Relationships
- self.histories = []
- self.credentials = []
- #? self.roles = []
- def set_password_cleartext( self, cleartext ):
- """
- Set user password to the digest of `cleartext`.
- """
- if User.use_pbkdf2:
- self.password = galaxy.security.passwords.hash_password( cleartext )
- else:
- self.password = new_secure_hash( text_type=cleartext )
- def check_password( self, cleartext ):
- """
- Check if `cleartext` matches user password when hashed.
- """
- return galaxy.security.passwords.check_password( cleartext, self.password )
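- # Illustrative round trip (a sketch, assuming the default pbkdf2 hashing
- # enabled by use_pbkdf2 above):
- #
- #     user = User( email='test@example.org' )
- #     user.set_password_cleartext( 'changeme' )
- #     user.check_password( 'changeme' )   # -> True
- #     user.check_password( 'wrong' )      # -> False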
- def all_roles( self ):
- """
- Return a unique list of Roles associated with this user or any of their groups.
- """
- roles = [ ura.role for ura in self.roles ]
- for group in [ uga.group for uga in self.groups ]:
- for role in [ gra.role for gra in group.roles ]:
- if role not in roles:
- roles.append( role )
- return roles
- def get_disk_usage( self, nice_size=False ):
- """
- Return byte count of disk space used by user or a human-readable
- string if `nice_size` is `True`.
- """
- rval = 0
- if self.disk_usage is not None:
- rval = self.disk_usage
- if nice_size:
- rval = galaxy.datatypes.data.nice_size( rval )
- return rval
- def set_disk_usage( self, bytes ):
- """
- Manually set the disk space used by a user to `bytes`.
- """
- self.disk_usage = bytes
- total_disk_usage = property( get_disk_usage, set_disk_usage )
- @property
- def nice_total_disk_usage( self ):
- """
- Return byte count of disk space used in a human-readable string.
- """
- return self.get_disk_usage( nice_size=True )
- def calculate_disk_usage( self ):
- """
- Return byte count total of disk space used by all non-purged, non-library
- HDAs in non-purged histories.
- """
- # maintain a list so that we don't double count
- dataset_ids = []
- total = 0
- # this can be a huge number and can run out of memory, so we avoid the mappers
- db_session = object_session( self )
- for history in db_session.query( History ).enable_eagerloads( False ).filter_by( user_id=self.id, purged=False ).yield_per( 1000 ):
- for hda in db_session.query( HistoryDatasetAssociation ).enable_eagerloads( False ).filter_by( history_id=history.id, purged=False ).yield_per( 1000 ):
- #TODO: def hda.counts_toward_disk_usage():
- # return ( not self.dataset.purged and not self.dataset.library_associations )
- if hda.dataset.id not in dataset_ids and not hda.dataset.purged and not hda.dataset.library_associations:
- dataset_ids.append( hda.dataset.id )
- total += hda.dataset.get_total_size()
- return total
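- # Usage sketch (illustrative, not part of the original module): an admin
- # script might recompute and persist the cached `disk_usage` column like so,
- # assuming `sa_session` is the SQLAlchemy session the user is attached to:
- #
- #     recalculated = user.calculate_disk_usage()
- #     if recalculated != user.get_disk_usage():
- #         user.set_disk_usage( recalculated )
- #         sa_session.add( user )
- #         sa_session.flush()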
- @staticmethod
- def user_template_environment( user ):
- """
- >>> env = User.user_template_environment(None)
- >>> env['__user_email__']
- 'Anonymous'
- >>> env['__user_id__']
- 'Anonymous'
- >>> user = User('foo@example.com')
- >>> user.id = 6
- >>> user.username = 'foo2'
- >>> env = User.user_template_environment(user)
- >>> env['__user_id__']
- '6'
- >>> env['__user_name__']
- 'foo2'
- """
- if user:
- user_id = '%d' % user.id
- user_email = str( user.email )
- user_name = str( user.username )
- else:
- user = None
- user_id = 'Anonymous'
- user_email = 'Anonymous'
- user_name = 'Anonymous'
- environment = {}
- environment[ '__user__' ] = user
- environment[ '__user_id__' ] = environment[ 'userId' ] = user_id
- environment[ '__user_email__' ] = environment[ 'userEmail' ] = user_email
- environment[ '__user_name__' ] = user_name
- return environment
- @staticmethod
- def expand_user_properties( user, in_string ):
- """
- """
- environment = User.user_template_environment( user )
- return Template( in_string ).safe_substitute( environment )
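- # Illustrative sketch (not part of the original module): expanding a
- # `${}`-style template against the environment built by
- # `user_template_environment`.  The helper and greeting template below are
- # made up for illustration.
- def _example_user_greeting( user ):
-     template = "Hello ${__user_name__} (id ${__user_id__}, ${__user_email__})"
-     return User.expand_user_properties( user, template )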
- class Job( object, Dictifiable ):
- """
- A job represents a request to run a tool given input datasets, tool
- parameters, and output datasets.
- """
- dict_collection_visible_keys = [ 'id', 'state', 'exit_code', 'update_time', 'create_time' ]
- dict_element_visible_keys = [ 'id', 'state', 'exit_code', 'update_time', 'create_time' ]
- states = Bunch( NEW = 'new',
- UPLOAD = 'upload',
- WAITING = 'waiting',
- QUEUED = 'queued',
- RUNNING = 'running',
- OK = 'ok',
- ERROR = 'error',
- PAUSED = 'paused',
- DELETED = 'deleted',
- DELETED_NEW = 'deleted_new' )
- # Please include an accessor (get/set pair) for any new columns/members.
- def __init__( self ):
- self.session_id = None
- self.user_id = None
- self.tool_id = None
- self.tool_version = None
- self.command_line = None
- self.param_filename = None
- self.parameters = []
- self.input_datasets = []
- self.output_datasets = []
- self.input_library_datasets = []
- self.output_library_datasets = []
- self.state = Job.states.NEW
- self.info = None
- self.job_runner_name = None
- self.job_runner_external_id = None
- self.destination_id = None
- self.destination_params = None
- self.post_job_actions = []
- self.imported = False
- self.handler = None
- self.exit_code = None
- @property
- def finished( self ):
- states = self.states
- return self.state in [
- states.OK,
- states.ERROR,
- states.DELETED,
- states.DELETED_NEW,
- ]
- # TODO: Add accessors for members defined in SQL Alchemy for the Job table and
- # for the mapper defined to the Job table.
- def get_external_output_metadata( self ):
- """
- The external_output_metadata is currently a reference from Job to
- JobExternalOutputMetadata. It exists for a job but not a task.
- """
- return self.external_output_metadata
- def get_session_id( self ):
- return self.session_id
- def get_user_id( self ):
- return self.user_id
- def get_tool_id( self ):
- return self.tool_id
- def get_tool_version( self ):
- return self.tool_version
- def get_command_line( self ):
- return self.command_line
- def get_param_filename( self ):
- return self.param_filename
- def get_parameters( self ):
- return self.parameters
- def get_input_datasets( self ):
- return self.input_datasets
- def get_output_datasets( self ):
- return self.output_datasets
- def get_input_library_datasets( self ):
- return self.input_library_datasets
- def get_output_library_datasets( self ):
- return self.output_library_datasets
- def get_state( self ):
- return self.state
- def get_info( self ):
- return self.info
- def get_job_runner_name( self ):
- # This differs from the Task class in that job_runner_name is
- # accessed instead of task_runner_name. Note that the field
- # runner_name is not the same thing.
- return self.job_runner_name
- def get_job_runner_external_id( self ):
- # This differs from the Task class only in the member accessed:
- return self.job_runner_external_id
- def get_post_job_actions( self ):
- return self.post_job_actions
- def get_imported( self ):
- return self.imported
- def get_handler( self ):
- return self.handler
- def get_params( self ):
- return self.params
- def get_user( self ):
- # This is defined in the SQL Alchemy mapper as a relation to the User.
- return self.user
- def get_id( self ):
- # This is defined in the SQL Alchemy's Job table (and not in the model).
- return self.id
- def get_tasks( self ):
- # The tasks member is part of a reference in the SQL Alchemy schema:
- return self.tasks
- def get_id_tag( self ):
- """
- Return a tag that can be useful in identifying a Job.
- This returns the Job's id as a string.
- """
- return "%s" % self.id
- def set_session_id( self, session_id ):
- self.session_id = session_id
- def set_user_id( self, user_id ):
- self.user_id = user_id
- def set_tool_id( self, tool_id ):
- self.tool_id = tool_id
- def set_tool_version( self, tool_version ):
- self.tool_version = tool_version
- def set_command_line( self, command_line ):
- self.command_line = command_line
- def set_param_filename( self, param_filename ):
- self.param_filename = param_filename
- def set_parameters( self, parameters ):
- self.parameters = parameters
- def set_input_datasets( self, input_datasets ):
- self.input_datasets = input_datasets
- def set_output_datasets( self, output_datasets ):
- self.output_datasets = output_datasets
- def set_input_library_datasets( self, input_library_datasets ):
- self.input_library_datasets = input_library_datasets
- def set_output_library_datasets( self, output_library_datasets ):
- self.output_library_datasets = output_library_datasets
- def set_info( self, info ):
- self.info = info
- def set_runner_name( self, job_runner_name ):
- self.job_runner_name = job_runner_name
- def set_runner_external_id( self, job_runner_external_id ):
- self.job_runner_external_id = job_runner_external_id
- def set_post_job_actions( self, post_job_actions ):
- self.post_job_actions = post_job_actions
- def set_imported( self, imported ):
- self.imported = imported
- def set_handler( self, handler ):
- self.handler = handler
- def set_params( self, params ):
- self.params = params
- def add_parameter( self, name, value ):
- self.parameters.append( JobParameter( name, value ) )
- def add_input_dataset( self, name, dataset ):
- self.input_datasets.append( JobToInputDatasetAssociation( name, dataset ) )
- def add_output_dataset( self, name, dataset ):
- self.output_datasets.append( JobToOutputDatasetAssociation( name, dataset ) )
- def add_input_library_dataset( self, name, dataset ):
- self.input_library_datasets.append( JobToInputLibraryDatasetAssociation( name, dataset ) )
- def add_output_library_dataset( self, name, dataset ):
- self.output_library_datasets.append( JobToOutputLibraryDatasetAssociation( name, dataset ) )
- def add_post_job_action(self, pja):
- self.post_job_actions.append( PostJobActionAssociation( pja, self ) )
- def set_state( self, state ):
- """
- This is the only set method that performs extra work. In this case, the
- state is propagated down to datasets.
- """
- self.state = state
- # For historical reasons state propagates down to datasets
- for da in self.output_datasets:
- da.dataset.state = state
- def get_param_values( self, app, ignore_errors=False ):
- """
- Read encoded parameter values from the database and turn back into a
- dict of tool parameter values.
- """
- param_dict = dict( [ ( p.name, p.value ) for p in self.parameters ] )
- tool = app.toolbox.get_tool( self.tool_id )
- param_dict = tool.params_from_strings( param_dict, app, ignore_errors=ignore_errors )
- return param_dict
- def check_if_output_datasets_deleted( self ):
- """
- Return true if all of the output datasets associated with this job are
- in the deleted state
- """
- for dataset_assoc in self.output_datasets:
- dataset = dataset_assoc.dataset
- # only the originator of the job can delete a dataset to cause
- # cancellation of the job, no need to loop through history_associations
- if not dataset.deleted:
- return False
- return True
- def mark_deleted( self, track_jobs_in_database=False ):
- """
- Mark this job as deleted, and mark any output datasets as discarded.
- """
- if track_jobs_in_database:
- self.state = Job.states.DELETED_NEW
- else:
- self.state = Job.states.DELETED
- self.info = "Job output deleted by user before job completed."
- for dataset_assoc in self.output_datasets:
- dataset = dataset_assoc.dataset
- dataset.deleted = True
- dataset.state = dataset.states.DISCARDED
- for dataset in dataset.dataset.history_associations:
- # propagate info across shared datasets
- dataset.deleted = True
- dataset.blurb = 'deleted'
- dataset.peek = 'Job deleted'
- dataset.info = 'Job output deleted by user before job completed'
- def to_dict( self, view='collection' ):
- rval = super( Job, self ).to_dict( view=view )
- rval['tool_id'] = self.tool_id
- if view == 'element':
- param_dict = dict( [ ( p.name, p.value ) for p in self.parameters ] )
- rval['params'] = param_dict
- input_dict = {}
- for i in self.input_datasets:
- if i.dataset is not None:
- input_dict[i.name] = {"id" : i.dataset.id, "src" : "hda"}
- for i in self.input_library_datasets:
- if i.dataset is not None:
- input_dict[i.name] = {"id" : i.dataset.id, "src" : "ldda"}
- for k in input_dict:
- if k in param_dict:
- del param_dict[k]
- rval['inputs'] = input_dict
- output_dict = {}
- for i in self.output_datasets:
- if i.dataset is not None:
- output_dict[i.name] = {"id" : i.dataset.id, "src" : "hda"}
- for i in self.output_library_datasets:
- if i.dataset is not None:
- output_dict[i.name] = {"id" : i.dataset.id, "src" : "ldda"}
- rval['outputs'] = output_dict
- return rval
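- # For reference, an 'element' view produced by to_dict() above has roughly
- # this shape (illustrative values only; update/create times omitted):
- #
- #     { 'id': 42, 'state': 'ok', 'exit_code': 0, 'tool_id': 'cat1',
- #       'params': { ...remaining tool parameters... },
- #       'inputs':  { 'input1': { 'id': 7, 'src': 'hda' } },
- #       'outputs': { 'out_file1': { 'id': 8, 'src': 'hda' } } }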
- class Task( object ):
- """
- A task represents a single component of a job.
- """
- states = Bunch( NEW = 'new',
- WAITING = 'waiting',
- QUEUED = 'queued',
- RUNNING = 'running',
- OK = 'ok',
- ERROR = 'error',
- DELETED = 'deleted' )
- # Please include an accessor (get/set pair) for any new columns/members.
- def __init__( self, job, working_directory, prepare_files_cmd ):
- self.command_line = None
- self.parameters = []
- self.state = Task.states.NEW
- self.info = None
- self.working_directory = working_directory
- self.task_runner_name = None
- self.task_runner_external_id = None
- self.job = job
- self.stdout = ""
- self.stderr = ""
- self.exit_code = None
- self.prepare_input_files_cmd = prepare_files_cmd
- def get_param_values( self, app ):
- """
- Read encoded parameter values from the database and turn back into a
- dict of tool parameter values.
- """
- param_dict = dict( [ ( p.name, p.value ) for p in self.parent_job.parameters ] )
- tool = app.toolbox.get_tool( self.tool_id )
- param_dict = tool.params_from_strings( param_dict, app )
- return param_dict
- def get_id( self ):
- # This is defined in the SQL Alchemy schema:
- return self.id
- def get_id_tag( self ):
- """
- Return an id tag suitable for identifying the task.
- This combines the task's job id and the task's own id.
- """
- return "%s_%s" % ( self.job.get_id(), self.get_id() )
- def get_command_line( self ):
- return self.command_line
- def get_parameters( self ):
- return self.parameters
- def get_state( self ):
- return self.state
- def get_info( self ):
- return self.info
- def get_working_directory( self ):
- return self.working_directory
- def get_task_runner_name( self ):
- return self.task_runner_name
- def get_task_runner_external_id( self ):
- return self.task_runner_external_id
- def get_job( self ):
- return self.job
- def get_stdout( self ):
- return self.stdout
- def get_stderr( self ):
- return self.stderr
- def get_prepare_input_files_cmd( self ):
- return self.prepare_input_files_cmd
- # The following accessors are for members that are in the Job class but
- # not in the Task class. So they can either refer to the parent Job
- # or return None, depending on whether Tasks need to point to the parent
- # (e.g., for a session) or never use the member (e.g., external output
- # metadata). These can be filled in as needed.
- def get_external_output_metadata( self ):
- """
- The external_output_metadata is currently a backref to
- JobExternalOutputMetadata. It exists for a job but not a task,
- and when a task is cancelled its corresponding parent Job will
- be cancelled. So None is returned now, but that could be changed
- to self.get_job().get_external_output_metadata().
- """
- return None
- def get_job_runner_name( self ):
- """
- Since runners currently access Tasks the same way they access Jobs,
- this method just refers to *this* instance's runner.
- """
- return self.task_runner_name
- def get_job_runner_external_id( self ):
- """
- Runners will use the same methods to get information about the Task
- class as they will about the Job class, so this method just returns
- the task's external id.
- """
- # TODO: Merge into get_runner_external_id.
- return self.task_runner_external_id
- def get_session_id( self ):
- # A Task shares its parent Job's galaxy session, so return the
- # session id of the parent Job.
- return self.get_job().get_session_id()
- def set_id( self, id ):
- # This is defined in the SQL Alchemy's mapper and not here.
- # This should never be called.
- self.id = id
- def set_command_line( self, command_line ):
- self.command_line = command_line
- def set_parameters( self, parameters ):
- self.parameters = parameters
- def set_state( self, state ):
- self.state = state
- def set_info( self, info ):
- self.info = info
- def set_working_directory( self, working_directory ):
- self.working_directory = working_directory
- def set_task_runner_name( self, task_runner_name ):
- self.task_runner_name = task_runner_name
- def set_job_runner_external_id( self, task_runner_external_id ):
- # This method is available for runners that do not want/need to
- # differentiate between the kinds of Runnable things (Jobs and Tasks)
- # that they're using.
- log.debug( "Task %d: Set external id to %s"
- % ( self.id, task_runner_external_id ) )
- self.task_runner_external_id = task_runner_external_id
- def set_task_runner_external_id( self, task_runner_external_id ):
- self.task_runner_external_id = task_runner_external_id
- def set_job( self, job ):
- self.job = job
- def set_stdout( self, stdout ):
- self.stdout = stdout
- def set_stderr( self, stderr ):
- self.stderr = stderr
- def set_prepare_input_files_cmd( self, prepare_input_files_cmd ):
- self.prepare_input_files_cmd = prepare_input_files_cmd
- class JobParameter( object ):
- def __init__( self, name, value ):
- self.name = name
- self.value = value
- class JobToInputDatasetAssociation( object ):
- def __init__( self, name, dataset ):
- self.name = name
- self.dataset = dataset
- class JobToOutputDatasetAssociation( object ):
- def __init__( self, name, dataset ):
- self.name = name
- self.dataset = dataset
- class JobToInputLibraryDatasetAssociation( object ):
- def __init__( self, name, dataset ):
- self.name = name
- self.dataset = dataset
- class JobToOutputLibraryDatasetAssociation( object ):
- def __init__( self, name, dataset ):
- self.name = name
- self.dataset = dataset
- class PostJobAction( object ):
- def __init__( self, action_type, workflow_step, output_name = None, action_arguments = None):
- self.action_type = action_type
- self.output_name = output_name
- self.action_arguments = action_arguments
- self.workflow_step = workflow_step
- class PostJobActionAssociation( object ):
- def __init__(self, pja, job):
- self.job = job
- self.post_job_action = pja
- class JobExternalOutputMetadata( object ):
- def __init__( self, job = None, dataset = None ):
- self.job = job
- if isinstance( dataset, galaxy.model.HistoryDatasetAssociation ):
- self.history_dataset_association = dataset
- elif isinstance( dataset, galaxy.model.LibraryDatasetDatasetAssociation ):
- self.library_dataset_dataset_association = dataset
- @property
- def dataset( self ):
- if self.history_dataset_association:
- return self.history_dataset_association
- elif self.library_dataset_dataset_association:
- return self.library_dataset_dataset_association
- return None
- class JobExportHistoryArchive( object ):
- def __init__( self, job=None, history=None, dataset=None, compressed=False, \
- history_attrs_filename=None, datasets_attrs_filename=None,
- jobs_attrs_filename=None ):
- self.job = job
- self.history = history
- self.dataset = dataset
- self.compressed = compressed
- self.history_attrs_filename = history_attrs_filename
- self.datasets_attrs_filename = datasets_attrs_filename
- self.jobs_attrs_filename = jobs_attrs_filename
- @property
- def up_to_date( self ):
- """ Return False, if a new export should be generated for corresponding
- history.
- """
- job = self.job
- return job.state not in [ Job.states.ERROR, Job.states.DELETED ] \
- and job.update_time > self.history.update_time
- @property
- def ready( self ):
- return self.job.state == Job.states.OK
- @property
- def preparing( self ):
- return self.job.state in [ Job.states.RUNNING, Job.states.QUEUED, Job.states.WAITING ]
- @property
- def export_name( self ):
- # Stream archive.
- hname = ready_name_for_url( self.history.name )
- hname = "Galaxy-History-%s.tar" % ( hname )
- if self.compressed:
- hname += ".gz"
- return hname
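- # Illustrative example (names are made up): for a history called
- # "RNA-seq run 1", ready_name_for_url() yields a URL-safe variant of the
- # name (for example "RNA-seq-run-1"), so a compressed export would be
- # streamed as "Galaxy-History-RNA-seq-run-1.tar.gz".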
- class JobImportHistoryArchive( object ):
- def __init__( self, job=None, history=None, archive_dir=None ):
- self.job = job
- self.history = history
- self.archive_dir=archive_dir
- class GenomeIndexToolData( object ):
- def __init__( self, job=None, params=None, dataset=None, deferred_job=None, \
- transfer_job=None, fasta_path=None, created_time=None, modified_time=None, \
- dbkey=None, user=None, indexer=None ):
- self.job = job
- self.dataset = dataset
- self.fasta_path = fasta_path
- self.user = user
- self.indexer = indexer
- self.created_time = created_time
- self.modified_time = modified_time
- self.deferred = deferred_job
- self.transfer = transfer_job
- class DeferredJob( object ):
- states = Bunch( NEW = 'new',
- WAITING = 'waiting',
- QUEUED = 'queued',
- RUNNING = 'running',
- OK = 'ok',
- ERROR = 'error' )
- def __init__( self, state=None, plugin=None, params=None ):
- self.state = state
- self.plugin = plugin
- self.params = params
- def get_check_interval( self ):
- if not hasattr( self, '_check_interval' ):
- self._check_interval = None
- return self._check_interval
- def set_check_interval( self, seconds ):
- self._check_interval = seconds
- check_interval = property( get_check_interval, set_check_interval )
- def get_last_check( self ):
- if not hasattr( self, '_last_check' ):
- self._last_check = 0
- return self._last_check
- def set_last_check( self, seconds ):
- try:
- self._last_check = int( seconds )
- except ( ValueError, TypeError ):
- self._last_check = time.time()
- last_check = property( get_last_check, set_last_check )
- @property
- def is_check_time( self ):
- if self.check_interval is None:
- return True
- elif ( int( time.time() ) - self.last_check ) > self.check_interval:
- return True
- else:
- return False
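- # Illustrative sketch (not part of the original module): how a polling loop
- # might use is_check_time and last_check so that check_interval is honoured.
- # The helper name is hypothetical.
- def _example_check_deferred_job( deferred_job ):
-     if not deferred_job.is_check_time:
-         return False
-     # record when this check happened so the next one waits check_interval
-     deferred_job.last_check = int( time.time() )
-     return True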
- class Group( object, Dictifiable ):
- dict_collection_visible_keys = ( 'id', 'name' )
- dict_element_visible_keys = ( 'id', 'name' )
- def __init__( self, name = None ):
- self.name = name
- self.deleted = False
- class UserGroupAssociation( object ):
- def __init__( self, user, group ):
- self.user = user
- self.group = group
- class History( object, Dictifiable, UsesAnnotations, HasName ):
- dict_collection_visible_keys = ( 'id', 'name', 'published', 'deleted' )
- dict_element_visible_keys = ( 'id', 'name', 'published', 'deleted', 'genome_build', 'purged', 'importable', 'slug' )
- default_name = 'Unnamed history'
- def __init__( self, id=None, name=None, user=None ):
- self.id = id
- self.name = name or History.default_name
- self.deleted = False
- self.purged = False
- self.importing = False
- self.genome_build = None
- self.published = False
- # Relationships
- self.user = user
- self.datasets = []
- self.galaxy_sessions = []
- self.tags = []
- def _next_hid( self ):
- # this is overridden in mapping.py db_next_hid() method
- if len( self.datasets ) == 0:
- return 1
- else:
- last_hid = 0
- for dataset in self.datasets:
- if dataset.hid > last_hid:
- last_hid = dataset.hid
- return last_hid + 1
- def add_galaxy_session( self, galaxy_session, association=None ):
- if association is None:
- self.galaxy_sessions.append( GalaxySessionToHistoryAssociation( galaxy_session, self ) )
- else:
- self.galaxy_sessions.append( association )
- def add_dataset( self, dataset, parent_id=None, genome_build=None, set_hid=True, quota=True ):
- if isinstance( dataset, Dataset ):
- dataset = HistoryDatasetAssociation(dataset=dataset)
- object_session( self ).add( dataset )
- object_session( self ).flush()
- elif not isinstance( dataset, HistoryDatasetAssociation ):
- raise TypeError, ( "You can only add Dataset and HistoryDatasetAssociation instances to a history" +
- " ( you tried to add %s )." % str( dataset ) )
- if parent_id:
- for data in self.datasets:
- if data.id == parent_id:
- dataset.hid = data.hid
- break
- else:
- if set_hid:
- dataset.hid = self._next_hid()
- else:
- if set_hid:
- dataset.hid = self._next_hid()
- if quota and self.user:
- self.user.total_disk_usage += dataset.quota_amount( self.user )
- dataset.history = self
- if genome_build not in [None, '?']:
- self.genome_build = genome_build
- self.datasets.append( dataset )
- return dataset
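- # Usage sketch (illustrative, not part of the original module): add_dataset()
- # accepts either a bare Dataset (which it wraps in a new HDA and flushes) or
- # an existing HistoryDatasetAssociation, for example:
- #
- #     history.add_dataset( Dataset(), genome_build='hg19' )      # wrapped into a new HDA
- #     history.add_dataset( existing_hda, set_hid=False, quota=False )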
- def copy( self, name=None, target_user=None, activatable=False, all_datasets=False ):
- """
- Return a copy of this history using the given `name` and `target_user`.
- If `activatable`, copy only non-deleted datasets. If `all_datasets`, copy
- non-deleted, deleted, and purged datasets.
- """
- # Create new history.
- if not name:
- name = self.name
- if not target_user:
- target_user = self.user
- quota = True
- if target_user == self.user:
- quota = False
- new_history = History( name=name, user=target_user )
- db_session = object_session( self )
- db_session.add( new_history )
- db_session.flush()
- # Copy annotation.
- self.copy_item_annotation( db_session, self.user, self, target_user, new_history )
- # Copy Tags
- new_history.copy_tags_from(target_user=target_user, source_history=self)
- # Copy HDAs.
- if activatable:
- hdas = self.activatable_datasets
- elif all_datasets:
- hdas = self.datasets
- else:
- hdas = self.active_datasets
- for hda in hdas:
- # Copy HDA.
- new_hda = hda.copy( copy_children=True )
- new_history.add_dataset( new_hda, set_hid = False, quota=quota )
- db_session.add( new_hda )
- db_session.flush()
- # Copy annotation.
- self.copy_item_annotation( db_session, self.user, hda, target_user, new_hda )
- new_history.hid_counter = self.hid_counter
- db_session.add( new_history )
- db_session.flush()
- return new_history
- @property
- def activatable_datasets( self ):
- # This needs to be a list
- return [ hda for hda in self.datasets if not hda.dataset.deleted ]
- def to_dict( self, view='collection', value_mapper = None ):
- # Get basic value.
- rval = super( History, self ).to_dict( view=view, value_mapper=value_mapper )
- # Add tags.
- tags_str_list = []
- for tag in self.tags:
- tag_str = tag.user_tname
- if tag.value is not None:
- tag_str += ":" + tag.user_value
- tags_str_list.append( tag_str )
- rval[ 'tags' ] = tags_str_list
- return rval
- def set_from_dict( self, new_data ):
- """
- Set object attributes to the values in the dictionary `new_data`, limited
- to the keys in dict_element_visible_keys (a.k.a. set_api_value).
- Returns a dictionary of the keys and values that have been changed.
- """
- # precondition: keys are proper, values are parsed and validated
- changed = {}
- # unknown keys are ignored here
- for key in [ k for k in new_data.keys() if k in self.dict_element_visible_keys ]:
- new_val = new_data[ key ]
- old_val = self.__getattribute__( key )
- if new_val == old_val:
- continue
- self.__setattr__( key, new_val )
- changed[ key ] = new_val
- return changed
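- # Illustrative example: only keys listed in dict_element_visible_keys are
- # applied, and the return value reports what actually changed (assuming the
- # history was not already named 'New name'):
- #
- #     history.set_from_dict( { 'name': 'New name', 'bogus_key': 1 } )
- #     # -> { 'name': 'New name' }   ('bogus_key' is ignored)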
- @property
- def latest_export( self ):
- exports = self.exports
- return exports and exports[ 0 ]
- @property
- def get_disk_size_bytes( self ):
- return self.get_disk_size( nice_size=False )
- def unhide_datasets( self ):
- for dataset in self.datasets:
- dataset.mark_unhidden()
- def resume_paused_jobs( self ):
- for dataset in self.datasets:
- job = dataset.creating_job
- if job is not None and job.state == Job.states.PAUSED:
- job.set_state(Job.states.NEW)
- def get_disk_size( self, nice_size=False ):
- # unique datasets only
- db_session = object_session( self )
- rval = db_session.query(
- func.sum( db_session.query( HistoryDatasetAssociation.dataset_id, Dataset.total_size ).join( Dataset )
- .filter( HistoryDatasetAssociation.table.c.history_id == self.id )
- .filter( HistoryDatasetAssociation.purged != True )
- .filter( Dataset.purged != True )
- .distinct().subquery().c.total_size ) ).first()[0]
- if rval is None:
- rval = 0
- if nice_size:
- rval = galaxy.datatypes.data.nice_size( rval )
- return rval
- @property
- def active_datasets_children_and_roles( self ):
- if not hasattr(self, '_active_datasets_children_and_roles'):
- db_session = object_session( self )
- query = db_session.query( HistoryDatasetAssociation ).filter( HistoryDatasetAssociation.table.c.history_id == self.id ). \
- filter( not_( HistoryDatasetAssociation.deleted ) ). \
- order_by( HistoryDatasetAssociation.table.c.hid.asc() ). \
- options(
- joinedload("children"),
- joinedload("dataset"),
- joinedload("dataset.actions"),
- joinedload("dataset.actions.role"),
- )
- self._active_datasets_children_and_roles = query.all()
- return self._active_datasets_children_and_roles
- def contents_iter( self, **kwds ):
- """
- Fetch filtered list of contents of history.
- """
- default_contents_types = [
- 'dataset',
- ]
- types = kwds.get('types', default_contents_types)
- iters = []
- if 'dataset' in types:
- iters.append( self.__dataset_contents_iter( **kwds ) )
- return galaxy.util.merge_sorted_iterables( operator.attrgetter( "hid" ), *iters )
- def __dataset_contents_iter(self, **kwds):
- return self.__filter_contents( HistoryDatasetAssociation, **kwds )
- def __filter_contents( self, content_class, **kwds ):
- db_session = object_session( self )
- assert db_session is not None
- query = db_session.query( content_class ).filter( content_class.table.c.history_id == self.id )
- query = query.order_by( content_class.table.c.hid.asc() )
- python_filter = None
- deleted = galaxy.util.string_as_bool_or_none( kwds.get( 'deleted', None ) )
- if deleted is not None:
- query = query.filter( content_class.deleted == deleted )
- visible = galaxy.util.string_as_bool_or_none( kwds.get( 'visible', None ) )
- if visible is not None:
- query = query.filter( content_class.visible == visible )
- if 'ids' in kwds:
- ids = kwds['ids']
- max_in_filter_length = kwds.get('max_in_filter_length', MAX_IN_FILTER_LENGTH)
- if len(ids) < max_in_filter_length:
- query = query.filter( content_class.id.in_(ids) )
- else:
- python_filter = lambda content: content.id in ids
- if python_filter:
- return ifilter(python_filter, query)
- else:
- return query
- def copy_tags_from(self,target_user,source_history):
- for src_shta in source_history.tags:
- new_shta = src_shta.copy()
- new_shta.user = target_user
- self.tags.append(new_shta)
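- # Illustrative sketch (not part of the original module): contents_iter()
- # filters on the keyword arguments handled by __filter_contents above.
- # The helper name is hypothetical.
- def _example_visible_history_contents( history ):
-     # only non-deleted, visible datasets, yielded in hid order
-     return list( history.contents_iter( types=[ 'dataset' ], deleted=False, visible=True ) )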
- class HistoryUserShareAssociation( object ):
- def __init__( self ):
- self.history = None
- self.user = None
- class UserRoleAssociation( object ):
- def __init__( self, user, role ):
- self.user = user
- self.role = role
- class GroupRoleAssociation( object ):
- def __init__( self, group, role ):
- self.group = group
- self.role = role
- class Role( object, Dictifiable ):
- dict_collection_visible_keys = ( 'id', 'name' )
- dict_element_visible_keys = ( 'id', 'name', 'description', 'type' )
- private_id = None
- types = Bunch(
- PRIVATE = 'private',
- SYSTEM = 'system',
- USER = 'user',
- ADMIN = 'admin',
- SHARING = 'sharing'
- )
- def __init__( self, name="", description="", type="system", deleted=False ):
- self.name = name
- self.description = description
- self.type = type
- self.deleted = deleted
- class UserQuotaAssociation( object, Dictifiable ):
- dict_element_visible_keys = ( 'user', )
- def __init__( self, user, quota ):
- self.user = user
- self.quota = quota
- class GroupQuotaAssociation( object, Dictifiable ):
- dict_element_visible_keys = ( 'group', )
- def __init__( self, group, quota ):
- self.group = group
- self.quota = quota
- class Quota( object, Dictifiable ):
- dict_collection_visible_keys = ( 'id', 'name' )
- dict_element_visible_keys = ( 'id', 'name', 'description', 'bytes', 'operation', 'display_amount', 'default', 'users', 'groups' )
- valid_operations = ( '+', '-', '=' )
- def __init__( self, name="", description="", amount=0, operation="=" ):
- self.name = name
- self.description = description
- if amount is None:
- self.bytes = -1
- else:
- self.bytes = amount
- self.operation = operation
- def get_amount( self ):
- if self.bytes == -1:
- return None
- return self.bytes
- def set_amount( self, amount ):
- if amount is None:
- self.bytes = -1
- else:
- self.bytes = amount
- amount = property( get_amount, set_amount )
- @property
- def display_amount( self ):
- if self.bytes == -1:
- return "unlimited"
- else:
- return nice_size( self.bytes )
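- # Illustrative examples of the amount/bytes convention used above (exact
- # formatting of the last value is decided by nice_size()):
- #
- #     Quota( name='default', amount=None ).display_amount          # -> 'unlimited'
- #     Quota( name='standard', amount=1073741824 ).amount           # -> 1073741824
- #     Quota( name='standard', amount=1073741824 ).display_amount   # e.g. '1.0 GB'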
- class DefaultQuotaAssociation( Quota, Dictifiable ):
- dict_element_visible_keys = ( 'type', )
- types = Bunch(
- UNREGISTERED = 'unregistered',
- REGISTERED = 'registered'
- )
- def __init__( self, type, quota ):
- assert type in self.types.__dict__.values(), 'Invalid type'
- self.type = type
- self.quota = quota
- class DatasetPermissions( object ):
- def __init__( self, action, dataset, role ):
- self.action = action
- self.dataset = dataset
- self.role = role
- class LibraryPermissions( object ):
- def __init__( self, action, library_item, role ):
- self.action = action
- if isinstance( library_item, Library ):
- self.library = library_item
- else:
- raise "Invalid Library specified: %s" % library_item.__class__.__name__
- self.role = role
- class LibraryFolderPermissions( object ):
- def __init__( self, action, library_item, role ):
- self.action = action
- if isinstance( library_item, LibraryFolder ):
- self.folder = library_item
- else:
- raise "Invalid LibraryFolder specified: %s" % library_item.__class__.__name__
- self.role = role
- class LibraryDatasetPermissions( object ):
- def __init__( self, action, library_item, role ):
- self.action = action
- if isinstance( library_item, LibraryDataset ):
- self.library_dataset = library_item
- else:
- raise "Invalid LibraryDataset specified: %s" % library_item.__class__.__name__
- self.role = role
- class LibraryDatasetDatasetAssociationPermissions( object ):
- def __init__( self, action, library_item, role ):
- self.action = action
- if isinstance( library_item, LibraryDatasetDatasetAssociation ):
- self.library_dataset_dataset_association = library_item
- else:
- raise "Invalid LibraryDatasetDatasetAssociation specified: %s" % library_item.__class__.__name__
- self.role = role
- class DefaultUserPermissions( object ):
- def __init__( self, user, action, role ):
- self.user = user
- self.action = action
- self.role = role
- class DefaultHistoryPermissions( object ):
- def __init__( self, history, action, role ):
- self.history = history
- self.action = action
- self.role = role
- class Dataset( object ):
- states = Bunch( NEW = 'new',
- UPLOAD = 'upload',
- QUEUED = 'queued',
- RUNNING = 'running',
- OK = 'ok',
- EMPTY = 'empty',
- ERROR = 'error',
- DISCARDED = 'discarded',
- PAUSED = 'paused',
- SETTING_METADATA = 'setting_metadata',
- FAILED_METADATA = 'failed_metadata' )
- conversion_messages = Bunch( PENDING = "pending",
- NO_DATA = "no data",
- NO_CHROMOSOME = "no chromosome",
- NO_CONVERTER = "no converter",
- NO_TOOL = "no tool",
- DATA = "data",
- ERROR = "error",
- OK = "ok" )
- permitted_actions = get_permitted_actions( filter='DATASET' )
- file_path = "/tmp/"
- object_store = None # This get initialized in mapping.py (method init) by app.py
- engine = None
- def __init__( self, id=None, state=None, external_filename=None, extra_files_path=None, file_size=None, purgable=True, uuid=None ):
- self.id = id
- self.state = state
- self.deleted = False
- self.purged = False
- self.purgable = purgable
- self.external_filename = external_filename
- self._extra_files_path = extra_files_path
- self.file_size = file_size
- if uuid is None:
- self.uuid = uuid4()
- else:
- self.uuid = UUID(str(uuid))
- def get_file_name( self ):
- if not self.external_filename:
- assert self.id is not None, "ID must be set before filename used (commit the object)"
- assert self.object_store is not None, "Object Store has not been initialized for dataset %s" % self.id
- filename = self.object_store.get_filename( self )
- return filename
- else:
- filename = self.external_filename
- # Make filename absolute
- return os.path.abspath( filename )
- def set_file_name ( self, filename ):
- if not filename:
- self.external_filename = None
- else:
- self.external_filename = filename
- file_name = property( get_file_name, set_file_name )
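- # Note on resolution order (from the accessors above): when external_filename
- # is unset the path is asked of the object store, which requires both an id
- # and an initialized object_store; otherwise the external path is simply made
- # absolute.  Illustrative sketch:
- #
- #     dataset.file_name                       # -> object_store.get_filename( dataset )
- #     dataset.file_name = '/data/imported.bed'
- #     dataset.file_name                       # -> '/data/imported.bed'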
- @property
- def extra_files_path( self ):
- return self.object_store.get_filename( self, dir_only=True, extra_dir=self._extra_files_path or "dataset_%d_files" % self.id )
- def _calculate_size( self ):
- if self.external_filename:
- try:
- return os.path.getsize(self.external_filename)
- except OSError:
- return 0
- else:
- return self.object_store.size(self)
- def get_size( self, nice_size=False ):
- """Returns the size of the data on disk"""
- if self.file_size:
- if nice_size:
- return galaxy.datatypes.data.nice_size( self.file_size )
- else:
- return self.file_size
- else:
- if nice_size:
- return galaxy.datatypes.data.nice_size( self._calculate_size() )
- else:
- return self._calculate_size()
- def set_size( self ):
- """Returns the size of the data on disk"""
- if not self.file_size:
- self.file_size = self._calculate_size()
- def get_total_size( self ):
- if self.total_size is not None:
- return self.total_size
- if self.file_size:
- # for backwards compatibility, set if unset
- self.set_total_size()
- db_session = object_session( self )
- db_session.flush()
- return self.total_size
- return 0
- def set_total_size( self ):
- if self.file_size is None:
- self.set_size()
- self.total_size = self.file_size or 0
- if self.object_store.exists(self, extra_dir=self._extra_files_path or "dataset_%d_files" % self.id, dir_only=True):
- for root, dirs, files in os.walk( self.extra_files_path ):
- self.total_size += sum( [ os.path.getsize( os.path.join( root, file ) ) for file in files if os.path.exists( os.path.join( root, file ) ) ] )
- def has_data( self ):
- """Detects whether there is any data"""
- return self.get_size() > 0
- def mark_deleted( self, include_children=True ):
- self.deleted = True
- def is_multi_byte( self ):
- if not self.has_data():
- return False
- try:
- return is_multi_byte( codecs.open( self.file_name, 'r', 'utf-8' ).read( 100 ) )
- except UnicodeDecodeError:
- return False
- # FIXME: sqlalchemy will replace this
- def _delete(self):
- """Remove the file that corresponds to this data"""
- self.object_store.delete(self)
- @property
- def user_can_purge( self ):
- return not self.purged \
- and not bool( self.library_associations ) \
- and len( self.history_associations ) == len( self.purged_history_associations )
- def full_delete( self ):
- """Remove the file and extra files, marks deleted and purged"""
- # os.unlink( self.file_name )
- self.object_store.delete(self)
- if self.object_store.exists(self, extra_dir=self._extra_files_path or "dataset_%d_files" % self.id, dir_only=True):
- self.object_store.delete(self, entire_dir=True, extra_dir=self._extra_files_path or "dataset_%d_files" % self.id, dir_only=True)
- # if os.path.exists( self.extra_files_path ):
- # shutil.rmtree( self.extra_files_path )
- # TODO: purge metadata files
- self.deleted = True
- self.purged = True
- def get_access_roles( self, trans ):
- roles = []
- for dp in self.actions:
- if dp.action == trans.app.security_agent.permitted_actions.DATASET_ACCESS.action:
- roles.append( dp.role )
- return roles
- def get_manage_permissions_roles( self, trans ):
- roles = []
- for dp in self.actions:
- if dp.action == trans.app.security_agent.permitted_actions.DATASET_MANAGE_PERMISSIONS.action:
- roles.append( dp.role )
- return roles
- def has_manage_permissions_roles( self, trans ):
- for dp in self.actions:
- if dp.action == trans.app.security_agent.permitted_actions.DATASET_MANAGE_PERMISSIONS.action:
- return True
- return False
- class DatasetInst…