/lib/galaxy/model/__init__.py
- """
- Galaxy data model classes
- Naming: try to use class names that have a distinct plural form so that
- the relationship cardinalities are obvious (e.g. prefer Dataset to Data)
- """
- from galaxy import eggs
- eggs.require("pexpect")
- import codecs
- import errno
- import logging
- import operator
- import os
- import pexpect
- import json
- import socket
- import time
- from uuid import UUID, uuid4
- from string import Template
- from itertools import ifilter
- from itertools import chain
- import galaxy.datatypes
- import galaxy.datatypes.registry
- import galaxy.security.passwords
- from galaxy.datatypes.metadata import MetadataCollection
- from galaxy.model.item_attrs import Dictifiable, UsesAnnotations
- from galaxy.security import get_permitted_actions
- from galaxy.util import is_multi_byte, nice_size, Params, restore_text, send_mail
- from galaxy.util import ready_name_for_url
- from galaxy.util.bunch import Bunch
- from galaxy.util.hash_util import new_secure_hash
- from galaxy.util.directory_hash import directory_hash_id
- from galaxy.web.framework.helpers import to_unicode
- from galaxy.web.form_builder import (AddressField, CheckboxField, HistoryField,
- PasswordField, SelectField, TextArea, TextField, WorkflowField,
- WorkflowMappingField)
- from sqlalchemy.orm import object_session
- from sqlalchemy.orm import joinedload
- from sqlalchemy.sql.expression import func
- from sqlalchemy import not_
- log = logging.getLogger( __name__ )
- datatypes_registry = galaxy.datatypes.registry.Registry()
- # Default Value Required for unit tests
- datatypes_registry.load_datatypes()
- # When constructing filters with IN for a fixed set of ids, the maximum
- # number of items to place in the IN statement. Different databases
- # have different limits, so it is best not to let this be unlimited -
- # filter in Python if over this limit.
- MAX_IN_FILTER_LENGTH = 100
- class NoConverterException(Exception):
- def __init__(self, value):
- self.value = value
- def __str__(self):
- return repr(self.value)
- class ConverterDependencyException(Exception):
- def __init__(self, value):
- self.value = value
- def __str__(self):
- return repr(self.value)
- def set_datatypes_registry( d_registry ):
- """
- Set up datatypes_registry
- """
- global datatypes_registry
- datatypes_registry = d_registry
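- # A minimal sketch of swapping in a registry (e.g. in a test), assuming a
- # Registry configured with whatever datatypes the caller needs:
- #
- #     import galaxy.model
- #     test_registry = galaxy.datatypes.registry.Registry()
- #     test_registry.load_datatypes()
- #     galaxy.model.set_datatypes_registry( test_registry )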
- class HasName:
- def get_display_name( self ):
- """
- These objects have a name attribute that can be either a string or a
- unicode object. If it is a string, convert it to unicode assuming 'utf-8'.
- """
- name = self.name
- if isinstance(name, str):
- name = unicode(name, 'utf-8')
- return name
- class User( object, Dictifiable ):
- """
- Data for a Galaxy user or admin and relations to their
- histories, credentials, and roles.
- """
- use_pbkdf2 = True
- # attributes that will be accessed and returned when calling to_dict( view='collection' )
- dict_collection_visible_keys = ( 'id', 'email' )
- # attributes that will be accessed and returned when calling to_dict( view='element' )
- dict_element_visible_keys = ( 'id', 'email', 'username', 'total_disk_usage', 'nice_total_disk_usage' )
- def __init__( self, email=None, password=None ):
- self.email = email
- self.password = password
- self.external = False
- self.deleted = False
- self.purged = False
- self.active = False
- self.activation_token = None
- self.username = None
- # Relationships
- self.histories = []
- self.credentials = []
- #? self.roles = []
- def set_password_cleartext( self, cleartext ):
- """
- Set user password to the digest of `cleartext`.
- """
- if User.use_pbkdf2:
- self.password = galaxy.security.passwords.hash_password( cleartext )
- else:
- self.password = new_secure_hash( text_type=cleartext )
- def check_password( self, cleartext ):
- """
- Check if `cleartext` matches user password when hashed.
- """
- return galaxy.security.passwords.check_password( cleartext, self.password )
- def all_roles( self ):
- """
- Return a unique list of Roles associated with this user or any of their groups.
- """
- roles = [ ura.role for ura in self.roles ]
- for group in [ uga.group for uga in self.groups ]:
- for role in [ gra.role for gra in group.roles ]:
- if role not in roles:
- roles.append( role )
- return roles
- def get_disk_usage( self, nice_size=False ):
- """
- Return byte count of disk space used by user or a human-readable
- string if `nice_size` is `True`.
- """
- rval = 0
- if self.disk_usage is not None:
- rval = self.disk_usage
- if nice_size:
- rval = galaxy.datatypes.data.nice_size( rval )
- return rval
- def set_disk_usage( self, bytes ):
- """
- Manually set the disk space used by a user to `bytes`.
- """
- self.disk_usage = bytes
- total_disk_usage = property( get_disk_usage, set_disk_usage )
- @property
- def nice_total_disk_usage( self ):
- """
- Return byte count of disk space used in a human-readable string.
- """
- return self.get_disk_usage( nice_size=True )
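- # Usage sketch for the disk usage accessors, assuming a mapped User whose
- # disk_usage column is populated:
- #
- #     user.total_disk_usage = 1024        # calls set_disk_usage()
- #     user.total_disk_usage               # -> 1024 (get_disk_usage())
- #     user.nice_total_disk_usage          # -> e.g. '1.0 KB'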
- def calculate_disk_usage( self ):
- """
- Return byte count total of disk space used by all non-purged, non-library
- HDAs in non-purged histories.
- """
- # maintain a list so that we don't double count
- dataset_ids = []
- total = 0
- # the result set can be huge and exhaust memory, so we avoid the mappers
- db_session = object_session( self )
- for history in db_session.query( History ).enable_eagerloads( False ).filter_by( user_id=self.id, purged=False ).yield_per( 1000 ):
- for hda in db_session.query( HistoryDatasetAssociation ).enable_eagerloads( False ).filter_by( history_id=history.id, purged=False ).yield_per( 1000 ):
- #TODO: def hda.counts_toward_disk_usage():
- # return ( not self.dataset.purged and not self.dataset.library_associations )
- if not hda.dataset.id in dataset_ids and not hda.dataset.purged and not hda.dataset.library_associations:
- dataset_ids.append( hda.dataset.id )
- total += hda.dataset.get_total_size()
- return total
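- # yield_per( 1000 ) with eager loads disabled keeps memory flat for users
- # with many histories; a sketch of recomputing and persisting the total:
- #
- #     user.disk_usage = user.calculate_disk_usage()
- #     object_session( user ).flush()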
- @staticmethod
- def user_template_environment( user ):
- """
- >>> env = User.user_template_environment(None)
- >>> env['__user_email__']
- 'Anonymous'
- >>> env['__user_id__']
- 'Anonymous'
- >>> user = User('foo@example.com')
- >>> user.id = 6
- >>> user.username = 'foo2'
- >>> env = User.user_template_environment(user)
- >>> env['__user_id__']
- '6'
- >>> env['__user_name__']
- 'foo2'
- """
- if user:
- user_id = '%d' % user.id
- user_email = str( user.email )
- user_name = str( user.username )
- else:
- user = None
- user_id = 'Anonymous'
- user_email = 'Anonymous'
- user_name = 'Anonymous'
- environment = {}
- environment[ '__user__' ] = user
- environment[ '__user_id__' ] = environment[ 'userId' ] = user_id
- environment[ '__user_email__' ] = environment[ 'userEmail' ] = user_email
- environment[ '__user_name__' ] = user_name
- return environment
- @staticmethod
- def expand_user_properties( user, in_string ):
- """
- """
- environment = User.user_template_environment( user )
- return Template( in_string ).safe_substitute( environment )
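- # A doctest-style sketch of expansion; safe_substitute leaves unknown
- # variables untouched, so malformed templates do not raise:
- #
- #     >>> User.expand_user_properties( None, 'Hello $__user_email__' )
- #     'Hello Anonymous'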
- class Job( object, Dictifiable ):
- """
- A job represents a request to run a tool given input datasets, tool
- parameters, and output datasets.
- """
- dict_collection_visible_keys = [ 'id', 'state', 'exit_code', 'update_time', 'create_time' ]
- dict_element_visible_keys = [ 'id', 'state', 'exit_code', 'update_time', 'create_time' ]
- states = Bunch( NEW = 'new',
- UPLOAD = 'upload',
- WAITING = 'waiting',
- QUEUED = 'queued',
- RUNNING = 'running',
- OK = 'ok',
- ERROR = 'error',
- PAUSED = 'paused',
- DELETED = 'deleted',
- DELETED_NEW = 'deleted_new' )
- # Please include an accessor (get/set pair) for any new columns/members.
- def __init__( self ):
- self.session_id = None
- self.user_id = None
- self.tool_id = None
- self.tool_version = None
- self.command_line = None
- self.param_filename = None
- self.parameters = []
- self.input_datasets = []
- self.output_datasets = []
- self.input_library_datasets = []
- self.output_library_datasets = []
- self.state = Job.states.NEW
- self.info = None
- self.job_runner_name = None
- self.job_runner_external_id = None
- self.destination_id = None
- self.destination_params = None
- self.post_job_actions = []
- self.imported = False
- self.handler = None
- self.exit_code = None
- @property
- def finished( self ):
- states = self.states
- return self.state in [
- states.OK,
- states.ERROR,
- states.DELETED,
- states.DELETED_NEW,
- ]
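- # A doctest-style sketch of the `finished` property:
- #
- #     >>> job = Job()
- #     >>> job.finished            # new jobs are not finished
- #     False
- #     >>> job.state = Job.states.OK
- #     >>> job.finished
- #     True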
- # TODO: Add accessors for members defined in SQL Alchemy for the Job table and
- # for the mapper defined to the Job table.
- def get_external_output_metadata( self ):
- """
- The external_output_metadata is currently a reference from Job to
- JobExternalOutputMetadata. It exists for a job but not a task.
- """
- return self.external_output_metadata
- def get_session_id( self ):
- return self.session_id
- def get_user_id( self ):
- return self.user_id
- def get_tool_id( self ):
- return self.tool_id
- def get_tool_version( self ):
- return self.tool_version
- def get_command_line( self ):
- return self.command_line
- def get_param_filename( self ):
- return self.param_filename
- def get_parameters( self ):
- return self.parameters
- def get_input_datasets( self ):
- return self.input_datasets
- def get_output_datasets( self ):
- return self.output_datasets
- def get_input_library_datasets( self ):
- return self.input_library_datasets
- def get_output_library_datasets( self ):
- return self.output_library_datasets
- def get_state( self ):
- return self.state
- def get_info( self ):
- return self.info
- def get_job_runner_name( self ):
- # This differs from the Task class in that job_runner_name is
- # accessed instead of task_runner_name. Note that the field
- # runner_name is not the same thing.
- return self.job_runner_name
- def get_job_runner_external_id( self ):
- # This is different from the Task just in the member accessed:
- return self.job_runner_external_id
- def get_post_job_actions( self ):
- return self.post_job_actions
- def get_imported( self ):
- return self.imported
- def get_handler( self ):
- return self.handler
- def get_params( self ):
- return self.params
- def get_user( self ):
- # This is defined in the SQL Alchemy mapper as a relation to the User.
- return self.user
- def get_id( self ):
- # This is defined in the SQL Alchemy's Job table (and not in the model).
- return self.id
- def get_tasks( self ):
- # The tasks member is part of a reference in the SQL Alchemy schema:
- return self.tasks
- def get_id_tag( self ):
- """
- Return a tag that can be useful in identifying a Job.
- This returns the Job's id.
- """
- return "%s" % self.id
- def set_session_id( self, session_id ):
- self.session_id = session_id
- def set_user_id( self, user_id ):
- self.user_id = user_id
- def set_tool_id( self, tool_id ):
- self.tool_id = tool_id
- def set_tool_version( self, tool_version ):
- self.tool_version = tool_version
- def set_command_line( self, command_line ):
- self.command_line = command_line
- def set_param_filename( self, param_filename ):
- self.param_filename = param_filename
- def set_parameters( self, parameters ):
- self.parameters = parameters
- def set_input_datasets( self, input_datasets ):
- self.input_datasets = input_datasets
- def set_output_datasets( self, output_datasets ):
- self.output_datasets = output_datasets
- def set_input_library_datasets( self, input_library_datasets ):
- self.input_library_datasets = input_library_datasets
- def set_output_library_datasets( self, output_library_datasets ):
- self.output_library_datasets = output_library_datasets
- def set_info( self, info ):
- self.info = info
- def set_runner_name( self, job_runner_name ):
- self.job_runner_name = job_runner_name
- def set_runner_external_id( self, job_runner_external_id ):
- self.job_runner_external_id = job_runner_external_id
- def set_post_job_actions( self, post_job_actions ):
- self.post_job_actions = post_job_actions
- def set_imported( self, imported ):
- self.imported = imported
- def set_handler( self, handler ):
- self.handler = handler
- def set_params( self, params ):
- self.params = params
- def add_parameter( self, name, value ):
- self.parameters.append( JobParameter( name, value ) )
- def add_input_dataset( self, name, dataset ):
- self.input_datasets.append( JobToInputDatasetAssociation( name, dataset ) )
- def add_output_dataset( self, name, dataset ):
- self.output_datasets.append( JobToOutputDatasetAssociation( name, dataset ) )
- def add_input_library_dataset( self, name, dataset ):
- self.input_library_datasets.append( JobToInputLibraryDatasetAssociation( name, dataset ) )
- def add_output_library_dataset( self, name, dataset ):
- self.output_library_datasets.append( JobToOutputLibraryDatasetAssociation( name, dataset ) )
- def add_post_job_action(self, pja):
- self.post_job_actions.append( PostJobActionAssociation( pja, self ) )
- def set_state( self, state ):
- """
- This is the only set method that performs extra work. In this case, the
- state is propagated down to datasets.
- """
- self.state = state
- # For historical reasons state propagates down to datasets
- for da in self.output_datasets:
- da.dataset.state = state
- def get_param_values( self, app, ignore_errors=False ):
- """
- Read encoded parameter values from the database and turn back into a
- dict of tool parameter values.
- """
- param_dict = dict( [ ( p.name, p.value ) for p in self.parameters ] )
- tool = app.toolbox.get_tool( self.tool_id )
- param_dict = tool.params_from_strings( param_dict, app, ignore_errors=ignore_errors )
- return param_dict
- def check_if_output_datasets_deleted( self ):
- """
- Return true if all of the output datasets associated with this job are
- in the deleted state
- """
- for dataset_assoc in self.output_datasets:
- dataset = dataset_assoc.dataset
- # only the originator of the job can delete a dataset to cause
- # cancellation of the job, no need to loop through history_associations
- if not dataset.deleted:
- return False
- return True
- def mark_deleted( self, track_jobs_in_database=False ):
- """
- Mark this job as deleted, and mark any output datasets as discarded.
- """
- if track_jobs_in_database:
- self.state = Job.states.DELETED_NEW
- else:
- self.state = Job.states.DELETED
- self.info = "Job output deleted by user before job completed."
- for dataset_assoc in self.output_datasets:
- dataset = dataset_assoc.dataset
- dataset.deleted = True
- dataset.state = dataset.states.DISCARDED
- for shared_hda in dataset.dataset.history_associations:
- # propagate info across shared datasets
- shared_hda.deleted = True
- shared_hda.blurb = 'deleted'
- shared_hda.peek = 'Job deleted'
- shared_hda.info = 'Job output deleted by user before job completed'
- def to_dict( self, view='collection' ):
- rval = super( Job, self ).to_dict( view=view )
- rval['tool_id'] = self.tool_id
- if view == 'element':
- param_dict = dict( [ ( p.name, p.value ) for p in self.parameters ] )
- rval['params'] = param_dict
- input_dict = {}
- for i in self.input_datasets:
- if i.dataset is not None:
- input_dict[i.name] = {"id" : i.dataset.id, "src" : "hda"}
- for i in self.input_library_datasets:
- if i.dataset is not None:
- input_dict[i.name] = {"id" : i.dataset.id, "src" : "ldda"}
- for k in input_dict:
- if k in param_dict:
- del param_dict[k]
- rval['inputs'] = input_dict
- output_dict = {}
- for i in self.output_datasets:
- if i.dataset is not None:
- output_dict[i.name] = {"id" : i.dataset.id, "src" : "hda"}
- for i in self.output_library_datasets:
- if i.dataset is not None:
- output_dict[i.name] = {"id" : i.dataset.id, "src" : "ldda"}
- rval['outputs'] = output_dict
- return rval
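- # Shape sketch of to_dict( view='element' ) output; ids, names and params
- # here are illustrative only:
- #
- #     { 'id': 10, 'state': 'ok', 'exit_code': 0, 'tool_id': 'cat1',
- #       'params': { ... },
- #       'inputs': { 'input1': { 'id': 5, 'src': 'hda' } },
- #       'outputs': { 'out_file1': { 'id': 6, 'src': 'hda' } } }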
- class Task( object ):
- """
- A task represents a single component of a job.
- """
- states = Bunch( NEW = 'new',
- WAITING = 'waiting',
- QUEUED = 'queued',
- RUNNING = 'running',
- OK = 'ok',
- ERROR = 'error',
- DELETED = 'deleted' )
- # Please include an accessor (get/set pair) for any new columns/members.
- def __init__( self, job, working_directory, prepare_files_cmd ):
- self.command_line = None
- self.parameters = []
- self.state = Task.states.NEW
- self.info = None
- self.working_directory = working_directory
- self.task_runner_name = None
- self.task_runner_external_id = None
- self.job = job
- self.stdout = ""
- self.stderr = ""
- self.exit_code = None
- self.prepare_input_files_cmd = prepare_files_cmd
- def get_param_values( self, app ):
- """
- Read encoded parameter values from the database and turn back into a
- dict of tool parameter values.
- """
- param_dict = dict( [ ( p.name, p.value ) for p in self.job.parameters ] )
- tool = app.toolbox.get_tool( self.job.tool_id )
- param_dict = tool.params_from_strings( param_dict, app )
- return param_dict
- def get_id( self ):
- # This is defined in the SQL Alchemy schema:
- return self.id
- def get_id_tag( self ):
- """
- Return an id tag suitable for identifying the task.
- This combines the task's job id and the task's own id.
- """
- return "%s_%s" % ( self.job.get_id(), self.get_id() )
- def get_command_line( self ):
- return self.command_line
- def get_parameters( self ):
- return self.parameters
- def get_state( self ):
- return self.state
- def get_info( self ):
- return self.info
- def get_working_directory( self ):
- return self.working_directory
- def get_task_runner_name( self ):
- return self.task_runner_name
- def get_task_runner_external_id( self ):
- return self.task_runner_external_id
- def get_job( self ):
- return self.job
- def get_stdout( self ):
- return self.stdout
- def get_stderr( self ):
- return self.stderr
- def get_prepare_input_files_cmd( self ):
- return self.prepare_input_files_cmd
- # The following accessors are for members that are in the Job class but
- # not in the Task class. So they can either refer to the parent Job
- # or return None, depending on whether Tasks need to point to the parent
- # (e.g., for a session) or never use the member (e.g., external output
- # metadata). These can be filled in as needed.
- def get_external_output_metadata( self ):
- """
- The external_output_metadata is currently a backref to
- JobExternalOutputMetadata. It exists for a job but not a task,
- and when a task is cancelled its corresponding parent Job will
- be cancelled. So None is returned now, but that could be changed
- to self.get_job().get_external_output_metadata().
- """
- return None
- def get_job_runner_name( self ):
- """
- Since runners currently access Tasks the same way they access Jobs,
- this method just refers to *this* instance's runner.
- """
- return self.task_runner_name
- def get_job_runner_external_id( self ):
- """
- Runners will use the same methods to get information about the Task
- class as they will about the Job class, so this method just returns
- the task's external id.
- """
- # TODO: Merge into get_runner_external_id.
- return self.task_runner_external_id
- def get_session_id( self ):
- # A Task shares its parent Job's galaxy session, so return the
- # parent Job's session id.
- return self.get_job().get_session_id()
- def set_id( self, id ):
- # This is defined in SQL Alchemy's mapper and not here.
- # This should never be called.
- self.id = id
- def set_command_line( self, command_line ):
- self.command_line = command_line
- def set_parameters( self, parameters ):
- self.parameters = parameters
- def set_state( self, state ):
- self.state = state
- def set_info( self, info ):
- self.info = info
- def set_working_directory( self, working_directory ):
- self.working_directory = working_directory
- def set_task_runner_name( self, task_runner_name ):
- self.task_runner_name = task_runner_name
- def set_job_runner_external_id( self, task_runner_external_id ):
- # This method is available for runners that do not want/need to
- # differentiate between the kinds of Runnable things (Jobs and Tasks)
- # that they're using.
- log.debug( "Task %d: Set external id to %s"
- % ( self.id, task_runner_external_id ) )
- self.task_runner_external_id = task_runner_external_id
- def set_task_runner_external_id( self, task_runner_external_id ):
- self.task_runner_external_id = task_runner_external_id
- def set_job( self, job ):
- self.job = job
- def set_stdout( self, stdout ):
- self.stdout = stdout
- def set_stderr( self, stderr ):
- self.stderr = stderr
- def set_prepare_input_files_cmd( self, prepare_input_files_cmd ):
- self.prepare_input_files_cmd = prepare_input_files_cmd
- class JobParameter( object ):
- def __init__( self, name, value ):
- self.name = name
- self.value = value
- class JobToInputDatasetAssociation( object ):
- def __init__( self, name, dataset ):
- self.name = name
- self.dataset = dataset
- class JobToOutputDatasetAssociation( object ):
- def __init__( self, name, dataset ):
- self.name = name
- self.dataset = dataset
- class JobToInputLibraryDatasetAssociation( object ):
- def __init__( self, name, dataset ):
- self.name = name
- self.dataset = dataset
- class JobToOutputLibraryDatasetAssociation( object ):
- def __init__( self, name, dataset ):
- self.name = name
- self.dataset = dataset
- class PostJobAction( object ):
- def __init__( self, action_type, workflow_step, output_name = None, action_arguments = None):
- self.action_type = action_type
- self.output_name = output_name
- self.action_arguments = action_arguments
- self.workflow_step = workflow_step
- class PostJobActionAssociation( object ):
- def __init__(self, pja, job):
- self.job = job
- self.post_job_action = pja
- class JobExternalOutputMetadata( object ):
- def __init__( self, job = None, dataset = None ):
- self.job = job
- if isinstance( dataset, galaxy.model.HistoryDatasetAssociation ):
- self.history_dataset_association = dataset
- elif isinstance( dataset, galaxy.model.LibraryDatasetDatasetAssociation ):
- self.library_dataset_dataset_association = dataset
- @property
- def dataset( self ):
- if self.history_dataset_association:
- return self.history_dataset_association
- elif self.library_dataset_dataset_association:
- return self.library_dataset_dataset_association
- return None
- class JobExportHistoryArchive( object ):
- def __init__( self, job=None, history=None, dataset=None, compressed=False, \
- history_attrs_filename=None, datasets_attrs_filename=None,
- jobs_attrs_filename=None ):
- self.job = job
- self.history = history
- self.dataset = dataset
- self.compressed = compressed
- self.history_attrs_filename = history_attrs_filename
- self.datasets_attrs_filename = datasets_attrs_filename
- self.jobs_attrs_filename = jobs_attrs_filename
- @property
- def up_to_date( self ):
- """ Return False, if a new export should be generated for corresponding
- history.
- """
- job = self.job
- return job.state not in [ Job.states.ERROR, Job.states.DELETED ] \
- and job.update_time > self.history.update_time
- @property
- def ready( self ):
- return self.job.state == Job.states.OK
- @property
- def preparing( self ):
- return self.job.state in [ Job.states.RUNNING, Job.states.QUEUED, Job.states.WAITING ]
- @property
- def export_name( self ):
- # Stream archive.
- hname = ready_name_for_url( self.history.name )
- hname = "Galaxy-History-%s.tar" % ( hname )
- if self.compressed:
- hname += ".gz"
- return hname
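- # e.g. a compressed export of a history named 'RNA-seq run' would stream
- # as something like 'Galaxy-History-RNA-seq-run.tar.gz' (the exact name
- # depends on ready_name_for_url()'s sanitization).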
- class JobImportHistoryArchive( object ):
- def __init__( self, job=None, history=None, archive_dir=None ):
- self.job = job
- self.history = history
- self.archive_dir=archive_dir
- class GenomeIndexToolData( object ):
- def __init__( self, job=None, params=None, dataset=None, deferred_job=None, \
- transfer_job=None, fasta_path=None, created_time=None, modified_time=None, \
- dbkey=None, user=None, indexer=None ):
- self.job = job
- self.dataset = dataset
- self.fasta_path = fasta_path
- self.user = user
- self.indexer = indexer
- self.created_time = created_time
- self.modified_time = modified_time
- self.deferred = deferred_job
- self.transfer = transfer_job
- class DeferredJob( object ):
- states = Bunch( NEW = 'new',
- WAITING = 'waiting',
- QUEUED = 'queued',
- RUNNING = 'running',
- OK = 'ok',
- ERROR = 'error' )
- def __init__( self, state=None, plugin=None, params=None ):
- self.state = state
- self.plugin = plugin
- self.params = params
- def get_check_interval( self ):
- if not hasattr( self, '_check_interval' ):
- self._check_interval = None
- return self._check_interval
- def set_check_interval( self, seconds ):
- self._check_interval = seconds
- check_interval = property( get_check_interval, set_check_interval )
- def get_last_check( self ):
- if not hasattr( self, '_last_check' ):
- self._last_check = 0
- return self._last_check
- def set_last_check( self, seconds ):
- try:
- self._last_check = int( seconds )
- except ( ValueError, TypeError ):
- self._last_check = time.time()
- last_check = property( get_last_check, set_last_check )
- @property
- def is_check_time( self ):
- if self.check_interval is None:
- return True
- elif ( int( time.time() ) - self.last_check ) > self.check_interval:
- return True
- else:
- return False
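- # A doctest-style sketch: with no interval set, a deferred job is always
- # due for a check:
- #
- #     >>> dj = DeferredJob()
- #     >>> dj.is_check_time
- #     True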
- class Group( object, Dictifiable ):
- dict_collection_visible_keys = ( 'id', 'name' )
- dict_element_visible_keys = ( 'id', 'name' )
- def __init__( self, name = None ):
- self.name = name
- self.deleted = False
- class UserGroupAssociation( object ):
- def __init__( self, user, group ):
- self.user = user
- self.group = group
- class History( object, Dictifiable, UsesAnnotations, HasName ):
- dict_collection_visible_keys = ( 'id', 'name', 'published', 'deleted' )
- dict_element_visible_keys = ( 'id', 'name', 'published', 'deleted', 'genome_build', 'purged', 'importable', 'slug' )
- default_name = 'Unnamed history'
- def __init__( self, id=None, name=None, user=None ):
- self.id = id
- self.name = name or History.default_name
- self.deleted = False
- self.purged = False
- self.importing = False
- self.genome_build = None
- self.published = False
- # Relationships
- self.user = user
- self.datasets = []
- self.galaxy_sessions = []
- self.tags = []
- def _next_hid( self ):
- # this is overridden in mapping.py by the db_next_hid() method
- if len( self.datasets ) == 0:
- return 1
- else:
- last_hid = 0
- for dataset in self.datasets:
- if dataset.hid > last_hid:
- last_hid = dataset.hid
- return last_hid + 1
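- # A doctest-style sketch of hid assignment for an unmapped History:
- #
- #     >>> History()._next_hid()
- #     1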
- def add_galaxy_session( self, galaxy_session, association=None ):
- if association is None:
- self.galaxy_sessions.append( GalaxySessionToHistoryAssociation( galaxy_session, self ) )
- else:
- self.galaxy_sessions.append( association )
- def add_dataset( self, dataset, parent_id=None, genome_build=None, set_hid=True, quota=True ):
- if isinstance( dataset, Dataset ):
- dataset = HistoryDatasetAssociation(dataset=dataset)
- object_session( self ).add( dataset )
- object_session( self ).flush()
- elif not isinstance( dataset, HistoryDatasetAssociation ):
- raise TypeError, ( "You can only add Dataset and HistoryDatasetAssociation instances to a history" +
- " ( you tried to add %s )." % str( dataset ) )
- if parent_id:
- for data in self.datasets:
- if data.id == parent_id:
- dataset.hid = data.hid
- break
- else:
- if set_hid:
- dataset.hid = self._next_hid()
- else:
- if set_hid:
- dataset.hid = self._next_hid()
- if quota and self.user:
- self.user.total_disk_usage += dataset.quota_amount( self.user )
- dataset.history = self
- if genome_build not in [None, '?']:
- self.genome_build = genome_build
- self.datasets.append( dataset )
- return dataset
- def copy( self, name=None, target_user=None, activatable=False, all_datasets=False ):
- """
- Return a copy of this history using the given `name` and `target_user`.
- If `activatable`, copy only non-deleted datasets. If `all_datasets`, copy
- non-deleted, deleted, and purged datasets.
- """
- # Create new history.
- if not name:
- name = self.name
- if not target_user:
- target_user = self.user
- quota = True
- if target_user == self.user:
- quota = False
- new_history = History( name=name, user=target_user )
- db_session = object_session( self )
- db_session.add( new_history )
- db_session.flush()
- # Copy annotation.
- self.copy_item_annotation( db_session, self.user, self, target_user, new_history )
- # Copy Tags
- new_history.copy_tags_from(target_user=target_user, source_history=self)
- # Copy HDAs.
- if activatable:
- hdas = self.activatable_datasets
- elif all_datasets:
- hdas = self.datasets
- else:
- hdas = self.active_datasets
- for hda in hdas:
- # Copy HDA.
- new_hda = hda.copy( copy_children=True )
- new_history.add_dataset( new_hda, set_hid = False, quota=quota )
- db_session.add( new_hda )
- db_session.flush()
- # Copy annotation.
- self.copy_item_annotation( db_session, self.user, hda, target_user, new_hda )
- new_history.hid_counter = self.hid_counter
- db_session.add( new_history )
- db_session.flush()
- return new_history
- @property
- def activatable_datasets( self ):
- # This needs to be a list
- return [ hda for hda in self.datasets if not hda.dataset.deleted ]
- def to_dict( self, view='collection', value_mapper = None ):
- # Get basic value.
- rval = super( History, self ).to_dict( view=view, value_mapper=value_mapper )
- # Add tags.
- tags_str_list = []
- for tag in self.tags:
- tag_str = tag.user_tname
- if tag.value is not None:
- tag_str += ":" + tag.user_value
- tags_str_list.append( tag_str )
- rval[ 'tags' ] = tags_str_list
- return rval
- def set_from_dict( self, new_data ):
- #AKA: set_api_value
- """
- Set object attributes to the values in dictionary new_data limiting
- to only those keys in dict_element_visible_keys.
- Returns a dictionary of the keys, values that have been changed.
- """
- # precondition: keys are proper, values are parsed and validated
- changed = {}
- # unknown keys are ignored here
- for key in [ k for k in new_data.keys() if k in self.dict_element_visible_keys ]:
- new_val = new_data[ key ]
- old_val = self.__getattribute__( key )
- if new_val == old_val:
- continue
- self.__setattr__( key, new_val )
- changed[ key ] = new_val
- return changed
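- # A doctest-style sketch: keys outside dict_element_visible_keys are
- # ignored, and only actual changes are reported:
- #
- #     >>> h = History( name='old' )
- #     >>> h.set_from_dict( { 'name': 'new', 'bogus': 1 } )
- #     {'name': 'new'}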
- @property
- def latest_export( self ):
- exports = self.exports
- return exports and exports[ 0 ]
- @property
- def get_disk_size_bytes( self ):
- return self.get_disk_size( nice_size=False )
- def unhide_datasets( self ):
- for dataset in self.datasets:
- dataset.mark_unhidden()
- def resume_paused_jobs( self ):
- for dataset in self.datasets:
- job = dataset.creating_job
- if job is not None and job.state == Job.states.PAUSED:
- job.set_state(Job.states.NEW)
- def get_disk_size( self, nice_size=False ):
- # unique datasets only
- db_session = object_session( self )
- rval = db_session.query(
- func.sum( db_session.query( HistoryDatasetAssociation.dataset_id, Dataset.total_size ).join( Dataset )
- .filter( HistoryDatasetAssociation.table.c.history_id == self.id )
- .filter( HistoryDatasetAssociation.purged != True )
- .filter( Dataset.purged != True )
- .distinct().subquery().c.total_size ) ).first()[0]
- if rval is None:
- rval = 0
- if nice_size:
- rval = galaxy.datatypes.data.nice_size( rval )
- return rval
- @property
- def active_datasets_children_and_roles( self ):
- if not hasattr(self, '_active_datasets_children_and_roles'):
- db_session = object_session( self )
- query = db_session.query( HistoryDatasetAssociation ).filter( HistoryDatasetAssociation.table.c.history_id == self.id ). \
- filter( not_( HistoryDatasetAssociation.deleted ) ). \
- order_by( HistoryDatasetAssociation.table.c.hid.asc() ). \
- options(
- joinedload("children"),
- joinedload("dataset"),
- joinedload("dataset.actions"),
- joinedload("dataset.actions.role"),
- )
- self._active_datasets_children_and_roles = query.all()
- return self._active_datasets_children_and_roles
- def contents_iter( self, **kwds ):
- """
- Fetch filtered list of contents of history.
- """
- default_contents_types = [
- 'dataset',
- ]
- types = kwds.get('types', default_contents_types)
- iters = []
- if 'dataset' in types:
- iters.append( self.__dataset_contents_iter( **kwds ) )
- return galaxy.util.merge_sorted_iterables( operator.attrgetter( "hid" ), *iters )
- def __dataset_contents_iter(self, **kwds):
- return self.__filter_contents( HistoryDatasetAssociation, **kwds )
- def __filter_contents( self, content_class, **kwds ):
- db_session = object_session( self )
- assert db_session is not None
- query = db_session.query( content_class ).filter( content_class.table.c.history_id == self.id )
- query = query.order_by( content_class.table.c.hid.asc() )
- python_filter = None
- deleted = galaxy.util.string_as_bool_or_none( kwds.get( 'deleted', None ) )
- if deleted is not None:
- query = query.filter( content_class.deleted == deleted )
- visible = galaxy.util.string_as_bool_or_none( kwds.get( 'visible', None ) )
- if visible is not None:
- query = query.filter( content_class.visible == visible )
- if 'ids' in kwds:
- ids = kwds['ids']
- max_in_filter_length = kwds.get('max_in_filter_length', MAX_IN_FILTER_LENGTH)
- if len(ids) < max_in_filter_length:
- query = query.filter( content_class.id.in_(ids) )
- else:
- python_filter = lambda content: content.id in ids
- if python_filter:
- return ifilter(python_filter, query)
- else:
- return query
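- # Usage sketch for contents_iter (assuming a mapped History); string
- # flags are parsed with string_as_bool_or_none:
- #
- #     for hda in history.contents_iter( types=[ 'dataset' ], deleted='False', visible='True' ):
- #         print hda.hid, hda.name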
- def copy_tags_from(self,target_user,source_history):
- for src_shta in source_history.tags:
- new_shta = src_shta.copy()
- new_shta.user = target_user
- self.tags.append(new_shta)
- class HistoryUserShareAssociation( object ):
- def __init__( self ):
- self.history = None
- self.user = None
- class UserRoleAssociation( object ):
- def __init__( self, user, role ):
- self.user = user
- self.role = role
- class GroupRoleAssociation( object ):
- def __init__( self, group, role ):
- self.group = group
- self.role = role
- class Role( object, Dictifiable ):
- dict_collection_visible_keys = ( 'id', 'name' )
- dict_element_visible_keys = ( 'id', 'name', 'description', 'type' )
- private_id = None
- types = Bunch(
- PRIVATE = 'private',
- SYSTEM = 'system',
- USER = 'user',
- ADMIN = 'admin',
- SHARING = 'sharing'
- )
- def __init__( self, name="", description="", type="system", deleted=False ):
- self.name = name
- self.description = description
- self.type = type
- self.deleted = deleted
- class UserQuotaAssociation( object, Dictifiable ):
- dict_element_visible_keys = ( 'user', )
- def __init__( self, user, quota ):
- self.user = user
- self.quota = quota
- class GroupQuotaAssociation( object, Dictifiable ):
- dict_element_visible_keys = ( 'group', )
- def __init__( self, group, quota ):
- self.group = group
- self.quota = quota
- class Quota( object, Dictifiable ):
- dict_collection_visible_keys = ( 'id', 'name' )
- dict_element_visible_keys = ( 'id', 'name', 'description', 'bytes', 'operation', 'display_amount', 'default', 'users', 'groups' )
- valid_operations = ( '+', '-', '=' )
- def __init__( self, name="", description="", amount=0, operation="=" ):
- self.name = name
- self.description = description
- if amount is None:
- self.bytes = -1
- else:
- self.bytes = amount
- self.operation = operation
- def get_amount( self ):
- if self.bytes == -1:
- return None
- return self.bytes
- def set_amount( self, amount ):
- if amount is None:
- self.bytes = -1
- else:
- self.bytes = amount
- amount = property( get_amount, set_amount )
- @property
- def display_amount( self ):
- if self.bytes == -1:
- return "unlimited"
- else:
- return nice_size( self.bytes )
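- # A doctest-style sketch: a None amount is stored as -1 bytes and
- # displayed as unlimited:
- #
- #     >>> q = Quota( amount=None )
- #     >>> q.amount is None
- #     True
- #     >>> q.display_amount
- #     'unlimited'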
- class DefaultQuotaAssociation( Quota, Dictifiable ):
- dict_element_visible_keys = ( 'type', )
- types = Bunch(
- UNREGISTERED = 'unregistered',
- REGISTERED = 'registered'
- )
- def __init__( self, type, quota ):
- assert type in self.types.__dict__.values(), 'Invalid type'
- self.type = type
- self.quota = quota
- class DatasetPermissions( object ):
- def __init__( self, action, dataset, role ):
- self.action = action
- self.dataset = dataset
- self.role = role
- class LibraryPermissions( object ):
- def __init__( self, action, library_item, role ):
- self.action = action
- if isinstance( library_item, Library ):
- self.library = library_item
- else:
- raise "Invalid Library specified: %s" % library_item.__class__.__name__
- self.role = role
- class LibraryFolderPermissions( object ):
- def __init__( self, action, library_item, role ):
- self.action = action
- if isinstance( library_item, LibraryFolder ):
- self.folder = library_item
- else:
- raise "Invalid LibraryFolder specified: %s" % library_item.__class__.__name__
- self.role = role
- class LibraryDatasetPermissions( object ):
- def __init__( self, action, library_item, role ):
- self.action = action
- if isinstance( library_item, LibraryDataset ):
- self.library_dataset = library_item
- else:
- raise "Invalid LibraryDataset specified: %s" % library_item.__class__.__name__
- self.role = role
- class LibraryDatasetDatasetAssociationPermissions( object ):
- def __init__( self, action, library_item, role ):
- self.action = action
- if isinstance( library_item, LibraryDatasetDatasetAssociation ):
- self.library_dataset_dataset_association = library_item
- else:
- raise "Invalid LibraryDatasetDatasetAssociation specified: %s" % library_item.__class__.__name__
- self.role = role
- class DefaultUserPermissions( object ):
- def __init__( self, user, action, role ):
- self.user = user
- self.action = action
- self.role = role
- class DefaultHistoryPermissions( object ):
- def __init__( self, history, action, role ):
- self.history = history
- self.action = action
- self.role = role
- class Dataset( object ):
- states = Bunch( NEW = 'new',
- UPLOAD = 'upload',
- QUEUED = 'queued',
- RUNNING = 'running',
- OK = 'ok',
- EMPTY = 'empty',
- ERROR = 'error',
- DISCARDED = 'discarded',
- PAUSED = 'paused',
- SETTING_METADATA = 'setting_metadata',
- FAILED_METADATA = 'failed_metadata' )
- conversion_messages = Bunch( PENDING = "pending",
- NO_DATA = "no data",
- NO_CHROMOSOME = "no chromosome",
- NO_CONVERTER = "no converter",
- NO_TOOL = "no tool",
- DATA = "data",
- ERROR = "error",
- OK = "ok" )
- permitted_actions = get_permitted_actions( filter='DATASET' )
- file_path = "/tmp/"
- object_store = None # This gets initialized in mapping.py (method init) by app.py
- engine = None
- def __init__( self, id=None, state=None, external_filename=None, extra_files_path=None, file_size=None, purgable=True, uuid=None ):
- self.id = id
- self.state = state
- self.deleted = False
- self.purged = False
- self.purgable = purgable
- self.external_filename = external_filename
- self._extra_files_path = extra_files_path
- self.file_size = file_size
- if uuid is None:
- self.uuid = uuid4()
- else:
- self.uuid = UUID(str(uuid))
- def get_file_name( self ):
- if not self.external_filename:
- assert self.id is not None, "ID must be set before filename used (commit the object)"
- assert self.object_store is not None, "Object Store has not been initialized for dataset %s" % self.id
- filename = self.object_store.get_filename( self )
- return filename
- else:
- filename = self.external_filename
- # Make filename absolute
- return os.path.abspath( filename )
- def set_file_name ( self, filename ):
- if not filename:
- self.external_filename = None
- else:
- self.external_filename = filename
- file_name = property( get_file_name, set_file_name )
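- # Sketch of the file_name property: assigning a path marks the dataset's
- # data as living outside the object store, assigning a falsy value
- # reverts to object-store-managed storage:
- #
- #     dataset.file_name = '/data/external/reads.fastq'
- #     dataset.file_name = None    # back to the object store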
- @property
- def extra_files_path( self ):
- return self.object_store.get_filename( self, dir_only=True, extra_dir=self._extra_files_path or "dataset_%d_files" % self.id )
- def _calculate_size( self ):
- if self.external_filename:
- try:
- return os.path.getsize(self.external_filename)
- except OSError:
- return 0
- else:
- return self.object_store.size(self)
- def get_size( self, nice_size=False ):
- """Returns the size of the data on disk"""
- if self.file_size:
- if nice_size:
- return galaxy.datatypes.data.nice_size( self.file_size )
- else:
- return self.file_size
- else:
- if nice_size:
- return galaxy.datatypes.data.nice_size( self._calculate_size() )
- else:
- return self._calculate_size()
- def set_size( self ):
- """Returns the size of the data on disk"""
- if not self.file_size:
- self.file_size = self._calculate_size()
- def get_total_size( self ):
- if self.total_size is not None:
- return self.total_size
- if self.file_size:
- # for backwards compatibility, set if unset
- self.set_total_size()
- db_session = object_session( self )
- db_session.flush()
- return self.total_size
- return 0
- def set_total_size( self ):
- if self.file_size is None:
- self.set_size()
- self.total_size = self.file_size or 0
- if self.object_store.exists(self, extra_dir=self._extra_files_path or "dataset_%d_files" % self.id, dir_only=True):
- for root, dirs, files in os.walk( self.extra_files_path ):
- self.total_size += sum( [ os.path.getsize( os.path.join( root, file ) ) for file in files if os.path.exists( os.path.join( root, file ) ) ] )
- def has_data( self ):
- """Detects whether there is any data"""
- return self.get_size() > 0
- def mark_deleted( self, include_children=True ):
- self.deleted = True
- def is_multi_byte( self ):
- if not self.has_data():
- return False
- try:
- return is_multi_byte( codecs.open( self.file_name, 'r', 'utf-8' ).read( 100 ) )
- except UnicodeDecodeError:
- return False
- # FIXME: sqlalchemy will replace this
- def _delete(self):
- """Remove the file that corresponds to this data"""
- self.object_store.delete(self)
- @property
- def user_can_purge( self ):
- return not self.purged \
- and not bool( self.library_associations ) \
- and len( self.history_associations ) == len( self.purged_history_associations )
- def full_delete( self ):
- """Remove the file and extra files, marks deleted and purged"""
- # os.unlink( self.file_name )
- self.object_store.delete(self)
- if self.object_store.exists(self, extra_dir=self._extra_files_path or "dataset_%d_files" % self.id, dir_only=True):
- self.object_store.delete(self, entire_dir=True, extra_dir=self._extra_files_path or "dataset_%d_files" % self.id, dir_only=True)
- # if os.path.exists( self.extra_files_path ):
- # shutil.rmtree( self.extra_files_path )
- # TODO: purge metadata files
- self.deleted = True
- self.purged = True
- def get_access_roles( self, trans ):
- roles = []
- for dp in self.actions:
- if dp.action == trans.app.security_agent.permitted_actions.DATASET_ACCESS.action:
- roles.append( dp.role )
- return roles
- def get_manage_permissions_roles( self, trans ):
- roles = []
- for dp in self.actions:
- if dp.action == trans.app.security_agent.permitted_actions.DATASET_MANAGE_PERMISSIONS.action:
- roles.append( dp.role )
- return roles
- def has_manage_permissions_roles( self, trans ):
- for dp in self.actions:
- if dp.action == trans.app.security_agent.permitted_actions.DATASET_MANAGE_PERMISSIONS.action:
- return True
- return False
- class DatasetInstance( object ):
- """A base class for all 'dataset instances', HDAs, LDAs, etc"""
- states = Dataset.states
- conversion_messages = Dataset.conversion_messages
- permitted_actions = Dataset.permitted_actions
- def __init__( self, id=None, hid=None, name=None, info=None, blurb=None, peek=None, tool_version=None, extension=None,
- dbkey=None, metadata=None, history=None, dataset=None, deleted=False, designation=None,
- parent_id=None, validation_errors=None, visible=True, create_dataset=False, sa_session=None, extended_metadata=None ):
- self.name = name or "Unnamed dataset"
- self.id = id
- self.info = info
- self.blurb = blurb
- self.peek = peek
- self.tool_version = tool_version
- self.extension = extension
- self.designation = designation
- self.metadata = metadata or dict()
- self.extended_metadata = extended_metadata
- if dbkey: #dbkey is stored in metadata, only set if non-zero, or else we could clobber one supplied by input 'metadata'
- self.dbkey = dbkey
- self.deleted = deleted
- self.visible = visible
- # Relationships
- if not dataset and create_dataset:
- # Had to pass the sqlalchemy session in order to create a new dataset
- dataset = Dataset( state=Dataset.states.NEW )
- sa_session.add( dataset )
- sa_session.flush()
- self.dataset = dataset
- self.parent_id = parent_id
- self.validation_errors = validation_errors
- @property
- def ext( self ):
- return self.extension
- def get_dataset_state( self ):
- #self._state is currently only used when setting metadata externally
- #leave setting the state as-is, we'll currently handle this specially in the external metadata code
- if self._state:
- return self._state
- return self.dataset.state
- def set_dataset_state ( self, state ):
- self.dataset.state = state
- object_session( self ).add( self.dataset )
- object_session( self ).flush() #flush here, because hda.flush() won't flush the Dataset object
- state = property( get_dataset_state, set_dataset_state )
- def get_file_name( self ):
- return self.dataset.get_file_name()
- def set_file_name (self, filename):
- return self.dataset.set_file_name( filename )
- file_name = property( get_file_name, set_file_name )
- @property
- def extra_files_path( self ):
- return self.dataset.extra_files_path
- @property
- def datatype( self ):
- return datatypes_registry.get_datatype_by_extension( self.extension )
- def get_metadata( self ):
- if not hasattr( self, '_metadata_collection' ) or self._metadata_collection.parent != self: #using weakref to store parent (to prevent circ ref), does a Session.clear() cause parent to be invalidated, while still copying over this non-database attribute?
- self._metadata_collection = MetadataCollection( self )
- return self._metadata_collection
- def set_metadata( self, bunch ):
- # Needs to accept a MetadataCollection, a bunch, or a dict
- self._metadata = self.metadata.make_dict_copy( bunch )
- metadata = property( get_metadata, set_metadata )
- # This provides backwards compatibility with using the old dbkey
- # field in the database. That field now maps to "old_dbkey" (see mapping.py).
- def get_dbkey( self ):
- dbkey = self.metadata.dbkey
- if not isinstance( dbkey, list ):
- dbkey = [ dbkey ]
- if dbkey in [ [None], [] ]:
- return "?"
- return dbkey[0]
- def set_dbkey( self, value ):
- if "dbkey" in self.datatype.metadata_spec:
- if not isinstance(value, list):
- self.metadata.dbkey = [value]
- else:
- self.metadata.dbkey = value
- dbkey = property( get_dbkey, set_dbkey )
- def change_datatype( self, new_ext ):
- self.clear_associated_files()
- datatypes_registry.change_datatype( self, new_ext )
- def get_size( self, nice_size=False ):
- """Returns the size of the data on disk"""
- if nice_size:
- return galaxy.datatypes.data.nice_size( self.dataset.get_size() )
- return self.dataset.get_size()
- def set_size( self ):
- """Returns the size of the data on disk"""
- return self.dataset.set_size()
- def get_total_size( self ):
- return self.dataset.get_total_size()
- def set_total_size( self ):
- return self.dataset.set_total_size()
- def has_data( self ):
- """Detects whether there is any data"""
- return self.dataset.has_data()
- def get_raw_data( self ):
- """Returns the full data. To stream it open the file_name and read/write as needed"""
- return self.datatype.get_raw_data( self )
- def write_from_stream( self, stream ):
- """Writes data from a stream"""
- self.datatype.write_from_stream(self, stream)
- def set_raw_data( self, data ):
- """Saves the data on the disc"""
- self.datatype.set_raw_data(self, data)
- def get_mime( self ):
- """Returns the mime type of the data"""
- try:
- return datatypes_registry.get_mimetype_by_extension( self.extension.lower() )
- except AttributeError:
- # extension is None
- return 'data'
- def is_multi_byte( self ):
- """Data consists of multi-byte characters"""
- return self.dataset.is_multi_byte()
- def set_peek( self, is_multi_byte=False ):
- return self.datatype.set_peek( self, is_multi_byte=is_multi_byte )
- def init_meta( self, copy_from=None ):
- return self.datatype.init_meta( self, copy_from=copy_from )
- def set_meta( self, **kwd ):
- self.clear_associated_files( metadata_safe = True )
- return self.datatype.set_meta( self, **kwd )
- def missing_meta( self, **kwd ):
- return self.datatype.missing_meta( self, **kwd )
- def as_display_type( self, type, **kwd ):
- return self.datatype.as_display_type( self, type, **kwd )
- def display_peek( self ):
- return self.datatype.display_peek( self )
- def display_name( self ):
- return self.datatype.display_name( self )
- def display_info( self ):
- return self.datatype.display_info( self )
- def get_converted_files_by_type( self, file_type ):
- for assoc in self.implicitly_converted_datasets:
- if not assoc.deleted and assoc.type == file_type:
- if assoc.dataset:
- return assoc.dataset
- return assoc.dataset_ldda
- return None
- def get_converted_dataset_deps(self, trans, target_ext):
- """
- Returns dict of { "dependency" => HDA }
- """
- # List of dependency names
- try:
- depends_list = trans.app.datatypes_registry.converter_deps[self.extension][target_ext]
- except KeyError:
- depends_list = []
- return dict([ (dep, self.get_converted_dataset(trans, dep)) for dep in depends_list ])
- def get_converted_dataset(self, trans, target_ext):
- """
- Return converted dataset(s) if they exist, along with a dict of dependencies.
- If not converted yet, do so and return None (the first time). If unconvertible, raise exception.
- """
- # See if we can convert the dataset
- if target_ext not in self.get_converter_types():
- raise NoConverterException("Conversion from '%s' to '%s' not possible" % (self.extension, target_ext) )
- deps = {}
- # List of dependency names
- try:
- depends_list = trans.app.datatypes_registry.converter_deps[self.extension][target_ext]
- except KeyError:
- depends_list = []
- # See if converted dataset already exists, either in metadata or in conversions.
- converted_dataset = self.get_metadata_dataset( trans, target_ext )
- if converted_dataset:
- return converted_dataset
- converted_dataset = self.get_converted_files_by_type( target_ext )
- if converted_dataset:
- return converted_dataset
- # Conversion is possible but hasn't been done yet, run converter.
- # Check if we have dependencies
- try:
- for dependency in depends_list:
- dep_dataset = self.get_converted_dataset(trans, dependency)
- if dep_dataset is None:
- # None means converter is running first time
- return None
- elif dep_dataset.state == Job.states.ERROR:
- raise ConverterDependencyException("A dependency (%s) was in an error state." % dependency)
- elif dep_dataset.state != Job.states.OK:
- # Pending
- return None
- deps[dependency] = dep_dataset
- except NoConverterException:
- raise NoConverterException("A dependency (%s) is missing a converter." % dependency)
- except KeyError:
- pass # No deps
- new_dataset = self.datatype.convert_dataset( trans, self, target_ext, return_output=True, visible=False, deps=deps, set_output_history=False ).values()[0]
- new_dataset.name = self.name
- assoc = ImplicitlyConvertedDatasetAssociation( parent=self, file_type=target_ext, dataset=new_dataset, metadata_safe=False )
- session = trans.sa_session
- session.add( new_dataset )
- session.add( assoc )
- session.flush()
- return None
- def get_metadata_dataset( self, trans, dataset_ext ):
- """
- Returns an HDA that points to a metadata file which contains data
- converted to the requested extension.
- """
- for name, value in self.metadata.items():
- # HACK: MetadataFile objects do not have a type/ext, so need to use metadata name
- # to determine type.
- if dataset_ext == 'bai' and name == 'bam_index' and isinstance( value, MetadataFile ):
- # HACK: MetadataFile objects cannot be used by tools, so return
- # a fake HDA that points to metadata file.
- fake_dataset = Dataset( state=Dataset.states.OK, external_filename=value.file_name )
- fake_hda = HistoryDatasetAssociation( dataset=fake_dataset )
- return fake_hda
- def clear_associated_files( self, metadata_safe = False, purge = False ):
- raise Exception( "Unimplemented" )
- def get_child_by_designation(self, designation):
- for child in self.children:
- if child.designation == designation:
- return child
- return None
- def get_converter_types(self):
- return self.datatype.get_converter_types( self, datatypes_registry )
- def can_convert_to(self, format):
- return format in self.get_converter_types()
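- # Sketch of guarding a conversion request (assuming a transaction object
- # `trans` as used elsewhere in this module):
- #
- #     if hda.can_convert_to( 'bigwig' ):
- #         converted = hda.get_converted_dataset( trans, 'bigwig' )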
- def find_conversion_destination( self, accepted_formats, **kwd ):
- """Returns ( target_ext, existing converted dataset )"""
- return self.datatype.find_conversion_destination( self, accepted_formats, datatypes_registry, **kwd )
- def add_validation_error( self, validation_error ):
- self.validation_errors.append( validation_error )
- def extend_validation_errors( self, validation_errors ):
- self.validation_errors.extend(validation_errors)
- def mark_deleted( self, include_children=True ):
- self.deleted = True
- if include_children:
- for child in self.children:
- child.mark_deleted()
- def mark_undeleted( self, include_children=True ):
- self.deleted = False
- if include_children:
- for child in self.children:
- child.mark_undeleted()
- def mark_unhidden( self, include_children=True ):
- self.visible = True
- if include_children:
- for child in self.children:
- child.mark_unhidden()
- def undeletable( self ):
- if self.purged:
- return False
- return True
- @property
- def is_pending( self ):
- """
- Return true if the dataset is neither ready nor in error
- """
- return self.state in ( self.states.NEW, self.states.UPLOAD,
- self.states.QUEUED, self.states.RUNNING,
- self.states.SETTING_METADATA )
- @property
- def source_library_dataset( self ):
- def get_source( dataset ):
- if isinstance( dataset, LibraryDatasetDatasetAssociation ):
- if dataset.library_dataset:
- return ( dataset, dataset.library_dataset )
- if dataset.copied_from_library_dataset_dataset_association:
- source = get_source( dataset.copied_from_library_dataset_dataset_association )
- if source:
- return source
- if dataset.copied_from_history_dataset_association:
- source = get_source( dataset.copied_from_history_dataset_association )
- if source:
- return source
- return ( None, None )
- return get_source( self )
- @property
- def source_dataset_chain( self ):
- def _source_dataset_chain( dataset, lst ):
- try:
- cp_from_ldda = dataset.copied_from_library_dataset_dataset_association
- if cp_from_ldda:
- lst.append( (cp_from_ldda, "(Data Library)") )
- return _source_dataset_chain( cp_from_ldda, lst )
- except Exception, e:
- log.warning( e )
- try:
- cp_from_hda = dataset.copied_from_history_dataset_association
- if cp_from_hda:
- lst.append( (cp_from_hda, cp_from_hda.history.name) )
- return _source_dataset_chain( cp_from_hda, lst )
- except Exception, e:
- log.warning( e )
- return lst
- return _source_dataset_chain( self, [] )
- @property
- def creating_job( self ):
- creating_job_associations = None
- if self.creating_job_associations:
- creating_job_associations = self.creating_job_associations
- else:
- inherit_chain = self.source_dataset_chain
- if inherit_chain:
- creating_job_associations = inherit_chain[-1][0].creating_job_associations
- if creating_job_associations:
- return creating_job_associations[0].job
- return None
- def get_display_applications( self, trans ):
- return self.datatype.get_display_applications_by_dataset( self, trans )
- def get_visualizations( self ):
- return self.datatype.get_visualizations( self )
- def get_datasources( self, trans ):
- """
- Returns datasources for dataset; if datasources are not available
- due to indexing, indexing is started. Return value is a dictionary
- with entries of type
- (<datasource_type> : {<datasource_name>, <indexing_message>}).
- """
- data_sources_dict = {}
- msg = None
- for source_type, source_list in self.datatype.data_sources.iteritems():
- data_source = None
- if source_type == "data_standalone":
- # Nothing to do.
- msg = None
- data_source = source_list
- else:
- # Convert.
- if isinstance( source_list, str ):
- source_list = [ source_list ]
- # Loop through sources until viable one is found.
- for source in source_list:
- msg = self.convert_dataset( trans, source )
- # No message or PENDING means that source is viable. No
- # message indicates conversion was done and is successful.
- if not msg or msg == self.conversion_messages.PENDING:
- data_source = source
- break
- # Store msg.
- data_sources_dict[ source_type ] = { "name": data_source, "message": msg }
- return data_sources_dict
- def convert_dataset( self, trans, target_type ):
- """
- Converts a dataset to the target_type and returns a message indicating
- status of the conversion. None is returned to indicate that dataset
- was converted successfully.
- """
- # Get converted dataset; this will start the conversion if necessary.
- try:
- converted_dataset = self.get_converted_dataset( trans, target_type )
- except NoConverterException:
- return self.conversion_messages.NO_CONVERTER
- except ConverterDependencyException, dep_error:
- return { 'kind': self.conversion_messages.ERROR, 'message': dep_error.value }
- # Check dataset state and return any messages.
- msg = None
- if converted_dataset and converted_dataset.state == Dataset.states.ERROR:
- job_id = trans.sa_session.query( JobToOutputDatasetAssociation ) \
- .filter_by( dataset_id=converted_dataset.id ).first().job_id
- job = trans.sa_session.query( Job ).get( job_id )
- msg = { 'kind': self.conversion_messages.ERROR, 'message': job.stderr }
- elif not converted_dataset or converted_dataset.state != Dataset.states.OK:
- msg = self.conversion_messages.PENDING
- return msg
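- # Summary of convert_dataset's possible return values ( per the branches above ):
- #   None                                - conversion finished successfully
- #   conversion_messages.PENDING         - conversion job is queued or running
- #   conversion_messages.NO_CONVERTER    - no converter exists for target_type
- #   { 'kind': conversion_messages.ERROR, 'message': ... } - dependency or job error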
- class HistoryDatasetAssociation( DatasetInstance, Dictifiable, UsesAnnotations, HasName ):
- """
- Resource class that creates a relation between a dataset and a user history.
- """
- def __init__( self,
- hid = None,
- history = None,
- copied_from_history_dataset_association = None,
- copied_from_library_dataset_dataset_association = None,
- sa_session = None,
- **kwd ):
- """
- Create a a new HDA and associate it with the given history.
- """
- # FIXME: sa_session is must be passed to DataSetInstance if the create_dataset
- # parameter is True so that the new object can be flushed. Is there a better way?
- DatasetInstance.__init__( self, sa_session=sa_session, **kwd )
- self.hid = hid
- # Relationships
- self.history = history
- self.copied_from_history_dataset_association = copied_from_history_dataset_association
- self.copied_from_library_dataset_dataset_association = copied_from_library_dataset_dataset_association
- def copy( self, copy_children = False, parent_id = None ):
- """
- Create a copy of this HDA.
- """
- hda = HistoryDatasetAssociation( hid=self.hid,
- name=self.name,
- info=self.info,
- blurb=self.blurb,
- peek=self.peek,
- tool_version=self.tool_version,
- extension=self.extension,
- dbkey=self.dbkey,
- dataset = self.dataset,
- visible=self.visible,
- deleted=self.deleted,
- parent_id=parent_id,
- copied_from_history_dataset_association=self )
- # update init non-keywords as well
- hda.purged = self.purged
- object_session( self ).add( hda )
- object_session( self ).flush()
- hda.set_size()
- # Need to set after flushed, as MetadataFiles require dataset.id
- hda.metadata = self.metadata
- if copy_children:
- for child in self.children:
- child.copy( copy_children = copy_children, parent_id = hda.id )
- if not self.datatype.copy_safe_peek:
- # In some instances peek relies on dataset_id, e.g. gmaj.zip for viewing MAFs
- hda.set_peek()
- object_session( self ).flush()
- return hda
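- # Minimal usage sketch ( illustrative ): the copy shares the underlying
- # Dataset and records where it came from:
- #   new_hda = hda.copy( copy_children=True )
- #   assert new_hda.dataset is hda.dataset
- #   assert new_hda.copied_from_history_dataset_association is hda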
- def to_library_dataset_dataset_association( self, trans, target_folder,
- replace_dataset=None, parent_id=None, user=None, roles=None, ldda_message='' ):
- """
- Copy this HDA to a library, optionally replacing an existing LDDA.
- """
- if replace_dataset:
- # The replace_dataset param ( when not None ) refers to a LibraryDataset that
- # is being replaced with a new version.
- library_dataset = replace_dataset
- else:
- # If replace_dataset is None, the Library level permissions will be taken from the folder and
- # applied to the new LibraryDataset, and the current user's DefaultUserPermissions will be applied
- # to the associated Dataset.
- library_dataset = LibraryDataset( folder=target_folder, name=self.name, info=self.info )
- object_session( self ).add( library_dataset )
- object_session( self ).flush()
- if not user:
- # This should never happen since users must be authenticated to upload to a data library
- user = self.history.user
- ldda = LibraryDatasetDatasetAssociation( name=self.name,
- info=self.info,
- blurb=self.blurb,
- peek=self.peek,
- tool_version=self.tool_version,
- extension=self.extension,
- dbkey=self.dbkey,
- dataset=self.dataset,
- library_dataset=library_dataset,
- visible=self.visible,
- deleted=self.deleted,
- parent_id=parent_id,
- copied_from_history_dataset_association=self,
- user=user )
- object_session( self ).add( ldda )
- object_session( self ).flush()
- # If roles were selected on the upload form, restrict access to the Dataset to those roles
- roles = roles or []
- for role in roles:
- dp = trans.model.DatasetPermissions( trans.app.security_agent.permitted_actions.DATASET_ACCESS.action,
- ldda.dataset, role )
- trans.sa_session.add( dp )
- trans.sa_session.flush()
- # Must set metadata after ldda flushed, as MetadataFiles require ldda.id
- ldda.metadata = self.metadata
- if ldda_message:
- ldda.message = ldda_message
- if not replace_dataset:
- target_folder.add_library_dataset( library_dataset, genome_build=ldda.dbkey )
- object_session( self ).add( target_folder )
- object_session( self ).flush()
- library_dataset.library_dataset_dataset_association_id = ldda.id
- object_session( self ).add( library_dataset )
- object_session( self ).flush()
- for child in self.children:
- child.to_library_dataset_dataset_association( trans,
- target_folder=target_folder,
- replace_dataset=replace_dataset,
- parent_id=ldda.id,
- user=ldda.user )
- if not self.datatype.copy_safe_peek:
- # In some instances peek relies on dataset_id, e.g. gmaj.zip for viewing MAFs
- ldda.set_peek()
- object_session( self ).flush()
- return ldda
- def clear_associated_files( self, metadata_safe = False, purge = False ):
- """
- """
- # metadata_safe = True means to only clear when assoc.metadata_safe == False
- for assoc in self.implicitly_converted_datasets:
- if not assoc.deleted and ( not metadata_safe or not assoc.metadata_safe ):
- assoc.clear( purge = purge )
- for assoc in self.implicitly_converted_parent_datasets:
- assoc.clear( purge = purge, delete_dataset = False )
- def get_access_roles( self, trans ):
- """
- Return the access roles associated with this HDA's dataset.
- """
- return self.dataset.get_access_roles( trans )
- def quota_amount( self, user ):
- """
- Return the disk space used for this HDA relevant to user quotas.
- If the user has multiple instances of this dataset, it will not affect their
- disk usage statistic.
- """
- rval = 0
- # Anon users are handled just by their single history size.
- if not user:
- return rval
- # Count this HDA's ( and its children's ) disk usage only if the user does
- # not already have another association to the same dataset.
- if not self.dataset.library_associations and not self.purged and not self.dataset.purged:
- for hda in self.dataset.history_associations:
- if hda.id == self.id:
- continue
- if not hda.purged and hda.history and hda.history.user and hda.history.user == user:
- break
- else:
- rval += self.get_total_size()
- for child in self.children:
- rval += child.get_disk_usage( user )
- return rval
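- # Illustrative consequence of the loop above: if the user already owns another
- # non-purged HDA backed by the same Dataset, quota_amount() contributes 0 for
- # this HDA; otherwise it contributes get_total_size() plus children's usage.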
- def to_dict( self, view='collection', expose_dataset_path=False ):
- """
- Return attributes of this HDA that are exposed using the API.
- """
- # Since this class is a proxy to rather complex attributes we want to
- # display in other objects, we can't use the simpler method used by
- # other model classes.
- hda = self
- rval = dict( id = hda.id,
- hda_ldda = 'hda',
- uuid = ( lambda uuid: str( uuid ) if uuid else None )( hda.dataset.uuid ),
- hid = hda.hid,
- file_ext = hda.ext,
- peek = ( lambda hda: hda.display_peek() if hda.peek and hda.peek != 'no peek' else None )( hda ),
- model_class = self.__class__.__name__,
- name = hda.name,
- deleted = hda.deleted,
- purged = hda.purged,
- visible = hda.visible,
- state = hda.state,
- file_size = int( hda.get_size() ),
- update_time = hda.update_time.isoformat(),
- data_type = hda.ext,
- genome_build = hda.dbkey,
- misc_info = hda.info.strip() if isinstance( hda.info, basestring ) else hda.info,
- misc_blurb = hda.blurb )
- # add tags string list
- tags_str_list = []
- for tag in self.tags:
- tag_str = tag.user_tname
- if tag.value is not None:
- tag_str += ":" + tag.user_value
- tags_str_list.append( tag_str )
- rval[ 'tags' ] = tags_str_list
- if hda.copied_from_library_dataset_dataset_association is not None:
- rval['copied_from_ldda_id'] = hda.copied_from_library_dataset_dataset_association.id
- if hda.history is not None:
- rval['history_id'] = hda.history.id
- if hda.extended_metadata is not None:
- rval['extended_metadata'] = hda.extended_metadata.data
- rval[ 'peek' ] = to_unicode( hda.display_peek() )
- for name, spec in hda.metadata.spec.items():
- val = hda.metadata.get( name )
- if isinstance( val, MetadataFile ):
- # only when explicitly set: fetching filepaths can be expensive
- if not expose_dataset_path:
- continue
- val = val.file_name
- # If no value for metadata, look in datatype for metadata.
- elif val is None and hasattr( hda.datatype, name ):
- val = getattr( hda.datatype, name )
- rval['metadata_' + name] = val
- return rval
- def set_from_dict( self, new_data ):
- # AKA: set_api_value
- """
- Set object attributes to the values in dictionary new_data, limited
- to the following keys: name, deleted, visible, genome_build,
- info, and blurb.
- Returns a dictionary of the keys and values that were changed.
- """
- # precondition: keys are proper, values are parsed and validated
- # NOTE: does not handle metadata
- editable_keys = ( 'name', 'deleted', 'visible', 'dbkey', 'info', 'blurb' )
- changed = {}
- # unknown keys are ignored here
- for key in [ k for k in new_data.keys() if k in editable_keys ]:
- new_val = new_data[ key ]
- old_val = self.__getattribute__( key )
- if new_val == old_val:
- continue
- # special cases here
- if key == 'deleted' and new_val is False and self.purged:
- raise Exception( 'Cannot undelete a purged dataset' )
- self.__setattr__( key, new_val )
- changed[ key ] = new_val
- return changed
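- # Illustrative call ( assuming the HDA is not already named 'renamed' ):
- # unknown keys are ignored and only changed keys are echoed back:
- #   hda.set_from_dict( { 'name': 'renamed', 'bogus': 1 } )  # -> { 'name': 'renamed' }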
- class HistoryDatasetAssociationDisplayAtAuthorization( object ):
- def __init__( self, hda=None, user=None, site=None ):
- self.history_dataset_association = hda
- self.user = user
- self.site = site
- class HistoryDatasetAssociationSubset( object ):
- def __init__(self, hda, subset, location):
- self.hda = hda
- self.subset = subset
- self.location = location
- class Library( object, Dictifiable, HasName ):
- permitted_actions = get_permitted_actions( filter='LIBRARY' )
- dict_collection_visible_keys = ( 'id', 'name' )
- dict_element_visible_keys = ( 'id', 'deleted', 'name', 'description', 'synopsis', 'root_folder_id' )
- def __init__( self, name=None, description=None, synopsis=None, root_folder=None ):
- self.name = name or "Unnamed library"
- self.description = description
- self.synopsis = synopsis
- self.root_folder = root_folder
- def to_dict( self, view='collection', value_mapper=None ):
- """
- We prepend an F to folders.
- """
- rval = super( Library, self ).to_dict( view=view, value_mapper=value_mapper )
- if 'root_folder_id' in rval:
- rval[ 'root_folder_id' ] = 'F' + rval[ 'root_folder_id' ]
- return rval
- def get_active_folders( self, folder, folders=None ):
- # TODO: should we make sure the library is not deleted?
- def sort_by_attr( seq, attr ):
- """
- Sort the sequence of objects by object's attribute
- Arguments:
- seq - the list or any sequence (including immutable one) of objects to sort.
- attr - the name of attribute to sort by
- """
- # Use the "Schwartzian transform"
- # Create the auxiliary list of tuples where every i-th tuple has form
- # (seq[i].attr, i, seq[i]) and sort it. The second item of tuple is needed not
- # only to provide stable sorting, but mainly to eliminate comparison of objects
- # (which can be expensive or prohibited) in case of equal attribute values.
- intermed = map( None, map( getattr, seq, ( attr, ) * len( seq ) ), xrange( len( seq ) ), seq )
- intermed.sort()
- return map( operator.getitem, intermed, ( -1, ) * len( intermed ) )
- if folders is None:
- active_folders = [ folder ]
- for active_folder in folder.active_folders:
- active_folders.extend( self.get_active_folders( active_folder, folders ) )
- return sort_by_attr( active_folders, 'id' )
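- # Worked example of the Schwartzian transform in sort_by_attr ( illustrative ):
- # for seq = [ b, a ] with b.id == 2 and a.id == 1, intermed is built as
- #   [ ( 2, 0, b ), ( 1, 1, a ) ]
- # sorting these tuples compares ( attr value, original index ) and never the
- # objects themselves, giving [ ( 1, 1, a ), ( 2, 0, b ) ]; taking item -1 of
- # each tuple then yields [ a, b ].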
- def get_info_association( self, restrict=False, inherited=False ):
- if self.info_association:
- if not inherited or self.info_association[0].inheritable:
- return self.info_association[0], inherited
- else:
- return None, inherited
- return None, inherited
- def get_template_widgets( self, trans, get_contents=True ):
- # See if we have any associated templates - the returned value for
- # inherited is not applicable at the library level. The get_contents
- # param is passed by callers that are inheriting a template - these
- # are usually new library datasets for which we want to include template
- # fields on the upload form, but not necessarily the contents of the
- # inherited template saved for the parent.
- info_association, inherited = self.get_info_association()
- if info_association:
- template = info_association.template
- if get_contents:
- # See if we have any field contents
- info = info_association.info
- if info:
- return template.get_widgets( trans.user, contents=info.content )
- return template.get_widgets( trans.user )
- return []
- def get_access_roles( self, trans ):
- roles = []
- for lp in self.actions:
- if lp.action == trans.app.security_agent.permitted_actions.LIBRARY_ACCESS.action:
- roles.append( lp.role )
- return roles
- class LibraryFolder( object, Dictifiable, HasName ):
- dict_element_visible_keys = ( 'id', 'parent_id', 'name', 'description', 'item_count', 'genome_build', 'update_time' )
- def __init__( self, name=None, description=None, item_count=0, order_id=None ):
- self.name = name or "Unnamed folder"
- self.description = description
- self.item_count = item_count
- self.order_id = order_id
- self.genome_build = None
- def add_library_dataset( self, library_dataset, genome_build=None ):
- library_dataset.folder_id = self.id
- library_dataset.order_id = self.item_count
- self.item_count += 1
- if genome_build not in [None, '?']:
- self.genome_build = genome_build
- def add_folder( self, folder ):
- folder.parent_id = self.id
- folder.order_id = self.item_count
- self.item_count += 1
- def get_info_association( self, restrict=False, inherited=False ):
- # If restrict is True, we will return this folder's info_association, not inheriting.
- # If restrict is False, we'll return the next available info_association in the
- # inheritable hierarchy if it is "inheritable". True is also returned if the
- # info_association was inherited and False if not. This enables us to eliminate
- # displaying any contents of the inherited template.
- if self.info_association:
- if not inherited or self.info_association[0].inheritable:
- return self.info_association[0], inherited
- else:
- return None, inherited
- if restrict:
- return None, inherited
- if self.parent:
- return self.parent.get_info_association( inherited=True )
- if self.library_root:
- return self.library_root[0].get_info_association( inherited=True )
- return None, inherited
- def get_template_widgets( self, trans, get_contents=True ):
- # See if we have any associated templates. The get_contents
- # param is passed by callers that are inheriting a template - these
- # are usually new library datasets for which we want to include template
- # fields on the upload form.
- info_association, inherited = self.get_info_association()
- if info_association:
- if inherited:
- template = info_association.template.current.latest_form
- else:
- template = info_association.template
- # See if we have any field contents, but only if the info_association was
- # not inherited ( we do not want to display the inherited contents ).
- # (gvk: 8/30/10) Based on conversations with Dan, we agreed to ALWAYS inherit
- # contents. We'll use this behavior until we hear from the community that
- # contents should not be inherited. If we don't hear anything for a while,
- # eliminate the old commented out behavior.
- #if not inherited and get_contents:
- if get_contents:
- info = info_association.info
- if info:
- return template.get_widgets( trans.user, info.content )
- else:
- return template.get_widgets( trans.user )
- return []
- @property
- def activatable_library_datasets( self ):
- # This needs to be a list
- return [ ld for ld in self.datasets if ld.library_dataset_dataset_association and not ld.library_dataset_dataset_association.dataset.deleted ]
- def to_dict( self, view='collection', value_mapper=None ):
- rval = super( LibraryFolder, self ).to_dict( view=view, value_mapper=value_mapper )
- info_association, inherited = self.get_info_association()
- if info_association:
- if inherited:
- template = info_association.template.current.latest_form
- else:
- template = info_association.template
- rval['data_template'] = template.name
- rval['library_path'] = self.library_path
- rval['parent_library_id'] = self.parent_library.id
- return rval
- @property
- def library_path(self):
- l_path = []
- f = self
- while f.parent:
- l_path.insert(0, f.name)
- f = f.parent
- return l_path
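- # Illustrative: for a folder nested as <library root>/RNA-seq/2013, library_path
- # returns [ 'RNA-seq', '2013' ]; the loop stops at the root folder, whose own
- # name is not included.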
- @property
- def parent_library( self ):
- f = self
- while f.parent:
- f = f.parent
- return f.library_root[0]
- class LibraryDataset( object ):
- # This class acts as a proxy to the currently selected LDDA
- upload_options = [ ( 'upload_file', 'Upload files' ),
- ( 'upload_directory', 'Upload directory of files' ),
- ( 'upload_paths', 'Upload files from filesystem paths' ),
- ( 'import_from_history', 'Import datasets from your current history' ) ]
- def __init__( self, folder=None, order_id=None, name=None, info=None, library_dataset_dataset_association=None, **kwd ):
- self.folder = folder
- self.order_id = order_id
- self.name = name
- self.info = info
- self.library_dataset_dataset_association = library_dataset_dataset_association
- def set_library_dataset_dataset_association( self, ldda ):
- self.library_dataset_dataset_association = ldda
- ldda.library_dataset = self
- object_session( self ).add_all( ( ldda, self ) )
- object_session( self ).flush()
- def get_info( self ):
- if self.library_dataset_dataset_association:
- return self.library_dataset_dataset_association.info
- elif self._info:
- return self._info
- else:
- return 'no info'
- def set_info( self, info ):
- self._info = info
- info = property( get_info, set_info )
- def get_name( self ):
- if self.library_dataset_dataset_association:
- return self.library_dataset_dataset_association.name
- elif self._name:
- return self._name
- else:
- return 'Unnamed dataset'
- def set_name( self, name ):
- self._name = name
- name = property( get_name, set_name )
- def display_name( self ):
- return self.library_dataset_dataset_association.display_name()
- def to_dict( self, view='collection' ):
- # Since this class is a proxy to rather complex attributes we want to
- # display in other objects, we can't use the simpler method used by
- # other model classes.
- ldda = self.library_dataset_dataset_association
- template_data = {}
- for temp_info in ldda.info_association:
- template = temp_info.template
- content = temp_info.info.content
- tmp_dict = {}
- for field in template.fields:
- tmp_dict[field['label']] = content[field['name']]
- template_data[template.name] = tmp_dict
- rval = dict( id = self.id,
- ldda_id = ldda.id,
- parent_library_id = self.folder.parent_library.id,
- folder_id = self.folder_id,
- model_class = self.__class__.__name__,
- state = ldda.state,
- name = ldda.name,
- file_name = ldda.file_name,
- uploaded_by = ldda.user.email,
- message = ldda.message,
- date_uploaded = ldda.create_time.isoformat(),
- file_size = int( ldda.get_size() ),
- data_type = ldda.ext,
- genome_build = ldda.dbkey,
- misc_info = ldda.info,
- misc_blurb = ldda.blurb,
- peek = ( lambda ldda: ldda.display_peek() if ldda.peek and ldda.peek != 'no peek' else None )( ldda ),
- template_data = template_data )
- if ldda.dataset.uuid is None:
- rval['uuid'] = None
- else:
- rval['uuid'] = str(ldda.dataset.uuid)
- for name, spec in ldda.metadata.spec.items():
- val = ldda.metadata.get( name )
- if isinstance( val, MetadataFile ):
- val = val.file_name
- elif isinstance( val, list ):
- val = ', '.join( [str(v) for v in val] )
- rval['metadata_' + name] = val
- return rval
- class LibraryDatasetDatasetAssociation( DatasetInstance, HasName ):
- def __init__( self,
- copied_from_history_dataset_association=None,
- copied_from_library_dataset_dataset_association=None,
- library_dataset=None,
- user=None,
- sa_session=None,
- **kwd ):
- # FIXME: sa_session must be passed to DatasetInstance if the create_dataset
- # parameter in kwd is True so that the new object can be flushed. Is there a better way?
- DatasetInstance.__init__( self, sa_session=sa_session, **kwd )
- if copied_from_history_dataset_association:
- self.copied_from_history_dataset_association_id = copied_from_history_dataset_association.id
- if copied_from_library_dataset_dataset_association:
- self.copied_from_library_dataset_dataset_association_id = copied_from_library_dataset_dataset_association.id
- self.library_dataset = library_dataset
- self.user = user
- def to_history_dataset_association( self, target_history, parent_id = None, add_to_history = False ):
- hda = HistoryDatasetAssociation( name=self.name,
- info=self.info,
- blurb=self.blurb,
- peek=self.peek,
- tool_version=self.tool_version,
- extension=self.extension,
- dbkey=self.dbkey,
- dataset=self.dataset,
- visible=self.visible,
- deleted=self.deleted,
- parent_id=parent_id,
- copied_from_library_dataset_dataset_association=self,
- history=target_history )
- object_session( self ).add( hda )
- object_session( self ).flush()
- hda.metadata = self.metadata  # Need to set after flushed, as MetadataFiles require dataset.id
- if add_to_history and target_history:
- target_history.add_dataset( hda )
- for child in self.children:
- child.to_history_dataset_association( target_history = target_history, parent_id = hda.id, add_to_history = False )
- if not self.datatype.copy_safe_peek:
- hda.set_peek()  # In some instances peek relies on dataset_id, e.g. gmaj.zip for viewing MAFs
- object_session( self ).flush()
- return hda
- def copy( self, copy_children = False, parent_id = None, target_folder = None ):
- ldda = LibraryDatasetDatasetAssociation( name=self.name,
- info=self.info,
- blurb=self.blurb,
- peek=self.peek,
- tool_version=self.tool_version,
- extension=self.extension,
- dbkey=self.dbkey,
- dataset=self.dataset,
- visible=self.visible,
- deleted=self.deleted,
- parent_id=parent_id,
- copied_from_library_dataset_dataset_association=self,
- folder=target_folder )
- object_session( self ).add( ldda )
- object_session( self ).flush()
- # Need to set after flushed, as MetadataFiles require dataset.id
- ldda.metadata = self.metadata
- if copy_children:
- for child in self.children:
- child.copy( copy_children = copy_children, parent_id = ldda.id )
- if not self.datatype.copy_safe_peek:
- # In some instances peek relies on dataset_id, e.g. gmaj.zip for viewing MAFs
- ldda.set_peek()
- object_session( self ).flush()
- return ldda
- def clear_associated_files( self, metadata_safe = False, purge = False ):
- return
- def get_access_roles( self, trans ):
- return self.dataset.get_access_roles( trans )
- def get_manage_permissions_roles( self, trans ):
- return self.dataset.get_manage_permissions_roles( trans )
- def has_manage_permissions_roles( self, trans ):
- return self.dataset.has_manage_permissions_roles( trans )
- def get_info_association( self, restrict=False, inherited=False ):
- # If restrict is True, we return only this ldda's own info_association
- # ( None if it has none ). If restrict is False, we return the next available
- # info_association in the inheritable hierarchy. True is also returned if the
- # info_association was inherited, and False if not. This enables us to
- # eliminate displaying any contents of the inherited template.
- # SM: Accessing self.info_association can cause a query to be emitted
- if self.info_association:
- return self.info_association[0], inherited
- if restrict:
- return None, inherited
- return self.library_dataset.folder.get_info_association( inherited=True )
- def to_dict( self, view='collection' ):
- # Since this class is a proxy to rather complex attributes we want to
- # display in other objects, we can't use the simpler method used by
- # other model classes.
- ldda = self
- try:
- file_size = int( ldda.get_size() )
- except OSError:
- file_size = 0
- rval = dict( id = ldda.id,
- hda_ldda = 'ldda',
- model_class = self.__class__.__name__,
- name = ldda.name,
- deleted = ldda.deleted,
- visible = ldda.visible,
- state = ldda.state,
- library_dataset_id = ldda.library_dataset_id,
- file_size = file_size,
- file_name = ldda.file_name,
- update_time = ldda.update_time.isoformat(),
- data_type = ldda.ext,
- genome_build = ldda.dbkey,
- misc_info = ldda.info,
- misc_blurb = ldda.blurb )
- if ldda.dataset.uuid is None:
- rval['uuid'] = None
- else:
- rval['uuid'] = str(ldda.dataset.uuid)
- rval['parent_library_id'] = ldda.library_dataset.folder.parent_library.id
- if ldda.extended_metadata is not None:
- rval['extended_metadata'] = ldda.extended_metadata.data
- for name, spec in ldda.metadata.spec.items():
- val = ldda.metadata.get( name )
- if isinstance( val, MetadataFile ):
- val = val.file_name
- # If no value for metadata, look in datatype for metadata.
- elif val is None and hasattr( ldda.datatype, name ):
- val = getattr( ldda.datatype, name )
- rval['metadata_' + name] = val
- return rval
- def get_template_widgets( self, trans, get_contents=True ):
- # See if we have any associated templates. The get_contents
- # param is passed by callers that are inheriting a template - these
- # are usually new library datasets for which we want to include template
- # fields on the upload form, but not necessarily the contents of the
- # inherited template saved for the parent.
- info_association, inherited = self.get_info_association()
- if info_association:
- if inherited:
- template = info_association.template.current.latest_form
- else:
- template = info_association.template
- # See if we have any field contents, but only if the info_association was
- # not inherited ( we do not want to display the inherited contents ).
- # (gvk: 8/30/10) Based on conversations with Dan, we agreed to ALWAYS inherit
- # contents. We'll use this behavior until we hear from the community that
- # contents should not be inherited. If we don't hear anything for a while,
- # eliminate the old commented out behavior.
- #if not inherited and get_contents:
- if get_contents:
- info = info_association.info
- if info:
- return template.get_widgets( trans.user, info.content )
- else:
- return template.get_widgets( trans.user )
- return []
- def templates_dict( self, use_name=False ):
- """
- Returns a dict of template info
- """
- #TODO: Should have a method that allows names and labels to be returned together in a structured way
- template_data = {}
- for temp_info in self.info_association:
- template = temp_info.template
- content = temp_info.info.content
- tmp_dict = {}
- for field in template.fields:
- if use_name:
- name = field[ 'name' ]
- else:
- name = field[ 'label' ]
- tmp_dict[ name ] = content.get( field[ 'name' ] )
- template_data[template.name] = tmp_dict
- return template_data
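- # Illustrative shape of the returned mapping ( template and field names are
- # made up ):
- #   { 'Library information template': { 'Organism': 'mouse', 'Tissue': 'liver' } }
- # With use_name=True the inner keys are the fields' 'name' values rather than
- # their 'label' values.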
- def templates_json( self, use_name=False ):
- return json.dumps( self.templates_dict( use_name=use_name ) )
- class ExtendedMetadata( object ):
- def __init__(self, data):
- self.data = data
- class ExtendedMetadataIndex( object ):
- def __init__( self, extended_metadata, path, value):
- self.extended_metadata = extended_metadata
- self.path = path
- self.value = value
- class LibraryInfoAssociation( object ):
- def __init__( self, library, form_definition, info, inheritable=False ):
- self.library = library
- self.template = form_definition
- self.info = info
- self.inheritable = inheritable
- class LibraryFolderInfoAssociation( object ):
- def __init__( self, folder, form_definition, info, inheritable=False ):
- self.folder = folder
- self.template = form_definition
- self.info = info
- self.inheritable = inheritable
- class LibraryDatasetDatasetInfoAssociation( object ):
- def __init__( self, library_dataset_dataset_association, form_definition, info ):
- # TODO: need to figure out if this should be inheritable to the associated LibraryDataset
- self.library_dataset_dataset_association = library_dataset_dataset_association
- self.template = form_definition
- self.info = info
- @property
- def inheritable( self ):
- return True  # Always allow inheriting, used for replacement
- class ValidationError( object ):
- def __init__( self, message=None, err_type=None, attributes=None ):
- self.message = message
- self.err_type = err_type
- self.attributes = attributes
- class DatasetToValidationErrorAssociation( object ):
- def __init__( self, dataset, validation_error ):
- self.dataset = dataset
- self.validation_error = validation_error
- class ImplicitlyConvertedDatasetAssociation( object ):
- def __init__( self, id = None, parent = None, dataset = None, file_type = None, deleted = False, purged = False, metadata_safe = True ):
- self.id = id
- if isinstance(dataset, HistoryDatasetAssociation):
- self.dataset = dataset
- elif isinstance(dataset, LibraryDatasetDatasetAssociation):
- self.dataset_ldda = dataset
- else:
- raise AttributeError, 'Unknown dataset type provided for dataset: %s' % type( dataset )
- if isinstance(parent, HistoryDatasetAssociation):
- self.parent_hda = parent
- elif isinstance(parent, LibraryDatasetDatasetAssociation):
- self.parent_ldda = parent
- else:
- raise AttributeError, 'Unknown dataset type provided for parent: %s' % type( parent )
- self.type = file_type
- self.deleted = deleted
- self.purged = purged
- self.metadata_safe = metadata_safe
- def clear( self, purge = False, delete_dataset = True ):
- self.deleted = True
- if self.dataset:
- if delete_dataset:
- self.dataset.deleted = True
- if purge:
- self.dataset.purged = True
- if purge and self.dataset.deleted:  # Do something with purging
- self.purged = True
- try:
- os.unlink( self.file_name )
- except Exception, e:
- log.error( "Failed to purge associated file (%s) from disk: %s" % ( self.file_name, e ) )
- class Event( object ):
- def __init__( self, message=None, history=None, user=None, galaxy_session=None ):
- self.history = history
- self.galaxy_session = galaxy_session
- self.user = user
- self.tool_id = None
- self.message = message
- class GalaxySession( object ):
- def __init__( self,
- id=None,
- user=None,
- remote_host=None,
- remote_addr=None,
- referer=None,
- current_history=None,
- session_key=None,
- is_valid=False,
- prev_session_id=None ):
- self.id = id
- self.user = user
- self.remote_host = remote_host
- self.remote_addr = remote_addr
- self.referer = referer
- self.current_history = current_history
- self.session_key = session_key
- self.is_valid = is_valid
- self.prev_session_id = prev_session_id
- self.histories = []
- def add_history( self, history, association=None ):
- if association is None:
- self.histories.append( GalaxySessionToHistoryAssociation( self, history ) )
- else:
- self.histories.append( association )
- def get_disk_usage( self ):
- if self.disk_usage is None:
- return 0
- return self.disk_usage
- def set_disk_usage( self, bytes ):
- self.disk_usage = bytes
- total_disk_usage = property( get_disk_usage, set_disk_usage )
- class GalaxySessionToHistoryAssociation( object ):
- def __init__( self, galaxy_session, history ):
- self.galaxy_session = galaxy_session
- self.history = history
- class UCI( object ):
- def __init__( self ):
- self.id = None
- self.user = None
- class StoredWorkflow( object, Dictifiable):
- dict_collection_visible_keys = ( 'id', 'name', 'published', 'deleted' )
- dict_element_visible_keys = ( 'id', 'name', 'published', 'deleted' )
- def __init__( self ):
- self.id = None
- self.user = None
- self.name = None
- self.slug = None
- self.published = False
- self.latest_workflow_id = None
- self.workflows = []
- def copy_tags_from(self,target_user,source_workflow):
- for src_swta in source_workflow.owner_tags:
- new_swta = src_swta.copy()
- new_swta.user = target_user
- self.tags.append(new_swta)
- def to_dict( self, view='collection', value_mapper = None ):
- rval = super( StoredWorkflow, self ).to_dict( view=view, value_mapper = value_mapper )
- tags_str_list = []
- for tag in self.tags:
- tag_str = tag.user_tname
- if tag.value is not None:
- tag_str += ":" + tag.user_value
- tags_str_list.append( tag_str )
- rval['tags'] = tags_str_list
- return rval
- class Workflow( object, Dictifiable ):
- dict_collection_visible_keys = ( 'name', 'has_cycles', 'has_errors' )
- dict_element_visible_keys = ( 'name', 'has_cycles', 'has_errors' )
- def __init__( self ):
- self.user = None
- self.name = None
- self.has_cycles = None
- self.has_errors = None
- self.steps = []
- class WorkflowStep( object ):
- def __init__( self ):
- self.id = None
- self.type = None
- self.tool_id = None
- self.tool_inputs = None
- self.tool_errors = None
- self.position = None
- self.input_connections = []
- self.config = None
- class WorkflowStepConnection( object ):
- def __init__( self ):
- self.output_step_id = None
- self.output_name = None
- self.input_step_id = None
- self.input_name = None
- class WorkflowOutput(object):
- def __init__( self, workflow_step, output_name):
- self.workflow_step = workflow_step
- self.output_name = output_name
- class StoredWorkflowUserShareAssociation( object ):
- def __init__( self ):
- self.stored_workflow = None
- self.user = None
- class StoredWorkflowMenuEntry( object ):
- def __init__( self ):
- self.stored_workflow = None
- self.user = None
- self.order_index = None
- class WorkflowInvocation( object, Dictifiable ):
- dict_collection_visible_keys = ( 'id', 'update_time', 'workflow_id' )
- dict_element_visible_keys = ( 'id', 'update_time', 'workflow_id' )
- def to_dict( self, view='collection', value_mapper = None ):
- rval = super( WorkflowInvocation, self ).to_dict( view=view, value_mapper=value_mapper )
- if view == 'element':
- steps = {}
- for step in self.steps:
- v = step.to_dict()
- steps[str(v['order_index'])] = v
- rval['steps'] = steps
- inputs = {}
- for step in self.steps:
- if step.workflow_step.type == 'tool':
- for step_input in step.workflow_step.input_connections:
- if step_input.output_step.type == 'data_input':
- for job_input in step.job.input_datasets:
- if job_input.name == step_input.input_name:
- inputs[str(step_input.output_step.order_index)] = { "id" : job_input.dataset_id, "src" : "hda"}
- rval['inputs'] = inputs
- return rval
- class WorkflowInvocationStep( object, Dictifiable ):
- dict_collection_visible_keys = ( 'id', 'update_time', 'job_id', 'workflow_step_id' )
- dict_element_visible_keys = ( 'id', 'update_time', 'job_id', 'workflow_step_id' )
- def to_dict( self, view='collection', value_mapper = None ):
- rval = super( WorkflowInvocationStep, self ).to_dict( view=view, value_mapper=value_mapper )
- rval['order_index'] = self.workflow_step.order_index
- return rval
- class MetadataFile( object ):
- def __init__( self, dataset = None, name = None ):
- if isinstance( dataset, HistoryDatasetAssociation ):
- self.history_dataset = dataset
- elif isinstance( dataset, LibraryDatasetDatasetAssociation ):
- self.library_dataset = dataset
- self.name = name
- @property
- def file_name( self ):
- assert self.id is not None, "ID must be set before filename used (commit the object)"
- # Ensure the directory structure and the metadata file object exist
- try:
- da = self.history_dataset or self.library_dataset
- if self.object_store_id is None and da is not None:
- self.object_store_id = da.dataset.object_store_id
- if not da.dataset.object_store.exists( self, extra_dir='_metadata_files', extra_dir_at_root=True, alt_name="metadata_%d.dat" % self.id ):
- da.dataset.object_store.create( self, extra_dir='_metadata_files', extra_dir_at_root=True, alt_name="metadata_%d.dat" % self.id )
- path = da.dataset.object_store.get_filename( self, extra_dir='_metadata_files', extra_dir_at_root=True, alt_name="metadata_%d.dat" % self.id )
- return path
- except AttributeError:
- # In case we're not working with the history_dataset
- # print "Caught AttributeError"
- path = os.path.join( Dataset.file_path, '_metadata_files', *directory_hash_id( self.id ) )
- # Create directory if it does not exist
- try:
- os.makedirs( path )
- except OSError, e:
- # File Exists is okay, otherwise reraise
- if e.errno != errno.EEXIST:
- raise
- # Return filename inside hashed directory
- return os.path.abspath( os.path.join( path, "metadata_%d.dat" % self.id ) )
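- # Illustrative fallback path for id 123, assuming the default Dataset.file_path:
- #   <file_path>/_metadata_files/000/metadata_123.dat
- # where '000' is produced by directory_hash_id( 123 ).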
- class FormDefinition( object, Dictifiable ):
- # The following form_builder classes are supported by the FormDefinition class.
- supported_field_types = [ AddressField, CheckboxField, PasswordField, SelectField, TextArea, TextField, WorkflowField, WorkflowMappingField, HistoryField ]
- types = Bunch( REQUEST = 'Sequencing Request Form',
- SAMPLE = 'Sequencing Sample Form',
- EXTERNAL_SERVICE = 'External Service Information Form',
- RUN_DETAILS_TEMPLATE = 'Sample run details template',
- LIBRARY_INFO_TEMPLATE = 'Library information template',
- USER_INFO = 'User Information' )
- dict_collection_visible_keys = ( 'id', 'name' )
- dict_element_visible_keys = ( 'id', 'name', 'desc', 'form_definition_current_id', 'fields', 'layout' )
- def __init__( self, name=None, desc=None, fields=None, form_definition_current=None, form_type=None, layout=None ):
- self.name = name
- self.desc = desc
- # Use None as the default to avoid sharing one mutable list between instances
- self.fields = fields if fields is not None else []
- self.form_definition_current = form_definition_current
- self.type = form_type
- self.layout = layout
- def grid_fields( self, grid_index ):
- # Returns a dictionary whose keys are integers corresponding to field positions
- # on the grid and whose values are the field.
- gridfields = {}
- for i, f in enumerate( self.fields ):
- if str( f[ 'layout' ] ) == str( grid_index ):
- gridfields[i] = f
- return gridfields
- def get_widgets( self, user, contents=None, **kwd ):
- '''
- Return the list of widgets that comprise a form definition,
- including field contents if any.
- '''
- params = Params( kwd )
- widgets = []
- for index, field in enumerate( self.fields ):
- field_type = field[ 'type' ]
- if 'name' in field:
- field_name = field[ 'name' ]
- else:
- # Default to names like field_0, field_1, etc for backward compatibility
- # (not sure this is necessary)...
- field_name = 'field_%i' % index
- # Determine the value of the field
- if field_name in kwd:
- # The form was submitted via refresh_on_change
- if field_type == 'CheckboxField':
- value = CheckboxField.is_checked( params.get( field_name, False ) )
- else:
- value = restore_text( params.get( field_name, '' ) )
- elif contents:
- try:
- # This field has a saved value.
- value = str( contents[ field[ 'name' ] ] )
- except Exception:
- # If there was an error getting the saved value, we'll still
- # display the widget, but it will be empty.
- if field_type == 'AddressField':
- value = 'none'
- elif field_type == 'CheckboxField':
- # Since we do not have contents, set checkbox value to False
- value = False
- else:
- # Set other field types to empty string
- value = ''
- else:
- # If none of the above, then leave the field empty
- if field_type == 'AddressField':
- value = 'none'
- elif field_type == 'CheckboxField':
- # Since we do not have contents, set checkbox value to False
- value = False
- else:
- # Set other field types to the default value of the field
- value = field.get( 'default', '' )
- # Create the field widget
- field_widget = eval( field_type )( field_name )
- if field_type in [ 'TextField', 'PasswordField' ]:
- field_widget.set_size( 40 )
- field_widget.value = value
- elif field_type == 'TextArea':
- field_widget.set_size( 3, 40 )
- field_widget.value = value
- elif field_type in ['AddressField', 'WorkflowField', 'WorkflowMappingField', 'HistoryField']:
- field_widget.user = user
- field_widget.value = value
- field_widget.params = params
- elif field_type == 'SelectField':
- for option in field[ 'selectlist' ]:
- if option == value:
- field_widget.add_option( option, option, selected=True )
- else:
- field_widget.add_option( option, option )
- elif field_type == 'CheckboxField':
- field_widget.set_checked( value )
- if field[ 'required' ] == 'required':
- req = 'Required'
- else:
- req = 'Optional'
- if field[ 'helptext' ]:
- helptext='%s (%s)' % ( field[ 'helptext' ], req )
- else:
- helptext = '(%s)' % req
- widgets.append( dict( label=field[ 'label' ],
- widget=field_widget,
- helptext=helptext ) )
- return widgets
- def field_as_html( self, field ):
- """Generates disabled html for a field"""
- type = field[ 'type' ]
- form_field = None
- for field_type in self.supported_field_types:
- if type == field_type.__name__:
- # Name it AddressField, CheckboxField, etc.
- form_field = field_type( type )
- break
- if form_field:
- return form_field.get_html( disabled=True )
- # Return None if unsupported field type
- return None
- class FormDefinitionCurrent( object ):
- def __init__(self, form_definition=None):
- self.latest_form = form_definition
- class FormValues( object ):
- def __init__(self, form_def=None, content=None):
- self.form_definition = form_def
- self.content = content
- class Request( object, Dictifiable ):
- states = Bunch( NEW = 'New',
- SUBMITTED = 'In Progress',
- REJECTED = 'Rejected',
- COMPLETE = 'Complete' )
- dict_collection_visible_keys = ( 'id', 'name', 'state' )
- def __init__( self, name=None, desc=None, request_type=None, user=None, form_values=None, notification=None ):
- self.name = name
- self.desc = desc
- self.type = request_type
- self.values = form_values
- self.user = user
- self.notification = notification
- self.samples_list = []
- @property
- def state( self ):
- latest_event = self.latest_event
- if latest_event:
- return latest_event.state
- return None
- @property
- def latest_event( self ):
- if self.events:
- return self.events[0]
- return None
- @property
- def samples_have_common_state( self ):
- """
- Returns the state of this request's samples when they are all
- in one common state. Otherwise returns False.
- """
- state_for_comparison = self.samples[0].state
- if state_for_comparison is None:
- for s in self.samples:
- if s.state is not None:
- return False
- # All samples are stateless, so there is no single state to return.
- return state_for_comparison
- for s in self.samples:
- if s.state.id != state_for_comparison.id:
- return False
- return state_for_comparison
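- # Note for callers ( illustrative ): the property returns the shared SampleState
- # when one exists and a falsy value otherwise, so a simple truth test suffices:
- #   common_state = request.samples_have_common_state
- #   if common_state:
- #       request.send_email_notification( trans, common_state )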
- @property
- def last_comment( self ):
- latest_event = self.latest_event
- if latest_event:
- if latest_event.comment:
- return latest_event.comment
- return ''
- return 'No comment'
- def get_sample( self, sample_name ):
- for sample in self.samples:
- if sample.name == sample_name:
- return sample
- return None
- @property
- def is_unsubmitted( self ):
- return self.state in [ self.states.REJECTED, self.states.NEW ]
- @property
- def is_rejected( self ):
- return self.state == self.states.REJECTED
- @property
- def is_submitted( self ):
- return self.state == self.states.SUBMITTED
- @property
- def is_new( self ):
- return self.state == self.states.NEW
- @property
- def is_complete( self ):
- return self.state == self.states.COMPLETE
- @property
- def samples_without_library_destinations( self ):
- # Return all samples that are not associated with a library
- samples = []
- for sample in self.samples:
- if not sample.library:
- samples.append( sample )
- return samples
- @property
- def samples_with_bar_code( self ):
- # Return all samples that have associated bar code
- samples = []
- for sample in self.samples:
- if sample.bar_code:
- samples.append( sample )
- return samples
- def send_email_notification( self, trans, common_state, final_state=False ):
- # Check if an email notification is configured to be sent when the samples
- # are in this state
- if self.notification and common_state.id not in self.notification[ 'sample_states' ]:
- return
- comments = ''
- # Send email
- if trans.app.config.smtp_server is not None and self.notification and self.notification[ 'email' ]:
- host = trans.request.host.split( ':' )[0]
- if host in [ 'localhost', '127.0.0.1', '0.0.0.0' ]:
- host = socket.getfqdn()
- body = """
- Galaxy Sample Tracking Notification
- ===================================
- User: %(user)s
- Sequencing request: %(request_name)s
- Sequencer configuration: %(request_type)s
- Sequencing request state: %(request_state)s
- Number of samples: %(num_samples)s
- All samples in state: %(sample_state)s
- """
- values = dict( user=self.user.email,
- request_name=self.name,
- request_type=self.type.name,
- request_state=self.state,
- num_samples=str( len( self.samples ) ),
- sample_state=common_state.name,
- create_time=self.create_time,
- submit_time=self.create_time )
- body = body % values
- # check if this is the final state of the samples
- if final_state:
- txt = "Sample Name -> Data Library/Folder\r\n"
- for s in self.samples:
- if s.library:
- library_name = s.library.name
- folder_name = s.folder.name
- else:
- library_name = 'No target data library'
- folder_name = 'No target data library folder'
- txt = txt + "%s -> %s/%s\r\n" % ( s.name, library_name, folder_name )
- body = body + txt
- to = self.notification['email']
- frm = 'galaxy-no-reply@' + host
- subject = "Galaxy Sample Tracking notification: '%s' sequencing request" % self.name
- try:
- send_mail( frm, to, subject, body, trans.app.config )
- comments = "Email notification sent to %s." % ", ".join( to ).strip().strip( ',' )
- except Exception, e:
- comments = "Email notification failed. (%s)" % str( e )
- # update the request history with the email notification event
- elif not trans.app.config.smtp_server:
- comments = "Email notification failed as SMTP server not set in config file"
- if comments:
- event = RequestEvent( self, self.state, comments )
- trans.sa_session.add( event )
- trans.sa_session.flush()
- return comments
- class RequestEvent( object ):
- def __init__(self, request=None, request_state=None, comment=''):
- self.request = request
- self.state = request_state
- self.comment = comment
- class ExternalService( object ):
- data_transfer_protocol = Bunch( HTTP = 'http',
- HTTPS = 'https',
- SCP = 'scp' )
- def __init__( self, name=None, description=None, external_service_type_id=None, version=None, form_definition_id=None, form_values_id=None, deleted=None ):
- self.name = name
- self.description = description
- self.external_service_type_id = external_service_type_id
- self.version = version
- self.form_definition_id = form_definition_id
- self.form_values_id = form_values_id
- self.deleted = deleted
- self.label = None # Used in the request_type controller's __build_external_service_select_field() method
- def get_external_service_type( self, trans ):
- return trans.app.external_service_types.all_external_service_types[ self.external_service_type_id ]
- def load_data_transfer_settings( self, trans ):
- trans.app.external_service_types.reload( self.external_service_type_id )
- self.data_transfer = {}
- external_service_type = self.get_external_service_type( trans )
- for data_transfer_protocol, data_transfer_obj in external_service_type.data_transfer.items():
- if data_transfer_protocol == self.data_transfer_protocol.SCP:
- scp_configs = {}
- automatic_transfer = data_transfer_obj.config.get( 'automatic_transfer', 'false' )
- scp_configs[ 'automatic_transfer' ] = galaxy.util.string_as_bool( automatic_transfer )
- scp_configs[ 'host' ] = self.form_values.content.get( data_transfer_obj.config.get( 'host', '' ), '' )
- scp_configs[ 'user_name' ] = self.form_values.content.get( data_transfer_obj.config.get( 'user_name', '' ), '' )
- scp_configs[ 'password' ] = self.form_values.content.get( data_transfer_obj.config.get( 'password', '' ), '' )
- scp_configs[ 'data_location' ] = self.form_values.content.get( data_transfer_obj.config.get( 'data_location', '' ), '' )
- scp_configs[ 'rename_dataset' ] = self.form_values.content.get( data_transfer_obj.config.get( 'rename_dataset', '' ), '' )
- self.data_transfer[ self.data_transfer_protocol.SCP ] = scp_configs
- if data_transfer_protocol == self.data_transfer_protocol.HTTP:
- http_configs = {}
- automatic_transfer = data_transfer_obj.config.get( 'automatic_transfer', 'false' )
- http_configs[ 'automatic_transfer' ] = galaxy.util.string_as_bool( automatic_transfer )
- self.data_transfer[ self.data_transfer_protocol.HTTP ] = http_configs
- def populate_actions( self, trans, item, param_dict=None ):
- return self.get_external_service_type( trans ).actions.populate( self, item, param_dict=param_dict )
- class RequestType( object, Dictifiable ):
- dict_collection_visible_keys = ( 'id', 'name', 'desc' )
- dict_element_visible_keys = ( 'id', 'name', 'desc', 'request_form_id', 'sample_form_id' )
- rename_dataset_options = Bunch( NO = 'Do not rename',
- SAMPLE_NAME = 'Prepend sample name',
- EXPERIMENT_NAME = 'Prepend experiment name',
- EXPERIMENT_AND_SAMPLE_NAME = 'Prepend experiment and sample name')
- permitted_actions = get_permitted_actions( filter='REQUEST_TYPE' )
- def __init__( self, name=None, desc=None, request_form=None, sample_form=None ):
- self.name = name
- self.desc = desc
- self.request_form = request_form
- self.sample_form = sample_form
- @property
- def external_services( self ):
- external_services = []
- for rtesa in self.external_service_associations:
- external_services.append( rtesa.external_service )
- return external_services
- def get_external_service( self, external_service_type_id ):
- for rtesa in self.external_service_associations:
- if rtesa.external_service.external_service_type_id == external_service_type_id:
- return rtesa.external_service
- return None
- def get_external_services_for_manual_data_transfer( self, trans ):
- '''Returns all external services that use manual data transfer'''
- external_services = []
- for rtesa in self.external_service_associations:
- external_service = rtesa.external_service
- # load data transfer settings
- external_service.load_data_transfer_settings( trans )
- if external_service.data_transfer:
- for transfer_type, transfer_type_settings in external_service.data_transfer.items():
- if not transfer_type_settings[ 'automatic_transfer' ]:
- external_services.append( external_service )
- return external_services
- def delete_external_service_associations( self, trans ):
- '''Deletes all external service associations.'''
- flush_needed = False
- for rtesa in self.external_service_associations:
- trans.sa_session.delete( rtesa )
- flush_needed = True
- if flush_needed:
- trans.sa_session.flush()
- def add_external_service_association( self, trans, external_service ):
- rtesa = trans.model.RequestTypeExternalServiceAssociation( self, external_service )
- trans.sa_session.add( rtesa )
- trans.sa_session.flush()
- @property
- def final_sample_state( self ):
- # The states mapper for this object orders ascending
- return self.states[-1]
- @property
- def run_details( self ):
- if self.run:
- # self.run[0] is a RequestTypeRunAssociation
- return self.run[0]
- return None
- def get_template_widgets( self, trans, get_contents=True ):
- # See if we have any associated templates. The get_contents param
- # is passed by callers that are inheriting a template - these are
- # usually new samples for which we want to include template fields,
- # but not necessarily the contents of the inherited template.
- rtra = self.run_details
- if rtra:
- run = rtra.run
- template = run.template
- if get_contents:
- # See if we have any field contents
- info = run.info
- if info:
- return template.get_widgets( trans.user, contents=info.content )
- return template.get_widgets( trans.user )
- return []
- class RequestTypeExternalServiceAssociation( object ):
- def __init__( self, request_type, external_service ):
- self.request_type = request_type
- self.external_service = external_service
- class RequestTypePermissions( object ):
- def __init__( self, action, request_type, role ):
- self.action = action
- self.request_type = request_type
- self.role = role
- class Sample( object, Dictifiable ):
- # The following form_builder classes are supported by the Sample class.
- supported_field_types = [ CheckboxField, SelectField, TextField, WorkflowField, WorkflowMappingField, HistoryField ]
- bulk_operations = Bunch( CHANGE_STATE = 'Change state',
- SELECT_LIBRARY = 'Select data library and folder' )
- dict_collection_visible_keys = ( 'id', 'name' )
- def __init__(self, name=None, desc=None, request=None, form_values=None, bar_code=None, library=None, folder=None, workflow=None, history=None):
- self.name = name
- self.desc = desc
- self.request = request
- self.values = form_values
- self.bar_code = bar_code
- self.library = library
- self.folder = folder
- self.history = history
- self.workflow = workflow
- @property
- def state( self ):
- latest_event = self.latest_event
- if latest_event:
- return latest_event.state
- return None
- @property
- def latest_event( self ):
- if self.events:
- return self.events[0]
- return None
- @property
- def adding_to_library_dataset_files( self ):
- adding_to_library_datasets = []
- for dataset in self.datasets:
- if dataset.status == SampleDataset.transfer_status.ADD_TO_LIBRARY:
- adding_to_library_datasets.append( dataset )
- return adding_to_library_datasets
- @property
- def inprogress_dataset_files( self ):
- inprogress_datasets = []
- for dataset in self.datasets:
- if dataset.status not in [ SampleDataset.transfer_status.NOT_STARTED, SampleDataset.transfer_status.COMPLETE ]:
- inprogress_datasets.append( dataset )
- return inprogress_datasets
- @property
- def queued_dataset_files( self ):
- queued_datasets = []
- for dataset in self.datasets:
- if dataset.status == SampleDataset.transfer_status.IN_QUEUE:
- queued_datasets.append( dataset )
- return queued_datasets
- @property
- def transfer_error_dataset_files( self ):
- transfer_error_datasets = []
- for dataset in self.datasets:
- if dataset.status == SampleDataset.transfer_status.ERROR:
- transfer_error_datasets.append( dataset )
- return transfer_error_datasets
- @property
- def transferred_dataset_files( self ):
- transferred_datasets = []
- for dataset in self.datasets:
- if dataset.status == SampleDataset.transfer_status.COMPLETE:
- transferred_datasets.append( dataset )
- return transferred_datasets
- @property
- def transferring_dataset_files( self ):
- transferring_datasets = []
- for dataset in self.datasets:
- if dataset.status == SampleDataset.transfer_status.TRANSFERRING:
- transferring_datasets.append( dataset )
- return transferring_datasets
- @property
- def untransferred_dataset_files( self ):
- untransferred_datasets = []
- for dataset in self.datasets:
- if dataset.status != SampleDataset.transfer_status.COMPLETE:
- untransferred_datasets.append( dataset )
- return untransferred_datasets
- def get_untransferred_dataset_size( self, filepath, scp_configs ):
- def print_ticks( d ):
- pass
- error_msg = 'Error encountered in determining the file size of %s on the external service.' % filepath
- if not scp_configs['host'] or not scp_configs['user_name'] or not scp_configs['password']:
- return error_msg
- login_str = '%s@%s' % ( scp_configs['user_name'], scp_configs['host'] )
- cmd = 'ssh %s "du -sh \'%s\'"' % ( login_str, filepath )
- try:
- output = pexpect.run( cmd,
- events={ '.ssword:*': scp_configs['password']+'\r\n',
- pexpect.TIMEOUT:print_ticks},
- timeout=10 )
- except Exception:
- return error_msg
- # cleanup the output to get just the file size
- return output.replace( filepath, '' )\
- .replace( 'Password:', '' )\
- .replace( "'s password:", '' )\
- .replace( login_str, '' )\
- .strip()
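- # The size check above shells out over ssh; for user 'alice', host
- # 'seq.example.org' and path '/runs/sample1.fastq' ( all illustrative ) the
- # command is:
- #   ssh alice@seq.example.org "du -sh '/runs/sample1.fastq'"
- # pexpect answers the password prompt, and the raw du output is then stripped
- # of the path, prompt text and login string, leaving only the size.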
- @property
- def run_details( self ):
- # self.runs is a list of SampleRunAssociations ordered descending on update_time.
- if self.runs:
- # Always use the latest run details template, self.runs[0] is a SampleRunAssociation
- return self.runs[0]
- # Inherit this sample's RequestType run details, if one exists.
- return self.request.type.run_details
- def get_template_widgets( self, trans, get_contents=True ):
- # Samples have a one-to-many relationship with run details, so we return the
- # widgets for last associated template. The get_contents param will populate
- # the widget fields with values from the template inherited from the sample's
- # RequestType.
- template = None
- if self.runs:
- # The self.runs mapper orders descending on update_time.
- run = self.runs[0].run
- template = run.template
- if template is None:
- # There are no run details associated with this sample, so inherit the
- # run details template from the sample's RequestType.
- rtra = self.request.type.run_details
- if rtra:
- run = rtra.run
- template = run.template
- if template:
- if get_contents:
- # See if we have any field contents
- info = run.info
- if info:
- return template.get_widgets( trans.user, contents=info.content )
- return template.get_widgets( trans.user )
- return []
- def populate_external_services( self, param_dict = None, trans = None ):
- if self.request and self.request.type:
- return [ service.populate_actions( item = self, param_dict = param_dict, trans = trans ) for service in self.request.type.external_services ]
- class SampleState( object ):
- def __init__(self, name=None, desc=None, request_type=None):
- self.name = name
- self.desc = desc
- self.request_type = request_type
- class SampleEvent( object ):
- def __init__(self, sample=None, sample_state=None, comment=''):
- self.sample = sample
- self.state = sample_state
- self.comment = comment
- class SampleDataset( object ):
- transfer_status = Bunch( NOT_STARTED = 'Not started',
- IN_QUEUE = 'In queue',
- TRANSFERRING = 'Transferring dataset',
- ADD_TO_LIBRARY = 'Adding to data library',
- COMPLETE = 'Complete',
- ERROR = 'Error' )
- def __init__( self, sample=None, name=None, file_path=None, status=None, error_msg=None, size=None, external_service=None ):
- self.sample = sample
- self.name = name
- self.file_path = file_path
- self.status = status
- self.error_msg = error_msg
- self.size = size
- self.external_service = external_service
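- # Lifecycle sketch (hypothetical transfer flow, following the ordering of
- # the transfer_status Bunch above): NOT_STARTED -> IN_QUEUE -> TRANSFERRING
- # -> ADD_TO_LIBRARY -> COMPLETE, with ERROR (and error_msg set) on failure:
- #
- #     sd = SampleDataset( sample=sample, name='reads.fastq',
- #                         file_path='/data/run42/reads.fastq',
- #                         status=SampleDataset.transfer_status.NOT_STARTED )
- #     sd.status = SampleDataset.transfer_status.IN_QUEUE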
- class Run( object ):
- def __init__( self, form_definition, form_values, subindex=None ):
- self.template = form_definition
- self.info = form_values
- self.subindex = subindex
- class RequestTypeRunAssociation( object ):
- def __init__( self, request_type, run ):
- self.request_type = request_type
- self.run = run
- class SampleRunAssociation( object ):
- def __init__( self, sample, run ):
- self.sample = sample
- self.run = run
- class UserAddress( object ):
- def __init__( self, user=None, desc=None, name=None, institution=None,
- address=None, city=None, state=None, postal_code=None,
- country=None, phone=None ):
- self.user = user
- self.desc = desc
- self.name = name
- self.institution = institution
- self.address = address
- self.city = city
- self.state = state
- self.postal_code = postal_code
- self.country = country
- self.phone = phone
- def get_html(self):
- html = ''
- if self.name:
- html = html + self.name
- if self.institution:
- html = html + '<br/>' + self.institution
- if self.address:
- html = html + '<br/>' + self.address
- if self.city:
- html = html + '<br/>' + self.city
- if self.state:
- html = html + ' ' + self.state
- if self.postal_code:
- html = html + ' ' + self.postal_code
- if self.country:
- html = html + '<br/>' + self.country
- if self.phone:
- html = html + '<br/>' + 'Phone: ' + self.phone
- return html
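- # Output sketch: fields are joined with '<br/>' separators (city, state and
- # postal code share one line) and are emitted verbatim, without HTML
- # escaping:
- #
- #     UserAddress( name='Jane Doe', city='Boston', state='MA',
- #                  postal_code='02115' ).get_html()
- #     # -> 'Jane Doe<br/>Boston MA 02115'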
- class UserOpenID( object ):
- def __init__( self, user=None, session=None, openid=None ):
- self.user = user
- self.session = session
- self.openid = openid
- class Page( object, Dictifiable ):
- dict_element_visible_keys = [ 'id', 'title', 'latest_revision_id', 'slug', 'published', 'importable', 'deleted' ]
- def __init__( self ):
- self.id = None
- self.user = None
- self.title = None
- self.slug = None
- self.latest_revision_id = None
- self.revisions = []
- self.importable = None
- self.published = None
- def to_dict( self, view='element' ):
- rval = super( Page, self ).to_dict( view=view )
- rval['revision_ids'] = [ r.id for r in self.revisions ]
- return rval
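- # Shape sketch of the serialized page (hypothetical values); revision_ids
- # is appended to the keys listed in dict_element_visible_keys:
- #
- #     page.to_dict()
- #     # -> roughly { 'id': 7, 'title': 'My page', 'latest_revision_id': 12,
- #     #              'slug': 'my-page', 'published': False, 'importable': False,
- #     #              'deleted': False, 'revision_ids': [ 11, 12 ] }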
- class PageRevision( object, Dictifiable ):
- dict_element_visible_keys = [ 'id', 'page_id', 'title', 'content' ]
- def __init__( self ):
- self.user = None
- self.title = None
- self.content = None
- def to_dict( self, view='element' ):
- rval = super( PageRevision, self ).to_dict( view=view )
- rval['create_time'] = str(self.create_time)
- rval['update_time'] = str(self.update_time)
- return rval
- class PageUserShareAssociation( object ):
- def __init__( self ):
- self.page = None
- self.user = None
- class Visualization( object ):
- def __init__( self, id=None, user=None, type=None, title=None, dbkey=None, slug=None, latest_revision=None ):
- self.id = id
- self.user = user
- self.type = type
- self.title = title
- self.dbkey = dbkey
- self.slug = slug
- self.latest_revision = latest_revision
- self.revisions = []
- if self.latest_revision:
- self.revisions.append( latest_revision )
- def copy( self, user=None, title=None ):
- """
- Provide a copy of this visualization containing only its latest revision.
- """
- # NOTE: this is a shallow copy: the config is copied as-is, and the
- # datasets are neither copied nor given new ids. This means the copying
- # user does not get a copy of the data in his/her history, and the user
- # who owns the datasets may delete them, making them inaccessible to
- # the current user.
- # TODO: a deep copy option is needed.
- if not user:
- user = self.user
- if not title:
- title = self.title
- copy_viz = Visualization( user=user, type=self.type, title=title, dbkey=self.dbkey )
- copy_revision = self.latest_revision.copy( visualization=copy_viz )
- copy_viz.latest_revision = copy_revision
- return copy_viz
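- # Shallow-copy sketch (hypothetical viz): the copy gets a fresh latest
- # revision, but that revision shares its config with the original (see the
- # NOTE above):
- #
- #     dup = viz.copy( title='%s copy' % viz.title )
- #     assert dup.latest_revision is not viz.latest_revision
- #     assert dup.latest_revision.config is viz.latest_revision.config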
- class VisualizationRevision( object ):
- def __init__( self, visualization=None, title=None, dbkey=None, config=None ):
- self.id = None
- self.visualization = visualization
- self.title = title
- self.dbkey = dbkey
- self.config = config
- def copy( self, visualization=None ):
- """
- Returns a copy of this object.
- """
- if not visualization:
- visualization = self.visualization
- return VisualizationRevision(
- visualization=visualization,
- title=self.title,
- dbkey=self.dbkey,
- config=self.config
- )
- class VisualizationUserShareAssociation( object ):
- def __init__( self ):
- self.visualization = None
- self.user = None
- class TransferJob( object ):
- # These states are used both by the transfer manager's IPC and the object
- # state in the database. Not all states are used by both.
- states = Bunch( NEW = 'new',
- UNKNOWN = 'unknown',
- PROGRESS = 'progress',
- RUNNING = 'running',
- ERROR = 'error',
- DONE = 'done' )
- terminal_states = [ states.ERROR,
- states.DONE ]
- def __init__( self, state=None, path=None, info=None, pid=None, socket=None, params=None ):
- self.state = state
- self.path = path
- self.info = info
- self.pid = pid
- self.socket = socket
- self.params = params
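- # Polling sketch (hypothetical loop): terminal_states tells callers when to
- # stop tracking a transfer, regardless of whether it succeeded:
- #
- #     if tj.state in TransferJob.terminal_states:
- #         pass  # finished: DONE or ERROR
- #     elif tj.state == TransferJob.states.RUNNING:
- #         pass  # still transferring; pid/socket identify the worker process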
- class Tag ( object ):
- def __init__( self, id=None, type=None, parent_id=None, name=None ):
- self.id = id
- self.type = type
- self.parent_id = parent_id
- self.name = name
- def __str__( self ):
- # All fields, including type, may be None, so format everything with %s.
- return "Tag(id=%s, type=%s, parent_id=%s, name=%s)" % ( self.id, self.type, self.parent_id, self.name )
- class ItemTagAssociation ( object, Dictifiable ):
- dict_collection_visible_keys = ( 'id', 'user_tname', 'user_value' )
- dict_element_visible_keys = dict_collection_visible_keys
- def __init__( self, id=None, user=None, item_id=None, tag_id=None, user_tname=None, value=None ):
- self.id = id
- self.user = user
- self.item_id = item_id
- self.tag_id = tag_id
- self.user_tname = user_tname
- self.value = value
- self.user_value = None
- def copy(self):
- new_ta = type(self)()
- new_ta.tag_id = self.tag_id
- new_ta.user_tname = self.user_tname
- new_ta.value = self.value
- new_ta.user_value = self.user_value
- return new_ta
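- # Note that copy() instantiates type( self )() rather than a bare
- # ItemTagAssociation, so each subclass below copies to its own type:
- #
- #     ta = HistoryTagAssociation( user_tname='name', value='rnaseq' )
- #     assert isinstance( ta.copy(), HistoryTagAssociation )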
- class HistoryTagAssociation ( ItemTagAssociation ):
- pass
- class DatasetTagAssociation ( ItemTagAssociation ):
- pass
- class HistoryDatasetAssociationTagAssociation ( ItemTagAssociation ):
- pass
- class PageTagAssociation ( ItemTagAssociation ):
- pass
- class WorkflowStepTagAssociation ( ItemTagAssociation ):
- pass
- class StoredWorkflowTagAssociation ( ItemTagAssociation ):
- pass
- class VisualizationTagAssociation ( ItemTagAssociation ):
- pass
- class ToolTagAssociation( ItemTagAssociation ):
- def __init__( self, id=None, user=None, tool_id=None, tag_id=None, user_tname=None, value=None ):
- self.id = id
- self.user = user
- self.tool_id = tool_id
- self.tag_id = tag_id
- self.user_tname = user_tname
- self.value = value
- self.user_value = None
- # Item annotation classes.
- class HistoryAnnotationAssociation( object ):
- pass
- class HistoryDatasetAssociationAnnotationAssociation( object ):
- pass
- class StoredWorkflowAnnotationAssociation( object ):
- pass
- class WorkflowStepAnnotationAssociation( object ):
- pass
- class PageAnnotationAssociation( object ):
- pass
- class VisualizationAnnotationAssociation( object ):
- pass
- # Item rating classes.
- class ItemRatingAssociation( object ):
- def __init__( self, id=None, user=None, item=None, rating=0 ):
- self.id = id
- self.user = user
- self.item = item
- self.rating = rating
- def set_item( self, item ):
- """ Set association's item. """
- pass
- class HistoryRatingAssociation( ItemRatingAssociation ):
- def set_item( self, history ):
- self.history = history
- class HistoryDatasetAssociationRatingAssociation( ItemRatingAssociation ):
- def set_item( self, history_dataset_association ):
- self.history_dataset_association = history_dataset_association
- class StoredWorkflowRatingAssociation( ItemRatingAssociation ):
- def set_item( self, stored_workflow ):
- self.stored_workflow = stored_workflow
- class PageRatingAssociation( ItemRatingAssociation ):
- def set_item( self, page ):
- self.page = page
- class VisualizationRatingAssociation( ItemRatingAssociation ):
- def set_item( self, visualization ):
- self.visualization = visualization
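- # Template-method sketch: the base set_item() is a no-op; each subclass
- # stores the rated item under the attribute its mapper expects, so generic
- # code can rate any item type uniformly (hypothetical objects):
- #
- #     rating = HistoryRatingAssociation( user=user, rating=5 )
- #     rating.set_item( history )  # stores it as rating.history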
- # Data Manager classes.
- class DataManagerHistoryAssociation( object ):
- def __init__( self, id=None, history=None, user=None ):
- self.id = id
- self.history = history
- self.user = user
- class DataManagerJobAssociation( object ):
- def __init__( self, id=None, job=None, data_manager_id=None ):
- self.id = id
- self.job = job
- self.data_manager_id = data_manager_id
- # End of Data Manager classes.
- class UserPreference ( object ):
- def __init__( self, name=None, value=None):
- self.name = name
- self.value = value
- class UserAction( object ):
- def __init__( self, id=None, create_time=None, user_id=None, session_id=None, action=None, params=None, context=None):
- self.id = id
- self.create_time = create_time
- self.user_id = user_id
- self.session_id = session_id
- self.action = action
- self.params = params
- self.context = context
- class APIKeys( object ):
- def __init__( self, id=None, user_id=None, key=None):
- self.id = id
- self.user_id = user_id
- self.key = key