
/lib/galaxy/model/__init__.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 3547 lines


   1"""
   2Galaxy data model classes
   3
   4Naming: try to use class names that have a distinct plural form so that
   5the relationship cardinalities are obvious (e.g. prefer Dataset to Data)
   6"""
   7
   8from galaxy import eggs
   9eggs.require("pexpect")
  10
  11import codecs
  12import errno
  13import logging
  14import operator
  15import os
  16import pexpect
  17import json
  18import socket
  19import time
  20from uuid import UUID, uuid4
  21from string import Template
  22from itertools import ifilter
  23from itertools import chain
  24
  25import galaxy.datatypes
  26import galaxy.datatypes.registry
  27import galaxy.security.passwords
  28from galaxy.datatypes.metadata import MetadataCollection
  29from galaxy.model.item_attrs import Dictifiable, UsesAnnotations
  30from galaxy.security import get_permitted_actions
  31from galaxy.util import is_multi_byte, nice_size, Params, restore_text, send_mail
  32from galaxy.util import ready_name_for_url
  33from galaxy.util.bunch import Bunch
  34from galaxy.util.hash_util import new_secure_hash
  35from galaxy.util.directory_hash import directory_hash_id
  36from galaxy.web.framework.helpers import to_unicode
  37from galaxy.web.form_builder import (AddressField, CheckboxField, HistoryField,
  38        PasswordField, SelectField, TextArea, TextField, WorkflowField,
  39        WorkflowMappingField)
  40from sqlalchemy.orm import object_session
  41from sqlalchemy.orm import joinedload
  42from sqlalchemy.sql.expression import func
  43from sqlalchemy import not_
  44
  45log = logging.getLogger( __name__ )
  46
  47datatypes_registry = galaxy.datatypes.registry.Registry()
  48# Default Value Required for unit tests
  49datatypes_registry.load_datatypes()
  50
  51# When constructing filters with in for a fixed set of ids, maximum
  52# number of items to place in the IN statement. Different databases
  53# are going to have different limits so it is likely best to not let
  54# this be unlimited - filter in Python if over this limit.
  55MAX_IN_FILTER_LENGTH = 100
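
# A minimal sketch of how this limit is applied; it mirrors the logic in
# History.__filter_contents below. Short id lists become an SQL IN clause,
# longer ones fall back to filtering in Python:
#
#     if len( ids ) < MAX_IN_FILTER_LENGTH:
#         query = query.filter( content_class.id.in_( ids ) )
#     else:
#         python_filter = lambda content: content.id in ids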

class NoConverterException(Exception):
    def __init__(self, value):
        self.value = value
    def __str__(self):
        return repr(self.value)

class ConverterDependencyException(Exception):
    def __init__(self, value):
        self.value = value
    def __str__(self):
        return repr(self.value)

def set_datatypes_registry( d_registry ):
    """
    Set up datatypes_registry
    """
    global datatypes_registry
    datatypes_registry = d_registry


class HasName:

    def get_display_name( self ):
        """
        These objects have a name attribute that can be either a string or a
        unicode object. If it is a string, convert it to unicode assuming
        'utf-8' encoding.
        """
        name = self.name
        if isinstance(name, str):
            name = unicode(name, 'utf-8')
        return name


class User( object, Dictifiable ):
    """
    Data for a Galaxy user or admin and relations to their
    histories, credentials, and roles.
    """
    use_pbkdf2 = True
    # attributes that will be accessed and returned when calling to_dict( view='collection' )
    dict_collection_visible_keys = ( 'id', 'email' )
    # attributes that will be accessed and returned when calling to_dict( view='element' )
    dict_element_visible_keys = ( 'id', 'email', 'username', 'total_disk_usage', 'nice_total_disk_usage' )

    def __init__( self, email=None, password=None ):
        self.email = email
        self.password = password
        self.external = False
        self.deleted = False
        self.purged = False
        self.active = False
        self.activation_token = None
        self.username = None
        # Relationships
        self.histories = []
        self.credentials = []
        #? self.roles = []

    def set_password_cleartext( self, cleartext ):
        """
        Set user password to the digest of `cleartext`.
        """
        if User.use_pbkdf2:
            self.password = galaxy.security.passwords.hash_password( cleartext )
        else:
            self.password = new_secure_hash( text_type=cleartext )

    def check_password( self, cleartext ):
        """
        Check if `cleartext` matches user password when hashed.
        """
        return galaxy.security.passwords.check_password( cleartext, self.password )

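    # A minimal round-trip sketch, assuming the galaxy.security.passwords
    # helpers imported above are available:
    #
    #     user = User( email='foo@example.com' )
    #     user.set_password_cleartext( 's3cret' )
    #     assert user.check_password( 's3cret' )
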
    def all_roles( self ):
        """
        Return a unique list of Roles associated with this user or any of their groups.
        """
        roles = [ ura.role for ura in self.roles ]
        for group in [ uga.group for uga in self.groups ]:
            for role in [ gra.role for gra in group.roles ]:
                if role not in roles:
                    roles.append( role )
        return roles

    def get_disk_usage( self, nice_size=False ):
        """
        Return byte count of disk space used by user or a human-readable
        string if `nice_size` is `True`.
        """
        rval = 0
        if self.disk_usage is not None:
            rval = self.disk_usage
        if nice_size:
            rval = galaxy.datatypes.data.nice_size( rval )
        return rval

    def set_disk_usage( self, bytes ):
        """
        Manually set the disk space used by a user to `bytes`.
        """
        self.disk_usage = bytes

    total_disk_usage = property( get_disk_usage, set_disk_usage )

    @property
    def nice_total_disk_usage( self ):
        """
        Return byte count of disk space used in a human-readable string.
        """
        return self.get_disk_usage( nice_size=True )

    def calculate_disk_usage( self ):
        """
        Return byte count total of disk space used by all non-purged, non-library
        HDAs in non-purged histories.
        """
        # maintain a list so that we don't double count
        dataset_ids = []
        total = 0
        # this can be a huge number and can run out of memory, so we avoid the mappers
        db_session = object_session( self )
        for history in db_session.query( History ).enable_eagerloads( False ).filter_by( user_id=self.id, purged=False ).yield_per( 1000 ):
            for hda in db_session.query( HistoryDatasetAssociation ).enable_eagerloads( False ).filter_by( history_id=history.id, purged=False ).yield_per( 1000 ):
                #TODO: def hda.counts_toward_disk_usage():
                #   return ( not self.dataset.purged and not self.dataset.library_associations )
                if hda.dataset.id not in dataset_ids and not hda.dataset.purged and not hda.dataset.library_associations:
                    dataset_ids.append( hda.dataset.id )
                    total += hda.dataset.get_total_size()
        return total
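
    # A minimal maintenance sketch: refresh the cached disk_usage column
    # from the authoritative walk above.
    #
    #     user.set_disk_usage( user.calculate_disk_usage() )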

    @staticmethod
    def user_template_environment( user ):
        """

        >>> env = User.user_template_environment(None)
        >>> env['__user_email__']
        'Anonymous'
        >>> env['__user_id__']
        'Anonymous'
        >>> user = User('foo@example.com')
        >>> user.id = 6
        >>> user.username = 'foo2'
        >>> env = User.user_template_environment(user)
        >>> env['__user_id__']
        '6'
        >>> env['__user_name__']
        'foo2'
        """
        if user:
            user_id = '%d' % user.id
            user_email = str( user.email )
            user_name = str( user.username )
        else:
            user = None
            user_id = 'Anonymous'
            user_email = 'Anonymous'
            user_name = 'Anonymous'
        environment = {}
        environment[ '__user__' ] = user
        environment[ '__user_id__' ] = environment[ 'userId' ] = user_id
        environment[ '__user_email__' ] = environment[ 'userEmail' ] = user_email
        environment[ '__user_name__' ] = user_name
        return environment

    @staticmethod
    def expand_user_properties( user, in_string ):
        """
        Expand variables of the form ``${__user_name__}`` in `in_string`
        using the environment built by `user_template_environment`.
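
        Example (illustrative, following the doctest style used above):

        >>> user = User('foo@example.com')
        >>> user.id = 6
        >>> user.username = 'foo2'
        >>> User.expand_user_properties( user, '${__user_name__}-output' )
        'foo2-output'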
        """
        environment = User.user_template_environment( user )
        return Template( in_string ).safe_substitute( environment )


class Job( object, Dictifiable ):
    """
    A job represents a request to run a tool given input datasets, tool
    parameters, and output datasets.
    """
    dict_collection_visible_keys = [ 'id', 'state', 'exit_code', 'update_time', 'create_time' ]
    dict_element_visible_keys = [ 'id', 'state', 'exit_code', 'update_time', 'create_time' ]

    states = Bunch( NEW = 'new',
                    UPLOAD = 'upload',
                    WAITING = 'waiting',
                    QUEUED = 'queued',
                    RUNNING = 'running',
                    OK = 'ok',
                    ERROR = 'error',
                    PAUSED = 'paused',
                    DELETED = 'deleted',
                    DELETED_NEW = 'deleted_new' )
    # Please include an accessor (get/set pair) for any new columns/members.
    def __init__( self ):
        self.session_id = None
        self.user_id = None
        self.tool_id = None
        self.tool_version = None
        self.command_line = None
        self.param_filename = None
        self.parameters = []
        self.input_datasets = []
        self.output_datasets = []
        self.input_library_datasets = []
        self.output_library_datasets = []
        self.state = Job.states.NEW
        self.info = None
        self.job_runner_name = None
        self.job_runner_external_id = None
        self.destination_id = None
        self.destination_params = None
        self.post_job_actions = []
        self.imported = False
        self.handler = None
        self.exit_code = None

    @property
    def finished( self ):
        states = self.states
        return self.state in [
            states.OK,
            states.ERROR,
            states.DELETED,
            states.DELETED_NEW,
        ]

    # TODO: Add accessors for members defined in SQL Alchemy for the Job table and
    # for the mapper defined to the Job table.
    def get_external_output_metadata( self ):
        """
        The external_output_metadata is currently a reference from Job to
        JobExternalOutputMetadata. It exists for a job but not a task.
        """
        return self.external_output_metadata
    def get_session_id( self ):
        return self.session_id
    def get_user_id( self ):
        return self.user_id
    def get_tool_id( self ):
        return self.tool_id
    def get_tool_version( self ):
        return self.tool_version
    def get_command_line( self ):
        return self.command_line
    def get_param_filename( self ):
        return self.param_filename
    def get_parameters( self ):
        return self.parameters
    def get_input_datasets( self ):
        return self.input_datasets
    def get_output_datasets( self ):
        return self.output_datasets
    def get_input_library_datasets( self ):
        return self.input_library_datasets
    def get_output_library_datasets( self ):
        return self.output_library_datasets
    def get_state( self ):
        return self.state
    def get_info( self ):
        return self.info
    def get_job_runner_name( self ):
        # This differs from the Task class in that job_runner_name is
        # accessed instead of task_runner_name. Note that the field
        # runner_name is not the same thing.
        return self.job_runner_name
    def get_job_runner_external_id( self ):
        # This is different from the Task just in the member accessed:
        return self.job_runner_external_id
    def get_post_job_actions( self ):
        return self.post_job_actions
    def get_imported( self ):
        return self.imported
    def get_handler( self ):
        return self.handler
    def get_params( self ):
        return self.params
    def get_user( self ):
        # This is defined in the SQL Alchemy mapper as a relation to the User.
        return self.user
    def get_id( self ):
        # This is defined in the SQL Alchemy's Job table (and not in the model).
        return self.id
    def get_tasks( self ):
        # The tasks member is part of a reference in the SQL Alchemy schema:
        return self.tasks
    def get_id_tag( self ):
        """
        Return a tag that can be useful in identifying a Job.
        This simply returns the Job's id.
        """
        return "%s" % self.id

    def set_session_id( self, session_id ):
        self.session_id = session_id
    def set_user_id( self, user_id ):
        self.user_id = user_id
    def set_tool_id( self, tool_id ):
        self.tool_id = tool_id
    def set_tool_version( self, tool_version ):
        self.tool_version = tool_version
    def set_command_line( self, command_line ):
        self.command_line = command_line
    def set_param_filename( self, param_filename ):
        self.param_filename = param_filename
    def set_parameters( self, parameters ):
        self.parameters = parameters
    def set_input_datasets( self, input_datasets ):
        self.input_datasets = input_datasets
    def set_output_datasets( self, output_datasets ):
        self.output_datasets = output_datasets
    def set_input_library_datasets( self, input_library_datasets ):
        self.input_library_datasets = input_library_datasets
    def set_output_library_datasets( self, output_library_datasets ):
        self.output_library_datasets = output_library_datasets
    def set_info( self, info ):
        self.info = info
    def set_runner_name( self, job_runner_name ):
        self.job_runner_name = job_runner_name
    def set_runner_external_id( self, job_runner_external_id ):
        self.job_runner_external_id = job_runner_external_id
    def set_post_job_actions( self, post_job_actions ):
        self.post_job_actions = post_job_actions
    def set_imported( self, imported ):
        self.imported = imported
    def set_handler( self, handler ):
        self.handler = handler
    def set_params( self, params ):
        self.params = params

    def add_parameter( self, name, value ):
        self.parameters.append( JobParameter( name, value ) )
    def add_input_dataset( self, name, dataset ):
        self.input_datasets.append( JobToInputDatasetAssociation( name, dataset ) )
    def add_output_dataset( self, name, dataset ):
        self.output_datasets.append( JobToOutputDatasetAssociation( name, dataset ) )
    def add_input_library_dataset( self, name, dataset ):
        self.input_library_datasets.append( JobToInputLibraryDatasetAssociation( name, dataset ) )
    def add_output_library_dataset( self, name, dataset ):
        self.output_library_datasets.append( JobToOutputLibraryDatasetAssociation( name, dataset ) )
    def add_post_job_action(self, pja):
        self.post_job_actions.append( PostJobActionAssociation( pja, self ) )
    def set_state( self, state ):
        """
        This is the only set method that performs extra work. In this case, the
        state is propagated down to datasets.
        """
        self.state = state
        # For historical reasons state propagates down to datasets
        for da in self.output_datasets:
            da.dataset.state = state
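    # A minimal sketch of the propagation described above: setting the job
    # state also updates every output dataset's state.
    #
    #     job.set_state( Job.states.RUNNING )
    #     assert all( da.dataset.state == Job.states.RUNNING
    #                 for da in job.output_datasets )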
    def get_param_values( self, app, ignore_errors=False ):
        """
        Read encoded parameter values from the database and turn back into a
        dict of tool parameter values.
        """
        param_dict = dict( [ ( p.name, p.value ) for p in self.parameters ] )
        tool = app.toolbox.get_tool( self.tool_id )
        param_dict = tool.params_from_strings( param_dict, app, ignore_errors=ignore_errors )
        return param_dict
    def check_if_output_datasets_deleted( self ):
        """
        Return True if all of the output datasets associated with this job are
        in the deleted state.
        """
        for dataset_assoc in self.output_datasets:
            dataset = dataset_assoc.dataset
            # only the originator of the job can delete a dataset to cause
            # cancellation of the job, no need to loop through history_associations
            if not dataset.deleted:
                return False
        return True
    def mark_deleted( self, track_jobs_in_database=False ):
        """
        Mark this job as deleted, and mark any output datasets as discarded.
        """
        if track_jobs_in_database:
            self.state = Job.states.DELETED_NEW
        else:
            self.state = Job.states.DELETED
        self.info = "Job output deleted by user before job completed."
        for dataset_assoc in self.output_datasets:
            dataset = dataset_assoc.dataset
            dataset.deleted = True
            dataset.state = dataset.states.DISCARDED
            for shared_hda in dataset.dataset.history_associations:
                # propagate info across shared datasets
                shared_hda.deleted = True
                shared_hda.blurb = 'deleted'
                shared_hda.peek = 'Job deleted'
                shared_hda.info = 'Job output deleted by user before job completed'
    def to_dict( self, view='collection' ):
        rval = super( Job, self ).to_dict( view=view )
        rval['tool_id'] = self.tool_id
        if view == 'element':
            param_dict = dict( [ ( p.name, p.value ) for p in self.parameters ] )
            rval['params'] = param_dict

            input_dict = {}
            for i in self.input_datasets:
                if i.dataset is not None:
                    input_dict[i.name] = {"id" : i.dataset.id, "src" : "hda"}
            for i in self.input_library_datasets:
                if i.dataset is not None:
                    input_dict[i.name] = {"id" : i.dataset.id, "src" : "ldda"}
            for k in input_dict:
                if k in param_dict:
                    del param_dict[k]
            rval['inputs'] = input_dict

            output_dict = {}
            for i in self.output_datasets:
                if i.dataset is not None:
                    output_dict[i.name] = {"id" : i.dataset.id, "src" : "hda"}
            for i in self.output_library_datasets:
                if i.dataset is not None:
                    output_dict[i.name] = {"id" : i.dataset.id, "src" : "ldda"}
            rval['outputs'] = output_dict

        return rval

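# Illustrative shape of Job.to_dict( view='element' ); the ids and names
# below are invented examples, not values from the original file:
#
#     { 'id': 42, 'state': 'ok', 'exit_code': 0, ...,
#       'tool_id': 'cat1',
#       'params': { 'dbkey': '"hg19"' },
#       'inputs': { 'input1': { 'id': 7, 'src': 'hda' } },
#       'outputs': { 'out_file1': { 'id': 8, 'src': 'hda' } } }
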
class Task( object ):
    """
    A task represents a single component of a job.
    """
    states = Bunch( NEW = 'new',
                    WAITING = 'waiting',
                    QUEUED = 'queued',
                    RUNNING = 'running',
                    OK = 'ok',
                    ERROR = 'error',
                    DELETED = 'deleted' )

    # Please include an accessor (get/set pair) for any new columns/members.
    def __init__( self, job, working_directory, prepare_files_cmd ):
        self.command_line = None
        self.parameters = []
        self.state = Task.states.NEW
        self.info = None
        self.working_directory = working_directory
        self.task_runner_name = None
        self.task_runner_external_id = None
        self.job = job
        self.stdout = ""
        self.stderr = ""
        self.exit_code = None
        self.prepare_input_files_cmd = prepare_files_cmd

    def get_param_values( self, app ):
        """
        Read encoded parameter values from the database and turn back into a
        dict of tool parameter values.
        """
        param_dict = dict( [ ( p.name, p.value ) for p in self.parent_job.parameters ] )
        tool = app.toolbox.get_tool( self.tool_id )
        param_dict = tool.params_from_strings( param_dict, app )
        return param_dict

    def get_id( self ):
        # This is defined in the SQL Alchemy schema:
        return self.id
    def get_id_tag( self ):
        """
        Return an id tag suitable for identifying the task.
        This combines the task's job id and the task's own id.
        """
        return "%s_%s" % ( self.job.get_id(), self.get_id() )
    def get_command_line( self ):
        return self.command_line
    def get_parameters( self ):
        return self.parameters
    def get_state( self ):
        return self.state
    def get_info( self ):
        return self.info
    def get_working_directory( self ):
        return self.working_directory
    def get_task_runner_name( self ):
        return self.task_runner_name
    def get_task_runner_external_id( self ):
        return self.task_runner_external_id
    def get_job( self ):
        return self.job
    def get_stdout( self ):
        return self.stdout
    def get_stderr( self ):
        return self.stderr
    def get_prepare_input_files_cmd( self ):
        return self.prepare_input_files_cmd

    # The following accessors are for members that are in the Job class but
    # not in the Task class. So they can either refer to the parent Job
    # or return None, depending on whether Tasks need to point to the parent
    # (e.g., for a session) or never use the member (e.g., external output
    # metadata). These can be filled in as needed.
    def get_external_output_metadata( self ):
        """
        The external_output_metadata is currently a backref to
        JobExternalOutputMetadata. It exists for a job but not a task,
        and when a task is cancelled its corresponding parent Job will
        be cancelled. So None is returned now, but that could be changed
        to self.get_job().get_external_output_metadata().
        """
        return None
    def get_job_runner_name( self ):
        """
        Since runners currently access Tasks the same way they access Jobs,
        this method just refers to *this* instance's runner.
        """
        return self.task_runner_name
    def get_job_runner_external_id( self ):
        """
        Runners will use the same methods to get information about the Task
        class as they will about the Job class, so this method just returns
        the task's external id.
        """
        # TODO: Merge into get_runner_external_id.
        return self.task_runner_external_id
    def get_session_id( self ):
        # The Job's galaxy session is equal to the Job's session, so the
        # Job's session is the same as the Task's session.
        return self.get_job().get_session_id()

    def set_id( self, id ):
        # This is defined in the SQL Alchemy's mapper and not here.
        # This should never be called.
        self.id = id
    def set_command_line( self, command_line ):
        self.command_line = command_line
    def set_parameters( self, parameters ):
        self.parameters = parameters
    def set_state( self, state ):
        self.state = state
    def set_info( self, info ):
        self.info = info
    def set_working_directory( self, working_directory ):
        self.working_directory = working_directory
    def set_task_runner_name( self, task_runner_name ):
        self.task_runner_name = task_runner_name
    def set_job_runner_external_id( self, task_runner_external_id ):
        # This method is available for runners that do not want/need to
        # differentiate between the kinds of Runnable things (Jobs and Tasks)
        # that they're using.
        log.debug( "Task %d: Set external id to %s"
                 % ( self.id, task_runner_external_id ) )
        self.task_runner_external_id = task_runner_external_id
    def set_task_runner_external_id( self, task_runner_external_id ):
        self.task_runner_external_id = task_runner_external_id
    def set_job( self, job ):
        self.job = job
    def set_stdout( self, stdout ):
        self.stdout = stdout
    def set_stderr( self, stderr ):
        self.stderr = stderr
    def set_prepare_input_files_cmd( self, prepare_input_files_cmd ):
        self.prepare_input_files_cmd = prepare_input_files_cmd

class JobParameter( object ):
    def __init__( self, name, value ):
        self.name = name
        self.value = value

class JobToInputDatasetAssociation( object ):
    def __init__( self, name, dataset ):
        self.name = name
        self.dataset = dataset

class JobToOutputDatasetAssociation( object ):
    def __init__( self, name, dataset ):
        self.name = name
        self.dataset = dataset

class JobToInputLibraryDatasetAssociation( object ):
    def __init__( self, name, dataset ):
        self.name = name
        self.dataset = dataset

class JobToOutputLibraryDatasetAssociation( object ):
    def __init__( self, name, dataset ):
        self.name = name
        self.dataset = dataset

class PostJobAction( object ):
    def __init__( self, action_type, workflow_step, output_name = None, action_arguments = None):
        self.action_type = action_type
        self.output_name = output_name
        self.action_arguments = action_arguments
        self.workflow_step = workflow_step

class PostJobActionAssociation( object ):
    def __init__(self, pja, job):
        self.job = job
        self.post_job_action = pja

class JobExternalOutputMetadata( object ):
    def __init__( self, job = None, dataset = None ):
        self.job = job
        if isinstance( dataset, galaxy.model.HistoryDatasetAssociation ):
            self.history_dataset_association = dataset
        elif isinstance( dataset, galaxy.model.LibraryDatasetDatasetAssociation ):
            self.library_dataset_dataset_association = dataset
    @property
    def dataset( self ):
        if self.history_dataset_association:
            return self.history_dataset_association
        elif self.library_dataset_dataset_association:
            return self.library_dataset_dataset_association
        return None


class JobExportHistoryArchive( object ):
    def __init__( self, job=None, history=None, dataset=None, compressed=False,
                  history_attrs_filename=None, datasets_attrs_filename=None,
                  jobs_attrs_filename=None ):
        self.job = job
        self.history = history
        self.dataset = dataset
        self.compressed = compressed
        self.history_attrs_filename = history_attrs_filename
        self.datasets_attrs_filename = datasets_attrs_filename
        self.jobs_attrs_filename = jobs_attrs_filename

    @property
    def up_to_date( self ):
        """ Return False if a new export should be generated for the
        corresponding history.
        """
        job = self.job
        return job.state not in [ Job.states.ERROR, Job.states.DELETED ] \
           and job.update_time > self.history.update_time

    @property
    def ready( self ):
        return self.job.state == Job.states.OK

    @property
    def preparing( self ):
        return self.job.state in [ Job.states.RUNNING, Job.states.QUEUED, Job.states.WAITING ]

    @property
    def export_name( self ):
        # Stream archive.
        hname = ready_name_for_url( self.history.name )
        hname = "Galaxy-History-%s.tar" % ( hname )
        if self.compressed:
            hname += ".gz"
        return hname

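# Illustrative export_name values (a sketch, assuming ready_name_for_url
# replaces whitespace with URL-safe characters):
#
#     history named "My history"    ->  "Galaxy-History-My-history.tar"
#     ... and with compressed=True  ->  "Galaxy-History-My-history.tar.gz"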

class JobImportHistoryArchive( object ):
    def __init__( self, job=None, history=None, archive_dir=None ):
        self.job = job
        self.history = history
        self.archive_dir = archive_dir

class GenomeIndexToolData( object ):
    def __init__( self, job=None, params=None, dataset=None, deferred_job=None,
                  transfer_job=None, fasta_path=None, created_time=None, modified_time=None,
                  dbkey=None, user=None, indexer=None ):
        self.job = job
        self.dataset = dataset
        self.fasta_path = fasta_path
        self.user = user
        self.indexer = indexer
        self.created_time = created_time
        self.modified_time = modified_time
        self.deferred = deferred_job
        self.transfer = transfer_job

class DeferredJob( object ):
    states = Bunch( NEW = 'new',
                    WAITING = 'waiting',
                    QUEUED = 'queued',
                    RUNNING = 'running',
                    OK = 'ok',
                    ERROR = 'error' )
    def __init__( self, state=None, plugin=None, params=None ):
        self.state = state
        self.plugin = plugin
        self.params = params
    def get_check_interval( self ):
        if not hasattr( self, '_check_interval' ):
            self._check_interval = None
        return self._check_interval
    def set_check_interval( self, seconds ):
        self._check_interval = seconds
    check_interval = property( get_check_interval, set_check_interval )
    def get_last_check( self ):
        if not hasattr( self, '_last_check' ):
            self._last_check = 0
        return self._last_check
    def set_last_check( self, seconds ):
        try:
            self._last_check = int( seconds )
        except ( TypeError, ValueError ):
            self._last_check = time.time()
    last_check = property( get_last_check, set_last_check )
    @property
    def is_check_time( self ):
        if self.check_interval is None:
            return True
        elif ( int( time.time() ) - self.last_check ) > self.check_interval:
            return True
        else:
            return False
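
    # A minimal polling sketch; `plugin.check_job` here is a hypothetical
    # caller, not part of this file:
    #
    #     if deferred_job.is_check_time:
    #         plugin.check_job( deferred_job )
    #         deferred_job.last_check = time.time()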

class Group( object, Dictifiable ):
    dict_collection_visible_keys = ( 'id', 'name' )
    dict_element_visible_keys = ( 'id', 'name' )

    def __init__( self, name = None ):
        self.name = name
        self.deleted = False

class UserGroupAssociation( object ):
    def __init__( self, user, group ):
        self.user = user
        self.group = group

class History( object, Dictifiable, UsesAnnotations, HasName ):

    dict_collection_visible_keys = ( 'id', 'name', 'published', 'deleted' )
    dict_element_visible_keys = ( 'id', 'name', 'published', 'deleted', 'genome_build', 'purged', 'importable', 'slug' )
    default_name = 'Unnamed history'

    def __init__( self, id=None, name=None, user=None ):
        self.id = id
        self.name = name or History.default_name
        self.deleted = False
        self.purged = False
        self.importing = False
        self.genome_build = None
        self.published = False
        # Relationships
        self.user = user
        self.datasets = []
        self.galaxy_sessions = []
        self.tags = []

    def _next_hid( self ):
        # this is overridden by the db_next_hid() method in mapping.py
        if len( self.datasets ) == 0:
            return 1
        else:
            last_hid = 0
            for dataset in self.datasets:
                if dataset.hid > last_hid:
                    last_hid = dataset.hid
            return last_hid + 1

    def add_galaxy_session( self, galaxy_session, association=None ):
        if association is None:
            self.galaxy_sessions.append( GalaxySessionToHistoryAssociation( galaxy_session, self ) )
        else:
            self.galaxy_sessions.append( association )

    def add_dataset( self, dataset, parent_id=None, genome_build=None, set_hid=True, quota=True ):
        if isinstance( dataset, Dataset ):
            dataset = HistoryDatasetAssociation(dataset=dataset)
            object_session( self ).add( dataset )
            object_session( self ).flush()
        elif not isinstance( dataset, HistoryDatasetAssociation ):
            raise TypeError, ( "You can only add Dataset and HistoryDatasetAssociation instances to a history" +
                               " ( you tried to add %s )." % str( dataset ) )
        if parent_id:
            for data in self.datasets:
                if data.id == parent_id:
                    dataset.hid = data.hid
                    break
            else:
                if set_hid:
                    dataset.hid = self._next_hid()
        else:
            if set_hid:
                dataset.hid = self._next_hid()
        if quota and self.user:
            self.user.total_disk_usage += dataset.quota_amount( self.user )
        dataset.history = self
        if genome_build not in [None, '?']:
            self.genome_build = genome_build
        self.datasets.append( dataset )
        return dataset

    def copy( self, name=None, target_user=None, activatable=False, all_datasets=False ):
        """
        Return a copy of this history using the given `name` and `target_user`.
        If `activatable`, copy only non-deleted datasets. If `all_datasets`, copy
        non-deleted, deleted, and purged datasets.
        """
        # Create new history.
        if not name:
            name = self.name
        if not target_user:
            target_user = self.user
        quota = True
        if target_user == self.user:
            quota = False
        new_history = History( name=name, user=target_user )
        db_session = object_session( self )
        db_session.add( new_history )
        db_session.flush()

        # Copy annotation.
        self.copy_item_annotation( db_session, self.user, self, target_user, new_history )

        # Copy Tags
        new_history.copy_tags_from(target_user=target_user, source_history=self)

        # Copy HDAs.
        if activatable:
            hdas = self.activatable_datasets
        elif all_datasets:
            hdas = self.datasets
        else:
            hdas = self.active_datasets
        for hda in hdas:
            # Copy HDA.
            new_hda = hda.copy( copy_children=True )
            new_history.add_dataset( new_hda, set_hid=False, quota=quota )
            db_session.add( new_hda )
            db_session.flush()
            # Copy annotation.
            self.copy_item_annotation( db_session, self.user, hda, target_user, new_hda )
        new_history.hid_counter = self.hid_counter
        db_session.add( new_history )
        db_session.flush()
        return new_history

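    # A minimal usage sketch for copy (inside a transaction with an attached
    # SQLAlchemy session):
    #
    #     backup = history.copy( name='Backup of %s' % history.name,
    #                            target_user=history.user, activatable=True )
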
    @property
    def activatable_datasets( self ):
        # This needs to be a list
        return [ hda for hda in self.datasets if not hda.dataset.deleted ]

    def to_dict( self, view='collection', value_mapper=None ):

        # Get basic value.
        rval = super( History, self ).to_dict( view=view, value_mapper=value_mapper )

        # Add tags.
        tags_str_list = []
        for tag in self.tags:
            tag_str = tag.user_tname
            if tag.value is not None:
                tag_str += ":" + tag.user_value
            tags_str_list.append( tag_str )
        rval[ 'tags' ] = tags_str_list

        return rval

    def set_from_dict( self, new_data ):
        #AKA: set_api_value
        """
        Set object attributes to the values in the dictionary `new_data`,
        limited to only those keys in dict_element_visible_keys.

        Returns a dictionary of the keys and values that have been changed.
        """
        # precondition: keys are proper, values are parsed and validated
        changed = {}
        # unknown keys are ignored here
        for key in [ k for k in new_data.keys() if k in self.dict_element_visible_keys ]:
            new_val = new_data[ key ]
            old_val = self.__getattribute__( key )
            if new_val == old_val:
                continue

            self.__setattr__( key, new_val )
            changed[ key ] = new_val

        return changed

    @property
    def latest_export( self ):
        exports = self.exports
        return exports and exports[ 0 ]

    @property
    def get_disk_size_bytes( self ):
        return self.get_disk_size( nice_size=False )

    def unhide_datasets( self ):
        for dataset in self.datasets:
            dataset.mark_unhidden()

    def resume_paused_jobs( self ):
        for dataset in self.datasets:
            job = dataset.creating_job
            if job is not None and job.state == Job.states.PAUSED:
                job.set_state(Job.states.NEW)

    def get_disk_size( self, nice_size=False ):
        # unique datasets only
        db_session = object_session( self )
        rval = db_session.query(
            func.sum( db_session.query( HistoryDatasetAssociation.dataset_id, Dataset.total_size ).join( Dataset )
                                            .filter( HistoryDatasetAssociation.table.c.history_id == self.id )
                                            .filter( HistoryDatasetAssociation.purged != True )
                                            .filter( Dataset.purged != True )
                                            .distinct().subquery().c.total_size ) ).first()[0]
        if rval is None:
            rval = 0
        if nice_size:
            rval = galaxy.datatypes.data.nice_size( rval )
        return rval

    @property
    def active_datasets_children_and_roles( self ):
        if not hasattr(self, '_active_datasets_children_and_roles'):
            db_session = object_session( self )
            query = db_session.query( HistoryDatasetAssociation ).filter( HistoryDatasetAssociation.table.c.history_id == self.id ). \
                filter( not_( HistoryDatasetAssociation.deleted ) ). \
                order_by( HistoryDatasetAssociation.table.c.hid.asc() ). \
                options(
                    joinedload("children"),
                    joinedload("dataset"),
                    joinedload("dataset.actions"),
                    joinedload("dataset.actions.role"),
                )
            self._active_datasets_children_and_roles = query.all()
        return self._active_datasets_children_and_roles

    def contents_iter( self, **kwds ):
        """
        Fetch filtered list of contents of history.
        """
        default_contents_types = [
            'dataset',
        ]
        types = kwds.get('types', default_contents_types)
        iters = []
        if 'dataset' in types:
            iters.append( self.__dataset_contents_iter( **kwds ) )
        return galaxy.util.merge_sorted_iterables( operator.attrgetter( "hid" ), *iters )

    def __dataset_contents_iter(self, **kwds):
        return self.__filter_contents( HistoryDatasetAssociation, **kwds )

    def __filter_contents( self, content_class, **kwds ):
        db_session = object_session( self )
        assert db_session is not None
        query = db_session.query( content_class ).filter( content_class.table.c.history_id == self.id )
        query = query.order_by( content_class.table.c.hid.asc() )
        python_filter = None
        deleted = galaxy.util.string_as_bool_or_none( kwds.get( 'deleted', None ) )
        if deleted is not None:
            query = query.filter( content_class.deleted == deleted )
        visible = galaxy.util.string_as_bool_or_none( kwds.get( 'visible', None ) )
        if visible is not None:
            query = query.filter( content_class.visible == visible )
        if 'ids' in kwds:
            ids = kwds['ids']
            max_in_filter_length = kwds.get('max_in_filter_length', MAX_IN_FILTER_LENGTH)
            if len(ids) < max_in_filter_length:
                query = query.filter( content_class.id.in_(ids) )
            else:
                python_filter = lambda content: content.id in ids
        if python_filter:
            return ifilter(python_filter, query)
        else:
            return query

    def copy_tags_from( self, target_user, source_history ):
        for src_shta in source_history.tags:
            new_shta = src_shta.copy()
            new_shta.user = target_user
            self.tags.append( new_shta )

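# A minimal iteration sketch, assuming an attached SQLAlchemy session; the
# keyword names match those handled by __filter_contents above:
#
#     for hda in history.contents_iter( types=[ 'dataset' ],
#                                       deleted='False', visible='True' ):
#         print hda.hid, hda.name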

class HistoryUserShareAssociation( object ):
    def __init__( self ):
        self.history = None
        self.user = None

class UserRoleAssociation( object ):
    def __init__( self, user, role ):
        self.user = user
        self.role = role

class GroupRoleAssociation( object ):
    def __init__( self, group, role ):
        self.group = group
        self.role = role

class Role( object, Dictifiable ):
    dict_collection_visible_keys = ( 'id', 'name' )
    dict_element_visible_keys = ( 'id', 'name', 'description', 'type' )
    private_id = None
    types = Bunch(
        PRIVATE = 'private',
        SYSTEM = 'system',
        USER = 'user',
        ADMIN = 'admin',
        SHARING = 'sharing'
    )
    def __init__( self, name="", description="", type="system", deleted=False ):
        self.name = name
        self.description = description
        self.type = type
        self.deleted = deleted

class UserQuotaAssociation( object, Dictifiable ):
    dict_element_visible_keys = ( 'user', )
    def __init__( self, user, quota ):
        self.user = user
        self.quota = quota

class GroupQuotaAssociation( object, Dictifiable ):
    dict_element_visible_keys = ( 'group', )
    def __init__( self, group, quota ):
        self.group = group
        self.quota = quota

class Quota( object, Dictifiable ):
    dict_collection_visible_keys = ( 'id', 'name' )
    dict_element_visible_keys = ( 'id', 'name', 'description', 'bytes', 'operation', 'display_amount', 'default', 'users', 'groups' )
    valid_operations = ( '+', '-', '=' )
    def __init__( self, name="", description="", amount=0, operation="=" ):
        self.name = name
        self.description = description
        if amount is None:
            self.bytes = -1
        else:
            self.bytes = amount
        self.operation = operation
    def get_amount( self ):
        if self.bytes == -1:
            return None
        return self.bytes
    def set_amount( self, amount ):
        if amount is None:
            self.bytes = -1
        else:
            self.bytes = amount
    amount = property( get_amount, set_amount )
    @property
    def display_amount( self ):
        if self.bytes == -1:
            return "unlimited"
        else:
            return nice_size( self.bytes )

class DefaultQuotaAssociation( Quota, Dictifiable ):
    dict_element_visible_keys = ( 'type', )
    types = Bunch(
        UNREGISTERED = 'unregistered',
        REGISTERED = 'registered'
    )
    def __init__( self, type, quota ):
        assert type in self.types.__dict__.values(), 'Invalid type'
        self.type = type
        self.quota = quota

class DatasetPermissions( object ):
    def __init__( self, action, dataset, role ):
        self.action = action
        self.dataset = dataset
        self.role = role

class LibraryPermissions( object ):
    def __init__( self, action, library_item, role ):
        self.action = action
        if isinstance( library_item, Library ):
            self.library = library_item
        else:
            raise Exception( "Invalid Library specified: %s" % library_item.__class__.__name__ )
        self.role = role

class LibraryFolderPermissions( object ):
    def __init__( self, action, library_item, role ):
        self.action = action
        if isinstance( library_item, LibraryFolder ):
            self.folder = library_item
        else:
            raise Exception( "Invalid LibraryFolder specified: %s" % library_item.__class__.__name__ )
        self.role = role

class LibraryDatasetPermissions( object ):
    def __init__( self, action, library_item, role ):
        self.action = action
        if isinstance( library_item, LibraryDataset ):
            self.library_dataset = library_item
        else:
            raise Exception( "Invalid LibraryDataset specified: %s" % library_item.__class__.__name__ )
        self.role = role

class LibraryDatasetDatasetAssociationPermissions( object ):
    def __init__( self, action, library_item, role ):
        self.action = action
        if isinstance( library_item, LibraryDatasetDatasetAssociation ):
            self.library_dataset_dataset_association = library_item
        else:
            raise Exception( "Invalid LibraryDatasetDatasetAssociation specified: %s" % library_item.__class__.__name__ )
        self.role = role

class DefaultUserPermissions( object ):
    def __init__( self, user, action, role ):
        self.user = user
        self.action = action
        self.role = role

class DefaultHistoryPermissions( object ):
    def __init__( self, history, action, role ):
        self.history = history
        self.action = action
        self.role = role

class Dataset( object ):
    states = Bunch( NEW = 'new',
                    UPLOAD = 'upload',
                    QUEUED = 'queued',
                    RUNNING = 'running',
                    OK = 'ok',
                    EMPTY = 'empty',
                    ERROR = 'error',
                    DISCARDED = 'discarded',
                    PAUSED = 'paused',
                    SETTING_METADATA = 'setting_metadata',
                    FAILED_METADATA = 'failed_metadata' )

    conversion_messages = Bunch( PENDING = "pending",
                                 NO_DATA = "no data",
                                 NO_CHROMOSOME = "no chromosome",
                                 NO_CONVERTER = "no converter",
                                 NO_TOOL = "no tool",
                                 DATA = "data",
                                 ERROR = "error",
                                 OK = "ok" )

    permitted_actions = get_permitted_actions( filter='DATASET' )
    file_path = "/tmp/"
    object_store = None # This gets initialized in mapping.py (method init) by app.py
    engine = None
    def __init__( self, id=None, state=None, external_filename=None, extra_files_path=None, file_size=None, purgable=True, uuid=None ):
        self.id = id
        self.state = state
        self.deleted = False
        self.purged = False
        self.purgable = purgable
        self.external_filename = external_filename
        self._extra_files_path = extra_files_path
        self.file_size = file_size
        if uuid is None:
            self.uuid = uuid4()
        else:
            self.uuid = UUID(str(uuid))

    def get_file_name( self ):
        if not self.external_filename:
            assert self.id is not None, "ID must be set before filename used (commit the object)"
            assert self.object_store is not None, "Object Store has not been initialized for dataset %s" % self.id
            filename = self.object_store.get_filename( self )
            return filename
        else:
            filename = self.external_filename
        # Make filename absolute
        return os.path.abspath( filename )
    def set_file_name( self, filename ):
        if not filename:
            self.external_filename = None
        else:
            self.external_filename = filename
    file_name = property( get_file_name, set_file_name )
    @property
    def extra_files_path( self ):
        return self.object_store.get_filename( self, dir_only=True, extra_dir=self._extra_files_path or "dataset_%d_files" % self.id )
    def _calculate_size( self ):
        if self.external_filename:
            try:
                return os.path.getsize(self.external_filename)
            except OSError:
                return 0
        else:
            return self.object_store.size(self)
    def get_size( self, nice_size=False ):
        """Returns the size of the data on disk"""
        if self.file_size:
            if nice_size:
                return galaxy.datatypes.data.nice_size( self.file_size )
            else:
                return self.file_size
        else:
            if nice_size:
                return galaxy.datatypes.data.nice_size( self._calculate_size() )
            else:
                return self._calculate_size()
    def set_size( self ):
        """Record the size of the data on disk if it is not already set"""
        if not self.file_size:
            self.file_size = self._calculate_size()
    def get_total_size( self ):
        if self.total_size is not None:
            return self.total_size
        if self.file_size:
            # for backwards compatibility, set if unset
            self.set_total_size()
            db_session = object_session( self )
            db_session.flush()
            return self.total_size
        return 0
    def set_total_size( self ):
        if self.file_size is None:
            self.set_size()
        self.total_size = self.file_size or 0
        if self.object_store.exists(self, extra_dir=self._extra_files_path or "dataset_%d_files" % self.id, dir_only=True):
            for root, dirs, files in os.walk( self.extra_files_path ):
                self.total_size += sum( [ os.path.getsize( os.path.join( root, file ) ) for file in files if os.path.exists( os.path.join( root, file ) ) ] )
    def has_data( self ):
        """Detects whether there is any data"""
        return self.get_size() > 0
    def mark_deleted( self, include_children=True ):
        self.deleted = True
    def is_multi_byte( self ):
        if not self.has_data():
            return False
        try:
            return is_multi_byte( codecs.open( self.file_name, 'r', 'utf-8' ).read( 100 ) )
        except UnicodeDecodeError:
            return False
    # FIXME: sqlalchemy will replace this
    def _delete(self):
        """Remove the file that corresponds to this data"""
        self.object_store.delete(self)
    @property
    def user_can_purge( self ):
        return not self.purged \
                and not bool( self.library_associations ) \
                and len( self.history_associations ) == len( self.purged_history_associations )
    def full_delete( self ):
        """Remove the file and extra files, and mark the dataset deleted and purged"""
        # os.unlink( self.file_name )
        self.object_store.delete(self)
        if self.object_store.exists(self, extra_dir=self._extra_files_path or "dataset_%d_files" % self.id, dir_only=True):
            self.object_store.delete(self, entire_dir=True, extra_dir=self._extra_files_path or "dataset_%d_files" % self.id, dir_only=True)
        # if os.path.exists( self.extra_files_path ):
        #     shutil.rmtree( self.extra_files_path )
        # TODO: purge metadata files
        self.deleted = True
        self.purged = True
    def get_access_roles( self, trans ):
        roles = []
        for dp in self.actions:
            if dp.action == trans.app.security_agent.permitted_actions.DATASET_ACCESS.action:
                roles.append( dp.role )
        return roles
    def get_manage_permissions_roles( self, trans ):
        roles = []
        for dp in self.actions:
            if dp.action == trans.app.security_agent.permitted_actions.DATASET_MANAGE_PERMISSIONS.action:
                roles.append( dp.role )
        return roles
    def has_manage_permissions_roles( self, trans ):
        for dp in self.actions:
            if dp.action == trans.app.security_agent.permitted_actions.DATASET_MANAGE_PERMISSIONS.action:
                return True
        return False

class DatasetInst

(Large file truncated in this listing; the full file is available at the repository URL above.)