PageRenderTime 845ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/galaxy/model/__init__.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 3547 lines | 3472 code | 38 blank | 37 comment | 18 complexity | ec9af105465b68c9ba85c2dae5583df1 MD5 | raw file

Large files are truncated, but you can click here to view the full file

  1. """
  2. Galaxy data model classes
  3. Naming: try to use class names that have a distinct plural form so that
  4. the relationship cardinalities are obvious (e.g. prefer Dataset to Data)
  5. """
  6. from galaxy import eggs
  7. eggs.require("pexpect")
  8. import codecs
  9. import errno
  10. import logging
  11. import operator
  12. import os
  13. import pexpect
  14. import json
  15. import socket
  16. import time
  17. from uuid import UUID, uuid4
  18. from string import Template
  19. from itertools import ifilter
  20. from itertools import chain
  21. import galaxy.datatypes
  22. import galaxy.datatypes.registry
  23. import galaxy.security.passwords
  24. from galaxy.datatypes.metadata import MetadataCollection
  25. from galaxy.model.item_attrs import Dictifiable, UsesAnnotations
  26. from galaxy.security import get_permitted_actions
  27. from galaxy.util import is_multi_byte, nice_size, Params, restore_text, send_mail
  28. from galaxy.util import ready_name_for_url
  29. from galaxy.util.bunch import Bunch
  30. from galaxy.util.hash_util import new_secure_hash
  31. from galaxy.util.directory_hash import directory_hash_id
  32. from galaxy.web.framework.helpers import to_unicode
  33. from galaxy.web.form_builder import (AddressField, CheckboxField, HistoryField,
  34. PasswordField, SelectField, TextArea, TextField, WorkflowField,
  35. WorkflowMappingField)
  36. from sqlalchemy.orm import object_session
  37. from sqlalchemy.orm import joinedload
  38. from sqlalchemy.sql.expression import func
  39. from sqlalchemy import not_
  40. log = logging.getLogger( __name__ )
  41. datatypes_registry = galaxy.datatypes.registry.Registry()
  42. # Default Value Required for unit tests
  43. datatypes_registry.load_datatypes()
  44. # When constructing filters with in for a fixed set of ids, maximum
  45. # number of items to place in the IN statement. Different databases
  46. # are going to have different limits so it is likely best to not let
  47. # this be unlimited - filter in Python if over this limit.
  48. MAX_IN_FILTER_LENGTH = 100
  49. class NoConverterException(Exception):
  50. def __init__(self, value):
  51. self.value = value
  52. def __str__(self):
  53. return repr(self.value)
  54. class ConverterDependencyException(Exception):
  55. def __init__(self, value):
  56. self.value = value
  57. def __str__(self):
  58. return repr(self.value)
  59. def set_datatypes_registry( d_registry ):
  60. """
  61. Set up datatypes_registry
  62. """
  63. global datatypes_registry
  64. datatypes_registry = d_registry
  65. class HasName:
  66. def get_display_name( self ):
  67. """
  68. These objects have a name attribute can be either a string or a unicode
  69. object. If string, convert to unicode object assuming 'utf-8' format.
  70. """
  71. name = self.name
  72. if isinstance(name, str):
  73. name = unicode(name, 'utf-8')
  74. return name
  75. class User( object, Dictifiable ):
  76. use_pbkdf2 = True
  77. """
  78. Data for a Galaxy user or admin and relations to their
  79. histories, credentials, and roles.
  80. """
  81. # attributes that will be accessed and returned when calling to_dict( view='collection' )
  82. dict_collection_visible_keys = ( 'id', 'email' )
  83. # attributes that will be accessed and returned when calling to_dict( view='element' )
  84. dict_element_visible_keys = ( 'id', 'email', 'username', 'total_disk_usage', 'nice_total_disk_usage' )
  85. def __init__( self, email=None, password=None ):
  86. self.email = email
  87. self.password = password
  88. self.external = False
  89. self.deleted = False
  90. self.purged = False
  91. self.active = False
  92. self.activation_token = None
  93. self.username = None
  94. # Relationships
  95. self.histories = []
  96. self.credentials = []
  97. #? self.roles = []
  98. def set_password_cleartext( self, cleartext ):
  99. """
  100. Set user password to the digest of `cleartext`.
  101. """
  102. if User.use_pbkdf2:
  103. self.password = galaxy.security.passwords.hash_password( cleartext )
  104. else:
  105. self.password = new_secure_hash( text_type=cleartext )
  106. def check_password( self, cleartext ):
  107. """
  108. Check if `cleartext` matches user password when hashed.
  109. """
  110. return galaxy.security.passwords.check_password( cleartext, self.password )
  111. def all_roles( self ):
  112. """
  113. Return a unique list of Roles associated with this user or any of their groups.
  114. """
  115. roles = [ ura.role for ura in self.roles ]
  116. for group in [ uga.group for uga in self.groups ]:
  117. for role in [ gra.role for gra in group.roles ]:
  118. if role not in roles:
  119. roles.append( role )
  120. return roles
  121. def get_disk_usage( self, nice_size=False ):
  122. """
  123. Return byte count of disk space used by user or a human-readable
  124. string if `nice_size` is `True`.
  125. """
  126. rval = 0
  127. if self.disk_usage is not None:
  128. rval = self.disk_usage
  129. if nice_size:
  130. rval = galaxy.datatypes.data.nice_size( rval )
  131. return rval
  132. def set_disk_usage( self, bytes ):
  133. """
  134. Manually set the disk space used by a user to `bytes`.
  135. """
  136. self.disk_usage = bytes
  137. total_disk_usage = property( get_disk_usage, set_disk_usage )
  138. @property
  139. def nice_total_disk_usage( self ):
  140. """
  141. Return byte count of disk space used in a human-readable string.
  142. """
  143. return self.get_disk_usage( nice_size=True )
  144. def calculate_disk_usage( self ):
  145. """
  146. Return byte count total of disk space used by all non-purged, non-library
  147. HDAs in non-purged histories.
  148. """
  149. # maintain a list so that we don't double count
  150. dataset_ids = []
  151. total = 0
  152. # this can be a huge number and can run out of memory, so we avoid the mappers
  153. db_session = object_session( self )
  154. for history in db_session.query( History ).enable_eagerloads( False ).filter_by( user_id=self.id, purged=False ).yield_per( 1000 ):
  155. for hda in db_session.query( HistoryDatasetAssociation ).enable_eagerloads( False ).filter_by( history_id=history.id, purged=False ).yield_per( 1000 ):
  156. #TODO: def hda.counts_toward_disk_usage():
  157. # return ( not self.dataset.purged and not self.dataset.library_associations )
  158. if not hda.dataset.id in dataset_ids and not hda.dataset.purged and not hda.dataset.library_associations:
  159. dataset_ids.append( hda.dataset.id )
  160. total += hda.dataset.get_total_size()
  161. return total
  162. @staticmethod
  163. def user_template_environment( user ):
  164. """
  165. >>> env = User.user_template_environment(None)
  166. >>> env['__user_email__']
  167. 'Anonymous'
  168. >>> env['__user_id__']
  169. 'Anonymous'
  170. >>> user = User('foo@example.com')
  171. >>> user.id = 6
  172. >>> user.username = 'foo2'
  173. >>> env = User.user_template_environment(user)
  174. >>> env['__user_id__']
  175. '6'
  176. >>> env['__user_name__']
  177. 'foo2'
  178. """
  179. if user:
  180. user_id = '%d' % user.id
  181. user_email = str( user.email )
  182. user_name = str( user.username )
  183. else:
  184. user = None
  185. user_id = 'Anonymous'
  186. user_email = 'Anonymous'
  187. user_name = 'Anonymous'
  188. environment = {}
  189. environment[ '__user__' ] = user
  190. environment[ '__user_id__' ] = environment[ 'userId' ] = user_id
  191. environment[ '__user_email__' ] = environment[ 'userEmail' ] = user_email
  192. environment[ '__user_name__' ] = user_name
  193. return environment
  194. @staticmethod
  195. def expand_user_properties( user, in_string ):
  196. """
  197. """
  198. environment = User.user_template_environment( user )
  199. return Template( in_string ).safe_substitute( environment )
  200. class Job( object, Dictifiable ):
  201. dict_collection_visible_keys = [ 'id', 'state', 'exit_code', 'update_time', 'create_time' ]
  202. dict_element_visible_keys = [ 'id', 'state', 'exit_code', 'update_time', 'create_time' ]
  203. """
  204. A job represents a request to run a tool given input datasets, tool
  205. parameters, and output datasets.
  206. """
  207. states = Bunch( NEW = 'new',
  208. UPLOAD = 'upload',
  209. WAITING = 'waiting',
  210. QUEUED = 'queued',
  211. RUNNING = 'running',
  212. OK = 'ok',
  213. ERROR = 'error',
  214. PAUSED = 'paused',
  215. DELETED = 'deleted',
  216. DELETED_NEW = 'deleted_new' )
  217. # Please include an accessor (get/set pair) for any new columns/members.
  218. def __init__( self ):
  219. self.session_id = None
  220. self.user_id = None
  221. self.tool_id = None
  222. self.tool_version = None
  223. self.command_line = None
  224. self.param_filename = None
  225. self.parameters = []
  226. self.input_datasets = []
  227. self.output_datasets = []
  228. self.input_library_datasets = []
  229. self.output_library_datasets = []
  230. self.state = Job.states.NEW
  231. self.info = None
  232. self.job_runner_name = None
  233. self.job_runner_external_id = None
  234. self.destination_id = None
  235. self.destination_params = None
  236. self.post_job_actions = []
  237. self.imported = False
  238. self.handler = None
  239. self.exit_code = None
  240. @property
  241. def finished( self ):
  242. states = self.states
  243. return self.state in [
  244. states.OK,
  245. states.ERROR,
  246. states.DELETED,
  247. states.DELETED_NEW,
  248. ]
  249. # TODO: Add accessors for members defined in SQL Alchemy for the Job table and
  250. # for the mapper defined to the Job table.
  251. def get_external_output_metadata( self ):
  252. """
  253. The external_output_metadata is currently a reference from Job to
  254. JobExternalOutputMetadata. It exists for a job but not a task.
  255. """
  256. return self.external_output_metadata
  257. def get_session_id( self ):
  258. return self.session_id
  259. def get_user_id( self ):
  260. return self.user_id
  261. def get_tool_id( self ):
  262. return self.tool_id
  263. def get_tool_version( self ):
  264. return self.tool_version
  265. def get_command_line( self ):
  266. return self.command_line
  267. def get_param_filename( self ):
  268. return self.param_filename
  269. def get_parameters( self ):
  270. return self.parameters
  271. def get_input_datasets( self ):
  272. return self.input_datasets
  273. def get_output_datasets( self ):
  274. return self.output_datasets
  275. def get_input_library_datasets( self ):
  276. return self.input_library_datasets
  277. def get_output_library_datasets( self ):
  278. return self.output_library_datasets
  279. def get_state( self ):
  280. return self.state
  281. def get_info( self ):
  282. return self.info
  283. def get_job_runner_name( self ):
  284. # This differs from the Task class in that job_runner_name is
  285. # accessed instead of task_runner_name. Note that the field
  286. # runner_name is not the same thing.
  287. return self.job_runner_name
  288. def get_job_runner_external_id( self ):
  289. # This is different from the Task just in the member accessed:
  290. return self.job_runner_external_id
  291. def get_post_job_actions( self ):
  292. return self.post_job_actions
  293. def get_imported( self ):
  294. return self.imported
  295. def get_handler( self ):
  296. return self.handler
  297. def get_params( self ):
  298. return self.params
  299. def get_user( self ):
  300. # This is defined in the SQL Alchemy mapper as a relation to the User.
  301. return self.user
  302. def get_id( self ):
  303. # This is defined in the SQL Alchemy's Job table (and not in the model).
  304. return self.id
  305. def get_tasks( self ):
  306. # The tasks member is pert of a reference in the SQL Alchemy schema:
  307. return self.tasks
  308. def get_id_tag( self ):
  309. """
  310. Return a tag that can be useful in identifying a Job.
  311. This returns the Job's get_id
  312. """
  313. return "%s" % self.id;
  314. def set_session_id( self, session_id ):
  315. self.session_id = session_id
  316. def set_user_id( self, user_id ):
  317. self.user_id = user_id
  318. def set_tool_id( self, tool_id ):
  319. self.tool_id = tool_id
  320. def set_tool_version( self, tool_version ):
  321. self.tool_version = tool_version
  322. def set_command_line( self, command_line ):
  323. self.command_line = command_line
  324. def set_param_filename( self, param_filename ):
  325. self.param_filename = param_filename
  326. def set_parameters( self, parameters ):
  327. self.parameters = parameters
  328. def set_input_datasets( self, input_datasets ):
  329. self.input_datasets = input_datasets
  330. def set_output_datasets( self, output_datasets ):
  331. self.output_datasets = output_datasets
  332. def set_input_library_datasets( self, input_library_datasets ):
  333. self.input_library_datasets = input_library_datasets
  334. def set_output_library_datasets( self, output_library_datasets ):
  335. self.output_library_datasets = output_library_datasets
  336. def set_info( self, info ):
  337. self.info = info
  338. def set_runner_name( self, job_runner_name ):
  339. self.job_runner_name = job_runner_name
  340. def set_runner_external_id( self, job_runner_external_id ):
  341. self.job_runner_external_id = job_runner_external_id
  342. def set_post_job_actions( self, post_job_actions ):
  343. self.post_job_actions = post_job_actions
  344. def set_imported( self, imported ):
  345. self.imported = imported
  346. def set_handler( self, handler ):
  347. self.handler = handler
  348. def set_params( self, params ):
  349. self.params = params
  350. def add_parameter( self, name, value ):
  351. self.parameters.append( JobParameter( name, value ) )
  352. def add_input_dataset( self, name, dataset ):
  353. self.input_datasets.append( JobToInputDatasetAssociation( name, dataset ) )
  354. def add_output_dataset( self, name, dataset ):
  355. self.output_datasets.append( JobToOutputDatasetAssociation( name, dataset ) )
  356. def add_input_library_dataset( self, name, dataset ):
  357. self.input_library_datasets.append( JobToInputLibraryDatasetAssociation( name, dataset ) )
  358. def add_output_library_dataset( self, name, dataset ):
  359. self.output_library_datasets.append( JobToOutputLibraryDatasetAssociation( name, dataset ) )
  360. def add_post_job_action(self, pja):
  361. self.post_job_actions.append( PostJobActionAssociation( pja, self ) )
  362. def set_state( self, state ):
  363. """
  364. This is the only set method that performs extra work. In this case, the
  365. state is propagated down to datasets.
  366. """
  367. self.state = state
  368. # For historical reasons state propogates down to datasets
  369. for da in self.output_datasets:
  370. da.dataset.state = state
  371. def get_param_values( self, app, ignore_errors=False ):
  372. """
  373. Read encoded parameter values from the database and turn back into a
  374. dict of tool parameter values.
  375. """
  376. param_dict = dict( [ ( p.name, p.value ) for p in self.parameters ] )
  377. tool = app.toolbox.get_tool( self.tool_id )
  378. param_dict = tool.params_from_strings( param_dict, app, ignore_errors=ignore_errors )
  379. return param_dict
  380. def check_if_output_datasets_deleted( self ):
  381. """
  382. Return true if all of the output datasets associated with this job are
  383. in the deleted state
  384. """
  385. for dataset_assoc in self.output_datasets:
  386. dataset = dataset_assoc.dataset
  387. # only the originator of the job can delete a dataset to cause
  388. # cancellation of the job, no need to loop through history_associations
  389. if not dataset.deleted:
  390. return False
  391. return True
  392. def mark_deleted( self, track_jobs_in_database=False ):
  393. """
  394. Mark this job as deleted, and mark any output datasets as discarded.
  395. """
  396. if track_jobs_in_database:
  397. self.state = Job.states.DELETED_NEW
  398. else:
  399. self.state = Job.states.DELETED
  400. self.info = "Job output deleted by user before job completed."
  401. for dataset_assoc in self.output_datasets:
  402. dataset = dataset_assoc.dataset
  403. dataset.deleted = True
  404. dataset.state = dataset.states.DISCARDED
  405. for dataset in dataset.dataset.history_associations:
  406. # propagate info across shared datasets
  407. dataset.deleted = True
  408. dataset.blurb = 'deleted'
  409. dataset.peek = 'Job deleted'
  410. dataset.info = 'Job output deleted by user before job completed'
  411. def to_dict( self, view='collection' ):
  412. rval = super( Job, self ).to_dict( view=view )
  413. rval['tool_id'] = self.tool_id
  414. if view == 'element':
  415. param_dict = dict( [ ( p.name, p.value ) for p in self.parameters ] )
  416. rval['params'] = param_dict
  417. input_dict = {}
  418. for i in self.input_datasets:
  419. if i.dataset is not None:
  420. input_dict[i.name] = {"id" : i.dataset.id, "src" : "hda"}
  421. for i in self.input_library_datasets:
  422. if i.dataset is not None:
  423. input_dict[i.name] = {"id" : i.dataset.id, "src" : "ldda"}
  424. for k in input_dict:
  425. if k in param_dict:
  426. del param_dict[k]
  427. rval['inputs'] = input_dict
  428. output_dict = {}
  429. for i in self.output_datasets:
  430. if i.dataset is not None:
  431. output_dict[i.name] = {"id" : i.dataset.id, "src" : "hda"}
  432. for i in self.output_library_datasets:
  433. if i.dataset is not None:
  434. output_dict[i.name] = {"id" : i.dataset.id, "src" : "ldda"}
  435. rval['outputs'] = output_dict
  436. return rval
  437. class Task( object ):
  438. """
  439. A task represents a single component of a job.
  440. """
  441. states = Bunch( NEW = 'new',
  442. WAITING = 'waiting',
  443. QUEUED = 'queued',
  444. RUNNING = 'running',
  445. OK = 'ok',
  446. ERROR = 'error',
  447. DELETED = 'deleted' )
  448. # Please include an accessor (get/set pair) for any new columns/members.
  449. def __init__( self, job, working_directory, prepare_files_cmd ):
  450. self.command_line = None
  451. self.parameters = []
  452. self.state = Task.states.NEW
  453. self.info = None
  454. self.working_directory = working_directory
  455. self.task_runner_name = None
  456. self.task_runner_external_id = None
  457. self.job = job
  458. self.stdout = ""
  459. self.stderr = ""
  460. self.exit_code = None
  461. self.prepare_input_files_cmd = prepare_files_cmd
  462. def get_param_values( self, app ):
  463. """
  464. Read encoded parameter values from the database and turn back into a
  465. dict of tool parameter values.
  466. """
  467. param_dict = dict( [ ( p.name, p.value ) for p in self.parent_job.parameters ] )
  468. tool = app.toolbox.get_tool( self.tool_id )
  469. param_dict = tool.params_from_strings( param_dict, app )
  470. return param_dict
  471. def get_id( self ):
  472. # This is defined in the SQL Alchemy schema:
  473. return self.id
  474. def get_id_tag( self ):
  475. """
  476. Return an id tag suitable for identifying the task.
  477. This combines the task's job id and the task's own id.
  478. """
  479. return "%s_%s" % ( self.job.get_id(), self.get_id() )
  480. def get_command_line( self ):
  481. return self.command_line
  482. def get_parameters( self ):
  483. return self.parameters
  484. def get_state( self ):
  485. return self.state
  486. def get_info( self ):
  487. return self.info
  488. def get_working_directory( self ):
  489. return self.working_directory
  490. def get_task_runner_name( self ):
  491. return self.task_runner_name
  492. def get_task_runner_external_id( self ):
  493. return self.task_runner_external_id
  494. def get_job( self ):
  495. return self.job
  496. def get_stdout( self ):
  497. return self.stdout
  498. def get_stderr( self ):
  499. return self.stderr
  500. def get_prepare_input_files_cmd( self ):
  501. return self.prepare_input_files_cmd
  502. # The following accessors are for members that are in the Job class but
  503. # not in the Task class. So they can either refer to the parent Job
  504. # or return None, depending on whether Tasks need to point to the parent
  505. # (e.g., for a session) or never use the member (e.g., external output
  506. # metdata). These can be filled in as needed.
  507. def get_external_output_metadata( self ):
  508. """
  509. The external_output_metadata is currently a backref to
  510. JobExternalOutputMetadata. It exists for a job but not a task,
  511. and when a task is cancelled its corresponding parent Job will
  512. be cancelled. So None is returned now, but that could be changed
  513. to self.get_job().get_external_output_metadata().
  514. """
  515. return None
  516. def get_job_runner_name( self ):
  517. """
  518. Since runners currently access Tasks the same way they access Jobs,
  519. this method just refers to *this* instance's runner.
  520. """
  521. return self.task_runner_name
  522. def get_job_runner_external_id( self ):
  523. """
  524. Runners will use the same methods to get information about the Task
  525. class as they will about the Job class, so this method just returns
  526. the task's external id.
  527. """
  528. # TODO: Merge into get_runner_external_id.
  529. return self.task_runner_external_id
  530. def get_session_id( self ):
  531. # The Job's galaxy session is equal to the Job's session, so the
  532. # Job's session is the same as the Task's session.
  533. return self.get_job().get_session_id()
  534. def set_id( self, id ):
  535. # This is defined in the SQL Alchemy's mapper and not here.
  536. # This should never be called.
  537. self.id = id
  538. def set_command_line( self, command_line ):
  539. self.command_line = command_line
  540. def set_parameters( self, parameters ):
  541. self.parameters = parameters
  542. def set_state( self, state ):
  543. self.state = state
  544. def set_info( self, info ):
  545. self.info = info
  546. def set_working_directory( self, working_directory ):
  547. self.working_directory = working_directory
  548. def set_task_runner_name( self, task_runner_name ):
  549. self.task_runner_name = task_runner_name
  550. def set_job_runner_external_id( self, task_runner_external_id ):
  551. # This method is available for runners that do not want/need to
  552. # differentiate between the kinds of Runnable things (Jobs and Tasks)
  553. # that they're using.
  554. log.debug( "Task %d: Set external id to %s"
  555. % ( self.id, task_runner_external_id ) )
  556. self.task_runner_external_id = task_runner_external_id
  557. def set_task_runner_external_id( self, task_runner_external_id ):
  558. self.task_runner_external_id = task_runner_external_id
  559. def set_job( self, job ):
  560. self.job = job
  561. def set_stdout( self, stdout ):
  562. self.stdout = stdout
  563. def set_stderr( self, stderr ):
  564. self.stderr = stderr
  565. def set_prepare_input_files_cmd( self, prepare_input_files_cmd ):
  566. self.prepare_input_files_cmd = prepare_input_files_cmd
  567. class JobParameter( object ):
  568. def __init__( self, name, value ):
  569. self.name = name
  570. self.value = value
  571. class JobToInputDatasetAssociation( object ):
  572. def __init__( self, name, dataset ):
  573. self.name = name
  574. self.dataset = dataset
  575. class JobToOutputDatasetAssociation( object ):
  576. def __init__( self, name, dataset ):
  577. self.name = name
  578. self.dataset = dataset
  579. class JobToInputLibraryDatasetAssociation( object ):
  580. def __init__( self, name, dataset ):
  581. self.name = name
  582. self.dataset = dataset
  583. class JobToOutputLibraryDatasetAssociation( object ):
  584. def __init__( self, name, dataset ):
  585. self.name = name
  586. self.dataset = dataset
  587. class PostJobAction( object ):
  588. def __init__( self, action_type, workflow_step, output_name = None, action_arguments = None):
  589. self.action_type = action_type
  590. self.output_name = output_name
  591. self.action_arguments = action_arguments
  592. self.workflow_step = workflow_step
  593. class PostJobActionAssociation( object ):
  594. def __init__(self, pja, job):
  595. self.job = job
  596. self.post_job_action = pja
  597. class JobExternalOutputMetadata( object ):
  598. def __init__( self, job = None, dataset = None ):
  599. self.job = job
  600. if isinstance( dataset, galaxy.model.HistoryDatasetAssociation ):
  601. self.history_dataset_association = dataset
  602. elif isinstance( dataset, galaxy.model.LibraryDatasetDatasetAssociation ):
  603. self.library_dataset_dataset_association = dataset
  604. @property
  605. def dataset( self ):
  606. if self.history_dataset_association:
  607. return self.history_dataset_association
  608. elif self.library_dataset_dataset_association:
  609. return self.library_dataset_dataset_association
  610. return None
  611. class JobExportHistoryArchive( object ):
  612. def __init__( self, job=None, history=None, dataset=None, compressed=False, \
  613. history_attrs_filename=None, datasets_attrs_filename=None,
  614. jobs_attrs_filename=None ):
  615. self.job = job
  616. self.history = history
  617. self.dataset = dataset
  618. self.compressed = compressed
  619. self.history_attrs_filename = history_attrs_filename
  620. self.datasets_attrs_filename = datasets_attrs_filename
  621. self.jobs_attrs_filename = jobs_attrs_filename
  622. @property
  623. def up_to_date( self ):
  624. """ Return False, if a new export should be generated for corresponding
  625. history.
  626. """
  627. job = self.job
  628. return job.state not in [ Job.states.ERROR, Job.states.DELETED ] \
  629. and job.update_time > self.history.update_time
  630. @property
  631. def ready( self ):
  632. return self.job.state == Job.states.OK
  633. @property
  634. def preparing( self ):
  635. return self.job.state in [ Job.states.RUNNING, Job.states.QUEUED, Job.states.WAITING ]
  636. @property
  637. def export_name( self ):
  638. # Stream archive.
  639. hname = ready_name_for_url( self.history.name )
  640. hname = "Galaxy-History-%s.tar" % ( hname )
  641. if self.compressed:
  642. hname += ".gz"
  643. return hname
  644. class JobImportHistoryArchive( object ):
  645. def __init__( self, job=None, history=None, archive_dir=None ):
  646. self.job = job
  647. self.history = history
  648. self.archive_dir=archive_dir
  649. class GenomeIndexToolData( object ):
  650. def __init__( self, job=None, params=None, dataset=None, deferred_job=None, \
  651. transfer_job=None, fasta_path=None, created_time=None, modified_time=None, \
  652. dbkey=None, user=None, indexer=None ):
  653. self.job = job
  654. self.dataset = dataset
  655. self.fasta_path = fasta_path
  656. self.user = user
  657. self.indexer = indexer
  658. self.created_time = created_time
  659. self.modified_time = modified_time
  660. self.deferred = deferred_job
  661. self.transfer = transfer_job
  662. class DeferredJob( object ):
  663. states = Bunch( NEW = 'new',
  664. WAITING = 'waiting',
  665. QUEUED = 'queued',
  666. RUNNING = 'running',
  667. OK = 'ok',
  668. ERROR = 'error' )
  669. def __init__( self, state=None, plugin=None, params=None ):
  670. self.state = state
  671. self.plugin = plugin
  672. self.params = params
  673. def get_check_interval( self ):
  674. if not hasattr( self, '_check_interval' ):
  675. self._check_interval = None
  676. return self._check_interval
  677. def set_check_interval( self, seconds ):
  678. self._check_interval = seconds
  679. check_interval = property( get_check_interval, set_check_interval )
  680. def get_last_check( self ):
  681. if not hasattr( self, '_last_check' ):
  682. self._last_check = 0
  683. return self._last_check
  684. def set_last_check( self, seconds ):
  685. try:
  686. self._last_check = int( seconds )
  687. except:
  688. self._last_check = time.time()
  689. last_check = property( get_last_check, set_last_check )
  690. @property
  691. def is_check_time( self ):
  692. if self.check_interval is None:
  693. return True
  694. elif ( int( time.time() ) - self.last_check ) > self.check_interval:
  695. return True
  696. else:
  697. return False
  698. class Group( object, Dictifiable ):
  699. dict_collection_visible_keys = ( 'id', 'name' )
  700. dict_element_visible_keys = ( 'id', 'name' )
  701. def __init__( self, name = None ):
  702. self.name = name
  703. self.deleted = False
  704. class UserGroupAssociation( object ):
  705. def __init__( self, user, group ):
  706. self.user = user
  707. self.group = group
  708. class History( object, Dictifiable, UsesAnnotations, HasName ):
  709. dict_collection_visible_keys = ( 'id', 'name', 'published', 'deleted' )
  710. dict_element_visible_keys = ( 'id', 'name', 'published', 'deleted', 'genome_build', 'purged', 'importable', 'slug' )
  711. default_name = 'Unnamed history'
  712. def __init__( self, id=None, name=None, user=None ):
  713. self.id = id
  714. self.name = name or History.default_name
  715. self.deleted = False
  716. self.purged = False
  717. self.importing = False
  718. self.genome_build = None
  719. self.published = False
  720. # Relationships
  721. self.user = user
  722. self.datasets = []
  723. self.galaxy_sessions = []
  724. self.tags = []
  725. def _next_hid( self ):
  726. # this is overriden in mapping.py db_next_hid() method
  727. if len( self.datasets ) == 0:
  728. return 1
  729. else:
  730. last_hid = 0
  731. for dataset in self.datasets:
  732. if dataset.hid > last_hid:
  733. last_hid = dataset.hid
  734. return last_hid + 1
  735. def add_galaxy_session( self, galaxy_session, association=None ):
  736. if association is None:
  737. self.galaxy_sessions.append( GalaxySessionToHistoryAssociation( galaxy_session, self ) )
  738. else:
  739. self.galaxy_sessions.append( association )
  740. def add_dataset( self, dataset, parent_id=None, genome_build=None, set_hid=True, quota=True ):
  741. if isinstance( dataset, Dataset ):
  742. dataset = HistoryDatasetAssociation(dataset=dataset)
  743. object_session( self ).add( dataset )
  744. object_session( self ).flush()
  745. elif not isinstance( dataset, HistoryDatasetAssociation ):
  746. raise TypeError, ( "You can only add Dataset and HistoryDatasetAssociation instances to a history" +
  747. " ( you tried to add %s )." % str( dataset ) )
  748. if parent_id:
  749. for data in self.datasets:
  750. if data.id == parent_id:
  751. dataset.hid = data.hid
  752. break
  753. else:
  754. if set_hid:
  755. dataset.hid = self._next_hid()
  756. else:
  757. if set_hid:
  758. dataset.hid = self._next_hid()
  759. if quota and self.user:
  760. self.user.total_disk_usage += dataset.quota_amount( self.user )
  761. dataset.history = self
  762. if genome_build not in [None, '?']:
  763. self.genome_build = genome_build
  764. self.datasets.append( dataset )
  765. return dataset
    def copy( self, name=None, target_user=None, activatable=False, all_datasets=False ):
        """
        Return a copy of this history using the given `name` and `target_user`.
        If `activatable`, copy only non-deleted datasets. If `all_datasets`, copy
        non-deleted, deleted, and purged datasets.

        Annotations and tags are copied along with the history and each HDA.
        Quota accounting is applied only when copying to a *different* user,
        so a same-user copy does not double-count disk usage.
        """
        # Create new history.
        if not name:
            name = self.name
        if not target_user:
            target_user = self.user
        quota = True
        if target_user == self.user:
            # Same owner: the underlying datasets are shared, so do not
            # charge the user's quota a second time.
            quota = False
        new_history = History( name=name, user=target_user )
        db_session = object_session( self )
        db_session.add( new_history )
        # Flush so new_history has an id before annotations/HDAs reference it.
        db_session.flush()
        # Copy annotation.
        self.copy_item_annotation( db_session, self.user, self, target_user, new_history )
        # Copy Tags
        new_history.copy_tags_from(target_user=target_user, source_history=self)
        # Copy HDAs.  Which subset depends on the flags (activatable wins).
        if activatable:
            hdas = self.activatable_datasets
        elif all_datasets:
            hdas = self.datasets
        else:
            hdas = self.active_datasets
        for hda in hdas:
            # Copy HDA.
            new_hda = hda.copy( copy_children=True )
            # set_hid=False: the copy keeps the original's hid (hid_counter
            # is carried over below), preserving hid numbering.
            new_history.add_dataset( new_hda, set_hid = False, quota=quota )
            db_session.add( new_hda )
            db_session.flush()
            # Copy annotation.
            self.copy_item_annotation( db_session, self.user, hda, target_user, new_hda )
        new_history.hid_counter = self.hid_counter
        db_session.add( new_history )
        db_session.flush()
        return new_history
  807. @property
  808. def activatable_datasets( self ):
  809. # This needs to be a list
  810. return [ hda for hda in self.datasets if not hda.dataset.deleted ]
  811. def to_dict( self, view='collection', value_mapper = None ):
  812. # Get basic value.
  813. rval = super( History, self ).to_dict( view=view, value_mapper=value_mapper )
  814. # Add tags.
  815. tags_str_list = []
  816. for tag in self.tags:
  817. tag_str = tag.user_tname
  818. if tag.value is not None:
  819. tag_str += ":" + tag.user_value
  820. tags_str_list.append( tag_str )
  821. rval[ 'tags' ] = tags_str_list
  822. return rval
  823. def set_from_dict( self, new_data ):
  824. #AKA: set_api_value
  825. """
  826. Set object attributes to the values in dictionary new_data limiting
  827. to only those keys in dict_element_visible_keys.
  828. Returns a dictionary of the keys, values that have been changed.
  829. """
  830. # precondition: keys are proper, values are parsed and validated
  831. changed = {}
  832. # unknown keys are ignored here
  833. for key in [ k for k in new_data.keys() if k in self.dict_element_visible_keys ]:
  834. new_val = new_data[ key ]
  835. old_val = self.__getattribute__( key )
  836. if new_val == old_val:
  837. continue
  838. self.__setattr__( key, new_val )
  839. changed[ key ] = new_val
  840. return changed
  841. @property
  842. def latest_export( self ):
  843. exports = self.exports
  844. return exports and exports[ 0 ]
  845. @property
  846. def get_disk_size_bytes( self ):
  847. return self.get_disk_size( nice_size=False )
  848. def unhide_datasets( self ):
  849. for dataset in self.datasets:
  850. dataset.mark_unhidden()
  851. def resume_paused_jobs( self ):
  852. for dataset in self.datasets:
  853. job = dataset.creating_job
  854. if job is not None and job.state == Job.states.PAUSED:
  855. job.set_state(Job.states.NEW)
    def get_disk_size( self, nice_size=False ):
        """
        Total disk usage of this history in bytes, counting each underlying
        dataset only once (distinct dataset ids) and skipping purged HDAs and
        purged datasets.  When `nice_size` is True, return a human-readable
        string instead of an int.
        """
        # unique datasets only
        db_session = object_session( self )
        # Inner query: distinct (dataset_id, total_size) pairs for this
        # history's unpurged HDAs; outer query sums the sizes in SQL.
        rval = db_session.query(
            func.sum( db_session.query( HistoryDatasetAssociation.dataset_id, Dataset.total_size ).join( Dataset )
                                        .filter( HistoryDatasetAssociation.table.c.history_id == self.id )
                                        .filter( HistoryDatasetAssociation.purged != True )
                                        .filter( Dataset.purged != True )
                                        .distinct().subquery().c.total_size ) ).first()[0]
        if rval is None:
            # SUM over an empty set yields NULL; report zero usage.
            rval = 0
        if nice_size:
            rval = galaxy.datatypes.data.nice_size( rval )
        return rval
    @property
    def active_datasets_children_and_roles( self ):
        """
        Non-deleted HDAs of this history, hid-ordered, with children, dataset,
        and the dataset's permission actions/roles eagerly loaded.  The result
        is computed once and memoized on the instance.
        """
        if not hasattr(self, '_active_datasets_children_and_roles'):
            db_session = object_session( self )
            # joinedload options pull the related rows in one query instead
            # of triggering a lazy load per HDA.
            query = db_session.query( HistoryDatasetAssociation ).filter( HistoryDatasetAssociation.table.c.history_id == self.id ). \
                filter( not_( HistoryDatasetAssociation.deleted ) ). \
                order_by( HistoryDatasetAssociation.table.c.hid.asc() ). \
                options(
                    joinedload("children"),
                    joinedload("dataset"),
                    joinedload("dataset.actions"),
                    joinedload("dataset.actions.role"),
                )
            self._active_datasets_children_and_roles = query.all()
        return self._active_datasets_children_and_roles
  885. def contents_iter( self, **kwds ):
  886. """
  887. Fetch filtered list of contents of history.
  888. """
  889. default_contents_types = [
  890. 'dataset',
  891. ]
  892. types = kwds.get('types', default_contents_types)
  893. iters = []
  894. if 'dataset' in types:
  895. iters.append( self.__dataset_contents_iter( **kwds ) )
  896. return galaxy.util.merge_sorted_iterables( operator.attrgetter( "hid" ), *iters )
    def __dataset_contents_iter(self, **kwds):
        # Delegate to the generic content filter with the HDA content class.
        return self.__filter_contents( HistoryDatasetAssociation, **kwds )
  899. def __filter_contents( self, content_class, **kwds ):
  900. db_session = object_session( self )
  901. assert db_session != None
  902. query = db_session.query( content_class ).filter( content_class.table.c.history_id == self.id )
  903. query = query.order_by( content_class.table.c.hid.asc() )
  904. python_filter = None
  905. deleted = galaxy.util.string_as_bool_or_none( kwds.get( 'deleted', None ) )
  906. if deleted is not None:
  907. query = query.filter( content_class.deleted == deleted )
  908. visible = galaxy.util.string_as_bool_or_none( kwds.get( 'visible', None ) )
  909. if visible is not None:
  910. query = query.filter( content_class.visible == visible )
  911. if 'ids' in kwds:
  912. ids = kwds['ids']
  913. max_in_filter_length = kwds.get('max_in_filter_length', MAX_IN_FILTER_LENGTH)
  914. if len(ids) < max_in_filter_length:
  915. query = query.filter( content_class.id.in_(ids) )
  916. else:
  917. python_filter = lambda content: content.id in ids
  918. if python_filter:
  919. return ifilter(python_filter, query)
  920. else:
  921. return query
  922. def copy_tags_from(self,target_user,source_history):
  923. for src_shta in source_history.tags:
  924. new_shta = src_shta.copy()
  925. new_shta.user = target_user
  926. self.tags.append(new_shta)
  927. class HistoryUserShareAssociation( object ):
  928. def __init__( self ):
  929. self.history = None
  930. self.user = None
  931. class UserRoleAssociation( object ):
  932. def __init__( self, user, role ):
  933. self.user = user
  934. self.role = role
  935. class GroupRoleAssociation( object ):
  936. def __init__( self, group, role ):
  937. self.group = group
  938. self.role = role
  939. class Role( object, Dictifiable ):
  940. dict_collection_visible_keys = ( 'id', 'name' )
  941. dict_element_visible_keys = ( 'id', 'name', 'description', 'type' )
  942. private_id = None
  943. types = Bunch(
  944. PRIVATE = 'private',
  945. SYSTEM = 'system',
  946. USER = 'user',
  947. ADMIN = 'admin',
  948. SHARING = 'sharing'
  949. )
  950. def __init__( self, name="", description="", type="system", deleted=False ):
  951. self.name = name
  952. self.description = description
  953. self.type = type
  954. self.deleted = deleted
  955. class UserQuotaAssociation( object, Dictifiable ):
  956. dict_element_visible_keys = ( 'user', )
  957. def __init__( self, user, quota ):
  958. self.user = user
  959. self.quota = quota
  960. class GroupQuotaAssociation( object, Dictifiable ):
  961. dict_element_visible_keys = ( 'group', )
  962. def __init__( self, group, quota ):
  963. self.group = group
  964. self.quota = quota
  965. class Quota( object, Dictifiable ):
  966. dict_collection_visible_keys = ( 'id', 'name' )
  967. dict_element_visible_keys = ( 'id', 'name', 'description', 'bytes', 'operation', 'display_amount', 'default', 'users', 'groups' )
  968. valid_operations = ( '+', '-', '=' )
  969. def __init__( self, name="", description="", amount=0, operation="=" ):
  970. self.name = name
  971. self.description = description
  972. if amount is None:
  973. self.bytes = -1
  974. else:
  975. self.bytes = amount
  976. self.operation = operation
  977. def get_amount( self ):
  978. if self.bytes == -1:
  979. return None
  980. return self.bytes
  981. def set_amount( self, amount ):
  982. if amount is None:
  983. self.bytes = -1
  984. else:
  985. self.bytes = amount
  986. amount = property( get_amount, set_amount )
  987. @property
  988. def display_amount( self ):
  989. if self.bytes == -1:
  990. return "unlimited"
  991. else:
  992. return nice_size( self.bytes )
class DefaultQuotaAssociation( Quota, Dictifiable ):
    """Assigns a Quota as the default for a class of users (registered or
    unregistered)."""
    dict_element_visible_keys = ( 'type', )
    types = Bunch(
        UNREGISTERED = 'unregistered',
        REGISTERED = 'registered'
    )
    def __init__( self, type, quota ):
        # NOTE(review): Quota.__init__ is intentionally not invoked here, so
        # name/description/bytes are never set on this subclass -- confirm
        # nothing relies on those attributes for default associations.
        assert type in self.types.__dict__.values(), 'Invalid type'
        self.type = type
        self.quota = quota
  1003. class DatasetPermissions( object ):
  1004. def __init__( self, action, dataset, role ):
  1005. self.action = action
  1006. self.dataset = dataset
  1007. self.role = role
  1008. class LibraryPermissions( object ):
  1009. def __init__( self, action, library_item, role ):
  1010. self.action = action
  1011. if isinstance( library_item, Library ):
  1012. self.library = library_item
  1013. else:
  1014. raise "Invalid Library specified: %s" % library_item.__class__.__name__
  1015. self.role = role
  1016. class LibraryFolderPermissions( object ):
  1017. def __init__( self, action, library_item, role ):
  1018. self.action = action
  1019. if isinstance( library_item, LibraryFolder ):
  1020. self.folder = library_item
  1021. else:
  1022. raise "Invalid LibraryFolder specified: %s" % library_item.__class__.__name__
  1023. self.role = role
  1024. class LibraryDatasetPermissions( object ):
  1025. def __init__( self, action, library_item, role ):
  1026. self.action = action
  1027. if isinstance( library_item, LibraryDataset ):
  1028. self.library_dataset = library_item
  1029. else:
  1030. raise "Invalid LibraryDataset specified: %s" % library_item.__class__.__name__
  1031. self.role = role
  1032. class LibraryDatasetDatasetAssociationPermissions( object ):
  1033. def __init__( self, action, library_item, role ):
  1034. self.action = action
  1035. if isinstance( library_item, LibraryDatasetDatasetAssociation ):
  1036. self.library_dataset_dataset_association = library_item
  1037. else:
  1038. raise "Invalid LibraryDatasetDatasetAssociation specified: %s" % library_item.__class__.__name__
  1039. self.role = role
  1040. class DefaultUserPermissions( object ):
  1041. def __init__( self, user, action, role ):
  1042. self.user = user
  1043. self.action = action
  1044. self.role = role
  1045. class DefaultHistoryPermissions( object ):
  1046. def __init__( self, history, action, role ):
  1047. self.history = history
  1048. self.action = action
  1049. self.role = role
class Dataset( object ):
    """
    The storage-level record for a file on disk.  User-visible metadata lives
    on the association classes layered above it (HDAs/LDDAs); a Dataset only
    tracks location, size, lifecycle state, and deletion/purge flags.  File
    access is delegated to the class-level `object_store`.
    """
    # Lifecycle states a dataset can be in.
    states = Bunch( NEW = 'new',
                    UPLOAD = 'upload',
                    QUEUED = 'queued',
                    RUNNING = 'running',
                    OK = 'ok',
                    EMPTY = 'empty',
                    ERROR = 'error',
                    DISCARDED = 'discarded',
                    PAUSED = 'paused',
                    SETTING_METADATA = 'setting_metadata',
                    FAILED_METADATA = 'failed_metadata' )
    # Status messages reported while converting a dataset to another format.
    conversion_messages = Bunch( PENDING = "pending",
                                 NO_DATA = "no data",
                                 NO_CHROMOSOME = "no chromosome",
                                 NO_CONVERTER = "no converter",
                                 NO_TOOL = "no tool",
                                 DATA = "data",
                                 ERROR = "error",
                                 OK = "ok" )
    # Security actions that may be granted on datasets.
    permitted_actions = get_permitted_actions( filter='DATASET' )
    file_path = "/tmp/"
    object_store = None # This gets initialized in mapping.py (method init) by app.py
    engine = None
    def __init__( self, id=None, state=None, external_filename=None, extra_files_path=None, file_size=None, purgable=True, uuid=None ):
        self.id = id
        self.state = state
        self.deleted = False
        self.purged = False
        self.purgable = purgable
        # When set, the file lives outside the object store at this path.
        self.external_filename = external_filename
        self._extra_files_path = extra_files_path
        self.file_size = file_size
        # Accepts an existing uuid (any str()-able form) or generates one.
        if uuid is None:
            self.uuid = uuid4()
        else:
            self.uuid = UUID(str(uuid))
    def get_file_name( self ):
        """Absolute path to the dataset's file: from the object store, unless
        an external filename overrides it."""
        if not self.external_filename:
            assert self.id is not None, "ID must be set before filename used (commit the object)"
            assert self.object_store is not None, "Object Store has not been initialized for dataset %s" % self.id
            filename = self.object_store.get_filename( self )
            return filename
        else:
            filename = self.external_filename
        # Make filename absolute
        return os.path.abspath( filename )
    def set_file_name ( self, filename ):
        # An empty/None filename clears the external override so the object
        # store is consulted again.
        if not filename:
            self.external_filename = None
        else:
            self.external_filename = filename
    file_name = property( get_file_name, set_file_name )
    @property
    def extra_files_path( self ):
        # Directory holding auxiliary files (e.g. composite datatype parts).
        return self.object_store.get_filename( self, dir_only=True, extra_dir=self._extra_files_path or "dataset_%d_files" % self.id )
    def _calculate_size( self ):
        """Size in bytes from the filesystem / object store; 0 when an
        external file cannot be stat'ed."""
        if self.external_filename:
            try:
                return os.path.getsize(self.external_filename)
            except OSError:
                return 0
        else:
            return self.object_store.size(self)
    def get_size( self, nice_size=False ):
        """Returns the size of the data on disk"""
        # Prefer the cached file_size; fall back to a live size check.
        if self.file_size:
            if nice_size:
                return galaxy.datatypes.data.nice_size( self.file_size )
            else:
                return self.file_size
        else:
            if nice_size:
                return galaxy.datatypes.data.nice_size( self._calculate_size() )
            else:
                return self._calculate_size()
    def set_size( self ):
        """Record the size of the data on disk (no-op if file_size is set)"""
        if not self.file_size:
            self.file_size = self._calculate_size()
    def get_total_size( self ):
        """Total size including extra files; computed and cached when unset."""
        if self.total_size is not None:
            return self.total_size
        if self.file_size:
            # for backwards compatibility, set if unset
            self.set_total_size()
            db_session = object_session( self )
            db_session.flush()
            return self.total_size
        return 0
    def set_total_size( self ):
        """Cache total_size as file_size plus the sizes of all extra files."""
        if self.file_size is None:
            self.set_size()
        self.total_size = self.file_size or 0
        if self.object_store.exists(self, extra_dir=self._extra_files_path or "dataset_%d_files" % self.id, dir_only=True):
            for root, dirs, files in os.walk( self.extra_files_path ):
                self.total_size += sum( [ os.path.getsize( os.path.join( root, file ) ) for file in files if os.path.exists( os.path.join( root, file ) ) ] )
    def has_data( self ):
        """Detects whether there is any data"""
        return self.get_size() > 0
    def mark_deleted( self, include_children=True ):
        # NOTE(review): `include_children` is accepted for interface
        # compatibility but ignored here -- confirm subclasses that need it
        # override this method.
        self.deleted = True
    def is_multi_byte( self ):
        """Heuristic: True when the first 100 characters decode as UTF-8 and
        contain multi-byte text."""
        if not self.has_data():
            return False
        try:
            # NOTE(review): the file handle is not explicitly closed here;
            # it is reclaimed by garbage collection.
            return is_multi_byte( codecs.open( self.file_name, 'r', 'utf-8' ).read( 100 ) )
        except UnicodeDecodeError:
            return False
    # FIXME: sqlalchemy will replace this
    def _delete(self):
        """Remove the file that corresponds to this data"""
        self.object_store.delete(self)
    @property
    def user_can_purge( self ):
        # Purgable only when not already purged, not in any library, and every
        # history association has itself been purged.
        return self.purged == False \
                and not bool( self.library_associations ) \
                and len( self.history_associations ) == len( self.purged_history_associations )
    def full_delete( self ):
        """Remove the file and extra files, marks deleted and purged"""
        # os.unlink( self.file_name )
        self.object_store.delete(self)
        if self.object_store.exists(self, extra_dir=self._extra_files_path or "dataset_%d_files" % self.id, dir_only=True):
            self.object_store.delete(self, entire_dir=True, extra_dir=self._extra_files_path or "dataset_%d_files" % self.id, dir_only=True)
        # if os.path.exists( self.extra_files_path ):
        #     shutil.rmtree( self.extra_files_path )
        # TODO: purge metadata files
        self.deleted = True
        self.purged = True
    def get_access_roles( self, trans ):
        """Roles granted the DATASET_ACCESS action on this dataset."""
        roles = []
        for dp in self.actions:
            if dp.action == trans.app.security_agent.permitted_actions.DATASET_ACCESS.action:
                roles.append( dp.role )
        return roles
    def get_manage_permissions_roles( self, trans ):
        """Roles granted the DATASET_MANAGE_PERMISSIONS action on this dataset."""
        roles = []
        for dp in self.actions:
            if dp.action == trans.app.security_agent.permitted_actions.DATASET_MANAGE_PERMISSIONS.action:
                roles.append( dp.role )
        return roles
    def has_manage_permissions_roles( self, trans ):
        """True when any role holds DATASET_MANAGE_PERMISSIONS on this dataset."""
        for dp in self.actions:
            if dp.action == trans.app.security_agent.permitted_actions.DATASET_MANAGE_PERMISSIONS.action:
                return True
        return False
  1196. class DatasetInst

Large files files are truncated, but you can click here to view the full file