
/lib/galaxy/jobs/__init__.py

https://bitbucket.org/cistrome/cistrome-harvard/
  1. """
  2. Support for running a tool in Galaxy via an internal job management system
  3. """
  4. from abc import ABCMeta
  5. from abc import abstractmethod
  6. import time
  7. import copy
  8. import datetime
  9. import galaxy
  10. import logging
  11. import os
  12. import pwd
  13. import random
  14. import re
  15. import shutil
  16. import subprocess
  17. import sys
  18. import traceback
  19. from galaxy import model, util
  20. from galaxy.datatypes import metadata
  21. from galaxy.exceptions import ObjectInvalid, ObjectNotFound
  22. from galaxy.jobs.actions.post import ActionBox
  23. from galaxy.jobs.mapper import JobRunnerMapper
  24. from galaxy.jobs.runners import BaseJobRunner
  25. from galaxy.util.bunch import Bunch
  26. from galaxy.util.expressions import ExpressionContext
  27. from galaxy.util.json import from_json_string
  28. from galaxy.util import unicodify
  29. from .output_checker import check_output
  30. from .datasets import TaskPathRewriter
  31. from .datasets import OutputsToWorkingDirectoryPathRewriter
  32. from .datasets import NullDatasetPathRewriter
  33. from .datasets import DatasetPath
  34. log = logging.getLogger( __name__ )
  35. DATABASE_MAX_STRING_SIZE = util.DATABASE_MAX_STRING_SIZE
  36. DATABASE_MAX_STRING_SIZE_PRETTY = util.DATABASE_MAX_STRING_SIZE_PRETTY
  37. # This file, if created in the job's working directory, will be used for
  38. # setting advanced metadata properties on the job and its associated outputs.
  39. # This interface is currently experimental, is only used by the upload tool,
  40. # and should eventually become API'd
  41. TOOL_PROVIDED_JOB_METADATA_FILE = 'galaxy.json'
  42. class JobDestination( Bunch ):
  43. """
  44. Provides details about where a job runs
  45. """
  46. def __init__(self, **kwds):
  47. self['id'] = None
  48. self['url'] = None
  49. self['tags'] = None
  50. self['runner'] = None
  51. self['legacy'] = False
  52. self['converted'] = False
  53. # dict is appropriate (rather than a bunch) since keys may not be valid as attributes
  54. self['params'] = dict()
  55. super(JobDestination, self).__init__(**kwds)
  56. # Store tags as a list
  57. if self.tags is not None:
  58. self['tags'] = [ x.strip() for x in self.tags.split(',') ]
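
# Illustrative sketch (not part of the original module): JobDestination is a
# Bunch, so keyword arguments become attributes and a comma-separated 'tags'
# string is split into a list. The ids and values below are hypothetical.
#
#   dest = JobDestination( id='local_dest', runner='local', tags='short_jobs, local_cluster' )
#   dest.tags                  # ['short_jobs', 'local_cluster']
#   dest.params['priority'] = 'low'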

class JobToolConfiguration( Bunch ):
    """
    Provides details on what handler and destination a tool should use

    A JobToolConfiguration will have the required attribute 'id' and optional
    attributes 'handler', 'destination', and 'params'
    """
    def __init__(self, **kwds):
        self['handler'] = None
        self['destination'] = None
        self['params'] = dict()
        super(JobToolConfiguration, self).__init__(**kwds)


class JobConfiguration( object ):
    """A parser and interface to advanced job management features.

    These features are configured in the job configuration, by default, ``job_conf.xml``
    """
    DEFAULT_NWORKERS = 4

    def __init__(self, app):
        """Parse the job configuration XML.
        """
        self.app = app
        self.runner_plugins = []
        self.handlers = {}
        self.handler_runner_plugins = {}
        self.default_handler_id = None
        self.destinations = {}
        self.destination_tags = {}
        self.default_destination_id = None
        self.tools = {}
        self.limits = Bunch()

        # Initialize the config
        try:
            tree = util.parse_xml(self.app.config.job_config_file)
            self.__parse_job_conf_xml(tree)
        except IOError:
            log.warning( 'Job configuration "%s" does not exist, using legacy job configuration from Galaxy config file "%s" instead' % ( self.app.config.job_config_file, self.app.config.config_file ) )
            self.__parse_job_conf_legacy()

    def __parse_job_conf_xml(self, tree):
        """Loads the new-style job configuration from options in the job config file (by default, job_conf.xml).

        :param tree: Object representing the root ``<job_conf>`` object in the job config file.
        :type tree: ``xml.etree.ElementTree.Element``
        """
        root = tree.getroot()
        log.debug('Loading job configuration from %s' % self.app.config.job_config_file)

        # Parse job plugins
        plugins = root.find('plugins')
        if plugins is not None:
            for plugin in self.__findall_with_required(plugins, 'plugin', ('id', 'type', 'load')):
                if plugin.get('type') == 'runner':
                    workers = plugin.get('workers', plugins.get('workers', JobConfiguration.DEFAULT_NWORKERS))
                    runner_kwds = self.__get_params(plugin)
                    runner_info = dict(id=plugin.get('id'),
                                       load=plugin.get('load'),
                                       workers=int(workers),
                                       kwds=runner_kwds)
                    self.runner_plugins.append(runner_info)
                else:
                    log.error('Unknown plugin type: %s' % plugin.get('type'))

        # Load tasks if configured
        if self.app.config.use_tasked_jobs:
            self.runner_plugins.append(dict(id='tasks', load='tasks', workers=self.app.config.local_task_queue_workers))

        # Parse handlers
        handlers = root.find('handlers')
        if handlers is not None:
            for handler in self.__findall_with_required(handlers, 'handler'):
                id = handler.get('id')
                if id in self.handlers:
                    log.error("Handler '%s' overlaps handler with the same name, ignoring" % id)
                else:
                    log.debug("Read definition for handler '%s'" % id)
                    self.handlers[id] = (id,)
                    for plugin in handler.findall('plugin'):
                        if id not in self.handler_runner_plugins:
                            self.handler_runner_plugins[id] = []
                        self.handler_runner_plugins[id].append( plugin.get('id') )
                    if handler.get('tags', None) is not None:
                        for tag in [ x.strip() for x in handler.get('tags').split(',') ]:
                            if tag in self.handlers:
                                self.handlers[tag].append(id)
                            else:
                                self.handlers[tag] = [id]

        # Determine the default handler(s)
        self.default_handler_id = self.__get_default(handlers, self.handlers.keys())

        # Parse destinations
        destinations = root.find('destinations')
        for destination in self.__findall_with_required(destinations, 'destination', ('id', 'runner')):
            id = destination.get('id')
            job_destination = JobDestination(**dict(destination.items()))
            job_destination['params'] = self.__get_params(destination)
            self.destinations[id] = (job_destination,)
            if job_destination.tags is not None:
                for tag in job_destination.tags:
                    if tag not in self.destinations:
                        self.destinations[tag] = []
                    self.destinations[tag].append(job_destination)

        # Determine the default destination
        self.default_destination_id = self.__get_default(destinations, self.destinations.keys())

        # Parse tool mappings
        tools = root.find('tools')
        if tools is not None:
            for tool in self.__findall_with_required(tools, 'tool'):
                # There can be multiple definitions with identical ids, but different params
                id = tool.get('id').lower().rstrip('/')
                if id not in self.tools:
                    self.tools[id] = list()
                self.tools[id].append(JobToolConfiguration(**dict(tool.items())))
                self.tools[id][-1]['params'] = self.__get_params(tool)

        types = dict(registered_user_concurrent_jobs=int,
                     anonymous_user_concurrent_jobs=int,
                     walltime=str,
                     output_size=int)

        self.limits = Bunch(registered_user_concurrent_jobs=None,
                            anonymous_user_concurrent_jobs=None,
                            walltime=None,
                            walltime_delta=None,
                            output_size=None,
                            concurrent_jobs={})

        # Parse job limits
        limits = root.find('limits')
        if limits is not None:
            for limit in self.__findall_with_required(limits, 'limit', ('type',)):
                type = limit.get('type')
                if type == 'concurrent_jobs':
                    id = limit.get('tag', None) or limit.get('id')
                    self.limits.concurrent_jobs[id] = int(limit.text)
                elif limit.text:
                    self.limits.__dict__[type] = types.get(type, str)(limit.text)

        if self.limits.walltime is not None:
            h, m, s = [ int( v ) for v in self.limits.walltime.split( ':' ) ]
            self.limits.walltime_delta = datetime.timedelta( 0, s, 0, 0, m, h )

        log.debug('Done loading job configuration')
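
    # Illustrative sketch (not part of the original module): the XML shape this
    # parser expects, derived from the element and attribute names read above.
    # All ids and values are hypothetical.
    #
    #   <job_conf>
    #       <plugins workers="4">
    #           <plugin id="local" type="runner" load="galaxy.jobs.runners.local:LocalJobRunner" workers="2"/>
    #       </plugins>
    #       <handlers default="main">
    #           <handler id="main"/>
    #       </handlers>
    #       <destinations default="local">
    #           <destination id="local" runner="local"/>
    #       </destinations>
    #       <tools>
    #           <tool id="filter_tool" destination="local"/>
    #       </tools>
    #       <limits>
    #           <limit type="walltime">24:00:00</limit>
    #           <limit type="concurrent_jobs" id="local">4</limit>
    #       </limits>
    #   </job_conf>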

    def __parse_job_conf_legacy(self):
        """Loads the old-style job configuration from options in the galaxy config file (by default, universe_wsgi.ini).
        """
        log.debug('Loading job configuration from %s' % self.app.config.config_file)

        # Always load local and lwr
        self.runner_plugins = [dict(id='local', load='local', workers=self.app.config.local_job_queue_workers), dict(id='lwr', load='lwr', workers=self.app.config.cluster_job_queue_workers)]
        # Load tasks if configured
        if self.app.config.use_tasked_jobs:
            self.runner_plugins.append(dict(id='tasks', load='tasks', workers=self.app.config.local_task_queue_workers))
        for runner in self.app.config.start_job_runners:
            self.runner_plugins.append(dict(id=runner, load=runner, workers=self.app.config.cluster_job_queue_workers))

        # Set the handlers
        for id in self.app.config.job_handlers:
            self.handlers[id] = (id,)

        self.handlers['default_job_handlers'] = self.app.config.default_job_handlers
        self.default_handler_id = 'default_job_handlers'

        # Set tool handler configs
        for id, tool_handlers in self.app.config.tool_handlers.items():
            self.tools[id] = list()
            for handler_config in tool_handlers:
                # rename the 'name' key to 'handler'
                handler_config['handler'] = handler_config.pop('name')
                self.tools[id].append(JobToolConfiguration(**handler_config))

        # Set tool runner configs
        for id, tool_runners in self.app.config.tool_runners.items():
            # Might have been created in the handler parsing above
            if id not in self.tools:
                self.tools[id] = list()
            for runner_config in tool_runners:
                url = runner_config['url']
                if url not in self.destinations:
                    # Create a new "legacy" JobDestination - it will have its URL converted to destination params once the appropriate plugin has loaded
                    self.destinations[url] = (JobDestination(id=url, runner=url.split(':', 1)[0], url=url, legacy=True, converted=False),)
                for tool_conf in self.tools[id]:
                    if tool_conf.params == runner_config.get('params', {}):
                        tool_conf['destination'] = url
                        break
                else:
                    # There was not an existing config (from the handlers section) with the same params
                    # rename the 'url' key to 'destination'
                    runner_config['destination'] = runner_config.pop('url')
                    self.tools[id].append(JobToolConfiguration(**runner_config))

        self.destinations[self.app.config.default_cluster_job_runner] = (JobDestination(id=self.app.config.default_cluster_job_runner, runner=self.app.config.default_cluster_job_runner.split(':', 1)[0], url=self.app.config.default_cluster_job_runner, legacy=True, converted=False),)
        self.default_destination_id = self.app.config.default_cluster_job_runner

        # Set the job limits
        self.limits = Bunch(registered_user_concurrent_jobs=self.app.config.registered_user_job_limit,
                            anonymous_user_concurrent_jobs=self.app.config.anonymous_user_job_limit,
                            walltime=self.app.config.job_walltime,
                            walltime_delta=self.app.config.job_walltime_delta,
                            output_size=self.app.config.output_size_limit,
                            concurrent_jobs={})

        log.debug('Done loading job configuration')

    def __get_default(self, parent, names):
        """Returns the default attribute set in a parent tag like <handlers> or <destinations>, or returns the ID of the child if there is no explicit default and only one child.

        :param parent: Object representing a tag that may or may not have a 'default' attribute.
        :type parent: ``xml.etree.ElementTree.Element``
        :param names: The list of destination or handler IDs or tags that were loaded.
        :type names: list of str

        :returns: str -- id or tag representing the default.
        """
        rval = parent.get('default')
        if rval is not None:
            # If the parent element has a 'default' attribute, use the id or tag in that attribute
            if rval not in names:
                raise Exception("<%s> default attribute '%s' does not match a defined id or tag in a child element" % (parent.tag, rval))
            log.debug("<%s> default set to child with id or tag '%s'" % (parent.tag, rval))
        elif len(names) == 1:
            log.info("Setting <%s> default to child with id '%s'" % (parent.tag, names[0]))
            rval = names[0]
        else:
            raise Exception("No <%s> default specified, please specify a valid id or tag with the 'default' attribute" % parent.tag)
        return rval

    def __findall_with_required(self, parent, match, attribs=None):
        """Like ``xml.etree.ElementTree.Element.findall()``, except only returns children that have the specified attribs.

        :param parent: Parent element in which to find.
        :type parent: ``xml.etree.ElementTree.Element``
        :param match: Name of child elements to find.
        :type match: str
        :param attribs: List of required attributes in children elements.
        :type attribs: list of str

        :returns: list of ``xml.etree.ElementTree.Element``
        """
        rval = []
        if attribs is None:
            attribs = ('id',)
        for elem in parent.findall(match):
            for attrib in attribs:
                if attrib not in elem.attrib:
                    log.warning("required '%s' attribute is missing from <%s> element" % (attrib, match))
                    break
            else:
                rval.append(elem)
        return rval

    def __get_params(self, parent):
        """Parses any child <param> tags in to a dictionary suitable for persistence.

        :param parent: Parent element in which to find child <param> tags.
        :type parent: ``xml.etree.ElementTree.Element``

        :returns: dict
        """
        rval = {}
        for param in parent.findall('param'):
            rval[param.get('id')] = param.text
        return rval
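
    # Illustrative sketch (not part of the original module): <param> children of
    # a <plugin>, <destination>, or <tool> element become a flat id -> text dict.
    # The destination and parameter below are hypothetical.
    #
    #   <destination id="sge_default" runner="drmaa">
    #       <param id="nativeSpecification">-q all.q</param>
    #   </destination>
    #
    # __get_params() would return {'nativeSpecification': '-q all.q'}, which is
    # stored as job_destination['params'].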

    @property
    def default_job_tool_configuration(self):
        """The default JobToolConfiguration, used if a tool does not have an explicit definition in the configuration. It consists of a reference to the default handler and default destination.

        :returns: JobToolConfiguration -- a representation of a <tool> element that uses the default handler and destination
        """
        return JobToolConfiguration(id='default', handler=self.default_handler_id, destination=self.default_destination_id)

    # Called upon instantiation of a Tool object
    def get_job_tool_configurations(self, ids):
        """Get all configured JobToolConfigurations for a tool ID, or, if given a list of IDs, the JobToolConfigurations for the first id in ``ids`` matching a tool definition.

        .. note::

            You should not mix tool shed tool IDs, versionless tool shed IDs, and tool config tool IDs that refer to the same tool.

        :param ids: Tool ID or IDs to fetch the JobToolConfiguration of.
        :type ids: list or str.

        :returns: list -- JobToolConfiguration Bunches representing <tool> elements matching the specified ID(s).

        Example tool ID strings include:

        * Full tool shed id: ``toolshed.example.org/repos/nate/filter_tool_repo/filter_tool/1.0.0``
        * Tool shed id less version: ``toolshed.example.org/repos/nate/filter_tool_repo/filter_tool``
        * Tool config tool id: ``filter_tool``
        """
        rval = []
        # listify if ids is a single (string) id
        ids = util.listify(ids)
        for id in ids:
            if id in self.tools:
                # If a tool has definitions that include job params but not a
                # definition for jobs without params, include the default
                # config
                for job_tool_configuration in self.tools[id]:
                    if not job_tool_configuration.params:
                        break
                else:
                    rval.append(self.default_job_tool_configuration)
                rval.extend(self.tools[id])
                break
        else:
            rval.append(self.default_job_tool_configuration)
        return rval
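
    # Illustrative sketch (not part of the original module): resolving the job
    # configuration for a hypothetical tool id. A tool with no <tool> entry
    # falls back to the default handler/destination configuration.
    #
    #   configs = app.job_config.get_job_tool_configurations( ['filter_tool'] )
    #   configs[0].handler      # handler id or tag (may be None for explicit <tool> entries)
    #   configs[0].destination  # destination id or tag (may be None for explicit <tool> entries)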

    def __get_single_item(self, collection):
        """Given a collection of handlers or destinations, return one item from the collection at random.
        """
        # Skip random.choice() for single-item collections, on the assumption
        # that avoiding it is faster
        if len(collection) == 1:
            return collection[0]
        else:
            return random.choice(collection)

    # This is called by Tool.get_job_handler()
    def get_handler(self, id_or_tag):
        """Given a handler ID or tag, return the provided ID or an ID matching the provided tag

        :param id_or_tag: A handler ID or tag.
        :type id_or_tag: str

        :returns: str -- A valid job handler ID.
        """
        if id_or_tag is None:
            id_or_tag = self.default_handler_id
        return self.__get_single_item(self.handlers[id_or_tag])

    def get_destination(self, id_or_tag):
        """Given a destination ID or tag, return the JobDestination matching the provided ID or tag

        :param id_or_tag: A destination ID or tag.
        :type id_or_tag: str

        :returns: JobDestination -- A valid destination

        Destinations are deepcopied as they are expected to be passed in to job
        runners, which will modify them for persisting params set at runtime.
        """
        if id_or_tag is None:
            id_or_tag = self.default_destination_id
        return copy.deepcopy(self.__get_single_item(self.destinations[id_or_tag]))

    def get_destinations(self, id_or_tag):
        """Given a destination ID or tag, return all JobDestinations matching the provided ID or tag

        :param id_or_tag: A destination ID or tag.
        :type id_or_tag: str

        :returns: list or tuple of JobDestinations

        Destinations are not deepcopied, so they should not be passed to
        anything which might modify them.
        """
        return self.destinations.get(id_or_tag, None)
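
    # Illustrative sketch (not part of the original module): IDs map to a
    # single-element tuple and tags map to a list, so resolving a tag picks one
    # member at random. The names below are hypothetical.
    #
    #   app.job_config.get_handler( None )            # the configured default handler id
    #   app.job_config.get_destination( 'local' )     # deep copy of the 'local' destination
    #   app.job_config.get_destinations( 'cluster' )  # every destination tagged 'cluster' (not copied)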

    def get_job_runner_plugins(self, handler_id):
        """Load all configured job runner plugins

        :returns: dict -- loaded job runner plugin instances, keyed by runner plugin id
        """
        rval = {}
        if handler_id in self.handler_runner_plugins:
            plugins_to_load = [ rp for rp in self.runner_plugins if rp['id'] in self.handler_runner_plugins[handler_id] ]
            log.info( "Handler '%s' will load specified runner plugins: %s", handler_id, ', '.join( [ rp['id'] for rp in plugins_to_load ] ) )
        else:
            plugins_to_load = self.runner_plugins
            log.info( "Handler '%s' will load all configured runner plugins", handler_id )
        for runner in plugins_to_load:
            class_names = []
            module = None
            id = runner['id']
            load = runner['load']
            if ':' in load:
                # Name to load was specified as '<module>:<class>'
                module_name, class_name = load.rsplit(':', 1)
                class_names = [ class_name ]
                module = __import__( module_name )
            else:
                # Name to load was specified as '<module>'
                if '.' not in load:
                    # For legacy reasons, try from galaxy.jobs.runners first if there's no '.' in the name
                    module_name = 'galaxy.jobs.runners.' + load
                    try:
                        module = __import__( module_name )
                    except ImportError:
                        # No such module, we'll retry without prepending galaxy.jobs.runners.
                        # All other exceptions (e.g. something wrong with the module code) will raise
                        pass
                if module is None:
                    # If the name included a '.' or loading from the static runners path failed, try the original name
                    module = __import__( load )
                    module_name = load
            if module is None:
                # Module couldn't be loaded, error should have already been displayed
                continue
            for comp in module_name.split( "." )[1:]:
                module = getattr( module, comp )
            if not class_names:
                # If there's not a ':', we check <module>.__all__ for class names
                try:
                    assert module.__all__
                    class_names = module.__all__
                except AssertionError:
                    log.error( 'Runner "%s" does not contain a list of exported classes in __all__' % load )
                    continue
            for class_name in class_names:
                runner_class = getattr( module, class_name )
                try:
                    assert issubclass(runner_class, BaseJobRunner)
                except TypeError:
                    log.warning("A non-class name was found in __all__, ignoring: %s" % id)
                    continue
                except AssertionError:
                    log.warning("Job runner classes must be subclassed from BaseJobRunner, %s has bases: %s" % (id, runner_class.__bases__))
                    continue
                try:
                    rval[id] = runner_class( self.app, runner[ 'workers' ], **runner.get( 'kwds', {} ) )
                except TypeError:
                    log.exception( "Job runner '%s:%s' has not been converted to a new-style runner or encountered TypeError on load" % ( module_name, class_name ) )
                    rval[id] = runner_class( self.app )
                log.debug( "Loaded job runner '%s:%s' as '%s'" % ( module_name, class_name, id ) )
        return rval
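
    # Illustrative sketch (not part of the original module): the two accepted
    # forms of a runner plugin's 'load' attribute, as handled above. The second
    # module/class pair is hypothetical.
    #
    #   <plugin id="local" type="runner" load="local"/>
    #       -> imported as galaxy.jobs.runners.local; classes taken from the module's __all__
    #   <plugin id="custom" type="runner" load="my_runners.custom:CustomJobRunner"/>
    #       -> imported as my_runners.custom; only CustomJobRunner is loaded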

    def is_id(self, collection):
        """Given a collection of handlers or destinations, indicate whether the collection represents a real ID (a tuple) rather than a tag

        :param collection: A representation of a destination or handler
        :type collection: tuple or list

        :returns: bool
        """
        return type(collection) == tuple

    def is_tag(self, collection):
        """Given a collection of handlers or destinations, indicate whether the collection represents a tag (a list) rather than a real ID

        :param collection: A representation of a destination or handler
        :type collection: tuple or list

        :returns: bool
        """
        return type(collection) == list

    def is_handler(self, server_name):
        """Given a server name, indicate whether the server is a job handler

        :param server_name: The name to check
        :type server_name: str

        :return: bool
        """
        for collection in self.handlers.values():
            if server_name in collection:
                return True
        return False

    def convert_legacy_destinations(self, job_runners):
        """Converts legacy (from a URL) destinations to contain the appropriate runner params defined in the URL.

        :param job_runners: All loaded job runner plugins.
        :type job_runners: list of job runner plugins
        """
        for id, destination in [ ( id, destinations[0] ) for id, destinations in self.destinations.items() if self.is_id(destinations) ]:
            # Only need to deal with real destinations, not members of tags
            if destination.legacy and not destination.converted:
                if destination.runner in job_runners:
                    destination.params = job_runners[destination.runner].url_to_destination(destination.url).params
                    destination.converted = True
                    if destination.params:
                        log.debug("Legacy destination with id '%s', url '%s' converted, got params:" % (id, destination.url))
                        for k, v in destination.params.items():
                            log.debug("    %s: %s" % (k, v))
                    else:
                        log.debug("Legacy destination with id '%s', url '%s' converted, got no params" % (id, destination.url))
                else:
                    log.warning("Legacy destination with id '%s' could not be converted: Unknown runner plugin: %s" % (id, destination.runner))


class JobWrapper( object ):
    """
    Wraps a 'model.Job' with convenience methods for running processes and
    state management.
    """
    def __init__( self, job, queue ):
        self.job_id = job.id
        self.session_id = job.session_id
        self.user_id = job.user_id
        self.tool = queue.app.toolbox.tools_by_id.get( job.tool_id, None )
        self.queue = queue
        self.app = queue.app
        self.sa_session = self.app.model.context
        self.extra_filenames = []
        self.command_line = None
        # Tool versioning variables
        self.write_version_cmd = None
        self.version_string = ""
        self.galaxy_lib_dir = None
        # With job outputs in the working directory, we need the working
        # directory to be set before prepare is run, or else premature deletion
        # and job recovery fail.
        # Create the working dir if necessary
        try:
            self.app.object_store.create(job, base_dir='job_work', dir_only=True, extra_dir=str(self.job_id))
            self.working_directory = self.app.object_store.get_filename(job, base_dir='job_work', dir_only=True, extra_dir=str(self.job_id))
            log.debug('(%s) Working directory for job is: %s' % (self.job_id, self.working_directory))
        except ObjectInvalid:
            raise Exception('Unable to create job working directory, job failure')
        self.dataset_path_rewriter = self._job_dataset_path_rewriter( self.working_directory )
        self.output_paths = None
        self.output_hdas_and_paths = None
        self.tool_provided_job_metadata = None
        # Wrapper holding the info required to restore and clean up from files used for setting metadata externally
        self.external_output_metadata = metadata.JobExternalOutputMetadataWrapper( job )
        self.job_runner_mapper = JobRunnerMapper( self, queue.dispatcher.url_to_destination, self.app.job_config )
        self.params = None
        if job.params:
            self.params = from_json_string( job.params )
        self.__user_system_pwent = None
        self.__galaxy_system_pwent = None

    def _job_dataset_path_rewriter( self, working_directory ):
        if self.app.config.outputs_to_working_directory:
            dataset_path_rewriter = OutputsToWorkingDirectoryPathRewriter( working_directory )
        else:
            dataset_path_rewriter = NullDatasetPathRewriter( )
        return dataset_path_rewriter

    def can_split( self ):
        # Should the job handler split this job up?
        return self.app.config.use_tasked_jobs and self.tool.parallelism

    def get_job_runner_url( self ):
        log.warning('(%s) Job runner URLs are deprecated, use destinations instead.' % self.job_id)
        return self.job_destination.url

    def get_parallelism(self):
        return self.tool.parallelism

    # legacy naming
    get_job_runner = get_job_runner_url

    @property
    def job_destination(self):
        """Return the JobDestination that this job will use to run. This will
        either be a configured destination, a randomly selected destination if
        the configured destination was a tag, or a dynamically generated
        destination from the dynamic runner.

        Calling this method for the first time causes the dynamic runner to do
        its calculation, if any.

        :returns: ``JobDestination``
        """
        return self.job_runner_mapper.get_job_destination(self.params)

    def get_job( self ):
        return self.sa_session.query( model.Job ).get( self.job_id )

    def get_id_tag(self):
        # For compatibility with drmaa, which uses job_id right now, and TaskWrapper
        return self.get_job().get_id_tag()

    def get_param_dict( self ):
        """
        Restore the dictionary of parameters from the database.
        """
        job = self.get_job()
        param_dict = dict( [ ( p.name, p.value ) for p in job.parameters ] )
        param_dict = self.tool.params_from_strings( param_dict, self.app )
        return param_dict

    def get_version_string_path( self ):
        return os.path.abspath(os.path.join(self.app.config.new_file_path, "GALAXY_VERSION_STRING_%s" % self.job_id))

    def prepare( self, compute_environment=None ):
        """
        Prepare the job to run by creating the working directory and the
        config files.
        """
        self.sa_session.expunge_all()  # this prevents the metadata reverting that has been seen in conjunction with the PBS job runner
        if not os.path.exists( self.working_directory ):
            os.mkdir( self.working_directory )

        job = self._load_job()

        def get_special( ):
            special = self.sa_session.query( model.JobExportHistoryArchive ).filter_by( job=job ).first()
            if not special:
                special = self.sa_session.query( model.GenomeIndexToolData ).filter_by( job=job ).first()
            return special

        tool_evaluator = self._get_tool_evaluator( job )
        compute_environment = compute_environment or self.default_compute_environment( job )
        tool_evaluator.set_compute_environment( compute_environment, get_special=get_special )

        self.sa_session.flush()

        self.command_line, self.extra_filenames = tool_evaluator.build()
        # FIXME: for now, tools get Galaxy's lib dir in their path
        if self.command_line and self.command_line.startswith( 'python' ):
            self.galaxy_lib_dir = os.path.abspath( "lib" )  # cwd = galaxy root
        # Shell fragment to inject dependencies
        self.dependency_shell_commands = self.tool.build_dependency_shell_commands()
        # We need command_line persisted to the db in order for Galaxy to re-queue the job
        # if the server was stopped and restarted before the job finished
        job.command_line = self.command_line
        self.sa_session.add( job )
        self.sa_session.flush()
        # Return list of all extra files
        self.param_dict = tool_evaluator.param_dict
        version_string_cmd = self.tool.version_string_cmd
        if version_string_cmd:
            self.write_version_cmd = "%s > %s 2>&1" % ( version_string_cmd, compute_environment.version_path() )
        else:
            self.write_version_cmd = None
        return self.extra_filenames

    def default_compute_environment( self, job=None ):
        if not job:
            job = self.get_job()
        return SharedComputeEnvironment( self, job )

    def _load_job( self ):
        # Load job from database and verify it has user or session.
        # Restore parameters from the database
        job = self.get_job()
        if job.user is None and job.galaxy_session is None:
            raise Exception( 'Job %s has no user and no session.' % job.id )
        return job

    def _get_tool_evaluator( self, job ):
        # Hacky way to avoid a circular import for now.
        # Placing ToolEvaluator in either jobs or tools
        # results in a circular dependency.
        from galaxy.tools.evaluation import ToolEvaluator
        tool_evaluator = ToolEvaluator(
            app=self.app,
            job=job,
            tool=self.tool,
            local_working_directory=self.working_directory,
        )
        return tool_evaluator

    def fail( self, message, exception=False, stdout="", stderr="", exit_code=None ):
        """
        Indicate job failure by setting state and message on all output
        datasets.
        """
        job = self.get_job()
        self.sa_session.refresh( job )
        # if the job was deleted, don't fail it
        if not job.state == job.states.DELETED:
            # Check if the failure is due to an exception
            if exception:
                # Save the traceback immediately in case we generate another
                # below
                job.traceback = traceback.format_exc()
                # Get the exception and let the tool attempt to generate
                # a better message
                etype, evalue, tb = sys.exc_info()
                m = self.tool.handle_job_failure_exception( evalue )
                if m:
                    message = m
            if self.app.config.outputs_to_working_directory:
                for dataset_path in self.get_output_fnames():
                    try:
                        shutil.move( dataset_path.false_path, dataset_path.real_path )
                        log.debug( "fail(): Moved %s to %s" % ( dataset_path.false_path, dataset_path.real_path ) )
                    except ( IOError, OSError ), e:
                        log.error( "fail(): Missing output file in working directory: %s" % e )
            for dataset_assoc in job.output_datasets + job.output_library_datasets:
                dataset = dataset_assoc.dataset
                self.sa_session.refresh( dataset )
                dataset.state = dataset.states.ERROR
                dataset.blurb = 'tool error'
                dataset.info = message
                dataset.set_size()
                dataset.dataset.set_total_size()
                dataset.mark_unhidden()
                if dataset.ext == 'auto':
                    dataset.extension = 'data'
                # Update (non-library) job output datasets through the object store
                if dataset not in job.output_library_datasets:
                    self.app.object_store.update_from_file(dataset.dataset, create=True)
                # Pause any dependent jobs (and those jobs' outputs)
                for dep_job_assoc in dataset.dependent_jobs:
                    self.pause( dep_job_assoc.job, "Execution of this dataset's job is paused because its input datasets are in an error state." )
                self.sa_session.add( dataset )
                self.sa_session.flush()
            job.state = job.states.ERROR
            job.command_line = self.command_line
            job.info = message
            # TODO: Put setting the stdout, stderr, and exit code in one place
            # (not duplicated with the finish method).
            if ( len( stdout ) > DATABASE_MAX_STRING_SIZE ):
                stdout = util.shrink_string_by_size( stdout, DATABASE_MAX_STRING_SIZE, join_by="\n..\n", left_larger=True, beginning_on_size_error=True )
                log.info( "stdout for job %d is greater than %s, only a portion will be logged to database" % ( job.id, DATABASE_MAX_STRING_SIZE_PRETTY ) )
            job.stdout = stdout
            if ( len( stderr ) > DATABASE_MAX_STRING_SIZE ):
                stderr = util.shrink_string_by_size( stderr, DATABASE_MAX_STRING_SIZE, join_by="\n..\n", left_larger=True, beginning_on_size_error=True )
                log.info( "stderr for job %d is greater than %s, only a portion will be logged to database" % ( job.id, DATABASE_MAX_STRING_SIZE_PRETTY ) )
            job.stderr = stderr
            # Let the exit code be Null if one is not provided:
            if ( exit_code != None ):
                job.exit_code = exit_code
            self.sa_session.add( job )
            self.sa_session.flush()
        # Perform email action even on failure.
        for pja in [pjaa.post_job_action for pjaa in job.post_job_actions if pjaa.post_job_action.action_type == "EmailAction"]:
            ActionBox.execute(self.app, self.sa_session, pja, job)
        # If the job was deleted, call tool specific fail actions (used for e.g. external metadata) and clean up
        if self.tool:
            self.tool.job_failed( self, message, exception )
        delete_files = self.app.config.cleanup_job == 'always' or (self.app.config.cleanup_job == 'onsuccess' and job.state == job.states.DELETED)
        self.cleanup( delete_files=delete_files )

    def pause( self, job=None, message=None ):
        if job is None:
            job = self.get_job()
        if message is None:
            message = "Execution of this dataset's job is paused"
        if job.state == job.states.NEW:
            for dataset_assoc in job.output_datasets + job.output_library_datasets:
                dataset_assoc.dataset.dataset.state = dataset_assoc.dataset.dataset.states.PAUSED
                dataset_assoc.dataset.info = message
                self.sa_session.add( dataset_assoc.dataset )
            job.state = job.states.PAUSED
            self.sa_session.add( job )

    def change_state( self, state, info=False ):
        job = self.get_job()
        self.sa_session.refresh( job )
        for dataset_assoc in job.output_datasets + job.output_library_datasets:
            dataset = dataset_assoc.dataset
            self.sa_session.refresh( dataset )
            dataset.state = state
            if info:
                dataset.info = info
            self.sa_session.add( dataset )
            self.sa_session.flush()
        if info:
            job.info = info
        job.state = state
        self.sa_session.add( job )
        self.sa_session.flush()

    def get_state( self ):
        job = self.get_job()
        self.sa_session.refresh( job )
        return job.state

    def set_runner( self, runner_url, external_id ):
        log.warning('set_runner() is deprecated, use set_job_destination()')
        self.set_job_destination(self.job_destination, external_id)

    def set_job_destination( self, job_destination, external_id=None ):
        """
        Persist job destination params in the database for recovery.

        self.job_destination is not used because a runner may choose to rewrite
        parts of the destination (e.g. the params).
        """
        job = self.get_job()
        self.sa_session.refresh(job)
        log.debug('(%s) Persisting job destination (destination id: %s)' % (job.id, job_destination.id))
        job.destination_id = job_destination.id
        job.destination_params = job_destination.params
        job.job_runner_name = job_destination.runner
        job.job_runner_external_id = external_id
        self.sa_session.add(job)
        self.sa_session.flush()

    def finish( self, stdout, stderr, tool_exit_code=None ):
        """
        Called to indicate that the associated command has been run. Updates
        the output datasets based on stderr and stdout from the command, and
        the contents of the output files.
        """
        stdout = unicodify( stdout )
        stderr = unicodify( stderr )

        # default post job setup
        self.sa_session.expunge_all()
        job = self.get_job()

        # TODO: After failing here, consider returning from the function.
        try:
            self.reclaim_ownership()
        except:
            log.exception( '(%s) Failed to change ownership of %s, failing' % ( job.id, self.working_directory ) )
            return self.fail( job.info, stdout=stdout, stderr=stderr, exit_code=tool_exit_code )

        # if the job was deleted, don't finish it
        if job.state == job.states.DELETED or job.state == job.states.ERROR:
            # SM: Note that, at this point, the exit code must be saved in case
            # there was an error. Errors caught here could mean that the job
            # was deleted by an administrator (based on old comments), but it
            # could also mean that a job was broken up into tasks and one of
            # the tasks failed. So include the stderr, stdout, and exit code:
            return self.fail( job.info, stderr=stderr, stdout=stdout, exit_code=tool_exit_code )

        # Check the tool's stdout, stderr, and exit code for errors, but only
        # if the job has not already been marked as having an error.
        # The job's stdout and stderr will be set accordingly.

        # We set final_job_state to use for dataset management, but *don't* set
        # job.state until after dataset collection to prevent history issues
        if ( self.check_tool_output( stdout, stderr, tool_exit_code, job ) ):
            final_job_state = job.states.OK
        else:
            final_job_state = job.states.ERROR

        if self.write_version_cmd:
            version_filename = self.get_version_string_path()
            if os.path.exists(version_filename):
                self.version_string = open(version_filename).read()
                os.unlink(version_filename)

        if self.app.config.outputs_to_working_directory and not self.__link_file_check():
            for dataset_path in self.get_output_fnames():
                try:
                    shutil.move( dataset_path.false_path, dataset_path.real_path )
                    log.debug( "finish(): Moved %s to %s" % ( dataset_path.false_path, dataset_path.real_path ) )
                except ( IOError, OSError ):
                    # this can happen if Galaxy is restarted during the job's
                    # finish method - the false_path file has already moved,
                    # and when the job is recovered, it won't be found.
                    if os.path.exists( dataset_path.real_path ) and os.stat( dataset_path.real_path ).st_size > 0:
                        log.warning( "finish(): %s not found, but %s is not empty, so it will be used instead" % ( dataset_path.false_path, dataset_path.real_path ) )
                    else:
                        # Prior to fail we need to set job.state
                        job.state = final_job_state
                        return self.fail( "Job %s's output dataset(s) could not be read" % job.id )

        job_context = ExpressionContext( dict( stdout=job.stdout, stderr=job.stderr ) )
        for dataset_assoc in job.output_datasets + job.output_library_datasets:
            context = self.get_dataset_finish_context( job_context, dataset_assoc.dataset.dataset )
            # should this also be checking library associations? - can a library item be added from a history before the job has ended? - let's not allow this to occur
            for dataset in dataset_assoc.dataset.dataset.history_associations + dataset_assoc.dataset.dataset.library_associations:  # need to update all associated output hdas, i.e. history was shared with job running
                trynum = 0
                while trynum < self.app.config.retry_job_output_collection:
                    try:
                        # Attempt to short circuit NFS attribute caching
                        os.stat( dataset.dataset.file_name )
                        os.chown( dataset.dataset.file_name, os.getuid(), -1 )
                        trynum = self.app.config.retry_job_output_collection
                    except ( OSError, ObjectNotFound ), e:
                        trynum += 1
                        log.warning( 'Error accessing %s, will retry: %s', dataset.dataset.file_name, e )
                        time.sleep( 2 )
                dataset.blurb = 'done'
                dataset.peek = 'no peek'
                dataset.info = (dataset.info or '')
                if context['stdout'].strip():
                    # Ensure white space between entries
                    dataset.info = dataset.info.rstrip() + "\n" + context['stdout'].strip()
                if context['stderr'].strip():
                    # Ensure white space between entries
                    dataset.info = dataset.info.rstrip() + "\n" + context['stderr'].strip()
                dataset.tool_version = self.version_string
                dataset.set_size()
                if 'uuid' in context:
                    dataset.dataset.uuid = context['uuid']
                # Update (non-library) job output datasets through the object store
                if dataset not in job.output_library_datasets:
                    self.app.object_store.update_from_file(dataset.dataset, create=True)
                if job.states.ERROR == final_job_state:
                    dataset.blurb = "error"
                    dataset.mark_unhidden()
                elif dataset.has_data():
                    # If the tool was expected to set the extension, attempt to retrieve it
                    if dataset.ext == 'auto':
                        dataset.extension = context.get( 'ext', 'data' )
                        dataset.init_meta( copy_from=dataset )
                    # if a dataset was copied, it won't appear in our dictionary:
                    # either use the metadata from originating output dataset, or call set_meta on the copies
                    # it would be quicker to just copy the metadata from the originating output dataset,
                    # but somewhat trickier (need to recurse up the copied_from tree), for now we'll call set_meta()
                    if ( not self.external_output_metadata.external_metadata_set_successfully( dataset, self.sa_session ) and self.app.config.retry_metadata_internally ):
                        dataset.datatype.set_meta( dataset, overwrite=False )  # call datatype.set_meta directly for the initial set_meta call during dataset creation
                    elif not self.external_output_metadata.external_metadata_set_successfully( dataset, self.sa_session ) and job.states.ERROR != final_job_state:
                        dataset._state = model.Dataset.states.FAILED_METADATA
                    else:
                        # load metadata from file
                        # we need to no longer allow metadata to be edited while the job is still running,
                        # since if it is edited, the metadata changed on the running output will no longer match
                        # the metadata that was stored to disk for use via the external process,
                        # and the changes made by the user will be lost, without warning or notice
                        dataset.metadata.from_JSON_dict( self.external_output_metadata.get_output_filenames_by_dataset( dataset, self.sa_session ).filename_out )
                    try:
                        assert context.get( 'line_count', None ) is not None
                        if ( not dataset.datatype.composite_type and dataset.dataset.is_multi_byte() ) or self.tool.is_multi_byte:
                            dataset.set_peek( line_count=context['line_count'], is_multi_byte=True )
                        else:
                            dataset.set_peek( line_count=context['line_count'] )
                    except:
                        if ( not dataset.datatype.composite_type and dataset.dataset.is_multi_byte() ) or self.tool.is_multi_byte:
                            dataset.set_peek( is_multi_byte=True )
                        else:
                            dataset.set_peek()
                    try:
                        # set the name if provided by the tool
                        dataset.name = context['name']
                    except:
                        pass
                else:
                    dataset.blurb = "empty"
                    if dataset.ext == 'auto':
                        dataset.extension = 'txt'
                self.sa_session.add( dataset )
            if job.states.ERROR == final_job_state:
                log.debug( "setting dataset state to ERROR" )
                # TODO: This is where the state is being set to error. Change it!
                dataset_assoc.dataset.dataset.state = model.Dataset.states.ERROR
                # Pause any dependent jobs (and those jobs' outputs)
                for dep_job_assoc in dataset_assoc.dataset.dependent_jobs:
                    self.pause( dep_job_assoc.job, "Execution of this dataset's job is paused because its input datasets are in an error state." )
            else:
                dataset_assoc.dataset.dataset.state = model.Dataset.states.OK
            # If any of the rest of the finish method below raises an
            # exception, the fail method will run and set the datasets to
            # ERROR. The user will never see that the datasets are in error if
            # they were flushed as OK here, since upon doing so, the history
            # panel stops checking for updates. So allow the
            # self.sa_session.flush() at the bottom of this method set
            # the state instead.

        for pja in job.post_job_actions:
            ActionBox.execute(self.app, self.sa_session, pja.post_job_action, job)
        # Flush all the dataset and job changes above. Dataset state changes
        # will now be seen by the user.
        self.sa_session.flush()

        # Save stdout and stderr
        if len( job.stdout ) > DATABASE_MAX_STRING_SIZE:
            log.info( "stdout for job %d is greater than %s, only a portion will be logged to database" % ( job.id, DATABASE_MAX_STRING_SIZE_PRETTY ) )
        job.stdout = util.shrink_string_by_size( job.stdout, DATABASE_MAX_STRING_SIZE, join_by="\n..\n", left_larger=True, beginning_on_size_error=True )
        if len( job.stderr ) > DATABASE_MAX_STRING_SIZE:
            log.info( "stderr for job %d is greater than %s, only a portion will be logged to database" % ( job.id, DATABASE_MAX_STRING_SIZE_PRETTY ) )
        job.stderr = util.shrink_string_by_size( job.stderr, DATABASE_MAX_STRING_SIZE, join_by="\n..\n", left_larger=True, beginning_on_size_error=True )

        # The exit code will be null if there is no exit code to be set.
        # This is so that we don't assign an exit code, such as 0, that
        # is either incorrect or has the wrong semantics.
        if None != tool_exit_code:
            job.exit_code = tool_exit_code
        # custom post process setup
        inp_data = dict( [ ( da.name, da.dataset ) for da in job.input_datasets ] )
        out_data = dict( [ ( da.name, da.dataset ) for da in job.output_datasets ] )
        inp_data.update( [ ( da.name, da.dataset ) for da in job.input_library_datasets ] )
        out_data.update( [ ( da.name, da.dataset ) for da in job.output_library_datasets ] )
        param_dict = dict( [ ( p.name, p.value ) for p in job.parameters ] )  # why not re-use self.param_dict here? ##dunno...probably should, this causes tools.parameters.basic.UnvalidatedValue to be used in following methods instead of validated and transformed values during i.e. running workflows
        param_dict = self.tool.params_from_strings( param_dict, self.app )
        # Check for and move associated_files
        self.tool.collect_associated_files(out_data, self.working_directory)
        gitd = self.sa_session.query( model.GenomeIndexToolData ).filter_by( job=job ).first()
        if gitd:
            self.tool.collect_associated_files({'': gitd}, self.working_directory)
        # Create generated output children and primary datasets and add to param_dict
        collected_datasets = {
            'children': self.tool.collect_child_datasets(out_data, self.working_directory),
            'primary': self.tool.collect_primary_datasets(out_data, self.working_directory)
        }
        param_dict.update({'__collected_datasets__': collected_datasets})
        # Certain tools require tasks to be completed after job execution
        # ( this used to be performed in the "exec_after_process" hook, but hooks are deprecated ).
        self.tool.exec_after_process( self.queue.app, inp_data, out_data, param_dict, job=job )
        # Call 'exec_after_process' hook
        self.tool.call_hook( 'exec_after_process', self.queue.app, inp_data=inp_data,
                             out_data=out_data, param_dict=param_dict,
                             tool=self.tool, stdout=job.stdout, stderr=job.stderr )
        job.command_line = self.command_line

        bytes = 0
        # Once datasets are collected, set the total dataset size (includes extra files)
        for dataset_assoc in job.output_datasets:
            dataset_assoc.dataset.dataset.set_total_size()
            bytes += dataset_assoc.dataset.dataset.get_total_size()

        if job.user:
            job.user.total_disk_usage += bytes

        # fix permissions
        for path in [ dp.real_path for dp in self.get_mutable_output_fnames() ]:
            util.umask_fix_perms( path, self.app.config.umask, 0666, self.app.config.gid )

        # Finally set the job state. This should only happen *after* all
        # dataset creation, and will allow us to eliminate force_history_refresh.
        job.state = final_job_state
        self.sa_session.flush()

        log.debug( 'job %d ended' % self.job_id )
        delete_files = self.app.config.cleanup_job == 'always' or ( job.state == job.states.OK and self.app.config.cleanup_job == 'onsuccess' )
        self.cleanup( delete_files=delete_files )

    def check_tool_output( self, stdout, stderr, tool_exit_code, job ):
        return check_output( self.tool, stdout, stderr, tool_exit_code, job )

    def cleanup( self, delete_files=True ):
        # At least one of these tool cleanup actions (job import) is needed
        # for the tool to work properly, which is why one might want to run
        # cleanup but not delete files.
        try:
            if delete_files:
                for fname in self.extra_filenames:
                    os.remove( fname )
            self.external_output_metadata.cleanup_external_metadata( self.sa_session )
            galaxy.tools.imp_exp.JobExportHistoryArchiveWrapper( self.job_id ).cleanup_after_job( self.sa_session )
            galaxy.tools.imp_exp.JobImportHistoryArchiveWrapper( self.app, self.job_id ).cleanup_after_job()
            galaxy.tools.genome_index.GenomeIndexToolWrapper( self.job_id ).postprocessing( self.sa_session, self.app )
            if delete_files:
                self.app.object_store.delete(self.get_job(), base_dir='job_work', entire_dir=True, dir_only=True, extra_dir=str(self.job_id))
        except:
            log.exception( "Unable to cleanup job %d" % self.job_id )

    def get_output_sizes( self ):
        sizes = []
        output_paths = self.get_output_fnames()
        for outfile in [ str( o ) for o in output_paths ]:
            if os.path.exists( outfile ):
                sizes.append( ( outfile, os.stat( outfile ).st_size ) )
            else:
                sizes.append( ( outfile, 0 ) )
        return sizes

    def check_limits(self, runtime=None):
        if self.app.job_config.limits.output_size > 0:
            for outfile, size in self.get_output_sizes():
                if size > self.app.config.output_size_limit:
                    log.warning( '(%s) Job output %s is over the output size limit' % ( self.get_id_tag(), os.path.basename( outfile ) ) )
                    return 'Job output file grew too large (greater than %s), please try different inputs or parameters' % util.nice_size( self.app.job_config.limits.output_size )
        if self.app.job_config.limits.walltime_delta is not None and runtime is not None:
            if runtime > self.app.job_config.limits.walltime_delta:
                log.warning( '(%s) Job has reached walltime, it will be terminated' % ( self.get_id_tag() ) )
                return 'Job ran longer than the maximum allowed execution time (%s), please try different inputs or parameters' % self.app.job_config.limits.walltime
        return None
  985. def get_command_line( self ):
  986. return self.command_line
  987. def get_session_id( self ):
  988. return self.session_id
  989. def get_env_setup_clause( self ):
  990. if self.app.config.environment_setup_file is None:
  991. return ''
  992. return '[ -f "%s" ] && . %s' % ( self.app.config.environment_setup_file, self.app.config.environment_setup_file )
  993. def get_input_dataset_fnames( self, ds ):
  994. filenames = []
  995. filenames = [ ds.file_name ]
  996. #we will need to stage in metadata file names also
  997. #TODO: would be better to only stage in metadata files that are actually needed (found in command line, referenced in config files, etc.)
  998. for key, value in ds.metadata.items():
  999. if isinstance( value, model.MetadataFile ):
  1000. filenames.append( value.file_name )
  1001. return filenames
  1002. def get_input_fnames( self ):
  1003. job = self.get_job()
  1004. filenames = []
  1005. for da in job.input_datasets + job.input_library_datasets: # da is JobToInputDatasetAssociation object
  1006. if da.dataset:
  1007. filenames.extend(self.get_input_dataset_fnames(da.dataset))
  1008. return filenames
  1009. def get_input_paths( self, job=None ):
  1010. if job is None:
  1011. job = self.get_job()
  1012. paths = []
  1013. for da in job.input_datasets + job.input_library_datasets: # da is JobToInputDatasetAssociation object
  1014. if da.dataset:
  1015. filenames = self.get_input_dataset_fnames(da.dataset)
  1016. for real_path in filenames:
  1017. false_path = self.dataset_path_rewriter.rewrite_dataset_path( da.dataset, 'input' )
  1018. paths.append( DatasetPath( da.id, real_path=real_path, false_path=false_path, mutable=False ) )
  1019. return paths
  1020. def get_output_fnames( self ):
  1021. if self.output_paths is None:
  1022. self.compute_outputs()
  1023. return self.output_paths
  1024. def get_mutable_output_fnames( self ):
  1025. if self.output_paths is None:
  1026. self.compute_outputs()
  1027. return filter( lambda dsp: dsp.mutable, self.output_paths )
  1028. def get_output_hdas_and_fnames( self ):
  1029. if self.output_hdas_and_paths is None:
  1030. self.compute_outputs()
  1031. return self.output_hdas_and_paths
  1032. def compute_outputs( self ) :
  1033. dataset_path_rewriter = self.dataset_path_rewriter
  1034. job = self.get_job()
  1035. # Job output datasets are combination of history, library, jeha and gitd datasets.
  1036. special = self.sa_session.query( model.JobExportHistoryArchive ).filter_by( job=job ).first()
  1037. if not special:
  1038. special = self.sa_session.query( model.GenomeIndexToolData ).filter_by( job=job ).first()
  1039. false_path = None
  1040. results = []
  1041. for da in job.output_datasets + job.output_library_datasets:
  1042. da_false_path = dataset_path_rewriter.rewrite_dataset_path( da.dataset, 'output' )
  1043. mutable = da.dataset.dataset.external_filename is None
  1044. dataset_path = DatasetPath( da.dataset.dataset.id, da.dataset.file_name, false_path=da_false_path, mutable=mutable )
  1045. results.append( ( da.name, da.dataset, dataset_path ) )
  1046. self.output_paths = [t[2] for t in results]
  1047. self.output_hdas_and_paths = dict([(t[0], t[1:]) for t in results])
  1048. if special:
  1049. false_path = dataset_path_rewriter.rewrite_dataset_path( special.dataset, 'output' )
  1050. dsp = DatasetPath( special.dataset.id, special.dataset.file_name, false_path )
  1051. self.output_paths.append( dsp )
  1052. return self.output_paths
  1053. def get_output_file_id( self, file ):
  1054. if self.output_paths is None:
  1055. self.get_output_fnames()
  1056. for dp in self.output_paths:
  1057. if self.app.config.outputs_to_working_directory and os.path.basename( dp.false_path ) == file:
  1058. return dp.dataset_id
  1059. elif os.path.basename( dp.real_path ) == file:
  1060. return dp.dataset_id
  1061. return None
  1062. def get_tool_provided_job_metadata( self ):
  1063. if self.tool_provided_job_metadata is not None:
  1064. return self.tool_provided_job_metadata
  1065. # Look for JSONified job metadata
  1066. self.tool_provided_job_metadata = []
  1067. meta_file = os.path.join( self.working_directory, TOOL_PROVIDED_JOB_METADATA_FILE )
  1068. if os.path.exists( meta_file ):
  1069. for line in open( meta_file, 'r' ):
  1070. try:
  1071. line = from_json_string( line )
  1072. assert 'type' in line
  1073. except:
  1074. log.exception( '(%s) Got JSON data from tool, but data is improperly formatted or no "type" key in data' % self.job_id )
  1075. log.debug( 'Offending data was: %s' % line )
  1076. continue
  1077. # Set the dataset id if it's a dataset entry and isn't set.
  1078. # This isn't insecure. We loop the job's output datasets in
  1079. # the finish method, so if a tool writes out metadata for a
  1080. # dataset id that it doesn't own, it'll just be ignored.
  1081. if line['type'] == 'dataset' and 'dataset_id' not in line:
  1082. try:
  1083. line['dataset_id'] = self.get_output_file_id( line['dataset'] )
  1084. except KeyError:
  1085. log.warning( '(%s) Tool provided job dataset-specific metadata without specifying a dataset' % self.job_id )
  1086. continue
  1087. self.tool_provided_job_metadata.append( line )
  1088. return self.tool_provided_job_metadata
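# A sketch of the line-oriented galaxy.json format consumed above, assuming a
# typical upload-tool run: one JSON object per line, with only the 'type' key
# strictly required by this reader. For 'dataset' entries the 'dataset' key
# names the output file so get_output_file_id() can fill in 'dataset_id', and
# keys such as 'ext' are consumed later via get_dataset_finish_context(). The
# concrete values here are hypothetical:
#
#   {"type": "dataset", "dataset": "dataset_42.dat", "ext": "tabular"}
#   {"type": "dataset", "dataset_id": 43, "ext": "fastqsanger"}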
  1089. def get_dataset_finish_context( self, job_context, dataset ):
  1090. for meta in self.get_tool_provided_job_metadata():
  1091. if meta['type'] == 'dataset' and meta['dataset_id'] == dataset.id:
  1092. return ExpressionContext( meta, job_context )
  1093. return job_context
  1094. def setup_external_metadata( self, exec_dir=None, tmp_dir=None, dataset_files_path=None, config_root=None, config_file=None, datatypes_config=None, set_extension=True, **kwds ):
  1095. # extension could still be 'auto' if this is the upload tool.
  1096. job = self.get_job()
  1097. if set_extension:
  1098. for output_dataset_assoc in job.output_datasets:
  1099. if output_dataset_assoc.dataset.ext == 'auto':
  1100. context = self.get_dataset_finish_context( dict(), output_dataset_assoc.dataset.dataset )
  1101. output_dataset_assoc.dataset.extension = context.get( 'ext', 'data' )
  1102. self.sa_session.flush()
  1103. if tmp_dir is None:
1104. # this dir should be relative to the exec_dir
  1105. tmp_dir = self.app.config.new_file_path
  1106. if dataset_files_path is None:
  1107. dataset_files_path = self.app.model.Dataset.file_path
  1108. if config_root is None:
  1109. config_root = self.app.config.root
  1110. if config_file is None:
  1111. config_file = self.app.config.config_file
  1112. if datatypes_config is None:
  1113. datatypes_config = self.app.datatypes_registry.integrated_datatypes_configs
  1114. return self.external_output_metadata.setup_external_metadata( [ output_dataset_assoc.dataset for output_dataset_assoc in job.output_datasets ],
  1115. self.sa_session,
  1116. exec_dir=exec_dir,
  1117. tmp_dir=tmp_dir,
  1118. dataset_files_path=dataset_files_path,
  1119. config_root=config_root,
  1120. config_file=config_file,
  1121. datatypes_config=datatypes_config,
  1122. job_metadata=os.path.join( self.working_directory, TOOL_PROVIDED_JOB_METADATA_FILE ),
  1123. **kwds )
  1124. @property
  1125. def user( self ):
  1126. job = self.get_job()
  1127. if job.user is not None:
  1128. return job.user.email
  1129. elif job.galaxy_session is not None and job.galaxy_session.user is not None:
  1130. return job.galaxy_session.user.email
  1131. elif job.history is not None and job.history.user is not None:
  1132. return job.history.user.email
  1133. elif job.galaxy_session is not None:
  1134. return 'anonymous@' + job.galaxy_session.remote_addr.split()[-1]
  1135. else:
  1136. return 'anonymous@unknown'
  1137. def __link_file_check( self ):
  1138. """ outputs_to_working_directory breaks library uploads where data is
  1139. linked. This method is a hack that solves that problem, but is
  1140. specific to the upload tool and relies on an injected job param. This
  1141. method should be removed ASAP and replaced with some properly generic
  1142. and stateful way of determining link-only datasets. -nate
  1143. """
  1144. job = self.get_job()
  1145. param_dict = job.get_param_values( self.app )
  1146. return self.tool.id == 'upload1' and param_dict.get( 'link_data_only', None ) == 'link_to_files'
  1147. def _change_ownership( self, username, gid ):
  1148. job = self.get_job()
  1149. # FIXME: hardcoded path
  1150. cmd = [ '/usr/bin/sudo', '-E', self.app.config.external_chown_script, self.working_directory, username, str( gid ) ]
  1151. log.debug( '(%s) Changing ownership of working directory with: %s' % ( job.id, ' '.join( cmd ) ) )
  1152. p = subprocess.Popen( cmd, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE )
  1153. # TODO: log stdout/stderr
  1154. stdout, stderr = p.communicate()
  1155. assert p.returncode == 0
  1156. def change_ownership_for_run( self ):
  1157. job = self.get_job()
  1158. if self.app.config.external_chown_script and job.user is not None:
  1159. try:
  1160. self._change_ownership( self.user_system_pwent[0], str( self.user_system_pwent[3] ) )
  1161. except:
  1162. log.exception( '(%s) Failed to change ownership of %s, making world-writable instead' % ( job.id, self.working_directory ) )
  1163. os.chmod( self.working_directory, 0777 )
  1164. def reclaim_ownership( self ):
  1165. job = self.get_job()
  1166. if self.app.config.external_chown_script and job.user is not None:
  1167. self._change_ownership( self.galaxy_system_pwent[0], str( self.galaxy_system_pwent[3] ) )
  1168. @property
  1169. def user_system_pwent( self ):
  1170. if self.__user_system_pwent is None:
  1171. job = self.get_job()
  1172. try:
  1173. self.__user_system_pwent = pwd.getpwnam( job.user.email.split('@')[0] )
  1174. except:
  1175. pass
  1176. return self.__user_system_pwent
  1177. @property
  1178. def galaxy_system_pwent( self ):
  1179. if self.__galaxy_system_pwent is None:
  1180. self.__galaxy_system_pwent = pwd.getpwuid(os.getuid())
  1181. return self.__galaxy_system_pwent
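# For reference, pwd entries are indexed positionally: index 0 is pw_name and
# index 3 is pw_gid, which is why change_ownership_for_run() and
# reclaim_ownership() pass pwent[0] and pwent[3] to the chown script. A minimal
# sketch (the 'galaxy' account name and gid are assumptions):
#
#   import pwd
#   pwent = pwd.getpwnam( 'galaxy' )
#   pwent[0], pwent[3]             # ('galaxy', 1001) - username and primary gid
#   pwent.pw_name, pwent.pw_gid    # the same values via named attributes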
  1182. def get_output_destination( self, output_path ):
  1183. """
1184. Destination for outputs marked as from_work_dir. This is the normal case:
1185. these files are simply copied directly to the ultimate destination.
  1186. """
  1187. return output_path
  1188. @property
  1189. def requires_setting_metadata( self ):
  1190. if self.tool:
  1191. return self.tool.requires_setting_metadata
  1192. return False
  1193. class TaskWrapper(JobWrapper):
  1194. """
  1195. Extension of JobWrapper intended for running tasks.
1196. Should be refactored into a generalized executable unit wrapper parent class, with jobs and tasks as subclasses.
  1197. """
  1198. # Abstract this to be more useful for running tasks that *don't* necessarily compose a job.
  1199. def __init__(self, task, queue):
  1200. super(TaskWrapper, self).__init__(task.job, queue)
  1201. self.task_id = task.id
  1202. working_directory = task.working_directory
  1203. self.working_directory = working_directory
  1204. job_dataset_path_rewriter = self._job_dataset_path_rewriter( self.working_directory )
  1205. self.dataset_path_rewriter = TaskPathRewriter( working_directory, job_dataset_path_rewriter )
  1206. if task.prepare_input_files_cmd is not None:
  1207. self.prepare_input_files_cmds = [ task.prepare_input_files_cmd ]
  1208. else:
  1209. self.prepare_input_files_cmds = None
  1210. self.status = task.states.NEW
  1211. def can_split( self ):
  1212. # Should the job handler split this job up? TaskWrapper should
  1213. # always return False as the job has already been split.
  1214. return False
  1215. def get_job( self ):
  1216. if self.job_id:
  1217. return self.sa_session.query( model.Job ).get( self.job_id )
  1218. else:
  1219. return None
  1220. def get_task( self ):
  1221. return self.sa_session.query(model.Task).get(self.task_id)
  1222. def get_id_tag(self):
1223. # For compatibility with the drmaa job runner, return the task's id tag rather than using job_id directly
  1224. return self.get_task().get_id_tag()
  1225. def get_param_dict( self ):
  1226. """
  1227. Restore the dictionary of parameters from the database.
  1228. """
  1229. job = self.sa_session.query( model.Job ).get( self.job_id )
  1230. param_dict = dict( [ ( p.name, p.value ) for p in job.parameters ] )
  1231. param_dict = self.tool.params_from_strings( param_dict, self.app )
  1232. return param_dict
  1233. def prepare( self, compute_environment=None ):
  1234. """
  1235. Prepare the job to run by creating the working directory and the
  1236. config files.
  1237. """
  1238. # Restore parameters from the database
  1239. job = self._load_job()
  1240. task = self.get_task()
  1241. # DBTODO New method for generating command line for a task?
  1242. tool_evaluator = self._get_tool_evaluator( job )
  1243. compute_environment = compute_environment or self.default_compute_environment( job )
  1244. tool_evaluator.set_compute_environment( compute_environment )
  1245. self.sa_session.flush()
  1246. self.command_line, self.extra_filenames = tool_evaluator.build()
  1247. # FIXME: for now, tools get Galaxy's lib dir in their path
  1248. if self.command_line and self.command_line.startswith( 'python' ):
  1249. self.galaxy_lib_dir = os.path.abspath( "lib" ) # cwd = galaxy root
  1250. # Shell fragment to inject dependencies
  1251. self.dependency_shell_commands = self.tool.build_dependency_shell_commands()
  1252. # We need command_line persisted to the db in order for Galaxy to re-queue the job
  1253. # if the server was stopped and restarted before the job finished
  1254. task.command_line = self.command_line
  1255. self.sa_session.add( task )
  1256. self.sa_session.flush()
  1257. self.param_dict = tool_evaluator.param_dict
  1258. self.status = 'prepared'
  1259. return self.extra_filenames
  1260. def fail( self, message, exception=False ):
  1261. log.error("TaskWrapper Failure %s" % message)
  1262. self.status = 'error'
  1263. # How do we want to handle task failure? Fail the job and let it clean up?
  1264. def change_state( self, state, info=False ):
  1265. task = self.get_task()
  1266. self.sa_session.refresh( task )
  1267. if info:
  1268. task.info = info
  1269. task.state = state
  1270. self.sa_session.add( task )
  1271. self.sa_session.flush()
  1272. def get_state( self ):
  1273. task = self.get_task()
  1274. self.sa_session.refresh( task )
  1275. return task.state
  1276. def get_exit_code( self ):
  1277. task = self.get_task()
  1278. self.sa_session.refresh( task )
  1279. return task.exit_code
  1280. def set_runner( self, runner_url, external_id ):
  1281. task = self.get_task()
  1282. self.sa_session.refresh( task )
  1283. task.task_runner_name = runner_url
  1284. task.task_runner_external_id = external_id
  1285. # DBTODO Check task job_runner_stuff
  1286. self.sa_session.add( task )
  1287. self.sa_session.flush()
  1288. def finish( self, stdout, stderr, tool_exit_code=None ):
  1289. # DBTODO integrate previous finish logic.
  1290. # Simple finish for tasks. Just set the flag OK.
  1291. """
  1292. Called to indicate that the associated command has been run. Updates
  1293. the output datasets based on stderr and stdout from the command, and
  1294. the contents of the output files.
  1295. """
  1296. stdout = unicodify( stdout )
  1297. stderr = unicodify( stderr )
  1298. # This may have ended too soon
  1299. log.debug( 'task %s for job %d ended; exit code: %d'
  1300. % (self.task_id, self.job_id,
1301. tool_exit_code if tool_exit_code is not None else -256 ) )
  1302. # default post job setup_external_metadata
  1303. self.sa_session.expunge_all()
  1304. task = self.get_task()
  1305. # if the job was deleted, don't finish it
  1306. if task.state == task.states.DELETED:
  1307. # Job was deleted by an administrator
  1308. delete_files = self.app.config.cleanup_job in ( 'always', 'onsuccess' )
  1309. self.cleanup( delete_files=delete_files )
  1310. return
  1311. elif task.state == task.states.ERROR:
  1312. self.fail( task.info )
  1313. return
  1314. # Check what the tool returned. If the stdout or stderr matched
  1315. # regular expressions that indicate errors, then set an error.
  1316. # The same goes if the tool's exit code was in a given range.
  1317. if ( self.check_tool_output( stdout, stderr, tool_exit_code, task ) ):
  1318. task.state = task.states.OK
  1319. else:
  1320. task.state = task.states.ERROR
  1321. # Save stdout and stderr
  1322. if len( stdout ) > DATABASE_MAX_STRING_SIZE:
  1323. log.error( "stdout for task %d is greater than %s, only a portion will be logged to database" % ( task.id, DATABASE_MAX_STRING_SIZE_PRETTY ) )
  1324. task.stdout = util.shrink_string_by_size( stdout, DATABASE_MAX_STRING_SIZE, join_by="\n..\n", left_larger=True, beginning_on_size_error=True )
  1325. if len( stderr ) > DATABASE_MAX_STRING_SIZE:
  1326. log.error( "stderr for task %d is greater than %s, only a portion will be logged to database" % ( task.id, DATABASE_MAX_STRING_SIZE_PRETTY ) )
  1327. task.stderr = util.shrink_string_by_size( stderr, DATABASE_MAX_STRING_SIZE, join_by="\n..\n", left_larger=True, beginning_on_size_error=True )
  1328. task.exit_code = tool_exit_code
  1329. task.command_line = self.command_line
  1330. self.sa_session.flush()
1331. def cleanup( self, delete_files=True ):
  1332. # There is no task cleanup. The job cleans up for all tasks.
  1333. pass
  1334. def get_command_line( self ):
  1335. return self.command_line
  1336. def get_session_id( self ):
  1337. return self.session_id
  1338. def get_output_file_id( self, file ):
  1339. # There is no permanent output file for tasks.
  1340. return None
  1341. def get_tool_provided_job_metadata( self ):
  1342. # DBTODO Handle this as applicable for tasks.
  1343. return None
  1344. def get_dataset_finish_context( self, job_context, dataset ):
  1345. # Handled at the parent job level. Do nothing here.
  1346. pass
  1347. def setup_external_metadata( self, exec_dir=None, tmp_dir=None, dataset_files_path=None, config_root=None, config_file=None, datatypes_config=None, set_extension=True, **kwds ):
  1348. # There is no metadata setting for tasks. This is handled after the merge, at the job level.
  1349. return ""
  1350. def get_output_destination( self, output_path ):
  1351. """
  1352. Destination for outputs marked as from_work_dir. These must be copied with
1353. the same basename as the path for the ultimate output destination. This is
1354. required in the task case so that they can be merged.
  1355. """
  1356. return os.path.join( self.working_directory, os.path.basename( output_path ) )
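# Illustration (hypothetical paths) of how this differs from
# JobWrapper.get_output_destination(): the job copies a from_work_dir output
# straight to its final path, while a task redirects it into the task's own
# working directory, keeping the basename so the per-task parts can be merged:
#
#   output_path = '/galaxy/database/files/000/dataset_42.dat'
#   # JobWrapper  -> '/galaxy/database/files/000/dataset_42.dat'
#   # TaskWrapper -> os.path.join( self.working_directory, 'dataset_42.dat' )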
  1357. class ComputeEnvironment( object ):
  1358. """ Definition of the job as it will be run on the (potentially) remote
  1359. compute server.
  1360. """
  1361. __metaclass__ = ABCMeta
  1362. @abstractmethod
  1363. def output_paths( self ):
  1364. """ Output DatasetPaths defined by job. """
  1365. @abstractmethod
  1366. def input_paths( self ):
  1367. """ Input DatasetPaths defined by job. """
  1368. @abstractmethod
  1369. def working_directory( self ):
  1370. """ Job working directory (potentially remote) """
  1371. @abstractmethod
  1372. def config_directory( self ):
  1373. """ Directory containing config files (potentially remote) """
  1374. @abstractmethod
  1375. def sep( self ):
  1376. """ os.path.sep for the platform this job will execute in.
  1377. """
  1378. @abstractmethod
  1379. def new_file_path( self ):
  1380. """ Location to dump new files for this job on remote server. """
  1381. @abstractmethod
  1382. def version_path( self ):
  1383. """ Location of the version file for the underlying tool. """
  1384. @abstractmethod
  1385. def unstructured_path_rewriter( self ):
  1386. """ Return a function that takes in a value, determines if it is path
  1387. to be rewritten (will be passed non-path values as well - onus is on
  1388. this function to determine both if its input is a path and if it should
  1389. be rewritten.)
  1390. """
  1391. class SimpleComputeEnvironment( object ):
  1392. def config_directory( self ):
  1393. return self.working_directory( )
  1394. def sep( self ):
  1395. return os.path.sep
  1396. def unstructured_path_rewriter( self ):
  1397. return lambda v: v
  1398. class SharedComputeEnvironment( SimpleComputeEnvironment ):
  1399. """ Default ComputeEnviornment for job and task wrapper to pass
  1400. to ToolEvaluator - valid when Galaxy and compute share all the relevant
  1401. file systems.
  1402. """
  1403. def __init__( self, job_wrapper, job ):
  1404. self.app = job_wrapper.app
  1405. self.job_wrapper = job_wrapper
  1406. self.job = job
  1407. def output_paths( self ):
  1408. return self.job_wrapper.get_output_fnames()
  1409. def input_paths( self ):
  1410. return self.job_wrapper.get_input_paths( self.job )
  1411. def working_directory( self ):
  1412. return self.job_wrapper.working_directory
  1413. def new_file_path( self ):
  1414. return os.path.abspath( self.app.config.new_file_path )
  1415. def version_path( self ):
  1416. return self.job_wrapper.get_version_string_path()
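# A minimal usage sketch, mirroring TaskWrapper.prepare() above (the
# job_wrapper and job objects are assumed to exist already):
#
#   compute_environment = SharedComputeEnvironment( job_wrapper, job )
#   tool_evaluator = job_wrapper._get_tool_evaluator( job )
#   tool_evaluator.set_compute_environment( compute_environment )
#   command_line, extra_filenames = tool_evaluator.build()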
  1417. class NoopQueue( object ):
  1418. """
  1419. Implements the JobQueue / JobStopQueue interface but does nothing
  1420. """
  1421. def put( self, *args, **kwargs ):
  1422. return
  1423. def put_stop( self, *args ):
  1424. return
  1425. def shutdown( self ):
  1426. return
  1427. class ParallelismInfo(object):
  1428. """
  1429. Stores the information (if any) for running multiple instances of the tool in parallel
  1430. on the same set of inputs.
  1431. """
  1432. def __init__(self, tag):
  1433. self.method = tag.get('method')
  1434. if isinstance(tag, dict):
  1435. items = tag.iteritems()
  1436. else:
  1437. items = tag.attrib.items()
  1438. self.attributes = dict( [ item for item in items if item[ 0 ] != 'method' ])
  1439. if len(self.attributes) == 0:
  1440. # legacy basic mode - provide compatible defaults
  1441. self.attributes['split_size'] = 20
  1442. self.attributes['split_mode'] = 'number_of_parts'
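# Sketch of the two inputs this constructor accepts. An XML parallelism tag
# with no extra attributes, e.g.
#
#   <parallelism method="basic"></parallelism>
#
# yields method='basic' plus the legacy defaults applied above
# (split_size=20, split_mode='number_of_parts'). A dict keeps every key other
# than 'method' in self.attributes (the 'multi' values are illustrative only):
#
#   info = ParallelismInfo( { 'method': 'multi', 'split_size': '100',
#                             'split_mode': 'to_size' } )
#   info.method        # 'multi'
#   info.attributes    # {'split_size': '100', 'split_mode': 'to_size'}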