/lib/galaxy/jobs/__init__.py

https://bitbucket.org/cistrome/cistrome-harvard/

"""
Support for running a tool in Galaxy via an internal job management system
"""
from abc import ABCMeta
from abc import abstractmethod

import time
import copy
import datetime
import galaxy
import logging
import os
import pwd
import random
import re
import shutil
import subprocess
import sys
import traceback

from galaxy import model, util
from galaxy.datatypes import metadata
from galaxy.exceptions import ObjectInvalid, ObjectNotFound
from galaxy.jobs.actions.post import ActionBox
from galaxy.jobs.mapper import JobRunnerMapper
from galaxy.jobs.runners import BaseJobRunner
from galaxy.util.bunch import Bunch
from galaxy.util.expressions import ExpressionContext
from galaxy.util.json import from_json_string
from galaxy.util import unicodify

from .output_checker import check_output
from .datasets import TaskPathRewriter
from .datasets import OutputsToWorkingDirectoryPathRewriter
from .datasets import NullDatasetPathRewriter
from .datasets import DatasetPath

log = logging.getLogger( __name__ )

DATABASE_MAX_STRING_SIZE = util.DATABASE_MAX_STRING_SIZE
DATABASE_MAX_STRING_SIZE_PRETTY = util.DATABASE_MAX_STRING_SIZE_PRETTY

# This file, if created in the job's working directory, will be used for
# setting advanced metadata properties on the job and its associated outputs.
# This interface is currently experimental, is only used by the upload tool,
# and should eventually become API'd
TOOL_PROVIDED_JOB_METADATA_FILE = 'galaxy.json'
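
# A hedged sketch (not taken from this file) of the kind of content a tool
# might write to the experimental galaxy.json file described above: one JSON
# object per line describing an output dataset. The key names shown are
# illustrative assumptions only:
#
#   {"type": "dataset", "dataset_id": 42, "ext": "tabular", "name": "Filtered output", "line_count": 100}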
  42. class JobDestination( Bunch ):
  43. """
  44. Provides details about where a job runs
  45. """
  46. def __init__(self, **kwds):
  47. self['id'] = None
  48. self['url'] = None
  49. self['tags'] = None
  50. self['runner'] = None
  51. self['legacy'] = False
  52. self['converted'] = False
  53. # dict is appropriate (rather than a bunch) since keys may not be valid as attributes
  54. self['params'] = dict()
  55. super(JobDestination, self).__init__(**kwds)
  56. # Store tags as a list
  57. if self.tags is not None:
  58. self['tags'] = [ x.strip() for x in self.tags.split(',') ]
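
    # A minimal usage sketch. The runner id and param name below are
    # illustrative assumptions, not values taken from this file:
    #
    #   dest = JobDestination( id='cluster_default', runner='drmaa',
    #                          params={ 'nativeSpecification': '-q all.q' } )
    #   dest.runner                              # -> 'drmaa'
    #   dest.params['nativeSpecification']       # -> '-q all.q'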


class JobToolConfiguration( Bunch ):
    """
    Provides details on what handler and destination a tool should use

    A JobToolConfiguration will have the required attribute 'id' and optional
    attributes 'handler', 'destination', and 'params'
    """
    def __init__(self, **kwds):
        self['handler'] = None
        self['destination'] = None
        self['params'] = dict()
        super(JobToolConfiguration, self).__init__(**kwds)


class JobConfiguration( object ):
    """A parser and interface to advanced job management features.

    These features are configured in the job configuration, by default, ``job_conf.xml``
    """
    DEFAULT_NWORKERS = 4

    def __init__(self, app):
        """Parse the job configuration XML.
        """
        self.app = app
        self.runner_plugins = []
        self.handlers = {}
        self.handler_runner_plugins = {}
        self.default_handler_id = None
        self.destinations = {}
        self.destination_tags = {}
        self.default_destination_id = None
        self.tools = {}
        self.limits = Bunch()

        # Initialize the config
        try:
            tree = util.parse_xml(self.app.config.job_config_file)
            self.__parse_job_conf_xml(tree)
        except IOError:
            log.warning( 'Job configuration "%s" does not exist, using legacy job configuration from Galaxy config file "%s" instead' % ( self.app.config.job_config_file, self.app.config.config_file ) )
            self.__parse_job_conf_legacy()

    def __parse_job_conf_xml(self, tree):
        """Loads the new-style job configuration from options in the job config file (by default, job_conf.xml).

        :param tree: Object representing the root ``<job_conf>`` object in the job config file.
        :type tree: ``xml.etree.ElementTree.Element``
        """
        root = tree.getroot()
        log.debug('Loading job configuration from %s' % self.app.config.job_config_file)

        # Parse job plugins
        plugins = root.find('plugins')
        if plugins is not None:
            for plugin in self.__findall_with_required(plugins, 'plugin', ('id', 'type', 'load')):
                if plugin.get('type') == 'runner':
                    workers = plugin.get('workers', plugins.get('workers', JobConfiguration.DEFAULT_NWORKERS))
                    runner_kwds = self.__get_params(plugin)
                    runner_info = dict(id=plugin.get('id'),
                                       load=plugin.get('load'),
                                       workers=int(workers),
                                       kwds=runner_kwds)
                    self.runner_plugins.append(runner_info)
                else:
                    log.error('Unknown plugin type: %s' % plugin.get('type'))
        # Load tasks if configured
        if self.app.config.use_tasked_jobs:
            self.runner_plugins.append(dict(id='tasks', load='tasks', workers=self.app.config.local_task_queue_workers))

        # Parse handlers
        handlers = root.find('handlers')
        if handlers is not None:
            for handler in self.__findall_with_required(handlers, 'handler'):
                id = handler.get('id')
                if id in self.handlers:
                    log.error("Handler '%s' overlaps handler with the same name, ignoring" % id)
                else:
                    log.debug("Read definition for handler '%s'" % id)
                    self.handlers[id] = (id,)
                    for plugin in handler.findall('plugin'):
                        if id not in self.handler_runner_plugins:
                            self.handler_runner_plugins[id] = []
                        self.handler_runner_plugins[id].append( plugin.get('id') )
                    if handler.get('tags', None) is not None:
                        for tag in [ x.strip() for x in handler.get('tags').split(',') ]:
                            if tag in self.handlers:
                                self.handlers[tag].append(id)
                            else:
                                self.handlers[tag] = [id]

        # Determine the default handler(s)
        self.default_handler_id = self.__get_default(handlers, self.handlers.keys())

        # Parse destinations
        destinations = root.find('destinations')
        for destination in self.__findall_with_required(destinations, 'destination', ('id', 'runner')):
            id = destination.get('id')
            job_destination = JobDestination(**dict(destination.items()))
            job_destination['params'] = self.__get_params(destination)
            self.destinations[id] = (job_destination,)
            if job_destination.tags is not None:
                for tag in job_destination.tags:
                    if tag not in self.destinations:
                        self.destinations[tag] = []
                    self.destinations[tag].append(job_destination)

        # Determine the default destination
        self.default_destination_id = self.__get_default(destinations, self.destinations.keys())

        # Parse tool mappings
        tools = root.find('tools')
        if tools is not None:
            for tool in self.__findall_with_required(tools, 'tool'):
                # There can be multiple definitions with identical ids, but different params
                id = tool.get('id').lower().rstrip('/')
                if id not in self.tools:
                    self.tools[id] = list()
                self.tools[id].append(JobToolConfiguration(**dict(tool.items())))
                self.tools[id][-1]['params'] = self.__get_params(tool)

        types = dict(registered_user_concurrent_jobs=int,
                     anonymous_user_concurrent_jobs=int,
                     walltime=str,
                     output_size=int)

        self.limits = Bunch(registered_user_concurrent_jobs=None,
                            anonymous_user_concurrent_jobs=None,
                            walltime=None,
                            walltime_delta=None,
                            output_size=None,
                            concurrent_jobs={})

        # Parse job limits
        limits = root.find('limits')
        if limits is not None:
            for limit in self.__findall_with_required(limits, 'limit', ('type',)):
                type = limit.get('type')
                if type == 'concurrent_jobs':
                    id = limit.get('tag', None) or limit.get('id')
                    self.limits.concurrent_jobs[id] = int(limit.text)
                elif limit.text:
                    self.limits.__dict__[type] = types.get(type, str)(limit.text)

        if self.limits.walltime is not None:
            h, m, s = [ int( v ) for v in self.limits.walltime.split( ':' ) ]
            self.limits.walltime_delta = datetime.timedelta( 0, s, 0, 0, m, h )

        log.debug('Done loading job configuration')
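
    # A minimal sketch of the kind of job_conf.xml this parser expects.
    # Illustrative only: the plugin/handler/destination ids and the walltime
    # value are assumptions, not taken from this file.
    #
    #   <job_conf>
    #       <plugins workers="4">
    #           <plugin id="local" type="runner" load="galaxy.jobs.runners.local:LocalJobRunner"/>
    #       </plugins>
    #       <handlers default="main">
    #           <handler id="main"/>
    #       </handlers>
    #       <destinations default="local_dest">
    #           <destination id="local_dest" runner="local"/>
    #       </destinations>
    #       <limits>
    #           <limit type="walltime">24:00:00</limit>
    #       </limits>
    #   </job_conf>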
    def __parse_job_conf_legacy(self):
        """Loads the old-style job configuration from options in the galaxy config file (by default, universe_wsgi.ini).
        """
        log.debug('Loading job configuration from %s' % self.app.config.config_file)

        # Always load local and lwr
        self.runner_plugins = [dict(id='local', load='local', workers=self.app.config.local_job_queue_workers), dict(id='lwr', load='lwr', workers=self.app.config.cluster_job_queue_workers)]
        # Load tasks if configured
        if self.app.config.use_tasked_jobs:
            self.runner_plugins.append(dict(id='tasks', load='tasks', workers=self.app.config.local_task_queue_workers))
        for runner in self.app.config.start_job_runners:
            self.runner_plugins.append(dict(id=runner, load=runner, workers=self.app.config.cluster_job_queue_workers))

        # Set the handlers
        for id in self.app.config.job_handlers:
            self.handlers[id] = (id,)

        self.handlers['default_job_handlers'] = self.app.config.default_job_handlers
        self.default_handler_id = 'default_job_handlers'

        # Set tool handler configs
        for id, tool_handlers in self.app.config.tool_handlers.items():
            self.tools[id] = list()
            for handler_config in tool_handlers:
                # rename the 'name' key to 'handler'
                handler_config['handler'] = handler_config.pop('name')
                self.tools[id].append(JobToolConfiguration(**handler_config))

        # Set tool runner configs
        for id, tool_runners in self.app.config.tool_runners.items():
            # Might have been created in the handler parsing above
            if id not in self.tools:
                self.tools[id] = list()
            for runner_config in tool_runners:
                url = runner_config['url']
                if url not in self.destinations:
                    # Create a new "legacy" JobDestination - it will have its URL converted to destination params once the appropriate plugin has loaded
                    self.destinations[url] = (JobDestination(id=url, runner=url.split(':', 1)[0], url=url, legacy=True, converted=False),)
                for tool_conf in self.tools[id]:
                    if tool_conf.params == runner_config.get('params', {}):
                        tool_conf['destination'] = url
                        break
                else:
                    # There was not an existing config (from the handlers section) with the same params
                    # rename the 'url' key to 'destination'
                    runner_config['destination'] = runner_config.pop('url')
                    self.tools[id].append(JobToolConfiguration(**runner_config))

        self.destinations[self.app.config.default_cluster_job_runner] = (JobDestination(id=self.app.config.default_cluster_job_runner, runner=self.app.config.default_cluster_job_runner.split(':', 1)[0], url=self.app.config.default_cluster_job_runner, legacy=True, converted=False),)
        self.default_destination_id = self.app.config.default_cluster_job_runner

        # Set the job limits
        self.limits = Bunch(registered_user_concurrent_jobs=self.app.config.registered_user_job_limit,
                            anonymous_user_concurrent_jobs=self.app.config.anonymous_user_job_limit,
                            walltime=self.app.config.job_walltime,
                            walltime_delta=self.app.config.job_walltime_delta,
                            output_size=self.app.config.output_size_limit,
                            concurrent_jobs={})

        log.debug('Done loading job configuration')

    def __get_default(self, parent, names):
        """Returns the default attribute set in a parent tag like <handlers> or <destinations>, or returns the ID of the child if there is no explicit default and only one child.

        :param parent: Object representing a tag that may or may not have a 'default' attribute.
        :type parent: ``xml.etree.ElementTree.Element``
        :param names: The list of destination or handler IDs or tags that were loaded.
        :type names: list of str

        :returns: str -- id or tag representing the default.
        """
        rval = parent.get('default')
        if rval is not None:
            # If the parent element has a 'default' attribute, use the id or tag in that attribute
            if rval not in names:
                raise Exception("<%s> default attribute '%s' does not match a defined id or tag in a child element" % (parent.tag, rval))
            log.debug("<%s> default set to child with id or tag '%s'" % (parent.tag, rval))
        elif len(names) == 1:
            log.info("Setting <%s> default to child with id '%s'" % (parent.tag, names[0]))
            rval = names[0]
        else:
            raise Exception("No <%s> default specified, please specify a valid id or tag with the 'default' attribute" % parent.tag)
        return rval

    def __findall_with_required(self, parent, match, attribs=None):
        """Like ``xml.etree.ElementTree.Element.findall()``, except only returns children that have the specified attribs.

        :param parent: Parent element in which to find.
        :type parent: ``xml.etree.ElementTree.Element``
        :param match: Name of child elements to find.
        :type match: str
        :param attribs: List of required attributes in children elements.
        :type attribs: list of str

        :returns: list of ``xml.etree.ElementTree.Element``
        """
        rval = []
        if attribs is None:
            attribs = ('id',)
        for elem in parent.findall(match):
            for attrib in attribs:
                if attrib not in elem.attrib:
                    log.warning("required '%s' attribute is missing from <%s> element" % (attrib, match))
                    break
            else:
                rval.append(elem)
        return rval

    def __get_params(self, parent):
        """Parses any child <param> tags into a dictionary suitable for persistence.

        :param parent: Parent element in which to find child <param> tags.
        :type parent: ``xml.etree.ElementTree.Element``

        :returns: dict
        """
        rval = {}
        for param in parent.findall('param'):
            rval[param.get('id')] = param.text
        return rval

    @property
    def default_job_tool_configuration(self):
        """The default JobToolConfiguration, used if a tool does not have an explicit definition in the configuration. It consists of a reference to the default handler and default destination.

        :returns: JobToolConfiguration -- a representation of a <tool> element that uses the default handler and destination
        """
        return JobToolConfiguration(id='default', handler=self.default_handler_id, destination=self.default_destination_id)

    # Called upon instantiation of a Tool object
    def get_job_tool_configurations(self, ids):
        """Get all configured JobToolConfigurations for a tool ID, or, if given a list of IDs, the JobToolConfigurations for the first id in ``ids`` matching a tool definition.

        .. note::

            You should not mix tool shed tool IDs, versionless tool shed IDs, and tool config tool IDs that refer to the same tool.

        :param ids: Tool ID or IDs to fetch the JobToolConfiguration of.
        :type ids: list or str.

        :returns: list -- JobToolConfiguration Bunches representing <tool> elements matching the specified ID(s).

        Example tool ID strings include:

        * Full tool shed id: ``toolshed.example.org/repos/nate/filter_tool_repo/filter_tool/1.0.0``
        * Tool shed id less version: ``toolshed.example.org/repos/nate/filter_tool_repo/filter_tool``
        * Tool config tool id: ``filter_tool``
        """
        rval = []
        # listify if ids is a single (string) id
        ids = util.listify(ids)
        for id in ids:
            if id in self.tools:
                # If a tool has definitions that include job params but not a
                # definition for jobs without params, include the default
                # config
                for job_tool_configuration in self.tools[id]:
                    if not job_tool_configuration.params:
                        break
                else:
                    rval.append(self.default_job_tool_configuration)
                rval.extend(self.tools[id])
                break
        else:
            rval.append(self.default_job_tool_configuration)
        return rval
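
    # A hedged usage sketch. The tool id is an illustrative assumption; the
    # ``app.job_config`` attribute is the one used elsewhere in this file:
    #
    #   confs = app.job_config.get_job_tool_configurations( [ 'filter_tool' ] )
    #   handler_id = confs[0].handler or app.job_config.default_handler_id
    #   destination_id = confs[0].destination or app.job_config.default_destination_id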
    def __get_single_item(self, collection):
        """Given a collection of handlers or destinations, return one item from the collection at random.
        """
        # Done like this to avoid random under the assumption it's faster to avoid it
        if len(collection) == 1:
            return collection[0]
        else:
            return random.choice(collection)

    # This is called by Tool.get_job_handler()
    def get_handler(self, id_or_tag):
        """Given a handler ID or tag, return the provided ID or an ID matching the provided tag

        :param id_or_tag: A handler ID or tag.
        :type id_or_tag: str

        :returns: str -- A valid job handler ID.
        """
        if id_or_tag is None:
            id_or_tag = self.default_handler_id
        return self.__get_single_item(self.handlers[id_or_tag])

    def get_destination(self, id_or_tag):
        """Given a destination ID or tag, return the JobDestination matching the provided ID or tag

        :param id_or_tag: A destination ID or tag.
        :type id_or_tag: str

        :returns: JobDestination -- A valid destination

        Destinations are deepcopied as they are expected to be passed in to job
        runners, which will modify them for persisting params set at runtime.
        """
        if id_or_tag is None:
            id_or_tag = self.default_destination_id
        return copy.deepcopy(self.__get_single_item(self.destinations[id_or_tag]))

    def get_destinations(self, id_or_tag):
        """Given a destination ID or tag, return all JobDestinations matching the provided ID or tag

        :param id_or_tag: A destination ID or tag.
        :type id_or_tag: str

        :returns: list or tuple of JobDestinations

        Destinations are not deepcopied, so they should not be passed to
        anything which might modify them.
        """
        return self.destinations.get(id_or_tag, None)

    def get_job_runner_plugins(self, handler_id):
        """Load all configured job runner plugins

        :returns: list of job runner plugins
        """
        rval = {}
        if handler_id in self.handler_runner_plugins:
            plugins_to_load = [ rp for rp in self.runner_plugins if rp['id'] in self.handler_runner_plugins[handler_id] ]
            log.info( "Handler '%s' will load specified runner plugins: %s", handler_id, ', '.join( [ rp['id'] for rp in plugins_to_load ] ) )
        else:
            plugins_to_load = self.runner_plugins
            log.info( "Handler '%s' will load all configured runner plugins", handler_id )
        for runner in plugins_to_load:
            class_names = []
            module = None
            id = runner['id']
            load = runner['load']
            if ':' in load:
                # Name to load was specified as '<module>:<class>'
                module_name, class_name = load.rsplit(':', 1)
                class_names = [ class_name ]
                module = __import__( module_name )
            else:
                # Name to load was specified as '<module>'
                if '.' not in load:
                    # For legacy reasons, try from galaxy.jobs.runners first if there's no '.' in the name
                    module_name = 'galaxy.jobs.runners.' + load
                    try:
                        module = __import__( module_name )
                    except ImportError:
                        # No such module, we'll retry without prepending galaxy.jobs.runners.
                        # All other exceptions (e.g. something wrong with the module code) will raise
                        pass
                if module is None:
                    # If the name included a '.' or loading from the static runners path failed, try the original name
                    module = __import__( load )
                    module_name = load
            if module is None:
                # Module couldn't be loaded, error should have already been displayed
                continue
            for comp in module_name.split( "." )[1:]:
                module = getattr( module, comp )
            if not class_names:
                # If there's not a ':', we check <module>.__all__ for class names
                try:
                    assert module.__all__
                    class_names = module.__all__
                except ( AssertionError, AttributeError ):
                    # AttributeError covers modules that define no __all__ at all
                    log.error( 'Runner "%s" does not contain a list of exported classes in __all__' % load )
                    continue
            for class_name in class_names:
                runner_class = getattr( module, class_name )
                try:
                    assert issubclass(runner_class, BaseJobRunner)
                except TypeError:
                    log.warning("A non-class name was found in __all__, ignoring: %s" % id)
                    continue
                except AssertionError:
                    log.warning("Job runner classes must be subclassed from BaseJobRunner, %s has bases: %s" % (id, runner_class.__bases__))
                    continue
                try:
                    rval[id] = runner_class( self.app, runner[ 'workers' ], **runner.get( 'kwds', {} ) )
                except TypeError:
                    log.exception( "Job runner '%s:%s' has not been converted to a new-style runner or encountered TypeError on load" % ( module_name, class_name ) )
                    rval[id] = runner_class( self.app )
                log.debug( "Loaded job runner '%s:%s' as '%s'" % ( module_name, class_name, id ) )
        return rval
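
    # How a plugin 'load' string resolves, as a hedged sketch (the module and
    # class names are illustrative):
    #
    #   load="drmaa"
    #       -> galaxy.jobs.runners.drmaa, classes taken from that module's __all__
    #   load="galaxy.jobs.runners.local:LocalJobRunner"
    #       -> explicit module and class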
    def is_id(self, collection):
        """Given a collection of handlers or destinations, indicate whether the collection represents a real ID (stored as a tuple) rather than a tag (stored as a list)

        :param collection: A representation of a destination or handler
        :type collection: tuple or list

        :returns: bool
        """
        return type(collection) == tuple

    def is_tag(self, collection):
        """Given a collection of handlers or destinations, indicate whether the collection represents a tag (stored as a list) rather than a real ID (stored as a tuple)

        :param collection: A representation of a destination or handler
        :type collection: tuple or list

        :returns: bool
        """
        return type(collection) == list

    def is_handler(self, server_name):
        """Given a server name, indicate whether the server is a job handler

        :param server_name: The name to check
        :type server_name: str

        :return: bool
        """
        for collection in self.handlers.values():
            if server_name in collection:
                return True
        return False

    def convert_legacy_destinations(self, job_runners):
        """Converts legacy (from a URL) destinations to contain the appropriate runner params defined in the URL.

        :param job_runners: All loaded job runner plugins.
        :type job_runners: list of job runner plugins
        """
        for id, destination in [ ( id, destinations[0] ) for id, destinations in self.destinations.items() if self.is_id(destinations) ]:
            # Only need to deal with real destinations, not members of tags
            if destination.legacy and not destination.converted:
                if destination.runner in job_runners:
                    destination.params = job_runners[destination.runner].url_to_destination(destination.url).params
                    destination.converted = True
                    if destination.params:
                        log.debug("Legacy destination with id '%s', url '%s' converted, got params:" % (id, destination.url))
                        for k, v in destination.params.items():
                            log.debug(" %s: %s" % (k, v))
                    else:
                        log.debug("Legacy destination with id '%s', url '%s' converted, got params: None" % (id, destination.url))
                else:
                    log.warning("Legacy destination with id '%s' could not be converted: Unknown runner plugin: %s" % (id, destination.runner))
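
    # A hedged sketch of what conversion does for a legacy URL-style
    # destination. The URL form and the resulting param name are assumptions
    # for illustration, taken from typical DRMAA-style runner URLs rather than
    # from this file:
    #
    #   url "drmaa://-q all.q/"  ->  JobDestination( runner='drmaa',
    #                                                params={ 'nativeSpecification': '-q all.q' } )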


class JobWrapper( object ):
    """
    Wraps a 'model.Job' with convenience methods for running processes and
    state management.
    """
    def __init__( self, job, queue ):
        self.job_id = job.id
        self.session_id = job.session_id
        self.user_id = job.user_id
        self.tool = queue.app.toolbox.tools_by_id.get( job.tool_id, None )
        self.queue = queue
        self.app = queue.app
        self.sa_session = self.app.model.context
        self.extra_filenames = []
        self.command_line = None
        # Tool versioning variables
        self.write_version_cmd = None
        self.version_string = ""
        self.galaxy_lib_dir = None
        # With job outputs in the working directory, we need the working
        # directory to be set before prepare is run, or else premature deletion
        # and job recovery fail.
        # Create the working dir if necessary
        try:
            self.app.object_store.create(job, base_dir='job_work', dir_only=True, extra_dir=str(self.job_id))
            self.working_directory = self.app.object_store.get_filename(job, base_dir='job_work', dir_only=True, extra_dir=str(self.job_id))
            log.debug('(%s) Working directory for job is: %s' % (self.job_id, self.working_directory))
        except ObjectInvalid:
            raise Exception('Unable to create job working directory, job failure')
        self.dataset_path_rewriter = self._job_dataset_path_rewriter( self.working_directory )
        self.output_paths = None
        self.output_hdas_and_paths = None
        self.tool_provided_job_metadata = None
        # Wrapper holding the info required to restore and clean up from files used for setting metadata externally
        self.external_output_metadata = metadata.JobExternalOutputMetadataWrapper( job )
        self.job_runner_mapper = JobRunnerMapper( self, queue.dispatcher.url_to_destination, self.app.job_config )
        self.params = None
        if job.params:
            self.params = from_json_string( job.params )
        self.__user_system_pwent = None
        self.__galaxy_system_pwent = None

    def _job_dataset_path_rewriter( self, working_directory ):
        if self.app.config.outputs_to_working_directory:
            dataset_path_rewriter = OutputsToWorkingDirectoryPathRewriter( working_directory )
        else:
            dataset_path_rewriter = NullDatasetPathRewriter( )
        return dataset_path_rewriter
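
    # A hedged sketch of the effect of outputs_to_working_directory (the paths
    # and the false-path naming are illustrative assumptions): with the option
    # enabled, a dataset whose real path is /galaxy/files/000/dataset_1.dat is
    # written during the job to a "false path" inside the working directory
    # (e.g. <working_directory>/galaxy_dataset_1.dat) and is moved to the real
    # path by finish() or fail().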
    def can_split( self ):
        # Should the job handler split this job up?
        return self.app.config.use_tasked_jobs and self.tool.parallelism

    def get_job_runner_url( self ):
        log.warning('(%s) Job runner URLs are deprecated, use destinations instead.' % self.job_id)
        return self.job_destination.url

    def get_parallelism(self):
        return self.tool.parallelism

    # legacy naming
    get_job_runner = get_job_runner_url

    @property
    def job_destination(self):
        """Return the JobDestination that this job will use to run. This will
        either be a configured destination, a randomly selected destination if
        the configured destination was a tag, or a dynamically generated
        destination from the dynamic runner.

        Calling this method for the first time causes the dynamic runner to do
        its calculation, if any.

        :returns: ``JobDestination``
        """
        return self.job_runner_mapper.get_job_destination(self.params)

    def get_job( self ):
        return self.sa_session.query( model.Job ).get( self.job_id )

    def get_id_tag(self):
        # For compatibility with drmaa, which uses job_id right now, and TaskWrapper
        return self.get_job().get_id_tag()

    def get_param_dict( self ):
        """
        Restore the dictionary of parameters from the database.
        """
        job = self.get_job()
        param_dict = dict( [ ( p.name, p.value ) for p in job.parameters ] )
        param_dict = self.tool.params_from_strings( param_dict, self.app )
        return param_dict

    def get_version_string_path( self ):
        return os.path.abspath(os.path.join(self.app.config.new_file_path, "GALAXY_VERSION_STRING_%s" % self.job_id))
    def prepare( self, compute_environment=None ):
        """
        Prepare the job to run by creating the working directory and the
        config files.
        """
        self.sa_session.expunge_all()  # this prevents the metadata reverting that has been seen in conjunction with the PBS job runner

        if not os.path.exists( self.working_directory ):
            os.mkdir( self.working_directory )

        job = self._load_job()

        def get_special( ):
            special = self.sa_session.query( model.JobExportHistoryArchive ).filter_by( job=job ).first()
            if not special:
                special = self.sa_session.query( model.GenomeIndexToolData ).filter_by( job=job ).first()
            return special

        tool_evaluator = self._get_tool_evaluator( job )
        compute_environment = compute_environment or self.default_compute_environment( job )
        tool_evaluator.set_compute_environment( compute_environment, get_special=get_special )

        self.sa_session.flush()

        self.command_line, self.extra_filenames = tool_evaluator.build()
        # FIXME: for now, tools get Galaxy's lib dir in their path
        if self.command_line and self.command_line.startswith( 'python' ):
            self.galaxy_lib_dir = os.path.abspath( "lib" )  # cwd = galaxy root
        # Shell fragment to inject dependencies
        self.dependency_shell_commands = self.tool.build_dependency_shell_commands()
        # We need command_line persisted to the db in order for Galaxy to re-queue the job
        # if the server was stopped and restarted before the job finished
        job.command_line = self.command_line
        self.sa_session.add( job )
        self.sa_session.flush()
        # Return list of all extra files
        self.param_dict = tool_evaluator.param_dict
        version_string_cmd = self.tool.version_string_cmd
        if version_string_cmd:
            self.write_version_cmd = "%s > %s 2>&1" % ( version_string_cmd, compute_environment.version_path() )
        else:
            self.write_version_cmd = None
        return self.extra_filenames
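
    # A hedged example of the version-capture fragment built above. The tool
    # command and path are illustrative: with version_string_cmd
    # "samtools --version" and a version path of
    # <new_file_path>/GALAXY_VERSION_STRING_42, write_version_cmd becomes:
    #
    #   samtools --version > <new_file_path>/GALAXY_VERSION_STRING_42 2>&1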
    def default_compute_environment( self, job=None ):
        if not job:
            job = self.get_job()
        return SharedComputeEnvironment( self, job )

    def _load_job( self ):
        # Load job from database and verify it has user or session.
        # Restore parameters from the database
        job = self.get_job()
        if job.user is None and job.galaxy_session is None:
            raise Exception( 'Job %s has no user and no session.' % job.id )
        return job

    def _get_tool_evaluator( self, job ):
        # Hacky way to avoid circular import for now.
        # Placing ToolEvaluator in either jobs or tools
        # results in a circular dependency.
        from galaxy.tools.evaluation import ToolEvaluator
        tool_evaluator = ToolEvaluator(
            app=self.app,
            job=job,
            tool=self.tool,
            local_working_directory=self.working_directory,
        )
        return tool_evaluator
    def fail( self, message, exception=False, stdout="", stderr="", exit_code=None ):
        """
        Indicate job failure by setting state and message on all output
        datasets.
        """
        job = self.get_job()
        self.sa_session.refresh( job )

        # if the job was deleted, don't fail it
        if not job.state == job.states.DELETED:
            # Check if the failure is due to an exception
            if exception:
                # Save the traceback immediately in case we generate another
                # below
                job.traceback = traceback.format_exc()
                # Get the exception and let the tool attempt to generate
                # a better message
                etype, evalue, tb = sys.exc_info()
                m = self.tool.handle_job_failure_exception( evalue )
                if m:
                    message = m
            if self.app.config.outputs_to_working_directory:
                for dataset_path in self.get_output_fnames():
                    try:
                        shutil.move( dataset_path.false_path, dataset_path.real_path )
                        log.debug( "fail(): Moved %s to %s" % ( dataset_path.false_path, dataset_path.real_path ) )
                    except ( IOError, OSError ), e:
                        log.error( "fail(): Missing output file in working directory: %s" % e )
            for dataset_assoc in job.output_datasets + job.output_library_datasets:
                dataset = dataset_assoc.dataset
                self.sa_session.refresh( dataset )
                dataset.state = dataset.states.ERROR
                dataset.blurb = 'tool error'
                dataset.info = message
                dataset.set_size()
                dataset.dataset.set_total_size()
                dataset.mark_unhidden()
                if dataset.ext == 'auto':
                    dataset.extension = 'data'
                # Update (non-library) job output datasets through the object store
                if dataset not in job.output_library_datasets:
                    self.app.object_store.update_from_file(dataset.dataset, create=True)
                # Pause any dependent jobs (and those jobs' outputs)
                for dep_job_assoc in dataset.dependent_jobs:
                    self.pause( dep_job_assoc.job, "Execution of this dataset's job is paused because its input datasets are in an error state." )
                self.sa_session.add( dataset )
                self.sa_session.flush()
            job.state = job.states.ERROR
            job.command_line = self.command_line
            job.info = message
            # TODO: Put setting the stdout, stderr, and exit code in one place
            # (not duplicated with the finish method).
            if ( len( stdout ) > DATABASE_MAX_STRING_SIZE ):
                stdout = util.shrink_string_by_size( stdout, DATABASE_MAX_STRING_SIZE, join_by="\n..\n", left_larger=True, beginning_on_size_error=True )
                log.info( "stdout for job %d is greater than %s, only a portion will be logged to database" % ( job.id, DATABASE_MAX_STRING_SIZE_PRETTY ) )
            job.stdout = stdout
            if ( len( stderr ) > DATABASE_MAX_STRING_SIZE ):
                stderr = util.shrink_string_by_size( stderr, DATABASE_MAX_STRING_SIZE, join_by="\n..\n", left_larger=True, beginning_on_size_error=True )
                log.info( "stderr for job %d is greater than %s, only a portion will be logged to database" % ( job.id, DATABASE_MAX_STRING_SIZE_PRETTY ) )
            job.stderr = stderr
            # Let the exit code be Null if one is not provided:
            if ( exit_code != None ):
                job.exit_code = exit_code
            self.sa_session.add( job )
            self.sa_session.flush()
        # Perform email action even on failure.
        for pja in [pjaa.post_job_action for pjaa in job.post_job_actions if pjaa.post_job_action.action_type == "EmailAction"]:
            ActionBox.execute(self.app, self.sa_session, pja, job)
        # If the job was deleted, call tool specific fail actions (used for e.g. external metadata) and clean up
        if self.tool:
            self.tool.job_failed( self, message, exception )
        delete_files = self.app.config.cleanup_job == 'always' or (self.app.config.cleanup_job == 'onsuccess' and job.state == job.states.DELETED)
        self.cleanup( delete_files=delete_files )
    def pause( self, job=None, message=None ):
        if job is None:
            job = self.get_job()
        if message is None:
            message = "Execution of this dataset's job is paused"
        if job.state == job.states.NEW:
            for dataset_assoc in job.output_datasets + job.output_library_datasets:
                dataset_assoc.dataset.dataset.state = dataset_assoc.dataset.dataset.states.PAUSED
                dataset_assoc.dataset.info = message
                self.sa_session.add( dataset_assoc.dataset )
            job.state = job.states.PAUSED
            self.sa_session.add( job )

    def change_state( self, state, info=False ):
        job = self.get_job()
        self.sa_session.refresh( job )
        for dataset_assoc in job.output_datasets + job.output_library_datasets:
            dataset = dataset_assoc.dataset
            self.sa_session.refresh( dataset )
            dataset.state = state
            if info:
                dataset.info = info
            self.sa_session.add( dataset )
            self.sa_session.flush()
        if info:
            job.info = info
        job.state = state
        self.sa_session.add( job )
        self.sa_session.flush()

    def get_state( self ):
        job = self.get_job()
        self.sa_session.refresh( job )
        return job.state

    def set_runner( self, runner_url, external_id ):
        log.warning('set_runner() is deprecated, use set_job_destination()')
        self.set_job_destination(self.job_destination, external_id)

    def set_job_destination( self, job_destination, external_id=None ):
        """
        Persist job destination params in the database for recovery.

        self.job_destination is not used because a runner may choose to rewrite
        parts of the destination (e.g. the params).
        """
        job = self.get_job()
        self.sa_session.refresh(job)
        log.debug('(%s) Persisting job destination (destination id: %s)' % (job.id, job_destination.id))
        job.destination_id = job_destination.id
        job.destination_params = job_destination.params
        job.job_runner_name = job_destination.runner
        job.job_runner_external_id = external_id
        self.sa_session.add(job)
        self.sa_session.flush()
    def finish( self, stdout, stderr, tool_exit_code=None ):
        """
        Called to indicate that the associated command has been run. Updates
        the output datasets based on stderr and stdout from the command, and
        the contents of the output files.
        """
        stdout = unicodify( stdout )
        stderr = unicodify( stderr )

        # default post job setup
        self.sa_session.expunge_all()
        job = self.get_job()

        # TODO: After failing here, consider returning from the function.
        try:
            self.reclaim_ownership()
        except:
            log.exception( '(%s) Failed to change ownership of %s, failing' % ( job.id, self.working_directory ) )
            return self.fail( job.info, stdout=stdout, stderr=stderr, exit_code=tool_exit_code )

        # if the job was deleted, don't finish it
        if job.state == job.states.DELETED or job.state == job.states.ERROR:
            # SM: Note that, at this point, the exit code must be saved in case
            # there was an error. Errors caught here could mean that the job
            # was deleted by an administrator (based on old comments), but it
            # could also mean that a job was broken up into tasks and one of
            # the tasks failed. So include the stderr, stdout, and exit code:
            return self.fail( job.info, stderr=stderr, stdout=stdout, exit_code=tool_exit_code )

        # Check the tool's stdout, stderr, and exit code for errors, but only
        # if the job has not already been marked as having an error.
        # The job's stdout and stderr will be set accordingly.

        # We set final_job_state to use for dataset management, but *don't* set
        # job.state until after dataset collection to prevent history issues
        if ( self.check_tool_output( stdout, stderr, tool_exit_code, job ) ):
            final_job_state = job.states.OK
        else:
            final_job_state = job.states.ERROR

        if self.write_version_cmd:
            version_filename = self.get_version_string_path()
            if os.path.exists(version_filename):
                self.version_string = open(version_filename).read()
                os.unlink(version_filename)

        if self.app.config.outputs_to_working_directory and not self.__link_file_check():
            for dataset_path in self.get_output_fnames():
                try:
                    shutil.move( dataset_path.false_path, dataset_path.real_path )
                    log.debug( "finish(): Moved %s to %s" % ( dataset_path.false_path, dataset_path.real_path ) )
                except ( IOError, OSError ):
                    # this can happen if Galaxy is restarted during the job's
                    # finish method - the false_path file has already moved,
                    # and when the job is recovered, it won't be found.
                    if os.path.exists( dataset_path.real_path ) and os.stat( dataset_path.real_path ).st_size > 0:
                        log.warning( "finish(): %s not found, but %s is not empty, so it will be used instead" % ( dataset_path.false_path, dataset_path.real_path ) )
                    else:
                        # Prior to fail we need to set job.state
                        job.state = final_job_state
                        return self.fail( "Job %s's output dataset(s) could not be read" % job.id )

        job_context = ExpressionContext( dict( stdout=job.stdout, stderr=job.stderr ) )
        for dataset_assoc in job.output_datasets + job.output_library_datasets:
            context = self.get_dataset_finish_context( job_context, dataset_assoc.dataset.dataset )
            # should this also be checking library associations? - can a library item be added from a history before the job has ended? - let's not allow this to occur
            for dataset in dataset_assoc.dataset.dataset.history_associations + dataset_assoc.dataset.dataset.library_associations:  # need to update all associated output hdas, i.e. history was shared with job running
                trynum = 0
                while trynum < self.app.config.retry_job_output_collection:
                    try:
                        # Attempt to short circuit NFS attribute caching
                        os.stat( dataset.dataset.file_name )
                        os.chown( dataset.dataset.file_name, os.getuid(), -1 )
                        trynum = self.app.config.retry_job_output_collection
                    except ( OSError, ObjectNotFound ), e:
                        trynum += 1
                        log.warning( 'Error accessing %s, will retry: %s', dataset.dataset.file_name, e )
                        time.sleep( 2 )
                dataset.blurb = 'done'
                dataset.peek = 'no peek'
                dataset.info = (dataset.info or '')
                if context['stdout'].strip():
                    # Ensure white space between entries
                    dataset.info = dataset.info.rstrip() + "\n" + context['stdout'].strip()
                if context['stderr'].strip():
                    # Ensure white space between entries
                    dataset.info = dataset.info.rstrip() + "\n" + context['stderr'].strip()
                dataset.tool_version = self.version_string
                dataset.set_size()
                if 'uuid' in context:
                    dataset.dataset.uuid = context['uuid']
                # Update (non-library) job output datasets through the object store
                if dataset not in job.output_library_datasets:
                    self.app.object_store.update_from_file(dataset.dataset, create=True)
                if job.states.ERROR == final_job_state:
                    dataset.blurb = "error"
                    dataset.mark_unhidden()
                elif dataset.has_data():
                    # If the tool was expected to set the extension, attempt to retrieve it
                    if dataset.ext == 'auto':
                        dataset.extension = context.get( 'ext', 'data' )
                        dataset.init_meta( copy_from=dataset )
                    # if a dataset was copied, it won't appear in our dictionary:
                    # either use the metadata from originating output dataset, or call set_meta on the copies
                    # it would be quicker to just copy the metadata from the originating output dataset,
                    # but somewhat trickier (need to recurse up the copied_from tree), for now we'll call set_meta()
                    if ( not self.external_output_metadata.external_metadata_set_successfully( dataset, self.sa_session ) and self.app.config.retry_metadata_internally ):
                        dataset.datatype.set_meta( dataset, overwrite=False )  # call datatype.set_meta directly for the initial set_meta call during dataset creation
                    elif not self.external_output_metadata.external_metadata_set_successfully( dataset, self.sa_session ) and job.states.ERROR != final_job_state:
                        dataset._state = model.Dataset.states.FAILED_METADATA
                    else:
                        # load metadata from file
                        # we need to no longer allow metadata to be edited while the job is still running,
                        # since if it is edited, the metadata changed on the running output will no longer match
                        # the metadata that was stored to disk for use via the external process,
                        # and the changes made by the user will be lost, without warning or notice
                        dataset.metadata.from_JSON_dict( self.external_output_metadata.get_output_filenames_by_dataset( dataset, self.sa_session ).filename_out )
                    try:
                        assert context.get( 'line_count', None ) is not None
                        if ( not dataset.datatype.composite_type and dataset.dataset.is_multi_byte() ) or self.tool.is_multi_byte:
                            dataset.set_peek( line_count=context['line_count'], is_multi_byte=True )
                        else:
                            dataset.set_peek( line_count=context['line_count'] )
                    except:
                        if ( not dataset.datatype.composite_type and dataset.dataset.is_multi_byte() ) or self.tool.is_multi_byte:
                            dataset.set_peek( is_multi_byte=True )
                        else:
                            dataset.set_peek()
                    try:
                        # set the name if provided by the tool
                        dataset.name = context['name']
                    except:
                        pass
                else:
                    dataset.blurb = "empty"
                    if dataset.ext == 'auto':
                        dataset.extension = 'txt'
                self.sa_session.add( dataset )
            if job.states.ERROR == final_job_state:
                log.debug( "setting dataset state to ERROR" )
                # TODO: This is where the state is being set to error. Change it!
                dataset_assoc.dataset.dataset.state = model.Dataset.states.ERROR
                # Pause any dependent jobs (and those jobs' outputs)
                for dep_job_assoc in dataset_assoc.dataset.dependent_jobs:
                    self.pause( dep_job_assoc.job, "Execution of this dataset's job is paused because its input datasets are in an error state." )
            else:
                dataset_assoc.dataset.dataset.state = model.Dataset.states.OK
            # If any of the rest of the finish method below raises an
            # exception, the fail method will run and set the datasets to
            # ERROR. The user will never see that the datasets are in error if
            # they were flushed as OK here, since upon doing so, the history
            # panel stops checking for updates. So allow the
            # self.sa_session.flush() at the bottom of this method set
            # the state instead.

        for pja in job.post_job_actions:
            ActionBox.execute(self.app, self.sa_session, pja.post_job_action, job)
        # Flush all the dataset and job changes above. Dataset state changes
        # will now be seen by the user.
        self.sa_session.flush()
        # Save stdout and stderr
        if len( job.stdout ) > DATABASE_MAX_STRING_SIZE:
            log.info( "stdout for job…