PageRenderTime 31ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/galaxy/jobs/transfer_manager.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 166 lines | 125 code | 4 blank | 37 comment | 33 complexity | 4132c49d3ca133b6cdcfa9fb36bf6edd MD5 | raw file
  1. """
  2. Manage transfers from arbitrary URLs to temporary files. Socket interface for
  3. IPC with multiple process configurations.
  4. """
  5. import os, subprocess, socket, logging, threading
  6. from galaxy import eggs
  7. from galaxy.util import listify, json
  8. log = logging.getLogger( __name__ )
  9. class TransferManager( object ):
  10. """
  11. Manage simple data transfers from URLs to temporary locations.
  12. """
  13. def __init__( self, app ):
  14. self.app = app
  15. self.sa_session = app.model.context.current
  16. self.command = 'python %s' % os.path.abspath( os.path.join( os.getcwd(), 'scripts', 'transfer.py' ) )
  17. if app.config.get_bool( 'enable_job_recovery', True ):
  18. # Only one Galaxy server process should be able to recover jobs! (otherwise you'll have nasty race conditions)
  19. self.running = True
  20. self.sleeper = Sleeper()
  21. self.restarter = threading.Thread( target=self.__restarter )
  22. self.restarter.start()
  23. def new( self, path=None, **kwd ):
  24. if 'protocol' not in kwd:
  25. raise Exception( 'Missing required parameter "protocol".' )
  26. protocol = kwd[ 'protocol' ]
  27. if protocol in [ 'http', 'https' ]:
  28. if 'url' not in kwd:
  29. raise Exception( 'Missing required parameter "url".' )
  30. elif protocol == 'scp':
  31. # TODO: add more checks here?
  32. if 'sample_dataset_id' not in kwd:
  33. raise Exception( 'Missing required parameter "sample_dataset_id".' )
  34. if 'file_path' not in kwd:
  35. raise Exception( 'Missing required parameter "file_path".' )
  36. transfer_job = self.app.model.TransferJob( state=self.app.model.TransferJob.states.NEW, params=kwd )
  37. self.sa_session.add( transfer_job )
  38. self.sa_session.flush()
  39. return transfer_job
  40. def run( self, transfer_jobs ):
  41. """
  42. This method blocks, so if invoking the transfer manager ever starts
  43. taking too long, we should move it to a thread. However, the
  44. transfer_manager will either daemonize or return after submitting to a
  45. running daemon, so it should be fairly quick to return.
  46. """
  47. transfer_jobs = listify( transfer_jobs )
  48. printable_tj_ids = ', '.join( [ str( tj.id ) for tj in transfer_jobs ] )
  49. log.debug( 'Initiating transfer job(s): %s' % printable_tj_ids )
  50. # Set all jobs running before spawning, or else updating the state may
  51. # clobber a state change performed by the worker.
  52. [ tj.__setattr__( 'state', tj.states.RUNNING ) for tj in transfer_jobs ]
  53. self.sa_session.add_all( transfer_jobs )
  54. self.sa_session.flush()
  55. for tj in transfer_jobs:
  56. params_dict = tj.params
  57. protocol = params_dict[ 'protocol' ]
  58. # The transfer script should daemonize fairly quickly - if this is
  59. # not the case, this process will need to be moved to a
  60. # non-blocking method.
  61. cmd = '%s %s' % ( self.command, tj.id )
  62. log.debug( 'Transfer command is: %s' % cmd )
  63. p = subprocess.Popen( cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT )
  64. p.wait()
  65. output = p.stdout.read( 32768 )
  66. if p.returncode != 0:
  67. log.error( 'Spawning transfer job failed: %s: %s' % ( tj.id, output ) )
  68. tj.state = tj.states.ERROR
  69. tj.info = 'Spawning transfer job failed: %s' % output.splitlines()[-1]
  70. self.sa_session.add( tj )
  71. self.sa_session.flush()
  72. def get_state( self, transfer_jobs, via_socket=False ):
  73. transfer_jobs = listify( transfer_jobs )
  74. rval = []
  75. for tj in transfer_jobs:
  76. if via_socket and tj.state not in tj.terminal_states and tj.socket:
  77. try:
  78. request = json.jsonrpc_request( method='get_state', id=True )
  79. sock = socket.socket( socket.AF_INET, socket.SOCK_STREAM )
  80. sock.settimeout( 5 )
  81. sock.connect( ( 'localhost', tj.socket ) )
  82. sock.send( json.to_json_string( request ) )
  83. response = sock.recv( 8192 )
  84. valid, response = json.validate_jsonrpc_response( response, id=request['id'] )
  85. if not valid:
  86. # No valid response received, make some pseudo-json-rpc
  87. raise Exception( dict( code=128, message='Did not receive valid response from transfer daemon for state' ) )
  88. if 'error' in response:
  89. # Response was valid but Request resulted in an error
  90. raise Exception( response['error'])
  91. else:
  92. # Request was valid
  93. response['result']['transfer_job_id'] = tj.id
  94. rval.append( response['result'] )
  95. except Exception, e:
  96. # State checking via the transfer daemon failed, just
  97. # return the state from the database instead. Callers can
  98. # look for the 'error' member of the response to see why
  99. # the check failed.
  100. self.sa_session.refresh( tj )
  101. error = e.args
  102. if type( error ) != dict:
  103. error = dict( code=256, message='Error connecting to transfer daemon', data=str( e ) )
  104. rval.append( dict( transfer_job_id=tj.id, state=tj.state, error=error ) )
  105. else:
  106. self.sa_session.refresh( tj )
  107. rval.append( dict( transfer_job_id=tj.id, state=tj.state ) )
  108. for tj_state in rval:
  109. if tj_state['state'] in self.app.model.TransferJob.terminal_states:
  110. log.debug( 'Transfer job %s is in terminal state: %s' % ( tj_state['transfer_job_id'], tj_state['state'] ) )
  111. elif tj_state['state'] == self.app.model.TransferJob.states.PROGRESS and 'percent' in tj_state:
  112. log.debug( 'Transfer job %s is %s%% complete' % ( tj_state[ 'transfer_job_id' ], tj_state[ 'percent' ] ) )
  113. if len( rval ) == 1:
  114. return rval[0]
  115. return rval
  116. def __restarter( self ):
  117. log.info( 'Transfer job restarter starting up...' )
  118. while self.running:
  119. dead = []
  120. self.sa_session.expunge_all() # our session is threadlocal so this is safe.
  121. for tj in self.sa_session.query( self.app.model.TransferJob ) \
  122. .filter( self.app.model.TransferJob.state == self.app.model.TransferJob.states.RUNNING ):
  123. if not tj.pid:
  124. continue
  125. # This will only succeed if the process exists and is owned by the
  126. # user running Galaxy (unless that user is root, in which case it
  127. # can be owned by anyone - but you're not running Galaxy as root,
  128. # right?). This is not guaranteed proof that the transfer is alive
  129. # since another process may have assumed the original process' PID.
  130. # But that will only cause the transfer to not restart until that
  131. # process dies, which hopefully won't be too long from now... If
  132. # it becomes a problem, try to talk to the socket a few times and
  133. # restart the transfer if socket communication fails repeatedly.
  134. try:
  135. os.kill( tj.pid, 0 )
  136. except:
  137. self.sa_session.refresh( tj )
  138. if tj.state == tj.states.RUNNING:
  139. log.error( 'Transfer job %s is marked as running but pid %s appears to be dead.' % ( tj.id, tj.pid ) )
  140. dead.append( tj )
  141. if dead:
  142. self.run( dead )
  143. self.sleeper.sleep( 30 )
  144. log.info( 'Transfer job restarter shutting down...' )
  145. def shutdown( self ):
  146. self.running = False
  147. self.sleeper.wake()
  148. class Sleeper( object ):
  149. """
  150. Provides a 'sleep' method that sleeps for a number of seconds *unless*
  151. the notify method is called (from a different thread).
  152. """
  153. def __init__( self ):
  154. self.condition = threading.Condition()
  155. def sleep( self, seconds ):
  156. self.condition.acquire()
  157. self.condition.wait( seconds )
  158. self.condition.release()
  159. def wake( self ):
  160. self.condition.acquire()
  161. self.condition.notify()
  162. self.condition.release()