PageRenderTime 48ms CodeModel.GetById 11ms app.highlight 31ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/galaxy/jobs/transfer_manager.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 166 lines | 125 code | 4 blank | 37 comment | 48 complexity | 4132c49d3ca133b6cdcfa9fb36bf6edd MD5 | raw file
  1"""
  2Manage transfers from arbitrary URLs to temporary files.  Socket interface for
  3IPC with multiple process configurations.
  4"""
  5import os, subprocess, socket, logging, threading
  6
  7from galaxy import eggs
  8from galaxy.util import listify, json
  9
 10log = logging.getLogger( __name__ )
 11
 12class TransferManager( object ):
 13    """
 14    Manage simple data transfers from URLs to temporary locations.
 15    """
 16    def __init__( self, app ):
 17        self.app = app
 18        self.sa_session = app.model.context.current
 19        self.command = 'python %s' % os.path.abspath( os.path.join( os.getcwd(), 'scripts', 'transfer.py' ) )
 20        if app.config.get_bool( 'enable_job_recovery', True ):
 21            # Only one Galaxy server process should be able to recover jobs! (otherwise you'll have nasty race conditions)
 22            self.running = True
 23            self.sleeper = Sleeper()
 24            self.restarter = threading.Thread( target=self.__restarter )
 25            self.restarter.start()
 26    def new( self, path=None, **kwd ):
 27        if 'protocol' not in kwd:
 28            raise Exception( 'Missing required parameter "protocol".' )
 29        protocol = kwd[ 'protocol' ]
 30        if protocol in [ 'http', 'https' ]:
 31            if 'url' not in kwd:
 32                raise Exception( 'Missing required parameter "url".' )
 33        elif protocol == 'scp':
 34            # TODO: add more checks here?
 35            if 'sample_dataset_id' not in kwd:
 36                raise Exception( 'Missing required parameter "sample_dataset_id".' )
 37            if 'file_path' not in kwd:
 38                raise Exception( 'Missing required parameter "file_path".' )
 39        transfer_job = self.app.model.TransferJob( state=self.app.model.TransferJob.states.NEW, params=kwd )
 40        self.sa_session.add( transfer_job )
 41        self.sa_session.flush()
 42        return transfer_job
 43    def run( self, transfer_jobs ):
 44        """
 45        This method blocks, so if invoking the transfer manager ever starts
 46        taking too long, we should move it to a thread.  However, the
 47        transfer_manager will either daemonize or return after submitting to a
 48        running daemon, so it should be fairly quick to return.
 49        """
 50        transfer_jobs = listify( transfer_jobs )
 51        printable_tj_ids = ', '.join( [ str( tj.id ) for tj in transfer_jobs ] )
 52        log.debug( 'Initiating transfer job(s): %s' % printable_tj_ids )
 53        # Set all jobs running before spawning, or else updating the state may
 54        # clobber a state change performed by the worker.
 55        [ tj.__setattr__( 'state', tj.states.RUNNING ) for tj in transfer_jobs ]
 56        self.sa_session.add_all( transfer_jobs )
 57        self.sa_session.flush()
 58        for tj in transfer_jobs:
 59            params_dict = tj.params
 60            protocol = params_dict[ 'protocol' ]
 61            # The transfer script should daemonize fairly quickly - if this is
 62            # not the case, this process will need to be moved to a
 63            # non-blocking method.
 64            cmd = '%s %s' % ( self.command, tj.id )
 65            log.debug( 'Transfer command is: %s' % cmd )
 66            p = subprocess.Popen( cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT )
 67            p.wait()
 68            output = p.stdout.read( 32768 )
 69            if p.returncode != 0:
 70                log.error( 'Spawning transfer job failed: %s: %s' % ( tj.id, output ) )
 71                tj.state = tj.states.ERROR
 72                tj.info = 'Spawning transfer job failed: %s' % output.splitlines()[-1]
 73                self.sa_session.add( tj )
 74                self.sa_session.flush()
 75    def get_state( self, transfer_jobs, via_socket=False ):
 76        transfer_jobs = listify( transfer_jobs )
 77        rval = []
 78        for tj in transfer_jobs:
 79            if via_socket and tj.state not in tj.terminal_states and tj.socket:
 80                try:
 81                    request = json.jsonrpc_request( method='get_state', id=True )
 82                    sock = socket.socket( socket.AF_INET, socket.SOCK_STREAM )
 83                    sock.settimeout( 5 )
 84                    sock.connect( ( 'localhost', tj.socket ) )
 85                    sock.send( json.to_json_string( request ) )
 86                    response = sock.recv( 8192 )
 87                    valid, response = json.validate_jsonrpc_response( response, id=request['id'] )
 88                    if not valid:
 89                        # No valid response received, make some pseudo-json-rpc
 90                        raise Exception( dict( code=128, message='Did not receive valid response from transfer daemon for state' ) )
 91                    if 'error' in response:
 92                        # Response was valid but Request resulted in an error
 93                        raise Exception( response['error'])
 94                    else:
 95                        # Request was valid
 96                        response['result']['transfer_job_id'] = tj.id
 97                        rval.append( response['result'] )
 98                except Exception, e:
 99                    # State checking via the transfer daemon failed, just
100                    # return the state from the database instead.  Callers can
101                    # look for the 'error' member of the response to see why
102                    # the check failed.
103                    self.sa_session.refresh( tj )
104                    error = e.args
105                    if type( error ) != dict:
106                        error = dict( code=256, message='Error connecting to transfer daemon', data=str( e ) )
107                    rval.append( dict( transfer_job_id=tj.id, state=tj.state, error=error ) )
108            else:
109                self.sa_session.refresh( tj )
110                rval.append( dict( transfer_job_id=tj.id, state=tj.state ) )
111        for tj_state in rval:
112            if tj_state['state'] in self.app.model.TransferJob.terminal_states:
113                log.debug( 'Transfer job %s is in terminal state: %s' % ( tj_state['transfer_job_id'], tj_state['state'] ) )
114            elif tj_state['state'] == self.app.model.TransferJob.states.PROGRESS and 'percent' in tj_state:
115                log.debug( 'Transfer job %s is %s%% complete' % ( tj_state[ 'transfer_job_id' ], tj_state[ 'percent' ] ) )
116        if len( rval ) == 1:
117            return rval[0]
118        return rval
119    def __restarter( self ):
120        log.info( 'Transfer job restarter starting up...' )
121        while self.running:
122            dead = []
123            self.sa_session.expunge_all() # our session is threadlocal so this is safe.
124            for tj in self.sa_session.query( self.app.model.TransferJob ) \
125                          .filter( self.app.model.TransferJob.state == self.app.model.TransferJob.states.RUNNING ):
126                if not tj.pid:
127                    continue
128                # This will only succeed if the process exists and is owned by the
129                # user running Galaxy (unless that user is root, in which case it
130                # can be owned by anyone - but you're not running Galaxy as root,
131                # right?).  This is not guaranteed proof that the transfer is alive
132                # since another process may have assumed the original process' PID.
133                # But that will only cause the transfer to not restart until that
134                # process dies, which hopefully won't be too long from now...  If
135                # it becomes a problem, try to talk to the socket a few times and
136                # restart the transfer if socket communication fails repeatedly.
137                try:
138                    os.kill( tj.pid, 0 )
139                except:
140                    self.sa_session.refresh( tj )
141                    if tj.state == tj.states.RUNNING:
142                        log.error( 'Transfer job %s is marked as running but pid %s appears to be dead.' % ( tj.id, tj.pid ) )
143                        dead.append( tj )
144            if dead:
145                self.run( dead )
146            self.sleeper.sleep( 30 )
147        log.info( 'Transfer job restarter shutting down...' )
148    def shutdown( self ):
149        self.running = False
150        self.sleeper.wake()
151
class Sleeper( object ):
    """
    Provides a 'sleep' method that sleeps for a number of seconds *unless*
    the 'wake' method is called (from a different thread).

    A wake() issued while no thread is inside sleep() is not remembered.
    """
    def __init__( self ):
        self.condition = threading.Condition()
    def sleep( self, seconds ):
        """
        Block for up to ``seconds`` seconds, returning early if wake() is
        called from another thread in the meantime.
        """
        # 'with' releases the condition's lock even if wait() raises, so a
        # failed sleep cannot leave the lock held and block wake() forever.
        with self.condition:
            self.condition.wait( seconds )
    def wake( self ):
        """
        Wake one thread currently blocked in sleep(), if any.
        """
        with self.condition:
            self.condition.notify()