PageRenderTime 73ms CodeModel.GetById 3ms app.highlight 60ms RepoModel.GetById 2ms app.codeStats 0ms

/lib/galaxy/cloud/providers/ec2.py

https://bitbucket.org/ajish/galaxy-omelogic
Python | 1033 lines | 1006 code | 14 blank | 13 comment | 59 complexity | d6f582a6b4ede3dc825c5f2ce22096b4 MD5 | raw file

Large files are truncated, but you can click here to view the full file

  1import subprocess, threading, os, errno, time, datetime
  2from Queue import Queue, Empty
  3from datetime import datetime
  4
  5from galaxy import model # Database interaction class
  6from galaxy.model import mapping
  7from galaxy.datatypes.data import nice_size
  8from galaxy.util.bunch import Bunch
  9from galaxy.cloud import UCIwrapper
 10from Queue import Queue
 11from sqlalchemy import or_, and_
 12
 13import galaxy.eggs
 14galaxy.eggs.require("boto")
 15from boto.ec2.connection import EC2Connection
 16from boto.ec2.regioninfo import RegionInfo
 17import boto.exception
 18import boto
 19
 20import logging
 21log = logging.getLogger( __name__ )
 22
 23uci_states = Bunch(
 24    NEW_UCI = "newUCI",
 25    NEW = "new",
 26    CREATING = "creating",
 27    DELETING_UCI = "deletingUCI",
 28    DELETING = "deleting",
 29    SUBMITTED_UCI = "submittedUCI",
 30    SUBMITTED = "submitted",
 31    SHUTTING_DOWN_UCI = "shutting-downUCI",
 32    SHUTTING_DOWN = "shutting-down",
 33    AVAILABLE = "available",
 34    RUNNING = "running",
 35    PENDING = "pending",
 36    ERROR = "error",
 37    DELETED = "deleted",
 38    SNAPSHOT_UCI = "snapshotUCI",
 39    SNAPSHOT = "snapshot"
 40)
 41
 42instance_states = Bunch(
 43    TERMINATED = "terminated",
 44    SUBMITTED = "submitted",
 45    RUNNING = "running",
 46    PENDING = "pending",
 47    SHUTTING_DOWN = "shutting-down",
 48    ERROR = "error"
 49)
 50
 51store_status = Bunch(
 52    WAITING = "waiting",
 53    IN_USE = "in-use",
 54    CREATING = "creating",
 55    DELETED = 'deleted',
 56    ERROR = "error"
 57)
 58
 59snapshot_status = Bunch(
 60    SUBMITTED = 'submitted',
 61    PENDING = 'pending',
 62    COMPLETED = 'completed',
 63    DELETE = 'delete',
 64    DELETED= 'deleted',
 65    ERROR = "error"
 66)
 67
 68class EC2CloudProvider( object ):
 69    """
 70    Amazon EC2-based cloud provider implementation for managing instances. 
 71    """
 72    STOP_SIGNAL = object()
 73    def __init__( self, app ):
 74        self.type = "ec2" # cloud provider type (e.g., ec2, eucalyptus, opennebula)
 75        self.zone = "us-east-1a"
 76        self.security_group = "galaxyWeb"
 77        self.queue = Queue()
 78        self.sa_session = app.model.context
 79        
 80        self.threads = []
 81        nworkers = 5
 82        log.info( "Starting EC2 cloud controller workers..." )
 83        for i in range( nworkers  ):
 84            worker = threading.Thread( target=self.run_next )
 85            worker.start()
 86            self.threads.append( worker )
 87        log.debug( "%d EC2 cloud workers ready", nworkers )
 88        
 89    def shutdown( self ):
 90        """Attempts to gracefully shut down the monitor thread"""
 91        log.info( "sending stop signal to worker threads in EC2 cloud manager" )
 92        for i in range( len( self.threads ) ):
 93            self.queue.put( self.STOP_SIGNAL )
 94        log.info( "EC2 cloud manager stopped" )
 95    
 96    def put( self, uci_wrapper ):
 97        """
 98        Add uci_wrapper object to the end of the request queue to be handled by 
 99        this cloud provider.
100        """
101        state = uci_wrapper.get_uci_state()
102        uci_wrapper.change_state( state.split('U')[0] ) # remove 'UCI' from end of state description (i.e., mark as accepted and ready for processing)
103        self.queue.put( uci_wrapper )
104        
105    def run_next( self ):
106        """Process next request, waiting until one is available if necessary."""
107        cnt = 0
108        while 1:
109            
110            uci_wrapper = self.queue.get()
111            uci_state = uci_wrapper.get_uci_state()
112            if uci_state is self.STOP_SIGNAL:
113                return
114            try:
115                if uci_state==uci_states.NEW:
116                    self.create_uci( uci_wrapper )
117                elif uci_state==uci_states.DELETING:
118                    self.delete_uci( uci_wrapper )
119                elif uci_state==uci_states.SUBMITTED:
120                    self.start_uci( uci_wrapper )
121                elif uci_state==uci_states.SHUTTING_DOWN:
122                    self.stop_uci( uci_wrapper )
123                elif uci_state==uci_states.SNAPSHOT:
124                    self.snapshot_uci( uci_wrapper )
125            except:
126                log.exception( "Uncaught exception executing cloud request." )
127            cnt += 1
128            
129    def get_connection( self, uci_wrapper ):
130        """
131        Establishes cloud connection using user's credentials associated with given UCI
132        """
133        log.debug( 'Establishing %s cloud connection.' % self.type )
134        provider = uci_wrapper.get_provider()
135        try:
136            region = RegionInfo( None, provider.region_name, provider.region_endpoint )
137        except Exception, ex:
138            err = "Selecting region with cloud provider failed: " + str( ex )
139            log.error( err )
140            uci_wrapper.set_error( err, True )
141            return None
142        try:
143            conn = EC2Connection( aws_access_key_id=uci_wrapper.get_access_key(), 
144                                  aws_secret_access_key=uci_wrapper.get_secret_key(), 
145                                  is_secure=provider.is_secure, 
146                                  region=region, 
147                                  path=provider.path )
148        except boto.exception.EC2ResponseError, e:
149            err = "Establishing connection with cloud failed: " + str( e )
150            log.error( err )
151            uci_wrapper.set_error( err, True )
152            return None
153        
154        return conn
155        
156    def check_key_pair( self, uci_wrapper, conn ):
157        """
158        Check if a key pair associated with this UCI exists on cloud provider.
159        If yes, return key pair name; otherwise, generate a key pair with the cloud
160        provider and, again, return key pair name.
161        Key pair name for given UCI is generated from UCI's name and suffix '_kp' 
162        """
163        kp = None
164        kp_name = uci_wrapper.get_name().replace(' ','_') + "_kp"
165        log.debug( "Checking user's key pair: '%s'" % kp_name )
166        try:
167            kp = conn.get_key_pair( kp_name )
168            uci_kp_name = uci_wrapper.get_key_pair_name()
169            uci_material = uci_wrapper.get_key_pair_material()
170            if kp != None:
171                if kp.name != uci_kp_name or uci_material == None:
172                    # key pair exists on the cloud but not in local database, so re-generate it (i.e., delete and then create)
173                    try: 
174                        conn.delete_key_pair( kp_name )
175                        kp = self.create_key_pair( conn, kp_name )
176                        uci_wrapper.set_key_pair( kp.name, kp.material )
177                    except boto.exception.EC2ResponseError, e:
178                        err = "EC2 response error while deleting key pair: " + str( e )
179                        log.error( err )
180                        uci_wrapper.set_error( err, True )
181            else:
182                try:
183                    kp = self.create_key_pair( conn, kp_name )
184                    uci_wrapper.set_key_pair( kp.name, kp.material )
185                except boto.exception.EC2ResponseError, e:
186                    err = "EC2 response error while creating key pair: " + str( e )
187                    log.error( err )
188                    uci_wrapper.set_error( err, True )
189                except Exception, ex:
190                    err = "Exception while creating key pair: " + str( ex )
191                    log.error( err )
192                    uci_wrapper.set_error( err, True )             
193        except boto.exception.EC2ResponseError, e: # No keypair under this name exists so create it
194            if e.code == 'InvalidKeyPair.NotFound': 
195                log.info( "No keypair found, creating keypair '%s'" % kp_name )
196                kp = self.create_key_pair( conn, kp_name )
197                uci_wrapper.set_key_pair( kp.name, kp.material )
198            else:
199                err = "EC2 response error while retrieving key pair: " + str( e )
200                log.error( err )
201                uci_wrapper.set_error( err, True )
202                        
203        if kp != None:
204            return kp.name
205        else:
206            return None
207    
208    def create_key_pair( self, conn, kp_name ):
209        """ Initiate creation of key pair under kp_name by current cloud provider. """
210        try:
211            return conn.create_key_pair( kp_name )
212        except boto.exception.EC2ResponseError, e: 
213            return None
214    
215    def get_mi_id( self, uci_wrapper, i_index ):
216        """
217        Get appropriate machine image (mi) based on instance size.
218        """
219        i_type = uci_wrapper.get_instance_type( i_index )
220        if i_type=='m1.small' or i_type=='c1.medium':
221            arch = 'i386'
222        else:
223            arch = 'x86_64' 
224        
225        mi = self.sa_session.query( model.CloudImage ).filter_by( deleted=False, provider_type=self.type, architecture=arch ).first()
226        if mi:
227            return mi.image_id
228        else:
229            err = "Machine image could not be retrieved"
230            log.error( "%s for UCI '%s'." % (err, uci_wrapper.get_name() ) )
231            uci_wrapper.set_error( err+". Contact site administrator to ensure needed machine image is registered.", True )
232            return None
233            
234    def create_uci( self, uci_wrapper ):
235        """ 
236        Create User Configured Instance (UCI) - i.e., create storage volume on cloud provider
237        and register relevant information in local Galaxy database.
238        """
239        conn = self.get_connection( uci_wrapper )
240        if uci_wrapper.get_uci_availability_zone()=='':
241            log.info( "Availability zone for UCI (i.e., storage volume) was not selected, using default zone: %s" % self.zone )
242            uci_wrapper.set_store_availability_zone( self.zone )
243        
244        log.info( "Creating volume in zone '%s'..." % uci_wrapper.get_uci_availability_zone() )
245        # Because only 1 storage volume may be created at UCI config time, index of this storage volume in local Galaxy DB w.r.t
246        # current UCI is 0, so reference it in following methods
247        vol = conn.create_volume( uci_wrapper.get_store_size( 0 ), uci_wrapper.get_uci_availability_zone(), snapshot=None )
248        uci_wrapper.set_store_volume_id( 0, vol.id )
249        
250        # Wait for a while to ensure volume was created
251#        vol_status = vol.status
252#        for i in range( 30 ):
253#            if vol_status is not "available":
254#                log.debug( 'Updating volume status; current status: %s' % vol_status )
255#                vol_status = vol.status
256#                time.sleep(3)
257#            if i is 29:
258#                log.debug( "Error while creating volume '%s'; stuck in state '%s'; deleting volume." % ( vol.id, vol_status ) )
259#                conn.delete_volume( vol.id )
260#                uci_wrapper.change_state( uci_state='error' )
261#                return
262        
263        # Retrieve created volume again to get updated status
264        try:
265            vl = conn.get_all_volumes( [vol.id] )
266        except boto.exception.EC2ResponseError, e: 
267            err = "EC2 response error while retrieving (i.e., updating status) of just created storage volume '" + vol.id + "': " + str( e )
268            log.error( err )
269            uci_wrapper.set_store_status( vol.id, uci_states.ERROR )
270            uci_wrapper.set_error( err, True )
271            return
272        except Exception, ex:
273            err = "Error while retrieving (i.e., updating status) of just created storage volume '" + vol.id + "': " + str( ex )
274            log.error( err )
275            uci_wrapper.set_error( err, True )
276            return
277        
278        if len( vl ) > 0:
279            uci_wrapper.change_state( uci_state=vl[0].status )
280            uci_wrapper.set_store_status( vol.id, vl[0].status )
281        else:
282            err = "Volume '" + vol.id +"' not found by EC2 after being created."
283            log.error( err )
284            uci_wrapper.set_store_status( vol.id, uci_states.ERROR )
285            uci_wrapper.set_error( err, True )
286
287    def delete_uci( self, uci_wrapper ):
288        """ 
289        Delete UCI - i.e., delete all storage volumes associated with this UCI. 
290        NOTE that this implies deletion of any and all data associated
291        with this UCI from the cloud. All data will be deleted.
292        Information in local Galaxy database is marked as deleted but not actually removed
293        from the database. 
294        """
295        conn = self.get_connection( uci_wrapper )
296        vl = [] # volume list
297        count = 0 # counter for checking if all volumes assoc. w/ UCI were deleted
298        
299        # Get all volumes assoc. w/ UCI, delete them from cloud as well as in local DB
300        vl = uci_wrapper.get_all_stores()
301        deletedList = []
302        failedList = []
303        for v in vl:
304            log.debug( "Deleting volume with id='%s'" % v.volume_id )
305            try:
306                if conn.delete_volume( v.volume_id ):
307                    deletedList.append( v.volume_id )
308                    v.deleted = True
309                    self.sa_session.add( v )
310                    self.sa_session.flush()
311                    count += 1
312                else:
313                    failedList.append( v.volume_id )
314            except boto.exception.EC2ResponseError, e:
315                err = "EC2 response error while deleting storage volume '" + v.volume_id + "': " + str( e )
316                log.error( err )
317                uci_wrapper.set_store_error( err, store_id = v.volume_id )
318                uci_wrapper.set_error( err, True )
319            
320        # Delete UCI if all of associated 
321        if count == len( vl ):
322            uci_wrapper.set_deleted()
323        else:
324            err = "Deleting following volume(s) failed: " + str( failedList ) + ". However, these volumes were successfully deleted: " \
325                  + str( deletedList ) + ". MANUAL intervention and processing needed."
326            log.error( err )
327            uci_wrapper.set_error( err, True )
328            
329    def snapshot_uci( self, uci_wrapper ):
330        """
331        Initiate creation of a snapshot by cloud provider for all storage volumes 
332        associated with this UCI. 
333        """
334        if uci_wrapper.get_uci_state() != uci_states.ERROR:
335            conn = self.get_connection( uci_wrapper )
336            
337            snapshots = uci_wrapper.get_snapshots( status = snapshot_status.SUBMITTED )
338            for snapshot in snapshots:
339                log.debug( "Snapshot DB id: '%s', volume id: '%s'" % ( snapshot.id, snapshot.store.volume_id ) )
340                try:
341                    snap = conn.create_snapshot( volume_id=snapshot.store.volume_id )
342                    snap_id = str( snap ).split(':')[1]
343                    uci_wrapper.set_snapshot_id( snapshot.id, snap_id )
344                    sh = conn.get_all_snapshots( snap_id ) # get updated status
345                    uci_wrapper.set_snapshot_status( status=sh[0].status, snap_id=snap_id )
346                except boto.exception.EC2ResponseError, e:
347                    err = "EC2 response error while creating snapshot: " + str( e )
348                    log.error( err )
349                    uci_wrapper.set_snapshot_error( error=err, snap_index=snapshot.id, set_status=True )
350                    uci_wrapper.set_error( err, True )
351                    return
352                except Exception, ex:
353                    err = "Error while creating snapshot: " + str( ex )
354                    log.error( err )
355                    uci_wrapper.set_snapshot_error( error=err, snap_index=snapshot.id, set_status=True )
356                    uci_wrapper.set_error( err, True )
357                    return
358                    
359            uci_wrapper.change_state( uci_state=uci_states.AVAILABLE )
360                
361    def add_storage_to_uci( self, name ):
362        """ Adds more storage to specified UCI 
363        TODO"""
364    
365    def dummy_start_uci( self, uci_wrapper ):
366        
367        uci = uci_wrapper.get_uci()
368        log.debug( "Would be starting instance '%s'" % uci.name )
369        uci_wrapper.change_state( uci_state.PENDING )
370#        log.debug( "Sleeping a bit... (%s)" % uci.name )
371#        time.sleep(20)
372#        log.debug( "Woke up! (%s)" % uci.name )
373        
374    def start_uci( self, uci_wrapper ):
375        """
376        Start instance(s) of given UCI on the cloud.  
377        """ 
378        if uci_wrapper.get_uci_state() != uci_states.ERROR:
379             conn = self.get_connection( uci_wrapper )
380             self.check_key_pair( uci_wrapper, conn )
381             if uci_wrapper.get_key_pair_name() == None:
382                err = "Key pair not found"
383                log.error( "%s for UCI '%s'." % ( err, uci_wrapper.get_name() ) )
384                uci_wrapper.set_error( err + ". Try resetting the state and starting the instance again.", True )
385                return
386             
387             i_indexes = uci_wrapper.get_instances_indexes( state=instance_states.SUBMITTED ) # Get indexes of i_indexes associated with this UCI that are in 'submitted' state
388             log.debug( "Starting instances with IDs: '%s' associated with UCI '%s' " % ( i_indexes, uci_wrapper.get_name(),  ) )
389             if len( i_indexes ) > 0:
390                 for i_index in i_indexes:
391                    # Get machine image for current instance
392                    mi_id = self.get_mi_id( uci_wrapper, i_index )
393                    log.debug( "mi_id: %s, uci_wrapper.get_key_pair_name(): %s" % ( mi_id, uci_wrapper.get_key_pair_name() ) )
394                    uci_wrapper.set_mi( i_index, mi_id )
395                    
396                    if mi_id != None:
397                        # Check if galaxy security group exists (and create it if it does not)
398                        log.debug( "Setting up '%s' security group." % self.security_group )
399                        try:
400                            conn.get_all_security_groups( [self.security_group] ) # security groups
401                        except boto.exception.EC2ResponseError, e:
402                            if e.code == 'InvalidGroup.NotFound': 
403                                log.info( "No security group found, creating security group '%s'" % self.security_group )
404                                try:
405                                    gSecurityGroup = conn.create_security_group(self.security_group, 'Security group for Galaxy.')
406                                    gSecurityGroup.authorize( 'tcp', 80, 80, '0.0.0.0/0' ) # Open HTTP port
407                                    gSecurityGroup.authorize( 'tcp', 22, 22, '0.0.0.0/0' ) # Open SSH port
408                                except boto.exception.EC2ResponseError, ee:
409                                    err = "EC2 response error while creating security group: " + str( ee )
410                                    log.error( err )
411                                    uci_wrapper.set_error( err, True )
412                            else:
413                                err = "EC2 response error while retrieving security group: " + str( e )
414                                log.error( err )
415                                uci_wrapper.set_error( err, True )
416                    
417                        
418                        if uci_wrapper.get_uci_state() != uci_states.ERROR:
419                            # Start an instance
420                            log.debug( "Starting instance for UCI '%s'" % uci_wrapper.get_name() )
421                            #TODO: Once multiple volumes can be attached to a single instance, update 'userdata' composition            
422                            userdata = uci_wrapper.get_store_volume_id()+"|"+uci_wrapper.get_access_key()+"|"+uci_wrapper.get_secret_key() 
423                            log.debug( "Using following command: conn.run_instances( image_id='%s', key_name='%s', security_groups=['%s'], user_data=[OMITTED], instance_type='%s', placement='%s' )" 
424                                       % ( mi_id, uci_wrapper.get_key_pair_name(), self.security_group, uci_wrapper.get_instance_type( i_index ), uci_wrapper.get_uci_availability_zone() ) )
425                            reservation = None
426                            try:
427                                reservation = conn.run_instances( image_id=mi_id, 
428                                                                  key_name=uci_wrapper.get_key_pair_name(), 
429                                                                  security_groups=[self.security_group], 
430                                                                  user_data=userdata,
431                                                                  instance_type=uci_wrapper.get_instance_type( i_index ),  
432                                                                  placement=uci_wrapper.get_uci_availability_zone() )
433                            except boto.exception.EC2ResponseError, e:
434                                err = "EC2 response error when starting UCI '"+ uci_wrapper.get_name() +"': " + str( e )
435                                log.error( err )
436                                uci_wrapper.set_error( err, True )
437                            except Exception, ex:
438                                err = "Error when starting UCI '" + uci_wrapper.get_name() + "': " + str( ex )
439                                log.error( err )
440                                uci_wrapper.set_error( err, True )
441                            # Record newly available instance data into local Galaxy database
442                            if reservation:
443                                l_time = datetime.utcnow()
444    #                            uci_wrapper.set_instance_launch_time( self.format_time( reservation.instances[0].launch_time ), i_index=i_index )
445                                uci_wrapper.set_instance_launch_time( l_time, i_index=i_index )
446                                if not uci_wrapper.uci_launch_time_set():
447                                    uci_wrapper.set_uci_launch_time( l_time )
448                                try:
449                                    uci_wrapper.set_reservation_id( i_index, str( reservation ).split(":")[1] )
450                                    # TODO: if more than a single instance will be started through single reservation, change this reference to element [0]
451                                    i_id = str( reservation.instances[0]).split(":")[1] 
452                                    uci_wrapper.set_instance_id( i_index, i_id )
453                                    s = reservation.instances[0].state 
454                                    uci_wrapper.change_state( s, i_id, s )
455                                    uci_wrapper.set_security_group_name( self.security_group, i_id=i_id )
456                                    vol_id = uci_wrapper.get_store_volume_id( store_id=0 ) # TODO: Once more that one vol/UCI is allowed, update this!
457                                    uci_wrapper.set_store_status( vol_id, store_status.WAITING )
458                                    log.debug( "Instance of UCI '%s' started, current state: '%s'" % ( uci_wrapper.get_name(), uci_wrapper.get_uci_state() ) )
459                                except boto.exception.EC2ResponseError, e:
460                                    err = "EC2 response error when retrieving instance information for UCI '" + uci_wrapper.get_name() + "': " + str( e )
461                                    log.error( err )
462                                    uci_wrapper.set_error( err, True )
463                        else:
464                            log.error( "UCI '%s' is in 'error' state, starting instance was aborted." % uci_wrapper.get_name() )
465             else:
466                err = "No instances in state '"+ instance_states.SUBMITTED +"' found for UCI '" + uci_wrapper.get_name() + \
467                      "'. Nothing to start."
468                log.error( err )
469                uci_wrapper.set_error( err, True )
470        else:
471            log.error( "UCI '%s' is in 'error' state, starting instance was aborted." % uci_wrapper.get_name() )
472                    
473    def stop_uci( self, uci_wrapper):
474        """ 
475        Stop all of cloud instances associated with given UCI. 
476        """
477        conn = self.get_connection( uci_wrapper )
478        
479        # Get all instances associated with given UCI
480        il = uci_wrapper.get_instances_ids() # instance list
481        # Process list of instances and remove any references to empty instance id's
482        for i in il:
483            if i is None:
484                il.remove( i )
485        log.debug( 'List of instances being terminated: %s' % il )
486        rl = conn.get_all_instances( il ) # Reservation list associated with given instances
487        
488        # Initiate shutdown of all instances under given UCI
489        cnt = 0
490        stopped = []
491        not_stopped = []
492        for r in rl:
493            for inst in r.instances:
494                log.debug( "Sending stop signal to instance '%s' associated with reservation '%s'." % ( inst, r ) )
495                try:
496                    inst.stop()
497                    uci_wrapper.set_stop_time( datetime.utcnow(), i_id=inst.id )
498                    uci_wrapper.change_state( instance_id=inst.id, i_state=inst.update() )
499                    stopped.append( inst )
500                except boto.exception.EC2ResponseError, e:
501                    not_stopped.append( inst )
502                    err = "EC2 response error when stopping instance '" + inst.instance_id + "': " + str(e)
503                    log.error( err )
504                    uci_wrapper.set_error( err, True )
505                
506        uci_wrapper.reset_uci_launch_time()
507        log.debug( "Termination was initiated for all instances of UCI '%s'." % uci_wrapper.get_name() )
508
509
510#        dbInstances = get_instances( trans, uci ) #TODO: handle list!
511#        
512#        # Get actual cloud instance object
513#        cloudInstance = get_cloud_instance( conn, dbInstances.instance_id )
514#        
515#        # TODO: Detach persistent storage volume(s) from instance and update volume data in local database
516#        stores = get_stores( trans, uci )
517#        for i, store in enumerate( stores ):
518#            log.debug( "Detaching volume '%s' to instance '%s'." % ( store.volume_id, dbInstances.instance_id ) )
519#            mntDevice = store.device
520#            volStat = None
521##            Detaching volume does not work with Eucalyptus Public Cloud, so comment it out
522##            try:
523##                volStat = conn.detach_volume( store.volume_id, dbInstances.instance_id, mntDevice )
524##            except:
525##                log.debug ( 'Error detaching volume; still going to try and stop instance %s.' % dbInstances.instance_id )
526#            store.attach_time = None
527#            store.device = None
528#            store.i_id = None
529#            store.status = volStat
530#            log.debug ( '***** volume status: %s' % volStat )
531#   
532#        
533#        # Stop the instance and update status in local database
534#        cloudInstance.stop()
535#        dbInstances.stop_time = datetime.utcnow()
536#        while cloudInstance.state != 'terminated':
537#            log.debug( "Stopping instance %s state; current state: %s" % ( str( cloudInstance ).split(":")[1], cloudInstance.state ) )
538#            time.sleep(3)
539#            cloudInstance.update()
540#        dbInstances.state = cloudInstance.state
541#        
542#        # Reset relevant UCI fields
543#        uci.state = 'available'
544#        uci.launch_time = None
545#          
546#        # Persist
547#        session = trans.sa_session
548##        session.save_or_update( stores )
549#        session.save_or_update( dbInstances ) # TODO: Is this going to work w/ multiple instances stored in dbInstances variable?
550#        session.save_or_update( uci )
551#        session.flush()
552#        trans.log_event( "User stopped cloud instance '%s'" % uci.name )
553#        trans.set_message( "Galaxy instance '%s' stopped." % uci.name )
554
555    def update( self ):
556        """ 
557        Run status update on all instances that are in 'running', 'pending', or 'shutting-down' state.
558        Run status update on all storage volumes whose status is 'in-use', 'creating', or 'None'.
559        Run status update on all snapshots whose status is 'pending' or 'delete'  
560        Run status update on any zombie UCIs, i.e., UCI's that is in 'submitted' state for an 
561        extended period of time.
562        
563        Reason behind this method is to sync state of local DB and real-world resources
564        """
565        log.debug( "Running general status update for %s UCIs..." % self.type )
566        # Update instances
567        instances = self.sa_session.query( model.CloudInstance ) \
568            .filter( or_( model.CloudInstance.table.c.state==instance_states.RUNNING, 
569                          model.CloudInstance.table.c.state==instance_states.PENDING,  
570                          model.CloudInstance.table.c.state==instance_states.SHUTTING_DOWN ) ) \
571            .all()
572        for inst in instances:
573            if self.type == inst.uci.credentials.provider.type:
574                log.debug( "[%s] Running general status update on instance '%s'" % ( inst.uci.credentials.provider.type, inst.instance_id ) )
575                self.update_instance( inst )
576            
577        # Update storage volume(s)
578        stores = self.sa_session.query( model.CloudStore ) \
579            .filter( or_( model.CloudStore.table.c.status==store_status.IN_USE, 
580                          model.CloudStore.table.c.status==store_status.CREATING,
581                          model.CloudStore.table.c.status==store_status.WAITING,
582                          model.CloudStore.table.c.status==None ) ) \
583            .all()
584        for store in stores:
585            if self.type == store.uci.credentials.provider.type: # and store.volume_id != None:
586                log.debug( "[%s] Running general status update on store with local database ID: '%s'" % ( store.uci.credentials.provider.type, store.id ) )
587                self.update_store( store )
588#            else:
589#                log.error( "[%s] There exists an entry for UCI (%s) storage volume without an ID. Storage volume might have been created with "
590#                           "cloud provider though. Manual check is recommended." % ( store.uci.credentials.provider.type, store.uci.name ) )
591#                store.uci.error = "There exists an entry in local database for a storage volume without an ID. Storage volume might have been created " \
592#                            "with cloud provider though. Manual check is recommended. After understanding what happened, local database entry for given " \
593#                            "storage volume should be updated."
594#                store.status = store_status.ERROR
595#                store.uci.state = uci_states.ERROR
596#                store.uci.flush()
597#                store.flush()
598        
599        # Update pending snapshots or delete ones marked for deletion
600        snapshots = self.sa_session.query( model.CloudSnapshot ) \
601            .filter( or_( model.CloudSnapshot.table.c.status == snapshot_status.PENDING, model.CloudSnapshot.table.c.status == snapshot_status.DELETE ) ) \
602            .all()
603        for snapshot in snapshots:
604            if self.type == snapshot.uci.credentials.provider.type and snapshot.status == snapshot_status.PENDING:
605                log.debug( "[%s] Running general status update on snapshot '%s'" % ( snapshot.uci.credentials.provider.type, snapshot.snapshot_id ) )
606                self.update_snapshot( snapshot )
607            elif self.type == snapshot.uci.credentials.provider.type and snapshot.status == snapshot_status.DELETE:
608                log.debug( "[%s] Initiating deletion of snapshot '%s'" % ( snapshot.uci.credentials.provider.type, snapshot.snapshot_id ) )
609                self.delete_snapshot( snapshot )
610             
611        # Attempt at updating any zombie UCIs (i.e., instances that have been in SUBMITTED state for longer than expected - see below for exact time)
612        zombies = self.sa_session.query( model.UCI ).filter_by( state=uci_states.SUBMITTED ).all()
613        for zombie in zombies:
614            z_instances = self.sa_session.query( model.CloudInstance ) \
615                .filter_by( uci_id=zombie.id ) \
616                .filter( or_( model.CloudInstance.table.c.state != instance_states.TERMINATED,
617                              model.CloudInstance.table.c.state == None ) ) \
618                .all()
619            for z_inst in z_instances:
620                if self.type == z_inst.uci.credentials.provider.type:
621#                    log.debug( "z_inst.id: '%s', state: '%s'" % ( z_inst.id, z_inst.state ) )
622                    td = datetime.utcnow() - z_inst.update_time
623                    if td.seconds > 180: # if instance has been in SUBMITTED state for more than 3 minutes
624                        log.debug( "[%s] Running zombie repair update on instance with DB id '%s'" % ( z_inst.uci.credentials.provider.type, z_inst.id ) )
625                        self.process_zombie( z_inst )
626        
627    def update_instance( self, inst ):
628        """
629        Update information in local database for given instance as it is obtained from cloud provider.
630        Along with updating information about given instance, information about the UCI controlling
631        this instance is also updated.
632        """
633        # Get credentials associated wit this instance
634        uci_id = inst.uci_id
635        uci = self.sa_session.query( model.UCI ).get( uci_id )
636        self.sa_session.refresh( uci )
637        conn = self.get_connection_from_uci( uci )
638        
639        # Get reservations handle for given instance
640        try:
641            rl= conn.get_all_instances( [inst.instance_id] )
642        except boto.exception.EC2ResponseError, e:
643            err = "Retrieving instance(s) from cloud failed for UCI '"+ uci.name +"' during general status update: " + str( e )
644            log.error( err )
645            uci.error = err
646            uci.state = uci_states.ERROR
647            self.sa_session.add( uci )
648            self.sa_session.flush()
649            return None
650
651        # Because references to reservations are deleted shortly after instances have been terminated, getting an empty list as a response to a query
652        # typically means the instance has successfully shut down but the check was not performed in short enough amount of time. Until an alternative solution
653        # is found, below code sets state of given UCI to 'error' to indicate to the user something out of ordinary happened.
654        if len( rl ) == 0:
655            err = "Instance ID '"+inst.instance_id+"' was not found by the cloud provider. Instance might have crashed or otherwise been terminated."+ \
656                "Manual check is recommended."
657            log.error( err )
658            inst.error = err
659            uci.error = err
660            inst.state = instance_states.TERMINATED
661            uci.state = uci_states.ERROR
662            uci.launch_time = None
663            self.sa_session.add( inst )
664            self.sa_session.add( uci )
665            self.sa_session.flush()
666        # Update instance status in local DB with info from cloud provider
667        for r in rl:
668            for i, cInst in enumerate( r.instances ):
669                try:
670                    s = cInst.update()
671                    log.debug( "Checking state of cloud instance '%s' associated with UCI '%s' and reservation '%s'. State='%s'" % ( cInst, uci.name, r, s ) )
672                    if  s != inst.state:
673                        inst.state = s
674                        self.sa_session.add( inst )
675                        self.sa_session.flush()
676                         # After instance has shut down, ensure UCI is marked as 'available'
677                        if s == instance_states.TERMINATED and uci.state != uci_states.ERROR:
678                            uci.state = uci_states.AVAILABLE
679                            uci.launch_time = None
680                            self.sa_session.add( uci )
681                            self.sa_session.flush()
682                    # Making sure state of UCI is updated. Once multiple instances become associated with single UCI, this will need to be changed.
683                    if s != uci.state and s != instance_states.TERMINATED: 
684                        uci.state = s                    
685                        self.sa_session.add( uci )
686                        self.sa_session.flush()
687                    if cInst.public_dns_name != inst.public_dns:
688                        inst.public_dns = cInst.public_dns_name
689                        self.sa_session.add( inst )
690                        self.sa_session.flush()
691                    if cInst.private_dns_name != inst.private_dns:
692                        inst.private_dns = cInst.private_dns_name
693                        self.sa_session.add( inst )
694                        self.sa_session.flush()
695                except boto.exception.EC2ResponseError, e:
696                    err = "Updating instance status from cloud failed for UCI '"+ uci.name + "' during general status update: " + str( e )
697                    log.error( err )
698                    uci.error = err
699                    uci.state = uci_states.ERROR
700                    self.sa_session.add( uci )
701                    self.sa_session.flush()
702                    return None
703                
704    def update_store( self, store ):
705        """
706        Update information in local database for given storage volume as it is obtained from cloud provider.
707        Along with updating information about given storage volume, information about the UCI controlling
708        this storage volume is also updated.
709        """
710        # Get credentials associated wit this store
711        uci_id = store.uci_id
712        uci = self.sa_session.query( model.UCI ).get( uci_id )
713        self.sa_session.refresh( uci )
714        conn = self.get_connection_from_uci( uci )
715        
716        # Get reservations handle for given store 
717        try:
718            log.debug( "Updating storage volume command: vl = conn.get_all_volumes( [%s] )" % store.volume_id )
719            vl = conn.get_all_volumes( [store.volume_id] )
720        except boto.exception.EC2ResponseError, e:
721            err = "Retrieving volume(s) from cloud failed for UCI '"+ uci.name + "' during general status update: " + str( e )
722            log.error( err )
723            uci.error = err
724            uci.state = uci_states.ERROR
725            self.sa_session.add( uci )
726            self.sa_session.flush()
727            return None
728        
729        # Update store status in local DB with info from cloud provider
730        if len(vl) > 0:
731            try:
732                log.debug( "Storage volume '%s' current status: '%s'" % (store.volume_id, vl[0].status ) )
733                if store.status != vl[0].status:
734                    # In case something failed during creation of UCI but actual storage volume was created and yet 
735                    #  UCI state remained as 'new', try to remedy this by updating UCI state here 
736                    if ( store.status == None ) and ( store.volume_id != None ):
737                        uci.state = vl[0].status
738                        self.sa_session.add( uci )
739                        self.sa_session.flush()
740                    # If UCI was marked in state 'CREATING', update its status to reflect new status
741                    elif ( uci.state == uci_states.CREATING ):
742                        uci.state = vl[0].status
743                        self.sa_session.add( uci )
744                        self.sa_session.flush()
745                            
746                    store.status = vl[0].status
747                    self.sa_session.add( store )
748                    self.sa_session.flush()
749                    if store.inst != None:
750                        if store.inst.instance_id != vl[0].instance_id:
751                            store.inst.instance_id = vl[0].instance_id
752                            self.sa_session.add( store )
753                            self.sa_session.flush()
754                    if store.attach_time != vl[0].attach_time:
755                        store.attach_time = vl[0].attach_time
756                        self.sa_session.add( store )
757                        self.sa_session.flush()
758                    if store.device != vl[0].device:
759                        store.device = vl[0].device
760                        self.sa_session.add( store )
761                        self.sa_session.flush()
762            except boto.exception.EC2ResponseError, e:
763                err = "Updating status of volume(s) from cloud failed for UCI '"+ uci.name + "' during general status update: " + str( e )
764                log.error( err )
765                uci.error = err
766                uci.state = uci_states.ERROR
767                self.sa_session.add( uci )
768                self.sa_session.flush()
769                return None
770        else:
771            err = "No storage volumes returned by cloud provider on general update"
772            log.error( "%s for UCI '%s'" % ( err, uci.name ) )
773            store.status = store_status.ERROR
774            store.error = err
775            uci.error = err
776            uci.state = uci_states.ERROR
777            self.sa_session.add( uci )
778            self.sa_session.add( store )
779            self.sa_session.flush()
780   
781    def update_snapshot( self, snapshot ):
782        """
783        Update information in local database for given snapshot as it is obtained from cloud provider.
784        Along with updating information about given snapshot, information about the UCI controlling
785        this snapshot is also updated.
786        """
787        # Get credentials associated wit this store
788        uci_id = snapshot.uci_id
789        uci = self.sa_session.query( model.UCI ).get( uci_id )
790        self.sa_session.refresh( uci )
791        conn = self.get_connection_from_uci( uci )
792        
793        try:
794            log.debug( "Updating status of snapshot '%s'" % snapshot.snapshot_id )
795            snap = conn.get_all_snapshots( [snapshot.snapshot_id] ) 
796            if len( snap ) > 0:
797                log.debug( "Snapshot '%s' status: %s" % ( snapshot.snapshot_id, snap[0].status ) )
798                snapshot.status = snap[0].status
799                self.sa_session.add( snapshot )
800                self.sa_session.flush()
801            else:
802                err = "No snapshots returned by EC2 on general update"
803                log.error( "%s for UCI '%s'" % ( err, uci.name ) )
804                snapshot.status = snapshot_status.ERROR
805                snapshot.error = err
806                uci.error = err
807                uci.state = uci_states.ERROR
808                self.sa_session.add( uci )
809                self.sa_session.add( snapshot )
810                self.sa_session.flush()
811        except boto.exception.EC2ResponseError, e:
812            err = "EC2 response error while updating snapshot status: " + str( e )
813            log.error( err )
814            snapshot.status = snapshot_status.ERROR
815            snapshot.error = err
816            uci.error = err
817            uci.state = uci_states.ERROR
818            self.sa_session.add( uci )
819            self.sa_session.add( snapshot )
820            self.sa_session.flush()
821        except Exception, ex:
822            err = "Error while updating snapshot status: " + str( ex )
823            log.error( err )
824            snapshot.status = snapshot_status.ERROR
825            snapshot.error = err
826            uci.error = err
827            uci.state = uci_states.ERROR
828            self.sa_session.add( uci )
829            self.sa_session.add( snapshot )
830            self.sa_session.flush()
831        
832    def delete_snapshot( self, snapshot ):
833        """
834        Initiate deletion of given snapshot from cloud provider.
835        """
836        if snapshot.status == snapshot_status.DELETE:
837            # Get credentials associated wit this store
838            uci_id = snapshot.uci_id
839            uci = self.sa_session.query( model.UCI ).get( uci_id )
840            self.sa_session.refresh( uci )
841            conn = self.get_connection_from_uci( uci )
842            
843            try:
844                log.debug( "Deleting snapshot '%s'" % snapshot.snapshot_id )
845                snap = conn.delete_snapshot( snapshot.snapshot_id )
846                if snap == True:
847                    snapshot.deleted = True
848                    snapshot.status = snapshot_status.DELETED
849                    self.sa_session.add( snapshot )
850                    self.sa_session.flush()
851                return snap
852            except boto.exception.EC2ResponseError, e:
853                err = "EC2 response error while deleting snapshot: " + str( e )
854                log.error( err )
855                snapshot.status = snapshot_status.ERROR
856                snapshot.error = err
857                uci.error = err
858                uci.state = uci_states.ERROR
859                self.sa_session.add( uci )
860                self.sa_session.add( snapshot )
861                self.sa_session.flush()
862            except Exception, ex:
863                err = "Error while deleting snapshot: " + str( ex )
864                log.error( err )
865                snapshot.status = snapshot_status.ERROR
866                snapshot.error = err
867                uci.error = err
868                uci.state = uci_states.ERROR
869                self.sa_session.add( uci )
870                self.sa_session.add( snapshot )
871                self.sa_session.flush()
872        else:
873            err = "Cannot delete snapshot '"+snapshot.snapshot_id+"' because its status is '"+snapshot.status+"'. Only snapshots with '" + \
874                        snapshot_status.COMPLETED+"' status can be deleted."
875            log.error( err )
876            snapshot.error = err
877            self.sa_session.add( snapshot )
878            self.sa_session.flush()
879            
880    def process_zombie( self, inst ):
881        """
882        Attempt at discovering if starting a cloud instance was successful but local database was not updated
883        accordingly or if something else failed and instance was never started. Currently, no automatic 
884        repairs are being attempted; instead, appropriate error messages are set.
885        """
886        uci_id = inst.uci_id
887        uci = self.sa_session.query( model.UCI ).get( uci_id )
888        self.sa_session.refresh( uci )
889        
890        # Check if any instance-specific information was written to local DB; if 'yes', set instance and UCI's error message 
891        # suggesting manual check.
892        if inst.launch_time != None or inst.reservation_id != None or inst.instance_id != None:
893            # Try to recover state - this is best-case effort, so if something does not work immediately, not
894            # recovery steps are attempted. Recovery is based on hope that instance_id is available in local DB; if not,
895            # report as error.
896            # Fields attempting to be recovered are: reservation_id, instance status, and launch_time 
897            if inst.instance_id != None:
898                conn = self.get_connection_from_uci( uci )
899                rl = conn.get_all_instances( [inst.instance_id] ) # reservation list
900                # Update local DB with relevant data from instance
901                if inst.reservation_id == None:
902                    try:
903                        inst.reservation_id = str(rl[0]).split(":")[1]
904                    except: # something failed, so skip
905                        pass
906                
907                try:
908                    state = rl[0].instances[0].update()
909                    inst.state = state
910                    uci.state = state
911                    self.sa_session.add( inst )
912                    self.sa_session.add( uci )
913                    self.sa_session.flush()
914                except: # something failed, so skip
915                    pass
916                
917                if inst.launch_time == None:
918                    try:
919                        launch_time = self.format_time( rl[0].instances[0].launch_time )
920                        inst.launch_time = launch_time
921                        self.sa_session.add( inst )
922                        self.sa_session.flush() 
923                        if inst.uci.launch_time == None:
924                            uci.launch_time = launch_time
925                            self.sa_session.add( uci )
926                            self.sa_session.flush()
927                    except: # something failed, so skip
928                        pass
929            else:
930                err = "Starting a machine instance (DB id: '"+str(inst.id)+"') associated with this UCI '" + str(inst.uci.name) + \
931                      "' seems to have failed. Because it appears that cloud instance might have gotten started, manual check is recommended."
932                inst.error = err
933                inst.state = instance_states.ERROR
934                inst.uci.error = err
935                inst.uci.state = uci_states.ERROR
936                log.error( err )
937                self.sa_session.add( inst )
938                self.sa_session.add( uci )
939                self.sa_session.flush()         
940                
941        else: #Instance most likely never got processed, so set error message suggesting user to try starting instance again.
942            err = "Starting a machine instance (DB id: '"+str(inst.id)+"') associated with this UCI '" + str(inst.uci.name) + \
943                  "' seems to have failed. Because it appears that cloud instance never got started, it should be safe to reset state and try " \
944                  "starting the instance again."
945            inst.error = err
946            inst.state = instance_states.ERROR
947            uci.error = err
948            uci.state = uci_states.ERROR
949            log.error( err )
950            self.sa_session.add( inst )
951            self.sa_session.add( uci )
952            self.sa_session.flush()
953#            uw = UCIwrapper( inst.uci )
954#            log.debug( "Try automatically re-submitting UCI '%s'." % uw.get_name() )
955
956    def get_connection_from_uci( self, uci ):
957        """
958        Establish and return connection to cloud provider. Information needed to do so is obtained
959        directly from uci database object.
960        """
961        log.debug( 'Establishing %s cloud connection' % self.type )
962        a_key = uci.credentials.access_key
963        s_key = uci.credentials

Large files are truncated, but you can click here to view the full file