PageRenderTime 7ms CodeModel.GetById 6ms app.highlight 76ms RepoModel.GetById 1ms app.codeStats 1ms

/lib/galaxy/cloud/providers/eucalyptus.py

https://bitbucket.org/afgane/galaxy-central-cloud
Python | 1039 lines | 935 code | 27 blank | 77 comment | 65 complexity | 7dc733a90ef920d6a637fd832094d258 MD5 | raw file

Large files are truncated, but you can click here to view the full file

  1import subprocess, threading, os, errno, time, datetime
  2from Queue import Queue, Empty
  3from datetime import datetime
  4
  5from galaxy import model # Database interaction class
  6from galaxy.model import mapping
  7from galaxy.datatypes.data import nice_size
  8from galaxy.util.bunch import Bunch
  9from galaxy.cloud import UCIwrapper
 10from Queue import Queue
 11from sqlalchemy import or_, and_
 12
 13import galaxy.eggs
 14galaxy.eggs.require("boto")
 15from boto.ec2.connection import EC2Connection
 16from boto.ec2.regioninfo import RegionInfo
 17import boto.exception
 18import boto
 19
 20import logging
 21log = logging.getLogger( __name__ )
 22
 23uci_states = Bunch(
 24    NEW_UCI = "newUCI",
 25    NEW = "new",
 26    CREATING = "creating",
 27    DELETING_UCI = "deletingUCI",
 28    DELETING = "deleting",
 29    SUBMITTED_UCI = "submittedUCI",
 30    SUBMITTED = "submitted",
 31    SHUTTING_DOWN_UCI = "shutting-downUCI",
 32    SHUTTING_DOWN = "shutting-down",
 33    ADD_STORAGE_UCI = "add-storageUCI",
 34    ADD_STORAGE = "add-storage",
 35    AVAILABLE = "available",
 36    RUNNING = "running",
 37    PENDING = "pending",
 38    ERROR = "error",
 39    DELETED = "deleted",
 40    SNAPSHOT_UCI = "snapshotUCI",
 41    SNAPSHOT = "snapshot"
 42)
 43
 44instance_states = Bunch(
 45    TERMINATED = "terminated",
 46    SUBMITTED = "submitted",
 47    RUNNING = "running",
 48    ADDING = "adding-storage",
 49    PENDING = "pending",
 50    SHUTTING_DOWN = "shutting-down",
 51    ERROR = "error"
 52)
 53
 54store_status = Bunch(
 55    WAITING = "waiting",
 56    IN_USE = "in-use",
 57    ADDING = "adding",
 58    CREATING = "creating",
 59    DELETED = 'deleted',
 60    ERROR = "error"
 61)
 62
 63snapshot_status = Bunch(
 64    SUBMITTED = 'submitted',
 65    PENDING = 'pending',
 66    COMPLETED = 'completed',
 67    DELETE = 'delete',
 68    DELETED= 'deleted',
 69    ERROR = "error"
 70)
 71
 72class EucalyptusCloudProvider( object ):
 73    """
 74    Eucalyptus-based cloud provider implementation for managing instances. 
 75    """
 76    STOP_SIGNAL = object()
 77    def __init__( self, app ):
 78        self.type = "eucalyptus" # cloud provider type (e.g., ec2, eucalyptus, opennebula)
 79        self.zone = "epc"
 80        self.queue = Queue()
 81        self.sa_session = app.model.context
 82        
 83        self.threads = []
 84        nworkers = 5
 85        log.info( "Starting eucalyptus cloud controller workers..." )
 86        for i in range( nworkers  ):
 87            worker = threading.Thread( target=self.run_next )
 88            worker.start()
 89            self.threads.append( worker )
 90        log.debug( "%d eucalyptus cloud workers ready", nworkers )
 91        
 92    def shutdown( self ):
 93        """Attempts to gracefully shut down the monitor thread"""
 94        log.info( "sending stop signal to worker threads in eucalyptus cloud manager" )
 95        for i in range( len( self.threads ) ):
 96            self.queue.put( self.STOP_SIGNAL )
 97        log.info( "eucalyptus cloud manager stopped" )
 98    
 99    def put( self, uci_wrapper ):
100        """
101        Add uci_wrapper object to the end of the request queue to be handled by 
102        this cloud provider.
103        """
104        state = uci_wrapper.get_uci_state()
105        uci_wrapper.change_state( state.split('U')[0] ) # remove 'UCI' from end of state description (i.e., mark as accepted and ready for processing)
106        self.queue.put( uci_wrapper )
107        
108    def run_next( self ):
109        """Process next request, waiting until one is available if necessary."""
110        cnt = 0
111        while 1:
112            uci_wrapper = self.queue.get()
113            uci_state = uci_wrapper.get_uci_state()
114            if uci_state is self.STOP_SIGNAL:
115                return
116            try:
117                if uci_state==uci_states.NEW:
118                    self.create_uci( uci_wrapper )
119                elif uci_state==uci_states.DELETING:
120                    self.delete_uci( uci_wrapper )
121                elif uci_state==uci_states.SUBMITTED:
122                    self.start_uci( uci_wrapper )
123                    #self.dummy_start_uci( uci_wrapper )
124                elif uci_state==uci_states.SHUTTING_DOWN:
125                    self.stop_uci( uci_wrapper )
126                elif uci_state==uci_states.SNAPSHOT:
127                    self.snapshot_uci( uci_wrapper )
128                elif uci_state==uci_states.ADD_STORAGE:
129                    self.add_storage_to_uci( uci_wrapper )
130            except:
131                log.exception( "Uncaught exception executing cloud request." )
132            cnt += 1
133            
134    def get_connection( self, uci_wrapper ):
135        """
136        Establishes cloud connection using user's credentials associated with given UCI
137        """
138        log.debug( 'Establishing %s cloud connection.' % self.type )
139        provider = uci_wrapper.get_provider()
140        try:
141            region = RegionInfo( None, provider.region_name, provider.region_endpoint )
142        except Exception, ex:
143            err = "Selecting region with cloud provider failed: " + str( ex )
144            log.error( err )
145            uci_wrapper.set_error( err, True )
146            return None        
147        try:
148            conn = EC2Connection( aws_access_key_id=uci_wrapper.get_access_key(), 
149                                  aws_secret_access_key=uci_wrapper.get_secret_key(), 
150                                  is_secure=provider.is_secure, 
151                                  port=provider.port, 
152                                  region=region, 
153                                  path=provider.path )
154        except boto.exception.EC2ResponseError, e:
155            err = "Establishing connection with cloud failed: " + str( e )
156            log.error( err )
157            uci_wrapper.set_error( err, True )
158            return None
159        
160        return conn
161        
162    def check_key_pair( self, uci_wrapper, conn ):
163        """
164        Check if a key pair associated with this UCI exists on cloud provider.
165        If yes, return key pair name; otherwise, generate a key pair with the cloud
166        provider and, again, return key pair name.
167        Key pair name for given UCI is generated from UCI's name and suffix '_kp' 
168        """
169        kp = None
170        kp_name = uci_wrapper.get_name().replace(' ','_') + "_kp"
171        log.debug( "Checking user's key pair: '%s'" % kp_name )
172        try:
173            kp = conn.get_key_pair( kp_name )
174            uci_kp_name = uci_wrapper.get_key_pair_name()
175            uci_material = uci_wrapper.get_key_pair_material()
176            if kp != None:
177                if kp.name != uci_kp_name or uci_material == None:
178                    # key pair exists on the cloud but not in local database, so re-generate it (i.e., delete and then create)
179                    try: 
180                        conn.delete_key_pair( kp_name )
181                        kp = self.create_key_pair( conn, kp_name )
182                        uci_wrapper.set_key_pair( kp.name, kp.material )
183                    except boto.exception.EC2ResponseError, e:
184                        err = "EC2 response error while deleting key pair: " + str( e )
185                        log.error( err )
186                        uci_wrapper.set_error( err, True )
187            else:
188                try:
189                    kp = self.create_key_pair( conn, kp_name )
190                    uci_wrapper.set_key_pair( kp.name, kp.material )
191                except boto.exception.EC2ResponseError, e:
192                    err = "EC2 response error while creating key pair: " + str( e )
193                    log.error( err )
194                    uci_wrapper.set_error( err, True )
195                except Exception, ex:
196                    err = "Exception while creating key pair: " + str( ex )
197                    log.error( err )
198                    uci_wrapper.set_error( err, True )
199        except boto.exception.EC2ResponseError, e: # No keypair under this name exists so create it
200            if e.code == 'InvalidKeyPair.NotFound': 
201                log.info( "No keypair found, creating keypair '%s'" % kp_name )
202                kp = self.create_key_pair( conn, kp_name )
203                uci_wrapper.set_key_pair( kp.name, kp.material )
204            else:
205                err = "EC2 response error while retrieving key pair: " + str( e )
206                log.error( err )
207                uci_wrapper.set_error( err, True )
208                        
209        if kp != None:
210            return kp.name
211        else:
212            return None
213    
214    def create_key_pair( self, conn, kp_name ):
215        """ Initiate creation of key pair under kp_name by current cloud provider. """
216        try:
217            return conn.create_key_pair( kp_name )
218        except boto.exception.EC2ResponseError, e: 
219            return None
220    
221    def get_mi_id( self, uci_wrapper, i_index ):
222        """
223        Get appropriate machine image (mi) ID based on instance type.
224        """
225        i_type = uci_wrapper.get_instance_type( i_index )
226        if i_type=='m1.small' or i_type=='c1.medium':
227            arch = 'i386'
228        else:
229            arch = 'x86_64' 
230        
231        mi = self.sa_session.query( model.CloudImage ).filter_by( deleted=False, provider_type=self.type, architecture=arch ).first()
232        if mi:
233            return mi.image_id
234        else:
235            err = "Machine image could not be retrieved"
236            log.error( "%s for UCI '%s'." % (err, uci_wrapper.get_name() ) )
237            uci_wrapper.set_error( err+". Contact site administrator to ensure needed machine image is registered.", True )
238            return None
239            
240    def create_uci( self, uci_wrapper ):
241        """ 
242        Create User Configured Instance (UCI) - i.e., create storage volume on cloud provider
243        and register relevant information in local Galaxy database.
244        """
245        conn = self.get_connection( uci_wrapper )
246        
247        # Because only 1 storage volume may be created at UCI config time, index of this storage volume in local Galaxy DB w.r.t
248        # current UCI is 0; therefore, it can be referenced in following code
249        log.info( "Creating volume in zone '%s'..." % uci_wrapper.get_uci_availability_zone() )
250        if uci_wrapper.get_uci_availability_zone()=='':
251            log.info( "Availability zone for UCI (i.e., storage volume) was not selected, using default zone: %s" % self.zone )
252            uci_wrapper.set_store_availability_zone( self.zone )
253        
254#        log.debug( "Creating volume; using command: conn.create_volume( %s, '%s', snapshot=None )" % ( uci_wrapper.get_store_size( 0 ), uci_wrapper.get_uci_availability_zone() ))
255#        vol = conn.create_volume( uci_wrapper.get_store_size( 0 ), uci_wrapper.get_uci_availability_zone(), snapshot=None )
256#        uci_wrapper.set_store_volume_id( 0, vol.id ) 
257        store = uci_wrapper.get_all_stores_in_status( store_status.ADDING )[0] # Because at UCI creation time only 1 storage volume can be created, reference it directly
258        
259        log.info( "Creating storage volume in zone '%s' of size '%s'..." % ( uci_wrapper.get_uci_availability_zone(), store.size ) )
260        # Because only 1 storage volume may be created at UCI config time, index of this storage volume in local Galaxy DB w.r.t
261        # current UCI is 0, so reference it in following methods
262        vol = conn.create_volume( store.size, uci_wrapper.get_uci_availability_zone(), snapshot=None )
263        uci_wrapper.set_store_volume_id( store.id, vol.id )
264        
265        # Retrieve created volume again to get updated status
266        try:
267            vl = conn.get_all_volumes( [vol.id] )
268        except boto.exception.EC2ResponseError, e: 
269            err = "EC2 response error while retrieving (i.e., updating status) of just created storage volume '" + vol.id + "': " + str( e )
270            log.error( err )
271            uci_wrapper.set_store_status( vol.id, uci_states.ERROR )
272            uci_wrapper.set_error( err, True )
273            return
274        except Exception, ex:
275            err = "Error while retrieving (i.e., updating status) of just created storage volume '" + vol.id + "': " + str( ex )
276            log.error( err )
277            uci_wrapper.set_error( err, True )
278            return
279        
280        if len( vl ) > 0:
281            # EPC does not allow creation of storage volumes (it deletes one as soon as it is created, so manually set uci_state here)
282            if vl[0].status == store_status.DELETED:
283                uci_wrapper.change_state( uci_state=uci_states.AVAILABLE )
284            else:
285                uci_wrapper.change_state( uci_state=vl[0].status )
286            uci_wrapper.set_store_status( vol.id, vl[0].status )
287        else:
288            err = "Volume '" + vol.id +"' not found by EC2 after being created."
289            log.error( err )
290            uci_wrapper.set_store_status( vol.id, uci_states.ERROR )
291            uci_wrapper.set_error( err, True )
292
293    def delete_uci( self, uci_wrapper ):
294        """ 
295        Delete UCI - i.e., delete all storage volumes associated with this UCI. 
296        NOTE that this implies deletion of any and all data associated
297        with this UCI from the cloud. All data will be deleted.
298        Information in local Galaxy database is marked as deleted but not actually removed
299        from the database. 
300        """
301        conn = self.get_connection( uci_wrapper )
302        vl = [] # volume list
303        count = 0 # counter for checking if all volumes assoc. w/ UCI were deleted
304        
305        # Get all volumes assoc. w/ UCI, delete them from cloud as well as in local DB
306        vl = uci_wrapper.get_all_stores()
307        deletedList = []
308        failedList = []
309        for v in vl:
310            log.debug( "Deleting volume with id='%s'" % v.volume_id )
311            try:
312                if conn.delete_volume( v.volume_id ):
313                    deletedList.append( v.volume_id )
314                    v.deleted = True
315                    self.sa_session.add( v )
316                    self.sa_session.flush()
317                    count += 1
318                else:
319                    failedList.append( v.volume_id )
320            except boto.exception.EC2ResponseError, e:
321                err = "EC2 response error while deleting storage volume '" + v.volume_id + "': " + str( e )
322                log.error( err )
323                uci_wrapper.set_store_error( err, store_id = v.volume_id )
324                uci_wrapper.set_error( err, True )
325                
326        # Delete UCI if all of associated 
327        if count == len( vl ):
328            uci_wrapper.set_deleted()
329        else:
330            err = "Deleting following volume(s) failed: "+ str( failedList )+". However, these volumes were successfully deleted: " \
331                  + str( deletedList ) +". MANUAL intervention and processing needed."
332            log.error( err )
333            uci_wrapper.set_error( err, True )
334            
335    def snapshot_uci( self, uci_wrapper ):
336        """
337        Initiate creation of a snapshot by cloud provider for all storage volumes 
338        associated with this UCI. 
339        """
340        if uci_wrapper.get_uci_state() != uci_states.ERROR:
341            conn = self.get_connection( uci_wrapper )
342            
343            snapshots = uci_wrapper.get_snapshots( status = snapshot_status.SUBMITTED )
344            for snapshot in snapshots:
345                log.debug( "Snapshot DB id: '%s', volume id: '%s'" % ( snapshot.id, snapshot.store.volume_id ) )
346                try:
347                    snap = conn.create_snapshot( volume_id=snapshot.store.volume_id )
348                    snap_id = str( snap ).split(':')[1]
349                    uci_wrapper.set_snapshot_id( snapshot.id, snap_id )
350                    sh = conn.get_all_snapshots( snap_id ) # get updated status
351                    uci_wrapper.set_snapshot_status( status=sh[0].status, snap_id=snap_id )
352                except boto.exception.EC2ResponseError, e:
353                    err = "Cloud provider response error while creating snapshot: " + str( e )
354                    log.error( err )
355                    uci_wrapper.set_snapshot_error( error=err, snap_index=snapshot.id, set_status=True )
356                    uci_wrapper.set_error( err, True )
357                    return
358                except Exception, ex:
359                    err = "Error while creating snapshot: " + str( ex )
360                    log.error( err )
361                    uci_wrapper.set_snapshot_error( error=err, snap_index=snapshot.id, set_status=True )
362                    uci_wrapper.set_error( err, True )
363                    return
364                    
365            uci_wrapper.change_state( uci_state=uci_states.AVAILABLE )
366        
367#        if uci_wrapper.get_uci_state() != uci_states.ERROR:
368#            
369#            snapshots = uci_wrapper.get_snapshots( status = 'submitted' )
370#            for snapshot in snapshots:
371#                uci_wrapper.set_snapshot_id( snapshot.id, None, 'euca_error' )
372#            
373#            log.debug( "Eucalyptus snapshot attempted by user for UCI '%s'" % uci_wrapper.get_name() )
374#            uci_wrapper.set_error( "Eucalyptus does not support creation of snapshots at this moment. No snapshot or other changes were performed. \
375#                        Feel free to resent state of this instance and use it normally.", True )
376            
377            
378    def add_storage_to_uci( self, uci_wrapper ):
379        """ Adds more storage to specified UCI """
380        uci_wrapper.set_error( "Adding storage to eucalyptus-based clouds is not yet supported.", True )
381    
382    def dummy_start_uci( self, uci_wrapper ):
383        
384        uci = uci_wrapper.get_uci()
385        log.debug( "Would be starting instance '%s'" % uci.name )
386#        uci_wrapper.change_state( uci_states.SUBMITTED_UCI )
387#        log.debug( "Set UCI state to SUBMITTED_UCI" )
388        log.debug( "Sleeping a bit... (%s)" % uci.name )
389        time.sleep(10)
390        log.debug( "Woke up! (%s)" % uci.name )
391        
392    def start_uci( self, uci_wrapper ):
393        """
394        Start instance(s) of given UCI on the cloud.  
395        """ 
396        if uci_wrapper.get_uci_state() != uci_states.ERROR:
397            conn = self.get_connection( uci_wrapper )
398            self.check_key_pair( uci_wrapper, conn )
399            if uci_wrapper.get_key_pair_name() == None:
400                err = "Key pair not found"
401                log.error( "%s for UCI '%s'." % ( err, uci_wrapper.get_name() ) )
402                uci_wrapper.set_error( err + ". Try resetting the state and starting the instance again.", True )
403                return
404            
405            i_indexes = uci_wrapper.get_instances_indexes( state=instance_states.SUBMITTED ) # Get indexes of i_indexes associated with this UCI that are in 'submitted' state
406            log.debug( "Starting instances with IDs: '%s' associated with UCI '%s' " % ( i_indexes, uci_wrapper.get_name(),  ) )
407            if len( i_indexes ) > 0:
408                for i_index in i_indexes:
409                    # Get machine image for current instance
410                    mi_id = self.get_mi_id( uci_wrapper, i_index )
411                    log.debug( "mi_id: %s, uci_wrapper.get_key_pair_name(): %s" % ( mi_id, uci_wrapper.get_key_pair_name() ) )
412                    uci_wrapper.set_mi( i_index, mi_id )
413                               
414                    if uci_wrapper.get_uci_state() != uci_states.ERROR:
415                        # Start an instance
416                        log.debug( "Starting UCI instance '%s'" % uci_wrapper.get_name() )
417                        log.debug( "Using following command: conn.run_instances( image_id='%s', key_name='%s', instance_type='%s' )" 
418                                   % ( mi_id, uci_wrapper.get_key_pair_name(), uci_wrapper.get_instance_type( i_index ) ) )
419                        reservation = None
420                        try:
421                            reservation = conn.run_instances( image_id=mi_id, 
422                                                              key_name=uci_wrapper.get_key_pair_name(),
423                                                              instance_type=uci_wrapper.get_instance_type( i_index ) )
424                        except boto.exception.EC2ResponseError, e:
425                            err = "EC2 response error when starting UCI '"+ uci_wrapper.get_name() +"': " + str( e )
426                            log.error( err )
427                            uci_wrapper.set_error( err, True )
428                        except Exception, ex:
429                            err = "Error when starting UCI '" + uci_wrapper.get_name() + "': " + str( ex )
430                            log.error( err )
431                            uci_wrapper.set_error( err, True )
432                        # Record newly available instance data into local Galaxy database
433                        if reservation:
434                            l_time = datetime.utcnow()
435#                            uci_wrapper.set_instance_launch_time( self.format_time( reservation.instances[0].launch_time ), i_index=i_index )
436                            uci_wrapper.set_instance_launch_time( l_time, i_index=i_index )
437                            if not uci_wrapper.uci_launch_time_set():
438                                uci_wrapper.set_uci_launch_time( l_time )
439                            try:
440                                uci_wrapper.set_reservation_id( i_index, str( reservation ).split(":")[1] )
441                                # TODO: if more than a single instance will be started through single reservation, change this reference from element [0]
442                                i_id = str( reservation.instances[0]).split(":")[1]
443                                uci_wrapper.set_instance_id( i_index, i_id )
444                                s = reservation.instances[0].state
445                                uci_wrapper.change_state( s, i_id, s )
446                                vol_id = uci_wrapper.get_store_volume_id( store_id=0 ) # TODO: Once more that one vol/UCI is allowed, update this!
447                                uci_wrapper.set_store_status( vol_id, store_status.WAITING )
448                                log.debug( "Instance of UCI '%s' started, current state: '%s'" % ( uci_wrapper.get_name(), uci_wrapper.get_uci_state() ) )
449                            except boto.exception.EC2ResponseError, e:
450                                err = "EC2 response error when retrieving instance information for UCI '" + uci_wrapper.get_name() + "': " + str( e )
451                                log.error( err )
452                                uci_wrapper.set_error( err, True )
453                    else:
454                        log.error( "UCI '%s' is in 'error' state, starting instance was aborted." % uci_wrapper.get_name() )
455            else:
456                err = "No instances in state '"+ instance_states.SUBMITTED +"' found for UCI '" + uci_wrapper.get_name() + \
457                      "'. Nothing to start."
458                log.error( err )
459                uci_wrapper.set_error( err, True )
460        else:
461            log.error( "UCI '%s' is in 'error' state, starting instance was aborted." % uci_wrapper.get_name() )
462        
463    def stop_uci( self, uci_wrapper):
464        """ 
465        Stop all cloud instances associated with given UCI. 
466        """
467        conn = self.get_connection( uci_wrapper )
468        
469        # Get all instances associated with given UCI
470        il = uci_wrapper.get_instances_ids() # instance list
471        # Process list of instances and remove any references to empty instance id's
472        for i in il:
473            if i is None:
474                il.remove( i )
475        log.debug( 'List of instances being terminated: %s' % il )
476        rl = conn.get_all_instances( il ) # Reservation list associated with given instances
477                        
478        # Initiate shutdown of all instances under given UCI
479        cnt = 0
480        stopped = []
481        not_stopped = []
482        for r in rl:
483            for inst in r.instances:
484                log.debug( "Sending stop signal to instance '%s' associated with reservation '%s' (UCI: %s)." % ( inst, r, uci_wrapper.get_name() ) )
485                try:
486                    inst.stop()
487                    uci_wrapper.set_stop_time( datetime.utcnow(), i_id=inst.id )
488                    uci_wrapper.change_state( instance_id=inst.id, i_state=inst.update() )
489                    stopped.append( inst )
490                except boto.exception.EC2ResponseError, e:
491                    not_stopped.append( inst )
492                    err = "EC2 response error when stopping instance '" + inst.instance_id + "': " + str( e )
493                    log.error( err )
494                    uci_wrapper.set_error( err, True )
495                
496        uci_wrapper.reset_uci_launch_time()
497        log.debug( "Termination was initiated for all instances of UCI '%s'." % uci_wrapper.get_name() )
498
499#        dbInstances = get_instances( trans, uci ) #TODO: handle list!
500#        
501#        # Get actual cloud instance object
502#        cloudInstance = get_cloud_instance( conn, dbInstances.instance_id )
503#        
504#        # TODO: Detach persistent storage volume(s) from instance and update volume data in local database
505#        stores = get_stores( trans, uci )
506#        for i, store in enumerate( stores ):
507#            log.debug( "Detaching volume '%s' to instance '%s'." % ( store.volume_id, dbInstances.instance_id ) )
508#            mntDevice = store.device
509#            volStat = None
510##            Detaching volume does not work with Eucalyptus Public Cloud, so comment it out
511##            try:
512##                volStat = conn.detach_volume( store.volume_id, dbInstances.instance_id, mntDevice )
513##            except:
514##                log.debug ( 'Error detaching volume; still going to try and stop instance %s.' % dbInstances.instance_id )
515#            store.attach_time = None
516#            store.device = None
517#            store.inst.instance_id = None
518#            store.status = volStat
519#            log.debug ( '***** volume status: %s' % volStat )
520#        
521#        # Stop the instance and update status in local database
522#        cloudInstance.stop()
523#        dbInstances.stop_time = datetime.utcnow()
524#        while cloudInstance.state != 'terminated':
525#            log.debug( "Stopping instance %s state; current state: %s" % ( str( cloudInstance ).split(":")[1], cloudInstance.state ) )
526#            time.sleep(3)
527#            cloudInstance.update()
528#        dbInstances.state = cloudInstance.state
529#        
530#        # Reset relevant UCI fields
531#        uci.state = 'available'
532#        uci.launch_time = None
533#          
534#        # Persist
535#        session = trans.sa_session
536##        session.save_or_update( stores )
537#        session.save_or_update( dbInstances ) # TODO: Is this going to work w/ multiple instances stored in dbInstances variable?
538#        session.save_or_update( uci )
539#        session.flush()
540#        trans.log_event( "User stopped cloud instance '%s'" % uci.name )
541#        trans.set_message( "Galaxy instance '%s' stopped." % uci.name )
542
543    def update( self ):
544        """ 
545        Run status update on all instances that are in 'running', 'pending', or 'shutting-down' state.
546        Run status update on all storage volumes whose status is 'in-use', 'creating', or 'None'.
547        Run status update on all snapshots whose status is 'pending' or 'delete'  
548        Run status update on any zombie UCIs, i.e., UCI's that is in 'submitted' state for an 
549        extended period of time.
550        
551        Reason behind this method is to sync state of local DB and real-world resources
552        """
553        log.debug( "Running general status update for %s UCIs..." % self.type )
554        # Update instances
555        instances = self.sa_session.query( model.CloudInstance ) \
556            .filter( or_( model.CloudInstance.table.c.state==instance_states.RUNNING, 
557                          model.CloudInstance.table.c.state==instance_states.PENDING, 
558                          model.CloudInstance.table.c.state==instance_states.SHUTTING_DOWN ) ) \
559            .all()
560        for inst in instances:
561            if self.type == inst.uci.credentials.provider.type:
562                log.debug( "[%s] Running general status update on instance '%s'" % ( inst.uci.credentials.provider.type, inst.instance_id ) )
563                self.update_instance( inst )
564        
565        # Update storage volume(s)
566        stores = self.sa_session.query( model.CloudStore ) \
567            .filter( or_( model.CloudStore.table.c.status==store_status.IN_USE, 
568                          model.CloudStore.table.c.status==store_status.CREATING,
569                          model.CloudStore.table.c.status==store_status.WAITING,
570                          model.CloudStore.table.c.status==None ) ) \
571            .all()
572        for store in stores:
573            if self.type == store.uci.credentials.provider.type: # and store.volume_id != None:
574                log.debug( "[%s] Running general status update on store with local database ID: '%s'" % ( store.uci.credentials.provider.type, store.id ) )
575                self.update_store( store )
576        
577        # Update pending snapshots or delete ones marked for deletion
578        snapshots = self.sa_session.query( model.CloudSnapshot ) \
579            .filter( or_( model.CloudSnapshot.table.c.status == snapshot_status.PENDING, model.CloudSnapshot.table.c.status == snapshot_status.DELETE ) ) \
580            .all()
581        for snapshot in snapshots:
582            if self.type == snapshot.uci.credentials.provider.type and snapshot.status == snapshot_status.PENDING:
583                log.debug( "[%s] Running general status update on snapshot '%s'" % ( snapshot.uci.credentials.provider.type, snapshot.snapshot_id ) )
584                self.update_snapshot( snapshot )
585            elif self.type == snapshot.uci.credentials.provider.type and snapshot.status == snapshot_status.DELETE:
586                log.debug( "[%s] Initiating deletion of snapshot '%s'" % ( snapshot.uci.credentials.provider.type, snapshot.snapshot_id ) )
587                self.delete_snapshot( snapshot )
588        
589        # Attempt at updating any zombie UCIs (i.e., instances that have been in SUBMITTED state for longer than expected - see below for exact time)
590        zombies = self.sa_session.query( model.UCI ).filter_by( state=uci_states.SUBMITTED ).all()
591        for zombie in zombies:
592            log.debug( "zombie UCI: %s" % zombie.name )
593            z_instances = self.sa_session.query( model.CloudInstance ) \
594                .filter( or_( model.CloudInstance.table.c.state != instance_states.TERMINATED,
595                              model.CloudInstance.table.c.state == None ) ) \
596                .all()
597            for z_inst in z_instances:
598                if self.type == z_inst.uci.credentials.provider.type:
599#                    log.debug( "z_inst.id: '%s', state: '%s'" % ( z_inst.id, z_inst.state ) )
600                    td = datetime.utcnow() - z_inst.update_time
601#                    log.debug( "z_inst.id: %s, time delta is %s sec" % ( z_inst.id, td.seconds ) )
602                    if td.seconds > 180: # if instance has been in SUBMITTED state for more than 3 minutes
603                        log.debug( "[%s](td=%s) Running zombie repair update on instance with DB id '%s'" % ( z_inst.uci.credentials.provider.type, td.seconds, z_inst.id ) )
604                        self.process_zombie( z_inst )
605                
606    def update_instance( self, inst ):
607        """
608        Update information in local database for given instance as it is obtained from cloud provider.
609        Along with updating information about given instance, information about the UCI controlling
610        this instance is also updated.
611        """
612        # Get credentials associated wit this instance
613        uci_id = inst.uci_id
614        uci = self.sa_session.query( model.UCI ).get( uci_id )
615        self.sa_session.refresh( uci )
616        conn = self.get_connection_from_uci( uci )
617        
618        # Get reservations handle for given instance
619        try:
620            rl= conn.get_all_instances( [inst.instance_id] )
621        except boto.exception.EC2ResponseError, e:
622            err = "Retrieving instance(s) from cloud failed for UCI '"+ uci.name +"' during general status update: " + str( e )
623            log.error( err )
624            uci.error = err
625            uci.state = uci_states.ERROR
626            self.sa_session.add( uci )
627            self.sa_session.flush()
628            return None
629
630        # Because references to reservations are deleted shortly after instances have been terminated, getting an empty list as a response to a query
631        # typically means the instance has successfully shut down but the check was not performed in short enough amount of time. Until an alternative solution
632        # is found, below code sets state of given UCI to 'error' to indicate to the user something out of ordinary happened.
633        if len( rl ) == 0:
634            err = "Instance ID '"+inst.instance_id+"' was not found by the cloud provider. Instance might have crashed or otherwise been terminated."+ \
635                "Manual check is recommended."
636            log.error( err )
637            inst.error = err
638            uci.error = err
639            inst.state = instance_states.TERMINATED
640            uci.state = uci_states.ERROR
641            uci.launch_time = None
642            self.sa_session.add( inst )
643            self.sa_session.add( uci )
644            self.sa_session.flush()
645        # Update instance status in local DB with info from cloud provider
646        for r in rl:
647            for i, cInst in enumerate( r.instances ):
648                try:
649                    s = cInst.update()
650                    log.debug( "Checking state of cloud instance '%s' associated with UCI '%s' and reservation '%s'. State='%s'" % ( cInst, uci.name, r, s ) )
651                    if  s != inst.state:
652                        inst.state = s
653                        self.sa_session.add( inst )
654                        self.sa_session.flush()
655                         # After instance has shut down, ensure UCI is marked as 'available'
656                        if s == instance_states.TERMINATED and uci.state != uci_states.ERROR:
657                            uci.state = uci_states.AVAILABLE
658                            uci.launch_time = None
659                            self.sa_session.add( uci )
660                            self.sa_session.flush()
661                    # Making sure state of UCI is updated. Once multiple instances become associated with single UCI, this will need to be changed.
662                    if s != uci.state and s != instance_states.TERMINATED: 
663                        uci.state = s                    
664                        self.sa_session.add( uci )
665                        self.sa_session.flush()
666                    if cInst.public_dns_name != inst.public_dns:
667                        inst.public_dns = cInst.public_dns_name
668                        self.sa_session.add( inst )
669                        self.sa_session.flush()
670                    if cInst.private_dns_name != inst.private_dns:
671                        inst.private_dns = cInst.private_dns_name
672                        self.sa_session.add( inst )
673                        self.sa_session.flush()
674                except boto.exception.EC2ResponseError, e:
675                    err = "Updating instance status from cloud failed for UCI '"+ uci.name + "' during general status update: " + str( e )
676                    log.error( err )
677                    uci.error = err
678                    uci.state = uci_states.ERROR
679                    self.sa_session.add( uci )
680                    self.sa_session.flush()
681                    return None
682                
683    def update_store( self, store ):
684        """
685        Update information in local database for given storage volume as it is obtained from cloud provider.
686        Along with updating information about given storage volume, information about the UCI controlling
687        this storage volume is also updated.
688        """
689        # Get credentials associated wit this store
690        uci_id = store.uci_id
691        uci = self.sa_session.query( model.UCI ).get( uci_id )
692        self.sa_session.refresh( uci )
693        conn = self.get_connection_from_uci( uci )
694        
695        if store.volume_id != None:
696            # Get reservations handle for given store 
697            try:
698                log.debug( "Updating storage volume command: vl = conn.get_all_volumes( [%s] )" % store.volume_id )
699                vl = conn.get_all_volumes( [store.volume_id] )
700            except boto.exception.EC2ResponseError, e:
701                err = "Retrieving volume(s) from cloud failed for UCI '"+ uci.name + "' during general status update: " + str( e )
702                log.error( err )
703                uci.error = err
704                uci.state = uci_states.ERROR
705                self.sa_session.add( uci )
706                self.sa_session.flush()
707                return None
708            
709            # Update store status in local DB with info from cloud provider
710            if len(vl) > 0:
711                try:
712                    log.debug( "Storage volume '%s' current status: '%s'" % (store.volume_id, vl[0].status ) )
713                    if store.status != vl[0].status:
714                        # In case something failed during creation of UCI but actual storage volume was created and yet 
715                        #  UCI state remained as 'new', try to remedy this by updating UCI state here 
716                        if ( store.status == None ) and ( store.volume_id != None ):
717                            uci.state = vl[0].status
718                            self.sa_session.add( uci )
719                            self.sa_session.flush()
720                        # If UCI was marked in state 'CREATING', update its status to reflect new status
721                        elif ( uci.state == uci_states.CREATING ):
722                            # Because Eucalyptus Public Cloud (EPC) deletes volumes immediately after they are created, artificially
723                            # set status of given UCI to 'available' based on storage volume's availability zone (i.e., it's residing
724                            # in EPC as opposed to some other Eucalyptus based cloud that allows creation of storage volumes.
725                            if store.availability_zone == 'epc':
726                                uci.state = uci_states.AVAILABLE
727                            else:
728                                uci.state = vl[0].status
729
730                            self.sa_session.add( uci )
731                            self.sa_session.flush()
732                                
733                        store.status = vl[0].status
734                        self.sa_session.add( store )
735                        self.sa_session.flush()
736                    if store.inst != None:
737                        if store.inst.instance_id != vl[0].instance_id:
738                            store.inst.instance_id = vl[0].instance_id
739                            self.sa_session.add( store )
740                            self.sa_session.flush()
741                    if store.attach_time != vl[0].attach_time:
742                        store.attach_time = vl[0].attach_time
743                        self.sa_session.add( store )
744                        self.sa_session.flush()
745                    if store.device != vl[0].device:
746                        store.device = vl[0].device
747                        self.sa_session.add( store )
748                        self.sa_session.flush()
749                except boto.exception.EC2ResponseError, e:
750                    err = "Updating status of volume(s) from cloud failed for UCI '"+ uci.name + "' during general status update: " + str( e )
751                    log.error( err )
752                    uci.error = err
753                    uci.state = uci_states.ERROR
754                    self.sa_session.add( uci )
755                    self.sa_session.flush()
756                    return None
757            else:
758                err = "No storage volumes returned by cloud provider on general update"
759                log.error( "%s for UCI '%s'" % ( err, uci.name ) )
760                store.status = store_status.ERROR
761                store.error = err
762                uci.error = err
763                uci.state = uci_states.ERROR
764                self.sa_session.add( uci )
765                self.sa_session.add( store )
766                self.sa_session.flush()
767        else:
768            err = "Missing storage volume ID in local database on general update. Manual check is needed to check " \
769                  "if storage volume was actually created by cloud provider."
770            log.error( "%s (for UCI '%s')" % ( err, uci.name ) )
771            store.status = store_status.ERROR
772            store.error = err
773            uci.error = err
774            uci.state = uci_states.ERROR
775            self.sa_session.add( uci )
776            self.sa_session.add( store )
777            self.sa_session.flush()
778   
779    def update_snapshot( self, snapshot ):
780        """
781        Update information in local database for given snapshot as it is obtained from cloud provider.
782        Along with updating information about given snapshot, information about the UCI controlling
783        this snapshot is also updated.
784        """
785        # Get credentials associated wit this store
786        uci_id = snapshot.uci_id
787        uci = self.sa_session.query( model.UCI ).get( uci_id )
788        self.sa_session.refresh( uci )
789        conn = self.get_connection_from_uci( uci )
790        
791        try:
792            log.debug( "Updating status of snapshot '%s'" % snapshot.snapshot_id )
793            snap = conn.get_all_snapshots( [snapshot.snapshot_id] ) 
794            if len( snap ) > 0:
795                log.debug( "Snapshot '%s' status: %s" % ( snapshot.snapshot_id, snap[0].status ) )
796                snapshot.status = snap[0].status
797                self.sa_session.add( snapshot )
798                self.sa_session.flush()
799            else:
800                err = "No snapshots returned by EC2 on general update"
801                log.error( "%s for UCI '%s'" % ( err, uci.name ) )
802                snapshot.status = snapshot_status.ERROR
803                snapshot.error = err
804                uci.error = err
805                uci.state = uci_states.ERROR
806                self.sa_session.add( uci )
807                self.sa_session.add( snapshot )
808                self.sa_session.flush()
809        except boto.exception.EC2ResponseError, e:
810            err = "EC2 response error while updating snapshot status: " + str( e )
811            log.error( err )
812            snapshot.status = snapshot_status.ERROR
813            snapshot.error = err
814            uci.error = err
815            uci.state = uci_states.ERROR
816            self.sa_session.add( uci )
817            self.sa_session.add( snapshot )
818            self.sa_session.flush()
819        except Exception, ex:
820            err = "Error while updating snapshot status: " + str( ex )
821            log.error( err )
822            snapshot.status = snapshot_status.ERROR
823            snapshot.error = err
824            uci.error = err
825            uci.state = uci_states.ERROR
826            self.sa_session.add( uci )
827            self.sa_session.add( snapshot )
828            self.sa_session.flush()
829        
830    def delete_snapshot( self, snapshot ):
831        """
832        Initiate deletion of given snapshot from cloud provider.
833        """
834        if snapshot.status == snapshot_status.DELETE:
835            # Get credentials associated wit this store
836            uci_id = snapshot.uci_id
837            uci = self.sa_session.query( model.UCI ).get( uci_id )
838            self.sa_session.refresh( uci )
839            conn = self.get_connection_from_uci( uci )
840            
841            try:
842                log.debug( "Deleting snapshot '%s'" % snapshot.snapshot_id )
843                snap = conn.delete_snapshot( snapshot.snapshot_id )
844                if snap == True:
845                    snapshot.deleted = True
846                    snapshot.status = snapshot_status.DELETED
847                    self.sa_session.add( snapshot )
848                    self.sa_session.flush()
849                return snap
850            except boto.exception.EC2ResponseError, e:
851                err = "EC2 response error while deleting snapshot: " + str( e )
852                log.error( err )
853                snapshot.status = snapshot_status.ERROR
854                snapshot.error = err
855                uci.error = err
856                uci.state = uci_states.ERROR
857                self.sa_session.add( uci )
858                self.sa_session.add( snapshot )
859                self.sa_session.flush()
860            except Exception, ex:
861                err = "Error while deleting snapshot: " + str( ex )
862                log.error( err )
863                snapshot.status = snapshot_status.ERROR
864                snapshot.error = err
865                uci.error = err
866                uci.state = uci_states.ERROR
867                self.sa_session.add( uci )
868                self.sa_session.add( snapshot )
869                self.sa_session.flush()
870        else:
871            err = "Cannot delete snapshot '"+snapshot.snapshot_id+"' because its status is '"+snapshot.status+"'. Only snapshots with '" + \
872                        snapshot_status.COMPLETED+"' status can be deleted."
873            log.error( err )
874            snapshot.error = err
875            self.sa_session.add( snapshot )
876            self.sa_session.flush()
877            
878    def process_zombie( self, inst ):
879        """
880        Attempt at discovering if starting a cloud instance was successful but local database was not updated
881        accordingly or if something else failed and instance was never started. Currently, no automatic 
882        repairs are being attempted; instead, appropriate error messages are set.
883        """
884        uci_id = inst.uci_id
885        uci = self.sa_session.query( model.UCI ).get( uci_id )
886        self.sa_session.refresh( uci )
887        
888        # Check if any instance-specific information was written to local DB; if 'yes', set instance and UCI's error message 
889        # suggesting manual check.
890        if inst.launch_time != None or inst.reservation_id != None or inst.instance_id != None:
891            # Try to recover state - this is best-case effort, so if something does not work immediately, not
892            # recovery steps are attempted. Recovery is based on hope that instance_id is available in local DB; if not,
893            # report as error.
894            # Fields attempting to be recovered are: reservation_id, instance status, and launch_time 
895            if inst.instance_id != None:
896                conn = self.get_connection_from_uci( uci )
897                rl = conn.get_all_instances( [inst.instance_id] ) # reservation list
898                # Update local DB with relevant data from instance
899                if inst.reservation_id == None:
900                    try:
901                        inst.reservation_id = str(rl[0]).split(":")[1]
902                    except: # something failed, so skip
903                        pass
904                
905                try:
906                    state = rl[0].instances[0].update()
907                    inst.state = state
908                    uci.state = state
909                    self.sa_session.add( inst )
910                    self.sa_session.add( uci )
911                    self.sa_session.flush()
912                except: # something failed, so skip
913                    pass
914                
915                if inst.launch_time == None:
916                    try:
917                        launch_time = self.format_time( rl[0].instances[0].launch_time )
918                        inst.launch_time = launch_time
919                        self.sa_session.add( inst )
920                        self.sa_session.flush() 
921                        if inst.uci.launch_time == None:
922                            uci.launch_time = launch_time
923                            self.sa_session.add( uci )
924                            self.sa_session.flush()
925                    except: # something failed, so skip
926                        pass
927            else:
928                err = "Starting a machine instance (DB id: '"+str(inst.id)+"') associated with this UCI '" + str(inst.uci.name) + \
929                      "' seems to have failed. Because it appears that cloud instance might have gotten started, manual check is recommended."
930                inst.error = err
931                inst.state = instance_states.ERROR
932                inst.uci.error = err
933                inst.uci.state = uci_states.ERROR
934                log.error( err )
935                self.sa_session.add( inst )
936                self.sa_session.add( uci )
937                self.sa_session.flush()         
938                
939        else: #Instance most likely never got processed, so set error message suggesting user to try starting instance again.
940            err = "Starting a machine instance (DB id: '"+str(inst.id)+"') associated with this UCI '" + str(inst.uci.name) + \
941                  "' seems to have failed. Because it appears that cloud instance never got started, it should be safe to reset state and try " \
942                  "starting the instance again."
943            inst.error = err
944            inst.state = instance_states.ERROR
945            uci.error = err
946            uci.state = uci_states.ERROR
947            log.error( err )
948            self.sa_session.add( inst )
949            self.sa_session.add( uci )
950            self.sa_session.flush()
951#            uw = UCIwrapper( inst.uci )
952#            log.debug( "Try automatically re-submitting UCI '%s'." % uw.get_name() )
953
954    def get_connection_from_uci( self, uci ):
955        """
956        Establish and return connection to cloud provider. Information needed to do so is obtained
957        directly from uci database object.
958        """
959        log.debug( 'Establishing %s cloud connection' % self.type )
960        a_key = uci.credentials.access_key
961        s_key = uci.credentials.secret_key
962        # Get connection
963        try:
964            region = RegionInfo( None, uci.credentials.provider.region_name, uci.credentials.provider.region_endpoint )
965#            log.debug( "[%s] Using following command to connect to cl…

Large files are truncated, but you can click here to view the full file